├── doc ├── bzip2 │ ├── bwt.psd │ ├── mtf.psd │ ├── bitmap.psd │ ├── diagram.psd │ ├── hexdump.psd │ ├── bwt-decode.png │ ├── bwt-encode.png │ ├── mtf-decode.png │ ├── tree-example.png │ ├── bitmap-decode.png │ ├── bzip2-format.docx │ ├── hexdump-complex.png │ ├── hexdump-simple.png │ ├── stream-complex.png │ └── stream-simple.png ├── bzip2-format.pdf ├── xflate-format.pdf └── xflate │ ├── hexdump.psd │ ├── tree-hclen.png │ ├── hexdump-simple.png │ ├── stream-complex.png │ ├── stream-simple.png │ ├── tree-example.png │ ├── xflate-format.docx │ └── hexdump-complex.png ├── testdata ├── binary.bin ├── random.bin ├── repeats.bin ├── zeros.go ├── random.go ├── huffman.go └── repeats.go ├── brotli ├── testdata │ ├── monkey.br │ ├── lcet10.txt.br │ ├── mapsdatazrh │ ├── ukkonooa.br │ ├── alice29.txt.br │ ├── asyoulik.txt.br │ ├── compressed_file │ ├── mapsdatazrh.br │ ├── plrabn12.txt.br │ ├── compressed_file.br │ ├── digits-best-1e4.br │ ├── digits-best-1e5.br │ ├── digits-best-1e6.br │ ├── random_org_10k.bin │ ├── twain-best-1e4.br │ ├── twain-best-1e5.br │ ├── twain-best-1e6.br │ ├── twain-speed-1e4.br │ ├── twain-speed-1e5.br │ ├── twain-speed-1e6.br │ ├── compressed_repeated │ ├── digits-speed-1e4.br │ ├── digits-speed-1e5.br │ ├── digits-speed-1e6.br │ ├── twain-default-1e4.br │ ├── twain-default-1e5.br │ ├── twain-default-1e6.br │ ├── compressed_repeated.br │ ├── digits-default-1e4.br │ ├── digits-default-1e5.br │ ├── digits-default-1e6.br │ ├── random_org_10k.bin.br │ ├── ukkonooa │ └── monkey ├── dict_encoder.go ├── prefix_test.go ├── writer_test.go ├── dict_encoder_test.go ├── prefix_encoder.go ├── bit_writer.go ├── writer.go ├── brotli_test.go ├── common_test.go ├── transform_test.go ├── common.go ├── dict_decoder.go ├── context.go └── dict_decoder_test.go ├── bzip2 ├── testdata │ ├── silesia_xray.bin │ ├── silesia_xray.bwt │ ├── gauntlet_test3.bin │ ├── gauntlet_test3.bwt │ ├── silesia_ooffice.bin │ ├── silesia_ooffice.bwt │ ├── testfiles_test3.bin │ ├── 
testfiles_test3.bwt │ ├── testfiles_test4.bin │ └── testfiles_test4.bwt ├── fuzz_off.go ├── writer_test.go ├── internal │ └── sais │ │ └── common.go ├── prefix_test.go ├── fuzz_on.go ├── rle1.go ├── common_test.go ├── common.go ├── bwt.go ├── mtf_rle2.go ├── rle1_test.go ├── bwt_test.go ├── bzip2_test.go └── mtf_rle2_test.go ├── go.mod ├── internal ├── gofuzz.go ├── debug.go ├── cgo │ ├── README.md │ ├── flate │ │ └── flate.go │ ├── zstd │ │ └── zstd.go │ ├── lzma │ │ └── lzma.go │ └── bzip2 │ │ └── bzip2.go ├── tool │ ├── bench │ │ ├── cgo_lzma.go │ │ ├── cgo_zstd.go │ │ ├── cgo_bzip2.go │ │ ├── cgo_flate.go │ │ ├── cgo_brotli.go │ │ ├── lib_kp.go │ │ ├── lib_std.go │ │ ├── lib_ds.go │ │ ├── lib_uk.go │ │ └── codec_test.go │ ├── bitgen │ │ └── main.go │ └── fuzz │ │ ├── fuzz.sh │ │ ├── bzip2_bwt │ │ └── bzip2_bwt.go │ │ ├── brotli │ │ └── brotli.go │ │ ├── xflate_meta │ │ └── xflate_meta.go │ │ └── bzip2 │ │ └── bzip2.go ├── release.go ├── testutil │ ├── rand.go │ ├── util_test.go │ └── util.go ├── prefix │ ├── encoder.go │ ├── range.go │ ├── wrap.go │ ├── debug.go │ ├── decoder.go │ └── writer.go ├── common.go └── errors │ └── errors.go ├── zfuzz.sh ├── zbench.sh ├── xflate ├── meta_fuzz.go ├── xflate_test.go ├── flate.go └── index.go ├── go.sum ├── flate ├── common.go └── dict_decoder.go ├── .travis.yml ├── LICENSE.md ├── zprof.sh ├── ztest.sh ├── api.go └── README.md /doc/bzip2/bwt.psd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/doc/bzip2/bwt.psd -------------------------------------------------------------------------------- /doc/bzip2/mtf.psd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/doc/bzip2/mtf.psd -------------------------------------------------------------------------------- /testdata/binary.bin: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/testdata/binary.bin -------------------------------------------------------------------------------- /testdata/random.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/testdata/random.bin -------------------------------------------------------------------------------- /doc/bzip2-format.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/doc/bzip2-format.pdf -------------------------------------------------------------------------------- /doc/bzip2/bitmap.psd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/doc/bzip2/bitmap.psd -------------------------------------------------------------------------------- /doc/bzip2/diagram.psd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/doc/bzip2/diagram.psd -------------------------------------------------------------------------------- /doc/bzip2/hexdump.psd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/doc/bzip2/hexdump.psd -------------------------------------------------------------------------------- /doc/xflate-format.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/doc/xflate-format.pdf -------------------------------------------------------------------------------- /testdata/repeats.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/testdata/repeats.bin 
-------------------------------------------------------------------------------- /doc/bzip2/bwt-decode.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/doc/bzip2/bwt-decode.png -------------------------------------------------------------------------------- /doc/bzip2/bwt-encode.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/doc/bzip2/bwt-encode.png -------------------------------------------------------------------------------- /doc/bzip2/mtf-decode.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/doc/bzip2/mtf-decode.png -------------------------------------------------------------------------------- /doc/xflate/hexdump.psd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/doc/xflate/hexdump.psd -------------------------------------------------------------------------------- /brotli/testdata/monkey.br: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/brotli/testdata/monkey.br -------------------------------------------------------------------------------- /doc/bzip2/tree-example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/doc/bzip2/tree-example.png -------------------------------------------------------------------------------- /doc/xflate/tree-hclen.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/doc/xflate/tree-hclen.png -------------------------------------------------------------------------------- /brotli/testdata/lcet10.txt.br: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/brotli/testdata/lcet10.txt.br -------------------------------------------------------------------------------- /brotli/testdata/mapsdatazrh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/brotli/testdata/mapsdatazrh -------------------------------------------------------------------------------- /brotli/testdata/ukkonooa.br: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/brotli/testdata/ukkonooa.br -------------------------------------------------------------------------------- /doc/bzip2/bitmap-decode.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/doc/bzip2/bitmap-decode.png -------------------------------------------------------------------------------- /doc/bzip2/bzip2-format.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/doc/bzip2/bzip2-format.docx -------------------------------------------------------------------------------- /doc/bzip2/hexdump-complex.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/doc/bzip2/hexdump-complex.png -------------------------------------------------------------------------------- /doc/bzip2/hexdump-simple.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/doc/bzip2/hexdump-simple.png -------------------------------------------------------------------------------- /doc/bzip2/stream-complex.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/doc/bzip2/stream-complex.png -------------------------------------------------------------------------------- /doc/bzip2/stream-simple.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/doc/bzip2/stream-simple.png -------------------------------------------------------------------------------- /doc/xflate/hexdump-simple.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/doc/xflate/hexdump-simple.png -------------------------------------------------------------------------------- /doc/xflate/stream-complex.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/doc/xflate/stream-complex.png -------------------------------------------------------------------------------- /doc/xflate/stream-simple.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/doc/xflate/stream-simple.png -------------------------------------------------------------------------------- /doc/xflate/tree-example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/doc/xflate/tree-example.png -------------------------------------------------------------------------------- /doc/xflate/xflate-format.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/doc/xflate/xflate-format.docx -------------------------------------------------------------------------------- /brotli/testdata/alice29.txt.br: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/brotli/testdata/alice29.txt.br -------------------------------------------------------------------------------- /brotli/testdata/asyoulik.txt.br: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/brotli/testdata/asyoulik.txt.br -------------------------------------------------------------------------------- /brotli/testdata/compressed_file: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/brotli/testdata/compressed_file -------------------------------------------------------------------------------- /brotli/testdata/mapsdatazrh.br: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/brotli/testdata/mapsdatazrh.br -------------------------------------------------------------------------------- /brotli/testdata/plrabn12.txt.br: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/brotli/testdata/plrabn12.txt.br -------------------------------------------------------------------------------- /bzip2/testdata/silesia_xray.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/bzip2/testdata/silesia_xray.bin -------------------------------------------------------------------------------- /bzip2/testdata/silesia_xray.bwt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/bzip2/testdata/silesia_xray.bwt -------------------------------------------------------------------------------- /doc/xflate/hexdump-complex.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/doc/xflate/hexdump-complex.png -------------------------------------------------------------------------------- /brotli/testdata/compressed_file.br: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/brotli/testdata/compressed_file.br -------------------------------------------------------------------------------- /brotli/testdata/digits-best-1e4.br: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/brotli/testdata/digits-best-1e4.br -------------------------------------------------------------------------------- /brotli/testdata/digits-best-1e5.br: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/brotli/testdata/digits-best-1e5.br -------------------------------------------------------------------------------- /brotli/testdata/digits-best-1e6.br: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/brotli/testdata/digits-best-1e6.br -------------------------------------------------------------------------------- /brotli/testdata/random_org_10k.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/brotli/testdata/random_org_10k.bin -------------------------------------------------------------------------------- /brotli/testdata/twain-best-1e4.br: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/brotli/testdata/twain-best-1e4.br -------------------------------------------------------------------------------- 
/brotli/testdata/twain-best-1e5.br: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/brotli/testdata/twain-best-1e5.br -------------------------------------------------------------------------------- /brotli/testdata/twain-best-1e6.br: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/brotli/testdata/twain-best-1e6.br -------------------------------------------------------------------------------- /brotli/testdata/twain-speed-1e4.br: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/brotli/testdata/twain-speed-1e4.br -------------------------------------------------------------------------------- /brotli/testdata/twain-speed-1e5.br: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/brotli/testdata/twain-speed-1e5.br -------------------------------------------------------------------------------- /brotli/testdata/twain-speed-1e6.br: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/brotli/testdata/twain-speed-1e6.br -------------------------------------------------------------------------------- /bzip2/testdata/gauntlet_test3.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/bzip2/testdata/gauntlet_test3.bin -------------------------------------------------------------------------------- /bzip2/testdata/gauntlet_test3.bwt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/bzip2/testdata/gauntlet_test3.bwt 
-------------------------------------------------------------------------------- /bzip2/testdata/silesia_ooffice.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/bzip2/testdata/silesia_ooffice.bin -------------------------------------------------------------------------------- /bzip2/testdata/silesia_ooffice.bwt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/bzip2/testdata/silesia_ooffice.bwt -------------------------------------------------------------------------------- /bzip2/testdata/testfiles_test3.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/bzip2/testdata/testfiles_test3.bin -------------------------------------------------------------------------------- /bzip2/testdata/testfiles_test3.bwt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/bzip2/testdata/testfiles_test3.bwt -------------------------------------------------------------------------------- /bzip2/testdata/testfiles_test4.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/bzip2/testdata/testfiles_test4.bin -------------------------------------------------------------------------------- /bzip2/testdata/testfiles_test4.bwt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/bzip2/testdata/testfiles_test4.bwt -------------------------------------------------------------------------------- /brotli/testdata/compressed_repeated: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/rsc/compress/master/brotli/testdata/compressed_repeated -------------------------------------------------------------------------------- /brotli/testdata/digits-speed-1e4.br: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/brotli/testdata/digits-speed-1e4.br -------------------------------------------------------------------------------- /brotli/testdata/digits-speed-1e5.br: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/brotli/testdata/digits-speed-1e5.br -------------------------------------------------------------------------------- /brotli/testdata/digits-speed-1e6.br: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/brotli/testdata/digits-speed-1e6.br -------------------------------------------------------------------------------- /brotli/testdata/twain-default-1e4.br: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/brotli/testdata/twain-default-1e4.br -------------------------------------------------------------------------------- /brotli/testdata/twain-default-1e5.br: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/brotli/testdata/twain-default-1e5.br -------------------------------------------------------------------------------- /brotli/testdata/twain-default-1e6.br: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/brotli/testdata/twain-default-1e6.br -------------------------------------------------------------------------------- /brotli/testdata/compressed_repeated.br: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/brotli/testdata/compressed_repeated.br -------------------------------------------------------------------------------- /brotli/testdata/digits-default-1e4.br: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/brotli/testdata/digits-default-1e4.br -------------------------------------------------------------------------------- /brotli/testdata/digits-default-1e5.br: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/brotli/testdata/digits-default-1e5.br -------------------------------------------------------------------------------- /brotli/testdata/digits-default-1e6.br: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/brotli/testdata/digits-default-1e6.br -------------------------------------------------------------------------------- /brotli/testdata/random_org_10k.bin.br: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsc/compress/master/brotli/testdata/random_org_10k.bin.br -------------------------------------------------------------------------------- /brotli/testdata/ukkonooa: -------------------------------------------------------------------------------- 1 | ukko nooa, ukko nooa oli kunnon mies, kun han meni saunaan, pisti laukun naulaan, ukko nooa, ukko nooa oli kunnon mies. -------------------------------------------------------------------------------- /brotli/dict_encoder.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 
2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | package brotli 6 | -------------------------------------------------------------------------------- /brotli/prefix_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | package brotli 6 | -------------------------------------------------------------------------------- /brotli/writer_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | package brotli 6 | -------------------------------------------------------------------------------- /brotli/dict_encoder_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | package brotli 6 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/dsnet/compress 2 | 3 | go 1.9 4 | 5 | require ( 6 | github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780 7 | github.com/klauspost/compress v1.4.1 8 | github.com/klauspost/cpuid v1.2.0 // indirect 9 | github.com/ulikunitz/xz v0.5.6 10 | ) 11 | -------------------------------------------------------------------------------- /internal/gofuzz.go: -------------------------------------------------------------------------------- 1 | // Copyright 2016, Joe Tsai. All rights reserved. 
2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | // +build gofuzz 6 | 7 | package internal 8 | 9 | const ( 10 | Debug = true 11 | GoFuzz = true 12 | ) 13 | -------------------------------------------------------------------------------- /internal/debug.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | // +build debug,!gofuzz 6 | 7 | package internal 8 | 9 | const ( 10 | Debug = true 11 | GoFuzz = false 12 | ) 13 | -------------------------------------------------------------------------------- /brotli/prefix_encoder.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | package brotli 6 | 7 | type prefixEncoder struct{} 8 | 9 | func (pe *prefixEncoder) Init(codes []prefixCode) {} 10 | -------------------------------------------------------------------------------- /zfuzz.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Copyright 2017, Joe Tsai. All rights reserved. 4 | # Use of this source code is governed by a BSD-style 5 | # license that can be found in the LICENSE.md file. 6 | 7 | # zfuzz wraps internal/tool/fuzz and is useful for fuzz testing each of 8 | # the implementations in this repository. 9 | cd $(dirname "${BASH_SOURCE[0]}")/internal/tool/fuzz 10 | ./fuzz.sh "$@" 11 | -------------------------------------------------------------------------------- /bzip2/fuzz_off.go: -------------------------------------------------------------------------------- 1 | // Copyright 2016, Joe Tsai. All rights reserved. 
2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | // +build !gofuzz 6 | 7 | // This file exists to suppress fuzzing details from release builds. 8 | 9 | package bzip2 10 | 11 | type fuzzReader struct{} 12 | 13 | func (*fuzzReader) updateChecksum(int64, uint32) {} 14 | -------------------------------------------------------------------------------- /internal/cgo/README.md: -------------------------------------------------------------------------------- 1 | **Note:** The cgo directory contains a collection of cgo wrappers over the 2 | canonical C implementation for each compression format. These cgo wrappers are 3 | only used by the fuzzer and bench tools to test for correctness and performance 4 | of the Go implementations relative to the C implementations. 5 | There are no unit tests for each wrapper since they are thoroughly tested by 6 | the aforementioned tools. 7 | -------------------------------------------------------------------------------- /internal/tool/bench/cgo_lzma.go: -------------------------------------------------------------------------------- 1 | // Copyright 2016, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | // +build cgo,!no_cgo_lzma 6 | 7 | package main 8 | 9 | import "github.com/dsnet/compress/internal/cgo/lzma" 10 | 11 | func init() { 12 | RegisterEncoder(FormatLZMA2, "cgo", lzma.NewWriter) 13 | RegisterDecoder(FormatLZMA2, "cgo", lzma.NewReader) 14 | } 15 | -------------------------------------------------------------------------------- /internal/tool/bench/cgo_zstd.go: -------------------------------------------------------------------------------- 1 | // Copyright 2016, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 
4 | 5 | // +build cgo,!no_cgo_zstd 6 | 7 | package main 8 | 9 | import "github.com/dsnet/compress/internal/cgo/zstd" 10 | 11 | func init() { 12 | RegisterEncoder(FormatZstd, "cgo", zstd.NewWriter) 13 | RegisterDecoder(FormatZstd, "cgo", zstd.NewReader) 14 | } 15 | -------------------------------------------------------------------------------- /internal/tool/bench/cgo_bzip2.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | // +build cgo,!no_cgo_bzip2 6 | 7 | package main 8 | 9 | import "github.com/dsnet/compress/internal/cgo/bzip2" 10 | 11 | func init() { 12 | RegisterEncoder(FormatBZ2, "cgo", bzip2.NewWriter) 13 | RegisterDecoder(FormatBZ2, "cgo", bzip2.NewReader) 14 | } 15 | -------------------------------------------------------------------------------- /internal/tool/bench/cgo_flate.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | // +build cgo,!no_cgo_flate 6 | 7 | package main 8 | 9 | import "github.com/dsnet/compress/internal/cgo/flate" 10 | 11 | func init() { 12 | RegisterEncoder(FormatFlate, "cgo", flate.NewWriter) 13 | RegisterDecoder(FormatFlate, "cgo", flate.NewReader) 14 | } 15 | -------------------------------------------------------------------------------- /internal/tool/bench/cgo_brotli.go: -------------------------------------------------------------------------------- 1 | // Copyright 2016, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 
4 | 5 | // +build cgo,!no_cgo_brotli 6 | 7 | package main 8 | 9 | import "github.com/dsnet/compress/internal/cgo/brotli" 10 | 11 | func init() { 12 | RegisterEncoder(FormatBrotli, "cgo", brotli.NewWriter) 13 | RegisterDecoder(FormatBrotli, "cgo", brotli.NewReader) 14 | } 15 | -------------------------------------------------------------------------------- /zbench.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Copyright 2017, Joe Tsai. All rights reserved. 4 | # Use of this source code is governed by a BSD-style 5 | # license that can be found in the LICENSE.md file. 6 | 7 | # zbench wraps internal/tool/bench and is useful for comparing benchmarks from 8 | # the implementations in this repository relative to other implementations. 9 | # 10 | # See internal/tool/bench/main.go for more details. 11 | cd $(dirname "${BASH_SOURCE[0]}")/internal/tool/bench 12 | go run $(go list -f '{{ join .GoFiles "\n" }}') "$@" 13 | -------------------------------------------------------------------------------- /xflate/meta_fuzz.go: -------------------------------------------------------------------------------- 1 | // Copyright 2016, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | // +build gofuzz 6 | 7 | // This file exists to export internal implementation details for fuzz testing. 8 | 9 | package xflate 10 | 11 | import ( 12 | "io" 13 | 14 | "github.com/dsnet/compress/xflate/internal/meta" 15 | ) 16 | 17 | func NewMetaReader(r io.Reader) *meta.Reader { 18 | return meta.NewReader(r) 19 | } 20 | 21 | func NewMetaWriter(r io.Writer) *meta.Writer { 22 | return meta.NewWriter(r) 23 | } 24 | -------------------------------------------------------------------------------- /testdata/zeros.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. 
All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | // +build ignore 6 | 7 | //go:generate go run zeros.go 8 | 9 | // Generates zeros.bin. This test file contains zeroed data throughout and 10 | // tests the best case compression scenario. 11 | package main 12 | 13 | import "io/ioutil" 14 | 15 | const ( 16 | name = "zeros.bin" 17 | size = 1 << 18 18 | ) 19 | 20 | func main() { 21 | b := make([]byte, size) 22 | if err := ioutil.WriteFile(name, b[:size], 0664); err != nil { 23 | panic(err) 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /internal/tool/bench/lib_kp.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | // +build !no_lib_kp 6 | 7 | package main 8 | 9 | import ( 10 | "io" 11 | 12 | "github.com/klauspost/compress/flate" 13 | ) 14 | 15 | func init() { 16 | RegisterEncoder(FormatFlate, "kp", 17 | func(w io.Writer, lvl int) io.WriteCloser { 18 | zw, err := flate.NewWriter(w, lvl) 19 | if err != nil { 20 | panic(err) 21 | } 22 | return zw 23 | }) 24 | RegisterDecoder(FormatFlate, "kp", 25 | func(r io.Reader) io.ReadCloser { 26 | return flate.NewReader(r) 27 | }) 28 | } 29 | -------------------------------------------------------------------------------- /internal/tool/bitgen/main.go: -------------------------------------------------------------------------------- 1 | // Copyright 2016, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | // BitGen to generate a binary from a BitGen formatting input. 6 | // It accepts the BitGen format from stdin and outputs to stdout. 
7 | package main 8 | 9 | import ( 10 | "io/ioutil" 11 | "os" 12 | 13 | "github.com/dsnet/compress/internal/testutil" 14 | ) 15 | 16 | func main() { 17 | buf, err := ioutil.ReadAll(os.Stdin) 18 | if err != nil { 19 | panic(err) 20 | } 21 | 22 | buf = testutil.MustDecodeBitGen(string(buf)) 23 | 24 | _, err = os.Stdout.Write(buf) 25 | if err != nil { 26 | panic(err) 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780 h1:tFh1tRc4CA31yP6qDcu+Trax5wW5GuMxvkIba07qVLY= 2 | github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY= 3 | github.com/klauspost/compress v1.4.1 h1:8VMb5+0wMgdBykOV96DwNwKFQ+WTI4pzYURP99CcB9E= 4 | github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= 5 | github.com/klauspost/cpuid v1.2.0 h1:NMpwD2G9JSFOE1/TJjGSo5zG7Yb2bTe7eq1jH+irmeE= 6 | github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= 7 | github.com/ulikunitz/xz v0.5.6 h1:jGHAfXawEGZQ3blwU5wnWKQJvAraT7Ftq9EXjnXYgt8= 8 | github.com/ulikunitz/xz v0.5.6/go.mod h1:2bypXElzHzzJZwzH67Y6wb67pO62Rzfn7BSiF4ABRW8= 9 | -------------------------------------------------------------------------------- /brotli/bit_writer.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 
4 | 5 | package brotli 6 | 7 | import "io" 8 | 9 | type bitWriter struct { 10 | wr io.Writer 11 | offset int64 // Number of bytes written to underlying io.Writer 12 | } 13 | 14 | func (bw *bitWriter) Init(w io.Writer) { 15 | return 16 | } 17 | 18 | func (bw *bitWriter) Write(buf []byte) (int, error) { 19 | return 0, nil 20 | } 21 | 22 | func (bw *bitWriter) WriteBits(val, nb uint) { 23 | return 24 | } 25 | 26 | func (bw *bitWriter) WritePads() { 27 | return 28 | } 29 | 30 | func (bw *bitWriter) WriteSymbol(pe *prefixEncoder, sym uint) { 31 | return 32 | } 33 | -------------------------------------------------------------------------------- /testdata/random.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | // +build ignore 6 | 7 | //go:generate go run random.go 8 | 9 | // Generates random.bin. This test file contains random data throughout and 10 | // tests the worst case compression scenario. 
11 | package main 12 | 13 | import ( 14 | "io/ioutil" 15 | "math/rand" 16 | ) 17 | 18 | const ( 19 | name = "random.bin" 20 | size = 1 << 18 21 | ) 22 | 23 | func main() { 24 | var b []byte 25 | r := rand.New(rand.NewSource(0)) 26 | 27 | for i := 0; i < size; i++ { 28 | b = append(b, byte(r.Int())) 29 | } 30 | if err := ioutil.WriteFile(name, b[:size], 0664); err != nil { 31 | panic(err) 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /brotli/testdata/monkey: -------------------------------------------------------------------------------- 1 | znxcvnmz,xvnm.,zxcnv.,xcn.z,vn.zvn.zxcvn.,zxcn.vn.v,znm.,vnzx.,vnzxc.vn.z,vnz.,nv.z,nvmzxc,nvzxcvcnm.,vczxvnzxcnvmxc.zmcnvzm.,nvmc,nzxmc,vn.mnnmzxc,vnxcnmv,znvzxcnmv,.xcnvm,zxcnzxv.zx,qweryweurqioweupropqwutioweupqrioweutiopweuriopweuriopqwurioputiopqwuriowuqerioupqweropuweropqwurweuqriopuropqwuriopuqwriopuqweopruioqweurqweuriouqweopruioupqiytioqtyiowtyqptypryoqweutioioqtweqruowqeytiowquiourowetyoqwupiotweuqiorweuqroipituqwiorqwtioweuriouytuioerytuioweryuitoweytuiweyuityeruirtyuqriqweuropqweiruioqweurioqwuerioqwyuituierwotueryuiotweyrtuiwertyioweryrueioqptyioruyiopqwtjkasdfhlafhlasdhfjklashjkfhasjklfhklasjdfhklasdhfjkalsdhfklasdhjkflahsjdkfhklasfhjkasdfhasfjkasdhfklsdhalghhaf;hdklasfhjklashjklfasdhfasdjklfhsdjklafsd;hkldadfjjklasdhfjasddfjklfhakjklasdjfkl;asdjfasfljasdfhjklasdfhjkaghjkashf;djfklasdjfkljasdklfjklasdjfkljasdfkljaklfj -------------------------------------------------------------------------------- /internal/release.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | // +build !debug,!gofuzz 6 | 7 | package internal 8 | 9 | // Debug indicates whether the debug build tag was set. 
10 | // 11 | // If set, programs may choose to print with more human-readable 12 | // debug information and also perform sanity checks that would otherwise be too 13 | // expensive to run in a release build. 14 | const Debug = false 15 | 16 | // GoFuzz indicates whether the gofuzz build tag was set. 17 | // 18 | // If set, programs may choose to disable certain checks (like checksums) that 19 | // would be nearly impossible for gofuzz to properly get right. 20 | // If GoFuzz is set, it implies that Debug is set as well. 21 | const GoFuzz = false 22 | -------------------------------------------------------------------------------- /internal/tool/bench/lib_std.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | // +build !no_lib_std 6 | 7 | package main 8 | 9 | import ( 10 | "io" 11 | "io/ioutil" 12 | 13 | "compress/bzip2" 14 | "compress/flate" 15 | ) 16 | 17 | func init() { 18 | RegisterEncoder(FormatFlate, "std", 19 | func(w io.Writer, lvl int) io.WriteCloser { 20 | zw, err := flate.NewWriter(w, lvl) 21 | if err != nil { 22 | panic(err) 23 | } 24 | return zw 25 | }) 26 | RegisterDecoder(FormatFlate, "std", 27 | func(r io.Reader) io.ReadCloser { 28 | return flate.NewReader(r) 29 | }) 30 | RegisterDecoder(FormatBZ2, "std", 31 | func(r io.Reader) io.ReadCloser { 32 | return ioutil.NopCloser(bzip2.NewReader(r)) 33 | }) 34 | } 35 | -------------------------------------------------------------------------------- /brotli/writer.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 
4 | 5 | package brotli 6 | 7 | import "io" 8 | 9 | type writer struct { 10 | InputOffset int64 // Total number of bytes issued to Write 11 | OutputOffset int64 // Total number of bytes written to underlying io.Writer 12 | 13 | wr bitWriter // Output destination 14 | err error // Persistent error 15 | } 16 | 17 | type writerConfig struct { 18 | _ struct{} // Blank field to prevent unkeyed struct literals 19 | } 20 | 21 | func newWriter(w io.Writer, conf *writerConfig) (*writer, error) { 22 | return nil, nil 23 | } 24 | 25 | func (bw *writer) Write(buf []byte) (int, error) { 26 | return 0, nil 27 | } 28 | 29 | func (bw *writer) Close() error { 30 | return nil 31 | } 32 | 33 | func (bw *writer) Reset(w io.Writer) error { 34 | return nil 35 | } 36 | -------------------------------------------------------------------------------- /bzip2/writer_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 
4 | 5 | package bzip2 6 | 7 | import ( 8 | "bytes" 9 | "io" 10 | "io/ioutil" 11 | "testing" 12 | ) 13 | 14 | func BenchmarkEncode(b *testing.B) { 15 | runBenchmarks(b, func(b *testing.B, data []byte, lvl int) { 16 | b.StopTimer() 17 | b.ReportAllocs() 18 | 19 | br := new(bytes.Reader) 20 | wr, _ := NewWriter(nil, &WriterConfig{Level: lvl}) 21 | 22 | b.SetBytes(int64(len(data))) 23 | b.StartTimer() 24 | for i := 0; i < b.N; i++ { 25 | br.Reset(data) 26 | wr.Reset(ioutil.Discard) 27 | 28 | n, err := io.Copy(wr, br) 29 | if n != int64(len(data)) || err != nil { 30 | b.Fatalf("Copy() = (%d, %v), want (%d, nil)", n, err, len(data)) 31 | } 32 | if err := wr.Close(); err != nil { 33 | b.Fatalf("Close() = %v, want nil", err) 34 | } 35 | } 36 | }) 37 | } 38 | -------------------------------------------------------------------------------- /internal/tool/fuzz/fuzz.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | cd "$(dirname "${BASH_SOURCE[0]}")" 6 | 7 | if [ $# == 0 ]; then 8 | echo "Usage: $0 PKG" 9 | echo 10 | echo -e "Valid packages:\n\t$(ls -d */ | sed 's/\/*$//g' | tr '\n' ' ')" 11 | exit 1 12 | fi 13 | 14 | # Check that the fuzzing tools are available. 15 | for TOOL in go-fuzz go-fuzz-build; do 16 | command -v $TOOL >/dev/null 2>&1 || { 17 | echo "Aborting: could not locate $TOOL."; exit 1; 18 | } 19 | done 20 | 21 | # Clone the initial work directory if it does not exist. 22 | if [ ! -d ".work" ]; then 23 | echo "Fuzzing workdir does not exist." 24 | git clone https://github.com/dsnet/compress-fuzz.git .work 25 | fi 26 | 27 | PKG=$(echo $1 | sed 's/\/*$//g') 28 | PKG_PATH="github.com/dsnet/compress/internal/tool/fuzz" 29 | shift 30 | 31 | echo "Building..." 32 | go-fuzz-build -o=".work/$PKG-fuzz.zip" $PKG_PATH/$PKG 33 | 34 | echo "Fuzzing..." 
// cmdExec executes the "bro" command-line tool with the given arguments,
// passing the input in as stdin. It returns the stdout and an error.
//
// Any output on stderr is treated as a failure: the last non-blank line is
// returned as the error message and stdout is discarded. Otherwise the result
// of running the command (including a failure to start it) is returned
// alongside whatever was written to stdout.
func cmdExec(input []byte, args ...string) ([]byte, error) {
	var bo, be bytes.Buffer
	cmd := exec.Command("bro", args...)
	cmd.Stdin = bytes.NewReader(input)
	cmd.Stdout = &bo
	cmd.Stderr = &be
	err := cmd.Run()

	// strings.Split always yields at least one element, so indexing the last
	// line is safe even when stderr is empty.
	lines := strings.Split(strings.TrimSpace(be.String()), "\n")
	if msg := lines[len(lines)-1]; msg != "" {
		// Assume any stderr indicates an error and last line is the message.
		return nil, errors.New(msg)
	}
	return bo.Bytes(), err
}
4 | 5 | // +build gofuzz 6 | 7 | package bzip2_bwt 8 | 9 | import ( 10 | "bytes" 11 | "hash/adler32" 12 | 13 | "github.com/dsnet/compress/bzip2" 14 | ) 15 | 16 | func Fuzz(data []byte) int { 17 | if len(data) == 0 { 18 | return -1 19 | } 20 | testReverse(data) 21 | testRoundTrip(data) 22 | return 0 23 | } 24 | 25 | // testReverse verifies that we can reverse the BWT on any arbitrary input 26 | // so long as we choose a valid origin pointer. 27 | func testReverse(data []byte) { 28 | data = append([]byte(nil), data...) // Make copy of data 29 | ptr := int(adler32.Checksum(data)) % len(data) 30 | bzip2.ReverseBWT(data, ptr) 31 | } 32 | 33 | // testRoundTrip verifies that a round-trip BWT faithfully reproduces the 34 | // input data set. 35 | func testRoundTrip(want []byte) { 36 | got := append([]byte(nil), want...) 37 | ptr := bzip2.ForwardBWT(got) 38 | bzip2.ReverseBWT(got, ptr) 39 | 40 | if ptr < 0 || ptr >= len(want) { 41 | panic("invalid origin pointer") 42 | } 43 | if !bytes.Equal(got, want) { 44 | panic("mismatching bytes") 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /internal/tool/bench/lib_ds.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 
4 | 5 | // +build !no_lib_ds 6 | 7 | package main 8 | 9 | import ( 10 | "io" 11 | 12 | "github.com/dsnet/compress/brotli" 13 | "github.com/dsnet/compress/bzip2" 14 | "github.com/dsnet/compress/flate" 15 | ) 16 | 17 | func init() { 18 | RegisterDecoder(FormatBrotli, "ds", 19 | func(r io.Reader) io.ReadCloser { 20 | zr, err := brotli.NewReader(r, nil) 21 | if err != nil { 22 | panic(err) 23 | } 24 | return zr 25 | }) 26 | RegisterDecoder(FormatFlate, "ds", 27 | func(r io.Reader) io.ReadCloser { 28 | zr, err := flate.NewReader(r, nil) 29 | if err != nil { 30 | panic(err) 31 | } 32 | return zr 33 | }) 34 | RegisterEncoder(FormatBZ2, "ds", 35 | func(w io.Writer, lvl int) io.WriteCloser { 36 | zw, err := bzip2.NewWriter(w, &bzip2.WriterConfig{Level: lvl}) 37 | if err != nil { 38 | panic(err) 39 | } 40 | return zw 41 | }) 42 | RegisterDecoder(FormatBZ2, "ds", 43 | func(r io.Reader) io.ReadCloser { 44 | zr, err := bzip2.NewReader(r, nil) 45 | if err != nil { 46 | panic(err) 47 | } 48 | return zr 49 | }) 50 | } 51 | -------------------------------------------------------------------------------- /bzip2/internal/sais/common.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | // Package sais implements a linear time suffix array algorithm. 6 | package sais 7 | 8 | //go:generate go run sais_gen.go byte sais_byte.go 9 | //go:generate go run sais_gen.go int sais_int.go 10 | 11 | // This package ports the C sais implementation by Yuta Mori. The ports are 12 | // located in sais_byte.go and sais_int.go, which are identical to each other 13 | // except for the types. Since Go does not support generics, we use generators to 14 | // create the two files. 
15 | // 16 | // References: 17 | // https://sites.google.com/site/yuta256/sais 18 | // https://www.researchgate.net/publication/221313676_Linear_Time_Suffix_Array_Construction_Using_D-Critical_Substrings 19 | // https://www.researchgate.net/publication/224176324_Two_Efficient_Algorithms_for_Linear_Time_Suffix_Array_Construction 20 | 21 | // ComputeSA computes the suffix array of t and places the result in sa. 22 | // Both t and sa must be the same length. 23 | func ComputeSA(t []byte, sa []int) { 24 | if len(sa) != len(t) { 25 | panic("mismatching sizes") 26 | } 27 | computeSA_byte(t, sa, 0, len(t), 256) 28 | } 29 | -------------------------------------------------------------------------------- /flate/common.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | // Package flate implements the DEFLATE compressed data format, 6 | // described in RFC 1951. 7 | package flate 8 | 9 | import ( 10 | "fmt" 11 | 12 | "github.com/dsnet/compress/internal/errors" 13 | ) 14 | 15 | const ( 16 | maxHistSize = 1 << 15 17 | endBlockSym = 256 18 | ) 19 | 20 | func errorf(c int, f string, a ...interface{}) error { 21 | return errors.Error{Code: c, Pkg: "flate", Msg: fmt.Sprintf(f, a...)} 22 | } 23 | 24 | func panicf(c int, f string, a ...interface{}) { 25 | errors.Panic(errorf(c, f, a...)) 26 | } 27 | 28 | // errWrap converts a lower-level errors.Error to be one from this package. 29 | // The replaceCode passed in will be used to replace the code for any errors 30 | // with the errors.Invalid code. 31 | // 32 | // For the Reader, set this to errors.Corrupted. 33 | // For the Writer, set this to errors.Internal. 
34 | func errWrap(err error, replaceCode int) error { 35 | if cerr, ok := err.(errors.Error); ok { 36 | if errors.IsInvalid(cerr) { 37 | cerr.Code = replaceCode 38 | } 39 | err = errorf(cerr.Code, "%s", cerr.Msg) 40 | } 41 | return err 42 | } 43 | 44 | var errClosed = errorf(errors.Closed, "") 45 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | language: go 3 | before_install: 4 | - curl -L https://github.com/google/brotli/archive/v1.0.2.tar.gz | tar -zxv 5 | - (cd brotli-1.0.2 && mkdir out && cd out && ../configure-cmake && make && sudo make install) 6 | - rm -rf brotli-1.0.2 7 | - curl -L https://github.com/facebook/zstd/archive/v1.3.2.tar.gz | tar -zxv 8 | - (cd zstd-1.3.2 && sudo make install) 9 | - rm -rf zstd-1.3.2 10 | - sudo ldconfig 11 | - mkdir /tmp/go1.12 12 | - curl -L -s https://dl.google.com/go/go1.12.linux-amd64.tar.gz | tar -zxf - -C /tmp/go1.12 --strip-components 1 13 | - unset GOROOT 14 | - (GO111MODULE=on /tmp/go1.12/bin/go mod vendor) 15 | - (cd /tmp && GO111MODULE=on /tmp/go1.12/bin/go get golang.org/x/lint/golint@8f45f776aaf18cebc8d65861cc70c33c60471952) 16 | - (cd /tmp && GO111MODULE=on /tmp/go1.12/bin/go get honnef.co/go/tools/cmd/staticcheck@2019.1) 17 | matrix: 18 | include: 19 | - go: 1.9.x 20 | script: 21 | - go test -v -race ./... 22 | - go: 1.10.x 23 | script: 24 | - go test -v -race ./... 25 | - go: 1.11.x 26 | script: 27 | - go test -v -race ./... 28 | - go: 1.12.x 29 | script: 30 | - ./ztest.sh 31 | - go: master 32 | script: 33 | - go test -v -race ./... 34 | allow_failures: 35 | - go: master 36 | fast_finish: true 37 | -------------------------------------------------------------------------------- /internal/testutil/rand.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 
// Rand implements a deterministic pseudo-random number generator.
// This differs from math/rand in that the exact output will be consistent
// across different versions of Go.
//
// The generator state is a single AES block that is re-encrypted in place
// each time more output is needed.
type Rand struct {
	cipher.Block
	blk [aes.BlockSize]byte
}

// NewRand returns a generator keyed by seed. The key is the little-endian
// encoding of seed in the first eight bytes, with the remaining bytes zero.
func NewRand(seed int) *Rand {
	key := make([]byte, aes.BlockSize)
	binary.LittleEndian.PutUint64(key, uint64(seed))
	// The key is always aes.BlockSize (16) bytes, a valid AES-128 key length,
	// so the error result is not inspected.
	block, _ := aes.NewCipher(key)
	return &Rand{Block: block}
}

// Int advances the state by one block and returns a non-negative value
// derived from the upper 31 bits of its first four bytes (big-endian).
func (r *Rand) Int() int {
	state := r.blk[:]
	r.Encrypt(state, state)
	return int(binary.BigEndian.Uint32(state) >> 1)
}

// Intn returns r.Int() reduced modulo n.
func (r *Rand) Intn(n int) int {
	return r.Int() % n
}

// Bytes returns n bytes of output, advancing the state once for every
// (possibly partial) block copied into the result.
func (r *Rand) Bytes(n int) []byte {
	out := make([]byte, n)
	for off := 0; off < len(out); {
		r.Encrypt(r.blk[:], r.blk[:])
		off += copy(out[off:], r.blk[:])
	}
	return out
}

// Perm returns a permutation of the integers [0, n). Each new value i is
// placed at a position j drawn from [0, i], and the value previously at j
// moves to the end.
func (r *Rand) Perm(n int) []int {
	perm := make([]int, n)
	for i := range perm {
		j := r.Intn(i + 1)
		perm[i], perm[j] = perm[j], i
	}
	return perm
}
4 | 5 | // +build !no_lib_uk 6 | 7 | package main 8 | 9 | import ( 10 | "io" 11 | "io/ioutil" 12 | 13 | "github.com/ulikunitz/xz/lzma" 14 | ) 15 | 16 | func init() { 17 | RegisterEncoder(FormatLZMA2, "uk", 18 | func(w io.Writer, lvl int) io.WriteCloser { 19 | // This level conversion logic emulates the conversion found in 20 | // LZMA2Options.java from https://git.tukaani.org/?p=xz-java.git 21 | if lvl < 0 || lvl > 9 { 22 | panic("invalid level") 23 | } 24 | dict := [...]int{ 25 | 1 << 18, 1 << 20, 1 << 21, 1 << 22, 1 << 22, 26 | 1 << 23, 1 << 23, 1 << 24, 1 << 25, 1 << 26, 27 | }[lvl] 28 | match := lzma.HashTable4 29 | // TODO(dsnet): This currently crashes on zero.bin when using 30 | // BinaryTree on revision 76f94b7c69c6f84be96bcfc2443042b198689565. 31 | /* 32 | if lvl > 4 { 33 | match = lzma.BinaryTree 34 | } 35 | */ 36 | 37 | zw, err := lzma.Writer2Config{DictCap: dict, Matcher: match}.NewWriter2(w) 38 | if err != nil { 39 | panic(err) 40 | } 41 | return zw 42 | }) 43 | RegisterDecoder(FormatLZMA2, "uk", 44 | func(r io.Reader) io.ReadCloser { 45 | zr, err := lzma.NewReader2(r) 46 | if err != nil { 47 | panic(err) 48 | } 49 | return ioutil.NopCloser(zr) 50 | }) 51 | } 52 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright © 2015, Joe Tsai and The Go Authors. All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are met: 5 | 6 | * Redistributions of source code must retain the above copyright notice, this 7 | list of conditions and the following disclaimer. 8 | * Redistributions in binary form must reproduce the above copyright notice, 9 | this list of conditions and the following disclaimer in the documentation and/or 10 | other materials provided with the distribution. 
11 | * Neither the copyright holder nor the names of its contributors may be used to 12 | endorse or promote products derived from this software without specific prior 13 | written permission. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY 19 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | -------------------------------------------------------------------------------- /brotli/common_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | package brotli 6 | 7 | import ( 8 | "bytes" 9 | "hash/crc32" 10 | "testing" 11 | ) 12 | 13 | func TestTableCRC(t *testing.T) { 14 | // Convert transformLUT to byte array according to Appendix B of the RFC. 
15 | var transformBuf bytes.Buffer 16 | for _, t := range transformLUT { 17 | transformBuf.WriteString(t.prefix + "\x00") 18 | transformBuf.WriteByte(byte(t.transform)) 19 | transformBuf.WriteString(t.suffix + "\x00") 20 | } 21 | 22 | vectors := []struct { 23 | crc uint32 24 | buf []byte 25 | }{ 26 | {crc: 0x5136cb04, buf: dictLUT[:]}, 27 | {crc: 0x8e91efb7, buf: contextLUT0[:]}, 28 | {crc: 0xd01a32f4, buf: contextLUT1[:]}, 29 | {crc: 0x0dd7a0d6, buf: contextLUT2[:]}, 30 | {crc: 0x3d965f81, buf: transformBuf.Bytes()}, 31 | } 32 | 33 | for i, v := range vectors { 34 | crc := crc32.ChecksumIEEE(v.buf) 35 | if crc != v.crc { 36 | t.Errorf("test %d, CRC-32 mismatch: got %08x, want %08x", i, crc, v.crc) 37 | } 38 | } 39 | } 40 | 41 | // This package relies on dynamic generation of LUTs to reduce the static 42 | // binary size. This benchmark attempts to measure the startup cost of init. 43 | // This benchmark is not thread-safe; so do not run it in parallel with other 44 | // tests or benchmarks! 45 | func BenchmarkInit(b *testing.B) { 46 | b.ReportAllocs() 47 | for i := 0; i < b.N; i++ { 48 | initLUTs() 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /internal/tool/fuzz/brotli/brotli.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | // +build gofuzz 6 | 7 | package bzip2 8 | 9 | import ( 10 | "bytes" 11 | "io/ioutil" 12 | 13 | gbrotli "github.com/dsnet/compress/brotli" 14 | cbrotli "github.com/dsnet/compress/internal/cgo/brotli" 15 | ) 16 | 17 | func Fuzz(data []byte) int { 18 | // Decompress using the Go decoder. 
19 | gr, err := gbrotli.NewReader(bytes.NewReader(data), nil) 20 | if err != nil { 21 | panic(err) 22 | } 23 | gb, gerr := ioutil.ReadAll(gr) 24 | if err := gr.Close(); gerr == nil { 25 | gerr = err 26 | } else if gerr != nil && err == nil { 27 | panic("nil on Close after non-nil error") 28 | } 29 | 30 | // Decompress using the C decoder. 31 | cr := cbrotli.NewReader(bytes.NewReader(data)) 32 | cb, cerr := ioutil.ReadAll(cr) 33 | if err := cr.Close(); cerr == nil { 34 | cerr = err 35 | } else if cerr != nil && err == nil { 36 | panic("nil on Close after non-nil error") 37 | } 38 | 39 | switch { 40 | case gerr == nil && cerr == nil: 41 | if !bytes.Equal(gb, cb) { 42 | panic("mismatching bytes") 43 | } 44 | case gerr != nil && cerr == nil: 45 | panic(gerr) 46 | case gerr == nil && cerr != nil: 47 | panic(cerr) 48 | default: 49 | // Ensure that both gb and cb have the same common prefix. 50 | if !bytes.HasPrefix(gb, cb) && !bytes.HasPrefix(cb, gb) { 51 | panic("mismatching leading bytes") 52 | } 53 | } 54 | 55 | if cerr == nil || gerr == nil { 56 | return 1 // Favor valid inputs 57 | } 58 | return 0 59 | } 60 | -------------------------------------------------------------------------------- /bzip2/prefix_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 
4 | 5 | package bzip2 6 | 7 | import ( 8 | "reflect" 9 | "testing" 10 | 11 | "github.com/dsnet/compress/internal/prefix" 12 | ) 13 | 14 | func TestDegenerateCodes(t *testing.T) { 15 | vectors := []struct { 16 | input prefix.PrefixCodes 17 | output prefix.PrefixCodes 18 | }{{ 19 | input: []prefix.PrefixCode{ 20 | {Sym: 0, Len: 1}, 21 | }, 22 | output: []prefix.PrefixCode{ 23 | {Sym: 0, Len: 1, Val: 0}, // 0 24 | {Sym: 258, Len: 1, Val: 1}, // 1 25 | }, 26 | }, { 27 | input: []prefix.PrefixCode{ 28 | {Sym: 0, Len: 1}, 29 | {Sym: 1, Len: 1}, 30 | {Sym: 2, Len: 1}, 31 | }, 32 | output: []prefix.PrefixCode{ 33 | {Sym: 0, Len: 1, Val: 0}, // 0 34 | {Sym: 1, Len: 1, Val: 1}, // 1 35 | }, 36 | }, { 37 | input: []prefix.PrefixCode{ 38 | {Sym: 0, Len: 3}, 39 | {Sym: 1, Len: 4}, 40 | {Sym: 2, Len: 3}, 41 | }, 42 | output: []prefix.PrefixCode{ 43 | {Sym: 0, Len: 3, Val: 0}, // 000 44 | {Sym: 1, Len: 4, Val: 2}, // 0010 45 | {Sym: 2, Len: 3, Val: 4}, // 100 46 | {Sym: 258, Len: 4, Val: 10}, // 1010 47 | {Sym: 259, Len: 3, Val: 6}, // 110 48 | {Sym: 260, Len: 1, Val: 1}, // 1 49 | }, 50 | }, { 51 | input: []prefix.PrefixCode{ 52 | {Sym: 0, Len: 1}, 53 | {Sym: 1, Len: 3}, 54 | {Sym: 2, Len: 4}, 55 | {Sym: 3, Len: 3}, 56 | {Sym: 4, Len: 2}, 57 | }, 58 | output: []prefix.PrefixCode{ 59 | {Sym: 0, Len: 1, Val: 0}, // 0 60 | {Sym: 1, Len: 3, Val: 3}, // 011 61 | {Sym: 3, Len: 3, Val: 7}, // 111 62 | {Sym: 4, Len: 2, Val: 1}, // 01 63 | }, 64 | }} 65 | 66 | for i, v := range vectors { 67 | input := append(prefix.PrefixCodes(nil), v.input...) 68 | output := handleDegenerateCodes(input) 69 | 70 | if !reflect.DeepEqual(output, v.output) { 71 | t.Errorf("test %d, output mismatch:\ngot %v\nwant %v", i, output, v.output) 72 | } 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /internal/prefix/encoder.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 
2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | package prefix 6 | 7 | import ( 8 | "sort" 9 | 10 | "github.com/dsnet/compress/internal" 11 | ) 12 | 13 | type Encoder struct { 14 | chunks []uint32 // First-level lookup map 15 | chunkMask uint32 // Mask the length of the chunks table 16 | 17 | NumSyms uint32 // Number of symbols 18 | } 19 | 20 | // Init initializes Encoder according to the codes provided. 21 | func (pe *Encoder) Init(codes PrefixCodes) { 22 | // Handle special case trees. 23 | if len(codes) <= 1 { 24 | switch { 25 | case len(codes) == 0: // Empty tree (should error if used later) 26 | *pe = Encoder{chunks: pe.chunks[:0], NumSyms: 0} 27 | case len(codes) == 1 && codes[0].Len == 0: // Single code tree (bit-length of zero) 28 | pe.chunks = append(pe.chunks[:0], codes[0].Val< 0; n >>= 1 { 45 | numChunks <<= 1 46 | } 47 | pe.NumSyms = uint32(len(codes)) 48 | 49 | retry: 50 | // Allocate and reset chunks. 51 | pe.chunks = allocUint32s(pe.chunks, numChunks) 52 | pe.chunkMask = uint32(numChunks - 1) 53 | for i := range pe.chunks { 54 | pe.chunks[i] = 0 // Logic below relies on zero value as uninitialized 55 | } 56 | 57 | // Insert each symbol, checking that there are no conflicts. 58 | for _, c := range codes { 59 | if pe.chunks[c.Sym&pe.chunkMask] > 0 { 60 | // Collision found our "hash" table, so grow and try again. 61 | numChunks <<= 1 62 | goto retry 63 | } 64 | pe.chunks[c.Sym&pe.chunkMask] = c.Val< /dev/null 44 | $PPROF -output=cpu.html -weblist=. $PKG_NAME.test $TMPDIR/cpu.profile 2> /dev/null 45 | $PPROF -output=mem_objects.svg -alloc_objects -web $PKG_NAME.test $TMPDIR/mem.profile 2> /dev/null 46 | $PPROF -output=mem_objects.html -alloc_objects -weblist=. $PKG_NAME.test $TMPDIR/mem.profile 2> /dev/null 47 | $PPROF -output=mem_space.svg -alloc_space -web $PKG_NAME.test $TMPDIR/mem.profile 2> /dev/null 48 | $PPROF -output=mem_space.html -alloc_space -weblist=. 
// Checksum describes one 32-bit checksum field inside a bit stream.
type Checksum struct {
	Offset int64  // Bit offset of the checksum
	Value  uint32 // Checksum value
}

// Checksums is an ordered collection of checksum fields.
type Checksums []Checksum

// Apply returns a copy of d in which every field listed in cs has been
// overwritten with the recorded value. The slice passed in is left untouched.
// Fields are written in order, so a later entry wins where two overlap.
func (cs Checksums) Apply(d []byte) []byte {
	patched := append([]byte(nil), d...)
	for i := range cs {
		setU32(patched, cs[i].Offset, cs[i].Value)
	}
	return patched
}

// setU32 stores val, most-significant bit first, into the 32 bits of d that
// start at bit offset pos. Bit 0 of the stream is the top bit of d[0].
// It panics with an index error if the field reaches past the end of d.
func setU32(d []byte, pos int64, val uint32) {
	first := uint64(pos)
	for k := uint64(0); k < 32; k++ {
		bit := first + k
		idx, shift := bit/8, 7-bit%8
		if val&(1<<(31-k)) != 0 {
			d[idx] |= 1 << shift
		} else {
			d[idx] &^= 1 << shift
		}
	}
}

// Verify reports whether every field listed in cs currently holds the
// recorded value in d.
func (cs Checksums) Verify(d []byte) bool {
	for i := range cs {
		if getU32(d, cs[i].Offset) != cs[i].Value {
			return false
		}
	}
	return true
}

// getU32 reads the 32 bits of d that start at bit offset pos and returns
// them as a big-endian (most-significant bit first) integer.
// It panics with an index error if the field reaches past the end of d.
func getU32(d []byte, pos int64) (val uint32) {
	first := uint64(pos)
	for k := uint64(0); k < 32; k++ {
		bit := first + k
		val <<= 1
		val |= uint32(d[bit/8]>>(7-bit%8)) & 1
	}
	return val
}
// decompressMeta checks that data is a sequence of DEFLATE streams that all
// inflate to zero bytes.
//
// A private copy of data is made and an empty, final, stored block is
// appended to it, so the caller's slice is never modified. The copy is then
// inflated stream after stream until no input remains. Any decompression
// error, any stream that yields output, and any Close error results in a
// panic, which is how the fuzzer is told about a failure.
func decompressMeta(data []byte) {
	// Empty stored block with the final bit set: terminates the last stream.
	terminator := [...]byte{0x01, 0x00, 0x00, 0xff, 0xff}

	stream := make([]byte, 0, len(data)+len(terminator))
	stream = append(stream, data...)
	stream = append(stream, terminator[:]...)

	for src := bytes.NewReader(stream); src.Len() != 0; {
		zr := flate.NewReader(src)
		out, err := ioutil.ReadAll(zr)
		switch {
		case err != nil:
			panic(err)
		case len(out) != 0:
			panic("non-zero meta-encoded block")
		}
		if err = zr.Close(); err != nil {
			panic(err)
		}
	}
}
6 | 7 | cd $(go list -f '{{ .Dir }}' github.com/dsnet/compress) 8 | 9 | BOLD="\x1b[1mRunning: " 10 | PASS="\x1b[32mPASS" 11 | FAIL="\x1b[31mFAIL" 12 | RESET="\x1b[0m" 13 | 14 | echo -e "${BOLD}fmt${RESET}" 15 | RET_FMT=$(find . -name "*.go" | egrep -v "/(_.*_|\..*|testdata)/" | xargs gofmt -d) 16 | if [[ ! -z "$RET_FMT" ]]; then echo "$RET_FMT"; echo; fi 17 | 18 | echo -e "${BOLD}test${RESET}" 19 | RET_TEST=$(go test -race ./... | egrep -v "^(ok|[?])\s+") 20 | if [[ ! -z "$RET_TEST" ]]; then echo "$RET_TEST"; echo; fi 21 | 22 | echo -e "${BOLD}staticcheck${RESET}" 23 | RET_SCHK=$(staticcheck \ 24 | -ignore " 25 | github.com/dsnet/compress/brotli/*.go:SA4016 26 | github.com/dsnet/compress/brotli/*.go:S1023 27 | github.com/dsnet/compress/brotli/*.go:U1000 28 | github.com/dsnet/compress/bzip2/*.go:S1023 29 | github.com/dsnet/compress/flate/*.go:U1000 30 | github.com/dsnet/compress/internal/cgo/lzma/*.go:SA4000 31 | github.com/dsnet/compress/internal/prefix/*.go:S1004 32 | github.com/dsnet/compress/internal/prefix/*.go:S1023 33 | github.com/dsnet/compress/internal/prefix/*.go:SA4016 34 | github.com/dsnet/compress/internal/tool/bench/*.go:S1007 35 | github.com/dsnet/compress/xflate/internal/meta/*.go:S1023 36 | " ./... 2>&1) 37 | if [[ ! -z "$RET_SCHK" ]]; then echo "$RET_SCHK"; echo; fi 38 | 39 | echo -e "${BOLD}lint${RESET}" 40 | RET_LINT=$(golint ./... 2>&1 | 41 | egrep -v "^vendor/" | 42 | egrep -v "should have comment(.*)or be unexported" | 43 | egrep -v "^(.*)type name will be used as(.*)by other packages" | 44 | egrep -v "^brotli/transform.go:(.*)replace i [+]= 1 with i[+]{2}" | 45 | egrep -v "^internal/prefix/prefix.go:(.*)replace symBits(.*) [-]= 1 with symBits(.*)[-]{2}" | 46 | egrep -v "^xflate/common.go:(.*)NoCompression should be of the form" | 47 | egrep -v "^exit status") 48 | if [[ ! -z "$RET_LINT" ]]; then echo "$RET_LINT"; echo; fi 49 | 50 | if [[ ! -z "$RET_FMT" ]] || [ ! -z "$RET_TEST" ] || [[ ! -z "$RET_SCHK" ]] || [[ ! 
// main builds the test fixture and writes it to the file called name.
//
// The data is assembled from randomly sized runs. Each run is either a string
// of characters drawn from the base64 alphabet (biased towards letters) or a
// string of multi-byte UTF-8 sequences. Runs are appended until at least size
// bytes exist; the result is cut to exactly size bytes before being written.
//
// The random source is seeded with the constant 0, so the output depends only
// on the exact order of the calls made on r below. Reordering or adding draws
// changes the generated file.
func main() {
	var b []byte
	r := rand.New(rand.NewSource(0))

	for len(b) < size {
		n := 16 + r.Intn(64) // Length of substring
		p := r.Float32()
		switch {
		case p <= 0.75:
			// Write strings of base64 encoded values.
			for i := 0; i < n; i++ {
				// This p shadows the outer one; it selects the alphabet for
				// a single character.
				p := r.Float32()
				switch {
				case p <= 0.1:
					// Write any lowercase letter.
					b = append(b, alpha1[r.Intn(len(alpha1))])
				case p <= 0.7:
					// Write any lowercase or uppercase letter.
					b = append(b, alpha2[r.Intn(len(alpha2))])
				case p <= 1.0:
					// Write any character from the base64 alphabet.
					b = append(b, alpha3[r.Intn(len(alpha3))])
				}
			}
		case p <= 1.00:
			// Write strings of utf8 encoded values.
			for i := 0; i < n; i++ {
				p := r.Float32()
				switch {
				case p <= 0.65:
					// Write a 2-byte long utf8 code point.
					// The rune is drawn from 0x80..0x7FF.
					var buf [4]byte
					cnt := utf8.EncodeRune(buf[:], rune(0x80+r.Intn(0x780)))
					b = append(b, buf[:cnt]...)
				case p <= 1.00:
					// Write a 3-byte long utf8 code point.
					// The rune is drawn from 0x800..0xFFFF.
					// NOTE(review): that interval contains the surrogate
					// range, for which utf8.EncodeRune is documented to emit
					// U+FFFD instead; that is still a 3-byte sequence.
					var buf [4]byte
					cnt := utf8.EncodeRune(buf[:], rune(0x800+r.Intn(0xF800)))
					b = append(b, buf[:cnt]...)
				}
			}
		}
	}

	// The last run usually overshoots size, so trim before writing.
	if err := ioutil.WriteFile(name, b[:size], 0664); err != nil {
		panic(err)
	}
}
4 | 5 | package brotli 6 | 7 | import "testing" 8 | 9 | func TestTransform(t *testing.T) { 10 | vectors := []struct { 11 | id int 12 | input string 13 | output string 14 | }{ 15 | {id: 0, input: "Hello, world!", output: "Hello, world!"}, 16 | {id: 23, input: "groups of", output: "groups"}, 17 | {id: 42, input: "s for the ", output: "s for "}, 18 | {id: 48, input: "presentation", output: "prese"}, 19 | {id: 56, input: "maintenance", output: "maint"}, 20 | {id: 23, input: "Alexandria", output: "Alexand"}, 21 | {id: 23, input: "archives", output: "archi"}, 22 | {id: 49, input: "fighty", output: "fighting "}, 23 | {id: 49, input: "12", output: "1ing "}, 24 | {id: 49, input: "1", output: "ing "}, 25 | {id: 49, input: "", output: "ing "}, 26 | {id: 64, input: "123456789a", output: "1"}, 27 | {id: 64, input: "123456789", output: ""}, 28 | {id: 64, input: "1", output: ""}, 29 | {id: 64, input: "", output: ""}, 30 | {id: 3, input: "afloat", output: "float"}, 31 | {id: 3, input: "12", output: "2"}, 32 | {id: 3, input: "1", output: ""}, 33 | {id: 3, input: "", output: ""}, 34 | {id: 54, input: "123456789a", output: "a"}, 35 | {id: 54, input: "123456789", output: ""}, 36 | {id: 54, input: "1", output: ""}, 37 | {id: 54, input: "", output: ""}, 38 | {id: 73, input: "", output: " the of the "}, 39 | {id: 73, input: "dichlorodifluoromethanes", output: " the dichlorodifluoromethanes of the "}, 40 | {id: 15, input: "", output: " "}, 41 | {id: 15, input: "meow", output: " Meow "}, 42 | {id: 15, input: "-scale", output: " -scale "}, 43 | {id: 15, input: "почти", output: " Почти "}, 44 | {id: 15, input: "互联网", output: " 亗联网 "}, 45 | {id: 119, input: "", output: " ='"}, 46 | {id: 119, input: "meow", output: " MEOW='"}, 47 | {id: 119, input: "-scale", output: " -SCALE='"}, 48 | {id: 119, input: "почти", output: " ПОѧѢИ='"}, 49 | {id: 119, input: "互联网", output: " 亗聑罔='"}, 50 | } 51 | 52 | var buf [maxWordSize]byte 53 | for i, v := range vectors { 54 | cnt := transformWord(buf[:], 
[]byte(v.input), v.id) 55 | output := string(buf[:cnt]) 56 | 57 | if output != v.output { 58 | t.Errorf("test %d, output mismatch: got %q, want %q", i, output, v.output) 59 | } 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /xflate/xflate_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | package xflate 6 | 7 | import ( 8 | "bytes" 9 | "io" 10 | "testing" 11 | 12 | "github.com/dsnet/compress/internal/testutil" 13 | ) 14 | 15 | var ( 16 | testBinary = testutil.MustLoadFile("../testdata/binary.bin") 17 | testDigits = testutil.MustLoadFile("../testdata/digits.txt") 18 | testHuffman = testutil.MustLoadFile("../testdata/huffman.txt") 19 | testRandom = testutil.MustLoadFile("../testdata/random.bin") 20 | testRepeats = testutil.MustLoadFile("../testdata/repeats.bin") 21 | testTwain = testutil.MustLoadFile("../testdata/twain.txt") 22 | testZeros = testutil.MustLoadFile("../testdata/zeros.bin") 23 | ) 24 | 25 | func TestRoundTrip(t *testing.T) { 26 | vectors := []struct { 27 | name string 28 | input []byte 29 | }{ 30 | {"Nil", nil}, 31 | {"Binary", testBinary}, 32 | {"Digits", testDigits}, 33 | {"Huffman", testHuffman}, 34 | {"Random", testRandom}, 35 | {"Repeats", testRepeats}, 36 | {"Twain", testTwain}, 37 | {"Zeros", testZeros}, 38 | } 39 | 40 | for _, v := range vectors { 41 | v := v 42 | t.Run(v.name, func(t *testing.T) { 43 | t.Parallel() 44 | 45 | var wb, rb bytes.Buffer 46 | 47 | xw, err := NewWriter(&wb, &WriterConfig{ChunkSize: 1 << 10}) 48 | if err != nil { 49 | t.Errorf("unexpected error: NewWriter() = %v", err) 50 | } 51 | cnt, err := io.Copy(xw, bytes.NewReader(v.input)) 52 | if err != nil { 53 | t.Errorf("unexpected error: Write() = %v", err) 54 | } 55 | if cnt != int64(len(v.input)) { 56 | 
t.Errorf("write count mismatch: got %d, want %d", cnt, len(v.input)) 57 | } 58 | if err := xw.Close(); err != nil { 59 | t.Errorf("unexpected error: Close() = %v", err) 60 | } 61 | 62 | xr, err := NewReader(bytes.NewReader(wb.Bytes()), nil) 63 | if err != nil { 64 | t.Errorf("unexpected error: NewReader() = %v", err) 65 | } 66 | cnt, err = io.Copy(&rb, xr) 67 | if err != nil { 68 | t.Errorf("unexpected error: Read() = %v", err) 69 | } 70 | if cnt != int64(len(v.input)) { 71 | t.Errorf("read count mismatch: got %d, want %d", cnt, len(v.input)) 72 | } 73 | if err := xr.Close(); err != nil { 74 | t.Errorf("unexpected error: Close() = %v", err) 75 | } 76 | 77 | output := rb.Bytes() 78 | if got, want, ok := testutil.BytesCompare(output, v.input); !ok { 79 | t.Errorf("output data mismatch:\ngot %s\nwant %s", got, want) 80 | } 81 | }) 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /api.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | // Package compress is a collection of compression libraries. 6 | package compress 7 | 8 | import ( 9 | "bufio" 10 | "io" 11 | 12 | "github.com/dsnet/compress/internal/errors" 13 | ) 14 | 15 | // The Error interface identifies all compression related errors. 16 | type Error interface { 17 | error 18 | CompressError() 19 | 20 | // IsDeprecated reports the use of a deprecated and unsupported feature. 21 | IsDeprecated() bool 22 | 23 | // IsCorrupted reports whether the input stream was corrupted. 24 | IsCorrupted() bool 25 | } 26 | 27 | var _ Error = errors.Error{} 28 | 29 | // ByteReader is an interface accepted by all decompression Readers. 30 | // It guarantees that the decompressor never reads more data than is necessary 31 | // from the underlying io.Reader. 
32 | type ByteReader interface { 33 | io.Reader 34 | io.ByteReader 35 | } 36 | 37 | var _ ByteReader = (*bufio.Reader)(nil) 38 | 39 | // BufferedReader is an interface accepted by all decompression Readers. 40 | // It guarantees that the decompressor never reads more data than is necessary 41 | // from the underlying io.Reader. Since BufferedReader allows a decompressor 42 | // to peek at bytes further along in the stream without advancing the read 43 | // pointer, decompression can experience a significant performance gain when 44 | // provided a reader that satisfies this interface. Thus, a decompressor will 45 | // prefer this interface over ByteReader for performance reasons. 46 | // 47 | // The bufio.Reader satisfies this interface. 48 | type BufferedReader interface { 49 | io.Reader 50 | 51 | // Buffered returns the number of bytes currently buffered. 52 | // 53 | // This value becomes invalid following the next Read/Discard operation. 54 | Buffered() int 55 | 56 | // Peek returns the next n bytes without advancing the reader. 57 | // 58 | // If Peek returns fewer than n bytes, it also returns an error explaining 59 | // why the peek is short. Peek must support peeking of at least 8 bytes. 60 | // If 0 <= n <= Buffered(), Peek is guaranteed to succeed without reading 61 | // from the underlying io.Reader. 62 | // 63 | // This result becomes invalid following the next Read/Discard operation. 64 | Peek(n int) ([]byte, error) 65 | 66 | // Discard skips the next n bytes, returning the number of bytes discarded. 67 | // 68 | // If Discard skips fewer than n bytes, it also returns an error. 69 | // If 0 <= n <= Buffered(), Discard is guaranteed to succeed without reading 70 | // from the underlying io.Reader. 
// RangeCode describes one contiguous range of integer values: it starts at
// Base and spans 1<<Len values.
type RangeCode struct {
	Base uint32 // Starting base offset of the range
	Len  uint32 // Bit-length of a subsequent integer to add to base offset
}

// RangeCodes is a list of ranges; the position of a range in the list is its
// symbol.
type RangeCodes []RangeCode

// RangeEncoder maps an integer value to the symbol of the range that holds it.
type RangeEncoder struct {
	rcs     RangeCodes
	lut     [1024]uint32 // Symbol for each of the first len(lut) values at or above minBase
	minBase uint         // Base of the first range
}

// End reports the non-inclusive ending range.
func (rc RangeCode) End() uint32 {
	return rc.Base + uint32(1)<<rc.Len
}

// MakeRangeCodes creates a RangeCodes in which the ranges are stacked back to
// back starting at minBase, with no gaps, using the bit-lengths in bits.
// The result is nil when bits is empty.
func MakeRangeCodes(minBase uint, bits []uint) (rc RangeCodes) {
	next := minBase
	for i := range bits {
		rc = append(rc, RangeCode{Base: uint32(next), Len: uint32(bits[i])})
		next += uint(1) << bits[i]
	}
	return rc
}

// Base reports the inclusive starting range for all ranges.
// It panics if rcs is empty.
func (rcs RangeCodes) Base() uint32 {
	first := rcs[0]
	return first.Base
}

// End reports the non-inclusive ending range for all ranges.
// It panics if rcs is empty.
func (rcs RangeCodes) End() uint32 {
	last := rcs[len(rcs)-1]
	return last.End()
}

// checkValid reports whether the RangeCodes is valid. An empty list is
// invalid. Otherwise every adjacent pair must satisfy all of:
//	rcs[i-1].Base <= rcs[i].Base
//	rcs[i-1].End  <= rcs[i].End
//	rcs[i-1].End  >= rcs[i].Base
//
// Put differently, the ranges must be increasing and must leave no gaps
// between them. Overlapping ranges are allowed.
func (rcs RangeCodes) checkValid() bool {
	if len(rcs) == 0 {
		return false
	}
	for i := 1; i < len(rcs); i++ {
		prev, cur := rcs[i-1], rcs[i]
		increasing := prev.Base <= cur.Base && prev.End() <= cur.End()
		gapless := prev.End() >= cur.Base
		if !(increasing && gapless) {
			return false
		}
	}
	return true
}

// Init resets the encoder for rcs and fills the lookup table that covers the
// first len(lut) values starting at the lowest base. Where ranges overlap,
// the later symbol overwrites the earlier one in the table.
// It panics if rcs is not valid.
func (re *RangeEncoder) Init(rcs RangeCodes) {
	if !rcs.checkValid() {
		panic("invalid range codes")
	}
	*re = RangeEncoder{rcs: rcs, minBase: uint(rcs.Base())}

	limit := len(re.lut)
	for sym, rc := range rcs {
		lo := int(rc.Base) - int(re.minBase)
		if lo >= limit {
			break // This and all later ranges start beyond the table
		}
		hi := int(rc.End()) - int(re.minBase)
		if hi > limit {
			hi = limit
		}
		for i := lo; i < hi; i++ {
			re.lut[i] = uint32(sym)
		}
	}
}

// Encode returns the symbol of the range containing offset.
//
// Offsets within the first len(lut) values above minBase are answered from
// the table. For larger offsets the search starts at the symbol stored in the
// last table slot and walks forward until a range starts beyond offset or the
// list ends, so an offset past the final range yields the last symbol.
func (re *RangeEncoder) Encode(offset uint) (sym uint) {
	idx := int(offset - re.minBase)
	if idx < len(re.lut) {
		return uint(re.lut[idx])
	}
	sym = uint(re.lut[len(re.lut)-1])
scan: // Jump instead of a for-loop so that this function can be inlined
	if int(sym) < len(re.rcs) && re.rcs[sym].Base <= uint32(offset) {
		sym++
		goto scan
	}
	return sym - 1
}
// runLengthEncoding implements the first RLE stage of bzip2. Every sequence
// of 4..255 duplicated bytes is replaced by only the first 4 bytes, and a
// single byte representing the repeat length. Similar to the C bzip2
// implementation, the encoder will always terminate repeat sequences with a
// count (even if it is the end of the buffer), and it will also never produce
// run lengths of 256..259. The decoder can handle the latter case.
//
// For example, if the input was:
//	input:  "AAAAAAABBBBCCCD"
//
// Then the output will be:
//	output: "AAAA\x03BBBB\x00CCCD"
//
// A single value is used either for encoding (Write) or for decoding (Read);
// the two directions interpret lastCnt differently, as described on each
// method.
type runLengthEncoding struct {
	buf     []byte // Encoded data: the destination of Write, the source of Read
	idx     int    // Next position in buf to write to or read from
	lastVal byte   // Value of the most recent byte processed
	lastCnt int    // Run state; meaning depends on the direction (see Write/Read)
}

// Init resets all state and installs buf as the encoded-data buffer.
func (rle *runLengthEncoding) Init(buf []byte) {
	*rle = runLengthEncoding{buf: buf}
}

// Write run-length encodes buf into the buffer given to Init.
//
// While encoding, lastCnt is the length of the current run of lastVal.
// It returns the number of input bytes consumed. When the encoded buffer has
// no room for the next byte, it stops and returns rleDone together with the
// count of bytes consumed so far.
func (rle *runLengthEncoding) Write(buf []byte) (int, error) {
	for i, b := range buf {
		if rle.lastVal != b {
			rle.lastCnt = 0
		}
		rle.lastCnt++
		switch {
		case rle.lastCnt < 4:
			// One of the first three bytes of a run: copied through as-is.
			if rle.idx >= len(rle.buf) {
				return i, rleDone
			}
			rle.buf[rle.idx] = b
			rle.idx++
		case rle.lastCnt == 4:
			// Fourth byte of a run: emit it followed by a repeat count of
			// zero. Both bytes must fit, hence the idx+1 check.
			if rle.idx+1 >= len(rle.buf) {
				return i, rleDone
			}
			rle.buf[rle.idx] = b
			rle.idx++
			rle.buf[rle.idx] = 0
			rle.idx++
		case rle.lastCnt < 256:
			// Bytes 5..255 of a run only bump the count byte written above.
			rle.buf[rle.idx-1]++
		default:
			// The run reached 256 bytes: this byte starts a fresh run.
			if rle.idx >= len(rle.buf) {
				return i, rleDone
			}
			rle.lastCnt = 1
			rle.buf[rle.idx] = b
			rle.idx++
		}
		rle.lastVal = b
	}
	return len(buf), nil
}

// Read run-length decodes from the buffer given to Init into buf.
//
// While decoding, a positive lastCnt is the number of repeats of lastVal
// still owed to the caller, while zero and negative values count (negated)
// how many identical literal bytes were just emitted in a row. Reaching -4
// means the next encoded byte is a repeat count rather than a literal.
//
// It returns the number of bytes stored in buf. Once the encoded data is
// exhausted it returns rleDone; if the data ends where a repeat count is
// required, it returns a corruption error instead.
func (rle *runLengthEncoding) Read(buf []byte) (int, error) {
	for i := range buf {
		switch {
		case rle.lastCnt == -4:
			if rle.idx >= len(rle.buf) {
				return i, errorf(errors.Corrupted, "missing terminating run-length repeater")
			}
			rle.lastCnt = int(rle.buf[rle.idx])
			rle.idx++
			if rle.lastCnt > 0 {
				break // Break the switch
			}
			fallthrough // Count was zero, continue the work
		case rle.lastCnt <= 0:
			// No repeats pending: fetch the next literal byte.
			if rle.idx >= len(rle.buf) {
				return i, rleDone
			}
			b := rle.buf[rle.idx]
			rle.idx++
			if b != rle.lastVal {
				rle.lastCnt = 0
				rle.lastVal = b
			}
		}
		// Emit one byte and account for it; this is also the whole of the
		// work when repeats are pending (lastCnt > 0).
		buf[i] = rle.lastVal
		rle.lastCnt--
	}
	return len(buf), nil
}

// Bytes returns the portion of the buffer before the current position, which
// after Write calls is the data encoded so far.
func (rle *runLengthEncoding) Bytes() []byte { return rle.buf[:rle.idx] }
"keyboardsmashkeyboardsmashkeyboardsmashkeyboardsmashkeyboardsmashfoofjaewu893p4u4q893ru890q2urqr2r34r2fw42er3fefewaf", 40 | "keyboardsmashkeyboardsmashkeyboardsmashkeyboardsmashkeyboardsmashfriojfewafweafwaefweafewafwaefwaefwaefewafwae8394ru4389", 41 | `(34 bytes)..."smashkeyboardsmashkeyboardsmashfoofjaewu893p4u4q893ru890q2urqr2r"...(18 bytes)`, 42 | `(34 bytes)..."smashkeyboardsmashkeyboardsmashfriojfewafweafwaefweafewafwaefwae"...(22 bytes)`, 43 | false, 44 | }, 45 | { 46 | "keyboardsmashkeyboardsmashkeyboardsmashkeyboardsmashkeyboardsmashfoofjaewu893p4u4q893ru890q2urqr2r34r2fw42er3fefewaf", 47 | "\xfaO\xed\x93QK\xb1\xa9O!\xc0\xac\x8dD\xd8\xce\xc01\x1aa\x9c\x108\xbb", 48 | `6b6579626f617264736d6173686b6579626f617264736d6173686b6579626f61...(84 bytes)`, 49 | `fa4fed93514bb1a94f21c0ac8d44d8cec0311a619c1038bb`, 50 | false, 51 | }, 52 | { 53 | "keyboardsmashkeyboardsmashkeyboardsmashkeyboardsmashkeyboardsmashfoofjaewu893p4u4q893ru890q2urqr2r34r2fw42er3fefewaf", 54 | "keyboardsmashkeyboardsmashkeyboard\xfaO\xed\x93QK\xb1\xa9O!\xc0\xac\x8dD\xd8\xce\xc01\x1aa\x9c\x108\xbb", 55 | `(18 bytes)...617264736d6173686b6579626f617264736d6173686b6579626f617264736d61...(66 bytes)`, 56 | `(18 bytes)...617264736d6173686b6579626f617264fa4fed93514bb1a94f21c0ac8d44d8ce...(8 bytes)`, 57 | false, 58 | }, 59 | } 60 | 61 | for i, v := range vectors { 62 | sa, sb, ok := BytesCompare([]byte(v.inA), []byte(v.inB)) 63 | if sa != v.outA { 64 | t.Errorf("test %d, output A mismatch:\ngot %s\nwant %s", i, sa, v.outA) 65 | } 66 | if sb != v.outB { 67 | t.Errorf("test %d, output B mismatch:\ngot %s\nwant %s", i, sb, v.outB) 68 | } 69 | if ok != v.ok { 70 | t.Errorf("test %d, output equality mismatch: got %t, want %t", i, ok, v.ok) 71 | } 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /brotli/common.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 
// neededBits computes the minimum number of bits needed to encode n elements,
// which is the bit-length of n-1. A single element therefore needs zero bits.
//
// The subtraction is unsigned, so n == 0 wraps around and yields 32.
func neededBits(n uint32) (nb uint) {
	for rem := n - 1; rem != 0; rem >>= 1 {
		nb++
	}
	return nb
}
// extendSliceUints32s returns a slice of length n, reusing the backing array
// of s when its capacity allows it.
//
// When a larger array has to be allocated, it is given 50% headroom and every
// element of the old array is carried over, including those between len(s)
// and cap(s). That keeps inner slices parked beyond the current length
// available for reuse by the caller.
func extendSliceUints32s(s [][]uint32, n int) [][]uint32 {
	if n > cap(s) {
		grown := make([][]uint32, n, n*3/2)
		copy(grown, s[:cap(s)])
		return grown
	}
	return s[:n]
}
4 | 5 | package bzip2 6 | 7 | import ( 8 | "strconv" 9 | "testing" 10 | 11 | "github.com/dsnet/compress/internal/testutil" 12 | ) 13 | 14 | func TestCRC(t *testing.T) { 15 | vectors := []struct { 16 | crc uint32 17 | str string 18 | }{ 19 | {0x00000000, ""}, 20 | {0x19939b6b, "a"}, 21 | {0xe993fdcd, "ab"}, 22 | {0x648cbb73, "abc"}, 23 | {0x3d4c334b, "abcd"}, 24 | {0xa35b4df4, "abcde"}, 25 | {0xa0f54fb9, "abcdef"}, 26 | {0x077539d7, "abcdefg"}, 27 | {0x5024ec61, "abcdefgh"}, 28 | {0x63e0bcd4, "abcdefghi"}, 29 | {0x73826444, "abcdefghij"}, 30 | {0xbf786ee7, "Discard medicine more than two years old."}, 31 | {0x106324f0, "He who has a shady past knows that nice guys finish last."}, 32 | {0x0ef9b7d7, "I wouldn't marry him with a ten foot pole."}, 33 | {0x2f42217b, "Free! Free!/A trip/to Mars/for 900/empty jars/Burma Shave"}, 34 | {0xb64c598c, "The days of the digital watch are numbered. -Tom Stoppard"}, 35 | {0xf4e5a7c3, "Nepal premier won't resign."}, 36 | {0x2b43233e, "For every action there is an equal and opposite government program."}, 37 | {0x7b83ef6f, "His money is twice tainted: 'taint yours and 'taint mine."}, 38 | {0x503c2258, "There is no reason for any individual to have a computer in their home. -Ken Olsen, 1977"}, 39 | {0x4dc300fa, "It's a tiny change to the code and not completely disgusting. - Bob Manchek"}, 40 | {0x97fa4243, "size: a.out: bad magic"}, 41 | {0xc9549847, "The major problem is with sendmail. -Mark Horton"}, 42 | {0xeaa630ab, "Give me a rock, paper and scissors and I will move the world. CCFestoon"}, 43 | {0xcd8bb88c, "If the enemy is within range, then so are you."}, 44 | {0x95cc0d9d, "It's well we cannot hear the screams/That we create in others' dreams."}, 45 | {0x14c42897, "You remind me of a TV show, but that's all right: I watch it anyway."}, 46 | {0x0de498f1, "C is as portable as Stonehedge!!"}, 47 | {0x79e7cf74, "Even if I could be Shakespeare, I think I should still choose to be Faraday. - A. 
Huxley"}, 48 | {0x33e2329e, "The fugacity of a constituent in a mixture of gases at a given temperature is proportional to its mole fraction. Lewis-Randall Rule"}, 49 | {0xa4302570, "How can you write a big system without C++? -Paul Glick"}, 50 | } 51 | 52 | var crc crc 53 | for i, v := range vectors { 54 | splits := []int{ 55 | 0 * (len(v.str) / 1), 56 | 1 * (len(v.str) / 4), 57 | 2 * (len(v.str) / 4), 58 | 3 * (len(v.str) / 4), 59 | 1 * (len(v.str) / 1), 60 | } 61 | for _, j := range splits { 62 | str1, str2 := []byte(v.str[:j]), []byte(v.str[j:]) 63 | crc.val = 0 64 | crc.update(str1) 65 | if crc.update(str2); crc.val != v.crc { 66 | t.Errorf("test %d, crc.update(crc1, str2): got 0x%08x, want 0x%08x", i, crc.val, v.crc) 67 | } 68 | } 69 | } 70 | } 71 | 72 | func BenchmarkCRC(b *testing.B) { 73 | var c crc 74 | d := testutil.ResizeData([]byte("the quick brown fox jumped over the lazy dog"), 1<<16) 75 | for i := 1; i <= len(d); i <<= 4 { 76 | b.Run(strconv.Itoa(i), func(b *testing.B) { 77 | b.SetBytes(int64(i)) 78 | for j := 0; j < b.N; j++ { 79 | c.update(d[:i]) 80 | } 81 | }) 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /testdata/repeats.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | // +build ignore 6 | 7 | //go:generate go run repeats.go 8 | 9 | // Generates repeats.bin. This test file heavily favors LZ77 based compression 10 | // since a large bulk of its data is a copy from some distance ago. Also, since 11 | // the source data is mostly random, prefix encoding does not benefit as much. 
package main

import (
	"io/ioutil"
	"math/rand"
)

const (
	name = "repeats.bin" // Output file written by main
	size = 1 << 18       // Number of bytes kept from the generated data
)

// main builds a byte stream out of short random runs and back-references to
// earlier output, then writes the first size bytes of it to name.
//
// The generator is seeded with a constant, so the order in which values are
// drawn from r fully determines the file contents; reordering any of the
// calls below changes the generated data.
func main() {
	var b []byte
	r := rand.New(rand.NewSource(0))

	// randLen draws a run length. The first six buckets are chosen with
	// probability 0.15 each and the last with the remaining 0.10.
	randLen := func() (l int) {
		p := r.Float32()
		switch {
		case p <= 0.15: // 4..7
			l = 4 + r.Intn(4)
		case p <= 0.30: // 8..15
			l = 8 + r.Intn(8)
		case p <= 0.45: // 16..31
			l = 16 + r.Intn(16)
		case p <= 0.60: // 32..63
			l = 32 + r.Intn(32)
		case p <= 0.75: // 64..127
			l = 64 + r.Intn(64)
		case p <= 0.90: // 128..255
			l = 128 + r.Intn(128)
		case p <= 1.0: // 256..511
			l = 256 + r.Intn(256)
		}
		return l
	}

	// randDist draws a copy distance, retrying until the distance is
	// non-zero and does not reach back past the start of b.
	randDist := func() (d int) {
		for d == 0 || d > len(b) {
			p := r.Float32()
			switch {
			case p <= 0.1: // 1..1
				d = 1 + r.Intn(1)
			case p <= 0.2: // 2..3
				d = 2 + r.Intn(2)
			case p <= 0.3: // 4..7
				d = 4 + r.Intn(4)
			case p <= 0.4: // 8..15
				d = 8 + r.Intn(8)
			case p <= 0.5: // 16..31
				d = 16 + r.Intn(16)
			case p <= 0.55: // 32..63
				d = 32 + r.Intn(32)
			case p <= 0.60: // 64..127
				d = 64 + r.Intn(64)
			case p <= 0.65: // 128..255
				d = 128 + r.Intn(128)
			case p <= 0.70: // 256..511
				d = 256 + r.Intn(256)
			case p <= 0.75: // 512..1023
				d = 512 + r.Intn(512)
			case p <= 0.80: // 1024..2047
				d = 1024 + r.Intn(1024)
			case p <= 0.85: // 2048..4095
				d = 2048 + r.Intn(2048)
			case p <= 0.90: // 4096..8191
				d = 4096 + r.Intn(4096)
			case p <= 0.95: // 8192..16383
				d = 8192 + r.Intn(8192)
			case p <= 1.00: // 16384..32767
				d = 16384 + r.Intn(16384)
			}
		}
		return d
	}

	// writeRand appends l random bytes to b.
	writeRand := func(l int) {
		for i := 0; i < l; i++ {
			b = append(b, byte(r.Int()))
		}
	}

	// writeCopy appends l bytes, each taken from d positions before the
	// current end of b. Copying one byte at a time lets the source overlap
	// the bytes being written when d < l.
	writeCopy := func(d, l int) {
		for i := 0; i < l; i++ {
			b = append(b, b[len(b)-d])
		}
	}

	// Seed the buffer so that the first copy has something to refer to.
	writeRand(randLen())
	for len(b) < size {
		p := r.Float32()
		switch {
		case p <= 0.1:
			// Generate random new data.
			writeRand(randLen())
		case p <= 0.9:
			// Write a long distance copy.
			// Re-roll until the distance exceeds the length, so the copy
			// never overlaps itself.
			// NOTE(review): d never exceeds len(b) and l is at least 4, so
			// this loop relies on b already holding more than 4 bytes.
			d, l := randDist(), randLen()
			for d <= l {
				d, l = randDist(), randLen()
			}
			writeCopy(d, l)
		case p <= 1.0:
			// Write a possibly short distance copy.
			writeCopy(randDist(), randLen())
		}
	}

	// The last run may overshoot size; only the first size bytes are kept.
	if err := ioutil.WriteFile(name, b[:size], 0664); err != nil {
		panic(err)
	}
}

-------------------------------------------------------------------------------- /internal/common.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | // Package internal is a collection of common compression algorithms. 6 | // 7 | // For performance reasons, these packages lack strong error checking and 8 | // require that the caller to ensure that strict invariants are kept. 9 | package internal 10 | 11 | var ( 12 | // IdentityLUT returns the input key itself. 13 | IdentityLUT = func() (lut [256]byte) { 14 | for i := range lut { 15 | lut[i] = uint8(i) 16 | } 17 | return lut 18 | }() 19 | 20 | // ReverseLUT returns the input key with its bits reversed. 21 | ReverseLUT = func() (lut [256]byte) { 22 | for i := range lut { 23 | b := uint8(i) 24 | b = (b&0xaa)>>1 | (b&0x55)<<1 25 | b = (b&0xcc)>>2 | (b&0x33)<<2 26 | b = (b&0xf0)>>4 | (b&0x0f)<<4 27 | lut[i] = b 28 | } 29 | return lut 30 | }() 31 | ) 32 | 33 | // ReverseUint32 reverses all bits of v.
34 | func ReverseUint32(v uint32) (x uint32) { 35 | x |= uint32(ReverseLUT[byte(v>>0)]) << 24 36 | x |= uint32(ReverseLUT[byte(v>>8)]) << 16 37 | x |= uint32(ReverseLUT[byte(v>>16)]) << 8 38 | x |= uint32(ReverseLUT[byte(v>>24)]) << 0 39 | return x 40 | } 41 | 42 | // ReverseUint32N reverses the lower n bits of v. 43 | func ReverseUint32N(v uint32, n uint) (x uint32) { 44 | return ReverseUint32(v << (32 - n)) 45 | } 46 | 47 | // ReverseUint64 reverses all bits of v. 48 | func ReverseUint64(v uint64) (x uint64) { 49 | x |= uint64(ReverseLUT[byte(v>>0)]) << 56 50 | x |= uint64(ReverseLUT[byte(v>>8)]) << 48 51 | x |= uint64(ReverseLUT[byte(v>>16)]) << 40 52 | x |= uint64(ReverseLUT[byte(v>>24)]) << 32 53 | x |= uint64(ReverseLUT[byte(v>>32)]) << 24 54 | x |= uint64(ReverseLUT[byte(v>>40)]) << 16 55 | x |= uint64(ReverseLUT[byte(v>>48)]) << 8 56 | x |= uint64(ReverseLUT[byte(v>>56)]) << 0 57 | return x 58 | } 59 | 60 | // ReverseUint64N reverses the lower n bits of v. 61 | func ReverseUint64N(v uint64, n uint) (x uint64) { 62 | return ReverseUint64(v << (64 - n)) 63 | } 64 | 65 | // MoveToFront is a data structure that allows for more efficient move-to-front 66 | // transformations. This specific implementation assumes that the alphabet is 67 | // densely packed within 0..255. 
68 | type MoveToFront struct { 69 | dict [256]uint8 // Mapping from indexes to values 70 | tail int // Number of tail bytes that are already ordered 71 | } 72 | 73 | func (m *MoveToFront) Encode(vals []uint8) { 74 | copy(m.dict[:], IdentityLUT[:256-m.tail]) // Reset dict to be identity 75 | 76 | var max int 77 | for i, val := range vals { 78 | var idx uint8 // Reverse lookup idx in dict 79 | for di, dv := range m.dict { 80 | if dv == val { 81 | idx = uint8(di) 82 | break 83 | } 84 | } 85 | vals[i] = idx 86 | 87 | max |= int(idx) 88 | copy(m.dict[1:], m.dict[:idx]) 89 | m.dict[0] = val 90 | } 91 | m.tail = 256 - max - 1 92 | } 93 | 94 | func (m *MoveToFront) Decode(idxs []uint8) { 95 | copy(m.dict[:], IdentityLUT[:256-m.tail]) // Reset dict to be identity 96 | 97 | var max int 98 | for i, idx := range idxs { 99 | val := m.dict[idx] // Forward lookup val in dict 100 | idxs[i] = val 101 | 102 | max |= int(idx) 103 | copy(m.dict[1:], m.dict[:idx]) 104 | m.dict[0] = val 105 | } 106 | m.tail = 256 - max - 1 107 | } 108 | -------------------------------------------------------------------------------- /xflate/flate.go: -------------------------------------------------------------------------------- 1 | // Copyright 2016, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | package xflate 6 | 7 | import ( 8 | "bufio" 9 | "compress/flate" 10 | "io" 11 | ) 12 | 13 | // TODO(dsnet): The standard library's version of flate.Reader and flate.Writer 14 | // do not track the input and output offsets. When we eventually switch over 15 | // to using the DEFLATE implementation in this repository, we can delete these. 16 | 17 | // countReader is a trivial io.Reader that counts the number of bytes read. 
18 | type countReader struct { 19 | R io.Reader 20 | N int64 21 | } 22 | 23 | func (cr *countReader) Read(buf []byte) (int, error) { 24 | n, err := cr.R.Read(buf) 25 | cr.N += int64(n) 26 | return n, err 27 | } 28 | 29 | // flateReader is a trivial wrapper around flate.Reader keeps tracks of offsets. 30 | type flateReader struct { 31 | InputOffset int64 // Total number of bytes read from underlying io.Reader 32 | OutputOffset int64 // Total number of bytes emitted from Read 33 | 34 | zr io.ReadCloser 35 | br *bufio.Reader 36 | cr countReader 37 | } 38 | 39 | func newFlateReader(rd io.Reader) (*flateReader, error) { 40 | fr := new(flateReader) 41 | fr.cr = countReader{R: rd} 42 | fr.br = bufio.NewReader(&fr.cr) 43 | fr.zr = flate.NewReader(fr.br) 44 | return fr, nil 45 | } 46 | 47 | func (fr *flateReader) Reset(rd io.Reader) { 48 | *fr = flateReader{zr: fr.zr, br: fr.br} 49 | fr.cr = countReader{R: rd} 50 | fr.br.Reset(&fr.cr) 51 | fr.zr.(flate.Resetter).Reset(fr.br, nil) 52 | } 53 | 54 | func (fr *flateReader) Read(buf []byte) (int, error) { 55 | offset := fr.cr.N - int64(fr.br.Buffered()) 56 | n, err := fr.zr.Read(buf) 57 | fr.InputOffset += (fr.cr.N - int64(fr.br.Buffered())) - offset 58 | fr.OutputOffset += int64(n) 59 | return n, errWrap(err) 60 | } 61 | 62 | // countWriter is a trivial io.Writer that counts the number of bytes written. 63 | type countWriter struct { 64 | W io.Writer 65 | N int64 66 | } 67 | 68 | func (cw *countWriter) Write(buf []byte) (int, error) { 69 | n, err := cw.W.Write(buf) 70 | cw.N += int64(n) 71 | return n, err 72 | } 73 | 74 | // flateWriter is a trivial wrapper around flate.Writer keeps tracks of offsets. 
75 | type flateWriter struct { 76 | InputOffset int64 // Total number of bytes issued to Write 77 | OutputOffset int64 // Total number of bytes written to underlying io.Writer 78 | 79 | zw *flate.Writer 80 | cw countWriter 81 | } 82 | 83 | func newFlateWriter(wr io.Writer, lvl int) (*flateWriter, error) { 84 | var err error 85 | fw := new(flateWriter) 86 | switch lvl { 87 | case 0: 88 | lvl = flate.DefaultCompression 89 | case -1: 90 | lvl = flate.NoCompression 91 | } 92 | fw.cw = countWriter{W: wr} 93 | fw.zw, err = flate.NewWriter(&fw.cw, lvl) 94 | return fw, errWrap(err) 95 | } 96 | 97 | func (fw *flateWriter) Reset(wr io.Writer) { 98 | *fw = flateWriter{zw: fw.zw} 99 | fw.cw = countWriter{W: wr} 100 | fw.zw.Reset(&fw.cw) 101 | } 102 | 103 | func (fw *flateWriter) Write(buf []byte) (int, error) { 104 | offset := fw.cw.N 105 | n, err := fw.zw.Write(buf) 106 | fw.OutputOffset += fw.cw.N - offset 107 | fw.InputOffset += int64(n) 108 | return n, errWrap(err) 109 | } 110 | 111 | func (fw *flateWriter) Flush() error { 112 | offset := fw.cw.N 113 | err := fw.zw.Flush() 114 | fw.OutputOffset += fw.cw.N - offset 115 | return errWrap(err) 116 | } 117 | -------------------------------------------------------------------------------- /internal/prefix/wrap.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | package prefix 6 | 7 | import ( 8 | "bytes" 9 | "io" 10 | "strings" 11 | ) 12 | 13 | // For some of the common Readers, we wrap and extend them to satisfy the 14 | // compress.BufferedReader interface to improve performance. 
15 | 16 | type buffer struct { 17 | *bytes.Buffer 18 | } 19 | 20 | type bytesReader struct { 21 | *bytes.Reader 22 | pos int64 23 | buf []byte 24 | arr [512]byte 25 | } 26 | 27 | type stringReader struct { 28 | *strings.Reader 29 | pos int64 30 | buf []byte 31 | arr [512]byte 32 | } 33 | 34 | func (r *buffer) Buffered() int { 35 | return r.Len() 36 | } 37 | 38 | func (r *buffer) Peek(n int) ([]byte, error) { 39 | b := r.Bytes() 40 | if len(b) < n { 41 | return b, io.EOF 42 | } 43 | return b[:n], nil 44 | } 45 | 46 | func (r *buffer) Discard(n int) (int, error) { 47 | b := r.Next(n) 48 | if len(b) < n { 49 | return len(b), io.EOF 50 | } 51 | return n, nil 52 | } 53 | 54 | func (r *bytesReader) Buffered() int { 55 | r.update() 56 | if r.Len() > len(r.buf) { 57 | return len(r.buf) 58 | } 59 | return r.Len() 60 | } 61 | 62 | func (r *bytesReader) Peek(n int) ([]byte, error) { 63 | if n > len(r.arr) { 64 | return nil, io.ErrShortBuffer 65 | } 66 | 67 | // Return sub-slice of local buffer if possible. 68 | r.update() 69 | if len(r.buf) >= n { 70 | return r.buf[:n], nil 71 | } 72 | 73 | // Fill entire local buffer, and return appropriate sub-slice. 74 | cnt, err := r.ReadAt(r.arr[:], r.pos) 75 | r.buf = r.arr[:cnt] 76 | if cnt < n { 77 | return r.arr[:cnt], err 78 | } 79 | return r.arr[:n], nil 80 | } 81 | 82 | func (r *bytesReader) Discard(n int) (int, error) { 83 | var err error 84 | if n > r.Len() { 85 | n, err = r.Len(), io.EOF 86 | } 87 | r.Seek(int64(n), io.SeekCurrent) 88 | return n, err 89 | } 90 | 91 | // update reslices the internal buffer to be consistent with the read offset. 
92 | func (r *bytesReader) update() { 93 | pos, _ := r.Seek(0, io.SeekCurrent) 94 | if off := pos - r.pos; off >= 0 && off < int64(len(r.buf)) { 95 | r.buf, r.pos = r.buf[off:], pos 96 | } else { 97 | r.buf, r.pos = nil, pos 98 | } 99 | } 100 | 101 | func (r *stringReader) Buffered() int { 102 | r.update() 103 | if r.Len() > len(r.buf) { 104 | return len(r.buf) 105 | } 106 | return r.Len() 107 | } 108 | 109 | func (r *stringReader) Peek(n int) ([]byte, error) { 110 | if n > len(r.arr) { 111 | return nil, io.ErrShortBuffer 112 | } 113 | 114 | // Return sub-slice of local buffer if possible. 115 | r.update() 116 | if len(r.buf) >= n { 117 | return r.buf[:n], nil 118 | } 119 | 120 | // Fill entire local buffer, and return appropriate sub-slice. 121 | cnt, err := r.ReadAt(r.arr[:], r.pos) 122 | r.buf = r.arr[:cnt] 123 | if cnt < n { 124 | return r.arr[:cnt], err 125 | } 126 | return r.arr[:n], nil 127 | } 128 | 129 | func (r *stringReader) Discard(n int) (int, error) { 130 | var err error 131 | if n > r.Len() { 132 | n, err = r.Len(), io.EOF 133 | } 134 | r.Seek(int64(n), io.SeekCurrent) 135 | return n, err 136 | } 137 | 138 | // update reslices the internal buffer to be consistent with the read offset. 139 | func (r *stringReader) update() { 140 | pos, _ := r.Seek(0, io.SeekCurrent) 141 | if off := pos - r.pos; off >= 0 && off < int64(len(r.buf)) { 142 | r.buf, r.pos = r.buf[off:], pos 143 | } else { 144 | r.buf, r.pos = nil, pos 145 | } 146 | } 147 | -------------------------------------------------------------------------------- /bzip2/common.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | // Package bzip2 implements the BZip2 compressed data format. 
//
// Canonical C implementation:
//	http://bzip.org
//
// Unofficial format specification:
//	https://github.com/dsnet/compress/blob/master/doc/bzip2-format.pdf
package bzip2

import (
	"fmt"
	"hash/crc32"

	"github.com/dsnet/compress/internal"
	"github.com/dsnet/compress/internal/errors"
)

// There does not exist a formal specification of the BZip2 format. As such,
// much of this work is derived by either reverse engineering the original C
// source code or using secondary sources.
//
// A significant amount of fuzz testing is done to ensure that output from
// this package is properly decoded by the C library. Furthermore, we test that
// both this package and the C library agree about which inputs are invalid.
//
// Compression stack:
//	Run-length encoding 1 (RLE1)
//	Burrows-Wheeler transform (BWT)
//	Move-to-front transform (MTF)
//	Run-length encoding 2 (RLE2)
//	Prefix encoding (PE)
//
// References:
//	http://bzip.org/
//	https://en.wikipedia.org/wiki/Bzip2
//	https://code.google.com/p/jbzip2/

// Compression levels.
// NOTE(review): presumably these are the values accepted by the Writer's
// configuration; the consumer is not visible in this part of the file.
const (
	BestSpeed          = 1
	BestCompression    = 9
	DefaultCompression = 6
)

const (
	hdrMagic = 0x425a         // Hex of "BZ"
	blkMagic = 0x314159265359 // BCD of PI
	endMagic = 0x177245385090 // BCD of sqrt(PI)

	// NOTE(review): presumably the block size unit that a compression level
	// is multiplied by; confirm against the reader and writer.
	blockSize = 100000
)

// errorf returns an errors.Error attributed to the "bzip2" package, carrying
// the error code c and the message produced by formatting f with a.
func errorf(c int, f string, a ...interface{}) error {
	return errors.Error{Code: c, Pkg: "bzip2", Msg: fmt.Sprintf(f, a...)}
}

// panicf builds the same error as errorf and raises it through errors.Panic,
// so it is meant to be caught by a deferred errors.Recover.
func panicf(c int, f string, a ...interface{}) {
	errors.Panic(errorf(c, f, a...))
}

// errWrap converts a lower-level errors.Error to be one from this package.
// The replaceCode passed in will be used to replace the code for any errors
// with the errors.Invalid code.
//
// For the Reader, set this to errors.Corrupted.
69 | // For the Writer, set this to errors.Internal. 70 | func errWrap(err error, replaceCode int) error { 71 | if cerr, ok := err.(errors.Error); ok { 72 | if errors.IsInvalid(cerr) { 73 | cerr.Code = replaceCode 74 | } 75 | err = errorf(cerr.Code, "%s", cerr.Msg) 76 | } 77 | return err 78 | } 79 | 80 | var errClosed = errorf(errors.Closed, "") 81 | 82 | // crc computes the CRC-32 used by BZip2. 83 | // 84 | // The CRC-32 computation in bzip2 treats bytes as having bits in big-endian 85 | // order. That is, the MSB is read before the LSB. Thus, we can use the 86 | // standard library version of CRC-32 IEEE with some minor adjustments. 87 | // 88 | // The byte array is used as an intermediate buffer to swap the bits of every 89 | // byte of the input. 90 | type crc struct { 91 | val uint32 92 | buf [256]byte 93 | } 94 | 95 | // update computes the CRC-32 of appending buf to c. 96 | func (c *crc) update(buf []byte) { 97 | cval := internal.ReverseUint32(c.val) 98 | for len(buf) > 0 { 99 | n := len(buf) 100 | if n > len(c.buf) { 101 | n = len(c.buf) 102 | } 103 | for i, b := range buf[:n] { 104 | c.buf[i] = internal.ReverseLUT[b] 105 | } 106 | cval = crc32.Update(cval, crc32.IEEETable, c.buf[:n]) 107 | buf = buf[n:] 108 | } 109 | c.val = internal.ReverseUint32(cval) 110 | } 111 | -------------------------------------------------------------------------------- /bzip2/bwt.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | package bzip2 6 | 7 | import "github.com/dsnet/compress/bzip2/internal/sais" 8 | 9 | // The Burrows-Wheeler Transform implementation used here is based on the 10 | // Suffix Array by Induced Sorting (SA-IS) methodology by Nong, Zhang, and Chan. 11 | // This implementation uses the sais algorithm originally written by Yuta Mori. 
//
// The SA-IS algorithm runs in O(n) and outputs a Suffix Array. There is a
// mathematical relationship between Suffix Arrays and the Burrows-Wheeler
// Transform, such that a SA can be converted to a BWT in O(n) time.
//
// References:
//	http://www.hpl.hp.com/techreports/Compaq-DEC/SRC-RR-124.pdf
//	https://github.com/cscott/compressjs/blob/master/lib/BWT.js
//	https://www.quora.com/How-can-I-optimize-burrows-wheeler-transform-and-inverse-transform-to-work-in-O-n-time-O-n-space
type burrowsWheelerTransform struct {
	buf  []byte   // Scratch: doubled input in Encode, output staging in Decode
	sa   []int    // Scratch: suffix array computed by Encode
	perm []uint32 // Scratch: permutation table built by Decode
}

// Encode applies the transform to buf in place and returns the origin
// pointer, i.e. the output position belonging to the rotation that starts at
// input offset 0. It returns -1 for an empty input.
func (bwt *burrowsWheelerTransform) Encode(buf []byte) (ptr int) {
	if len(buf) == 0 {
		return -1
	}

	// TODO(dsnet): Find a way to avoid the duplicate input string method.
	// We only need to do this because suffix arrays (by definition) only
	// operate on non-wrapped suffixes of a string. On the other hand,
	// the BWT specifically used in bzip2 operates on strings that wrap around
	// when being sorted.

	// Step 1: Concatenate the input string to itself so that we can use the
	// suffix array algorithm for bzip2's variant of BWT.
	n := len(buf)
	bwt.buf = append(append(bwt.buf[:0], buf...), buf...)
	if cap(bwt.sa) < 2*n {
		bwt.sa = make([]int, 2*n)
	}
	t := bwt.buf[:2*n]
	sa := bwt.sa[:2*n]

	// Step 2: Compute the suffix array (SA). The input string, t, will not be
	// modified, while the results will be written to the output, sa.
	sais.ComputeSA(t, sa)

	// Step 3: Convert the SA to a BWT. Since ComputeSA does not mutate the
	// input, we have two copies of the input; in buf and buf2. Thus, we write
	// the transformation to buf, while using buf2.
	var j int
	buf2 := t[n:]
	for _, i := range sa {
		// Suffixes starting in the second copy are skipped; only the first
		// n start positions correspond to rotations of the input.
		if i < n {
			if i == 0 {
				// The rotation starting at offset 0 is preceded by the
				// last byte of the input; record its output position.
				ptr = j
				i = n
			}
			buf[j] = buf2[i-1]
			j++
		}
	}
	return ptr
}

// Decode inverts the transform in place, given the origin pointer ptr that
// Encode returned. It does nothing for an empty input. ptr is used as an
// index without validation, so it must be within the bounds of buf.
func (bwt *burrowsWheelerTransform) Decode(buf []byte, ptr int) {
	if len(buf) == 0 {
		return
	}

	// Step 1: Compute cumm, where cumm[ch] reports the total number of
	// characters that precede the character ch in the alphabet.
	var cumm [256]int
	for _, v := range buf {
		cumm[v]++
	}
	var sum int
	for i, v := range cumm {
		cumm[i] = sum
		sum += v
	}

	// Step 2: Compute perm, where perm[ptr] contains a pointer to the next
	// byte in buf and the next pointer in perm itself.
	if cap(bwt.perm) < len(buf) {
		bwt.perm = make([]uint32, len(buf))
	}
	perm := bwt.perm[:len(buf)]
	for i, b := range buf {
		perm[cumm[b]] = uint32(i)
		cumm[b]++
	}

	// Step 3: Follow each pointer in perm to the next byte, starting with the
	// origin pointer.
	if cap(bwt.buf) < len(buf) {
		bwt.buf = make([]byte, len(buf))
	}
	buf2 := bwt.buf[:len(buf)]
	i := perm[ptr]
	for j := range buf2 {
		buf2[j] = buf[i]
		i = perm[i]
	}
	// The output was staged in scratch space; copy it over the input.
	copy(buf, buf2)
}

-------------------------------------------------------------------------------- /bzip2/mtf_rle2.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | package bzip2 6 | 7 | import "github.com/dsnet/compress/internal/errors" 8 | 9 | // moveToFront implements both the MTF and RLE stages of bzip2 at the same time. 10 | // Any runs of zeros in the encoded output will be replaced by a sequence of 11 | // RUNA and RUNB symbols are encode the length of the run.
12 | // 13 | // The RLE encoding used can actually be encoded to and decoded from using 14 | // normal two's complement arithmetic. The methodology for doing so is below. 15 | // 16 | // Assuming the following: 17 | // num: The value being encoded by RLE encoding. 18 | // run: A sequence of RUNA and RUNB symbols represented as a binary integer, 19 | // where RUNA is the 0 bit, RUNB is the 1 bit, and least-significant RUN 20 | // symbols are at the least-significant bit positions. 21 | // cnt: The number of RUNA and RUNB symbols. 22 | // 23 | // Then the RLE encoding used by bzip2 has this mathematical property: 24 | // num+1 == (1< len(mtf.dictBuf) { 36 | panicf(errors.Internal, "alphabet too large") 37 | } 38 | copy(mtf.dictBuf[:], dict) 39 | mtf.dictLen = len(dict) 40 | mtf.blkSize = blkSize 41 | } 42 | 43 | func (mtf *moveToFront) Encode(vals []byte) (syms []uint16) { 44 | dict := mtf.dictBuf[:mtf.dictLen] 45 | syms = mtf.syms[:0] 46 | 47 | if len(vals) > mtf.blkSize { 48 | panicf(errors.Internal, "exceeded block size") 49 | } 50 | 51 | var lastNum uint32 52 | for _, val := range vals { 53 | // Normal move-to-front transform. 54 | var idx uint8 // Reverse lookup idx in dict 55 | for di, dv := range dict { 56 | if dv == val { 57 | idx = uint8(di) 58 | break 59 | } 60 | } 61 | copy(dict[1:], dict[:idx]) 62 | dict[0] = val 63 | 64 | // Run-length encoding augmentation. 
65 | if idx == 0 { 66 | lastNum++ 67 | continue 68 | } 69 | if lastNum > 0 { 70 | for rc := lastNum + 1; rc != 1; rc >>= 1 { 71 | syms = append(syms, uint16(rc&1)) 72 | } 73 | lastNum = 0 74 | } 75 | syms = append(syms, uint16(idx)+1) 76 | } 77 | if lastNum > 0 { 78 | for rc := lastNum + 1; rc != 1; rc >>= 1 { 79 | syms = append(syms, uint16(rc&1)) 80 | } 81 | } 82 | mtf.syms = syms 83 | return syms 84 | } 85 | 86 | func (mtf *moveToFront) Decode(syms []uint16) (vals []byte) { 87 | dict := mtf.dictBuf[:mtf.dictLen] 88 | vals = mtf.vals[:0] 89 | 90 | var lastCnt uint 91 | var lastRun uint32 92 | for _, sym := range syms { 93 | // Run-length encoding augmentation. 94 | if sym < 2 { 95 | lastRun |= uint32(sym) << lastCnt 96 | lastCnt++ 97 | continue 98 | } 99 | if lastCnt > 0 { 100 | cnt := int((1< mtf.blkSize || lastCnt > 24 { 102 | panicf(errors.Corrupted, "run-length decoding exceeded block size") 103 | } 104 | for i := cnt; i > 0; i-- { 105 | vals = append(vals, dict[0]) 106 | } 107 | lastCnt, lastRun = 0, 0 108 | } 109 | 110 | // Normal move-to-front transform. 111 | val := dict[sym-1] // Forward lookup val in dict 112 | copy(dict[1:], dict[:sym-1]) 113 | dict[0] = val 114 | 115 | if len(vals) >= mtf.blkSize { 116 | panicf(errors.Corrupted, "run-length decoding exceeded block size") 117 | } 118 | vals = append(vals, val) 119 | } 120 | if lastCnt > 0 { 121 | cnt := int((1< mtf.blkSize || lastCnt > 24 { 123 | panicf(errors.Corrupted, "run-length decoding exceeded block size") 124 | } 125 | for i := cnt; i > 0; i-- { 126 | vals = append(vals, dict[0]) 127 | } 128 | } 129 | mtf.vals = vals 130 | return vals 131 | } 132 | -------------------------------------------------------------------------------- /internal/errors/errors.go: -------------------------------------------------------------------------------- 1 | // Copyright 2016, Joe Tsai. All rights reserved. 
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.

// Package errors implements functions to manipulate compression errors.
//
// In idiomatic Go, it is an anti-pattern to use panics as a form of error
// reporting in the API. Instead, the expected way to transmit errors is by
// returning an error value. Unfortunately, the checking of "err != nil" in
// tight loops commonly found in compression causes non-negligible performance
// degradation. While this may not be idiomatic, the internal packages of this
// repository rely on panics as a normal means to convey errors. In order to
// ensure that these panics do not leak across the public API, the public
// packages must recover from these panics and present an error value.
//
// The Panic and Recover functions in this package provide a safe way to
// recover from errors only generated from within this repository.
//
// Example usage:
//	func Foo() (err error) {
//		defer errors.Recover(&err)
//
//		if rand.Intn(2) == 0 {
//			// Unexpected panics will not be caught by Recover.
//			io.Closer(nil).Close()
//		} else {
//			// Errors thrown by Panic will be caught by Recover.
//			errors.Panic(errors.New("whoopsie"))
//		}
//	}
//
package errors

import "strings"

const (
	// Unknown indicates that there is no classification for this error.
	Unknown = iota

	// Internal indicates that this error is due to an internal bug.
	// Users should file an issue report if this type of error is encountered.
	Internal

	// Invalid indicates that this error is due to the user misusing the API
	// and is indicative of a bug on the user's part.
	Invalid

	// Deprecated indicates the use of a deprecated and unsupported feature.
	Deprecated

	// Corrupted indicates that the input stream is corrupted.
	Corrupted

	// Closed indicates that the handlers are closed.
	Closed
)

// codeMap holds the human-readable description of each error code, as used
// by Error.Error.
var codeMap = map[int]string{
	Unknown:    "unknown error",
	Internal:   "internal error",
	Invalid:    "invalid argument",
	Deprecated: "deprecated format",
	Corrupted:  "corrupted input",
	Closed:     "closed handler",
}

// Error is the error value used throughout this repository. It classifies a
// failure by Code and records which package reported it.
type Error struct {
	Code int    // The error type
	Pkg  string // Name of the package where the error originated
	Msg  string // Descriptive message about the error (optional)
}

// Error joins the package name, the description of the code, and the message
// with ": ", leaving out any part that is empty.
func (e Error) Error() string {
	var ss []string
	for _, s := range []string{e.Pkg, codeMap[e.Code], e.Msg} {
		if s != "" {
			ss = append(ss, s)
		}
	}
	return strings.Join(ss, ": ")
}

// CompressError is an empty marker method.
// NOTE(review): presumably it lets callers recognise errors from this
// repository through an interface declared elsewhere; confirm against the
// public API. The Is* methods report whether Code equals the named code.
func (e Error) CompressError()     {}
func (e Error) IsInternal() bool   { return e.Code == Internal }
func (e Error) IsInvalid() bool    { return e.Code == Invalid }
func (e Error) IsDeprecated() bool { return e.Code == Deprecated }
func (e Error) IsCorrupted() bool  { return e.Code == Corrupted }
func (e Error) IsClosed() bool     { return e.Code == Closed }

// The Is* functions report whether err is an Error value carrying the named
// code. They return false for nil and for errors of any other type.
func IsInternal(err error) bool   { return isCode(err, Internal) }
func IsInvalid(err error) bool    { return isCode(err, Invalid) }
func IsDeprecated(err error) bool { return isCode(err, Deprecated) }
func IsCorrupted(err error) bool  { return isCode(err, Corrupted) }
func IsClosed(err error) bool     { return isCode(err, Closed) }

// isCode reports whether err is an Error value whose Code equals code. Only a
// direct type assertion is performed, so pointers to Error do not match.
func isCode(err error, code int) bool {
	if cerr, ok := err.(Error); ok && cerr.Code == code {
		return true
	}
	return false
}

// errWrap is used by Panic and Recover to ensure that only errors raised by
// Panic are recovered by Recover.
type errWrap struct{ e *error }

// Recover stops a panic raised by Panic and stores the carried error in *err.
// Any other panic value is re-raised unchanged, and nothing happens when
// there is no panic in flight.
//
// Recover must itself be the deferred call ("defer errors.Recover(&err)"):
// the built-in recover only has an effect when it is called directly by a
// deferred function.
func Recover(err *error) {
	switch ex := recover().(type) {
	case nil:
		// Do nothing.
	case errWrap:
		*err = *ex.e
	default:
		panic(ex)
	}
}

// Panic raises err as a panic wrapped in errWrap, which marks it as one that
// Recover is allowed to catch.
func Panic(err error) {
	panic(errWrap{&err})
}

-------------------------------------------------------------------------------- /bzip2/rle1_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | package bzip2 6 | 7 | import ( 8 | "bytes" 9 | "io" 10 | "strings" 11 | "testing" 12 | 13 | "github.com/dsnet/compress/internal/testutil" 14 | ) 15 | 16 | func TestRunLengthEncoder(t *testing.T) { 17 | vectors := []struct { 18 | size int 19 | input string 20 | output string 21 | done bool 22 | }{{ 23 | size: 0, 24 | input: "", 25 | output: "", 26 | }, { 27 | size: 6, 28 | input: "abc", 29 | output: "abc", 30 | }, { 31 | size: 6, 32 | input: "abcccc", 33 | output: "abccc", 34 | done: true, 35 | }, { 36 | size: 7, 37 | input: "abcccc", 38 | output: "abcccc\x00", 39 | }, { 40 | size: 14, 41 | input: "aaaabbbbcccc", 42 | output: "aaaa\x00bbbb\x00ccc", 43 | done: true, 44 | }, { 45 | size: 15, 46 | input: "aaaabbbbcccc", 47 | output: "aaaa\x00bbbb\x00cccc\x00", 48 | }, { 49 | size: 16, 50 | input: strings.Repeat("a", 4), 51 | output: "aaaa\x00", 52 | }, { 53 | size: 16, 54 | input: strings.Repeat("a", 255), 55 | output: "aaaa\xfb", 56 | }, { 57 | size: 16, 58 | input: strings.Repeat("a", 256), 59 | output: "aaaa\xfba", 60 | }, { 61 | size: 16, 62 | input: strings.Repeat("a", 259), 63 | output: "aaaa\xfbaaaa\x00", 64 | }, { 65 | size: 16, 66 | input: strings.Repeat("a", 500), 67 | output: "aaaa\xfbaaaa\xf1", 68 | }, { 69 | size: 64, 70 | input: "aaabbbcccddddddeeefgghiiijkllmmmmmmmmnnoo", 71 | output:
"aaabbbcccdddd\x02eeefgghiiijkllmmmm\x04nnoo", 72 | }} 73 | 74 | buf := make([]byte, 3) 75 | for i, v := range vectors { 76 | rd := strings.NewReader(v.input) 77 | rle := new(runLengthEncoding) 78 | rle.Init(make([]byte, v.size)) 79 | _, err := io.CopyBuffer(rle, struct{ io.Reader }{rd}, buf) 80 | output := rle.Bytes() 81 | 82 | if got, want, ok := testutil.BytesCompare(output, []byte(v.output)); !ok { 83 | t.Errorf("test %d, output mismatch:\ngot %s\nwant %s", i, got, want) 84 | } 85 | if done := err == rleDone; done != v.done { 86 | t.Errorf("test %d, done mismatch: got %v want %v", i, done, v.done) 87 | } 88 | } 89 | } 90 | 91 | func TestRunLengthDecoder(t *testing.T) { 92 | vectors := []struct { 93 | input string 94 | output string 95 | fail bool 96 | }{{ 97 | input: "", 98 | output: "", 99 | }, { 100 | input: "abc", 101 | output: "abc", 102 | }, { 103 | input: "aaaa", 104 | output: "aaaa", 105 | fail: true, 106 | }, { 107 | input: "baaaa\x00aaaa", 108 | output: "baaaaaaaa", 109 | fail: true, 110 | }, { 111 | input: "abcccc\x00", 112 | output: "abcccc", 113 | }, { 114 | input: "aaaa\x00bbbb\x00ccc", 115 | output: "aaaabbbbccc", 116 | }, { 117 | input: "aaaa\x00bbbb\x00cccc\x00", 118 | output: "aaaabbbbcccc", 119 | }, { 120 | input: "aaaa\x00aaaa\x00aaaa\x00", 121 | output: "aaaaaaaaaaaa", 122 | }, { 123 | input: "aaaa\xffaaaa\xffaaaa\xff", 124 | output: strings.Repeat("a", 259*3), 125 | }, { 126 | input: "bbbaaaa\xffaaaa\xffaaaa\xff", 127 | output: "bbb" + strings.Repeat("a", 259*3), 128 | }, { 129 | input: "aaaa\x00", 130 | output: strings.Repeat("a", 4), 131 | }, { 132 | input: "aaaa\xfb", 133 | output: strings.Repeat("a", 255), 134 | }, { 135 | input: "aaaa\xfba", 136 | output: strings.Repeat("a", 256), 137 | }, { 138 | input: "aaaa\xfbaaaa\x00", 139 | output: strings.Repeat("a", 259), 140 | }, { 141 | input: "aaaa\xfbaaaa\xf1", 142 | output: strings.Repeat("a", 500), 143 | }, { 144 | input: "aaabbbcccdddd\x02eeefgghiiijkllmmmm\x04nnoo", 145 | output: 
"aaabbbcccddddddeeefgghiiijkllmmmmmmmmnnoo", 146 | }} 147 | 148 | buf := make([]byte, 3) 149 | for i, v := range vectors { 150 | wr := new(bytes.Buffer) 151 | rle := new(runLengthEncoding) 152 | rle.Init([]byte(v.input)) 153 | _, err := io.CopyBuffer(struct{ io.Writer }{wr}, rle, buf) 154 | output := wr.Bytes() 155 | 156 | if got, want, ok := testutil.BytesCompare(output, []byte(v.output)); !ok { 157 | t.Errorf("test %d, output mismatch:\ngot %s\nwant %s", i, got, want) 158 | } 159 | if fail := err != rleDone; fail != v.fail { 160 | t.Errorf("test %d, failure mismatch: got %t, want %t", i, fail, v.fail) 161 | } 162 | } 163 | } 164 | -------------------------------------------------------------------------------- /internal/tool/fuzz/bzip2/bzip2.go: -------------------------------------------------------------------------------- 1 | // Copyright 2016, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | // +build gofuzz 6 | 7 | package bzip2 8 | 9 | import ( 10 | "bytes" 11 | "errors" 12 | "io/ioutil" 13 | 14 | "github.com/dsnet/compress" 15 | gbzip2 "github.com/dsnet/compress/bzip2" 16 | cbzip2 "github.com/dsnet/compress/internal/cgo/bzip2" 17 | ) 18 | 19 | func Fuzz(data []byte) int { 20 | data, ok := testDecoders(data, true) 21 | for i := 1; i <= 9; i++ { 22 | testGoEncoder(data, i) 23 | testCEncoder(data, i) 24 | } 25 | if ok { 26 | return 1 // Favor valid inputs 27 | } 28 | return 0 29 | } 30 | 31 | // testDecoders tests that the input can be handled by both Go and C decoders. 32 | // This test does not panic if both decoders run into an error, since it 33 | // means that they both agree that the input is bad. 34 | // 35 | // If updateCRCs is set, then the Go bzip2 implementation will ignore all 36 | // checksum errors and manually adjust the checksum values before running the 37 | // C implementation. 
This hack drastically increases the probability that 38 | // gofuzz can generate a "valid" file. 39 | func testDecoders(data []byte, updateCRCs bool) ([]byte, bool) { 40 | // Decompress using the Go decoder. 41 | gr, err := gbzip2.NewReader(bytes.NewReader(data), nil) 42 | if err != nil { 43 | panic(err) 44 | } 45 | gb, gerr := ioutil.ReadAll(gr) 46 | if err := gr.Close(); gerr == nil { 47 | gerr = err 48 | } else if gerr != nil && err == nil { 49 | panic("nil on Close after non-nil error") 50 | } 51 | 52 | // Check or update the checksums. 53 | if gerr == nil { 54 | if updateCRCs { 55 | data = gr.Checksums.Apply(data) 56 | } else if !gr.Checksums.Verify(data) { 57 | gerr = errors.New("bzip2: checksum error") 58 | } 59 | } 60 | 61 | // Decompress using the C decoder. 62 | cr := cbzip2.NewReader(bytes.NewReader(data)) 63 | cb, cerr := ioutil.ReadAll(cr) 64 | if err := cr.Close(); cerr == nil { 65 | cerr = err 66 | } else if cerr != nil && err == nil { 67 | panic("nil on Close after non-nil error") 68 | } 69 | 70 | switch { 71 | case gerr == nil && cerr == nil: 72 | if !bytes.Equal(gb, cb) { 73 | panic("mismatching bytes") 74 | } 75 | return gb, true 76 | case gerr != nil && cerr == nil: 77 | // Ignore deprecated errors since there are no plans to provide 78 | // these features in the Go implementation. 79 | if err, ok := gerr.(compress.Error); ok && err.IsDeprecated() { 80 | return cb, false 81 | } 82 | panic(gerr) 83 | case gerr == nil && cerr != nil: 84 | panic(cerr) 85 | default: 86 | // Ensure that both gb and cb have the same common prefix. 87 | if !bytes.HasPrefix(gb, cb) && !bytes.HasPrefix(cb, gb) { 88 | panic("mismatching leading bytes") 89 | } 90 | return nil, false 91 | } 92 | } 93 | 94 | // testGoEncoder encodes the input data with the Go encoder and then checks that 95 | // both the Go and C decoders can properly decompress the output. 96 | func testGoEncoder(data []byte, level int) { 97 | // Compress using the Go encoder. 
98 | bb := new(bytes.Buffer) 99 | gw, err := gbzip2.NewWriter(bb, &gbzip2.WriterConfig{Level: level}) 100 | if err != nil { 101 | panic(err) 102 | } 103 | defer gw.Close() 104 | n, err := gw.Write(data) 105 | if n != len(data) || err != nil { 106 | panic(err) 107 | } 108 | if err := gw.Close(); err != nil { 109 | panic(err) 110 | } 111 | 112 | // Decompress using both the Go and C decoders. 113 | b, ok := testDecoders(bb.Bytes(), false) 114 | if !ok { 115 | panic("decoder error") 116 | } 117 | if !bytes.Equal(b, data) { 118 | panic("mismatching bytes") 119 | } 120 | } 121 | 122 | // testCEncoder encodes the input data with the C encoder and then checks that 123 | // both the Go and C decoders can properly decompress the output. 124 | func testCEncoder(data []byte, level int) { 125 | // Compress using the C encoder. 126 | bb := new(bytes.Buffer) 127 | cw := cbzip2.NewWriter(bb, level) 128 | defer cw.Close() 129 | n, err := cw.Write(data) 130 | if n != len(data) || err != nil { 131 | panic(err) 132 | } 133 | if err := cw.Close(); err != nil { 134 | panic(err) 135 | } 136 | 137 | // Decompress using both the Go and C decoders. 
138 | b, ok := testDecoders(bb.Bytes(), false) 139 | if !ok { 140 | panic("decoder error") 141 | } 142 | if !bytes.Equal(b, data) { 143 | panic("mismatching bytes") 144 | } 145 | } 146 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Collection of compression libraries for Go # 2 | 3 | [![GoDoc](https://godoc.org/github.com/dsnet/compress/cmp?status.svg)](https://godoc.org/github.com/dsnet/compress) 4 | [![Build Status](https://travis-ci.org/dsnet/compress.svg?branch=master)](https://travis-ci.org/dsnet/compress) 5 | [![Report Card](https://goreportcard.com/badge/github.com/dsnet/compress)](https://goreportcard.com/report/github.com/dsnet/compress) 6 | 7 | ## Introduction ## 8 | 9 | **NOTE: This library is in active development. As such, there are no guarantees about the stability of the API. The author reserves the right to arbitrarily break the API for any reason.** 10 | 11 | This repository hosts a collection of compression related libraries. The goal of this project is to provide pure Go implementations for popular compression algorithms beyond what the Go standard library provides. The goals for these packages are as follows: 12 | * Maintainable: That the code remains well documented, well tested, readable, easy to maintain, and easy to verify that it conforms to the specification for the format being implemented. 13 | * Performant: To be able to compress and decompress within at least 80% of the rates that the C implementations are able to achieve. 14 | * Flexible: That the code provides low-level and fine granularity control over the compression streams similar to what the C APIs would provide. 15 | 16 | Of these three, the first objective is often at odds with the other two objectives and provides interesting challenges. 
Higher performance can often be achieved by muddling abstraction layers or using non-intuitive low-level primitives. Also, more features and functionality, while useful in some situations, often complicate the API. Thus, this package will attempt to satisfy all the goals, but will defer to favoring maintainability when the performance or flexibility benefits are not significant enough. 17 | 18 | 19 | ## Library Status ## 20 | 21 | For the packages available, only some features are currently implemented: 22 | 23 | | Package | Reader | Writer | 24 | | ------- | :----: | :----: | 25 | | brotli | :white_check_mark: | | 26 | | bzip2 | :white_check_mark: | :white_check_mark: | 27 | | flate | :white_check_mark: | | 28 | | xflate | :white_check_mark: | :white_check_mark: | 29 | 30 | This library is in active development. As such, there are no guarantees about the stability of the API. The author reserves the right to arbitrarily break the API for any reason. When the library becomes more mature, it is planned to eventually conform to some strict versioning scheme like [Semantic Versioning](http://semver.org/). 31 | 32 | However, in the meantime, this library does provide some basic API guarantees. For the types defined below, the method signatures are guaranteed to not change. Note that the author still reserves the right to change the fields within the ```Reader``` and ```Writer``` structs. 33 | ```go 34 | type ReaderConfig struct { ... } 35 | type Reader struct { ... } 36 | func NewReader(io.Reader, *ReaderConfig) (*Reader, error) { ... } 37 | func (*Reader) Read([]byte) (int, error) { ... } 38 | func (*Reader) Close() error { ... } 39 | 40 | type WriterConfig struct { ... } 41 | type Writer struct { ... } 42 | func NewWriter(io.Writer, *WriterConfig) (*Writer, error) { ... } 43 | func (*Writer) Write([]byte) (int, error) { ... } 44 | func (*Writer) Close() error { ... 
} 45 | ``` 46 | 47 | To see what work still remains, see the [Task List](https://github.com/dsnet/compress/wiki/Task-List). 48 | 49 | ## Performance ## 50 | 51 | See [Performance Metrics](https://github.com/dsnet/compress/wiki/Performance-Metrics). 52 | 53 | 54 | ## Frequently Asked Questions ## 55 | 56 | See [Frequently Asked Questions](https://github.com/dsnet/compress/wiki/Frequently-Asked-Questions). 57 | 58 | 59 | ## Installation ## 60 | 61 | Run the command: 62 | 63 | ```go get -u github.com/dsnet/compress``` 64 | 65 | This library requires `Go1.9` or higher in order to build. 66 | 67 | 68 | ## Packages ## 69 | 70 | | Package | Description | 71 | | :------ | :---------- | 72 | | [brotli](http://godoc.org/github.com/dsnet/compress/brotli) | Package brotli implements the Brotli format, described in RFC 7932. | 73 | | [bzip2](http://godoc.org/github.com/dsnet/compress/bzip2) | Package bzip2 implements the BZip2 compressed data format. | 74 | | [flate](http://godoc.org/github.com/dsnet/compress/flate) | Package flate implements the DEFLATE format, described in RFC 1951. | 75 | | [xflate](http://godoc.org/github.com/dsnet/compress/xflate) | Package xflate implements the XFLATE format, a random-access extension to DEFLATE. | 76 | -------------------------------------------------------------------------------- /internal/prefix/debug.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 
// max returns the larger of a and b.
func max(a, b int) int {
	if a > b {
		return a
	}
	return b
}

// lenBase2 returns ceil(log2(n+1)), which is the number of binary digits
// needed to print n. The result for zero is 0.
func lenBase2(n uint) int {
	return int(math.Ceil(math.Log2(float64(n + 1))))
}

// padBase2 formats v as exactly n binary digits (keeping leading zeros) and
// left-pads the result with spaces to a width of at least m characters.
func padBase2(v, n uint, m int) string {
	// Setting bit n as a sentinel forces "%b" to emit the leading zeros of
	// the n-bit value; the sentinel digit itself is then sliced off.
	s := fmt.Sprintf("%b", 1<<n|v)[1:]
	if pad := m - len(s); pad > 0 {
		return strings.Repeat(" ", pad) + s
	}
	return s
}

// lenBase10 returns ceil(log10(n+1)), which is the number of decimal digits
// needed to print n. The result for zero is 0.
func lenBase10(n int) int {
	return int(math.Ceil(math.Log10(float64(n + 1))))
}

// padBase10 formats n in decimal and left-pads the result with spaces to a
// width of at least m characters.
func padBase10(n, m int) string {
	s := fmt.Sprintf("%d", n)
	if pad := m - len(s); pad > 0 {
		return strings.Repeat(" ", pad) + s
	}
	return s
}
lenBase10(maxSym)), 89 | padBase2(uint(c.Val), uint(c.Len), maxLen), 90 | cntStr, 91 | )) 92 | } 93 | ss = append(ss, "}") 94 | return strings.Join(ss, "\n") 95 | } 96 | 97 | func (pd Decoder) String() string { 98 | var ss []string 99 | ss = append(ss, "{") 100 | if len(pd.chunks) > 0 { 101 | ss = append(ss, "\tchunks: {") 102 | for i, c := range pd.chunks { 103 | label := "sym" 104 | if uint(c&countMask) > uint(pd.chunkBits) { 105 | label = "idx" 106 | } 107 | ss = append(ss, fmt.Sprintf("\t\t%s: {%s: %s, len: %s}", 108 | padBase2(uint(i), uint(pd.chunkBits), int(pd.chunkBits)), 109 | label, padBase10(int(c>>countBits), 3), 110 | padBase10(int(c&countMask), 2), 111 | )) 112 | } 113 | ss = append(ss, "\t},") 114 | 115 | for j, links := range pd.links { 116 | ss = append(ss, fmt.Sprintf("\tlinks[%d]: {", j)) 117 | linkBits := lenBase2(uint(pd.linkMask)) 118 | for i, c := range links { 119 | ss = append(ss, fmt.Sprintf("\t\t%s: {sym: %s, len: %s},", 120 | padBase2(uint(i), uint(linkBits), int(linkBits)), 121 | padBase10(int(c>>countBits), 3), 122 | padBase10(int(c&countMask), 2), 123 | )) 124 | } 125 | ss = append(ss, "\t},") 126 | } 127 | } 128 | ss = append(ss, fmt.Sprintf("\tchunkMask: %b,", pd.chunkMask)) 129 | ss = append(ss, fmt.Sprintf("\tlinkMask: %b,", pd.linkMask)) 130 | ss = append(ss, fmt.Sprintf("\tchunkBits: %d,", pd.chunkBits)) 131 | ss = append(ss, fmt.Sprintf("\tMinBits: %d,", pd.MinBits)) 132 | ss = append(ss, fmt.Sprintf("\tNumSyms: %d,", pd.NumSyms)) 133 | ss = append(ss, "}") 134 | return strings.Join(ss, "\n") 135 | } 136 | 137 | func (pe Encoder) String() string { 138 | var maxLen int 139 | for _, c := range pe.chunks { 140 | maxLen = max(maxLen, int(c&countMask)) 141 | } 142 | 143 | var ss []string 144 | ss = append(ss, "{") 145 | if len(pe.chunks) > 0 { 146 | ss = append(ss, "\tchunks: {") 147 | for i, c := range pe.chunks { 148 | ss = append(ss, fmt.Sprintf("\t\t%s: %s,", 149 | padBase10(i, 3), 150 | padBase2(uint(c>>countBits), 
// The dictDecoder implements the LZ77 sliding dictionary that is commonly used
// in various compression formats. For performance reasons, this implementation
// performs little to no sanity checks about the arguments. As such, the
// invariants documented for each method call must be respected. Furthermore,
// to reduce the memory footprint decompressing short streams, the dictionary
// starts with a relatively small size and then lazily grows.

const (
	initSize   = 4096 // Initial size allocated for sliding dictionary
	growFactor = 4    // Rate the dictionary is grown to match expected size
)

type dictDecoder struct {
	// Invariant: len(hist) <= size
	size int    // Sliding window size
	hist []byte // Sliding window history, dynamically grown to match size

	// Invariant: 0 <= rdPos <= wrPos <= len(hist)
	wrPos int  // Current output position in buffer
	rdPos int  // Have emitted hist[:rdPos] already
	full  bool // Has a full window length been written yet?
}

// Init resets the decoder for a window of the given size. A history buffer
// left over from earlier use is reused; otherwise a buffer of initSize bytes
// is allocated no matter how large size is, so that a stream cannot force a
// large allocation up front.
func (dd *dictDecoder) Init(size int) {
	buf := dd.hist
	if buf == nil {
		buf = make([]byte, initSize)
	}
	buf = buf[:cap(buf)]
	if len(buf) > size {
		buf = buf[:size]
	}
	// LastBytes relies on never-written history reading as zero.
	for i := range buf {
		buf[i] = 0
	}
	*dd = dictDecoder{size: size, hist: buf}
}

// HistSize reports the total amount of historical data in the dictionary.
func (dd *dictDecoder) HistSize() int {
	if !dd.full {
		return dd.wrPos
	}
	return dd.size
}

// AvailSize reports the available amount of output buffer space.
func (dd *dictDecoder) AvailSize() int {
	used, total := dd.wrPos, len(dd.hist)
	return total - used
}

// WriteSlice returns a slice of the available buffer to write data to.
//
// This invariant will be kept: len(s) <= AvailSize()
func (dd *dictDecoder) WriteSlice() []byte {
	free := dd.hist[dd.wrPos:]
	return free
}

// WriteMark advances the writer pointer by cnt.
//
// This invariant must be kept: 0 <= cnt <= AvailSize()
func (dd *dictDecoder) WriteMark(cnt int) {
	dd.wrPos = dd.wrPos + cnt
}

// WriteCopy copies a string at a given (distance, length) to the output.
// This returns the number of bytes copied and may be less than the requested
// length if the available space in the output buffer is too small.
//
// This invariant must be kept: 0 < dist <= HistSize()
func (dd *dictDecoder) WriteCopy(dist, length int) int {
	hist := dd.hist
	start := dd.wrPos
	pos := start

	limit := start + length
	if limit > len(hist) {
		limit = len(hist)
	}

	src := start - dist
	if src < 0 {
		// The source begins in the tail of the buffer, i.e. in data written
		// before the write position last wrapped around. Copy that tail
		// first, then continue reading from the front of the buffer.
		src += len(hist)
		pos += copy(hist[pos:limit], hist[src:])
		src = 0
	}

	// The source may overlap the destination. Each pass copies what has been
	// written so far, so a short pattern is repeated until limit is reached.
	for pos < limit {
		pos += copy(hist[pos:limit], hist[src:pos])
	}

	dd.wrPos = pos
	return pos - start
}

// ReadFlush returns a slice of the historical buffer that is ready to be
// emitted to the user. A call to ReadFlush is only valid after all of the data
// from a previous call to ReadFlush has been consumed.
func (dd *dictDecoder) ReadFlush() []byte {
	pending := dd.hist[dd.rdPos:dd.wrPos]
	dd.rdPos = dd.wrPos
	if dd.wrPos != len(dd.hist) {
		return pending
	}

	// The buffer is completely filled.
	if len(dd.hist) == dd.size {
		// Already at full window size: wrap around to the start.
		dd.wrPos, dd.rdPos = 0, 0
		dd.full = true
		return pending
	}

	// Allocate a larger history buffer, capped at the window size.
	grown := cap(dd.hist) * growFactor
	if grown > dd.size {
		grown = dd.size
	}
	next := make([]byte, grown)
	copy(next, dd.hist)
	dd.hist = next
	return pending
}

// LastBytes reports the last 2 bytes in the dictionary. If they do not exist,
// then zero values are returned.
func (dd *dictDecoder) LastBytes() (p1, p2 byte) {
	end := len(dd.hist)
	switch {
	case dd.wrPos > 1:
		return dd.hist[dd.wrPos-1], dd.hist[dd.wrPos-2]
	case dd.wrPos > 0:
		// Only one byte has been written since the position wrapped; the
		// second-to-last byte is the final byte of the buffer.
		return dd.hist[dd.wrPos-1], dd.hist[end-1]
	default:
		return dd.hist[end-1], dd.hist[end-2]
	}
}
However, rather than recording the starting offset of 17 | // each chunk, only the ending offsets are recorded. 18 | // 19 | // The starting record {0, 0} is not included since it is implied. 20 | // The last record effectively holds the total size of the stream. 21 | Records []record 22 | 23 | BackSize int64 // Size of previous index when encoded 24 | IndexSize int64 // Size of this index when encoded 25 | } 26 | 27 | type record struct { 28 | CompOffset int64 // Offset in compressed stream where decompression can start from 29 | RawOffset int64 // The uncompressed offset that CompOffset is associated with 30 | Type int // Type of the record 31 | } 32 | 33 | // Reset resets the index. 34 | func (idx *index) Reset() { 35 | *idx = index{Records: idx.Records[:0]} 36 | } 37 | 38 | // AppendRecord appends a new record to the end of the index and reports whether 39 | // the operation was successful or not. 40 | func (idx *index) AppendRecord(compSize, rawSize int64, typ int) bool { 41 | if rawSize < 0 || compSize < 0 { 42 | return false // Invalid size 43 | } 44 | 45 | lastRec := idx.LastRecord() 46 | rec := record{ 47 | CompOffset: lastRec.CompOffset + compSize, 48 | RawOffset: lastRec.RawOffset + rawSize, 49 | Type: typ, 50 | } 51 | if rec.CompOffset < lastRec.CompOffset || rec.RawOffset < lastRec.RawOffset { 52 | return false // Overflow detected 53 | } 54 | idx.Records = append(idx.Records, rec) 55 | return true 56 | } 57 | 58 | // AppendIndex appends the contents of another index onto the current receiver 59 | // and reports whether the operation was successful or not. 
60 | func (idx *index) AppendIndex(other *index) bool { 61 | var preRec record 62 | for i, rec := range other.Records { 63 | csize, rsize := rec.CompOffset-preRec.CompOffset, rec.RawOffset-preRec.RawOffset 64 | if !idx.AppendRecord(csize, rsize, rec.Type) { 65 | idx.Records = idx.Records[:len(idx.Records)-i] // Ensure atomic append 66 | return false 67 | } 68 | preRec = rec 69 | } 70 | return true 71 | } 72 | 73 | // Search searches for the record that best matches the raw offset given. 74 | // This search will return the location of the record with the lowest 75 | // RawOffset that is still greater than the given offset. 76 | // It return -1 if such a record does not exist. 77 | // 78 | // This method is intended to be used in conjunction with GetRecords, 79 | // which returns a pair of records (prev, curr). 80 | // With these records, the following can be computed: 81 | // 82 | // // Where in the underlying reader the decompressor should start from. 83 | // compOffset := prev.CompOffset 84 | // 85 | // // The total number of uncompressed bytes to discard to reach offset. 86 | // rawDiscard := offset - prev.RawOffset 87 | // 88 | // // The total compressed size of the current block. 89 | // compSize := curr.CompOffset - prev.CompOffset 90 | // 91 | // // The total uncompressed size of the current block. 92 | // rawSize := curr.RawOffset - prev.RawOffset 93 | // 94 | func (idx *index) Search(offset int64) int { 95 | recs := idx.Records 96 | i, imin, imax := -1, 0, len(recs)-1 97 | for imax >= imin && i == -1 { 98 | imid := (imin + imax) / 2 99 | gteCurr := bool(offset >= recs[imid].RawOffset) 100 | ltNext := bool(imid+1 >= len(recs) || offset < recs[imid+1].RawOffset) 101 | if gteCurr && ltNext { 102 | i = imid 103 | } else if gteCurr { 104 | imin = imid + 1 105 | } else { 106 | imax = imid - 1 107 | } 108 | } 109 | return i + 1 110 | } 111 | 112 | // GetRecords returns the previous and current records at the given position. 
113 | // This method will automatically bind the search position within the bounds 114 | // of the index. Thus, this will return zero value records if the position is 115 | // too low, and the last record if the value is too high. 116 | func (idx *index) GetRecords(i int) (prev, curr record) { 117 | recs := idx.Records 118 | if i > len(recs) { 119 | i = len(recs) 120 | } 121 | if i-1 >= 0 && i-1 < len(recs) { 122 | prev = recs[i-1] 123 | } 124 | if i >= 0 && i < len(recs) { 125 | curr = recs[i] 126 | } else { 127 | curr = prev 128 | curr.Type = unknownType 129 | } 130 | return prev, curr 131 | } 132 | 133 | // LastRecord returns the last record if it exists, otherwise the zero value. 134 | func (idx *index) LastRecord() record { 135 | var rec record 136 | if len(idx.Records) > 0 { 137 | rec = idx.Records[len(idx.Records)-1] 138 | } 139 | return rec 140 | } 141 | -------------------------------------------------------------------------------- /internal/prefix/decoder.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | package prefix 6 | 7 | import ( 8 | "sort" 9 | 10 | "github.com/dsnet/compress/internal" 11 | ) 12 | 13 | // The algorithm used to decode variable length codes is based on the lookup 14 | // method in zlib. If the code is less-than-or-equal to maxChunkBits, 15 | // then the symbol can be decoded using a single lookup into the chunks table. 16 | // Otherwise, the links table will be used for a second level lookup. 17 | // 18 | // The chunks slice is keyed by the contents of the bit buffer ANDed with 19 | // the chunkMask to avoid a out-of-bounds lookup. 
The value of chunks is a tuple 20 | // that is decoded as follow: 21 | // 22 | // var length = chunks[bitBuffer&chunkMask] & countMask 23 | // var symbol = chunks[bitBuffer&chunkMask] >> countBits 24 | // 25 | // If the decoded length is larger than chunkBits, then an overflow link table 26 | // must be used for further decoding. In this case, the symbol is actually the 27 | // index into the links tables. The second-level links table returned is 28 | // processed in the same way as the chunks table. 29 | // 30 | // if length > chunkBits { 31 | // var index = symbol // Previous symbol is index into links tables 32 | // length = links[index][bitBuffer>>chunkBits & linkMask] & countMask 33 | // symbol = links[index][bitBuffer>>chunkBits & linkMask] >> countBits 34 | // } 35 | // 36 | // See the following: 37 | // http://www.gzip.org/algorithm.txt 38 | 39 | type Decoder struct { 40 | chunks []uint32 // First-level lookup map 41 | links [][]uint32 // Second-level lookup map 42 | chunkMask uint32 // Mask the length of the chunks table 43 | linkMask uint32 // Mask the length of the link table 44 | chunkBits uint32 // Bit-length of the chunks table 45 | 46 | MinBits uint32 // The minimum number of bits to safely make progress 47 | NumSyms uint32 // Number of symbols 48 | } 49 | 50 | // Init initializes Decoder according to the codes provided. 51 | func (pd *Decoder) Init(codes PrefixCodes) { 52 | // Handle special case trees. 53 | if len(codes) <= 1 { 54 | switch { 55 | case len(codes) == 0: // Empty tree (should error if used later) 56 | *pd = Decoder{chunks: pd.chunks[:0], links: pd.links[:0], NumSyms: 0} 57 | case len(codes) == 1 && codes[0].Len == 0: // Single code tree (bit-length of zero) 58 | pd.chunks = append(pd.chunks[:0], codes[0].Sym< c.Len { 75 | minBits = c.Len 76 | } 77 | if maxBits < c.Len { 78 | maxBits = c.Len 79 | } 80 | } 81 | 82 | // Allocate chunks table as needed. 
83 | const maxChunkBits = 9 // This can be tuned for better performance 84 | pd.NumSyms = uint32(len(codes)) 85 | pd.MinBits = minBits 86 | pd.chunkBits = maxBits 87 | if pd.chunkBits > maxChunkBits { 88 | pd.chunkBits = maxChunkBits 89 | } 90 | numChunks := 1 << pd.chunkBits 91 | pd.chunks = allocUint32s(pd.chunks, numChunks) 92 | pd.chunkMask = uint32(numChunks - 1) 93 | 94 | // Allocate links tables as needed. 95 | pd.links = pd.links[:0] 96 | pd.linkMask = 0 97 | if pd.chunkBits < maxBits { 98 | numLinks := 1 << (maxBits - pd.chunkBits) 99 | pd.linkMask = uint32(numLinks - 1) 100 | 101 | var linkIdx uint32 102 | for i := range pd.chunks { 103 | pd.chunks[i] = 0 // Logic below relies on zero value as uninitialized 104 | } 105 | for _, c := range codes { 106 | if c.Len > pd.chunkBits && pd.chunks[c.Val&pd.chunkMask] == 0 { 107 | pd.chunks[c.Val&pd.chunkMask] = (linkIdx << countBits) | (pd.chunkBits + 1) 108 | linkIdx++ 109 | } 110 | } 111 | 112 | pd.links = extendSliceUint32s(pd.links, int(linkIdx)) 113 | linksFlat := allocUint32s(pd.links[0], numLinks*int(linkIdx)) 114 | for i, j := 0, 0; i < len(pd.links); i, j = i+1, j+numLinks { 115 | pd.links[i] = linksFlat[j : j+numLinks] 116 | } 117 | } 118 | 119 | // Fill out chunks and links tables with values. 120 | for _, c := range codes { 121 | chunk := c.Sym<> countBits 129 | links := pd.links[linkIdx] 130 | skip := 1 << uint(c.Len-pd.chunkBits) 131 | for j := int(c.Val >> pd.chunkBits); j < len(links); j += skip { 132 | links[j] = chunk 133 | } 134 | } 135 | } 136 | } 137 | -------------------------------------------------------------------------------- /brotli/context.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | package brotli 6 | 7 | // These constants are defined in RFC section 7.1. 
8 | const ( 9 | contextLSB6 = iota 10 | contextMSB6 11 | contextUTF8 12 | contextSigned 13 | 14 | numContextModes 15 | ) 16 | 17 | // These constants are defined in RFC sections 2 and 7.3. 18 | const ( 19 | maxLitContextIDs = 64 20 | maxDistContextIDs = 4 21 | ) 22 | 23 | // These LUTs are taken directly from RFC section 7.1. 24 | var ( 25 | contextLUT0 = [256]uint8{ 26 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 4, 0, 0, 27 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28 | 8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12, 29 | 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12, 30 | 12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48, 31 | 52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12, 32 | 12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56, 33 | 60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12, 0, 34 | 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 35 | 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 36 | 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 37 | 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 38 | 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 39 | 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 40 | 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 41 | 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 42 | } 43 | 44 | contextLUT1 = [256]uint8{ 45 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 46 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 47 | 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 48 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 49 | 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 50 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 51 | 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 52 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0, 53 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 54 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 56 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 57 | 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 58 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 59 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 60 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 61 | } 62 | 63 | contextLUT2 = [256]uint8{ 64 | 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 65 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 66 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 67 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 68 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 69 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 70 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 71 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 72 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 73 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 74 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 75 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 76 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 77 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 78 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 79 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 80 | } 81 | ) 82 | 83 | // These LUTs are dynamically computed from the LUTs in the specification. 84 | var ( 85 | contextP1LUT [256 * numContextModes]uint8 86 | contextP2LUT [256 * numContextModes]uint8 87 | ) 88 | 89 | // initContextLUTs computes LUTs so that context ID computation can be 90 | // done efficiently without any branches. 91 | func initContextLUTs() { 92 | for i := 0; i < 256; i++ { 93 | for m := 0; m < numContextModes; m++ { 94 | base := m << 8 95 | 96 | // Operations performed here are specified in RFC section 7.1. 
97 | switch m { 98 | case contextLSB6: 99 | contextP1LUT[base+i] = byte(i) & 0x3f 100 | contextP2LUT[base+i] = 0 101 | case contextMSB6: 102 | contextP1LUT[base+i] = byte(i) >> 2 103 | contextP2LUT[base+i] = 0 104 | case contextUTF8: 105 | contextP1LUT[base+i] = contextLUT0[byte(i)] 106 | contextP2LUT[base+i] = contextLUT1[byte(i)] 107 | case contextSigned: 108 | contextP1LUT[base+i] = contextLUT2[byte(i)] << 3 109 | contextP2LUT[base+i] = contextLUT2[byte(i)] 110 | default: 111 | panic("unknown context mode") 112 | } 113 | } 114 | } 115 | } 116 | 117 | // getLitContextID computes the context ID for literals from RFC section 7.1. 118 | // Bytes p1 and p2 are the last and second-to-last byte, respectively. 119 | func getLitContextID(p1, p2 byte, mode uint8) uint8 { 120 | base := uint(mode) << 8 121 | return contextP1LUT[base+uint(p1)] | contextP2LUT[base+uint(p2)] 122 | } 123 | 124 | // getDistContextID computes the context ID for distances using the copy length 125 | // as specified in RFC section 7.2. 126 | func getDistContextID(l int) uint8 { 127 | if l > 4 { 128 | return 3 129 | } 130 | return uint8(l - 2) 131 | } 132 | -------------------------------------------------------------------------------- /bzip2/bwt_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 
4 | 5 | package bzip2 6 | 7 | import ( 8 | "testing" 9 | 10 | "github.com/dsnet/compress/internal/testutil" 11 | ) 12 | 13 | func TestBurrowsWheelerTransform(t *testing.T) { 14 | vectors := []struct { 15 | input []byte // The input test string 16 | output []byte // Expected output string after BWT 17 | ptr int // The BWT origin pointer 18 | }{{ 19 | input: []byte(""), 20 | output: []byte(""), 21 | ptr: -1, 22 | }, { 23 | input: []byte("Hello, world!"), 24 | output: []byte(",do!lHrellwo "), 25 | ptr: 3, 26 | }, { 27 | input: []byte("SIX.MIXED.PIXIES.SIFT.SIXTY.PIXIE.DUST.BOXES"), 28 | output: []byte("TEXYDST.E.IXIXIXXSSMPPS.B..E.S.EUSFXDIIOIIIT"), 29 | ptr: 29, 30 | }, { 31 | input: []byte("0123456789"), 32 | output: []byte("9012345678"), 33 | ptr: 0, 34 | }, { 35 | input: []byte("9876543210"), 36 | output: []byte("1234567890"), 37 | ptr: 9, 38 | }, { 39 | input: []byte("The quick brown fox jumped over the lazy dog."), 40 | output: []byte("kynxederg.l ie hhpv otTu c uwd rfm eb qjoooza"), 41 | ptr: 9, 42 | }, { 43 | input: []byte("" + 44 | "Mary had a little lamb, its fleece was white as snow" + 45 | "Mary had a little lamb, its fleece was white as snow" + 46 | "Mary had a little lamb, its fleece was white as snow" + 47 | "Mary had a little lamb, its fleece was white as snow" + 48 | "Mary had a little lamb, its fleece was white as snow" + 49 | "Mary had a little lamb, its fleece was white as snow" + 50 | "Mary had a little lamb, its fleece was white as snow" + 51 | "Mary had a little lamb, its fleece was white as snow" + 52 | "Nary had a little lamb, its fleece was white as snow"), 53 | output: []byte("" + 54 | "dddddddddeeeeeeeeesssssssssyyyyyyyyy,,,,,,,,,eeeeeee" + 55 | "eeaaaaaaaaassssssssseeeeeeeeesssssssssbbbbbbbbbwwwww" + 56 | "wwww hhhhhhhhhlllllllllNMMMMMMMM www" + 57 | "wwwwwwmmmmmmmmmeeeeeeeeeaaaaaaaaatttttttttlllllllllc" + 58 | "cccccccceeeeeeeeelllllllll wwwwwwww" + 59 | "whhhhhhhhh lllllllll tttttttttffffff" + 60 | "fff 
aaaaaaaaasssssssssnnnnnnnnnaaaaaaaaatttt" + 61 | "tttttaaaaaaaaaaaaaaaaaa iiiiiiiiitttttttttii" + 62 | "iiiiiiiiiiiiiiiiooooooooo rrrrrrrrr"), 63 | ptr: 99, 64 | }, { 65 | input: []byte("" + 66 | "AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTCTCTGAC" + 67 | "AGCAGCTTCTGAACTGGTTACCTGCCGTGAGTAAATTAAAATTTTATTGACTTAGGTCACTAAA" + 68 | "TACTTTAACCAATATAGGCATAGCGCACAGACAGATAAAAATTACAGAGTACACAACATCCATG" + 69 | "AAACGCATTAGCACCACCATTACCACCACCATCACCACCACCATCACCATTACCATTACCACAG" + 70 | "GTAACGGTGCGGGCTGACGCGTACAGGAAACACAGAAAAAAGCCCGCACCTGACAGTGCGGGCT" + 71 | "TTTTTTTCGACCAAAGGTAACGAGGTAACAACCATGCGAGTGTTGAAGTTCGGCGGTACATCAG" + 72 | "TGGCAAATGCAGAACGTTTTCTGCGGGTTGCCGATATTCTGGAAAGCAATGCCAGGCAGGGGCA"), 73 | output: []byte("" + 74 | "TAGAATAAATGGAGACTCTAATACTCTACTGGAAACAGACCACAAACATACCTGGTCGTAGATT" + 75 | "CCCCCCATCCCTAAGAAACGAGTCCCCACATCATCACCTCGACTGGGCCGAGACTAAGCCCCCA" + 76 | "ACTGAACCCCCTTACGAAGGCGGAAGCTCCGCCCTGTAGAAAAGACGAATGCCAACCCCCGTAA" + 77 | "AAAAAAGAATAAAAGGCGAATAGCGCAATAGGGGAGCAATTTTCGTACTTATAGAGGAGTGATT" + 78 | "ATTCTTTCTAACACGGTGGACACTAGGCTATTTATTTGCGAAGATTTGGAACGGGCCCACAAAC" + 79 | "ACTGAGGGACGGATCGATATAGATGCTATCGGTGGGTGGTTTTATAATAAATAAGATATTGGTC" + 80 | "TTTCACTCCCCTGCAATCAGGCCGGCAGCGAATAAAAGACTTTGCATAGAGCTTTTACTGTTTC"), 81 | ptr: 99, 82 | }, { 83 | input: testutil.MustLoadFile("testdata/gauntlet_test3.bin"), 84 | output: testutil.MustLoadFile("testdata/gauntlet_test3.bwt"), 85 | ptr: 0, 86 | }, { 87 | input: testutil.MustLoadFile("testdata/silesia_ooffice.bin"), 88 | output: testutil.MustLoadFile("testdata/silesia_ooffice.bwt"), 89 | ptr: 461, 90 | }, { 91 | input: testutil.MustLoadFile("testdata/silesia_xray.bin"), 92 | output: testutil.MustLoadFile("testdata/silesia_xray.bwt"), 93 | ptr: 1532, 94 | }, { 95 | input: testutil.MustLoadFile("testdata/testfiles_test3.bin"), 96 | output: testutil.MustLoadFile("testdata/testfiles_test3.bwt"), 97 | ptr: 0, 98 | }, { 99 | input: testutil.MustLoadFile("testdata/testfiles_test4.bin"), 100 | output: 
testutil.MustLoadFile("testdata/testfiles_test4.bwt"), 101 | ptr: 1026, 102 | }} 103 | 104 | bwt := new(burrowsWheelerTransform) 105 | for i, v := range vectors { 106 | output := append([]byte(nil), v.input...) 107 | ptr := bwt.Encode(output) 108 | input := append([]byte(nil), v.output...) 109 | bwt.Decode(input, ptr) 110 | 111 | if got, want, ok := testutil.BytesCompare(input, v.input); !ok { 112 | t.Errorf("test %d, input mismatch:\ngot %s\nwant %s", i, got, want) 113 | } 114 | if got, want, ok := testutil.BytesCompare(output, v.output); !ok { 115 | t.Errorf("test %d, output mismatch:\ngot %s\nwant %s", i, got, want) 116 | } 117 | if ptr != v.ptr { 118 | t.Errorf("test %d, pointer mismatch: got %d, want %d", i, ptr, v.ptr) 119 | } 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /internal/testutil/util.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | // Package testutil is a collection of testing helper methods. 6 | package testutil 7 | 8 | import ( 9 | "bytes" 10 | "encoding/hex" 11 | "fmt" 12 | "io" 13 | "io/ioutil" 14 | "strings" 15 | ) 16 | 17 | // ResizeData resizes the input. If n < 0, then the original input will be 18 | // returned as is. If n <= len(input), then the input slice will be truncated. 19 | // However, if n > len(input), then the input will be replicated to fill in 20 | // the missing bytes, but each replicated string will be XORed by some byte 21 | // mask to avoid favoring algorithms with large LZ77 windows. 22 | // 23 | // If n > len(input), then len(input) must be > 0. 
24 | func ResizeData(input []byte, n int) []byte { 25 | if n < 0 { 26 | return input 27 | } 28 | if len(input) >= n { 29 | return input[:n] 30 | } 31 | if len(input) == 0 { 32 | panic("unable to replicate an empty string") 33 | } 34 | 35 | var mask byte 36 | output := make([]byte, n) 37 | for i := range output { 38 | idx := i % len(input) 39 | output[i] = input[idx] ^ mask 40 | if idx == len(input)-1 { 41 | mask++ 42 | } 43 | } 44 | return output 45 | } 46 | 47 | // MustLoadFile must load a file or else panics. 48 | func MustLoadFile(file string) []byte { 49 | b, err := ioutil.ReadFile(file) 50 | if err != nil { 51 | panic(err) 52 | } 53 | return b 54 | } 55 | 56 | // MustDecodeHex must decode a hexadecimal string or else panics. 57 | func MustDecodeHex(s string) []byte { 58 | b, err := hex.DecodeString(s) 59 | if err != nil { 60 | panic(err) 61 | } 62 | return b 63 | } 64 | 65 | // MustDecodeBitGen must decode a BitGen formatted string or else panics. 66 | func MustDecodeBitGen(s string) []byte { 67 | b, err := DecodeBitGen(s) 68 | if err != nil { 69 | panic(err) 70 | } 71 | return b 72 | } 73 | 74 | // BytesCompare compares inputs a and b and reports whether they are equal. 75 | // 76 | // If they are not equal, it returns two one-line strings that are 77 | // representative of the differences between the two strings. 78 | // The output will be quoted strings if it seems like the data is text, 79 | // otherwise, it will use hexadecimal strings. 
//
// Example usage:
//
//	if got, want, ok := testutil.BytesCompare(output, v.output); !ok {
//		t.Errorf("output mismatch:\ngot %s\nwant %s", got, want)
//	}
func BytesCompare(a, b []byte) (sa, sb string, ok bool) {
	if bytes.Equal(a, b) {
		return "", "", true
	}

	// Index of the first differing byte, i.e. the common prefix length.
	diffAt := 0
	for diffAt < len(a) && diffAt < len(b) && a[diffAt] == b[diffAt] {
		diffAt++
	}

	// render formats a window of at most limit bytes from each input using
	// the given fmt verb, noting how many bytes were cut from either end.
	render := func(verb string, limit int) (string, string) {
		// Begin half a window before the first difference so that some
		// matching bytes are shown for context, but never so late that
		// either input has fewer than limit bytes left to display.
		skip := diffAt - limit/2
		if skip < 0 {
			skip = 0
		}
		if most := len(a) - limit; skip > most {
			skip = most
		}
		if most := len(b) - limit; skip > most {
			skip = most
		}

		x, y := a, b
		var prefix, xSuffix, ySuffix string
		if skip > 0 {
			x, y = x[skip:], y[skip:]
			prefix = fmt.Sprintf("(%d bytes)...", skip)
		}
		if extra := len(x) - limit; extra > 0 {
			xSuffix = fmt.Sprintf("...(%d bytes)", extra)
			x = x[:limit]
		}
		if extra := len(y) - limit; extra > 0 {
			ySuffix = fmt.Sprintf("...(%d bytes)", extra)
			y = y[:limit]
		}
		layout := "%s" + verb + "%s"
		return fmt.Sprintf(layout, prefix, x, xSuffix), fmt.Sprintf(layout, prefix, y, ySuffix)
	}

	const window = 64
	sa, sb = render("%q", window) // Try quoted text first
	escapes := strings.Count(sa+sb, `\u`) + strings.Count(sa+sb, `\x`)
	if escapes > window/8 {
		// Too many escape sequences: the data is probably binary, so show
		// hexadecimal instead (half as many bytes, since hex is twice as wide).
		sa, sb = render("%x", window/2)
	}
	return sa, sb, false
}

// BuggyReader returns Err after N bytes have been read from R.
145 | type BuggyReader struct { 146 | R io.Reader 147 | N int64 // Number of valid bytes to read 148 | Err error // Return this error after N bytes 149 | } 150 | 151 | func (br *BuggyReader) Read(buf []byte) (int, error) { 152 | if int64(len(buf)) > br.N { 153 | buf = buf[:br.N] 154 | } 155 | n, err := br.R.Read(buf) 156 | br.N -= int64(n) 157 | if err == nil && br.N <= 0 { 158 | return n, br.Err 159 | } 160 | return n, err 161 | } 162 | 163 | // BuggyWriter returns Err after N bytes have been written to W. 164 | type BuggyWriter struct { 165 | W io.Writer 166 | N int64 // Number of valid bytes to write 167 | Err error // Return this error after N bytes 168 | } 169 | 170 | func (bw *BuggyWriter) Write(buf []byte) (int, error) { 171 | if int64(len(buf)) > bw.N { 172 | buf = buf[:bw.N] 173 | } 174 | n, err := bw.W.Write(buf) 175 | bw.N -= int64(n) 176 | if err == nil && bw.N <= 0 { 177 | return n, bw.Err 178 | } 179 | return n, err 180 | } 181 | -------------------------------------------------------------------------------- /bzip2/bzip2_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | package bzip2 6 | 7 | import ( 8 | "bytes" 9 | "errors" 10 | "flag" 11 | "io" 12 | "os/exec" 13 | "strings" 14 | "testing" 15 | 16 | "github.com/dsnet/compress/internal/testutil" 17 | ) 18 | 19 | var zcheck = flag.Bool("zcheck", false, "verify test vectors with C bzip2 library") 20 | 21 | func cmdCompress(input []byte) ([]byte, error) { return cmdExec(input, "-z") } 22 | func cmdDecompress(input []byte) ([]byte, error) { return cmdExec(input, "-d") } 23 | 24 | // cmdExec executes the bzip2 tool, passing the input in as stdin. 25 | // It returns the stdout and an error. 
26 | func cmdExec(input []byte, args ...string) ([]byte, error) { 27 | var bo, be bytes.Buffer 28 | cmd := exec.Command("bzip2", args...) 29 | cmd.Stdin = bytes.NewReader(input) 30 | cmd.Stdout = &bo 31 | cmd.Stderr = &be 32 | err := cmd.Run() 33 | ss := strings.Split(strings.TrimSpace(be.String()), "\n") 34 | if len(ss) > 0 && ss[len(ss)-1] != "" { 35 | // Assume any stderr indicates an error and last line is the message. 36 | return nil, errors.New(ss[len(ss)-1]) 37 | } 38 | return bo.Bytes(), err 39 | } 40 | 41 | var testdata = []struct { 42 | name string 43 | data []byte 44 | ratio float64 // The minimum expected ratio (uncompressed / compressed) 45 | }{ 46 | {"Nil", nil, 0}, 47 | {"Binary", testutil.MustLoadFile("../testdata/binary.bin"), 5.68}, 48 | {"Digits", testutil.MustLoadFile("../testdata/digits.txt"), 2.22}, 49 | {"Huffman", testutil.MustLoadFile("../testdata/huffman.txt"), 1.24}, 50 | {"Random", testutil.MustLoadFile("../testdata/random.bin"), 0.98}, 51 | {"Repeats", testutil.MustLoadFile("../testdata/repeats.bin"), 3.93}, 52 | {"Twain", testutil.MustLoadFile("../testdata/twain.txt"), 2.99}, 53 | {"Zeros", testutil.MustLoadFile("../testdata/zeros.bin"), 5825.0}, 54 | } 55 | 56 | var levels = []struct { 57 | name string 58 | level int 59 | }{ 60 | {"Speed", BestSpeed}, 61 | {"Default", DefaultCompression}, 62 | {"Compression", BestCompression}, 63 | } 64 | 65 | var sizes = []struct { 66 | name string 67 | size int 68 | }{ 69 | {"1e4", 1e4}, 70 | {"1e5", 1e5}, 71 | {"1e6", 1e6}, 72 | } 73 | 74 | func TestRoundTrip(t *testing.T) { 75 | for _, v := range testdata { 76 | v := v 77 | t.Run(v.name, func(t *testing.T) { 78 | t.Parallel() 79 | 80 | var buf1, buf2 bytes.Buffer 81 | 82 | // Compress the input. 
83 | wr, err := NewWriter(&buf1, nil) 84 | if err != nil { 85 | t.Errorf("NewWriter() = (_, %v), want (_, nil)", err) 86 | } 87 | n, err := io.Copy(wr, bytes.NewReader(v.data)) 88 | if n != int64(len(v.data)) || err != nil { 89 | t.Errorf("Copy() = (%d, %v), want (%d, nil)", n, err, len(v.data)) 90 | } 91 | if err := wr.Close(); err != nil { 92 | t.Errorf("Close() = %v, want nil", err) 93 | } 94 | 95 | // Verify that the compression ratio is within expected bounds. 96 | ratio := float64(len(v.data)) / float64(buf1.Len()) 97 | if ratio < v.ratio { 98 | t.Errorf("poor compression ratio: %0.2f < %0.2f", ratio, v.ratio) 99 | } 100 | 101 | // Verify that the C library can decompress the output of Writer and 102 | // that the Reader can decompress the output of the C library. 103 | if *zcheck { 104 | zd, err := cmdDecompress(buf1.Bytes()) 105 | if err != nil { 106 | t.Errorf("unexpected cmdDecompress error: %v", err) 107 | } 108 | if got, want, ok := testutil.BytesCompare(zd, v.data); !ok { 109 | t.Errorf("output data mismatch:\ngot %s\nwant %s", got, want) 110 | } 111 | zc, err := cmdCompress(v.data) 112 | if err != nil { 113 | t.Errorf("unexpected cmdCompress error: %v", err) 114 | } 115 | zratio := float64(len(v.data)) / float64(len(zc)) 116 | if ratio < 0.9*zratio { 117 | t.Errorf("poor compression ratio: %0.2f < %0.2f", ratio, 0.9*zratio) 118 | } 119 | buf1.Reset() 120 | buf1.Write(zc) // Use output of C library for Reader test 121 | } 122 | 123 | // Decompress the output. 
124 | rd, err := NewReader(&buf1, nil) 125 | if err != nil { 126 | t.Errorf("NewReader() = (_, %v), want (_, nil)", err) 127 | } 128 | n, err = io.Copy(&buf2, rd) 129 | if n != int64(len(v.data)) || err != nil { 130 | t.Errorf("Copy() = (%d, %v), want (%d, nil)", n, err, len(v.data)) 131 | } 132 | if err := rd.Close(); err != nil { 133 | t.Errorf("Close() = %v, want nil", err) 134 | } 135 | if got, want, ok := testutil.BytesCompare(buf2.Bytes(), v.data); !ok { 136 | t.Errorf("output data mismatch:\ngot %s\nwant %s", got, want) 137 | } 138 | }) 139 | } 140 | } 141 | 142 | func runBenchmarks(b *testing.B, f func(b *testing.B, buf []byte, lvl int)) { 143 | for _, td := range testdata { 144 | if len(td.data) == 0 { 145 | continue 146 | } 147 | if testing.Short() && !(td.name == "Twain" || td.name == "Digits") { 148 | continue 149 | } 150 | for _, tl := range levels { 151 | for _, ts := range sizes { 152 | buf := testutil.ResizeData(td.data, ts.size) 153 | b.Run(td.name+"/"+tl.name+"/"+ts.name, func(b *testing.B) { 154 | f(b, buf, tl.level) 155 | }) 156 | } 157 | } 158 | } 159 | } 160 | -------------------------------------------------------------------------------- /flate/dict_decoder.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | package flate 6 | 7 | // The dictDecoder implements the LZ77 sliding dictionary that is commonly used 8 | // in various compression formats. For performance reasons, this implementation 9 | // performs little to no sanity checks about the arguments. As such, the 10 | // invariants documented for each method call must be respected. Furthermore, 11 | // to reduce the memory footprint decompressing short streams, the dictionary 12 | // starts with a relatively small size and then lazily grows. 

const (
	initSize   = 4096 // Initial size allocated for sliding dictionary
	growFactor = 4    // Rate the dictionary is grown to match expected size
)

// dictDecoder holds the state of the sliding dictionary: the history buffer
// plus the write and read cursors into it.
type dictDecoder struct {
	// Invariant: len(hist) <= size
	size int    // Sliding window size
	hist []byte // Sliding window history, dynamically grown to match size

	// Invariant: 0 <= rdPos <= wrPos <= len(hist)
	wrPos int  // Current output position in buffer
	rdPos int  // Have emitted hist[:rdPos] already
	full  bool // Has a full window length been written yet?
}

// Init resets the decoder for a window of the given size. Both cursors and
// the full flag are cleared, while any history buffer from an earlier use is
// kept so that its storage can be reused.
func (dd *dictDecoder) Init(size int) {
	// Regardless of what size claims, start with a small dictionary to avoid
	// denial-of-service attacks with large memory allocation.
	buf := dd.hist
	if buf == nil {
		buf = make([]byte, initSize)
	}

	// Expose all of the storage already owned, capped at the window size.
	buf = buf[:cap(buf)]
	if len(buf) > size {
		buf = buf[:size]
	}
	*dd = dictDecoder{size: size, hist: buf}
}

// HistSize reports the total amount of historical data in the dictionary:
// the whole window once it has been filled, otherwise the bytes written so far.
func (dd *dictDecoder) HistSize() int {
	if !dd.full {
		return dd.wrPos
	}
	return dd.size
}

// AvailSize reports how many bytes can still be written to the current
// history buffer.
func (dd *dictDecoder) AvailSize() int {
	return len(dd.hist) - dd.wrPos
}

// WriteSlice returns the unwritten tail of the history buffer for the caller
// to fill in directly.
//
// This invariant will be kept: len(s) <= AvailSize()
func (dd *dictDecoder) WriteSlice() []byte {
	return dd.hist[dd.wrPos:]
}

// WriteMark advances the write pointer by cnt, typically after cnt bytes
// were stored through the slice returned by WriteSlice.
//
// This invariant must be kept: 0 <= cnt <= AvailSize()
func (dd *dictDecoder) WriteMark(cnt int) {
	dd.wrPos += cnt
}

// WriteByte writes a single byte to the dictionary.
73 | // 74 | // This invariant must be kept: 0 < AvailSize() 75 | func (dd *dictDecoder) WriteByte(c byte) { 76 | dd.hist[dd.wrPos] = c 77 | dd.wrPos++ 78 | } 79 | 80 | // TryWriteCopy tries to copy a string at a given (distance, length) to the 81 | // output. This specialized version is optimized for short distances. 82 | // 83 | // This method is designed to be inlined for performance reasons. 84 | // 85 | // This invariant must be kept: 0 < dist <= HistSize() 86 | func (dd *dictDecoder) TryWriteCopy(dist, length int) int { 87 | wrPos := dd.wrPos 88 | wrEnd := wrPos + length 89 | if wrPos < dist || wrEnd > len(dd.hist) { 90 | return 0 91 | } 92 | 93 | // Copy overlapping section before destination. 94 | wrBase := wrPos 95 | rdPos := wrPos - dist 96 | loop: 97 | wrPos += copy(dd.hist[wrPos:wrEnd], dd.hist[rdPos:wrPos]) 98 | if wrPos < wrEnd { 99 | goto loop // Avoid for-loop so that this function can be inlined 100 | } 101 | dd.wrPos = wrPos 102 | return wrPos - wrBase 103 | } 104 | 105 | // WriteCopy copies a string at a given (distance, length) to the output. 106 | // This returns the number of bytes copied and may be less than the requested 107 | // length if the available space in the output buffer is too small. 108 | // 109 | // This invariant must be kept: 0 < dist <= HistSize() 110 | func (dd *dictDecoder) WriteCopy(dist, length int) int { 111 | wrBase := dd.wrPos 112 | wrPos := wrBase 113 | rdPos := wrPos - dist 114 | wrEnd := wrPos + length 115 | if wrEnd > len(dd.hist) { 116 | wrEnd = len(dd.hist) 117 | } 118 | 119 | // Copy non-overlapping section after destination. 120 | if rdPos < 0 { 121 | rdPos += len(dd.hist) 122 | wrPos += copy(dd.hist[wrPos:wrEnd], dd.hist[rdPos:]) 123 | rdPos = 0 124 | } 125 | 126 | // Copy overlapping section before destination. 
127 | for wrPos < wrEnd { 128 | wrPos += copy(dd.hist[wrPos:wrEnd], dd.hist[rdPos:wrPos]) 129 | } 130 | dd.wrPos = wrPos 131 | return wrPos - wrBase 132 | } 133 | 134 | // ReadFlush returns a slice of the historical buffer that is ready to be 135 | // emitted to the user. A call to ReadFlush is only valid after all of the data 136 | // from a previous call to ReadFlush has been consumed. 137 | func (dd *dictDecoder) ReadFlush() []byte { 138 | toRead := dd.hist[dd.rdPos:dd.wrPos] 139 | dd.rdPos = dd.wrPos 140 | if dd.wrPos == len(dd.hist) { 141 | if len(dd.hist) == dd.size { 142 | dd.wrPos, dd.rdPos = 0, 0 143 | dd.full = true 144 | } else { 145 | // Allocate a larger history buffer. 146 | size := cap(dd.hist) * growFactor 147 | if size > dd.size { 148 | size = dd.size 149 | } 150 | hist := make([]byte, size) 151 | copy(hist, dd.hist) 152 | dd.hist = hist 153 | } 154 | } 155 | return toRead 156 | } 157 | -------------------------------------------------------------------------------- /internal/tool/bench/codec_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2016, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | package main 6 | 7 | import ( 8 | "bytes" 9 | "flag" 10 | "io" 11 | "os" 12 | "path/filepath" 13 | "runtime" 14 | "strings" 15 | "sync" 16 | "testing" 17 | 18 | "github.com/dsnet/compress/internal/testutil" 19 | ) 20 | 21 | // The unit tests can also be used to quickly test all of the implementations 22 | // with respect to each other for correctness. The command-line flags can be 23 | // used to specify any arbitrary corpus of test data to use. 
24 | // 25 | // Example usage: 26 | // $ go test -c 27 | // $ ./bench.test \ 28 | // -paths $CORPUS_PATH \ 29 | // -globs "*.txt:*.bin" \ 30 | // -test.run "//fl/std|cgo" \ 31 | // -test.v 32 | 33 | var level int 34 | 35 | func TestMain(m *testing.M) { 36 | setDefaults() 37 | flag.Var(&paths, "paths", "List of paths to search for test files") 38 | flag.Var(&globs, "globs", "List of globs to match for test files") 39 | flag.IntVar(&level, "level", 6, "Default compression level to use") 40 | flag.Parse() 41 | os.Exit(m.Run()) 42 | } 43 | 44 | type semaphore chan struct{} 45 | 46 | func newSemaphore(n int) semaphore { return make(chan struct{}, n) } 47 | func (s *semaphore) Acquire() { *s <- struct{}{} } 48 | func (s *semaphore) Release() { <-*s } 49 | 50 | // Each sub-test is run in a goroutine so that we can have fine control over 51 | // exactly how many sub-tests are running. When running over a large corpus, 52 | // this helps prevent all the sub-tests from executing at once and OOMing 53 | // the machine. The semaphores below control the maximum number of concurrent 54 | // operations that can be running for each dimension. 55 | // 56 | // We avoid using t.Parallel since that causes t.Run to return immediately and 57 | // does not provide the caller with feedback that all sub-operations completed. 58 | // This causes the next operation to prematurely start, leading to overloads. 59 | var ( 60 | semaFiles = newSemaphore(runtime.NumCPU()) 61 | semaFormats = newSemaphore(runtime.NumCPU()) 62 | semaEncoders = newSemaphore(runtime.NumCPU()) 63 | semaDecoders = newSemaphore(runtime.NumCPU()) 64 | ) 65 | 66 | // TestCodecs tests that the output of each registered encoder is a valid input 67 | // for each registered decoder. This test runs in O(n^2) where n is the number 68 | // of registered codecs. This assumes that the number of test files and 69 | // compression formats stays relatively constant. 
70 | func TestCodecs(t *testing.T) { 71 | var wg sync.WaitGroup 72 | defer wg.Wait() 73 | for _, fi := range getFiles(paths, globs) { 74 | fi := fi 75 | name := "File:" + strings.Replace(fi.Rel, string(filepath.Separator), "_", -1) 76 | goRun(t, &wg, &semaFiles, name, func(t *testing.T) { 77 | dd := testutil.MustLoadFile(fi.Abs) 78 | testFormats(t, dd) 79 | }) 80 | } 81 | } 82 | 83 | func testFormats(t *testing.T, dd []byte) { 84 | var wg sync.WaitGroup 85 | defer wg.Wait() 86 | for _, ft := range formats { 87 | ft := ft 88 | name := "Format:" + enumToFmt[ft] 89 | goRun(t, &wg, &semaFormats, name, func(t *testing.T) { 90 | if len(encoders[ft]) == 0 || len(decoders[ft]) == 0 { 91 | t.Skip("no codecs available") 92 | } 93 | testEncoders(t, ft, dd) 94 | }) 95 | } 96 | } 97 | 98 | func testEncoders(t *testing.T, ft Format, dd []byte) { 99 | var wg sync.WaitGroup 100 | defer wg.Wait() 101 | for encName := range encoders[ft] { 102 | encName := encName 103 | name := "Encoder:" + encName 104 | goRun(t, &wg, &semaEncoders, name, func(t *testing.T) { 105 | be := new(bytes.Buffer) 106 | zw := encoders[ft][encName](be, level) 107 | if _, err := io.Copy(zw, bytes.NewReader(dd)); err != nil { 108 | t.Fatalf("unexpected Write error: %v", err) 109 | } 110 | if err := zw.Close(); err != nil { 111 | t.Fatalf("unexpected Close error: %v", err) 112 | } 113 | de := be.Bytes() 114 | testDecoders(t, ft, dd, de) 115 | }) 116 | } 117 | } 118 | 119 | func testDecoders(t *testing.T, ft Format, dd, de []byte) { 120 | var wg sync.WaitGroup 121 | defer wg.Wait() 122 | for decName := range decoders[ft] { 123 | decName := decName 124 | name := "Decoder:" + decName 125 | goRun(t, &wg, &semaDecoders, name, func(t *testing.T) { 126 | bd := new(bytes.Buffer) 127 | zr := decoders[ft][decName](bytes.NewReader(de)) 128 | if _, err := io.Copy(bd, zr); err != nil { 129 | t.Fatalf("unexpected Read error: %v", err) 130 | } 131 | if err := zr.Close(); err != nil { 132 | t.Fatalf("unexpected Close error: %v", 
err) 133 | } 134 | if got, want, ok := testutil.BytesCompare(bd.Bytes(), dd); !ok { 135 | t.Errorf("data mismatch:\ngot %s\nwant %s", got, want) 136 | } 137 | }) 138 | } 139 | } 140 | 141 | func goRun(t *testing.T, wg *sync.WaitGroup, sm *semaphore, name string, fn func(t *testing.T)) { 142 | wg.Add(1) 143 | go func() { 144 | defer wg.Done() 145 | t.Run(name, func(t *testing.T) { 146 | sm.Acquire() 147 | defer sm.Release() 148 | defer recoverPanic(t) 149 | fn(t) 150 | }) 151 | }() 152 | } 153 | 154 | func recoverPanic(t *testing.T) { 155 | if ex := recover(); ex != nil { 156 | t.Fatalf("unexpected panic: %v", ex) 157 | } 158 | } 159 | -------------------------------------------------------------------------------- /bzip2/mtf_rle2_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | package bzip2 6 | 7 | import ( 8 | "reflect" 9 | "testing" 10 | 11 | "github.com/dsnet/compress/internal/errors" 12 | ) 13 | 14 | func TestMoveToFront(t *testing.T) { 15 | getDict := func(buf []byte) []uint8 { 16 | var dictMap [256]bool 17 | for _, b := range buf { 18 | dictMap[b] = true 19 | } 20 | var dictArr [256]uint8 21 | dict := dictArr[:0] 22 | for j, b := range dictMap { 23 | if b { 24 | dict = append(dict, uint8(j)) 25 | } 26 | } 27 | return dict 28 | } 29 | 30 | vectors := []struct { 31 | size int // If zero, default to 1MiB 32 | input []byte 33 | output []uint16 34 | fail bool 35 | }{{ 36 | input: []byte{}, 37 | output: []uint16{}, 38 | }, { 39 | input: []byte{2, 2, 2, 2, 2, 2, 2, 2, 2, 2}, 40 | output: []uint16{1, 1, 0}, 41 | }, { 42 | input: []byte{9, 8, 7, 6, 5, 4, 3, 2, 1}, 43 | output: []uint16{9, 9, 9, 9, 9, 9, 9, 9, 9}, 44 | }, { 45 | input: []byte{42, 47, 42, 47, 42, 47, 42, 47, 42, 47, 42, 47}, 46 | output: []uint16{0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}, 47 | 
}, { 48 | input: []byte{0, 5, 2, 3, 4, 4, 3, 1, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 5, 2, 3, 3}, 49 | output: []uint16{0, 6, 4, 5, 6, 0, 2, 6, 4, 3, 0, 1, 4, 1, 5, 4, 4, 0}, 50 | }, { 51 | input: []byte{100, 111, 108, 104, 10, 114, 101, 108, 108, 119, 111, 32}, 52 | output: []uint16{3, 7, 7, 7, 5, 8, 8, 5, 0, 9, 7, 9}, 53 | }, { 54 | input: []byte{ 55 | 103, 33, 107, 121, 110, 120, 101, 100, 101, 114, 44, 100, 111, 10, 32, 56 | 108, 32, 105, 101, 108, 32, 104, 104, 112, 72, 118, 32, 111, 116, 84, 57 | 117, 32, 99, 32, 114, 101, 108, 117, 119, 108, 100, 119, 32, 114, 102, 58 | 109, 32, 101, 111, 98, 32, 113, 106, 111, 111, 32, 111, 122, 97, 59 | }, 60 | output: []uint16{ 61 | 13, 4, 17, 30, 21, 30, 16, 16, 2, 26, 12, 4, 24, 12, 13, 23, 2, 22, 9, 62 | 4, 4, 22, 0, 25, 18, 29, 5, 10, 28, 21, 29, 5, 25, 2, 17, 13, 13, 6, 30, 63 | 3, 17, 3, 7, 7, 27, 29, 4, 9, 13, 28, 4, 30, 30, 5, 0, 4, 2, 31, 31, 64 | }, 65 | }, { 66 | input: []byte{ 67 | 74, 69, 205, 44, 38, 175, 207, 101, 59, 108, 42, 155, 208, 50, 38, 115, 68 | 190, 138, 163, 35, 13, 172, 160, 74, 68, 173, 99, 57, 213, 158, 248, 69 | 209, 176, 52, 135, 21, 26, 248, 186, 186, 219, 113, 172, 163, 13, 22, 70 | 100, 134, 4, 141, 53, 244, 99, 126, 214, 59, 53, 43, 146, 67, 131, 51, 71 | 212, 146, 245, 72 | }, 73 | output: []uint16{20, 20, 44, 13, 11, 41, 45, 26, 22, 27, 17, 37, 46, 21, 74 | 10, 31, 46, 37, 42, 24, 21, 43, 43, 22, 33, 44, 35, 34, 49, 45, 54, 75 | 49, 48, 38, 46, 35, 37, 7, 49, 0, 52, 45, 19, 22, 21, 40, 45, 48, 42, 76 | 49, 46, 53, 24, 49, 53, 41, 6, 48, 52, 51, 52, 52, 53, 5, 54, 77 | }, 78 | }, { 79 | input: []byte{ 80 | 153, 45, 45, 38, 135, 179, 26, 154, 165, 170, 170, 170, 170, 18, 109, 81 | 240, 174, 150, 87, 164, 30, 30, 30, 30, 30, 30, 30, 148, 190, 10, 60, 82 | 13, 13, 13, 13, 13, 6, 81, 200, 13, 225, 32, 17, 43, 22, 179, 13, 13, 83 | 17, 236, 236, 236, 236, 236, 236, 236, 121, 211, 2, 211, 185, 54, 16, 84 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 50, 85 | 5, 5, 5, 5, 5, 5, 
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 40, 86 | }, 87 | output: []uint16{ 88 | 27, 17, 0, 15, 25, 33, 15, 29, 31, 32, 0, 0, 17, 28, 40, 34, 33, 31, 89 | 34, 25, 1, 1, 34, 36, 23, 33, 25, 1, 0, 25, 34, 37, 4, 39, 32, 31, 34, 90 | 33, 26, 7, 0, 5, 40, 1, 1, 38, 40, 34, 2, 40, 40, 38, 38, 0, 1, 1, 0, 91 | 40, 2, 0, 1, 1, 0, 40, 92 | }, 93 | }, { 94 | size: 10, 95 | input: []byte{1, 1, 1, 1, 1, 2, 2, 2, 3, 3}, 96 | output: []uint16{0, 1, 2, 1, 3, 0}, 97 | fail: false, 98 | }, { 99 | size: 10, 100 | input: []byte{1, 1, 1, 1, 1, 2, 2, 2, 3, 3}, 101 | output: []uint16{0, 1, 2, 1, 3, 1}, 102 | fail: true, 103 | }, { 104 | size: 10, 105 | input: []byte{1, 1, 1, 1, 1, 2, 2, 2, 3, 3}, 106 | output: []uint16{0, 1, 2, 1, 3, 2, 2}, 107 | fail: true, 108 | }, { 109 | size: 10, 110 | input: []byte{1, 1, 1, 1, 1, 2, 2, 2, 3, 3}, 111 | output: []uint16{1, 1, 2, 1, 3, 0}, 112 | fail: true, 113 | }, { 114 | size: 9, 115 | input: []byte{1, 1, 1, 1, 1, 2, 2, 2, 3, 3}, 116 | fail: true, 117 | }} 118 | 119 | mtf := new(moveToFront) 120 | for i, v := range vectors { 121 | var err error 122 | var input []byte 123 | var output []uint16 124 | func() { 125 | defer errors.Recover(&err) 126 | if v.size == 0 { 127 | v.size = 1 << 20 128 | } 129 | dict := getDict(v.input) 130 | mtf.Init(dict, v.size) 131 | output = mtf.Encode(v.input) 132 | mtf.Init(dict, v.size) 133 | input = mtf.Decode(v.output) 134 | }() 135 | 136 | fail := err != nil 137 | if fail && !v.fail { 138 | t.Errorf("test %d, unexpected error: %v", i, err) 139 | } 140 | if !fail && v.fail { 141 | t.Errorf("test %d, unexpected success", i) 142 | } 143 | if fail || v.fail { 144 | continue 145 | } 146 | if !reflect.DeepEqual(input, v.input) && !(len(input) == 0 && len(v.input) == 0) { 147 | t.Errorf("test %d, input mismatch:\ngot %v\nwant %v", i, input, v.input) 148 | } 149 | if !reflect.DeepEqual(output, v.output) && !(len(output) == 0 && len(v.output) == 0) { 150 | t.Errorf("test %d, output mismatch:\ngot %v\nwant %v", i, 
output, v.output) 151 | } 152 | } 153 | } 154 | -------------------------------------------------------------------------------- /brotli/dict_decoder_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | package brotli 6 | 7 | import ( 8 | "bytes" 9 | "strings" 10 | "testing" 11 | ) 12 | 13 | func TestDictDecoder(t *testing.T) { 14 | const abc = "ABC\n" 15 | const fox = "The quick brown fox jumped over the lazy dog!\n" 16 | const poem = "The Road Not Taken\nRobert Frost\n" + 17 | "\n" + 18 | "Two roads diverged in a yellow wood,\n" + 19 | "And sorry I could not travel both\n" + 20 | "And be one traveler, long I stood\n" + 21 | "And looked down one as far as I could\n" + 22 | "To where it bent in the undergrowth;\n" + 23 | "\n" + 24 | "Then took the other, as just as fair,\n" + 25 | "And having perhaps the better claim,\n" + 26 | "Because it was grassy and wanted wear;\n" + 27 | "Though as for that the passing there\n" + 28 | "Had worn them really about the same,\n" + 29 | "\n" + 30 | "And both that morning equally lay\n" + 31 | "In leaves no step had trodden black.\n" + 32 | "Oh, I kept the first for another day!\n" + 33 | "Yet knowing how way leads on to way,\n" + 34 | "I doubted if I should ever come back.\n" + 35 | "\n" + 36 | "I shall be telling this with a sigh\n" + 37 | "Somewhere ages and ages hence:\n" + 38 | "Two roads diverged in a wood, and I-\n" + 39 | "I took the one less traveled by,\n" + 40 | "And that has made all the difference.\n" 41 | var refs = []struct { 42 | dist int // Backward distance (0 if this is an insertion) 43 | length int // Length of copy or insertion 44 | }{ 45 | {0, 38}, {33, 3}, {0, 48}, {79, 3}, {0, 11}, {34, 5}, {0, 6}, {23, 7}, 46 | {0, 8}, {50, 3}, {0, 2}, {69, 3}, {34, 5}, {0, 4}, {97, 3}, {0, 4}, 47 | {43, 5}, {0, 
6}, {7, 4}, {88, 7}, {0, 12}, {80, 3}, {0, 2}, {141, 4}, 48 | {0, 1}, {196, 3}, {0, 3}, {157, 3}, {0, 6}, {181, 3}, {0, 2}, {23, 3}, 49 | {77, 3}, {28, 5}, {128, 3}, {110, 4}, {70, 3}, {0, 4}, {85, 6}, {0, 2}, 50 | {182, 6}, {0, 4}, {133, 3}, {0, 7}, {47, 5}, {0, 20}, {112, 5}, {0, 1}, 51 | {58, 3}, {0, 8}, {59, 3}, {0, 4}, {173, 3}, {0, 5}, {114, 3}, {0, 4}, 52 | {92, 5}, {0, 2}, {71, 3}, {0, 2}, {76, 5}, {0, 1}, {46, 3}, {96, 4}, 53 | {130, 4}, {0, 3}, {360, 3}, {0, 3}, {178, 5}, {0, 7}, {75, 3}, {0, 3}, 54 | {45, 6}, {0, 6}, {299, 6}, {180, 3}, {70, 6}, {0, 1}, {48, 3}, {66, 4}, 55 | {0, 3}, {47, 5}, {0, 9}, {325, 3}, {0, 1}, {359, 3}, {318, 3}, {0, 2}, 56 | {199, 3}, {0, 1}, {344, 3}, {0, 3}, {248, 3}, {0, 10}, {310, 3}, {0, 3}, 57 | {93, 6}, {0, 3}, {252, 3}, {157, 4}, {0, 2}, {273, 5}, {0, 14}, {99, 4}, 58 | {0, 1}, {464, 4}, {0, 2}, {92, 4}, {495, 3}, {0, 1}, {322, 4}, {16, 4}, 59 | {0, 3}, {402, 3}, {0, 2}, {237, 4}, {0, 2}, {432, 4}, {0, 1}, {483, 5}, 60 | {0, 2}, {294, 4}, {0, 2}, {306, 3}, {113, 5}, {0, 1}, {26, 4}, {164, 3}, 61 | {488, 4}, {0, 1}, {542, 3}, {248, 6}, {0, 5}, {205, 3}, {0, 8}, {48, 3}, 62 | {449, 6}, {0, 2}, {192, 3}, {328, 4}, {9, 5}, {433, 3}, {0, 3}, {622, 25}, 63 | {615, 5}, {46, 5}, {0, 2}, {104, 3}, {475, 10}, {549, 3}, {0, 4}, {597, 8}, 64 | {314, 3}, {0, 1}, {473, 6}, {317, 5}, {0, 1}, {400, 3}, {0, 3}, {109, 3}, 65 | {151, 3}, {48, 4}, {0, 4}, {125, 3}, {108, 3}, {0, 2}, 66 | } 67 | 68 | var want string 69 | var buf bytes.Buffer 70 | var dd dictDecoder 71 | dd.Init(1 << 11) 72 | 73 | checkLastBytes := func(str string) { 74 | if len(str) < 2 { 75 | str = "\x00\x00" + str 76 | } 77 | str = str[len(str)-2:] 78 | p1, p2 := dd.LastBytes() 79 | got := string([]byte{p2, p1}) 80 | if got != str { 81 | t.Errorf("last bytes mismatch: got %q, want %q", got, str) 82 | } 83 | } 84 | writeCopy := func(dist, length int) { 85 | if dist < length { 86 | cnt := (dist + length - 1) / dist 87 | want += strings.Repeat(want[len(want)-dist:], 
cnt)[:length] 88 | } else { 89 | want += want[len(want)-dist:][:length] 90 | } 91 | 92 | for length > 0 { 93 | length -= dd.WriteCopy(dist, length) 94 | if dd.AvailSize() == 0 { 95 | buf.Write(dd.ReadFlush()) 96 | } 97 | } 98 | 99 | checkLastBytes(want) 100 | } 101 | writeString := func(str string) { 102 | want += str 103 | 104 | for len(str) > 0 { 105 | cnt := copy(dd.WriteSlice(), str) 106 | str = str[cnt:] 107 | dd.WriteMark(cnt) 108 | if dd.AvailSize() == 0 { 109 | buf.Write(dd.ReadFlush()) 110 | } 111 | } 112 | 113 | checkLastBytes(want) 114 | } 115 | 116 | writeString("") 117 | writeString(".") 118 | str := poem 119 | for _, ref := range refs { 120 | if ref.dist == 0 { 121 | writeString(str[:ref.length]) 122 | } else { 123 | writeCopy(ref.dist, ref.length) 124 | } 125 | str = str[ref.length:] 126 | } 127 | writeCopy(dd.HistSize(), 33) 128 | writeString(abc) 129 | writeCopy(len(abc), 59*len(abc)) 130 | writeString(fox) 131 | writeCopy(len(fox), 9*len(fox)) 132 | writeString(".") 133 | writeCopy(1, 9) 134 | writeString(strings.ToUpper(poem)) 135 | writeCopy(len(poem), 7*len(poem)) 136 | writeCopy(dd.HistSize(), 10) 137 | 138 | buf.Write(dd.ReadFlush()) 139 | if buf.String() != want { 140 | t.Errorf("final string mismatch:\ngot %q\nwant %q", buf.String(), want) 141 | } 142 | } 143 | 144 | func BenchmarkDictDecoderCopy(b *testing.B) { 145 | nb := 1 << 24 146 | b.SetBytes(int64(nb)) 147 | 148 | for i := 0; i < b.N; i++ { 149 | var dd dictDecoder 150 | dd.Init(1 << 16) 151 | 152 | copy(dd.WriteSlice(), "abc") 153 | dd.WriteMark(3) 154 | 155 | dist, length := 3, nb 156 | for length > 0 { 157 | length -= dd.WriteCopy(dist, length) 158 | if dd.AvailSize() == 0 { 159 | dd.ReadFlush() 160 | } 161 | } 162 | } 163 | } 164 | -------------------------------------------------------------------------------- /internal/cgo/flate/flate.go: -------------------------------------------------------------------------------- 1 | // Copyright 2016, Joe Tsai. All rights reserved. 
// reader decompresses data read from r using a zlib decoder state.
type reader struct {
	r     io.Reader     // Source of compressed data
	err   error         // Sticky error; once set the decode loop no longer runs
	state C.z_streamp   // zlib decoder state; set to nil by Close
	buf   []byte        // Compressed input not yet consumed, a sub-slice of arr
	arr   [1 << 14]byte // Backing storage for compressed input read from r
}

// NewReader returns an io.ReadCloser that decompresses data read from r
// with the decoder state created by zfDecCreate.
// It panics if that state could not be allocated.
func NewReader(r io.Reader) io.ReadCloser {
	zr := &reader{r: r, state: C.zfDecCreate()}
	if zr.state == nil {
		panic("flate: could not allocate decoder state")
	}
	return zr
}

// Read decompresses data into buf and reports the number of bytes written.
//
// It returns io.ErrClosedPipe after Close, io.EOF when zlib reports the end of
// the stream, and io.ErrUnexpectedEOF if r is exhausted before that point.
// Any other zlib result is reported as a sticky "corrupted input" error.
func (zr *reader) Read(buf []byte) (int, error) {
	if zr.state == nil {
		return 0, io.ErrClosedPipe
	}

	var n int
	for zr.err == nil && (len(buf) > 0 && n == 0) {
		availIn, availOut, ptrIn, ptrOut := sizePtrs(zr.buf, buf)
		ret := C.zfDecStream(zr.state, &availIn, ptrIn, &availOut, ptrOut)
		// availIn and availOut now hold what is left over, so the difference
		// to the old lengths is what was consumed and produced by this call.
		n += len(buf) - int(availOut)
		buf = buf[len(buf)-int(availOut):]
		zr.buf = zr.buf[len(zr.buf)-int(availIn):]

		switch ret {
		case C.Z_OK:
			return n, nil
		case C.Z_BUF_ERROR:
			// Treated as a request for more input, but only once everything
			// that was buffered has been consumed.
			if len(zr.buf) == 0 {
				n1, err := zr.r.Read(zr.arr[:])
				if n1 > 0 {
					zr.buf = zr.arr[:n1]
				} else if err != nil {
					// The source ended before zlib saw the end of the stream.
					if err == io.EOF {
						err = io.ErrUnexpectedEOF
					}
					zr.err = err
				}
			}
		case C.Z_STREAM_END:
			return n, io.EOF
		default:
			zr.err = errors.New("flate: corrupted input")
		}
	}
	return n, zr.err
}

// Close releases the zlib decoder state and returns the sticky error, if any.
// Calling Close again does not release the state a second time.
func (zr *reader) Close() error {
	if zr.state != nil {
		// Deferred so that the state is destroyed after the return value
		// has been evaluated.
		defer func() {
			C.zfDecDestroy(zr.state)
			zr.state = nil
		}()
	}
	return zr.err
}

// writer compresses data with a zlib encoder state and writes it to w.
type writer struct {
	w     io.Writer     // Destination of compressed data
	err   error         // Sticky error; once set the encode loop no longer runs
	state C.z_streamp   // zlib encoder state; set to nil by Close
	buf   []byte        // Compressed output of the latest encoder call, a sub-slice of arr
	arr   [1 << 14]byte // Backing storage for compressed output
}

// NewWriter returns an io.WriteCloser that compresses at the given level and
// writes the result to w. It panics if level lies outside of
// [Z_NO_COMPRESSION, Z_BEST_COMPRESSION] or if the encoder state could not be
// allocated.
func NewWriter(w io.Writer, level int) io.WriteCloser {
	if level < C.Z_NO_COMPRESSION || level > C.Z_BEST_COMPRESSION {
		panic("flate: invalid compression level")
	}

	zw := &writer{w: w, state: C.zfEncCreate(C.int(level))}
	if zw.state == nil {
		panic("flate: could not allocate encoder state")
	}
	return zw
}

// Write compresses buf without requesting a flush from zlib.
func (zw *writer) Write(buf []byte) (int, error) {
	return zw.write(buf, C.Z_NO_FLUSH)
}

// write feeds buf to the encoder using flush mode op and forwards all output
// to the underlying writer. It reports how many bytes of buf were consumed.
//
// With Z_NO_FLUSH the loop ends once buf is fully consumed; with any other op
// it keeps going until zlib reports Z_STREAM_END or an error occurs.
func (zw *writer) write(buf []byte, op C.int) (int, error) {
	if zw.state == nil {
		return 0, io.ErrClosedPipe
	}

	var n int
	flush := op != C.Z_NO_FLUSH
	for zw.err == nil && (len(buf) > 0 || flush) {
		availIn, availOut, ptrIn, ptrOut := sizePtrs(buf, zw.arr[:])
		ret := C.zfEncStream(zw.state, op, &availIn, ptrIn, &availOut, ptrOut)
		// availIn and availOut now hold what is left over of buf and zw.arr.
		n += len(buf) - int(availIn)
		buf = buf[len(buf)-int(availIn):]
		zw.buf = zw.arr[:len(zw.arr)-int(availOut)]

		if len(zw.buf) > 0 {
			if _, err := zw.w.Write(zw.buf); err != nil {
				zw.err = err
			}
		}
		switch ret {
		case C.Z_OK, C.Z_BUF_ERROR:
			continue // Nothing to do; the loop condition decides whether to go on
		case C.Z_STREAM_END:
			return n, zw.err
		default:
			zw.err = errors.New("flate: compression error")
		}
	}
	return n, zw.err
}

// Close finishes the stream with Z_FINISH, releases the zlib encoder state and
// returns the sticky error, if any. It does not close the underlying writer.
func (zw *writer) Close() error {
	if zw.state != nil {
		// Deferred so that the state is still valid for the final write below.
		defer func() {
			C.zfEncDestroy(zw.state)
			zw.state = nil
		}()
		zw.write(nil, C.Z_FINISH)
	}
	return zw.err
}

// sizePtrs converts the lengths of in and out to C.uInt and returns pointers
// to their first elements. The pointer for an empty slice is nil.
func sizePtrs(in, out []byte) (sizeIn, sizeOut C.uInt, ptrIn, ptrOut *C.Bytef) {
	sizeIn = C.uInt(len(in))
	sizeOut = C.uInt(len(out))
	if len(in) > 0 {
		ptrIn = (*C.Bytef)(unsafe.Pointer(&in[0]))
	}
	if len(out) > 0 {
		ptrOut = (*C.Bytef)(unsafe.Pointer(&out[0]))
	}
	return
}
4 | 5 | package prefix 6 | 7 | import ( 8 | "encoding/binary" 9 | "io" 10 | 11 | "github.com/dsnet/compress/internal/errors" 12 | ) 13 | 14 | // Writer implements a prefix encoder. For performance reasons, Writer will not 15 | // write bytes immediately to the underlying stream. 16 | type Writer struct { 17 | Offset int64 // Number of bytes written to the underlying io.Writer 18 | 19 | wr io.Writer 20 | bufBits uint64 // Buffer to hold some bits 21 | numBits uint // Number of valid bits in bufBits 22 | bigEndian bool // Are bits written in big-endian order? 23 | 24 | buf [512]byte 25 | cntBuf int 26 | } 27 | 28 | // Init initializes the bit Writer to write to w. If bigEndian is true, then 29 | // bits will be written starting from the most-significant bits of a byte 30 | // (as done in bzip2), otherwise it will write starting from the 31 | // least-significant bits of a byte (such as for deflate and brotli). 32 | func (pw *Writer) Init(w io.Writer, bigEndian bool) { 33 | *pw = Writer{wr: w, bigEndian: bigEndian} 34 | return 35 | } 36 | 37 | // BitsWritten reports the total number of bits issued to any Write method. 38 | func (pw *Writer) BitsWritten() int64 { 39 | return 8*pw.Offset + 8*int64(pw.cntBuf) + int64(pw.numBits) 40 | } 41 | 42 | // WritePads writes 0-7 bits to the bit buffer to achieve byte-alignment. 43 | func (pw *Writer) WritePads(v uint) { 44 | nb := -pw.numBits & 7 45 | pw.bufBits |= uint64(v) << pw.numBits 46 | pw.numBits += nb 47 | } 48 | 49 | // Write writes bytes from buf. 50 | // The bit-ordering mode does not affect this method. 
51 | func (pw *Writer) Write(buf []byte) (cnt int, err error) { 52 | if pw.numBits > 0 || pw.cntBuf > 0 { 53 | if pw.numBits%8 != 0 { 54 | return 0, errorf(errors.Invalid, "non-aligned bit buffer") 55 | } 56 | if _, err := pw.Flush(); err != nil { 57 | return 0, err 58 | } 59 | } 60 | cnt, err = pw.wr.Write(buf) 61 | pw.Offset += int64(cnt) 62 | return cnt, err 63 | } 64 | 65 | // WriteOffset writes ofs in a (sym, extra) fashion using the provided prefix 66 | // Encoder and RangeEncoder. 67 | func (pw *Writer) WriteOffset(ofs uint, pe *Encoder, re *RangeEncoder) { 68 | sym := re.Encode(ofs) 69 | pw.WriteSymbol(sym, pe) 70 | rc := re.rcs[sym] 71 | pw.WriteBits(ofs-uint(rc.Base), uint(rc.Len)) 72 | } 73 | 74 | // TryWriteBits attempts to write nb bits using the contents of the bit buffer 75 | // alone. It reports whether it succeeded. 76 | // 77 | // This method is designed to be inlined for performance reasons. 78 | func (pw *Writer) TryWriteBits(v, nb uint) bool { 79 | if 64-pw.numBits < nb { 80 | return false 81 | } 82 | pw.bufBits |= uint64(v) << pw.numBits 83 | pw.numBits += nb 84 | return true 85 | } 86 | 87 | // WriteBits writes nb bits of v to the underlying writer. 88 | func (pw *Writer) WriteBits(v, nb uint) { 89 | if _, err := pw.PushBits(); err != nil { 90 | errors.Panic(err) 91 | } 92 | pw.bufBits |= uint64(v) << pw.numBits 93 | pw.numBits += nb 94 | } 95 | 96 | // TryWriteSymbol attempts to encode the next symbol using the contents of the 97 | // bit buffer alone. It reports whether it succeeded. 98 | // 99 | // This method is designed to be inlined for performance reasons. 
100 | func (pw *Writer) TryWriteSymbol(sym uint, pe *Encoder) bool { 101 | chunk := pe.chunks[uint32(sym)&pe.chunkMask] 102 | nb := uint(chunk & countMask) 103 | if 64-pw.numBits < nb { 104 | return false 105 | } 106 | pw.bufBits |= uint64(chunk>>countBits) << pw.numBits 107 | pw.numBits += nb 108 | return true 109 | } 110 | 111 | // WriteSymbol writes the symbol using the provided prefix Encoder. 112 | func (pw *Writer) WriteSymbol(sym uint, pe *Encoder) { 113 | if _, err := pw.PushBits(); err != nil { 114 | errors.Panic(err) 115 | } 116 | chunk := pe.chunks[uint32(sym)&pe.chunkMask] 117 | nb := uint(chunk & countMask) 118 | pw.bufBits |= uint64(chunk>>countBits) << pw.numBits 119 | pw.numBits += nb 120 | } 121 | 122 | // Flush flushes all complete bytes from the bit buffer to the byte buffer, and 123 | // then flushes all bytes in the byte buffer to the underlying writer. 124 | // After this call, the bit Writer is will only withhold 7 bits at most. 125 | func (pw *Writer) Flush() (int64, error) { 126 | if pw.numBits < 8 && pw.cntBuf == 0 { 127 | return pw.Offset, nil 128 | } 129 | if _, err := pw.PushBits(); err != nil { 130 | return pw.Offset, err 131 | } 132 | cnt, err := pw.wr.Write(pw.buf[:pw.cntBuf]) 133 | pw.cntBuf -= cnt 134 | pw.Offset += int64(cnt) 135 | return pw.Offset, err 136 | } 137 | 138 | // PushBits pushes as many bytes as possible from the bit buffer to the byte 139 | // buffer, reporting the number of bits pushed. 140 | func (pw *Writer) PushBits() (uint, error) { 141 | if pw.cntBuf >= len(pw.buf)-8 { 142 | cnt, err := pw.wr.Write(pw.buf[:pw.cntBuf]) 143 | pw.cntBuf -= cnt 144 | pw.Offset += int64(cnt) 145 | if err != nil { 146 | return 0, err 147 | } 148 | } 149 | 150 | u := pw.bufBits 151 | if pw.bigEndian { 152 | // Swap all the bits within each byte. 
153 | u = (u&0xaaaaaaaaaaaaaaaa)>>1 | (u&0x5555555555555555)<<1 154 | u = (u&0xcccccccccccccccc)>>2 | (u&0x3333333333333333)<<2 155 | u = (u&0xf0f0f0f0f0f0f0f0)>>4 | (u&0x0f0f0f0f0f0f0f0f)<<4 156 | } 157 | // Starting with Go 1.7, the compiler should use a wide integer 158 | // store here if the architecture supports it. 159 | binary.LittleEndian.PutUint64(pw.buf[pw.cntBuf:], u) 160 | 161 | nb := pw.numBits / 8 // Number of bytes to copy from bit buffer 162 | pw.cntBuf += int(nb) 163 | pw.bufBits >>= 8 * nb 164 | pw.numBits -= 8 * nb 165 | return 8 * nb, nil 166 | } 167 | -------------------------------------------------------------------------------- /internal/cgo/zstd/zstd.go: -------------------------------------------------------------------------------- 1 | // Copyright 2016, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | // +build cgo 6 | 7 | // Package zstd implements the Zstandard compressed data format using C wrappers. 8 | package zstd 9 | 10 | /* 11 | // This relies upon the shared library built from github.com/facebook/zstd. 
12 | // 13 | // The steps to build and install the shared library is as follows: 14 | // curl -L https://github.com/facebook/zstd/archive/v1.3.2.tar.gz | tar -zxv 15 | // cd zstd-1.3.2 16 | // sudo make install 17 | 18 | #cgo LDFLAGS: -lzstd 19 | 20 | #include 21 | #include 22 | #include "zstd.h" 23 | 24 | ZSTD_DStream* zsDecCreate() { 25 | ZSTD_DStream* state = ZSTD_createDStream(); 26 | ZSTD_initDStream(state); 27 | return state; 28 | } 29 | 30 | size_t zsDecStream( 31 | ZSTD_DStream* state, 32 | size_t* avail_in, uint8_t* next_in, 33 | size_t* avail_out, uint8_t* next_out 34 | ) { 35 | ZSTD_inBuffer in = {next_in, *avail_in, 0}; 36 | ZSTD_outBuffer out = {next_out, *avail_out, 0}; 37 | size_t ret = ZSTD_decompressStream(state, &out, &in); 38 | *avail_in = in.size - in.pos; 39 | *avail_out = out.size - out.pos; 40 | in.src = NULL; 41 | out.dst = NULL; 42 | return ret; 43 | } 44 | 45 | void zsDecDestroy(ZSTD_DStream* state) { 46 | ZSTD_freeDStream(state); 47 | } 48 | 49 | ZSTD_CStream* zsEncCreate(int level) { 50 | ZSTD_CStream* state = ZSTD_createCStream(); 51 | ZSTD_initCStream(state, level); 52 | return state; 53 | } 54 | 55 | size_t zsEncStream( 56 | ZSTD_CStream* state, int finish, 57 | size_t* avail_in, uint8_t* next_in, 58 | size_t* avail_out, uint8_t* next_out 59 | ) { 60 | ZSTD_inBuffer in = {next_in, *avail_in, 0}; 61 | ZSTD_outBuffer out = {next_out, *avail_out, 0}; 62 | size_t ret = finish ? 
// reader decompresses data read from r using a ZSTD_DStream.
type reader struct {
	r     io.Reader       // Source of compressed data
	err   error           // Sticky error; once set the decode loop no longer runs
	state *C.ZSTD_DStream // Decoder state; set to nil by Close
	buf   []byte          // Compressed input not yet consumed, a sub-slice of arr
	arr   [1 << 14]byte   // Backing storage for compressed input read from r
}

// NewReader returns an io.ReadCloser that decompresses data read from r.
// It panics if the decoder state could not be allocated.
func NewReader(r io.Reader) io.ReadCloser {
	zr := &reader{r: r, state: C.zsDecCreate()}
	if zr.state == nil {
		panic("zstd: could not allocate decoder state")
	}
	return zr
}

// Read decompresses data into buf and reports the number of bytes written.
//
// It returns io.ErrClosedPipe after Close, io.EOF when the decoder returns 0,
// and io.ErrUnexpectedEOF if r is exhausted before that point. A result that
// ZSTD_isError flags is reported as a sticky "corrupted input" error.
func (zr *reader) Read(buf []byte) (int, error) {
	if zr.state == nil {
		return 0, io.ErrClosedPipe
	}

	var n int
	for zr.err == nil && (len(buf) > 0 && n == 0) {
		availIn, availOut, ptrIn, ptrOut := sizePtrs(zr.buf, buf)
		ret := C.zsDecStream(zr.state, &availIn, ptrIn, &availOut, ptrOut)
		// availIn and availOut now hold what is left over, so the difference
		// to the old lengths is what was consumed and produced by this call.
		n += len(buf) - int(availOut)
		buf = buf[len(buf)-int(availOut):]
		zr.buf = zr.buf[len(zr.buf)-int(availIn):]

		switch {
		case C.ZSTD_isError(ret) > 0:
			zr.err = errors.New("zstd: corrupted input")
		case ret == 0:
			return n, io.EOF
		case n > 0:
			return n, nil
		case len(zr.buf) == 0 && n == 0:
			// No output yet and no buffered input left: read more from r.
			n1, err := zr.r.Read(zr.arr[:])
			if n1 > 0 {
				zr.buf = zr.arr[:n1]
			} else if err != nil {
				// The source ended before the decoder reported completion.
				if err == io.EOF {
					err = io.ErrUnexpectedEOF
				}
				zr.err = err
			}
		}
	}
	return n, zr.err
}

// Close releases the decoder state and returns the sticky error, if any.
// Calling Close again does not release the state a second time.
func (zr *reader) Close() error {
	if zr.state != nil {
		// Deferred so that the state is destroyed after the return value
		// has been evaluated.
		defer func() {
			C.zsDecDestroy(zr.state)
			zr.state = nil
		}()
	}
	return zr.err
}

// writer compresses data with a ZSTD_CStream and writes it to w.
type writer struct {
	w     io.Writer       // Destination of compressed data
	err   error           // Sticky error; once set the encode loop no longer runs
	state *C.ZSTD_CStream // Encoder state; set to nil by Close
	buf   []byte          // Compressed output of the latest encoder call, a sub-slice of arr
	arr   [1 << 14]byte   // Backing storage for compressed output
}

// NewWriter returns an io.WriteCloser that compresses at the given level and
// writes the result to w. It panics if level lies outside of [1, 22] or if the
// encoder state could not be allocated.
func NewWriter(w io.Writer, level int) io.WriteCloser {
	if level < 1 || level > 22 {
		panic("zstd: invalid compression level")
	}

	zw := &writer{w: w, state: C.zsEncCreate(C.int(level))}
	if zw.state == nil {
		panic("zstd: could not allocate encoder state")
	}
	return zw
}

// Write compresses buf without finishing the stream.
func (zw *writer) Write(buf []byte) (int, error) {
	return zw.write(buf, 0)
}

// write feeds buf to the encoder and forwards all output to the underlying
// writer. It reports how many bytes of buf were consumed.
//
// A non-zero finish makes zsEncStream end the stream instead of compressing
// input; the loop then runs until the encoder returns 0.
func (zw *writer) write(buf []byte, finish C.int) (int, error) {
	if zw.state == nil {
		return 0, io.ErrClosedPipe
	}

	var n int
	for zw.err == nil && (len(buf) > 0 || finish > 0) {
		availIn, availOut, ptrIn, ptrOut := sizePtrs(buf, zw.arr[:])
		ret := C.zsEncStream(zw.state, finish, &availIn, ptrIn, &availOut, ptrOut)
		// availIn and availOut now hold what is left over of buf and zw.arr.
		n += len(buf) - int(availIn)
		buf = buf[len(buf)-int(availIn):]
		zw.buf = zw.arr[:len(zw.arr)-int(availOut)]

		if len(zw.buf) > 0 {
			if _, err := zw.w.Write(zw.buf); err != nil {
				zw.err = err
			}
		}
		switch {
		case C.ZSTD_isError(ret) > 0:
			zw.err = errors.New("zstd: compression error")
		case len(buf) == 0 && len(zw.buf) == 0:
			// All input was consumed and this call produced no output.
			return n, zw.err
		case ret == 0 && finish > 0:
			// The stream was ended completely.
			return n, zw.err
		}
	}
	return n, zw.err
}

// Close finishes the stream, releases the encoder state and returns the sticky
// error, if any. It does not close the underlying writer.
func (zw *writer) Close() error {
	if zw.state != nil {
		// Deferred so that the state is still valid for the final write below.
		defer func() {
			C.zsEncDestroy(zw.state)
			zw.state = nil
		}()
		zw.write(nil, 1)
	}
	return zw.err
}

// sizePtrs converts the lengths of in and out to C.size_t and returns pointers
// to their first elements. The pointer for an empty slice is nil.
func sizePtrs(in, out []byte) (sizeIn, sizeOut C.size_t, ptrIn, ptrOut *C.uint8_t) {
	sizeIn = C.size_t(len(in))
	sizeOut = C.size_t(len(out))
	if len(in) > 0 {
		ptrIn = (*C.uint8_t)(unsafe.Pointer(&in[0]))
	}
	if len(out) > 0 {
		ptrOut = (*C.uint8_t)(unsafe.Pointer(&out[0]))
	}
	return
}
-------------------------------------------------------------------------------- /internal/cgo/lzma/lzma.go: -------------------------------------------------------------------------------- 1 | // Copyright 2016, Joe Tsai. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE.md file. 4 | 5 | // +build cgo 6 | 7 | // Package lzma implements the LZMA2 compressed data format using C wrappers. 8 | package lzma 9 | 10 | /* 11 | #cgo LDFLAGS: -llzma 12 | 13 | #include 14 | #include 15 | #include "lzma.h" 16 | 17 | // zlState is a tuple of C allocated data structures. 18 | // 19 | // The liblzma documentation is not clear about whether the filters struct must 20 | // stay live past calls to lzma_raw_encoder and lzma_raw_decoder. 21 | // To be on the safe side, we allocate them and keep them around until the end. 22 | typedef struct { 23 | lzma_stream stream; 24 | lzma_filter filters[2]; 25 | lzma_options_lzma options; 26 | } zlState; 27 | 28 | zlState* zlDecCreate() { 29 | zlState* state = calloc(1, sizeof(zlState)); 30 | state->filters[0].id = LZMA_FILTER_LZMA2; 31 | state->filters[0].options = &state->options; 32 | state->filters[1].id = LZMA_VLI_UNKNOWN; 33 | state->options.dict_size = LZMA_DICT_SIZE_DEFAULT; 34 | 35 | assert(lzma_raw_decoder(&state->stream, state->filters) == LZMA_OK); 36 | return state; 37 | } 38 | 39 | zlState* zlEncCreate(int level) { 40 | zlState* state = calloc(1, sizeof(zlState)); 41 | state->filters[0].id = LZMA_FILTER_LZMA2; 42 | state->filters[0].options = &state->options; 43 | state->filters[1].id = LZMA_VLI_UNKNOWN; 44 | 45 | assert(!lzma_lzma_preset(&state->options, level)); 46 | assert(lzma_raw_encoder(&state->stream, state->filters) == LZMA_OK); 47 | return state; 48 | } 49 | 50 | lzma_ret zlStream( 51 | lzma_stream* strm, lzma_action action, 52 | size_t* avail_in, uint8_t* next_in, 53 | size_t* avail_out, uint8_t* next_out 54 | ) { 55 | strm->avail_in = *avail_in; 
// reader decompresses data read from r using a liblzma raw decoder.
type reader struct {
	r     io.Reader     // Source of compressed data
	err   error         // Sticky error; once set the decode loop no longer runs
	state *C.zlState    // Decoder state; set to nil by Close
	buf   []byte        // Compressed input not yet consumed, a sub-slice of arr
	arr   [1 << 14]byte // Backing storage for compressed input read from r
}

// NewReader returns an io.ReadCloser that decompresses data read from r.
// It panics if the decoder state could not be allocated.
func NewReader(r io.Reader) io.ReadCloser {
	zr := &reader{r: r, state: C.zlDecCreate()}
	if zr.state == nil {
		panic("lzma: could not allocate decoder state")
	}
	return zr
}

// Read decompresses data into buf and reports the number of bytes written.
//
// It returns io.ErrClosedPipe after Close, io.EOF when liblzma reports the end
// of the stream, and io.ErrUnexpectedEOF if r is exhausted before that point.
// Any other liblzma result is reported as a sticky "corrupted input" error.
func (zr *reader) Read(buf []byte) (int, error) {
	if zr.state == nil {
		return 0, io.ErrClosedPipe
	}

	var n int
	for zr.err == nil && (len(buf) > 0 && n == 0) {
		availIn, availOut, ptrIn, ptrOut := sizePtrs(zr.buf, buf)
		ret := C.zlStream(&zr.state.stream, 0, &availIn, ptrIn, &availOut, ptrOut)
		// availIn and availOut now hold what is left over, so the difference
		// to the old lengths is what was consumed and produced by this call.
		n += len(buf) - int(availOut)
		buf = buf[len(buf)-int(availOut):]
		zr.buf = zr.buf[len(zr.buf)-int(availIn):]

		switch ret {
		case C.LZMA_OK:
			return n, nil
		case C.LZMA_BUF_ERROR:
			// Treated as a request for more input, but only once everything
			// that was buffered has been consumed.
			if len(zr.buf) == 0 {
				n1, err := zr.r.Read(zr.arr[:])
				if n1 > 0 {
					zr.buf = zr.arr[:n1]
				} else if err != nil {
					// The source ended before liblzma saw the end of the stream.
					if err == io.EOF {
						err = io.ErrUnexpectedEOF
					}
					zr.err = err
				}
			}
		case C.LZMA_STREAM_END:
			return n, io.EOF
		default:
			zr.err = errors.New("lzma: corrupted input")
		}
	}
	return n, zr.err
}

// Close releases the decoder state and returns the sticky error, if any.
// Calling Close again does not release the state a second time.
func (zr *reader) Close() error {
	if zr.state != nil {
		// Deferred so that the state is destroyed after the return value
		// has been evaluated.
		defer func() {
			C.zlDestroy(zr.state)
			zr.state = nil
		}()
	}
	return zr.err
}

// writer compresses data with a liblzma raw encoder and writes it to w.
type writer struct {
	w     io.Writer     // Destination of compressed data
	err   error         // Sticky error; once set the encode loop no longer runs
	state *C.zlState    // Encoder state; set to nil by Close
	buf   []byte        // Compressed output of the latest encoder call, a sub-slice of arr
	arr   [1 << 14]byte // Backing storage for compressed output
}

// NewWriter returns an io.WriteCloser that compresses at the given level and
// writes the result to w. It panics if level lies outside of [0, 9] or if the
// encoder state could not be allocated.
func NewWriter(w io.Writer, level int) io.WriteCloser {
	if level < 0 || level > 9 {
		panic("lzma: invalid compression level")
	}

	zw := &writer{w: w, state: C.zlEncCreate(C.int(level))}
	if zw.state == nil {
		panic("lzma: could not allocate encoder state")
	}
	return zw
}

// Write compresses buf using the LZMA_RUN action.
func (zw *writer) Write(buf []byte) (int, error) {
	return zw.write(buf, C.LZMA_RUN)
}

// write feeds buf to the encoder using action op and forwards all output to
// the underlying writer. It reports how many bytes of buf were consumed.
//
// With LZMA_RUN the loop ends once buf is fully consumed; with any other op it
// keeps going until liblzma reports LZMA_STREAM_END or an error occurs.
func (zw *writer) write(buf []byte, op C.lzma_action) (int, error) {
	if zw.state == nil {
		return 0, io.ErrClosedPipe
	}

	var n int
	flush := op != C.LZMA_RUN
	for zw.err == nil && (len(buf) > 0 || flush) {
		availIn, availOut, ptrIn, ptrOut := sizePtrs(buf, zw.arr[:])
		ret := C.zlStream(&zw.state.stream, op, &availIn, ptrIn, &availOut, ptrOut)
		// availIn and availOut now hold what is left over of buf and zw.arr.
		n += len(buf) - int(availIn)
		buf = buf[len(buf)-int(availIn):]
		zw.buf = zw.arr[:len(zw.arr)-int(availOut)]

		if len(zw.buf) > 0 {
			if _, err := zw.w.Write(zw.buf); err != nil {
				zw.err = err
			}
		}
		switch ret {
		case C.LZMA_OK, C.LZMA_BUF_ERROR:
			continue // Nothing to do; the loop condition decides whether to go on
		case C.LZMA_STREAM_END:
			return n, zw.err
		default:
			zw.err = errors.New("lzma: compression error")
		}
	}
	return n, zw.err
}

// Close finishes the stream with LZMA_FINISH, releases the encoder state and
// returns the sticky error, if any. It does not close the underlying writer.
func (zw *writer) Close() error {
	if zw.state != nil {
		// Deferred so that the state is still valid for the final write below.
		defer func() {
			C.zlDestroy(zw.state)
			zw.state = nil
		}()
		zw.write(nil, C.LZMA_FINISH)
	}
	return zw.err
}

// sizePtrs converts the lengths of in and out to C.size_t and returns pointers
// to their first elements. The pointer for an empty slice is nil.
func sizePtrs(in, out []byte) (sizeIn, sizeOut C.size_t, ptrIn, ptrOut *C.uint8_t) {
	sizeIn = C.size_t(len(in))
	sizeOut = C.size_t(len(out))
	if len(in) > 0 {
		ptrIn = (*C.uint8_t)(unsafe.Pointer(&in[0]))
	}
	if len(out) > 0 {
		ptrOut = (*C.uint8_t)(unsafe.Pointer(&out[0]))
	}
	return
}
state->next_out = NULL; 64 | return ret; 65 | } 66 | 67 | void bzEncDestroy(bz_stream* state) { 68 | BZ2_bzCompressEnd(state); 69 | free(state); 70 | } 71 | */ 72 | import "C" 73 | 74 | import ( 75 | "errors" 76 | "io" 77 | "unsafe" 78 | ) 79 | 80 | type reader struct { 81 | r io.Reader 82 | err error 83 | state *C.bz_stream 84 | buf []byte 85 | arr [1 << 14]byte 86 | } 87 | 88 | func NewReader(r io.Reader) io.ReadCloser { 89 | zr := &reader{r: r, state: C.bzDecCreate()} 90 | if zr.state == nil { 91 | panic("bzip2: could not allocate decoder state") 92 | } 93 | return zr 94 | } 95 | 96 | func (zr *reader) Read(buf []byte) (int, error) { 97 | if zr.state == nil { 98 | return 0, io.ErrClosedPipe 99 | } 100 | 101 | var n int 102 | for zr.err == nil && (len(buf) > 0 && n == 0) { 103 | availIn, availOut, ptrIn, ptrOut := sizePtrs(zr.buf, buf) 104 | ret := C.bzDecStream(zr.state, &availIn, ptrIn, &availOut, ptrOut) 105 | n += len(buf) - int(availOut) 106 | buf = buf[len(buf)-int(availOut):] 107 | zr.buf = zr.buf[len(zr.buf)-int(availIn):] 108 | 109 | switch ret { 110 | case C.BZ_OK: 111 | if len(zr.buf) == 0 && n == 0 { 112 | n1, err := zr.r.Read(zr.arr[:]) 113 | if n1 > 0 { 114 | zr.buf = zr.arr[:n1] 115 | } else if err != nil { 116 | if err == io.EOF { 117 | err = io.ErrUnexpectedEOF 118 | } 119 | zr.err = err 120 | } 121 | } 122 | case C.BZ_STREAM_END: 123 | // Handle multi-stream files by re-setting the state. 
124 | if len(zr.buf) == 0 { 125 | if _, err := io.ReadFull(zr.r, zr.arr[:1]); err != nil { 126 | if err == io.EOF { 127 | return n, io.EOF 128 | } 129 | zr.err = io.ErrUnexpectedEOF 130 | return n, zr.err 131 | } 132 | zr.buf = zr.arr[:1] 133 | } 134 | C.bzDecDestroy(zr.state) 135 | zr.state = C.bzDecCreate() 136 | default: 137 | zr.err = errors.New("bzip2: corrupted input") 138 | } 139 | } 140 | return n, zr.err 141 | } 142 | 143 | func (zr *reader) Close() error { 144 | if zr.state != nil { 145 | defer func() { 146 | C.bzDecDestroy(zr.state) 147 | zr.state = nil 148 | }() 149 | } 150 | return zr.err 151 | } 152 | 153 | type writer struct { 154 | w io.Writer 155 | err error 156 | state *C.bz_stream 157 | buf []byte 158 | arr [1 << 14]byte 159 | } 160 | 161 | func NewWriter(w io.Writer, level int) io.WriteCloser { 162 | if level < 1 || level > 9 { 163 | panic("bzip2: invalid compression level") 164 | } 165 | 166 | zw := &writer{w: w, state: C.bzEncCreate(C.int(level))} 167 | if zw.state == nil { 168 | panic("bzip2: could not allocate encoder state") 169 | } 170 | return zw 171 | } 172 | 173 | func (zw *writer) Write(buf []byte) (int, error) { 174 | return zw.write(buf, C.BZ_RUN) 175 | } 176 | 177 | func (zw *writer) write(buf []byte, op C.int) (int, error) { 178 | if zw.state == nil { 179 | return 0, io.ErrClosedPipe 180 | } 181 | 182 | var n int 183 | flush := op != C.BZ_RUN 184 | for zw.err == nil && (len(buf) > 0 || flush) { 185 | availIn, availOut, ptrIn, ptrOut := sizePtrs(buf, zw.arr[:]) 186 | ret := C.bzEncStream(zw.state, op, &availIn, ptrIn, &availOut, ptrOut) 187 | n += len(buf) - int(availIn) 188 | buf = buf[len(buf)-int(availIn):] 189 | zw.buf = zw.arr[:len(zw.arr)-int(availOut)] 190 | 191 | if len(zw.buf) > 0 { 192 | if _, err := zw.w.Write(zw.buf); err != nil { 193 | zw.err = err 194 | } 195 | } 196 | switch ret { 197 | case C.BZ_OK, C.BZ_RUN_OK, C.BZ_FLUSH_OK, C.BZ_FINISH_OK: 198 | continue // Do nothing 199 | case C.BZ_STREAM_END: 200 | return n, 
zw.err 201 | default: 202 | zw.err = errors.New("bzip2: compression error") 203 | } 204 | } 205 | return n, zw.err 206 | } 207 | 208 | func (zw *writer) Close() error { 209 | if zw.state != nil { 210 | defer func() { 211 | C.bzEncDestroy(zw.state) 212 | zw.state = nil 213 | }() 214 | zw.write(nil, C.BZ_FINISH) 215 | } 216 | return zw.err 217 | } 218 | 219 | func sizePtrs(in, out []byte) (sizeIn, sizeOut C.uint, ptrIn, ptrOut *C.char) { 220 | sizeIn = C.uint(len(in)) 221 | sizeOut = C.uint(len(out)) 222 | if len(in) > 0 { 223 | ptrIn = (*C.char)(unsafe.Pointer(&in[0])) 224 | } 225 | if len(out) > 0 { 226 | ptrOut = (*C.char)(unsafe.Pointer(&out[0])) 227 | } 228 | return 229 | } 230 | --------------------------------------------------------------------------------