├── .gitignore ├── .travis.yml ├── fuzz.go ├── lz4_test.go ├── LICENSE ├── README.md ├── lz4-example └── main.go ├── fuzzer └── main.go ├── reader.go └── writer.go /.gitignore: -------------------------------------------------------------------------------- 1 | /lz4-example/lz4-example 2 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | 3 | go: 4 | - 1.1 5 | - 1.2 6 | - 1.3 7 | - 1.4 8 | - 1.5 9 | - tip 10 | -------------------------------------------------------------------------------- /fuzz.go: -------------------------------------------------------------------------------- 1 | // +build gofuzz 2 | 3 | package lz4 4 | 5 | import "encoding/binary" 6 | 7 | func Fuzz(data []byte) int { 8 | 9 | if len(data) < 4 { 10 | return 0 11 | } 12 | 13 | ln := binary.LittleEndian.Uint32(data) 14 | if ln > (1 << 21) { 15 | return 0 16 | } 17 | 18 | if _, err := Decode(nil, data); err != nil { 19 | return 0 20 | } 21 | 22 | return 1 23 | } 24 | -------------------------------------------------------------------------------- /lz4_test.go: -------------------------------------------------------------------------------- 1 | package lz4 2 | 3 | import ( 4 | "bytes" 5 | "io/ioutil" 6 | "testing" 7 | ) 8 | 9 | var testfile, _ = ioutil.ReadFile("testdata/pg1661.txt") 10 | 11 | func roundtrip(t *testing.T, input []byte) { 12 | 13 | dst, err := Encode(nil, input) 14 | if err != nil { 15 | t.Errorf("got error during compression: %s", err) 16 | } 17 | 18 | output, err := Decode(nil, dst) 19 | 20 | if err != nil { 21 | t.Errorf("got error during decompress: %s", err) 22 | } 23 | 24 | if !bytes.Equal(output, input) { 25 | t.Errorf("roundtrip failed") 26 | } 27 | } 28 | 29 | func TestEmpty(t *testing.T) { 30 | roundtrip(t, nil) 31 | } 32 | 33 | func TestLengths(t *testing.T) { 34 | 35 | for i := 0; i < 1024; i++ { 36 | roundtrip(t, testfile[:i]) 
37 | } 38 | 39 | for i := 1024; i < 4096; i += 23 { 40 | roundtrip(t, testfile[:i]) 41 | } 42 | } 43 | 44 | func TestWords(t *testing.T) { 45 | roundtrip(t, testfile) 46 | } 47 | 48 | func BenchmarkLZ4Encode(b *testing.B) { 49 | for i := 0; i < b.N; i++ { 50 | Encode(nil, testfile) 51 | } 52 | } 53 | 54 | func BenchmarkLZ4Decode(b *testing.B) { 55 | 56 | var compressed, _ = Encode(nil, testfile) 57 | 58 | b.ResetTimer() 59 | 60 | for i := 0; i < b.N; i++ { 61 | Decode(nil, compressed) 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2011-2012 Branimir Karadzic. All rights reserved. 2 | Copyright 2013 Damian Gryski. All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, 5 | are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDER ``AS IS'' AND ANY EXPRESS OR 15 | IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 16 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT 17 | SHALL COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 18 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 19 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 20 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 21 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 22 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 23 | THE POSSIBILITY OF SUCH DAMAGE. 24 | 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | go-lz4 2 | ====== 3 | 4 | go-lz4 is a port of the LZ4 lossless compression algorithm to Go. The original C code 5 | is located at: 6 | 7 | https://github.com/Cyan4973/lz4 8 | 9 | Status 10 | ------ 11 | [![Build Status](https://secure.travis-ci.org/bkaradzic/go-lz4.png)](http://travis-ci.org/bkaradzic/go-lz4) 12 | [![GoDoc](https://godoc.org/github.com/bkaradzic/go-lz4?status.png)](https://godoc.org/github.com/bkaradzic/go-lz4) 13 | 14 | Usage 15 | ----- 16 | 17 | go get github.com/bkaradzic/go-lz4 18 | 19 | import "github.com/bkaradzic/go-lz4" 20 | 21 | The package name is `lz4` 22 | 23 | Notes 24 | ----- 25 | 26 | * go-lz4 saves a uint32 with the original uncompressed length at the beginning 27 | of the encoded buffer. This may get in the way of interoperability with 28 | other implementations. 
29 | 30 | Alternative 31 | ----------- 32 | 33 | https://github.com/pierrec/lz4 34 | 35 | Contributors 36 | ------------ 37 | 38 | Damian Gryski ([@dgryski](https://github.com/dgryski)) 39 | Dustin Sallings ([@dustin](https://github.com/dustin)) 40 | 41 | Contact 42 | ------- 43 | 44 | [@bkaradzic](https://twitter.com/bkaradzic) 45 | http://www.stuckingeometry.com 46 | 47 | Project page 48 | https://github.com/bkaradzic/go-lz4 49 | 50 | License 51 | ------- 52 | 53 | Copyright 2011-2012 Branimir Karadzic. All rights reserved. 54 | Copyright 2013 Damian Gryski. All rights reserved. 55 | 56 | Redistribution and use in source and binary forms, with or without modification, 57 | are permitted provided that the following conditions are met: 58 | 59 | 1. Redistributions of source code must retain the above copyright notice, this 60 | list of conditions and the following disclaimer. 61 | 62 | 2. Redistributions in binary form must reproduce the above copyright notice, 63 | this list of conditions and the following disclaimer in the documentation 64 | and/or other materials provided with the distribution. 65 | 66 | THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDER ``AS IS'' AND ANY EXPRESS OR 67 | IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 68 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT 69 | SHALL COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 70 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 71 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 72 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 73 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 74 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 75 | THE POSSIBILITY OF SUCH DAMAGE. 
76 | 77 | -------------------------------------------------------------------------------- /lz4-example/main.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2011 Branimir Karadzic. All rights reserved. 3 | * 4 | * Redistribution and use in source and binary forms, with or without modification, 5 | * are permitted provided that the following conditions are met: 6 | * 7 | * 1. Redistributions of source code must retain the above copyright notice, this 8 | * list of conditions and the following disclaimer. 9 | * 10 | * 2. Redistributions in binary form must reproduce the above copyright notice, 11 | * this list of conditions and the following disclaimer in the documentation 12 | * and/or other materials provided with the distribution. 13 | * 14 | * THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDER ``AS IS'' AND ANY EXPRESS OR 15 | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 16 | * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT 17 | * SHALL COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 18 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 19 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 20 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 21 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 22 | * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 23 | * THE POSSIBILITY OF SUCH DAMAGE. 
24 | */ 25 | 26 | package main 27 | 28 | import ( 29 | "flag" 30 | "fmt" 31 | "io/ioutil" 32 | "log" 33 | "os" 34 | "runtime/pprof" 35 | 36 | lz4 "github.com/bkaradzic/go-lz4" 37 | ) 38 | 39 | var ( 40 | decompress = flag.Bool("d", false, "decompress") 41 | ) 42 | 43 | func main() { 44 | 45 | var optCPUProfile = flag.String("cpuprofile", "", "profile") 46 | flag.Parse() 47 | 48 | if *optCPUProfile != "" { 49 | f, err := os.Create(*optCPUProfile) 50 | if err != nil { 51 | log.Fatal(err) 52 | } 53 | pprof.StartCPUProfile(f) 54 | defer pprof.StopCPUProfile() 55 | } 56 | 57 | args := flag.Args() 58 | 59 | var data []byte 60 | 61 | if len(args) < 2 { 62 | fmt.Print("Usage: lz4 [-d] \n") 63 | os.Exit(1) 64 | } 65 | 66 | input, err := os.OpenFile(args[0], os.O_RDONLY, 0644) 67 | if err != nil { 68 | fmt.Printf("Failed to open input file %s\n", args[0]) 69 | os.Exit(1) 70 | } 71 | defer input.Close() 72 | 73 | if *decompress { 74 | data, _ = ioutil.ReadAll(input) 75 | data, err = lz4.Decode(nil, data) 76 | if err != nil { 77 | fmt.Println("Failed to decode:", err) 78 | return 79 | } 80 | } else { 81 | data, _ = ioutil.ReadAll(input) 82 | data, err = lz4.Encode(nil, data) 83 | if err != nil { 84 | fmt.Println("Failed to encode:", err) 85 | return 86 | } 87 | } 88 | 89 | err = ioutil.WriteFile(args[1], data, 0644) 90 | if err != nil { 91 | fmt.Printf("Failed to open output file %s\n", args[1]) 92 | os.Exit(1) 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /fuzzer/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "math/rand" 5 | 6 | "github.com/bkaradzic/go-lz4" 7 | 8 | // lz4's API matches snappy's, so we can easily see how it performs 9 | // lz4 "code.google.com/p/snappy-go/snappy" 10 | ) 11 | 12 | var input = ` 13 | ADVENTURE I. A SCANDAL IN BOHEMIA 14 | 15 | I. 16 | 17 | To Sherlock Holmes she is always THE woman. 
I have seldom heard 18 | him mention her under any other name. In his eyes she eclipses 19 | and predominates the whole of her sex. It was not that he felt 20 | any emotion akin to love for Irene Adler. All emotions, and that 21 | one particularly, were abhorrent to his cold, precise but 22 | admirably balanced mind. He was, I take it, the most perfect 23 | reasoning and observing machine that the world has seen, but as a 24 | lover he would have placed himself in a false position. He never 25 | spoke of the softer passions, save with a gibe and a sneer. They 26 | were admirable things for the observer--excellent for drawing the 27 | veil from men's motives and actions. But for the trained reasoner 28 | to admit such intrusions into his own delicate and finely 29 | adjusted temperament was to introduce a distracting factor which 30 | might throw a doubt upon all his mental results. Grit in a 31 | sensitive instrument, or a crack in one of his own high-power 32 | lenses, would not be more disturbing than a strong emotion in a 33 | nature such as his. And yet there was but one woman to him, and 34 | that woman was the late Irene Adler, of dubious and questionable 35 | memory. 36 | 37 | I had seen little of Holmes lately. My marriage had drifted us 38 | away from each other. My own complete happiness, and the 39 | home-centred interests which rise up around the man who first 40 | finds himself master of his own establishment, were sufficient to 41 | absorb all my attention, while Holmes, who loathed every form of 42 | society with his whole Bohemian soul, remained in our lodgings in 43 | Baker Street, buried among his old books, and alternating from 44 | week to week between cocaine and ambition, the drowsiness of the 45 | drug, and the fierce energy of his own keen nature. 
He was still, 46 | as ever, deeply attracted by the study of crime, and occupied his 47 | immense faculties and extraordinary powers of observation in 48 | following out those clues, and clearing up those mysteries which 49 | had been abandoned as hopeless by the official police. From time 50 | to time I heard some vague account of his doings: of his summons 51 | to Odessa in the case of the Trepoff murder, of his clearing up 52 | of the singular tragedy of the Atkinson brothers at Trincomalee, 53 | and finally of the mission which he had accomplished so 54 | delicately and successfully for the reigning family of Holland. 55 | Beyond these signs of his activity, however, which I merely 56 | shared with all the readers of the daily press, I knew little of 57 | my former friend and companion. 58 | ` 59 | 60 | func main() { 61 | 62 | compressed, _ := lz4.Encode(nil, []byte(input)) 63 | 64 | modified := make([]byte, len(compressed)) 65 | 66 | for { 67 | copy(modified, compressed) 68 | for i := 0; i < 100; i++ { 69 | modified[rand.Intn(len(compressed)-4)+4] = byte(rand.Intn(256)) 70 | } 71 | lz4.Decode(nil, modified) 72 | } 73 | 74 | } 75 | -------------------------------------------------------------------------------- /reader.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2011-2012 Branimir Karadzic. All rights reserved. 3 | * 4 | * Redistribution and use in source and binary forms, with or without modification, 5 | * are permitted provided that the following conditions are met: 6 | * 7 | * 1. Redistributions of source code must retain the above copyright notice, this 8 | * list of conditions and the following disclaimer. 9 | * 10 | * 2. Redistributions in binary form must reproduce the above copyright notice, 11 | * this list of conditions and the following disclaimer in the documentation 12 | * and/or other materials provided with the distribution. 
13 | * 14 | * THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDER ``AS IS'' AND ANY EXPRESS OR 15 | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 16 | * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT 17 | * SHALL COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 18 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 19 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 20 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 21 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 22 | * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 23 | * THE POSSIBILITY OF SUCH DAMAGE. 24 | */ 25 | 26 | package lz4 27 | 28 | import ( 29 | "encoding/binary" 30 | "errors" 31 | "io" 32 | ) 33 | 34 | var ( 35 | // ErrCorrupt indicates the input was corrupt 36 | ErrCorrupt = errors.New("corrupt input") 37 | ) 38 | 39 | const ( 40 | mlBits = 4 41 | mlMask = (1 << mlBits) - 1 42 | runBits = 8 - mlBits 43 | runMask = (1 << runBits) - 1 44 | ) 45 | 46 | type decoder struct { 47 | src []byte 48 | dst []byte 49 | spos uint32 50 | dpos uint32 51 | ref uint32 52 | } 53 | 54 | func (d *decoder) readByte() (uint8, error) { 55 | if int(d.spos) == len(d.src) { 56 | return 0, io.EOF 57 | } 58 | b := d.src[d.spos] 59 | d.spos++ 60 | return b, nil 61 | } 62 | 63 | func (d *decoder) getLen() (uint32, error) { 64 | 65 | length := uint32(0) 66 | ln, err := d.readByte() 67 | if err != nil { 68 | return 0, ErrCorrupt 69 | } 70 | for ln == 255 { 71 | length += 255 72 | ln, err = d.readByte() 73 | if err != nil { 74 | return 0, ErrCorrupt 75 | } 76 | } 77 | length += uint32(ln) 78 | 79 | return length, nil 80 | } 81 | 82 | func (d *decoder) cp(length, decr uint32) { 83 | 84 | if int(d.ref+length) < int(d.dpos) { 85 | copy(d.dst[d.dpos:], d.dst[d.ref:d.ref+length]) 86 | } else { 
87 | for ii := uint32(0); ii < length; ii++ { 88 | d.dst[d.dpos+ii] = d.dst[d.ref+ii] 89 | } 90 | } 91 | d.dpos += length 92 | d.ref += length - decr 93 | } 94 | 95 | func (d *decoder) finish(err error) error { 96 | if err == io.EOF { 97 | return nil 98 | } 99 | 100 | return err 101 | } 102 | 103 | // Decode returns the decoded form of src. The returned slice may be a 104 | // subslice of dst if it was large enough to hold the entire decoded block. 105 | func Decode(dst, src []byte) ([]byte, error) { 106 | 107 | if len(src) < 4 { 108 | return nil, ErrCorrupt 109 | } 110 | 111 | uncompressedLen := binary.LittleEndian.Uint32(src) 112 | 113 | if uncompressedLen == 0 { 114 | return nil, nil 115 | } 116 | 117 | if uncompressedLen > MaxInputSize { 118 | return nil, ErrTooLarge 119 | } 120 | 121 | if dst == nil || len(dst) < int(uncompressedLen) { 122 | dst = make([]byte, uncompressedLen) 123 | } 124 | 125 | d := decoder{src: src, dst: dst[:uncompressedLen], spos: 4} 126 | 127 | decr := []uint32{0, 3, 2, 3} 128 | 129 | for { 130 | code, err := d.readByte() 131 | if err != nil { 132 | return d.dst, d.finish(err) 133 | } 134 | 135 | length := uint32(code >> mlBits) 136 | if length == runMask { 137 | ln, err := d.getLen() 138 | if err != nil { 139 | return nil, ErrCorrupt 140 | } 141 | length += ln 142 | } 143 | 144 | if int(d.spos+length) > len(d.src) || int(d.dpos+length) > len(d.dst) { 145 | return nil, ErrCorrupt 146 | } 147 | 148 | for ii := uint32(0); ii < length; ii++ { 149 | d.dst[d.dpos+ii] = d.src[d.spos+ii] 150 | } 151 | 152 | d.spos += length 153 | d.dpos += length 154 | 155 | if int(d.spos) == len(d.src) { 156 | return d.dst, nil 157 | } 158 | 159 | if int(d.spos+2) >= len(d.src) { 160 | return nil, ErrCorrupt 161 | } 162 | 163 | back := uint32(d.src[d.spos]) | uint32(d.src[d.spos+1])<<8 164 | 165 | if back > d.dpos { 166 | return nil, ErrCorrupt 167 | } 168 | 169 | d.spos += 2 170 | d.ref = d.dpos - back 171 | 172 | length = uint32(code & mlMask) 173 | if 
length == mlMask { 174 | ln, err := d.getLen() 175 | if err != nil { 176 | return nil, ErrCorrupt 177 | } 178 | length += ln 179 | } 180 | 181 | literal := d.dpos - d.ref 182 | 183 | if literal < 4 { 184 | if int(d.dpos+4) > len(d.dst) { 185 | return nil, ErrCorrupt 186 | } 187 | 188 | d.cp(4, decr[literal]) 189 | } else { 190 | length += 4 191 | } 192 | 193 | if d.dpos+length > uncompressedLen { 194 | return nil, ErrCorrupt 195 | } 196 | 197 | d.cp(length, 0) 198 | } 199 | } 200 | -------------------------------------------------------------------------------- /writer.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2011-2012 Branimir Karadzic. All rights reserved. 3 | * 4 | * Redistribution and use in source and binary forms, with or without modification, 5 | * are permitted provided that the following conditions are met: 6 | * 7 | * 1. Redistributions of source code must retain the above copyright notice, this 8 | * list of conditions and the following disclaimer. 9 | * 10 | * 2. Redistributions in binary form must reproduce the above copyright notice, 11 | * this list of conditions and the following disclaimer in the documentation 12 | * and/or other materials provided with the distribution. 13 | * 14 | * THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDER ``AS IS'' AND ANY EXPRESS OR 15 | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 16 | * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT 17 | * SHALL COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 18 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 19 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 20 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 21 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 22 | * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 23 | * THE POSSIBILITY OF SUCH DAMAGE. 24 | */ 25 | 26 | package lz4 27 | 28 | import ( 29 | "encoding/binary" 30 | "errors" 31 | ) 32 | 33 | const ( 34 | minMatch = 4 35 | hashLog = 17 36 | hashTableSize = 1 << hashLog 37 | hashShift = (minMatch * 8) - hashLog 38 | incompressible uint32 = 128 39 | uninitHash = 0x88888888 40 | 41 | // MaxInputSize is the largest buffer than can be compressed in a single block 42 | MaxInputSize = 0x7E000000 43 | ) 44 | 45 | var ( 46 | // ErrTooLarge indicates the input buffer was too large 47 | ErrTooLarge = errors.New("input too large") 48 | ) 49 | 50 | type encoder struct { 51 | src []byte 52 | dst []byte 53 | hashTable []uint32 54 | pos uint32 55 | anchor uint32 56 | dpos uint32 57 | } 58 | 59 | // CompressBound returns the maximum length of a lz4 block, given it's uncompressed length 60 | func CompressBound(isize int) int { 61 | if isize > MaxInputSize { 62 | return 0 63 | } 64 | return isize + ((isize) / 255) + 16 + 4 65 | } 66 | 67 | func (e *encoder) writeLiterals(length, mlLen, pos uint32) { 68 | 69 | ln := length 70 | 71 | var code byte 72 | if ln > runMask-1 { 73 | code = runMask 74 | } else { 75 | code = byte(ln) 76 | } 77 | 78 | if mlLen > mlMask-1 { 79 | e.dst[e.dpos] = (code << mlBits) + byte(mlMask) 80 | } else { 81 | e.dst[e.dpos] = (code << mlBits) + byte(mlLen) 82 | } 83 | e.dpos++ 84 | 85 | if code == runMask { 86 | ln -= runMask 87 | for ; ln > 254; ln -= 255 { 88 | e.dst[e.dpos] = 255 89 | e.dpos++ 90 
| } 91 | 92 | e.dst[e.dpos] = byte(ln) 93 | e.dpos++ 94 | } 95 | 96 | for ii := uint32(0); ii < length; ii++ { 97 | e.dst[e.dpos+ii] = e.src[pos+ii] 98 | } 99 | 100 | e.dpos += length 101 | } 102 | 103 | // Encode returns the encoded form of src. The returned array may be a 104 | // sub-slice of dst if it was large enough to hold the entire output. 105 | func Encode(dst, src []byte) ([]byte, error) { 106 | 107 | if len(src) >= MaxInputSize { 108 | return nil, ErrTooLarge 109 | } 110 | 111 | if n := CompressBound(len(src)); len(dst) < n { 112 | dst = make([]byte, n) 113 | } 114 | 115 | e := encoder{src: src, dst: dst, hashTable: make([]uint32, hashTableSize)} 116 | 117 | binary.LittleEndian.PutUint32(dst, uint32(len(src))) 118 | e.dpos = 4 119 | 120 | var ( 121 | step uint32 = 1 122 | limit = incompressible 123 | ) 124 | 125 | for { 126 | if int(e.pos)+12 >= len(e.src) { 127 | e.writeLiterals(uint32(len(e.src))-e.anchor, 0, e.anchor) 128 | return e.dst[:e.dpos], nil 129 | } 130 | 131 | sequence := uint32(e.src[e.pos+3])<<24 | uint32(e.src[e.pos+2])<<16 | uint32(e.src[e.pos+1])<<8 | uint32(e.src[e.pos+0]) 132 | 133 | hash := (sequence * 2654435761) >> hashShift 134 | ref := e.hashTable[hash] + uninitHash 135 | e.hashTable[hash] = e.pos - uninitHash 136 | 137 | if ((e.pos-ref)>>16) != 0 || uint32(e.src[ref+3])<<24|uint32(e.src[ref+2])<<16|uint32(e.src[ref+1])<<8|uint32(e.src[ref+0]) != sequence { 138 | if e.pos-e.anchor > limit { 139 | limit <<= 1 140 | step += 1 + (step >> 2) 141 | } 142 | e.pos += step 143 | continue 144 | } 145 | 146 | if step > 1 { 147 | e.hashTable[hash] = ref - uninitHash 148 | e.pos -= step - 1 149 | step = 1 150 | continue 151 | } 152 | limit = incompressible 153 | 154 | ln := e.pos - e.anchor 155 | back := e.pos - ref 156 | 157 | anchor := e.anchor 158 | 159 | e.pos += minMatch 160 | ref += minMatch 161 | e.anchor = e.pos 162 | 163 | for int(e.pos) < len(e.src)-5 && e.src[e.pos] == e.src[ref] { 164 | e.pos++ 165 | ref++ 166 | } 167 | 168 | 
mlLen := e.pos - e.anchor 169 | 170 | e.writeLiterals(ln, mlLen, anchor) 171 | e.dst[e.dpos] = uint8(back) 172 | e.dst[e.dpos+1] = uint8(back >> 8) 173 | e.dpos += 2 174 | 175 | if mlLen > mlMask-1 { 176 | mlLen -= mlMask 177 | for mlLen > 254 { 178 | mlLen -= 255 179 | 180 | e.dst[e.dpos] = 255 181 | e.dpos++ 182 | } 183 | 184 | e.dst[e.dpos] = byte(mlLen) 185 | e.dpos++ 186 | } 187 | 188 | e.anchor = e.pos 189 | } 190 | } 191 | --------------------------------------------------------------------------------