├── Readme.md ├── ci.yml ├── example_test.go ├── rle.go └── rle_test.go /Readme.md: -------------------------------------------------------------------------------- 1 | 2 | # go-rle 3 | 4 | Go Run-length encoding (currently just ints). RLE is great for data with low cardinality, 5 | for example a log severity enum is an especially good use-case as most logs are INFO, many millions of points can be compressed to a few bytes. 6 | 7 | ## Badges 8 | 9 | [![GoDoc](https://godoc.org/github.com/tj/go-rle?status.svg)](https://godoc.org/github.com/tj/go-rle) 10 | ![](https://img.shields.io/badge/license-MIT-blue.svg) 11 | ![](https://img.shields.io/badge/status-stable-green.svg) 12 | [![](http://apex.sh/images/badge.svg)](https://apex.sh/) 13 | 14 | --- 15 | 16 | > [tjholowaychuk.com](http://tjholowaychuk.com)  ·  17 | > GitHub [@tj](https://github.com/tj)  ·  18 | > Twitter [@tjholowaychuk](https://twitter.com/tjholowaychuk) 19 | -------------------------------------------------------------------------------- /ci.yml: -------------------------------------------------------------------------------- 1 | version: 0.2 2 | 3 | phases: 4 | install: 5 | commands: 6 | - go get -t ./... 7 | build: 8 | commands: 9 | - go test -cover -v ./... 10 | -------------------------------------------------------------------------------- /example_test.go: -------------------------------------------------------------------------------- 1 | package rle_test 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/tj/go-rle" 7 | ) 8 | 9 | func Example() { 10 | b := rle.EncodeInt64([]int64{1, 1, 1, 1, 1, 1, 1, 1, 125, 1, 1, 1, 1, 1, 1, 1, 1}) 11 | fmt.Printf("buf: %#x\n", b) 12 | fmt.Printf("len: %v\n", len(b)) 13 | // Output: 14 | // buf: 0x0210fa01020210 15 | // len: 7 16 | } 17 | -------------------------------------------------------------------------------- /rle.go: -------------------------------------------------------------------------------- 1 | // Package rle implements run-length encoding for various types (just ints at the moment). 2 | package rle 3 | 4 | import ( 5 | "bytes" 6 | "encoding/binary" 7 | "io" 8 | ) 9 | 10 | // Int64Decoder is what it sounds like. 11 | type Int64Decoder struct { 12 | Value int64 13 | Run int64 14 | buf *bytes.Buffer 15 | err error 16 | } 17 | 18 | // NewInt64Decoder returns an int64 decoder. 19 | func NewInt64Decoder(buf []byte) *Int64Decoder { 20 | return &Int64Decoder{ 21 | buf: bytes.NewBuffer(buf), 22 | } 23 | } 24 | 25 | // Next returns true if a value was scanned. 26 | func (d *Int64Decoder) Next() bool { 27 | if d.Run > 1 { 28 | d.Run-- 29 | return true 30 | } 31 | 32 | num, err := binary.ReadVarint(d.buf) 33 | if err == io.EOF { 34 | return false 35 | } 36 | 37 | if err != nil { 38 | d.err = err 39 | return false 40 | } 41 | 42 | run, err := binary.ReadVarint(d.buf) 43 | if err == io.EOF { 44 | d.err = io.ErrUnexpectedEOF 45 | return false 46 | } 47 | 48 | if err != nil { 49 | d.err = err 50 | return false 51 | } 52 | 53 | d.Value = num 54 | d.Run = run 55 | 56 | return true 57 | } 58 | 59 | // Err returns any error which ocurred during decoding. 60 | func (d *Int64Decoder) Err() error { 61 | return d.err 62 | } 63 | 64 | // EncodeInt64 encoded run. 65 | func EncodeInt64(nums []int64) []byte { 66 | size := len(nums) 67 | 68 | if size == 0 { 69 | return nil 70 | } 71 | 72 | var b = make([]byte, 8) 73 | var buf bytes.Buffer 74 | var cur = nums[0] 75 | var run int64 76 | 77 | for i := 0; i < size; i++ { 78 | num := nums[i] 79 | 80 | if num != cur { 81 | n := binary.PutVarint(b, cur) 82 | buf.Write(b[:n]) 83 | n = binary.PutVarint(b, run) 84 | buf.Write(b[:n]) 85 | cur = num 86 | run = 0 87 | } 88 | 89 | run++ 90 | } 91 | 92 | n := binary.PutVarint(b, cur) 93 | buf.Write(b[:n]) 94 | n = binary.PutVarint(b, run) 95 | buf.Write(b[:n]) 96 | 97 | return buf.Bytes() 98 | } 99 | 100 | // DecodeInt64 encoded run. 101 | func DecodeInt64(buf []byte) (v []int64, err error) { 102 | s := NewInt64Decoder(buf) 103 | 104 | for s.Next() { 105 | v = append(v, s.Value) 106 | } 107 | 108 | return v, s.Err() 109 | } 110 | 111 | // DecodeInt64Card returns a map of value cardinality. 112 | func DecodeInt64Card(buf []byte) (v map[int64]uint64, err error) { 113 | d := NewInt64Decoder(buf) 114 | v = make(map[int64]uint64) 115 | 116 | for d.Next() { 117 | v[d.Value]++ 118 | } 119 | 120 | return v, d.Err() 121 | } 122 | -------------------------------------------------------------------------------- /rle_test.go: -------------------------------------------------------------------------------- 1 | package rle_test 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | "github.com/tj/go-rle" 8 | ) 9 | 10 | func TestEncodeInt64(t *testing.T) { 11 | { 12 | var nums []int64 13 | b := rle.EncodeInt64(nums) 14 | v, err := rle.DecodeInt64(b) 15 | assert.NoError(t, err) 16 | assert.Equal(t, nums, v) 17 | } 18 | 19 | { 20 | nums := []int64{1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2} 21 | b := rle.EncodeInt64(nums) 22 | v, err := rle.DecodeInt64(b) 23 | assert.NoError(t, err) 24 | assert.Equal(t, nums, v) 25 | } 26 | 27 | { 28 | nums := []int64{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1} 29 | b := rle.EncodeInt64(nums) 30 | v, err := rle.DecodeInt64(b) 31 | assert.NoError(t, err) 32 | assert.Equal(t, 2, len(b), "should be two bytes") 33 | assert.Equal(t, nums, v) 34 | } 35 | } 36 | 37 | func TestDecodeInt64(t *testing.T) { 38 | in := []int64{0, 0, 0, 0, 0, 3, 3, 4, 1, 1, 1} 39 | out := []int64{} 40 | 41 | d := rle.NewInt64Decoder(rle.EncodeInt64(in)) 42 | 43 | for d.Next() { 44 | out = append(out, d.Value) 45 | } 46 | 47 | assert.Equal(t, in, out) 48 | assert.NoError(t, d.Err()) 49 | } 50 | 51 | func TestDecodeInt64Card(t *testing.T) { 52 | nums := []int64{1, 1, 1, 1, 1, 1, 0, 0, 0, 2} 53 | b := rle.EncodeInt64(nums) 54 | v, err := rle.DecodeInt64Card(b) 55 | assert.NoError(t, err) 56 | assert.Equal(t, map[int64]uint64{1: 6, 0: 3, 2: 1}, v) 57 | } 58 | 59 | func BenchmarkEncodeInt64(b *testing.B) { 60 | nums100 := make([]int64, 100e3) 61 | nums500 := make([]int64, 500e3) 62 | nums1000 := make([]int64, 1e6) 63 | 64 | b.Run("100k", func(b *testing.B) { 65 | b.SetBytes(100e3 * 8) 66 | for i := 0; i < b.N; i++ { 67 | rle.EncodeInt64(nums100) 68 | } 69 | }) 70 | 71 | b.Run("500k", func(b *testing.B) { 72 | b.SetBytes(500e3 * 8) 73 | for i := 0; i < b.N; i++ { 74 | rle.EncodeInt64(nums500) 75 | } 76 | }) 77 | 78 | b.Run("1M", func(b *testing.B) { 79 | b.SetBytes(1e6 * 8) 80 | for i := 0; i < b.N; i++ { 81 | rle.EncodeInt64(nums1000) 82 | } 83 | }) 84 | } 85 | 86 | func BenchmarkDecodeInt64(b *testing.B) { 87 | nums100 := rle.EncodeInt64(make([]int64, 100e3)) 88 | nums500 := rle.EncodeInt64(make([]int64, 500e3)) 89 | nums1000 := rle.EncodeInt64(make([]int64, 1e6)) 90 | 91 | b.Run("100k", func(b *testing.B) { 92 | b.SetBytes(100e3 * 8) 93 | for i := 0; i < b.N; i++ { 94 | rle.DecodeInt64(nums100) 95 | } 96 | }) 97 | 98 | b.Run("500k", func(b *testing.B) { 99 | b.SetBytes(500e3 * 8) 100 | for i := 0; i < b.N; i++ { 101 | rle.DecodeInt64(nums500) 102 | } 103 | }) 104 | 105 | b.Run("1M", func(b *testing.B) { 106 | b.SetBytes(1e6 * 8) 107 | for i := 0; i < b.N; i++ { 108 | rle.DecodeInt64(nums1000) 109 | } 110 | }) 111 | } 112 | 113 | func BenchmarkInt64Decoder(b *testing.B) { 114 | nums100 := rle.EncodeInt64(make([]int64, 100e3)) 115 | nums500 := rle.EncodeInt64(make([]int64, 500e3)) 116 | nums1000 := rle.EncodeInt64(make([]int64, 1e6)) 117 | 118 | b.Run("100k", func(b *testing.B) { 119 | b.SetBytes(100e3 * 8) 120 | for i := 0; i < b.N; i++ { 121 | d := rle.NewInt64Decoder(nums100) 122 | for d.Next() { 123 | 124 | } 125 | } 126 | }) 127 | 128 | b.Run("500k", func(b *testing.B) { 129 | b.SetBytes(500e3 * 8) 130 | for i := 0; i < b.N; i++ { 131 | d := rle.NewInt64Decoder(nums500) 132 | for d.Next() { 133 | 134 | } 135 | } 136 | }) 137 | 138 | b.Run("1M", func(b *testing.B) { 139 | b.SetBytes(1e6 * 8) 140 | for i := 0; i < b.N; i++ { 141 | d := rle.NewInt64Decoder(nums1000) 142 | for d.Next() { 143 | 144 | } 145 | } 146 | }) 147 | } 148 | 149 | func BenchmarkDecodeInt64Card(b *testing.B) { 150 | nums := []int64{1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2} 151 | buf := rle.EncodeInt64(nums) 152 | for i := 0; i < b.N; i++ { 153 | rle.DecodeInt64Card(buf) 154 | } 155 | } 156 | --------------------------------------------------------------------------------