├── .gitignore ├── README ├── eval ├── eval-results.png ├── helper.go └── main.go ├── fuzz.go ├── testdata └── data.go ├── bstream.go ├── tsz_test.go └── tsz.go /.gitignore: -------------------------------------------------------------------------------- 1 | eval/eval 2 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | godoc: https://godoc.org/github.com/dgryski/go-tsz 2 | -------------------------------------------------------------------------------- /eval/eval-results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UlricQin/go-tsz/master/eval/eval-results.png -------------------------------------------------------------------------------- /eval/helper.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "math" 5 | ) 6 | 7 | func Round(f float64) float64 { 8 | if f < 0 { 9 | return math.Ceil(f - 0.5) 10 | } 11 | return math.Floor(f + .5) 12 | } 13 | 14 | func RoundNum(f float64, places int) float64 { 15 | shift := math.Pow(10, float64(places)) 16 | return Round(f*shift) / shift 17 | } 18 | -------------------------------------------------------------------------------- /fuzz.go: -------------------------------------------------------------------------------- 1 | // +build gofuzz 2 | 3 | package tsz 4 | 5 | import ( 6 | "encoding/binary" 7 | "fmt" 8 | "math" 9 | 10 | "github.com/dgryski/go-tsz/testdata" 11 | ) 12 | 13 | func Fuzz(data []byte) int { 14 | 15 | fuzzUnpack(data) 16 | 17 | if len(data) < 9 { 18 | return 0 19 | } 20 | 21 | t0 := uint32(1456236677) 22 | 23 | v := float64(10000) 24 | 25 | var vals []testdata.Point 26 | s := New(t0) 27 | t := t0 28 | for len(data) >= 10 { 29 | tdelta := uint32(binary.LittleEndian.Uint16(data)) 30 | if t == t0 { 31 | tdelta &= (1 << 14) - 1 32 | } 33 | t += 
tdelta 34 | data = data[2:] 35 | v += float64(int16(binary.LittleEndian.Uint16(data))) + float64(binary.LittleEndian.Uint16(data[2:]))/float64(math.MaxUint16) 36 | data = data[8:] 37 | vals = append(vals, testdata.Point{v, t}) 38 | s.Push(t, v) 39 | } 40 | 41 | it := s.Iter() 42 | 43 | var i int 44 | for it.Next() { 45 | gt, gv := it.Values() 46 | if gt != vals[i].T || (gv != vals[i].V || math.IsNaN(gv) && math.IsNaN(vals[i].V)) { 47 | panic(fmt.Sprintf("failure: gt=%v vals[i].T=%v gv=%v vals[i].V=%v", gt, vals[i].T, gv, vals[i].V)) 48 | } 49 | i++ 50 | } 51 | 52 | if i != len(vals) { 53 | panic("extra data") 54 | } 55 | 56 | return 1 57 | } 58 | 59 | func fuzzUnpack(data []byte) { 60 | 61 | it, err := NewIterator(data) 62 | if err != nil { 63 | return 64 | } 65 | 66 | for it.Next() { 67 | _, _ = it.Values() 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /testdata/data.go: -------------------------------------------------------------------------------- 1 | package testdata 2 | 3 | type Point struct { 4 | V float64 5 | T uint32 6 | } 7 | 8 | // 120 points every 60s 9 | var TwoHoursData = []Point{ 10 | {761, 1440583200}, {727, 1440583260}, {765, 1440583320}, {706, 1440583380}, {700, 1440583440}, 11 | {679, 1440583500}, {757, 1440583560}, {708, 1440583620}, {739, 1440583680}, {707, 1440583740}, 12 | {699, 1440583800}, {740, 1440583860}, {729, 1440583920}, {766, 1440583980}, {730, 1440584040}, 13 | {715, 1440584100}, {705, 1440584160}, {693, 1440584220}, {765, 1440584280}, {724, 1440584340}, 14 | {799, 1440584400}, {761, 1440584460}, {737, 1440584520}, {766, 1440584580}, {756, 1440584640}, 15 | {719, 1440584700}, {722, 1440584760}, {801, 1440584820}, {747, 1440584880}, {731, 1440584940}, 16 | {742, 1440585000}, {744, 1440585060}, {791, 1440585120}, {750, 1440585180}, {759, 1440585240}, 17 | {809, 1440585300}, {751, 1440585360}, {705, 1440585420}, {770, 1440585480}, {792, 1440585540}, 18 | {727, 1440585600}, {762, 
// bstream is a stream of bits backed by a byte slice. The same type is used
// for writing and for reading; the meaning of count depends on the role.
type bstream struct {
	// the data stream
	stream []byte

	// Writer: number of still-unused low-order bits in the last byte of stream.
	// Reader: number of still-unread low-order bits in the first byte of stream.
	count uint8
}

// newBReader returns a bstream that reads bits from b, most significant bit
// of each byte first. Reading does not modify b.
func newBReader(b []byte) *bstream {
	return &bstream{stream: b, count: 8}
}

// newBWriter returns an empty bstream for writing with capacity for size bytes.
func newBWriter(size int) *bstream {
	return &bstream{stream: make([]byte, 0, size), count: 0}
}

// clone returns a copy of b that owns its own backing array.
func (b *bstream) clone() *bstream {
	d := make([]byte, len(b.stream))
	copy(d, b.stream)
	return &bstream{stream: d, count: b.count}
}

// bytes returns the underlying byte slice (not a copy).
func (b *bstream) bytes() []byte {
	return b.stream
}

// bit is a single bit of the stream.
type bit bool

const (
	zero bit = false
	one  bit = true
)

// writeBit appends a single bit to the stream.
func (b *bstream) writeBit(bit bit) {

	if b.count == 0 {
		b.stream = append(b.stream, 0)
		b.count = 8
	}

	i := len(b.stream) - 1

	if bit {
		b.stream[i] |= 1 << (b.count - 1)
	}

	b.count--
}

// writeByte appends eight bits to the stream. The free-bit count of the last
// byte is the same before and after the call.
func (b *bstream) writeByte(byt byte) {

	if b.count == 0 {
		b.stream = append(b.stream, 0)
		b.count = 8
	}

	i := len(b.stream) - 1

	// fill up the last byte with the top b.count bits of byt
	b.stream[i] |= byt >> (8 - b.count)

	// the remaining low bits of byt start a new byte; when b.count == 8 the
	// shift yields 0 and the new byte is left completely free
	b.stream = append(b.stream, 0)
	i++
	b.stream[i] = byt << b.count
}

// writeBits appends the low nbits bits of u to the stream, most significant
// of those bits first.
func (b *bstream) writeBits(u uint64, nbits int) {
	// move the bits to be written to the top of u
	u <<= (64 - uint(nbits))
	for nbits >= 8 {
		byt := byte(u >> 56)
		b.writeByte(byt)
		u <<= 8
		nbits -= 8
	}

	for nbits > 0 {
		b.writeBit((u >> 63) == 1)
		u <<= 1
		nbits--
	}
}

// readBit consumes and returns the next bit of the stream. It returns io.EOF
// when no bits are left. The underlying bytes are not modified.
func (b *bstream) readBit() (bit, error) {

	if len(b.stream) == 0 {
		return false, io.EOF
	}

	if b.count == 0 {
		b.stream = b.stream[1:]
		// did we just run out of stuff to read?
		if len(b.stream) == 0 {
			return false, io.EOF
		}
		b.count = 8
	}

	b.count--
	// after the decrement, b.count is the index of the bit being read
	return (b.stream[0]>>b.count)&1 != 0, nil
}

// readByte consumes and returns the next eight bits of the stream. It returns
// io.EOF when fewer than eight bits are left. The underlying bytes are not
// modified.
func (b *bstream) readByte() (byte, error) {

	if len(b.stream) == 0 {
		return 0, io.EOF
	}

	if b.count == 0 {
		b.stream = b.stream[1:]

		if len(b.stream) == 0 {
			return 0, io.EOF
		}

		b.count = 8
	}

	// byte-aligned: hand out the current byte as is
	if b.count == 8 {
		b.count = 0
		return b.stream[0], nil
	}

	// unaligned: the b.count unread bits of the current byte become the high
	// bits of the result, the rest comes from the top of the next byte
	byt := b.stream[0] << (8 - b.count)
	b.stream = b.stream[1:]

	if len(b.stream) == 0 {
		return 0, io.EOF
	}

	byt |= b.stream[0] >> b.count

	return byt, nil
}

// readBits consumes nbits bits and returns them in the low bits of the
// result, first bit read being the most significant. On error the returned
// value is 0.
func (b *bstream) readBits(nbits int) (uint64, error) {

	var u uint64

	for nbits >= 8 {
		byt, err := b.readByte()
		if err != nil {
			return 0, err
		}

		u = (u << 8) | uint64(byt)
		nbits -= 8
	}

	for nbits > 0 {
		bt, err := b.readBit()
		if err != nil {
			return 0, err
		}
		u <<= 1
		if bt {
			u |= 1
		}
		nbits--
	}

	return u, nil
}
// extra tests 27 | 28 | // floating point masking/shifting bug 29 | tunix += 60 30 | s.Push(tunix, 13) 31 | 32 | tunix += 60 33 | s.Push(tunix, 24) 34 | 35 | // delta-of-delta sizes 36 | tunix += 300 // == delta-of-delta of 240 37 | s.Push(tunix, 24) 38 | 39 | tunix += 900 // == delta-of-delta of 600 40 | s.Push(tunix, 24) 41 | 42 | tunix += 900 + 2050 // == delta-of-delta of 600 43 | s.Push(tunix, 24) 44 | 45 | it := s.Iter() 46 | 47 | tunix = uint32(t0.Unix()) 48 | want := []struct { 49 | t uint32 50 | v float64 51 | }{ 52 | {tunix + 62, 12}, 53 | {tunix + 122, 12}, 54 | {tunix + 182, 24}, 55 | 56 | {tunix + 242, 13}, 57 | {tunix + 302, 24}, 58 | 59 | {tunix + 602, 24}, 60 | {tunix + 1502, 24}, 61 | {tunix + 4452, 24}, 62 | } 63 | 64 | for _, w := range want { 65 | if !it.Next() { 66 | t.Fatalf("Next()=false, want true") 67 | } 68 | tt, vv := it.Values() 69 | if w.t != tt || w.v != vv { 70 | t.Errorf("Values()=(%v,%v), want (%v,%v)\n", tt, vv, w.t, w.v) 71 | } 72 | } 73 | 74 | if it.Next() { 75 | t.Fatalf("Next()=true, want false") 76 | } 77 | 78 | if err := it.Err(); err != nil { 79 | t.Errorf("it.Err()=%v, want nil", err) 80 | } 81 | } 82 | 83 | func TestRoundtrip(t *testing.T) { 84 | 85 | s := New(testdata.TwoHoursData[0].T) 86 | for _, p := range testdata.TwoHoursData { 87 | s.Push(p.T, p.V) 88 | } 89 | 90 | it := s.Iter() 91 | for _, w := range testdata.TwoHoursData { 92 | if !it.Next() { 93 | t.Fatalf("Next()=false, want true") 94 | } 95 | tt, vv := it.Values() 96 | // t.Logf("it.Values()=(%+v, %+v)\n", time.Unix(int64(tt), 0), vv) 97 | if w.T != tt || w.V != vv { 98 | t.Errorf("Values()=(%v,%v), want (%v,%v)\n", tt, vv, w.T, w.V) 99 | } 100 | } 101 | 102 | if it.Next() { 103 | t.Fatalf("Next()=true, want false") 104 | } 105 | 106 | if err := it.Err(); err != nil { 107 | t.Errorf("it.Err()=%v, want nil", err) 108 | } 109 | } 110 | 111 | func TestConcurrentRoundtripImmediateWrites(t *testing.T) { 112 | testConcurrentRoundtrip(t, time.Duration(0)) 113 | } 
114 | func TestConcurrentRoundtrip1MsBetweenWrites(t *testing.T) { 115 | testConcurrentRoundtrip(t, time.Millisecond) 116 | } 117 | func TestConcurrentRoundtrip10MsBetweenWrites(t *testing.T) { 118 | testConcurrentRoundtrip(t, 10*time.Millisecond) 119 | } 120 | 121 | // Test reading while writing at the same time. 122 | func testConcurrentRoundtrip(t *testing.T, sleep time.Duration) { 123 | s := New(testdata.TwoHoursData[0].T) 124 | 125 | //notify the reader about the number of points that have been written. 126 | writeNotify := make(chan int) 127 | 128 | // notify the reader when we have finished. 129 | done := make(chan struct{}) 130 | 131 | // continuously iterate over the values of the series. 132 | // when a write is made, the total number of points in the series 133 | // will be sent over the channel, so we can make sure we are reading 134 | // the correct amount of values. 135 | go func(numPoints chan int, finished chan struct{}) { 136 | written := 0 137 | for { 138 | select { 139 | case written = <-numPoints: 140 | default: 141 | read := 0 142 | it := s.Iter() 143 | // read all of the points in the series. 144 | for it.Next() { 145 | tt, vv := it.Values() 146 | expectedT := testdata.TwoHoursData[read].T 147 | expectedV := testdata.TwoHoursData[read].V 148 | if expectedT != tt || expectedV != vv { 149 | t.Errorf("metric values dont match what was written. (%d, %f) != (%d, %f)\n", tt, vv, expectedT, expectedV) 150 | } 151 | read++ 152 | } 153 | //check that the number of points read matches the number of points 154 | // written to the series. 155 | if read != written && read != written+1 { 156 | // check if a point was written while we were running 157 | select { 158 | case written = <-numPoints: 159 | // a new point was written. 
160 | if read != written && read != written+1 { 161 | t.Errorf("expexcted %d values in series, got %d", written, read) 162 | } 163 | default: 164 | t.Errorf("expexcted %d values in series, got %d", written, read) 165 | } 166 | } 167 | } 168 | //check if we have finished writing points. 169 | select { 170 | case <-finished: 171 | return 172 | default: 173 | } 174 | } 175 | }(writeNotify, done) 176 | 177 | // write points to the series. 178 | for i := 0; i < 100; i++ { 179 | s.Push(testdata.TwoHoursData[i].T, testdata.TwoHoursData[i].V) 180 | writeNotify <- i + 1 181 | time.Sleep(sleep) 182 | } 183 | done <- struct{}{} 184 | } 185 | 186 | func BenchmarkEncode(b *testing.B) { 187 | b.SetBytes(int64(len(testdata.TwoHoursData) * 12)) 188 | for i := 0; i < b.N; i++ { 189 | s := New(testdata.TwoHoursData[0].T) 190 | for _, tt := range testdata.TwoHoursData { 191 | s.Push(tt.T, tt.V) 192 | } 193 | } 194 | } 195 | 196 | func BenchmarkDecodeSeries(b *testing.B) { 197 | b.SetBytes(int64(len(testdata.TwoHoursData) * 12)) 198 | s := New(testdata.TwoHoursData[0].T) 199 | for _, tt := range testdata.TwoHoursData { 200 | s.Push(tt.T, tt.V) 201 | } 202 | 203 | b.ResetTimer() 204 | 205 | for i := 0; i < b.N; i++ { 206 | it := s.Iter() 207 | var j int 208 | for it.Next() { 209 | j++ 210 | } 211 | } 212 | } 213 | 214 | func BenchmarkDecodeByteSlice(b *testing.B) { 215 | b.SetBytes(int64(len(testdata.TwoHoursData) * 12)) 216 | s := New(testdata.TwoHoursData[0].T) 217 | for _, tt := range testdata.TwoHoursData { 218 | s.Push(tt.T, tt.V) 219 | } 220 | 221 | s.Finish() 222 | bytes := s.Bytes() 223 | buf := make([]byte, len(bytes)) 224 | b.ResetTimer() 225 | 226 | for i := 0; i < b.N; i++ { 227 | copy(buf, bytes) 228 | it, _ := NewIterator(buf) 229 | var j int 230 | for it.Next() { 231 | j++ 232 | } 233 | } 234 | } 235 | 236 | func TestEncodeSimilarFloats(t *testing.T) { 237 | tunix := uint32(time.Unix(0, 0).Unix()) 238 | s := New(tunix) 239 | want := []struct { 240 | t uint32 241 | v 
float64 242 | }{ 243 | {tunix, 6.00065e+06}, 244 | {tunix + 1, 6.000656e+06}, 245 | {tunix + 2, 6.000657e+06}, 246 | {tunix + 3, 6.000659e+06}, 247 | {tunix + 4, 6.000661e+06}, 248 | } 249 | 250 | for _, v := range want { 251 | s.Push(v.t, v.v) 252 | } 253 | 254 | s.Finish() 255 | 256 | it := s.Iter() 257 | 258 | for _, w := range want { 259 | if !it.Next() { 260 | t.Fatalf("Next()=false, want true") 261 | } 262 | tt, vv := it.Values() 263 | if w.t != tt || w.v != vv { 264 | t.Errorf("Values()=(%v,%v), want (%v,%v)\n", tt, vv, w.v, w.v) 265 | } 266 | } 267 | 268 | if it.Next() { 269 | t.Fatalf("Next()=true, want false") 270 | } 271 | 272 | if err := it.Err(); err != nil { 273 | t.Errorf("it.Err()=%v, want nil", err) 274 | } 275 | } 276 | -------------------------------------------------------------------------------- /tsz.go: -------------------------------------------------------------------------------- 1 | // Package tsz implement time-series compression 2 | /* 3 | 4 | http://www.vldb.org/pvldb/vol8/p1816-teller.pdf 5 | 6 | */ 7 | package tsz 8 | 9 | import ( 10 | "math" 11 | "sync" 12 | 13 | "github.com/dgryski/go-bits" 14 | ) 15 | 16 | // Series is the basic series primitive 17 | // you can concurrently put values, finish the stream, and create iterators 18 | type Series struct { 19 | sync.Mutex 20 | 21 | // TODO(dgryski): timestamps in the paper are uint64 22 | T0 uint32 23 | t uint32 24 | val float64 25 | 26 | bw bstream 27 | leading uint8 28 | trailing uint8 29 | finished bool 30 | 31 | tDelta uint32 32 | } 33 | 34 | func New(t0 uint32) *Series { 35 | s := Series{ 36 | T0: t0, 37 | leading: ^uint8(0), 38 | } 39 | 40 | // block header 41 | s.bw.writeBits(uint64(t0), 32) 42 | 43 | return &s 44 | 45 | } 46 | 47 | func (s *Series) Bytes() []byte { 48 | s.Lock() 49 | defer s.Unlock() 50 | return s.bw.bytes() 51 | } 52 | 53 | func finish(w *bstream) { 54 | // write an end-of-stream record 55 | w.writeBits(0x0f, 4) 56 | w.writeBits(0xffffffff, 32) 57 | 
w.writeBit(zero) 58 | } 59 | 60 | func (s *Series) Finish() { 61 | s.Lock() 62 | if !s.finished { 63 | finish(&s.bw) 64 | s.finished = true 65 | } 66 | s.Unlock() 67 | } 68 | 69 | func (s *Series) Push(t uint32, v float64) { 70 | s.Lock() 71 | defer s.Unlock() 72 | 73 | if s.t == 0 { 74 | // first point 75 | s.t = t 76 | s.val = v 77 | s.tDelta = t - s.T0 78 | s.bw.writeBits(uint64(s.tDelta), 14) 79 | s.bw.writeBits(math.Float64bits(v), 64) 80 | return 81 | } 82 | 83 | tDelta := t - s.t 84 | dod := int32(tDelta - s.tDelta) 85 | 86 | switch { 87 | case dod == 0: 88 | s.bw.writeBit(zero) 89 | case -63 <= dod && dod <= 64: 90 | s.bw.writeBits(0x02, 2) // '10' 91 | s.bw.writeBits(uint64(dod), 7) 92 | case -255 <= dod && dod <= 256: 93 | s.bw.writeBits(0x06, 3) // '110' 94 | s.bw.writeBits(uint64(dod), 9) 95 | case -2047 <= dod && dod <= 2048: 96 | s.bw.writeBits(0x0e, 4) // '1110' 97 | s.bw.writeBits(uint64(dod), 12) 98 | default: 99 | s.bw.writeBits(0x0f, 4) // '1111' 100 | s.bw.writeBits(uint64(dod), 32) 101 | } 102 | 103 | vDelta := math.Float64bits(v) ^ math.Float64bits(s.val) 104 | 105 | if vDelta == 0 { 106 | s.bw.writeBit(zero) 107 | } else { 108 | s.bw.writeBit(one) 109 | 110 | leading := uint8(bits.Clz(vDelta)) 111 | trailing := uint8(bits.Ctz(vDelta)) 112 | 113 | // clamp number of leading zeros to avoid overflow when encoding 114 | if leading >= 32 { 115 | leading = 31 116 | } 117 | 118 | // TODO(dgryski): check if it's 'cheaper' to reset the leading/trailing bits instead 119 | if s.leading != ^uint8(0) && leading >= s.leading && trailing >= s.trailing { 120 | s.bw.writeBit(zero) 121 | s.bw.writeBits(vDelta>>s.trailing, 64-int(s.leading)-int(s.trailing)) 122 | } else { 123 | s.leading, s.trailing = leading, trailing 124 | 125 | s.bw.writeBit(one) 126 | s.bw.writeBits(uint64(leading), 5) 127 | 128 | // Note that if leading == trailing == 0, then sigbits == 64. But that value doesn't actually fit into the 6 bits we have. 
129 | // Luckily, we never need to encode 0 significant bits, since that would put us in the other case (vdelta == 0). 130 | // So instead we write out a 0 and adjust it back to 64 on unpacking. 131 | sigbits := 64 - leading - trailing 132 | s.bw.writeBits(uint64(sigbits), 6) 133 | s.bw.writeBits(vDelta>>trailing, int(sigbits)) 134 | } 135 | } 136 | 137 | s.tDelta = tDelta 138 | s.t = t 139 | s.val = v 140 | 141 | } 142 | 143 | func (s *Series) Iter() *Iter { 144 | s.Lock() 145 | w := s.bw.clone() 146 | s.Unlock() 147 | 148 | finish(w) 149 | iter, _ := bstreamIterator(w) 150 | return iter 151 | } 152 | 153 | // Iter lets you iterate over a series. It is not concurrency-safe. 154 | type Iter struct { 155 | T0 uint32 156 | 157 | t uint32 158 | val float64 159 | 160 | br bstream 161 | leading uint8 162 | trailing uint8 163 | 164 | finished bool 165 | 166 | tDelta uint32 167 | err error 168 | } 169 | 170 | func bstreamIterator(br *bstream) (*Iter, error) { 171 | 172 | br.count = 8 173 | 174 | t0, err := br.readBits(32) 175 | if err != nil { 176 | return nil, err 177 | } 178 | 179 | return &Iter{ 180 | T0: uint32(t0), 181 | br: *br, 182 | }, nil 183 | } 184 | 185 | func NewIterator(b []byte) (*Iter, error) { 186 | return bstreamIterator(newBReader(b)) 187 | } 188 | 189 | func (it *Iter) Next() bool { 190 | 191 | if it.err != nil || it.finished { 192 | return false 193 | } 194 | 195 | if it.t == 0 { 196 | // read first t and v 197 | tDelta, err := it.br.readBits(14) 198 | if err != nil { 199 | it.err = err 200 | return false 201 | } 202 | it.tDelta = uint32(tDelta) 203 | it.t = it.T0 + it.tDelta 204 | v, err := it.br.readBits(64) 205 | if err != nil { 206 | it.err = err 207 | return false 208 | } 209 | 210 | it.val = math.Float64frombits(v) 211 | 212 | return true 213 | } 214 | 215 | // read delta-of-delta 216 | var d byte 217 | for i := 0; i < 4; i++ { 218 | d <<= 1 219 | bit, err := it.br.readBit() 220 | if err != nil { 221 | it.err = err 222 | return false 223 | } 224 
| if bit == zero { 225 | break 226 | } 227 | d |= 1 228 | } 229 | 230 | var dod int32 231 | var sz uint 232 | switch d { 233 | case 0x00: 234 | // dod == 0 235 | case 0x02: 236 | sz = 7 237 | case 0x06: 238 | sz = 9 239 | case 0x0e: 240 | sz = 12 241 | case 0x0f: 242 | bits, err := it.br.readBits(32) 243 | if err != nil { 244 | it.err = err 245 | return false 246 | } 247 | 248 | // end of stream 249 | if bits == 0xffffffff { 250 | it.finished = true 251 | return false 252 | } 253 | 254 | dod = int32(bits) 255 | } 256 | 257 | if sz != 0 { 258 | bits, err := it.br.readBits(int(sz)) 259 | if err != nil { 260 | it.err = err 261 | return false 262 | } 263 | if bits > (1 << (sz - 1)) { 264 | // or something 265 | bits = bits - (1 << sz) 266 | } 267 | dod = int32(bits) 268 | } 269 | 270 | tDelta := it.tDelta + uint32(dod) 271 | 272 | it.tDelta = tDelta 273 | it.t = it.t + it.tDelta 274 | 275 | // read compressed value 276 | bit, err := it.br.readBit() 277 | if err != nil { 278 | it.err = err 279 | return false 280 | } 281 | 282 | if bit == zero { 283 | // it.val = it.val 284 | } else { 285 | bit, err := it.br.readBit() 286 | if err != nil { 287 | it.err = err 288 | return false 289 | } 290 | if bit == zero { 291 | // reuse leading/trailing zero bits 292 | // it.leading, it.trailing = it.leading, it.trailing 293 | } else { 294 | bits, err := it.br.readBits(5) 295 | if err != nil { 296 | it.err = err 297 | return false 298 | } 299 | it.leading = uint8(bits) 300 | 301 | bits, err = it.br.readBits(6) 302 | if err != nil { 303 | it.err = err 304 | return false 305 | } 306 | mbits := uint8(bits) 307 | // 0 significant bits here means we overflowed and we actually need 64; see comment in encoder 308 | if mbits == 0 { 309 | mbits = 64 310 | } 311 | it.trailing = 64 - it.leading - mbits 312 | } 313 | 314 | mbits := int(64 - it.leading - it.trailing) 315 | bits, err := it.br.readBits(mbits) 316 | if err != nil { 317 | it.err = err 318 | return false 319 | } 320 | vbits := 
math.Float64bits(it.val) 321 | vbits ^= (bits << it.trailing) 322 | it.val = math.Float64frombits(vbits) 323 | } 324 | 325 | return true 326 | } 327 | 328 | func (it *Iter) Values() (uint32, float64) { 329 | return it.t, it.val 330 | } 331 | 332 | func (it *Iter) Err() error { 333 | return it.err 334 | } 335 | -------------------------------------------------------------------------------- /eval/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "github.com/dgryski/go-tsz" 6 | "github.com/dgryski/go-tsz/testdata" 7 | "math" 8 | "math/rand" 9 | "os" 10 | "text/tabwriter" 11 | ) 12 | 13 | // collection of 24h worth of minutely points, with different characteristics. 14 | var ConstantZero = make([]testdata.Point, 60*24) 15 | var ConstantOne = make([]testdata.Point, 60*24) 16 | var ConstantPos3f = make([]testdata.Point, 60*24) 17 | var ConstantNeg3f = make([]testdata.Point, 60*24) 18 | var ConstantPos0f = make([]testdata.Point, 60*24) 19 | var ConstantNeg0f = make([]testdata.Point, 60*24) 20 | var ConstantNearMaxf = make([]testdata.Point, 60*24) 21 | var ConstantNearMinf = make([]testdata.Point, 60*24) 22 | var ConstantNearMax0f = make([]testdata.Point, 60*24) 23 | var ConstantNearMin0f = make([]testdata.Point, 60*24) 24 | var Batch100ZeroOne = make([]testdata.Point, 60*24) 25 | var FlappingZeroOne = make([]testdata.Point, 60*24) 26 | 27 | var RandomTinyPosf = make([]testdata.Point, 60*24) 28 | var RandomTinyf = make([]testdata.Point, 60*24) 29 | var RandomTinyPos2f = make([]testdata.Point, 60*24) 30 | var RandomTiny2f = make([]testdata.Point, 60*24) 31 | var RandomTinyPos1f = make([]testdata.Point, 60*24) 32 | var RandomTiny1f = make([]testdata.Point, 60*24) 33 | var RandomTinyPos0f = make([]testdata.Point, 60*24) 34 | var RandomTiny0f = make([]testdata.Point, 60*24) 35 | 36 | var RandomSmallPosf = make([]testdata.Point, 60*24) 37 | var RandomSmallf = make([]testdata.Point, 60*24) 
38 | var RandomSmallPos2f = make([]testdata.Point, 60*24) 39 | var RandomSmall2f = make([]testdata.Point, 60*24) 40 | var RandomSmallPos1f = make([]testdata.Point, 60*24) 41 | var RandomSmall1f = make([]testdata.Point, 60*24) 42 | var RandomSmallPos0f = make([]testdata.Point, 60*24) 43 | var RandomSmall0f = make([]testdata.Point, 60*24) 44 | 45 | var Random60kPosf = make([]testdata.Point, 60*24) 46 | var Random60kf = make([]testdata.Point, 60*24) 47 | var Random60kPos2f = make([]testdata.Point, 60*24) 48 | var Random60k2f = make([]testdata.Point, 60*24) 49 | var Random60kPos1f = make([]testdata.Point, 60*24) 50 | var Random60k1f = make([]testdata.Point, 60*24) 51 | var Random60kPos0f = make([]testdata.Point, 60*24) 52 | var Random60k0f = make([]testdata.Point, 60*24) 53 | 54 | var SmallTestDataPosf = make([]testdata.Point, 60*24) 55 | var SmallTestDataf = make([]testdata.Point, 60*24) 56 | var SmallTestDataPos0f = make([]testdata.Point, 60*24) 57 | var SmallTestData0f = make([]testdata.Point, 60*24) 58 | 59 | var RandomLargePosf = make([]testdata.Point, 60*24) 60 | var RandomLargef = make([]testdata.Point, 60*24) 61 | var RandomLargePos0f = make([]testdata.Point, 60*24) 62 | var RandomLarge0f = make([]testdata.Point, 60*24) 63 | var LargeTestDataPosf = make([]testdata.Point, 60*24) 64 | var LargeTestDataPos0f = make([]testdata.Point, 60*24) 65 | var LargeTestDataf = make([]testdata.Point, 60*24) 66 | var LargeTestData0f = make([]testdata.Point, 60*24) 67 | 68 | func main() { 69 | for i := 0; i < 60*24; i++ { 70 | ts := uint32(i * 60) 71 | ConstantZero[i] = testdata.Point{float64(0), ts} 72 | ConstantOne[i] = testdata.Point{float64(1), ts} 73 | ConstantPos3f[i] = testdata.Point{float64(1234.567), ts} 74 | ConstantNeg3f[i] = testdata.Point{float64(-1234.567), ts} 75 | ConstantPos0f[i] = testdata.Point{float64(1234), ts} 76 | ConstantNeg0f[i] = testdata.Point{float64(-1235), ts} 77 | ConstantNearMaxf[i] = testdata.Point{math.MaxFloat64 / 100, ts} 78 | 
ConstantNearMinf[i] = testdata.Point{-math.MaxFloat64 / 100, ts} 79 | ConstantNearMax0f[i] = testdata.Point{math.Floor(ConstantNearMaxf[i].V), ts} 80 | ConstantNearMin0f[i] = testdata.Point{math.Floor(ConstantNearMinf[i].V), ts} 81 | if i%200 < 100 { 82 | Batch100ZeroOne[i] = testdata.Point{float64(0), ts} 83 | } else { 84 | Batch100ZeroOne[i] = testdata.Point{float64(1), ts} 85 | } 86 | if i%2 == 0 { 87 | FlappingZeroOne[i] = testdata.Point{float64(0), ts} 88 | } else { 89 | FlappingZeroOne[i] = testdata.Point{float64(1), ts} 90 | } 91 | 92 | RandomTinyPosf[i] = testdata.Point{rand.ExpFloat64(), ts} // 0-inf, but most vals are very low, mostly between 0 and 2, rarely goes over 10 93 | RandomTinyf[i] = testdata.Point{rand.NormFloat64(), ts} // -inf to + inf, as many pos as neg, but similar as above, rarely goes under -10 or over +10 94 | RandomTinyPos2f[i] = testdata.Point{RoundNum(RandomTinyPosf[i].V, 2), ts} 95 | RandomTiny2f[i] = testdata.Point{RoundNum(RandomTinyf[i].V, 2), ts} 96 | RandomTinyPos1f[i] = testdata.Point{RoundNum(RandomTinyPosf[i].V, 1), ts} 97 | RandomTiny1f[i] = testdata.Point{RoundNum(RandomTinyf[i].V, 1), ts} 98 | RandomTinyPos0f[i] = testdata.Point{math.Floor(RandomTinyPosf[i].V), ts} 99 | RandomTiny0f[i] = testdata.Point{math.Floor(RandomTinyf[i].V), ts} 100 | 101 | RandomSmallPosf[i] = testdata.Point{RandomTinyPosf[i].V * 100, ts} // 0-inf, but most vals are very low, mostly between 0 and 200, rarely goes over 1000 102 | RandomSmallf[i] = testdata.Point{RandomTinyf[i].V * 100, ts} // -inf to + inf, as many pos as neg, but similar as above, rarely goes under -1000 or over +1000 103 | RandomSmallPos2f[i] = testdata.Point{RoundNum(RandomSmallPosf[i].V, 2), ts} 104 | RandomSmall2f[i] = testdata.Point{RoundNum(RandomSmallf[i].V, 2), ts} 105 | RandomSmallPos1f[i] = testdata.Point{RoundNum(RandomSmallPosf[i].V, 1), ts} 106 | RandomSmall1f[i] = testdata.Point{RoundNum(RandomSmallf[i].V, 1), ts} 107 | RandomSmallPos0f[i] = 
testdata.Point{math.Floor(RandomSmallPosf[i].V), ts} 108 | RandomSmall0f[i] = testdata.Point{math.Floor(RandomSmallf[i].V), ts} 109 | 110 | Random60kPosf[i] = testdata.Point{rand.Float64() * 60000, ts} 111 | Random60kf[i] = testdata.Point{Random60kPosf[i].V, ts} 112 | if rand.Int()%2 == 0 { 113 | Random60kf[i].V *= -1.0 114 | } 115 | Random60kPos2f[i] = testdata.Point{RoundNum(Random60kPosf[i].V, 2), ts} 116 | Random60k2f[i] = testdata.Point{RoundNum(Random60kf[i].V, 2), ts} 117 | Random60kPos1f[i] = testdata.Point{RoundNum(Random60kPosf[i].V, 1), ts} 118 | Random60k1f[i] = testdata.Point{RoundNum(Random60kf[i].V, 1), ts} 119 | Random60kPos0f[i] = testdata.Point{math.Floor(Random60kPosf[i].V), ts} 120 | Random60k0f[i] = testdata.Point{math.Floor(Random60kf[i].V), ts} 121 | 122 | SmallTestDataPosf[i] = testdata.Point{float64(testdata.TwoHoursData[i%120].V) * 1.234567, ts} // THD is 650-680, so this is 0-150 123 | if rand.Int()%2 == 0 { 124 | SmallTestDataf[i] = testdata.Point{SmallTestDataPosf[i].V, ts} // -150 - 150 125 | } else { 126 | SmallTestDataf[i] = testdata.Point{-1 * SmallTestDataPosf[i].V, ts} 127 | } 128 | SmallTestDataPos0f[i] = testdata.Point{math.Floor(SmallTestDataPosf[i].V), ts} // 0-150 129 | SmallTestData0f[i] = testdata.Point{math.Floor(SmallTestDataf[i].V), ts} // -150 - 150 130 | 131 | RandomLargePosf[i] = testdata.Point{rand.ExpFloat64() * 0.0001 * math.MaxFloat64, ts} // 0-inf, rarely goes over maxfloat/1000 132 | RandomLargef[i] = testdata.Point{rand.NormFloat64() * 0.0001 * math.MaxFloat64, ts} // same buth also negative 133 | RandomLargePos0f[i] = testdata.Point{math.Floor(RandomLargePosf[i].V), ts} 134 | RandomLarge0f[i] = testdata.Point{math.Floor(RandomLargef[i].V), ts} 135 | 136 | LargeTestDataPosf[i] = testdata.Point{float64(testdata.TwoHoursData[i%120].V) * 0.00001234567 * math.MaxFloat64, ts} // 0-maxfloat/1000 137 | if rand.Int()%2 == 0 { 138 | LargeTestDataf[i] = testdata.Point{LargeTestDataPosf[i].V, ts} // -maxfloat/1000 
~maxfloat/1000 139 | } else { 140 | LargeTestDataf[i] = testdata.Point{-1 * LargeTestDataPosf[i].V, ts} 141 | } 142 | 143 | LargeTestDataPos0f[i] = testdata.Point{math.Floor(LargeTestDataPosf[i].V), ts} // 0-maxfloat/1000 144 | LargeTestData0f[i] = testdata.Point{math.Floor(LargeTestDataf[i].V), ts} // -mf/1000 ~ mx/1000 145 | } 146 | 147 | intervals := []int{10, 30, 60, 120, 360, 720, 1440} 148 | do := func(data []testdata.Point, comment string) string { 149 | str := "" 150 | for _, points := range intervals { 151 | s := tsz.New(data[0].T) 152 | for _, tt := range data[0:points] { 153 | s.Push(tt.T, tt.V) 154 | } 155 | size := len(s.Bytes()) 156 | BPerPoint := float64(size) / float64(points) 157 | str += fmt.Sprintf("\033[31m%d\033[39m\t%.2f\t", size, BPerPoint) 158 | } 159 | str += comment + "\t" 160 | return str 161 | } 162 | w := new(tabwriter.Writer) 163 | w.Init(os.Stdout, 5, 0, 1, ' ', tabwriter.AlignRight) 164 | fmt.Println("=== help ===") 165 | fmt.Println("CS = chunk size in Bytes") 166 | fmt.Println("BPP = Bytes per point (CS/num-points)") 167 | fmt.Println("d = integers stored as float64") 168 | fmt.Println("f = float64's with a bunch of decimal numbers") 169 | fmt.Println(".Xf = float64's with X decimal numbers") 170 | fmt.Println("[num1] a - b [num2]: a range between a and b with the occasional outliers up to num1 and num2") 171 | fmt.Println("=== data ===") 172 | str := "test" 173 | for _, points := range intervals { 174 | str += fmt.Sprintf("\t \033[39m%dCS\033[39m\t%dBPP", points, points) 175 | } 176 | cmtTinyPos := "0 ~ 10 [inf]" 177 | cmtTinyPosNeg := "[-inf] -10 ~ 10 [inf]" 178 | cmtSmallPos := "0 ~ 1000 [inf]" 179 | cmtSmallPosNeg := "[-inf] -1000 ~ 1000 [inf]" 180 | cmt60kPos := "0 ~60k" 181 | cmt60kPosNeg := "-60k ~ 60k" 182 | cmtSmallTestPos := "0~150" 183 | cmtSmallTestPosNeg := "-150~150" 184 | cmtRandomLargePos := "0 ~ MaxFloat64/1000 [inf]" 185 | cmtRandomLargePosNeg := "[-inf] -MaxFloat64/1000 ~ MaxFloat64/1000 [inf]" 186 | 
cmtLargeTestPos := "0 ~ MaxFloat64/1000" 187 | cmtLargeTestPosNeg := "-MaxFloat64/1000 ~ MaxFloat64/1000" 188 | fmt.Fprintln(w, str+"\tcomment\t") 189 | fmt.Fprintln(w, "constant zero d\t"+do(ConstantZero, "")) 190 | fmt.Fprintln(w, "constant one d\t"+do(ConstantOne, "")) 191 | fmt.Fprintln(w, "constant pos .3f\t"+do(ConstantPos3f, "")) 192 | fmt.Fprintln(w, "constant neg .3f\t"+do(ConstantNeg3f, "")) 193 | fmt.Fprintln(w, "constant pos .0f\t"+do(ConstantPos0f, "")) 194 | fmt.Fprintln(w, "constant neg .0f\t"+do(ConstantNeg0f, "")) 195 | fmt.Fprintln(w, "constant nearmax f\t"+do(ConstantNearMaxf, "")) 196 | fmt.Fprintln(w, "constant nearmin f\t"+do(ConstantNearMinf, "")) 197 | fmt.Fprintln(w, "constant nearmax .0f\t"+do(ConstantNearMax0f, "")) 198 | fmt.Fprintln(w, "constant nearmin .0f\t"+do(ConstantNearMin0f, "")) 199 | fmt.Fprintln(w, "batch100 zero/one d\t"+do(Batch100ZeroOne, "")) 200 | fmt.Fprintln(w, "flapping zero/one d\t"+do(FlappingZeroOne, "")) 201 | fmt.Fprintln(w, "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t") 202 | fmt.Fprintln(w, "random tiny pos f\t"+do(RandomTinyPosf, cmtTinyPos)) 203 | fmt.Fprintln(w, "random tiny pos/neg f\t"+do(RandomTinyf, cmtTinyPosNeg)) 204 | fmt.Fprintln(w, "random tiny pos .2f\t"+do(RandomTinyPos2f, cmtTinyPos)) 205 | fmt.Fprintln(w, "random tiny pos/neg .2f\t"+do(RandomTiny2f, cmtTinyPosNeg)) 206 | fmt.Fprintln(w, "random tiny pos .1f\t"+do(RandomTinyPos1f, cmtTinyPos)) 207 | fmt.Fprintln(w, "random tiny pos/neg .1f\t"+do(RandomTiny1f, cmtTinyPosNeg)) 208 | fmt.Fprintln(w, "random tiny pos .0f\t"+do(RandomTinyPos0f, cmtTinyPos)) 209 | fmt.Fprintln(w, "random tiny pos/neg .0f\t"+do(RandomTiny0f, cmtTinyPosNeg)) 210 | fmt.Fprintln(w, "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t") 211 | fmt.Fprintln(w, "testdata small pos f\t"+do(SmallTestDataPosf, cmtSmallTestPos)) 212 | fmt.Fprintln(w, "testdata small pos/neg f\t"+do(SmallTestDataf, cmtSmallTestPosNeg)) 213 | fmt.Fprintln(w, "testdata small pos .0f\t"+do(SmallTestDataPos0f, cmtSmallTestPos)) 214 | 
fmt.Fprintln(w, "testdata small pos/neg .0f\t"+do(SmallTestData0f, cmtSmallTestPosNeg)) 215 | fmt.Fprintln(w, "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t") 216 | fmt.Fprintln(w, "random small pos f\t"+do(RandomSmallPosf, cmtSmallPos)) 217 | fmt.Fprintln(w, "random small pos/neg f\t"+do(RandomSmallf, cmtSmallPosNeg)) 218 | fmt.Fprintln(w, "random small pos .2f\t"+do(RandomSmallPos2f, cmtSmallPos)) 219 | fmt.Fprintln(w, "random small pos/neg .2f\t"+do(RandomSmall2f, cmtSmallPosNeg)) 220 | fmt.Fprintln(w, "random small pos .1f\t"+do(RandomSmallPos1f, cmtSmallPos)) 221 | fmt.Fprintln(w, "random small pos/neg .1f\t"+do(RandomSmall1f, cmtSmallPosNeg)) 222 | fmt.Fprintln(w, "random small pos .0f\t"+do(RandomSmallPos0f, cmtSmallPos)) 223 | fmt.Fprintln(w, "random small pos/neg .0f\t"+do(RandomSmall0f, cmtSmallPosNeg)) 224 | fmt.Fprintln(w, "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t") 225 | fmt.Fprintln(w, "random medium pos f\t"+do(Random60kPosf, cmt60kPos)) 226 | fmt.Fprintln(w, "random medium pos/neg f\t"+do(Random60kf, cmt60kPosNeg)) 227 | fmt.Fprintln(w, "random medium pos .2f\t"+do(Random60kPos2f, cmt60kPos)) 228 | fmt.Fprintln(w, "random medium pos/neg .2f\t"+do(Random60k2f, cmt60kPosNeg)) 229 | fmt.Fprintln(w, "random medium pos .1f\t"+do(Random60kPos1f, cmt60kPos)) 230 | fmt.Fprintln(w, "random medium pos/neg .1f\t"+do(Random60k1f, cmt60kPosNeg)) 231 | fmt.Fprintln(w, "random medium pos .0f\t"+do(Random60kPos0f, cmt60kPos)) 232 | fmt.Fprintln(w, "random medium pos/neg .0f\t"+do(Random60k0f, cmt60kPosNeg)) 233 | fmt.Fprintln(w, "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t") 234 | fmt.Fprintln(w, "testdata large pos f\t"+do(LargeTestDataPosf, cmtLargeTestPos)) 235 | fmt.Fprintln(w, "testdata large pos/neg f\t"+do(LargeTestDataf, cmtLargeTestPosNeg)) 236 | fmt.Fprintln(w, "testdata large pos .0f\t"+do(LargeTestDataPos0f, cmtLargeTestPos)) 237 | fmt.Fprintln(w, "testdata large pos/neg .0f\t"+do(LargeTestData0f, cmtLargeTestPosNeg)) 238 | fmt.Fprintln(w, "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t") 239 | 
fmt.Fprintln(w, "random large pos f\t"+do(RandomLargePosf, cmtRandomLargePos)) 240 | fmt.Fprintln(w, "random large pos/neg f\t"+do(RandomLargef, cmtRandomLargePosNeg)) 241 | fmt.Fprintln(w, "random large pos .0f\t"+do(RandomLargePos0f, cmtRandomLargePos)) 242 | fmt.Fprintln(w, "random large pos/neg .0f\t"+do(RandomLarge0f, cmtRandomLargePosNeg)) 243 | w.Flush() 244 | } 245 | --------------------------------------------------------------------------------