├── .gitignore ├── go.mod ├── utils.go ├── .circleci └── config.yml ├── demo └── main.go ├── LICENSE ├── stream_example_test.go ├── buffer.go ├── buffer_test.go ├── README.md ├── go.sum ├── sketch.go ├── summary.go ├── summary_test.go └── sketch_test.go /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.exe~ 4 | *.dll 5 | *.so 6 | *.dylib 7 | 8 | # Test binary, build with `go test -c` 9 | *.test 10 | 11 | # Output of the go coverage tool, specifically when used with LiteIDE 12 | *.out 13 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/axiomhq/quantiles 2 | 3 | go 1.12 4 | 5 | require ( 6 | github.com/beorn7/perks v1.0.0 7 | github.com/gogo/protobuf v1.3.2 // indirect 8 | github.com/pkg/errors v0.8.1 // indirect 9 | github.com/stretchr/testify v1.3.0 10 | github.com/stripe/veneur v12.0.0+incompatible 11 | ) 12 | -------------------------------------------------------------------------------- /utils.go: -------------------------------------------------------------------------------- 1 | package quantiles 2 | 3 | func maxInt64(a, b int64) int64 { 4 | if a > b { 5 | return a 6 | } 7 | return b 8 | } 9 | 10 | func maxFloat64(a, b float64) float64 { 11 | if a > b { 12 | return a 13 | } 14 | return b 15 | } 16 | 17 | func minFloat64(a, b float64) float64 { 18 | if a < b { 19 | return a 20 | } 21 | return b 22 | } 23 | -------------------------------------------------------------------------------- /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | jobs: 3 | build: 4 | docker: 5 | - image: circleci/golang:1.12 6 | steps: 7 | - checkout 8 | - run: 9 | name: Run tests 10 | command: | 11 | go get gotest.tools/gotestsum@v0.4.0 12 | mkdir -p test-results/gotestsum 
13 | gotestsum --junitfile test-results/gotestsum/results.xml -f short-verbose -- ./... 14 | - run: 15 | name: Run benchmarks 16 | command: go test -bench . 17 | - store_test_results: 18 | path: test-results 19 | -------------------------------------------------------------------------------- /demo/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "time" 6 | 7 | "github.com/axiomhq/quantiles" 8 | "github.com/beorn7/perks/quantile" 9 | "github.com/stripe/veneur/tdigest" 10 | ) 11 | 12 | func bToMb(b uint64) uint64 { 13 | return b / 1024 / 1024 14 | } 15 | 16 | func veneur() { 17 | t := tdigest.NewMerging(20, false) 18 | now := time.Now() 19 | for i := 0.0; i < 1e6; i++ { 20 | t.Add(i, 1.0) 21 | } 22 | fmt.Println("veneur:", time.Since(now)) 23 | } 24 | 25 | func axiom() { 26 | qstream, _ := quantiles.New(0.01, 1000) 27 | now := time.Now() 28 | for i := 0.0; i < 1e6; i++ { 29 | if err := qstream.Push(i, 1.0); err != nil { 30 | panic(err) 31 | } 32 | } 33 | fmt.Println("axiom:", time.Since(now)) 34 | } 35 | 36 | func prom() { 37 | tstream := quantile.NewLowBiased(0.01) 38 | now := time.Now() 39 | for i := 0.0; i < 1e6; i++ { 40 | tstream.Insert(i) 41 | } 42 | fmt.Println("prometheus:", time.Since(now)) 43 | } 44 | 45 | func main() { 46 | veneur() 47 | prom() 48 | axiom() 49 | } 50 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Axiom Inc. 
4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /stream_example_test.go: -------------------------------------------------------------------------------- 1 | package quantiles_test 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/axiomhq/quantiles" 7 | ) 8 | 9 | func Example() { 10 | sketch := quantiles.NewDefault() 11 | for i := 0.0; i < 1e6; i++ { 12 | if err := sketch.Push(i, 1.0); err != nil { 13 | panic(err) 14 | } 15 | } 16 | fmt.Print("ApproximationError:") 17 | fmt.Println(sketch.ApproximationError(1)) 18 | 19 | fmt.Print("Finalize:") 20 | fmt.Println(sketch.Finalize()) 21 | 22 | fmt.Print("GenerateQuantiles(4):") 23 | fmt.Println(sketch.GenerateQuantiles(4)) 24 | 25 | fmt.Print("GenerateQuantiles(10):") 26 | fmt.Println(sketch.GenerateQuantiles(10)) 27 | 28 | sum, err := sketch.FinalSummary() 29 | if err != nil { 30 | panic(err) 31 | } 32 | fmt.Print("GenerateQuantiles(4):") 33 | fmt.Println(sum.GenerateQuantiles(4)) 34 | 35 | // Output: 36 | // ApproximationError:0.006218905472636816 37 | // Finalize: 38 | // GenerateQuantiles(4):[0 249854 499710 749566 999999] 39 | // GenerateQuantiles(10):[0 98302 200702 299006 401406 499710 598014 700414 798718 900094 999999] 40 | // GenerateQuantiles(4):[0 249854 499710 749566 999999] 41 | } 42 | -------------------------------------------------------------------------------- /buffer.go: -------------------------------------------------------------------------------- 1 | package quantiles 2 | 3 | import ( 4 | "fmt" 5 | "sort" 6 | ) 7 | 8 | // byValue implements sort.Interface based on the value field. 9 | type byValue []bufEntry 10 | 11 | func (a byValue) Len() int { return len(a) } 12 | func (a byValue) Less(i, j int) bool { return a[i].value < a[j].value } 13 | func (a byValue) Swap(i, j int) { a[i], a[j] = a[j], a[i] } 14 | 15 | // bufEntry ... 
16 | type bufEntry struct { 17 | value float64 18 | weight float64 19 | } 20 | 21 | type buffer struct { 22 | vec byValue 23 | maxSize int64 24 | curSize int64 25 | } 26 | 27 | func newBuffer(blockSize, maxElements int64) (*buffer, error) { 28 | maxSize := blockSize << 1 29 | if maxSize > maxElements { 30 | maxSize = maxElements 31 | } 32 | 33 | if maxSize <= 0 { 34 | return nil, fmt.Errorf("Invalid buffer specification: (%v, %v)", blockSize, maxElements) 35 | } 36 | 37 | return &buffer{ 38 | maxSize: maxSize, 39 | curSize: 0, 40 | vec: make([]bufEntry, maxSize), 41 | }, nil 42 | } 43 | 44 | func (buf *buffer) clone() *buffer { 45 | newBuffer := &buffer{ 46 | maxSize: buf.maxSize, 47 | curSize: buf.curSize, 48 | vec: make([]bufEntry, buf.maxSize), 49 | } 50 | for i, e := range buf.vec { 51 | newBuffer.vec[i] = e 52 | } 53 | return newBuffer 54 | } 55 | 56 | func (buf *buffer) push(value, weight float64) error { 57 | //QCHECK magic 58 | if buf.isFull() { 59 | return fmt.Errorf("Buffer already full: %v", buf.maxSize) 60 | } 61 | 62 | if weight > 0 { 63 | buf.vec[buf.curSize] = bufEntry{value, weight} 64 | buf.curSize++ 65 | } 66 | return nil 67 | } 68 | 69 | // generateEntryList returns a sorted vector view of the base buffer and clears the buffer. 70 | // Callers should minimize how often this is called, ideally only right after 71 | // the buffer becomes full. 72 | func (buf *buffer) generateEntryList() []bufEntry { 73 | sort.Sort(buf.vec[:buf.curSize]) 74 | ret := buf.vec[:buf.curSize] 75 | buf.vec = make([]bufEntry, buf.maxSize) 76 | if buf.curSize == 0 { 77 | return ret 78 | } 79 | buf.curSize = 0 80 | numEntries := 0 81 | for i := 1; i < len(ret); i++ { 82 | if ret[i].value != ret[i-1].value { 83 | numEntries++ 84 | ret[numEntries] = ret[i] 85 | } else { 86 | ret[numEntries].weight += ret[i].weight 87 | } 88 | } 89 | return ret[:numEntries+1] 90 | } 91 | 92 | // isFull ... 
93 | func (buf *buffer) isFull() bool { 94 | return buf.curSize >= buf.maxSize 95 | } 96 | -------------------------------------------------------------------------------- /buffer_test.go: -------------------------------------------------------------------------------- 1 | package quantiles 2 | 3 | import ( 4 | "math/rand" 5 | "reflect" 6 | "testing" 7 | ) 8 | 9 | func TestBufferInvalid(t *testing.T) { 10 | if _, err := newBuffer(2, 0); err == nil { 11 | t.Error("expected error, got nil") 12 | } 13 | if _, err := newBuffer(0, 2); err == nil { 14 | t.Error("expected error, got nil") 15 | } 16 | } 17 | 18 | func TestBufferPushEntryNotFull(t *testing.T) { 19 | buf, err := newBuffer(2, 100) 20 | if err != nil { 21 | t.Error("expected no err, got", err) 22 | } 23 | buf.push(5, 9) 24 | buf.push(2, 3) 25 | buf.push(-1, 7) 26 | buf.push(3, 0) 27 | 28 | if buf.isFull() { 29 | t.Error("expected not full, got full") 30 | } 31 | if val := len(buf.vec); val == 2 { 32 | t.Error("expected 3, got full", val) 33 | } 34 | } 35 | 36 | func TestBufferPushEntryFull(t *testing.T) { 37 | buf, err := newBuffer(2, 100) 38 | if err != nil { 39 | t.Error("expected no err, got", err) 40 | } 41 | buf.push(5, 9) 42 | buf.push(2, 3) 43 | buf.push(-1, 7) 44 | buf.push(2, 1) 45 | 46 | expected := []bufEntry{} 47 | expected = append(expected, bufEntry{-1, 7}) 48 | expected = append(expected, bufEntry{2, 4}) 49 | expected = append(expected, bufEntry{5, 9}) 50 | 51 | if !buf.isFull() { 52 | t.Error("expected full, got not full") 53 | } 54 | if got := buf.generateEntryList(); !reflect.DeepEqual(expected, got) { 55 | t.Errorf("expected %v, got %v", expected, got) 56 | } 57 | } 58 | func TestBufferPushEntryFullDeath(t *testing.T) { 59 | buf, err := newBuffer(2, 100) 60 | if err != nil { 61 | t.Error("expected no err, got", err) 62 | } 63 | buf.push(5, 9) 64 | buf.push(2, 3) 65 | buf.push(-1, 7) 66 | buf.push(2, 1) 67 | 68 | expected := []bufEntry{} 69 | expected = append(expected, bufEntry{-1, 7}) 70 | 
expected = append(expected, bufEntry{2, 4}) 71 | expected = append(expected, bufEntry{5, 9}) 72 | 73 | if !buf.isFull() { 74 | t.Error("expected full, got not full") 75 | } 76 | if err := buf.push(6, 6); err == nil { 77 | t.Error("expected buffer already full") 78 | } 79 | } 80 | 81 | func push(n int) error { 82 | buf, _ := newBuffer(int64(n), int64(n)) 83 | for i := 0; i < n; i++ { 84 | if err := buf.push(rand.Float64(), rand.Float64()); err != nil { 85 | return err 86 | } 87 | } 88 | return nil 89 | } 90 | 91 | func BenchmarkPush100(b *testing.B) { 92 | // run the Fib function b.N times 93 | for n := 0; n < b.N; n++ { 94 | if err := push(100); err != nil { 95 | b.Error(err) 96 | return 97 | } 98 | } 99 | } 100 | 101 | func BenchmarkPush1000(b *testing.B) { 102 | // run the Fib function b.N times 103 | for n := 0; n < b.N; n++ { 104 | if err := push(1000); err != nil { 105 | b.Error(err) 106 | return 107 | } 108 | } 109 | } 110 | 111 | func BenchmarkPush10000(b *testing.B) { 112 | // run the Fib function b.N times 113 | for n := 0; n < b.N; n++ { 114 | if err := push(10000); err != nil { 115 | b.Error(err) 116 | return 117 | } 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # quantiles - Optimal Quantile Approximation in Streams 2 | [![GoDoc](https://godoc.org/github.com/axiomhq/quantiles?status.svg)](https://godoc.org/github.com/axiomhq/quantiles) [![CircleCI](https://circleci.com/gh/axiomhq/quantiles/tree/master.svg?style=svg)](https://circleci.com/gh/axiomhq/quantiles/tree/master) 3 | 4 | This is a translation of [TensorFlow's quantile helper class](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/boosted_trees/lib/quantiles), it aims to compute approximate quantiles with error bound guarantees for weighted data sets. 
5 | This implementation is an adaptation of techniques from the following papers: 6 | * (2001) [Space-efficient online computation of quantile summaries](http://infolab.stanford.edu/~datar/courses/cs361a/papers/quantiles.pdf). 7 | * (2004) [Power-conserving computation of order-statistics over sensor networks](http://www.cis.upenn.edu/~mbgreen/papers/pods04.pdf). 8 | * (2007) [A fast algorithm for approximate quantiles in high speed data streams](http://web.cs.ucla.edu/~weiwang/paper/SSDBM07_2.pdf). 9 | * (2016) [XGBoost: A Scalable Tree Boosting System](https://arxiv.org/pdf/1603.02754.pdf). 10 | 11 | #### The key ideas at play are the following: 12 | * Maintain an in-memory multi-level quantile summary in a way to guarantee 13 | a maximum approximation error of `eps * W` per bucket where `W` is the total 14 | weight across all points in the input dataset. 15 | * Two base operations are defined: `MERGE` and `COMPRESS`. `MERGE` combines two 16 | summaries guaranteeing an `epsNew = max(eps1, eps2)`. `COMPRESS` compresses 17 | a summary to `b + 1` elements guaranteeing `epsNew = epsOld + 1/b`. 18 | * `b * sizeof(summary entry)` must ideally be small enough to fit in an 19 | average CPU L2 cache. 20 | * To distribute this algorithm while maintaining error bounds, we need 21 | the worker-computed summaries to have no more than `eps / h` error 22 | where `h` is the height of the distributed computation graph, which 23 | is 2 for a MapReduce (MR) job with no combiner. 24 | 25 | We mainly want to max out IO bandwidth by ensuring we're not compute-bound and 26 | using a reasonable amount of RAM. 27 | 28 | #### Complexity: 29 | * Compute: `O(n * log(1/eps * log(eps * n)))`. 30 | * Memory: `O(1/eps * log^2(eps * n))` <- for one worker streaming through the entire dataset. 31 | 32 | An epsilon value of zero would make the algorithm extremely inefficient and 33 | is therefore disallowed. 
34 | 35 | 36 | ## Example Usage 37 | ```go 38 | package quantiles_test 39 | 40 | import ( 41 | "fmt" 42 | 43 | "github.com/axiomhq/quantiles" 44 | ) 45 | 46 | func Example() { 47 | sketch := quantiles.NewDefault() 48 | for i := 0.0; i < 1e6; i++ { 49 | if err := sketch.Push(i, 1.0); err != nil { 50 | panic(err) 51 | } 52 | } 53 | fmt.Print("ApproximationError:") 54 | fmt.Println(sketch.ApproximationError(1)) // 0 55 | 56 | fmt.Print("Finalize:") 57 | fmt.Println(sketch.Finalize()) // 58 | 59 | 60 | fmt.Print("GenerateQuantiles(4):") 61 | fmt.Println(sketch.GenerateQuantiles(4)) // [0 251865 503730 746595 999999] 62 | 63 | 64 | fmt.Print("GenerateQuantiles(10):") 65 | fmt.Println(sketch.GenerateQuantiles(10)) // [0 98946 197892 296838 395789 503730 602676 701622 800568 899514 999999] 66 | 67 | sum, err := sketch.FinalSummary() 68 | if err != nil { 69 | panic(err) 70 | } 71 | fmt.Print("GenerateQuantiles(4):") 72 | fmt.Println(sum.GenerateQuantiles(4)) // [0 251865 503730 746595 999999] 73 | } 74 | ``` 75 | 76 | ## TODO 77 | * [x] Implement an online estimator without the need of finalizing the stream 78 | * [x] Add proper documentation 79 | * [ ] Benchmark 80 | * [ ] Add serialization 81 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/beorn7/perks v1.0.0 h1:HWo1m869IqiPhD389kmkxeTalrjNbbJTC8LXupb+sl0= 2 | github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= 3 | github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= 4 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 5 | github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= 6 | github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= 7 | github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= 
8 | github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= 9 | github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I= 10 | github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 11 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 12 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 13 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 14 | github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q= 15 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= 16 | github.com/stripe/veneur v12.0.0+incompatible h1:goZhHLUUxzN7gbJlaULhoLEd3PAyvB6CjXmEkfsSQ/k= 17 | github.com/stripe/veneur v12.0.0+incompatible/go.mod h1:oEfQGGOeGcs/N7jAfByGwjGGAh1X9tF2gYpU5Nzuljk= 18 | github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= 19 | github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= 20 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 21 | golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= 22 | golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= 23 | golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= 24 | golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= 25 | golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 26 | golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 27 | golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 28 | golang.org/x/net 
v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= 29 | golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 30 | golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 31 | golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 32 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 33 | golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 34 | golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 35 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 36 | golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 37 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 38 | golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= 39 | golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= 40 | golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= 41 | golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 42 | golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 43 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 44 | golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 45 | -------------------------------------------------------------------------------- /sketch.go: 
-------------------------------------------------------------------------------- 1 | package quantiles 2 | 3 | import ( 4 | "fmt" 5 | "math" 6 | ) 7 | 8 | var errFinalized = fmt.Errorf("Finalize() already called") 9 | 10 | // Sketch ... 11 | type Sketch struct { 12 | eps float64 13 | maxLevels int64 14 | blockSize int64 15 | buffer *buffer 16 | localSummary *Summary 17 | summaryLevels []*Summary 18 | finalized bool 19 | n uint64 20 | } 21 | 22 | // NewDefault returns a new Sketch with the eps = 0.01 and maxElements 1000 23 | func NewDefault() *Sketch { 24 | stream, _ := New(0.01, 1000) 25 | return stream 26 | } 27 | 28 | // New returns a new Sketch for a given eps and maxElements 29 | func New(eps float64, maxElements int64) (*Sketch, error) { 30 | if eps <= 0 { 31 | return nil, fmt.Errorf("an epsilon value of zero is not allowed") 32 | } 33 | 34 | maxLevels, blockSize, err := getQuantileSpecs(eps, maxElements) 35 | if err != nil { 36 | return nil, err 37 | } 38 | 39 | buffer, err := newBuffer(blockSize, maxElements) 40 | if err != nil { 41 | return nil, err 42 | } 43 | 44 | stream := &Sketch{ 45 | eps: eps, 46 | buffer: buffer, 47 | finalized: false, 48 | maxLevels: maxLevels, 49 | blockSize: blockSize, 50 | localSummary: newSummary(), 51 | summaryLevels: []*Summary{}, 52 | } 53 | return stream, nil 54 | } 55 | 56 | func (stream *Sketch) clone() *Sketch { 57 | newStream := &Sketch{ 58 | eps: stream.eps, 59 | buffer: stream.buffer.clone(), 60 | finalized: stream.finalized, 61 | maxLevels: stream.maxLevels, 62 | blockSize: stream.blockSize, 63 | localSummary: stream.localSummary.clone(), 64 | summaryLevels: stream.summaryLevels, 65 | } 66 | for i, sum := range stream.summaryLevels { 67 | newStream.summaryLevels[i] = sum.clone() 68 | } 69 | return newStream 70 | } 71 | 72 | // Push a value and a weight into the stream 73 | func (stream *Sketch) Push(value float64, weight float64) error { 74 | // Validate state. 
75 | var err error 76 | if stream.finalized { 77 | return errFinalized 78 | } 79 | 80 | if err = stream.buffer.push(value, weight); err != nil { 81 | return err 82 | } 83 | 84 | if stream.buffer.isFull() { 85 | err = stream.pushBuffer(stream.buffer) 86 | } 87 | stream.n++ 88 | return err 89 | } 90 | 91 | func (stream *Sketch) pushBuffer(buf *buffer) error { 92 | // Validate state. 93 | if stream.finalized { 94 | return errFinalized 95 | } 96 | stream.localSummary.buildFromBufferEntries(buf.generateEntryList()) 97 | stream.localSummary.compress(stream.blockSize, stream.eps) 98 | return stream.propagateLocalSummary() 99 | } 100 | 101 | // PushSummary pushes full summary while maintaining approximation error invariants. 102 | func (stream *Sketch) PushSummary(summary []SumEntry) error { 103 | // Validate state. 104 | if stream.finalized { 105 | return errFinalized 106 | } 107 | stream.localSummary.buildFromSummaryEntries(summary) 108 | stream.localSummary.compress(stream.blockSize, stream.eps) 109 | return stream.propagateLocalSummary() 110 | } 111 | 112 | // Finalize flushes approximator and finalizes state. 113 | func (stream *Sketch) Finalize() error { 114 | // Validate state. 115 | if stream.finalized { 116 | return errFinalized 117 | } 118 | 119 | // Flush any remaining buffer elements. 120 | stream.pushBuffer(stream.buffer) 121 | 122 | // Create final merged summary 123 | stream.localSummary.Clear() 124 | for _, summary := range stream.summaryLevels { 125 | stream.localSummary.Merge(summary) 126 | } 127 | stream.localSummary.n = stream.n 128 | 129 | stream.summaryLevels = []*Summary{} 130 | stream.finalized = true 131 | return nil 132 | } 133 | 134 | /* 135 | propagates local summary through summary levels while maintaining 136 | approximation error invariants. 137 | */ 138 | func (stream *Sketch) propagateLocalSummary() error { 139 | // Validate state. 140 | if stream.finalized { 141 | return errFinalized 142 | } 143 | 144 | // No-op if there's nothing to add. 
145 | if stream.localSummary.Size() <= 0 { 146 | return nil 147 | } 148 | 149 | for level, settled := int64(0), false; !settled; level++ { 150 | // Ensure we have enough depth. 151 | if int64(len(stream.summaryLevels)) <= level { 152 | stream.summaryLevels = append(stream.summaryLevels, &Summary{}) 153 | } 154 | 155 | // Merge summaries. 156 | currentSummary := stream.summaryLevels[level] 157 | stream.localSummary.Merge(currentSummary) 158 | 159 | // Check if we need to compress and propagate summary higher. 160 | if currentSummary.Size() == 0 || 161 | stream.localSummary.Size() <= stream.blockSize+1 { 162 | *currentSummary = *(stream.localSummary) 163 | stream.localSummary = newSummary() 164 | settled = true 165 | } else { 166 | // Compress, empty current level and propagate. 167 | stream.localSummary.compress(stream.blockSize, stream.eps) 168 | currentSummary.Clear() 169 | } 170 | } 171 | stream.localSummary.n = stream.n 172 | return nil 173 | } 174 | 175 | // Quantile ... 176 | func (stream *Sketch) Quantile(q float64) (float64, error) { 177 | if !stream.finalized { 178 | return 0, fmt.Errorf("Finalize() must be called before generating quantiles") 179 | } 180 | return stream.localSummary.Quantile(q) 181 | } 182 | 183 | /* 184 | GenerateQuantiles generates requested number of quantiles after finalizing stream. 185 | The returned quantiles can be queried using std::lower_bound to get 186 | the bucket for a given value. 187 | */ 188 | func (stream *Sketch) GenerateQuantiles(numQuantiles int64) ([]float64, error) { 189 | if !stream.finalized { 190 | return nil, fmt.Errorf("Finalize() must be called before generating quantiles") 191 | } 192 | return stream.localSummary.GenerateQuantiles(numQuantiles), nil 193 | } 194 | 195 | /* 196 | GenerateBoundaries generates requested number of boundaries after finalizing stream. 197 | The returned boundaries can be queried using std::lower_bound to get 198 | the bucket for a given value. 
199 | The boundaries, while still guaranteeing approximation bounds, don't 200 | necessarily represent the actual quantiles of the distribution. 201 | Boundaries are preferable over quantiles when the caller is less 202 | interested in the actual quantiles distribution and more interested in 203 | getting a representative sample of boundary values. 204 | */ 205 | func (stream *Sketch) GenerateBoundaries(numBoundaries int64) ([]float64, error) { 206 | if !stream.finalized { 207 | return nil, fmt.Errorf("Finalize() must be called before generating quantiles") 208 | } 209 | return stream.localSummary.GenerateBoundaries(numBoundaries), nil 210 | } 211 | 212 | /* 213 | ApproximationError calculates approximation error for the specified level. 214 | If the passed level is negative, the approximation error for the entire 215 | summary is returned. Note that after Finalize is called, only the overall 216 | error is available. 217 | */ 218 | func (stream *Sketch) ApproximationError(level int64) (float64, error) { 219 | if stream.finalized { 220 | if level > 0 { 221 | return 0, fmt.Errorf("only overall error is available after Finalize()") 222 | } 223 | return stream.localSummary.ApproximationError(), nil 224 | } 225 | 226 | if len(stream.summaryLevels) == 0 { 227 | // No error even if base buffer isn't empty. 228 | return 0, nil 229 | } 230 | 231 | // If level is negative, we get the approximation error 232 | // for the top-most level which is the max approximation error 233 | // in all summaries by construction. 234 | if level < 0 { 235 | level = int64(len(stream.summaryLevels)) - 1 236 | } 237 | if level >= int64(len(stream.summaryLevels)) { 238 | return 0, fmt.Errorf("invalid level") 239 | } 240 | return stream.summaryLevels[level].ApproximationError(), nil 241 | } 242 | 243 | // MaxDepth ... 244 | func (stream *Sketch) MaxDepth() int { 245 | return len(stream.summaryLevels) 246 | } 247 | 248 | // FinalSummary ... 
249 | func (stream *Sketch) FinalSummary() (*Summary, error) { 250 | if !stream.finalized { 251 | return nil, fmt.Errorf("Finalize() must be called before generating quantiles") 252 | } 253 | return stream.localSummary, nil 254 | } 255 | 256 | /* 257 | Helper method which, given the desired approximation error 258 | and an upper bound on the number of elements, computes the optimal 259 | number of levels and block size and returns them in the tuple. 260 | */ 261 | func getQuantileSpecs(eps float64, maxElements int64) (int64, int64, error) { 262 | var ( 263 | maxLevel int64 = 1 264 | blockSize int64 = 2 265 | ) 266 | if eps < 0 || eps >= 1 { 267 | return maxLevel, blockSize, fmt.Errorf("eps should be element of [0, 1)") 268 | } 269 | if maxElements <= 0 { 270 | return maxLevel, blockSize, fmt.Errorf("maxElements should be > 0") 271 | } 272 | 273 | if eps <= math.SmallestNonzeroFloat64 { 274 | // Exact quantile computation at the expense of RAM. 275 | maxLevel = 1 276 | blockSize = maxInt64(maxElements, 2) 277 | } else { 278 | // The bottom-most level will become full at most 279 | // (max_elements / block_size) times, the level above will become full 280 | // (max_elements / 2 * block_size) times and generally level l becomes 281 | // full (max_elements / 2^l * block_size) times until the last 282 | // level max_level becomes full at most once meaning when the inequality 283 | // (2^max_level * block_size >= max_elements) is satisfied. 284 | // In what follows, we jointly solve for max_level and block_size by 285 | // gradually increasing the level until the inequality above is satisfied. 286 | // We could alternatively set max_level = ceil(log2(eps * max_elements)); 287 | // and block_size = ceil(max_level / eps) + 1 but that tends to give more 288 | // pessimistic bounds and wastes RAM needlessly. 
289 | 290 | blockSize = 2 291 | for maxLevel = 1; (uint64(1)< it2.value { 125 | sum.entries[num] = SumEntry{ 126 | value: it2.value, weight: it2.weight, 127 | minRank: it2.minRank + nextMinRank1, 128 | maxRank: it2.maxRank + it1.prevMaxRank(), 129 | } 130 | nextMinRank2 = it2.nextMinRank() 131 | j++ 132 | } else { 133 | sum.entries[num] = SumEntry{ 134 | value: it1.value, weight: it1.weight + it2.weight, 135 | minRank: it1.minRank + it2.minRank, 136 | maxRank: it1.maxRank + it2.maxRank, 137 | } 138 | nextMinRank1 = it1.nextMinRank() 139 | nextMinRank2 = it2.nextMinRank() 140 | i++ 141 | j++ 142 | } 143 | num++ 144 | } 145 | 146 | // Fill in any residual. 147 | for i != len(baseEntries) { 148 | it1 := baseEntries[i] 149 | sum.entries[num] = SumEntry{ 150 | value: it1.value, weight: it1.weight, 151 | minRank: it1.minRank + nextMinRank2, 152 | maxRank: it1.maxRank + otherEntries[len(otherEntries)-1].maxRank, 153 | } 154 | i++ 155 | num++ 156 | } 157 | for j != len(otherEntries) { 158 | it2 := otherEntries[j] 159 | sum.entries[num] = SumEntry{ 160 | value: it2.value, weight: it2.weight, 161 | minRank: it2.minRank + nextMinRank1, 162 | maxRank: it2.maxRank + baseEntries[len(baseEntries)-1].maxRank, 163 | } 164 | j++ 165 | num++ 166 | } 167 | sum.entries = sum.entries[:num] 168 | 169 | } 170 | 171 | func (sum *Summary) compress(sizeHint int64, minEps float64) { 172 | // No-op if we're already within the size requirement. 173 | sizeHint = maxInt64(sizeHint, 2) 174 | if int64(len(sum.entries)) <= sizeHint { 175 | return 176 | } 177 | 178 | // First compute the max error bound delta resulting from this compression. 179 | epsDelta := sum.TotalWeight() * maxFloat64(1/float64(sizeHint), minEps) 180 | 181 | // Compress elements ensuring approximation bounds and elements diversity are both maintained. 
182 | var ( 183 | addAccumulator int64 184 | addStep = int64(len(sum.entries)) 185 | ) 186 | 187 | wi := 1 188 | li := wi 189 | 190 | for ri := 0; ri+1 != len(sum.entries); { 191 | ni := ri + 1 192 | for ni != len(sum.entries) && addAccumulator < addStep && 193 | sum.entries[ni].prevMaxRank()-sum.entries[ri].nextMinRank() <= epsDelta { 194 | addAccumulator += sizeHint 195 | ni++ 196 | } 197 | if sum.entries[ri] == sum.entries[ni-1] { 198 | ri++ 199 | } else { 200 | ri = ni - 1 201 | } 202 | 203 | sum.entries[wi] = sum.entries[ri] 204 | wi++ 205 | li = ri 206 | addAccumulator -= addStep 207 | } 208 | 209 | if li+1 != len(sum.entries) { 210 | sum.entries[wi] = sum.entries[len(sum.entries)-1] 211 | wi++ 212 | } 213 | 214 | sum.entries = sum.entries[:wi] 215 | } 216 | 217 | // GenerateBoundaries ... 218 | func (sum *Summary) GenerateBoundaries(numBoundaries int64) []float64 { 219 | // To construct the boundaries we first run a soft compress over a copy 220 | // of the summary and retrieve the values. 221 | // The resulting boundaries are guaranteed to both contain at least 222 | // num_boundaries unique elements and maintain approximation bounds. 223 | if len(sum.entries) == 0 { 224 | return []float64{} 225 | } 226 | 227 | // Generate soft compressed summary. 228 | compressedSummary := &Summary{} 229 | compressedSummary.buildFromSummaryEntries(sum.entries) 230 | // Set an epsilon for compression that's at most 1.0 / num_boundaries 231 | // more than epsilon of original our summary since the compression operation 232 | // adds ~1.0/num_boundaries to final approximation error. 233 | compressionEps := sum.ApproximationError() + 1.0/float64(numBoundaries) 234 | compressedSummary.compress(numBoundaries, compressionEps) 235 | 236 | // Return boundaries. 
237 | output := make([]float64, len(compressedSummary.entries)) 238 | for _, entry := range compressedSummary.entries { 239 | output = append(output, entry.value) 240 | } 241 | return output 242 | } 243 | 244 | // Quantile returns the value for quantile q 245 | func (sum *Summary) Quantile(q float64) (float64, error) { 246 | // To construct the desired n-quantiles we repetitively query n ranks from the 247 | // original summary. The following algorithm is an efficient cache-friendly 248 | // O(n) implementation of that idea which avoids the cost of the repetitive 249 | // full rank queries O(nlogn). 250 | if q < 0 || q > 1 { 251 | return 0, fmt.Errorf("expected 0 <= q <= 1, got q = %v", q) 252 | } 253 | numQuantiles := int64(sum.n) 254 | if numQuantiles == 0 { 255 | return 0, nil 256 | } 257 | if len(sum.quantiles) == 0 { 258 | sum.quantiles = sum.GenerateQuantiles(numQuantiles + 1) 259 | } 260 | qIdx := int(float64(numQuantiles)*q + 0.5) 261 | return sum.quantiles[qIdx], nil 262 | } 263 | 264 | // GenerateQuantiles returns a slice of float64 of size numQuantiles+1, the ith entry is the `i * 1/numQuantiles+1` quantile 265 | func (sum *Summary) GenerateQuantiles(numQuantiles int64) []float64 { 266 | // To construct the desired n-quantiles we repetitively query n ranks from the 267 | // original summary. The following algorithm is an efficient cache-friendly 268 | // O(n) implementation of that idea which avoids the cost of the repetitive 269 | // full rank queries O(nlogn). 
270 | if len(sum.entries) == 0 { 271 | return []float64{} 272 | } 273 | if numQuantiles < 2 { 274 | numQuantiles = 2 275 | } 276 | curIdx := 0 277 | output := make([]float64, numQuantiles+1) 278 | for rank := 0.0; rank <= float64(numQuantiles); rank++ { 279 | d2 := 2 * (rank * sum.entries[len(sum.entries)-1].maxRank / float64(numQuantiles)) 280 | nextIdx := curIdx + 1 281 | for nextIdx < len(sum.entries) && d2 >= sum.entries[nextIdx].minRank+sum.entries[nextIdx].maxRank { 282 | nextIdx++ 283 | } 284 | curIdx = nextIdx - 1 285 | // Determine insertion order. 286 | if nextIdx == len(sum.entries) || d2 < sum.entries[curIdx].nextMinRank()+sum.entries[nextIdx].prevMaxRank() { 287 | output[int(rank)] = sum.entries[curIdx].value 288 | } else { 289 | output[int(rank)] = sum.entries[nextIdx].value 290 | } 291 | } 292 | return output 293 | } 294 | 295 | // ApproximationError ... 296 | func (sum *Summary) ApproximationError() float64 { 297 | if len(sum.entries) == 0 { 298 | return 0 299 | } 300 | 301 | var maxGap float64 302 | for i := 1; i < len(sum.entries); i++ { 303 | it := sum.entries[i] 304 | if tmp := it.maxRank - it.minRank - it.weight; tmp > maxGap { 305 | maxGap = tmp 306 | } 307 | if tmp := it.prevMaxRank() - sum.entries[i-1].nextMinRank(); tmp > maxGap { 308 | maxGap = tmp 309 | } 310 | } 311 | return maxGap / sum.TotalWeight() 312 | } 313 | 314 | // MinValue returns the min weight value of the summary 315 | func (sum *Summary) MinValue() float64 { 316 | if len(sum.entries) != 0 { 317 | return sum.entries[0].value 318 | } 319 | return 0 320 | } 321 | 322 | // MaxValue returns the max weight value of the summary 323 | func (sum *Summary) MaxValue() float64 { 324 | if len(sum.entries) != 0 { 325 | return sum.entries[len(sum.entries)-1].value 326 | } 327 | return 0 328 | } 329 | 330 | // TotalWeight returns the total weight of the summary 331 | func (sum *Summary) TotalWeight() float64 { 332 | if len(sum.entries) != 0 { 333 | return 
sum.entries[len(sum.entries)-1].maxRank 334 | } 335 | return 0 336 | } 337 | 338 | // Size returns the size (num of entries) in the summary 339 | func (sum *Summary) Size() int64 { 340 | return int64(len(sum.entries)) 341 | } 342 | 343 | // Clear reset the summary 344 | func (sum *Summary) Clear() { 345 | sum.entries = []SumEntry{} 346 | } 347 | 348 | // Entries returns all summary entries 349 | func (sum *Summary) Entries() []SumEntry { 350 | return sum.entries 351 | } 352 | -------------------------------------------------------------------------------- /summary_test.go: -------------------------------------------------------------------------------- 1 | package quantiles 2 | 3 | import ( 4 | "math/rand" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | type SummaryDummy struct { 11 | buffer1 *buffer 12 | buffer2 *buffer 13 | 14 | buffer1MinValue float64 15 | buffer1MaxValue float64 16 | buffer1TotalWeight float64 17 | 18 | buffer2MinValue float64 19 | buffer2MaxValue float64 20 | buffer2TotalWeight float64 21 | 22 | *Summary 23 | } 24 | 25 | func NewWeightedQuantilesSummaryDummy() (*SummaryDummy, error) { 26 | sum := &Summary{ 27 | entries: make([]SumEntry, 0), 28 | } 29 | wqsd := &SummaryDummy{ 30 | Summary: sum, 31 | buffer1MinValue: -13, 32 | buffer1MaxValue: 21, 33 | buffer1TotalWeight: 45, 34 | buffer2MinValue: -7, 35 | buffer2MaxValue: 11, 36 | buffer2TotalWeight: 30, 37 | } 38 | if err := wqsd.setup(); err != nil { 39 | return nil, err 40 | } 41 | return wqsd, nil 42 | } 43 | 44 | func (wqsd *SummaryDummy) setup() error { 45 | var err error 46 | wqsd.buffer1, err = newBuffer(10, 1000) 47 | if err != nil { 48 | return err 49 | } 50 | for _, val := range [][2]float64{ 51 | [2]float64{5, 9}, 52 | [2]float64{2, 3}, 53 | [2]float64{-1, 7}, 54 | [2]float64{-7, 1}, 55 | [2]float64{3, 2}, 56 | [2]float64{-2, 3}, 57 | [2]float64{21, 8}, 58 | [2]float64{-13, 4}, 59 | [2]float64{8, 2}, 60 | [2]float64{-5, 6}, 61 | } { 62 | if err := 
wqsd.buffer1.push(val[0], val[1]); err != nil { 63 | return err 64 | } 65 | } 66 | 67 | wqsd.buffer2, err = newBuffer(7, 1000) 68 | if err != nil { 69 | return err 70 | } 71 | for _, val := range [][2]float64{ 72 | [2]float64{9, 2}, 73 | [2]float64{-7, 3}, 74 | [2]float64{2, 1}, 75 | [2]float64{4, 13}, 76 | [2]float64{0, 5}, 77 | [2]float64{-5, 3}, 78 | [2]float64{11, 3}, 79 | } { 80 | if err := wqsd.buffer2.push(val[0], val[1]); err != nil { 81 | return err 82 | } 83 | } 84 | return nil 85 | } 86 | 87 | func TestSummaryBuildFromBuffer(t *testing.T) { 88 | wqsd, err := NewWeightedQuantilesSummaryDummy() 89 | if err != nil { 90 | t.Error(err) 91 | } 92 | sum := &Summary{} 93 | sum.buildFromBufferEntries(wqsd.buffer1.generateEntryList()) 94 | 95 | // We expect no approximation error because no compress operation occurred. 96 | if approx := sum.ApproximationError(); approx != 0 { 97 | t.Error("expected no approximation error, got", approx) 98 | } 99 | 100 | entries := sum.entries 101 | 102 | // First element's rmin should be zero. 103 | // EXPECT_EQ(summary.MinValue(), buffer1_min_value_) 104 | if val := sum.MinValue(); val != wqsd.buffer1MinValue { 105 | t.Error("first element's rmin should be zero, got", val) 106 | } 107 | // EXPECT_EQ(entries.front(), SummaryEntry(-13, 4, 0, 4)) 108 | exp := SumEntry{ 109 | value: -13, weight: 4, minRank: 0, maxRank: 4, 110 | } 111 | if val := entries[0]; val != exp { 112 | t.Errorf("expected %v, got %v", exp, val) 113 | } 114 | 115 | // Last element's rmax should be cumulative weight. 
116 | // EXPECT_EQ(summary.MaxValue(), buffer1_max_value_) 117 | if val := sum.MaxValue(); val != wqsd.buffer1MaxValue { 118 | t.Errorf("expected %v, got %v", wqsd.buffer1MaxValue, val) 119 | } 120 | 121 | //EXPECT_EQ(entries.back(), SummaryEntry(21, 8, 37, 45)) 122 | exp = SumEntry{ 123 | value: 21, weight: 8, minRank: 37, maxRank: 45, 124 | } 125 | if val := entries[len(entries)-1]; val != exp { 126 | t.Errorf("expected %v, got %v", exp, val) 127 | } 128 | 129 | // Check total weight. 130 | // EXPECT_EQ(summary.TotalWeight(), buffer1_total_weight_) 131 | if val := sum.TotalWeight(); val != wqsd.buffer1TotalWeight { 132 | t.Errorf("expected %v, got %v", wqsd.buffer1TotalWeight, val) 133 | } 134 | } 135 | 136 | func TestSummaryCompressSeparately(t *testing.T) { 137 | wqsd, err := NewWeightedQuantilesSummaryDummy() 138 | if err != nil { 139 | t.Error(err) 140 | } 141 | entryList := wqsd.buffer1.generateEntryList() 142 | for newSize := int64(9); newSize >= 2; newSize-- { 143 | sum := &Summary{} 144 | sum.buildFromBufferEntries(entryList) 145 | sum.compress(newSize, 0) 146 | 147 | // Expect a max approximation error of 1 / n 148 | // ie. eps0 + 1/n but eps0 = 0. 149 | 150 | // EXPECT_TRUE(summary.Size() >= new_size && summary.Size() <= new_size + 2); 151 | if val := sum.Size(); val < newSize { 152 | t.Errorf("expected val >= newSize, got %v < %v", val, newSize) 153 | } else if val > newSize+2 { 154 | t.Errorf("expected val <= newSize+2, got %v > %v", val, newSize+2) 155 | } 156 | 157 | // EXPECT_LE(summary.ApproximationError(), 1.0 / new_size); 158 | if approx := sum.ApproximationError(); approx > 1.0/float64(newSize) { 159 | t.Errorf("expected approx <= newSize, got %v > %v", approx, 1.0/float64(newSize)) 160 | } 161 | 162 | // Min/Max elements and total weight should not change. 
163 | // EXPECT_EQ(summary.MinValue(), buffer1_min_value_) 164 | if sum.MinValue() != wqsd.buffer1MinValue { 165 | t.Errorf("expected %v, got %v", wqsd.buffer1MinValue, sum.MinValue()) 166 | } 167 | // EXPECT_EQ(summary.MaxValue(), buffer1_max_value_) 168 | if sum.MaxValue() != wqsd.buffer1MaxValue { 169 | t.Errorf("expected %v, got %v", wqsd.buffer1MaxValue, sum.MaxValue()) 170 | } 171 | // EXPECT_EQ(summary.TotalWeight(), buffer1_total_weight_) 172 | if sum.TotalWeight() != wqsd.buffer1TotalWeight { 173 | t.Errorf("expected %v, got %v", wqsd.buffer1TotalWeight, sum.TotalWeight()) 174 | } 175 | } 176 | } 177 | func TestSummaryCompressSequentially(t *testing.T) { 178 | wqsd, err := NewWeightedQuantilesSummaryDummy() 179 | if err != nil { 180 | t.Error(err) 181 | } 182 | entryList := wqsd.buffer1.generateEntryList() 183 | sum := &Summary{} 184 | sum.buildFromBufferEntries(entryList) 185 | for newSize := int64(9); newSize >= 2; newSize -= 2 { 186 | 187 | prevEps := sum.ApproximationError() 188 | sum.compress(newSize, 0) 189 | 190 | // Expect a max approximation error of prev_eps + 1 / n. 191 | 192 | // EXPECT_TRUE(summary.Size() >= new_size && summary.Size() <= new_size + 2); 193 | if val := sum.Size(); val < newSize { 194 | t.Errorf("expected val >= newSize, got %v < %v", val, newSize) 195 | } else if val > newSize+2 { 196 | t.Errorf("expected val <= newSize+2, got %v > %v", val, newSize+2) 197 | } 198 | 199 | // EXPECT_LE(summary.ApproximationError(), 1.0 / new_size); 200 | if approx := sum.ApproximationError(); approx > prevEps+1.0/float64(newSize) { 201 | t.Errorf("expected approx <= newSize, got %v > %v", approx, prevEps+1.0/float64(newSize)) 202 | } 203 | 204 | // Min/Max elements and total weight should not change. 
205 | // EXPECT_EQ(summary.MinValue(), buffer1_min_value_) 206 | if sum.MinValue() != wqsd.buffer1MinValue { 207 | t.Errorf("expected %v, got %v", wqsd.buffer1MinValue, sum.MinValue()) 208 | } 209 | // EXPECT_EQ(summary.MaxValue(), buffer1_max_value_) 210 | if sum.MaxValue() != wqsd.buffer1MaxValue { 211 | t.Errorf("expected %v, got %v", wqsd.buffer1MaxValue, sum.MaxValue()) 212 | } 213 | // EXPECT_EQ(summary.TotalWeight(), buffer1_total_weight_) 214 | if sum.TotalWeight() != wqsd.buffer1TotalWeight { 215 | t.Errorf("expected %v, got %v", wqsd.buffer1TotalWeight, sum.TotalWeight()) 216 | } 217 | } 218 | } 219 | 220 | func TestSummaryCompressRandomized(t *testing.T) { 221 | var ( 222 | prevSize int64 = 1 223 | size int64 = 2 224 | maxValue = float64(1 << 20) 225 | ) 226 | 227 | for size < (1 << 16) { 228 | buffer, err := newBuffer(size, size<<4) 229 | if err != nil { 230 | t.Error("expected no error, got", err) 231 | } 232 | for i := int64(0); i < size; i++ { 233 | buffer.push( 234 | rand.Float64()*maxValue, 235 | rand.Float64()*maxValue, 236 | ) 237 | } 238 | 239 | sum := &Summary{} 240 | sum.buildFromBufferEntries(buffer.generateEntryList()) 241 | newSize := maxInt64(rand.Int63n(size), 2) 242 | sum.compress(newSize, 0) 243 | 244 | // EXPECT_TRUE(summary.Size() >= new_size && summary.Size() <= new_size + 2); 245 | if val := sum.Size(); val < newSize { 246 | t.Errorf("expected val >= newSize, got %v < %v", val, newSize) 247 | } else if val > newSize+2 { 248 | t.Errorf("expected val <= newSize+2, got %v > %v", val, newSize+2) 249 | } 250 | 251 | // EXPECT_LE(summary.ApproximationError(), 1.0 / new_size); 252 | if approx := sum.ApproximationError(); approx > 1.0/float64(newSize) { 253 | t.Errorf("expected approx <= newSize, got %v > %v", approx, 1.0/float64(newSize)) 254 | } 255 | 256 | lastSize := size 257 | size += prevSize 258 | prevSize = lastSize 259 | } 260 | } 261 | 262 | func TestSummaryMergeSymmetry(t *testing.T) { 263 | assert := assert.New(t) 264 | 265 | 
wqsd, err := NewWeightedQuantilesSummaryDummy() 266 | if err != nil { 267 | t.Error(err) 268 | } 269 | 270 | list1 := wqsd.buffer1.generateEntryList() 271 | list2 := wqsd.buffer2.generateEntryList() 272 | sum1 := &Summary{} 273 | sum1.buildFromBufferEntries(list1) 274 | sum2 := &Summary{} 275 | sum2.buildFromBufferEntries(list2) 276 | 277 | sum1.Merge(sum2) 278 | assert.Equal(sum1.ApproximationError(), 0.0) 279 | assert.Equal(sum1.MinValue(), 280 | minFloat64(wqsd.buffer1MinValue, wqsd.buffer2MinValue)) 281 | 282 | assert.Equal(sum1.MaxValue(), 283 | maxFloat64(wqsd.buffer1MaxValue, wqsd.buffer2MaxValue)) 284 | assert.Equal(sum1.TotalWeight(), 285 | wqsd.buffer1TotalWeight+wqsd.buffer2TotalWeight) 286 | assert.Equal(sum1.Size(), int64(14)) 287 | 288 | sum1.buildFromBufferEntries(list1) 289 | sum2.Merge(sum1) 290 | assert.Equal(sum2.ApproximationError(), 0.0) 291 | assert.Equal(sum2.MinValue(), 292 | minFloat64(wqsd.buffer1MinValue, wqsd.buffer2MinValue)) 293 | assert.Equal(sum2.MaxValue(), 294 | maxFloat64(wqsd.buffer1MaxValue, wqsd.buffer2MaxValue)) 295 | assert.Equal(sum2.TotalWeight(), 296 | wqsd.buffer1TotalWeight+wqsd.buffer2TotalWeight) 297 | assert.Equal(sum2.Size(), int64(14)) 298 | } 299 | 300 | func TestSummaryCompressThenMerge(t *testing.T) { 301 | assert := assert.New(t) 302 | wqsd, err := NewWeightedQuantilesSummaryDummy() 303 | if err != nil { 304 | t.Error(err) 305 | } 306 | 307 | sum1 := &Summary{} 308 | sum1.buildFromBufferEntries(wqsd.buffer1.generateEntryList()) 309 | sum2 := &Summary{} 310 | sum2.buildFromBufferEntries(wqsd.buffer2.generateEntryList()) 311 | 312 | sum1.compress(5, 0) 313 | eps1 := 1.0 / 5 314 | // EXPECT_LE(summary.ApproximationError(), 1.0 / new_size); 315 | if approx := sum1.ApproximationError(); approx > eps1 { 316 | t.Errorf("expected approx <= newSize, got %v > %v", approx, eps1) 317 | } 318 | sum2.compress(3, 0) 319 | eps2 := 1.0 / 3 320 | // EXPECT_LE(summary.ApproximationError(), 1.0 / new_size); 321 | if approx := 
sum1.ApproximationError(); approx > eps1 { 322 | t.Errorf("expected approx <= newSize, got %v > %v", approx, eps2) 323 | } 324 | 325 | // Merge guarantees an approximation error of max(eps1, eps2). 326 | // Merge summary 2 into 1 and verify. 327 | sum1.Merge(sum2) 328 | if approx := sum1.ApproximationError(); approx > maxFloat64(eps1, eps2) { 329 | t.Errorf("expected approx <= newSize, got %v > %v", approx, maxFloat64(eps1, eps2)) 330 | } 331 | assert.Equal(sum1.MinValue(), 332 | minFloat64(wqsd.buffer1MinValue, wqsd.buffer2MinValue)) 333 | assert.Equal(sum1.MaxValue(), 334 | maxFloat64(wqsd.buffer1MaxValue, wqsd.buffer2MaxValue)) 335 | assert.Equal(sum1.TotalWeight(), 336 | wqsd.buffer1TotalWeight+wqsd.buffer2TotalWeight) 337 | } 338 | -------------------------------------------------------------------------------- /sketch_test.go: -------------------------------------------------------------------------------- 1 | package quantiles 2 | 3 | import ( 4 | "math" 5 | "math/rand" 6 | "testing" 7 | 8 | "github.com/stretchr/testify/assert" 9 | ) 10 | 11 | type tuple [2]int64 12 | 13 | func TestInvalidEps(t *testing.T) { 14 | assert := assert.New(t) 15 | _, _, err := getQuantileSpecs(-0.01, 0) 16 | assert.Error(err) 17 | _, _, err = getQuantileSpecs(1.01, 0) 18 | assert.Error(err) 19 | } 20 | func TestZeroEps(t *testing.T) { 21 | assert := assert.New(t) 22 | var ( 23 | tup tuple 24 | err error 25 | ) 26 | tup[0], tup[1], err = getQuantileSpecs(0, 0) 27 | assert.Error(err) 28 | tup[0], tup[1], err = getQuantileSpecs(0, 1) 29 | assert.Equal(tup, tuple{1, 2}) 30 | tup[0], tup[1], err = getQuantileSpecs(0, 20) 31 | assert.Equal(tup, tuple{1, 20}) 32 | } 33 | func TestNonZeroEps(t *testing.T) { 34 | assert := assert.New(t) 35 | var ( 36 | tup tuple 37 | err error 38 | ) 39 | tup[0], tup[1], err = getQuantileSpecs(0.01, 0) 40 | assert.Error(err) 41 | tup[0], tup[1], err = getQuantileSpecs(0.1, 320) 42 | assert.Equal(tup, tuple{4, 31}) 43 | tup[0], tup[1], err = 
getQuantileSpecs(0.01, 25600) 44 | assert.Equal(tup, tuple{6, 501}) 45 | tup[0], tup[1], err = getQuantileSpecs(0.01, 104857600) 46 | assert.Equal(tup, tuple{17, 1601}) 47 | tup[0], tup[1], err = getQuantileSpecs(0.1, 104857600) 48 | assert.Equal(tup, tuple{20, 191}) 49 | tup[0], tup[1], err = getQuantileSpecs(0.01, 1<<40) 50 | assert.Equal(tup, tuple{29, 2801}) 51 | tup[0], tup[1], err = getQuantileSpecs(0.001, 1<<40) 52 | assert.Equal(tup, tuple{26, 25001}) 53 | } 54 | 55 | func generateFixedUniformSummary(workerID int32, maxElements int64, totalWeight *float64, stream *Sketch) error { 56 | for i := int64(0); i < maxElements; i++ { 57 | x := float64(i) / float64(maxElements) 58 | if err := stream.Push(x, 1); err != nil { 59 | return err 60 | } 61 | *totalWeight++ 62 | } 63 | return stream.Finalize() 64 | } 65 | 66 | func generateRandUniformFixedWeightsSummary(workerID int32, maxElements int64, totalWeight *float64, stream *Sketch) error { 67 | for i := int64(0); i < maxElements; i++ { 68 | x := rand.Float64() 69 | stream.Push(x, 1) 70 | *totalWeight++ 71 | } 72 | return stream.Finalize() 73 | } 74 | 75 | func generateFixedNonUniformSummary(workerID int32, maxElements int64, totalWeight *float64, stream *Sketch) error { 76 | for i := int64(0); i < maxElements; i++ { 77 | x := float64(i) / float64(maxElements) 78 | stream.Push(x, x) 79 | *totalWeight += x 80 | } 81 | return stream.Finalize() 82 | } 83 | 84 | func generateRandUniformRandWeightsSummary(workerID int32, maxElements int64, totalWeight *float64, stream *Sketch) error { 85 | for i := int64(0); i < maxElements; i++ { 86 | x := rand.Float64() 87 | w := rand.Float64() 88 | stream.Push(x, w) 89 | *totalWeight += w 90 | } 91 | return stream.Finalize() 92 | } 93 | 94 | type workerSummaryGeneratorFunc func(int32, int64, *float64, *Sketch) error 95 | 96 | func testSingleWorkerStreams(t *testing.T, eps float64, maxElements int64, 97 | workerSummaryGenerator workerSummaryGeneratorFunc, 98 | expectedQuantiles 
[]float64, quantilesMatcherEpsilon float64) { 99 | 100 | totalWeight := 0.0 101 | stream, err := New(eps, maxElements) 102 | if err != nil { 103 | t.Error("expected no error, got ", err) 104 | return 105 | } 106 | if err := workerSummaryGenerator(0, maxElements, &totalWeight, stream); err != nil { 107 | t.Error("expected no error, got ", err) 108 | return 109 | } 110 | 111 | // Ensure we didn't lose track of any elements and are 112 | // within approximation error bound. 113 | if val, err := stream.ApproximationError(0); err != nil { 114 | t.Error("expected no error, got ", err) 115 | return 116 | } else if val > eps { 117 | t.Errorf("expected val <= %v, got %v > %v", eps, val, eps) 118 | return 119 | } 120 | 121 | sum, err := stream.FinalSummary() 122 | if err != nil { 123 | t.Error("expected no error, got ", err) 124 | return 125 | } 126 | w := sum.TotalWeight() 127 | if math.Abs(totalWeight-w) > 1e-6 { 128 | t.Errorf("expected %v <= %v", math.Abs(totalWeight-w), 1e-6) 129 | return 130 | } 131 | 132 | // Verify expected quantiles. 133 | actuals, err := stream.GenerateQuantiles(int64(len(expectedQuantiles) - 1)) 134 | if err != nil { 135 | t.Error("expected no error, got ", err) 136 | return 137 | } 138 | for i, eq := range expectedQuantiles { 139 | if val := math.Abs(actuals[i] - eq); val > quantilesMatcherEpsilon { 140 | t.Errorf("expected %v <= %v", val, quantilesMatcherEpsilon) 141 | return 142 | } 143 | } 144 | } 145 | 146 | // Stream generators. 147 | func generateOneValue(workerID int32, maxElements int64, totalWeight *float64, stream *Sketch) error { 148 | stream.Push(10, 1) 149 | *totalWeight++ 150 | return stream.Finalize() 151 | } 152 | 153 | // Stream generators. 
154 | func generateOneZeroWeightedValue(workerID int32, maxElements int64, totalWeight *float64, stream *Sketch) error { 155 | stream.Push(10, 0) 156 | return stream.Finalize() 157 | } 158 | 159 | func TestStreamOneValue(t *testing.T) { 160 | var ( 161 | eps = 0.01 162 | maxElements = int64(1 << 16) 163 | ) 164 | testSingleWorkerStreams(t, eps, maxElements, generateOneValue, 165 | []float64{10.0, 10.0, 10.0, 10.0, 10.0}, 1e-2) 166 | } 167 | 168 | func TestStreamOneZeroWeightValue(t *testing.T) { 169 | var ( 170 | eps = 0.01 171 | maxElements = int64(1 << 16) 172 | ) 173 | testSingleWorkerStreams(t, eps, maxElements, generateOneZeroWeightedValue, 174 | []float64{}, 1e-2) 175 | } 176 | 177 | func TestStreamFixedUniform(t *testing.T) { 178 | var ( 179 | eps = 0.01 180 | maxElements = int64(1 << 16) 181 | ) 182 | testSingleWorkerStreams(t, eps, maxElements, generateFixedUniformSummary, 183 | []float64{0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0}, 1e-2) 184 | } 185 | 186 | func TestStreamFixedNonUniform(t *testing.T) { 187 | var ( 188 | eps = 0.01 189 | maxElements = int64(1 << 16) 190 | ) 191 | testSingleWorkerStreams(t, eps, maxElements, generateFixedNonUniformSummary, 192 | []float64{0, math.Sqrt(0.1), math.Sqrt(0.2), math.Sqrt(0.3), math.Sqrt(0.4), math.Sqrt(0.5), math.Sqrt(0.6), math.Sqrt(0.7), math.Sqrt(0.8), math.Sqrt(0.9), 1.0}, 1e-2) 193 | } 194 | 195 | func TestStreamRandUniformFixedWeights(t *testing.T) { 196 | var ( 197 | eps = 0.01 198 | maxElements = int64(1 << 16) 199 | ) 200 | testSingleWorkerStreams(t, eps, maxElements, generateRandUniformFixedWeightsSummary, 201 | []float64{0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0}, 1e-2) 202 | } 203 | 204 | func TestStreamRandUniformRandWeights(t *testing.T) { 205 | var ( 206 | eps = 0.01 207 | maxElements = int64(1 << 16) 208 | ) 209 | testSingleWorkerStreams(t, eps, maxElements, generateRandUniformRandWeightsSummary, 210 | []float64{0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0}, 1e-2) 211 | 
} 212 | 213 | // Distributed tests. 214 | func testDistributedStreams(t *testing.T, numWorkers int32, eps float64, maxElements int64, 215 | workerSummaryGenerator workerSummaryGeneratorFunc, 216 | expectedQuantiles []float64, quantilesMatcherEpsilon float64) { 217 | 218 | // Simulate streams on each worker running independently 219 | totalWeight := 0.0 220 | workerSummaries := [][]SumEntry{} 221 | for i := int32(0); i < numWorkers; i++ { 222 | stream, err := New(eps/2, maxElements) 223 | if err != nil { 224 | t.Error("expected no error, got", err) 225 | return 226 | } 227 | workerSummaryGenerator(i, maxElements/int64(numWorkers), &totalWeight, stream) 228 | sum, err := stream.FinalSummary() 229 | if err != nil { 230 | t.Error("expected no error, got ", err) 231 | return 232 | } 233 | workerSummaries = append(workerSummaries, sum.entries) 234 | } 235 | 236 | // In the accumulation phase, we aggregate the summaries from each worker 237 | // and build an overall summary while maintaining error bounds by ensuring we 238 | // don't increase the error by more than eps / 2. 239 | reducerStream, err := New(eps, maxElements) 240 | if err != nil { 241 | t.Error("expected no error, got ", err) 242 | return 243 | } 244 | for _, summary := range workerSummaries { 245 | if err := reducerStream.PushSummary(summary); err != nil { 246 | t.Error("expected no error, got", err) 247 | return 248 | } 249 | } 250 | if err := reducerStream.Finalize(); err != nil { 251 | t.Error("expected no error, got", err) 252 | return 253 | } 254 | 255 | // Ensure we didn't lose track of any elements and are 256 | // within approximation error bound. 
257 | if val, err := reducerStream.ApproximationError(0); err != nil { 258 | t.Error("expected no error, got ", err) 259 | return 260 | } else if val > eps { 261 | t.Errorf("expected val <= %v, got %v > %v", eps, val, eps) 262 | return 263 | } 264 | 265 | sum, err := reducerStream.FinalSummary() 266 | if err != nil { 267 | t.Error("expected no error, got ", err) 268 | return 269 | } 270 | w := sum.TotalWeight() 271 | if math.Abs(totalWeight-w) > totalWeight { 272 | t.Errorf("expected %v <= %v", math.Abs(totalWeight-w), totalWeight) 273 | return 274 | } 275 | 276 | // Verify expected quantiles. 277 | actuals, err := reducerStream.GenerateQuantiles(int64(len(expectedQuantiles) - 1)) 278 | if err != nil { 279 | t.Error("expected no error, got ", err) 280 | return 281 | } 282 | for i, eq := range expectedQuantiles { 283 | if val := math.Abs(actuals[i] - eq); val > quantilesMatcherEpsilon { 284 | t.Errorf("expected %v <= %v", val, quantilesMatcherEpsilon) 285 | return 286 | } 287 | } 288 | } 289 | 290 | func TestStreamFixedUniformDistributed(t *testing.T) { 291 | var ( 292 | numWorkers int32 = 10 293 | eps = 0.01 294 | maxElements = int64(numWorkers) * int64(1<<16) 295 | ) 296 | testDistributedStreams(t, numWorkers, eps, maxElements, generateFixedUniformSummary, 297 | []float64{0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0}, 1e-2) 298 | } 299 | 300 | func TestStreamFixedNonUniformDistributed(t *testing.T) { 301 | var ( 302 | numWorkers int32 = 10 303 | eps = 0.01 304 | maxElements = int64(numWorkers) * int64(1<<16) 305 | ) 306 | testDistributedStreams(t, numWorkers, eps, maxElements, generateFixedNonUniformSummary, 307 | []float64{0, math.Sqrt(0.1), math.Sqrt(0.2), math.Sqrt(0.3), math.Sqrt(0.4), math.Sqrt(0.5), math.Sqrt(0.6), math.Sqrt(0.7), math.Sqrt(0.8), math.Sqrt(0.9), 1.0}, 1e-2) 308 | 309 | } 310 | 311 | func TestRandUniformFixedWeightsDistributed(t *testing.T) { 312 | var ( 313 | numWorkers int32 = 10 314 | eps = 0.01 315 | maxElements = int64(numWorkers) 
* int64(1<<16) 316 | ) 317 | testDistributedStreams(t, numWorkers, eps, maxElements, generateRandUniformFixedWeightsSummary, 318 | []float64{0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0}, 1e-2) 319 | } 320 | 321 | func TestRandUniformRandWeightsDistributed(t *testing.T) { 322 | var ( 323 | numWorkers int32 = 10 324 | eps = 0.01 325 | maxElements = int64(numWorkers) * int64(1<<16) 326 | ) 327 | testDistributedStreams(t, numWorkers, eps, maxElements, generateRandUniformRandWeightsSummary, 328 | []float64{0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0}, 1e-2) 329 | } 330 | 331 | func TestSketchMedian(t *testing.T) { 332 | assert := assert.New(t) 333 | q, _ := New(0.5, 1000) 334 | 335 | for i := 0; i < 402; i++ { 336 | q.Push(10, 1) 337 | } 338 | 339 | for i := 0; i < 401; i++ { 340 | q.Push(5, 1) 341 | } 342 | // make sure median is 6 343 | q.Push(6, 1) 344 | q.Push(6, 1) 345 | 346 | exp := map[float64]float64{ 347 | 0.1: 5, 348 | 0.2: 5, 349 | 0.3: 5, 350 | 0.4: 5, 351 | 0.5: 6, 352 | 0.6: 10, 353 | 0.7: 10, 354 | 0.8: 10, 355 | 0.9: 10, 356 | } 357 | 358 | err := q.Finalize() 359 | assert.NoError(err) 360 | for i, val := range exp { 361 | x, err := q.Quantile(i) 362 | assert.NoError(err) 363 | assert.Equal(val, x) 364 | } 365 | 366 | } 367 | --------------------------------------------------------------------------------