├── internal ├── README ├── go-moremath │ ├── vec │ │ ├── package.go │ │ └── vec.go │ ├── README.md │ ├── scale │ │ ├── err.go │ │ ├── package.go │ │ ├── util.go │ │ ├── interface.go │ │ ├── linear.go │ │ ├── linear_test.go │ │ ├── log.go │ │ └── log_test.go │ ├── mathx │ │ ├── package.go │ │ ├── sign.go │ │ ├── beta_test.go │ │ ├── choose.go │ │ └── beta.go │ ├── stats │ │ ├── kdekernel_string.go │ │ ├── kdeboundarymethod_string.go │ │ ├── sample_test.go │ │ ├── locationhypothesis_string.go │ │ ├── hypergdist_test.go │ │ ├── package.go │ │ ├── normaldist_test.go │ │ ├── tdist.go │ │ ├── dist_test.go │ │ ├── linearhist.go │ │ ├── deltadist.go │ │ ├── util_test.go │ │ ├── hist.go │ │ ├── loghist.go │ │ ├── kde_test.go │ │ ├── ttest_test.go │ │ ├── utest_test.go │ │ ├── stream.go │ │ ├── alg.go │ │ ├── hypergdist.go │ │ ├── tdist_test.go │ │ ├── normaldist.go │ │ ├── ttest.go │ │ ├── dist.go │ │ ├── sample.go │ │ ├── utest.go │ │ ├── udist_test.go │ │ ├── kde.go │ │ └── udist.go │ ├── internal │ │ └── mathtest │ │ │ └── mathtest.go │ └── LICENSE └── import.sh ├── README.md ├── LICENSE └── main.go /internal/README: -------------------------------------------------------------------------------- 1 | go-moremath copied from github.com/aclements/go-moremath. 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Benchstat has moved. Please use [golang.org/x/perf/cmd/benchstat](https://golang.org/x/perf/cmd/benchstat)! 2 | -------------------------------------------------------------------------------- /internal/go-moremath/vec/package.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // Package vec provides functions for float64 vectors. 6 | package vec // import "rsc.io/benchstat/internal/go-moremath/vec" 7 | -------------------------------------------------------------------------------- /internal/import.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | 5 | if [ -e go-moremath ]; then 6 | mv go-moremath go-moremath.old 7 | fi 8 | 9 | git clone --depth=1 http://github.com/aclements/go-moremath 10 | rm -rf go-moremath/.git 11 | sed -i -e 's,github.com/aclements/\(go-moremath\),rsc.io/benchstat/internal/\1,' $(find -name \*.go) 12 | -------------------------------------------------------------------------------- /internal/go-moremath/README.md: -------------------------------------------------------------------------------- 1 | These packages provide more specialized math routines than are 2 | available in the standard Go math package. go-moremath currently 3 | focuses on statistical routines, with particular focus on high-quality 4 | implementations and APIs for non-parametric methods. 5 | 6 | The API is not stable. 7 | 8 | Please see the [documentation](https://godoc.org/github.com/aclements/go-moremath). 9 | -------------------------------------------------------------------------------- /internal/go-moremath/scale/err.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package scale 6 | 7 | // RangeErr is an error that indicates some argument or value is out 8 | // of range. 9 | type RangeErr string 10 | 11 | func (r RangeErr) Error() string { 12 | return string(r) 13 | } 14 | -------------------------------------------------------------------------------- /internal/go-moremath/mathx/package.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // Package mathx implements special functions not provided by the 6 | // standard math package. 7 | package mathx // import "rsc.io/benchstat/internal/go-moremath/mathx" 8 | 9 | import "math" 10 | 11 | var nan = math.NaN() 12 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/kdekernel_string.go: -------------------------------------------------------------------------------- 1 | // generated by stringer -type=KDEKernel; DO NOT EDIT 2 | 3 | package stats 4 | 5 | import "fmt" 6 | 7 | const _KDEKernel_name = "GaussianKernelDeltaKernel" 8 | 9 | var _KDEKernel_index = [...]uint8{0, 14, 25} 10 | 11 | func (i KDEKernel) String() string { 12 | if i < 0 || i+1 >= KDEKernel(len(_KDEKernel_index)) { 13 | return fmt.Sprintf("KDEKernel(%d)", i) 14 | } 15 | return _KDEKernel_name[_KDEKernel_index[i]:_KDEKernel_index[i+1]] 16 | } 17 | -------------------------------------------------------------------------------- /internal/go-moremath/mathx/sign.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package mathx 6 | 7 | // Sign returns the sign of x: -1 if x < 0, 0 if x == 0, 1 if x > 0. 8 | // If x is NaN, it returns NaN. 9 | func Sign(x float64) float64 { 10 | if x == 0 { 11 | return 0 12 | } else if x < 0 { 13 | return -1 14 | } else if x > 0 { 15 | return 1 16 | } 17 | return nan 18 | } 19 | -------------------------------------------------------------------------------- /internal/go-moremath/scale/package.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // Package scale provides abstractions for scales that map from one 6 | // domain to another and provide methods for indicating human-readable 7 | // intervals in the input domain. The most common type of scale is a 8 | // quantitative scale, such as a linear or log scale, which is 9 | // captured by the Quantitative interface. 10 | package scale 11 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/kdeboundarymethod_string.go: -------------------------------------------------------------------------------- 1 | // generated by stringer -type=KDEBoundaryMethod; DO NOT EDIT 2 | 3 | package stats 4 | 5 | import "fmt" 6 | 7 | const _KDEBoundaryMethod_name = "BoundaryReflect" 8 | 9 | var _KDEBoundaryMethod_index = [...]uint8{0, 15} 10 | 11 | func (i KDEBoundaryMethod) String() string { 12 | if i < 0 || i+1 >= KDEBoundaryMethod(len(_KDEBoundaryMethod_index)) { 13 | return fmt.Sprintf("KDEBoundaryMethod(%d)", i) 14 | } 15 | return _KDEBoundaryMethod_name[_KDEBoundaryMethod_index[i]:_KDEBoundaryMethod_index[i+1]] 16 | } 17 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/sample_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package stats 6 | 7 | import "testing" 8 | 9 | func TestSamplePercentile(t *testing.T) { 10 | s := Sample{Xs: []float64{15, 20, 35, 40, 50}} 11 | testFunc(t, "Percentile", s.Percentile, map[float64]float64{ 12 | -1: 15, 13 | 0: 15, 14 | .05: 15, 15 | .30: 19.666666666666666, 16 | .40: 27, 17 | .95: 50, 18 | 1: 50, 19 | 2: 50, 20 | }) 21 | } 22 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/locationhypothesis_string.go: -------------------------------------------------------------------------------- 1 | // generated by stringer -type LocationHypothesis; DO NOT EDIT 2 | 3 | package stats 4 | 5 | import "fmt" 6 | 7 | const _LocationHypothesis_name = "LocationLessLocationDiffersLocationGreater" 8 | 9 | var _LocationHypothesis_index = [...]uint8{0, 12, 27, 42} 10 | 11 | func (i LocationHypothesis) String() string { 12 | i -= -1 13 | if i < 0 || i+1 >= LocationHypothesis(len(_LocationHypothesis_index)) { 14 | return fmt.Sprintf("LocationHypothesis(%d)", i+-1) 15 | } 16 | return _LocationHypothesis_name[_LocationHypothesis_index[i]:_LocationHypothesis_index[i+1]] 17 | } 18 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/hypergdist_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package stats 6 | 7 | import ( 8 | "fmt" 9 | "testing" 10 | ) 11 | 12 | func TestHypergeometricDist(t *testing.T) { 13 | dist1 := HypergeometicDist{N: 50, K: 5, Draws: 10} 14 | testFunc(t, fmt.Sprintf("%+v.PMF", dist1), dist1.PMF, 15 | map[float64]float64{ 16 | -0.1: 0, 17 | 4: 0.003964583058, 18 | 4.9: 0.003964583058, // Test rounding 19 | 5: 0.000118937492, 20 | 5.9: 0.000118937492, 21 | 6: 0, 22 | }) 23 | testDiscreteCDF(t, fmt.Sprintf("%+v.CDF", dist1), dist1) 24 | } 25 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/package.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // Package stats implements several statistical distributions, 6 | // hypothesis tests, and functions for descriptive statistics. 7 | // 8 | // Currently stats is fairly small, but for what it does implement, it 9 | // focuses on high quality, fast implementations with good, idiomatic 10 | // Go APIs. 11 | package stats // import "rsc.io/benchstat/internal/go-moremath/stats" 12 | 13 | import ( 14 | "errors" 15 | "math" 16 | ) 17 | 18 | var inf = math.Inf(1) 19 | var nan = math.NaN() 20 | 21 | // TODO: Put all errors in the same place and maybe unify them. 22 | 23 | var ( 24 | ErrSamplesEqual = errors.New("all samples are equal") 25 | ) 26 | -------------------------------------------------------------------------------- /internal/go-moremath/mathx/beta_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package mathx 6 | 7 | import ( 8 | "testing" 9 | 10 | . "rsc.io/benchstat/internal/go-moremath/internal/mathtest" 11 | ) 12 | 13 | func TestBetaInc(t *testing.T) { 14 | // Example values from MATLAB betainc documentation. 15 | WantFunc(t, "I_0.5(%v, 3)", 16 | func(a float64) float64 { return BetaInc(0.5, a, 3) }, 17 | map[float64]float64{ 18 | 0: 1.00000000000000, 19 | 1: 0.87500000000000, 20 | 2: 0.68750000000000, 21 | 3: 0.50000000000000, 22 | 4: 0.34375000000000, 23 | 5: 0.22656250000000, 24 | 6: 0.14453125000000, 25 | 7: 0.08984375000000, 26 | 8: 0.05468750000000, 27 | 9: 0.03271484375000, 28 | 10: 0.01928710937500}) 29 | } 30 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/normaldist_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package stats 6 | 7 | import ( 8 | "fmt" 9 | "math" 10 | "testing" 11 | ) 12 | 13 | func TestNormalDist(t *testing.T) { 14 | d := StdNormal 15 | 16 | testFunc(t, fmt.Sprintf("%+v.PDF", d), d.PDF, map[float64]float64{ 17 | -10000: 0, // approx 18 | -1: 1 / math.Sqrt(2*math.Pi) * math.Exp(-0.5), 19 | 0: 1 / math.Sqrt(2*math.Pi), 20 | 1: 1 / math.Sqrt(2*math.Pi) * math.Exp(-0.5), 21 | 10000: 0, // approx 22 | }) 23 | 24 | testFunc(t, fmt.Sprintf("%+v.CDF", d), d.CDF, map[float64]float64{ 25 | -10000: 0, // approx 26 | 0: 0.5, 27 | 10000: 1, // approx 28 | }) 29 | 30 | d2 := NormalDist{Mu: 2, Sigma: 5} 31 | testInvCDF(t, d, false) 32 | testInvCDF(t, d2, false) 33 | } 34 | -------------------------------------------------------------------------------- /internal/go-moremath/scale/util.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package scale 6 | 7 | // clamp clamps x to the range [0, 1]. 8 | func clamp(x float64) float64 { 9 | if x < 0 { 10 | return 0 11 | } 12 | if x > 1 { 13 | return 1 14 | } 15 | return x 16 | } 17 | 18 | // autoScale returns the smallest m for which fn(m) <= n. This is 19 | // intended to be used for auto-scaling tick values, where fn maps 20 | // from a tick "level" to the number of ticks at that level in the 21 | // scale's input range. 22 | // 23 | // fn must be a monotonically decreasing function. 24 | func autoScale(n int, fn func(level int) int, guess int) int { 25 | m := guess 26 | if fn(m) <= n { 27 | for m--; fn(m) <= n; m-- { 28 | } 29 | return m + 1 30 | } else { 31 | for m++; fn(m) > n; m++ { 32 | } 33 | return m 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/tdist.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package stats 6 | 7 | import ( 8 | "math" 9 | 10 | "rsc.io/benchstat/internal/go-moremath/mathx" 11 | ) 12 | 13 | // A TDist is a Student's t-distribution with V degrees of freedom. 14 | type TDist struct { 15 | V float64 16 | } 17 | 18 | func lgamma(x float64) float64 { 19 | y, _ := math.Lgamma(x) 20 | return y 21 | } 22 | 23 | func (t TDist) PDF(x float64) float64 { 24 | return math.Exp(lgamma((t.V+1)/2)-lgamma(t.V/2)) / 25 | math.Sqrt(t.V*math.Pi) * math.Pow(1+(x*x)/t.V, -(t.V+1)/2) 26 | } 27 | 28 | func (t TDist) CDF(x float64) float64 { 29 | if x == 0 { 30 | return 0.5 31 | } else if x > 0 { 32 | return 1 - 0.5*mathx.BetaInc(t.V/(t.V+x*x), t.V/2, 0.5) 33 | } else if x < 0 { 34 | return 1 - t.CDF(-x) 35 | } else { 36 | return math.NaN() 37 | } 38 | } 39 | 40 | func (t TDist) Bounds() (float64, float64) { 41 | return -4, 4 42 | } 43 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/dist_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package stats 6 | 7 | import ( 8 | "fmt" 9 | "testing" 10 | ) 11 | 12 | type funnyCDF struct { 13 | left float64 14 | } 15 | 16 | func (f funnyCDF) CDF(x float64) float64 { 17 | switch { 18 | case x < f.left: 19 | return 0 20 | case x < f.left+1: 21 | return (x - f.left) / 2 22 | case x < f.left+2: 23 | return 0.5 24 | case x < f.left+3: 25 | return (x-f.left-2)/2 + 0.5 26 | default: 27 | return 1 28 | } 29 | } 30 | 31 | func (f funnyCDF) Bounds() (float64, float64) { 32 | return f.left, f.left + 3 33 | } 34 | 35 | func TestInvCDF(t *testing.T) { 36 | for _, f := range []funnyCDF{funnyCDF{1}, funnyCDF{-1.5}, funnyCDF{-4}} { 37 | testFunc(t, fmt.Sprintf("InvCDF(funnyCDF%+v)", f), InvCDF(f), 38 | map[float64]float64{ 39 | -0.1: nan, 40 | 0: f.left, 41 | 0.25: f.left + 0.5, 42 | 0.5: f.left + 1, 43 | 0.75: f.left + 2.5, 44 | 1: f.left + 3, 45 | 1.1: nan, 46 | }) 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/linearhist.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package stats 6 | 7 | // LinearHist is a Histogram with uniformly-sized bins. 8 | type LinearHist struct { 9 | min, max, delta float64 10 | low, high uint 11 | bins []uint 12 | } 13 | 14 | // NewLinearHist returns an empty histogram with nbins uniformly-sized 15 | // bins spanning [min, max]. 16 | func NewLinearHist(min, max float64, nbins int) *LinearHist { 17 | delta := float64(nbins) / (max - min) 18 | return &LinearHist{min, max, delta, 0, 0, make([]uint, nbins)} 19 | } 20 | 21 | func (h *LinearHist) bin(x float64) int { 22 | return int(h.delta * (x - h.min)) 23 | } 24 | 25 | func (h *LinearHist) Add(x float64) { 26 | bin := h.bin(x) 27 | if bin < 0 { 28 | h.low++ 29 | } else if bin >= len(h.bins) { 30 | h.high++ 31 | } else { 32 | h.bins[bin]++ 33 | } 34 | } 35 | 36 | func (h *LinearHist) Counts() (uint, []uint, uint) { 37 | return h.low, h.bins, h.high 38 | } 39 | 40 | func (h *LinearHist) BinToValue(bin float64) float64 { 41 | return h.min + bin*h.delta 42 | } 43 | -------------------------------------------------------------------------------- /internal/go-moremath/internal/mathtest/mathtest.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package mathtest 6 | 7 | import ( 8 | "fmt" 9 | "math" 10 | "sort" 11 | "strings" 12 | "testing" 13 | ) 14 | 15 | // Aeq returns true if expect and got are equal to 8 significant 16 | // figures (1 part in 100 million). 17 | func Aeq(expect, got float64) bool { 18 | if expect < 0 && got < 0 { 19 | expect, got = -expect, -got 20 | } 21 | return expect*0.99999999 <= got && got*0.99999999 <= expect 22 | } 23 | 24 | func WantFunc(t *testing.T, name string, f func(float64) float64, vals map[float64]float64) { 25 | xs := make([]float64, 0, len(vals)) 26 | for x := range vals { 27 | xs = append(xs, x) 28 | } 29 | sort.Float64s(xs) 30 | 31 | for _, x := range xs { 32 | want, got := vals[x], f(x) 33 | if math.IsNaN(want) && math.IsNaN(got) || Aeq(want, got) { 34 | continue 35 | } 36 | var label string 37 | if strings.Contains(name, "%v") { 38 | label = fmt.Sprintf(name, x) 39 | } else { 40 | label = fmt.Sprintf("%s(%v)", name, x) 41 | } 42 | t.Errorf("want %s=%v, got %v", label, want, got) 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/deltadist.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package stats 6 | 7 | // DeltaDist is the Dirac delta function, centered at T, with total 8 | // area 1. 9 | // 10 | // The CDF of the Dirac delta function is the Heaviside step function, 11 | // centered at T. Specifically, f(T) == 1. 12 | type DeltaDist struct { 13 | T float64 14 | } 15 | 16 | func (d DeltaDist) PDF(x float64) float64 { 17 | if x == d.T { 18 | return inf 19 | } 20 | return 0 21 | } 22 | 23 | func (d DeltaDist) pdfEach(xs []float64) []float64 { 24 | res := make([]float64, len(xs)) 25 | for i, x := range xs { 26 | if x == d.T { 27 | res[i] = inf 28 | } 29 | } 30 | return res 31 | } 32 | 33 | func (d DeltaDist) CDF(x float64) float64 { 34 | if x >= d.T { 35 | return 1 36 | } 37 | return 0 38 | } 39 | 40 | func (d DeltaDist) cdfEach(xs []float64) []float64 { 41 | res := make([]float64, len(xs)) 42 | for i, x := range xs { 43 | res[i] = d.CDF(x) 44 | } 45 | return res 46 | } 47 | 48 | func (d DeltaDist) InvCDF(y float64) float64 { 49 | if y < 0 || y > 1 { 50 | return nan 51 | } 52 | return d.T 53 | } 54 | 55 | func (d DeltaDist) Bounds() (float64, float64) { 56 | return d.T - 1, d.T + 1 57 | } 58 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2009 The Go Authors. All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are 5 | met: 6 | 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above 10 | copyright notice, this list of conditions and the following disclaimer 11 | in the documentation and/or other materials provided with the 12 | distribution. 13 | * Neither the name of Google Inc. nor the names of its 14 | contributors may be used to endorse or promote products derived from 15 | this software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /internal/go-moremath/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015 The Go Authors. All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are 5 | met: 6 | 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above 10 | copyright notice, this list of conditions and the following disclaimer 11 | in the documentation and/or other materials provided with the 12 | distribution. 13 | * Neither the name of Google Inc. nor the names of its 14 | contributors may be used to endorse or promote products derived from 15 | this software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /internal/go-moremath/mathx/choose.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package mathx 6 | 7 | import "math" 8 | 9 | const smallFactLimit = 20 // 20! => 62 bits 10 | var smallFact [smallFactLimit + 1]int64 11 | 12 | func init() { 13 | smallFact[0] = 1 14 | fact := int64(1) 15 | for n := int64(1); n <= smallFactLimit; n++ { 16 | fact *= n 17 | smallFact[n] = fact 18 | } 19 | } 20 | 21 | // Choose returns the binomial coefficient of n and k. 22 | func Choose(n, k int) float64 { 23 | if k == 0 || k == n { 24 | return 1 25 | } 26 | if k < 0 || n < k { 27 | return 0 28 | } 29 | if n <= smallFactLimit { // Implies k <= smallFactLimit 30 | // It's faster to do several integer multiplications 31 | // than it is to do an extra integer division. 32 | // Remarkably, this is also faster than pre-computing 33 | // Pascal's triangle (presumably because this is very 34 | // cache efficient). 35 | numer := int64(1) 36 | for n1 := int64(n - (k - 1)); n1 <= int64(n); n1++ { 37 | numer *= n1 38 | } 39 | denom := smallFact[k] 40 | return float64(numer / denom) 41 | } 42 | 43 | return math.Exp(lchoose(n, k)) 44 | } 45 | 46 | // Lchoose returns math.Log(Choose(n, k)). 47 | func Lchoose(n, k int) float64 { 48 | if k == 0 || k == n { 49 | return 0 50 | } 51 | if k < 0 || n < k { 52 | return math.NaN() 53 | } 54 | return lchoose(n, k) 55 | } 56 | 57 | func lchoose(n, k int) float64 { 58 | a, _ := math.Lgamma(float64(n + 1)) 59 | b, _ := math.Lgamma(float64(k + 1)) 60 | c, _ := math.Lgamma(float64(n - k + 1)) 61 | return a - b - c 62 | } 63 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/util_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package stats 6 | 7 | import ( 8 | "fmt" 9 | "testing" 10 | 11 | "rsc.io/benchstat/internal/go-moremath/internal/mathtest" 12 | "rsc.io/benchstat/internal/go-moremath/vec" 13 | ) 14 | 15 | var aeq = mathtest.Aeq 16 | var testFunc = mathtest.WantFunc 17 | 18 | func testDiscreteCDF(t *testing.T, name string, dist DiscreteDist) { 19 | // Build the expected CDF out of the PMF. 20 | l, h := dist.Bounds() 21 | s := dist.Step() 22 | want := map[float64]float64{l - 0.1: 0, h: 1} 23 | sum := 0.0 24 | for x := l; x < h; x += s { 25 | sum += dist.PMF(x) 26 | want[x] = sum 27 | want[x+s/2] = sum 28 | } 29 | 30 | testFunc(t, name, dist.CDF, want) 31 | } 32 | 33 | func testInvCDF(t *testing.T, dist Dist, bounded bool) { 34 | inv := InvCDF(dist) 35 | name := fmt.Sprintf("InvCDF(%+v)", dist) 36 | cdfName := fmt.Sprintf("CDF(%+v)", dist) 37 | 38 | // Test bounds. 39 | vals := map[float64]float64{-0.01: nan, 1.01: nan} 40 | if !bounded { 41 | vals[0] = -inf 42 | vals[1] = inf 43 | } 44 | testFunc(t, name, inv, vals) 45 | 46 | if bounded { 47 | lo, hi := inv(0), inv(1) 48 | vals := map[float64]float64{ 49 | lo - 0.01: 0, lo: 0, 50 | hi: 1, hi + 0.01: 1, 51 | } 52 | testFunc(t, cdfName, dist.CDF, vals) 53 | if got := dist.CDF(lo + 0.01); !(got > 0) { 54 | t.Errorf("%s(0)=%v, but %s(%v)=0", name, lo, cdfName, lo+0.01) 55 | } 56 | if got := dist.CDF(hi - 0.01); !(got < 1) { 57 | t.Errorf("%s(1)=%v, but %s(%v)=1", name, hi, cdfName, hi-0.01) 58 | } 59 | } 60 | 61 | // Test points between. 62 | vals = map[float64]float64{} 63 | for _, p := range vec.Linspace(0, 1, 11) { 64 | if p == 0 || p == 1 { 65 | continue 66 | } 67 | x := inv(p) 68 | vals[x] = x 69 | } 70 | testFunc(t, fmt.Sprintf("InvCDF(CDF(%+v))", dist), 71 | func(x float64) float64 { 72 | return inv(dist.CDF(x)) 73 | }, 74 | vals) 75 | } 76 | -------------------------------------------------------------------------------- /internal/go-moremath/scale/interface.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package scale 6 | 7 | // A Quantative scale is an invertible function from some continuous 8 | // input range to an output domain of [0, 1]. 9 | type Quantitative interface { 10 | // Map maps from a value x in the input range to [0, 1]. If x 11 | // is outside the input range and clamping is enabled, x will 12 | // first be clamped to the input range. 13 | Map(x float64) float64 14 | 15 | // Unmap is the inverse of Map. That is, if x is in the input 16 | // range or clamping is disabled, x = Unmap(Map(x)). If 17 | // clamping is enabled and y is outside [0,1], the results are 18 | // undefined. 19 | Unmap(y float64) float64 20 | 21 | // SetClamp sets the clamping mode of this scale. 22 | SetClamp(bool) 23 | 24 | // Ticks returns a set of at most n major ticks, plus minor 25 | // ticks. These ticks will have "nice" values within the input 26 | // range. Both arrays are sorted in ascending order and minor 27 | // includes ticks in major. 28 | Ticks(n int) (major, minor []float64) 29 | 30 | // Nice expands the input range of this scale to "nice" values 31 | // for covering the input range with n major ticks. After 32 | // calling Nice(n), the first and last major ticks returned by 33 | // Ticks(n) will equal the lower and upper bounds of the input 34 | // range. 35 | Nice(n int) 36 | } 37 | 38 | // A QQ maps from a source Quantitative scale to a destination 39 | // Quantitative scale. 40 | type QQ struct { 41 | Src, Dest Quantitative 42 | } 43 | 44 | // Map maps from a value x in the source scale's input range to a 45 | // value y in the destination scale's input range. 46 | func (q QQ) Map(x float64) float64 { 47 | return q.Dest.Unmap(q.Src.Map(x)) 48 | } 49 | 50 | // Unmap maps from a value y in the destination scale's input range to 51 | // a value x in the source scale's input range. 52 | func (q QQ) Unmap(x float64) float64 { 53 | return q.Src.Unmap(q.Dest.Map(x)) 54 | } 55 | -------------------------------------------------------------------------------- /internal/go-moremath/vec/vec.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package vec 6 | 7 | import "math" 8 | 9 | // Vectorize returns a function g(xs) that applies f to each x in xs. 10 | // 11 | // f may be evaluated in parallel and in any order. 12 | func Vectorize(f func(float64) float64) func(xs []float64) []float64 { 13 | return func(xs []float64) []float64 { 14 | return Map(f, xs) 15 | } 16 | } 17 | 18 | // Map returns f(x) for each x in xs. 19 | // 20 | // f may be evaluated in parallel and in any order. 21 | func Map(f func(float64) float64, xs []float64) []float64 { 22 | // TODO(austin) Parallelize 23 | res := make([]float64, len(xs)) 24 | for i, x := range xs { 25 | res[i] = f(x) 26 | } 27 | return res 28 | } 29 | 30 | // Linspace returns num values spaced evenly between lo and hi, 31 | // inclusive. If num is 1, this returns an array consisting of lo. 32 | func Linspace(lo, hi float64, num int) []float64 { 33 | res := make([]float64, num) 34 | if num == 1 { 35 | res[0] = lo 36 | return res 37 | } 38 | for i := 0; i < num; i++ { 39 | res[i] = lo + float64(i)*(hi-lo)/float64(num-1) 40 | } 41 | return res 42 | } 43 | 44 | // Logspace returns num values spaced evenly on a logarithmic scale 45 | // between base**lo and base**hi, inclusive. 46 | func Logspace(lo, hi float64, num int, base float64) []float64 { 47 | res := Linspace(lo, hi, num) 48 | for i, x := range res { 49 | res[i] = math.Pow(base, x) 50 | } 51 | return res 52 | } 53 | 54 | // Sum returns the sum of xs. 55 | func Sum(xs []float64) float64 { 56 | sum := 0.0 57 | for _, x := range xs { 58 | sum += x 59 | } 60 | return sum 61 | } 62 | 63 | // Concat returns the concatenation of its arguments. It does not 64 | // modify its inputs. 65 | func Concat(xss ...[]float64) []float64 { 66 | total := 0 67 | for _, xs := range xss { 68 | total += len(xs) 69 | } 70 | out := make([]float64, total) 71 | pos := 0 72 | for _, xs := range xss { 73 | pos += copy(out[pos:], xs) 74 | } 75 | return out 76 | } 77 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/hist.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package stats 6 | 7 | import "math" 8 | 9 | // TODO: Implement histograms on top of scales. 10 | 11 | type Histogram interface { 12 | // Add adds a sample with value x to histogram h. 13 | Add(x float64) 14 | 15 | // Counts returns the number of samples less than the lowest 16 | // bin, a slice of the number of samples in each bin, 17 | // and the number of samples greater than the highest bin. 18 | Counts() (under uint, counts []uint, over uint) 19 | 20 | // BinToValue returns the value that would appear at the given 21 | // bin index. 22 | // 23 | // For integral values of bin, BinToValue returns the lower 24 | // bound of bin. That is, a sample value x will be in bin if 25 | // bin is integral and 26 | // 27 | // BinToValue(bin) <= x < BinToValue(bin + 1) 28 | // 29 | // For non-integral values of bin, BinToValue interpolates 30 | // between the lower and upper bounds of math.Floor(bin). 31 | // 32 | // BinToValue is undefined if bin > 1 + the number of bins. 33 | BinToValue(bin float64) float64 34 | } 35 | 36 | // HistogramPercentile returns the x such that n*percentile samples in 37 | // hist are <= x, assuming values are distibuted within each bin 38 | // according to hist's distibution. 39 | // 40 | // If the percentile'th sample falls below the lowest bin or above the 41 | // highest bin, returns NaN. 42 | func HistogramPercentile(hist Histogram, percentile float64) float64 { 43 | under, counts, over := hist.Counts() 44 | total := under + over 45 | for _, count := range counts { 46 | total += count 47 | } 48 | 49 | goal := uint(float64(total) * percentile) 50 | if goal <= under || goal > total-over { 51 | return math.NaN() 52 | } 53 | for bin, count := range counts { 54 | if count > goal { 55 | return hist.BinToValue(float64(bin) + float64(goal)/float64(count)) 56 | } 57 | goal -= count 58 | } 59 | panic("goal count not reached") 60 | } 61 | 62 | // HistogramIQR returns the interquartile range of the samples in 63 | // hist. 64 | func HistogramIQR(hist Histogram) float64 { 65 | return HistogramPercentile(hist, 0.75) - HistogramPercentile(hist, 0.25) 66 | } 67 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/loghist.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package stats 6 | 7 | import "math" 8 | 9 | // LogHist is a Histogram with logarithmically-spaced bins. 10 | type LogHist struct { 11 | b int 12 | m float64 13 | mOverLogb float64 14 | low, high uint 15 | bins []uint 16 | } 17 | 18 | // NewLogHist returns an empty logarithmic histogram with bins for 19 | // integral values of m * log_b(x) up to x = max. 20 | func NewLogHist(b int, m float64, max float64) *LogHist { 21 | // TODO(austin) Minimum value as well? If the samples are 22 | // actually integral, having fractional bin boundaries can 23 | // mess up smoothing. 24 | mOverLogb := m / math.Log(float64(b)) 25 | nbins := int(math.Ceil(mOverLogb * math.Log(max))) 26 | return &LogHist{b: b, m: m, mOverLogb: mOverLogb, low: 0, high: 0, bins: make([]uint, nbins)} 27 | } 28 | 29 | func (h *LogHist) bin(x float64) int { 30 | return int(h.mOverLogb * math.Log(x)) 31 | } 32 | 33 | func (h *LogHist) Add(x float64) { 34 | bin := h.bin(x) 35 | if bin < 0 { 36 | h.low++ 37 | } else if bin >= len(h.bins) { 38 | h.high++ 39 | } else { 40 | h.bins[bin]++ 41 | } 42 | } 43 | 44 | func (h *LogHist) Counts() (uint, []uint, uint) { 45 | return h.low, h.bins, h.high 46 | } 47 | 48 | func (h *LogHist) BinToValue(bin float64) float64 { 49 | return math.Pow(float64(h.b), bin/h.m) 50 | } 51 | 52 | func (h *LogHist) At(x float64) float64 { 53 | bin := h.bin(x) 54 | if bin < 0 || bin >= len(h.bins) { 55 | return 0 56 | } 57 | return float64(h.bins[bin]) 58 | } 59 | 60 | func (h *LogHist) Bounds() (float64, float64) { 61 | // XXX Plot will plot this on a linear axis. Maybe this 62 | // should be able to return the natural axis? 63 | // Maybe then we could also give it the bins for the tics. 64 | lowbin := 0 65 | if h.low == 0 { 66 | for bin, count := range h.bins { 67 | if count > 0 { 68 | lowbin = bin 69 | break 70 | } 71 | } 72 | } 73 | highbin := len(h.bins) 74 | if h.high == 0 { 75 | for bin := range h.bins { 76 | if h.bins[len(h.bins)-bin-1] > 0 { 77 | highbin = len(h.bins) - bin 78 | break 79 | } 80 | } 81 | } 82 | return h.BinToValue(float64(lowbin)), h.BinToValue(float64(highbin)) 83 | } 84 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/kde_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package stats 6 | 7 | import ( 8 | "fmt" 9 | "testing" 10 | ) 11 | 12 | func TestKDEOneSample(t *testing.T) { 13 | x := float64(5) 14 | 15 | // Unweighted, fixed bandwidth 16 | kde := KDE{ 17 | Sample: Sample{Xs: []float64{x}}, 18 | Kernel: GaussianKernel, 19 | Bandwidth: 1, 20 | } 21 | if e, g := StdNormal.PDF(0), kde.PDF(x); !aeq(e, g) { 22 | t.Errorf("bad PDF value at sample: expected %g, got %g", e, g) 23 | } 24 | if e, g := 0.0, kde.PDF(-10000); !aeq(e, g) { 25 | t.Errorf("bad PDF value at low tail: expected %g, got %g", e, g) 26 | } 27 | if e, g := 0.0, kde.PDF(10000); !aeq(e, g) { 28 | t.Errorf("bad PDF value at high tail: expected %g, got %g", e, g) 29 | } 30 | 31 | if e, g := 0.5, kde.CDF(x); !aeq(e, g) { 32 | t.Errorf("bad CDF value at sample: expected %g, got %g", e, g) 33 | } 34 | if e, g := 0.0, kde.CDF(-10000); !aeq(e, g) { 35 | t.Errorf("bad CDF value at low tail: expected %g, got %g", e, g) 36 | } 37 | if e, g := 1.0, kde.CDF(10000); !aeq(e, g) { 38 | t.Errorf("bad CDF value at high tail: expected %g, got %g", e, g) 39 | } 40 | 41 | low, high := kde.Bounds() 42 | if e, g := x-2, low; e < g { 43 | t.Errorf("bad low bound: expected %g, got %g", e, g) 44 | } 45 | if e, g := x+2, high; e > g { 46 | t.Errorf("bad high bound: expected %g, got %g", e, g) 47 | } 48 | 49 | kde = KDE{ 50 | Sample: Sample{Xs: []float64{x}}, 51 | Kernel: EpanechnikovKernel, 52 | Bandwidth: 2, 53 | } 54 | testFunc(t, fmt.Sprintf("%+v.PDF", kde), kde.PDF, map[float64]float64{ 55 | x - 2: 0, 56 | x - 1: 0.5625 / 2, 57 | x: 0.75 / 2, 58 | x + 1: 0.5625 / 2, 59 | x + 2: 0, 60 | }) 61 | testFunc(t, fmt.Sprintf("%+v.CDF", kde), kde.CDF, map[float64]float64{ 62 | x - 2: 0, 63 | x - 1: 0.15625, 64 | x: 0.5, 65 | x + 1: 0.84375, 66 | x + 2: 1, 67 | }) 68 | } 69 | 70 | func TestKDETwoSamples(t *testing.T) { 71 | kde := KDE{ 72 | Sample: Sample{Xs: []float64{1, 3}}, 73 | Kernel: GaussianKernel, 74 | Bandwidth: 2, 75 | } 76 | testFunc(t, "PDF", kde.PDF, map[float64]float64{ 77 | 0: 0.120395730, 78 | 1: 0.160228251, 79 | 2: 0.176032663, 80 | 3: 0.160228251, 81 | 4: 0.120395730}) 82 | 83 | testFunc(t, "CDF", kde.CDF, map[float64]float64{ 84 | 0: 0.187672369, 85 | 1: 0.329327626, 86 | 2: 0.5, 87 | 3: 0.670672373, 88 | 4: 0.812327630}) 89 | } 90 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/ttest_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package stats 6 | 7 | import "testing" 8 | 9 | func TestTTest(t *testing.T) { 10 | s1 := Sample{Xs: []float64{2, 1, 3, 4}} 11 | s2 := Sample{Xs: []float64{6, 5, 7, 9}} 12 | 13 | check := func(want, got *TTestResult) { 14 | if want.N1 != got.N1 || want.N2 != got.N2 || 15 | !aeq(want.T, got.T) || !aeq(want.DoF, got.DoF) || 16 | want.AltHypothesis != got.AltHypothesis || 17 | !aeq(want.P, got.P) { 18 | t.Errorf("want %+v, got %+v", want, got) 19 | } 20 | } 21 | check3 := func(test func(alt LocationHypothesis) (*TTestResult, error), n1, n2 int, t, dof float64, pless, pdiff, pgreater float64) { 22 | want := &TTestResult{N1: n1, N2: n2, T: t, DoF: dof} 23 | 24 | want.AltHypothesis = LocationLess 25 | want.P = pless 26 | got, _ := test(want.AltHypothesis) 27 | check(want, got) 28 | 29 | want.AltHypothesis = LocationDiffers 30 | want.P = pdiff 31 | got, _ = test(want.AltHypothesis) 32 | check(want, got) 33 | 34 | want.AltHypothesis = LocationGreater 35 | want.P = pgreater 36 | got, _ = test(want.AltHypothesis) 37 | check(want, got) 38 | } 39 | 40 | check3(func(alt LocationHypothesis) (*TTestResult, error) { 41 | return TwoSampleTTest(s1, s1, alt) 42 | }, 4, 4, 0, 6, 43 | 0.5, 1, 0.5) 44 | check3(func(alt LocationHypothesis) (*TTestResult, error) { 45 | return TwoSampleWelchTTest(s1, s1, alt) 46 | }, 4, 4, 0, 6, 47 | 0.5, 1, 0.5) 48 | 49 | check3(func(alt LocationHypothesis) (*TTestResult, error) { 50 | return TwoSampleTTest(s1, s2, alt) 51 | }, 4, 4, -3.9703446152237674, 6, 52 | 0.0036820296121056195, 0.0073640592242113214, 0.9963179703878944) 53 | check3(func(alt LocationHypothesis) (*TTestResult, error) { 54 | return TwoSampleWelchTTest(s1, s2, alt) 55 | }, 4, 4, -3.9703446152237674, 5.584615384615385, 56 | 0.004256431565689112, 0.0085128631313781695, 0.9957435684343109) 57 | 58 | check3(func(alt LocationHypothesis) (*TTestResult, error) { 59 | return PairedTTest(s1.Xs, s2.Xs, 0, alt) 60 | }, 4, 4, -17, 3, 61 | 0.0002216717691559955, 0.00044334353831207749, 0.999778328230844) 62 | 63 | check3(func(alt LocationHypothesis) (*TTestResult, error) { 64 | return OneSampleTTest(s1, 0, alt) 65 | }, 4, 0, 3.872983346207417, 3, 66 | 0.9847668541689145, 0.030466291662170977, 0.015233145831085482) 67 | check3(func(alt LocationHypothesis) (*TTestResult, error) { 68 | return OneSampleTTest(s1, 2.5, alt) 69 | }, 4, 0, 0, 3, 70 | 0.5, 1, 0.5) 71 | } 72 | -------------------------------------------------------------------------------- /internal/go-moremath/mathx/beta.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package mathx 6 | 7 | import "math" 8 | 9 | func lgamma(x float64) float64 { 10 | y, _ := math.Lgamma(x) 11 | return y 12 | } 13 | 14 | // Beta returns the value of the complete beta function B(a, b). 15 | func Beta(a, b float64) float64 { 16 | // B(x,y) = Γ(x)Γ(y) / Γ(x+y) 17 | return math.Exp(lgamma(a) + lgamma(b) - lgamma(a+b)) 18 | } 19 | 20 | // BetaInc returns the value of the regularized incomplete beta 21 | // function Iₓ(a, b). 22 | // 23 | // This is not to be confused with the "incomplete beta function", 24 | // which can be computed as BetaInc(x, a, b)*Beta(a, b). 25 | // 26 | // If x < 0 or x > 1, returns NaN. 27 | func BetaInc(x, a, b float64) float64 { 28 | // Based on Numerical Recipes in C, section 6.4. This uses the 29 | // continued fraction definition of I: 30 | // 31 | // (xᵃ*(1-x)ᵇ)/(a*B(a,b)) * (1/(1+(d₁/(1+(d₂/(1+...)))))) 32 | // 33 | // where B(a,b) is the beta function and 34 | // 35 | // d_{2m+1} = -(a+m)(a+b+m)x/((a+2m)(a+2m+1)) 36 | // d_{2m} = m(b-m)x/((a+2m-1)(a+2m)) 37 | if x < 0 || x > 1 { 38 | return math.NaN() 39 | } 40 | bt := 0.0 41 | if 0 < x && x < 1 { 42 | // Compute the coefficient before the continued 43 | // fraction. 44 | bt = math.Exp(lgamma(a+b) - lgamma(a) - lgamma(b) + 45 | a*math.Log(x) + b*math.Log(1-x)) 46 | } 47 | if x < (a+1)/(a+b+2) { 48 | // Compute continued fraction directly. 49 | return bt * betacf(x, a, b) / a 50 | } else { 51 | // Compute continued fraction after symmetry transform. 52 | return 1 - bt*betacf(1-x, b, a)/b 53 | } 54 | } 55 | 56 | // betacf is the continued fraction component of the regularized 57 | // incomplete beta function Iₓ(a, b). 58 | func betacf(x, a, b float64) float64 { 59 | const maxIterations = 200 60 | const epsilon = 3e-14 61 | 62 | raiseZero := func(z float64) float64 { 63 | if math.Abs(z) < math.SmallestNonzeroFloat64 { 64 | return math.SmallestNonzeroFloat64 65 | } 66 | return z 67 | } 68 | 69 | c := 1.0 70 | d := 1 / raiseZero(1-(a+b)*x/(a+1)) 71 | h := d 72 | for m := 1; m <= maxIterations; m++ { 73 | mf := float64(m) 74 | 75 | // Even step of the recurrence. 76 | numer := mf * (b - mf) * x / ((a + 2*mf - 1) * (a + 2*mf)) 77 | d = 1 / raiseZero(1+numer*d) 78 | c = raiseZero(1 + numer/c) 79 | h *= d * c 80 | 81 | // Odd step of the recurrence. 82 | numer = -(a + mf) * (a + b + mf) * x / ((a + 2*mf) * (a + 2*mf + 1)) 83 | d = 1 / raiseZero(1+numer*d) 84 | c = raiseZero(1 + numer/c) 85 | hfac := d * c 86 | h *= hfac 87 | 88 | if math.Abs(hfac-1) < epsilon { 89 | return h 90 | } 91 | } 92 | panic("betainc: a or b too big; failed to converge") 93 | } 94 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/utest_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package stats 6 | 7 | import "testing" 8 | 9 | func TestMannWhitneyUTest(t *testing.T) { 10 | check := func(want, got *MannWhitneyUTestResult) { 11 | if want.N1 != got.N1 || want.N2 != got.N2 || 12 | !aeq(want.U, got.U) || 13 | want.AltHypothesis != got.AltHypothesis || 14 | !aeq(want.P, got.P) { 15 | t.Errorf("want %+v, got %+v", want, got) 16 | } 17 | } 18 | check3 := func(x1, x2 []float64, U float64, pless, pdiff, pgreater float64) { 19 | want := &MannWhitneyUTestResult{N1: len(x1), N2: len(x2), U: U} 20 | 21 | want.AltHypothesis = LocationLess 22 | want.P = pless 23 | got, _ := MannWhitneyUTest(x1, x2, want.AltHypothesis) 24 | check(want, got) 25 | 26 | want.AltHypothesis = LocationDiffers 27 | want.P = pdiff 28 | got, _ = MannWhitneyUTest(x1, x2, want.AltHypothesis) 29 | check(want, got) 30 | 31 | want.AltHypothesis = LocationGreater 32 | want.P = pgreater 33 | got, _ = MannWhitneyUTest(x1, x2, want.AltHypothesis) 34 | check(want, got) 35 | } 36 | 37 | s1 := []float64{2, 1, 3, 5} 38 | s2 := []float64{12, 11, 13, 15} 39 | s3 := []float64{0, 4, 6, 7} // Interleaved with s1, but no ties 40 | s4 := []float64{2, 2, 2, 2} 41 | s5 := []float64{1, 1, 1, 1, 1} 42 | 43 | // Small sample, no ties 44 | check3(s1, s2, 0, 0.014285714285714289, 0.028571428571428577, 1) 45 | check3(s2, s1, 16, 1, 0.028571428571428577, 0.014285714285714289) 46 | check3(s1, s3, 5, 0.24285714285714288, 0.485714285714285770, 0.8285714285714285) 47 | 48 | // Small sample, ties 49 | // TODO: Check these against some other implementation. 50 | check3(s1, s1, 8, 0.6285714285714286, 1, 0.6285714285714286) 51 | check3(s1, s4, 10, 0.8571428571428571, 0.7142857142857143, 0.3571428571428571) 52 | check3(s1, s5, 17.5, 1, 0, 0.04761904761904767) 53 | 54 | r, err := MannWhitneyUTest(s4, s4, LocationDiffers) 55 | if err != ErrSamplesEqual { 56 | t.Errorf("want ErrSamplesEqual, got %+v, %+v", r, err) 57 | } 58 | 59 | // Large samples. 60 | l1 := make([]float64, 500) 61 | for i := range l1 { 62 | l1[i] = float64(i * 2) 63 | } 64 | l2 := make([]float64, 600) 65 | for i := range l2 { 66 | l2[i] = float64(i*2 - 41) 67 | } 68 | l3 := append([]float64{}, l2...) 69 | for i := 0; i < 30; i++ { 70 | l3[i] = l1[i] 71 | } 72 | // For comparing with R's wilcox.test: 73 | // l1 <- seq(0, 499)*2 74 | // l2 <- seq(0,599)*2-41 75 | // l3 <- l2; for (i in 1:30) { l3[i] = l1[i] } 76 | 77 | check3(l1, l2, 135250, 0.0024667680407086112, 0.0049335360814172224, 0.9975346930458906) 78 | check3(l1, l1, 125000, 0.5000436801680628, 1, 0.5000436801680628) 79 | check3(l1, l3, 134845, 0.0019351907119808942, 0.0038703814239617884, 0.9980659818257166) 80 | } 81 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/stream.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package stats 6 | 7 | import ( 8 | "fmt" 9 | "math" 10 | ) 11 | 12 | // TODO(austin) Unify more with Sample interface 13 | 14 | // StreamStats tracks basic statistics for a stream of data in O(1) 15 | // space. 16 | // 17 | // StreamStats should be initialized to its zero value. 18 | type StreamStats struct { 19 | Count uint 20 | Total, Min, Max float64 21 | 22 | // Numerically stable online mean 23 | mean float64 24 | meanOfSquares float64 25 | 26 | // Online variance 27 | vM2 float64 28 | } 29 | 30 | // Add updates s's statistics with sample value x. 31 | func (s *StreamStats) Add(x float64) { 32 | s.Total += x 33 | if s.Count == 0 { 34 | s.Min, s.Max = x, x 35 | } else { 36 | if x < s.Min { 37 | s.Min = x 38 | } 39 | if x > s.Max { 40 | s.Max = x 41 | } 42 | } 43 | s.Count++ 44 | 45 | // Update online mean, mean of squares, and variance. Online 46 | // variance based on Wikipedia's presentation ("Algorithms for 47 | // calculating variance") of Knuth's formulation of Welford 48 | // 1962. 49 | delta := x - s.mean 50 | s.mean += delta / float64(s.Count) 51 | s.meanOfSquares += (x*x - s.meanOfSquares) / float64(s.Count) 52 | s.vM2 += delta * (x - s.mean) 53 | } 54 | 55 | func (s *StreamStats) Weight() float64 { 56 | return float64(s.Count) 57 | } 58 | 59 | func (s *StreamStats) Mean() float64 { 60 | return s.mean 61 | } 62 | 63 | func (s *StreamStats) Variance() float64 { 64 | return s.vM2 / float64(s.Count-1) 65 | } 66 | 67 | func (s *StreamStats) StdDev() float64 { 68 | return math.Sqrt(s.Variance()) 69 | } 70 | 71 | func (s *StreamStats) RMS() float64 { 72 | return math.Sqrt(s.meanOfSquares) 73 | } 74 | 75 | // Combine updates s's statistics as if all samples added to o were 76 | // added to s. 77 | func (s *StreamStats) Combine(o *StreamStats) { 78 | count := s.Count + o.Count 79 | 80 | // Compute combined online variance statistics 81 | delta := o.mean - s.mean 82 | mean := s.mean + delta*float64(o.Count)/float64(count) 83 | vM2 := s.vM2 + o.vM2 + delta*delta*float64(s.Count)*float64(o.Count)/float64(count) 84 | 85 | s.Count = count 86 | s.Total += o.Total 87 | if o.Min < s.Min { 88 | s.Min = o.Min 89 | } 90 | if o.Max > s.Max { 91 | s.Max = o.Max 92 | } 93 | s.mean = mean 94 | s.meanOfSquares += (o.meanOfSquares - s.meanOfSquares) * float64(o.Count) / float64(count) 95 | s.vM2 = vM2 96 | } 97 | 98 | func (s *StreamStats) String() string { 99 | return fmt.Sprintf("Count=%d Total=%g Min=%g Mean=%g RMS=%g Max=%g StdDev=%g", s.Count, s.Total, s.Min, s.Mean(), s.RMS(), s.Max, s.StdDev()) 100 | } 101 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/alg.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package stats 6 | 7 | // Miscellaneous helper algorithms 8 | 9 | import ( 10 | "fmt" 11 | 12 | "rsc.io/benchstat/internal/go-moremath/mathx" 13 | ) 14 | 15 | func maxint(a, b int) int { 16 | if a > b { 17 | return a 18 | } 19 | return b 20 | } 21 | 22 | func minint(a, b int) int { 23 | if a < b { 24 | return a 25 | } 26 | return b 27 | } 28 | 29 | func sumint(xs []int) int { 30 | sum := 0 31 | for _, x := range xs { 32 | sum += x 33 | } 34 | return sum 35 | } 36 | 37 | // bisect returns an x in [low, high] such that |f(x)| <= tolerance 38 | // using the bisection method. 39 | // 40 | // f(low) and f(high) must have opposite signs. 41 | // 42 | // If f does not have a root in this interval (e.g., it is 43 | // discontiguous), this returns the X of the apparent discontinuity 44 | // and false. 45 | func bisect(f func(float64) float64, low, high, tolerance float64) (float64, bool) { 46 | flow, fhigh := f(low), f(high) 47 | if -tolerance <= flow && flow <= tolerance { 48 | return low, true 49 | } 50 | if -tolerance <= fhigh && fhigh <= tolerance { 51 | return high, true 52 | } 53 | if mathx.Sign(flow) == mathx.Sign(fhigh) { 54 | panic(fmt.Sprintf("root of f is not bracketed by [low, high]; f(%g)=%g f(%g)=%g", low, flow, high, fhigh)) 55 | } 56 | for { 57 | mid := (high + low) / 2 58 | fmid := f(mid) 59 | if -tolerance <= fmid && fmid <= tolerance { 60 | return mid, true 61 | } 62 | if mid == high || mid == low { 63 | return mid, false 64 | } 65 | if mathx.Sign(fmid) == mathx.Sign(flow) { 66 | low = mid 67 | flow = fmid 68 | } else { 69 | high = mid 70 | fhigh = fmid 71 | } 72 | } 73 | } 74 | 75 | // bisectBool implements the bisection method on a boolean function. 76 | // It returns x1, x2 ∈ [low, high], x1 < x2 such that f(x1) != f(x2) 77 | // and x2 - x1 <= xtol. 78 | // 79 | // If f(low) == f(high), it panics. 80 | func bisectBool(f func(float64) bool, low, high, xtol float64) (x1, x2 float64) { 81 | flow, fhigh := f(low), f(high) 82 | if flow == fhigh { 83 | panic(fmt.Sprintf("root of f is not bracketed by [low, high]; f(%g)=%v f(%g)=%v", low, flow, high, fhigh)) 84 | } 85 | for { 86 | if high-low <= xtol { 87 | return low, high 88 | } 89 | mid := (high + low) / 2 90 | if mid == high || mid == low { 91 | return low, high 92 | } 93 | fmid := f(mid) 94 | if fmid == flow { 95 | low = mid 96 | flow = fmid 97 | } else { 98 | high = mid 99 | fhigh = fmid 100 | } 101 | } 102 | } 103 | 104 | // series returns the sum of the series f(0), f(1), ... 105 | // 106 | // This implementation is fast, but subject to round-off error. 107 | func series(f func(float64) float64) float64 { 108 | y, yp := 0.0, 1.0 109 | for n := 0.0; y != yp; n++ { 110 | yp = y 111 | y += f(n) 112 | } 113 | return y 114 | } 115 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/hypergdist.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package stats 6 | 7 | import ( 8 | "math" 9 | 10 | "rsc.io/benchstat/internal/go-moremath/mathx" 11 | ) 12 | 13 | // HypergeometicDist is a hypergeometric distribution. 14 | type HypergeometicDist struct { 15 | // N is the size of the population. N >= 0. 16 | N int 17 | 18 | // K is the number of successes in the population. 0 <= K <= N. 19 | K int 20 | 21 | // Draws is the number of draws from the population. This is 22 | // usually written "n", but is called Draws here because of 23 | // limitations on Go identifier naming. 0 <= Draws <= N. 24 | Draws int 25 | } 26 | 27 | // PMF is the probability of getting exactly int(k) successes in 28 | // d.Draws draws with replacement from a population of size d.N that 29 | // contains exactly d.K successes. 30 | func (d HypergeometicDist) PMF(k float64) float64 { 31 | ki := int(math.Floor(k)) 32 | l, h := d.bounds() 33 | if ki < l || ki > h { 34 | return 0 35 | } 36 | return d.pmf(ki) 37 | } 38 | 39 | func (d HypergeometicDist) pmf(k int) float64 { 40 | return math.Exp(mathx.Lchoose(d.K, k) + mathx.Lchoose(d.N-d.K, d.Draws-k) - mathx.Lchoose(d.N, d.Draws)) 41 | } 42 | 43 | // CDF is the probability of getting int(k) or fewer successes in 44 | // d.Draws draws with replacement from a population of size d.N that 45 | // contains exactly d.K successes. 46 | func (d HypergeometicDist) CDF(k float64) float64 { 47 | // Based on Klotz, A Computational Approach to Statistics. 48 | ki := int(math.Floor(k)) 49 | l, h := d.bounds() 50 | if ki < l { 51 | return 0 52 | } else if ki >= h { 53 | return 1 54 | } 55 | // Use symmetry to compute the smaller sum. 56 | flip := false 57 | if ki > (d.Draws+1)/(d.N+1)*(d.K+1) { 58 | flip = true 59 | ki = d.K - ki - 1 60 | d.Draws = d.N - d.Draws 61 | } 62 | p := d.pmf(ki) * d.sum(ki) 63 | if flip { 64 | p = 1 - p 65 | } 66 | return p 67 | } 68 | 69 | func (d HypergeometicDist) sum(k int) float64 { 70 | const epsilon = 1e-14 71 | sum, ak := 1.0, 1.0 72 | L := maxint(0, d.Draws+d.K-d.N) 73 | for dk := 1; dk <= k-L && ak/sum > epsilon; dk++ { 74 | ak *= float64(1+k-dk) / float64(d.Draws-k+dk) 75 | ak *= float64(d.N-d.K-d.Draws+k+1-dk) / float64(d.K-k+dk) 76 | sum += ak 77 | } 78 | return sum 79 | } 80 | 81 | func (d HypergeometicDist) bounds() (int, int) { 82 | return maxint(0, d.Draws+d.K-d.N), minint(d.Draws, d.K) 83 | } 84 | 85 | func (d HypergeometicDist) Bounds() (float64, float64) { 86 | l, h := d.bounds() 87 | return float64(l), float64(h) 88 | } 89 | 90 | func (d HypergeometicDist) Step() float64 { 91 | return 1 92 | } 93 | 94 | func (d HypergeometicDist) Mean() float64 { 95 | return float64(d.Draws*d.K) / float64(d.N) 96 | } 97 | 98 | func (d HypergeometicDist) Variance() float64 { 99 | return float64(d.Draws*d.K*(d.N-d.K)*(d.N-d.Draws)) / 100 | float64(d.N*d.N*(d.N-1)) 101 | } 102 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/tdist_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package stats 6 | 7 | import "testing" 8 | 9 | func TestT(t *testing.T) { 10 | testFunc(t, "PDF(%v|v=1)", TDist{1}.PDF, map[float64]float64{ 11 | -10: 0.0031515830315226806, 12 | -9: 0.0038818278802901312, 13 | -8: 0.0048970751720583188, 14 | -7: 0.0063661977236758151, 15 | -6: 0.0086029698968592104, 16 | -5: 0.012242687930145799, 17 | -4: 0.018724110951987692, 18 | -3: 0.031830988618379075, 19 | -2: 0.063661977236758149, 20 | -1: 0.15915494309189537, 21 | 0: 0.31830988618379075, 22 | 1: 0.15915494309189537, 23 | 2: 0.063661977236758149, 24 | 3: 0.031830988618379075, 25 | 4: 0.018724110951987692, 26 | 5: 0.012242687930145799, 27 | 6: 0.0086029698968592104, 28 | 7: 0.0063661977236758151, 29 | 8: 0.0048970751720583188, 30 | 9: 0.0038818278802901312}) 31 | testFunc(t, "PDF(%v|v=5)", TDist{5}.PDF, map[float64]float64{ 32 | -10: 4.0989816415343313e-05, 33 | -9: 7.4601664362590413e-05, 34 | -8: 0.00014444303269563934, 35 | -7: 0.00030134402928803911, 36 | -6: 0.00068848154013743002, 37 | -5: 0.0017574383788078445, 38 | -4: 0.0051237270519179133, 39 | -3: 0.017292578800222964, 40 | -2: 0.065090310326216455, 41 | -1: 0.21967979735098059, 42 | 0: 0.3796066898224944, 43 | 1: 0.21967979735098059, 44 | 2: 0.065090310326216455, 45 | 3: 0.017292578800222964, 46 | 4: 0.0051237270519179133, 47 | 5: 0.0017574383788078445, 48 | 6: 0.00068848154013743002, 49 | 7: 0.00030134402928803911, 50 | 8: 0.00014444303269563934, 51 | 9: 7.4601664362590413e-05}) 52 | 53 | testFunc(t, "CDF(%v|v=1)", TDist{1}.CDF, map[float64]float64{ 54 | -10: 0.03172551743055356, 55 | -9: 0.035223287477277272, 56 | -8: 0.039583424160565539, 57 | -7: 0.045167235300866547, 58 | -6: 0.052568456711253424, 59 | -5: 0.06283295818900117, 60 | -4: 0.077979130377369324, 61 | -3: 0.10241638234956672, 62 | -2: 0.14758361765043321, 63 | -1: 0.24999999999999978, 64 | 0: 0.5, 65 | 1: 0.75000000000000022, 66 | 2: 0.85241638234956674, 67 | 3: 0.89758361765043326, 68 | 4: 0.92202086962263075, 69 | 5: 0.93716704181099886, 70 | 6: 0.94743154328874657, 71 | 7: 0.95483276469913347, 72 | 8: 0.96041657583943452, 73 | 9: 0.96477671252272279}) 74 | testFunc(t, "CDF(%v|v=5)", TDist{5}.CDF, map[float64]float64{ 75 | -10: 8.5473787871481787e-05, 76 | -9: 0.00014133998712194845, 77 | -8: 0.00024645333028622187, 78 | -7: 0.00045837375719920225, 79 | -6: 0.00092306914479700695, 80 | -5: 0.0020523579900266612, 81 | -4: 0.0051617077404157259, 82 | -3: 0.015049623948731284, 83 | -2: 0.05096973941492914, 84 | -1: 0.18160873382456127, 85 | 0: 0.5, 86 | 1: 0.81839126617543867, 87 | 2: 0.9490302605850709, 88 | 3: 0.98495037605126878, 89 | 4: 0.99483829225958431, 90 | 5: 0.99794764200997332, 91 | 6: 0.99907693085520299, 92 | 7: 0.99954162624280074, 93 | 8: 0.99975354666971372, 94 | 9: 0.9998586600128780}) 95 | } 96 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/normaldist.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package stats 6 | 7 | import ( 8 | "math" 9 | "math/rand" 10 | ) 11 | 12 | // NormalDist is a normal (Gaussian) distribution with mean Mu and 13 | // standard deviation Sigma. 14 | type NormalDist struct { 15 | Mu, Sigma float64 16 | } 17 | 18 | // StdNormal is the standard normal distribution (Mu = 0, Sigma = 1) 19 | var StdNormal = NormalDist{0, 1} 20 | 21 | // 1/sqrt(2 * pi) 22 | const invSqrt2Pi = 0.39894228040143267793994605993438186847585863116493465766592583 23 | 24 | func (n NormalDist) PDF(x float64) float64 { 25 | z := x - n.Mu 26 | return math.Exp(-z*z/(2*n.Sigma*n.Sigma)) * invSqrt2Pi / n.Sigma 27 | } 28 | 29 | func (n NormalDist) pdfEach(xs []float64) []float64 { 30 | res := make([]float64, len(xs)) 31 | if n.Mu == 0 && n.Sigma == 1 { 32 | // Standard normal fast path 33 | for i, x := range xs { 34 | res[i] = math.Exp(-x*x/2) * invSqrt2Pi 35 | } 36 | } else { 37 | a := -1 / (2 * n.Sigma * n.Sigma) 38 | b := invSqrt2Pi / n.Sigma 39 | for i, x := range xs { 40 | z := x - n.Mu 41 | res[i] = math.Exp(z*z*a) * b 42 | } 43 | } 44 | return res 45 | } 46 | 47 | func (n NormalDist) CDF(x float64) float64 { 48 | return math.Erfc(-(x-n.Mu)/(n.Sigma*math.Sqrt2)) / 2 49 | } 50 | 51 | func (n NormalDist) cdfEach(xs []float64) []float64 { 52 | res := make([]float64, len(xs)) 53 | a := 1 / (n.Sigma * math.Sqrt2) 54 | for i, x := range xs { 55 | res[i] = math.Erfc(-(x-n.Mu)*a) / 2 56 | } 57 | return res 58 | } 59 | 60 | func (n NormalDist) InvCDF(p float64) (x float64) { 61 | // This is based on Peter John Acklam's inverse normal CDF 62 | // algorithm: http://home.online.no/~pjacklam/notes/invnorm/ 63 | const ( 64 | a1 = -3.969683028665376e+01 65 | a2 = 2.209460984245205e+02 66 | a3 = -2.759285104469687e+02 67 | a4 = 1.383577518672690e+02 68 | a5 = -3.066479806614716e+01 69 | a6 = 2.506628277459239e+00 70 | 71 | b1 = -5.447609879822406e+01 72 | b2 = 1.615858368580409e+02 73 | b3 = -1.556989798598866e+02 74 | b4 = 6.680131188771972e+01 75 | b5 = -1.328068155288572e+01 76 | 77 | c1 = -7.784894002430293e-03 78 | c2 = -3.223964580411365e-01 79 | c3 = -2.400758277161838e+00 80 | c4 = -2.549732539343734e+00 81 | c5 = 4.374664141464968e+00 82 | c6 = 2.938163982698783e+00 83 | 84 | d1 = 7.784695709041462e-03 85 | d2 = 3.224671290700398e-01 86 | d3 = 2.445134137142996e+00 87 | d4 = 3.754408661907416e+00 88 | 89 | plow = 0.02425 90 | phigh = 1 - plow 91 | ) 92 | 93 | if p < 0 || p > 1 { 94 | return nan 95 | } else if p == 0 { 96 | return -inf 97 | } else if p == 1 { 98 | return inf 99 | } 100 | 101 | if p < plow { 102 | // Rational approximation for lower region. 103 | q := math.Sqrt(-2 * math.Log(p)) 104 | x = (((((c1*q+c2)*q+c3)*q+c4)*q+c5)*q + c6) / 105 | ((((d1*q+d2)*q+d3)*q+d4)*q + 1) 106 | } else if phigh < p { 107 | // Rational approximation for upper region. 108 | q := math.Sqrt(-2 * math.Log(1-p)) 109 | x = -(((((c1*q+c2)*q+c3)*q+c4)*q+c5)*q + c6) / 110 | ((((d1*q+d2)*q+d3)*q+d4)*q + 1) 111 | } else { 112 | // Rational approximation for central region. 113 | q := p - 0.5 114 | r := q * q 115 | x = (((((a1*r+a2)*r+a3)*r+a4)*r+a5)*r + a6) * q / 116 | (((((b1*r+b2)*r+b3)*r+b4)*r+b5)*r + 1) 117 | } 118 | 119 | // Refine approximation. 120 | e := 0.5*math.Erfc(-x/math.Sqrt2) - p 121 | u := e * math.Sqrt(2*math.Pi) * math.Exp(x*x/2) 122 | x = x - u/(1+x*u/2) 123 | 124 | // Adjust from standard normal. 125 | return x*n.Sigma + n.Mu 126 | } 127 | 128 | func (n NormalDist) Rand(r *rand.Rand) float64 { 129 | var x float64 130 | if r == nil { 131 | x = rand.NormFloat64() 132 | } else { 133 | x = r.NormFloat64() 134 | } 135 | return x*n.Sigma + n.Mu 136 | } 137 | 138 | func (n NormalDist) Bounds() (float64, float64) { 139 | const stddevs = 3 140 | return n.Mu - stddevs*n.Sigma, n.Mu + stddevs*n.Sigma 141 | } 142 | -------------------------------------------------------------------------------- /internal/go-moremath/scale/linear.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package scale 6 | 7 | import ( 8 | "math" 9 | 10 | "rsc.io/benchstat/internal/go-moremath/vec" 11 | ) 12 | 13 | type Linear struct { 14 | // Min and Max specify the lower and upper bounds of the input 15 | // range. The input range [Min, Max] will be linearly mapped 16 | // to the output domain [0, 1]. 17 | Min, Max float64 18 | 19 | // Base specifies a base for computing ticks. Ticks will be 20 | // placed at powers of Base; that is at n*Base^l for n ∈ ℤ and 21 | // some integer l. As a special case, a base of 0 alternates 22 | // between ticks at n*10^l and ticks at 5n*10^l. 23 | Base int 24 | 25 | // If Clamp is true, the input is clamped to [Min, Max]. 26 | Clamp bool 27 | } 28 | 29 | // *Linear is a Quantitative scale. 30 | var _ Quantitative = &Linear{} 31 | 32 | func (s Linear) Map(x float64) float64 { 33 | if s.Min == s.Max { 34 | return 0.5 35 | } 36 | y := (x - s.Min) / (s.Max - s.Min) 37 | if s.Clamp { 38 | y = clamp(y) 39 | } 40 | return y 41 | } 42 | 43 | func (s Linear) Unmap(y float64) float64 { 44 | return y*(s.Max-s.Min) + s.Min 45 | } 46 | 47 | func (s *Linear) SetClamp(clamp bool) { 48 | s.Clamp = clamp 49 | } 50 | 51 | // ebase sanity checks and returns the "effective base" of this scale. 52 | // If s.Base is 0, it returns 10. If s.Base is 1 or negative, it 53 | // panics. 54 | func (s Linear) ebase() int { 55 | if s.Base == 0 { 56 | return 10 57 | } else if s.Base == 1 { 58 | panic("scale.Linear cannot have a base of 1") 59 | } else if s.Base < 0 { 60 | panic("scale.Linear cannot have a negative base") 61 | } 62 | return s.Base 63 | } 64 | 65 | // In the default base, the tick levels are: 66 | // 67 | // Level -2 is a major tick at -0.1, 0, 0.1, etc. 68 | // Level -1 is a major tick at -1, -0.5, 0, 0.5, 1, etc. 69 | // Level 0 is a major tick at -1, 0, 1, etc. 70 | // Level 1 is a major tick at -10, -5, 0, 5, 10, etc. 71 | // Level 2 is a major tick at -10, 0, 10, etc. 72 | // 73 | // That is, level 0 is unit intervals, and we alternate between 74 | // interval *= 5 and interval *= 2. Combined, these give us interval 75 | // *= 10 at every other level. 76 | // 77 | // In non-default bases, level 0 is the same and we alternate between 78 | // interval *= 1 (for consistency) and interval *= base. 79 | 80 | func (s *Linear) guessLevel() int { 81 | return 2 * int(math.Log(s.Max-s.Min)/math.Log(float64(s.ebase()))) 82 | } 83 | 84 | func (s *Linear) spacingAtLevel(level int, roundOut bool) (firstN, lastN, spacing float64) { 85 | // Watch out! Integer division is round toward zero, but we 86 | // need round down, and modulus is signed. 87 | exp, double := math.Floor(float64(level)/2), (level%2 == 1 || level%2 == -1) 88 | spacing = math.Pow(float64(s.ebase()), exp) 89 | if double && s.Base == 0 { 90 | spacing *= 5 91 | } 92 | 93 | // Add a tiny bit of slack to the floor and ceiling below so 94 | // that rounding errors don't significantly affect tick marks. 95 | slack := (s.Max - s.Min) * 1e-10 96 | 97 | if roundOut { 98 | firstN = math.Floor((s.Min + slack) / spacing) 99 | lastN = math.Ceil((s.Max - slack) / spacing) 100 | } else { 101 | firstN = math.Ceil((s.Min - slack) / spacing) 102 | lastN = math.Floor((s.Max + slack) / spacing) 103 | } 104 | return 105 | } 106 | 107 | func (s Linear) Ticks(n int) (major, minor []float64) { 108 | if s.Min == s.Max { 109 | return []float64{s.Min}, []float64{} 110 | } else if s.Min > s.Max { 111 | s.Min, s.Max = s.Max, s.Min 112 | } 113 | 114 | // nticksAtLevel returns the number of ticks in [s.Min, s.Max] 115 | // at the given level. 116 | nticksAtLevel := func(level int) int { 117 | firstN, lastN, _ := s.spacingAtLevel(level, false) 118 | return int(lastN - firstN + 1) 119 | } 120 | 121 | level := autoScale(n, nticksAtLevel, s.guessLevel()) 122 | 123 | ticksAtLevel := func(level int) []float64 { 124 | firstN, lastN, spacing := s.spacingAtLevel(level, false) 125 | n := int(lastN - firstN + 1) 126 | return vec.Linspace(firstN*spacing, lastN*spacing, n) 127 | } 128 | 129 | return ticksAtLevel(level), ticksAtLevel(level - 1) 130 | } 131 | 132 | func (s *Linear) Nice(n int) { 133 | if s.Min == s.Max { 134 | s.Min -= 0.5 135 | s.Max += 0.5 136 | } else if s.Min > s.Max { 137 | s.Min, s.Max = s.Max, s.Min 138 | } 139 | 140 | nticksAtLevel := func(level int) int { 141 | firstN, lastN, _ := s.spacingAtLevel(level, true) 142 | return int(lastN - firstN + 1) 143 | } 144 | 145 | level := autoScale(n, nticksAtLevel, s.guessLevel()) 146 | 147 | firstN, lastN, spacing := s.spacingAtLevel(level, true) 148 | s.Min = firstN * spacing 149 | s.Max = lastN * spacing 150 | } 151 | -------------------------------------------------------------------------------- /internal/go-moremath/scale/linear_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package scale 6 | 7 | import ( 8 | "fmt" 9 | "testing" 10 | 11 | "rsc.io/benchstat/internal/go-moremath/internal/mathtest" 12 | "rsc.io/benchstat/internal/go-moremath/vec" 13 | ) 14 | 15 | func TestLinear(t *testing.T) { 16 | l := Linear{Min: -10, Max: 10} 17 | mathtest.WantFunc(t, fmt.Sprintf("%v.Map", l), l.Map, 18 | map[float64]float64{ 19 | -20: -0.5, 20 | -10: 0, 21 | 0: 0.5, 22 | 10: 1, 23 | 20: 1.5, 24 | }) 25 | mathtest.WantFunc(t, fmt.Sprintf("%v.Unmap", l), l.Unmap, 26 | map[float64]float64{ 27 | -0.5: -20, 28 | 0: -10, 29 | 0.5: 0, 30 | 1: 10, 31 | 1.5: 20, 32 | }) 33 | 34 | l.SetClamp(true) 35 | mathtest.WantFunc(t, fmt.Sprintf("%v.Map", l), l.Map, 36 | map[float64]float64{ 37 | -20: 0, 38 | -10: 0, 39 | 0: 0.5, 40 | 10: 1, 41 | 20: 1, 42 | }) 43 | mathtest.WantFunc(t, fmt.Sprintf("%v.Unmap", l), l.Unmap, 44 | map[float64]float64{ 45 | 0: -10, 46 | 0.5: 0, 47 | 1: 10, 48 | }) 49 | 50 | l = Linear{Min: 5, Max: 5} 51 | mathtest.WantFunc(t, fmt.Sprintf("%v.Map", l), l.Map, 52 | map[float64]float64{ 53 | -10: 0.5, 54 | 0: 0.5, 55 | 10: 0.5, 56 | }) 57 | mathtest.WantFunc(t, fmt.Sprintf("%v.Unmap", l), l.Unmap, 58 | map[float64]float64{ 59 | 0: 5, 60 | 0.5: 5, 61 | 1: 5, 62 | }) 63 | } 64 | 65 | func ticksEq(major, wmajor, minor, wminor []float64) bool { 66 | // TODO: It would be nice to have a deep Aeq. It could also 67 | // support checking predicates like LE(5) or IsNaN within 68 | // structures, which could be used in WantFunc. Heck, deep Aeq 69 | // could subsume WantFunc where the left side is a function 70 | // and the right side is a map from arguments to results, but 71 | // maybe it would be harder to produce a good error message. 72 | if len(major) != len(wmajor) || len(minor) != len(wminor) { 73 | return false 74 | } 75 | for i, v := range major { 76 | if !mathtest.Aeq(wmajor[i], v) { 77 | return false 78 | } 79 | } 80 | for i, v := range minor { 81 | if !mathtest.Aeq(wminor[i], v) { 82 | return false 83 | } 84 | } 85 | return true 86 | } 87 | 88 | func TestLinearTicks(t *testing.T) { 89 | l := Linear{Min: 0, Max: 100} 90 | major, minor := l.Ticks(5) 91 | wmajor, wminor := vec.Linspace(0, 100, 3), vec.Linspace(0, 100, 11) 92 | if !ticksEq(major, wmajor, minor, wminor) { 93 | t.Errorf("%v.Ticks(5) = %v, %v; want %v, %v", l, major, minor, wmajor, wminor) 94 | } 95 | 96 | major, minor = l.Ticks(2) 97 | wmajor, wminor = vec.Linspace(0, 100, 2), vec.Linspace(0, 100, 3) 98 | if !ticksEq(major, wmajor, minor, wminor) { 99 | t.Errorf("%v.Ticks(2) = %v, %v; want %v, %v", l, major, minor, wmajor, wminor) 100 | } 101 | 102 | l.Nice(2) 103 | major, minor = l.Ticks(2) 104 | if !ticksEq(major, wmajor, minor, wminor) { 105 | t.Errorf("%v.Ticks(2) = %v, %v; want %v, %v", l, major, minor, wmajor, wminor) 106 | } 107 | 108 | l = Linear{Min: 15.4, Max: 16.6} 109 | major, minor = l.Ticks(5) 110 | wmajor, wminor = vec.Linspace(15.5, 16.5, 3), vec.Linspace(15.4, 16.6, 13) 111 | if !ticksEq(major, wmajor, minor, wminor) { 112 | t.Errorf("%v.Ticks(5) = %v, %v; want %v, %v", l, major, minor, wmajor, wminor) 113 | } 114 | 115 | l.Nice(5) 116 | major, minor = l.Ticks(5) 117 | wmajor, wminor = vec.Linspace(15, 17, 5), vec.Linspace(15, 17, 21) 118 | if !ticksEq(major, wmajor, minor, wminor) { 119 | t.Errorf("%v.Ticks(5) = %v, %v; want %v, %v", l, major, minor, wmajor, wminor) 120 | } 121 | 122 | // Test negative tick levels. 123 | l = Linear{Min: 9.9989, Max: 10} 124 | major, minor = l.Ticks(2) 125 | wmajor, wminor = vec.Linspace(9.999, 10, 2), vec.Linspace(9.999, 10, 3) 126 | if !ticksEq(major, wmajor, minor, wminor) { 127 | t.Errorf("%v.Ticks(2) = %v, %v; want %v, %v", l, major, minor, wmajor, wminor) 128 | } 129 | 130 | l.Nice(2) 131 | major, minor = l.Ticks(2) 132 | wmajor, wminor = vec.Linspace(9.995, 10, 2), vec.Linspace(9.995, 10, 6) 133 | if !ticksEq(major, wmajor, minor, wminor) { 134 | t.Errorf("%v.Ticks(2) = %v, %v; want %v, %v", l, major, minor, wmajor, wminor) 135 | } 136 | 137 | // Test non-default bases. 138 | l = Linear{Min: 2, Max: 9, Base: 2} 139 | major, minor = l.Ticks(5) 140 | wmajor, wminor = vec.Linspace(2, 8, 4), vec.Linspace(2, 9, 8) 141 | if !ticksEq(major, wmajor, minor, wminor) { 142 | t.Errorf("%v.Ticks(5) = %v, %v; want %v, %v", l, major, minor, wmajor, wminor) 143 | } 144 | 145 | l.Nice(5) 146 | major, minor = l.Ticks(5) 147 | wmajor, wminor = vec.Linspace(2, 10, 5), vec.Linspace(2, 10, 9) 148 | if !ticksEq(major, wmajor, minor, wminor) { 149 | t.Errorf("%v.Ticks(5) = %v, %v; want %v, %v", l, major, minor, wmajor, wminor) 150 | } 151 | } 152 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/ttest.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package stats 6 | 7 | import ( 8 | "errors" 9 | "math" 10 | ) 11 | 12 | // A TTestResult is the result of a t-test. 13 | type TTestResult struct { 14 | // N1 and N2 are the sizes of the input samples. For a 15 | // one-sample t-test, N2 is 0. 16 | N1, N2 int 17 | 18 | // T is the value of the t-statistic for this t-test. 19 | T float64 20 | 21 | // DoF is the degrees of freedom for this t-test. 22 | DoF float64 23 | 24 | // AltHypothesis specifies the alternative hypothesis tested 25 | // by this test against the null hypothesis that there is no 26 | // difference in the means of the samples. 27 | AltHypothesis LocationHypothesis 28 | 29 | // P is p-value for this t-test for the given null hypothesis. 30 | P float64 31 | } 32 | 33 | func newTTestResult(n1, n2 int, t, dof float64, alt LocationHypothesis) *TTestResult { 34 | dist := TDist{dof} 35 | var p float64 36 | switch alt { 37 | case LocationDiffers: 38 | p = 2 * (1 - dist.CDF(math.Abs(t))) 39 | case LocationLess: 40 | p = dist.CDF(t) 41 | case LocationGreater: 42 | p = 1 - dist.CDF(t) 43 | } 44 | return &TTestResult{N1: n1, N2: n2, T: t, DoF: dof, AltHypothesis: alt, P: p} 45 | } 46 | 47 | // A TTestSample is a sample that can be used for a one or two sample 48 | // t-test. 49 | type TTestSample interface { 50 | Weight() float64 51 | Mean() float64 52 | Variance() float64 53 | } 54 | 55 | var ( 56 | ErrSampleSize = errors.New("sample is too small") 57 | ErrZeroVariance = errors.New("sample has zero variance") 58 | ErrMismatchedSamples = errors.New("samples have different lengths") 59 | ) 60 | 61 | // TwoSampleTTest performs a two-sample (unpaired) Student's t-test on 62 | // samples x1 and x2. This is a test of the null hypothesis that x1 63 | // and x2 are drawn from populations with equal means. It assumes x1 64 | // and x2 are independent samples, that the distributions have equal 65 | // variance, and that the populations are normally distributed. 66 | func TwoSampleTTest(x1, x2 TTestSample, alt LocationHypothesis) (*TTestResult, error) { 67 | n1, n2 := x1.Weight(), x2.Weight() 68 | if n1 == 0 || n2 == 0 { 69 | return nil, ErrSampleSize 70 | } 71 | v1, v2 := x1.Variance(), x2.Variance() 72 | if v1 == 0 && v2 == 0 { 73 | return nil, ErrZeroVariance 74 | } 75 | 76 | dof := n1 + n2 - 2 77 | v12 := ((n1-1)*v1 + (n2-1)*v2) / dof 78 | t := (x1.Mean() - x2.Mean()) / math.Sqrt(v12*(1/n1+1/n2)) 79 | return newTTestResult(int(n1), int(n2), t, dof, alt), nil 80 | } 81 | 82 | // TwoSampleWelchTTest performs a two-sample (unpaired) Welch's t-test 83 | // on samples x1 and x2. This is like TwoSampleTTest, but does not 84 | // assume the distributions have equal variance. 85 | func TwoSampleWelchTTest(x1, x2 TTestSample, alt LocationHypothesis) (*TTestResult, error) { 86 | n1, n2 := x1.Weight(), x2.Weight() 87 | if n1 <= 1 || n2 <= 1 { 88 | // TODO: Can we still do this with n == 1? 89 | return nil, ErrSampleSize 90 | } 91 | v1, v2 := x1.Variance(), x2.Variance() 92 | if v1 == 0 && v2 == 0 { 93 | return nil, ErrZeroVariance 94 | } 95 | 96 | dof := math.Pow(v1/n1+v2/n2, 2) / 97 | (math.Pow(v1/n1, 2)/(n1-1) + math.Pow(v2/n2, 2)/(n2-1)) 98 | s := math.Sqrt(v1/n1 + v2/n2) 99 | t := (x1.Mean() - x2.Mean()) / s 100 | return newTTestResult(int(n1), int(n2), t, dof, alt), nil 101 | } 102 | 103 | // PairedTTest performs a two-sample paired t-test on samples x1 and 104 | // x2. If μ0 is non-zero, this tests if the average of the difference 105 | // is significantly different from μ0. If x1 and x2 are identical, 106 | // this returns nil. 107 | func PairedTTest(x1, x2 []float64, μ0 float64, alt LocationHypothesis) (*TTestResult, error) { 108 | if len(x1) != len(x2) { 109 | return nil, ErrMismatchedSamples 110 | } 111 | if len(x1) <= 1 { 112 | // TODO: Can we still do this with n == 1? 113 | return nil, ErrSampleSize 114 | } 115 | 116 | dof := float64(len(x1) - 1) 117 | 118 | diff := make([]float64, len(x1)) 119 | for i := range x1 { 120 | diff[i] = x1[i] - x2[i] 121 | } 122 | sd := StdDev(diff) 123 | if sd == 0 { 124 | // TODO: Can we still do the test? 125 | return nil, ErrZeroVariance 126 | } 127 | t := (Mean(diff) - μ0) * math.Sqrt(float64(len(x1))) / sd 128 | return newTTestResult(len(x1), len(x2), t, dof, alt), nil 129 | } 130 | 131 | // OneSampleTTest performs a one-sample t-test on sample x. This tests 132 | // the null hypothesis that the population mean is equal to μ0. This 133 | // assumes the distribution of the population of sample means is 134 | // normal. 135 | func OneSampleTTest(x TTestSample, μ0 float64, alt LocationHypothesis) (*TTestResult, error) { 136 | n, v := x.Weight(), x.Variance() 137 | if n == 0 { 138 | return nil, ErrSampleSize 139 | } 140 | if v == 0 { 141 | // TODO: Can we still do the test? 142 | return nil, ErrZeroVariance 143 | } 144 | dof := n - 1 145 | t := (x.Mean() - μ0) * math.Sqrt(n) / math.Sqrt(v) 146 | return newTTestResult(int(n), 0, t, dof, alt), nil 147 | } 148 | -------------------------------------------------------------------------------- /internal/go-moremath/scale/log.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package scale 6 | 7 | import "math" 8 | 9 | type Log struct { 10 | private struct{} 11 | 12 | // Min and Max specify the lower and upper bounds of the input 13 | // range. The input range [Min, Max] will be mapped to the 14 | // output domain [0, 1]. The range [Min, Max] must not include 15 | // 0. 16 | Min, Max float64 17 | 18 | // Base specifies the base of the logarithm for computing 19 | // ticks. Typically, ticks will be placed at Base^n for n ∈ ℤ. 20 | Base int 21 | 22 | // If Clamp is true, the input is clamped to [Min, Max]. 23 | Clamp bool 24 | 25 | // TODO: Let the user specify the minor ticks. Default to [1, 26 | // .. 9], but [1, 3] and [1, 2, 5] are common. 27 | } 28 | 29 | // *Log is a Quantitative scale. 30 | var _ Quantitative = &Log{} 31 | 32 | // NewLog constructs a Log scale. If the arguments are out of range, 33 | // it returns a RangeErr. 34 | func NewLog(min, max float64, base int) (Log, error) { 35 | if min > max { 36 | min, max = max, min 37 | } 38 | 39 | if base <= 1 { 40 | return Log{}, RangeErr("Log scale base must be 2 or more") 41 | } 42 | if min <= 0 && max >= 0 { 43 | return Log{}, RangeErr("Log scale range cannot include 0") 44 | } 45 | 46 | return Log{Min: min, Max: max, Base: base}, nil 47 | } 48 | 49 | func (s *Log) ebounds() (bool, float64, float64) { 50 | if s.Min < 0 { 51 | return true, -s.Max, -s.Min 52 | } 53 | return false, s.Min, s.Max 54 | } 55 | 56 | func (s Log) Map(x float64) float64 { 57 | neg, min, max := s.ebounds() 58 | if neg { 59 | x = -x 60 | } 61 | if x <= 0 { 62 | return math.NaN() 63 | } 64 | if min == max { 65 | return 0.5 66 | } 67 | 68 | logMin, logMax := math.Log(min), math.Log(max) 69 | y := (math.Log(x) - logMin) / (logMax - logMin) 70 | if neg { 71 | y = 1 - y 72 | } 73 | if s.Clamp { 74 | y = clamp(y) 75 | } 76 | return y 77 | } 78 | 79 | func (s Log) Unmap(y float64) float64 { 80 | neg, min, max := s.ebounds() 81 | if neg { 82 | y = 1 - y 83 | } 84 | logMin, logMax := math.Log(min), math.Log(max) 85 | x := math.Exp(y*(logMax-logMin) + logMin) 86 | if neg { 87 | x = -x 88 | } 89 | return x 90 | } 91 | 92 | func (s *Log) SetClamp(clamp bool) { 93 | s.Clamp = clamp 94 | } 95 | 96 | // The tick levels are: 97 | // 98 | // Level 0 is a major tick at Base^n (1, 10, 100, ...) 99 | // Level 1 is a major tick at Base^2^n (1, 100, 10000, ...) 100 | // Level 2 is a major tick at Base^4^n (1, 10000, 100000000, ...) 101 | // 102 | // That is, each level eliminates every other tick. Levels below 0 are 103 | // not defined. 104 | 105 | func logb(x float64, b float64) float64 { 106 | return math.Log(x) / math.Log(b) 107 | } 108 | 109 | func (s *Log) spacingAtLevel(level int, roundOut bool) (firstN, lastN, ebase float64) { 110 | _, min, max := s.ebounds() 111 | 112 | // Compute the effective base at this level. 113 | ebase = math.Pow(float64(s.Base), math.Pow(2, float64(level))) 114 | lmin, lmax := logb(min, ebase), logb(max, ebase) 115 | 116 | // Add a tiny bit of slack to the floor and ceiling so that 117 | // rounding errors don't significantly affect tick marks. 118 | slack := (lmax - lmin) * 1e-10 119 | 120 | if roundOut { 121 | firstN = math.Floor(lmin + slack) 122 | lastN = math.Ceil(lmax - slack) 123 | } else { 124 | firstN = math.Ceil(lmin - slack) 125 | lastN = math.Floor(lmax + slack) 126 | } 127 | 128 | return 129 | } 130 | 131 | func (s Log) Ticks(n int) (major, minor []float64) { 132 | if s.Min == s.Max { 133 | return []float64{s.Min}, []float64{} 134 | } 135 | 136 | neg, min, max := s.ebounds() 137 | 138 | // nticksAtLevel returns the number of ticks in [min, max] at 139 | // the given level. 140 | nticksAtLevel := func(level int) int { 141 | if level < 0 { 142 | const maxInt = int(^uint(0) >> 1) 143 | return maxInt 144 | } 145 | 146 | firstN, lastN, _ := s.spacingAtLevel(level, false) 147 | return int(lastN - firstN + 1) 148 | } 149 | 150 | level := autoScale(n, nticksAtLevel, 0) 151 | 152 | ticksAtLevel := func(level int) []float64 { 153 | ticks := []float64{} 154 | 155 | if level < 0 { 156 | // Minor ticks for level 0. Get the major 157 | // ticks, but round out so we can fill in 158 | // minor ticks outside of the major ticks. 159 | firstN, lastN, _ := s.spacingAtLevel(0, true) 160 | for n := firstN; n <= lastN; n++ { 161 | tick := math.Pow(float64(s.Base), n) 162 | step := tick 163 | for i := 0; i < s.Base-1; i++ { 164 | if min <= tick && tick <= max { 165 | ticks = append(ticks, tick) 166 | } 167 | tick += step 168 | } 169 | } 170 | } else { 171 | firstN, lastN, base := s.spacingAtLevel(level, false) 172 | for n := firstN; n <= lastN; n++ { 173 | ticks = append(ticks, math.Pow(base, n)) 174 | } 175 | } 176 | 177 | if neg { 178 | // Negate and reverse order of ticks. 179 | for i := 0; i < (len(ticks)+1)/2; i++ { 180 | j := len(ticks) - i - 1 181 | ticks[i], ticks[j] = -ticks[j], -ticks[i] 182 | } 183 | } 184 | 185 | return ticks 186 | } 187 | 188 | return ticksAtLevel(level), ticksAtLevel(level - 1) 189 | } 190 | 191 | func (s *Log) Nice(n int) { 192 | neg, _, _ := s.ebounds() 193 | 194 | nticksAtLevel := func(level int) int { 195 | firstN, lastN, _ := s.spacingAtLevel(level, true) 196 | return int(lastN - firstN + 1) 197 | } 198 | 199 | level := autoScale(n, nticksAtLevel, 0) 200 | 201 | firstN, lastN, base := s.spacingAtLevel(level, true) 202 | s.Min = math.Pow(base, firstN) 203 | s.Max = math.Pow(base, lastN) 204 | if neg { 205 | s.Min, s.Max = -s.Max, -s.Min 206 | } 207 | } 208 | -------------------------------------------------------------------------------- /internal/go-moremath/scale/log_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package scale 6 | 7 | import ( 8 | "fmt" 9 | "math" 10 | "testing" 11 | 12 | "rsc.io/benchstat/internal/go-moremath/internal/mathtest" 13 | "rsc.io/benchstat/internal/go-moremath/vec" 14 | ) 15 | 16 | func TestLog(t *testing.T) { 17 | l, err := NewLog(0, 10, 10) 18 | if _, ok := err.(RangeErr); !ok { 19 | t.Errorf("want RangeErr; got %v", err) 20 | } 21 | l, err = NewLog(-10, 0, 10) 22 | if _, ok := err.(RangeErr); !ok { 23 | t.Errorf("want RangeErr; got %v", err) 24 | } 25 | l, err = NewLog(-10, 10, 10) 26 | if _, ok := err.(RangeErr); !ok { 27 | t.Errorf("want RangeErr; got %v", err) 28 | } 29 | l, err = NewLog(10, 20, 0) 30 | if _, ok := err.(RangeErr); !ok { 31 | t.Errorf("want RangeErr; got %v", err) 32 | } 33 | 34 | l, _ = NewLog(1, 10, 10) 35 | mathtest.WantFunc(t, fmt.Sprintf("%v.Map", l), l.Map, 36 | map[float64]float64{ 37 | -1: math.NaN(), 38 | 0: math.NaN(), 39 | 0.1: -1, 40 | 1: 0, 41 | math.Pow(10, 0.5): 0.5, 42 | 10: 1, 43 | 100: 2, 44 | }) 45 | mathtest.WantFunc(t, fmt.Sprintf("%v.Unmap", l), l.Unmap, 46 | map[float64]float64{ 47 | -1: 0.1, 48 | 0: 1, 49 | 0.5: math.Pow(10, 0.5), 50 | 1: 10, 51 | 2: 100, 52 | }) 53 | 54 | l.SetClamp(true) 55 | mathtest.WantFunc(t, fmt.Sprintf("%v.Map", l), l.Map, 56 | map[float64]float64{ 57 | -1: math.NaN(), 58 | 0: math.NaN(), 59 | 0.1: 0, 60 | 1: 0, 61 | math.Pow(10, 0.5): 0.5, 62 | 10: 1, 63 | 100: 1, 64 | }) 65 | mathtest.WantFunc(t, fmt.Sprintf("%v.Unmap", l), l.Unmap, 66 | map[float64]float64{ 67 | 0: 1, 68 | 0.5: math.Pow(10, 0.5), 69 | 1: 10, 70 | }) 71 | 72 | l, _ = NewLog(-1, -10, 10) 73 | mathtest.WantFunc(t, fmt.Sprintf("%v.Map", l), l.Map, 74 | map[float64]float64{ 75 | 1: math.NaN(), 76 | 0: math.NaN(), 77 | -0.1: 2, 78 | -1: 1, 79 | -math.Pow(10, 0.5): 0.5, 80 | -10: 0, 81 | -100: -1, 82 | }) 83 | mathtest.WantFunc(t, fmt.Sprintf("%v.Unmap", l), l.Unmap, 84 | map[float64]float64{ 85 | 2: -0.1, 86 | 1: -1, 87 | 0.5: -math.Pow(10, 0.5), 88 | 0: -10, 89 | -1: -100, 90 | }) 91 | 92 | l, _ = NewLog(5, 5, 10) 93 | mathtest.WantFunc(t, fmt.Sprintf("%v.Map", l), l.Map, 94 | map[float64]float64{ 95 | -1: math.NaN(), 96 | 0: math.NaN(), 97 | 1: 0.5, 98 | 10: 0.5, 99 | }) 100 | mathtest.WantFunc(t, fmt.Sprintf("%v.Unmap", l), l.Unmap, 101 | map[float64]float64{ 102 | 0: 5, 103 | 0.5: 5, 104 | 1: 5, 105 | }) 106 | } 107 | 108 | func TestLogTicks(t *testing.T) { 109 | // Test the obvious. 110 | l, _ := NewLog(1, 10, 10) 111 | major, minor := l.Ticks(5) 112 | wmajor, wminor := vec.Logspace(0, 1, 2, 10), vec.Linspace(1, 10, 10) 113 | if !ticksEq(major, wmajor, minor, wminor) { 114 | t.Errorf("%v.Ticks(5) = %v, %v; want %v, %v", l, major, minor, wmajor, wminor) 115 | } 116 | 117 | // Test two orders of magnitude. 118 | l, _ = NewLog(1, 100, 10) 119 | major, minor = l.Ticks(5) 120 | wmajor, wminor = vec.Logspace(0, 2, 3, 10), vec.Concat(vec.Linspace(1, 9, 9), vec.Linspace(10, 100, 10)) 121 | if !ticksEq(major, wmajor, minor, wminor) { 122 | t.Errorf("%v.Ticks(5) = %v, %v; want %v, %v", l, major, minor, wmajor, wminor) 123 | } 124 | 125 | // Test many orders of magnitude (higher tick levels). 126 | l, _ = NewLog(1, 1e8, 10) 127 | major, minor = l.Ticks(5) 128 | wmajor, wminor = vec.Logspace(0, 4, 5, 100), vec.Logspace(0, 8, 9, 10) 129 | if !ticksEq(major, wmajor, minor, wminor) { 130 | t.Errorf("%v.Ticks(5) = %v, %v; want %v, %v", l, major, minor, wmajor, wminor) 131 | } 132 | 133 | major, minor = l.Ticks(4) 134 | wmajor, wminor = vec.Logspace(0, 2, 3, 10000), vec.Logspace(0, 4, 5, 100) 135 | if !ticksEq(major, wmajor, minor, wminor) { 136 | t.Errorf("%v.Ticks(5) = %v, %v; want %v, %v", l, major, minor, wmajor, wminor) 137 | } 138 | 139 | // Test minor ticks outside major ticks. 140 | l, _ = NewLog(0.91, 200, 10) 141 | major, minor = l.Ticks(5) 142 | wmajor, wminor = vec.Logspace(0, 2, 3, 10), vec.Concat(vec.Linspace(1, 9, 9), vec.Linspace(10, 100, 10), []float64{200}) 143 | if !ticksEq(major, wmajor, minor, wminor) { 144 | t.Errorf("%v.Ticks(5) = %v, %v; want %v, %v", l, major, minor, wmajor, wminor) 145 | } 146 | 147 | // Test nicing. 148 | l.Nice(5) 149 | major, minor = l.Ticks(5) 150 | wmajor, wminor = vec.Logspace(-1, 3, 5, 10), vec.Concat(vec.Linspace(0.1, 0.9, 9), vec.Linspace(1, 9, 9), vec.Linspace(10, 90, 9), vec.Linspace(100, 1000, 10)) 151 | if !ticksEq(major, wmajor, minor, wminor) { 152 | t.Errorf("%v.Ticks(5) = %v, %v; want %v, %v", l, major, minor, wmajor, wminor) 153 | } 154 | 155 | // Test negative ticks. 156 | neg := vec.Vectorize(func(x float64) float64 { return -x }) 157 | l, _ = NewLog(-1, -100, 10) 158 | major, minor = l.Ticks(5) 159 | wmajor, wminor = neg(vec.Logspace(2, 0, 3, 10)), neg(vec.Concat(vec.Linspace(100, 10, 10), vec.Linspace(9, 1, 9))) 160 | if !ticksEq(major, wmajor, minor, wminor) { 161 | t.Errorf("%v.Ticks(5) = %v, %v; want %v, %v", l, major, minor, wmajor, wminor) 162 | } 163 | 164 | major, minor = l.Ticks(2) 165 | wmajor, wminor = neg(vec.Logspace(1, 0, 2, 100)), neg(vec.Logspace(2, 0, 3, 10)) 166 | if !ticksEq(major, wmajor, minor, wminor) { 167 | t.Errorf("%v.Ticks(5) = %v, %v; want %v, %v", l, major, minor, wmajor, wminor) 168 | } 169 | 170 | l.Nice(5) 171 | major, minor = l.Ticks(5) 172 | wmajor, wminor = neg(vec.Logspace(2, 0, 3, 10)), neg(vec.Concat(vec.Linspace(100, 10, 10), vec.Linspace(9, 1, 9))) 173 | if !ticksEq(major, wmajor, minor, wminor) { 174 | t.Errorf("%v.Ticks(5) = %v, %v; want %v, %v", l, major, minor, wmajor, wminor) 175 | } 176 | 177 | // Test Min==Max. 178 | l, _ = NewLog(5, 5, 10) 179 | major, minor = l.Ticks(5) 180 | wmajor, wminor = []float64{5}, []float64{} 181 | if !ticksEq(major, wmajor, minor, wminor) { 182 | t.Errorf("%v.Ticks(5) = %v, %v; want %v, %v", l, major, minor, wmajor, wminor) 183 | } 184 | } 185 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/dist.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package stats 6 | 7 | import "math/rand" 8 | 9 | // A DistCommon is a statistical distribution. DistCommon is a base 10 | // interface provided by both continuous and discrete distributions. 11 | type DistCommon interface { 12 | // CDF returns the cumulative probability Pr[X <= x]. 13 | // 14 | // For continuous distributions, the CDF is the integral of 15 | // the PDF from -inf to x. 16 | // 17 | // For discrete distributions, the CDF is the sum of the PMF 18 | // at all defined points from -inf to x, inclusive. Note that 19 | // the CDF of a discrete distribution is defined for the whole 20 | // real line (unlike the PMF) but has discontinuities where 21 | // the PMF is non-zero. 22 | // 23 | // The CDF is a monotonically increasing function and has a 24 | // domain of all real numbers. If the distribution has bounded 25 | // support, it has a range of [0, 1]; otherwise it has a range 26 | // of (0, 1). Finally, CDF(-inf)==0 and CDF(inf)==1. 27 | CDF(x float64) float64 28 | 29 | // Bounds returns reasonable bounds for this distribution's 30 | // PDF/PMF and CDF. The total weight outside of these bounds 31 | // should be approximately 0. 32 | // 33 | // For a discrete distribution, both bounds are integer 34 | // multiples of Step(). 35 | // 36 | // If this distribution has finite support, it returns exact 37 | // bounds l, h such that CDF(l')=0 for all l' < l and 38 | // CDF(h')=1 for all h' >= h. 39 | Bounds() (float64, float64) 40 | } 41 | 42 | // A Dist is a continuous statistical distribution. 43 | type Dist interface { 44 | DistCommon 45 | 46 | // PDF returns the value of the probability density function 47 | // of this distribution at x. 48 | PDF(x float64) float64 49 | } 50 | 51 | // A DiscreteDist is a discrete statistical distribution. 52 | // 53 | // Most discrete distributions are defined only at integral values of 54 | // the random variable. However, some are defined at other intervals, 55 | // so this interface takes a float64 value for the random variable. 56 | // The probability mass function rounds down to the nearest defined 57 | // point. Note that float64 values can exactly represent integer 58 | // values between ±2**53, so this generally shouldn't be an issue for 59 | // integer-valued distributions (likewise, for half-integer-valued 60 | // distributions, float64 can exactly represent all values between 61 | // ±2**52). 62 | type DiscreteDist interface { 63 | DistCommon 64 | 65 | // PMF returns the value of the probability mass function 66 | // Pr[X = x'], where x' is x rounded down to the nearest 67 | // defined point on the distribution. 68 | // 69 | // Note for implementers: for integer-valued distributions, 70 | // round x using int(math.Floor(x)). Do not use int(x), since 71 | // that truncates toward zero (unless all x <= 0 are handled 72 | // the same). 73 | PMF(x float64) float64 74 | 75 | // Step returns s, where the distribution is defined for sℕ. 76 | Step() float64 77 | } 78 | 79 | // TODO: Add a Support method for finite support distributions? Or 80 | // maybe just another return value from Bounds indicating that the 81 | // bounds are exact? 82 | 83 | // TODO: Plot method to return a pre-configured Plot object with 84 | // reasonable bounds and an integral function? Have to distinguish 85 | // PDF/CDF/InvCDF. Three methods? Argument? 86 | // 87 | // Doesn't have to be a method of Dist. Could be just a function that 88 | // takes a Dist and uses Bounds. 89 | 90 | // InvCDF returns the inverse CDF function of the given distribution 91 | // (also known as the quantile function or the percent point 92 | // function). This is a function f such that f(dist.CDF(x)) == x. If 93 | // dist.CDF is only weakly monotonic (that it, there are intervals 94 | // over which it is constant) and y > 0, f returns the smallest x that 95 | // satisfies this condition. In general, the inverse CDF is not 96 | // well-defined for y==0, but for convenience if y==0, f returns the 97 | // largest x that satisfies this condition. For distributions with 98 | // infinite support both the largest and smallest x are -Inf; however, 99 | // for distributions with finite support, this is the lower bound of 100 | // the support. 101 | // 102 | // If y < 0 or y > 1, f returns NaN. 103 | // 104 | // If dist implements InvCDF(float64) float64, this returns that 105 | // method. Otherwise, it returns a function that uses a generic 106 | // numerical method to construct the inverse CDF at y by finding x 107 | // such that dist.CDF(x) == y. This may have poor precision around 108 | // points of discontinuity, including f(0) and f(1). 109 | func InvCDF(dist DistCommon) func(y float64) (x float64) { 110 | type invCDF interface { 111 | InvCDF(float64) float64 112 | } 113 | if dist, ok := dist.(invCDF); ok { 114 | return dist.InvCDF 115 | } 116 | 117 | // Otherwise, use a numerical algorithm. 118 | // 119 | // TODO: For discrete distributions, use the step size to 120 | // inform this computation. 121 | return func(y float64) (x float64) { 122 | const almostInf = 1e100 123 | const xtol = 1e-16 124 | 125 | if y < 0 || y > 1 { 126 | return nan 127 | } else if y == 0 { 128 | l, _ := dist.Bounds() 129 | if dist.CDF(l) == 0 { 130 | // Finite support 131 | return l 132 | } else { 133 | // Infinite support 134 | return -inf 135 | } 136 | } else if y == 1 { 137 | _, h := dist.Bounds() 138 | if dist.CDF(h) == 1 { 139 | // Finite support 140 | return h 141 | } else { 142 | // Infinite support 143 | return inf 144 | } 145 | } 146 | 147 | // Find loX, hiX for which cdf(loX) < y <= cdf(hiX). 148 | var loX, loY, hiX, hiY float64 149 | x1, y1 := 0.0, dist.CDF(0) 150 | xdelta := 1.0 151 | if y1 < y { 152 | hiX, hiY = x1, y1 153 | for hiY < y && hiX != inf { 154 | loX, loY, hiX = hiX, hiY, hiX+xdelta 155 | hiY = dist.CDF(hiX) 156 | xdelta *= 2 157 | } 158 | } else { 159 | loX, loY = x1, y1 160 | for y <= loY && loX != -inf { 161 | hiX, hiY, loX = loX, loY, loX-xdelta 162 | loY = dist.CDF(loX) 163 | xdelta *= 2 164 | } 165 | } 166 | if loX == -inf { 167 | return loX 168 | } else if hiX == inf { 169 | return hiX 170 | } 171 | 172 | // Use bisection on the interval to find the smallest 173 | // x at which cdf(x) <= y. 174 | _, x = bisectBool(func(x float64) bool { 175 | return dist.CDF(x) < y 176 | }, loX, hiX, xtol) 177 | return 178 | } 179 | } 180 | 181 | // Rand returns a random number generator that draws from the given 182 | // distribution. The returned generator takes an optional source of 183 | // randomness; if this is nil, it uses the default global source. 184 | // 185 | // If dist implements Rand(*rand.Rand) float64, Rand returns that 186 | // method. Otherwise, it returns a generic generator based on dist's 187 | // inverse CDF (which may in turn use an efficient implementation or a 188 | // generic numerical implementation; see InvCDF). 189 | func Rand(dist DistCommon) func(*rand.Rand) float64 { 190 | type distRand interface { 191 | Rand(*rand.Rand) float64 192 | } 193 | if dist, ok := dist.(distRand); ok { 194 | return dist.Rand 195 | } 196 | 197 | // Otherwise, use a generic algorithm. 198 | inv := InvCDF(dist) 199 | return func(r *rand.Rand) float64 { 200 | var y float64 201 | for y == 0 { 202 | if r == nil { 203 | y = rand.Float64() 204 | } else { 205 | y = r.Float64() 206 | } 207 | } 208 | return inv(y) 209 | } 210 | } 211 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/sample.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package stats 6 | 7 | import ( 8 | "math" 9 | "sort" 10 | 11 | "rsc.io/benchstat/internal/go-moremath/vec" 12 | ) 13 | 14 | // Sample is a collection of possibly weighted data points. 15 | type Sample struct { 16 | // Xs is the slice of sample values. 17 | Xs []float64 18 | 19 | // Weights[i] is the weight of sample Xs[i]. If Weights is 20 | // nil, all Xs have weight 1. Weights must have the same 21 | // length of Xs and all values must be non-negative. 22 | Weights []float64 23 | 24 | // Sorted indicates that Xs is sorted in ascending order. 25 | Sorted bool 26 | } 27 | 28 | // Bounds returns the minimum and maximum values of xs. 29 | func Bounds(xs []float64) (min float64, max float64) { 30 | if len(xs) == 0 { 31 | return math.NaN(), math.NaN() 32 | } 33 | min, max = xs[0], xs[0] 34 | for _, x := range xs { 35 | if x < min { 36 | min = x 37 | } 38 | if x > max { 39 | max = x 40 | } 41 | } 42 | return 43 | } 44 | 45 | // Bounds returns the minimum and maximum values of the Sample. 46 | // 47 | // If the Sample is weighted, this ignores samples with zero weight. 48 | // 49 | // This is constant time if s.Sorted and there are no zero-weighted 50 | // values. 51 | func (s Sample) Bounds() (min float64, max float64) { 52 | if len(s.Xs) == 0 || (!s.Sorted && s.Weights == nil) { 53 | return Bounds(s.Xs) 54 | } 55 | 56 | if s.Sorted { 57 | if s.Weights == nil { 58 | return s.Xs[0], s.Xs[len(s.Xs)-1] 59 | } 60 | min, max = math.NaN(), math.NaN() 61 | for i, w := range s.Weights { 62 | if w != 0 { 63 | min = s.Xs[i] 64 | break 65 | } 66 | } 67 | if math.IsNaN(min) { 68 | return 69 | } 70 | for i := range s.Weights { 71 | if s.Weights[len(s.Weights)-i-1] != 0 { 72 | max = s.Xs[len(s.Weights)-i-1] 73 | break 74 | } 75 | } 76 | } else { 77 | min, max = math.Inf(1), math.Inf(-1) 78 | for i, x := range s.Xs { 79 | w := s.Weights[i] 80 | if x < min && w != 0 { 81 | min = x 82 | } 83 | if x > max && w != 0 { 84 | max = x 85 | } 86 | } 87 | if math.IsInf(min, 0) { 88 | min, max = math.NaN(), math.NaN() 89 | } 90 | } 91 | return 92 | } 93 | 94 | // Sum returns the (possibly weighted) sum of the Sample. 95 | func (s Sample) Sum() float64 { 96 | if s.Weights == nil { 97 | return vec.Sum(s.Xs) 98 | } 99 | sum := 0.0 100 | for i, x := range s.Xs { 101 | sum += x * s.Weights[i] 102 | } 103 | return sum 104 | } 105 | 106 | // Weight returns the total weight of the Sasmple. 107 | func (s Sample) Weight() float64 { 108 | if s.Weights == nil { 109 | return float64(len(s.Xs)) 110 | } 111 | return vec.Sum(s.Weights) 112 | } 113 | 114 | // Mean returns the arithmetic mean of xs. 115 | func Mean(xs []float64) float64 { 116 | if len(xs) == 0 { 117 | return math.NaN() 118 | } 119 | m := 0.0 120 | for i, x := range xs { 121 | m += (x - m) / float64(i+1) 122 | } 123 | return m 124 | } 125 | 126 | // Mean returns the arithmetic mean of the Sample. 127 | func (s Sample) Mean() float64 { 128 | if len(s.Xs) == 0 || s.Weights == nil { 129 | return Mean(s.Xs) 130 | } 131 | 132 | m, wsum := 0.0, 0.0 133 | for i, x := range s.Xs { 134 | // Use weighted incremental mean: 135 | // m_i = (1 - w_i/wsum_i) * m_(i-1) + (w_i/wsum_i) * x_i 136 | // = m_(i-1) + (x_i - m_(i-1)) * (w_i/wsum_i) 137 | w := s.Weights[i] 138 | wsum += w 139 | m += (x - m) * w / wsum 140 | } 141 | return m 142 | } 143 | 144 | // GeoMean returns the geometric mean of xs. xs must be positive. 145 | func GeoMean(xs []float64) float64 { 146 | if len(xs) == 0 { 147 | return math.NaN() 148 | } 149 | m := 0.0 150 | for i, x := range xs { 151 | if x <= 0 { 152 | return math.NaN() 153 | } 154 | lx := math.Log(x) 155 | m += (lx - m) / float64(i+1) 156 | } 157 | return math.Exp(m) 158 | } 159 | 160 | // GeoMean returns the geometric mean of the Sample. All samples 161 | // values must be positive. 162 | func (s Sample) GeoMean() float64 { 163 | if len(s.Xs) == 0 || s.Weights == nil { 164 | return GeoMean(s.Xs) 165 | } 166 | 167 | m, wsum := 0.0, 0.0 168 | for i, x := range s.Xs { 169 | w := s.Weights[i] 170 | wsum += w 171 | lx := math.Log(x) 172 | m += (lx - m) * w / wsum 173 | } 174 | return math.Exp(m) 175 | } 176 | 177 | // Variance returns the sample variance of xs. 178 | func Variance(xs []float64) float64 { 179 | if len(xs) == 0 { 180 | return math.NaN() 181 | } else if len(xs) <= 1 { 182 | return 0 183 | } 184 | 185 | // Based on Wikipedia's presentation of Welford 1962 186 | // (http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Online_algorithm). 187 | // This is more numerically stable than the standard two-pass 188 | // formula and not prone to massive cancellation. 189 | mean, M2 := 0.0, 0.0 190 | for n, x := range xs { 191 | delta := x - mean 192 | mean += delta / float64(n+1) 193 | M2 += delta * (x - mean) 194 | } 195 | return M2 / float64(len(xs)-1) 196 | } 197 | 198 | func (s Sample) Variance() float64 { 199 | if len(s.Xs) == 0 || s.Weights == nil { 200 | return Variance(s.Xs) 201 | } 202 | // TODO(austin) 203 | panic("Weighted Variance not implemented") 204 | } 205 | 206 | // StdDev returns the sample standard deviation of xs. 207 | func StdDev(xs []float64) float64 { 208 | return math.Sqrt(Variance(xs)) 209 | } 210 | 211 | // StdDev returns the sample standard deviation of the Sample. 212 | func (s Sample) StdDev() float64 { 213 | if len(s.Xs) == 0 || s.Weights == nil { 214 | return StdDev(s.Xs) 215 | } 216 | // TODO(austin) 217 | panic("Weighted StdDev not implemented") 218 | } 219 | 220 | // Percentile returns the pctileth value from the Sample. This uses 221 | // interpolation method R8 from Hyndman and Fan (1996). 222 | // 223 | // pctile will be capped to the range [0, 1]. If len(xs) == 0 or all 224 | // weights are 0, returns NaN. 225 | // 226 | // Percentile(0.5) is the median. Percentile(0.25) and 227 | // Percentile(0.75) are the first and third quartiles, respectively. 228 | // 229 | // This is constant time if s.Sorted and s.Weights == nil. 230 | func (s Sample) Percentile(pctile float64) float64 { 231 | if len(s.Xs) == 0 { 232 | return math.NaN() 233 | } else if pctile <= 0 { 234 | min, _ := s.Bounds() 235 | return min 236 | } else if pctile >= 1 { 237 | _, max := s.Bounds() 238 | return max 239 | } 240 | 241 | if !s.Sorted { 242 | // TODO(austin) Use select algorithm instead 243 | s = *s.Copy().Sort() 244 | } 245 | 246 | if s.Weights == nil { 247 | N := float64(len(s.Xs)) 248 | //n := pctile * (N + 1) // R6 249 | n := 1/3.0 + pctile*(N+1/3.0) // R8 250 | kf, frac := math.Modf(n) 251 | k := int(kf) 252 | if k <= 0 { 253 | return s.Xs[0] 254 | } else if k >= len(s.Xs) { 255 | return s.Xs[len(s.Xs)-1] 256 | } 257 | return s.Xs[k-1] + frac*(s.Xs[k]-s.Xs[k-1]) 258 | } else { 259 | // TODO(austin): Implement interpolation 260 | 261 | target := s.Weight() * pctile 262 | 263 | // TODO(austin) If we had cumulative weights, we could 264 | // do this in log time. 265 | for i, weight := range s.Weights { 266 | target -= weight 267 | if target < 0 { 268 | return s.Xs[i] 269 | } 270 | } 271 | return s.Xs[len(s.Xs)-1] 272 | } 273 | } 274 | 275 | // IQR returns the interquartile range of the Sample. 276 | // 277 | // This is constant time if s.Sorted and s.Weights == nil. 278 | func (s Sample) IQR() float64 { 279 | if !s.Sorted { 280 | s = *s.Copy().Sort() 281 | } 282 | return s.Percentile(0.75) - s.Percentile(0.25) 283 | } 284 | 285 | type sampleSorter struct { 286 | xs []float64 287 | weights []float64 288 | } 289 | 290 | func (p *sampleSorter) Len() int { 291 | return len(p.xs) 292 | } 293 | 294 | func (p *sampleSorter) Less(i, j int) bool { 295 | return p.xs[i] < p.xs[j] 296 | } 297 | 298 | func (p *sampleSorter) Swap(i, j int) { 299 | p.xs[i], p.xs[j] = p.xs[j], p.xs[i] 300 | p.weights[i], p.weights[j] = p.weights[j], p.weights[i] 301 | } 302 | 303 | // Sort sorts the samples in place in s and returns s. 304 | // 305 | // A sorted sample improves the performance of some algorithms. 306 | func (s *Sample) Sort() *Sample { 307 | if s.Sorted || sort.Float64sAreSorted(s.Xs) { 308 | // All set 309 | } else if s.Weights == nil { 310 | sort.Float64s(s.Xs) 311 | } else { 312 | sort.Sort(&sampleSorter{s.Xs, s.Weights}) 313 | } 314 | s.Sorted = true 315 | return s 316 | } 317 | 318 | // Copy returns a copy of the Sample. 319 | // 320 | // The returned Sample shares no data with the original, so they can 321 | // be modified (for example, sorted) independently. 322 | func (s Sample) Copy() *Sample { 323 | xs := make([]float64, len(s.Xs)) 324 | copy(xs, s.Xs) 325 | 326 | weights := []float64(nil) 327 | if s.Weights != nil { 328 | weights = make([]float64, len(s.Weights)) 329 | copy(weights, s.Weights) 330 | } 331 | 332 | return &Sample{xs, weights, s.Sorted} 333 | } 334 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/utest.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package stats 6 | 7 | import ( 8 | "math" 9 | "sort" 10 | 11 | "rsc.io/benchstat/internal/go-moremath/mathx" 12 | ) 13 | 14 | // A LocationHypothesis specifies the alternative hypothesis of a 15 | // location test such as a t-test or a Mann-Whitney U-test. The 16 | // default (zero) value is to test against the alternative hypothesis 17 | // that they differ. 18 | type LocationHypothesis int 19 | 20 | //go:generate stringer -type LocationHypothesis 21 | 22 | const ( 23 | // LocationLess specifies the alternative hypothesis that the 24 | // location of the first sample is less than the second. This 25 | // is a one-tailed test. 26 | LocationLess LocationHypothesis = -1 27 | 28 | // LocationDiffers specifies the alternative hypothesis that 29 | // the locations of the two samples are not equal. This is a 30 | // two-tailed test. 31 | LocationDiffers LocationHypothesis = 0 32 | 33 | // LocationGreater specifies the alternative hypothesis that 34 | // the location of the first sample is greater than the 35 | // second. This is a one-tailed test. 36 | LocationGreater LocationHypothesis = 1 37 | ) 38 | 39 | // A MannWhitneyUTestResult is the result of a Mann-Whitney U-test. 40 | type MannWhitneyUTestResult struct { 41 | // N1 and N2 are the sizes of the input samples. 42 | N1, N2 int 43 | 44 | // U is the value of the Mann-Whitney U statistic for this 45 | // test, generalized by counting ties as 0.5. 46 | // 47 | // Given the Cartesian product of the two samples, this is the 48 | // number of pairs in which the value from the first sample is 49 | // greater than the value of the second, plus 0.5 times the 50 | // number of pairs where the values from the two samples are 51 | // equal. Hence, U is always an integer multiple of 0.5 (it is 52 | // a whole integer if there are no ties) in the range [0, N1*N2]. 53 | // 54 | // U statistics always come in pairs, depending on which 55 | // sample is "first". The mirror U for the other sample can be 56 | // calculated as N1*N2 - U. 57 | // 58 | // There are many equivalent statistics with slightly 59 | // different definitions. The Wilcoxon (1945) W statistic 60 | // (generalized for ties) is U + (N1(N1+1))/2. It is also 61 | // common to use 2U to eliminate the half steps and Smid 62 | // (1956) uses N1*N2 - 2U to additionally center the 63 | // distribution. 64 | U float64 65 | 66 | // AltHypothesis specifies the alternative hypothesis tested 67 | // by this test against the null hypothesis that there is no 68 | // difference in the locations of the samples. 69 | AltHypothesis LocationHypothesis 70 | 71 | // P is the p-value of the Mann-Whitney test for the given 72 | // null hypothesis. 73 | P float64 74 | } 75 | 76 | // MannWhitneyExactLimit gives the largest sample size for which the 77 | // exact U distribution will be used for the Mann-Whitney U-test. 78 | // 79 | // Using the exact distribution is necessary for small sample sizes 80 | // because the distribution is highly irregular. However, computing 81 | // the distribution for large sample sizes is both computationally 82 | // expensive and unnecessary because it quickly approaches a normal 83 | // approximation. Computing the distribution for two 50 value samples 84 | // takes a few milliseconds on a 2014 laptop. 85 | var MannWhitneyExactLimit = 50 86 | 87 | // MannWhitneyTiesExactLimit gives the largest sample size for which 88 | // the exact U distribution will be used for the Mann-Whitney U-test 89 | // in the presence of ties. 90 | // 91 | // Computing this distribution is more expensive than computing the 92 | // distribution without ties, so this is set lower. Computing this 93 | // distribution for two 25 value samples takes about ten milliseconds 94 | // on a 2014 laptop. 95 | var MannWhitneyTiesExactLimit = 25 96 | 97 | // MannWhitneyUTest performs a Mann-Whitney U-test [1,2] of the null 98 | // hypothesis that two samples come from the same population against 99 | // the alternative hypothesis that one sample tends to have larger or 100 | // smaller values than the other. 101 | // 102 | // This is similar to a t-test, but unlike the t-test, the 103 | // Mann-Whitney U-test is non-parametric (it does not assume a normal 104 | // distribution). It has very slightly lower efficiency than the 105 | // t-test on normal distributions. 106 | // 107 | // Computing the exact U distribution is expensive for large sample 108 | // sizes, so this uses a normal approximation for sample sizes larger 109 | // than MannWhitneyExactLimit if there are no ties or 110 | // MannWhitneyTiesExactLimit if there are ties. This normal 111 | // approximation uses both the tie correction and the continuity 112 | // correction. 113 | // 114 | // This can fail with ErrSampleSize if either sample is empty or 115 | // ErrSamplesEqual if all sample values are equal. 116 | // 117 | // This is also known as a Mann-Whitney-Wilcoxon test and is 118 | // equivalent to the Wilcoxon rank-sum test, though the Wilcoxon 119 | // rank-sum test differs in nomenclature. 120 | // 121 | // [1] Mann, Henry B.; Whitney, Donald R. (1947). "On a Test of 122 | // Whether one of Two Random Variables is Stochastically Larger than 123 | // the Other". Annals of Mathematical Statistics 18 (1): 50–60. 124 | // 125 | // [2] Klotz, J. H. (1966). "The Wilcoxon, Ties, and the Computer". 126 | // Journal of the American Statistical Association 61 (315): 772-787. 127 | func MannWhitneyUTest(x1, x2 []float64, alt LocationHypothesis) (*MannWhitneyUTestResult, error) { 128 | n1, n2 := len(x1), len(x2) 129 | if n1 == 0 || n2 == 0 { 130 | return nil, ErrSampleSize 131 | } 132 | 133 | // Compute the U statistic and tie vector T. 134 | x1 = append([]float64(nil), x1...) 135 | x2 = append([]float64(nil), x2...) 136 | sort.Float64s(x1) 137 | sort.Float64s(x2) 138 | merged, labels := labeledMerge(x1, x2) 139 | 140 | R1 := 0.0 141 | T, hasTies := []int{}, false 142 | for i := 0; i < len(merged); { 143 | rank1, nx1, v1 := i+1, 0, merged[i] 144 | // Consume samples that tie this sample (including itself). 145 | for ; i < len(merged) && merged[i] == v1; i++ { 146 | if labels[i] == 1 { 147 | nx1++ 148 | } 149 | } 150 | // Assign all tied samples the average rank of the 151 | // samples, where merged[0] has rank 1. 152 | if nx1 != 0 { 153 | rank := float64(i+rank1) / 2 154 | R1 += rank * float64(nx1) 155 | } 156 | T = append(T, i-rank1+1) 157 | if i > rank1 { 158 | hasTies = true 159 | } 160 | } 161 | U1 := R1 - float64(n1*(n1+1))/2 162 | 163 | // Compute the smaller of U1 and U2 164 | U2 := float64(n1*n2) - U1 165 | Usmall := math.Min(U1, U2) 166 | 167 | var p float64 168 | if !hasTies && n1 <= MannWhitneyExactLimit && n2 <= MannWhitneyExactLimit || 169 | hasTies && n1 <= MannWhitneyTiesExactLimit && n2 <= MannWhitneyTiesExactLimit { 170 | // Use exact U distribution. U1 will be an integer. 171 | if len(T) == 1 { 172 | // All values are equal. Test is meaningless. 173 | return nil, ErrSamplesEqual 174 | } 175 | 176 | dist := UDist{N1: n1, N2: n2, T: T} 177 | switch alt { 178 | case LocationDiffers: 179 | if U1 == U2 { 180 | // The distribution is symmetric about 181 | // Usmall. Since the distribution is 182 | // discrete, the CDF is discontinuous 183 | // and if simply double CDF(Usmall), 184 | // we'll double count the 185 | // (non-infinitesimal) probability 186 | // mass at Usmall. What we want is 187 | // just the integral of the whole CDF, 188 | // which is 1. 189 | p = 1 190 | } else { 191 | p = dist.CDF(Usmall) * 2 192 | } 193 | 194 | case LocationLess: 195 | p = dist.CDF(U1) 196 | 197 | case LocationGreater: 198 | p = 1 - dist.CDF(U1-1) 199 | } 200 | } else { 201 | // Use normal approximation (with tie and continuity 202 | // correction). 203 | t := tieCorrection(T) 204 | N := float64(n1 + n2) 205 | μ_U := float64(n1*n2) / 2 206 | σ_U := math.Sqrt(float64(n1*n2) * ((N + 1) - t/(N*(N-1))) / 12) 207 | if σ_U == 0 { 208 | return nil, ErrSamplesEqual 209 | } 210 | numer := U1 - μ_U 211 | // Perform continuity correction. 212 | switch alt { 213 | case LocationDiffers: 214 | numer -= mathx.Sign(numer) * 0.5 215 | case LocationLess: 216 | numer += 0.5 217 | case LocationGreater: 218 | numer -= 0.5 219 | } 220 | z := numer / σ_U 221 | switch alt { 222 | case LocationDiffers: 223 | p = 2 * math.Min(StdNormal.CDF(z), 1-StdNormal.CDF(z)) 224 | case LocationLess: 225 | p = StdNormal.CDF(z) 226 | case LocationGreater: 227 | p = 1 - StdNormal.CDF(z) 228 | } 229 | } 230 | 231 | return &MannWhitneyUTestResult{N1: n1, N2: n2, U: U1, 232 | AltHypothesis: alt, P: p}, nil 233 | } 234 | 235 | // labeledMerge merges sorted lists x1 and x2 into sorted list merged. 236 | // labels[i] is 1 or 2 depending on whether merged[i] is a value from 237 | // x1 or x2, respectively. 238 | func labeledMerge(x1, x2 []float64) (merged []float64, labels []byte) { 239 | merged = make([]float64, len(x1)+len(x2)) 240 | labels = make([]byte, len(x1)+len(x2)) 241 | 242 | i, j, o := 0, 0, 0 243 | for i < len(x1) && j < len(x2) { 244 | if x1[i] < x2[j] { 245 | merged[o] = x1[i] 246 | labels[o] = 1 247 | i++ 248 | } else { 249 | merged[o] = x2[j] 250 | labels[o] = 2 251 | j++ 252 | } 253 | o++ 254 | } 255 | for ; i < len(x1); i++ { 256 | merged[o] = x1[i] 257 | labels[o] = 1 258 | o++ 259 | } 260 | for ; j < len(x2); j++ { 261 | merged[o] = x2[j] 262 | labels[o] = 2 263 | o++ 264 | } 265 | return 266 | } 267 | 268 | // tieCorrection computes the tie correction factor Σ_j (t_j³ - t_j) 269 | // where t_j is the number of ties in the j'th rank. 270 | func tieCorrection(ties []int) float64 { 271 | t := 0 272 | for _, tie := range ties { 273 | t += tie*tie*tie - tie 274 | } 275 | return float64(t) 276 | } 277 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/udist_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package stats 6 | 7 | import ( 8 | "fmt" 9 | "math" 10 | "testing" 11 | 12 | "rsc.io/benchstat/internal/go-moremath/mathx" 13 | ) 14 | 15 | func aeqTable(a, b [][]float64) bool { 16 | if len(a) != len(b) { 17 | return false 18 | } 19 | for i := range a { 20 | if len(a[i]) != len(b[i]) { 21 | return false 22 | } 23 | for j := range a[i] { 24 | // "%f" precision 25 | if math.Abs(a[i][j]-b[i][j]) >= 0.000001 { 26 | return false 27 | } 28 | } 29 | } 30 | return true 31 | } 32 | 33 | // U distribution for N=3 up to U=5. 34 | var udist3 = [][]float64{ 35 | // m=1 2 3 36 | {0.250000, 0.100000, 0.050000}, // U=0 37 | {0.500000, 0.200000, 0.100000}, // U=1 38 | {0.750000, 0.400000, 0.200000}, // U=2 39 | {1.000000, 0.600000, 0.350000}, // U=3 40 | {1.000000, 0.800000, 0.500000}, // U=4 41 | {1.000000, 0.900000, 0.650000}, // U=5 42 | } 43 | 44 | // U distribution for N=5 up to U=5. 45 | var udist5 = [][]float64{ 46 | // m=1 2 3 4 5 47 | {0.166667, 0.047619, 0.017857, 0.007937, 0.003968}, // U=0 48 | {0.333333, 0.095238, 0.035714, 0.015873, 0.007937}, // U=1 49 | {0.500000, 0.190476, 0.071429, 0.031746, 0.015873}, // U=2 50 | {0.666667, 0.285714, 0.125000, 0.055556, 0.027778}, // U=3 51 | {0.833333, 0.428571, 0.196429, 0.095238, 0.047619}, // U=4 52 | {1.000000, 0.571429, 0.285714, 0.142857, 0.075397}, // U=5 53 | } 54 | 55 | func TestUDist(t *testing.T) { 56 | makeTable := func(n int) [][]float64 { 57 | out := make([][]float64, 6) 58 | for U := 0; U < 6; U++ { 59 | out[U] = make([]float64, n) 60 | for m := 1; m <= n; m++ { 61 | out[U][m-1] = UDist{N1: m, N2: n}.CDF(float64(U)) 62 | } 63 | } 64 | return out 65 | } 66 | fmtTable := func(a [][]float64) string { 67 | out := fmt.Sprintf("%8s", "m=") 68 | for m := 1; m <= len(a[0]); m++ { 69 | out += fmt.Sprintf("%9d", m) 70 | } 71 | out += "\n" 72 | 73 | for U, row := range a { 74 | out += fmt.Sprintf("U=%-6d", U) 75 | for m := 1; m <= len(a[0]); m++ { 76 | out += fmt.Sprintf(" %f", row[m-1]) 77 | } 78 | out += "\n" 79 | } 80 | return out 81 | } 82 | 83 | // Compare against tables given in Mann, Whitney (1947). 84 | got3 := makeTable(3) 85 | if !aeqTable(got3, udist3) { 86 | t.Errorf("For n=3, want:\n%sgot:\n%s", fmtTable(udist3), fmtTable(got3)) 87 | } 88 | 89 | got5 := makeTable(5) 90 | if !aeqTable(got5, udist5) { 91 | t.Errorf("For n=5, want:\n%sgot:\n%s", fmtTable(udist5), fmtTable(got5)) 92 | } 93 | } 94 | 95 | func BenchmarkUDist(b *testing.B) { 96 | for i := 0; i < b.N; i++ { 97 | // R uses the exact distribution up to N=50. 98 | // N*M/2=1250 is the hardest point to get the CDF for. 99 | UDist{N1: 50, N2: 50}.CDF(1250) 100 | } 101 | } 102 | 103 | func TestUDistTies(t *testing.T) { 104 | makeTable := func(m, N int, t []int, minx, maxx float64) [][]float64 { 105 | out := [][]float64{} 106 | dist := UDist{N1: m, N2: N - m, T: t} 107 | for x := minx; x <= maxx; x += 0.5 { 108 | // Convert x from uQt' to uQv'. 109 | U := x - float64(m*m)/2 110 | P := dist.CDF(U) 111 | if len(out) == 0 || !aeq(out[len(out)-1][1], P) { 112 | out = append(out, []float64{x, P}) 113 | } 114 | } 115 | return out 116 | } 117 | fmtTable := func(table [][]float64) string { 118 | out := "" 119 | for _, row := range table { 120 | out += fmt.Sprintf("%5.1f %f\n", row[0], row[1]) 121 | } 122 | return out 123 | } 124 | 125 | // Compare against Table 1 from Klotz (1966). 126 | got := makeTable(5, 10, []int{1, 1, 2, 1, 1, 2, 1, 1}, 12.5, 19.5) 127 | want := [][]float64{ 128 | {12.5, 0.003968}, {13.5, 0.007937}, 129 | {15.0, 0.023810}, {16.5, 0.047619}, 130 | {17.5, 0.071429}, {18.0, 0.087302}, 131 | {19.0, 0.134921}, {19.5, 0.138889}, 132 | } 133 | if !aeqTable(got, want) { 134 | t.Errorf("Want:\n%sgot:\n%s", fmtTable(want), fmtTable(got)) 135 | } 136 | 137 | got = makeTable(10, 21, []int{6, 5, 4, 3, 2, 1}, 52, 87) 138 | want = [][]float64{ 139 | {52.0, 0.000014}, {56.5, 0.000128}, 140 | {57.5, 0.000145}, {60.0, 0.000230}, 141 | {61.0, 0.000400}, {62.0, 0.000740}, 142 | {62.5, 0.000797}, {64.0, 0.000825}, 143 | {64.5, 0.001165}, {65.5, 0.001477}, 144 | {66.5, 0.002498}, {67.0, 0.002725}, 145 | {67.5, 0.002895}, {68.0, 0.003150}, 146 | {68.5, 0.003263}, {69.0, 0.003518}, 147 | {69.5, 0.003603}, {70.0, 0.005648}, 148 | {70.5, 0.005818}, {71.0, 0.006626}, 149 | {71.5, 0.006796}, {72.0, 0.008157}, 150 | {72.5, 0.009688}, {73.0, 0.009801}, 151 | {73.5, 0.010430}, {74.0, 0.011111}, 152 | {74.5, 0.014230}, {75.0, 0.014612}, 153 | {75.5, 0.017249}, {76.0, 0.018307}, 154 | {76.5, 0.020178}, {77.0, 0.022270}, 155 | {77.5, 0.023189}, {78.0, 0.026931}, 156 | {78.5, 0.028207}, {79.0, 0.029979}, 157 | {79.5, 0.030931}, {80.0, 0.038969}, 158 | {80.5, 0.043063}, {81.0, 0.044262}, 159 | {81.5, 0.046389}, {82.0, 0.049581}, 160 | {82.5, 0.056300}, {83.0, 0.058027}, 161 | {83.5, 0.063669}, {84.0, 0.067454}, 162 | {84.5, 0.074122}, {85.0, 0.077425}, 163 | {85.5, 0.083498}, {86.0, 0.094079}, 164 | {86.5, 0.096693}, {87.0, 0.101132}, 165 | } 166 | if !aeqTable(got, want) { 167 | t.Errorf("Want:\n%sgot:\n%s", fmtTable(want), fmtTable(got)) 168 | } 169 | 170 | got = makeTable(8, 16, []int{2, 2, 2, 2, 2, 2, 2, 2}, 32, 54) 171 | want = [][]float64{ 172 | {32.0, 0.000078}, {34.0, 0.000389}, 173 | {36.0, 0.001088}, {38.0, 0.002642}, 174 | {40.0, 0.005905}, {42.0, 0.011500}, 175 | {44.0, 0.021057}, {46.0, 0.035664}, 176 | {48.0, 0.057187}, {50.0, 0.086713}, 177 | {52.0, 0.126263}, {54.0, 0.175369}, 178 | } 179 | if !aeqTable(got, want) { 180 | t.Errorf("Want:\n%sgot:\n%s", fmtTable(want), fmtTable(got)) 181 | } 182 | 183 | // Check remaining tables from Klotz against the reference 184 | // implementation. 185 | checkRef := func(n1 int, tie []int) { 186 | wantPMF1, wantCDF1 := udistRef(n1, tie) 187 | 188 | dist := UDist{N1: n1, N2: sumint(tie) - n1, T: tie} 189 | gotPMF, wantPMF := [][]float64{}, [][]float64{} 190 | gotCDF, wantCDF := [][]float64{}, [][]float64{} 191 | N := sumint(tie) 192 | for U := 0.0; U <= float64(n1*(N-n1)); U += 0.5 { 193 | gotPMF = append(gotPMF, []float64{U, dist.PMF(U)}) 194 | gotCDF = append(gotCDF, []float64{U, dist.CDF(U)}) 195 | wantPMF = append(wantPMF, []float64{U, wantPMF1[int(U*2)]}) 196 | wantCDF = append(wantCDF, []float64{U, wantCDF1[int(U*2)]}) 197 | } 198 | if !aeqTable(wantPMF, gotPMF) { 199 | t.Errorf("For PMF of n1=%v, t=%v, want:\n%sgot:\n%s", n1, tie, fmtTable(wantPMF), fmtTable(gotPMF)) 200 | } 201 | if !aeqTable(wantCDF, gotCDF) { 202 | t.Errorf("For CDF of n1=%v, t=%v, want:\n%sgot:\n%s", n1, tie, fmtTable(wantCDF), fmtTable(gotCDF)) 203 | } 204 | } 205 | checkRef(5, []int{1, 1, 2, 1, 1, 2, 1, 1}) 206 | checkRef(5, []int{1, 1, 2, 1, 1, 1, 2, 1}) 207 | checkRef(5, []int{1, 3, 1, 2, 1, 1, 1}) 208 | checkRef(8, []int{1, 2, 1, 1, 1, 1, 2, 2, 1, 2}) 209 | checkRef(12, []int{3, 3, 4, 3, 4, 5}) 210 | checkRef(10, []int{1, 2, 3, 4, 5, 6}) 211 | } 212 | 213 | func BenchmarkUDistTies(b *testing.B) { 214 | // Worst case: just one tie. 215 | n := 20 216 | t := make([]int, 2*n-1) 217 | for i := range t { 218 | t[i] = 1 219 | } 220 | t[0] = 2 221 | 222 | for i := 0; i < b.N; i++ { 223 | UDist{N1: n, N2: n, T: t}.CDF(float64(n*n) / 2) 224 | } 225 | } 226 | 227 | func XTestPrintUmemo(t *testing.T) { 228 | // Reproduce table from Cheung, Klotz. 229 | ties := []int{4, 5, 3, 4, 6} 230 | printUmemo(makeUmemo(80, 10, ties), ties) 231 | } 232 | 233 | // udistRef computes the PMF and CDF of the U distribution for two 234 | // samples of sizes n1 and sum(t)-n1 with tie vector t. The returned 235 | // pmf and cdf are indexed by 2*U. 236 | // 237 | // This uses the "graphical method" of Klotz (1966). It is very slow 238 | // (Θ(∏ (t[i]+1)) = Ω(2^|t|)), but very correct, and hence useful as a 239 | // reference for testing faster implementations. 240 | func udistRef(n1 int, t []int) (pmf, cdf []float64) { 241 | // Enumerate all u vectors for which 0 <= u_i <= t_i. Count 242 | // the number of permutations of two samples of sizes n1 and 243 | // sum(t)-n1 with tie vector t and accumulate these counts by 244 | // their U statistics in count[2*U]. 245 | counts := make([]int, 1+2*n1*(sumint(t)-n1)) 246 | 247 | u := make([]int, len(t)) 248 | u[0] = -1 // Get enumeration started. 249 | enumu: 250 | for { 251 | // Compute the next u vector. 252 | u[0]++ 253 | for i := 0; i < len(u) && u[i] > t[i]; i++ { 254 | if i == len(u)-1 { 255 | // All u vectors have been enumerated. 256 | break enumu 257 | } 258 | // Carry. 259 | u[i+1]++ 260 | u[i] = 0 261 | } 262 | 263 | // Is this a legal u vector? 264 | if sumint(u) != n1 { 265 | // Klotz (1966) has a method for directly 266 | // enumerating legal u vectors, but the point 267 | // of this is to be correct, not fast. 268 | continue 269 | } 270 | 271 | // Compute 2*U statistic for this u vector. 272 | twoU, vsum := 0, 0 273 | for i, u_i := range u { 274 | v_i := t[i] - u_i 275 | // U = U + vsum*u_i + u_i*v_i/2 276 | twoU += 2*vsum*u_i + u_i*v_i 277 | vsum += v_i 278 | } 279 | 280 | // Compute Π choose(t_i, u_i). This is the number of 281 | // ways of permuting the input sample under u. 282 | prod := 1 283 | for i, u_i := range u { 284 | prod *= int(mathx.Choose(t[i], u_i) + 0.5) 285 | } 286 | 287 | // Accumulate the permutations on this u path. 288 | counts[twoU] += prod 289 | 290 | if false { 291 | // Print a table in the form of Klotz's 292 | // "direct enumeration" example. 293 | // 294 | // Convert 2U = 2UQV' to UQt' used in Klotz 295 | // examples. 296 | UQt := float64(twoU)/2 + float64(n1*n1)/2 297 | fmt.Printf("%+v %f %-2d\n", u, UQt, prod) 298 | } 299 | } 300 | 301 | // Convert counts into probabilities for PMF and CDF. 302 | pmf = make([]float64, len(counts)) 303 | cdf = make([]float64, len(counts)) 304 | total := int(mathx.Choose(sumint(t), n1) + 0.5) 305 | for i, count := range counts { 306 | pmf[i] = float64(count) / float64(total) 307 | if i > 0 { 308 | cdf[i] = cdf[i-1] 309 | } 310 | cdf[i] += pmf[i] 311 | } 312 | return 313 | } 314 | 315 | // printUmemo prints the output of makeUmemo for debugging. 316 | func printUmemo(A []map[ukey]float64, t []int) { 317 | fmt.Printf("K\tn1\t2*U\tpr\n") 318 | for K := len(A) - 1; K >= 0; K-- { 319 | for i, pr := range A[K] { 320 | _, ref := udistRef(i.n1, t[:K]) 321 | fmt.Printf("%v\t%v\t%v\t%v\t%v\n", K, i.n1, i.twoU, pr, ref[i.twoU]) 322 | } 323 | } 324 | } 325 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/kde.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package stats 6 | 7 | import ( 8 | "fmt" 9 | "math" 10 | ) 11 | 12 | // A KDE is a distribution that estimates the underlying distribution 13 | // of a Sample using kernel density estimation. 14 | // 15 | // Kernel density estimation is a method for constructing an estimate 16 | // ƒ̂(x) of a unknown distribution ƒ(x) given a sample from that 17 | // distribution. Unlike many techniques, kernel density estimation is 18 | // non-parametric: in general, it doesn't assume any particular true 19 | // distribution (note, however, that the resulting distribution 20 | // depends deeply on the selected bandwidth, and many bandwidth 21 | // estimation techniques assume normal reference rules). 22 | // 23 | // A kernel density estimate is similar to a histogram, except that it 24 | // is a smooth probability estimate and does not require choosing a 25 | // bin size and discretizing the data. 26 | // 27 | // Sample is the only required field. All others have reasonable 28 | // defaults. 29 | type KDE struct { 30 | // Sample is the data sample underlying this KDE. 31 | Sample Sample 32 | 33 | // Kernel is the kernel to use for the KDE. 34 | Kernel KDEKernel 35 | 36 | // Bandwidth is the bandwidth to use for the KDE. 37 | // 38 | // If this is zero, the bandwidth is computed from the 39 | // provided data using a default bandwidth estimator 40 | // (currently BandwidthScott). 41 | Bandwidth float64 42 | 43 | // BoundaryMethod is the boundary correction method to use for 44 | // the KDE. The default value is BoundaryReflect; however, the 45 | // default bounds are effectively +/-inf, which is equivalent 46 | // to performing no boundary correction. 47 | BoundaryMethod KDEBoundaryMethod 48 | 49 | // [BoundaryMin, BoundaryMax) specify a bounded support for 50 | // the KDE. If both are 0 (their default values), they are 51 | // treated as +/-inf. 52 | // 53 | // To specify a half-bounded support, set Min to math.Inf(-1) 54 | // or Max to math.Inf(1). 55 | BoundaryMin float64 56 | BoundaryMax float64 57 | } 58 | 59 | // BandwidthSilverman is a bandwidth estimator implementing 60 | // Silverman's Rule of Thumb. It's fast, but not very robust to 61 | // outliers as it assumes data is approximately normal. 62 | // 63 | // Silverman, B. W. (1986) Density Estimation. 64 | func BandwidthSilverman(data interface { 65 | StdDev() float64 66 | Weight() float64 67 | }) float64 { 68 | return 1.06 * data.StdDev() * math.Pow(data.Weight(), -1.0/5) 69 | } 70 | 71 | // BandwidthScott is a bandwidth estimator implementing Scott's Rule. 72 | // This is generally robust to outliers: it chooses the minimum 73 | // between the sample's standard deviation and an robust estimator of 74 | // a Gaussian distribution's standard deviation. 75 | // 76 | // Scott, D. W. (1992) Multivariate Density Estimation: Theory, 77 | // Practice, and Visualization. 78 | func BandwidthScott(data interface { 79 | StdDev() float64 80 | Weight() float64 81 | Percentile(float64) float64 82 | }) float64 { 83 | iqr := data.Percentile(0.75) - data.Percentile(0.25) 84 | hScale := 1.06 * math.Pow(data.Weight(), -1.0/5) 85 | stdDev := data.StdDev() 86 | if stdDev < iqr/1.349 { 87 | // Use Silverman's Rule of Thumb 88 | return hScale * stdDev 89 | } else { 90 | // Use IQR/1.349 as a robust estimator of the standard 91 | // deviation of a Gaussian distribution. 92 | return hScale * (iqr / 1.349) 93 | } 94 | } 95 | 96 | // TODO(austin) Implement bandwidth estimator from Botev, Grotowski, 97 | // Kroese. (2010) Kernel Density Estimation via Diffusion. 98 | 99 | // KDEKernel represents a kernel to use for a KDE. 100 | type KDEKernel int 101 | 102 | //go:generate stringer -type=KDEKernel 103 | 104 | const ( 105 | // An EpanechnikovKernel is a smooth kernel with bounded 106 | // support. As a result, the KDE will also have bounded 107 | // support. It is "optimal" in the sense that it minimizes the 108 | // asymptotic mean integrated squared error (AMISE). 109 | EpanechnikovKernel KDEKernel = iota 110 | 111 | // A GaussianKernel is a Gaussian (normal) kernel. 112 | GaussianKernel 113 | 114 | // A DeltaKernel is a Dirac delta function. The PDF of such a 115 | // KDE is not well-defined, but the CDF will represent each 116 | // sample as an instantaneous increase. This kernel ignores 117 | // bandwidth and never requires boundary correction. 118 | DeltaKernel 119 | ) 120 | 121 | // KDEBoundaryMethod represents a boundary correction method for 122 | // constructing a KDE with bounded support. 123 | type KDEBoundaryMethod int 124 | 125 | //go:generate stringer -type=KDEBoundaryMethod 126 | 127 | const ( 128 | // BoundaryReflect reflects the density estimate at the 129 | // boundaries. For example, for a KDE with support [0, inf), 130 | // this is equivalent to ƒ̂ᵣ(x)=ƒ̂(x)+ƒ̂(-x) for x>=0. This is a 131 | // simple and fast technique, but enforces that ƒ̂ᵣ'(0)=0, so 132 | // it may not be applicable to all distributions. 133 | BoundaryReflect KDEBoundaryMethod = iota 134 | ) 135 | 136 | type kdeKernel interface { 137 | pdfEach(xs []float64) []float64 138 | cdfEach(xs []float64) []float64 139 | } 140 | 141 | func (k *KDE) prepare() (kdeKernel, bool) { 142 | // Compute bandwidth. 143 | if k.Bandwidth == 0 { 144 | k.Bandwidth = BandwidthScott(k.Sample) 145 | } 146 | 147 | // Construct kernel. 148 | kernel := kdeKernel(nil) 149 | switch k.Kernel { 150 | default: 151 | panic(fmt.Sprint("unknown kernel", k)) 152 | case EpanechnikovKernel: 153 | kernel = epanechnikovKernel{k.Bandwidth} 154 | case GaussianKernel: 155 | kernel = NormalDist{0, k.Bandwidth} 156 | case DeltaKernel: 157 | kernel = DeltaDist{0} 158 | } 159 | 160 | // Use boundary correction? 161 | bc := k.BoundaryMin != 0 || k.BoundaryMax != 0 162 | 163 | return kernel, bc 164 | } 165 | 166 | // TODO: For KDEs of histograms, make histograms able to create a 167 | // weighted Sample and simply require the caller to provide a 168 | // good bandwidth from a StreamStats. 169 | 170 | // normalizedXs returns x - kde.Sample.Xs. Evaluating kernels shifted 171 | // by kde.Sample.Xs all at x is equivalent to evaluating one unshifted 172 | // kernel at x - kde.Sample.Xs. 173 | func (kde *KDE) normalizedXs(x float64) []float64 { 174 | txs := make([]float64, len(kde.Sample.Xs)) 175 | for i, xi := range kde.Sample.Xs { 176 | txs[i] = x - xi 177 | } 178 | return txs 179 | } 180 | 181 | func (kde *KDE) PDF(x float64) float64 { 182 | kernel, bc := kde.prepare() 183 | 184 | // Apply boundary 185 | if bc && (x < kde.BoundaryMin || x >= kde.BoundaryMax) { 186 | return 0 187 | } 188 | 189 | y := func(x float64) float64 { 190 | // Shift kernel to each of kde.xs and evaluate at x 191 | ys := kernel.pdfEach(kde.normalizedXs(x)) 192 | 193 | // Kernel samples are weighted according to the weights of xs 194 | wys := Sample{Xs: ys, Weights: kde.Sample.Weights} 195 | 196 | return wys.Sum() / wys.Weight() 197 | } 198 | if !bc { 199 | return y(x) 200 | } 201 | switch kde.BoundaryMethod { 202 | default: 203 | panic("unknown boundary correction method") 204 | case BoundaryReflect: 205 | if math.IsInf(kde.BoundaryMax, 1) { 206 | return y(x) + y(2*kde.BoundaryMin-x) 207 | } else if math.IsInf(kde.BoundaryMin, -1) { 208 | return y(x) + y(2*kde.BoundaryMax-x) 209 | } else { 210 | d := 2 * (kde.BoundaryMax - kde.BoundaryMin) 211 | w := 2 * (x - kde.BoundaryMin) 212 | return series(func(n float64) float64 { 213 | // Points >= x 214 | return y(x+n*d) + y(x+n*d-w) 215 | }) + series(func(n float64) float64 { 216 | // Points < x 217 | return y(x-(n+1)*d+w) + y(x-(n+1)*d) 218 | }) 219 | } 220 | } 221 | } 222 | 223 | func (kde *KDE) CDF(x float64) float64 { 224 | kernel, bc := kde.prepare() 225 | 226 | // Apply boundary 227 | if bc { 228 | if x < kde.BoundaryMin { 229 | return 0 230 | } else if x >= kde.BoundaryMax { 231 | return 1 232 | } 233 | } 234 | 235 | y := func(x float64) float64 { 236 | // Shift kernel integral to each of cdf.xs and evaluate at x 237 | ys := kernel.cdfEach(kde.normalizedXs(x)) 238 | 239 | // Kernel samples are weighted according to the weights of xs 240 | wys := Sample{Xs: ys, Weights: kde.Sample.Weights} 241 | 242 | return wys.Sum() / wys.Weight() 243 | } 244 | if !bc { 245 | return y(x) 246 | } 247 | switch kde.BoundaryMethod { 248 | default: 249 | panic("unknown boundary correction method") 250 | case BoundaryReflect: 251 | if math.IsInf(kde.BoundaryMax, 1) { 252 | return y(x) - y(2*kde.BoundaryMin-x) 253 | } else if math.IsInf(kde.BoundaryMin, -1) { 254 | return y(x) + (1 - y(2*kde.BoundaryMax-x)) 255 | } else { 256 | d := 2 * (kde.BoundaryMax - kde.BoundaryMin) 257 | w := 2 * (x - kde.BoundaryMin) 258 | return series(func(n float64) float64 { 259 | // Windows >= x-w 260 | return y(x+n*d) - y(x+n*d-w) 261 | }) + series(func(n float64) float64 { 262 | // Windows < x-w 263 | return y(x-(n+1)*d) - y(x-(n+1)*d-w) 264 | }) 265 | } 266 | } 267 | } 268 | 269 | func (kde *KDE) Bounds() (low float64, high float64) { 270 | _, bc := kde.prepare() 271 | 272 | // TODO(austin) If this KDE came from a histogram, we'd better 273 | // not sample at a significantly higher rate than the 274 | // histogram. Maybe we want to just return the bounds of the 275 | // histogram? 276 | 277 | // TODO(austin) It would be nice if this could be instructed 278 | // to include all original data points, even if they are in 279 | // the tail. Probably that should just be up to the caller to 280 | // pass an axis derived from the bounds of the original data. 281 | 282 | // Use the lowest and highest samples as starting points 283 | lowX, highX := kde.Sample.Bounds() 284 | if lowX == highX { 285 | lowX -= 1 286 | highX += 1 287 | } 288 | 289 | // Find the end points that contain 99% of the CDF's weight. 290 | // Since bisect requires that the root be bracketed, start by 291 | // expanding our range if necessary. TODO(austin) This can 292 | // definitely be done faster. 293 | const ( 294 | lowY = 0.005 295 | highY = 0.995 296 | tolerance = 0.001 297 | ) 298 | for kde.CDF(lowX) > lowY { 299 | lowX -= highX - lowX 300 | } 301 | for kde.CDF(highX) < highY { 302 | highX += highX - lowX 303 | } 304 | // Explicitly accept discontinuities, since we may be using a 305 | // discontiguous kernel. 306 | low, _ = bisect(func(x float64) float64 { return kde.CDF(x) - lowY }, lowX, highX, tolerance) 307 | high, _ = bisect(func(x float64) float64 { return kde.CDF(x) - highY }, lowX, highX, tolerance) 308 | 309 | // Expand width by 20% to give some margins 310 | width := high - low 311 | low, high = low-0.1*width, high+0.1*width 312 | 313 | // Limit to bounds 314 | if bc { 315 | low = math.Max(low, kde.BoundaryMin) 316 | high = math.Min(high, kde.BoundaryMax) 317 | } 318 | 319 | return 320 | } 321 | 322 | type epanechnikovKernel struct { 323 | h float64 324 | } 325 | 326 | func (d epanechnikovKernel) pdfEach(xs []float64) []float64 { 327 | ys := make([]float64, len(xs)) 328 | a := 0.75 / d.h 329 | invhh := 1 / (d.h * d.h) 330 | for i, x := range xs { 331 | if -d.h < x && x < d.h { 332 | ys[i] = a * (1 - x*x*invhh) 333 | } 334 | } 335 | return ys 336 | } 337 | 338 | func (d epanechnikovKernel) cdfEach(xs []float64) []float64 { 339 | ys := make([]float64, len(xs)) 340 | invh := 1 / d.h 341 | for i, x := range xs { 342 | if x > d.h { 343 | ys[i] = 1 344 | } else if x > -d.h { 345 | u := x * invh 346 | ys[i] = 0.25 * (2 + 3*u - u*u*u) 347 | } 348 | } 349 | return ys 350 | } 351 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/udist.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package stats 6 | 7 | import ( 8 | "math" 9 | 10 | "rsc.io/benchstat/internal/go-moremath/mathx" 11 | ) 12 | 13 | // A UDist is the discrete probability distribution of the 14 | // Mann-Whitney U statistic for a pair of samples of sizes N1 and N2. 15 | // 16 | // The details of computing this distribution with no ties can be 17 | // found in Mann, Henry B.; Whitney, Donald R. (1947). "On a Test of 18 | // Whether one of Two Random Variables is Stochastically Larger than 19 | // the Other". Annals of Mathematical Statistics 18 (1): 50–60. 20 | // Computing this distribution in the presence of ties is described in 21 | // Klotz, J. H. (1966). "The Wilcoxon, Ties, and the Computer". 22 | // Journal of the American Statistical Association 61 (315): 772-787 23 | // and Cheung, Ying Kuen; Klotz, Jerome H. (1997). "The Mann Whitney 24 | // Wilcoxon Distribution Using Linked Lists". Statistica Sinica 7: 25 | // 805-813 (the former paper contains details that are glossed over in 26 | // the latter paper but has mathematical typesetting issues, so it's 27 | // easiest to get the context from the former paper and the details 28 | // from the latter). 29 | type UDist struct { 30 | N1, N2 int 31 | 32 | // T is the count of the number of ties at each rank in the 33 | // input distributions. T may be nil, in which case it is 34 | // assumed there are no ties (which is equivalent to an M+N 35 | // slice of 1s). It must be the case that Sum(T) == M+N. 36 | T []int 37 | } 38 | 39 | // hasTies returns true if d has any tied samples. 40 | func (d UDist) hasTies() bool { 41 | for _, t := range d.T { 42 | if t > 1 { 43 | return true 44 | } 45 | } 46 | return false 47 | } 48 | 49 | // p returns the p_{d.N1,d.N2} function defined by Mann, Whitney 1947 50 | // for values of U from 0 up to and including the U argument. 51 | // 52 | // This algorithm runs in Θ(N1*N2*U) = O(N1²N2²) time and is quite 53 | // fast for small values of N1 and N2. However, it does not handle ties. 54 | func (d UDist) p(U int) []float64 { 55 | // This is a dynamic programming implementation of the 56 | // recursive recurrence definition given by Mann and Whitney: 57 | // 58 | // p_{n,m}(U) = (n * p_{n-1,m}(U-m) + m * p_{n,m-1}(U)) / (n+m) 59 | // p_{n,m}(U) = 0 if U < 0 60 | // p_{0,m}(U) = p{n,0}(U) = 1 / nCr(m+n, n) if U = 0 61 | // = 0 if U > 0 62 | // 63 | // (Note that there is a typo in the original paper. The first 64 | // recursive application of p should be for U-m, not U-M.) 65 | // 66 | // Since p{n,m} only depends on p{n-1,m} and p{n,m-1}, we only 67 | // need to store one "plane" of the three dimensional space at 68 | // a time. 69 | // 70 | // Furthermore, p_{n,m} = p_{m,n}, so we only construct values 71 | // for n <= m and obtain the rest through symmetry. 72 | // 73 | // We organize the computed values of p as followed: 74 | // 75 | // n → N 76 | // m * 77 | // ↓ * * 78 | // * * * 79 | // * * * * 80 | // * * * * 81 | // M * * * * 82 | // 83 | // where each * is a slice indexed by U. The code below 84 | // computes these left-to-right, top-to-bottom, so it only 85 | // stores one row of this matrix at a time. Furthermore, 86 | // computing an element in a given U slice only depends on the 87 | // same and smaller values of U, so we can overwrite the U 88 | // slice we're computing in place as long as we start with the 89 | // largest value of U. Finally, even though the recurrence 90 | // depends on (n,m) above the diagonal and we use symmetry to 91 | // mirror those across the diagonal to (m,n), the mirrored 92 | // indexes are always available in the current row, so this 93 | // mirroring does not interfere with our ability to recycle 94 | // state. 95 | 96 | N, M := d.N1, d.N2 97 | if N > M { 98 | N, M = M, N 99 | } 100 | 101 | memo := make([][]float64, N+1) 102 | for n := range memo { 103 | memo[n] = make([]float64, U+1) 104 | } 105 | 106 | for m := 0; m <= M; m++ { 107 | // Compute p_{0,m}. This is zero except for U=0. 108 | memo[0][0] = 1 109 | 110 | // Compute the remainder of this row. 111 | nlim := N 112 | if m < nlim { 113 | nlim = m 114 | } 115 | for n := 1; n <= nlim; n++ { 116 | lp := memo[n-1] // p_{n-1,m} 117 | var rp []float64 118 | if n <= m-1 { 119 | rp = memo[n] // p_{n,m-1} 120 | } else { 121 | rp = memo[m-1] // p{m-1,n} and m==n 122 | } 123 | 124 | // For a given n,m, U is at most n*m. 125 | // 126 | // TODO: Actually, it's at most ⌈n*m/2⌉, but 127 | // then we need to use more complex symmetries 128 | // in the inner loop below. 129 | ulim := n * m 130 | if U < ulim { 131 | ulim = U 132 | } 133 | 134 | out := memo[n] // p_{n,m} 135 | nplusm := float64(n + m) 136 | for U1 := ulim; U1 >= 0; U1-- { 137 | l := 0.0 138 | if U1-m >= 0 { 139 | l = float64(n) * lp[U1-m] 140 | } 141 | r := float64(m) * rp[U1] 142 | out[U1] = (l + r) / nplusm 143 | } 144 | } 145 | } 146 | return memo[N] 147 | } 148 | 149 | type ukey struct { 150 | n1 int // size of first sample 151 | twoU int // 2*U statistic for this permutation 152 | } 153 | 154 | // This computes the cumulative counts of the Mann-Whitney U 155 | // distribution in the presence of ties using the computation from 156 | // Cheung, Ying Kuen; Klotz, Jerome H. (1997). "The Mann Whitney 157 | // Wilcoxon Distribution Using Linked Lists". Statistica Sinica 7: 158 | // 805-813, with much guidance from appendix L of Klotz, A 159 | // Computational Approach to Statistics. 160 | // 161 | // makeUmemo constructs a table memo[K][ukey{n1, 2*U}], where K is the 162 | // number of ranks (up to len(t)), n1 is the size of the first sample 163 | // (up to the n1 argument), and U is the U statistic (up to the 164 | // argument twoU/2). The value of an entry in the memo table is the 165 | // number of permutations of a sample of size n1 in a ranking with tie 166 | // vector t[:K] having a U statistic <= U. 167 | func makeUmemo(twoU, n1 int, t []int) []map[ukey]float64 { 168 | // Another candidate for a fast implementation is van de Wiel, 169 | // "The split-up algorithm: a fast symbolic method for 170 | // computing p-values of distribution-free statistics". This 171 | // is what's used by R's coin package. It's a comparatively 172 | // recent publication, so it's presumably faster (or perhaps 173 | // just more general) than previous techniques, but I can't 174 | // get my hands on the paper. 175 | // 176 | // TODO: ~40% of this function's time is spent in mapassign on 177 | // the assignment lines in the two loops and another ~20% in 178 | // map access and iteration. Improving map behavior or 179 | // replacing the maps altogether with some other constant-time 180 | // structure could double performance. 181 | // 182 | // TODO: The worst case for this function is when there are 183 | // few ties. Yet the best case overall is when there are *no* 184 | // ties. Can we get the best of both worlds? Use the fast 185 | // algorithm for the most part when there are few ties and mix 186 | // in the general algorithm just where we need it? That's 187 | // certainly possible for sub-problems where t[:k] has no 188 | // ties, but that doesn't help if t[0] has a tie but nothing 189 | // else does. Is it possible to rearrange the ranks without 190 | // messing up our computation of the U statistic for 191 | // sub-problems? 192 | 193 | K := len(t) 194 | 195 | // Compute a coefficients. The a slice is indexed by k (a[0] 196 | // is unused). 197 | a := make([]int, K+1) 198 | a[1] = t[0] 199 | for k := 2; k <= K; k++ { 200 | a[k] = a[k-1] + t[k-2] + t[k-1] 201 | } 202 | 203 | // Create the memo table for the counts function, A. The A 204 | // slice is indexed by k (A[0] is unused). 205 | // 206 | // In "The Mann Whitney Distribution Using Linked Lists", they 207 | // use linked lists (*gasp*) for this, but within each K it's 208 | // really just a memoization table, so it's faster to use a 209 | // map. The outer structure is a slice indexed by k because we 210 | // need to find all memo entries with certain values of k. 211 | // 212 | // TODO: The n1 and twoU values in the ukeys follow strict 213 | // patterns. For each K value, the n1 values are every integer 214 | // between two bounds. For each (K, n1) value, the twoU values 215 | // are every integer multiple of a certain base between two 216 | // bounds. It might be worth turning these into directly 217 | // indexible slices. 218 | A := make([]map[ukey]float64, K+1) 219 | A[K] = map[ukey]float64{ukey{n1: n1, twoU: twoU}: 0} 220 | 221 | // Compute memo table (k, n1, twoU) triples from high K values 222 | // to low K values. This drives the recurrence relation 223 | // downward to figure out all of the needed argument triples. 224 | // 225 | // TODO: Is it possible to generate this table bottom-up? If 226 | // so, this could be a pure dynamic programming algorithm and 227 | // we could discard the K dimension. We could at least store 228 | // the inputs in a more compact representation that replaces 229 | // the twoU dimension with an interval and a step size (as 230 | // suggested by Cheung, Klotz, not that they make it at all 231 | // clear *why* they're suggesting this). 232 | tsum := sumint(t) // always ∑ t[0:k] 233 | for k := K - 1; k >= 2; k-- { 234 | tsum -= t[k] 235 | A[k] = make(map[ukey]float64) 236 | 237 | // Construct A[k] from A[k+1]. 238 | for A_kplus1 := range A[k+1] { 239 | rkLow := maxint(0, A_kplus1.n1-tsum) 240 | rkHigh := minint(A_kplus1.n1, t[k]) 241 | for rk := rkLow; rk <= rkHigh; rk++ { 242 | twoU_k := A_kplus1.twoU - rk*(a[k+1]-2*A_kplus1.n1+rk) 243 | n1_k := A_kplus1.n1 - rk 244 | if twoUmin(n1_k, t[:k], a) <= twoU_k && twoU_k <= twoUmax(n1_k, t[:k], a) { 245 | key := ukey{n1: n1_k, twoU: twoU_k} 246 | A[k][key] = 0 247 | } 248 | } 249 | } 250 | } 251 | 252 | // Fill counts in memo table from low K values to high K 253 | // values. This unwinds the recurrence relation. 254 | 255 | // Start with K==2 base case. 256 | // 257 | // TODO: Later computations depend on these, but these don't 258 | // depend on anything (including each other), so if K==2, we 259 | // can skip the memo table altogether. 260 | if K < 2 { 261 | panic("K < 2") 262 | } 263 | N_2 := t[0] + t[1] 264 | for A_2i := range A[2] { 265 | Asum := 0.0 266 | r2Low := maxint(0, A_2i.n1-t[0]) 267 | r2High := (A_2i.twoU - A_2i.n1*(t[0]-A_2i.n1)) / N_2 268 | for r2 := r2Low; r2 <= r2High; r2++ { 269 | Asum += mathx.Choose(t[0], A_2i.n1-r2) * 270 | mathx.Choose(t[1], r2) 271 | } 272 | A[2][A_2i] = Asum 273 | } 274 | 275 | // Derive counts for the rest of the memo table. 276 | tsum = t[0] // always ∑ t[0:k-1] 277 | for k := 3; k <= K; k++ { 278 | tsum += t[k-2] 279 | 280 | // Compute A[k] counts from A[k-1] counts. 281 | for A_ki := range A[k] { 282 | Asum := 0.0 283 | rkLow := maxint(0, A_ki.n1-tsum) 284 | rkHigh := minint(A_ki.n1, t[k-1]) 285 | for rk := rkLow; rk <= rkHigh; rk++ { 286 | twoU_kminus1 := A_ki.twoU - rk*(a[k]-2*A_ki.n1+rk) 287 | n1_kminus1 := A_ki.n1 - rk 288 | x, ok := A[k-1][ukey{n1: n1_kminus1, twoU: twoU_kminus1}] 289 | if !ok && twoUmax(n1_kminus1, t[:k-1], a) < twoU_kminus1 { 290 | x = mathx.Choose(tsum, n1_kminus1) 291 | } 292 | Asum += x * mathx.Choose(t[k-1], rk) 293 | } 294 | A[k][A_ki] = Asum 295 | } 296 | } 297 | 298 | return A 299 | } 300 | 301 | func twoUmin(n1 int, t, a []int) int { 302 | K := len(t) 303 | twoU := -n1 * n1 304 | n1_k := n1 305 | for k := 1; k <= K; k++ { 306 | twoU_k := minint(n1_k, t[k-1]) 307 | twoU += twoU_k * a[k] 308 | n1_k -= twoU_k 309 | } 310 | return twoU 311 | } 312 | 313 | func twoUmax(n1 int, t, a []int) int { 314 | K := len(t) 315 | twoU := -n1 * n1 316 | n1_k := n1 317 | for k := K; k > 0; k-- { 318 | twoU_k := minint(n1_k, t[k-1]) 319 | twoU += twoU_k * a[k] 320 | n1_k -= twoU_k 321 | } 322 | return twoU 323 | } 324 | 325 | func (d UDist) PMF(U float64) float64 { 326 | if U < 0 || U >= 0.5+float64(d.N1*d.N2) { 327 | return 0 328 | } 329 | 330 | if d.hasTies() { 331 | // makeUmemo computes the CDF directly. Take its 332 | // difference to get the PMF. 333 | p1, ok1 := makeUmemo(int(2*U)-1, d.N1, d.T)[len(d.T)][ukey{d.N1, int(2*U) - 1}] 334 | p2, ok2 := makeUmemo(int(2*U), d.N1, d.T)[len(d.T)][ukey{d.N1, int(2 * U)}] 335 | if !ok1 || !ok2 { 336 | panic("makeUmemo did not return expected memoization table") 337 | } 338 | return (p2 - p1) / mathx.Choose(d.N1+d.N2, d.N1) 339 | } 340 | 341 | // There are no ties. Use the fast algorithm. U must be integral. 342 | Ui := int(math.Floor(U)) 343 | // TODO: Use symmetry to minimize U 344 | return d.p(Ui)[Ui] 345 | } 346 | 347 | func (d UDist) CDF(U float64) float64 { 348 | if U < 0 { 349 | return 0 350 | } else if U >= float64(d.N1*d.N2) { 351 | return 1 352 | } 353 | 354 | if d.hasTies() { 355 | // TODO: Minimize U? 356 | p, ok := makeUmemo(int(2*U), d.N1, d.T)[len(d.T)][ukey{d.N1, int(2 * U)}] 357 | if !ok { 358 | panic("makeUmemo did not return expected memoization table") 359 | } 360 | return p / mathx.Choose(d.N1+d.N2, d.N1) 361 | } 362 | 363 | // There are no ties. Use the fast algorithm. U must be integral. 364 | Ui := int(math.Floor(U)) 365 | // The distribution is symmetric around U = m * n / 2. Sum up 366 | // whichever tail is smaller. 367 | flip := Ui >= (d.N1*d.N2+1)/2 368 | if flip { 369 | Ui = d.N1*d.N2 - Ui - 1 370 | } 371 | pdfs := d.p(Ui) 372 | p := 0.0 373 | for _, pdf := range pdfs[:Ui+1] { 374 | p += pdf 375 | } 376 | if flip { 377 | p = 1 - p 378 | } 379 | return p 380 | } 381 | 382 | func (d UDist) Step() float64 { 383 | return 0.5 384 | } 385 | 386 | func (d UDist) Bounds() (float64, float64) { 387 | // TODO: More precise bounds when there are ties. 388 | return 0, float64(d.N1 * d.N2) 389 | } 390 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // Benchstat computes and compares statistics about benchmarks. 6 | // 7 | // This package has moved. Please use https://golang.org/x/perf/cmd/benchstat 8 | package main 9 | 10 | import ( 11 | "bytes" 12 | "flag" 13 | "fmt" 14 | "html" 15 | "io/ioutil" 16 | "log" 17 | "os" 18 | "strconv" 19 | "strings" 20 | "unicode/utf8" 21 | 22 | "rsc.io/benchstat/internal/go-moremath/stats" 23 | ) 24 | 25 | func usage() { 26 | fmt.Fprintf(os.Stderr, "usage: benchstat [options] old.txt [new.txt] [more.txt ...]\n") 27 | fmt.Fprintf(os.Stderr, "options:\n") 28 | flag.PrintDefaults() 29 | os.Exit(2) 30 | } 31 | 32 | var ( 33 | flagDeltaTest = flag.String("delta-test", "utest", "significance `test` to apply to delta: utest, ttest, or none") 34 | flagAlpha = flag.Float64("alpha", 0.05, "consider change significant if p < `α`") 35 | flagGeomean = flag.Bool("geomean", false, "print the geometric mean of each file") 36 | flagHTML = flag.Bool("html", false, "print results as an HTML table") 37 | ) 38 | 39 | var deltaTestNames = map[string]func(old, new *Benchstat) (float64, error){ 40 | "none": notest, 41 | "u": utest, 42 | "u-test": utest, 43 | "utest": utest, 44 | "t": ttest, 45 | "t-test": ttest, 46 | "ttest": ttest, 47 | } 48 | 49 | type row struct { 50 | cols []string 51 | } 52 | 53 | func newRow(cols ...string) *row { 54 | return &row{cols: cols} 55 | } 56 | 57 | func (r *row) add(col string) { 58 | r.cols = append(r.cols, col) 59 | } 60 | 61 | func (r *row) trim() { 62 | for len(r.cols) > 0 && r.cols[len(r.cols)-1] == "" { 63 | r.cols = r.cols[:len(r.cols)-1] 64 | } 65 | } 66 | 67 | func main() { 68 | log.SetPrefix("benchstat: ") 69 | log.SetFlags(0) 70 | flag.Usage = usage 71 | flag.Parse() 72 | deltaTest := deltaTestNames[strings.ToLower(*flagDeltaTest)] 73 | if flag.NArg() < 1 || deltaTest == nil { 74 | flag.Usage() 75 | } 76 | 77 | // Read in benchmark data. 78 | c := readFiles(flag.Args()) 79 | for _, stat := range c.Stats { 80 | stat.ComputeStats() 81 | } 82 | 83 | var tables [][]*row 84 | switch len(c.Configs) { 85 | case 2: 86 | before, after := c.Configs[0], c.Configs[1] 87 | key := BenchKey{} 88 | for _, key.Unit = range c.Units { 89 | var table []*row 90 | metric := metricOf(key.Unit) 91 | for _, key.Benchmark = range c.Benchmarks { 92 | key.Config = before 93 | old := c.Stats[key] 94 | key.Config = after 95 | new := c.Stats[key] 96 | if old == nil || new == nil { 97 | continue 98 | } 99 | if len(table) == 0 { 100 | table = append(table, newRow("name", "old "+metric, "new "+metric, "delta")) 101 | } 102 | 103 | pval, testerr := deltaTest(old, new) 104 | 105 | scaler := newScaler(old.Mean, old.Unit) 106 | row := newRow(key.Benchmark, old.Format(scaler), new.Format(scaler), "~ ") 107 | if testerr == stats.ErrZeroVariance { 108 | row.add("(zero variance)") 109 | } else if testerr == stats.ErrSampleSize { 110 | row.add("(too few samples)") 111 | } else if testerr == stats.ErrSamplesEqual { 112 | row.add("(all equal)") 113 | } else if testerr != nil { 114 | row.add(fmt.Sprintf("(%s)", testerr)) 115 | } else if pval < *flagAlpha { 116 | row.cols[3] = fmt.Sprintf("%+.2f%%", ((new.Mean/old.Mean)-1.0)*100.0) 117 | } 118 | if len(row.cols) == 4 && pval != -1 { 119 | row.add(fmt.Sprintf("(p=%0.3f n=%d+%d)", pval, len(old.RValues), len(new.RValues))) 120 | } 121 | table = append(table, row) 122 | } 123 | if len(table) > 0 { 124 | table = addGeomean(table, c, key.Unit, true) 125 | tables = append(tables, table) 126 | } 127 | } 128 | 129 | default: 130 | key := BenchKey{} 131 | for _, key.Unit = range c.Units { 132 | var table []*row 133 | metric := metricOf(key.Unit) 134 | 135 | if len(c.Configs) > 1 { 136 | hdr := newRow("name \\ " + metric) 137 | for _, config := range c.Configs { 138 | hdr.add(config) 139 | } 140 | table = append(table, hdr) 141 | } else { 142 | table = append(table, newRow("name", metric)) 143 | } 144 | 145 | for _, key.Benchmark = range c.Benchmarks { 146 | row := newRow(key.Benchmark) 147 | var scaler func(float64) string 148 | for _, key.Config = range c.Configs { 149 | stat := c.Stats[key] 150 | if stat == nil { 151 | row.add("") 152 | continue 153 | } 154 | if scaler == nil { 155 | scaler = newScaler(stat.Mean, stat.Unit) 156 | } 157 | row.add(stat.Format(scaler)) 158 | } 159 | row.trim() 160 | if len(row.cols) > 1 { 161 | table = append(table, row) 162 | } 163 | } 164 | table = addGeomean(table, c, key.Unit, false) 165 | tables = append(tables, table) 166 | } 167 | } 168 | 169 | numColumn := 0 170 | for _, table := range tables { 171 | for _, row := range table { 172 | if numColumn < len(row.cols) { 173 | numColumn = len(row.cols) 174 | } 175 | } 176 | } 177 | 178 | max := make([]int, numColumn) 179 | for _, table := range tables { 180 | for _, row := range table { 181 | for i, s := range row.cols { 182 | n := utf8.RuneCountInString(s) 183 | if max[i] < n { 184 | max[i] = n 185 | } 186 | } 187 | } 188 | } 189 | 190 | var buf bytes.Buffer 191 | for i, table := range tables { 192 | if i > 0 { 193 | fmt.Fprintf(&buf, "\n") 194 | } 195 | 196 | if *flagHTML { 197 | fmt.Fprintf(&buf, "\n") 198 | fmt.Fprintf(&buf, "\n") 199 | printRow := func(row *row, tag string) { 200 | fmt.Fprintf(&buf, "") 201 | for _, cell := range row.cols { 202 | fmt.Fprintf(&buf, "<%s>%s", tag, html.EscapeString(cell), tag) 203 | } 204 | fmt.Fprintf(&buf, "\n") 205 | } 206 | printRow(table[0], "th") 207 | for _, row := range table[1:] { 208 | printRow(row, "td") 209 | } 210 | fmt.Fprintf(&buf, "
\n") 211 | continue 212 | } 213 | 214 | // headings 215 | row := table[0] 216 | for i, s := range row.cols { 217 | switch i { 218 | case 0: 219 | fmt.Fprintf(&buf, "%-*s", max[i], s) 220 | default: 221 | fmt.Fprintf(&buf, " %-*s", max[i], s) 222 | case len(row.cols) - 1: 223 | fmt.Fprintf(&buf, " %s\n", s) 224 | } 225 | } 226 | 227 | // data 228 | for _, row := range table[1:] { 229 | for i, s := range row.cols { 230 | switch i { 231 | case 0: 232 | fmt.Fprintf(&buf, "%-*s", max[i], s) 233 | default: 234 | if i == len(row.cols)-1 && len(s) > 0 && s[0] == '(' { 235 | // Left-align p value. 236 | fmt.Fprintf(&buf, " %s", s) 237 | break 238 | } 239 | fmt.Fprintf(&buf, " %*s", max[i], s) 240 | } 241 | } 242 | fmt.Fprintf(&buf, "\n") 243 | } 244 | } 245 | 246 | os.Stdout.Write(buf.Bytes()) 247 | } 248 | 249 | func addGeomean(table []*row, c *Collection, unit string, delta bool) []*row { 250 | if !*flagGeomean { 251 | return table 252 | } 253 | 254 | row := newRow("[Geo mean]") 255 | key := BenchKey{Unit: unit} 256 | geomeans := []float64{} 257 | for _, key.Config = range c.Configs { 258 | var means []float64 259 | for _, key.Benchmark = range c.Benchmarks { 260 | stat := c.Stats[key] 261 | if stat != nil { 262 | means = append(means, stat.Mean) 263 | } 264 | } 265 | if len(means) == 0 { 266 | row.add("") 267 | delta = false 268 | } else { 269 | geomean := stats.GeoMean(means) 270 | geomeans = append(geomeans, geomean) 271 | row.add(newScaler(geomean, unit)(geomean) + " ") 272 | } 273 | } 274 | if delta { 275 | row.add(fmt.Sprintf("%+.2f%%", ((geomeans[1]/geomeans[0])-1.0)*100.0)) 276 | } 277 | return append(table, row) 278 | } 279 | 280 | func timeScaler(ns float64) func(float64) string { 281 | var format string 282 | var scale float64 283 | switch x := ns / 1e9; { 284 | case x >= 99.5: 285 | format, scale = "%.0fs", 1 286 | case x >= 9.95: 287 | format, scale = "%.1fs", 1 288 | case x >= 0.995: 289 | format, scale = "%.2fs", 1 290 | case x >= 0.0995: 291 | format, scale = "%.0fms", 1000 292 | case x >= 0.00995: 293 | format, scale = "%.1fms", 1000 294 | case x >= 0.000995: 295 | format, scale = "%.2fms", 1000 296 | case x >= 0.0000995: 297 | format, scale = "%.0fµs", 1000*1000 298 | case x >= 0.00000995: 299 | format, scale = "%.1fµs", 1000*1000 300 | case x >= 0.000000995: 301 | format, scale = "%.2fµs", 1000*1000 302 | case x >= 0.0000000995: 303 | format, scale = "%.0fns", 1000*1000*1000 304 | case x >= 0.00000000995: 305 | format, scale = "%.1fns", 1000*1000*1000 306 | default: 307 | format, scale = "%.2fns", 1000*1000*1000 308 | } 309 | return func(ns float64) string { 310 | return fmt.Sprintf(format, ns/1e9*scale) 311 | } 312 | } 313 | 314 | func newScaler(val float64, unit string) func(float64) string { 315 | if unit == "ns/op" { 316 | return timeScaler(val) 317 | } 318 | 319 | var format string 320 | var scale float64 321 | var suffix string 322 | 323 | prescale := 1.0 324 | if unit == "MB/s" { 325 | prescale = 1e6 326 | } 327 | 328 | switch x := val * prescale; { 329 | case x >= 99500000000000: 330 | format, scale, suffix = "%.0f", 1e12, "T" 331 | case x >= 9950000000000: 332 | format, scale, suffix = "%.1f", 1e12, "T" 333 | case x >= 995000000000: 334 | format, scale, suffix = "%.2f", 1e12, "T" 335 | case x >= 99500000000: 336 | format, scale, suffix = "%.0f", 1e9, "G" 337 | case x >= 9950000000: 338 | format, scale, suffix = "%.1f", 1e9, "G" 339 | case x >= 995000000: 340 | format, scale, suffix = "%.2f", 1e9, "G" 341 | case x >= 99500000: 342 | format, scale, suffix = "%.0f", 1e6, "M" 343 | case x >= 9950000: 344 | format, scale, suffix = "%.1f", 1e6, "M" 345 | case x >= 995000: 346 | format, scale, suffix = "%.2f", 1e6, "M" 347 | case x >= 99500: 348 | format, scale, suffix = "%.0f", 1e3, "k" 349 | case x >= 9950: 350 | format, scale, suffix = "%.1f", 1e3, "k" 351 | case x >= 995: 352 | format, scale, suffix = "%.2f", 1e3, "k" 353 | case x >= 99.5: 354 | format, scale, suffix = "%.0f", 1, "" 355 | case x >= 9.95: 356 | format, scale, suffix = "%.1f", 1, "" 357 | default: 358 | format, scale, suffix = "%.2f", 1, "" 359 | } 360 | 361 | if unit == "B/op" { 362 | suffix += "B" 363 | } 364 | if unit == "MB/s" { 365 | suffix += "B/s" 366 | } 367 | scale /= prescale 368 | 369 | return func(val float64) string { 370 | return fmt.Sprintf(format+suffix, val/scale) 371 | } 372 | } 373 | 374 | func (b *Benchstat) Format(scaler func(float64) string) string { 375 | diff := 1 - b.Min/b.Mean 376 | if d := b.Max/b.Mean - 1; d > diff { 377 | diff = d 378 | } 379 | s := scaler(b.Mean) 380 | if b.Mean == 0 { 381 | s += " " 382 | } else { 383 | s = fmt.Sprintf("%s ±%3s", s, fmt.Sprintf("%.0f%%", diff*100.0)) 384 | } 385 | return s 386 | } 387 | 388 | // ComputeStats updates the derived statistics in s from the raw 389 | // samples in s.Values. 390 | func (stat *Benchstat) ComputeStats() { 391 | // Discard outliers. 392 | values := stats.Sample{Xs: stat.Values} 393 | q1, q3 := values.Percentile(0.25), values.Percentile(0.75) 394 | lo, hi := q1-1.5*(q3-q1), q3+1.5*(q3-q1) 395 | for _, value := range stat.Values { 396 | if lo <= value && value <= hi { 397 | stat.RValues = append(stat.RValues, value) 398 | } 399 | } 400 | 401 | // Compute statistics of remaining data. 402 | stat.Min, stat.Max = stats.Bounds(stat.RValues) 403 | stat.Mean = stats.Mean(stat.RValues) 404 | } 405 | 406 | // A Benchstat is the metrics along one axis (e.g., ns/op or MB/s) 407 | // for all runs of a specific benchmark. 408 | type Benchstat struct { 409 | Unit string 410 | Values []float64 // metrics 411 | RValues []float64 // metrics with outliers removed 412 | Min float64 // min of RValues 413 | Mean float64 // mean of RValues 414 | Max float64 // max of RValues 415 | } 416 | 417 | // A BenchKey identifies one metric (e.g., "ns/op", "B/op") from one 418 | // benchmark (function name sans "Benchmark" prefix) in one 419 | // configuration (input file name). 420 | type BenchKey struct { 421 | Config, Benchmark, Unit string 422 | } 423 | 424 | type Collection struct { 425 | Stats map[BenchKey]*Benchstat 426 | 427 | // Configs, Benchmarks, and Units give the set of configs, 428 | // benchmarks, and units from the keys in Stats in an order 429 | // meant to match the order the benchmarks were read in. 430 | Configs, Benchmarks, Units []string 431 | } 432 | 433 | func (c *Collection) AddStat(key BenchKey) *Benchstat { 434 | if stat, ok := c.Stats[key]; ok { 435 | return stat 436 | } 437 | 438 | addString := func(strings *[]string, add string) { 439 | for _, s := range *strings { 440 | if s == add { 441 | return 442 | } 443 | } 444 | *strings = append(*strings, add) 445 | } 446 | addString(&c.Configs, key.Config) 447 | addString(&c.Benchmarks, key.Benchmark) 448 | addString(&c.Units, key.Unit) 449 | stat := &Benchstat{Unit: key.Unit} 450 | c.Stats[key] = stat 451 | return stat 452 | } 453 | 454 | // readFiles reads a set of benchmark files. 455 | func readFiles(files []string) *Collection { 456 | c := Collection{Stats: make(map[BenchKey]*Benchstat)} 457 | for _, file := range files { 458 | readFile(file, &c) 459 | } 460 | return &c 461 | } 462 | 463 | // readFile reads a set of benchmarks from a file in to a Collection. 464 | func readFile(file string, c *Collection) { 465 | c.Configs = append(c.Configs, file) 466 | key := BenchKey{Config: file} 467 | 468 | text, err := ioutil.ReadFile(file) 469 | if err != nil { 470 | log.Fatal(err) 471 | } 472 | for _, line := range strings.Split(string(text), "\n") { 473 | f := strings.Fields(line) 474 | if len(f) < 4 { 475 | continue 476 | } 477 | name := f[0] 478 | if !strings.HasPrefix(name, "Benchmark") { 479 | continue 480 | } 481 | name = strings.TrimPrefix(name, "Benchmark") 482 | n, _ := strconv.Atoi(f[1]) 483 | if n == 0 { 484 | continue 485 | } 486 | 487 | key.Benchmark = name 488 | for i := 2; i+2 <= len(f); i += 2 { 489 | val, err := strconv.ParseFloat(f[i], 64) 490 | if err != nil { 491 | continue 492 | } 493 | key.Unit = f[i+1] 494 | stat := c.AddStat(key) 495 | stat.Values = append(stat.Values, val) 496 | } 497 | } 498 | } 499 | 500 | func metricOf(unit string) string { 501 | switch unit { 502 | case "ns/op": 503 | return "time/op" 504 | case "B/op": 505 | return "alloc/op" 506 | case "MB/s": 507 | return "speed" 508 | default: 509 | return unit 510 | } 511 | } 512 | 513 | // Significance tests. 514 | 515 | func notest(old, new *Benchstat) (pval float64, err error) { 516 | return -1, nil 517 | } 518 | 519 | func ttest(old, new *Benchstat) (pval float64, err error) { 520 | t, err := stats.TwoSampleWelchTTest(stats.Sample{Xs: old.RValues}, stats.Sample{Xs: new.RValues}, stats.LocationDiffers) 521 | if err != nil { 522 | return -1, err 523 | } 524 | return t.P, nil 525 | } 526 | 527 | func utest(old, new *Benchstat) (pval float64, err error) { 528 | u, err := stats.MannWhitneyUTest(old.RValues, new.RValues, stats.LocationDiffers) 529 | if err != nil { 530 | return -1, err 531 | } 532 | return u.P, nil 533 | } 534 | --------------------------------------------------------------------------------