├── internal ├── README ├── go-moremath │ ├── vec │ │ ├── package.go │ │ └── vec.go │ ├── README.md │ ├── scale │ │ ├── err.go │ │ ├── package.go │ │ ├── util.go │ │ ├── interface.go │ │ ├── linear.go │ │ ├── linear_test.go │ │ ├── log.go │ │ └── log_test.go │ ├── mathx │ │ ├── package.go │ │ ├── sign.go │ │ ├── beta_test.go │ │ ├── choose.go │ │ └── beta.go │ ├── stats │ │ ├── kdekernel_string.go │ │ ├── kdeboundarymethod_string.go │ │ ├── sample_test.go │ │ ├── locationhypothesis_string.go │ │ ├── hypergdist_test.go │ │ ├── package.go │ │ ├── normaldist_test.go │ │ ├── tdist.go │ │ ├── dist_test.go │ │ ├── linearhist.go │ │ ├── deltadist.go │ │ ├── util_test.go │ │ ├── hist.go │ │ ├── loghist.go │ │ ├── kde_test.go │ │ ├── ttest_test.go │ │ ├── utest_test.go │ │ ├── stream.go │ │ ├── alg.go │ │ ├── hypergdist.go │ │ ├── tdist_test.go │ │ ├── normaldist.go │ │ ├── ttest.go │ │ ├── dist.go │ │ ├── sample.go │ │ ├── utest.go │ │ ├── udist_test.go │ │ ├── kde.go │ │ └── udist.go │ ├── internal │ │ └── mathtest │ │ │ └── mathtest.go │ └── LICENSE └── import.sh ├── README.md ├── LICENSE └── main.go /internal/README: -------------------------------------------------------------------------------- 1 | go-moremath copied from github.com/aclements/go-moremath. 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Benchstat has moved. Please use [golang.org/x/perf/cmd/benchstat](https://golang.org/x/perf/cmd/benchstat)! 2 | -------------------------------------------------------------------------------- /internal/go-moremath/vec/package.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // Package vec provides functions for float64 vectors. 6 | package vec // import "rsc.io/benchstat/internal/go-moremath/vec" 7 | -------------------------------------------------------------------------------- /internal/import.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | 5 | if [ -e go-moremath ]; then 6 | mv go-moremath go-moremath.old 7 | fi 8 | 9 | git clone --depth=1 http://github.com/aclements/go-moremath 10 | rm -rf go-moremath/.git 11 | sed -i -e 's,github.com/aclements/\(go-moremath\),rsc.io/benchstat/internal/\1,' $(find -name \*.go) 12 | -------------------------------------------------------------------------------- /internal/go-moremath/README.md: -------------------------------------------------------------------------------- 1 | These packages provide more specialized math routines than are 2 | available in the standard Go math package. go-moremath currently 3 | focuses on statistical routines, with particular focus on high-quality 4 | implementations and APIs for non-parametric methods. 5 | 6 | The API is not stable. 7 | 8 | Please see the [documentation](https://godoc.org/github.com/aclements/go-moremath). 9 | -------------------------------------------------------------------------------- /internal/go-moremath/scale/err.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
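To make the effect of import.sh concrete, here is a small sketch (not a file in this repository) of how code inside the benchstat module refers to the vendored copy once the sed rewrite has run. Because the packages sit under internal/, only code within the rsc.io/benchstat module can import them; vec.Linspace is the helper defined in internal/go-moremath/vec/vec.go.

package main

import (
	"fmt"

	// Rewritten by import.sh from "github.com/aclements/go-moremath/vec".
	"rsc.io/benchstat/internal/go-moremath/vec"
)

func main() {
	fmt.Println(vec.Linspace(0, 1, 3)) // prints [0 0.5 1]
}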
4 | 5 | package scale 6 | 7 | // RangeErr is an error that indicates some argument or value is out 8 | // of range. 9 | type RangeErr string 10 | 11 | func (r RangeErr) Error() string { 12 | return string(r) 13 | } 14 | -------------------------------------------------------------------------------- /internal/go-moremath/mathx/package.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // Package mathx implements special functions not provided by the 6 | // standard math package. 7 | package mathx // import "rsc.io/benchstat/internal/go-moremath/mathx" 8 | 9 | import "math" 10 | 11 | var nan = math.NaN() 12 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/kdekernel_string.go: -------------------------------------------------------------------------------- 1 | // generated by stringer -type=KDEKernel; DO NOT EDIT 2 | 3 | package stats 4 | 5 | import "fmt" 6 | 7 | const _KDEKernel_name = "GaussianKernelDeltaKernel" 8 | 9 | var _KDEKernel_index = [...]uint8{0, 14, 25} 10 | 11 | func (i KDEKernel) String() string { 12 | if i < 0 || i+1 >= KDEKernel(len(_KDEKernel_index)) { 13 | return fmt.Sprintf("KDEKernel(%d)", i) 14 | } 15 | return _KDEKernel_name[_KDEKernel_index[i]:_KDEKernel_index[i+1]] 16 | } 17 | -------------------------------------------------------------------------------- /internal/go-moremath/mathx/sign.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package mathx 6 | 7 | // Sign returns the sign of x: -1 if x < 0, 0 if x == 0, 1 if x > 0. 8 | // If x is NaN, it returns NaN. 9 | func Sign(x float64) float64 { 10 | if x == 0 { 11 | return 0 12 | } else if x < 0 { 13 | return -1 14 | } else if x > 0 { 15 | return 1 16 | } 17 | return nan 18 | } 19 | -------------------------------------------------------------------------------- /internal/go-moremath/scale/package.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // Package scale provides abstractions for scales that map from one 6 | // domain to another and provide methods for indicating human-readable 7 | // intervals in the input domain. The most common type of scale is a 8 | // quantitative scale, such as a linear or log scale, which is 9 | // captured by the Quantitative interface. 
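As a conceptual illustration only (editorial, not the package's API, which is the Quantitative interface defined in interface.go), a linear mapping from an input range [lo, hi] onto [0, 1] and its inverse look like this:

package main

import "fmt"

func main() {
	lo, hi := 10.0, 20.0
	toUnit := func(x float64) float64 { return (x - lo) / (hi - lo) } // the Map direction
	fromUnit := func(y float64) float64 { return lo + y*(hi-lo) }     // the Unmap direction
	fmt.Println(toUnit(15), fromUnit(0.5)) // prints 0.5 15
}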
10 | package scale 11 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/kdeboundarymethod_string.go: -------------------------------------------------------------------------------- 1 | // generated by stringer -type=KDEBoundaryMethod; DO NOT EDIT 2 | 3 | package stats 4 | 5 | import "fmt" 6 | 7 | const _KDEBoundaryMethod_name = "BoundaryReflect" 8 | 9 | var _KDEBoundaryMethod_index = [...]uint8{0, 15} 10 | 11 | func (i KDEBoundaryMethod) String() string { 12 | if i < 0 || i+1 >= KDEBoundaryMethod(len(_KDEBoundaryMethod_index)) { 13 | return fmt.Sprintf("KDEBoundaryMethod(%d)", i) 14 | } 15 | return _KDEBoundaryMethod_name[_KDEBoundaryMethod_index[i]:_KDEBoundaryMethod_index[i+1]] 16 | } 17 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/sample_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package stats 6 | 7 | import "testing" 8 | 9 | func TestSamplePercentile(t *testing.T) { 10 | s := Sample{Xs: []float64{15, 20, 35, 40, 50}} 11 | testFunc(t, "Percentile", s.Percentile, map[float64]float64{ 12 | -1: 15, 13 | 0: 15, 14 | .05: 15, 15 | .30: 19.666666666666666, 16 | .40: 27, 17 | .95: 50, 18 | 1: 50, 19 | 2: 50, 20 | }) 21 | } 22 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/locationhypothesis_string.go: -------------------------------------------------------------------------------- 1 | // generated by stringer -type LocationHypothesis; DO NOT EDIT 2 | 3 | package stats 4 | 5 | import "fmt" 6 | 7 | const _LocationHypothesis_name = "LocationLessLocationDiffersLocationGreater" 8 | 9 | var _LocationHypothesis_index = [...]uint8{0, 12, 27, 42} 10 | 11 | func (i LocationHypothesis) String() string { 12 | i -= -1 13 | if i < 0 || i+1 >= LocationHypothesis(len(_LocationHypothesis_index)) { 14 | return fmt.Sprintf("LocationHypothesis(%d)", i+-1) 15 | } 16 | return _LocationHypothesis_name[_LocationHypothesis_index[i]:_LocationHypothesis_index[i+1]] 17 | } 18 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/hypergdist_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package stats 6 | 7 | import ( 8 | "fmt" 9 | "testing" 10 | ) 11 | 12 | func TestHypergeometricDist(t *testing.T) { 13 | dist1 := HypergeometicDist{N: 50, K: 5, Draws: 10} 14 | testFunc(t, fmt.Sprintf("%+v.PMF", dist1), dist1.PMF, 15 | map[float64]float64{ 16 | -0.1: 0, 17 | 4: 0.003964583058, 18 | 4.9: 0.003964583058, // Test rounding 19 | 5: 0.000118937492, 20 | 5.9: 0.000118937492, 21 | 6: 0, 22 | }) 23 | testDiscreteCDF(t, fmt.Sprintf("%+v.CDF", dist1), dist1) 24 | } 25 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/package.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | // Package stats implements several statistical distributions, 6 | // hypothesis tests, and functions for descriptive statistics. 7 | // 8 | // Currently stats is fairly small, but for what it does implement, it 9 | // focuses on high quality, fast implementations with good, idiomatic 10 | // Go APIs. 11 | package stats // import "rsc.io/benchstat/internal/go-moremath/stats" 12 | 13 | import ( 14 | "errors" 15 | "math" 16 | ) 17 | 18 | var inf = math.Inf(1) 19 | var nan = math.NaN() 20 | 21 | // TODO: Put all errors in the same place and maybe unify them. 22 | 23 | var ( 24 | ErrSamplesEqual = errors.New("all samples are equal") 25 | ) 26 | -------------------------------------------------------------------------------- /internal/go-moremath/mathx/beta_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package mathx 6 | 7 | import ( 8 | "testing" 9 | 10 | . "rsc.io/benchstat/internal/go-moremath/internal/mathtest" 11 | ) 12 | 13 | func TestBetaInc(t *testing.T) { 14 | // Example values from MATLAB betainc documentation. 15 | WantFunc(t, "I_0.5(%v, 3)", 16 | func(a float64) float64 { return BetaInc(0.5, a, 3) }, 17 | map[float64]float64{ 18 | 0: 1.00000000000000, 19 | 1: 0.87500000000000, 20 | 2: 0.68750000000000, 21 | 3: 0.50000000000000, 22 | 4: 0.34375000000000, 23 | 5: 0.22656250000000, 24 | 6: 0.14453125000000, 25 | 7: 0.08984375000000, 26 | 8: 0.05468750000000, 27 | 9: 0.03271484375000, 28 | 10: 0.01928710937500}) 29 | } 30 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/normaldist_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package stats 6 | 7 | import ( 8 | "fmt" 9 | "math" 10 | "testing" 11 | ) 12 | 13 | func TestNormalDist(t *testing.T) { 14 | d := StdNormal 15 | 16 | testFunc(t, fmt.Sprintf("%+v.PDF", d), d.PDF, map[float64]float64{ 17 | -10000: 0, // approx 18 | -1: 1 / math.Sqrt(2*math.Pi) * math.Exp(-0.5), 19 | 0: 1 / math.Sqrt(2*math.Pi), 20 | 1: 1 / math.Sqrt(2*math.Pi) * math.Exp(-0.5), 21 | 10000: 0, // approx 22 | }) 23 | 24 | testFunc(t, fmt.Sprintf("%+v.CDF", d), d.CDF, map[float64]float64{ 25 | -10000: 0, // approx 26 | 0: 0.5, 27 | 10000: 1, // approx 28 | }) 29 | 30 | d2 := NormalDist{Mu: 2, Sigma: 5} 31 | testInvCDF(t, d, false) 32 | testInvCDF(t, d2, false) 33 | } 34 | -------------------------------------------------------------------------------- /internal/go-moremath/scale/util.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package scale 6 | 7 | // clamp clamps x to the range [0, 1]. 8 | func clamp(x float64) float64 { 9 | if x < 0 { 10 | return 0 11 | } 12 | if x > 1 { 13 | return 1 14 | } 15 | return x 16 | } 17 | 18 | // autoScale returns the smallest m for which fn(m) <= n. 
This is 19 | // intended to be used for auto-scaling tick values, where fn maps 20 | // from a tick "level" to the number of ticks at that level in the 21 | // scale's input range. 22 | // 23 | // fn must be a monotonically decreasing function. 24 | func autoScale(n int, fn func(level int) int, guess int) int { 25 | m := guess 26 | if fn(m) <= n { 27 | for m--; fn(m) <= n; m-- { 28 | } 29 | return m + 1 30 | } else { 31 | for m++; fn(m) > n; m++ { 32 | } 33 | return m 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/tdist.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package stats 6 | 7 | import ( 8 | "math" 9 | 10 | "rsc.io/benchstat/internal/go-moremath/mathx" 11 | ) 12 | 13 | // A TDist is a Student's t-distribution with V degrees of freedom. 14 | type TDist struct { 15 | V float64 16 | } 17 | 18 | func lgamma(x float64) float64 { 19 | y, _ := math.Lgamma(x) 20 | return y 21 | } 22 | 23 | func (t TDist) PDF(x float64) float64 { 24 | return math.Exp(lgamma((t.V+1)/2)-lgamma(t.V/2)) / 25 | math.Sqrt(t.V*math.Pi) * math.Pow(1+(x*x)/t.V, -(t.V+1)/2) 26 | } 27 | 28 | func (t TDist) CDF(x float64) float64 { 29 | if x == 0 { 30 | return 0.5 31 | } else if x > 0 { 32 | return 1 - 0.5*mathx.BetaInc(t.V/(t.V+x*x), t.V/2, 0.5) 33 | } else if x < 0 { 34 | return 1 - t.CDF(-x) 35 | } else { 36 | return math.NaN() 37 | } 38 | } 39 | 40 | func (t TDist) Bounds() (float64, float64) { 41 | return -4, 4 42 | } 43 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/dist_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package stats 6 | 7 | import ( 8 | "fmt" 9 | "testing" 10 | ) 11 | 12 | type funnyCDF struct { 13 | left float64 14 | } 15 | 16 | func (f funnyCDF) CDF(x float64) float64 { 17 | switch { 18 | case x < f.left: 19 | return 0 20 | case x < f.left+1: 21 | return (x - f.left) / 2 22 | case x < f.left+2: 23 | return 0.5 24 | case x < f.left+3: 25 | return (x-f.left-2)/2 + 0.5 26 | default: 27 | return 1 28 | } 29 | } 30 | 31 | func (f funnyCDF) Bounds() (float64, float64) { 32 | return f.left, f.left + 3 33 | } 34 | 35 | func TestInvCDF(t *testing.T) { 36 | for _, f := range []funnyCDF{funnyCDF{1}, funnyCDF{-1.5}, funnyCDF{-4}} { 37 | testFunc(t, fmt.Sprintf("InvCDF(funnyCDF%+v)", f), InvCDF(f), 38 | map[float64]float64{ 39 | -0.1: nan, 40 | 0: f.left, 41 | 0.25: f.left + 0.5, 42 | 0.5: f.left + 1, 43 | 0.75: f.left + 2.5, 44 | 1: f.left + 3, 45 | 1.1: nan, 46 | }) 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/linearhist.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package stats 6 | 7 | // LinearHist is a Histogram with uniformly-sized bins. 
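For illustration, a test-style sketch (not one of the vendored files) that fills a LinearHist using the NewLinearHist, Add, and Counts methods defined just below; values outside the configured range are tallied in the low and high counters.

package stats

import "fmt"

func ExampleLinearHist() {
	// Ten one-unit bins covering 0 through 10.
	h := NewLinearHist(0, 10, 10)
	for _, x := range []float64{0.5, 1.5, 1.7, 9.9, 12} {
		h.Add(x)
	}
	low, bins, high := h.Counts()
	// 12 lies above the highest bin, so it lands in the high counter.
	fmt.Println(low, bins[0], bins[1], bins[9], high)
	// Output: 0 1 2 1 1
}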
8 | type LinearHist struct { 9 | min, max, delta float64 10 | low, high uint 11 | bins []uint 12 | } 13 | 14 | // NewLinearHist returns an empty histogram with nbins uniformly-sized 15 | // bins spanning [min, max]. 16 | func NewLinearHist(min, max float64, nbins int) *LinearHist { 17 | delta := float64(nbins) / (max - min) 18 | return &LinearHist{min, max, delta, 0, 0, make([]uint, nbins)} 19 | } 20 | 21 | func (h *LinearHist) bin(x float64) int { 22 | return int(h.delta * (x - h.min)) 23 | } 24 | 25 | func (h *LinearHist) Add(x float64) { 26 | bin := h.bin(x) 27 | if bin < 0 { 28 | h.low++ 29 | } else if bin >= len(h.bins) { 30 | h.high++ 31 | } else { 32 | h.bins[bin]++ 33 | } 34 | } 35 | 36 | func (h *LinearHist) Counts() (uint, []uint, uint) { 37 | return h.low, h.bins, h.high 38 | } 39 | 40 | func (h *LinearHist) BinToValue(bin float64) float64 { 41 | return h.min + bin*h.delta 42 | } 43 | -------------------------------------------------------------------------------- /internal/go-moremath/internal/mathtest/mathtest.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package mathtest 6 | 7 | import ( 8 | "fmt" 9 | "math" 10 | "sort" 11 | "strings" 12 | "testing" 13 | ) 14 | 15 | // Aeq returns true if expect and got are equal to 8 significant 16 | // figures (1 part in 100 million). 17 | func Aeq(expect, got float64) bool { 18 | if expect < 0 && got < 0 { 19 | expect, got = -expect, -got 20 | } 21 | return expect*0.99999999 <= got && got*0.99999999 <= expect 22 | } 23 | 24 | func WantFunc(t *testing.T, name string, f func(float64) float64, vals map[float64]float64) { 25 | xs := make([]float64, 0, len(vals)) 26 | for x := range vals { 27 | xs = append(xs, x) 28 | } 29 | sort.Float64s(xs) 30 | 31 | for _, x := range xs { 32 | want, got := vals[x], f(x) 33 | if math.IsNaN(want) && math.IsNaN(got) || Aeq(want, got) { 34 | continue 35 | } 36 | var label string 37 | if strings.Contains(name, "%v") { 38 | label = fmt.Sprintf(name, x) 39 | } else { 40 | label = fmt.Sprintf("%s(%v)", name, x) 41 | } 42 | t.Errorf("want %s=%v, got %v", label, want, got) 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/deltadist.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package stats 6 | 7 | // DeltaDist is the Dirac delta function, centered at T, with total 8 | // area 1. 9 | // 10 | // The CDF of the Dirac delta function is the Heaviside step function, 11 | // centered at T. Specifically, f(T) == 1. 
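A short test-style sketch (editorial, not part of the package) of that step behavior, using the DeltaDist methods defined below:

package stats

import "fmt"

func ExampleDeltaDist() {
	d := DeltaDist{T: 2}
	// All probability mass sits at T, so the CDF steps from 0 to 1 at T.
	fmt.Println(d.CDF(1.9), d.CDF(2), d.CDF(2.1))
	// The PDF is zero everywhere except T, where it is infinite.
	fmt.Println(d.PDF(1.9), d.PDF(2))
	// Output:
	// 0 1 1
	// 0 +Inf
}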
12 | type DeltaDist struct { 13 | T float64 14 | } 15 | 16 | func (d DeltaDist) PDF(x float64) float64 { 17 | if x == d.T { 18 | return inf 19 | } 20 | return 0 21 | } 22 | 23 | func (d DeltaDist) pdfEach(xs []float64) []float64 { 24 | res := make([]float64, len(xs)) 25 | for i, x := range xs { 26 | if x == d.T { 27 | res[i] = inf 28 | } 29 | } 30 | return res 31 | } 32 | 33 | func (d DeltaDist) CDF(x float64) float64 { 34 | if x >= d.T { 35 | return 1 36 | } 37 | return 0 38 | } 39 | 40 | func (d DeltaDist) cdfEach(xs []float64) []float64 { 41 | res := make([]float64, len(xs)) 42 | for i, x := range xs { 43 | res[i] = d.CDF(x) 44 | } 45 | return res 46 | } 47 | 48 | func (d DeltaDist) InvCDF(y float64) float64 { 49 | if y < 0 || y > 1 { 50 | return nan 51 | } 52 | return d.T 53 | } 54 | 55 | func (d DeltaDist) Bounds() (float64, float64) { 56 | return d.T - 1, d.T + 1 57 | } 58 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2009 The Go Authors. All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are 5 | met: 6 | 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above 10 | copyright notice, this list of conditions and the following disclaimer 11 | in the documentation and/or other materials provided with the 12 | distribution. 13 | * Neither the name of Google Inc. nor the names of its 14 | contributors may be used to endorse or promote products derived from 15 | this software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /internal/go-moremath/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015 The Go Authors. All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are 5 | met: 6 | 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above 10 | copyright notice, this list of conditions and the following disclaimer 11 | in the documentation and/or other materials provided with the 12 | distribution. 13 | * Neither the name of Google Inc. 
nor the names of its 14 | contributors may be used to endorse or promote products derived from 15 | this software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /internal/go-moremath/mathx/choose.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package mathx 6 | 7 | import "math" 8 | 9 | const smallFactLimit = 20 // 20! => 62 bits 10 | var smallFact [smallFactLimit + 1]int64 11 | 12 | func init() { 13 | smallFact[0] = 1 14 | fact := int64(1) 15 | for n := int64(1); n <= smallFactLimit; n++ { 16 | fact *= n 17 | smallFact[n] = fact 18 | } 19 | } 20 | 21 | // Choose returns the binomial coefficient of n and k. 22 | func Choose(n, k int) float64 { 23 | if k == 0 || k == n { 24 | return 1 25 | } 26 | if k < 0 || n < k { 27 | return 0 28 | } 29 | if n <= smallFactLimit { // Implies k <= smallFactLimit 30 | // It's faster to do several integer multiplications 31 | // than it is to do an extra integer division. 32 | // Remarkably, this is also faster than pre-computing 33 | // Pascal's triangle (presumably because this is very 34 | // cache efficient). 35 | numer := int64(1) 36 | for n1 := int64(n - (k - 1)); n1 <= int64(n); n1++ { 37 | numer *= n1 38 | } 39 | denom := smallFact[k] 40 | return float64(numer / denom) 41 | } 42 | 43 | return math.Exp(lchoose(n, k)) 44 | } 45 | 46 | // Lchoose returns math.Log(Choose(n, k)). 47 | func Lchoose(n, k int) float64 { 48 | if k == 0 || k == n { 49 | return 0 50 | } 51 | if k < 0 || n < k { 52 | return math.NaN() 53 | } 54 | return lchoose(n, k) 55 | } 56 | 57 | func lchoose(n, k int) float64 { 58 | a, _ := math.Lgamma(float64(n + 1)) 59 | b, _ := math.Lgamma(float64(k + 1)) 60 | c, _ := math.Lgamma(float64(n - k + 1)) 61 | return a - b - c 62 | } 63 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/util_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
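Returning to the binomial-coefficient helpers in mathx/choose.go above, a minimal test-style sketch (not one of the vendored files); for n <= 20 the result comes from exact integer arithmetic, and an out-of-range k yields 0.

package mathx

import "fmt"

func ExampleChoose() {
	fmt.Println(Choose(5, 2), Choose(10, 3), Choose(4, 5))
	// Output: 10 120 0
}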
4 | 5 | package stats 6 | 7 | import ( 8 | "fmt" 9 | "testing" 10 | 11 | "rsc.io/benchstat/internal/go-moremath/internal/mathtest" 12 | "rsc.io/benchstat/internal/go-moremath/vec" 13 | ) 14 | 15 | var aeq = mathtest.Aeq 16 | var testFunc = mathtest.WantFunc 17 | 18 | func testDiscreteCDF(t *testing.T, name string, dist DiscreteDist) { 19 | // Build the expected CDF out of the PMF. 20 | l, h := dist.Bounds() 21 | s := dist.Step() 22 | want := map[float64]float64{l - 0.1: 0, h: 1} 23 | sum := 0.0 24 | for x := l; x < h; x += s { 25 | sum += dist.PMF(x) 26 | want[x] = sum 27 | want[x+s/2] = sum 28 | } 29 | 30 | testFunc(t, name, dist.CDF, want) 31 | } 32 | 33 | func testInvCDF(t *testing.T, dist Dist, bounded bool) { 34 | inv := InvCDF(dist) 35 | name := fmt.Sprintf("InvCDF(%+v)", dist) 36 | cdfName := fmt.Sprintf("CDF(%+v)", dist) 37 | 38 | // Test bounds. 39 | vals := map[float64]float64{-0.01: nan, 1.01: nan} 40 | if !bounded { 41 | vals[0] = -inf 42 | vals[1] = inf 43 | } 44 | testFunc(t, name, inv, vals) 45 | 46 | if bounded { 47 | lo, hi := inv(0), inv(1) 48 | vals := map[float64]float64{ 49 | lo - 0.01: 0, lo: 0, 50 | hi: 1, hi + 0.01: 1, 51 | } 52 | testFunc(t, cdfName, dist.CDF, vals) 53 | if got := dist.CDF(lo + 0.01); !(got > 0) { 54 | t.Errorf("%s(0)=%v, but %s(%v)=0", name, lo, cdfName, lo+0.01) 55 | } 56 | if got := dist.CDF(hi - 0.01); !(got < 1) { 57 | t.Errorf("%s(1)=%v, but %s(%v)=1", name, hi, cdfName, hi-0.01) 58 | } 59 | } 60 | 61 | // Test points between. 62 | vals = map[float64]float64{} 63 | for _, p := range vec.Linspace(0, 1, 11) { 64 | if p == 0 || p == 1 { 65 | continue 66 | } 67 | x := inv(p) 68 | vals[x] = x 69 | } 70 | testFunc(t, fmt.Sprintf("InvCDF(CDF(%+v))", dist), 71 | func(x float64) float64 { 72 | return inv(dist.CDF(x)) 73 | }, 74 | vals) 75 | } 76 | -------------------------------------------------------------------------------- /internal/go-moremath/scale/interface.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package scale 6 | 7 | // A Quantative scale is an invertible function from some continuous 8 | // input range to an output domain of [0, 1]. 9 | type Quantitative interface { 10 | // Map maps from a value x in the input range to [0, 1]. If x 11 | // is outside the input range and clamping is enabled, x will 12 | // first be clamped to the input range. 13 | Map(x float64) float64 14 | 15 | // Unmap is the inverse of Map. That is, if x is in the input 16 | // range or clamping is disabled, x = Unmap(Map(x)). If 17 | // clamping is enabled and y is outside [0,1], the results are 18 | // undefined. 19 | Unmap(y float64) float64 20 | 21 | // SetClamp sets the clamping mode of this scale. 22 | SetClamp(bool) 23 | 24 | // Ticks returns a set of at most n major ticks, plus minor 25 | // ticks. These ticks will have "nice" values within the input 26 | // range. Both arrays are sorted in ascending order and minor 27 | // includes ticks in major. 28 | Ticks(n int) (major, minor []float64) 29 | 30 | // Nice expands the input range of this scale to "nice" values 31 | // for covering the input range with n major ticks. After 32 | // calling Nice(n), the first and last major ticks returned by 33 | // Ticks(n) will equal the lower and upper bounds of the input 34 | // range. 
35 | Nice(n int) 36 | } 37 | 38 | // A QQ maps from a source Quantitative scale to a destination 39 | // Quantitative scale. 40 | type QQ struct { 41 | Src, Dest Quantitative 42 | } 43 | 44 | // Map maps from a value x in the source scale's input range to a 45 | // value y in the destination scale's input range. 46 | func (q QQ) Map(x float64) float64 { 47 | return q.Dest.Unmap(q.Src.Map(x)) 48 | } 49 | 50 | // Unmap maps from a value y in the destination scale's input range to 51 | // a value x in the source scale's input range. 52 | func (q QQ) Unmap(x float64) float64 { 53 | return q.Src.Unmap(q.Dest.Map(x)) 54 | } 55 | -------------------------------------------------------------------------------- /internal/go-moremath/vec/vec.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package vec 6 | 7 | import "math" 8 | 9 | // Vectorize returns a function g(xs) that applies f to each x in xs. 10 | // 11 | // f may be evaluated in parallel and in any order. 12 | func Vectorize(f func(float64) float64) func(xs []float64) []float64 { 13 | return func(xs []float64) []float64 { 14 | return Map(f, xs) 15 | } 16 | } 17 | 18 | // Map returns f(x) for each x in xs. 19 | // 20 | // f may be evaluated in parallel and in any order. 21 | func Map(f func(float64) float64, xs []float64) []float64 { 22 | // TODO(austin) Parallelize 23 | res := make([]float64, len(xs)) 24 | for i, x := range xs { 25 | res[i] = f(x) 26 | } 27 | return res 28 | } 29 | 30 | // Linspace returns num values spaced evenly between lo and hi, 31 | // inclusive. If num is 1, this returns an array consisting of lo. 32 | func Linspace(lo, hi float64, num int) []float64 { 33 | res := make([]float64, num) 34 | if num == 1 { 35 | res[0] = lo 36 | return res 37 | } 38 | for i := 0; i < num; i++ { 39 | res[i] = lo + float64(i)*(hi-lo)/float64(num-1) 40 | } 41 | return res 42 | } 43 | 44 | // Logspace returns num values spaced evenly on a logarithmic scale 45 | // between base**lo and base**hi, inclusive. 46 | func Logspace(lo, hi float64, num int, base float64) []float64 { 47 | res := Linspace(lo, hi, num) 48 | for i, x := range res { 49 | res[i] = math.Pow(base, x) 50 | } 51 | return res 52 | } 53 | 54 | // Sum returns the sum of xs. 55 | func Sum(xs []float64) float64 { 56 | sum := 0.0 57 | for _, x := range xs { 58 | sum += x 59 | } 60 | return sum 61 | } 62 | 63 | // Concat returns the concatenation of its arguments. It does not 64 | // modify its inputs. 65 | func Concat(xss ...[]float64) []float64 { 66 | total := 0 67 | for _, xs := range xss { 68 | total += len(xs) 69 | } 70 | out := make([]float64, total) 71 | pos := 0 72 | for _, xs := range xss { 73 | pos += copy(out[pos:], xs) 74 | } 75 | return out 76 | } 77 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/hist.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package stats 6 | 7 | import "math" 8 | 9 | // TODO: Implement histograms on top of scales. 10 | 11 | type Histogram interface { 12 | // Add adds a sample with value x to histogram h. 
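Stepping back to the vec helpers shown above, a minimal test-style sketch (editorial, not part of the package) of Linspace, Sum, and Concat:

package vec

import "fmt"

func ExampleLinspace() {
	fmt.Println(Linspace(0, 1, 5))
	fmt.Println(Sum([]float64{1, 2, 3, 4}))
	fmt.Println(Concat([]float64{1, 2}, []float64{3}))
	// Output:
	// [0 0.25 0.5 0.75 1]
	// 10
	// [1 2 3]
}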
13 | Add(x float64) 14 | 15 | // Counts returns the number of samples less than the lowest 16 | // bin, a slice of the number of samples in each bin, 17 | // and the number of samples greater than the highest bin. 18 | Counts() (under uint, counts []uint, over uint) 19 | 20 | // BinToValue returns the value that would appear at the given 21 | // bin index. 22 | // 23 | // For integral values of bin, BinToValue returns the lower 24 | // bound of bin. That is, a sample value x will be in bin if 25 | // bin is integral and 26 | // 27 | // BinToValue(bin) <= x < BinToValue(bin + 1) 28 | // 29 | // For non-integral values of bin, BinToValue interpolates 30 | // between the lower and upper bounds of math.Floor(bin). 31 | // 32 | // BinToValue is undefined if bin > 1 + the number of bins. 33 | BinToValue(bin float64) float64 34 | } 35 | 36 | // HistogramPercentile returns the x such that n*percentile samples in 37 | // hist are <= x, assuming values are distibuted within each bin 38 | // according to hist's distibution. 39 | // 40 | // If the percentile'th sample falls below the lowest bin or above the 41 | // highest bin, returns NaN. 42 | func HistogramPercentile(hist Histogram, percentile float64) float64 { 43 | under, counts, over := hist.Counts() 44 | total := under + over 45 | for _, count := range counts { 46 | total += count 47 | } 48 | 49 | goal := uint(float64(total) * percentile) 50 | if goal <= under || goal > total-over { 51 | return math.NaN() 52 | } 53 | for bin, count := range counts { 54 | if count > goal { 55 | return hist.BinToValue(float64(bin) + float64(goal)/float64(count)) 56 | } 57 | goal -= count 58 | } 59 | panic("goal count not reached") 60 | } 61 | 62 | // HistogramIQR returns the interquartile range of the samples in 63 | // hist. 64 | func HistogramIQR(hist Histogram) float64 { 65 | return HistogramPercentile(hist, 0.75) - HistogramPercentile(hist, 0.25) 66 | } 67 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/loghist.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package stats 6 | 7 | import "math" 8 | 9 | // LogHist is a Histogram with logarithmically-spaced bins. 10 | type LogHist struct { 11 | b int 12 | m float64 13 | mOverLogb float64 14 | low, high uint 15 | bins []uint 16 | } 17 | 18 | // NewLogHist returns an empty logarithmic histogram with bins for 19 | // integral values of m * log_b(x) up to x = max. 20 | func NewLogHist(b int, m float64, max float64) *LogHist { 21 | // TODO(austin) Minimum value as well? If the samples are 22 | // actually integral, having fractional bin boundaries can 23 | // mess up smoothing. 
24 | mOverLogb := m / math.Log(float64(b)) 25 | nbins := int(math.Ceil(mOverLogb * math.Log(max))) 26 | return &LogHist{b: b, m: m, mOverLogb: mOverLogb, low: 0, high: 0, bins: make([]uint, nbins)} 27 | } 28 | 29 | func (h *LogHist) bin(x float64) int { 30 | return int(h.mOverLogb * math.Log(x)) 31 | } 32 | 33 | func (h *LogHist) Add(x float64) { 34 | bin := h.bin(x) 35 | if bin < 0 { 36 | h.low++ 37 | } else if bin >= len(h.bins) { 38 | h.high++ 39 | } else { 40 | h.bins[bin]++ 41 | } 42 | } 43 | 44 | func (h *LogHist) Counts() (uint, []uint, uint) { 45 | return h.low, h.bins, h.high 46 | } 47 | 48 | func (h *LogHist) BinToValue(bin float64) float64 { 49 | return math.Pow(float64(h.b), bin/h.m) 50 | } 51 | 52 | func (h *LogHist) At(x float64) float64 { 53 | bin := h.bin(x) 54 | if bin < 0 || bin >= len(h.bins) { 55 | return 0 56 | } 57 | return float64(h.bins[bin]) 58 | } 59 | 60 | func (h *LogHist) Bounds() (float64, float64) { 61 | // XXX Plot will plot this on a linear axis. Maybe this 62 | // should be able to return the natural axis? 63 | // Maybe then we could also give it the bins for the tics. 64 | lowbin := 0 65 | if h.low == 0 { 66 | for bin, count := range h.bins { 67 | if count > 0 { 68 | lowbin = bin 69 | break 70 | } 71 | } 72 | } 73 | highbin := len(h.bins) 74 | if h.high == 0 { 75 | for bin := range h.bins { 76 | if h.bins[len(h.bins)-bin-1] > 0 { 77 | highbin = len(h.bins) - bin 78 | break 79 | } 80 | } 81 | } 82 | return h.BinToValue(float64(lowbin)), h.BinToValue(float64(highbin)) 83 | } 84 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/kde_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
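A test-style sketch (editorial, not one of the vendored files) of the LogHist defined above, driven through the Histogram interface from hist.go; with b=10 and m=1 each decade gets its own bin, and max=2000 yields four bins.

package stats

import "fmt"

func ExampleLogHist() {
	var h Histogram = NewLogHist(10, 1, 2000)
	for _, x := range []float64{5, 50, 500, 3000, 50000} {
		h.Add(x)
	}
	low, bins, high := h.Counts()
	// 50000 exceeds the last decade bin, so it lands in the high counter.
	fmt.Println(low, bins, high)
	// Output: 0 [1 1 1 1] 1
}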
4 | 5 | package stats 6 | 7 | import ( 8 | "fmt" 9 | "testing" 10 | ) 11 | 12 | func TestKDEOneSample(t *testing.T) { 13 | x := float64(5) 14 | 15 | // Unweighted, fixed bandwidth 16 | kde := KDE{ 17 | Sample: Sample{Xs: []float64{x}}, 18 | Kernel: GaussianKernel, 19 | Bandwidth: 1, 20 | } 21 | if e, g := StdNormal.PDF(0), kde.PDF(x); !aeq(e, g) { 22 | t.Errorf("bad PDF value at sample: expected %g, got %g", e, g) 23 | } 24 | if e, g := 0.0, kde.PDF(-10000); !aeq(e, g) { 25 | t.Errorf("bad PDF value at low tail: expected %g, got %g", e, g) 26 | } 27 | if e, g := 0.0, kde.PDF(10000); !aeq(e, g) { 28 | t.Errorf("bad PDF value at high tail: expected %g, got %g", e, g) 29 | } 30 | 31 | if e, g := 0.5, kde.CDF(x); !aeq(e, g) { 32 | t.Errorf("bad CDF value at sample: expected %g, got %g", e, g) 33 | } 34 | if e, g := 0.0, kde.CDF(-10000); !aeq(e, g) { 35 | t.Errorf("bad CDF value at low tail: expected %g, got %g", e, g) 36 | } 37 | if e, g := 1.0, kde.CDF(10000); !aeq(e, g) { 38 | t.Errorf("bad CDF value at high tail: expected %g, got %g", e, g) 39 | } 40 | 41 | low, high := kde.Bounds() 42 | if e, g := x-2, low; e < g { 43 | t.Errorf("bad low bound: expected %g, got %g", e, g) 44 | } 45 | if e, g := x+2, high; e > g { 46 | t.Errorf("bad high bound: expected %g, got %g", e, g) 47 | } 48 | 49 | kde = KDE{ 50 | Sample: Sample{Xs: []float64{x}}, 51 | Kernel: EpanechnikovKernel, 52 | Bandwidth: 2, 53 | } 54 | testFunc(t, fmt.Sprintf("%+v.PDF", kde), kde.PDF, map[float64]float64{ 55 | x - 2: 0, 56 | x - 1: 0.5625 / 2, 57 | x: 0.75 / 2, 58 | x + 1: 0.5625 / 2, 59 | x + 2: 0, 60 | }) 61 | testFunc(t, fmt.Sprintf("%+v.CDF", kde), kde.CDF, map[float64]float64{ 62 | x - 2: 0, 63 | x - 1: 0.15625, 64 | x: 0.5, 65 | x + 1: 0.84375, 66 | x + 2: 1, 67 | }) 68 | } 69 | 70 | func TestKDETwoSamples(t *testing.T) { 71 | kde := KDE{ 72 | Sample: Sample{Xs: []float64{1, 3}}, 73 | Kernel: GaussianKernel, 74 | Bandwidth: 2, 75 | } 76 | testFunc(t, "PDF", kde.PDF, map[float64]float64{ 77 | 0: 0.120395730, 78 | 1: 0.160228251, 79 | 2: 0.176032663, 80 | 3: 0.160228251, 81 | 4: 0.120395730}) 82 | 83 | testFunc(t, "CDF", kde.CDF, map[float64]float64{ 84 | 0: 0.187672369, 85 | 1: 0.329327626, 86 | 2: 0.5, 87 | 3: 0.670672373, 88 | 4: 0.812327630}) 89 | } 90 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/ttest_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | package stats 6 | 7 | import "testing" 8 | 9 | func TestTTest(t *testing.T) { 10 | s1 := Sample{Xs: []float64{2, 1, 3, 4}} 11 | s2 := Sample{Xs: []float64{6, 5, 7, 9}} 12 | 13 | check := func(want, got *TTestResult) { 14 | if want.N1 != got.N1 || want.N2 != got.N2 || 15 | !aeq(want.T, got.T) || !aeq(want.DoF, got.DoF) || 16 | want.AltHypothesis != got.AltHypothesis || 17 | !aeq(want.P, got.P) { 18 | t.Errorf("want %+v, got %+v", want, got) 19 | } 20 | } 21 | check3 := func(test func(alt LocationHypothesis) (*TTestResult, error), n1, n2 int, t, dof float64, pless, pdiff, pgreater float64) { 22 | want := &TTestResult{N1: n1, N2: n2, T: t, DoF: dof} 23 | 24 | want.AltHypothesis = LocationLess 25 | want.P = pless 26 | got, _ := test(want.AltHypothesis) 27 | check(want, got) 28 | 29 | want.AltHypothesis = LocationDiffers 30 | want.P = pdiff 31 | got, _ = test(want.AltHypothesis) 32 | check(want, got) 33 | 34 | want.AltHypothesis = LocationGreater 35 | want.P = pgreater 36 | got, _ = test(want.AltHypothesis) 37 | check(want, got) 38 | } 39 | 40 | check3(func(alt LocationHypothesis) (*TTestResult, error) { 41 | return TwoSampleTTest(s1, s1, alt) 42 | }, 4, 4, 0, 6, 43 | 0.5, 1, 0.5) 44 | check3(func(alt LocationHypothesis) (*TTestResult, error) { 45 | return TwoSampleWelchTTest(s1, s1, alt) 46 | }, 4, 4, 0, 6, 47 | 0.5, 1, 0.5) 48 | 49 | check3(func(alt LocationHypothesis) (*TTestResult, error) { 50 | return TwoSampleTTest(s1, s2, alt) 51 | }, 4, 4, -3.9703446152237674, 6, 52 | 0.0036820296121056195, 0.0073640592242113214, 0.9963179703878944) 53 | check3(func(alt LocationHypothesis) (*TTestResult, error) { 54 | return TwoSampleWelchTTest(s1, s2, alt) 55 | }, 4, 4, -3.9703446152237674, 5.584615384615385, 56 | 0.004256431565689112, 0.0085128631313781695, 0.9957435684343109) 57 | 58 | check3(func(alt LocationHypothesis) (*TTestResult, error) { 59 | return PairedTTest(s1.Xs, s2.Xs, 0, alt) 60 | }, 4, 4, -17, 3, 61 | 0.0002216717691559955, 0.00044334353831207749, 0.999778328230844) 62 | 63 | check3(func(alt LocationHypothesis) (*TTestResult, error) { 64 | return OneSampleTTest(s1, 0, alt) 65 | }, 4, 0, 3.872983346207417, 3, 66 | 0.9847668541689145, 0.030466291662170977, 0.015233145831085482) 67 | check3(func(alt LocationHypothesis) (*TTestResult, error) { 68 | return OneSampleTTest(s1, 2.5, alt) 69 | }, 4, 0, 0, 3, 70 | 0.5, 1, 0.5) 71 | } 72 | -------------------------------------------------------------------------------- /internal/go-moremath/mathx/beta.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package mathx 6 | 7 | import "math" 8 | 9 | func lgamma(x float64) float64 { 10 | y, _ := math.Lgamma(x) 11 | return y 12 | } 13 | 14 | // Beta returns the value of the complete beta function B(a, b). 15 | func Beta(a, b float64) float64 { 16 | // B(x,y) = Γ(x)Γ(y) / Γ(x+y) 17 | return math.Exp(lgamma(a) + lgamma(b) - lgamma(a+b)) 18 | } 19 | 20 | // BetaInc returns the value of the regularized incomplete beta 21 | // function Iₓ(a, b). 22 | // 23 | // This is not to be confused with the "incomplete beta function", 24 | // which can be computed as BetaInc(x, a, b)*Beta(a, b). 25 | // 26 | // If x < 0 or x > 1, returns NaN. 27 | func BetaInc(x, a, b float64) float64 { 28 | // Based on Numerical Recipes in C, section 6.4. 
This uses the 29 | // continued fraction definition of I: 30 | // 31 | // (xᵃ*(1-x)ᵇ)/(a*B(a,b)) * (1/(1+(d₁/(1+(d₂/(1+...)))))) 32 | // 33 | // where B(a,b) is the beta function and 34 | // 35 | // d_{2m+1} = -(a+m)(a+b+m)x/((a+2m)(a+2m+1)) 36 | // d_{2m} = m(b-m)x/((a+2m-1)(a+2m)) 37 | if x < 0 || x > 1 { 38 | return math.NaN() 39 | } 40 | bt := 0.0 41 | if 0 < x && x < 1 { 42 | // Compute the coefficient before the continued 43 | // fraction. 44 | bt = math.Exp(lgamma(a+b) - lgamma(a) - lgamma(b) + 45 | a*math.Log(x) + b*math.Log(1-x)) 46 | } 47 | if x < (a+1)/(a+b+2) { 48 | // Compute continued fraction directly. 49 | return bt * betacf(x, a, b) / a 50 | } else { 51 | // Compute continued fraction after symmetry transform. 52 | return 1 - bt*betacf(1-x, b, a)/b 53 | } 54 | } 55 | 56 | // betacf is the continued fraction component of the regularized 57 | // incomplete beta function Iₓ(a, b). 58 | func betacf(x, a, b float64) float64 { 59 | const maxIterations = 200 60 | const epsilon = 3e-14 61 | 62 | raiseZero := func(z float64) float64 { 63 | if math.Abs(z) < math.SmallestNonzeroFloat64 { 64 | return math.SmallestNonzeroFloat64 65 | } 66 | return z 67 | } 68 | 69 | c := 1.0 70 | d := 1 / raiseZero(1-(a+b)*x/(a+1)) 71 | h := d 72 | for m := 1; m <= maxIterations; m++ { 73 | mf := float64(m) 74 | 75 | // Even step of the recurrence. 76 | numer := mf * (b - mf) * x / ((a + 2*mf - 1) * (a + 2*mf)) 77 | d = 1 / raiseZero(1+numer*d) 78 | c = raiseZero(1 + numer/c) 79 | h *= d * c 80 | 81 | // Odd step of the recurrence. 82 | numer = -(a + mf) * (a + b + mf) * x / ((a + 2*mf) * (a + 2*mf + 1)) 83 | d = 1 / raiseZero(1+numer*d) 84 | c = raiseZero(1 + numer/c) 85 | hfac := d * c 86 | h *= hfac 87 | 88 | if math.Abs(hfac-1) < epsilon { 89 | return h 90 | } 91 | } 92 | panic("betainc: a or b too big; failed to converge") 93 | } 94 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/utest_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | package stats 6 | 7 | import "testing" 8 | 9 | func TestMannWhitneyUTest(t *testing.T) { 10 | check := func(want, got *MannWhitneyUTestResult) { 11 | if want.N1 != got.N1 || want.N2 != got.N2 || 12 | !aeq(want.U, got.U) || 13 | want.AltHypothesis != got.AltHypothesis || 14 | !aeq(want.P, got.P) { 15 | t.Errorf("want %+v, got %+v", want, got) 16 | } 17 | } 18 | check3 := func(x1, x2 []float64, U float64, pless, pdiff, pgreater float64) { 19 | want := &MannWhitneyUTestResult{N1: len(x1), N2: len(x2), U: U} 20 | 21 | want.AltHypothesis = LocationLess 22 | want.P = pless 23 | got, _ := MannWhitneyUTest(x1, x2, want.AltHypothesis) 24 | check(want, got) 25 | 26 | want.AltHypothesis = LocationDiffers 27 | want.P = pdiff 28 | got, _ = MannWhitneyUTest(x1, x2, want.AltHypothesis) 29 | check(want, got) 30 | 31 | want.AltHypothesis = LocationGreater 32 | want.P = pgreater 33 | got, _ = MannWhitneyUTest(x1, x2, want.AltHypothesis) 34 | check(want, got) 35 | } 36 | 37 | s1 := []float64{2, 1, 3, 5} 38 | s2 := []float64{12, 11, 13, 15} 39 | s3 := []float64{0, 4, 6, 7} // Interleaved with s1, but no ties 40 | s4 := []float64{2, 2, 2, 2} 41 | s5 := []float64{1, 1, 1, 1, 1} 42 | 43 | // Small sample, no ties 44 | check3(s1, s2, 0, 0.014285714285714289, 0.028571428571428577, 1) 45 | check3(s2, s1, 16, 1, 0.028571428571428577, 0.014285714285714289) 46 | check3(s1, s3, 5, 0.24285714285714288, 0.485714285714285770, 0.8285714285714285) 47 | 48 | // Small sample, ties 49 | // TODO: Check these against some other implementation. 50 | check3(s1, s1, 8, 0.6285714285714286, 1, 0.6285714285714286) 51 | check3(s1, s4, 10, 0.8571428571428571, 0.7142857142857143, 0.3571428571428571) 52 | check3(s1, s5, 17.5, 1, 0, 0.04761904761904767) 53 | 54 | r, err := MannWhitneyUTest(s4, s4, LocationDiffers) 55 | if err != ErrSamplesEqual { 56 | t.Errorf("want ErrSamplesEqual, got %+v, %+v", r, err) 57 | } 58 | 59 | // Large samples. 60 | l1 := make([]float64, 500) 61 | for i := range l1 { 62 | l1[i] = float64(i * 2) 63 | } 64 | l2 := make([]float64, 600) 65 | for i := range l2 { 66 | l2[i] = float64(i*2 - 41) 67 | } 68 | l3 := append([]float64{}, l2...) 69 | for i := 0; i < 30; i++ { 70 | l3[i] = l1[i] 71 | } 72 | // For comparing with R's wilcox.test: 73 | // l1 <- seq(0, 499)*2 74 | // l2 <- seq(0,599)*2-41 75 | // l3 <- l2; for (i in 1:30) { l3[i] = l1[i] } 76 | 77 | check3(l1, l2, 135250, 0.0024667680407086112, 0.0049335360814172224, 0.9975346930458906) 78 | check3(l1, l1, 125000, 0.5000436801680628, 1, 0.5000436801680628) 79 | check3(l1, l3, 134845, 0.0019351907119808942, 0.0038703814239617884, 0.9980659818257166) 80 | } 81 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/stream.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package stats 6 | 7 | import ( 8 | "fmt" 9 | "math" 10 | ) 11 | 12 | // TODO(austin) Unify more with Sample interface 13 | 14 | // StreamStats tracks basic statistics for a stream of data in O(1) 15 | // space. 16 | // 17 | // StreamStats should be initialized to its zero value. 
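By way of illustration (an editorial sketch, not one of the vendored files), feeding a few values into a zero-value StreamStats:

package stats

import "fmt"

func ExampleStreamStats() {
	var s StreamStats // the zero value is ready to use
	for _, x := range []float64{1, 2, 3, 4} {
		s.Add(x)
	}
	fmt.Println(s.Count, s.Min, s.Max, s.Mean())
	// Output: 4 1 4 2.5
}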
18 | type StreamStats struct { 19 | Count uint 20 | Total, Min, Max float64 21 | 22 | // Numerically stable online mean 23 | mean float64 24 | meanOfSquares float64 25 | 26 | // Online variance 27 | vM2 float64 28 | } 29 | 30 | // Add updates s's statistics with sample value x. 31 | func (s *StreamStats) Add(x float64) { 32 | s.Total += x 33 | if s.Count == 0 { 34 | s.Min, s.Max = x, x 35 | } else { 36 | if x < s.Min { 37 | s.Min = x 38 | } 39 | if x > s.Max { 40 | s.Max = x 41 | } 42 | } 43 | s.Count++ 44 | 45 | // Update online mean, mean of squares, and variance. Online 46 | // variance based on Wikipedia's presentation ("Algorithms for 47 | // calculating variance") of Knuth's formulation of Welford 48 | // 1962. 49 | delta := x - s.mean 50 | s.mean += delta / float64(s.Count) 51 | s.meanOfSquares += (x*x - s.meanOfSquares) / float64(s.Count) 52 | s.vM2 += delta * (x - s.mean) 53 | } 54 | 55 | func (s *StreamStats) Weight() float64 { 56 | return float64(s.Count) 57 | } 58 | 59 | func (s *StreamStats) Mean() float64 { 60 | return s.mean 61 | } 62 | 63 | func (s *StreamStats) Variance() float64 { 64 | return s.vM2 / float64(s.Count-1) 65 | } 66 | 67 | func (s *StreamStats) StdDev() float64 { 68 | return math.Sqrt(s.Variance()) 69 | } 70 | 71 | func (s *StreamStats) RMS() float64 { 72 | return math.Sqrt(s.meanOfSquares) 73 | } 74 | 75 | // Combine updates s's statistics as if all samples added to o were 76 | // added to s. 77 | func (s *StreamStats) Combine(o *StreamStats) { 78 | count := s.Count + o.Count 79 | 80 | // Compute combined online variance statistics 81 | delta := o.mean - s.mean 82 | mean := s.mean + delta*float64(o.Count)/float64(count) 83 | vM2 := s.vM2 + o.vM2 + delta*delta*float64(s.Count)*float64(o.Count)/float64(count) 84 | 85 | s.Count = count 86 | s.Total += o.Total 87 | if o.Min < s.Min { 88 | s.Min = o.Min 89 | } 90 | if o.Max > s.Max { 91 | s.Max = o.Max 92 | } 93 | s.mean = mean 94 | s.meanOfSquares += (o.meanOfSquares - s.meanOfSquares) * float64(o.Count) / float64(count) 95 | s.vM2 = vM2 96 | } 97 | 98 | func (s *StreamStats) String() string { 99 | return fmt.Sprintf("Count=%d Total=%g Min=%g Mean=%g RMS=%g Max=%g StdDev=%g", s.Count, s.Total, s.Min, s.Mean(), s.RMS(), s.Max, s.StdDev()) 100 | } 101 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/alg.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package stats 6 | 7 | // Miscellaneous helper algorithms 8 | 9 | import ( 10 | "fmt" 11 | 12 | "rsc.io/benchstat/internal/go-moremath/mathx" 13 | ) 14 | 15 | func maxint(a, b int) int { 16 | if a > b { 17 | return a 18 | } 19 | return b 20 | } 21 | 22 | func minint(a, b int) int { 23 | if a < b { 24 | return a 25 | } 26 | return b 27 | } 28 | 29 | func sumint(xs []int) int { 30 | sum := 0 31 | for _, x := range xs { 32 | sum += x 33 | } 34 | return sum 35 | } 36 | 37 | // bisect returns an x in [low, high] such that |f(x)| <= tolerance 38 | // using the bisection method. 39 | // 40 | // f(low) and f(high) must have opposite signs. 41 | // 42 | // If f does not have a root in this interval (e.g., it is 43 | // discontiguous), this returns the X of the apparent discontinuity 44 | // and false. 
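An in-package sketch of that behavior (editorial; bisect is unexported, so this would have to live alongside the package's own tests), bracketing the positive root of x*x - 2 on [1, 2]:

package stats

import "fmt"

func Example_bisect() {
	root, ok := bisect(func(x float64) float64 { return x*x - 2 }, 1, 2, 1e-9)
	fmt.Printf("%v %.6f\n", ok, root)
	// Output: true 1.414214
}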
45 | func bisect(f func(float64) float64, low, high, tolerance float64) (float64, bool) { 46 | flow, fhigh := f(low), f(high) 47 | if -tolerance <= flow && flow <= tolerance { 48 | return low, true 49 | } 50 | if -tolerance <= fhigh && fhigh <= tolerance { 51 | return high, true 52 | } 53 | if mathx.Sign(flow) == mathx.Sign(fhigh) { 54 | panic(fmt.Sprintf("root of f is not bracketed by [low, high]; f(%g)=%g f(%g)=%g", low, flow, high, fhigh)) 55 | } 56 | for { 57 | mid := (high + low) / 2 58 | fmid := f(mid) 59 | if -tolerance <= fmid && fmid <= tolerance { 60 | return mid, true 61 | } 62 | if mid == high || mid == low { 63 | return mid, false 64 | } 65 | if mathx.Sign(fmid) == mathx.Sign(flow) { 66 | low = mid 67 | flow = fmid 68 | } else { 69 | high = mid 70 | fhigh = fmid 71 | } 72 | } 73 | } 74 | 75 | // bisectBool implements the bisection method on a boolean function. 76 | // It returns x1, x2 ∈ [low, high], x1 < x2 such that f(x1) != f(x2) 77 | // and x2 - x1 <= xtol. 78 | // 79 | // If f(low) == f(high), it panics. 80 | func bisectBool(f func(float64) bool, low, high, xtol float64) (x1, x2 float64) { 81 | flow, fhigh := f(low), f(high) 82 | if flow == fhigh { 83 | panic(fmt.Sprintf("root of f is not bracketed by [low, high]; f(%g)=%v f(%g)=%v", low, flow, high, fhigh)) 84 | } 85 | for { 86 | if high-low <= xtol { 87 | return low, high 88 | } 89 | mid := (high + low) / 2 90 | if mid == high || mid == low { 91 | return low, high 92 | } 93 | fmid := f(mid) 94 | if fmid == flow { 95 | low = mid 96 | flow = fmid 97 | } else { 98 | high = mid 99 | fhigh = fmid 100 | } 101 | } 102 | } 103 | 104 | // series returns the sum of the series f(0), f(1), ... 105 | // 106 | // This implementation is fast, but subject to round-off error. 107 | func series(f func(float64) float64) float64 { 108 | y, yp := 0.0, 1.0 109 | for n := 0.0; y != yp; n++ { 110 | yp = y 111 | y += f(n) 112 | } 113 | return y 114 | } 115 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/hypergdist.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package stats 6 | 7 | import ( 8 | "math" 9 | 10 | "rsc.io/benchstat/internal/go-moremath/mathx" 11 | ) 12 | 13 | // HypergeometicDist is a hypergeometric distribution. 14 | type HypergeometicDist struct { 15 | // N is the size of the population. N >= 0. 16 | N int 17 | 18 | // K is the number of successes in the population. 0 <= K <= N. 19 | K int 20 | 21 | // Draws is the number of draws from the population. This is 22 | // usually written "n", but is called Draws here because of 23 | // limitations on Go identifier naming. 0 <= Draws <= N. 24 | Draws int 25 | } 26 | 27 | // PMF is the probability of getting exactly int(k) successes in 28 | // d.Draws draws with replacement from a population of size d.N that 29 | // contains exactly d.K successes. 
30 | func (d HypergeometicDist) PMF(k float64) float64 { 31 | ki := int(math.Floor(k)) 32 | l, h := d.bounds() 33 | if ki < l || ki > h { 34 | return 0 35 | } 36 | return d.pmf(ki) 37 | } 38 | 39 | func (d HypergeometicDist) pmf(k int) float64 { 40 | return math.Exp(mathx.Lchoose(d.K, k) + mathx.Lchoose(d.N-d.K, d.Draws-k) - mathx.Lchoose(d.N, d.Draws)) 41 | } 42 | 43 | // CDF is the probability of getting int(k) or fewer successes in 44 | // d.Draws draws with replacement from a population of size d.N that 45 | // contains exactly d.K successes. 46 | func (d HypergeometicDist) CDF(k float64) float64 { 47 | // Based on Klotz, A Computational Approach to Statistics. 48 | ki := int(math.Floor(k)) 49 | l, h := d.bounds() 50 | if ki < l { 51 | return 0 52 | } else if ki >= h { 53 | return 1 54 | } 55 | // Use symmetry to compute the smaller sum. 56 | flip := false 57 | if ki > (d.Draws+1)/(d.N+1)*(d.K+1) { 58 | flip = true 59 | ki = d.K - ki - 1 60 | d.Draws = d.N - d.Draws 61 | } 62 | p := d.pmf(ki) * d.sum(ki) 63 | if flip { 64 | p = 1 - p 65 | } 66 | return p 67 | } 68 | 69 | func (d HypergeometicDist) sum(k int) float64 { 70 | const epsilon = 1e-14 71 | sum, ak := 1.0, 1.0 72 | L := maxint(0, d.Draws+d.K-d.N) 73 | for dk := 1; dk <= k-L && ak/sum > epsilon; dk++ { 74 | ak *= float64(1+k-dk) / float64(d.Draws-k+dk) 75 | ak *= float64(d.N-d.K-d.Draws+k+1-dk) / float64(d.K-k+dk) 76 | sum += ak 77 | } 78 | return sum 79 | } 80 | 81 | func (d HypergeometicDist) bounds() (int, int) { 82 | return maxint(0, d.Draws+d.K-d.N), minint(d.Draws, d.K) 83 | } 84 | 85 | func (d HypergeometicDist) Bounds() (float64, float64) { 86 | l, h := d.bounds() 87 | return float64(l), float64(h) 88 | } 89 | 90 | func (d HypergeometicDist) Step() float64 { 91 | return 1 92 | } 93 | 94 | func (d HypergeometicDist) Mean() float64 { 95 | return float64(d.Draws*d.K) / float64(d.N) 96 | } 97 | 98 | func (d HypergeometicDist) Variance() float64 { 99 | return float64(d.Draws*d.K*(d.N-d.K)*(d.N-d.Draws)) / 100 | float64(d.N*d.N*(d.N-1)) 101 | } 102 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/tdist_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
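Looking back at the hypergeometric distribution above (the identifier is spelled HypergeometicDist in these sources), a test-style sketch with a small population; the printed values are rounded.

package stats

import "fmt"

func ExampleHypergeometicDist() {
	// 3 draws from a population of 10 containing 4 successes.
	d := HypergeometicDist{N: 10, K: 4, Draws: 3}
	fmt.Printf("%.4f %.4f %.1f\n", d.PMF(1), d.CDF(1), d.Mean())
	// Output: 0.5000 0.6667 1.2
}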
4 | 5 | package stats 6 | 7 | import "testing" 8 | 9 | func TestT(t *testing.T) { 10 | testFunc(t, "PDF(%v|v=1)", TDist{1}.PDF, map[float64]float64{ 11 | -10: 0.0031515830315226806, 12 | -9: 0.0038818278802901312, 13 | -8: 0.0048970751720583188, 14 | -7: 0.0063661977236758151, 15 | -6: 0.0086029698968592104, 16 | -5: 0.012242687930145799, 17 | -4: 0.018724110951987692, 18 | -3: 0.031830988618379075, 19 | -2: 0.063661977236758149, 20 | -1: 0.15915494309189537, 21 | 0: 0.31830988618379075, 22 | 1: 0.15915494309189537, 23 | 2: 0.063661977236758149, 24 | 3: 0.031830988618379075, 25 | 4: 0.018724110951987692, 26 | 5: 0.012242687930145799, 27 | 6: 0.0086029698968592104, 28 | 7: 0.0063661977236758151, 29 | 8: 0.0048970751720583188, 30 | 9: 0.0038818278802901312}) 31 | testFunc(t, "PDF(%v|v=5)", TDist{5}.PDF, map[float64]float64{ 32 | -10: 4.0989816415343313e-05, 33 | -9: 7.4601664362590413e-05, 34 | -8: 0.00014444303269563934, 35 | -7: 0.00030134402928803911, 36 | -6: 0.00068848154013743002, 37 | -5: 0.0017574383788078445, 38 | -4: 0.0051237270519179133, 39 | -3: 0.017292578800222964, 40 | -2: 0.065090310326216455, 41 | -1: 0.21967979735098059, 42 | 0: 0.3796066898224944, 43 | 1: 0.21967979735098059, 44 | 2: 0.065090310326216455, 45 | 3: 0.017292578800222964, 46 | 4: 0.0051237270519179133, 47 | 5: 0.0017574383788078445, 48 | 6: 0.00068848154013743002, 49 | 7: 0.00030134402928803911, 50 | 8: 0.00014444303269563934, 51 | 9: 7.4601664362590413e-05}) 52 | 53 | testFunc(t, "CDF(%v|v=1)", TDist{1}.CDF, map[float64]float64{ 54 | -10: 0.03172551743055356, 55 | -9: 0.035223287477277272, 56 | -8: 0.039583424160565539, 57 | -7: 0.045167235300866547, 58 | -6: 0.052568456711253424, 59 | -5: 0.06283295818900117, 60 | -4: 0.077979130377369324, 61 | -3: 0.10241638234956672, 62 | -2: 0.14758361765043321, 63 | -1: 0.24999999999999978, 64 | 0: 0.5, 65 | 1: 0.75000000000000022, 66 | 2: 0.85241638234956674, 67 | 3: 0.89758361765043326, 68 | 4: 0.92202086962263075, 69 | 5: 0.93716704181099886, 70 | 6: 0.94743154328874657, 71 | 7: 0.95483276469913347, 72 | 8: 0.96041657583943452, 73 | 9: 0.96477671252272279}) 74 | testFunc(t, "CDF(%v|v=5)", TDist{5}.CDF, map[float64]float64{ 75 | -10: 8.5473787871481787e-05, 76 | -9: 0.00014133998712194845, 77 | -8: 0.00024645333028622187, 78 | -7: 0.00045837375719920225, 79 | -6: 0.00092306914479700695, 80 | -5: 0.0020523579900266612, 81 | -4: 0.0051617077404157259, 82 | -3: 0.015049623948731284, 83 | -2: 0.05096973941492914, 84 | -1: 0.18160873382456127, 85 | 0: 0.5, 86 | 1: 0.81839126617543867, 87 | 2: 0.9490302605850709, 88 | 3: 0.98495037605126878, 89 | 4: 0.99483829225958431, 90 | 5: 0.99794764200997332, 91 | 6: 0.99907693085520299, 92 | 7: 0.99954162624280074, 93 | 8: 0.99975354666971372, 94 | 9: 0.9998586600128780}) 95 | } 96 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/normaldist.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package stats 6 | 7 | import ( 8 | "math" 9 | "math/rand" 10 | ) 11 | 12 | // NormalDist is a normal (Gaussian) distribution with mean Mu and 13 | // standard deviation Sigma. 
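//
// The density is PDF(x) = exp(-(x-Mu)^2 / (2*Sigma^2)) / (Sigma*sqrt(2*π)).
// For example, StdNormal.CDF(1.96) ≈ 0.975 and StdNormal.InvCDF(0.975) ≈ 1.96.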
14 | type NormalDist struct { 15 | Mu, Sigma float64 16 | } 17 | 18 | // StdNormal is the standard normal distribution (Mu = 0, Sigma = 1) 19 | var StdNormal = NormalDist{0, 1} 20 | 21 | // 1/sqrt(2 * pi) 22 | const invSqrt2Pi = 0.39894228040143267793994605993438186847585863116493465766592583 23 | 24 | func (n NormalDist) PDF(x float64) float64 { 25 | z := x - n.Mu 26 | return math.Exp(-z*z/(2*n.Sigma*n.Sigma)) * invSqrt2Pi / n.Sigma 27 | } 28 | 29 | func (n NormalDist) pdfEach(xs []float64) []float64 { 30 | res := make([]float64, len(xs)) 31 | if n.Mu == 0 && n.Sigma == 1 { 32 | // Standard normal fast path 33 | for i, x := range xs { 34 | res[i] = math.Exp(-x*x/2) * invSqrt2Pi 35 | } 36 | } else { 37 | a := -1 / (2 * n.Sigma * n.Sigma) 38 | b := invSqrt2Pi / n.Sigma 39 | for i, x := range xs { 40 | z := x - n.Mu 41 | res[i] = math.Exp(z*z*a) * b 42 | } 43 | } 44 | return res 45 | } 46 | 47 | func (n NormalDist) CDF(x float64) float64 { 48 | return math.Erfc(-(x-n.Mu)/(n.Sigma*math.Sqrt2)) / 2 49 | } 50 | 51 | func (n NormalDist) cdfEach(xs []float64) []float64 { 52 | res := make([]float64, len(xs)) 53 | a := 1 / (n.Sigma * math.Sqrt2) 54 | for i, x := range xs { 55 | res[i] = math.Erfc(-(x-n.Mu)*a) / 2 56 | } 57 | return res 58 | } 59 | 60 | func (n NormalDist) InvCDF(p float64) (x float64) { 61 | // This is based on Peter John Acklam's inverse normal CDF 62 | // algorithm: http://home.online.no/~pjacklam/notes/invnorm/ 63 | const ( 64 | a1 = -3.969683028665376e+01 65 | a2 = 2.209460984245205e+02 66 | a3 = -2.759285104469687e+02 67 | a4 = 1.383577518672690e+02 68 | a5 = -3.066479806614716e+01 69 | a6 = 2.506628277459239e+00 70 | 71 | b1 = -5.447609879822406e+01 72 | b2 = 1.615858368580409e+02 73 | b3 = -1.556989798598866e+02 74 | b4 = 6.680131188771972e+01 75 | b5 = -1.328068155288572e+01 76 | 77 | c1 = -7.784894002430293e-03 78 | c2 = -3.223964580411365e-01 79 | c3 = -2.400758277161838e+00 80 | c4 = -2.549732539343734e+00 81 | c5 = 4.374664141464968e+00 82 | c6 = 2.938163982698783e+00 83 | 84 | d1 = 7.784695709041462e-03 85 | d2 = 3.224671290700398e-01 86 | d3 = 2.445134137142996e+00 87 | d4 = 3.754408661907416e+00 88 | 89 | plow = 0.02425 90 | phigh = 1 - plow 91 | ) 92 | 93 | if p < 0 || p > 1 { 94 | return nan 95 | } else if p == 0 { 96 | return -inf 97 | } else if p == 1 { 98 | return inf 99 | } 100 | 101 | if p < plow { 102 | // Rational approximation for lower region. 103 | q := math.Sqrt(-2 * math.Log(p)) 104 | x = (((((c1*q+c2)*q+c3)*q+c4)*q+c5)*q + c6) / 105 | ((((d1*q+d2)*q+d3)*q+d4)*q + 1) 106 | } else if phigh < p { 107 | // Rational approximation for upper region. 108 | q := math.Sqrt(-2 * math.Log(1-p)) 109 | x = -(((((c1*q+c2)*q+c3)*q+c4)*q+c5)*q + c6) / 110 | ((((d1*q+d2)*q+d3)*q+d4)*q + 1) 111 | } else { 112 | // Rational approximation for central region. 113 | q := p - 0.5 114 | r := q * q 115 | x = (((((a1*r+a2)*r+a3)*r+a4)*r+a5)*r + a6) * q / 116 | (((((b1*r+b2)*r+b3)*r+b4)*r+b5)*r + 1) 117 | } 118 | 119 | // Refine approximation. 120 | e := 0.5*math.Erfc(-x/math.Sqrt2) - p 121 | u := e * math.Sqrt(2*math.Pi) * math.Exp(x*x/2) 122 | x = x - u/(1+x*u/2) 123 | 124 | // Adjust from standard normal. 
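// (The refinement above is one step of Halley's method for Φ(x) = p:
// u is the CDF error divided by the normal density φ(x), and the
// x*u/2 term accounts for φ'(x) = -x·φ(x).)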
125 | return x*n.Sigma + n.Mu 126 | } 127 | 128 | func (n NormalDist) Rand(r *rand.Rand) float64 { 129 | var x float64 130 | if r == nil { 131 | x = rand.NormFloat64() 132 | } else { 133 | x = r.NormFloat64() 134 | } 135 | return x*n.Sigma + n.Mu 136 | } 137 | 138 | func (n NormalDist) Bounds() (float64, float64) { 139 | const stddevs = 3 140 | return n.Mu - stddevs*n.Sigma, n.Mu + stddevs*n.Sigma 141 | } 142 | -------------------------------------------------------------------------------- /internal/go-moremath/scale/linear.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package scale 6 | 7 | import ( 8 | "math" 9 | 10 | "rsc.io/benchstat/internal/go-moremath/vec" 11 | ) 12 | 13 | type Linear struct { 14 | // Min and Max specify the lower and upper bounds of the input 15 | // range. The input range [Min, Max] will be linearly mapped 16 | // to the output domain [0, 1]. 17 | Min, Max float64 18 | 19 | // Base specifies a base for computing ticks. Ticks will be 20 | // placed at powers of Base; that is at n*Base^l for n ∈ ℤ and 21 | // some integer l. As a special case, a base of 0 alternates 22 | // between ticks at n*10^l and ticks at 5n*10^l. 23 | Base int 24 | 25 | // If Clamp is true, the input is clamped to [Min, Max]. 26 | Clamp bool 27 | } 28 | 29 | // *Linear is a Quantitative scale. 30 | var _ Quantitative = &Linear{} 31 | 32 | func (s Linear) Map(x float64) float64 { 33 | if s.Min == s.Max { 34 | return 0.5 35 | } 36 | y := (x - s.Min) / (s.Max - s.Min) 37 | if s.Clamp { 38 | y = clamp(y) 39 | } 40 | return y 41 | } 42 | 43 | func (s Linear) Unmap(y float64) float64 { 44 | return y*(s.Max-s.Min) + s.Min 45 | } 46 | 47 | func (s *Linear) SetClamp(clamp bool) { 48 | s.Clamp = clamp 49 | } 50 | 51 | // ebase sanity checks and returns the "effective base" of this scale. 52 | // If s.Base is 0, it returns 10. If s.Base is 1 or negative, it 53 | // panics. 54 | func (s Linear) ebase() int { 55 | if s.Base == 0 { 56 | return 10 57 | } else if s.Base == 1 { 58 | panic("scale.Linear cannot have a base of 1") 59 | } else if s.Base < 0 { 60 | panic("scale.Linear cannot have a negative base") 61 | } 62 | return s.Base 63 | } 64 | 65 | // In the default base, the tick levels are: 66 | // 67 | // Level -2 is a major tick at -0.1, 0, 0.1, etc. 68 | // Level -1 is a major tick at -1, -0.5, 0, 0.5, 1, etc. 69 | // Level 0 is a major tick at -1, 0, 1, etc. 70 | // Level 1 is a major tick at -10, -5, 0, 5, 10, etc. 71 | // Level 2 is a major tick at -10, 0, 10, etc. 72 | // 73 | // That is, level 0 is unit intervals, and we alternate between 74 | // interval *= 5 and interval *= 2. Combined, these give us interval 75 | // *= 10 at every other level. 76 | // 77 | // In non-default bases, level 0 is the same and we alternate between 78 | // interval *= 1 (for consistency) and interval *= base. 79 | 80 | func (s *Linear) guessLevel() int { 81 | return 2 * int(math.Log(s.Max-s.Min)/math.Log(float64(s.ebase()))) 82 | } 83 | 84 | func (s *Linear) spacingAtLevel(level int, roundOut bool) (firstN, lastN, spacing float64) { 85 | // Watch out! Integer division is round toward zero, but we 86 | // need round down, and modulus is signed. 
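// For example, at level = -1: math.Floor(-0.5) = -1, whereas Go's -1/2
// truncates to 0; and -1%2 == -1, so the test must accept both +1 and -1.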
87 | exp, double := math.Floor(float64(level)/2), (level%2 == 1 || level%2 == -1) 88 | spacing = math.Pow(float64(s.ebase()), exp) 89 | if double && s.Base == 0 { 90 | spacing *= 5 91 | } 92 | 93 | // Add a tiny bit of slack to the floor and ceiling below so 94 | // that rounding errors don't significantly affect tick marks. 95 | slack := (s.Max - s.Min) * 1e-10 96 | 97 | if roundOut { 98 | firstN = math.Floor((s.Min + slack) / spacing) 99 | lastN = math.Ceil((s.Max - slack) / spacing) 100 | } else { 101 | firstN = math.Ceil((s.Min - slack) / spacing) 102 | lastN = math.Floor((s.Max + slack) / spacing) 103 | } 104 | return 105 | } 106 | 107 | func (s Linear) Ticks(n int) (major, minor []float64) { 108 | if s.Min == s.Max { 109 | return []float64{s.Min}, []float64{} 110 | } else if s.Min > s.Max { 111 | s.Min, s.Max = s.Max, s.Min 112 | } 113 | 114 | // nticksAtLevel returns the number of ticks in [s.Min, s.Max] 115 | // at the given level. 116 | nticksAtLevel := func(level int) int { 117 | firstN, lastN, _ := s.spacingAtLevel(level, false) 118 | return int(lastN - firstN + 1) 119 | } 120 | 121 | level := autoScale(n, nticksAtLevel, s.guessLevel()) 122 | 123 | ticksAtLevel := func(level int) []float64 { 124 | firstN, lastN, spacing := s.spacingAtLevel(level, false) 125 | n := int(lastN - firstN + 1) 126 | return vec.Linspace(firstN*spacing, lastN*spacing, n) 127 | } 128 | 129 | return ticksAtLevel(level), ticksAtLevel(level - 1) 130 | } 131 | 132 | func (s *Linear) Nice(n int) { 133 | if s.Min == s.Max { 134 | s.Min -= 0.5 135 | s.Max += 0.5 136 | } else if s.Min > s.Max { 137 | s.Min, s.Max = s.Max, s.Min 138 | } 139 | 140 | nticksAtLevel := func(level int) int { 141 | firstN, lastN, _ := s.spacingAtLevel(level, true) 142 | return int(lastN - firstN + 1) 143 | } 144 | 145 | level := autoScale(n, nticksAtLevel, s.guessLevel()) 146 | 147 | firstN, lastN, spacing := s.spacingAtLevel(level, true) 148 | s.Min = firstN * spacing 149 | s.Max = lastN * spacing 150 | } 151 | -------------------------------------------------------------------------------- /internal/go-moremath/scale/linear_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | package scale 6 | 7 | import ( 8 | "fmt" 9 | "testing" 10 | 11 | "rsc.io/benchstat/internal/go-moremath/internal/mathtest" 12 | "rsc.io/benchstat/internal/go-moremath/vec" 13 | ) 14 | 15 | func TestLinear(t *testing.T) { 16 | l := Linear{Min: -10, Max: 10} 17 | mathtest.WantFunc(t, fmt.Sprintf("%v.Map", l), l.Map, 18 | map[float64]float64{ 19 | -20: -0.5, 20 | -10: 0, 21 | 0: 0.5, 22 | 10: 1, 23 | 20: 1.5, 24 | }) 25 | mathtest.WantFunc(t, fmt.Sprintf("%v.Unmap", l), l.Unmap, 26 | map[float64]float64{ 27 | -0.5: -20, 28 | 0: -10, 29 | 0.5: 0, 30 | 1: 10, 31 | 1.5: 20, 32 | }) 33 | 34 | l.SetClamp(true) 35 | mathtest.WantFunc(t, fmt.Sprintf("%v.Map", l), l.Map, 36 | map[float64]float64{ 37 | -20: 0, 38 | -10: 0, 39 | 0: 0.5, 40 | 10: 1, 41 | 20: 1, 42 | }) 43 | mathtest.WantFunc(t, fmt.Sprintf("%v.Unmap", l), l.Unmap, 44 | map[float64]float64{ 45 | 0: -10, 46 | 0.5: 0, 47 | 1: 10, 48 | }) 49 | 50 | l = Linear{Min: 5, Max: 5} 51 | mathtest.WantFunc(t, fmt.Sprintf("%v.Map", l), l.Map, 52 | map[float64]float64{ 53 | -10: 0.5, 54 | 0: 0.5, 55 | 10: 0.5, 56 | }) 57 | mathtest.WantFunc(t, fmt.Sprintf("%v.Unmap", l), l.Unmap, 58 | map[float64]float64{ 59 | 0: 5, 60 | 0.5: 5, 61 | 1: 5, 62 | }) 63 | } 64 | 65 | func ticksEq(major, wmajor, minor, wminor []float64) bool { 66 | // TODO: It would be nice to have a deep Aeq. It could also 67 | // support checking predicates like LE(5) or IsNaN within 68 | // structures, which could be used in WantFunc. Heck, deep Aeq 69 | // could subsume WantFunc where the left side is a function 70 | // and the right side is a map from arguments to results, but 71 | // maybe it would be harder to produce a good error message. 72 | if len(major) != len(wmajor) || len(minor) != len(wminor) { 73 | return false 74 | } 75 | for i, v := range major { 76 | if !mathtest.Aeq(wmajor[i], v) { 77 | return false 78 | } 79 | } 80 | for i, v := range minor { 81 | if !mathtest.Aeq(wminor[i], v) { 82 | return false 83 | } 84 | } 85 | return true 86 | } 87 | 88 | func TestLinearTicks(t *testing.T) { 89 | l := Linear{Min: 0, Max: 100} 90 | major, minor := l.Ticks(5) 91 | wmajor, wminor := vec.Linspace(0, 100, 3), vec.Linspace(0, 100, 11) 92 | if !ticksEq(major, wmajor, minor, wminor) { 93 | t.Errorf("%v.Ticks(5) = %v, %v; want %v, %v", l, major, minor, wmajor, wminor) 94 | } 95 | 96 | major, minor = l.Ticks(2) 97 | wmajor, wminor = vec.Linspace(0, 100, 2), vec.Linspace(0, 100, 3) 98 | if !ticksEq(major, wmajor, minor, wminor) { 99 | t.Errorf("%v.Ticks(2) = %v, %v; want %v, %v", l, major, minor, wmajor, wminor) 100 | } 101 | 102 | l.Nice(2) 103 | major, minor = l.Ticks(2) 104 | if !ticksEq(major, wmajor, minor, wminor) { 105 | t.Errorf("%v.Ticks(2) = %v, %v; want %v, %v", l, major, minor, wmajor, wminor) 106 | } 107 | 108 | l = Linear{Min: 15.4, Max: 16.6} 109 | major, minor = l.Ticks(5) 110 | wmajor, wminor = vec.Linspace(15.5, 16.5, 3), vec.Linspace(15.4, 16.6, 13) 111 | if !ticksEq(major, wmajor, minor, wminor) { 112 | t.Errorf("%v.Ticks(5) = %v, %v; want %v, %v", l, major, minor, wmajor, wminor) 113 | } 114 | 115 | l.Nice(5) 116 | major, minor = l.Ticks(5) 117 | wmajor, wminor = vec.Linspace(15, 17, 5), vec.Linspace(15, 17, 21) 118 | if !ticksEq(major, wmajor, minor, wminor) { 119 | t.Errorf("%v.Ticks(5) = %v, %v; want %v, %v", l, major, minor, wmajor, wminor) 120 | } 121 | 122 | // Test negative tick levels. 
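// (Here Max-Min ≈ 0.0011, so the expected major spacing is 10^-3 and the
// minor spacing 5e-4, i.e. tick levels below zero in linear.go's scheme.)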
123 | l = Linear{Min: 9.9989, Max: 10} 124 | major, minor = l.Ticks(2) 125 | wmajor, wminor = vec.Linspace(9.999, 10, 2), vec.Linspace(9.999, 10, 3) 126 | if !ticksEq(major, wmajor, minor, wminor) { 127 | t.Errorf("%v.Ticks(2) = %v, %v; want %v, %v", l, major, minor, wmajor, wminor) 128 | } 129 | 130 | l.Nice(2) 131 | major, minor = l.Ticks(2) 132 | wmajor, wminor = vec.Linspace(9.995, 10, 2), vec.Linspace(9.995, 10, 6) 133 | if !ticksEq(major, wmajor, minor, wminor) { 134 | t.Errorf("%v.Ticks(2) = %v, %v; want %v, %v", l, major, minor, wmajor, wminor) 135 | } 136 | 137 | // Test non-default bases. 138 | l = Linear{Min: 2, Max: 9, Base: 2} 139 | major, minor = l.Ticks(5) 140 | wmajor, wminor = vec.Linspace(2, 8, 4), vec.Linspace(2, 9, 8) 141 | if !ticksEq(major, wmajor, minor, wminor) { 142 | t.Errorf("%v.Ticks(5) = %v, %v; want %v, %v", l, major, minor, wmajor, wminor) 143 | } 144 | 145 | l.Nice(5) 146 | major, minor = l.Ticks(5) 147 | wmajor, wminor = vec.Linspace(2, 10, 5), vec.Linspace(2, 10, 9) 148 | if !ticksEq(major, wmajor, minor, wminor) { 149 | t.Errorf("%v.Ticks(5) = %v, %v; want %v, %v", l, major, minor, wmajor, wminor) 150 | } 151 | } 152 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/ttest.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package stats 6 | 7 | import ( 8 | "errors" 9 | "math" 10 | ) 11 | 12 | // A TTestResult is the result of a t-test. 13 | type TTestResult struct { 14 | // N1 and N2 are the sizes of the input samples. For a 15 | // one-sample t-test, N2 is 0. 16 | N1, N2 int 17 | 18 | // T is the value of the t-statistic for this t-test. 19 | T float64 20 | 21 | // DoF is the degrees of freedom for this t-test. 22 | DoF float64 23 | 24 | // AltHypothesis specifies the alternative hypothesis tested 25 | // by this test against the null hypothesis that there is no 26 | // difference in the means of the samples. 27 | AltHypothesis LocationHypothesis 28 | 29 | // P is p-value for this t-test for the given null hypothesis. 30 | P float64 31 | } 32 | 33 | func newTTestResult(n1, n2 int, t, dof float64, alt LocationHypothesis) *TTestResult { 34 | dist := TDist{dof} 35 | var p float64 36 | switch alt { 37 | case LocationDiffers: 38 | p = 2 * (1 - dist.CDF(math.Abs(t))) 39 | case LocationLess: 40 | p = dist.CDF(t) 41 | case LocationGreater: 42 | p = 1 - dist.CDF(t) 43 | } 44 | return &TTestResult{N1: n1, N2: n2, T: t, DoF: dof, AltHypothesis: alt, P: p} 45 | } 46 | 47 | // A TTestSample is a sample that can be used for a one or two sample 48 | // t-test. 49 | type TTestSample interface { 50 | Weight() float64 51 | Mean() float64 52 | Variance() float64 53 | } 54 | 55 | var ( 56 | ErrSampleSize = errors.New("sample is too small") 57 | ErrZeroVariance = errors.New("sample has zero variance") 58 | ErrMismatchedSamples = errors.New("samples have different lengths") 59 | ) 60 | 61 | // TwoSampleTTest performs a two-sample (unpaired) Student's t-test on 62 | // samples x1 and x2. This is a test of the null hypothesis that x1 63 | // and x2 are drawn from populations with equal means. It assumes x1 64 | // and x2 are independent samples, that the distributions have equal 65 | // variance, and that the populations are normally distributed. 
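//
// The statistic is t = (mean1 - mean2) / sqrt(s2*(1/n1 + 1/n2)), where s2
// is the pooled variance, on n1+n2-2 degrees of freedom. For example, since
// Sample implements TTestSample (a and b here stand for hypothetical
// []float64 slices of measurements):
//
//	r, err := TwoSampleTTest(Sample{Xs: a}, Sample{Xs: b}, LocationDiffers)
//	if err == nil && r.P < 0.05 {
//		// Reject equal means at the 5% level.
//	}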
66 | func TwoSampleTTest(x1, x2 TTestSample, alt LocationHypothesis) (*TTestResult, error) { 67 | n1, n2 := x1.Weight(), x2.Weight() 68 | if n1 == 0 || n2 == 0 { 69 | return nil, ErrSampleSize 70 | } 71 | v1, v2 := x1.Variance(), x2.Variance() 72 | if v1 == 0 && v2 == 0 { 73 | return nil, ErrZeroVariance 74 | } 75 | 76 | dof := n1 + n2 - 2 77 | v12 := ((n1-1)*v1 + (n2-1)*v2) / dof 78 | t := (x1.Mean() - x2.Mean()) / math.Sqrt(v12*(1/n1+1/n2)) 79 | return newTTestResult(int(n1), int(n2), t, dof, alt), nil 80 | } 81 | 82 | // TwoSampleWelchTTest performs a two-sample (unpaired) Welch's t-test 83 | // on samples x1 and x2. This is like TwoSampleTTest, but does not 84 | // assume the distributions have equal variance. 85 | func TwoSampleWelchTTest(x1, x2 TTestSample, alt LocationHypothesis) (*TTestResult, error) { 86 | n1, n2 := x1.Weight(), x2.Weight() 87 | if n1 <= 1 || n2 <= 1 { 88 | // TODO: Can we still do this with n == 1? 89 | return nil, ErrSampleSize 90 | } 91 | v1, v2 := x1.Variance(), x2.Variance() 92 | if v1 == 0 && v2 == 0 { 93 | return nil, ErrZeroVariance 94 | } 95 | 96 | dof := math.Pow(v1/n1+v2/n2, 2) / 97 | (math.Pow(v1/n1, 2)/(n1-1) + math.Pow(v2/n2, 2)/(n2-1)) 98 | s := math.Sqrt(v1/n1 + v2/n2) 99 | t := (x1.Mean() - x2.Mean()) / s 100 | return newTTestResult(int(n1), int(n2), t, dof, alt), nil 101 | } 102 | 103 | // PairedTTest performs a two-sample paired t-test on samples x1 and 104 | // x2. If μ0 is non-zero, this tests if the average of the difference 105 | // is significantly different from μ0. If x1 and x2 are identical, 106 | // this returns nil. 107 | func PairedTTest(x1, x2 []float64, μ0 float64, alt LocationHypothesis) (*TTestResult, error) { 108 | if len(x1) != len(x2) { 109 | return nil, ErrMismatchedSamples 110 | } 111 | if len(x1) <= 1 { 112 | // TODO: Can we still do this with n == 1? 113 | return nil, ErrSampleSize 114 | } 115 | 116 | dof := float64(len(x1) - 1) 117 | 118 | diff := make([]float64, len(x1)) 119 | for i := range x1 { 120 | diff[i] = x1[i] - x2[i] 121 | } 122 | sd := StdDev(diff) 123 | if sd == 0 { 124 | // TODO: Can we still do the test? 125 | return nil, ErrZeroVariance 126 | } 127 | t := (Mean(diff) - μ0) * math.Sqrt(float64(len(x1))) / sd 128 | return newTTestResult(len(x1), len(x2), t, dof, alt), nil 129 | } 130 | 131 | // OneSampleTTest performs a one-sample t-test on sample x. This tests 132 | // the null hypothesis that the population mean is equal to μ0. This 133 | // assumes the distribution of the population of sample means is 134 | // normal. 135 | func OneSampleTTest(x TTestSample, μ0 float64, alt LocationHypothesis) (*TTestResult, error) { 136 | n, v := x.Weight(), x.Variance() 137 | if n == 0 { 138 | return nil, ErrSampleSize 139 | } 140 | if v == 0 { 141 | // TODO: Can we still do the test? 142 | return nil, ErrZeroVariance 143 | } 144 | dof := n - 1 145 | t := (x.Mean() - μ0) * math.Sqrt(n) / math.Sqrt(v) 146 | return newTTestResult(int(n), 0, t, dof, alt), nil 147 | } 148 | -------------------------------------------------------------------------------- /internal/go-moremath/scale/log.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package scale 6 | 7 | import "math" 8 | 9 | type Log struct { 10 | private struct{} 11 | 12 | // Min and Max specify the lower and upper bounds of the input 13 | // range. 
The input range [Min, Max] will be mapped to the 14 | // output domain [0, 1]. The range [Min, Max] must not include 15 | // 0. 16 | Min, Max float64 17 | 18 | // Base specifies the base of the logarithm for computing 19 | // ticks. Typically, ticks will be placed at Base^n for n ∈ ℤ. 20 | Base int 21 | 22 | // If Clamp is true, the input is clamped to [Min, Max]. 23 | Clamp bool 24 | 25 | // TODO: Let the user specify the minor ticks. Default to [1, 26 | // .. 9], but [1, 3] and [1, 2, 5] are common. 27 | } 28 | 29 | // *Log is a Quantitative scale. 30 | var _ Quantitative = &Log{} 31 | 32 | // NewLog constructs a Log scale. If the arguments are out of range, 33 | // it returns a RangeErr. 34 | func NewLog(min, max float64, base int) (Log, error) { 35 | if min > max { 36 | min, max = max, min 37 | } 38 | 39 | if base <= 1 { 40 | return Log{}, RangeErr("Log scale base must be 2 or more") 41 | } 42 | if min <= 0 && max >= 0 { 43 | return Log{}, RangeErr("Log scale range cannot include 0") 44 | } 45 | 46 | return Log{Min: min, Max: max, Base: base}, nil 47 | } 48 | 49 | func (s *Log) ebounds() (bool, float64, float64) { 50 | if s.Min < 0 { 51 | return true, -s.Max, -s.Min 52 | } 53 | return false, s.Min, s.Max 54 | } 55 | 56 | func (s Log) Map(x float64) float64 { 57 | neg, min, max := s.ebounds() 58 | if neg { 59 | x = -x 60 | } 61 | if x <= 0 { 62 | return math.NaN() 63 | } 64 | if min == max { 65 | return 0.5 66 | } 67 | 68 | logMin, logMax := math.Log(min), math.Log(max) 69 | y := (math.Log(x) - logMin) / (logMax - logMin) 70 | if neg { 71 | y = 1 - y 72 | } 73 | if s.Clamp { 74 | y = clamp(y) 75 | } 76 | return y 77 | } 78 | 79 | func (s Log) Unmap(y float64) float64 { 80 | neg, min, max := s.ebounds() 81 | if neg { 82 | y = 1 - y 83 | } 84 | logMin, logMax := math.Log(min), math.Log(max) 85 | x := math.Exp(y*(logMax-logMin) + logMin) 86 | if neg { 87 | x = -x 88 | } 89 | return x 90 | } 91 | 92 | func (s *Log) SetClamp(clamp bool) { 93 | s.Clamp = clamp 94 | } 95 | 96 | // The tick levels are: 97 | // 98 | // Level 0 is a major tick at Base^n (1, 10, 100, ...) 99 | // Level 1 is a major tick at Base^2^n (1, 100, 10000, ...) 100 | // Level 2 is a major tick at Base^4^n (1, 10000, 100000000, ...) 101 | // 102 | // That is, each level eliminates every other tick. Levels below 0 are 103 | // not defined. 104 | 105 | func logb(x float64, b float64) float64 { 106 | return math.Log(x) / math.Log(b) 107 | } 108 | 109 | func (s *Log) spacingAtLevel(level int, roundOut bool) (firstN, lastN, ebase float64) { 110 | _, min, max := s.ebounds() 111 | 112 | // Compute the effective base at this level. 113 | ebase = math.Pow(float64(s.Base), math.Pow(2, float64(level))) 114 | lmin, lmax := logb(min, ebase), logb(max, ebase) 115 | 116 | // Add a tiny bit of slack to the floor and ceiling so that 117 | // rounding errors don't significantly affect tick marks. 118 | slack := (lmax - lmin) * 1e-10 119 | 120 | if roundOut { 121 | firstN = math.Floor(lmin + slack) 122 | lastN = math.Ceil(lmax - slack) 123 | } else { 124 | firstN = math.Ceil(lmin - slack) 125 | lastN = math.Floor(lmax + slack) 126 | } 127 | 128 | return 129 | } 130 | 131 | func (s Log) Ticks(n int) (major, minor []float64) { 132 | if s.Min == s.Max { 133 | return []float64{s.Min}, []float64{} 134 | } 135 | 136 | neg, min, max := s.ebounds() 137 | 138 | // nticksAtLevel returns the number of ticks in [min, max] at 139 | // the given level. 
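// Levels below 0 have no major ticks of their own, so nticksAtLevel
// reports a huge count for them; autoScale therefore never settles on a
// negative level, and the level -1 minor ticks are synthesized in
// ticksAtLevel below.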
140 | nticksAtLevel := func(level int) int { 141 | if level < 0 { 142 | const maxInt = int(^uint(0) >> 1) 143 | return maxInt 144 | } 145 | 146 | firstN, lastN, _ := s.spacingAtLevel(level, false) 147 | return int(lastN - firstN + 1) 148 | } 149 | 150 | level := autoScale(n, nticksAtLevel, 0) 151 | 152 | ticksAtLevel := func(level int) []float64 { 153 | ticks := []float64{} 154 | 155 | if level < 0 { 156 | // Minor ticks for level 0. Get the major 157 | // ticks, but round out so we can fill in 158 | // minor ticks outside of the major ticks. 159 | firstN, lastN, _ := s.spacingAtLevel(0, true) 160 | for n := firstN; n <= lastN; n++ { 161 | tick := math.Pow(float64(s.Base), n) 162 | step := tick 163 | for i := 0; i < s.Base-1; i++ { 164 | if min <= tick && tick <= max { 165 | ticks = append(ticks, tick) 166 | } 167 | tick += step 168 | } 169 | } 170 | } else { 171 | firstN, lastN, base := s.spacingAtLevel(level, false) 172 | for n := firstN; n <= lastN; n++ { 173 | ticks = append(ticks, math.Pow(base, n)) 174 | } 175 | } 176 | 177 | if neg { 178 | // Negate and reverse order of ticks. 179 | for i := 0; i < (len(ticks)+1)/2; i++ { 180 | j := len(ticks) - i - 1 181 | ticks[i], ticks[j] = -ticks[j], -ticks[i] 182 | } 183 | } 184 | 185 | return ticks 186 | } 187 | 188 | return ticksAtLevel(level), ticksAtLevel(level - 1) 189 | } 190 | 191 | func (s *Log) Nice(n int) { 192 | neg, _, _ := s.ebounds() 193 | 194 | nticksAtLevel := func(level int) int { 195 | firstN, lastN, _ := s.spacingAtLevel(level, true) 196 | return int(lastN - firstN + 1) 197 | } 198 | 199 | level := autoScale(n, nticksAtLevel, 0) 200 | 201 | firstN, lastN, base := s.spacingAtLevel(level, true) 202 | s.Min = math.Pow(base, firstN) 203 | s.Max = math.Pow(base, lastN) 204 | if neg { 205 | s.Min, s.Max = -s.Max, -s.Min 206 | } 207 | } 208 | -------------------------------------------------------------------------------- /internal/go-moremath/scale/log_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | package scale 6 | 7 | import ( 8 | "fmt" 9 | "math" 10 | "testing" 11 | 12 | "rsc.io/benchstat/internal/go-moremath/internal/mathtest" 13 | "rsc.io/benchstat/internal/go-moremath/vec" 14 | ) 15 | 16 | func TestLog(t *testing.T) { 17 | l, err := NewLog(0, 10, 10) 18 | if _, ok := err.(RangeErr); !ok { 19 | t.Errorf("want RangeErr; got %v", err) 20 | } 21 | l, err = NewLog(-10, 0, 10) 22 | if _, ok := err.(RangeErr); !ok { 23 | t.Errorf("want RangeErr; got %v", err) 24 | } 25 | l, err = NewLog(-10, 10, 10) 26 | if _, ok := err.(RangeErr); !ok { 27 | t.Errorf("want RangeErr; got %v", err) 28 | } 29 | l, err = NewLog(10, 20, 0) 30 | if _, ok := err.(RangeErr); !ok { 31 | t.Errorf("want RangeErr; got %v", err) 32 | } 33 | 34 | l, _ = NewLog(1, 10, 10) 35 | mathtest.WantFunc(t, fmt.Sprintf("%v.Map", l), l.Map, 36 | map[float64]float64{ 37 | -1: math.NaN(), 38 | 0: math.NaN(), 39 | 0.1: -1, 40 | 1: 0, 41 | math.Pow(10, 0.5): 0.5, 42 | 10: 1, 43 | 100: 2, 44 | }) 45 | mathtest.WantFunc(t, fmt.Sprintf("%v.Unmap", l), l.Unmap, 46 | map[float64]float64{ 47 | -1: 0.1, 48 | 0: 1, 49 | 0.5: math.Pow(10, 0.5), 50 | 1: 10, 51 | 2: 100, 52 | }) 53 | 54 | l.SetClamp(true) 55 | mathtest.WantFunc(t, fmt.Sprintf("%v.Map", l), l.Map, 56 | map[float64]float64{ 57 | -1: math.NaN(), 58 | 0: math.NaN(), 59 | 0.1: 0, 60 | 1: 0, 61 | math.Pow(10, 0.5): 0.5, 62 | 10: 1, 63 | 100: 1, 64 | }) 65 | mathtest.WantFunc(t, fmt.Sprintf("%v.Unmap", l), l.Unmap, 66 | map[float64]float64{ 67 | 0: 1, 68 | 0.5: math.Pow(10, 0.5), 69 | 1: 10, 70 | }) 71 | 72 | l, _ = NewLog(-1, -10, 10) 73 | mathtest.WantFunc(t, fmt.Sprintf("%v.Map", l), l.Map, 74 | map[float64]float64{ 75 | 1: math.NaN(), 76 | 0: math.NaN(), 77 | -0.1: 2, 78 | -1: 1, 79 | -math.Pow(10, 0.5): 0.5, 80 | -10: 0, 81 | -100: -1, 82 | }) 83 | mathtest.WantFunc(t, fmt.Sprintf("%v.Unmap", l), l.Unmap, 84 | map[float64]float64{ 85 | 2: -0.1, 86 | 1: -1, 87 | 0.5: -math.Pow(10, 0.5), 88 | 0: -10, 89 | -1: -100, 90 | }) 91 | 92 | l, _ = NewLog(5, 5, 10) 93 | mathtest.WantFunc(t, fmt.Sprintf("%v.Map", l), l.Map, 94 | map[float64]float64{ 95 | -1: math.NaN(), 96 | 0: math.NaN(), 97 | 1: 0.5, 98 | 10: 0.5, 99 | }) 100 | mathtest.WantFunc(t, fmt.Sprintf("%v.Unmap", l), l.Unmap, 101 | map[float64]float64{ 102 | 0: 5, 103 | 0.5: 5, 104 | 1: 5, 105 | }) 106 | } 107 | 108 | func TestLogTicks(t *testing.T) { 109 | // Test the obvious. 110 | l, _ := NewLog(1, 10, 10) 111 | major, minor := l.Ticks(5) 112 | wmajor, wminor := vec.Logspace(0, 1, 2, 10), vec.Linspace(1, 10, 10) 113 | if !ticksEq(major, wmajor, minor, wminor) { 114 | t.Errorf("%v.Ticks(5) = %v, %v; want %v, %v", l, major, minor, wmajor, wminor) 115 | } 116 | 117 | // Test two orders of magnitude. 118 | l, _ = NewLog(1, 100, 10) 119 | major, minor = l.Ticks(5) 120 | wmajor, wminor = vec.Logspace(0, 2, 3, 10), vec.Concat(vec.Linspace(1, 9, 9), vec.Linspace(10, 100, 10)) 121 | if !ticksEq(major, wmajor, minor, wminor) { 122 | t.Errorf("%v.Ticks(5) = %v, %v; want %v, %v", l, major, minor, wmajor, wminor) 123 | } 124 | 125 | // Test many orders of magnitude (higher tick levels). 
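// (At level 1 the effective base is 10^2 = 100, so major ticks fall on
// powers of 100; at level 2 it is 10^4 = 10000.)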
126 | l, _ = NewLog(1, 1e8, 10) 127 | major, minor = l.Ticks(5) 128 | wmajor, wminor = vec.Logspace(0, 4, 5, 100), vec.Logspace(0, 8, 9, 10) 129 | if !ticksEq(major, wmajor, minor, wminor) { 130 | t.Errorf("%v.Ticks(5) = %v, %v; want %v, %v", l, major, minor, wmajor, wminor) 131 | } 132 | 133 | major, minor = l.Ticks(4) 134 | wmajor, wminor = vec.Logspace(0, 2, 3, 10000), vec.Logspace(0, 4, 5, 100) 135 | if !ticksEq(major, wmajor, minor, wminor) { 136 | t.Errorf("%v.Ticks(5) = %v, %v; want %v, %v", l, major, minor, wmajor, wminor) 137 | } 138 | 139 | // Test minor ticks outside major ticks. 140 | l, _ = NewLog(0.91, 200, 10) 141 | major, minor = l.Ticks(5) 142 | wmajor, wminor = vec.Logspace(0, 2, 3, 10), vec.Concat(vec.Linspace(1, 9, 9), vec.Linspace(10, 100, 10), []float64{200}) 143 | if !ticksEq(major, wmajor, minor, wminor) { 144 | t.Errorf("%v.Ticks(5) = %v, %v; want %v, %v", l, major, minor, wmajor, wminor) 145 | } 146 | 147 | // Test nicing. 148 | l.Nice(5) 149 | major, minor = l.Ticks(5) 150 | wmajor, wminor = vec.Logspace(-1, 3, 5, 10), vec.Concat(vec.Linspace(0.1, 0.9, 9), vec.Linspace(1, 9, 9), vec.Linspace(10, 90, 9), vec.Linspace(100, 1000, 10)) 151 | if !ticksEq(major, wmajor, minor, wminor) { 152 | t.Errorf("%v.Ticks(5) = %v, %v; want %v, %v", l, major, minor, wmajor, wminor) 153 | } 154 | 155 | // Test negative ticks. 156 | neg := vec.Vectorize(func(x float64) float64 { return -x }) 157 | l, _ = NewLog(-1, -100, 10) 158 | major, minor = l.Ticks(5) 159 | wmajor, wminor = neg(vec.Logspace(2, 0, 3, 10)), neg(vec.Concat(vec.Linspace(100, 10, 10), vec.Linspace(9, 1, 9))) 160 | if !ticksEq(major, wmajor, minor, wminor) { 161 | t.Errorf("%v.Ticks(5) = %v, %v; want %v, %v", l, major, minor, wmajor, wminor) 162 | } 163 | 164 | major, minor = l.Ticks(2) 165 | wmajor, wminor = neg(vec.Logspace(1, 0, 2, 100)), neg(vec.Logspace(2, 0, 3, 10)) 166 | if !ticksEq(major, wmajor, minor, wminor) { 167 | t.Errorf("%v.Ticks(5) = %v, %v; want %v, %v", l, major, minor, wmajor, wminor) 168 | } 169 | 170 | l.Nice(5) 171 | major, minor = l.Ticks(5) 172 | wmajor, wminor = neg(vec.Logspace(2, 0, 3, 10)), neg(vec.Concat(vec.Linspace(100, 10, 10), vec.Linspace(9, 1, 9))) 173 | if !ticksEq(major, wmajor, minor, wminor) { 174 | t.Errorf("%v.Ticks(5) = %v, %v; want %v, %v", l, major, minor, wmajor, wminor) 175 | } 176 | 177 | // Test Min==Max. 178 | l, _ = NewLog(5, 5, 10) 179 | major, minor = l.Ticks(5) 180 | wmajor, wminor = []float64{5}, []float64{} 181 | if !ticksEq(major, wmajor, minor, wminor) { 182 | t.Errorf("%v.Ticks(5) = %v, %v; want %v, %v", l, major, minor, wmajor, wminor) 183 | } 184 | } 185 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/dist.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package stats 6 | 7 | import "math/rand" 8 | 9 | // A DistCommon is a statistical distribution. DistCommon is a base 10 | // interface provided by both continuous and discrete distributions. 11 | type DistCommon interface { 12 | // CDF returns the cumulative probability Pr[X <= x]. 13 | // 14 | // For continuous distributions, the CDF is the integral of 15 | // the PDF from -inf to x. 16 | // 17 | // For discrete distributions, the CDF is the sum of the PMF 18 | // at all defined points from -inf to x, inclusive. 
Note that 19 | // the CDF of a discrete distribution is defined for the whole 20 | // real line (unlike the PMF) but has discontinuities where 21 | // the PMF is non-zero. 22 | // 23 | // The CDF is a monotonically increasing function and has a 24 | // domain of all real numbers. If the distribution has bounded 25 | // support, it has a range of [0, 1]; otherwise it has a range 26 | // of (0, 1). Finally, CDF(-inf)==0 and CDF(inf)==1. 27 | CDF(x float64) float64 28 | 29 | // Bounds returns reasonable bounds for this distribution's 30 | // PDF/PMF and CDF. The total weight outside of these bounds 31 | // should be approximately 0. 32 | // 33 | // For a discrete distribution, both bounds are integer 34 | // multiples of Step(). 35 | // 36 | // If this distribution has finite support, it returns exact 37 | // bounds l, h such that CDF(l')=0 for all l' < l and 38 | // CDF(h')=1 for all h' >= h. 39 | Bounds() (float64, float64) 40 | } 41 | 42 | // A Dist is a continuous statistical distribution. 43 | type Dist interface { 44 | DistCommon 45 | 46 | // PDF returns the value of the probability density function 47 | // of this distribution at x. 48 | PDF(x float64) float64 49 | } 50 | 51 | // A DiscreteDist is a discrete statistical distribution. 52 | // 53 | // Most discrete distributions are defined only at integral values of 54 | // the random variable. However, some are defined at other intervals, 55 | // so this interface takes a float64 value for the random variable. 56 | // The probability mass function rounds down to the nearest defined 57 | // point. Note that float64 values can exactly represent integer 58 | // values between ±2**53, so this generally shouldn't be an issue for 59 | // integer-valued distributions (likewise, for half-integer-valued 60 | // distributions, float64 can exactly represent all values between 61 | // ±2**52). 62 | type DiscreteDist interface { 63 | DistCommon 64 | 65 | // PMF returns the value of the probability mass function 66 | // Pr[X = x'], where x' is x rounded down to the nearest 67 | // defined point on the distribution. 68 | // 69 | // Note for implementers: for integer-valued distributions, 70 | // round x using int(math.Floor(x)). Do not use int(x), since 71 | // that truncates toward zero (unless all x <= 0 are handled 72 | // the same). 73 | PMF(x float64) float64 74 | 75 | // Step returns s, where the distribution is defined for sℕ. 76 | Step() float64 77 | } 78 | 79 | // TODO: Add a Support method for finite support distributions? Or 80 | // maybe just another return value from Bounds indicating that the 81 | // bounds are exact? 82 | 83 | // TODO: Plot method to return a pre-configured Plot object with 84 | // reasonable bounds and an integral function? Have to distinguish 85 | // PDF/CDF/InvCDF. Three methods? Argument? 86 | // 87 | // Doesn't have to be a method of Dist. Could be just a function that 88 | // takes a Dist and uses Bounds. 89 | 90 | // InvCDF returns the inverse CDF function of the given distribution 91 | // (also known as the quantile function or the percent point 92 | // function). This is a function f such that f(dist.CDF(x)) == x. If 93 | // dist.CDF is only weakly monotonic (that it, there are intervals 94 | // over which it is constant) and y > 0, f returns the smallest x that 95 | // satisfies this condition. In general, the inverse CDF is not 96 | // well-defined for y==0, but for convenience if y==0, f returns the 97 | // largest x that satisfies this condition. 
For distributions with 98 | // infinite support both the largest and smallest x are -Inf; however, 99 | // for distributions with finite support, this is the lower bound of 100 | // the support. 101 | // 102 | // If y < 0 or y > 1, f returns NaN. 103 | // 104 | // If dist implements InvCDF(float64) float64, this returns that 105 | // method. Otherwise, it returns a function that uses a generic 106 | // numerical method to construct the inverse CDF at y by finding x 107 | // such that dist.CDF(x) == y. This may have poor precision around 108 | // points of discontinuity, including f(0) and f(1). 109 | func InvCDF(dist DistCommon) func(y float64) (x float64) { 110 | type invCDF interface { 111 | InvCDF(float64) float64 112 | } 113 | if dist, ok := dist.(invCDF); ok { 114 | return dist.InvCDF 115 | } 116 | 117 | // Otherwise, use a numerical algorithm. 118 | // 119 | // TODO: For discrete distributions, use the step size to 120 | // inform this computation. 121 | return func(y float64) (x float64) { 122 | const almostInf = 1e100 123 | const xtol = 1e-16 124 | 125 | if y < 0 || y > 1 { 126 | return nan 127 | } else if y == 0 { 128 | l, _ := dist.Bounds() 129 | if dist.CDF(l) == 0 { 130 | // Finite support 131 | return l 132 | } else { 133 | // Infinite support 134 | return -inf 135 | } 136 | } else if y == 1 { 137 | _, h := dist.Bounds() 138 | if dist.CDF(h) == 1 { 139 | // Finite support 140 | return h 141 | } else { 142 | // Infinite support 143 | return inf 144 | } 145 | } 146 | 147 | // Find loX, hiX for which cdf(loX) < y <= cdf(hiX). 148 | var loX, loY, hiX, hiY float64 149 | x1, y1 := 0.0, dist.CDF(0) 150 | xdelta := 1.0 151 | if y1 < y { 152 | hiX, hiY = x1, y1 153 | for hiY < y && hiX != inf { 154 | loX, loY, hiX = hiX, hiY, hiX+xdelta 155 | hiY = dist.CDF(hiX) 156 | xdelta *= 2 157 | } 158 | } else { 159 | loX, loY = x1, y1 160 | for y <= loY && loX != -inf { 161 | hiX, hiY, loX = loX, loY, loX-xdelta 162 | loY = dist.CDF(loX) 163 | xdelta *= 2 164 | } 165 | } 166 | if loX == -inf { 167 | return loX 168 | } else if hiX == inf { 169 | return hiX 170 | } 171 | 172 | // Use bisection on the interval to find the smallest 173 | // x at which cdf(x) <= y. 174 | _, x = bisectBool(func(x float64) bool { 175 | return dist.CDF(x) < y 176 | }, loX, hiX, xtol) 177 | return 178 | } 179 | } 180 | 181 | // Rand returns a random number generator that draws from the given 182 | // distribution. The returned generator takes an optional source of 183 | // randomness; if this is nil, it uses the default global source. 184 | // 185 | // If dist implements Rand(*rand.Rand) float64, Rand returns that 186 | // method. Otherwise, it returns a generic generator based on dist's 187 | // inverse CDF (which may in turn use an efficient implementation or a 188 | // generic numerical implementation; see InvCDF). 189 | func Rand(dist DistCommon) func(*rand.Rand) float64 { 190 | type distRand interface { 191 | Rand(*rand.Rand) float64 192 | } 193 | if dist, ok := dist.(distRand); ok { 194 | return dist.Rand 195 | } 196 | 197 | // Otherwise, use a generic algorithm. 
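// This is inverse transform sampling: draw y uniformly from (0, 1) and
// return InvCDF(y). y == 0 is rejected because InvCDF(0) is the lower
// bound of the support, which may be -Inf.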
198 | inv := InvCDF(dist) 199 | return func(r *rand.Rand) float64 { 200 | var y float64 201 | for y == 0 { 202 | if r == nil { 203 | y = rand.Float64() 204 | } else { 205 | y = r.Float64() 206 | } 207 | } 208 | return inv(y) 209 | } 210 | } 211 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/sample.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package stats 6 | 7 | import ( 8 | "math" 9 | "sort" 10 | 11 | "rsc.io/benchstat/internal/go-moremath/vec" 12 | ) 13 | 14 | // Sample is a collection of possibly weighted data points. 15 | type Sample struct { 16 | // Xs is the slice of sample values. 17 | Xs []float64 18 | 19 | // Weights[i] is the weight of sample Xs[i]. If Weights is 20 | // nil, all Xs have weight 1. Weights must have the same 21 | // length of Xs and all values must be non-negative. 22 | Weights []float64 23 | 24 | // Sorted indicates that Xs is sorted in ascending order. 25 | Sorted bool 26 | } 27 | 28 | // Bounds returns the minimum and maximum values of xs. 29 | func Bounds(xs []float64) (min float64, max float64) { 30 | if len(xs) == 0 { 31 | return math.NaN(), math.NaN() 32 | } 33 | min, max = xs[0], xs[0] 34 | for _, x := range xs { 35 | if x < min { 36 | min = x 37 | } 38 | if x > max { 39 | max = x 40 | } 41 | } 42 | return 43 | } 44 | 45 | // Bounds returns the minimum and maximum values of the Sample. 46 | // 47 | // If the Sample is weighted, this ignores samples with zero weight. 48 | // 49 | // This is constant time if s.Sorted and there are no zero-weighted 50 | // values. 51 | func (s Sample) Bounds() (min float64, max float64) { 52 | if len(s.Xs) == 0 || (!s.Sorted && s.Weights == nil) { 53 | return Bounds(s.Xs) 54 | } 55 | 56 | if s.Sorted { 57 | if s.Weights == nil { 58 | return s.Xs[0], s.Xs[len(s.Xs)-1] 59 | } 60 | min, max = math.NaN(), math.NaN() 61 | for i, w := range s.Weights { 62 | if w != 0 { 63 | min = s.Xs[i] 64 | break 65 | } 66 | } 67 | if math.IsNaN(min) { 68 | return 69 | } 70 | for i := range s.Weights { 71 | if s.Weights[len(s.Weights)-i-1] != 0 { 72 | max = s.Xs[len(s.Weights)-i-1] 73 | break 74 | } 75 | } 76 | } else { 77 | min, max = math.Inf(1), math.Inf(-1) 78 | for i, x := range s.Xs { 79 | w := s.Weights[i] 80 | if x < min && w != 0 { 81 | min = x 82 | } 83 | if x > max && w != 0 { 84 | max = x 85 | } 86 | } 87 | if math.IsInf(min, 0) { 88 | min, max = math.NaN(), math.NaN() 89 | } 90 | } 91 | return 92 | } 93 | 94 | // Sum returns the (possibly weighted) sum of the Sample. 95 | func (s Sample) Sum() float64 { 96 | if s.Weights == nil { 97 | return vec.Sum(s.Xs) 98 | } 99 | sum := 0.0 100 | for i, x := range s.Xs { 101 | sum += x * s.Weights[i] 102 | } 103 | return sum 104 | } 105 | 106 | // Weight returns the total weight of the Sasmple. 107 | func (s Sample) Weight() float64 { 108 | if s.Weights == nil { 109 | return float64(len(s.Xs)) 110 | } 111 | return vec.Sum(s.Weights) 112 | } 113 | 114 | // Mean returns the arithmetic mean of xs. 115 | func Mean(xs []float64) float64 { 116 | if len(xs) == 0 { 117 | return math.NaN() 118 | } 119 | m := 0.0 120 | for i, x := range xs { 121 | m += (x - m) / float64(i+1) 122 | } 123 | return m 124 | } 125 | 126 | // Mean returns the arithmetic mean of the Sample. 
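//
// If the Sample is weighted, this is the weighted mean Σ w_i·x_i / Σ w_i,
// computed with a numerically stable incremental update.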
127 | func (s Sample) Mean() float64 { 128 | if len(s.Xs) == 0 || s.Weights == nil { 129 | return Mean(s.Xs) 130 | } 131 | 132 | m, wsum := 0.0, 0.0 133 | for i, x := range s.Xs { 134 | // Use weighted incremental mean: 135 | // m_i = (1 - w_i/wsum_i) * m_(i-1) + (w_i/wsum_i) * x_i 136 | // = m_(i-1) + (x_i - m_(i-1)) * (w_i/wsum_i) 137 | w := s.Weights[i] 138 | wsum += w 139 | m += (x - m) * w / wsum 140 | } 141 | return m 142 | } 143 | 144 | // GeoMean returns the geometric mean of xs. xs must be positive. 145 | func GeoMean(xs []float64) float64 { 146 | if len(xs) == 0 { 147 | return math.NaN() 148 | } 149 | m := 0.0 150 | for i, x := range xs { 151 | if x <= 0 { 152 | return math.NaN() 153 | } 154 | lx := math.Log(x) 155 | m += (lx - m) / float64(i+1) 156 | } 157 | return math.Exp(m) 158 | } 159 | 160 | // GeoMean returns the geometric mean of the Sample. All samples 161 | // values must be positive. 162 | func (s Sample) GeoMean() float64 { 163 | if len(s.Xs) == 0 || s.Weights == nil { 164 | return GeoMean(s.Xs) 165 | } 166 | 167 | m, wsum := 0.0, 0.0 168 | for i, x := range s.Xs { 169 | w := s.Weights[i] 170 | wsum += w 171 | lx := math.Log(x) 172 | m += (lx - m) * w / wsum 173 | } 174 | return math.Exp(m) 175 | } 176 | 177 | // Variance returns the sample variance of xs. 178 | func Variance(xs []float64) float64 { 179 | if len(xs) == 0 { 180 | return math.NaN() 181 | } else if len(xs) <= 1 { 182 | return 0 183 | } 184 | 185 | // Based on Wikipedia's presentation of Welford 1962 186 | // (http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Online_algorithm). 187 | // This is more numerically stable than the standard two-pass 188 | // formula and not prone to massive cancellation. 189 | mean, M2 := 0.0, 0.0 190 | for n, x := range xs { 191 | delta := x - mean 192 | mean += delta / float64(n+1) 193 | M2 += delta * (x - mean) 194 | } 195 | return M2 / float64(len(xs)-1) 196 | } 197 | 198 | func (s Sample) Variance() float64 { 199 | if len(s.Xs) == 0 || s.Weights == nil { 200 | return Variance(s.Xs) 201 | } 202 | // TODO(austin) 203 | panic("Weighted Variance not implemented") 204 | } 205 | 206 | // StdDev returns the sample standard deviation of xs. 207 | func StdDev(xs []float64) float64 { 208 | return math.Sqrt(Variance(xs)) 209 | } 210 | 211 | // StdDev returns the sample standard deviation of the Sample. 212 | func (s Sample) StdDev() float64 { 213 | if len(s.Xs) == 0 || s.Weights == nil { 214 | return StdDev(s.Xs) 215 | } 216 | // TODO(austin) 217 | panic("Weighted StdDev not implemented") 218 | } 219 | 220 | // Percentile returns the pctileth value from the Sample. This uses 221 | // interpolation method R8 from Hyndman and Fan (1996). 222 | // 223 | // pctile will be capped to the range [0, 1]. If len(xs) == 0 or all 224 | // weights are 0, returns NaN. 225 | // 226 | // Percentile(0.5) is the median. Percentile(0.25) and 227 | // Percentile(0.75) are the first and third quartiles, respectively. 228 | // 229 | // This is constant time if s.Sorted and s.Weights == nil. 
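//
// Under the R8 rule the interpolation rank is h = 1/3 + pctile*(N + 1/3);
// for example, with N = 5 and pctile = 0.5, h = 3, so the median is simply
// the third value of the sorted sample.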
230 | func (s Sample) Percentile(pctile float64) float64 { 231 | if len(s.Xs) == 0 { 232 | return math.NaN() 233 | } else if pctile <= 0 { 234 | min, _ := s.Bounds() 235 | return min 236 | } else if pctile >= 1 { 237 | _, max := s.Bounds() 238 | return max 239 | } 240 | 241 | if !s.Sorted { 242 | // TODO(austin) Use select algorithm instead 243 | s = *s.Copy().Sort() 244 | } 245 | 246 | if s.Weights == nil { 247 | N := float64(len(s.Xs)) 248 | //n := pctile * (N + 1) // R6 249 | n := 1/3.0 + pctile*(N+1/3.0) // R8 250 | kf, frac := math.Modf(n) 251 | k := int(kf) 252 | if k <= 0 { 253 | return s.Xs[0] 254 | } else if k >= len(s.Xs) { 255 | return s.Xs[len(s.Xs)-1] 256 | } 257 | return s.Xs[k-1] + frac*(s.Xs[k]-s.Xs[k-1]) 258 | } else { 259 | // TODO(austin): Implement interpolation 260 | 261 | target := s.Weight() * pctile 262 | 263 | // TODO(austin) If we had cumulative weights, we could 264 | // do this in log time. 265 | for i, weight := range s.Weights { 266 | target -= weight 267 | if target < 0 { 268 | return s.Xs[i] 269 | } 270 | } 271 | return s.Xs[len(s.Xs)-1] 272 | } 273 | } 274 | 275 | // IQR returns the interquartile range of the Sample. 276 | // 277 | // This is constant time if s.Sorted and s.Weights == nil. 278 | func (s Sample) IQR() float64 { 279 | if !s.Sorted { 280 | s = *s.Copy().Sort() 281 | } 282 | return s.Percentile(0.75) - s.Percentile(0.25) 283 | } 284 | 285 | type sampleSorter struct { 286 | xs []float64 287 | weights []float64 288 | } 289 | 290 | func (p *sampleSorter) Len() int { 291 | return len(p.xs) 292 | } 293 | 294 | func (p *sampleSorter) Less(i, j int) bool { 295 | return p.xs[i] < p.xs[j] 296 | } 297 | 298 | func (p *sampleSorter) Swap(i, j int) { 299 | p.xs[i], p.xs[j] = p.xs[j], p.xs[i] 300 | p.weights[i], p.weights[j] = p.weights[j], p.weights[i] 301 | } 302 | 303 | // Sort sorts the samples in place in s and returns s. 304 | // 305 | // A sorted sample improves the performance of some algorithms. 306 | func (s *Sample) Sort() *Sample { 307 | if s.Sorted || sort.Float64sAreSorted(s.Xs) { 308 | // All set 309 | } else if s.Weights == nil { 310 | sort.Float64s(s.Xs) 311 | } else { 312 | sort.Sort(&sampleSorter{s.Xs, s.Weights}) 313 | } 314 | s.Sorted = true 315 | return s 316 | } 317 | 318 | // Copy returns a copy of the Sample. 319 | // 320 | // The returned Sample shares no data with the original, so they can 321 | // be modified (for example, sorted) independently. 322 | func (s Sample) Copy() *Sample { 323 | xs := make([]float64, len(s.Xs)) 324 | copy(xs, s.Xs) 325 | 326 | weights := []float64(nil) 327 | if s.Weights != nil { 328 | weights = make([]float64, len(s.Weights)) 329 | copy(weights, s.Weights) 330 | } 331 | 332 | return &Sample{xs, weights, s.Sorted} 333 | } 334 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/utest.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package stats 6 | 7 | import ( 8 | "math" 9 | "sort" 10 | 11 | "rsc.io/benchstat/internal/go-moremath/mathx" 12 | ) 13 | 14 | // A LocationHypothesis specifies the alternative hypothesis of a 15 | // location test such as a t-test or a Mann-Whitney U-test. The 16 | // default (zero) value is to test against the alternative hypothesis 17 | // that they differ. 
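//
// A LocationHypothesis is passed as the alt argument of the t-test and
// Mann-Whitney functions in this package; LocationDiffers selects the
// usual two-sided test.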
18 | type LocationHypothesis int 19 | 20 | //go:generate stringer -type LocationHypothesis 21 | 22 | const ( 23 | // LocationLess specifies the alternative hypothesis that the 24 | // location of the first sample is less than the second. This 25 | // is a one-tailed test. 26 | LocationLess LocationHypothesis = -1 27 | 28 | // LocationDiffers specifies the alternative hypothesis that 29 | // the locations of the two samples are not equal. This is a 30 | // two-tailed test. 31 | LocationDiffers LocationHypothesis = 0 32 | 33 | // LocationGreater specifies the alternative hypothesis that 34 | // the location of the first sample is greater than the 35 | // second. This is a one-tailed test. 36 | LocationGreater LocationHypothesis = 1 37 | ) 38 | 39 | // A MannWhitneyUTestResult is the result of a Mann-Whitney U-test. 40 | type MannWhitneyUTestResult struct { 41 | // N1 and N2 are the sizes of the input samples. 42 | N1, N2 int 43 | 44 | // U is the value of the Mann-Whitney U statistic for this 45 | // test, generalized by counting ties as 0.5. 46 | // 47 | // Given the Cartesian product of the two samples, this is the 48 | // number of pairs in which the value from the first sample is 49 | // greater than the value of the second, plus 0.5 times the 50 | // number of pairs where the values from the two samples are 51 | // equal. Hence, U is always an integer multiple of 0.5 (it is 52 | // a whole integer if there are no ties) in the range [0, N1*N2]. 53 | // 54 | // U statistics always come in pairs, depending on which 55 | // sample is "first". The mirror U for the other sample can be 56 | // calculated as N1*N2 - U. 57 | // 58 | // There are many equivalent statistics with slightly 59 | // different definitions. The Wilcoxon (1945) W statistic 60 | // (generalized for ties) is U + (N1(N1+1))/2. It is also 61 | // common to use 2U to eliminate the half steps and Smid 62 | // (1956) uses N1*N2 - 2U to additionally center the 63 | // distribution. 64 | U float64 65 | 66 | // AltHypothesis specifies the alternative hypothesis tested 67 | // by this test against the null hypothesis that there is no 68 | // difference in the locations of the samples. 69 | AltHypothesis LocationHypothesis 70 | 71 | // P is the p-value of the Mann-Whitney test for the given 72 | // null hypothesis. 73 | P float64 74 | } 75 | 76 | // MannWhitneyExactLimit gives the largest sample size for which the 77 | // exact U distribution will be used for the Mann-Whitney U-test. 78 | // 79 | // Using the exact distribution is necessary for small sample sizes 80 | // because the distribution is highly irregular. However, computing 81 | // the distribution for large sample sizes is both computationally 82 | // expensive and unnecessary because it quickly approaches a normal 83 | // approximation. Computing the distribution for two 50 value samples 84 | // takes a few milliseconds on a 2014 laptop. 85 | var MannWhitneyExactLimit = 50 86 | 87 | // MannWhitneyTiesExactLimit gives the largest sample size for which 88 | // the exact U distribution will be used for the Mann-Whitney U-test 89 | // in the presence of ties. 90 | // 91 | // Computing this distribution is more expensive than computing the 92 | // distribution without ties, so this is set lower. Computing this 93 | // distribution for two 25 value samples takes about ten milliseconds 94 | // on a 2014 laptop. 
95 | var MannWhitneyTiesExactLimit = 25 96 | 97 | // MannWhitneyUTest performs a Mann-Whitney U-test [1,2] of the null 98 | // hypothesis that two samples come from the same population against 99 | // the alternative hypothesis that one sample tends to have larger or 100 | // smaller values than the other. 101 | // 102 | // This is similar to a t-test, but unlike the t-test, the 103 | // Mann-Whitney U-test is non-parametric (it does not assume a normal 104 | // distribution). It has very slightly lower efficiency than the 105 | // t-test on normal distributions. 106 | // 107 | // Computing the exact U distribution is expensive for large sample 108 | // sizes, so this uses a normal approximation for sample sizes larger 109 | // than MannWhitneyExactLimit if there are no ties or 110 | // MannWhitneyTiesExactLimit if there are ties. This normal 111 | // approximation uses both the tie correction and the continuity 112 | // correction. 113 | // 114 | // This can fail with ErrSampleSize if either sample is empty or 115 | // ErrSamplesEqual if all sample values are equal. 116 | // 117 | // This is also known as a Mann-Whitney-Wilcoxon test and is 118 | // equivalent to the Wilcoxon rank-sum test, though the Wilcoxon 119 | // rank-sum test differs in nomenclature. 120 | // 121 | // [1] Mann, Henry B.; Whitney, Donald R. (1947). "On a Test of 122 | // Whether one of Two Random Variables is Stochastically Larger than 123 | // the Other". Annals of Mathematical Statistics 18 (1): 50–60. 124 | // 125 | // [2] Klotz, J. H. (1966). "The Wilcoxon, Ties, and the Computer". 126 | // Journal of the American Statistical Association 61 (315): 772-787. 127 | func MannWhitneyUTest(x1, x2 []float64, alt LocationHypothesis) (*MannWhitneyUTestResult, error) { 128 | n1, n2 := len(x1), len(x2) 129 | if n1 == 0 || n2 == 0 { 130 | return nil, ErrSampleSize 131 | } 132 | 133 | // Compute the U statistic and tie vector T. 134 | x1 = append([]float64(nil), x1...) 135 | x2 = append([]float64(nil), x2...) 136 | sort.Float64s(x1) 137 | sort.Float64s(x2) 138 | merged, labels := labeledMerge(x1, x2) 139 | 140 | R1 := 0.0 141 | T, hasTies := []int{}, false 142 | for i := 0; i < len(merged); { 143 | rank1, nx1, v1 := i+1, 0, merged[i] 144 | // Consume samples that tie this sample (including itself). 145 | for ; i < len(merged) && merged[i] == v1; i++ { 146 | if labels[i] == 1 { 147 | nx1++ 148 | } 149 | } 150 | // Assign all tied samples the average rank of the 151 | // samples, where merged[0] has rank 1. 152 | if nx1 != 0 { 153 | rank := float64(i+rank1) / 2 154 | R1 += rank * float64(nx1) 155 | } 156 | T = append(T, i-rank1+1) 157 | if i > rank1 { 158 | hasTies = true 159 | } 160 | } 161 | U1 := R1 - float64(n1*(n1+1))/2 162 | 163 | // Compute the smaller of U1 and U2 164 | U2 := float64(n1*n2) - U1 165 | Usmall := math.Min(U1, U2) 166 | 167 | var p float64 168 | if !hasTies && n1 <= MannWhitneyExactLimit && n2 <= MannWhitneyExactLimit || 169 | hasTies && n1 <= MannWhitneyTiesExactLimit && n2 <= MannWhitneyTiesExactLimit { 170 | // Use exact U distribution. U1 will be an integer. 171 | if len(T) == 1 { 172 | // All values are equal. Test is meaningless. 173 | return nil, ErrSamplesEqual 174 | } 175 | 176 | dist := UDist{N1: n1, N2: n2, T: T} 177 | switch alt { 178 | case LocationDiffers: 179 | if U1 == U2 { 180 | // The distribution is symmetric about 181 | // Usmall. 
Since the distribution is 182 | // discrete, the CDF is discontinuous 183 | // and if simply double CDF(Usmall), 184 | // we'll double count the 185 | // (non-infinitesimal) probability 186 | // mass at Usmall. What we want is 187 | // just the integral of the whole CDF, 188 | // which is 1. 189 | p = 1 190 | } else { 191 | p = dist.CDF(Usmall) * 2 192 | } 193 | 194 | case LocationLess: 195 | p = dist.CDF(U1) 196 | 197 | case LocationGreater: 198 | p = 1 - dist.CDF(U1-1) 199 | } 200 | } else { 201 | // Use normal approximation (with tie and continuity 202 | // correction). 203 | t := tieCorrection(T) 204 | N := float64(n1 + n2) 205 | μ_U := float64(n1*n2) / 2 206 | σ_U := math.Sqrt(float64(n1*n2) * ((N + 1) - t/(N*(N-1))) / 12) 207 | if σ_U == 0 { 208 | return nil, ErrSamplesEqual 209 | } 210 | numer := U1 - μ_U 211 | // Perform continuity correction. 212 | switch alt { 213 | case LocationDiffers: 214 | numer -= mathx.Sign(numer) * 0.5 215 | case LocationLess: 216 | numer += 0.5 217 | case LocationGreater: 218 | numer -= 0.5 219 | } 220 | z := numer / σ_U 221 | switch alt { 222 | case LocationDiffers: 223 | p = 2 * math.Min(StdNormal.CDF(z), 1-StdNormal.CDF(z)) 224 | case LocationLess: 225 | p = StdNormal.CDF(z) 226 | case LocationGreater: 227 | p = 1 - StdNormal.CDF(z) 228 | } 229 | } 230 | 231 | return &MannWhitneyUTestResult{N1: n1, N2: n2, U: U1, 232 | AltHypothesis: alt, P: p}, nil 233 | } 234 | 235 | // labeledMerge merges sorted lists x1 and x2 into sorted list merged. 236 | // labels[i] is 1 or 2 depending on whether merged[i] is a value from 237 | // x1 or x2, respectively. 238 | func labeledMerge(x1, x2 []float64) (merged []float64, labels []byte) { 239 | merged = make([]float64, len(x1)+len(x2)) 240 | labels = make([]byte, len(x1)+len(x2)) 241 | 242 | i, j, o := 0, 0, 0 243 | for i < len(x1) && j < len(x2) { 244 | if x1[i] < x2[j] { 245 | merged[o] = x1[i] 246 | labels[o] = 1 247 | i++ 248 | } else { 249 | merged[o] = x2[j] 250 | labels[o] = 2 251 | j++ 252 | } 253 | o++ 254 | } 255 | for ; i < len(x1); i++ { 256 | merged[o] = x1[i] 257 | labels[o] = 1 258 | o++ 259 | } 260 | for ; j < len(x2); j++ { 261 | merged[o] = x2[j] 262 | labels[o] = 2 263 | o++ 264 | } 265 | return 266 | } 267 | 268 | // tieCorrection computes the tie correction factor Σ_j (t_j³ - t_j) 269 | // where t_j is the number of ties in the j'th rank. 270 | func tieCorrection(ties []int) float64 { 271 | t := 0 272 | for _, tie := range ties { 273 | t += tie*tie*tie - tie 274 | } 275 | return float64(t) 276 | } 277 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/udist_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package stats 6 | 7 | import ( 8 | "fmt" 9 | "math" 10 | "testing" 11 | 12 | "rsc.io/benchstat/internal/go-moremath/mathx" 13 | ) 14 | 15 | func aeqTable(a, b [][]float64) bool { 16 | if len(a) != len(b) { 17 | return false 18 | } 19 | for i := range a { 20 | if len(a[i]) != len(b[i]) { 21 | return false 22 | } 23 | for j := range a[i] { 24 | // "%f" precision 25 | if math.Abs(a[i][j]-b[i][j]) >= 0.000001 { 26 | return false 27 | } 28 | } 29 | } 30 | return true 31 | } 32 | 33 | // U distribution for N=3 up to U=5. 
34 | var udist3 = [][]float64{ 35 | // m=1 2 3 36 | {0.250000, 0.100000, 0.050000}, // U=0 37 | {0.500000, 0.200000, 0.100000}, // U=1 38 | {0.750000, 0.400000, 0.200000}, // U=2 39 | {1.000000, 0.600000, 0.350000}, // U=3 40 | {1.000000, 0.800000, 0.500000}, // U=4 41 | {1.000000, 0.900000, 0.650000}, // U=5 42 | } 43 | 44 | // U distribution for N=5 up to U=5. 45 | var udist5 = [][]float64{ 46 | // m=1 2 3 4 5 47 | {0.166667, 0.047619, 0.017857, 0.007937, 0.003968}, // U=0 48 | {0.333333, 0.095238, 0.035714, 0.015873, 0.007937}, // U=1 49 | {0.500000, 0.190476, 0.071429, 0.031746, 0.015873}, // U=2 50 | {0.666667, 0.285714, 0.125000, 0.055556, 0.027778}, // U=3 51 | {0.833333, 0.428571, 0.196429, 0.095238, 0.047619}, // U=4 52 | {1.000000, 0.571429, 0.285714, 0.142857, 0.075397}, // U=5 53 | } 54 | 55 | func TestUDist(t *testing.T) { 56 | makeTable := func(n int) [][]float64 { 57 | out := make([][]float64, 6) 58 | for U := 0; U < 6; U++ { 59 | out[U] = make([]float64, n) 60 | for m := 1; m <= n; m++ { 61 | out[U][m-1] = UDist{N1: m, N2: n}.CDF(float64(U)) 62 | } 63 | } 64 | return out 65 | } 66 | fmtTable := func(a [][]float64) string { 67 | out := fmt.Sprintf("%8s", "m=") 68 | for m := 1; m <= len(a[0]); m++ { 69 | out += fmt.Sprintf("%9d", m) 70 | } 71 | out += "\n" 72 | 73 | for U, row := range a { 74 | out += fmt.Sprintf("U=%-6d", U) 75 | for m := 1; m <= len(a[0]); m++ { 76 | out += fmt.Sprintf(" %f", row[m-1]) 77 | } 78 | out += "\n" 79 | } 80 | return out 81 | } 82 | 83 | // Compare against tables given in Mann, Whitney (1947). 84 | got3 := makeTable(3) 85 | if !aeqTable(got3, udist3) { 86 | t.Errorf("For n=3, want:\n%sgot:\n%s", fmtTable(udist3), fmtTable(got3)) 87 | } 88 | 89 | got5 := makeTable(5) 90 | if !aeqTable(got5, udist5) { 91 | t.Errorf("For n=5, want:\n%sgot:\n%s", fmtTable(udist5), fmtTable(got5)) 92 | } 93 | } 94 | 95 | func BenchmarkUDist(b *testing.B) { 96 | for i := 0; i < b.N; i++ { 97 | // R uses the exact distribution up to N=50. 98 | // N*M/2=1250 is the hardest point to get the CDF for. 99 | UDist{N1: 50, N2: 50}.CDF(1250) 100 | } 101 | } 102 | 103 | func TestUDistTies(t *testing.T) { 104 | makeTable := func(m, N int, t []int, minx, maxx float64) [][]float64 { 105 | out := [][]float64{} 106 | dist := UDist{N1: m, N2: N - m, T: t} 107 | for x := minx; x <= maxx; x += 0.5 { 108 | // Convert x from uQt' to uQv'. 109 | U := x - float64(m*m)/2 110 | P := dist.CDF(U) 111 | if len(out) == 0 || !aeq(out[len(out)-1][1], P) { 112 | out = append(out, []float64{x, P}) 113 | } 114 | } 115 | return out 116 | } 117 | fmtTable := func(table [][]float64) string { 118 | out := "" 119 | for _, row := range table { 120 | out += fmt.Sprintf("%5.1f %f\n", row[0], row[1]) 121 | } 122 | return out 123 | } 124 | 125 | // Compare against Table 1 from Klotz (1966). 
126 | got := makeTable(5, 10, []int{1, 1, 2, 1, 1, 2, 1, 1}, 12.5, 19.5) 127 | want := [][]float64{ 128 | {12.5, 0.003968}, {13.5, 0.007937}, 129 | {15.0, 0.023810}, {16.5, 0.047619}, 130 | {17.5, 0.071429}, {18.0, 0.087302}, 131 | {19.0, 0.134921}, {19.5, 0.138889}, 132 | } 133 | if !aeqTable(got, want) { 134 | t.Errorf("Want:\n%sgot:\n%s", fmtTable(want), fmtTable(got)) 135 | } 136 | 137 | got = makeTable(10, 21, []int{6, 5, 4, 3, 2, 1}, 52, 87) 138 | want = [][]float64{ 139 | {52.0, 0.000014}, {56.5, 0.000128}, 140 | {57.5, 0.000145}, {60.0, 0.000230}, 141 | {61.0, 0.000400}, {62.0, 0.000740}, 142 | {62.5, 0.000797}, {64.0, 0.000825}, 143 | {64.5, 0.001165}, {65.5, 0.001477}, 144 | {66.5, 0.002498}, {67.0, 0.002725}, 145 | {67.5, 0.002895}, {68.0, 0.003150}, 146 | {68.5, 0.003263}, {69.0, 0.003518}, 147 | {69.5, 0.003603}, {70.0, 0.005648}, 148 | {70.5, 0.005818}, {71.0, 0.006626}, 149 | {71.5, 0.006796}, {72.0, 0.008157}, 150 | {72.5, 0.009688}, {73.0, 0.009801}, 151 | {73.5, 0.010430}, {74.0, 0.011111}, 152 | {74.5, 0.014230}, {75.0, 0.014612}, 153 | {75.5, 0.017249}, {76.0, 0.018307}, 154 | {76.5, 0.020178}, {77.0, 0.022270}, 155 | {77.5, 0.023189}, {78.0, 0.026931}, 156 | {78.5, 0.028207}, {79.0, 0.029979}, 157 | {79.5, 0.030931}, {80.0, 0.038969}, 158 | {80.5, 0.043063}, {81.0, 0.044262}, 159 | {81.5, 0.046389}, {82.0, 0.049581}, 160 | {82.5, 0.056300}, {83.0, 0.058027}, 161 | {83.5, 0.063669}, {84.0, 0.067454}, 162 | {84.5, 0.074122}, {85.0, 0.077425}, 163 | {85.5, 0.083498}, {86.0, 0.094079}, 164 | {86.5, 0.096693}, {87.0, 0.101132}, 165 | } 166 | if !aeqTable(got, want) { 167 | t.Errorf("Want:\n%sgot:\n%s", fmtTable(want), fmtTable(got)) 168 | } 169 | 170 | got = makeTable(8, 16, []int{2, 2, 2, 2, 2, 2, 2, 2}, 32, 54) 171 | want = [][]float64{ 172 | {32.0, 0.000078}, {34.0, 0.000389}, 173 | {36.0, 0.001088}, {38.0, 0.002642}, 174 | {40.0, 0.005905}, {42.0, 0.011500}, 175 | {44.0, 0.021057}, {46.0, 0.035664}, 176 | {48.0, 0.057187}, {50.0, 0.086713}, 177 | {52.0, 0.126263}, {54.0, 0.175369}, 178 | } 179 | if !aeqTable(got, want) { 180 | t.Errorf("Want:\n%sgot:\n%s", fmtTable(want), fmtTable(got)) 181 | } 182 | 183 | // Check remaining tables from Klotz against the reference 184 | // implementation. 185 | checkRef := func(n1 int, tie []int) { 186 | wantPMF1, wantCDF1 := udistRef(n1, tie) 187 | 188 | dist := UDist{N1: n1, N2: sumint(tie) - n1, T: tie} 189 | gotPMF, wantPMF := [][]float64{}, [][]float64{} 190 | gotCDF, wantCDF := [][]float64{}, [][]float64{} 191 | N := sumint(tie) 192 | for U := 0.0; U <= float64(n1*(N-n1)); U += 0.5 { 193 | gotPMF = append(gotPMF, []float64{U, dist.PMF(U)}) 194 | gotCDF = append(gotCDF, []float64{U, dist.CDF(U)}) 195 | wantPMF = append(wantPMF, []float64{U, wantPMF1[int(U*2)]}) 196 | wantCDF = append(wantCDF, []float64{U, wantCDF1[int(U*2)]}) 197 | } 198 | if !aeqTable(wantPMF, gotPMF) { 199 | t.Errorf("For PMF of n1=%v, t=%v, want:\n%sgot:\n%s", n1, tie, fmtTable(wantPMF), fmtTable(gotPMF)) 200 | } 201 | if !aeqTable(wantCDF, gotCDF) { 202 | t.Errorf("For CDF of n1=%v, t=%v, want:\n%sgot:\n%s", n1, tie, fmtTable(wantCDF), fmtTable(gotCDF)) 203 | } 204 | } 205 | checkRef(5, []int{1, 1, 2, 1, 1, 2, 1, 1}) 206 | checkRef(5, []int{1, 1, 2, 1, 1, 1, 2, 1}) 207 | checkRef(5, []int{1, 3, 1, 2, 1, 1, 1}) 208 | checkRef(8, []int{1, 2, 1, 1, 1, 1, 2, 2, 1, 2}) 209 | checkRef(12, []int{3, 3, 4, 3, 4, 5}) 210 | checkRef(10, []int{1, 2, 3, 4, 5, 6}) 211 | } 212 | 213 | func BenchmarkUDistTies(b *testing.B) { 214 | // Worst case: just one tie. 
215 | n := 20 216 | t := make([]int, 2*n-1) 217 | for i := range t { 218 | t[i] = 1 219 | } 220 | t[0] = 2 221 | 222 | for i := 0; i < b.N; i++ { 223 | UDist{N1: n, N2: n, T: t}.CDF(float64(n*n) / 2) 224 | } 225 | } 226 | 227 | func XTestPrintUmemo(t *testing.T) { 228 | // Reproduce table from Cheung, Klotz. 229 | ties := []int{4, 5, 3, 4, 6} 230 | printUmemo(makeUmemo(80, 10, ties), ties) 231 | } 232 | 233 | // udistRef computes the PMF and CDF of the U distribution for two 234 | // samples of sizes n1 and sum(t)-n1 with tie vector t. The returned 235 | // pmf and cdf are indexed by 2*U. 236 | // 237 | // This uses the "graphical method" of Klotz (1966). It is very slow 238 | // (Θ(∏ (t[i]+1)) = Ω(2^|t|)), but very correct, and hence useful as a 239 | // reference for testing faster implementations. 240 | func udistRef(n1 int, t []int) (pmf, cdf []float64) { 241 | // Enumerate all u vectors for which 0 <= u_i <= t_i. Count 242 | // the number of permutations of two samples of sizes n1 and 243 | // sum(t)-n1 with tie vector t and accumulate these counts by 244 | // their U statistics in count[2*U]. 245 | counts := make([]int, 1+2*n1*(sumint(t)-n1)) 246 | 247 | u := make([]int, len(t)) 248 | u[0] = -1 // Get enumeration started. 249 | enumu: 250 | for { 251 | // Compute the next u vector. 252 | u[0]++ 253 | for i := 0; i < len(u) && u[i] > t[i]; i++ { 254 | if i == len(u)-1 { 255 | // All u vectors have been enumerated. 256 | break enumu 257 | } 258 | // Carry. 259 | u[i+1]++ 260 | u[i] = 0 261 | } 262 | 263 | // Is this a legal u vector? 264 | if sumint(u) != n1 { 265 | // Klotz (1966) has a method for directly 266 | // enumerating legal u vectors, but the point 267 | // of this is to be correct, not fast. 268 | continue 269 | } 270 | 271 | // Compute 2*U statistic for this u vector. 272 | twoU, vsum := 0, 0 273 | for i, u_i := range u { 274 | v_i := t[i] - u_i 275 | // U = U + vsum*u_i + u_i*v_i/2 276 | twoU += 2*vsum*u_i + u_i*v_i 277 | vsum += v_i 278 | } 279 | 280 | // Compute Π choose(t_i, u_i). This is the number of 281 | // ways of permuting the input sample under u. 282 | prod := 1 283 | for i, u_i := range u { 284 | prod *= int(mathx.Choose(t[i], u_i) + 0.5) 285 | } 286 | 287 | // Accumulate the permutations on this u path. 288 | counts[twoU] += prod 289 | 290 | if false { 291 | // Print a table in the form of Klotz's 292 | // "direct enumeration" example. 293 | // 294 | // Convert 2U = 2UQV' to UQt' used in Klotz 295 | // examples. 296 | UQt := float64(twoU)/2 + float64(n1*n1)/2 297 | fmt.Printf("%+v %f %-2d\n", u, UQt, prod) 298 | } 299 | } 300 | 301 | // Convert counts into probabilities for PMF and CDF. 302 | pmf = make([]float64, len(counts)) 303 | cdf = make([]float64, len(counts)) 304 | total := int(mathx.Choose(sumint(t), n1) + 0.5) 305 | for i, count := range counts { 306 | pmf[i] = float64(count) / float64(total) 307 | if i > 0 { 308 | cdf[i] = cdf[i-1] 309 | } 310 | cdf[i] += pmf[i] 311 | } 312 | return 313 | } 314 | 315 | // printUmemo prints the output of makeUmemo for debugging. 
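The 2*U accumulation in udistRef above can be traced by hand for the smallest case. A standalone sketch, where twoUOf is simply a copy of that inner loop and not part of the package:

package main

import "fmt"

// twoUOf repeats udistRef's 2*U accumulation for one u vector and tie
// vector t.
func twoUOf(u, t []int) int {
	twoU, vsum := 0, 0
	for i, ui := range u {
		vi := t[i] - ui
		twoU += 2*vsum*ui + ui*vi
		vsum += vi
	}
	return twoU
}

func main() {
	t := []int{1, 1} // two distinct values, no ties
	// The only u vectors with sum(u) == n1 == 1:
	fmt.Println(twoUOf([]int{1, 0}, t)) // 0, so U = 0
	fmt.Println(twoUOf([]int{0, 1}, t)) // 2, so U = 1
}

Each of these u vectors contributes one permutation out of choose(2, 1) = 2 in total, so U is 0 or 1 with probability 1/2 each, as expected for two single-element samples with no ties.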
316 | func printUmemo(A []map[ukey]float64, t []int) { 317 | fmt.Printf("K\tn1\t2*U\tpr\n") 318 | for K := len(A) - 1; K >= 0; K-- { 319 | for i, pr := range A[K] { 320 | _, ref := udistRef(i.n1, t[:K]) 321 | fmt.Printf("%v\t%v\t%v\t%v\t%v\n", K, i.n1, i.twoU, pr, ref[i.twoU]) 322 | } 323 | } 324 | } 325 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/kde.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package stats 6 | 7 | import ( 8 | "fmt" 9 | "math" 10 | ) 11 | 12 | // A KDE is a distribution that estimates the underlying distribution 13 | // of a Sample using kernel density estimation. 14 | // 15 | // Kernel density estimation is a method for constructing an estimate 16 | // ƒ̂(x) of an unknown distribution ƒ(x) given a sample from that 17 | // distribution. Unlike many techniques, kernel density estimation is 18 | // non-parametric: in general, it doesn't assume any particular true 19 | // distribution (note, however, that the resulting distribution 20 | // depends deeply on the selected bandwidth, and many bandwidth 21 | // estimation techniques assume normal reference rules). 22 | // 23 | // A kernel density estimate is similar to a histogram, except that it 24 | // is a smooth probability estimate and does not require choosing a 25 | // bin size and discretizing the data. 26 | // 27 | // Sample is the only required field. All others have reasonable 28 | // defaults. 29 | type KDE struct { 30 | // Sample is the data sample underlying this KDE. 31 | Sample Sample 32 | 33 | // Kernel is the kernel to use for the KDE. 34 | Kernel KDEKernel 35 | 36 | // Bandwidth is the bandwidth to use for the KDE. 37 | // 38 | // If this is zero, the bandwidth is computed from the 39 | // provided data using a default bandwidth estimator 40 | // (currently BandwidthScott). 41 | Bandwidth float64 42 | 43 | // BoundaryMethod is the boundary correction method to use for 44 | // the KDE. The default value is BoundaryReflect; however, the 45 | // default bounds are effectively +/-inf, which is equivalent 46 | // to performing no boundary correction. 47 | BoundaryMethod KDEBoundaryMethod 48 | 49 | // [BoundaryMin, BoundaryMax) specify a bounded support for 50 | // the KDE. If both are 0 (their default values), they are 51 | // treated as +/-inf. 52 | // 53 | // To specify a half-bounded support, set Min to math.Inf(-1) 54 | // or Max to math.Inf(1). 55 | BoundaryMin float64 56 | BoundaryMax float64 57 | } 58 | 59 | // BandwidthSilverman is a bandwidth estimator implementing 60 | // Silverman's Rule of Thumb. It's fast, but not very robust to 61 | // outliers as it assumes data is approximately normal. 62 | // 63 | // Silverman, B. W. (1986) Density Estimation. 64 | func BandwidthSilverman(data interface { 65 | StdDev() float64 66 | Weight() float64 67 | }) float64 { 68 | return 1.06 * data.StdDev() * math.Pow(data.Weight(), -1.0/5) 69 | } 70 | 71 | // BandwidthScott is a bandwidth estimator implementing Scott's Rule. 72 | // This is generally robust to outliers: it chooses the minimum 73 | // between the sample's standard deviation and a robust estimator of 74 | // a Gaussian distribution's standard deviation. 75 | // 76 | // Scott, D. W. (1992) Multivariate Density Estimation: Theory, 77 | // Practice, and Visualization.
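A small sketch of the two bandwidth estimators: Silverman's rule above and Scott's rule, whose body follows. The data is invented, and the import path assumes the upstream go-moremath module rather than this internal copy.

package main

import (
	"fmt"

	"github.com/aclements/go-moremath/stats"
)

func main() {
	s := stats.Sample{Xs: []float64{1.2, 1.9, 2.1, 2.4, 2.5, 3.1, 3.8}}
	// Silverman: 1.06 * StdDev * Weight^(-1/5), as defined above.
	fmt.Println(stats.BandwidthSilverman(s))
	// Scott substitutes IQR/1.349 for the standard deviation when that
	// is smaller; see the body just below.
	fmt.Println(stats.BandwidthScott(s))
}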
78 | func BandwidthScott(data interface { 79 | StdDev() float64 80 | Weight() float64 81 | Percentile(float64) float64 82 | }) float64 { 83 | iqr := data.Percentile(0.75) - data.Percentile(0.25) 84 | hScale := 1.06 * math.Pow(data.Weight(), -1.0/5) 85 | stdDev := data.StdDev() 86 | if stdDev < iqr/1.349 { 87 | // Use Silverman's Rule of Thumb 88 | return hScale * stdDev 89 | } else { 90 | // Use IQR/1.349 as a robust estimator of the standard 91 | // deviation of a Gaussian distribution. 92 | return hScale * (iqr / 1.349) 93 | } 94 | } 95 | 96 | // TODO(austin) Implement bandwidth estimator from Botev, Grotowski, 97 | // Kroese. (2010) Kernel Density Estimation via Diffusion. 98 | 99 | // KDEKernel represents a kernel to use for a KDE. 100 | type KDEKernel int 101 | 102 | //go:generate stringer -type=KDEKernel 103 | 104 | const ( 105 | // An EpanechnikovKernel is a smooth kernel with bounded 106 | // support. As a result, the KDE will also have bounded 107 | // support. It is "optimal" in the sense that it minimizes the 108 | // asymptotic mean integrated squared error (AMISE). 109 | EpanechnikovKernel KDEKernel = iota 110 | 111 | // A GaussianKernel is a Gaussian (normal) kernel. 112 | GaussianKernel 113 | 114 | // A DeltaKernel is a Dirac delta function. The PDF of such a 115 | // KDE is not well-defined, but the CDF will represent each 116 | // sample as an instantaneous increase. This kernel ignores 117 | // bandwidth and never requires boundary correction. 118 | DeltaKernel 119 | ) 120 | 121 | // KDEBoundaryMethod represents a boundary correction method for 122 | // constructing a KDE with bounded support. 123 | type KDEBoundaryMethod int 124 | 125 | //go:generate stringer -type=KDEBoundaryMethod 126 | 127 | const ( 128 | // BoundaryReflect reflects the density estimate at the 129 | // boundaries. For example, for a KDE with support [0, inf), 130 | // this is equivalent to ƒ̂ᵣ(x)=ƒ̂(x)+ƒ̂(-x) for x>=0. This is a 131 | // simple and fast technique, but enforces that ƒ̂ᵣ'(0)=0, so 132 | // it may not be applicable to all distributions. 133 | BoundaryReflect KDEBoundaryMethod = iota 134 | ) 135 | 136 | type kdeKernel interface { 137 | pdfEach(xs []float64) []float64 138 | cdfEach(xs []float64) []float64 139 | } 140 | 141 | func (k *KDE) prepare() (kdeKernel, bool) { 142 | // Compute bandwidth. 143 | if k.Bandwidth == 0 { 144 | k.Bandwidth = BandwidthScott(k.Sample) 145 | } 146 | 147 | // Construct kernel. 148 | kernel := kdeKernel(nil) 149 | switch k.Kernel { 150 | default: 151 | panic(fmt.Sprint("unknown kernel", k)) 152 | case EpanechnikovKernel: 153 | kernel = epanechnikovKernel{k.Bandwidth} 154 | case GaussianKernel: 155 | kernel = NormalDist{0, k.Bandwidth} 156 | case DeltaKernel: 157 | kernel = DeltaDist{0} 158 | } 159 | 160 | // Use boundary correction? 161 | bc := k.BoundaryMin != 0 || k.BoundaryMax != 0 162 | 163 | return kernel, bc 164 | } 165 | 166 | // TODO: For KDEs of histograms, make histograms able to create a 167 | // weighted Sample and simply require the caller to provide a 168 | // good bandwidth from a StreamStats. 169 | 170 | // normalizedXs returns x - kde.Sample.Xs. Evaluating kernels shifted 171 | // by kde.Sample.Xs all at x is equivalent to evaluating one unshifted 172 | // kernel at x - kde.Sample.Xs. 
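As an illustration of the kernel choice described above, a DeltaKernel turns the KDE's CDF into a step function over the sample, essentially an empirical CDF. A sketch with invented data, again assuming the upstream import path:

package main

import (
	"fmt"

	"github.com/aclements/go-moremath/stats"
)

func main() {
	kde := stats.KDE{
		Sample: stats.Sample{Xs: []float64{1, 2, 3, 4}},
		Kernel: stats.DeltaKernel,
	}
	// Two of the four samples lie below 2.5.
	fmt.Println(kde.CDF(2.5)) // 0.5
}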
173 | func (kde *KDE) normalizedXs(x float64) []float64 { 174 | txs := make([]float64, len(kde.Sample.Xs)) 175 | for i, xi := range kde.Sample.Xs { 176 | txs[i] = x - xi 177 | } 178 | return txs 179 | } 180 | 181 | func (kde *KDE) PDF(x float64) float64 { 182 | kernel, bc := kde.prepare() 183 | 184 | // Apply boundary 185 | if bc && (x < kde.BoundaryMin || x >= kde.BoundaryMax) { 186 | return 0 187 | } 188 | 189 | y := func(x float64) float64 { 190 | // Shift kernel to each of kde.xs and evaluate at x 191 | ys := kernel.pdfEach(kde.normalizedXs(x)) 192 | 193 | // Kernel samples are weighted according to the weights of xs 194 | wys := Sample{Xs: ys, Weights: kde.Sample.Weights} 195 | 196 | return wys.Sum() / wys.Weight() 197 | } 198 | if !bc { 199 | return y(x) 200 | } 201 | switch kde.BoundaryMethod { 202 | default: 203 | panic("unknown boundary correction method") 204 | case BoundaryReflect: 205 | if math.IsInf(kde.BoundaryMax, 1) { 206 | return y(x) + y(2*kde.BoundaryMin-x) 207 | } else if math.IsInf(kde.BoundaryMin, -1) { 208 | return y(x) + y(2*kde.BoundaryMax-x) 209 | } else { 210 | d := 2 * (kde.BoundaryMax - kde.BoundaryMin) 211 | w := 2 * (x - kde.BoundaryMin) 212 | return series(func(n float64) float64 { 213 | // Points >= x 214 | return y(x+n*d) + y(x+n*d-w) 215 | }) + series(func(n float64) float64 { 216 | // Points < x 217 | return y(x-(n+1)*d+w) + y(x-(n+1)*d) 218 | }) 219 | } 220 | } 221 | } 222 | 223 | func (kde *KDE) CDF(x float64) float64 { 224 | kernel, bc := kde.prepare() 225 | 226 | // Apply boundary 227 | if bc { 228 | if x < kde.BoundaryMin { 229 | return 0 230 | } else if x >= kde.BoundaryMax { 231 | return 1 232 | } 233 | } 234 | 235 | y := func(x float64) float64 { 236 | // Shift kernel integral to each of cdf.xs and evaluate at x 237 | ys := kernel.cdfEach(kde.normalizedXs(x)) 238 | 239 | // Kernel samples are weighted according to the weights of xs 240 | wys := Sample{Xs: ys, Weights: kde.Sample.Weights} 241 | 242 | return wys.Sum() / wys.Weight() 243 | } 244 | if !bc { 245 | return y(x) 246 | } 247 | switch kde.BoundaryMethod { 248 | default: 249 | panic("unknown boundary correction method") 250 | case BoundaryReflect: 251 | if math.IsInf(kde.BoundaryMax, 1) { 252 | return y(x) - y(2*kde.BoundaryMin-x) 253 | } else if math.IsInf(kde.BoundaryMin, -1) { 254 | return y(x) + (1 - y(2*kde.BoundaryMax-x)) 255 | } else { 256 | d := 2 * (kde.BoundaryMax - kde.BoundaryMin) 257 | w := 2 * (x - kde.BoundaryMin) 258 | return series(func(n float64) float64 { 259 | // Windows >= x-w 260 | return y(x+n*d) - y(x+n*d-w) 261 | }) + series(func(n float64) float64 { 262 | // Windows < x-w 263 | return y(x-(n+1)*d) - y(x-(n+1)*d-w) 264 | }) 265 | } 266 | } 267 | } 268 | 269 | func (kde *KDE) Bounds() (low float64, high float64) { 270 | _, bc := kde.prepare() 271 | 272 | // TODO(austin) If this KDE came from a histogram, we'd better 273 | // not sample at a significantly higher rate than the 274 | // histogram. Maybe we want to just return the bounds of the 275 | // histogram? 276 | 277 | // TODO(austin) It would be nice if this could be instructed 278 | // to include all original data points, even if they are in 279 | // the tail. Probably that should just be up to the caller to 280 | // pass an axis derived from the bounds of the original data. 
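Putting the pieces above together, a minimal sketch that builds a KDE with a half-bounded support and evaluates it at one point. The data is invented; the zero Kernel value is EpanechnikovKernel and a zero Bandwidth falls back to BandwidthScott, per the docs above; the import path assumes the upstream module.

package main

import (
	"fmt"
	"math"

	"github.com/aclements/go-moremath/stats"
)

func main() {
	kde := stats.KDE{
		Sample: stats.Sample{Xs: []float64{0.3, 0.5, 0.9, 1.4, 2.2}},
		// Support on [0, +inf), corrected by reflection at 0.
		BoundaryMethod: stats.BoundaryReflect,
		BoundaryMin:    0,
		BoundaryMax:    math.Inf(1),
	}
	fmt.Println(kde.PDF(1.0), kde.CDF(1.0))
}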
281 | 282 | // Use the lowest and highest samples as starting points 283 | lowX, highX := kde.Sample.Bounds() 284 | if lowX == highX { 285 | lowX -= 1 286 | highX += 1 287 | } 288 | 289 | // Find the end points that contain 99% of the CDF's weight. 290 | // Since bisect requires that the root be bracketed, start by 291 | // expanding our range if necessary. TODO(austin) This can 292 | // definitely be done faster. 293 | const ( 294 | lowY = 0.005 295 | highY = 0.995 296 | tolerance = 0.001 297 | ) 298 | for kde.CDF(lowX) > lowY { 299 | lowX -= highX - lowX 300 | } 301 | for kde.CDF(highX) < highY { 302 | highX += highX - lowX 303 | } 304 | // Explicitly accept discontinuities, since we may be using a 305 | // discontiguous kernel. 306 | low, _ = bisect(func(x float64) float64 { return kde.CDF(x) - lowY }, lowX, highX, tolerance) 307 | high, _ = bisect(func(x float64) float64 { return kde.CDF(x) - highY }, lowX, highX, tolerance) 308 | 309 | // Expand width by 20% to give some margins 310 | width := high - low 311 | low, high = low-0.1*width, high+0.1*width 312 | 313 | // Limit to bounds 314 | if bc { 315 | low = math.Max(low, kde.BoundaryMin) 316 | high = math.Min(high, kde.BoundaryMax) 317 | } 318 | 319 | return 320 | } 321 | 322 | type epanechnikovKernel struct { 323 | h float64 324 | } 325 | 326 | func (d epanechnikovKernel) pdfEach(xs []float64) []float64 { 327 | ys := make([]float64, len(xs)) 328 | a := 0.75 / d.h 329 | invhh := 1 / (d.h * d.h) 330 | for i, x := range xs { 331 | if -d.h < x && x < d.h { 332 | ys[i] = a * (1 - x*x*invhh) 333 | } 334 | } 335 | return ys 336 | } 337 | 338 | func (d epanechnikovKernel) cdfEach(xs []float64) []float64 { 339 | ys := make([]float64, len(xs)) 340 | invh := 1 / d.h 341 | for i, x := range xs { 342 | if x > d.h { 343 | ys[i] = 1 344 | } else if x > -d.h { 345 | u := x * invh 346 | ys[i] = 0.25 * (2 + 3*u - u*u*u) 347 | } 348 | } 349 | return ys 350 | } 351 | -------------------------------------------------------------------------------- /internal/go-moremath/stats/udist.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package stats 6 | 7 | import ( 8 | "math" 9 | 10 | "rsc.io/benchstat/internal/go-moremath/mathx" 11 | ) 12 | 13 | // A UDist is the discrete probability distribution of the 14 | // Mann-Whitney U statistic for a pair of samples of sizes N1 and N2. 15 | // 16 | // The details of computing this distribution with no ties can be 17 | // found in Mann, Henry B.; Whitney, Donald R. (1947). "On a Test of 18 | // Whether one of Two Random Variables is Stochastically Larger than 19 | // the Other". Annals of Mathematical Statistics 18 (1): 50–60. 20 | // Computing this distribution in the presence of ties is described in 21 | // Klotz, J. H. (1966). "The Wilcoxon, Ties, and the Computer". 22 | // Journal of the American Statistical Association 61 (315): 772-787 23 | // and Cheung, Ying Kuen; Klotz, Jerome H. (1997). "The Mann Whitney 24 | // Wilcoxon Distribution Using Linked Lists". Statistica Sinica 7: 25 | // 805-813 (the former paper contains details that are glossed over in 26 | // the latter paper but has mathematical typesetting issues, so it's 27 | // easiest to get the context from the former paper and the details 28 | // from the latter). 
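A usage sketch for the distribution defined next, with values that can be cross-checked against the tables in udist_test.go above (the import path again assumes the upstream module):

package main

import (
	"fmt"

	"github.com/aclements/go-moremath/stats"
)

func main() {
	// No ties: samples of sizes 3 and 3.
	fmt.Println(stats.UDist{N1: 3, N2: 3}.CDF(2)) // 0.20, per the udist3 table (m=3, U=2)

	// With ties: the Klotz (1966) tie vector used in TestUDistTies.
	d := stats.UDist{N1: 5, N2: 5, T: []int{1, 1, 2, 1, 1, 2, 1, 1}}
	fmt.Println(d.CDF(0)) // 0.003968..., i.e. 1/C(10,5)
}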
29 | type UDist struct { 30 | N1, N2 int 31 | 32 | // T is the count of the number of ties at each rank in the 33 | // input distributions. T may be nil, in which case it is 34 | // assumed there are no ties (which is equivalent to an M+N 35 | // slice of 1s). It must be the case that Sum(T) == M+N. 36 | T []int 37 | } 38 | 39 | // hasTies returns true if d has any tied samples. 40 | func (d UDist) hasTies() bool { 41 | for _, t := range d.T { 42 | if t > 1 { 43 | return true 44 | } 45 | } 46 | return false 47 | } 48 | 49 | // p returns the p_{d.N1,d.N2} function defined by Mann, Whitney 1947 50 | // for values of U from 0 up to and including the U argument. 51 | // 52 | // This algorithm runs in Θ(N1*N2*U) = O(N1²N2²) time and is quite 53 | // fast for small values of N1 and N2. However, it does not handle ties. 54 | func (d UDist) p(U int) []float64 { 55 | // This is a dynamic programming implementation of the 56 | // recursive recurrence definition given by Mann and Whitney: 57 | // 58 | // p_{n,m}(U) = (n * p_{n-1,m}(U-m) + m * p_{n,m-1}(U)) / (n+m) 59 | // p_{n,m}(U) = 0 if U < 0 60 | // p_{0,m}(U) = p{n,0}(U) = 1 / nCr(m+n, n) if U = 0 61 | // = 0 if U > 0 62 | // 63 | // (Note that there is a typo in the original paper. The first 64 | // recursive application of p should be for U-m, not U-M.) 65 | // 66 | // Since p{n,m} only depends on p{n-1,m} and p{n,m-1}, we only 67 | // need to store one "plane" of the three dimensional space at 68 | // a time. 69 | // 70 | // Furthermore, p_{n,m} = p_{m,n}, so we only construct values 71 | // for n <= m and obtain the rest through symmetry. 72 | // 73 | // We organize the computed values of p as followed: 74 | // 75 | // n → N 76 | // m * 77 | // ↓ * * 78 | // * * * 79 | // * * * * 80 | // * * * * 81 | // M * * * * 82 | // 83 | // where each * is a slice indexed by U. The code below 84 | // computes these left-to-right, top-to-bottom, so it only 85 | // stores one row of this matrix at a time. Furthermore, 86 | // computing an element in a given U slice only depends on the 87 | // same and smaller values of U, so we can overwrite the U 88 | // slice we're computing in place as long as we start with the 89 | // largest value of U. Finally, even though the recurrence 90 | // depends on (n,m) above the diagonal and we use symmetry to 91 | // mirror those across the diagonal to (m,n), the mirrored 92 | // indexes are always available in the current row, so this 93 | // mirroring does not interfere with our ability to recycle 94 | // state. 95 | 96 | N, M := d.N1, d.N2 97 | if N > M { 98 | N, M = M, N 99 | } 100 | 101 | memo := make([][]float64, N+1) 102 | for n := range memo { 103 | memo[n] = make([]float64, U+1) 104 | } 105 | 106 | for m := 0; m <= M; m++ { 107 | // Compute p_{0,m}. This is zero except for U=0. 108 | memo[0][0] = 1 109 | 110 | // Compute the remainder of this row. 111 | nlim := N 112 | if m < nlim { 113 | nlim = m 114 | } 115 | for n := 1; n <= nlim; n++ { 116 | lp := memo[n-1] // p_{n-1,m} 117 | var rp []float64 118 | if n <= m-1 { 119 | rp = memo[n] // p_{n,m-1} 120 | } else { 121 | rp = memo[m-1] // p{m-1,n} and m==n 122 | } 123 | 124 | // For a given n,m, U is at most n*m. 125 | // 126 | // TODO: Actually, it's at most ⌈n*m/2⌉, but 127 | // then we need to use more complex symmetries 128 | // in the inner loop below. 
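// As a quick check of the recurrence above:
// p_{1,1}(0) = (1*p_{0,1}(-1) + 1*p_{1,0}(0)) / 2
//            = (0 + 1/choose(1,1)) / 2 = 1/2,
// so for two single-element samples U is 0 or 1 with equal probability.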
129 | ulim := n * m 130 | if U < ulim { 131 | ulim = U 132 | } 133 | 134 | out := memo[n] // p_{n,m} 135 | nplusm := float64(n + m) 136 | for U1 := ulim; U1 >= 0; U1-- { 137 | l := 0.0 138 | if U1-m >= 0 { 139 | l = float64(n) * lp[U1-m] 140 | } 141 | r := float64(m) * rp[U1] 142 | out[U1] = (l + r) / nplusm 143 | } 144 | } 145 | } 146 | return memo[N] 147 | } 148 | 149 | type ukey struct { 150 | n1 int // size of first sample 151 | twoU int // 2*U statistic for this permutation 152 | } 153 | 154 | // This computes the cumulative counts of the Mann-Whitney U 155 | // distribution in the presence of ties using the computation from 156 | // Cheung, Ying Kuen; Klotz, Jerome H. (1997). "The Mann Whitney 157 | // Wilcoxon Distribution Using Linked Lists". Statistica Sinica 7: 158 | // 805-813, with much guidance from appendix L of Klotz, A 159 | // Computational Approach to Statistics. 160 | // 161 | // makeUmemo constructs a table memo[K][ukey{n1, 2*U}], where K is the 162 | // number of ranks (up to len(t)), n1 is the size of the first sample 163 | // (up to the n1 argument), and U is the U statistic (up to the 164 | // argument twoU/2). The value of an entry in the memo table is the 165 | // number of permutations of a sample of size n1 in a ranking with tie 166 | // vector t[:K] having a U statistic <= U. 167 | func makeUmemo(twoU, n1 int, t []int) []map[ukey]float64 { 168 | // Another candidate for a fast implementation is van de Wiel, 169 | // "The split-up algorithm: a fast symbolic method for 170 | // computing p-values of distribution-free statistics". This 171 | // is what's used by R's coin package. It's a comparatively 172 | // recent publication, so it's presumably faster (or perhaps 173 | // just more general) than previous techniques, but I can't 174 | // get my hands on the paper. 175 | // 176 | // TODO: ~40% of this function's time is spent in mapassign on 177 | // the assignment lines in the two loops and another ~20% in 178 | // map access and iteration. Improving map behavior or 179 | // replacing the maps altogether with some other constant-time 180 | // structure could double performance. 181 | // 182 | // TODO: The worst case for this function is when there are 183 | // few ties. Yet the best case overall is when there are *no* 184 | // ties. Can we get the best of both worlds? Use the fast 185 | // algorithm for the most part when there are few ties and mix 186 | // in the general algorithm just where we need it? That's 187 | // certainly possible for sub-problems where t[:k] has no 188 | // ties, but that doesn't help if t[0] has a tie but nothing 189 | // else does. Is it possible to rearrange the ranks without 190 | // messing up our computation of the U statistic for 191 | // sub-problems? 192 | 193 | K := len(t) 194 | 195 | // Compute a coefficients. The a slice is indexed by k (a[0] 196 | // is unused). 197 | a := make([]int, K+1) 198 | a[1] = t[0] 199 | for k := 2; k <= K; k++ { 200 | a[k] = a[k-1] + t[k-2] + t[k-1] 201 | } 202 | 203 | // Create the memo table for the counts function, A. The A 204 | // slice is indexed by k (A[0] is unused). 205 | // 206 | // In "The Mann Whitney Distribution Using Linked Lists", they 207 | // use linked lists (*gasp*) for this, but within each K it's 208 | // really just a memoization table, so it's faster to use a 209 | // map. The outer structure is a slice indexed by k because we 210 | // need to find all memo entries with certain values of k. 
211 | // 212 | // TODO: The n1 and twoU values in the ukeys follow strict 213 | // patterns. For each K value, the n1 values are every integer 214 | // between two bounds. For each (K, n1) value, the twoU values 215 | // are every integer multiple of a certain base between two 216 | // bounds. It might be worth turning these into directly 217 | // indexible slices. 218 | A := make([]map[ukey]float64, K+1) 219 | A[K] = map[ukey]float64{ukey{n1: n1, twoU: twoU}: 0} 220 | 221 | // Compute memo table (k, n1, twoU) triples from high K values 222 | // to low K values. This drives the recurrence relation 223 | // downward to figure out all of the needed argument triples. 224 | // 225 | // TODO: Is it possible to generate this table bottom-up? If 226 | // so, this could be a pure dynamic programming algorithm and 227 | // we could discard the K dimension. We could at least store 228 | // the inputs in a more compact representation that replaces 229 | // the twoU dimension with an interval and a step size (as 230 | // suggested by Cheung, Klotz, not that they make it at all 231 | // clear *why* they're suggesting this). 232 | tsum := sumint(t) // always ∑ t[0:k] 233 | for k := K - 1; k >= 2; k-- { 234 | tsum -= t[k] 235 | A[k] = make(map[ukey]float64) 236 | 237 | // Construct A[k] from A[k+1]. 238 | for A_kplus1 := range A[k+1] { 239 | rkLow := maxint(0, A_kplus1.n1-tsum) 240 | rkHigh := minint(A_kplus1.n1, t[k]) 241 | for rk := rkLow; rk <= rkHigh; rk++ { 242 | twoU_k := A_kplus1.twoU - rk*(a[k+1]-2*A_kplus1.n1+rk) 243 | n1_k := A_kplus1.n1 - rk 244 | if twoUmin(n1_k, t[:k], a) <= twoU_k && twoU_k <= twoUmax(n1_k, t[:k], a) { 245 | key := ukey{n1: n1_k, twoU: twoU_k} 246 | A[k][key] = 0 247 | } 248 | } 249 | } 250 | } 251 | 252 | // Fill counts in memo table from low K values to high K 253 | // values. This unwinds the recurrence relation. 254 | 255 | // Start with K==2 base case. 256 | // 257 | // TODO: Later computations depend on these, but these don't 258 | // depend on anything (including each other), so if K==2, we 259 | // can skip the memo table altogether. 260 | if K < 2 { 261 | panic("K < 2") 262 | } 263 | N_2 := t[0] + t[1] 264 | for A_2i := range A[2] { 265 | Asum := 0.0 266 | r2Low := maxint(0, A_2i.n1-t[0]) 267 | r2High := (A_2i.twoU - A_2i.n1*(t[0]-A_2i.n1)) / N_2 268 | for r2 := r2Low; r2 <= r2High; r2++ { 269 | Asum += mathx.Choose(t[0], A_2i.n1-r2) * 270 | mathx.Choose(t[1], r2) 271 | } 272 | A[2][A_2i] = Asum 273 | } 274 | 275 | // Derive counts for the rest of the memo table. 276 | tsum = t[0] // always ∑ t[0:k-1] 277 | for k := 3; k <= K; k++ { 278 | tsum += t[k-2] 279 | 280 | // Compute A[k] counts from A[k-1] counts. 
281 | for A_ki := range A[k] { 282 | Asum := 0.0 283 | rkLow := maxint(0, A_ki.n1-tsum) 284 | rkHigh := minint(A_ki.n1, t[k-1]) 285 | for rk := rkLow; rk <= rkHigh; rk++ { 286 | twoU_kminus1 := A_ki.twoU - rk*(a[k]-2*A_ki.n1+rk) 287 | n1_kminus1 := A_ki.n1 - rk 288 | x, ok := A[k-1][ukey{n1: n1_kminus1, twoU: twoU_kminus1}] 289 | if !ok && twoUmax(n1_kminus1, t[:k-1], a) < twoU_kminus1 { 290 | x = mathx.Choose(tsum, n1_kminus1) 291 | } 292 | Asum += x * mathx.Choose(t[k-1], rk) 293 | } 294 | A[k][A_ki] = Asum 295 | } 296 | } 297 | 298 | return A 299 | } 300 | 301 | func twoUmin(n1 int, t, a []int) int { 302 | K := len(t) 303 | twoU := -n1 * n1 304 | n1_k := n1 305 | for k := 1; k <= K; k++ { 306 | twoU_k := minint(n1_k, t[k-1]) 307 | twoU += twoU_k * a[k] 308 | n1_k -= twoU_k 309 | } 310 | return twoU 311 | } 312 | 313 | func twoUmax(n1 int, t, a []int) int { 314 | K := len(t) 315 | twoU := -n1 * n1 316 | n1_k := n1 317 | for k := K; k > 0; k-- { 318 | twoU_k := minint(n1_k, t[k-1]) 319 | twoU += twoU_k * a[k] 320 | n1_k -= twoU_k 321 | } 322 | return twoU 323 | } 324 | 325 | func (d UDist) PMF(U float64) float64 { 326 | if U < 0 || U >= 0.5+float64(d.N1*d.N2) { 327 | return 0 328 | } 329 | 330 | if d.hasTies() { 331 | // makeUmemo computes the CDF directly. Take its 332 | // difference to get the PMF. 333 | p1, ok1 := makeUmemo(int(2*U)-1, d.N1, d.T)[len(d.T)][ukey{d.N1, int(2*U) - 1}] 334 | p2, ok2 := makeUmemo(int(2*U), d.N1, d.T)[len(d.T)][ukey{d.N1, int(2 * U)}] 335 | if !ok1 || !ok2 { 336 | panic("makeUmemo did not return expected memoization table") 337 | } 338 | return (p2 - p1) / mathx.Choose(d.N1+d.N2, d.N1) 339 | } 340 | 341 | // There are no ties. Use the fast algorithm. U must be integral. 342 | Ui := int(math.Floor(U)) 343 | // TODO: Use symmetry to minimize U 344 | return d.p(Ui)[Ui] 345 | } 346 | 347 | func (d UDist) CDF(U float64) float64 { 348 | if U < 0 { 349 | return 0 350 | } else if U >= float64(d.N1*d.N2) { 351 | return 1 352 | } 353 | 354 | if d.hasTies() { 355 | // TODO: Minimize U? 356 | p, ok := makeUmemo(int(2*U), d.N1, d.T)[len(d.T)][ukey{d.N1, int(2 * U)}] 357 | if !ok { 358 | panic("makeUmemo did not return expected memoization table") 359 | } 360 | return p / mathx.Choose(d.N1+d.N2, d.N1) 361 | } 362 | 363 | // There are no ties. Use the fast algorithm. U must be integral. 364 | Ui := int(math.Floor(U)) 365 | // The distribution is symmetric around U = m * n / 2. Sum up 366 | // whichever tail is smaller. 367 | flip := Ui >= (d.N1*d.N2+1)/2 368 | if flip { 369 | Ui = d.N1*d.N2 - Ui - 1 370 | } 371 | pdfs := d.p(Ui) 372 | p := 0.0 373 | for _, pdf := range pdfs[:Ui+1] { 374 | p += pdf 375 | } 376 | if flip { 377 | p = 1 - p 378 | } 379 | return p 380 | } 381 | 382 | func (d UDist) Step() float64 { 383 | return 0.5 384 | } 385 | 386 | func (d UDist) Bounds() (float64, float64) { 387 | // TODO: More precise bounds when there are ties. 388 | return 0, float64(d.N1 * d.N2) 389 | } 390 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // Benchstat computes and compares statistics about benchmarks. 6 | // 7 | // This package has moved. 
Please use https://golang.org/x/perf/cmd/benchstat 8 | package main 9 | 10 | import ( 11 | "bytes" 12 | "flag" 13 | "fmt" 14 | "html" 15 | "io/ioutil" 16 | "log" 17 | "os" 18 | "strconv" 19 | "strings" 20 | "unicode/utf8" 21 | 22 | "rsc.io/benchstat/internal/go-moremath/stats" 23 | ) 24 | 25 | func usage() { 26 | fmt.Fprintf(os.Stderr, "usage: benchstat [options] old.txt [new.txt] [more.txt ...]\n") 27 | fmt.Fprintf(os.Stderr, "options:\n") 28 | flag.PrintDefaults() 29 | os.Exit(2) 30 | } 31 | 32 | var ( 33 | flagDeltaTest = flag.String("delta-test", "utest", "significance `test` to apply to delta: utest, ttest, or none") 34 | flagAlpha = flag.Float64("alpha", 0.05, "consider change significant if p < `α`") 35 | flagGeomean = flag.Bool("geomean", false, "print the geometric mean of each file") 36 | flagHTML = flag.Bool("html", false, "print results as an HTML table") 37 | ) 38 | 39 | var deltaTestNames = map[string]func(old, new *Benchstat) (float64, error){ 40 | "none": notest, 41 | "u": utest, 42 | "u-test": utest, 43 | "utest": utest, 44 | "t": ttest, 45 | "t-test": ttest, 46 | "ttest": ttest, 47 | } 48 | 49 | type row struct { 50 | cols []string 51 | } 52 | 53 | func newRow(cols ...string) *row { 54 | return &row{cols: cols} 55 | } 56 | 57 | func (r *row) add(col string) { 58 | r.cols = append(r.cols, col) 59 | } 60 | 61 | func (r *row) trim() { 62 | for len(r.cols) > 0 && r.cols[len(r.cols)-1] == "" { 63 | r.cols = r.cols[:len(r.cols)-1] 64 | } 65 | } 66 | 67 | func main() { 68 | log.SetPrefix("benchstat: ") 69 | log.SetFlags(0) 70 | flag.Usage = usage 71 | flag.Parse() 72 | deltaTest := deltaTestNames[strings.ToLower(*flagDeltaTest)] 73 | if flag.NArg() < 1 || deltaTest == nil { 74 | flag.Usage() 75 | } 76 | 77 | // Read in benchmark data. 78 | c := readFiles(flag.Args()) 79 | for _, stat := range c.Stats { 80 | stat.ComputeStats() 81 | } 82 | 83 | var tables [][]*row 84 | switch len(c.Configs) { 85 | case 2: 86 | before, after := c.Configs[0], c.Configs[1] 87 | key := BenchKey{} 88 | for _, key.Unit = range c.Units { 89 | var table []*row 90 | metric := metricOf(key.Unit) 91 | for _, key.Benchmark = range c.Benchmarks { 92 | key.Config = before 93 | old := c.Stats[key] 94 | key.Config = after 95 | new := c.Stats[key] 96 | if old == nil || new == nil { 97 | continue 98 | } 99 | if len(table) == 0 { 100 | table = append(table, newRow("name", "old "+metric, "new "+metric, "delta")) 101 | } 102 | 103 | pval, testerr := deltaTest(old, new) 104 | 105 | scaler := newScaler(old.Mean, old.Unit) 106 | row := newRow(key.Benchmark, old.Format(scaler), new.Format(scaler), "~ ") 107 | if testerr == stats.ErrZeroVariance { 108 | row.add("(zero variance)") 109 | } else if testerr == stats.ErrSampleSize { 110 | row.add("(too few samples)") 111 | } else if testerr == stats.ErrSamplesEqual { 112 | row.add("(all equal)") 113 | } else if testerr != nil { 114 | row.add(fmt.Sprintf("(%s)", testerr)) 115 | } else if pval < *flagAlpha { 116 | row.cols[3] = fmt.Sprintf("%+.2f%%", ((new.Mean/old.Mean)-1.0)*100.0) 117 | } 118 | if len(row.cols) == 4 && pval != -1 { 119 | row.add(fmt.Sprintf("(p=%0.3f n=%d+%d)", pval, len(old.RValues), len(new.RValues))) 120 | } 121 | table = append(table, row) 122 | } 123 | if len(table) > 0 { 124 | table = addGeomean(table, c, key.Unit, true) 125 | tables = append(tables, table) 126 | } 127 | } 128 | 129 | default: 130 | key := BenchKey{} 131 | for _, key.Unit = range c.Units { 132 | var table []*row 133 | metric := metricOf(key.Unit) 134 | 135 | if len(c.Configs) > 1 { 136 | 
hdr := newRow("name \\ " + metric) 137 | for _, config := range c.Configs { 138 | hdr.add(config) 139 | } 140 | table = append(table, hdr) 141 | } else { 142 | table = append(table, newRow("name", metric)) 143 | } 144 | 145 | for _, key.Benchmark = range c.Benchmarks { 146 | row := newRow(key.Benchmark) 147 | var scaler func(float64) string 148 | for _, key.Config = range c.Configs { 149 | stat := c.Stats[key] 150 | if stat == nil { 151 | row.add("") 152 | continue 153 | } 154 | if scaler == nil { 155 | scaler = newScaler(stat.Mean, stat.Unit) 156 | } 157 | row.add(stat.Format(scaler)) 158 | } 159 | row.trim() 160 | if len(row.cols) > 1 { 161 | table = append(table, row) 162 | } 163 | } 164 | table = addGeomean(table, c, key.Unit, false) 165 | tables = append(tables, table) 166 | } 167 | } 168 | 169 | numColumn := 0 170 | for _, table := range tables { 171 | for _, row := range table { 172 | if numColumn < len(row.cols) { 173 | numColumn = len(row.cols) 174 | } 175 | } 176 | } 177 | 178 | max := make([]int, numColumn) 179 | for _, table := range tables { 180 | for _, row := range table { 181 | for i, s := range row.cols { 182 | n := utf8.RuneCountInString(s) 183 | if max[i] < n { 184 | max[i] = n 185 | } 186 | } 187 | } 188 | } 189 | 190 | var buf bytes.Buffer 191 | for i, table := range tables { 192 | if i > 0 { 193 | fmt.Fprintf(&buf, "\n") 194 | } 195 | 196 | if *flagHTML { 197 | fmt.Fprintf(&buf, "<style>.benchstat tbody td:nth-child(1n+2) { text-align: right; padding: 0em 1em; }</style>\n") 198 | fmt.Fprintf(&buf, "<table class='benchstat'>\n") 199 | printRow := func(row *row, tag string) { 200 | fmt.Fprintf(&buf, "<tr>") 201 | for _, cell := range row.cols { 202 | fmt.Fprintf(&buf, "<%s>%s</%s>", tag, html.EscapeString(cell), tag) 203 | } 204 | fmt.Fprintf(&buf, "\n") 205 | } 206 | printRow(table[0], "th") 207 | for _, row := range table[1:] { 208 | printRow(row, "td") 209 | } 210 | fmt.Fprintf(&buf, "</table>
\n") 211 | continue 212 | } 213 | 214 | // headings 215 | row := table[0] 216 | for i, s := range row.cols { 217 | switch i { 218 | case 0: 219 | fmt.Fprintf(&buf, "%-*s", max[i], s) 220 | default: 221 | fmt.Fprintf(&buf, " %-*s", max[i], s) 222 | case len(row.cols) - 1: 223 | fmt.Fprintf(&buf, " %s\n", s) 224 | } 225 | } 226 | 227 | // data 228 | for _, row := range table[1:] { 229 | for i, s := range row.cols { 230 | switch i { 231 | case 0: 232 | fmt.Fprintf(&buf, "%-*s", max[i], s) 233 | default: 234 | if i == len(row.cols)-1 && len(s) > 0 && s[0] == '(' { 235 | // Left-align p value. 236 | fmt.Fprintf(&buf, " %s", s) 237 | break 238 | } 239 | fmt.Fprintf(&buf, " %*s", max[i], s) 240 | } 241 | } 242 | fmt.Fprintf(&buf, "\n") 243 | } 244 | } 245 | 246 | os.Stdout.Write(buf.Bytes()) 247 | } 248 | 249 | func addGeomean(table []*row, c *Collection, unit string, delta bool) []*row { 250 | if !*flagGeomean { 251 | return table 252 | } 253 | 254 | row := newRow("[Geo mean]") 255 | key := BenchKey{Unit: unit} 256 | geomeans := []float64{} 257 | for _, key.Config = range c.Configs { 258 | var means []float64 259 | for _, key.Benchmark = range c.Benchmarks { 260 | stat := c.Stats[key] 261 | if stat != nil { 262 | means = append(means, stat.Mean) 263 | } 264 | } 265 | if len(means) == 0 { 266 | row.add("") 267 | delta = false 268 | } else { 269 | geomean := stats.GeoMean(means) 270 | geomeans = append(geomeans, geomean) 271 | row.add(newScaler(geomean, unit)(geomean) + " ") 272 | } 273 | } 274 | if delta { 275 | row.add(fmt.Sprintf("%+.2f%%", ((geomeans[1]/geomeans[0])-1.0)*100.0)) 276 | } 277 | return append(table, row) 278 | } 279 | 280 | func timeScaler(ns float64) func(float64) string { 281 | var format string 282 | var scale float64 283 | switch x := ns / 1e9; { 284 | case x >= 99.5: 285 | format, scale = "%.0fs", 1 286 | case x >= 9.95: 287 | format, scale = "%.1fs", 1 288 | case x >= 0.995: 289 | format, scale = "%.2fs", 1 290 | case x >= 0.0995: 291 | format, scale = "%.0fms", 1000 292 | case x >= 0.00995: 293 | format, scale = "%.1fms", 1000 294 | case x >= 0.000995: 295 | format, scale = "%.2fms", 1000 296 | case x >= 0.0000995: 297 | format, scale = "%.0fµs", 1000*1000 298 | case x >= 0.00000995: 299 | format, scale = "%.1fµs", 1000*1000 300 | case x >= 0.000000995: 301 | format, scale = "%.2fµs", 1000*1000 302 | case x >= 0.0000000995: 303 | format, scale = "%.0fns", 1000*1000*1000 304 | case x >= 0.00000000995: 305 | format, scale = "%.1fns", 1000*1000*1000 306 | default: 307 | format, scale = "%.2fns", 1000*1000*1000 308 | } 309 | return func(ns float64) string { 310 | return fmt.Sprintf(format, ns/1e9*scale) 311 | } 312 | } 313 | 314 | func newScaler(val float64, unit string) func(float64) string { 315 | if unit == "ns/op" { 316 | return timeScaler(val) 317 | } 318 | 319 | var format string 320 | var scale float64 321 | var suffix string 322 | 323 | prescale := 1.0 324 | if unit == "MB/s" { 325 | prescale = 1e6 326 | } 327 | 328 | switch x := val * prescale; { 329 | case x >= 99500000000000: 330 | format, scale, suffix = "%.0f", 1e12, "T" 331 | case x >= 9950000000000: 332 | format, scale, suffix = "%.1f", 1e12, "T" 333 | case x >= 995000000000: 334 | format, scale, suffix = "%.2f", 1e12, "T" 335 | case x >= 99500000000: 336 | format, scale, suffix = "%.0f", 1e9, "G" 337 | case x >= 9950000000: 338 | format, scale, suffix = "%.1f", 1e9, "G" 339 | case x >= 995000000: 340 | format, scale, suffix = "%.2f", 1e9, "G" 341 | case x >= 99500000: 342 | format, scale, suffix = "%.0f", 1e6, 
"M" 343 | case x >= 9950000: 344 | format, scale, suffix = "%.1f", 1e6, "M" 345 | case x >= 995000: 346 | format, scale, suffix = "%.2f", 1e6, "M" 347 | case x >= 99500: 348 | format, scale, suffix = "%.0f", 1e3, "k" 349 | case x >= 9950: 350 | format, scale, suffix = "%.1f", 1e3, "k" 351 | case x >= 995: 352 | format, scale, suffix = "%.2f", 1e3, "k" 353 | case x >= 99.5: 354 | format, scale, suffix = "%.0f", 1, "" 355 | case x >= 9.95: 356 | format, scale, suffix = "%.1f", 1, "" 357 | default: 358 | format, scale, suffix = "%.2f", 1, "" 359 | } 360 | 361 | if unit == "B/op" { 362 | suffix += "B" 363 | } 364 | if unit == "MB/s" { 365 | suffix += "B/s" 366 | } 367 | scale /= prescale 368 | 369 | return func(val float64) string { 370 | return fmt.Sprintf(format+suffix, val/scale) 371 | } 372 | } 373 | 374 | func (b *Benchstat) Format(scaler func(float64) string) string { 375 | diff := 1 - b.Min/b.Mean 376 | if d := b.Max/b.Mean - 1; d > diff { 377 | diff = d 378 | } 379 | s := scaler(b.Mean) 380 | if b.Mean == 0 { 381 | s += " " 382 | } else { 383 | s = fmt.Sprintf("%s ±%3s", s, fmt.Sprintf("%.0f%%", diff*100.0)) 384 | } 385 | return s 386 | } 387 | 388 | // ComputeStats updates the derived statistics in s from the raw 389 | // samples in s.Values. 390 | func (stat *Benchstat) ComputeStats() { 391 | // Discard outliers. 392 | values := stats.Sample{Xs: stat.Values} 393 | q1, q3 := values.Percentile(0.25), values.Percentile(0.75) 394 | lo, hi := q1-1.5*(q3-q1), q3+1.5*(q3-q1) 395 | for _, value := range stat.Values { 396 | if lo <= value && value <= hi { 397 | stat.RValues = append(stat.RValues, value) 398 | } 399 | } 400 | 401 | // Compute statistics of remaining data. 402 | stat.Min, stat.Max = stats.Bounds(stat.RValues) 403 | stat.Mean = stats.Mean(stat.RValues) 404 | } 405 | 406 | // A Benchstat is the metrics along one axis (e.g., ns/op or MB/s) 407 | // for all runs of a specific benchmark. 408 | type Benchstat struct { 409 | Unit string 410 | Values []float64 // metrics 411 | RValues []float64 // metrics with outliers removed 412 | Min float64 // min of RValues 413 | Mean float64 // mean of RValues 414 | Max float64 // max of RValues 415 | } 416 | 417 | // A BenchKey identifies one metric (e.g., "ns/op", "B/op") from one 418 | // benchmark (function name sans "Benchmark" prefix) in one 419 | // configuration (input file name). 420 | type BenchKey struct { 421 | Config, Benchmark, Unit string 422 | } 423 | 424 | type Collection struct { 425 | Stats map[BenchKey]*Benchstat 426 | 427 | // Configs, Benchmarks, and Units give the set of configs, 428 | // benchmarks, and units from the keys in Stats in an order 429 | // meant to match the order the benchmarks were read in. 430 | Configs, Benchmarks, Units []string 431 | } 432 | 433 | func (c *Collection) AddStat(key BenchKey) *Benchstat { 434 | if stat, ok := c.Stats[key]; ok { 435 | return stat 436 | } 437 | 438 | addString := func(strings *[]string, add string) { 439 | for _, s := range *strings { 440 | if s == add { 441 | return 442 | } 443 | } 444 | *strings = append(*strings, add) 445 | } 446 | addString(&c.Configs, key.Config) 447 | addString(&c.Benchmarks, key.Benchmark) 448 | addString(&c.Units, key.Unit) 449 | stat := &Benchstat{Unit: key.Unit} 450 | c.Stats[key] = stat 451 | return stat 452 | } 453 | 454 | // readFiles reads a set of benchmark files. 
455 | func readFiles(files []string) *Collection { 456 | c := Collection{Stats: make(map[BenchKey]*Benchstat)} 457 | for _, file := range files { 458 | readFile(file, &c) 459 | } 460 | return &c 461 | } 462 | 463 | // readFile reads a set of benchmarks from a file in to a Collection. 464 | func readFile(file string, c *Collection) { 465 | c.Configs = append(c.Configs, file) 466 | key := BenchKey{Config: file} 467 | 468 | text, err := ioutil.ReadFile(file) 469 | if err != nil { 470 | log.Fatal(err) 471 | } 472 | for _, line := range strings.Split(string(text), "\n") { 473 | f := strings.Fields(line) 474 | if len(f) < 4 { 475 | continue 476 | } 477 | name := f[0] 478 | if !strings.HasPrefix(name, "Benchmark") { 479 | continue 480 | } 481 | name = strings.TrimPrefix(name, "Benchmark") 482 | n, _ := strconv.Atoi(f[1]) 483 | if n == 0 { 484 | continue 485 | } 486 | 487 | key.Benchmark = name 488 | for i := 2; i+2 <= len(f); i += 2 { 489 | val, err := strconv.ParseFloat(f[i], 64) 490 | if err != nil { 491 | continue 492 | } 493 | key.Unit = f[i+1] 494 | stat := c.AddStat(key) 495 | stat.Values = append(stat.Values, val) 496 | } 497 | } 498 | } 499 | 500 | func metricOf(unit string) string { 501 | switch unit { 502 | case "ns/op": 503 | return "time/op" 504 | case "B/op": 505 | return "alloc/op" 506 | case "MB/s": 507 | return "speed" 508 | default: 509 | return unit 510 | } 511 | } 512 | 513 | // Significance tests. 514 | 515 | func notest(old, new *Benchstat) (pval float64, err error) { 516 | return -1, nil 517 | } 518 | 519 | func ttest(old, new *Benchstat) (pval float64, err error) { 520 | t, err := stats.TwoSampleWelchTTest(stats.Sample{Xs: old.RValues}, stats.Sample{Xs: new.RValues}, stats.LocationDiffers) 521 | if err != nil { 522 | return -1, err 523 | } 524 | return t.P, nil 525 | } 526 | 527 | func utest(old, new *Benchstat) (pval float64, err error) { 528 | u, err := stats.MannWhitneyUTest(old.RValues, new.RValues, stats.LocationDiffers) 529 | if err != nil { 530 | return -1, err 531 | } 532 | return u.P, nil 533 | } 534 | --------------------------------------------------------------------------------
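For reference, readFile above accepts lines produced by go test -bench (a Benchmark name, the iteration count, then value/unit pairs), and when the significance test passes (p < alpha) the two-file delta column shows the relative change of the two means. A minimal sketch of that arithmetic; the benchmark name and all numbers here are invented.

package main

import "fmt"

func main() {
	// A line readFile would parse:
	//
	//	BenchmarkDecode   100   12500 ns/op   4096 B/op
	//
	// Given the per-file means of one metric, the delta column is:
	oldMean, newMean := 100.0, 90.0
	fmt.Printf("%+.2f%%\n", (newMean/oldMean-1.0)*100.0) // -10.00%
}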