├── README
├── change.go
├── change_test.go
└── example
    └── main.go


/README:
--------------------------------------------------------------------------------
1 | go-change: An online change detection algorithm.
2 | 
3 | For package documentation, please see http://godoc.org/github.com/dgryski/go-change
4 | 


--------------------------------------------------------------------------------
/change.go:
--------------------------------------------------------------------------------
  1 | // Package change implements an online change detection algorithm
  2 | /*
  3 | https://sites.cs.ucsb.edu/~yfwang/papers/as06.pdf
  4 | 
  5 | How effective is this algorithm?  The authors state
  6 | "[T]he estimation algorithm can be shown theoretically to give the correct
  7 | prediction and approach an unbiased estimator of the true changeover point if
  8 | the window length approaches infinity and the two distributions are
  9 | sufficiently dissimilar."
 10 | 
 11 | The algorithm works by examining the distributions on either side of a
 12 | suspected change point, and finding the index in the window where the two
 13 | distributions are most dissimilar.  As there is not guaranteed to be a change
 14 | point in the window, this implementation also performs a Student's t-test on
 15 | the two distributions to reduce the rate of false positives.
 16 | 
 17 | */
 18 | package change
 19 | 
 20 | import (
 21 | 	"math"
 22 | 
 23 | 	"github.com/dgryski/go-onlinestats"
 24 | )
 25 | 
 26 | // Stats are some descriptive statistics for a block of items.  It implements the interface needed by the t-test method of onlinestats.
 27 | type Stats struct {
 28 | 	mean     float64
 29 | 	variance float64
 30 | 	n        int
 31 | }
 32 | 
 33 | // Mean returns the mean of the data set
 34 | func (s Stats) Mean() float64 { return s.mean }
 35 | 
 36 | // Var returns the variance of the data set
 37 | func (s Stats) Var() float64 { return s.variance }
 38 | 
 39 | // Len returns the number of items in the data set
 40 | func (s Stats) Len() int { return s.n }
 41 | 
 42 | // Stddev returns the standard deviation of the sample
 43 | func (s Stats) Stddev() float64 { return math.Sqrt(s.variance) }
 44 | 
// ChangePoint is a potential change point found by Check().
type ChangePoint struct {
	// Index is the offset into the data set of the suspected change point.
	// It is also the number of items summarised by Before.
	Index int

	// Difference is the change in distribution means across the change
	// point: the mean of After minus the mean of Before.
	Difference float64

	// Confidence is the confidence returned by the t-test
	// (onlinestats.Welch) comparing Before and After.
	Confidence float64

	// Before is the statistics of the distribution before the change point
	Before Stats

	// After is the statistics of the distribution after the change point
	After Stats
}
 62 | 
// DefaultMinSampleSize is the minimum sample size to consider from the window
// being checked.  Check uses it when Detector.MinSampleSize is left at zero.
const DefaultMinSampleSize = 30
 65 | 
// Detector is a change detector.
type Detector struct {
	// MinSampleSize is the smallest number of items Check allows on either
	// side of a candidate change point.  Zero selects DefaultMinSampleSize.
	MinSampleSize int
	// MinConfidence is the t-test confidence a candidate must exceed; Check
	// returns nil when the confidence is less than or equal to it.
	MinConfidence float64
}
 71 | 
 72 | // Check returns the index of a potential change point
 73 | func (d *Detector) Check(window []float64) *ChangePoint {
 74 | 
 75 | 	n := len(window)
 76 | 
 77 | 	// The paper provides recursive formulas for computing the means and
 78 | 	// standard deviations as we slide along the window.  This
 79 | 	// implementation uses alternate math based on cumulative sums.
 80 | 
 81 | 	// cumsum contains the cumulative sum of all elements <= i
 82 | 	// cumsumsq contains the cumulative sum of squares of all elements <= i
 83 | 	// TODO(dgryski): move this to a move numerically stable algorithm
 84 | 	cumsum := make([]float64, n)
 85 | 	cumsumsq := make([]float64, n)
 86 | 
 87 | 	var sum, sumsq float64
 88 | 	for i, v := range window {
 89 | 		sum += v
 90 | 		sumsq += v * v
 91 | 		cumsum[i] = sum
 92 | 		cumsumsq[i] = sumsq
 93 | 	}
 94 | 
 95 | 	// sb is our between-class scatter, the degree of dissimilarity of the
 96 | 	// two distributions.  This value is always positive, so we can set 0
 97 | 	// as the minimum and know that any valid value will be larger
 98 | 	var maxsb float64
 99 | 	var maxsbIdx int
100 | 
101 | 	// The paper also provides a metric sw, for 'within-class scatter',
102 | 	// which depends on the standard-deviation of the samples. It suggests
103 | 	// finding the point that minimizes the ratio sw/sb.  However, it then
104 | 	// proves that this is equivalent to maximizing sb.  The calculation of
105 | 	// sb depends only on the means of the two samples, and not of the
106 | 	// variances.  However, we calculate the variances so that we can pass
107 | 	// them to the T test later on.
108 | 
109 | 	var before, after Stats
110 | 
111 | 	// sane default
112 | 	minSampleSize := d.MinSampleSize
113 | 	if minSampleSize == 0 {
114 | 		minSampleSize = DefaultMinSampleSize
115 | 	}
116 | 
117 | 	for l := minSampleSize; l < (n - minSampleSize + 1); l++ {
118 | 		lidx := l - 1
119 | 		n1 := float64(l)
120 | 		mean1 := cumsum[lidx] / n1
121 | 
122 | 		n2 := float64(n - l)
123 | 		sum2 := (sum - cumsum[lidx])
124 | 		mean2 := sum2 / n2
125 | 
126 | 		sb := ((n1 * n2) / (n1 + n2)) * (mean1 - mean2) * (mean1 - mean2)
127 | 		if maxsb < sb {
128 | 			maxsb = sb
129 | 			maxsbIdx = l
130 | 
131 | 			// The variances are calculated only if needed to
132 | 			// reduce the math in the main loop
133 | 			var1 := (cumsumsq[lidx] - (cumsum[lidx]*cumsum[lidx])/(n1)) / (n1 - 1)
134 | 			var2 := ((sumsq - cumsumsq[lidx]) - (sum2*sum2)/(n2)) / (n2 - 1)
135 | 
136 | 			before.mean, before.variance, before.n = mean1, var1, l
137 | 			after.mean, after.variance, after.n = mean2, var2, n-l
138 | 		}
139 | 	}
140 | 
141 | 	var conf float64
142 | 	if before.n > 0 {
143 | 		// we found a difference
144 | 		conf = onlinestats.Welch(before, after)
145 | 	}
146 | 
147 | 	// not above our threshold
148 | 	if conf <= d.MinConfidence {
149 | 		return nil
150 | 	}
151 | 
152 | 	cp := &ChangePoint{
153 | 		Index:      maxsbIdx,
154 | 		Difference: after.Mean() - before.Mean(),
155 | 		Confidence: conf,
156 | 		Before:     before,
157 | 		After:      after,
158 | 	}
159 | 
160 | 	return cp
161 | }
162 | 
// Stream monitors a stream of floats for changes.  Items are collected into
// blocks of blockSize; each full block is shifted into a sliding window of
// windowSize items, which is then handed to the detector.
type Stream struct {
	// windowSize is the length of data.
	windowSize int
	// blockSize is the length of buffer, i.e. how many items are collected
	// between window updates.
	blockSize int

	// data is the sliding window passed to the detector.
	data []float64

	// items counts every value passed to Push.
	items int

	// buffer holds the items of the block currently being filled.
	buffer []float64
	// bufidx is the position in buffer where the next item is stored.
	bufidx int

	// detector checks the window each time a block has been shifted in.
	detector *Detector
}
177 | 
178 | // NewStream constructs a new stream detector
179 | func NewStream(windowSize int, minSample int, blockSize int, confidence float64) *Stream {
180 | 	return &Stream{
181 | 		windowSize: windowSize,
182 | 		blockSize:  blockSize,
183 | 		data:       make([]float64, windowSize),
184 | 		buffer:     make([]float64, blockSize),
185 | 
186 | 		detector: &Detector{
187 | 			MinSampleSize: minSample,
188 | 			MinConfidence: confidence,
189 | 		},
190 | 	}
191 | }
192 | 
193 | // Push adds a float to the stream and calls the change detector
194 | func (s *Stream) Push(item float64) *ChangePoint {
195 | 	s.buffer[s.bufidx] = item
196 | 	s.bufidx++
197 | 	s.items++
198 | 
199 | 	if s.bufidx < s.blockSize {
200 | 		return nil
201 | 	}
202 | 
203 | 	copy(s.data[0:], s.data[s.blockSize:])
204 | 	copy(s.data[s.windowSize-s.blockSize:], s.buffer)
205 | 	s.bufidx = 0
206 | 
207 | 	if s.items < s.windowSize {
208 | 		return nil
209 | 	}
210 | 
211 | 	return s.detector.Check(s.data)
212 | }
213 | 
214 | // Window returns the current data window.  This should be treated as read-only
215 | func (s *Stream) Window() []float64 { return s.data }
216 | 


--------------------------------------------------------------------------------
/change_test.go:
--------------------------------------------------------------------------------
 1 | package change
 2 | 
 3 | import "testing"
 4 | 
 5 | func TestDetectChange(t *testing.T) {
 6 | 
 7 | 	var tests = []struct {
 8 | 		w   []float64
 9 | 		idx int
10 | 	}{
11 | 		{
12 | 			[]float64{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
13 | 			0, // no change point found
14 | 		},
15 | 
16 | 		{
17 | 			[]float64{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2},
18 | 			10, // the first 2
19 | 		},
20 | 		{
21 | 			[]float64{1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 3, 0, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2},
22 | 			0, // change occurs but not statistically significant
23 | 		},
24 | 	}
25 | 
26 | 	var detector = Detector{
27 | 		MinSampleSize: 5,
28 | 	}
29 | 
30 | 	for _, tt := range tests {
31 | 		r := detector.Check(tt.w)
32 | 		if (r == nil || r.Confidence < 0.95) && tt.idx == 0 {
33 | 			// no difference found and no difference expected -- good
34 | 		} else if r.Confidence >= 0.95 && r.Index == tt.idx {
35 | 			// difference found at expected location -- good
36 | 		} else {
37 | 			t.Errorf("DetectChange confidence=%f index=%d, wanted %d", r.Confidence, r.Index, tt.idx)
38 | 		}
39 | 	}
40 | }
41 | 


--------------------------------------------------------------------------------
/example/main.go:
--------------------------------------------------------------------------------
  1 | package main
  2 | 
  3 | import (
  4 | 	"bufio"
  5 | 	"flag"
  6 | 	"fmt"
  7 | 	"html/template"
  8 | 	"io"
  9 | 	"log"
 10 | 	"math"
 11 | 	"os"
 12 | 	"sort"
 13 | 	"strconv"
 14 | 
 15 | 	"github.com/dgryski/go-change"
 16 | )
 17 | 
 18 | func main() {
 19 | 	windowSize := flag.Int("w", 120, "window size")
 20 | 	minSample := flag.Int("ms", 30, "min sample size")
 21 | 	blockSize := flag.Int("bs", 10, "block size")
 22 | 	compressPoints := flag.Int("cp", 10, "compress points for graph display")
 23 | 	fname := flag.String("f", "", "file name")
 24 | 	ymin := flag.Int("ymin", 0, "minimum y value for graph")
 25 | 
 26 | 	flag.Parse()
 27 | 
 28 | 	var f io.Reader
 29 | 
 30 | 	if *fname == "" {
 31 | 		log.Println("reading from stdin")
 32 | 		f = os.Stdin
 33 | 	} else {
 34 | 		var err error
 35 | 		f, err = os.Open(*fname)
 36 | 		if err != nil {
 37 | 			fmt.Println("open failed:", err)
 38 | 			return
 39 | 		}
 40 | 	}
 41 | 
 42 | 	scanner := bufio.NewScanner(f)
 43 | 
 44 | 	s := change.NewStream(*windowSize, *minSample, *blockSize, 0.995)
 45 | 
 46 | 	type graphPoints [2]float64
 47 | 	var graphData []graphPoints
 48 | 	var last []float64
 49 | 
 50 | 	var changePoints []int
 51 | 
 52 | 	var items int
 53 | 
 54 | 	for scanner.Scan() {
 55 | 		item, err := strconv.ParseFloat(scanner.Text(), 64)
 56 | 		if err != nil {
 57 | 			fmt.Printf("error parsing <%s>: %s\n", scanner.Text(), err)
 58 | 			continue
 59 | 		}
 60 | 
 61 | 		last = append(last, item)
 62 | 		items++
 63 | 		if items > 0 && items%*compressPoints == 0 {
 64 | 			sort.Float64s(last)
 65 | 			median := last[*compressPoints/2]
 66 | 			last = last[:0]
 67 | 
 68 | 			graphData = append(graphData, [2]float64{float64(items), median})
 69 | 		}
 70 | 
 71 | 		r := s.Push(item)
 72 | 
 73 | 		if r != nil {
 74 | 			diff := math.Abs(r.Difference / r.Before.Mean())
 75 | 			if r.Difference != 0 && diff > 0.06 {
 76 | 				log.Printf("difference found at offset=%d: %f %v\n", items-*windowSize+r.Index, diff, r)
 77 | 				changePoints = append(changePoints, items-*windowSize+r.Index)
 78 | 			}
 79 | 		}
 80 | 	}
 81 | 
 82 | 	if err := scanner.Err(); err != nil {
 83 | 		fmt.Printf("Error during scan: %v", err)
 84 | 	}
 85 | 
 86 | 	reportTmpl.Execute(os.Stdout, struct {
 87 | 		YMin         int
 88 | 		GraphData    []graphPoints
 89 | 		ChangePoints []int
 90 | 	}{
 91 | 		*ymin,
 92 | 		graphData,
 93 | 		changePoints,
 94 | 	})
 95 | }
 96 | 
// reportTmpl is the HTML report written by main.  The page loads jQuery and
// flot from cdnjs, plots .GraphData in the #placeholder div with the y axis
// starting at .YMin, and adds one grid marking per entry of .ChangePoints
// whose x range starts and ends at that entry.
var reportTmpl = template.Must(template.New("report").Parse(`
<html>
<script src="//cdnjs.cloudflare.com/ajax/libs/jquery/2.0.3/jquery.min.js"></script>
<script src="//cdnjs.cloudflare.com/ajax/libs/flot/0.8.2/jquery.flot.min.js"></script>

<script type="text/javascript">

    var data = {{ .GraphData }};

    $(document).ready(function() {
        $.plot($("#placeholder"), [data], {
             yaxis: { min: {{ .YMin }} },
             grid: {
                markings: [
                  {{ range .ChangePoints }}{ color: '#000', lineWidth: 1, xaxis: { from: {{ . }}, to: {{ . }} } },
                  {{ end }}
                ]
              }
           })
        })

</script>

<body>

<div id="placeholder" style="width:1200px; height:400px"></div>

</body>
</html>
`))
127 | 


--------------------------------------------------------------------------------