├── LICENSE.md ├── README.md ├── algorithms.go ├── go.mod ├── go.sum ├── statistic.go ├── statistic_test.go ├── todo.md └── utils.go /LICENSE.md: -------------------------------------------------------------------------------- 1 | Skyline 2 | ---------- 3 | * Author:: Xianjie Gu () 4 | * Copyright:: Copyright (c) 2013 5 | * License:: MIT 6 | 7 | The MIT License 8 | 9 | Permission is hereby granted, free of charge, to any person obtaining a copy 10 | of this software and associated documentation files (the "Software"), to deal 11 | in the Software without restriction, including without limitation the rights 12 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | copies of the Software, and to permit persons to whom the Software is 14 | furnished to do so, subject to the following conditions: 15 | 16 | The above copyright notice and this permission notice shall be included in 17 | all copies or substantial portions of the Software. 18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 25 | THE SOFTWARE. 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Skyline's Algorithm 2 | 3 | golang version based on https://github.com/etsy/skyline/blob/master/src/analyzer/algorithms.py#L230 4 | 5 | some package are public domain code 6 | -------------------------------------------------------------------------------- /algorithms.go: -------------------------------------------------------------------------------- 1 | package skyline 2 | 3 | import ( 4 | "github.com/datastream/probab/dst" 5 | 6 | "math" 7 | "time" 8 | 9 | "github.com/datastream/adf" 10 | "github.com/gonum/stat" 11 | ) 12 | 13 | // This is no man's land. Do anything you want in here, 14 | // as long as you return a boolean that determines whether the input 15 | // timeseries is anomalous or not. 16 | 17 | // To add an algorithm, define it here, and add its name to settings.ALGORITHMS 18 | 19 | // TailAvg is a utility function used to calculate the average of the last three 20 | // datapoints in the series as a measure, instead of just the last datapoint. 21 | // It reduces noise, but it also reduces sensitivity and increases the delay 22 | // to detection. 23 | func TailAvg(series []float64) float64 { 24 | l := len(series) 25 | if l == 0 { 26 | return 0 27 | } 28 | if l < 3 { 29 | return series[l-1] 30 | } 31 | return (series[l-1] + series[l-2] + series[l-3]) / 3 32 | } 33 | 34 | // MedianAbsoluteDeviation function 35 | // A timeseries is anomalous if the deviation of its latest datapoint with 36 | // respect to the median is X times larger than the median of deviations. 37 | func MedianAbsoluteDeviation(timeseries []TimePoint) bool { 38 | series := ValueArray(timeseries) 39 | median := Median(series) 40 | var demedianed []float64 41 | for _, val := range series { 42 | demedianed = append(demedianed, math.Abs(val-median)) 43 | } 44 | medianDeviation := Median(demedianed) 45 | if medianDeviation == 0 { 46 | return false 47 | } 48 | testStatistic := demedianed[len(demedianed)-1] / medianDeviation 49 | if testStatistic > 6 { 50 | return true 51 | } 52 | return false 53 | } 54 | 55 | // Grubbs score 56 | // A timeseries is anomalous if the Z score is greater than the Grubb's score. 57 | func Grubbs(timeseries []TimePoint) bool { 58 | series := ValueArray(timeseries) 59 | mean, stdDev := stat.MeanStdDev(series, nil) 60 | tailAverage := TailAvg(series) 61 | // http://en.wikipedia.org/wiki/Grubbs'_test_for_outliers 62 | // G = (Y - Mean(Y)) / stdDev(Y) 63 | zScore := (tailAverage - mean) / stdDev 64 | lenSeries := len(series) 65 | // scipy.stats.t.isf(.05 / (2 * lenSeries) , lenSeries - 2) 66 | // when lenSeries is big, it eq stats.ZInvCDFFor(1-t) 67 | threshold := dst.StudentsTQtlFor(float64(lenSeries-2), 1-0.05/float64(2*lenSeries)) 68 | thresholdSquared := threshold * threshold 69 | // (l-1)/l * sqr(t/(l-2+t^2)) 70 | grubbsScore := (float64(lenSeries-1) / math.Sqrt(float64(lenSeries))) * math.Sqrt(thresholdSquared/(float64(lenSeries-2)+thresholdSquared)) 71 | return zScore > grubbsScore 72 | } 73 | 74 | // FirstHourAverage function 75 | // Calcuate the simple average over one hour, FULLDURATION seconds ago. 76 | // A timeseries is anomalous if the average of the last three datapoints 77 | // are outside of three standard deviations of this value. 78 | func FirstHourAverage(timeseries []TimePoint, fullDuration int64) bool { 79 | var series []float64 80 | lastHourThreshold := time.Now().Unix() - (fullDuration - 3600) 81 | for _, val := range timeseries { 82 | if val.GetTimestamp() < lastHourThreshold { 83 | series = append(series, val.GetValue()) 84 | } 85 | } 86 | mean, stdDev := stat.MeanStdDev(series, nil) 87 | t := TailAvg(ValueArray(timeseries)) 88 | return math.Abs(t-mean) > 3*stdDev 89 | } 90 | 91 | // SimpleStddevFromMovingAverage function 92 | // A timeseries is anomalous if the absolute value of the average of the latest 93 | // three datapoint minus the moving average is greater than one standard 94 | // deviation of the average. This does not exponentially weight the MA and so 95 | // is better for detecting anomalies with respect to the entire series. 96 | func SimpleStddevFromMovingAverage(timeseries []TimePoint) bool { 97 | series := ValueArray(timeseries) 98 | mean, stdDev := stat.MeanStdDev(series, nil) 99 | t := TailAvg(series) 100 | return math.Abs(t-mean) > 3*stdDev 101 | } 102 | 103 | // StddevFromMovingAverage function 104 | // A timeseries is anomalous if the absolute value of the average of the latest 105 | // three datapoint minus the moving average is greater than one standard 106 | // deviation of the moving average. This is better for finding anomalies with 107 | // respect to the short term trends. 108 | func StddevFromMovingAverage(timeseries []TimePoint) bool { 109 | series := ValueArray(timeseries) 110 | expAverage := Ewma(series, 50) 111 | stdDev := EwmStd(series, 50) 112 | return math.Abs(series[len(series)-1]-expAverage[len(expAverage)-1]) > (3 * stdDev[len(stdDev)-1]) 113 | } 114 | 115 | // MeanSubtractionCumulation function 116 | // A timeseries is anomalous if the value of the next datapoint in the 117 | // series is farther than a standard deviation out in cumulative terms 118 | // after subtracting the mean from each data point. 119 | func MeanSubtractionCumulation(timeseries []TimePoint) bool { 120 | series := ValueArray(timeseries) 121 | mean := stat.Mean(series[:len(series)-1], nil) 122 | for i, val := range series { 123 | series[i] = val - mean 124 | } 125 | stdDev := stat.StdDev(series[:len(series)-1], nil) 126 | // expAverage = pandas.stats.moments.ewma(series, com=15) 127 | return math.Abs(series[len(series)-1]) > 3*stdDev 128 | } 129 | 130 | // LeastSquares function 131 | // A timeseries is anomalous if the average of the last three datapoints 132 | // on a projected least squares model is greater than three sigma. 133 | func LeastSquares(timeseries []TimePoint) bool { 134 | x := TimeArray64(timeseries) 135 | y := ValueArray(timeseries) 136 | intercept, slope := stat.LinearRegression(x, y, nil, false) 137 | var errs []float64 138 | for _, val := range timeseries { 139 | projected := slope*float64(val.GetTimestamp()) + intercept 140 | errs = append(errs, val.GetValue()-projected) 141 | } 142 | l := len(errs) 143 | if l < 3 { 144 | return false 145 | } 146 | stdDev := stat.StdDev(errs, nil) 147 | t := (errs[l-1] + errs[l-2] + errs[l-3]) / 3 148 | return math.Abs(t) > stdDev*3 && math.Trunc(stdDev) != 0 && math.Trunc(t) != 0 149 | } 150 | 151 | // HistogramBins function 152 | // A timeseries is anomalous if the average of the last three datapoints falls 153 | // into a histogram bin with less than 20 other datapoints (you'll need to tweak 154 | // that number depending on your data) 155 | // Returns: the size of the bin which contains the tailAvg. Smaller bin size 156 | // means more anomalous. 157 | func HistogramBins(timeseries []TimePoint) bool { 158 | series := ValueArray(timeseries) 159 | t := TailAvg(series) 160 | hist, bins := Histogram(series, 15) 161 | for i, v := range hist { 162 | if v <= 20 { 163 | if i == 0 { 164 | if t <= bins[0] { 165 | return true 166 | } 167 | } else if t > bins[i] && t < bins[i+1] { 168 | return true 169 | } 170 | } 171 | } 172 | return false 173 | } 174 | 175 | // KsTest function 176 | // A timeseries is anomalous if 2 sample Kolmogorov-Smirnov test indicates 177 | // that data distribution for last 10 minutes is different from last hour. 178 | // It produces false positives on non-stationary series so Augmented 179 | // Dickey-Fuller test applied to check for stationarity. 180 | func KsTest(timeseries []TimePoint) bool { 181 | current := time.Now().Unix() 182 | hourAgo := current - 3600 183 | tenMinutesAgo := current - 600 184 | var reference []float64 185 | var probe []float64 186 | for _, val := range timeseries { 187 | if val.GetTimestamp() >= hourAgo && val.GetTimestamp() < tenMinutesAgo { 188 | reference = append(reference, val.GetValue()) 189 | } 190 | if val.GetTimestamp() >= tenMinutesAgo { 191 | probe = append(probe, val.GetValue()) 192 | } 193 | } 194 | if len(reference) < 20 || len(probe) < 20 { 195 | return false 196 | } 197 | _, ksPValue, ksD := KolmogorovSmirnov(reference, probe, 0.05) 198 | if ksPValue < 0.05 && ksD > 0.5 { 199 | ad := adf.New(reference, adf.DefaultPValue, 10) 200 | ad.Run() 201 | if ad.IsStationary() { 202 | return true 203 | } 204 | /* 205 | adf := ADFuller(reference, 10) 206 | if adf[1] < 0.05 { 207 | return true 208 | } 209 | */ 210 | } 211 | return false 212 | } 213 | 214 | // IsAnomalouslyAnomalous function 215 | // This method runs a meta-analysis on the metric to determine whether the 216 | // metric has a past history of triggering. TODO: weight intervals based on datapoint 217 | func IsAnomalouslyAnomalous(trigger_history []TimePoint, new_trigger TimePoint) (bool, []TimePoint) { 218 | if len(trigger_history) == 0 { 219 | trigger_history = append(trigger_history, new_trigger) 220 | return true, trigger_history 221 | } 222 | if (new_trigger.GetValue() == trigger_history[len(trigger_history)-1].GetValue()) && (new_trigger.GetTimestamp()-trigger_history[len(trigger_history)-1].GetTimestamp() <= 300) { 223 | return false, trigger_history 224 | } 225 | trigger_history = append(trigger_history, new_trigger) 226 | trigger_times := TimeArray(trigger_history) 227 | var intervals []float64 228 | for i := range trigger_times { 229 | if (i + 1) < len(trigger_times) { 230 | intervals = append(intervals, float64(trigger_times[i+1]-trigger_times[i])) 231 | } 232 | } 233 | mean, stdDev := stat.MeanStdDev(intervals, nil) 234 | return math.Abs(intervals[len(intervals)-1]-mean) > 3*stdDev, trigger_history 235 | } 236 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module skyline 2 | 3 | go 1.17 4 | 5 | require ( 6 | github.com/datastream/adf v0.0.0-20211223163840-35c9152797cf 7 | github.com/datastream/probab v0.0.0-20150902151906-d47400db423d 8 | github.com/datastream/skyline v0.0.0-20190414171723-8f283eb09017 9 | github.com/gonum/stat v0.0.0-20181125101827-41a0da705a5b 10 | ) 11 | 12 | require ( 13 | github.com/berkmancenter/ridge v0.0.0-20160817211126-84766bc56b56 // indirect 14 | github.com/datastream/go-fn v0.0.0-20130403065544-37331e464987 // indirect 15 | github.com/gonum/blas v0.0.0-20181208220705-f22b278b28ac // indirect 16 | github.com/gonum/floats v0.0.0-20181209220543-c233463c7e82 // indirect 17 | github.com/gonum/integrate v0.0.0-20181209220457-a422b5c0fdf2 // indirect 18 | github.com/gonum/internal v0.0.0-20181124074243-f884aa714029 // indirect 19 | github.com/gonum/lapack v0.0.0-20181123203213-e4cdc5a0bff9 // indirect 20 | github.com/gonum/matrix v0.0.0-20181209220409-c518dec07be9 // indirect 21 | github.com/skelterjohn/go.matrix v0.0.0-20130517144113-daa59528eefd // indirect 22 | gonum.org/v1/plot v0.10.0 // indirect 23 | ) 24 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= 2 | gioui.org v0.0.0-20210308172011-57750fc8a0a6/go.mod h1:RSH6KIUZ0p2xy5zHDxgAM4zumjgTw83q2ge/PI+yyw8= 3 | github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= 4 | github.com/ajstarks/svgo v0.0.0-20180226025133-644b8db467af/go.mod h1:K08gAheRH3/J6wwsYMMT4xOr94bZjxIelGM0+d/wbFw= 5 | github.com/ajstarks/svgo v0.0.0-20210923152817-c3b6e2f0c527 h1:NImof/JkF93OVWZY+PINgl6fPtQyF6f+hNUtZ0QZA1c= 6 | github.com/ajstarks/svgo v0.0.0-20210923152817-c3b6e2f0c527/go.mod h1:K08gAheRH3/J6wwsYMMT4xOr94bZjxIelGM0+d/wbFw= 7 | github.com/berkmancenter/ridge v0.0.0-20160817211126-84766bc56b56 h1:H/ONXoeBW3VrQ235HyELI+4BaIsoshiujKs6Nw7U1V8= 8 | github.com/berkmancenter/ridge v0.0.0-20160817211126-84766bc56b56/go.mod h1:+R4DTwYNHnJmn3Rx5cirLk2hfGx5ZlF1kiPXh6yiGZc= 9 | github.com/boombuler/barcode v1.0.0/go.mod h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl3JlRe0mD8= 10 | github.com/boombuler/barcode v1.0.1/go.mod h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl3JlRe0mD8= 11 | github.com/datastream/adf v0.0.0-20211223163840-35c9152797cf h1:nr8BP6Ytoy4bLz1V8W77DRvhZXNX33WTQySdL1aAaM4= 12 | github.com/datastream/adf v0.0.0-20211223163840-35c9152797cf/go.mod h1:RR7I2tOQAEn1lr/VbiRC1zUDvmlD7mbDqNc7FeWtPrE= 13 | github.com/datastream/go-fn v0.0.0-20130403065544-37331e464987 h1:S3JwKvmPJITKLLH7r3WYbBLjXu4lEZU9gFBLj01zaNU= 14 | github.com/datastream/go-fn v0.0.0-20130403065544-37331e464987/go.mod h1:bJl2ftsgvIWNGkufh7xMrXAATJUEdner7/2wCuHmVLI= 15 | github.com/datastream/probab v0.0.0-20150902151906-d47400db423d h1:wnkyVc4CQO5XlqF4RW4+y9qN05xY/frZn0IiT4Gi0qc= 16 | github.com/datastream/probab v0.0.0-20150902151906-d47400db423d/go.mod h1:qktd+m4xKlvhKU9bN9YipjWd79+vYDYb+N85egJl/NM= 17 | github.com/datastream/skyline v0.0.0-20190414171723-8f283eb09017 h1:vvq1G8dEtLuJVEorVj2C27Q2ON2cE6ESktL0+wir8KA= 18 | github.com/datastream/skyline v0.0.0-20190414171723-8f283eb09017/go.mod h1:zVyg6Dhk9VR9ICSy5rVU4APzU8SDwXR4YbzwxJHN/9A= 19 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 20 | github.com/fogleman/gg v1.2.1-0.20190220221249-0403632d5b90/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k= 21 | github.com/fogleman/gg v1.3.0 h1:/7zJX8F6AaYQc57WQCyN9cAIz+4bCJGO9B+dyW29am8= 22 | github.com/fogleman/gg v1.3.0/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k= 23 | github.com/go-fonts/dejavu v0.1.0/go.mod h1:4Wt4I4OU2Nq9asgDCteaAaWZOV24E+0/Pwo0gppep4g= 24 | github.com/go-fonts/latin-modern v0.2.0/go.mod h1:rQVLdDMK+mK1xscDwsqM5J8U2jrRa3T0ecnM9pNujks= 25 | github.com/go-fonts/liberation v0.1.1/go.mod h1:K6qoJYypsmfVjWg8KOVDQhLc8UDgIK2HYqyqAO9z7GY= 26 | github.com/go-fonts/liberation v0.2.0 h1:jAkAWJP4S+OsrPLZM4/eC9iW7CtHy+HBXrEwZXWo5VM= 27 | github.com/go-fonts/liberation v0.2.0/go.mod h1:K6qoJYypsmfVjWg8KOVDQhLc8UDgIK2HYqyqAO9z7GY= 28 | github.com/go-fonts/stix v0.1.0/go.mod h1:w/c1f0ldAUlJmLBvlbkvVXLAD+tAMqobIIQpmnUIzUY= 29 | github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= 30 | github.com/go-latex/latex v0.0.0-20210118124228-b3d85cf34e07/go.mod h1:CO1AlKB2CSIqUrmQPqA0gdRIlnLEY0gK5JGjh37zN5U= 31 | github.com/go-latex/latex v0.0.0-20210823091927-c0d11ff05a81 h1:6zl3BbBhdnMkpSj2YY30qV3gDcVBGtFgVsV3+/i+mKQ= 32 | github.com/go-latex/latex v0.0.0-20210823091927-c0d11ff05a81/go.mod h1:SX0U8uGpxhq9o2S/CELCSUxEWWAuoCUcVCQWv7G2OCk= 33 | github.com/go-pdf/fpdf v0.5.0 h1:GHpcYsiDV2hdo77VTOuTF9k1sN8F8IY7NjnCo9x+NPY= 34 | github.com/go-pdf/fpdf v0.5.0/go.mod h1:HzcnA+A23uwogo0tp9yU+l3V+KXhiESpt1PMayhOh5M= 35 | github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 h1:DACJavvAHhabrF08vX0COfcOBJRhZ8lUbR+ZWIs0Y5g= 36 | github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k= 37 | github.com/gonum/blas v0.0.0-20181208220705-f22b278b28ac h1:Q0Jsdxl5jbxouNs1TQYt0gxesYMU4VXRbsTlgDloZ50= 38 | github.com/gonum/blas v0.0.0-20181208220705-f22b278b28ac/go.mod h1:P32wAyui1PQ58Oce/KYkOqQv8cVw1zAapXOl+dRFGbc= 39 | github.com/gonum/floats v0.0.0-20181209220543-c233463c7e82 h1:EvokxLQsaaQjcWVWSV38221VAK7qc2zhaO17bKys/18= 40 | github.com/gonum/floats v0.0.0-20181209220543-c233463c7e82/go.mod h1:PxC8OnwL11+aosOB5+iEPoV3picfs8tUpkVd0pDo+Kg= 41 | github.com/gonum/integrate v0.0.0-20181209220457-a422b5c0fdf2 h1:GUSkTcIe1SlregbHNUKbYDhBsS8lNgYfIp4S4cToUyU= 42 | github.com/gonum/integrate v0.0.0-20181209220457-a422b5c0fdf2/go.mod h1:pDgmNM6seYpwvPos3q+zxlXMsbve6mOIPucUnUOrI7Y= 43 | github.com/gonum/internal v0.0.0-20181124074243-f884aa714029 h1:8jtTdc+Nfj9AR+0soOeia9UZSvYBvETVHZrugUowJ7M= 44 | github.com/gonum/internal v0.0.0-20181124074243-f884aa714029/go.mod h1:Pu4dmpkhSyOzRwuXkOgAvijx4o+4YMUJJo9OvPYMkks= 45 | github.com/gonum/lapack v0.0.0-20181123203213-e4cdc5a0bff9 h1:7qnwS9+oeSiOIsiUMajT+0R7HR6hw5NegnKPmn/94oI= 46 | github.com/gonum/lapack v0.0.0-20181123203213-e4cdc5a0bff9/go.mod h1:XA3DeT6rxh2EAE789SSiSJNqxPaC0aE9J8NTOI0Jo/A= 47 | github.com/gonum/matrix v0.0.0-20181209220409-c518dec07be9 h1:V2IgdyerlBa/MxaEFRbV5juy/C3MGdj4ePi+g6ePIp4= 48 | github.com/gonum/matrix v0.0.0-20181209220409-c518dec07be9/go.mod h1:0EXg4mc1CNP0HCqCz+K4ts155PXIlUywf0wqN+GfPZw= 49 | github.com/gonum/stat v0.0.0-20181125101827-41a0da705a5b h1:fbskpz/cPqWH8VqkQ7LJghFkl2KPAiIFUHrTJ2O3RGk= 50 | github.com/gonum/stat v0.0.0-20181125101827-41a0da705a5b/go.mod h1:Z4GIJBJO3Wa4gD4vbwQxXXZ+WHmW6E9ixmNrwvs0iZs= 51 | github.com/jung-kurt/gofpdf v1.0.0/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes= 52 | github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes= 53 | github.com/phpdave11/gofpdf v1.4.2/go.mod h1:zpO6xFn9yxo3YLyMvW8HcKWVdbNqgIfOOp2dXMnm1mY= 54 | github.com/phpdave11/gofpdi v1.0.12/go.mod h1:vBmVV0Do6hSBHC8uKUQ71JGW+ZGQq74llk/7bXwjDoI= 55 | github.com/phpdave11/gofpdi v1.0.13/go.mod h1:vBmVV0Do6hSBHC8uKUQ71JGW+ZGQq74llk/7bXwjDoI= 56 | github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 57 | github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 58 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 59 | github.com/ruudk/golang-pdf417 v0.0.0-20181029194003-1af4ab5afa58/go.mod h1:6lfFZQK844Gfx8o5WFuvpxWRwnSoipWe/p622j1v06w= 60 | github.com/ruudk/golang-pdf417 v0.0.0-20201230142125-a7e3863a1245/go.mod h1:pQAZKsJ8yyVxGRWYNEm9oFB8ieLgKFnamEyDmSA0BRk= 61 | github.com/skelterjohn/go.matrix v0.0.0-20130517144113-daa59528eefd h1:+ZLYzP9SYC3WU9buyb9H0l9DQxqVFOCkDG8QnNBMAlA= 62 | github.com/skelterjohn/go.matrix v0.0.0-20130517144113-daa59528eefd/go.mod h1:x7ui0Rh4QxcWEOgIfa3cr9q4W/wyLTDdzISxBmLVeX8= 63 | github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= 64 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 65 | golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= 66 | golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= 67 | golang.org/x/exp v0.0.0-20180807140117-3d87b88a115f/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= 68 | golang.org/x/exp v0.0.0-20190125153040-c74c464bbbf2/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= 69 | golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= 70 | golang.org/x/exp v0.0.0-20191002040644-a1355ae1e2c3/go.mod h1:NOZ3BPKG0ec/BKJQgnvsSFpcKLM5xXVWnvZS97DWHgE= 71 | golang.org/x/image v0.0.0-20180708004352-c73c2afc3b81/go.mod h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs= 72 | golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= 73 | golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= 74 | golang.org/x/image v0.0.0-20190910094157-69e4b8554b2a/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= 75 | golang.org/x/image v0.0.0-20200119044424-58c23975cae1/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= 76 | golang.org/x/image v0.0.0-20200430140353-33d19683fad8/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= 77 | golang.org/x/image v0.0.0-20200618115811-c13761719519/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= 78 | golang.org/x/image v0.0.0-20201208152932-35266b937fa6/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= 79 | golang.org/x/image v0.0.0-20210216034530-4410531fe030/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= 80 | golang.org/x/image v0.0.0-20210607152325-775e3b0c77b9/go.mod h1:023OzeP/+EPmXeapQh35lcL3II3LrY8Ic+EFFKVhULM= 81 | golang.org/x/image v0.0.0-20210628002857-a66eb6448b8d h1:RNPAfi2nHY7C2srAV8A49jpsYr0ADedCk1wq6fTMTvs= 82 | golang.org/x/image v0.0.0-20210628002857-a66eb6448b8d/go.mod h1:023OzeP/+EPmXeapQh35lcL3II3LrY8Ic+EFFKVhULM= 83 | golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o= 84 | golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY= 85 | golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 86 | golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 87 | golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 88 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 89 | golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 90 | golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 91 | golang.org/x/sys v0.0.0-20210304124612-50617c2ba197/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 92 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 93 | golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 94 | golang.org/x/text v0.3.6 h1:aRYxNxv6iGQlyVaZmk6ZgYEDa+Jg18DxebPSrd6bg1M= 95 | golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 96 | golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 97 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 98 | golang.org/x/tools v0.0.0-20190206041539-40960b6deb8e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 99 | golang.org/x/tools v0.0.0-20190927191325-030b2cf1153e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= 100 | golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 101 | gonum.org/v1/gonum v0.0.0-20180816165407-929014505bf4/go.mod h1:Y+Yx5eoAFn32cQvJDxZx5Dpnq+c3wtXuadVZAcxbbBo= 102 | gonum.org/v1/gonum v0.8.2/go.mod h1:oe/vMfY3deqTw+1EZJhuvEW2iwGF1bW9wwu7XCu0+v0= 103 | gonum.org/v1/gonum v0.9.3/go.mod h1:TZumC3NeyVQskjXqmyWt4S3bINhy7B4eYwW69EbyX+0= 104 | gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0/go.mod h1:wa6Ws7BG/ESfp6dHfk7C6KdzKA7wR7u/rKwOGE66zvw= 105 | gonum.org/v1/plot v0.0.0-20190515093506-e2840ee46a6b/go.mod h1:Wt8AAjI+ypCyYX3nZBvf6cAIx93T+c/OS2HFAYskSZc= 106 | gonum.org/v1/plot v0.9.0/go.mod h1:3Pcqqmp6RHvJI72kgb8fThyUnav364FOsdDo2aGW5lY= 107 | gonum.org/v1/plot v0.10.0 h1:ymLukg4XJlQnYUJCp+coQq5M7BsUJFk6XQE4HPflwdw= 108 | gonum.org/v1/plot v0.10.0/go.mod h1:JWIHJ7U20drSQb/aDpTetJzfC1KlAPldJLpkSy88dvQ= 109 | rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= 110 | -------------------------------------------------------------------------------- /statistic.go: -------------------------------------------------------------------------------- 1 | package skyline 2 | 3 | import ( 4 | "github.com/gonum/stat" 5 | "math" 6 | "sort" 7 | ) 8 | 9 | func unDef(f float64) bool { 10 | if math.IsNaN(f) { 11 | return true 12 | } 13 | if math.IsInf(f, 1) { 14 | return true 15 | } 16 | if math.IsInf(f, -1) { 17 | return true 18 | } 19 | return false 20 | } 21 | 22 | // Median series.median 23 | func Median(series []float64) float64 { 24 | var median float64 25 | sort.Float64s(series) 26 | Len := len(series) 27 | lhs := (Len - 1) / 2 28 | rhs := Len / 2 29 | if Len == 0 { 30 | return 0.0 31 | } 32 | if lhs == rhs { 33 | median = series[lhs] 34 | } else { 35 | median = (series[lhs] + series[rhs]) / 2.0 36 | } 37 | return median 38 | } 39 | 40 | // Ewma 41 | func Ewma(series []float64, com float64) []float64 { 42 | var cur float64 43 | var prev float64 44 | var oldw float64 45 | var adj float64 46 | N := len(series) 47 | ret := make([]float64, N) 48 | if N == 0 { 49 | return ret 50 | } 51 | oldw = com / (1 + com) 52 | adj = oldw 53 | ret[0] = series[0] / (1 + com) 54 | for i := 1; i < N; i++ { 55 | cur = series[i] 56 | prev = ret[i-1] 57 | if unDef(cur) { 58 | ret[i] = prev 59 | } else { 60 | if unDef(prev) { 61 | ret[i] = cur / (1 + com) 62 | } else { 63 | ret[i] = (com*prev + cur) / (1 + com) 64 | } 65 | } 66 | } 67 | for i := 0; i < N; i++ { 68 | cur = ret[i] 69 | if !math.IsNaN(cur) { 70 | ret[i] = ret[i] / (1. - adj) 71 | adj *= oldw 72 | } else { 73 | if i > 0 { 74 | ret[i] = ret[i-1] 75 | } 76 | } 77 | } 78 | return ret 79 | } 80 | 81 | // EwmStd Exponentially-weighted moving std 82 | func EwmStd(series []float64, com float64) []float64 { 83 | m1st := Ewma(series, com) 84 | var series2 []float64 85 | for _, val := range series { 86 | series2 = append(series2, val*val) 87 | } 88 | m2nd := Ewma(series2, com) 89 | l := len(m1st) 90 | var result []float64 91 | for i := 0; i < l; i++ { 92 | t := m2nd[i] - math.Pow(m1st[i], 2) 93 | t *= (1.0 + 2.0*com) / (2.0 * com) 94 | result = append(result, math.Sqrt(t)) 95 | } 96 | return result 97 | } 98 | 99 | // Histogram numpy.histogram 100 | func Histogram(series []float64, bins int) ([]int, []float64) { 101 | var binEdges []float64 102 | var hist []int 103 | l := len(series) 104 | if l == 0 { 105 | return hist, binEdges 106 | } 107 | sort.Float64s(series) 108 | w := (series[l-1] - series[0]) / float64(bins) 109 | for i := 0; i < bins; i++ { 110 | binEdges = append(binEdges, w*float64(i)+series[0]) 111 | if binEdges[len(binEdges)-1] >= series[l-1] { 112 | break 113 | } 114 | } 115 | binEdges = append(binEdges, w*float64(bins)+series[0]) 116 | bl := len(binEdges) 117 | hist = make([]int, bl-1) 118 | for i := 0; i < bl-1; i++ { 119 | for _, val := range series { 120 | if val >= binEdges[i] && val < binEdges[i+1] { 121 | hist[i] += 1 122 | continue 123 | } 124 | if i == (bl-2) && val >= binEdges[i] && val <= binEdges[i+1] { 125 | hist[i] += 1 126 | } 127 | } 128 | } 129 | return hist, binEdges 130 | } 131 | 132 | // KolmogorovSmirnov performs the two-sample Kolmogorov–Smirnov test. The null 133 | // hypothesis is that the two datasets are coming from the same continuous 134 | // distribution. The α parameter specifies the significance level. If the test 135 | // rejects the null hypothesis, the function returns true; otherwise, false is 136 | // returned. The second and third outputs of the function are the p-value and 137 | // Kolmogorov–Smirnov statistic of the test, respectively. 138 | // 139 | // https://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test 140 | func KolmogorovSmirnov(data1, data2 []float64, α float64) (bool, float64, float64) { 141 | const ( 142 | terms = 101 143 | ) 144 | 145 | statistic := stat.KolmogorovSmirnov(data1, nil, data2, nil) 146 | 147 | // M. Stephens. Use of the Kolmogorov–Smirnov, Cramer-Von Mises and Related 148 | // Statistics Without Extensive Tables. Journal of the Royal Statistical 149 | // Society. Series B (Methodological), vol. 32, no. 1 (1970), pp. 115–122. 150 | // 151 | // http://www.jstor.org/stable/2984408 152 | n1, n2 := len(data1), len(data2) 153 | γ := math.Sqrt(float64(n1*n2) / float64(n1+n2)) 154 | λ := (γ + 0.12 + 0.11/γ) * statistic 155 | 156 | // Kolmogorov distribution 157 | // 158 | // https://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test#Kolmogorov_distribution 159 | pvalue, sign, k := 0.0, 1.0, 1.0 160 | for i := 0; i < terms; i++ { 161 | pvalue += sign * math.Exp(-2*λ*λ*k*k) 162 | sign, k = -sign, k+1 163 | } 164 | pvalue *= 2 165 | if pvalue < 0 { 166 | pvalue = 0 167 | } else if pvalue > 1 { 168 | pvalue = 1 169 | } 170 | 171 | return α >= pvalue, pvalue, statistic 172 | } 173 | 174 | //np.searchsorted 175 | func searchsorted(array, values []float64) []int { 176 | var indexes []int 177 | for _, val := range values { 178 | indexes = append(indexes, location(array, val)) 179 | } 180 | return indexes 181 | } 182 | 183 | func location(array []float64, key float64) int { 184 | i := 0 185 | size := len(array) 186 | for { 187 | mid := (i + size) / 2 188 | if i == size { 189 | break 190 | } 191 | if array[mid] < key { 192 | i = mid + 1 193 | } else { 194 | size = mid 195 | } 196 | } 197 | return i 198 | } 199 | -------------------------------------------------------------------------------- /statistic_test.go: -------------------------------------------------------------------------------- 1 | package skyline_test 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/datastream/skyline" 7 | ) 8 | 9 | func TestMedian(t *testing.T) { 10 | series := []float64{0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8, 8.9, 9.01} 11 | if skyline.Median(series) != 5.05 { 12 | t.Fatal("wrong median", skyline.Median(series)) 13 | } 14 | } 15 | 16 | type TP struct { 17 | Timestamp int64 18 | Value float64 19 | } 20 | 21 | func (f *TP) GetValue() float64 { 22 | return f.Value 23 | } 24 | func (f *TP) GetTimestamp() int64 { 25 | return f.Timestamp 26 | } 27 | 28 | func testEwma(t *testing.T) { 29 | series := []float64{0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8, 8.9, 9.01} 30 | rst := []float64{0.09999999999999978, 0.6554455445544544, 1.214520977649978, 1.7772255876832508, 2.3435583786886025, 2.9135180706168184, 3.48710309969332, 4.064311618855566, 4.645141498269393, 5.121538107701817} 31 | rt := skyline.Ewma(series, 50) 32 | for i, v := range rt { 33 | if v != rst[i] { 34 | t.Fatal("ewma error", t) 35 | } 36 | } 37 | } 38 | func testEwmStd(t *testing.T) { 39 | series := []float64{0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8, 8.9, 9.01} 40 | rst := []float64{4.9526750297502914e-09, 0.5527160659008843, 0.902537317532201, 1.2357653238068602, 1.5629953497356235, 1.8872927402148911, 2.209889422762198, 2.531374353771067, 2.8520607676954124, 3.0195071357543375} 41 | rt := skyline.EwmStd(series, 50) 42 | for i, v := range rt { 43 | if v != rst[i] { 44 | t.Fatal("ewma error", t) 45 | } 46 | } 47 | } 48 | 49 | func testHistogram(t *testing.T) { 50 | series := []float64{0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8, 8.9, 9.01} 51 | hist := []int{1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 2} 52 | bin := []float64{0.1, 0.694, 1.288, 1.8820000000000001, 2.476, 3.07, 3.664, 4.257999999999999, 4.851999999999999, 5.446, 6.039999999999999, 6.6339999999999995, 7.228, 7.821999999999999, 8.415999999999999, 9.01} 53 | h, b := skyline.Histogram(series, 15) 54 | for i, v := range h { 55 | if v != hist[i] { 56 | t.Fatal("ewma error", t) 57 | } 58 | } 59 | for i, v := range b { 60 | if v != bin[i] { 61 | t.Fatal("ewma error", t) 62 | } 63 | } 64 | } 65 | 66 | func testKolmogorovSmirnov(t *testing.T) { 67 | reference := []float64{0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8, 8.9, 9.01, 1.2, 2, 4, 6, 9, 1, 22, 11, 19, 18.9, 11, 14} 68 | probe := []float64{0.4, 0.1, 1.3, 2.4, 6.5, 3.6, 5.7, 6.8, 8.9, 9, 9.1, 11.2, 1.2, 1.3, 14, 4, 5, 0.123, 9, 7, 8.1, 9.9, 2.1} 69 | _, ksPValue, ksD := skyline.KolmogorovSmirnov(reference, probe, 0.05) 70 | if ksD != 0.18577075098814222 || ksPValue != 0.789955481957006 { 71 | t.Fatal("ewma error", t) 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /todo.md: -------------------------------------------------------------------------------- 1 | ## mean_subtraction_cumulation 2 | 3 | expAverage is not used. 4 | 5 | ## run_selected_algorithm 6 | 7 | maybe need to be rewrited 8 | -------------------------------------------------------------------------------- /utils.go: -------------------------------------------------------------------------------- 1 | package skyline 2 | 3 | // TimePoint is basic data struct 4 | type TimePoint interface { 5 | GetTimestamp() int64 //x time 6 | GetValue() float64 //y value 7 | } 8 | 9 | // TimeArray return all timestamps in timeseries array 10 | func TimeArray(timeseries []TimePoint) []int64 { 11 | var t []int64 12 | for _, val := range timeseries { 13 | t = append(t, val.GetTimestamp()) 14 | } 15 | return t 16 | } 17 | 18 | // TimeArray64 return all timestamps in timeseries array 19 | func TimeArray64(timeseries []TimePoint) []float64 { 20 | var t []float64 21 | for _, val := range timeseries { 22 | t = append(t, float64(val.GetTimestamp())) 23 | } 24 | return t 25 | } 26 | 27 | // ValueArray return all values in timeseries array 28 | func ValueArray(timeseries []TimePoint) []float64 { 29 | var v []float64 30 | for _, val := range timeseries { 31 | v = append(v, val.GetValue()) 32 | } 33 | return v 34 | } 35 | --------------------------------------------------------------------------------