();
26 |
27 | double mean = 100.0f;
28 | double variance = 5.0f;
29 | int numToGenerate = 10000;
30 |
31 | for (int idx = 1; idx <= numToGenerate; ++idx){
32 | randomNumbers.add(getGaussian(mean, variance));
33 | }
34 |
35 | for (int idx = 1; idx <= numToGenerate; ++idx){
36 | if (idx < numToGenerate/2) {
37 | skewedNumbers.add(1.0);
38 | } else {
39 | skewedNumbers.add(100.0);
40 | }
41 | }
42 | double[] result = StatsUtils.swilk(randomNumbers);
43 | // p val > alpha will NOT reject the null hypothesis that
44 | // the values came from a normal distribution.
45 | Assert.assertTrue(result[1] >= 0.05);
46 | result = StatsUtils.swilk(skewedNumbers);
47 | // p val < alpha will reject the null hypothesis that
48 | // the values came from a normal distribution.
49 | Assert.assertTrue(result[1] <= 0.05);
50 | }
51 |
52 | private double getGaussian(double aMean, double aVariance){
53 | return aMean + fRandom.nextGaussian() * aVariance;
54 | }
55 | }
56 |
--------------------------------------------------------------------------------
/src/main/java/com/yahoo/egads/utilities/IdentifiedDoublePoint.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.yahoo.egads.utilities;
19 |
20 | import org.apache.commons.math3.ml.clustering.Clusterable;
21 | import org.apache.commons.math3.ml.clustering.DoublePoint;
22 |
23 | /**
24 | * A simple implementation of {@link Clusterable} for points with double coordinates, with an added id.
25 | * @version $Id$
26 | * @since 3.2
27 | */
28 | public class IdentifiedDoublePoint extends DoublePoint {
29 |
30 | private final int id;
31 |
32 | /**
33 | * Build an instance wrapping an double array.
34 | *
35 | * The wrapped array is referenced, it is not copied.
36 | *
37 | * @param point the n-dimensional point in double space
38 | */
39 | public IdentifiedDoublePoint(final double[] point, int id) {
40 | super(point);
41 | this.id = id;
42 | }
43 |
44 | /**
45 | * Build an instance wrapping an integer array.
46 | *
47 | * The wrapped array is copied to an internal double array.
48 | *
49 | * @param point the n-dimensional point in integer space
50 | */
51 | public IdentifiedDoublePoint(final int[] point, int id) {
52 | super(point);
53 | this.id = id;
54 | }
55 |
56 | public int getId() {
57 | return id;
58 | }
59 |
60 | }
--------------------------------------------------------------------------------
/src/test/java/com/yahoo/egads/TestChangePointDetectionAlgorithm.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2015, Yahoo Inc.
3 | * Copyrights licensed under the GPL License.
4 | * See the accompanying LICENSE file for terms.
5 | */
6 |
7 | package com.yahoo.egads;
8 |
9 | import java.util.ArrayList;
10 | import org.testng.Assert;
11 | import java.util.Properties;
12 | import org.testng.annotations.Test;
13 | import com.yahoo.egads.data.TimeSeries;
14 | import com.yahoo.egads.models.adm.AdaptiveKernelDensityChangePointDetector;
15 | import java.io.InputStream;
16 | import java.io.FileInputStream;
17 |
18 | public class TestChangePointDetectionAlgorithm {
19 | @Test
20 | public void testChangePointDetectionAlgorithm() throws Exception {
21 | String configFile = "src/test/resources/sample_config.ini";
22 | InputStream is = new FileInputStream(configFile);
23 | Properties p = new Properties();
24 | p.load(is);
25 | TimeSeries observedTS =
26 | com.yahoo.egads.utilities.FileUtils
27 | .createTimeSeries("src/test/resources/cp-obs.csv", p).get(0);
28 | TimeSeries expectedTS =
29 | com.yahoo.egads.utilities.FileUtils
30 | .createTimeSeries("src/test/resources/cp-exp.csv", p).get(0);
31 |
32 | int n = observedTS.size();
33 | Integer preWindowSize = 2 * 24 * 4;
34 | Integer postWindowSize = 2 * 24 * 4;
35 | Float confidence = 0.8F;
36 | float[] residuals = new float[n];
37 |
38 | // Computing the residuals
39 | for (int i = 0; i < n; ++i) {
40 | residuals[i] = observedTS.data.get(i).value - expectedTS.data.get(i).value;
41 | }
42 |
43 | p.setProperty("PRE_WINDOW_SIZE", preWindowSize.toString());
44 | p.setProperty("POST_WINDOW_SIZE", postWindowSize.toString());
45 | p.setProperty("CONFIDENCE", confidence.toString());
46 | p.setProperty("MAX_ANOMALY_TIME_AGO", "48");
47 |
48 | AdaptiveKernelDensityChangePointDetector cpd = new AdaptiveKernelDensityChangePointDetector(p);
49 | ArrayList changePoints = cpd.detectChangePoints(residuals, preWindowSize, postWindowSize, confidence);
50 | Assert.assertTrue(changePoints.size() == 1);
51 | }
52 | }
53 |
--------------------------------------------------------------------------------
/src/main/java/com/yahoo/egads/data/MetricMeta.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2015, Yahoo Inc.
3 | * Copyrights licensed under the GPL License.
4 | * See the accompanying LICENSE file for terms.
5 | */
6 |
7 | // data structure class
8 | // Contains meta-data about a metric.
9 |
10 | package com.yahoo.egads.data;
11 |
12 | import org.json.JSONObject;
13 | import org.json.JSONStringer;
14 |
15 | public class MetricMeta implements JsonAble {
16 | // member data ////////////////////////////////////////////////
17 |
18 | public String id;
19 | public boolean detectAnomalies = false;
20 | public String name;
21 | public String fileName;
22 | public String source;
23 | public String smoothing;
24 | public long[] seasons;
25 |
26 | // construction ////////////////////////////////////////////////
27 |
28 | public MetricMeta() {
29 | }
30 |
31 | public MetricMeta(String id_arg) {
32 | id = id_arg;
33 | }
34 |
35 | // methods ////////////////////////////////////////////////
36 |
37 | // display ////////////////////////////////////////////////
38 |
39 | public String toString() {
40 | StringBuffer str = new StringBuffer();
41 | str.append("id=" + id);
42 | str.append(" detectAnomalies=" + detectAnomalies);
43 | str.append(" name=" + name);
44 | str.append(" source=" + source);
45 | return str.toString();
46 | }
47 |
48 | public void toJson(JSONStringer json_out) throws Exception {
49 | JsonEncoder.toJson(this, json_out);
50 | }
51 |
52 | public void fromJson(JSONObject json_obj) throws Exception {
53 | JsonEncoder.fromJson(this, json_obj);
54 | }
55 |
56 | // test ////////////////////////////////////////////////
57 |
58 | // needed for unit tests
59 | public boolean equals(Object other_obj) {
60 | if (!(other_obj instanceof MetricMeta)) {
61 | return false;
62 | }
63 | MetricMeta other = (MetricMeta) other_obj;
64 | if (!equals(id, other.id)) {
65 | return false;
66 | }
67 | if (detectAnomalies != other.detectAnomalies) {
68 | return false;
69 | }
70 | if (!equals(name, other.name)) {
71 | return false;
72 | }
73 | if (!equals(source, other.source)) {
74 | return false;
75 | }
76 | return true;
77 | }
78 |
79 | public static boolean equals(Object o1, Object o2) {
80 | if (o1 == o2) {
81 | return true;
82 | }
83 | if (o1 == null && o2 != null) {
84 | return false;
85 | }
86 | if (o1 != null && o2 == null) {
87 | return false;
88 | }
89 | return o1.equals(o2);
90 | }
91 |
92 | }
93 |
--------------------------------------------------------------------------------
/src/test/java/com/yahoo/egads/TestAutoForecast.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2015, Yahoo Inc.
3 | * Copyrights licensed under the GPL License.
4 | * See the accompanying LICENSE file for terms.
5 | */
6 |
7 | package com.yahoo.egads;
8 |
9 | import com.yahoo.egads.data.Model;
10 | import com.yahoo.egads.models.tsmm.*;
11 | import com.yahoo.egads.utilities.*;
12 | import com.yahoo.egads.data.*;
13 | import java.util.ArrayList;
14 | import java.util.Properties;
15 | import java.io.FileInputStream;
16 | import java.io.InputStream;
17 | import com.yahoo.egads.control.ProcessableObject;
18 | import com.yahoo.egads.control.ProcessableObjectFactory;
19 | import org.testng.Assert;
20 | import org.testng.annotations.Test;
21 | import net.sourceforge.openforecast.Forecaster;
22 | import net.sourceforge.openforecast.ForecastingModel;
23 | import net.sourceforge.openforecast.DataSet;
24 | import net.sourceforge.openforecast.DataPoint;
25 | import net.sourceforge.openforecast.Observation;
26 | import java.util.*;
27 |
28 | // Tests the correct generation of the expected values for olympic
29 | // scoring.
30 | public class TestAutoForecast {
31 |
32 | @Test
33 | public void testAutoForecast() throws Exception {
34 |
35 | String configFile = "src/test/resources/sample_config.ini";
36 | InputStream is = new FileInputStream(configFile);
37 | Properties p = new Properties();
38 | p.load(is);
39 | ArrayList metrics = com.yahoo.egads.utilities.FileUtils
40 | .createTimeSeries("src/test/resources/sample_input.csv", p);
41 | AutoForecastModel model = new AutoForecastModel(p);
42 | model.train(metrics.get(0).data);
43 | TimeSeries.DataSequence sequence = new TimeSeries.DataSequence(metrics.get(0).startTime(),
44 | metrics.get(0).lastTime(),
45 | 3600);
46 |
47 | sequence.setLogicalIndices(metrics.get(0).startTime(), 3600);
48 | model.predict(sequence);
49 | Assert.assertEquals(verifyResults(sequence, metrics.get(0).data), true);
50 | }
51 |
52 | // Verifies that the two time-series are identical.
53 | private boolean verifyResults (TimeSeries.DataSequence computed, TimeSeries.DataSequence actual) {
54 | int n = computed.size();
55 | int n2 = actual.size();
56 | if (n != n2) {
57 | return false;
58 | }
59 | float precision = (float) 0.000001;
60 | float errorSum = (float) 0.0;
61 | for (int i = 0; i < n; i++) {
62 | errorSum += Math.abs(computed.get(i).value - actual.get(i).value);
63 | }
64 | errorSum /= n;
65 |
66 | if (errorSum <= 5152990) {
67 | return true;
68 | }
69 | return false;
70 | }
71 | }
72 |
--------------------------------------------------------------------------------
/src/main/java/com/yahoo/egads/control/DetectAnomalyProcessable.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2015, Yahoo Inc.
3 | * Copyrights licensed under the GPL License.
4 | * See the accompanying LICENSE file for terms.
5 | */
6 |
7 | // A template for doing Anomaly Detection.
8 |
9 | package com.yahoo.egads.control;
10 |
11 | import java.util.ArrayList;
12 |
13 | import com.yahoo.egads.data.Anomaly;
14 | import com.yahoo.egads.data.TimeSeries;
15 | import com.yahoo.egads.utilities.GUIUtils;
16 | import java.util.Properties;
17 |
18 | public class DetectAnomalyProcessable implements ProcessableObject {
19 | private ModelAdapter ma;
20 | private AnomalyDetector ad;
21 | private Properties config;
22 | private ArrayList anomalyList;
23 |
24 | public ArrayList getAnomalyList() {
25 | return anomalyList;
26 | }
27 |
28 |
29 | DetectAnomalyProcessable(ModelAdapter ma, AnomalyDetector ad, Properties config) {
30 | this.ma = ma;
31 | this.ad = ad;
32 | this.config = config;
33 | anomalyList = new ArrayList<>();
34 | }
35 |
36 | public void process() throws Exception {
37 |
38 | // Resetting the models
39 | ma.reset();
40 |
41 | // Training the model with the whole metric
42 | ma.train();
43 |
44 | // Finding the expected values
45 | ArrayList list = ma.forecast(
46 | ma.metric.startTime(), ma.metric.lastTime());
47 |
48 | // For each model's prediction in the ModelAdapter
49 | for (TimeSeries.DataSequence ds : list) {
50 | // Reseting the anomaly detectors
51 | ad.reset();
52 |
53 | // Unsupervised tuning of the anomaly detectors
54 | ad.tune(ds);
55 |
56 | // Detecting anomalies for each anomaly detection model in anomaly detector
57 | anomalyList = ad.detect(ad.metric, ds);
58 |
59 | // Writing the anomalies to AnomalyDB
60 | if (config.getProperty("OUTPUT") != null && config.getProperty("OUTPUT").equals("ANOMALY_DB")) {
61 | for (Anomaly anomaly : anomalyList) {
62 | // TODO: Batch Anomaly Process.
63 | }
64 | } else if (config.getProperty("OUTPUT") != null && config.getProperty("OUTPUT").equals("GUI")) {
65 | GUIUtils.plotResults(ma.metric.data, ds, anomalyList, config);
66 | } else if (config.getProperty("OUTPUT") != null && config.getProperty("OUTPUT").equals("PLOT")) {
67 | for (Anomaly anomaly : anomalyList) {
68 | System.out.print(anomaly.toPlotString());
69 | }
70 | } else {
71 | for (Anomaly anomaly : anomalyList) {
72 | System.out.print(anomaly.toPerlString());
73 | }
74 | }
75 | }
76 | }
77 |
78 | public ArrayList result() throws Exception {
79 | return getAnomalyList();
80 | }
81 | }
82 |
--------------------------------------------------------------------------------
/src/main/java/com/yahoo/egads/utilities/StdinProcessor.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2015, Yahoo Inc.
3 | * Copyrights licensed under the GPL License.
4 | * See the accompanying LICENSE file for terms.
5 | */
6 |
7 | package com.yahoo.egads.utilities;
8 |
9 | // Class that implements EGADS STDIN input processor.
10 |
11 | import com.yahoo.egads.control.ProcessableObject;
12 | import com.yahoo.egads.control.ProcessableObjectFactory;
13 | import java.util.Properties;
14 | import java.io.*;
15 | import java.util.ArrayList;
16 | import com.yahoo.egads.data.TimeSeries;
17 |
18 | public class StdinProcessor implements InputProcessor {
19 |
20 | public void processInput(Properties p) throws Exception {
21 | BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
22 | String s;
23 | Integer aggr = 1;
24 | if (p.getProperty("AGGREGATION") != null) {
25 | aggr = new Integer(p.getProperty("AGGREGATION"));
26 | }
27 | while ((s = in.readLine()) != null && s.length() != 0) {
28 | // Parse the time-series.
29 | ArrayList metrics = createTimeSeries(s, aggr);
30 | for (TimeSeries ts : metrics) {
31 | ProcessableObject po = ProcessableObjectFactory.create(ts, p);
32 | po.process();
33 | }
34 | }
35 | }
36 |
37 |
38 | // Format of the time-series: meta1\tmeta\2{(2014120205,0),(2014122207,1)}\t{(2014120205,0),(2014122207,0)}...
39 | // Creates a time-series from a file.
40 | private static ArrayList createTimeSeries(String s, Integer aggr) throws Exception {
41 | ArrayList output = new ArrayList();
42 | String[] tokens = s.split("\t");
43 | String meta = "meta";
44 |
45 | int tokenNum = 1;
46 | for (String t : tokens) {
47 | if (t.contains("{(")) {
48 | output.add(convertStringToTS(t, meta + "-" + Integer.toString(tokenNum)));
49 | tokenNum++;
50 | } else {
51 | meta += "-" + t;
52 | }
53 | }
54 |
55 | // Handle aggregation.
56 | if (aggr > 1) {
57 | for (TimeSeries t : output) {
58 | t.data = t.aggregate(aggr);
59 | t.meta.name += "_aggr_" + aggr;
60 | }
61 | }
62 | return output;
63 | }
64 |
65 | private static TimeSeries convertStringToTS(String s, String tokenNum) throws Exception {
66 | TimeSeries ts = new TimeSeries();
67 | ts.meta.fileName = tokenNum;
68 | ts.meta.name = tokenNum;
69 | String[] tuples = s.split("\\),");
70 | for (String tuple : tuples) {
71 | tuple = tuple.replaceAll("[{}\\(\\)]", "");
72 | String[] vals = tuple.split(",");
73 | Float val = new Float(vals[1]);
74 | ts.append(new Long(vals[0]), val);
75 | }
76 | return ts;
77 | }
78 | }
79 |
--------------------------------------------------------------------------------
/src/test/java/com/yahoo/egads/TestJsonEncoder.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2015, Yahoo Inc.
3 | * Copyrights licensed under the GPL License.
4 | * See the accompanying LICENSE file for terms.
5 | */
6 |
7 | package com.yahoo.egads;
8 |
9 | import com.yahoo.egads.data.*;
10 | import org.testng.Assert;
11 | import org.testng.annotations.Test;
12 |
13 | public class TestJsonEncoder {
14 |
15 | @Test
16 | public void testAnomalyInterval() throws Exception {
17 | // anomaly interval
18 | Anomaly.Interval interval = new Anomaly.Interval(1L, 2L, 3.33f);
19 | System.out.print("\n interval = " + interval);
20 | String interval_json = JsonEncoder.toJson(interval);
21 | System.out.print("\n interval json = " + interval_json);
22 | Anomaly.Interval interval2 = new Anomaly.Interval();
23 | JsonEncoder.fromJson(interval2, interval_json);
24 | System.out
25 | .print("\n interval2 json = " + JsonEncoder.toJson(interval2));
26 | Assert.assertEquals(interval, interval2);
27 | }
28 |
29 | @Test
30 | public void testAnomaly() throws Exception {
31 | // anomaly
32 | Anomaly anomaly = new Anomaly("ANOMALY_ID", new MetricMeta("metric1"));
33 | anomaly.addInterval(2, 2.2f);
34 | anomaly.addInterval(3, 3.3f);
35 | System.out.print("\n anomaly = " + anomaly);
36 | String anomaly_json = JsonEncoder.toJson(anomaly);
37 | System.out.print("\n anomaly json = " + anomaly_json);
38 | Anomaly anomaly2 = new Anomaly();
39 | JsonEncoder.fromJson(anomaly2, anomaly_json);
40 | System.out.print("\n anomaly2 json = " + JsonEncoder.toJson(anomaly2));
41 | Assert.assertEquals(anomaly, anomaly2);
42 | }
43 |
44 | @Test
45 | public void testTimeSeries() throws Exception {
46 | // time series
47 | TimeSeries series = new TimeSeries(new float[] { 1, 2, 4 });
48 | series.meta = new MetricMeta("time series meta");
49 | series.meta.smoothing = "for_all";
50 | System.out.print("\n\n series = " + series);
51 | String series_json = JsonEncoder.toJson(series);
52 | System.out.print("\n series json = " + series_json);
53 | TimeSeries series2 = new TimeSeries();
54 | JsonEncoder.fromJson(series2, series_json);
55 | System.out.print("\n series2 json = " + JsonEncoder.toJson(series2));
56 | Assert.assertEquals(series, series2);
57 | }
58 |
59 | @Test
60 | public void testMetricMeta() throws Exception {
61 | // metric meta
62 | MetricMeta metric = new MetricMeta("m1");
63 | metric.detectAnomalies = true;
64 | System.out.print("\n\n metric = " + metric);
65 | String metric_json = JsonEncoder.toJson(metric);
66 | System.out.print("\n metric json = " + metric_json);
67 | MetricMeta metric2 = new MetricMeta(null);
68 | JsonEncoder.fromJson(metric2, metric_json);
69 | System.out.print("\n\n metric2 = " + metric2);
70 | System.out.print("\n metric2 json = " + JsonEncoder.toJson(metric2));
71 | Assert.assertEquals(metric, metric2);
72 | }
73 |
74 | }
75 |
--------------------------------------------------------------------------------
/src/main/java/com/yahoo/egads/utilities/DateTimeCalculator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2015, Yahoo Inc.
3 | * Copyrights licensed under the GPL License.
4 | * See the accompanying LICENSE file for terms.
5 | */
6 |
7 | // Utility class for date/time formatting and conversion
8 |
9 | package com.yahoo.egads.utilities;
10 |
11 | import java.text.ParseException;
12 | import java.text.SimpleDateFormat;
13 | import java.util.*;
14 |
/**
 * Static helpers for parsing, formatting and shifting date/time values.
 *
 * Strings are parsed with the first matching pattern in {@code formatters}
 * and always formatted as {@code yyyy-MM-dd HH:mm:ss}, using the JVM default
 * time zone.
 *
 * NOTE: the shared {@link SimpleDateFormat} instances are not thread-safe, so
 * the string-based methods must not be called concurrently without external
 * synchronisation.
 */
public class DateTimeCalculator {
    // Fields //////////////////////////////////////////////////////////////////////////////////
    // Accepted input patterns, tried in this order.
    private static final SimpleDateFormat[] formatters = new SimpleDateFormat[] {
        new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS"),
        new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SS"),
        new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.S"),
        new SimpleDateFormat("yyyy-MM-dd HH:mm:ss:SSS"),
        new SimpleDateFormat("yyyy-MM-dd HH:mm:ss:SS"),
        new SimpleDateFormat("yyyy-MM-dd HH:mm:ss:S"),
        new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"),
        new SimpleDateFormat("yyyy-MM-dd") };
    private static final SimpleDateFormat outputFormatter = new SimpleDateFormat(
        "yyyy-MM-dd HH:mm:ss");

    // Static Methods //////////////////////////////////////////////////////////////////////////

    /**
     * Time in string --> Date object.
     *
     * @param time text in one of the accepted patterns
     * @return the parsed date, or {@code null} when no pattern matches
     */
    public static Date getDate(String time) {
        for (SimpleDateFormat formatter : formatters) {
            try {
                return formatter.parse(time);
            } catch (ParseException ignored) {
                // This pattern does not match; fall through to the next one.
            }
        }
        return null;
    }

    /**
     * Milliseconds --> Date object (same as {@code new Date(timeInMilliSeconds)}).
     *
     * @param timeInMilliSeconds milliseconds since the epoch
     * @return the corresponding date
     */
    public static Date getDate(long timeInMilliSeconds) {
        return new Date(timeInMilliSeconds);
    }

    /**
     * Time in string --> Milliseconds.
     *
     * @param time text in one of the accepted patterns
     * @return milliseconds since the epoch
     * @throws IllegalArgumentException when {@code time} matches no pattern
     */
    public static long getMilliSeconds(String time) {
        return parseOrThrow(time).getTime();
    }

    /**
     * Re-formats a time string into the standard output format.
     *
     * @param time text in one of the accepted patterns
     * @return the time formatted as {@code yyyy-MM-dd HH:mm:ss}
     * @throws IllegalArgumentException when {@code time} matches no pattern
     */
    public static String format(String time) {
        return outputFormatter.format(parseOrThrow(time));
    }

    /**
     * Milliseconds --> Time in string.
     *
     * @param timeInMilliSeconds milliseconds since the epoch
     * @return the time formatted as {@code yyyy-MM-dd HH:mm:ss}
     */
    public static String format(long timeInMilliSeconds) {
        return outputFormatter.format(new Date(timeInMilliSeconds));
    }

    /**
     * Adds {@code amount} to the given calendar field of a time string.
     *
     * @param time   text in one of the accepted patterns
     * @param field  a {@link Calendar} field constant, e.g. {@code Calendar.HOUR}
     * @param amount signed amount to add to the field
     * @return the shifted time formatted as {@code yyyy-MM-dd HH:mm:ss}
     * @throws IllegalArgumentException when {@code time} matches no pattern
     */
    public static String add(String time, int field, int amount) {
        // A local Calendar replaces the former shared static instance, whose
        // state every call overwrote.
        Calendar cal = Calendar.getInstance();
        cal.setTime(parseOrThrow(time));
        cal.add(field, amount);
        return outputFormatter.format(cal.getTime());
    }

    /**
     * Adds {@code amount} to the given calendar field of a millisecond timestamp.
     *
     * @param timeInMilliSeconds milliseconds since the epoch
     * @param field              a {@link Calendar} field constant
     * @param amount             signed amount to add to the field
     * @return the shifted time in milliseconds since the epoch
     */
    public static long add(long timeInMilliSeconds, int field, int amount) {
        Calendar cal = Calendar.getInstance();
        cal.setTimeInMillis(timeInMilliSeconds);
        cal.add(field, amount);
        return cal.getTimeInMillis();
    }

    // Parses like getDate(String) but reports unparseable input explicitly
    // rather than letting a null date fail later with a NullPointerException.
    private static Date parseOrThrow(String time) {
        Date date = getDate(time);
        if (date == null) {
            throw new IllegalArgumentException("Unparseable date/time: '" + time + "'");
        }
        return date;
    }
}
90 |
--------------------------------------------------------------------------------
/src/test/java/com/yahoo/egads/TestAnomalyDetect.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2015, Yahoo Inc.
3 | * Copyrights licensed under the GPL License.
4 | * See the accompanying LICENSE file for terms.
5 | */
6 |
7 | package com.yahoo.egads;
8 |
9 | import com.yahoo.egads.models.tsmm.OlympicModel;
10 | import com.yahoo.egads.models.adm.*;
11 | import com.yahoo.egads.data.Anomaly.IntervalSequence;
12 | import com.yahoo.egads.data.*;
13 | import java.util.ArrayList;
14 | import java.util.Properties;
15 | import java.io.FileInputStream;
16 | import java.io.InputStream;
17 | import org.testng.Assert;
18 | import org.testng.annotations.Test;
19 |
20 | // Tests the basic anoamly detection piece of EGADS.
21 | public class TestAnomalyDetect {
22 |
23 | @Test
24 | public void testOlympicModel() throws Exception {
25 | // Test cases: ref window: 10, 5
26 | // Drops: 0, 1
27 | String[] refWindows = new String[]{"10", "5"};
28 | String[] drops = new String[]{"0", "1"};
29 | // Load the true expected values from a file.
30 | String configFile = "src/test/resources/sample_config.ini";
31 | InputStream is = new FileInputStream(configFile);
32 | Properties p = new Properties();
33 | p.load(is);
34 | ArrayList actual_metric = com.yahoo.egads.utilities.FileUtils
35 | .createTimeSeries("src/test/resources/model_input.csv", p);
36 | p.setProperty("MAX_ANOMALY_TIME_AGO", "999999999");
37 | for (int w = 0; w < refWindows.length; w++) {
38 | for (int d = 0; d < drops.length; d++) {
39 | p.setProperty("NUM_WEEKS", refWindows[w]);
40 | p.setProperty("NUM_TO_DROP", drops[d]);
41 | p.setProperty("THRESHOLD", "mapee#100,mase#10");
42 | // Parse the input timeseries.
43 | ArrayList metrics = com.yahoo.egads.utilities.FileUtils
44 | .createTimeSeries("src/test/resources/model_output_" + refWindows[w] + "_" + drops[d] + ".csv", p);
45 | OlympicModel model = new OlympicModel(p);
46 | model.train(actual_metric.get(0).data);
47 | TimeSeries.DataSequence sequence = new TimeSeries.DataSequence(metrics.get(0).startTime(),
48 | metrics.get(0).lastTime(),
49 | 3600);
50 | sequence.setLogicalIndices(metrics.get(0).startTime(), 3600);
51 | model.predict(sequence);
52 | // Initialize the anomaly detector.
53 | ExtremeLowDensityModel bcm = new ExtremeLowDensityModel(p);
54 |
55 | // Initialize the DBScan anomaly detector.
56 | DBScanModel dbs = new DBScanModel(p);
57 | IntervalSequence anomalies = bcm.detect(actual_metric.get(0).data, sequence);
58 | dbs.tune(actual_metric.get(0).data, sequence);
59 | IntervalSequence anomaliesdb = dbs.detect(actual_metric.get(0).data, sequence);
60 |
61 | // Initialize the SimpleThreshold anomaly detector.
62 | SimpleThresholdModel stm = new SimpleThresholdModel(p);
63 |
64 | stm.tune(actual_metric.get(0).data, sequence);
65 | IntervalSequence anomaliesstm = stm.detect(actual_metric.get(0).data, sequence);
66 | Assert.assertTrue(anomalies.size() > 10);
67 | Assert.assertTrue(anomaliesdb.size() > 2);
68 | Assert.assertTrue(anomaliesstm.size() > 2);
69 | }
70 | }
71 | }
72 | }
73 |
--------------------------------------------------------------------------------
/src/main/java/com/yahoo/egads/models/tsmm/NaiveForecastingModel.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2015, Yahoo Inc.
3 | * Copyrights licensed under the GPL License.
4 | * See the accompanying LICENSE file for terms.
5 | */
6 |
7 | // Naive forecasting model: wraps the OpenForecast library's
8 | // NaiveForecastingModel to produce the expected values of a time series.
9 |
10 | package com.yahoo.egads.models.tsmm;
11 |
12 | import com.yahoo.egads.data.*;
13 | import com.yahoo.egads.data.TimeSeries.Entry;
14 | import org.json.JSONObject;
15 | import org.json.JSONStringer;
16 | import java.util.Properties;
17 | import net.sourceforge.openforecast.DataSet;
18 | import net.sourceforge.openforecast.ForecastingModel;
19 | import net.sourceforge.openforecast.DataPoint;
20 | import net.sourceforge.openforecast.Observation;
21 | import java.util.*;
22 |
23 | // A naive forecasting model is a special case of the moving average forecasting model where the number of periods used for smoothing is 1.
24 | public class NaiveForecastingModel extends TimeSeriesAbstractModel {
25 | // methods ////////////////////////////////////////////////
26 |
27 | // The model that will be used for forecasting.
28 | private ForecastingModel forecaster;
29 |
30 | // Stores the historical values.
31 | private TimeSeries.DataSequence data;
32 |
33 | public NaiveForecastingModel(Properties config) {
34 | super(config);
35 | modelName = "NaiveForecastingModel";
36 | }
37 |
38 | public void reset() {
39 | // At this point, reset does nothing.
40 | }
41 |
42 | public void train(TimeSeries.DataSequence data) {
43 | this.data = data;
44 | int n = data.size();
45 | DataPoint dp = null;
46 | DataSet observedData = new DataSet();
47 | for (int i = 0; i < n; i++) {
48 | dp = new Observation(data.get(i).value);
49 | dp.setIndependentValue("x", i);
50 | observedData.add(dp);
51 | }
52 | observedData.setTimeVariable("x");
53 |
54 | forecaster = new net.sourceforge.openforecast.models.NaiveForecastingModel();
55 | forecaster.init(observedData);
56 | initForecastErrors(forecaster, data);
57 |
58 | logger.debug(getBias() + "\t" + getMAD() + "\t" + getMAPE() + "\t" + getMSE() + "\t" + getSAE() + "\t" + 0 + "\t" + 0);
59 | }
60 |
61 | public void update(TimeSeries.DataSequence data) {
62 |
63 | }
64 |
65 | public String getModelName() {
66 | return modelName;
67 | }
68 |
69 | public void predict(TimeSeries.DataSequence sequence) throws Exception {
70 | int n = data.size();
71 | DataSet requiredDataPoints = new DataSet();
72 | DataPoint dp;
73 |
74 | for (int count = 0; count < n; count++) {
75 | dp = new Observation(0.0);
76 | dp.setIndependentValue("x", count);
77 | requiredDataPoints.add(dp);
78 | }
79 | forecaster.forecast(requiredDataPoints);
80 |
81 | // Output the results
82 | Iterator it = requiredDataPoints.iterator();
83 | int i = 0;
84 | while (it.hasNext()) {
85 | DataPoint pnt = ((DataPoint) it.next());
86 | logger.info(data.get(i).time + "," + data.get(i).value + "," + pnt.getDependentValue());
87 | sequence.set(i, (new Entry(data.get(i).time, (float) pnt.getDependentValue())));
88 | i++;
89 | }
90 | }
91 |
92 | public void toJson(JSONStringer json_out) {
93 |
94 | }
95 |
96 | public void fromJson(JSONObject json_obj) {
97 |
98 | }
99 | }
100 |
--------------------------------------------------------------------------------
/src/main/java/com/yahoo/egads/models/tsmm/SimpleExponentialSmoothingModel.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2015, Yahoo Inc.
3 | * Copyrights licensed under the GPL License.
4 | * See the accompanying LICENSE file for terms.
5 | */
6 |
7 | // Simple exponential smoothing model: wraps the OpenForecast library's
8 | // SimpleExponentialSmoothingModel to produce the expected values of a time series.
9 |
10 | package com.yahoo.egads.models.tsmm;
11 |
12 | import com.yahoo.egads.data.*;
13 | import com.yahoo.egads.data.TimeSeries.Entry;
14 | import org.json.JSONObject;
15 | import org.json.JSONStringer;
16 | import java.util.Properties;
17 | import net.sourceforge.openforecast.DataSet;
18 | import net.sourceforge.openforecast.ForecastingModel;
19 | import net.sourceforge.openforecast.DataPoint;
20 | import net.sourceforge.openforecast.Observation;
21 | import java.util.*;
22 |
23 | // A simple exponential smoothing forecast model is a very popular model used to produce a smoothed Time Series.
24 | public class SimpleExponentialSmoothingModel extends TimeSeriesAbstractModel {
25 | // methods ////////////////////////////////////////////////
26 |
27 | // The model that will be used for forecasting.
28 | private ForecastingModel forecaster;
29 |
30 | // Stores the historical values.
31 | private TimeSeries.DataSequence data;
32 |
33 | public SimpleExponentialSmoothingModel(Properties config) {
34 | super(config);
35 | modelName = "SimpleExponentialSmoothingModel";
36 | }
37 |
38 | public void reset() {
39 | // At this point, reset does nothing.
40 | }
41 |
42 | public void train(TimeSeries.DataSequence data) {
43 | this.data = data;
44 | int n = data.size();
45 | DataPoint dp = null;
46 | DataSet observedData = new DataSet();
47 | for (int i = 0; i < n; i++) {
48 | dp = new Observation(data.get(i).value);
49 | dp.setIndependentValue("x", i);
50 | observedData.add(dp);
51 | }
52 | observedData.setTimeVariable("x");
53 |
54 | // TODO: Make weights configurable.
55 | forecaster = new net.sourceforge.openforecast.models.SimpleExponentialSmoothingModel(0.75);
56 | forecaster.init(observedData);
57 | initForecastErrors(forecaster, data);
58 |
59 | logger.debug(getBias() + "\t" + getMAD() + "\t" + getMAPE() + "\t" + getMSE() + "\t" + getSAE() + "\t" + 0 + "\t" + 0);
60 | }
61 |
62 | public void update(TimeSeries.DataSequence data) {
63 |
64 | }
65 |
66 | public String getModelName() {
67 | return modelName;
68 | }
69 |
70 | public void predict(TimeSeries.DataSequence sequence) throws Exception {
71 | int n = data.size();
72 | DataSet requiredDataPoints = new DataSet();
73 | DataPoint dp;
74 |
75 | for (int count = 0; count < n; count++) {
76 | dp = new Observation(0.0);
77 | dp.setIndependentValue("x", count);
78 | requiredDataPoints.add(dp);
79 | }
80 | forecaster.forecast(requiredDataPoints);
81 |
82 | // Output the results
83 | Iterator it = requiredDataPoints.iterator();
84 | int i = 0;
85 | while (it.hasNext()) {
86 | DataPoint pnt = ((DataPoint) it.next());
87 | logger.info(data.get(i).time + "," + data.get(i).value + "," + pnt.getDependentValue());
88 | sequence.set(i, (new Entry(data.get(i).time, (float) pnt.getDependentValue())));
89 | i++;
90 | }
91 | }
92 |
93 | public void toJson(JSONStringer json_out) {
94 |
95 | }
96 |
97 | public void fromJson(JSONObject json_obj) {
98 |
99 | }
100 | }
101 |
--------------------------------------------------------------------------------
/src/main/java/com/yahoo/egads/models/tsmm/MovingAverageModel.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2015, Yahoo Inc.
3 | * Copyrights licensed under the GPL License.
4 | * See the accompanying LICENSE file for terms.
5 | */
6 |
7 | // Moving average model: forecasts the series with OpenForecast's
8 | // MovingAverageModel, built over a fixed window of observations.
9 |
10 | package com.yahoo.egads.models.tsmm;
11 |
12 | import com.yahoo.egads.data.*;
13 | import com.yahoo.egads.data.TimeSeries.Entry;
14 | import org.json.JSONObject;
15 | import org.json.JSONStringer;
16 | import java.util.Properties;
17 | import net.sourceforge.openforecast.DataSet;
18 | import net.sourceforge.openforecast.ForecastingModel;
19 | import net.sourceforge.openforecast.DataPoint;
20 | import net.sourceforge.openforecast.Observation;
21 | import java.util.*;
22 |
23 | // A moving average forecast model is based on an artificially constructed time series in which the value for a
24 | // given time period is replaced by the mean of that value and the values for some number of preceding and succeeding time periods.
25 | public class MovingAverageModel extends TimeSeriesAbstractModel {
26 | // methods ////////////////////////////////////////////////
27 |
28 | // The model that will be used for forecasting.
29 | private ForecastingModel forecaster;
30 |
31 | // Stores the historical values.
32 | private TimeSeries.DataSequence data;
33 |
34 | public MovingAverageModel(Properties config) {
35 | super(config);
36 | modelName = "MovingAverageModel";
37 | }
38 |
39 | public void reset() {
40 | // At this point, reset does nothing.
41 | }
42 |
43 | public void train(TimeSeries.DataSequence data) {
44 | this.data = data;
45 | int n = data.size();
46 | DataPoint dp = null;
47 | DataSet observedData = new DataSet();
48 | for (int i = 0; i < n; i++) {
49 | dp = new Observation(data.get(i).value);
50 | dp.setIndependentValue("x", i);
51 | observedData.add(dp);
52 | }
53 | observedData.setTimeVariable("x");
54 |
55 | // TODO: Make window configurable.
56 | forecaster = new net.sourceforge.openforecast.models.MovingAverageModel(2);
57 | forecaster.init(observedData);
58 | initForecastErrors(forecaster, data);
59 |
60 | logger.debug(getBias() + "\t" + getMAD() + "\t" + getMAPE() + "\t" + getMSE() + "\t" + getSAE() + "\t" + 0 + "\t" + 0);
61 | }
62 |
63 | public void update(TimeSeries.DataSequence data) {
64 |
65 | }
66 |
67 | public String getModelName() {
68 | return modelName;
69 | }
70 |
71 | public void predict(TimeSeries.DataSequence sequence) throws Exception {
72 | int n = data.size();
73 | DataSet requiredDataPoints = new DataSet();
74 | DataPoint dp;
75 |
76 | for (int count = 0; count < n; count++) {
77 | dp = new Observation(0.0);
78 | dp.setIndependentValue("x", count);
79 | requiredDataPoints.add(dp);
80 | }
81 | forecaster.forecast(requiredDataPoints);
82 |
83 | // Output the results
84 | Iterator it = requiredDataPoints.iterator();
85 | int i = 0;
86 | while (it.hasNext()) {
87 | DataPoint pnt = ((DataPoint) it.next());
88 | logger.info(data.get(i).time + "," + data.get(i).value + "," + pnt.getDependentValue());
89 | sequence.set(i, (new Entry(data.get(i).time, (float) pnt.getDependentValue())));
90 | i++;
91 | }
92 | }
93 |
94 | public void toJson(JSONStringer json_out) {
95 |
96 | }
97 |
98 | public void fromJson(JSONObject json_obj) {
99 |
100 | }
101 | }
102 |
--------------------------------------------------------------------------------
/src/main/java/com/yahoo/egads/models/adm/AnomalyDetectionAbstractModel.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2015, Yahoo Inc.
3 | * Copyrights licensed under the GPL License.
4 | * See the accompanying LICENSE file for terms.
5 | */
6 |
7 | package com.yahoo.egads.models.adm;
8 |
9 | import java.util.Properties;
10 |
11 | import org.json.JSONObject;
12 | import org.json.JSONStringer;
13 | import org.slf4j.Logger;
14 | import org.slf4j.LoggerFactory;
15 |
16 | import java.util.Map;
17 | import java.util.HashMap;
18 |
19 | import com.yahoo.egads.data.JsonEncoder;
20 |
21 | public abstract class AnomalyDetectionAbstractModel implements AnomalyDetectionModel {
22 |
23 | static final Logger logger = LoggerFactory.getLogger(AnomalyDetectionAbstractModel.class);
24 | protected float sDAutoSensitivity = 3;
25 | protected float amntAutoSensitivity = (float) 0.05;
26 | protected String outputDest = "";
27 | protected String modelName;
28 |
29 | public String getModelName() {
30 | return modelName;
31 | }
32 |
33 | public String getModelType() {
34 | return "Anomaly";
35 | }
36 |
37 | @Override
38 | public void toJson(JSONStringer json_out) throws Exception {
39 | JsonEncoder.toJson(this, json_out);
40 | }
41 |
42 | @Override
43 | public void fromJson(JSONObject json_obj) throws Exception {
44 | JsonEncoder.fromJson(this, json_obj);
45 | }
46 |
47 | protected String arrayF2S (Float[] input) {
48 | String ret = new String();
49 | if (input.length == 0) {
50 | return "";
51 | }
52 | if (input[0] == null) {
53 | ret = "Inf";
54 | } else {
55 | ret = input[0].toString();
56 | }
57 | for (int ix = 1; ix < input.length; ix++) {
58 | if (input[ix] == null) {
59 | ret += ":Inf";
60 | } else {
61 | ret += ":" + input[ix].toString();
62 | }
63 | }
64 | return ret;
65 | }
66 |
67 | // Parses the THRESHOLD config into a map.
68 | protected Map parseMap(String s) {
69 | if (s == null) {
70 | return new HashMap();
71 | }
72 | String[] pairs = s.split(",");
73 | Map myMap = new HashMap();
74 | for (int i = 0; i < pairs.length; i++) {
75 | String pair = pairs[i];
76 | String[] keyValue = pair.split("#");
77 | myMap.put(keyValue[0], Float.valueOf(keyValue[1]));
78 | }
79 | return myMap;
80 | }
81 |
82 | // Force the user to define this constructor that acts as a
83 | // factory method.
84 | public AnomalyDetectionAbstractModel(Properties config) {
85 | // Set the assumed amount of anomaly in your data.
86 | if (config.getProperty("AUTO_SENSITIVITY_ANOMALY_PCNT") != null) {
87 | this.amntAutoSensitivity = new Float(config.getProperty("AUTO_SENSITIVITY_ANOMALY_PCNT"));
88 | }
89 | // Set the standard deviation for auto sensitivity.
90 | if (config.getProperty("AUTO_SENSITIVITY_SD") != null) {
91 | this.sDAutoSensitivity = new Float(config.getProperty("AUTO_SENSITIVITY_SD"));
92 | }
93 | this.outputDest = config.getProperty("OUTPUT");
94 | }
95 |
96 | @Override
97 | public boolean isDetectionWindowPoint(int maxHrsAgo, long windowStart, long anomalyTime, long startTime) {
98 | long unixTime = System.currentTimeMillis() / 1000L;
99 | // consider 'windowStart' if it is greater than or equal to first timestamp
100 | if (windowStart >= startTime) {
101 | return (anomalyTime - windowStart) > 0;
102 | } else {
103 | // use detection window as max hours specified
104 | return ((unixTime - anomalyTime) / 3600) < maxHrsAgo;
105 | }
106 | }
107 | }
108 |
--------------------------------------------------------------------------------
/src/test/java/com/yahoo/egads/TestGetAnomalyList.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2015, Yahoo Inc.
3 | * Copyrights licensed under the GPL License.
4 | * See the accompanying LICENSE file for terms.
5 | */
6 |
7 | package com.yahoo.egads;
8 |
9 | import com.yahoo.egads.control.ProcessableObject;
10 | import com.yahoo.egads.control.ProcessableObjectFactory;
11 | import com.yahoo.egads.data.Anomaly;
12 | import com.yahoo.egads.data.TimeSeries;
13 |
14 | import org.testng.Assert;
15 | import org.testng.annotations.Test;
16 |
17 | import java.io.FileInputStream;
18 | import java.io.InputStream;
19 | import java.util.ArrayList;
20 | import java.util.Arrays;
21 | import java.util.List;
22 | import java.util.Properties;
23 |
24 | /*
25 | * Test the result() from ProcessableObject.
26 | */
27 | public class TestGetAnomalyList {
28 |
29 | @Test
30 | public void testUpdateModelProcessable() throws Exception {
31 | TimeSeries series = new TimeSeries();
32 | series.append(1L, 11.11f);
33 | series.append(2L, 22.22f);
34 | series.append(3L, 33.33f);
35 | series.append(5L, 55.55f);
36 | series.append(7L, 77.77f);
37 | series.append(9L, 99.99f);
38 | InputStream is = new FileInputStream("src/test/resources/sample_config.ini");
39 | Properties p = new Properties();
40 | p.load(is);
41 | p.setProperty("TS_MODEL","OlympicModel");
42 | p.setProperty("AD_MODEL","KSigmaModel");
43 | p.setProperty("OP_TYPE","UPDATE_MODEL");
44 |
45 | ProcessableObject po = ProcessableObjectFactory.create(series, p);
46 | po.process();
47 |
48 | Assert.assertEquals(po.result().toString(), "Updated");
49 | }
50 |
51 | @Test
52 | public void testDetectAnomalyProcessable() throws Exception {
53 |
54 | InputStream is = new FileInputStream("src/test/resources/sample_config.ini");
55 | Properties p = new Properties();
56 | p.load(is);
57 | p.setProperty("TS_MODEL","OlympicModel");
58 | p.setProperty("AD_MODEL","KSigmaModel");
59 | p.setProperty("MAX_ANOMALY_TIME_AGO","0");
60 | p.setProperty("OP_TYPE","DETECT_ANOMALY");
61 |
62 | ArrayList metrics = com.yahoo.egads.utilities.FileUtils
63 | .createTimeSeries("src/test/resources/sample_input.csv", p);
64 |
65 | // generate expected result
66 | Long anomalousTime = 1417194000L;
67 | Anomaly anomaly = new Anomaly("value",null);
68 | anomaly.addInterval(anomalousTime, anomalousTime,0.0f);
69 |
70 | // actual result
71 | ProcessableObject po = ProcessableObjectFactory.create(metrics.get(0), p);
72 | po.process();
73 |
74 | Assert.assertEquals(po.result().toString(), Arrays.asList(anomaly).toString());
75 | }
76 |
77 | @Test
78 | public void testTransformInputProcessable() throws Exception {
79 |
80 | InputStream is = new FileInputStream("src/test/resources/sample_config.ini");
81 | Properties p = new Properties();
82 | p.load(is);
83 | p.setProperty("TS_MODEL","OlympicModel");
84 | p.setProperty("AD_MODEL","KSigmaModel");
85 | p.setProperty("OP_TYPE","TRANSFORM_INPUT");
86 |
87 | TimeSeries series = new TimeSeries();
88 | series.append(1L, 11.11f);
89 | series.append(2L, 22.22f);
90 | series.append(3L, 33.33f);
91 |
92 | // generate expected result
93 | Long[] times = new Long[]{1L, 2L, 3L};
94 | Float[] values = new Float[]{11.11f, 22.22f, 33.33f};
95 |
96 | // actual result
97 | ProcessableObject po = ProcessableObjectFactory.create(series, p);
98 | po.process();
99 | List actual = (List)po.result();
100 | Assert.assertEquals(actual.get(0).getValues(), values);
101 | Assert.assertEquals(actual.get(0).getTimes(), times);
102 | }
103 | }
104 |
--------------------------------------------------------------------------------
/src/main/java/com/yahoo/egads/control/ProcessableObjectFactory.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2015, Yahoo Inc.
3 | * Copyrights licensed under the GPL License.
4 | * See the accompanying LICENSE file for terms.
5 | */
6 |
7 | // A factory to create tasks based on the data and the config.
8 |
9 | package com.yahoo.egads.control;
10 |
11 | import com.yahoo.egads.data.TimeSeries;
12 |
13 | import java.lang.reflect.Constructor;
14 | import java.util.Properties;
15 |
16 | import com.yahoo.egads.models.adm.*;
17 | import com.yahoo.egads.models.tsmm.*;
18 |
19 | public class ProcessableObjectFactory {
20 |
21 | public static ProcessableObject create(TimeSeries ts, Properties config) {
22 | if (config.getProperty("OP_TYPE") == null) {
23 | throw new IllegalArgumentException("OP_TYPE is NULL");
24 | }
25 | if (config.getProperty("OP_TYPE").equals("DETECT_ANOMALY")) {
26 | ModelAdapter ma = ProcessableObjectFactory.buildTSModel(ts, config);
27 | AnomalyDetector ad = ProcessableObjectFactory.buildAnomalyModel(ts, config);
28 | return (new DetectAnomalyProcessable(ma, ad, config));
29 | } else if (config.getProperty("OP_TYPE").equals("UPDATE_MODEL")) {
30 | ModelAdapter ma = ProcessableObjectFactory.buildTSModel(ts, config);
31 | return (new UpdateModelProcessable(ma, ts.data, config));
32 | } else if (config.getProperty("OP_TYPE").equals("TRANSFORM_INPUT")) {
33 | ModelAdapter ma = ProcessableObjectFactory.buildTSModel(ts, config);
34 | return (new TransformInputProcessable(ma, config));
35 | }
36 | // Should not be here.
37 | System.err.println("Unknown OP_TYPE, returning UPDATE_MODEL ProcessableObject");
38 | ModelAdapter ma = ProcessableObjectFactory.buildTSModel(ts, config);
39 | return (new UpdateModelProcessable(ma, ts.data, config));
40 | }
41 |
42 | private static ModelAdapter buildTSModel(TimeSeries ts, Properties config) {
43 | ModelAdapter ma = null;
44 | try {
45 | Long period = (long) -1;
46 | if (config.getProperty("PERIOD") != null) {
47 | period = new Long(config.getProperty("PERIOD"));
48 | }
49 | if (period == 0) {
50 | if (ts.size() > 1) {
51 | period = ts.data.get(1).time - ts.data.get(0).time;
52 | } else {
53 | period = (long) 1;
54 | }
55 | }
56 | ma = new ModelAdapter(ts, period);
57 | String modelType = config.getProperty("TS_MODEL");
58 |
59 | Class> tsModelClass = Class.forName("com.yahoo.egads.models.tsmm." + modelType);
60 | Constructor> constructor = tsModelClass.getConstructor(Properties.class);
61 | TimeSeriesAbstractModel m = (TimeSeriesAbstractModel) constructor.newInstance(config);
62 | ma.addModel(m);
63 | } catch (Exception e) {
64 | e.printStackTrace();
65 | }
66 | return ma;
67 | }
68 |
69 | private static AnomalyDetector buildAnomalyModel(TimeSeries ts, Properties config) {
70 | AnomalyDetector ad = null;
71 | try {
72 | Long period = (long) -1;
73 | if (config.getProperty("PERIOD") != null) {
74 | period = new Long(config.getProperty("PERIOD"));
75 | }
76 | if (period == 0) {
77 | if (ts.size() > 1) {
78 | period = ts.data.get(1).time - ts.data.get(0).time;
79 | } else {
80 | period = (long) 1;
81 | }
82 | }
83 | ad = new AnomalyDetector(ts, period);
84 | String modelType = config.getProperty("AD_MODEL");
85 |
86 | Class> tsModelClass = Class.forName("com.yahoo.egads.models.adm." + modelType);
87 | Constructor> constructor = tsModelClass.getConstructor(Properties.class);
88 | ad.addModel((AnomalyDetectionAbstractModel) constructor.newInstance(config));
89 | } catch (Exception e) {
90 | e.printStackTrace();
91 | }
92 | return ad;
93 | }
94 | }
95 |
--------------------------------------------------------------------------------
/src/main/java/com/yahoo/egads/models/adm/SimpleThresholdModel.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2015, Yahoo Inc.
3 | * Copyrights licensed under the GPL License.
4 | * See the accompanying LICENSE file for terms.
5 | */
6 |
7 | // A simple thresholding model that returns an anomaly if a value is above/below a certain threshold.
8 |
9 | package com.yahoo.egads.models.adm;
10 |
11 | import java.util.Properties;
12 |
13 | import com.yahoo.egads.data.Anomaly.IntervalSequence;
14 | import com.yahoo.egads.data.Anomaly.Interval;
15 | import com.yahoo.egads.data.TimeSeries;
16 | import com.yahoo.egads.data.TimeSeries.DataSequence;
17 | import com.yahoo.egads.utilities.AutoSensitivity;
18 | import java.util.Map;
19 | import java.util.HashMap;
20 |
21 | import org.json.JSONObject;
22 | import org.json.JSONStringer;
23 |
24 | public class SimpleThresholdModel extends AnomalyDetectionAbstractModel {
25 |
26 | // The constructor takes a set of properties
27 | // needed for the simple model. This includes the sensitivity.
28 | private Map threshold;
29 | private int maxHrsAgo;
30 | private long windowStart;
31 | // Model name.
32 | private String modelName = "SimpleThresholdModel";
33 | private String simpleThrType = "AdaptiveKSigmaSensitivity";
34 |
35 | public SimpleThresholdModel(Properties config) {
36 | super(config);
37 |
38 | this.threshold = parseMap(config.getProperty("THRESHOLD"));
39 | this.maxHrsAgo = new Integer(config.getProperty("MAX_ANOMALY_TIME_AGO"));
40 | this.windowStart = new Long(config.getProperty("DETECTION_WINDOW_START_TIME"));
41 | if (config.getProperty("THRESHOLD") != null && this.threshold.isEmpty() == true) {
42 | throw new IllegalArgumentException("THRESHOLD PARSE ERROR");
43 | }
44 | if (config.getProperty("SIMPLE_THRESHOLD_TYPE") != null) {
45 | simpleThrType = config.getProperty("SIMPLE_THRESHOLD_TYPE");
46 | }
47 | }
48 |
49 | public void toJson(JSONStringer json_out) {
50 |
51 | }
52 |
53 | public void fromJson(JSONObject json_obj) {
54 |
55 | }
56 |
57 | @Override
58 | public String getType() {
59 | return "point_outlier";
60 | }
61 |
62 | public String getModelName() {
63 | return modelName;
64 | }
65 |
66 | @Override
67 | public void reset() {
68 | }
69 |
70 | @Override
71 | public void tune(DataSequence observedSeries, DataSequence expectedSeries) throws Exception {
72 | Float thr[] = null;
73 | if (simpleThrType.equals("AdaptiveKSigmaSensitivity")) {
74 | thr = AutoSensitivity.getAdaptiveKSigmaSensitivity(observedSeries.getValues(), amntAutoSensitivity);
75 | } else {
76 | thr = AutoSensitivity.getAdaptiveMaxMinSigmaSensitivity(observedSeries.getValues(), amntAutoSensitivity, sDAutoSensitivity);
77 | }
78 | if (!threshold.containsKey("max")) {
79 | threshold.put("max", thr[0]);
80 | }
81 | if (!threshold.containsKey("min")) {
82 | threshold.put("min", thr[1]);
83 | }
84 | }
85 |
86 | @Override
87 | public IntervalSequence detect(DataSequence observedSeries,
88 | DataSequence expectedSeries) throws Exception {
89 | IntervalSequence output = new IntervalSequence();
90 | Float[] thr = new Float[] {threshold.get("max"), threshold.get("min")};
91 | int n = observedSeries.size();
92 | for (int i = 0; i < n; i++) {
93 | TimeSeries.Entry entry = observedSeries.get(i);
94 |
95 | if (((thr[0] != null && entry.value >= thr[0]) || (thr[1] != null && entry.value <= thr[1])) &&
96 | (isDetectionWindowPoint(maxHrsAgo, windowStart, entry.time, observedSeries.get(0).time) || (maxHrsAgo == 0 && i == (n - 1)))) {
97 | if (thr[0] != null && entry.value >= thr[0]) {
98 | output.add(new Interval(entry.time, i, null, thr, entry.value, thr[0]));
99 | } else {
100 | output.add(new Interval(entry.time, i, null, thr, entry.value, thr[1]));
101 | }
102 | }
103 | }
104 |
105 | return output;
106 | }
107 | }
108 |
--------------------------------------------------------------------------------
/src/main/java/com/yahoo/egads/models/tsmm/RegressionModel.java:
--------------------------------------------------------------------------------
1 | //
2 | // OpenForecast - open source, general-purpose forecasting package.
3 | // Copyright (C) 2002-2011 Steven R. Gould
4 | //
5 | // This library is free software; you can redistribute it and/or
6 | // modify it under the terms of the GNU Lesser General Public
7 | // License as published by the Free Software Foundation; either
8 | // version 2.1 of the License, or (at your option) any later version.
9 | //
10 | // This library is distributed in the hope that it will be useful,
11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 | // Lesser General Public License for more details.
14 | //
15 | // You should have received a copy of the GNU Lesser General Public
16 | // License along with this library; if not, write to the Free Software
17 | // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 | //
19 |
20 | // Regression model: fits OpenForecast's single-variable linear RegressionModel
21 | // to the series, using the observation index as the independent variable.
22 |
23 | package com.yahoo.egads.models.tsmm;
24 |
25 | import com.yahoo.egads.data.*;
26 | import com.yahoo.egads.data.TimeSeries.Entry;
27 | import org.json.JSONObject;
28 | import org.json.JSONStringer;
29 | import java.util.Properties;
30 | import net.sourceforge.openforecast.DataSet;
31 | import net.sourceforge.openforecast.ForecastingModel;
32 | import net.sourceforge.openforecast.DataPoint;
33 | import net.sourceforge.openforecast.Observation;
34 | import java.util.*;
35 |
36 | // Implements a single variable linear regression model using the variable named in the constructor as the independent variable.
37 | public class RegressionModel extends TimeSeriesAbstractModel {
38 | // methods ////////////////////////////////////////////////
39 |
40 | // The model that will be used for forecasting.
41 | private ForecastingModel forecaster;
42 |
43 | // Stores the historical values.
44 | private TimeSeries.DataSequence data;
45 |
46 | public RegressionModel(Properties config) {
47 | super(config);
48 | modelName = "RegressionModel";
49 | }
50 |
51 | public void reset() {
52 | // At this point, reset does nothing.
53 | }
54 |
55 | public void train(TimeSeries.DataSequence data) {
56 | this.data = data;
57 | int n = data.size();
58 | DataPoint dp = null;
59 | DataSet observedData = new DataSet();
60 | for (int i = 0; i < n; i++) {
61 | dp = new Observation(data.get(i).value);
62 | dp.setIndependentValue("x", i);
63 | observedData.add(dp);
64 | }
65 | observedData.setTimeVariable("x");
66 |
67 | forecaster = new net.sourceforge.openforecast.models.RegressionModel("x");
68 | forecaster.init(observedData);
69 | initForecastErrors(forecaster, data);
70 |
71 | logger.debug(getBias() + "\t" + getMAD() + "\t" + getMAPE() + "\t" + getMSE() + "\t" + getSAE() + "\t" + 0 + "\t" + 0);
72 | }
73 |
74 | public void update(TimeSeries.DataSequence data) {
75 |
76 | }
77 |
78 | public String getModelName() {
79 | return modelName;
80 | }
81 |
82 | public void predict(TimeSeries.DataSequence sequence) throws Exception {
83 | int n = data.size();
84 | DataSet requiredDataPoints = new DataSet();
85 | DataPoint dp;
86 |
87 | for (int count = 0; count < n; count++) {
88 | dp = new Observation(0.0);
89 | dp.setIndependentValue("x", count);
90 | requiredDataPoints.add(dp);
91 | }
92 | forecaster.forecast(requiredDataPoints);
93 |
94 | // Output the results
95 | Iterator it = requiredDataPoints.iterator();
96 | int i = 0;
97 | while (it.hasNext()) {
98 | DataPoint pnt = ((DataPoint) it.next());
99 | logger.info(data.get(i).time + "," + data.get(i).value + "," + pnt.getDependentValue());
100 | sequence.set(i, (new Entry(data.get(i).time, (float) pnt.getDependentValue())));
101 | i++;
102 | }
103 | }
104 |
105 | public void toJson(JSONStringer json_out) {
106 |
107 | }
108 |
109 | public void fromJson(JSONObject json_obj) {
110 |
111 | }
112 | }
113 |
--------------------------------------------------------------------------------
/src/main/java/com/yahoo/egads/data/AnomalyErrorStorage.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2015, Yahoo Inc.
3 | * Copyrights licensed under the GPL License.
4 | * See the accompanying LICENSE file for terms.
5 | */
6 |
7 | package com.yahoo.egads.data;
8 |
9 | import com.yahoo.egads.data.TimeSeries.DataSequence;
10 | import java.util.Map;
11 | import java.util.HashMap;
12 | import java.util.ArrayList;
13 |
14 | public class AnomalyErrorStorage {
15 |
16 | // Denominator used in the MASE error metric.
17 | protected float maseDenom;
18 | // Maps error names to error indicies.
19 | protected Map errorToIndex;
20 | // Maps error index to error names.
21 | protected Map indexToError;
22 | boolean isInit = false;
23 |
24 | // Getter methods.
25 | public Map getErrorToIndex() {
26 | return errorToIndex;
27 | }
28 | public Map getIndexToError() {
29 | return indexToError;
30 | }
31 |
32 | // Force the user to define this constructor that acts as a
33 | // factory method.
34 | public AnomalyErrorStorage() {
35 | // Init error indicies that are filled in computeErrorMetrics method.
36 | errorToIndex = new HashMap();
37 | errorToIndex.put("mapee", 0);
38 | errorToIndex.put("mae", 1);
39 | errorToIndex.put("smape", 2);
40 | errorToIndex.put("mape", 3);
41 | errorToIndex.put("mase", 4);
42 | indexToError = new HashMap();
43 | indexToError.put(0, "mapee");
44 | indexToError.put(1, "mae");
45 | indexToError.put(2, "smape");
46 | indexToError.put(3, "mape");
47 | indexToError.put(4, "mase");
48 | }
49 |
50 | // Initializes all anomaly errors.
51 | public HashMap> initAnomalyErrors(DataSequence observedSeries, DataSequence expectedSeries) {
52 | int n = observedSeries.size();
53 |
54 | // init MASE.
55 | for (int i = 1; i < n; i++) {
56 | maseDenom += Math.abs(observedSeries.get(i).value - observedSeries.get(i - 1).value);
57 | }
58 | maseDenom = maseDenom / (n - 1);
59 | HashMap> allErrors = new HashMap>();
60 |
61 | for (int i = 0; i < n; i++) {
62 | Float[] errors = computeErrorMetrics(expectedSeries.get(i).value, observedSeries.get(i).value);
63 | for (int j = 0; j < errors.length; j++) {
64 | if (!allErrors.containsKey(indexToError.get(j))) {
65 | allErrors.put(indexToError.get(j), new ArrayList());
66 | }
67 | ArrayList tmp = allErrors.get(indexToError.get(j));
68 | tmp.add(errors[j]);
69 | allErrors.put(indexToError.get(j), tmp);
70 | }
71 | }
72 | isInit = true;
73 | return allErrors;
74 | }
75 |
76 | // Computes the standard error metrics including MAE, sMAPE, MAPE, MASE.
77 | public Float[] computeErrorMetrics(float expected, float actual) {
78 | float div = expected;
79 | if (expected == (float) 0.0) {
80 | div = (float) 0.0000000001;
81 | }
82 |
83 | // Mean Absolute Error.
84 | float mae = Math.abs(actual - expected);
85 | // Symmetric Mean Absolute Error.
86 | float smape = (200 * Math.abs(actual - expected)) / ((Math.abs(actual) + Math.abs(expected)) == 0 ? (float) 1.0 : (float) (Math.abs(actual) + Math.abs(expected)));
87 | // Mean Absolute Percentage Error.
88 | float mape = Math.abs(actual) == 0 ? (float) 0.0 : ((100 * Math.abs(actual - expected)) / (float) Math.abs(actual));
89 | // Mean Absolute Scaled Error.
90 | float mase = Math.abs(maseDenom) == 0.0 ? (float) 0.0 : Math.abs(actual - expected) / Math.abs(maseDenom);
91 | // Mean Absolute Percentage Error (scaled by the expected value).
92 | float mapee = (expected == actual) ? (float) 0.0 : Math.abs((100 * ((actual / div) - 1)));
93 |
94 | // Store all errors.
95 | Float[] errors = new Float[5];
96 | errors[0] = mapee;
97 | errors[1] = mae;
98 | errors[2] = smape;
99 | errors[3] = mape;
100 | errors[4] = mase;
101 |
102 | return errors;
103 | }
104 | }
105 |
--------------------------------------------------------------------------------
/src/main/java/com/yahoo/egads/models/tsmm/PolynomialRegressionModel.java:
--------------------------------------------------------------------------------
1 | //
2 | // OpenForecast - open source, general-purpose forecasting package.
3 | // Copyright (C) 2002-2011 Steven R. Gould
4 | //
5 | // This library is free software; you can redistribute it and/or
6 | // modify it under the terms of the GNU Lesser General Public
7 | // License as published by the Free Software Foundation; either
8 | // version 2.1 of the License, or (at your option) any later version.
9 | //
10 | // This library is distributed in the hope that it will be useful,
11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 | // Lesser General Public License for more details.
14 | //
15 | // You should have received a copy of the GNU Lesser General Public
16 | // License along with this library; if not, write to the Free Software
17 | // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 | //
19 |
20 | // Polynomial regression model: fits OpenForecast's PolynomialRegressionModel
21 | // to the series, using the observation index as the independent variable.
22 |
23 | package com.yahoo.egads.models.tsmm;
24 |
25 | import com.yahoo.egads.data.*;
26 | import com.yahoo.egads.data.TimeSeries.Entry;
27 | import org.json.JSONObject;
28 | import org.json.JSONStringer;
29 | import java.util.Properties;
30 | import net.sourceforge.openforecast.DataSet;
31 | import net.sourceforge.openforecast.ForecastingModel;
32 | import net.sourceforge.openforecast.DataPoint;
33 | import net.sourceforge.openforecast.Observation;
34 | import java.util.*;
35 |
36 | // Implements a single variable polynomial regression model using the variable named in the constructor as the independent variable.
37 | public class PolynomialRegressionModel extends TimeSeriesAbstractModel {
38 | // methods ////////////////////////////////////////////////
39 |
40 | // The model that will be used for forecasting.
41 | private ForecastingModel forecaster;
42 |
43 | // Stores the historical values.
44 | private TimeSeries.DataSequence data;
45 |
46 | public PolynomialRegressionModel(Properties config) {
47 | super(config);
48 | modelName = "PolynomialRegressionModel";
49 | }
50 |
51 | public void reset() {
52 | // At this point, reset does nothing.
53 | }
54 |
55 | public void train(TimeSeries.DataSequence data) {
56 | this.data = data;
57 | int n = data.size();
58 | DataPoint dp = null;
59 | DataSet observedData = new DataSet();
60 | for (int i = 0; i < n; i++) {
61 | dp = new Observation(data.get(i).value);
62 | dp.setIndependentValue("x", i);
63 | observedData.add(dp);
64 | }
65 | observedData.setTimeVariable("x");
66 |
67 | // TODO: Make degrees configurable.
68 | forecaster = new net.sourceforge.openforecast.models.PolynomialRegressionModel("x", 3);
69 | forecaster.init(observedData);
70 | initForecastErrors(forecaster, data);
71 |
72 | logger.debug(getBias() + "\t" + getMAD() + "\t" + getMAPE() + "\t" + getMSE() + "\t" + getSAE() + "\t" + 0 + "\t" + 0);
73 | }
74 |
75 | public void update(TimeSeries.DataSequence data) {
76 |
77 | }
78 |
79 | public String getModelName() {
80 | return modelName;
81 | }
82 |
83 | public void predict(TimeSeries.DataSequence sequence) throws Exception {
84 | int n = data.size();
85 | DataSet requiredDataPoints = new DataSet();
86 | DataPoint dp;
87 |
88 | for (int count = 0; count < n; count++) {
89 | dp = new Observation(0.0);
90 | dp.setIndependentValue("x", count);
91 | requiredDataPoints.add(dp);
92 | }
93 | forecaster.forecast(requiredDataPoints);
94 |
95 | // Output the results
96 | Iterator it = requiredDataPoints.iterator();
97 | int i = 0;
98 | while (it.hasNext()) {
99 | DataPoint pnt = ((DataPoint) it.next());
100 | logger.info(data.get(i).time + "," + data.get(i).value + "," + pnt.getDependentValue());
101 | sequence.set(i, (new Entry(data.get(i).time, (float) pnt.getDependentValue())));
102 | i++;
103 | }
104 | }
105 |
106 | public void toJson(JSONStringer json_out) {
107 |
108 | }
109 |
110 | public void fromJson(JSONObject json_obj) {
111 |
112 | }
113 | }
114 |
--------------------------------------------------------------------------------
/src/main/java/com/yahoo/egads/models/tsmm/MultipleLinearRegressionModel.java:
--------------------------------------------------------------------------------
1 | //
2 | // OpenForecast - open source, general-purpose forecasting package.
3 | // Copyright (C) 2002-2011 Steven R. Gould
4 | //
5 | // This library is free software; you can redistribute it and/or
6 | // modify it under the terms of the GNU Lesser General Public
7 | // License as published by the Free Software Foundation; either
8 | // version 2.1 of the License, or (at your option) any later version.
9 | //
10 | // This library is distributed in the hope that it will be useful,
11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 | // Lesser General Public License for more details.
14 | //
15 | // You should have received a copy of the GNU Lesser General Public
16 | // License along with this library; if not, write to the Free Software
17 | // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 | //
19 |
20 | // Wraps OpenForecast's multiple linear regression model so that it can be
21 | // used as an EGADS time-series model.
22 |
23 | package com.yahoo.egads.models.tsmm;
24 |
25 | import com.yahoo.egads.data.*;
26 | import com.yahoo.egads.data.TimeSeries.Entry;
27 | import org.json.JSONObject;
28 | import org.json.JSONStringer;
29 | import java.util.Properties;
30 | import net.sourceforge.openforecast.DataSet;
31 | import net.sourceforge.openforecast.ForecastingModel;
32 | import net.sourceforge.openforecast.DataPoint;
33 | import net.sourceforge.openforecast.Observation;
34 | import java.util.*;
35 |
36 | // Implements a multiple variable linear regression model using the variables named in the constructor as
37 | // the independent variables, or the variables passed into one of the init methods.
38 | public class MultipleLinearRegressionModel extends TimeSeriesAbstractModel {
39 | // methods ////////////////////////////////////////////////
40 |
41 | // The model that will be used for forecasting.
42 | private ForecastingModel forecaster;
43 |
44 | // Stores the historical values.
45 | private TimeSeries.DataSequence data;
46 |
47 | public MultipleLinearRegressionModel(Properties config) {
48 | super(config);
49 | modelName = "MultipleLinearRegressionModel";
50 | }
51 |
52 | public void reset() {
53 | // At this point, reset does nothing.
54 | }
55 |
56 | public void train(TimeSeries.DataSequence data) {
57 | this.data = data;
58 | int n = data.size();
59 | DataPoint dp = null;
60 | DataSet observedData = new DataSet();
61 | for (int i = 0; i < n; i++) {
62 | dp = new Observation(data.get(i).value);
63 | dp.setIndependentValue("x", i);
64 | observedData.add(dp);
65 | }
66 | observedData.setTimeVariable("x");
67 |
68 | // TODO: Make configurable.
69 | forecaster = new net.sourceforge.openforecast.models.MultipleLinearRegressionModel();
70 | forecaster.init(observedData);
71 | initForecastErrors(forecaster, data);
72 |
73 | logger.debug(getBias() + "\t" + getMAD() + "\t" + getMAPE() + "\t" + getMSE() + "\t" + getSAE() + "\t" + 0 + "\t" + 0);
74 | }
75 |
76 | public void update(TimeSeries.DataSequence data) {
77 |
78 | }
79 |
80 | public String getModelName() {
81 | return modelName;
82 | }
83 |
84 | public void predict(TimeSeries.DataSequence sequence) throws Exception {
85 | int n = data.size();
86 | DataSet requiredDataPoints = new DataSet();
87 | DataPoint dp;
88 |
89 | for (int count = 0; count < n; count++) {
90 | dp = new Observation(0.0);
91 | dp.setIndependentValue("x", count);
92 | requiredDataPoints.add(dp);
93 | }
94 | forecaster.forecast(requiredDataPoints);
95 |
96 | // Output the results
97 | Iterator it = requiredDataPoints.iterator();
98 | int i = 0;
99 | while (it.hasNext()) {
100 | DataPoint pnt = ((DataPoint) it.next());
101 | logger.info(data.get(i).time + "," + data.get(i).value + "," + pnt.getDependentValue());
102 | sequence.set(i, (new Entry(data.get(i).time, (float) pnt.getDependentValue())));
103 | i++;
104 | }
105 | }
106 |
107 | public void toJson(JSONStringer json_out) {
108 |
109 | }
110 |
111 | public void fromJson(JSONObject json_obj) {
112 |
113 | }
114 | }
115 |
--------------------------------------------------------------------------------
/src/main/java/com/yahoo/egads/models/tsmm/DoubleExponentialSmoothingModel.java:
--------------------------------------------------------------------------------
1 | //
2 | // OpenForecast - open source, general-purpose forecasting package.
3 | // Copyright (C) 2002-2011 Steven R. Gould
4 | //
5 | // This library is free software; you can redistribute it and/or
6 | // modify it under the terms of the GNU Lesser General Public
7 | // License as published by the Free Software Foundation; either
8 | // version 2.1 of the License, or (at your option) any later version.
9 | //
10 | // This library is distributed in the hope that it will be useful,
11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 | // Lesser General Public License for more details.
14 | //
15 | // You should have received a copy of the GNU Lesser General Public
16 | // License along with this library; if not, write to the Free Software
17 | // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 | //
19 |
20 | // Wraps OpenForecast's double exponential smoothing model so that it can be
21 | // used as an EGADS time-series model.
22 |
23 | package com.yahoo.egads.models.tsmm;
24 |
25 | import com.yahoo.egads.data.*;
26 | import com.yahoo.egads.data.TimeSeries.Entry;
27 | import org.json.JSONObject;
28 | import org.json.JSONStringer;
29 | import java.util.Properties;
30 | import net.sourceforge.openforecast.DataSet;
31 | import net.sourceforge.openforecast.ForecastingModel;
32 | import net.sourceforge.openforecast.DataPoint;
33 | import net.sourceforge.openforecast.Observation;
34 | import java.util.*;
35 |
36 | // Double exponential smoothing - also known as Holt exponential smoothing - is a refinement of the popular simple
37 | // exponential smoothing model but adds another component which takes into account any trend in the data.
38 | public class DoubleExponentialSmoothingModel extends TimeSeriesAbstractModel {
39 | // methods ////////////////////////////////////////////////
40 |
41 | // The model that will be used for forecasting.
42 | private ForecastingModel forecaster;
43 |
44 | // Stores the historical values.
45 | private TimeSeries.DataSequence data;
46 |
47 | public DoubleExponentialSmoothingModel(Properties config) {
48 | super(config);
49 | modelName = "DoubleExponentialSmoothingModel";
50 | }
51 |
52 | public void reset() {
53 | // At this point, reset does nothing.
54 | }
55 |
56 | public void train(TimeSeries.DataSequence data) {
57 | this.data = data;
58 | int n = data.size();
59 | DataPoint dp = null;
60 | DataSet observedData = new DataSet();
61 | for (int i = 0; i < n; i++) {
62 | dp = new Observation(data.get(i).value);
63 | dp.setIndependentValue("x", i);
64 | observedData.add(dp);
65 | }
66 | observedData.setTimeVariable("x");
67 |
68 | // TODO: Make weights configurable.
69 | forecaster = new net.sourceforge.openforecast.models.DoubleExponentialSmoothingModel(0.75, 0.1);
70 | forecaster.init(observedData);
71 | initForecastErrors(forecaster, data);
72 |
73 | logger.debug(getBias() + "\t" + getMAD() + "\t" + getMAPE() + "\t" + getMSE() + "\t" + getSAE() + "\t" + 0 + "\t" + 0);
74 | }
75 |
76 | public void update(TimeSeries.DataSequence data) {
77 |
78 | }
79 |
80 | public String getModelName() {
81 | return modelName;
82 | }
83 |
84 | public void predict(TimeSeries.DataSequence sequence) throws Exception {
85 | int n = data.size();
86 | DataSet requiredDataPoints = new DataSet();
87 | DataPoint dp;
88 |
89 | for (int count = 0; count < n; count++) {
90 | dp = new Observation(0.0);
91 | dp.setIndependentValue("x", count);
92 | requiredDataPoints.add(dp);
93 | }
94 | forecaster.forecast(requiredDataPoints);
95 |
96 | // Output the results
97 | Iterator it = requiredDataPoints.iterator();
98 | int i = 0;
99 | while (it.hasNext()) {
100 | DataPoint pnt = ((DataPoint) it.next());
101 | logger.info(data.get(i).time + "," + data.get(i).value + "," + pnt.getDependentValue());
102 | sequence.set(i, (new Entry(data.get(i).time, (float) pnt.getDependentValue())));
103 | i++;
104 | }
105 | }
106 |
107 | public void toJson(JSONStringer json_out) {
108 |
109 | }
110 |
111 | public void fromJson(JSONObject json_obj) {
112 |
113 | }
114 | }
115 |
--------------------------------------------------------------------------------
/src/main/java/com/yahoo/egads/models/tsmm/WeightedMovingAverageModel.java:
--------------------------------------------------------------------------------
1 | //
2 | // OpenForecast - open source, general-purpose forecasting package.
3 | // Copyright (C) 2002-2011 Steven R. Gould
4 | //
5 | // This library is free software; you can redistribute it and/or
6 | // modify it under the terms of the GNU Lesser General Public
7 | // License as published by the Free Software Foundation; either
8 | // version 2.1 of the License, or (at your option) any later version.
9 | //
10 | // This library is distributed in the hope that it will be useful,
11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 | // Lesser General Public License for more details.
14 | //
15 | // You should have received a copy of the GNU Lesser General Public
16 | // License along with this library; if not, write to the Free Software
17 | // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 | //
19 |
20 | // Wraps OpenForecast's weighted moving average model so that it can be
21 | // used as an EGADS time-series model.
22 |
23 | package com.yahoo.egads.models.tsmm;
24 |
25 | import com.yahoo.egads.data.*;
26 | import com.yahoo.egads.data.TimeSeries.Entry;
27 | import org.json.JSONObject;
28 | import org.json.JSONStringer;
29 | import java.util.Properties;
30 | import net.sourceforge.openforecast.DataSet;
31 | import net.sourceforge.openforecast.ForecastingModel;
32 | import net.sourceforge.openforecast.DataPoint;
33 | import net.sourceforge.openforecast.Observation;
34 | import java.util.*;
35 |
36 | // A weighted moving average forecast model is based on an artificially constructed time series in which the value
37 | // for a given time period is replaced by the weighted mean of that value and the values for some number of preceding time periods.
38 | public class WeightedMovingAverageModel extends TimeSeriesAbstractModel {
39 | // methods ////////////////////////////////////////////////
40 |
41 | // The model that will be used for forecasting.
42 | private ForecastingModel forecaster;
43 |
44 | // Stores the historical values.
45 | private TimeSeries.DataSequence data;
46 |
47 | public WeightedMovingAverageModel(Properties config) {
48 | super(config);
49 | modelName = "WeightedMovingAverageModel";
50 | }
51 |
52 | public void reset() {
53 | // At this point, reset does nothing.
54 | }
55 |
56 | public void train(TimeSeries.DataSequence data) {
57 | this.data = data;
58 | int n = data.size();
59 | DataPoint dp = null;
60 | DataSet observedData = new DataSet();
61 | for (int i = 0; i < n; i++) {
62 | dp = new Observation(data.get(i).value);
63 | dp.setIndependentValue("x", i);
64 | observedData.add(dp);
65 | }
66 | observedData.setTimeVariable("x");
67 |
68 | // TODO: Make weights configurable.
69 | forecaster = new net.sourceforge.openforecast.models.WeightedMovingAverageModel(new double[]{0.75, 0.25});
70 | forecaster.init(observedData);
71 |
72 | initForecastErrors(forecaster, data);
73 |
74 | logger.debug(getBias() + "\t" + getMAD() + "\t" + getMAPE() + "\t" + getMSE() + "\t" + getSAE() + "\t" + 0 + "\t" + 0);
75 | }
76 |
77 | public void update(TimeSeries.DataSequence data) {
78 |
79 | }
80 |
81 | public String getModelName() {
82 | return modelName;
83 | }
84 |
85 | public void predict(TimeSeries.DataSequence sequence) throws Exception {
86 | int n = data.size();
87 | DataSet requiredDataPoints = new DataSet();
88 | DataPoint dp;
89 |
90 | for (int count = 0; count < n; count++) {
91 | dp = new Observation(0.0);
92 | dp.setIndependentValue("x", count);
93 | requiredDataPoints.add(dp);
94 | }
95 | forecaster.forecast(requiredDataPoints);
96 |
97 | // Output the results
98 | Iterator it = requiredDataPoints.iterator();
99 | int i = 0;
100 | while (it.hasNext()) {
101 | DataPoint pnt = ((DataPoint) it.next());
102 | logger.info(data.get(i).time + "," + data.get(i).value + "," + pnt.getDependentValue());
103 | sequence.set(i, (new Entry(data.get(i).time, (float) pnt.getDependentValue())));
104 | i++;
105 | }
106 | }
107 |
108 | public void toJson(JSONStringer json_out) {
109 |
110 | }
111 |
112 | public void fromJson(JSONObject json_obj) {
113 |
114 | }
115 | }
116 |
--------------------------------------------------------------------------------
/src/main/java/com/yahoo/egads/models/tsmm/SpectralSmoother.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2015, Yahoo Inc.
3 | * Copyrights licensed under the GPL License.
4 | * See the accompanying LICENSE file for terms.
5 | */
6 |
7 | package com.yahoo.egads.models.tsmm;
8 |
9 | import java.util.Properties;
10 | import java.util.Hashtable;
11 |
12 | import com.yahoo.egads.data.TimeSeries.Entry;
13 | import com.yahoo.egads.data.TimeSeries.DataSequence;
14 | import com.yahoo.egads.utilities.SpectralMethods;
15 | import com.yahoo.egads.utilities.SpectralMethods.FilteringMethod;
16 |
17 | /**
18 | * SpectralSmoother implements the smoothing technique based on the Singular Value Decomposition (SVD) of the input time-series' Hankel matrix.
19 | * For further details on the methodology please refer to utilities/SpectralMethods.java documentation.
20 | *
21 | * The input parameters:
22 | * 1. 'WINDOW_SIZE' determines the size of the sliding window for spectral smoothing. Typically should be larger than the
23 | * largest seasonality present in the time-series.
24 | * 2. 'FILTERING_METHOD' determines the filtering method to be used by spectral smoothing.
25 | * Possible values: K_GAP, VARIANCE, EXPLICIT, SMOOTHNESS, EIGEN_RATIO, GAP_RATIO
26 | * Refer to utilities/SpectralMethods.java documentation for more details.
27 | * 3. 'FILTERING_PARAM' determines the tuning parameter for the specified filtering method.
28 | * Refer to utilities/SpectralMethods.java documentation for more details.
29 | *
30 | * @author amizadeh
31 | *
32 | */
33 |
34 | public class SpectralSmoother extends TimeSeriesAbstractModel {
35 |
36 | protected Hashtable map = new Hashtable();
37 | protected int windowSize;
38 | protected FilteringMethod method;
39 | protected double methodParameter;
40 |
41 | public SpectralSmoother(Properties config) {
42 | super(config);
43 | if (config.getProperty("FILTERING_METHOD") == null) {
44 | throw new IllegalArgumentException("FILTERING_METHOD is NULL");
45 | }
46 |
47 | if (config.getProperty("WINDOW_SIZE") == null) {
48 | throw new IllegalArgumentException("WINDOW_SIZE is NULL");
49 | }
50 |
51 | this.windowSize = new Integer(config.getProperty("WINDOW_SIZE"));
52 | this.method = FilteringMethod.valueOf(config.getProperty("FILTERING_METHOD"));
53 |
54 | if (config.getProperty("FILTERING_PARAM") == null) {
55 | switch (this.method) {
56 | case VARIANCE:
57 | this.methodParameter = 0.99;
58 | break;
59 |
60 | case SMOOTHNESS:
61 | this.methodParameter = 0.97;
62 | break;
63 |
64 | case K_GAP:
65 | this.methodParameter = 8;
66 | break;
67 |
68 | case EXPLICIT:
69 | this.methodParameter = 10;
70 | break;
71 |
72 | case EIGEN_RATIO:
73 | this.methodParameter = 0.1;
74 | break;
75 |
76 | case GAP_RATIO:
77 | this.methodParameter = 0.01;
78 | break;
79 |
80 | default:
81 | throw new IllegalArgumentException("Invalid FILTERING_METHOD value");
82 | }
83 | } else {
84 | this.methodParameter = new Double(config.getProperty("FILTERING_PARAM"));
85 | }
86 | }
87 |
88 | @Override
89 | public void reset() {
90 | map.clear();
91 | }
92 |
93 | @Override
94 | public void train(DataSequence data) throws Exception {
95 | this.reset();
96 | DataSequence smoothedData = SpectralMethods.mFilter(data, windowSize, method, methodParameter);
97 |
98 | for (Entry e : smoothedData) {
99 | map.put(e.logicalIndex, e.value);
100 | }
101 | }
102 |
103 | @Override
104 | public void update(DataSequence data) throws Exception {
105 |
106 | DataSequence smoothedData = SpectralMethods.mFilter(data, windowSize, method, methodParameter);
107 |
108 | for (Entry e : smoothedData) {
109 | map.put(e.logicalIndex, e.value);
110 | }
111 | }
112 |
113 | @Override
114 | public void predict(DataSequence sequence) throws Exception {
115 |
116 | for (Entry e : sequence) {
117 | Float val = map.get(e.logicalIndex);
118 | e.value = (val == null) ? 0 : val;
119 | }
120 | }
121 |
122 | @Override
123 | public String getModelName() {
124 | return "SpectralSmoother";
125 | }
126 |
127 | }
128 |
--------------------------------------------------------------------------------
/src/main/java/com/yahoo/egads/models/tsmm/AutoForecastModel.java:
--------------------------------------------------------------------------------
1 | //
2 | // OpenForecast - open source, general-purpose forecasting package.
3 | // Copyright (C) 2002-2011 Steven R. Gould
4 | //
5 | // This library is free software; you can redistribute it and/or
6 | // modify it under the terms of the GNU Lesser General Public
7 | // License as published by the Free Software Foundation; either
8 | // version 2.1 of the License, or (at your option) any later version.
9 | //
10 | // This library is distributed in the hope that it will be useful,
11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 | // Lesser General Public License for more details.
14 | //
15 | // You should have received a copy of the GNU Lesser General Public
16 | // License along with this library; if not, write to the Free Software
17 | // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 | //
19 |
20 | // Trains each of the available EGADS forecasting models and delegates
21 | // predictions to the one judged best.
22 |
23 | package com.yahoo.egads.models.tsmm;
24 |
25 | import com.yahoo.egads.data.*;
26 | import org.json.JSONObject;
27 | import org.json.JSONStringer;
28 | import java.util.Properties;
29 |
30 | // Picks the best model from the available EGADS models.
31 | public class AutoForecastModel extends TimeSeriesAbstractModel {
32 | // Stores the properties file to init other models.
33 | private Properties p;
34 |
35 | // Stores the model.
36 | private TimeSeriesAbstractModel myModel = null;
37 |
38 | public AutoForecastModel(Properties config) {
39 | super(config);
40 | modelName = "AutoForecastModel";
41 | this.p = config;
42 | }
43 |
44 | public void reset() {
45 | // At this point, reset does nothing.
46 | }
47 |
48 | public void train(TimeSeries.DataSequence data) {
49 | // Init all.
50 | OlympicModel olympModel = new OlympicModel(p);
51 | MovingAverageModel movingAvg = new MovingAverageModel(p);
52 | MultipleLinearRegressionModel mlReg = new MultipleLinearRegressionModel(p);
53 | NaiveForecastingModel naive = new NaiveForecastingModel(p);
54 | PolynomialRegressionModel poly = new PolynomialRegressionModel(p);
55 | RegressionModel regr = new RegressionModel(p);
56 | SimpleExponentialSmoothingModel simpleExp = new SimpleExponentialSmoothingModel(p);
57 | TripleExponentialSmoothingModel tripleExp = new TripleExponentialSmoothingModel(p);
58 | WeightedMovingAverageModel weightAvg = new WeightedMovingAverageModel(p);
59 | DoubleExponentialSmoothingModel doubleExp = new DoubleExponentialSmoothingModel(p);
60 |
61 | // Train all.
62 | olympModel.train(data);
63 | movingAvg.train(data);
64 | mlReg.train(data);
65 | naive.train(data);
66 | poly.train(data);
67 | regr.train(data);
68 | simpleExp.train(data);
69 | tripleExp.train(data);
70 | weightAvg.train(data);
71 | doubleExp.train(data);
72 |
73 | // Pick best.
74 | if (betterThan(olympModel, myModel)) {
75 | myModel = olympModel;
76 | }
77 | if (betterThan(movingAvg, myModel)) {
78 | myModel = movingAvg;
79 | }
80 | if (betterThan(mlReg, myModel)) {
81 | myModel = mlReg;
82 | }
83 | if (betterThan(naive, myModel)) {
84 | myModel = naive;
85 | }
86 | if (betterThan(poly, myModel)) {
87 | myModel = poly;
88 | }
89 | if (betterThan(regr, myModel)) {
90 | myModel = regr;
91 | }
92 | if (betterThan(simpleExp, myModel)) {
93 | myModel = simpleExp;
94 | }
95 | if (betterThan(tripleExp, myModel)) {
96 | myModel = tripleExp;
97 | }
98 | if (betterThan(weightAvg, myModel)) {
99 | myModel = weightAvg;
100 | }
101 | if (betterThan(doubleExp, myModel)) {
102 | myModel = doubleExp;
103 | }
104 |
105 | initForecastErrors(myModel, data);
106 |
107 | logger.debug(getBias() + "\t" + getMAD() + "\t" + getMAPE() + "\t" + getMSE() + "\t" + getSAE() + "\t" + 0 + "\t" + 0);
108 | }
109 |
110 | public void update(TimeSeries.DataSequence data) {
111 |
112 | }
113 |
114 | public String getModelName() {
115 | return modelName;
116 | }
117 |
118 | public void predict(TimeSeries.DataSequence sequence) throws Exception {
119 | myModel.predict(sequence);
120 | }
121 |
122 | public void toJson(JSONStringer json_out) {
123 |
124 | }
125 |
126 | public void fromJson(JSONObject json_obj) {
127 |
128 | }
129 | }
130 |
--------------------------------------------------------------------------------
/src/test/java/com/yahoo/egads/data/TestWeightedValue.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2016, Yahoo Inc.
3 | * Copyrights licensed under the GPL License.
4 | * See the accompanying LICENSE file for terms.
5 | */
6 | package com.yahoo.egads.data;
7 |
8 | import static org.testng.AssertJUnit.assertEquals;
9 | import static org.testng.AssertJUnit.assertTrue;
10 |
11 | import java.util.List;
12 |
13 | import org.testng.annotations.Test;
14 |
15 | import com.google.common.collect.Lists;
16 |
17 | public class TestWeightedValue {
18 |
19 | @Test
20 | public void aggregate() throws Exception {
21 | final List values = Lists.newArrayList();
22 | values.add(new WeightedValue(1.5D, 5));
23 | values.add(new WeightedValue(42.5D, 1));
24 | values.add(new WeightedValue(-1.5D, 0));
25 | values.add(new WeightedValue(3.5D, 1));
26 |
27 | assertEquals(-1.5D, WeightedValue.aggregate(values, "MIN"), 0.0001);
28 | assertEquals(42.5D, WeightedValue.aggregate(values, "MAX"), 0.0001);
29 | assertEquals(46.0D, WeightedValue.aggregate(values, "SUM"), 0.0001);
30 | assertEquals(4D, WeightedValue.aggregate(values, "COUNT"), 0.0001);
31 | assertEquals(3.5D, WeightedValue.aggregate(values, "MEDIAN"), 0.0001);
32 | assertEquals(11.5D, WeightedValue.aggregate(values, "AVG"), 0.0001);
33 | assertEquals(7.6428D, WeightedValue.aggregate(values, "WAVG"), 0.0001);
34 |
35 | // add some NaNs and it shouldn't change
36 | values.clear();
37 | values.add(new WeightedValue(1.5D, 5));
38 | values.add(new WeightedValue(42.5D, 1));
39 | values.add(new WeightedValue(Double.NaN, 1));
40 | values.add(new WeightedValue(-1.5D, 0));
41 | values.add(new WeightedValue(Double.NaN, 4));
42 | values.add(new WeightedValue(3.5D, 1));
43 | values.add(new WeightedValue(Double.NaN, 5));
44 |
45 | assertEquals(-1.5D, WeightedValue.aggregate(values, "MIN"), 0.0001);
46 | assertEquals(42.5D, WeightedValue.aggregate(values, "MAX"), 0.0001);
47 | assertEquals(46.0D, WeightedValue.aggregate(values, "SUM"), 0.0001);
48 | assertEquals(4D, WeightedValue.aggregate(values, "COUNT"), 0.0001);
49 | assertEquals(3.5D, WeightedValue.aggregate(values, "MEDIAN"), 0.0001);
50 | assertEquals(11.5D, WeightedValue.aggregate(values, "AVG"), 0.0001);
51 | assertEquals(7.6428D, WeightedValue.aggregate(values, "WAVG"), 0.0001);
52 |
53 | // all nans should return nan (except count!)
54 | values.clear();
55 | values.add(new WeightedValue(Double.NaN, 1));
56 | values.add(new WeightedValue(Double.NaN, 2));
57 | values.add(new WeightedValue(Double.NaN, 5));
58 |
59 | assertTrue(Double.isNaN(WeightedValue.aggregate(values, "MIN")));
60 | assertTrue(Double.isNaN(WeightedValue.aggregate(values, "MAX")));
61 | assertTrue(Double.isNaN(WeightedValue.aggregate(values, "SUM")));
62 | assertEquals(0, WeightedValue.aggregate(values, "COUNT"), 0.0001);
63 | assertTrue(Double.isNaN(WeightedValue.aggregate(values, "MEDIAN")));
64 | assertTrue(Double.isNaN(WeightedValue.aggregate(values, "AVG")));
65 | assertTrue(Double.isNaN(WeightedValue.aggregate(values, "WAVG")));
66 |
67 | // empty always returns Nan.
68 | values.clear();
69 |
70 | assertTrue(Double.isNaN(WeightedValue.aggregate(values, "MIN")));
71 | assertTrue(Double.isNaN(WeightedValue.aggregate(values, "MAX")));
72 | assertTrue(Double.isNaN(WeightedValue.aggregate(values, "SUM")));
73 | assertEquals(0, WeightedValue.aggregate(values, "COUNT"), 0.0001);
74 | assertTrue(Double.isNaN(WeightedValue.aggregate(values, "MEDIAN")));
75 | assertTrue(Double.isNaN(WeightedValue.aggregate(values, "AVG")));
76 | assertTrue(Double.isNaN(WeightedValue.aggregate(values, "WAVG")));
77 | }
78 |
79 | @Test (expectedExceptions = IllegalArgumentException.class)
80 | public void aggregateNullList() throws Exception {
81 | WeightedValue.aggregate(null, "MIN");
82 | }
83 |
84 | @Test (expectedExceptions = IllegalArgumentException.class)
85 | public void aggregateNullAgg() throws Exception {
86 | final List values = Lists.newArrayList();
87 | values.add(new WeightedValue(1.5D, 5));
88 | WeightedValue.aggregate(values, null);
89 | }
90 |
91 | @Test (expectedExceptions = IllegalArgumentException.class)
92 | public void aggregateEmptyAgg() throws Exception {
93 | final List values = Lists.newArrayList();
94 | values.add(new WeightedValue(1.5D, 5));
95 | WeightedValue.aggregate(values, "");
96 | }
97 |
98 | @Test (expectedExceptions = IllegalArgumentException.class)
99 | public void aggregateUnknownAgg() throws Exception {
100 | final List values = Lists.newArrayList();
101 | values.add(new WeightedValue(1.5D, 5));
102 | WeightedValue.aggregate(values, "NOTIMPLEMENTED");
103 | }
104 | }
105 |
--------------------------------------------------------------------------------
/src/test/resources/sample_config.ini:
--------------------------------------------------------------------------------
1 | ###################
2 | ### Main Config ###
3 | ###################
4 |
5 | # Specifies the detection window start time in timeseries.
6 | # If you want to use the following MAX_ANOMALY_TIME_AGO
7 | # make this parameter '0'
8 | DETECTION_WINDOW_START_TIME 0
9 |
10 | # Only show anomalies no older than this.
11 | # If this is set to 0, then only output an anomaly
12 | # if it occurs on the last time-stamp.
13 | MAX_ANOMALY_TIME_AGO 999999999
14 |
15 | # Denotes by how much the time-series should be aggregated.
16 | # If set to 1 or less, this setting is ignored.
17 | AGGREGATION 1
18 |
19 | # OP_TYPE specifies the operation type.
20 | # Options: DETECT_ANOMALY,
21 | # UPDATE_MODEL,
22 | # TRANSFORM_INPUT
23 | OP_TYPE DETECT_ANOMALY
24 |
25 | # TS_MODEL specifies the time-series
26 | # model type.
27 | # Options: AutoForecastModel
28 | # DoubleExponentialSmoothingModel
29 | # MovingAverageModel
30 | # MultipleLinearRegressionModel
31 | # NaiveForecastingModel
32 | # OlympicModel
33 | # PolynomialRegressionModel
34 | # RegressionModel
35 | # SimpleExponentialSmoothingModel
36 | # TripleExponentialSmoothingModel
37 | # WeightedMovingAverageModel
38 | # SpectralSmoother
39 | # NullModel
40 | TS_MODEL OlympicModel
41 |
42 | # AD_MODEL specifies the anomaly-detection
43 | # model type.
44 | # Options: ExtremeLowDensityModel
45 | # AdaptiveKernelDensityChangePointDetector
46 | # KSigmaModel
47 | # NaiveModel
48 | # DBScanModel
49 | # SimpleThresholdModel
50 | AD_MODEL ExtremeLowDensityModel
51 |
52 | # Type of the simple threshold model.
53 | # Options: AdaptiveMaxMinSigmaSensitivity
54 | # AdaptiveKSigmaSensitivity
55 | # SIMPLE_THRESHOLD_TYPE
56 |
57 | # Specifies the input src.
58 | # Options: STDIN
59 | # CSV
60 | INPUT CSV
61 |
62 | # Specifies the output src.
63 | # Options: STD_OUT,
64 | # ANOMALY_DB
65 | # GUI
66 | # PLOT
67 | OUTPUT STD_OUT
68 |
69 | # THRESHOLD specifies the threshold (e.g., sensitivity) for anomaly detection model.
70 | # Comment out to auto-detect all thresholds.
71 | # Options: mapee,mae,smape,mape,mase,
72 | # or single numeric for simple threshold model.
73 | # THRESHOLD mape#10,mase#15
74 |
75 | #####################################
76 | ### Olympic Forecast Model Config ###
77 | #####################################
78 |
79 | # The possible time-shifts for Olympic Scoring.
80 | TIME_SHIFTS 0,1
81 |
82 | # The possible base windows for Olympic Scoring.
83 | BASE_WINDOWS 24,168
84 |
85 | # Period specifies the periodicity of the
86 | # time-series (e.g., the difference between successive time-stamps).
87 | # Options: (numeric)
88 | # 0 - auto detect.
89 | # -1 - disable.
90 | PERIOD -1
91 |
92 | # Fill missing values.
93 | # Options: 0,1
94 | FILL_MISSING 0
95 |
96 | # NUM_WEEKS specifies the number of weeks
97 | # to use in OlympicScoring.
98 | NUM_WEEKS 8
99 |
100 | # NUM_TO_DROP specifies the number of
101 | # highest and lowest points to drop.
102 | NUM_TO_DROP 0
103 |
104 | # If dynamic parameters is set to 1, then
105 | # EGADS will dynamically vary parameters (NUM_WEEKS)
106 | # to produce the best fit.
107 | DYNAMIC_PARAMETERS 0
108 |
109 | ############################
110 | ### NaiveModel Config ###
111 | ############################
112 |
113 | # Window size where the spike is to be found.
114 | WINDOW_SIZE 0.1
115 |
116 | ###################################################
117 | ### ExtremeLowDensityModel & DBScanModel Config ###
118 | ###################################################
119 |
120 | # Denotes the expected % of anomalies
121 | # in your data.
122 | AUTO_SENSITIVITY_ANOMALY_PCNT 0.01
123 |
124 | # Refers to the cluster standard deviation.
125 | AUTO_SENSITIVITY_SD 3.0
126 |
127 | #######################################################
128 | ### AdaptiveKernelDensityChangePointDetector Config ###
129 | #######################################################
130 |
131 | # Change point detection parameters
132 | PRE_WINDOW_SIZE 48
133 | POST_WINDOW_SIZE 48
134 | CONFIDENCE 0.8
135 |
136 | ###############################
137 | ### SpectralSmoother Config ###
138 | ###############################
139 |
140 | # WINDOW_SIZE should be greater than the length of the longest important seasonality.
141 | # By default it is set to 192 = 8 * 24, which covers 8 days (> 1 week) of an hourly time-series.
142 | WINDOW_SIZE 192
143 |
144 | # FILTERING_METHOD specifies the filtering method for Spectral Smoothing
145 | # Options: GAP_RATIO (Recommended: FILTERING_PARAM = 0.01)
146 | # EIGEN_RATIO (Recommended: FILTERING_PARAM = 0.1)
147 | # EXPLICIT (Recommended: FILTERING_PARAM = 10)
148 | # K_GAP (Recommended: FILTERING_PARAM = 8)
149 | # VARIANCE (Recommended: FILTERING_PARAM = 0.99)
150 | # SMOOTHNESS (Recommended: FILTERING_PARAM = 0.97)
151 | FILTERING_METHOD GAP_RATIO
152 |
153 | FILTERING_PARAM 0.01
154 |
155 | ##############################################
156 | ### TripleExponentialSmoothingModel Config ###
157 | ##############################################
158 |
159 | # Smoothing factor for the level component
160 | ALPHA 0.75
161 |
162 | # Smoothing factor for the trend component
163 | BETA 0.001
164 |
165 | # Smoothing factor for the seasonal component
166 | GAMMA 0.001
167 |
--------------------------------------------------------------------------------
/src/main/java/com/yahoo/egads/utilities/ListUtils.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2015, Yahoo Inc.
3 | * Copyrights licensed under the GPL License.
4 | * See the accompanying LICENSE file for terms.
5 | */
6 |
7 | // ListUtils provides some basic math operation on numerical lists
8 |
9 | package com.yahoo.egads.utilities;
10 |
11 | import java.util.Collections;
12 | import java.util.Iterator;
13 | import java.util.LinkedList;
14 | import java.util.List;
15 |
16 | public class ListUtils {
17 |
18 | public static float sumQ(List Q) {
19 | float res = 0;
20 | for (float f : Q) {
21 | res += f;
22 | }
23 |
24 | return res;
25 | }
26 |
27 | public static float sum2Q(List Q) {
28 | float res = 0;
29 | for (float f : Q) {
30 | res += (f * f);
31 | }
32 |
33 | return res;
34 | }
35 |
36 | public static void repQ(LinkedList Q, float value, int repitition) {
37 | Q.clear();
38 | for (int i = 0; i < repitition; ++i) {
39 | Q.addLast(value);
40 | }
41 | }
42 |
43 | public static float kernelSum(float x, List mean, List sds) {
44 | float res = 0;
45 | float sd = 1;
46 | Iterator it = sds.iterator();
47 | for (float m : mean) {
48 | if (it.hasNext()) {
49 | sd = it.next();
50 | }
51 |
52 | res += Math.exp(-Math.pow(x - m, 2) / (2 * Math.pow(sd, 2))) / (sd * Math.sqrt(2 * Math.PI));
53 | }
54 |
55 | return res;
56 | }
57 |
58 | public static float kernelSubSum(float x, List mean, List sds, int from, int to) {
59 | float res = 0;
60 | float sd = 1;
61 | Iterator it = sds.iterator();
62 | int i = 0;
63 |
64 | for (float m : mean) {
65 | if (it.hasNext()) {
66 | sd = it.next();
67 | }
68 |
69 | if (i >= from && i <= to) {
70 | res += Math.exp(-Math.pow(x - m, 2) / (2 * Math.pow(sd, 2))) / (sd * Math.sqrt(2 * Math.PI));
71 | }
72 |
73 | i++;
74 | if (i > to) {
75 | break;
76 | }
77 | }
78 |
79 | return res;
80 | }
81 |
82 | public static float sumLog(List Q) {
83 | float res = 0;
84 |
85 | for (float f : Q) {
86 | res += Math.log(f);
87 | }
88 |
89 | return res;
90 | }
91 |
92 | public static LinkedList kernelQ(List Q, List mean, List sds) {
93 | LinkedList res = new LinkedList();
94 | float sd = 1;
95 | float m = 0;
96 | Iterator it1 = mean.iterator();
97 | Iterator it2 = sds.iterator();
98 |
99 | for (float x : Q) {
100 | if (it1.hasNext()) {
101 | m = it1.next();
102 | }
103 |
104 | if (it2.hasNext()) {
105 | sd = it2.next();
106 | }
107 |
108 | res.addLast((float) (Math.exp(-Math.pow(x - m, 2) / (2 * Math.pow(sd, 2))) / (sd * Math.sqrt(2 * Math.PI))));
109 | }
110 |
111 | return res;
112 | }
113 |
114 | public static void addQ(LinkedList Q, List P) {
115 | Iterator it = P.iterator();
116 | float p = 0;
117 | LinkedList temp = new LinkedList();
118 |
119 | for (float q : Q) {
120 | if (it.hasNext()) {
121 | p = it.next();
122 | }
123 |
124 | temp.addLast(p + q);
125 | }
126 |
127 | Q.clear();
128 | Q.addAll(temp);
129 | }
130 |
131 | public static void subtractQ(LinkedList Q, List P) {
132 | Iterator it = P.iterator();
133 | float p = 0;
134 | LinkedList temp = new LinkedList();
135 |
136 | for (float q : Q) {
137 | if (it.hasNext()) {
138 | p = it.next();
139 | }
140 |
141 | temp.addLast(q - p);
142 | }
143 |
144 | Q.clear();
145 | Q.addAll(temp);
146 | }
147 |
148 | public static void multiplyQ(LinkedList Q, List P) {
149 | Iterator it = P.iterator();
150 | float p = 0;
151 | LinkedList temp = new LinkedList();
152 |
153 | for (float q : Q) {
154 | if (it.hasNext()) {
155 | p = it.next();
156 | }
157 |
158 | temp.addLast(q * p);
159 | }
160 |
161 | Q.clear();
162 | Q.addAll(temp);
163 | }
164 |
165 | public static LinkedList maxQ(List Q, float m) {
166 | LinkedList temp = new LinkedList();
167 |
168 | for (float q : Q) {
169 | temp.addLast(Math.max(q, m));
170 | }
171 |
172 | return temp;
173 | }
174 |
175 | public static float quantile(List Q, float probability) {
176 | Collections.sort(Q);
177 | int n = Q.size();
178 |
179 | float index = (n - 1) * probability;
180 | int lo = (int) Math.floor(index);
181 | int hi = (int) Math.ceil(index);
182 | float h = index - lo;
183 | float qs = (1 - h) * Q.get(lo) + h * Q.get(hi);
184 |
185 | return qs;
186 | }
187 | }
188 |
--------------------------------------------------------------------------------
/src/main/java/com/yahoo/egads/models/tsmm/TripleExponentialSmoothingModel.java:
--------------------------------------------------------------------------------
1 | //
2 | // OpenForecast - open source, general-purpose forecasting package.
3 | // Copyright (C) 2002-2011 Steven R. Gould
4 | //
5 | // This library is free software; you can redistribute it and/or
6 | // modify it under the terms of the GNU Lesser General Public
7 | // License as published by the Free Software Foundation; either
8 | // version 2.1 of the License, or (at your option) any later version.
9 | //
10 | // This library is distributed in the hope that it will be useful,
11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 | // Lesser General Public License for more details.
14 | //
15 | // You should have received a copy of the GNU Lesser General Public
16 | // License along with this library; if not, write to the Free Software
17 | // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 | //
19 |
20 | // Triple exponential smoothing model: wraps the OpenForecast implementation and
21 | // uses its forecasts as the expected values of the time-series.
22 |
23 | package com.yahoo.egads.models.tsmm;
24 |
25 | import com.yahoo.egads.data.*;
26 | import com.yahoo.egads.data.TimeSeries.Entry;
27 | import org.json.JSONObject;
28 | import org.json.JSONStringer;
29 | import java.util.Properties;
30 | import net.sourceforge.openforecast.DataSet;
31 | import net.sourceforge.openforecast.ForecastingModel;
32 | import net.sourceforge.openforecast.DataPoint;
33 | import net.sourceforge.openforecast.Observation;
34 | import java.util.*;
35 |
36 | // Triple exponential smoothing - also known as the Winters method - is a refinement of the popular double exponential
37 | // smoothing model but adds another component which takes into account any seasonality - or periodicity - in the data.
38 | public class TripleExponentialSmoothingModel extends TimeSeriesAbstractModel {
39 | // methods ////////////////////////////////////////////////
40 |
41 | // The model that will be used for forecasting.
42 | private ForecastingModel forecaster;
43 |
44 | // Stores the historical values.
45 | private TimeSeries.DataSequence data;
46 |
47 | //Store the smoothing factors for level, trend and seasonality
48 | private final double alpha;
49 | private final double beta;
50 | private final double gamma;
51 |
52 | public TripleExponentialSmoothingModel(Properties config) {
53 | super(config);
54 |
55 | String temp = config.getProperty("ALPHA", "0.75");
56 | if (temp == null || temp.isEmpty()) {
57 | throw new IllegalArgumentException("ALPHA is required, "
58 | + "e.g. 0.2 or 0.5");
59 | }
60 | alpha = Double.parseDouble(temp);
61 | temp = config.getProperty("BETA", "0.001");
62 | if (temp == null || temp.isEmpty()) {
63 | throw new IllegalArgumentException("BETA is required, "
64 | + "e.g. 0.2 or 0.5");
65 | }
66 | beta = Double.parseDouble(temp);
67 | temp = config.getProperty("GAMMA", "0.001");
68 | if (temp == null || temp.isEmpty()) {
69 | throw new IllegalArgumentException("GAMMA is required, "
70 | + "e.g. 0.2 or 0.5");
71 | }
72 | gamma = Double.parseDouble(temp);
73 | modelName = "TripleExponentialSmoothingModel";
74 | }
75 |
76 | public void reset() {
77 | // At this point, reset does nothing.
78 | }
79 |
80 | public void train(TimeSeries.DataSequence data) {
81 | this.data = data;
82 | int n = data.size();
83 | DataPoint dp = null;
84 | DataSet observedData = new DataSet();
85 | for (int i = 0; i < n; i++) {
86 | dp = new Observation(data.get(i).value);
87 | dp.setIndependentValue("x", i);
88 | observedData.add(dp);
89 | }
90 | observedData.setTimeVariable("x");
91 | observedData.setPeriodsPerYear(12);
92 |
93 | forecaster = new net.sourceforge.openforecast.models.TripleExponentialSmoothingModel(alpha, beta, gamma);
94 | forecaster.init(observedData);
95 | initForecastErrors(forecaster, data);
96 |
97 | logger.debug(getBias() + "\t" + getMAD() + "\t" + getMAPE() + "\t" + getMSE() + "\t" + getSAE() + "\t" + 0 + "\t" + 0);
98 | }
99 |
100 | public void update(TimeSeries.DataSequence data) {
101 |
102 | }
103 |
104 | public String getModelName() {
105 | return modelName;
106 | }
107 |
108 | public void predict(TimeSeries.DataSequence sequence) throws Exception {
109 | int n = data.size();
110 | DataSet requiredDataPoints = new DataSet();
111 | DataPoint dp;
112 |
113 | for (int count = 0; count < n; count++) {
114 | dp = new Observation(0.0);
115 | dp.setIndependentValue("x", count);
116 | requiredDataPoints.add(dp);
117 | }
118 | forecaster.forecast(requiredDataPoints);
119 |
120 | // Output the results
121 | Iterator it = requiredDataPoints.iterator();
122 | int i = 0;
123 | while (it.hasNext()) {
124 | DataPoint pnt = ((DataPoint) it.next());
125 | logger.info(data.get(i).time + "," + data.get(i).value + "," + pnt.getDependentValue());
126 | sequence.set(i, (new Entry(data.get(i).time, (float) pnt.getDependentValue())));
127 | i++;
128 | }
129 | }
130 |
131 | public void toJson(JSONStringer json_out) {
132 |
133 | }
134 |
135 | public void fromJson(JSONObject json_obj) {
136 |
137 | }
138 | }
139 |
--------------------------------------------------------------------------------
/src/test/java/com/yahoo/egads/TestOlympicModel.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2015, Yahoo Inc.
3 | * Copyrights licensed under the GPL License.
4 | * See the accompanying LICENSE file for terms.
5 | */
6 |
7 | package com.yahoo.egads;
8 |
9 | import com.yahoo.egads.data.Model;
10 | import com.yahoo.egads.models.tsmm.TimeSeriesAbstractModel;
11 | import com.yahoo.egads.models.tsmm.OlympicModel;
12 | import com.yahoo.egads.models.tsmm.MovingAverageModel;
13 | import com.yahoo.egads.utilities.*;
14 | import com.yahoo.egads.data.*;
15 | import java.util.ArrayList;
16 | import java.util.Properties;
17 | import java.io.FileInputStream;
18 | import java.io.InputStream;
19 | import com.yahoo.egads.control.ProcessableObject;
20 | import com.yahoo.egads.control.ProcessableObjectFactory;
21 | import org.testng.Assert;
22 | import org.testng.annotations.Test;
23 |
24 | // Tests the correct generation of the expected values for olympic
25 | // scoring.
26 | public class TestOlympicModel {
27 |
28 | @Test
29 | public void testOlympicModel() throws Exception {
30 | // Test cases: ref window: 10, 5
31 | // Drops: 0, 1
32 | String[] refWindows = new String[]{"10", "5"};
33 | String[] drops = new String[]{"0", "1"};
34 | // Load the true expected values from a file.
35 | String configFile = "src/test/resources/sample_config.ini";
36 | InputStream is = new FileInputStream(configFile);
37 | Properties p = new Properties();
38 | p.load(is);
39 | ArrayList actual_metric = com.yahoo.egads.utilities.FileUtils
40 | .createTimeSeries("src/test/resources/model_input.csv", p);
41 |
42 | for (int w = 0; w < refWindows.length; w++) {
43 | for (int d = 0; d < drops.length; d++) {
44 | p.setProperty("NUM_WEEKS", refWindows[w]);
45 | p.setProperty("NUM_TO_DROP", drops[d]);
46 | // Parse the input timeseries.
47 | ArrayList metrics = com.yahoo.egads.utilities.FileUtils
48 | .createTimeSeries("src/test/resources/model_output_" + refWindows[w] + "_" + drops[d] + ".csv", p);
49 | OlympicModel model = new OlympicModel(p);
50 | model.train(actual_metric.get(0).data);
51 | TimeSeries.DataSequence sequence = new TimeSeries.DataSequence(metrics.get(0).startTime(),
52 | metrics.get(0).lastTime(),
53 | 3600);
54 |
55 |
56 | sequence.setLogicalIndices(metrics.get(0).startTime(), 3600);
57 | model.predict(sequence);
58 | Assert.assertEquals(verifyResults(sequence, metrics.get(0).data), true);
59 | }
60 | }
61 | }
62 |
63 | // Verifies that the two time-series are identical.
64 | private boolean verifyResults (TimeSeries.DataSequence computed, TimeSeries.DataSequence actual) {
65 | int n = computed.size();
66 | int n2 = actual.size();
67 | if (n != n2) {
68 | return false;
69 | }
70 | float precision = (float) 0.000001;
71 | for (int i = 0; i < n; i++) {
72 | if (Math.abs(computed.get(i).value - actual.get(i).value) > precision) {
73 | return false;
74 | }
75 | }
76 | return true;
77 | }
78 |
79 | @Test
80 | public void testForecastErrors() throws Exception {
81 | String configFile = "src/test/resources/sample_config.ini";
82 | InputStream is = new FileInputStream(configFile);
83 | Properties p = new Properties();
84 | p.load(is);
85 | ArrayList actual_metric = com.yahoo.egads.utilities.FileUtils
86 | .createTimeSeries("src/test/resources/model_input.csv", p);
87 | OlympicModel olympicModel = new OlympicModel(p);
88 | olympicModel.train(actual_metric.get(0).data);
89 |
90 | Assert.assertEquals(olympicModel.getBias(), -26.315675155416635, 1e-10);
91 | Assert.assertEquals(olympicModel.getMAD(), 28.81582062080335, 1e-10);
92 | Assert.assertEquals(Double.isNaN(olympicModel.getMAPE()), true);
93 | Assert.assertEquals(olympicModel.getMSE(), 32616.547275296416, 1e-7);
94 | Assert.assertEquals(olympicModel.getSAE(), 41033.72856402397, 1e-7);
95 | }
96 |
97 | @Test
98 | public void testBetterThan() throws Exception {
99 | String configFile = "src/test/resources/sample_config.ini";
100 | InputStream is = new FileInputStream(configFile);
101 | Properties p = new Properties();
102 | p.load(is);
103 | ArrayList actual_metric = com.yahoo.egads.utilities.FileUtils
104 | .createTimeSeries("src/test/resources/model_input.csv", p);
105 | OlympicModel olympicModel = new OlympicModel(p);
106 | olympicModel.train(actual_metric.get(0).data);
107 |
108 | MovingAverageModel movingAverageModel = new MovingAverageModel(p);
109 | movingAverageModel.train(actual_metric.get(0).data);
110 |
111 | // movingAverageModel is better than olympicModel
112 | Assert.assertEquals(TimeSeriesAbstractModel.betterThan(movingAverageModel, olympicModel), true);
113 | Assert.assertEquals(TimeSeriesAbstractModel.betterThan(movingAverageModel, movingAverageModel), false);
114 | Assert.assertEquals(TimeSeriesAbstractModel.betterThan(olympicModel, movingAverageModel), false);
115 | Assert.assertEquals(TimeSeriesAbstractModel.betterThan(olympicModel, olympicModel), false);
116 | }
117 | }
118 |
--------------------------------------------------------------------------------
/src/main/java/com/yahoo/egads/models/adm/KSigmaModel.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2015, Yahoo Inc.
3 | * Copyrights licensed under the GPL License.
4 | * See the accompanying LICENSE file for terms.
5 | */
6 |
7 | // A simple thresholding model that returns an anomaly if it is above/below a certain threshold.
8 |
9 | package com.yahoo.egads.models.adm;
10 |
11 | import java.util.Properties;
12 | import java.util.Map;
13 | import java.util.HashMap;
14 | import java.util.ArrayList;
15 |
16 | import com.yahoo.egads.data.Anomaly.IntervalSequence;
17 | import com.yahoo.egads.data.Anomaly.Interval;
18 | import com.yahoo.egads.data.TimeSeries.DataSequence;
19 | import com.yahoo.egads.utilities.AutoSensitivity;
20 | import com.yahoo.egads.data.AnomalyErrorStorage;
21 |
22 | import org.json.JSONObject;
23 | import org.json.JSONStringer;
24 |
25 | public class KSigmaModel extends AnomalyDetectionAbstractModel {
26 |
27 | // The constructor takes a set of properties
28 | // needed for the simple model. This includes the sensitivity.
29 | private Map threshold;
30 | private int maxHrsAgo;
31 | private long windowStart;
32 | // modelName.
33 | public String modelName = "KSigmaModel";
34 | public AnomalyErrorStorage aes = new AnomalyErrorStorage();
35 |
36 | public KSigmaModel(Properties config) {
37 | super(config);
38 |
39 | if (config.getProperty("MAX_ANOMALY_TIME_AGO") == null) {
40 | throw new IllegalArgumentException("MAX_ANOMALY_TIME_AGO is NULL");
41 | }
42 | this.maxHrsAgo = new Integer(config.getProperty("MAX_ANOMALY_TIME_AGO"));
43 |
44 | this.windowStart = new Long(config.getProperty("DETECTION_WINDOW_START_TIME"));
45 |
46 | this.threshold = parseMap(config.getProperty("THRESHOLD"));
47 |
48 | if (config.getProperty("THRESHOLD") != null && this.threshold.isEmpty() == true) {
49 | throw new IllegalArgumentException("THRESHOLD PARSE ERROR");
50 | }
51 | }
52 |
53 | public void toJson(JSONStringer json_out) {
54 |
55 | }
56 |
57 | public void fromJson(JSONObject json_obj) {
58 |
59 | }
60 |
61 | public String getModelName() {
62 | return modelName;
63 | }
64 |
65 | @Override
66 | public String getType() {
67 | return "point_outlier";
68 | }
69 |
70 | @Override
71 | public void reset() {
72 | // At this point, reset does nothing.
73 | }
74 |
75 | @Override
76 | public void tune(DataSequence observedSeries, DataSequence expectedSeries) throws Exception {
77 | HashMap> allErrors = aes.initAnomalyErrors(observedSeries, expectedSeries);
78 |
79 | for (int i = 0; i < (aes.getIndexToError().keySet()).size(); i++) {
80 | // Add a new error metric if the error metric has not been
81 | // defined by the user.
82 | if (!threshold.containsKey(aes.getIndexToError().get(i))) {
83 | Float[] fArray = (allErrors.get(aes.getIndexToError().get(i))).toArray(new Float[(allErrors.get(aes.getIndexToError().get(i))).size()]);
84 | threshold.put(aes.getIndexToError().get(i), AutoSensitivity.getKSigmaSensitivity(fArray, sDAutoSensitivity));
85 | }
86 | }
87 | }
88 |
89 | // Returns true this point is identified as a potential anomaly.
90 | public boolean isAnomaly(Float[] errors, Map threshold) {
91 | // Cycle through all available thresholds and return
92 | // true if any of them matches.
93 | for (Map.Entry entry : threshold.entrySet()) {
94 | // disable mapee and mape.
95 | if (aes.getErrorToIndex().containsKey(entry.getKey()) == true &&
96 | Math.abs(errors[aes.getErrorToIndex().get(entry.getKey())]) >= Math.abs(entry.getValue())) {
97 | return true;
98 | }
99 | }
100 | return false;
101 | }
102 |
103 | @Override
104 | public IntervalSequence detect(DataSequence observedSeries,
105 | DataSequence expectedSeries) throws Exception {
106 |
107 | // At detection time, the anomaly thresholds shouldn't all be 0.
108 | Float threshSum = (float) 0.0;
109 | for (Map.Entry entry : this.threshold.entrySet()) {
110 | threshSum += Math.abs(entry.getValue());
111 | }
112 |
113 | // Get an array of thresholds.
114 | Float[] thresholdErrors = new Float[aes.getErrorToIndex().size()];
115 | for (Map.Entry entry : this.threshold.entrySet()) {
116 | thresholdErrors[aes.getErrorToIndex().get(entry.getKey())] = Math.abs(entry.getValue());
117 | }
118 |
119 | IntervalSequence output = new IntervalSequence();
120 | int n = observedSeries.size();
121 |
122 | for (int i = 0; i < n; i++) {
123 | Float[] errors = aes.computeErrorMetrics(expectedSeries.get(i).value, observedSeries.get(i).value);
124 | logger.debug("TS:" + observedSeries.get(i).time + ",E:" + arrayF2S(errors) + ",TE:" + arrayF2S(thresholdErrors) + ",OV:" + observedSeries.get(i).value + ",EV:" + expectedSeries.get(i).value);
125 | if (observedSeries.get(i).value != expectedSeries.get(i).value &&
126 | threshSum > (float) 0.0 &&
127 | isAnomaly(errors, threshold) == true &&
128 | (isDetectionWindowPoint(maxHrsAgo, windowStart, observedSeries.get(i).time, observedSeries.get(0).time) ||
129 | (maxHrsAgo == 0 && i == (n - 1)))) {
130 | output.add(new Interval(observedSeries.get(i).time,
131 | i,
132 | errors,
133 | thresholdErrors,
134 | observedSeries.get(i).value,
135 | expectedSeries.get(i).value));
136 | }
137 | }
138 | return output;
139 | }
140 | }
141 |
--------------------------------------------------------------------------------
/src/main/java/com/yahoo/egads/models/adm/ExtremeLowDensityModel.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2015, Yahoo Inc.
3 | * Copyrights licensed under the GPL License.
4 | * See the accompanying LICENSE file for terms.
5 | */
6 |
7 | // A simple thresholding model that returns an anomaly if it is above/below a certain threshold.
8 |
9 | package com.yahoo.egads.models.adm;
10 |
11 | import java.util.Properties;
12 | import java.util.Map;
13 | import java.util.HashMap;
14 | import java.util.ArrayList;
15 | import com.yahoo.egads.data.Anomaly.IntervalSequence;
16 | import com.yahoo.egads.data.Anomaly.Interval;
17 | import com.yahoo.egads.data.AnomalyErrorStorage;
18 | import com.yahoo.egads.data.TimeSeries.DataSequence;
19 | import com.yahoo.egads.utilities.AutoSensitivity;
20 |
21 | import org.json.JSONObject;
22 | import org.json.JSONStringer;
23 |
24 | public class ExtremeLowDensityModel extends AnomalyDetectionAbstractModel {
25 |
26 | // The constructor takes a set of properties
27 | // needed for the simple model. This includes the sensitivity.
28 | private Map threshold;
29 | private int maxHrsAgo;
30 | private long windowStart;
31 | // modelName.
32 | public String modelName = "ExtremeLowDensityModel";
33 | public AnomalyErrorStorage aes = new AnomalyErrorStorage();
34 |
35 | public ExtremeLowDensityModel(Properties config) {
36 | super(config);
37 |
38 | if (config.getProperty("MAX_ANOMALY_TIME_AGO") == null) {
39 | throw new IllegalArgumentException("MAX_ANOMALY_TIME_AGO is NULL");
40 | }
41 | this.maxHrsAgo = new Integer(config.getProperty("MAX_ANOMALY_TIME_AGO"));
42 |
43 | this.windowStart = new Long(config.getProperty("DETECTION_WINDOW_START_TIME"));
44 |
45 | this.threshold = parseMap(config.getProperty("THRESHOLD"));
46 |
47 | if (config.getProperty("THRESHOLD") != null && this.threshold.isEmpty() == true) {
48 | throw new IllegalArgumentException("THRESHOLD PARSE ERROR");
49 | }
50 | }
51 |
52 | public void toJson(JSONStringer json_out) {
53 |
54 | }
55 |
56 | public void fromJson(JSONObject json_obj) {
57 |
58 | }
59 |
60 | public String getModelName() {
61 | return modelName;
62 | }
63 |
64 | @Override
65 | public String getType() {
66 | return "point_outlier";
67 | }
68 |
69 | @Override
70 | public void reset() {
71 | // At this point, reset does nothing.
72 | }
73 |
74 | @Override
75 | public void tune(DataSequence observedSeries,
76 | DataSequence expectedSeries) throws Exception {
77 | // Compute the time-series of errors.
78 | HashMap> allErrors = aes.initAnomalyErrors(observedSeries, expectedSeries);
79 |
80 | for (int i = 0; i < (aes.getIndexToError().keySet()).size(); i++) {
81 | // Add a new error metric if the error metric has not been
82 | // defined by the user.
83 | if (!threshold.containsKey(aes.getIndexToError().get(i))) {
84 | Float[] fArray = (allErrors.get(aes.getIndexToError().get(i))).toArray(new Float[(allErrors.get(aes.getIndexToError().get(i))).size()]);
85 | threshold.put(aes.getIndexToError().get(i), AutoSensitivity.getLowDensitySensitivity(fArray, sDAutoSensitivity, amntAutoSensitivity));
86 | }
87 | }
88 | }
89 |
90 | // Returns true this point is identified as a potential anomaly.
91 | public boolean isAnomaly(Float[] errors, Map threshold) {
92 | // Cycle through all available thresholds and return
93 | // true if any of them matches.
94 | for (Map.Entry entry : threshold.entrySet()) {
95 | // disable mapee and mape.
96 | if (aes.getErrorToIndex().containsKey(entry.getKey()) == true &&
97 | Math.abs(errors[aes.getErrorToIndex().get(entry.getKey())]) >= Math.abs(entry.getValue())) {
98 | return true;
99 | }
100 | }
101 | return false;
102 | }
103 |
104 | @Override
105 | public IntervalSequence detect(DataSequence observedSeries,
106 | DataSequence expectedSeries) throws Exception {
107 |
108 | // At detection time, the anomaly thresholds shouldn't all be 0.
109 | Float threshSum = (float) 0.0;
110 | for (Map.Entry entry : this.threshold.entrySet()) {
111 | threshSum += Math.abs(entry.getValue());
112 | }
113 |
114 | // Get an array of thresholds.
115 | Float[] thresholdErrors = new Float[aes.getErrorToIndex().size()];
116 | for (Map.Entry entry : this.threshold.entrySet()) {
117 | thresholdErrors[aes.getErrorToIndex().get(entry.getKey())] = Math.abs(entry.getValue());
118 | }
119 |
120 | IntervalSequence output = new IntervalSequence();
121 | int n = observedSeries.size();
122 |
123 | for (int i = 0; i < n; i++) {
124 | Float[] errors = aes.computeErrorMetrics(expectedSeries.get(i).value, observedSeries.get(i).value);
125 | logger.debug("TS:" + observedSeries.get(i).time + ",E:" + arrayF2S(errors) + ",TE:" + arrayF2S(thresholdErrors) + ",OV:" + observedSeries.get(i).value + ",EV:" + expectedSeries.get(i).value);
126 | if (observedSeries.get(i).value != expectedSeries.get(i).value &&
127 | threshSum > (float) 0.0 &&
128 | isAnomaly(errors, threshold) == true &&
129 | (isDetectionWindowPoint(maxHrsAgo, windowStart, observedSeries.get(i).time, observedSeries.get(0).time) ||
130 | (maxHrsAgo == 0 && i == (n - 1)))) {
131 | output.add(new Interval(observedSeries.get(i).time,
132 | i,
133 | errors,
134 | thresholdErrors,
135 | observedSeries.get(i).value,
136 | expectedSeries.get(i).value));
137 | }
138 | }
139 | return output;
140 | }
141 | }
142 |
--------------------------------------------------------------------------------
/src/main/java/com/yahoo/egads/data/JsonEncoder.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2015, Yahoo Inc.
3 | * Copyrights licensed under the GPL License.
4 | * See the accompanying LICENSE file for terms.
5 | */
6 |
7 | // function class
8 | // Helps any object encode its member variables as JSON.
9 | // Any JsonAble object can customize its encoding in its own to/fromJson() function
10 |
11 | package com.yahoo.egads.data;
12 |
13 | import java.lang.reflect.Constructor;
14 | import java.lang.reflect.Field;
15 | import java.lang.reflect.Modifier;
16 | import java.lang.reflect.ParameterizedType;
17 | import java.util.ArrayList;
18 | import java.util.Iterator;
19 | import org.json.JSONArray;
20 | import org.json.JSONStringer;
21 | import org.json.JSONObject;
22 |
23 | public class JsonEncoder {
24 |
25 | // methods ////////////////////////////////////////////////
26 |
27 | public static String toJson(Object object) throws Exception {
28 | JSONStringer jsonOut = new JSONStringer();
29 | toJson(object, jsonOut);
30 | return jsonOut.toString();
31 | }
32 |
33 | public static void // modifies json_out
34 | toJson(Object object, JSONStringer json_out) throws Exception {
35 | json_out.object();
36 | // for each inherited class...
37 | for (Class c = object.getClass(); c != Object.class; c = c
38 | .getSuperclass()) {
39 | // for each member variable...
40 | Field[] fields = c.getDeclaredFields();
41 | for (Field f : fields) {
42 | // if variable is static/private... skip it
43 | if (Modifier.isStatic(f.getModifiers())) {
44 | continue;
45 | }
46 | if (Modifier.isPrivate(f.getModifiers())) {
47 | continue;
48 | }
49 | Object value = f.get(object);
50 |
51 | // if variable is a complex type... recurse on sub-objects
52 | if (value instanceof JsonAble) {
53 | json_out.key(f.getName());
54 | ((JsonAble) value).toJson(json_out);
55 | // if variable is an array... recurse on sub-objects
56 | } else if (value instanceof ArrayList) {
57 | json_out.key(f.getName());
58 | json_out.array();
59 | for (Object e : (ArrayList) value) {
60 | toJson(e, json_out);
61 | }
62 | json_out.endArray();
63 | // if variable is a simple type... convert to json
64 | } else {
65 | json_out.key(f.getName()).value(value);
66 | }
67 | }
68 | }
69 | json_out.endObject();
70 | }
71 |
72 | public static void fromJson(Object object, String json_str)
73 | throws Exception {
74 | JSONObject jsonObj = new JSONObject(json_str);
75 | fromJson(object, jsonObj);
76 | }
77 |
78 | public static void fromJson(Object object, JSONObject json_obj)
79 | throws Exception {
80 | // for each json key-value, that has a corresponding variable in object ...
81 | for (Iterator k = json_obj.keys(); k.hasNext();) {
82 | String key = (String) k.next();
83 | Object value = json_obj.get(key);
84 |
85 | // try to access object variable
86 | Field field = null;
87 | try {
88 | field = object.getClass().getField(key);
89 | } catch (Exception e) {
90 | continue;
91 | }
92 | if (Modifier.isStatic(field.getModifiers())) {
93 | continue;
94 | }
95 | if (Modifier.isPrivate(field.getModifiers())) {
96 | continue;
97 | }
98 | Object member = field.get(object);
99 |
100 | if (json_obj.isNull(key)) {
101 | field.set(object, null);
102 | continue;
103 | // if variable is container... recurse
104 | } else if (member instanceof JsonAble) {
105 | ((JsonAble) member).fromJson((JSONObject) value);
106 | // if variable is an array... recurse on sub-objects
107 | } else if (member instanceof ArrayList) {
108 | // Depends on existance of ArrayList template parameter, and T constructor with no arguments.
109 | // May be better to use custom fromJson() in member class.
110 | ArrayList memberArray = (ArrayList) member;
111 | JSONArray jsonArray = (JSONArray) value;
112 |
113 | // find array element constructor
114 | ParameterizedType arrayType = null;
115 | if (field.getGenericType() instanceof ParameterizedType) {
116 | arrayType = (ParameterizedType) field.getGenericType();
117 | }
118 | for (Class c = member.getClass(); arrayType == null
119 | && c != null; c = c.getSuperclass()) {
120 | if (c.getGenericSuperclass() instanceof ParameterizedType) {
121 | arrayType = (ParameterizedType) c
122 | .getGenericSuperclass();
123 | }
124 | }
125 | if (arrayType == null) {
126 | throw new Exception(
127 | "could not find ArrayList element type for field 'key'");
128 | }
129 | Class elementClass = (Class) (arrayType
130 | .getActualTypeArguments()[0]);
131 | Constructor elementConstructor = elementClass.getConstructor();
132 |
133 | // for each element in JSON array ... append element to member array, recursively decode element
134 | for (int i = 0; i < jsonArray.length(); ++i) {
135 | Object element = elementConstructor.newInstance();
136 | fromJson(element, jsonArray.getJSONObject(i));
137 | memberArray.add(element);
138 | }
139 | // if variable is simple value... set
140 | } else if (field.getType() == float.class) {
141 | field.set(object, (float) json_obj.getDouble(key));
142 | } else {
143 | field.set(object, value);
144 | }
145 | }
146 | }
147 | }
148 |
--------------------------------------------------------------------------------
/src/main/java/com/yahoo/egads/data/WeightedValue.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2016, Yahoo Inc.
3 | * Copyrights licensed under the GPL License.
4 | * See the accompanying LICENSE file for terms.
5 | */
6 | package com.yahoo.egads.data;
7 |
8 | import java.util.Collections;
9 | import java.util.List;
10 |
11 | import com.google.common.collect.Lists;
12 |
13 | /**
14 | * Class for holding a weighted value that can then be aggregated when stored
15 | * in a list using various functions.
16 | */
17 | public class WeightedValue implements Comparable {
18 | private final double value;
19 | private final int weight;
20 |
21 | /**
22 | * Creates a weighted value; both fields are final and are only assigned here.
23 | * @param value The value of the data point.
24 | * @param weight A weight for the data point.
25 | */
26 | public WeightedValue(final double value, final int weight) {
27 | this.value = value;
28 | this.weight = weight;
29 | }
30 |
31 | @Override
32 | public int compareTo(final WeightedValue other) {
33 | return Double.compare(value, other.value); // orders by value only; the weight is not consulted
34 | }
35 |
36 | /**
37 | * Drops up to {@code count} of the highest or lowest values from the list,
38 | * in place, always leaving at least one value in the list.
39 | * @param accumulator A non-null accumulator list; it is modified by this call.
40 | * @param count The maximum number of values to drop; 0 or less drops nothing.
41 | * @param highest Drop higher values == true or drop lower values == false.
42 | */
43 | public static void drop(final List accumulator,
44 | final int count, final boolean highest) {
45 | for (int x = 0; x < count; x++) {
46 | if (accumulator.size() <= 1) {
47 | break;
48 | }
49 | if (highest) {
50 | accumulator.remove(Collections.max(accumulator));
51 | } else {
52 | accumulator.remove(Collections.min(accumulator));
53 | }
54 | }
55 | }
56 |
57 | /** @return The data point value, as given to the constructor. */
58 | public double getValue() {
59 | return value;
60 | }
61 |
62 | /** @return The weight for the data point, as given to the constructor. */
63 | public int getWeight() {
64 | return weight;
65 | }
66 |
67 | /**
68 | * Aggregates the values in the list using the given agg function.
69 | * For all functions, NaNs are skipped so if an entire list is NaN'd or the
70 | * list is empty, the results will be a NaN.
71 | * @param values A non-null list of values to aggregate.
72 | * @param agg A non-null or empty aggregator function to use.
73 | * @return An aggregated value or NaN.
74 | * @throws IllegalArgumentException if the values was null, agg was null
75 | * or empty or we had an unimplemented agg function.
76 | */
77 | public static double aggregate(final List values,
78 | final String agg) {
79 | if (agg == null || agg.isEmpty()) {
80 | throw new IllegalArgumentException("Aggregator cannot be null or empty");
81 | }
82 | if (values == null) {
83 | throw new IllegalArgumentException("Values cannot be null");
84 | }
85 | if (values.isEmpty()) {
86 | if (agg.equals("COUNT")) {
87 | return 0;
88 | }
89 | return Double.NaN;
90 | }
91 |
92 | // temps.
93 | int validCount = 0;
94 | double accumulator = 0;
95 |
96 | if (agg.equals("MAX")) {
97 | accumulator = Double.MIN_VALUE;
98 | for (final WeightedValue v : values) {
99 | if (Double.isFinite(v.value)) {
100 | if (v.value > accumulator) {
101 | accumulator = v.value;
102 | }
103 | ++validCount;
104 | }
105 | }
106 | if (validCount < 1) {
107 | return Double.NaN;
108 | }
109 | return accumulator;
110 | } else if (agg.equals("MIN")) {
111 | accumulator = Double.MAX_VALUE;
112 | for (final WeightedValue v : values) {
113 | if (Double.isFinite(v.value)) {
114 | if (v.value < accumulator) {
115 | accumulator = v.value;
116 | }
117 | ++validCount;
118 | }
119 | }
120 | if (validCount < 1) {
121 | return Double.NaN;
122 | }
123 | return accumulator;
124 | } else if (agg.equals("COUNT")) {
125 | int ctr = 0;
126 | for (final WeightedValue v : values) {
127 | if (Double.isFinite(v.value)) {
128 | ++ctr;
129 | }
130 | }
131 | return ctr;
132 | } else if (agg.equals("MEDIAN")) {
133 | final List sorted = Lists.newArrayList();
134 | for (final WeightedValue v : values) {
135 | if (Double.isFinite(v.value)) {
136 | sorted.add(v.value);
137 | ++validCount;
138 | }
139 | }
140 | if (validCount < 1) {
141 | return Double.NaN;
142 | }
143 | Collections.sort(sorted);
144 | return sorted.get(sorted.size() / 2);
145 | } else if (agg.equals("SUM")) {
146 | for (final WeightedValue v : values) {
147 | if (Double.isFinite(v.value)) {
148 | accumulator += v.value;
149 | ++validCount;
150 | }
151 | }
152 | if (validCount < 1) {
153 | return Double.NaN;
154 | }
155 | return accumulator;
156 | } else if (agg.equals("AVG")) {
157 | for (final WeightedValue v : values) {
158 | if (Double.isFinite(v.value)) {
159 | accumulator += v.value;
160 | ++validCount;
161 | }
162 | }
163 | if (validCount < 1) {
164 | return Double.NaN;
165 | }
166 | return accumulator / validCount;
167 | } else if (agg.equals("WAVG")) {
168 | for (final WeightedValue v : values) {
169 | if (Double.isFinite(v.value)) {
170 | accumulator += v.weight * v.value;
171 | validCount += v.weight;
172 | }
173 | }
174 | if (validCount < 1) {
175 | return Double.NaN;
176 | }
177 | return accumulator / validCount;
178 | }
179 |
180 | throw new IllegalArgumentException("Unimplemented aggregation "
181 | + "function: " + agg);
182 | }
183 | }
184 |
--------------------------------------------------------------------------------
/src/main/java/com/yahoo/egads/utilities/FileUtils.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2015, Yahoo Inc.
3 | * Copyrights licensed under the GPL License.
4 | * See the accompanying LICENSE file for terms.
5 | */
6 |
7 | // A utility for creating an array of timeseries objects from the
8 | // csv file.
9 |
10 | package com.yahoo.egads.utilities;
11 |
12 | import com.yahoo.egads.data.TimeSeries;
13 | import java.util.StringTokenizer;
14 | import java.util.ArrayList;
15 | import java.io.BufferedReader;
16 | import java.io.FileReader;
17 | import java.io.IOException;
18 | import java.util.Properties;
19 |
20 | public class FileUtils {
21 |
22 | // Creates a time-series from a file.
23 | public static ArrayList createTimeSeries(String csv_file, Properties config) {
24 | // Input file which needs to be parsed
25 | String fileToParse = csv_file;
26 | BufferedReader fileReader = null;
27 | ArrayList output = new ArrayList();
28 |
29 | // Delimiter used in CSV file
30 | final String delimiter = ",";
31 | Long interval = null;
32 | Long prev = null;
33 | Integer aggr = 1;
34 | boolean fillMissing = false;
35 | if (config.getProperty("FILL_MISSING") != null && config.getProperty("FILL_MISSING").equals("1")) {
36 | fillMissing = true;
37 | }
38 | if (config.getProperty("AGGREGATION") != null) {
39 | aggr = new Integer(config.getProperty("AGGREGATION"));
40 | }
41 | try {
42 | String line = "";
43 | // Create the file reader.
44 | fileReader = new BufferedReader(new FileReader(fileToParse));
45 |
46 | // Read the file line by line
47 | boolean firstLine = true;
48 | while ((line = fileReader.readLine()) != null) {
49 | // Get all tokens available in line.
50 | String[] tokens = line.split(delimiter);
51 | Long curTimestamp = null;
52 |
53 | // Check for the case where there is more than one line preceding the data
54 | if (firstLine == true) {
55 | if (!isNumeric(tokens[0]) && tokens[0].equals("timestamp") == false) {
56 | continue;
57 | }
58 | }
59 | if (firstLine == false && tokens.length > 1) {
60 | curTimestamp = (new Double(tokens[0])).longValue();
61 | }
62 | for (int i = 1; i < tokens.length; i++) {
63 | // Assume that the first line contains the column names.
64 | if (firstLine) {
65 | TimeSeries ts = new TimeSeries();
66 | ts.meta.fileName = csv_file;
67 | output.add(ts);
68 | if (isNumeric(tokens[i]) == false) { // Just in case there's a numeric column heading
69 | ts.meta.name = tokens[i];
70 | } else {
71 | ts.meta.name = "metric_" + i;
72 | output.get(i - 1).append((new Double(tokens[0])).longValue(),
73 | new Float(tokens[i]));
74 | }
75 | } else {
76 | // A naive missing data handler.
77 | if (interval != null && prev != null && interval > 0 && fillMissing == true) {
78 | if ((curTimestamp - prev) != interval) {
79 | int missingValues = (int) ((curTimestamp - prev) / interval);
80 |
81 | Long curTimestampToFill = prev + interval;
82 | for (int j = (missingValues - 1); j > 0; j--) {
83 | Float valToFill = new Float(tokens[i]);
84 | if (output.get(i - 1).size() >= missingValues) {
85 | valToFill = output.get(i - 1).data.get(output.get(i - 1).size() - missingValues).value;
86 | }
87 | output.get(i - 1).append(curTimestampToFill, valToFill);
88 | curTimestampToFill += interval;
89 | }
90 | }
91 | }
92 | // Infer interval.
93 | if (interval == null && prev != null) {
94 | interval = curTimestamp - new Long(prev);
95 | }
96 |
97 | output.get(i - 1).append(curTimestamp,
98 | new Float(tokens[i]));
99 | }
100 | }
101 | if (firstLine == false) {
102 | prev = curTimestamp;
103 | }
104 | firstLine = false;
105 | }
106 | } catch (Exception e) {
107 | e.printStackTrace();
108 | } finally {
109 | try {
110 | fileReader.close();
111 | } catch (IOException e) {
112 | e.printStackTrace();
113 | }
114 | }
115 | // Handle aggregation.
116 | if (aggr > 1) {
117 | for (TimeSeries t : output) {
118 | t.data = t.aggregate(aggr);
119 | t.meta.name += "_aggr_" + aggr;
120 | }
121 | }
122 | return output;
123 | }
124 |
125 | // Checks if the string is numeric.
126 | public static boolean isNumeric(String str) {
127 | try {
128 | Double.parseDouble(str);
129 | } catch (NumberFormatException nfe) {
130 | return false;
131 | }
132 | return true;
133 | }
134 |
135 | // Parses the string array property into an integer property.
136 | public static int[] splitInts(String str) throws IllegalArgumentException {
137 | StringTokenizer tokenizer = new StringTokenizer(str, ",");
138 | int n = tokenizer.countTokens();
139 | int[] list = new int[n];
140 | for (int i = 0; i < n; i++) {
141 | String token = tokenizer.nextToken();
142 | list[i] = Integer.parseInt(token);
143 | }
144 | return list;
145 | }
146 |
147 | // Initializes properties from a string (key:value, separated by ";").
148 | public static void initProperties(String config, Properties p) {
149 | String delims1 = ";";
150 | String delims2 = ":";
151 |
152 | StringTokenizer st1 = new StringTokenizer(config, delims1);
153 | while (st1.hasMoreElements()) {
154 | String[] st2 = (st1.nextToken()).split(delims2);
155 | p.setProperty(st2[0], st2[1]);
156 | }
157 | }
158 | }
159 |
--------------------------------------------------------------------------------
/src/main/java/com/yahoo/egads/models/adm/DBScanModel.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2015, Yahoo Inc.
3 | * Copyrights licensed under the GPL License.
4 | * See the accompanying LICENSE file for terms.
5 | */
6 |
7 | // An anomaly detection model that clusters the per-point error metrics with DBSCAN and reports points from the resulting clusters as anomalies.
8 |
9 | package com.yahoo.egads.models.adm;
10 |
11 | import java.util.Properties;
12 | import java.util.Map;
13 | import java.util.HashMap;
14 | import java.util.List;
15 | import java.util.ArrayList;
16 |
17 | import com.yahoo.egads.data.Anomaly.IntervalSequence;
18 | import com.yahoo.egads.data.Anomaly.Interval;
19 | import com.yahoo.egads.data.AnomalyErrorStorage;
20 | import com.yahoo.egads.data.TimeSeries.DataSequence;
21 | import com.yahoo.egads.utilities.DBSCANClusterer;
22 |
23 | import org.apache.commons.math3.ml.clustering.Cluster;
24 |
25 | import com.yahoo.egads.utilities.IdentifiedDoublePoint;
26 |
27 | import org.apache.commons.math3.ml.distance.EuclideanDistance;
28 | import org.json.JSONObject;
29 | import org.json.JSONStringer;
30 |
31 | public class DBScanModel extends AnomalyDetectionAbstractModel {
32 |
33 | // The constructor takes a set of properties
34 | // needed for the simple model. This includes the sensitivity.
35 | private Map threshold;
36 | private int maxHrsAgo;
37 | private long windowStart;
38 | // modelName.
39 | public String modelName = "DBScanModel";
40 | public AnomalyErrorStorage aes = new AnomalyErrorStorage();
41 | private DBSCANClusterer dbscan = null;
42 | private int minPoints = 2;
43 | private double eps = 500;
44 |
45 | public DBScanModel(Properties config) {
46 | super(config);
47 |
48 | if (config.getProperty("MAX_ANOMALY_TIME_AGO") == null) {
49 | throw new IllegalArgumentException("MAX_ANOMALY_TIME_AGO is NULL");
50 | }
51 | this.maxHrsAgo = new Integer(config.getProperty("MAX_ANOMALY_TIME_AGO"));
52 |
53 | this.windowStart = new Long(config.getProperty("DETECTION_WINDOW_START_TIME"));
54 |
55 | this.threshold = parseMap(config.getProperty("THRESHOLD"));
56 |
57 | if (config.getProperty("THRESHOLD") != null && this.threshold.isEmpty() == true) {
58 | throw new IllegalArgumentException("THRESHOLD PARSE ERROR");
59 | }
60 | }
61 |
62 | public void toJson(JSONStringer json_out) {
63 |
64 | }
65 |
66 | public void fromJson(JSONObject json_obj) {
67 |
68 | }
69 |
70 | public String getModelName() {
71 | return modelName;
72 | }
73 |
74 | @Override
75 | public String getType() {
76 | return "point_outlier";
77 | }
78 |
79 | @Override
80 | public void reset() {
81 | // At this point, reset does nothing.
82 | }
83 |
84 | @Override
85 | public void tune(DataSequence observedSeries,
86 | DataSequence expectedSeries) throws Exception {
87 | // Compute the time-series of errors.
88 | HashMap> allErrors = aes.initAnomalyErrors(observedSeries, expectedSeries);
89 | List points = new ArrayList();
90 | EuclideanDistance ed = new EuclideanDistance();
91 | int n = observedSeries.size();
92 |
93 | for (int i = 0; i < n; i++) {
94 | double[] d = new double[(aes.getIndexToError().keySet()).size()];
95 |
96 | for (int e = 0; e < (aes.getIndexToError().keySet()).size(); e++) {
97 | d[e] = allErrors.get(aes.getIndexToError().get(e)).get(i);
98 | }
99 | points.add(new IdentifiedDoublePoint(d, i));
100 | }
101 |
102 | double sum = 0.0;
103 | double count = 0.0;
104 | for (int i = 0; i < n; i++) {
105 | for (int j = 0; j < n; j++) {
106 | sum += ed.compute(points.get(i).getPoint(), points.get(j).getPoint());
107 | count++;
108 | }
109 | }
110 | eps = ((double) this.sDAutoSensitivity) * (sum / count);
111 | minPoints = ((int) Math.ceil(((double) this.amntAutoSensitivity) * ((double) n)));
112 | dbscan = new DBSCANClusterer(eps, minPoints);
113 | }
114 |
115 | @Override
116 | public IntervalSequence detect(DataSequence observedSeries,
117 | DataSequence expectedSeries) throws Exception {
118 |
119 | IntervalSequence output = new IntervalSequence();
120 | int n = observedSeries.size();
121 | // Get an array of thresholds.
122 | Float[] thresholdErrors = new Float[aes.getErrorToIndex().size()];
123 | for (Map.Entry entry : this.threshold.entrySet()) {
124 | thresholdErrors[aes.getErrorToIndex().get(entry.getKey())] = Math.abs(entry.getValue());
125 | }
126 |
127 | // Compute the time-series of errors.
128 | HashMap> allErrors = aes.initAnomalyErrors(observedSeries, expectedSeries);
129 | List points = new ArrayList();
130 |
131 | for (int i = 0; i < n; i++) {
132 | double[] d = new double[(aes.getIndexToError().keySet()).size()];
133 |
134 | for (int e = 0; e < (aes.getIndexToError().keySet()).size(); e++) {
135 | d[e] = allErrors.get(aes.getIndexToError().get(e)).get(i);
136 | }
137 | points.add(new IdentifiedDoublePoint(d, i));
138 | }
139 |
140 | List> cluster = dbscan.cluster(points);
141 | for(Cluster c: cluster) {
142 | for (IdentifiedDoublePoint p : c.getPoints()) {
143 | int i = p.getId();
144 | Float[] errors = aes.computeErrorMetrics(expectedSeries.get(p.getId()).value, observedSeries.get(p.getId()).value);
145 | logger.debug("TS:" + observedSeries.get(i).time + ",E:" + arrayF2S(errors) + ",TE:" + arrayF2S(thresholdErrors) + ",OV:" + observedSeries.get(i).value + ",EV:" + expectedSeries.get(i).value);
146 | if (observedSeries.get(p.getId()).value != expectedSeries.get(p.getId()).value &&
147 | (isDetectionWindowPoint(maxHrsAgo, windowStart, observedSeries.get(p.getId()).time, observedSeries.get(0).time) ||
148 | (maxHrsAgo == 0 && p.getId() == (n - 1)))) {
149 | output.add(new Interval(observedSeries.get(p.getId()).time,
150 | p.getId(),
151 | errors,
152 | thresholdErrors,
153 | observedSeries.get(p.getId()).value,
154 | expectedSeries.get(p.getId()).value));
155 | }
156 | }
157 | }
158 |
159 | return output;
160 | }
161 | }
162 |
--------------------------------------------------------------------------------
/src/main/java/com/yahoo/egads/utilities/AutoSensitivity.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2015, Yahoo Inc.
3 | * Copyrights licensed under the GPL License.
4 | * See the accompanying LICENSE file for terms.
5 | */
6 |
7 | // Provides the auto-sensitivity solution for EGADS
8 | // using the bootstrapping framework.
9 |
10 | package com.yahoo.egads.utilities;
11 | import java.util.ArrayList;
12 | import org.apache.commons.lang.ArrayUtils;
13 | import org.slf4j.Logger;
14 | import org.slf4j.LoggerFactory;
15 |
16 | import java.util.Arrays;
17 | import java.util.Collections;
18 |
19 | public class AutoSensitivity {
20 | private static final Logger logger = LoggerFactory.getLogger(AutoSensitivity.class);
21 |
22 | // Computes sensitivity based on the density distribution.
23 | // Assumes that anomalies constitute at most 5% of the data.
24 | public static Float getLowDensitySensitivity(Float[] data, float sDAutoSensitivy, float amntAutoSensitivity) {
25 | Float toReturn = Float.POSITIVE_INFINITY;
26 | Arrays.sort(data, Collections.reverseOrder());
27 | while (data.length > 0) {
28 |
29 | ArrayList fData = new ArrayList();
30 | fData.add(data[0]);
31 | data = ((Float[]) ArrayUtils.remove(data, 0));
32 |
33 | Float centroid = (float) fData.get(0);
34 | Float maxDelta = (float) sDAutoSensitivy * StatsUtils.getSD(data, StatsUtils.getMean(data));
35 |
36 | logger.debug("AutoSensitivity: Adding: " + fData.get(0) + " SD: " + maxDelta);
37 |
38 | // Add points while it's in the same cluster or not part of the other cluster.
39 | String localDebug = null;
40 | while (data.length > 0 &&
41 | (centroid - data[0]) <= ((float) (maxDelta))) {
42 | float maxDeltaInit = maxDelta;
43 | fData.add(data[0]);
44 | data = ((Float[]) ArrayUtils.remove(data, 0));
45 | Float[] tmp = new Float[fData.size()];
46 | tmp = fData.toArray(tmp);
47 | centroid = StatsUtils.getMean(tmp);
48 |
49 | if (data.length > 0) {
50 | Float sdOtherCluster = (float) StatsUtils.getSD(data, StatsUtils.getMean(data));
51 | maxDelta = sDAutoSensitivy * sdOtherCluster;
52 | logger.debug("AutoSensitivity: Adding: " + data[0] + " SD: " + maxDeltaInit + " SD': " + maxDelta);
53 | }
54 | }
55 | if (data.length > 0) {
56 | logger.debug("AutoSensitivity: Next Point I would have added is " + data[0]);
57 | }
58 |
59 | if (((double) fData.size() / (double) data.length) > amntAutoSensitivity) {
60 | // Cannot do anomaly detection.
61 | logger.debug("AutoSensitivity: Returning " + toReturn + " data size: " + data.length + " fData.size: " + fData.size());
62 | return toReturn;
63 | }
64 |
65 | toReturn = fData.get(fData.size() - 1);
66 | logger.debug("AutoSensitivity: Updating toReturn: " + toReturn + " SD: " + maxDelta);
67 | return toReturn;
68 | }
69 | return toReturn;
70 | }
71 |
72 | // Uses the simple KSigma rule to get the anoamly sensitivity.
73 | // Assumes that we have a normal distribution.
74 | public static Float getKSigmaSensitivity(Float[] data, float sDAutoSensitivity) {
75 | Float mean = StatsUtils.getMean(data);
76 | Float sd = StatsUtils.getSD(data, mean);
77 | return (mean + (sd * sDAutoSensitivity));
78 | }
79 |
80 | // Uses the mean as the base to find the static threshold.
81 | public static Float[] getAdaptiveKSigmaSensitivity(Float[] data, float amntAutoSens) {
82 | Float mean = StatsUtils.getMean(data);
83 | Float sd = StatsUtils.getSD(data, mean);
84 | if (sd == (float) 0.0) {
85 | sd = (float) 1.0;
86 | }
87 | Float[] ret = null;
88 | float k = (float) 1;
89 | float incr = (float) 1;
90 |
91 | Float max = null;
92 | Float min = null;
93 | float thresh = mean + Math.abs(sd * k);
94 | int howMany = howManyGreater(data, thresh);
95 |
96 | while (((float) howMany / (float) data.length) > amntAutoSens) {
97 | k += incr;
98 | thresh = mean + Math.abs(sd * k);
99 | howMany = howManyGreater(data, thresh);
100 | }
101 | if (((float) howMany / (float) data.length) <= amntAutoSens) {
102 | max = thresh;
103 | }
104 | k = 1;
105 | thresh = mean - Math.abs(sd * k);
106 | howMany = howManyLess(data, thresh);
107 |
108 | while (((float) howMany / (float) data.length) > amntAutoSens) {
109 | k += incr;
110 | thresh = mean - Math.abs(sd * k);
111 | howMany = howManyLess(data, thresh);
112 | }
113 | if (((float) howMany / (float) data.length) <= amntAutoSens) {
114 | min = thresh;
115 | }
116 | ret = new Float[]{max, min};
117 | return ret;
118 | }
119 |
120 | // Uses the max/min as the base to find the static threshold.
121 | public static Float[] getAdaptiveMaxMinSigmaSensitivity(Float[] data, float amntAutoSens, float k) {
122 | Arrays.sort(data);
123 | Float mean = StatsUtils.getMean(data);
124 | Float sd = StatsUtils.getSD(data, mean);
125 | if (sd == (float) 0.0) {
126 | sd = (float) 1.0;
127 | }
128 | Float[] ret = null;
129 |
130 | Float max = null;
131 | Float min = null;
132 | int i = 0;
133 | float thresh = data[i] + Math.abs(sd * k);
134 | int howMany = howManyLess(data, thresh);
135 | while (((float) howMany / (float) data.length) <= amntAutoSens) {
136 | min = thresh;
137 | i++;
138 | thresh = data[i] + Math.abs(sd * k);
139 | howMany = howManyLess(data, thresh);
140 | }
141 | i = data.length - 1;
142 | thresh = data[i] - Math.abs(sd * k);
143 | howMany = howManyGreater(data, thresh);
144 | while (((float) howMany / (float) data.length) <= amntAutoSens) {
145 | max = thresh;
146 | i--;
147 | thresh = data[i] - Math.abs(sd * k);
148 | howMany = howManyGreater(data, thresh);
149 | }
150 |
151 | ret = new Float[]{max, min};
152 | return ret;
153 | }
154 |
155 | private static int howManyGreater(Float[] data, Float value) {
156 | int numgreater = 0;
157 | for (Float f : data) {
158 | if (value <= f) {
159 | numgreater++;
160 | }
161 | }
162 | return numgreater;
163 | }
164 |
165 | private static int howManyLess(Float[] data, Float value) {
166 | int numless = 0;
167 | for (Float f : data) {
168 | if (value >= f) {
169 | numless++;
170 | }
171 | }
172 | return numless;
173 | }
174 | }
175 |
--------------------------------------------------------------------------------
/src/main/java/com/yahoo/egads/models/adm/NaiveModel.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2015, Yahoo Inc.
3 | * Copyrights licensed under the GPL License.
4 | * See the accompanying LICENSE file for terms.
5 | */
6 |
7 | // A simple model that does not require a forecasting model.
8 | // It checks whether or not the max value for the past N hours exceeds
9 | // X %.
10 |
11 | package com.yahoo.egads.models.adm;
12 |
13 | import java.util.Properties;
14 |
15 | import com.yahoo.egads.data.Anomaly.IntervalSequence;
16 | import com.yahoo.egads.data.Anomaly.Interval;
17 | import com.yahoo.egads.data.TimeSeries.DataSequence;
18 | import java.util.Map;
19 | import java.util.ArrayList;
20 | import java.util.HashMap;
21 | import com.yahoo.egads.data.AnomalyErrorStorage;
22 | import com.yahoo.egads.utilities.AutoSensitivity;
23 |
24 | import org.json.JSONObject;
25 | import org.json.JSONStringer;
26 |
27 | public class NaiveModel extends AnomalyDetectionAbstractModel {
28 |
29 | // The constructor takes a set of properties
30 | // needed for the simple model. This includes the sensitivity.
31 | private Map threshold;
32 | private int maxHrsAgo;
33 | private long windowStart;
34 | private Float window_size;
35 | // modelName.
36 | public static String modelName = "NaiveModel";
37 | public AnomalyErrorStorage aes = new AnomalyErrorStorage();
38 |
39 | public NaiveModel(Properties config) {
40 | super(config);
41 |
42 | if (config.getProperty("MAX_ANOMALY_TIME_AGO") == null) {
43 | throw new IllegalArgumentException("MAX_ANOMALY_TIME_AGO is NULL");
44 | }
45 | this.maxHrsAgo = new Integer(config.getProperty("MAX_ANOMALY_TIME_AGO"));
46 | this.windowStart = new Long(config.getProperty("DETECTION_WINDOW_START_TIME"));
47 | if (config.getProperty("WINDOW_SIZE") == null) {
48 | throw new IllegalArgumentException("WINDOW_SIZE is NULL");
49 | }
50 | this.window_size = new Float(config.getProperty("WINDOW_SIZE"));
51 | if (config.getProperty("THRESHOLD") == null) {
52 | throw new IllegalArgumentException("THRESHOLD is NULL");
53 | }
54 | this.threshold = parseMap(config.getProperty("THRESHOLD"));
55 | if (config.getProperty("THRESHOLD") != null && this.threshold.isEmpty() == true) {
56 | throw new IllegalArgumentException("THRESHOLD PARSE ERROR");
57 | }
58 | }
59 |
60 | public void toJson(JSONStringer json_out) {
61 |
62 | }
63 |
64 | public void fromJson(JSONObject json_obj) {
65 |
66 | }
67 |
68 | public String getModelName() {
69 | return modelName;
70 | }
71 |
72 | @Override
73 | public String getType() {
74 | return "point_outlier";
75 | }
76 |
77 | @Override
78 | public void reset() {
79 | // At this point, reset does nothing.
80 | }
81 |
82 | @Override
83 | public void tune(DataSequence observedSeries, DataSequence expectedSeries) throws Exception {
84 | // TODO: auto detect thresholds.
85 | }
86 | // Returns true this point is identified as a potential anomaly.
87 | public boolean isAnomaly(Float[] errors, Map threshold) {
88 | // Cycle through all available thresholds and return
89 | // true if any of them matches.
90 | for (Map.Entry entry : threshold.entrySet()) {
91 | // disable mapee and mape.
92 | if (aes.getErrorToIndex().containsKey(entry.getKey()) == true &&
93 | Math.abs(errors[aes.getErrorToIndex().get(entry.getKey())]) >= Math.abs(entry.getValue())) {
94 | return true;
95 | }
96 | }
97 | return false;
98 | }
99 |
100 | @Override
101 | public IntervalSequence detect(DataSequence observedSeries,
102 | DataSequence expectedSeries) throws Exception {
103 |
104 | // Get an array of thresholds.
105 | Float[] thresholdErrors = new Float[aes.getErrorToIndex().size()];
106 | for (Map.Entry entry : this.threshold.entrySet()) {
107 | thresholdErrors[aes.getErrorToIndex().get(entry.getKey())] = Math.abs(entry.getValue());
108 | }
109 |
110 | IntervalSequence output = new IntervalSequence();
111 | int n = observedSeries.size();
112 | Integer cutIndex = null;
113 |
114 | // Handle fractional windows which are interpreted as
115 | // % of the entire TimeSeries size.
116 | if (window_size < 1.0) {
117 | cutIndex = Math.round(window_size * ((float) n));
118 | } else {
119 | cutIndex = Math.round(window_size);
120 | }
121 |
122 | if (cutIndex + 1 > n) {
123 | return output;
124 | }
125 |
126 | Float[] observed = new Float[] {observedSeries.get(0).value, observedSeries.get(0).value};
127 | Float[] expected = new Float[] {expectedSeries.get(0).value, expectedSeries.get(0).value};
128 |
129 | int maxIndex = 0;
130 | int minIndex = 0;
131 |
132 | int anomaly = 0;
133 |
134 | for (int k = 0; k < n; k++) {
135 |
136 | if (observed[0] < observedSeries.get(k).value) {
137 | observed[0] = observedSeries.get(k).value;
138 | maxIndex = k;
139 | anomaly = 1;
140 | }
141 |
142 | if (observed[1] > observedSeries.get(k).value) {
143 | observed[1] = observedSeries.get(k).value;
144 | minIndex = k;
145 | anomaly = 1;
146 | }
147 |
148 | if (k < cutIndex) {
149 | continue;
150 | }
151 |
152 | expected[0] = Math.max(expected[0], observedSeries.get(k - cutIndex).value);
153 | expected[1] = Math.min(expected[1], observedSeries.get(k - cutIndex).value);
154 |
155 | // Check for anomalies for min/max.
156 | int anomalyIndex = 0;
157 | for (int i = 0; i < 2; i++) {
158 | Float[] errors = aes.computeErrorMetrics(expected[i], observed[i]);
159 | boolean actualAnomaly = false;
160 | if (i == 0 && observed[i] > expected[i]) {
161 | actualAnomaly = true;
162 | anomalyIndex = maxIndex;
163 | }
164 | if (i == 1 && observed[i] < expected[i]) {
165 | actualAnomaly = true;
166 | anomalyIndex = minIndex;
167 | }
168 |
169 | if (isAnomaly(errors, threshold) == true && actualAnomaly == true && anomaly == 1 &&
170 | (isDetectionWindowPoint(maxHrsAgo, windowStart, observedSeries.get(anomalyIndex).time, observedSeries.get(0).time) ||
171 | (maxHrsAgo == 0 && i == (n - 1)))) {
172 | anomaly = 0;
173 | logger.debug("TS:" + observedSeries.get(anomalyIndex).time + ",E:" + arrayF2S(errors) + ",TH:" + arrayF2S(thresholdErrors) + ",OV:" + observedSeries.get(anomalyIndex).value + ",EV:" + expected[i]);
174 | output.add(new Interval(observedSeries.get(anomalyIndex).time,
175 | anomalyIndex,
176 | errors,
177 | thresholdErrors,
178 | observed[i],
179 | expected[i],
180 | isAnomaly(errors, threshold)));
181 | }
182 | }
183 | }
184 | return output;
185 | }
186 | }
187 |
--------------------------------------------------------------------------------
/src/main/java/com/yahoo/egads/control/AnomalyDetector.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2015, Yahoo Inc.
3 | * Copyrights licensed under the GPL License.
4 | * See the accompanying LICENSE file for terms.
5 | */
6 |
7 | /*
8 | * Description: AnomalyDetector applies a set of anomaly detection models (AD algorithms) on a given metric.
9 | * AnomalyDetector provides concrete mechanisms to apply one or more abstract anomaly detection algorithms on
10 | * a time-series at the execution time; in other words, application of a certain anomaly detection algorithm on
11 | * a given time series should be carried out via an AnomalyDetector object.
12 | * The direct application of models on time series is discouraged in EGADS unless for test purposes.
13 | *
14 | * Inputs:
15 | * 1. The 'metric' time series
16 | * - Either an explicit TimeSeries object
17 | * - or the String name of the time series which would require the AnomalyDetector to connect to ModelDB to load
18 | * the appropriate anomaly detection models into the memory (under construction)
19 | *
20 | * 2. The model(s)
21 | * - Either an explicit AnomalyDetectionModel object via addModel()
22 | * - or implicitly loaded from ModelDB when the metric name is provided (under construction)
23 | *
24 | * Features:
25 | * 1. Resetting all the added anomaly detection models via reset()
26 | * 2. Tuning all the added anomaly detection models on the 'metric' via tune()
27 | * 3. Performing anomaly detection on the metric according to all the added anomaly detection models via detect()
28 | *
29 | * Details:
30 | * 1. The time units for interfacing with an AnomalyDetector object is the standard UNIX timestamp; however, AnomalyDetector
31 | * automatically performs logical indexing conversion for the abstract algorithms so that the actual models can
32 | * conveniently work with the logical index instead of UNIX timestamps. The conversion is:
33 | *
34 | * logical_index = (UNIX_timestamp - firstTimeStamp) div period
35 | * UNIX_timestamp = logical_index * period + firstTimeStamp
36 | **/
37 |
38 | package com.yahoo.egads.control;
39 |
40 | import java.util.ArrayList;
41 |
42 | import com.yahoo.egads.data.Anomaly;
43 | import com.yahoo.egads.data.Anomaly.IntervalSequence;
44 | import com.yahoo.egads.data.TimeSeries;
45 | import com.yahoo.egads.models.adm.AnomalyDetectionModel;
46 |
47 | public class AnomalyDetector {
48 |
49 | protected TimeSeries metric = null;
50 | protected ArrayList models = new ArrayList();
51 | protected ArrayList isTuned = new ArrayList();
52 | protected long firstTimeStamp = 0;
53 | protected long period;
54 |
55 | // Construction ////////////////////////////////////////////////////////////////////////////////
56 |
57 | public AnomalyDetector(TimeSeries theMetric, long period,
58 | long firstTimeStamp) throws Exception {
59 | if (theMetric == null) {
60 | throw new Exception("The input metric is null.");
61 | }
62 |
63 | metric = theMetric;
64 | this.period = period;
65 | this.firstTimeStamp = firstTimeStamp;
66 | }
67 |
68 | public AnomalyDetector(TimeSeries theMetric, long period) throws Exception {
69 | if (theMetric == null) {
70 | throw new Exception("The input metric is null.");
71 | }
72 |
73 | metric = theMetric;
74 | this.period = period;
75 |
76 | if (metric.data.size() > 0) {
77 | this.firstTimeStamp = metric.time(0);
78 | }
79 | }
80 |
81 | public AnomalyDetector(String theMetric, long period) throws Exception {
82 | this.period = period;
83 | // TODO:
84 | // 1 - load the models related to theMetric from ModelDB
85 | // 2 - push the loaded models into 'models'
86 | // 3 - create a new TimeSeries for theMetric and set 'metric'
87 | // 4 - set 'firstTimeStamp'
88 |
89 | int modelNum = models.size();
90 | for (int i = 0; i < modelNum; ++i) {
91 | isTuned.set(i, true);
92 | }
93 | }
94 |
95 | // Configuration Methods ////////////////////////////////////////////////////////////////
96 |
97 | public void setMetric(TimeSeries theMetric, long period) {
98 | metric = theMetric;
99 | this.period = period;
100 |
101 | if (metric.data.size() > 0) {
102 | this.firstTimeStamp = metric.time(0);
103 | }
104 |
105 | reset();
106 | }
107 |
108 | public void setMetric(TimeSeries theMetric, long period, long firstTimeStamp) {
109 | metric = theMetric;
110 | this.period = period;
111 | this.firstTimeStamp = firstTimeStamp;
112 | reset();
113 | }
114 |
115 | public void setMetric(String theMetric, long period) {
116 | this.period = period;
117 | firstTimeStamp = 0;
118 | models.clear();
119 | isTuned.clear();
120 |
121 | // TODO:
122 | // 1 - load the models related to theMetric from ModelDB
123 | // 2 - push the loaded models into 'models'
124 | // 3 - create a new TimeSeries for theMetric and set 'metric'
125 | // 4 - set 'firstTimeStamp'
126 |
127 | int modelNum = models.size();
128 | for (int i = 0; i < modelNum; ++i) {
129 | isTuned.set(i, true);
130 | }
131 | }
132 |
133 | public void addModel(AnomalyDetectionModel model) {
134 | model.reset();
135 | models.add(model);
136 | isTuned.add(false);
137 | }
138 |
139 | // Algorithmic Methods ////////////////////////////////////////////////////////////////////
140 |
141 | public void reset() {
142 | int i = 0;
143 | for (AnomalyDetectionModel model : models) {
144 | model.reset();
145 | isTuned.set(i, false);
146 | i++;
147 | }
148 | }
149 |
150 | public void tune(TimeSeries.DataSequence expectedValues) throws Exception {
151 | int i = 0;
152 |
153 | metric.data.setLogicalIndices(firstTimeStamp, period);
154 |
155 | for (AnomalyDetectionModel model : models) {
156 | if (!isTuned.get(i)) {
157 | model.tune(metric.data, expectedValues);
158 | isTuned.set(i, true);
159 | }
160 | i++;
161 | }
162 | }
163 |
164 | public ArrayList detect(TimeSeries observedSeries,
165 | TimeSeries.DataSequence expectedSeries) throws Exception {
166 | for (Boolean b : isTuned) {
167 | if (!b) {
168 | throw new Exception(
169 | "All the models need to be tuned before detection.");
170 | }
171 | }
172 |
173 | ArrayList result = new ArrayList();
174 | observedSeries.data.setLogicalIndices(firstTimeStamp, period);
175 | expectedSeries.setLogicalIndices(firstTimeStamp, period);
176 |
177 | for (AnomalyDetectionModel model : models) {
178 | Anomaly anomaly = new Anomaly(observedSeries.meta.name,
179 | observedSeries.meta);
180 | anomaly.modelName = model.getModelName();
181 | anomaly.type = model.getType();
182 | anomaly.intervals = model.detect(observedSeries.data,
183 | expectedSeries);
184 | anomaly.intervals.setLogicalIndices(firstTimeStamp, period);
185 | anomaly.intervals.setTimeStamps(firstTimeStamp, period);
186 | result.add(anomaly);
187 | }
188 |
189 | return result;
190 | }
191 | }
192 |
--------------------------------------------------------------------------------
/src/main/java/com/yahoo/egads/control/ModelAdapter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2015, Yahoo Inc.
3 | * Copyrights licensed under the GPL License.
4 | * See the accompanying LICENSE file for terms.
5 | */
6 |
7 | /*
8 | * Description: ModelAdapter applies a set of time-series models (algorithms) on a given metric. ModelAdapter provides
9 | * concrete mechanisms to apply one or more abstract algorithms on a time-series at the execution time; in other words,
10 | * application of a certain algorithm (model) on a given time series should be carried out via a ModelAdapter object.
11 | * Applying models directly to a time series is discouraged in EGADS, except for testing purposes.
12 | *
13 | * Inputs: 1. The 'metric' time series - Either an explicit TimeSeries object - or the String name of the time series
14 | * which would require the ModelAdapter to connect to ModelDB to load the appropriate models into the memory (under
15 | * construction)
16 | *
17 | * 2. The model(s) - Either an explicit TimeSeriesModel object via addModel() - or implicitly loaded from ModelDB when
18 | * the metric name is provided (under construction)
19 | *
20 | * Features: 1. Resetting all the added models via reset() 2. Training all the added models on the 'metric' via train()
21 | * 3. Updating all the added models for a new time series sequence via update() 4. Forecasting the value of the time
22 | * series according to all the added models for a given time period via forecast()
23 | *
24 | * Details: 1. The time units for interfacing with a ModelAdapter object is the standard UNIX timestamp; however,
25 | * ModelAdapter automatically performs logical indexing conversion for the abstract algorithms so that the actual models
26 | * can conveniently work with the logical index instead of UNIX timestamps. The conversion is:
27 | *
28 | * logical_index = (UNIX_timestamp - firstTimeStamp) div period; UNIX_timestamp = logical_index * period + firstTimeStamp
29 | */
30 |
31 | package com.yahoo.egads.control;
32 |
33 | import java.util.ArrayList;
34 |
35 | import com.yahoo.egads.data.TimeSeries;
36 | import com.yahoo.egads.models.tsmm.TimeSeriesModel;
37 |
38 | public class ModelAdapter { // Encapsulates a metric and the models operating on it
39 |
40 | protected TimeSeries metric = null;
41 | protected ArrayList models = new ArrayList();
42 | protected ArrayList isTrained = new ArrayList();
43 | protected long firstTimeStamp = 0;
44 | protected long period;
45 |
46 | // Construction ///////////////////////////////////////////////////////////
47 |
48 | public ModelAdapter(TimeSeries theMetric, long period, long firstTimeStamp) throws Exception {
49 | if (theMetric == null) {
50 | throw new Exception("The input metric is null.");
51 | }
52 |
53 | metric = theMetric;
54 | this.period = period;
55 | this.firstTimeStamp = firstTimeStamp;
56 | }
57 |
58 | public ModelAdapter(TimeSeries theMetric, long period) throws Exception {
59 | if (theMetric == null) {
60 | throw new Exception("The input metric is null.");
61 | }
62 |
63 | metric = theMetric;
64 | this.period = period;
65 |
66 | if (metric.data.size() > 0) {
67 | this.firstTimeStamp = metric.time(0);
68 | }
69 | }
70 |
71 | public ModelAdapter(String theMetric, long period) throws Exception {
72 | this.period = period;
73 | // TODO:
74 | // 1 - load the models related to theMetric from ModelDB
75 | // 2 - push the loaded models into 'models'
76 | // 3 - create a new TimeSeries for theMetric and set 'metric'
77 | // 4 - set 'firstTimeStamp'
78 |
79 | int modelNum = models.size();
80 | for (int i = 0; i < modelNum; ++i) {
81 | isTrained.set(i, true);
82 | }
83 | }
84 |
85 | // Configuration Methods ////////////////////////////////////////////////////////////////
86 |
87 | public void setMetric(TimeSeries theMetric, long period) {
88 | metric = theMetric;
89 | this.period = period;
90 |
91 | if (metric.data.size() > 0) {
92 | this.firstTimeStamp = metric.time(0);
93 | }
94 |
95 | reset();
96 | }
97 |
98 | public void setMetric(TimeSeries theMetric, long period, long firstTimeStamp) {
99 | metric = theMetric;
100 | this.period = period;
101 | this.firstTimeStamp = firstTimeStamp;
102 | reset();
103 | }
104 |
105 | public void setMetric(String theMetric, long period) {
106 | this.period = period;
107 | firstTimeStamp = 0;
108 | models.clear();
109 | isTrained.clear();
110 |
111 | // TODO:
112 | // 1 - load the models related to theMetric from ModelDB
113 | // 2 - push the loaded models into 'models'
114 | // 3 - create a new TimeSeries for theMetric and set 'metric'
115 | // 4 - set 'firstTimeStamp'
116 |
117 | int modelNum = models.size();
118 | for (int i = 0; i < modelNum; ++i) {
119 | isTrained.set(i, true);
120 | }
121 | }
122 |
123 | public void addModel(TimeSeriesModel model) {
124 | model.reset();
125 | models.add(model);
126 | isTrained.add(false);
127 | }
128 |
129 | public String[] getModelNames() {
130 | String[] names = new String[models.size()];
131 | for (int i = 0; i < models.size(); ++i) {
132 | names[i] = models.get(i).getModelName();
133 | }
134 |
135 | return names;
136 | }
137 |
138 | // Algorithmic Methods ////////////////////////////////////////////////////////////////////
139 |
140 | public void reset() {
141 | int i = 0;
142 | for (TimeSeriesModel model : models) {
143 | model.reset();
144 | isTrained.set(i, false);
145 | i++;
146 | }
147 | }
148 |
149 | public void train() throws Exception {
150 | int i = 0;
151 |
152 | metric.data.setLogicalIndices(firstTimeStamp, period);
153 |
154 | for (TimeSeriesModel model : models) {
155 | if (!isTrained.get(i)) {
156 | model.train(metric.data);
157 | isTrained.set(i, true);
158 | }
159 | i++;
160 | }
161 | }
162 |
163 | public void update(TimeSeries.DataSequence newData) throws Exception {
164 | if (newData == null) {
165 | throw new Exception("The input data sequence is null.");
166 | }
167 |
168 | for (Boolean b : isTrained) {
169 | if (!b) {
170 | throw new Exception("All the models need to be trained before updating.");
171 | }
172 | }
173 |
174 | if (newData.size() > 0) {
175 | newData.setLogicalIndices(firstTimeStamp, period);
176 |
177 | for (TimeSeriesModel model : models) {
178 | model.update(newData);
179 | }
180 | }
181 | }
182 |
183 | public ArrayList forecast(long from, long to) throws Exception {
184 | for (Boolean b : isTrained) {
185 | if (!b) {
186 | throw new Exception("All the models need to be trained before forecasting.");
187 | }
188 | }
189 |
190 | ArrayList result = new ArrayList();
191 |
192 | for (TimeSeriesModel model : models) {
193 | TimeSeries.DataSequence sequence = null;
194 | if (period != -1) {
195 | sequence = new TimeSeries.DataSequence(from, to, period);
196 | sequence.setLogicalIndices(firstTimeStamp, period);
197 | } else {
198 | sequence = new TimeSeries.DataSequence(metric.data.getTimes(), metric.data.getValues());
199 | }
200 |
201 | model.predict(sequence);
202 | result.add(sequence);
203 | }
204 | return result;
205 | }
206 | }
207 |
--------------------------------------------------------------------------------