├── .gitignore
├── README.md
├── model-builder
│   ├── iot_autoenc.R
│   ├── iot_autoenc.py
│   └── resources
│       ├── normal_20170202_2229.csv
│       ├── pre-fail_20170202_2234.csv
│       ├── state_0_loop_0.csv
│       ├── state_1_loop_1.csv
│       ├── verify_0.csv
│       └── verify_20170202_2243.csv
├── notebooks
│   └── LSTM.ipynb
└── predictions
    ├── .idea
    │   └── modules
    │       ├── predictions-build.iml
    │       └── predictions.iml
    ├── build.sbt
    ├── project
    │   └── assembly.sbt
    └── src
        └── main
            ├── java
            │   ├── KafkaStreamsConsumerFacade.java
            │   ├── MapRStreamsConsumerFacade.java
            │   └── iot_dl.java
            ├── resources
            │   └── dl.properties
            └── scala
                ├── Predictor.scala
                └── RingBuffer.scala
/.gitignore:
--------------------------------------------------------------------------------
1 | target/
2 | .idea/
3 |
4 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # iot-pipeline
2 |
3 | Source code used to generate the autoencoder IoT prediction model, and a predictor class that uses that model, presented at the Big Data Analytics Tokyo 2017 conference.
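4 | 
5 | ## Usage
6 | 
7 | A rough outline of the workflow. Commands and output paths below are illustrative; see `model-builder/iot_autoenc.R` and the header comment in `predictions/src/main/scala/Predictor.scala` for the authoritative details and the full list of predictor options.
8 | 
9 | ```
10 | # Train the autoencoder and export the POJO model plus its threshold into predictions/
11 | cd model-builder
12 | Rscript iot_autoenc.R
13 | 
14 | # Build the fat jar and run the predictor (on a server with the MapR client configured)
15 | cd ../predictions
16 | sbt assembly
17 | java -jar target/scala-2.11/iot-predictions-assembly-1.0.jar
18 | ```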
--------------------------------------------------------------------------------
/model-builder/iot_autoenc.R:
--------------------------------------------------------------------------------
1 | library(h2o)
2 | # Starts a local H2O node
3 | h2o.init()
4 |
5 | #
6 | # Function which "windows" the input matrix i.e. for matrix:
7 | # 1 2 3
8 | # 4 5 6
9 | # 7 8 9
10 | # 10 11 12
11 | #
12 | # And window 3 the result would be:
13 | # 1 2 3 4 5 6 7 8 9
14 | # 4 5 6 7 8 9 10 11 12
15 | #
16 | # It appends the window-1 following rows to each row, creating a matrix of size (rows-window+1, columns*window)
17 | ngram <- function(inp, window){
18 | rows <- dim(inp)[1]
19 | cols <- dim(inp)[2]
20 | resRows <- rows - window + 1
21 |
22 | res <- matrix(, nrow = resRows, ncol = window*cols)
23 | for(idx in 1:resRows) {
24 | if(window-1 > 0) {
25 | newRow <- c(inp[idx,], t(inp[(idx+1):(idx+window-1),]))
26 | } else {
27 | newRow <- inp[idx,]
28 | }
29 | if(idx %% 10000 == 0) {
30 | print(idx)
31 | }
32 | res[idx,] <- t(newRow)
33 | }
34 | return(res)
35 | }
36 |
37 | # Read training data into memory
38 | iotRaw <- read.csv("resources/normal_20170202_2229.csv")
39 |
40 | # Select training columns
41 | iot <- as.matrix(iotRaw[,c("LinAccX..g.","LinAccY..g.","LinAccZ..g.")])
42 |
43 | # Set training window and ngram
44 | window <- 200
45 | iot <- ngram(iot, window)
46 |
47 | # Send the data to H2O
48 | iot.hex <- as.h2o(iot)
49 |
50 | # Run the deeplearning model in autoencoder mode
51 | neurons <- 50
52 | iot.dl = h2o.deeplearning(model_id = "iot_dl", x = 1:(ncol(iot)), training_frame = iot.hex, autoencoder = TRUE, hidden = c(neurons), epochs = 100,
53 | l1 = 1e-5, l2 = 1e-5, max_w2 = 10, activation = "TanhWithDropout", initial_weight_distribution = "UniformAdaptive", adaptive_rate = TRUE)
54 |
55 | # Make predictions for training data
56 | iot_error <- h2o.anomaly(iot.dl, iot.hex)
57 |
58 | # Get the prediction threshold as 2*sd -> this should be found empirically on running data
59 | threshold <- sd(iot_error)*2
60 | print(nrow(iot_error[iot_error > threshold])/nrow(iot.hex))
61 |
62 | # If required check the model on anomaly data
63 | #anomalyRaw <- read.csv("resources/pre-fail_20170202_2234.csv")
64 | #anomaly <- as.matrix(anomalyRaw[,c("LinAccX..g.","LinAccY..g.","LinAccZ..g.")])
65 | #anomaly <- ngram(anomaly, window)
66 | #anomaly.hex <- as.h2o(anomaly)
67 | #anomaly_error <- h2o.anomaly(iot.dl, anomaly.hex)
68 | #print(nrow(anomaly_error[anomaly_error > threshold])/nrow(anomaly.hex))
69 |
70 | # Check the model on verification data
71 | verifyRaw <- read.csv("resources/verify_20170202_2243.csv")
72 | verify <- as.matrix(verifyRaw[,c("LinAccX..g.","LinAccY..g.","LinAccZ..g.")])
73 | verify <- ngram(verify, window)
74 | verify.hex <- as.h2o(verify)
75 | verify_error <- h2o.anomaly(iot.dl, verify.hex)
76 | print(nrow(verify_error[verify_error > threshold])/nrow(verify.hex))
77 |
78 | # Exports the H2O model as a Java class
79 | exportPojo <- function() {
80 | h2o.download_pojo(iot.dl, path="../predictions/src/main/java/")
81 | # download_pojo also downloads the h2o-genmodel.jar utility, which we don't need here
82 | unlink("../predictions/src/main/java/h2o-genmodel.jar")
83 | # Write the threshold to a properties file
84 | cat("threshold=",toString(threshold),file="../predictions/src/main/resources/dl.properties",sep="",append=F)
85 | }
86 |
87 | exportPojo()
88 |
89 | errors <- which(as.matrix(verify_error) > threshold, arr.ind=T)[,1]
90 | vals <- rep(list(1),length(errors))
91 |
92 | # Plot the result of our predictions for verification data
93 | attach(mtcars)
94 | par(mfrow=c(3,1))
95 | plot(verify[-c(errors),1], col="chartreuse4", xlab="Time", ylab="LinAccX")
96 | points(x=errors,y=verify[errors,1], col="red")
97 | plot(verify[-c(errors),2], col="chartreuse4", xlab="Time", ylab="LinAccY")
98 | points(x=errors,y=verify[errors,2], col="red")
99 | plot(verify[-c(errors),3], col="chartreuse4", xlab="Time", ylab="LinAccZ")
100 | points(x=errors,y=verify[errors,3], col="red")
--------------------------------------------------------------------------------
/model-builder/iot_autoenc.py:
--------------------------------------------------------------------------------
1 | import h2o
2 | import numpy as np
3 | import pandas as pd
4 | from h2o.estimators.deeplearning import H2OAutoEncoderEstimator
5 | 
6 | # Starts a local H2O node
7 | h2o.init()
8 | 
9 | #
10 | # Function which "windows" the input matrix, i.e. for the matrix:
11 | # 1 2 3
12 | # 4 5 6
13 | # 7 8 9
14 | # 10 11 12
15 | #
16 | # and window 3 the result would be:
17 | # 1 2 3 4 5 6 7 8 9
18 | # 4 5 6 7 8 9 10 11 12
19 | #
20 | # It appends the window-1 following rows to each row, creating a matrix of size (rows-window+1, columns*window)
21 | def ngram(inp, window):
22 |     rows, cols = inp.shape
23 |     resRows = rows - window + 1
24 | 
25 |     res = np.empty((resRows, window * cols))
26 |     for idx in range(resRows):
27 |         # Flatten the current row together with the following window-1 rows
28 |         res[idx, :] = inp[idx:idx + window, :].ravel()
29 |         if idx % 10000 == 0:
30 |             print(idx)
31 |     return res
32 | 
33 | # Read training data into memory
34 | iotRaw = pd.read_csv('resources/normal_20170202_2229.csv')
35 | 
36 | # Select training columns
37 | iot = iotRaw[["LinAccX..g.", "LinAccY..g.", "LinAccZ..g."]].values
38 | 
39 | # Set training window and ngram
40 | window = 200
41 | iot = ngram(iot, window)
42 | 
43 | # Send the data to H2O
44 | iot_hex = h2o.H2OFrame(iot)
45 | 
46 | # Run the deeplearning model in autoencoder mode
47 | neurons = 50
48 | iot_dl = H2OAutoEncoderEstimator(model_id = "iot_dl",
49 |                                  hidden = [neurons],
50 |                                  epochs = 100,
51 |                                  l1 = 1e-5, l2 = 1e-5, max_w2 = 10,
52 |                                  activation = "TanhWithDropout",
53 |                                  initial_weight_distribution = "UniformAdaptive",
54 |                                  adaptive_rate = True)
55 | 
56 | iot_dl.train(x = list(range(iot_hex.ncol)), training_frame = iot_hex)
57 | 
58 | # Make predictions (reconstruction errors) for training data
59 | iot_error = iot_dl.anomaly(iot_hex)
60 | 
61 | # Get the prediction threshold as 2*sd -> this should be found empirically on running data
62 | threshold = iot_error.sd()[0] * 2
63 | print(iot_error[iot_error > threshold].nrow / float(iot_hex.nrow))
64 | 
65 | # Check the model on verification data
66 | verifyRaw = pd.read_csv("resources/verify_20170202_2243.csv")
67 | verify = verifyRaw[["LinAccX..g.", "LinAccY..g.", "LinAccZ..g."]].values
68 | verify = ngram(verify, window)
69 | verify_hex = h2o.H2OFrame(verify)
70 | verify_error = iot_dl.anomaly(verify_hex)
71 | print(verify_error[verify_error > threshold].nrow / float(verify_hex.nrow))
72 | 
73 | # Exports the H2O model as a Java class
74 | def exportPojo():
75 |     h2o.download_pojo(iot_dl, path="../predictions/src/main/java/")
76 |     # Write the threshold to a properties file
77 |     with open('../predictions/src/main/resources/dl.properties', 'w') as f:
78 |         f.write("threshold=" + str(threshold))
79 | 
80 | exportPojo()
--------------------------------------------------------------------------------
/predictions/build.sbt:
--------------------------------------------------------------------------------
1 | name := "iot-predictions"
2 | version := "1.0"
3 | scalaVersion := "2.11.8"
4 |
5 | resolvers += Resolver.bintrayRepo("cakesolutions", "maven")
6 |
7 | resolvers += "mapr" at "http://repository.mapr.com/nexus/content/repositories/releases"
8 | resolvers += "confluentio" at "http://packages.confluent.io/maven/"
9 |
10 | libraryDependencies ++= Seq(
11 | "org.apache.kafka" % "kafka-clients" % "0.9.0.0-mapr-1602-streams-5.2.0",
12 | "ai.h2o" % "h2o-genmodel" % "3.10.2.2",
13 | "org.scala-lang.modules" %% "scala-parser-combinators" % "1.0.4",
14 | "io.confluent" % "kafka-json-serializer" % "3.1.0"
15 | )
16 |
17 | assemblyMergeStrategy in assembly := {
18 | case PathList("META-INF", xs @ _*) => MergeStrategy.discard
19 | case x => MergeStrategy.first
20 | }
--------------------------------------------------------------------------------
/predictions/project/assembly.sbt:
--------------------------------------------------------------------------------
1 | addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.3")
--------------------------------------------------------------------------------
/predictions/src/main/java/KafkaStreamsConsumerFacade.java:
--------------------------------------------------------------------------------
1 | import org.apache.kafka.clients.consumer.ConsumerRecords;
2 | import org.apache.kafka.clients.consumer.OffsetAndMetadata;
3 | import org.apache.kafka.common.TopicPartition;
4 |
5 | import java.util.Map;
6 |
7 | public interface KafkaStreamsConsumerFacade extends AutoCloseable {
8 | void prepareSetup();
9 |
10 | int partitionCount();
11 |
12 | void open();
13 |
14 | void close();
15 |
16 | String topic();
17 |
18 | ConsumerRecords<String, String> poll();
19 | 
20 | void commit(Map<TopicPartition, OffsetAndMetadata> commitMap);
21 | }
22 |
--------------------------------------------------------------------------------
/predictions/src/main/java/MapRStreamsConsumerFacade.java:
--------------------------------------------------------------------------------
1 | import org.apache.kafka.clients.consumer.Consumer;
2 | import org.apache.kafka.clients.consumer.ConsumerConfig;
3 | import org.apache.kafka.clients.consumer.ConsumerRecords;
4 | import org.apache.kafka.clients.consumer.KafkaConsumer;
5 | import org.apache.kafka.clients.consumer.OffsetAndMetadata;
6 | import org.apache.kafka.common.TopicPartition;
7 | import org.apache.kafka.common.errors.UnknownTopicOrPartitionException;
8 | import org.apache.kafka.common.serialization.StringDeserializer;
9 | import org.slf4j.Logger;
10 | import org.slf4j.LoggerFactory;
11 |
12 | import java.util.Collections;
13 | import java.util.LinkedHashMap;
14 | import java.util.Map;
15 | import java.util.Properties;
16 | import java.util.concurrent.atomic.AtomicBoolean;
17 | import java.util.concurrent.atomic.AtomicInteger;
18 |
19 | /**
20 | * Facade for consumer side API of MapR Streams.
21 | * Handles multi-threaded use case through reference counting with open() and close().
22 | */
23 | public class MapRStreamsConsumerFacade implements KafkaStreamsConsumerFacade {
24 | private static final Logger log = LoggerFactory.getLogger(MapRStreamsConsumerFacade.class);
25 |
26 | private static final long DEFAULT_TIMEOUT = 2_500L;
27 |
28 | private final AtomicInteger referenceCount = new AtomicInteger(0);
29 | private Consumer<String, String> streamsConsumer;
30 | private String pathTopic;
31 | private AtomicBoolean initialized = new AtomicBoolean(false);
32 |
33 | public MapRStreamsConsumerFacade(String pathTopic) {
34 | this.pathTopic = pathTopic;
35 | }
36 |
37 | private Properties createDefaultProperties() {
38 | Properties props = new Properties();
39 | final String topic = pathTopic.substring(pathTopic.indexOf(':') + 1);
40 | props.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "52.196.31.33:8082");
41 | props.setProperty(ConsumerConfig.GROUP_ID_CONFIG, "gid." + topic);
42 | props.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
43 | props.setProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "latest");
44 | props.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG,
45 | StringDeserializer.class.getCanonicalName());
46 | props.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG,
47 | StringDeserializer.class.getCanonicalName());
48 | log.debug("Properties: {}", props);
49 |
50 | return props;
51 | }
52 |
53 | @Override
54 | public void prepareSetup() {
55 | log.debug("prepareSetup() isInitialized={}", initialized.get());
56 | if (!initialized.getAndSet(true)) {
57 | streamsConsumer = new KafkaConsumer<>(createDefaultProperties());
58 | streamsConsumer.subscribe(Collections.singletonList(pathTopic));
59 |
60 | initializeCommitOffset();
61 | log.debug("prepareSetup() initialization complete!");
62 | }
63 | }
64 |
65 | private void initializeCommitOffset() {
66 | Map<TopicPartition, OffsetAndMetadata> commitMap = new LinkedHashMap<>();
67 |
68 | for (TopicPartition topicPartition : streamsConsumer.assignment()) {
69 | try {
70 | streamsConsumer.committed(topicPartition);
71 | } catch (UnknownTopicOrPartitionException e) {
72 | commitMap.put(topicPartition, new OffsetAndMetadata(1));
73 | }
74 | }
75 | log.debug("initializeCommitOffset() commitMap.size(): {}", commitMap.size());
76 |
77 | if (commitMap.size() > 0) {
78 | streamsConsumer.commitSync(commitMap);
79 | }
80 | }
81 |
82 | @Override
83 | public int partitionCount() {
84 | return streamsConsumer.partitionsFor(pathTopic).size();
85 | }
86 |
87 | @Override
88 | public void open() {
89 | log.debug("open() refs= {}", referenceCount.incrementAndGet());
90 | }
91 |
92 | @Override
93 | public void close() {
94 | if (0 >= referenceCount.decrementAndGet()) {
95 | streamsConsumer.close();
96 | }
97 | log.debug("close() refs= {}", referenceCount.get());
98 | }
99 |
100 | @Override
101 | public String topic() {
102 | return pathTopic;
103 | }
104 |
105 | @Override
106 | public ConsumerRecords<String, String> poll() {
107 | return streamsConsumer.poll(DEFAULT_TIMEOUT);
109 | }
110 |
111 | @Override
112 | public void commit(Map<TopicPartition, OffsetAndMetadata> commitMap) {
113 | streamsConsumer.commitSync(commitMap);
114 | }
115 |
116 | // for testing purposes ONLY!
117 | void setStreamsConsumer(Consumer<String, String> streamsConsumer) {
118 | this.streamsConsumer = streamsConsumer;
119 | }
120 | }
121 |
--------------------------------------------------------------------------------
/predictions/src/main/resources/dl.properties:
--------------------------------------------------------------------------------
1 | threshold=0.0158264307898104
--------------------------------------------------------------------------------
/predictions/src/main/scala/Predictor.scala:
--------------------------------------------------------------------------------
1 | /**
2 | * Usage:
3 | *
4 | * 1) Build from the predictions folder with `sbt assembly`
5 | * 2) Run (on the server with MapR CLI set up) with
6 | * java -jar iot-predictions-assembly-1.0.jar [threshold=0.003|failureRate=0.005|features=test1,test2,test3|timer=100|predictionCacheSize=1000]
7 | */
8 |
9 | import java.util.Properties
10 |
11 | import hex.genmodel.GenModel
12 | import io.confluent.kafka.serializers.KafkaJsonSerializer
13 | import org.apache.kafka.clients.consumer.{ConsumerRecord, ConsumerRecords, OffsetAndMetadata}
14 | import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
15 | import org.apache.kafka.common.TopicPartition
16 |
17 | import scala.collection.mutable
18 | import scala.io.Source
19 |
20 | object Predictor {
21 |
22 | // H2O generated POJO model name
23 | val modelClassName = "iot_dl"
24 |
25 | // Queues to read our data from/write our predictions to
26 | val sensorTopic = "/streams/sensor:sensor1"
27 | val predictionTopic = "/streams/sensor:sensor-state-test"
28 | // Kafka producer
29 | val producer: KafkaProducer[String, Int] = makeProducer()
30 |
31 | // Current state of the machine
32 | var state = 0
33 |
34 | // Prediction configurations
35 | // Prediction window size, has to be the same as the window size used for model training
36 | var window: Int = 200
37 | // How many recent predictions we keep cached
38 | var predictionCacheSize: Int = window*5
39 | // What fraction of cached predictions must be failures before we report an actual failure.
40 | var failureRate: Double = 5.0/predictionCacheSize.toDouble
41 |
42 | // Feature names
43 | var headers = Array("LinAccX..g.","LinAccY..g.","LinAccZ..g.")
44 | // Number of features per observation
45 | var features: Int = headers.length
46 | // How often we send status updates to the system
47 | var timer: Double = window * 10
48 |
49 | var threshold: Double = {
50 | val props = new Properties()
51 | props.load(Source.fromURL(getClass.getResource("/dl.properties")).bufferedReader())
52 | val t = props.get("threshold").toString.toDouble
53 | println(s"Setting threshold to $t")
54 | t
55 | }
56 |
57 | // Create a Kafka producer
58 | private def makeProducer() = {
59 | val props = new Properties()
60 |
61 | props.put("key.serializer", classOf[KafkaJsonSerializer[String]].getCanonicalName)
62 | props.put("value.serializer", classOf[KafkaJsonSerializer[String]].getCanonicalName)
63 |
64 | new KafkaProducer[String, Int](props)
65 | }
66 |
67 | def main(args: Array[String]): Unit = {
68 | // Parse command line config
69 | for(arg <- args) {
70 | val kv = arg.split("=")
71 | if(kv(0) == "threshold") {
72 | print(s"Setting threshold to ${kv(1).toDouble} from the command line.")
73 | threshold = kv(1).toDouble
74 | } else if(kv(0) == "features") {
75 | print(s"Setting headers to [${kv(1)}]")
76 | headers = kv(1).split(",")
77 | features = headers.length
78 | } else if(kv(0) == "failureRate") {
79 | print(s"Setting failureRate to [${kv(1)}]")
80 | failureRate = kv(1).toDouble
81 | } else if(kv(0) == "timer") {
82 | print(s"Setting timer to [${kv(1)}]")
83 | timer = kv(1).toDouble
84 | } else if(kv(0) == "predictionCacheSize") {
85 | print(s"Setting predictionCacheSize to [${kv(1)}]")
86 | predictionCacheSize = kv(1).toInt
87 | failureRate = 1.0/(5.0*predictionCacheSize.toDouble)
88 | }
89 | }
90 |
91 | // Sender sending data to the Kafka queue
92 | new Thread() {
93 | override def run(): Unit = {
94 | while(true) {
95 | Thread.sleep(timer.toLong)
96 | pushPrediction(state)
97 | }
98 | }
99 | }.start()
100 |
101 | // Kafka consumer reading sensor data from the queue
102 | val consumer = new MapRStreamsConsumerFacade(sensorTopic)
103 | try {
104 | consumer.prepareSetup()
105 | println("Prepared")
106 | consumer.open()
107 | println("Opened")
108 | poll(consumer)
109 | } finally {
110 | producer.close()
111 | consumer.close()
112 | }
113 | }
114 |
115 | private def pushPrediction(label: Int) = {
116 | println(s"Pushing prediction $label")
117 |
118 | producer.send(
119 | new ProducerRecord[String, Int](
120 | predictionTopic,
121 | "state",
122 | label
123 | )
124 | )
125 | }
126 |
127 | import scala.collection.JavaConversions._
128 |
129 | def poll(consumer: MapRStreamsConsumerFacade): Unit = {
130 | val fullWindowFeatures = features * window
131 |
132 | // Data used for predictions
133 | val inputRB: RingBuffer[Double] = new RingBuffer(fullWindowFeatures)
134 |
135 | // Model generated by H2O
136 | val rawModel: GenModel = Class.forName(modelClassName).newInstance().asInstanceOf[GenModel]
137 |
138 | // Previous predictions
139 | val rb: RingBuffer[Int] = new RingBuffer(predictionCacheSize)
140 | while(true) {
141 | val commitMap = new mutable.LinkedHashMap[TopicPartition, OffsetAndMetadata]()
142 |
143 | val records: ConsumerRecords[String, String] = consumer.poll()
144 | println("Polled " + records.count())
145 |
146 | for(record: ConsumerRecord[String, String] <- records) {
147 | val split = record.value().replaceAll("\"", "").split(",")
148 | if(split.length >= features) {
149 | val input = split.takeRight(features).map(_.toDouble)
150 | for(i <- input) {
151 | inputRB.+=(i)
152 | }
153 |
154 | // We need a full window of readings before we can start predicting
155 | if(inputRB.length == fullWindowFeatures) {
156 | val preds = Array.fill[Double](fullWindowFeatures){0}
157 | // Making a prediction
158 | val pred = rawModel.score0(inputRB.toArray, preds)
159 |
160 | // Calculate the mean squared reconstruction error of our prediction
161 | val mse = inputRB.zip(pred).map { case (i, p) => (p - i) * (p - i) }.sum / fullWindowFeatures.toDouble
162 | 
163 | // If the reconstruction error is big enough we classify it as a failure (1), otherwise 0
164 | val label = if (mse > threshold) 1 else 0
165 |
166 | rb.+=(label)
167 |
168 | // If at least failureRate of the predictions in our cache are failures, set the state to failed
169 | if ((rb.sum.toDouble / rb.length.toDouble) >= failureRate) {
170 | state = 1
171 | } else {
172 | state = 0
173 | }
174 | }
175 | }
176 | // Remember the offset of the consumed record so the batch can be committed below
177 | commitMap.put(new TopicPartition(record.topic(), record.partition()), new OffsetAndMetadata(record.offset() + 1))
178 | }
177 |
178 | if (commitMap.nonEmpty) {
179 | // Notify the Kafka consumer we got the data
180 | consumer.commit(commitMap.toMap[TopicPartition, OffsetAndMetadata])
181 | }
182 | }
183 | }
184 |
185 | }
186 |
--------------------------------------------------------------------------------
/predictions/src/main/scala/RingBuffer.scala:
--------------------------------------------------------------------------------
1 | import scala.collection.mutable._
2 | import scala.collection.generic._
3 |
4 | class RingBuffer[A](m: Int) extends Buffer[A] with GenericTraversableTemplate[A, RingBuffer] with BufferLike[A, RingBuffer[A]] with Builder[A, List[A]] {
5 | private val buf = new ListBuffer[A]
6 |
7 | private def resize(): Unit = while (buf.size > m) buf.remove(0)
8 |
9 | def length = buf.length
10 | override def apply(n: Int): A = buf.apply(n)
11 | def update(n: Int, x: A) = buf.update(n, x)
12 | def +=(x: A): this.type = { buf.+=(x); resize(); this }
13 | def clear() = buf.clear()
14 | def +=:(x: A): this.type = { buf.+=:(x); resize(); this }
15 | def insertAll(n: Int, seq: scala.collection.Traversable[A]): Unit = buf.insertAll(n, seq)
16 | override def remove(n: Int, count: Int) = buf.remove(n, count)
17 | def result(): List[A] = buf.result()
18 | override def toList: List[A] = buf.toList
19 | def prependToList(xs: List[A]): List[A] = buf.prependToList(xs)
20 | def remove(n: Int): A = buf.remove(n)
21 | override def -=(elem: A): this.type = { buf.-=(elem); this }
22 | override def iterator = buf.iterator
23 | // override def readOnly: List[A] = buf.readOnly
24 | override def equals(that: Any): Boolean = buf.equals(that)
25 | override def clone(): RingBuffer[A] = new RingBuffer(m) ++= this
26 | override def stringPrefix: String = "RingBuffer"
27 | override def companion: GenericCompanion[RingBuffer] = RingBuffer
28 | }
29 |
30 | object RingBuffer extends SeqFactory[RingBuffer] {
31 | implicit def canBuildFrom[A]: CanBuildFrom[Coll, A, RingBuffer[A]] = new GenericCanBuildFrom[A]
32 | def newBuilder[A]: Builder[A, RingBuffer[A]] = new GrowingBuilder(new RingBuffer[A](Int.MaxValue))
33 | }
34 |
--------------------------------------------------------------------------------