├── .gitignore
├── README.md
├── model-builder
│   ├── iot_autoenc.R
│   ├── iot_autoenc.py
│   └── resources
│       ├── normal_20170202_2229.csv
│       ├── pre-fail_20170202_2234.csv
│       ├── state_0_loop_0.csv
│       ├── state_1_loop_1.csv
│       ├── verify_0.csv
│       └── verify_20170202_2243.csv
├── notebooks
│   └── LSTM.ipynb
└── predictions
    ├── .idea
    │   └── modules
    │       ├── predictions-build.iml
    │       └── predictions.iml
    ├── build.sbt
    ├── project
    │   └── assembly.sbt
    └── src
        └── main
            ├── java
            │   ├── KafkaStreamsConsumerFacade.java
            │   ├── MapRStreamsConsumerFacade.java
            │   └── iot_dl.java
            ├── resources
            │   └── dl.properties
            └── scala
                ├── Predictor.scala
                └── RingBuffer.scala

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
target/
.idea/

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# iot-pipeline

Source code used to build the autoencoder-based IoT failure-prediction model, together with a predictor class that applies that model, presented at the Big Data Analytics Tokyo 2017 conference.

--------------------------------------------------------------------------------
/model-builder/iot_autoenc.R:
--------------------------------------------------------------------------------
library(h2o)
# Starts a local H2O node
h2o.init()

#
# Function which "windows" the input matrix, i.e. for the matrix:
#  1  2  3
#  4  5  6
#  7  8  9
# 10 11 12
#
# and window 3 the result would be:
# 1 2 3 4 5 6  7  8  9
# 4 5 6 7 8 9 10 11 12
#
# It appends the window-1 following rows to each row, creating a matrix of
# size (rows - window + 1) x (columns * window).
ngram <- function(inp, window){
  rows <- dim(inp)[1]
  cols <- dim(inp)[2]
  resRows <- rows - window + 1

  res <- matrix(NA, nrow = resRows, ncol = window*cols)
  for(idx in 1:resRows) {
    if(window-1 > 0) {
      newRow <- c(inp[idx,], t(inp[(idx+1):(idx+window-1),]))
    } else {
      newRow <- inp[idx,]
    }
    if(idx %% 10000 == 0) {
      print(idx)
    }
    res[idx,] <- t(newRow)
  }
  return(res)
}
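
# Worked example (illustrative only, not part of the original script): with window = 3
# the 4x3 matrix from the comment above collapses into a 2x9 matrix.
#   m <- matrix(1:12, nrow = 4, byrow = TRUE)
#   ngram(m, 3)
#   #      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
#   # [1,]    1    2    3    4    5    6    7    8    9
#   # [2,]    4    5    6    7    8    9   10   11   12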

# Read training data into memory
iotRaw <- read.csv("resources/normal_20170202_2229.csv")

# Select training columns
iot <- as.matrix(iotRaw[,c("LinAccX..g.","LinAccY..g.","LinAccZ..g.")])

# Set the training window and ngram the data
window <- 200
iot <- ngram(iot, window)

# Send the data to H2O
iot.hex <- as.h2o(iot)

# Run the deeplearning model in autoencoder mode
neurons <- 50
iot.dl <- h2o.deeplearning(model_id = "iot_dl", x = 1:(ncol(iot)), training_frame = iot.hex, autoencoder = TRUE, hidden = c(neurons), epochs = 100,
                           l1 = 1e-5, l2 = 1e-5, max_w2 = 10, activation = "TanhWithDropout", initial_weight_distribution = "UniformAdaptive", adaptive_rate = TRUE)

# Compute reconstruction errors for the training data
iot_error <- h2o.anomaly(iot.dl, iot.hex)

# Set the prediction threshold to 2*sd -> in practice it should be tuned empirically on live data
threshold <- sd(iot_error)*2
print(nrow(iot_error[iot_error > threshold])/nrow(iot.hex))
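
# For reference: the threshold checked into predictions/src/main/resources/dl.properties
# is 0.0158264307898104, i.e. a run where sd(iot_error) was roughly 0.0079.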

# If required, check the model on anomaly data
#anomalyRaw <- read.csv("resources/pre-fail_20170202_2234.csv")
#anomaly <- as.matrix(anomalyRaw[,c("LinAccX..g.","LinAccY..g.","LinAccZ..g.")])
#anomaly <- ngram(anomaly, window)
#anomaly.hex <- as.h2o(anomaly)
#anomaly_error <- h2o.anomaly(iot.dl, anomaly.hex)
#print(nrow(anomaly_error[anomaly_error > threshold])/nrow(anomaly.hex))

# Check the model on verification data
verifyRaw <- read.csv("resources/verify_20170202_2243.csv")
verify <- as.matrix(verifyRaw[,c("LinAccX..g.","LinAccY..g.","LinAccZ..g.")])
verify <- ngram(verify, window)
verify.hex <- as.h2o(verify)
verify_error <- h2o.anomaly(iot.dl, verify.hex)
print(nrow(verify_error[verify_error > threshold])/nrow(verify.hex))

# Exports the H2O model as a Java class
exportPojo <- function() {
  h2o.download_pojo(iot.dl, path="../predictions/src/main/java/")
  # The download also fetches the h2o-genmodel.jar utility, which we don't need for the autoencoder POJO
  unlink("../predictions/src/main/java/h2o-genmodel.jar")
  # Write the threshold to a properties file
  cat("threshold=", toString(threshold), file="../predictions/src/main/resources/dl.properties", sep="", append=F)
}

exportPojo()

errors <- which(as.matrix(verify_error) > threshold, arr.ind=T)[,1]
vals <- rep(list(1), length(errors))

# Plot the result of our predictions for the verification data
par(mfrow=c(3,1))
plot(verify[-c(errors),1], col="chartreuse4", xlab="Time", ylab="LinAccX")
points(x=errors, y=verify[errors,1], col="red")
plot(verify[-c(errors),2], col="chartreuse4", xlab="Time", ylab="LinAccY")
points(x=errors, y=verify[errors,2], col="red")
plot(verify[-c(errors),3], col="chartreuse4", xlab="Time", ylab="LinAccZ")
points(x=errors, y=verify[errors,3], col="red")

--------------------------------------------------------------------------------
/model-builder/iot_autoenc.py:
--------------------------------------------------------------------------------
import h2o
import numpy as np
import pandas as pd
from h2o.estimators.deeplearning import H2OAutoEncoderEstimator

# Starts a local H2O node
h2o.init()

#
# Function which "windows" the input matrix, i.e. for the matrix:
#  1  2  3
#  4  5  6
#  7  8  9
# 10 11 12
#
# and window 3 the result would be:
# 1 2 3 4 5 6  7  8  9
# 4 5 6 7 8 9 10 11 12
#
# It appends the window-1 following rows to each row, creating a matrix of
# size (rows - window + 1) x (columns * window).
def ngram(inp, window):
    inp = np.asarray(inp)
    rows, cols = inp.shape
    resRows = rows - window + 1

    res = np.empty((resRows, window * cols))
    for idx in range(resRows):
        # Rows idx .. idx+window-1, flattened row by row into a single row
        res[idx, :] = inp[idx:idx + window, :].ravel()
        if idx % 10000 == 0:
            print(idx)
    return res

# Read training data into memory
iotRaw = pd.read_csv('resources/normal_20170202_2229.csv')

# Select training columns (names as used in the R script; adjust if the raw CSV headers differ)
iot = iotRaw[["LinAccX..g.", "LinAccY..g.", "LinAccZ..g."]]

# Set the training window and ngram the data
window = 200
iot = ngram(iot, window)

# Send the data to H2O
iot_hex = h2o.H2OFrame(pd.DataFrame(iot))

# Run the deeplearning model in autoencoder mode
neurons = 50
iot_dl = H2OAutoEncoderEstimator(model_id="iot_dl",
                                 hidden=[neurons],
                                 epochs=100,
                                 l1=1e-5, l2=1e-5, max_w2=10,
                                 activation="TanhWithDropout",
                                 initial_weight_distribution="UniformAdaptive",
                                 adaptive_rate=True)

iot_dl.train(x=list(range(iot.shape[1])), training_frame=iot_hex)

# Compute reconstruction errors for the training data
iot_error = iot_dl.anomaly(iot_hex).as_data_frame().iloc[:, 0]

# Set the prediction threshold to 2*sd -> in practice it should be tuned empirically on live data
threshold = float(iot_error.std()) * 2
print((iot_error > threshold).mean())

# Check the model on verification data
verifyRaw = pd.read_csv('resources/verify_20170202_2243.csv')
verify = ngram(verifyRaw[["LinAccX..g.", "LinAccY..g.", "LinAccZ..g."]], window)
verify_hex = h2o.H2OFrame(pd.DataFrame(verify))
verify_error = iot_dl.anomaly(verify_hex).as_data_frame().iloc[:, 0]
print((verify_error > threshold).mean())

# Exports the H2O model as a Java class
def exportPojo():
    h2o.download_pojo(iot_dl, path="../predictions/src/main/java/")
    # Write the threshold to a properties file
    with open('../predictions/src/main/resources/dl.properties', 'w') as f:
        f.write("threshold=" + str(threshold))

exportPojo()

--------------------------------------------------------------------------------
/predictions/build.sbt:
--------------------------------------------------------------------------------
name := "iot-predictions"
version := "1.0"
scalaVersion := "2.11.8"

resolvers += Resolver.bintrayRepo("cakesolutions", "maven")

resolvers += "mapr" at "http://repository.mapr.com/nexus/content/repositories/releases"
resolvers += "confluentio" at "http://packages.confluent.io/maven/"

libraryDependencies ++= Seq(
  "org.apache.kafka" % "kafka-clients" % "0.9.0.0-mapr-1602-streams-5.2.0",
  "ai.h2o" % "h2o-genmodel" % "3.10.2.2",
  "org.scala-lang.modules" %% "scala-parser-combinators" % "1.0.4",
  "io.confluent" % "kafka-json-serializer" % "3.1.0"
)

assemblyMergeStrategy in assembly := {
  case PathList("META-INF", xs @ _*) => MergeStrategy.discard
  case x => MergeStrategy.first
}

--------------------------------------------------------------------------------
/predictions/project/assembly.sbt:
--------------------------------------------------------------------------------
addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.3")

--------------------------------------------------------------------------------
/predictions/src/main/java/KafkaStreamsConsumerFacade.java:
--------------------------------------------------------------------------------
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.common.TopicPartition;

import java.util.Map;

public interface KafkaStreamsConsumerFacade extends AutoCloseable {
    void prepareSetup();

    int partitionCount();

    void open();

    void close();

    String topic();

    ConsumerRecords<String, String> poll();

    void commit(Map<TopicPartition, OffsetAndMetadata> commitMap);
}

--------------------------------------------------------------------------------
/predictions/src/main/java/MapRStreamsConsumerFacade.java:
--------------------------------------------------------------------------------
import org.apache.kafka.clients.consumer.Consumer;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.errors.UnknownTopicOrPartitionException;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;

/**
 * Facade for the consumer-side API of MapR Streams.
 * Handles the multi-threaded use case through reference counting with open() and close();
 * a usage sketch follows this file.
 */
public class MapRStreamsConsumerFacade implements KafkaStreamsConsumerFacade {
    private static final Logger log = LoggerFactory.getLogger(MapRStreamsConsumerFacade.class);

    private static final long DEFAULT_TIMEOUT = 2_500L;

    private final AtomicInteger referenceCount = new AtomicInteger(0);
    private Consumer<String, String> streamsConsumer;
    private String pathTopic;
    private AtomicBoolean initialized = new AtomicBoolean(false);

    public MapRStreamsConsumerFacade(String pathTopic) {
        this.pathTopic = pathTopic;
    }

    private Properties createDefaultProperties() {
        Properties props = new Properties();
        final String topic = pathTopic.substring(pathTopic.indexOf(':') + 1);
        props.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "52.196.31.33:8082");
        props.setProperty(ConsumerConfig.GROUP_ID_CONFIG, "gid." + topic);
        props.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
        props.setProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "latest");
        props.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG,
                StringDeserializer.class.getCanonicalName());
        props.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG,
                StringDeserializer.class.getCanonicalName());
        log.debug("Properties: {}", props);

        return props;
    }

    @Override
    public void prepareSetup() {
        log.debug("prepareSetup() isInitialized={}", initialized.get());
        if (!initialized.getAndSet(true)) {
            streamsConsumer = new KafkaConsumer<>(createDefaultProperties());
            streamsConsumer.subscribe(Collections.singletonList(pathTopic));

            initializeCommitOffset();
            log.debug("prepareSetup() initialization complete!");
        }
    }

    private void initializeCommitOffset() {
        Map<TopicPartition, OffsetAndMetadata> commitMap = new LinkedHashMap<>();

        for (TopicPartition topicPartition : streamsConsumer.assignment()) {
            try {
                streamsConsumer.committed(topicPartition);
            } catch (UnknownTopicOrPartitionException e) {
                commitMap.put(topicPartition, new OffsetAndMetadata(1));
            }
        }
        log.debug("initializeCommitOffset() commitMap.size(): {}", commitMap.size());

        if (commitMap.size() > 0) {
            streamsConsumer.commitSync(commitMap);
        }
    }

    @Override
    public int partitionCount() {
        return streamsConsumer.partitionsFor(pathTopic).size();
    }

    @Override
    public void open() {
        log.debug("open() refs= {}", referenceCount.incrementAndGet());
    }

    @Override
    public void close() {
        if (0 >= referenceCount.decrementAndGet()) {
            streamsConsumer.close();
        }
        log.debug("close() refs= {}", referenceCount.get());
    }

    @Override
    public String topic() {
        return pathTopic;
    }

    @Override
    public ConsumerRecords<String, String> poll() {
        return streamsConsumer.poll(DEFAULT_TIMEOUT);
    }

    @Override
    public void commit(Map<TopicPartition, OffsetAndMetadata> commitMap) {
        streamsConsumer.commitSync(commitMap);
    }

    // for testing purposes ONLY!
    void setStreamsConsumer(Consumer<String, String> streamsConsumer) {
        this.streamsConsumer = streamsConsumer;
    }
}
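
A minimal usage sketch of the facade above (Scala, mirroring how Predictor.scala drives it; the topic path is the one used elsewhere in this repository, and the commit-map construction is elided):

    // Sketch only: open/poll/close cycle against the sensor stream.
    val facade = new MapRStreamsConsumerFacade("/streams/sensor:sensor1")
    try {
      facade.prepareSetup()        // creates and subscribes the underlying KafkaConsumer
      facade.open()                // increments the reference count
      val records = facade.poll()  // blocks for up to DEFAULT_TIMEOUT ms
      println(s"polled ${records.count()} records from ${facade.topic()}")
      // ... process the records, build a commit map, then call facade.commit(commitMap)
    } finally {
      facade.close()               // the consumer is closed once the reference count drops to 0
    }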

--------------------------------------------------------------------------------
/predictions/src/main/resources/dl.properties:
--------------------------------------------------------------------------------
threshold=0.0158264307898104

--------------------------------------------------------------------------------
/predictions/src/main/scala/Predictor.scala:
--------------------------------------------------------------------------------
/**
 * Usage:
 *
 * 1) Build from the predictions folder with `sbt assembly`
 * 2) Run (on the server with the MapR CLI set up) with
 *    java -jar iot-predictions-assembly-1.0.jar [threshold=0.003|failureRate=0.005|features=test1,test2,test3|timer=100|predictionCacheSize=1000]
 */
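
// Example invocation (values shown for illustration only):
//   java -jar iot-predictions-assembly-1.0.jar threshold=0.0158 failureRate=0.005 timer=2000
// Any option that is omitted falls back to the defaults defined below
// (threshold is read from dl.properties, window = 200, predictionCacheSize = window * 5, ...).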

import java.util.Properties

import hex.genmodel.GenModel
import io.confluent.kafka.serializers.KafkaJsonSerializer
import org.apache.kafka.clients.consumer.{ConsumerRecord, ConsumerRecords, OffsetAndMetadata}
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
import org.apache.kafka.common.TopicPartition

import scala.collection.mutable
import scala.io.Source

object Predictor {

  // H2O generated POJO model name
  val modelClassName = "iot_dl"

  // Queues to read our sensor data from / write our predictions to
  val sensorTopic = "/streams/sensor:sensor1"
  val predictionTopic = "/streams/sensor:sensor-state-test"
  // Kafka producer
  val producer: KafkaProducer[String, Int] = makeProducer()

  // Current state of the machine
  var state = 0

  // Prediction configuration
  // Prediction window size; has to be the same as the window size used for model training
  var window: Int = 200
  // How many past predictions we keep
  var predictionCacheSize: Int = window*5
  // Fraction of cached predictions that must be failures before we report an actual failure
  var failureRate: Double = 5.0/predictionCacheSize.toDouble

  // Feature names
  var headers = Array("LinAccX..g.","LinAccY..g.","LinAccZ..g.")
  // Number of features per observation
  var features: Int = headers.length
  // How often we send status updates to the system (milliseconds)
  var timer: Double = window * 10

  var threshold: Double = {
    val props = new Properties()
    props.load(Source.fromURL(getClass.getResource("/dl.properties")).bufferedReader())
    val t = props.get("threshold").toString.toDouble
    println(s"Setting threshold to $t")
    t
  }

  // Create a Kafka producer
  private def makeProducer() = {
    val props = new Properties()

    props.put("key.serializer", classOf[KafkaJsonSerializer[String]].getCanonicalName)
    props.put("value.serializer", classOf[KafkaJsonSerializer[String]].getCanonicalName)

    new KafkaProducer[String, Int](props)
  }

  def main(args: Array[String]): Unit = {
    // Parse command line config
    for(arg <- args) {
      val kv = arg.split("=")
      if(kv(0) == "threshold") {
        print(s"Setting threshold to ${kv(1).toDouble} from the command line.")
        threshold = kv(1).toDouble
      } else if(kv(0) == "features") {
        print(s"Setting headers to [${kv(1)}]")
        headers = kv(1).split(",")
        features = headers.length
      } else if(kv(0) == "failureRate") {
        print(s"Setting failureRate to [${kv(1)}]")
        failureRate = kv(1).toDouble
      } else if(kv(0) == "timer") {
        print(s"Setting timer to [${kv(1)}]")
        timer = kv(1).toDouble
      } else if(kv(0) == "predictionCacheSize") {
        print(s"Setting predictionCacheSize to [${kv(1)}]")
        predictionCacheSize = kv(1).toInt
        failureRate = 1.0/(5.0*predictionCacheSize.toDouble)
      }
    }

    // Sender thread pushing the current state to the Kafka queue
    new Thread() {
      override def run(): Unit = {
        while(true) {
          Thread.sleep(timer.toLong)
          pushPrediction(state)
        }
      }
    }.start()

    // Kafka consumer reading sensor data from the queue
    val consumer = new MapRStreamsConsumerFacade(sensorTopic)
    try {
      consumer.prepareSetup()
      println("Prepared")
      consumer.open()
      println("Opened")
      poll(consumer)
    } finally {
      producer.close()
      consumer.close()
    }
  }

  private def pushPrediction(label: Int) = {
    println(s"Pushing prediction $label")

    producer.send(
      new ProducerRecord[String, Int](
        predictionTopic,
        "state",
        label
      )
    )
  }

  import scala.collection.JavaConversions._

  def poll(consumer: MapRStreamsConsumerFacade): Unit = {
    val fullWindowFeatures = features * window

    // Readings used for predictions
    val inputRB: RingBuffer[Double] = new RingBuffer(fullWindowFeatures)

    // Model generated by H2O
    val rawModel: GenModel = Class.forName(modelClassName).newInstance().asInstanceOf[GenModel]

    // Previous predictions
    val rb: RingBuffer[Int] = new RingBuffer(predictionCacheSize)
    while(true) {
      val commitMap = new mutable.LinkedHashMap[TopicPartition, OffsetAndMetadata]()

      val records: ConsumerRecords[String, String] = consumer.poll()
      println("Polled " + records.count())

      for(record: ConsumerRecord[String, String] <- records) {
        val split = record.value().replaceAll("\"", "").split(",")
        if(split.length >= features) {
          val input = split.takeRight(features).map(_.toDouble)
          for(i <- input) {
            inputRB += i
          }

          // We can only start predicting once we have accumulated a full window of readings
          if(inputRB.length == fullWindowFeatures) {
            val preds = Array.fill[Double](fullWindowFeatures){0}
            // Make a prediction (reconstruct the window with the autoencoder)
            val pred = rawModel.score0(inputRB.toArray, preds)

            // Calculate the mean squared reconstruction error of our prediction
            val mse = inputRB.zip(pred).map { case (i, p) => (p - i) * (p - i) }.sum / fullWindowFeatures.toDouble

            // If the reconstruction error is big enough we classify the window as a failure (1), otherwise as normal (0)
            val label = if (mse > threshold) 1 else 0

            rb += label

            // If at least failureRate of the predictions in our cache are failures, set the state to failed
            if ((rb.sum.toDouble / rb.length.toDouble) >= failureRate) {
              state = 1
            } else {
              state = 0
            }
          }
        }
      }

      if (commitMap.nonEmpty) {
        // Notify the Kafka consumer that we processed the data
        consumer.commit(commitMap.toMap[TopicPartition, OffsetAndMetadata])
      }
    }
  }

}
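
The interaction of predictionCacheSize and failureRate above can be hard to read out of the polling loop, so here is a minimal sketch under the default settings (predictionCacheSize = 1000, failureRate = 5/1000); the object name is made up for illustration, and RingBuffer is the class defined in the next file:

    // Sketch only: mirrors the state update in Predictor.poll() with the default settings.
    object FailureRateSketch extends App {
      val cache = new RingBuffer[Int](1000)   // last 1000 per-window labels (0 = normal, 1 = failure)
      (1 to 995).foreach(_ => cache += 0)     // 995 normal windows ...
      (1 to 5).foreach(_ => cache += 1)       // ... followed by 5 anomalous ones
      val failureRate = 5.0 / 1000
      val state = if (cache.sum.toDouble / cache.length >= failureRate) 1 else 0
      println(state)                          // prints 1: five failures in the last 1000 windows flag the machine
    }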

--------------------------------------------------------------------------------
/predictions/src/main/scala/RingBuffer.scala:
--------------------------------------------------------------------------------
import scala.collection.mutable._
import scala.collection.generic._

class RingBuffer[A](m: Int) extends Buffer[A] with GenericTraversableTemplate[A, RingBuffer] with BufferLike[A, RingBuffer[A]] with Builder[A, List[A]] {
  private val buf = new ListBuffer[A]

  private def resize(): Unit = while (buf.size > m) buf.remove(0)

  def length = buf.length
  override def apply(n: Int): A = buf.apply(n)
  def update(n: Int, x: A) = buf.update(n, x)
  def +=(x: A): this.type = { buf.+=(x); resize(); this }
  def clear() = buf.clear()
  def +=:(x: A): this.type = { buf.+=:(x); resize(); this }
  def insertAll(n: Int, seq: scala.collection.Traversable[A]): Unit = buf.insertAll(n, seq)
  override def remove(n: Int, count: Int) = buf.remove(n, count)
  def result(): List[A] = buf.result()
  override def toList: List[A] = buf.toList
  def prependToList(xs: List[A]): List[A] = buf.prependToList(xs)
  def remove(n: Int): A = buf.remove(n)
  override def -=(elem: A): this.type = { buf.-=(elem); this }
  override def iterator = buf.iterator
  // override def readOnly: List[A] = buf.readOnly
  override def equals(that: Any): Boolean = buf.equals(that)
  override def clone(): RingBuffer[A] = new RingBuffer(m) ++= this
  override def stringPrefix: String = "RingBuffer"
  override def companion: GenericCompanion[RingBuffer] = RingBuffer
}

object RingBuffer extends SeqFactory[RingBuffer] {
  implicit def canBuildFrom[A]: CanBuildFrom[Coll, A, RingBuffer[A]] = new GenericCanBuildFrom[A]
  def newBuilder[A]: Builder[A, RingBuffer[A]] = new GrowingBuilder(new RingBuffer[A](Int.MaxValue))
}

--------------------------------------------------------------------------------
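
RingBuffer keeps only the most recent m elements, which is what lets Predictor treat it as a sliding window over readings and past labels. A small usage sketch (illustrative only):

    val rb = new RingBuffer[Int](3)
    rb += 1
    rb += 2
    rb += 3
    rb += 4             // capacity is 3, so the oldest element (1) is dropped
    println(rb.toList)  // List(2, 3, 4)
    println(rb.sum)     // 9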