├── .gitignore ├── LICENSE ├── dl4j-deeplearning-iris ├── pom.xml ├── readme.md └── src │ ├── main │ ├── java │ │ └── com │ │ │ └── github │ │ │ └── megachucky │ │ │ └── kafka │ │ │ └── streams │ │ │ └── machinelearning │ │ │ └── models │ │ │ ├── DeepLearning4J_CSV_Iris_Model.java │ │ │ └── DeepLearning4J_CSV_Model_Inference.java │ └── resources │ │ ├── DL4J_Resources │ │ └── iris.txt │ │ ├── generatedModels │ │ └── DL4J │ │ │ └── DL4J_Iris_Model.zip │ │ └── log4j.properties │ └── test │ └── java │ └── com │ └── github │ ├── jukkakarvanen │ └── kafka │ │ └── streams │ │ └── integration │ │ └── utils │ │ ├── TestEmbeddedKafkaCluster.java │ │ └── TestKafkaStreams.java │ └── megachucky │ └── kafka │ └── streams │ └── machinelearning │ └── test │ └── Kafka_Streams_MachineLearning_DL4J_DeepLearning_Iris_IntegrationTest.java ├── h2o-gbm ├── pom.xml ├── readme.md └── src │ ├── main │ ├── java │ │ └── com │ │ │ └── github │ │ │ └── megachucky │ │ │ └── kafka │ │ │ └── streams │ │ │ └── machinelearning │ │ │ ├── Kafka_Streams_MachineLearning_H2O_Application.java │ │ │ ├── Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.java │ │ │ ├── Kafka_Streams_MachineLearning_H2O_GBM_Example.java │ │ │ └── models │ │ │ ├── deeplearning_fe7c1f02_08ec_4070_b784_c2531147e451.java │ │ │ └── gbm_pojo_test.java │ └── resources │ │ ├── generatedModels │ │ ├── DeepWater_model_python_1503570558230_1.zip │ │ └── GBM_model_python_1503397740678_1.zip │ │ └── log4j.properties │ └── test │ └── java │ └── com │ └── github │ └── megachucky │ └── kafka │ └── streams │ └── machinelearning │ ├── Kafka_Streams_MachineLearning_H2O_DeepLearning_ExampleTest.java │ ├── Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java │ ├── Kafka_Streams_MachineLearning_H2O_GBM_ExampleTest.java │ ├── Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java │ ├── TestEmbeddedKafkaCluster.java │ └── TestKafkaStreams.java ├── pom.xml ├── readme.md ├── src └── main │ ├── java │ └── com │ │ └── github │ │ └── megachucky │ │ └── kafka │ │ └── streams │ │ └── machinelearning │ │ └── StreamsStarterApp.java │ └── resources │ └── log4j.properties ├── tensorflow-image-recognition ├── pom.xml ├── readme.md └── src │ ├── main │ ├── java │ │ └── com │ │ │ └── github │ │ │ └── megachucky │ │ │ └── kafka │ │ │ └── streams │ │ │ └── machinelearning │ │ │ └── Kafka_Streams_TensorFlow_Image_Recognition_Example.java │ └── resources │ │ ├── TensorFlow_Images │ │ ├── devil.png │ │ ├── new_airplane.jpg │ │ ├── trained_airplane_1.jpg │ │ ├── trained_airplane_2.jpg │ │ └── trained_butterfly.jpg │ │ ├── generatedModels │ │ ├── CNN_inception5h │ │ │ ├── LICENSE │ │ │ ├── imagenet_comp_graph_label_strings.txt │ │ │ └── tensorflow_inception_graph.pb │ │ └── TensorFlow_Census │ │ │ ├── saved_model.pb │ │ │ ├── test.json │ │ │ └── variables │ │ │ ├── variables.data-00000-of-00001 │ │ │ └── variables.index │ │ └── log4j.properties │ └── test │ └── java │ └── com │ └── github │ ├── jukkakarvanen │ └── kafka │ │ └── streams │ │ └── integration │ │ └── utils │ │ ├── TestEmbeddedKafkaCluster.java │ │ └── TestKafkaStreams.java │ └── megachucky │ └── kafka │ └── streams │ └── machinelearning │ ├── Kafka_Streams_TensorFlow_Image_Recognition_ExampleTest.java │ └── test │ └── Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest.java └── tensorflow-keras ├── pom.xml ├── readme.md └── src ├── main └── resources │ ├── generatedModels │ └── Keras │ │ ├── keras-model-script.py │ │ └── simple_mlp.h5 │ └── log4j.properties └── test └── java 
└── com └── github ├── jukkakarvanen └── kafka │ └── streams │ └── integration │ └── utils │ ├── TestEmbeddedKafkaCluster.java │ └── TestKafkaStreams.java └── megachucky └── kafka └── streams └── machinelearning └── test └── Kafka_Streams_TensorFlow_Keras_Example_IntegrationTest.java /.gitignore: -------------------------------------------------------------------------------- 1 | # Eclipse 2 | .classpath 3 | .project 4 | .settings/ 5 | 6 | # Intellij 7 | .idea/ 8 | *.iml 9 | *.iws 10 | 11 | # Mac 12 | .DS_Store 13 | 14 | # Maven 15 | log/ 16 | target/ 17 | 18 | # Visual Studio Code 19 | .vscode/ 20 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /dl4j-deeplearning-iris/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | com.github.kaiwaehner.kafka.streams.machinelearning 7 | dl4j-deeplearning-iris 8 | CP55_AK25 9 | 10 | 11 | 12 | confluent 13 | http://packages.confluent.io/maven/ 14 | 15 | 16 | 17 | 18 | 1.8 19 | 2.5.0 20 | 2.12 21 | ${kafka.scala.version}.8 22 | 5.5.0 23 | UTF-8 24 | 25 | 26 | 27 | 28 | 31 | 32 | 33 | org.apache.kafka 34 | kafka-streams 35 | ${kafka.version} 36 | 37 | 38 | 42 | 43 | 45 | 46 | 47 | org.nd4j 48 | nd4j-native-platform 49 | 1.0.0-beta3 50 | 51 | 52 | 53 | 54 | 55 | org.deeplearning4j 56 | deeplearning4j-core 57 | 1.0.0-beta3 58 | 59 | 60 | 61 | org.deeplearning4j 62 | deeplearning4j-modelimport 63 | 1.0.0-beta3 64 | 65 | 66 | 67 | 68 | junit 69 | junit 70 | 4.12 71 | test 72 | 73 | 74 | org.assertj 75 | assertj-core 76 | 3.3.0 77 | test 78 | 79 | 80 | org.apache.kafka 81 | kafka_${kafka.scala.version} 82 | ${kafka.version} 83 | test 84 | test 85 | 86 | 87 | org.apache.kafka 88 | kafka-clients 89 | ${kafka.version} 90 | test 91 | test 92 | 93 | 94 | org.apache.kafka 95 | kafka-streams 96 | ${kafka.version} 97 | test 98 | test 99 | 100 | 101 | org.apache.curator 102 | curator-test 103 | 2.9.0 104 | test 105 | 106 | 107 | io.confluent 108 | kafka-schema-registry 109 | ${confluent.version} 110 | test 111 | 112 | 113 | io.confluent 114 | kafka-schema-registry 115 | ${confluent.version} 116 | 117 | tests 118 | test 119 | 120 | 121 | org.hamcrest 122 | hamcrest 123 | 2.1 124 | test 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | org.apache.maven.plugins 133 | maven-compiler-plugin 134 | 3.6.1 135 | 136 | 1.8 137 | 1.8 138 | 139 | 140 | 141 | 142 | 169 | 170 | 171 | 172 | -------------------------------------------------------------------------------- /dl4j-deeplearning-iris/readme.md: -------------------------------------------------------------------------------- 1 | # Machine Learning + Kafka Streams Examples 2 | 3 | General info in main [Readme](../readme.md) 4 | 5 | ### Example 3 - Iris Prediction using a Neural Network with DeepLearning4J (DL4J) 6 | **Use Case** 7 | 8 | Iris Species Prediction using a Neural Network. 9 | This is a famous example: prediction of the Iris species, implemented with many different ML algorithms. Here I use DeepLearning4J (DL4J) to build a neural network using the Iris dataset. 10 | 11 | **Machine Learning Technology** 12 | * [DeepLearning4J](https://deeplearning4j.org) 13 | * A simple example demonstrating how to build, save and load neural networks with DL4J. [MultiLayerNetwork](https://deeplearning4j.org/doc/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.html) and [INDArray](http://nd4j.org/doc/org/nd4j/linalg/api/ndarray/INDArray.html) are the key APIs to look at if you want to understand the details. 14 | * The model is created via [DeepLearning4J_CSV_Iris_Model.java](src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/DeepLearning4J_CSV_Iris_Model.java) and stored in the resources: [DL4J_Iris_Model.zip](src/main/resources/generatedModels/DL4J/DL4J_Iris_Model.zip). No need to re-train, just for reference (see the inference sketch below). Kudos to Adam Gibson who created this example as part of the DL4J project.
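For a quick feel of the DL4J inference API, here is a minimal sketch based on [DeepLearning4J_CSV_Model_Inference.java](src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/DeepLearning4J_CSV_Model_Inference.java) (imports omitted; the four input values are just sample Iris measurements):

    // Load the pre-trained network from the checked-in zip and run one prediction
    MultiLayerNetwork model = ModelSerializer.restoreMultiLayerNetwork(
            new File("src/main/resources/generatedModels/DL4J/DL4J_Iris_Model.zip"));
    INDArray input = Nd4j.create(new double[] { 5.0, 3.5, 1.6, 0.6 });
    INDArray result = model.output(input); // probabilities for the three Iris classes
    System.out.println("Probabilities: " + result.toString());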
15 | 16 | **Integration Test** 17 | [Kafka_Streams_MachineLearning_DL4J_DeepLearning_Iris_IntegrationTest.java](src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_DL4J_DeepLearning_Iris_IntegrationTest.java) 18 | 19 | -------------------------------------------------------------------------------- /dl4j-deeplearning-iris/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/DeepLearning4J_CSV_Iris_Model.java: -------------------------------------------------------------------------------- 1 | package com.github.megachucky.kafka.streams.machinelearning.models; 2 | 3 | import java.io.File; 4 | 5 | import org.datavec.api.records.reader.RecordReader; 6 | import org.datavec.api.records.reader.impl.csv.CSVRecordReader; 7 | import org.datavec.api.split.FileSplit; 8 | import org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator; 9 | import org.deeplearning4j.nn.conf.MultiLayerConfiguration; 10 | import org.deeplearning4j.nn.conf.NeuralNetConfiguration; 11 | import org.deeplearning4j.nn.conf.layers.DenseLayer; 12 | import org.deeplearning4j.nn.conf.layers.OutputLayer; 13 | import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; 14 | import org.deeplearning4j.nn.weights.WeightInit; 15 | import org.deeplearning4j.optimize.listeners.ScoreIterationListener; 16 | import org.deeplearning4j.util.ModelSerializer; 17 | import org.nd4j.evaluation.classification.Evaluation; 18 | import org.nd4j.linalg.activations.Activation; 19 | import org.nd4j.linalg.api.ndarray.INDArray; 20 | import org.nd4j.linalg.io.ClassPathResource; 21 | import org.nd4j.linalg.dataset.DataSet; 22 | import org.nd4j.linalg.dataset.SplitTestAndTrain; 23 | import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; 24 | import org.nd4j.linalg.dataset.api.preprocessor.DataNormalization; 25 | import org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize; 26 | 27 | import org.nd4j.linalg.learning.config.Sgd; 28 | import org.nd4j.linalg.lossfunctions.LossFunctions; 29 | import org.slf4j.Logger; 30 | import org.slf4j.LoggerFactory; 31 | 32 | public class DeepLearning4J_CSV_Iris_Model { 33 | 34 | private static Logger log = LoggerFactory.getLogger(DeepLearning4J_CSV_Iris_Model.class); 35 | 36 | public static void main(String[] args) throws Exception { 37 | 38 | // First: get the dataset using the record reader. CSVRecordReader handles 39 | // loading/parsing 40 | int numLinesToSkip = 0; 41 | char delimiter = ','; 42 | RecordReader recordReader = new CSVRecordReader(numLinesToSkip, delimiter); 43 | recordReader.initialize(new FileSplit(new ClassPathResource("DL4J_Resources/iris.txt").getFile())); 44 | 45 | // Second: the RecordReaderDataSetIterator handles conversion to DataSet 46 | // objects, ready for use in neural network 47 | int labelIndex = 4; // 5 values in each row of the iris.txt CSV: 4 input features followed by an 48 | // integer label (class) index. Labels are the 5th value (index 4) in each row 49 | int numClasses = 3; // 3 classes (types of iris flowers) in the iris data set. Classes have integer 50 | // values 0, 1 or 2 51 | int batchSize = 150; // Iris data set: 150 examples total.
We are loading all of them into one 52 | // DataSet (not recommended for large data sets) 53 | 54 | DataSetIterator iterator = new RecordReaderDataSetIterator(recordReader, batchSize, labelIndex, numClasses); 55 | DataSet allData = iterator.next(); 56 | allData.shuffle(); 57 | SplitTestAndTrain testAndTrain = allData.splitTestAndTrain(0.65); // Use 65% of data for training 58 | 59 | DataSet trainingData = testAndTrain.getTrain(); 60 | DataSet testData = testAndTrain.getTest(); 61 | 62 | // We need to normalize our data. We'll use NormalizeStandardize (which gives us 63 | // mean 0, unit variance): 64 | DataNormalization normalizer = new NormalizerStandardize(); 65 | normalizer.fit(trainingData); // Collect the statistics (mean/stdev) from the training data. This does not 66 | // modify the input data 67 | normalizer.transform(trainingData); // Apply normalization to the training data 68 | normalizer.transform(testData); // Apply normalization to the test data. This is using statistics calculated 69 | // from the *training* set 70 | 71 | final int numInputs = 4; 72 | int outputNum = 3; 73 | long seed = 6; 74 | 75 | log.info("Build model...."); 76 | MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(seed).activation(Activation.TANH) 77 | .weightInit(WeightInit.XAVIER).updater(new Sgd(0.1)).l2(1e-4).list() 78 | .layer(0, new DenseLayer.Builder().nIn(numInputs).nOut(3).build()) 79 | .layer(1, new DenseLayer.Builder().nIn(3).nOut(3).build()) 80 | .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) 81 | .activation(Activation.SOFTMAX).nIn(3).nOut(outputNum).build()) 82 | .build(); 83 | 84 | // run the model 85 | MultiLayerNetwork model = new MultiLayerNetwork(conf); 86 | model.init(); 87 | model.setListeners(new ScoreIterationListener(100)); 88 | 89 | for (int i = 0; i < 1000; i++) { 90 | model.fit(trainingData); 91 | } 92 | 93 | // evaluate the model on the test set 94 | Evaluation eval = new Evaluation(3); 95 | INDArray input = testData.getFeatures(); 96 | INDArray output = model.output(input); 97 | System.out.println("INPUT:" + input.toString()); 98 | eval.eval(testData.getLabels(), output); 99 | log.info(eval.stats()); 100 | 101 | // Save the model 102 | File locationToSave = new File("src/main/resources/generatedModels/DL4J/DL4J_Iris_Model.zip"); // Where to save 103 | // the network. 104 | // Note: the file 105 | // is in .zip 106 | // format - can 107 | // be opened 108 | // externally 109 | boolean saveUpdater = true; // Updater: i.e., the state for Momentum, RMSProp, Adagrad etc. 
Save this if you 110 | // want to train your network more in the future 111 | // ModelSerializer.writeModel(model, locationToSave, saveUpdater); // (write is commented out: the pre-trained model is already checked in under src/main/resources) 112 | 113 | // Load the model 114 | MultiLayerNetwork restored = ModelSerializer.restoreMultiLayerNetwork(locationToSave); 115 | 116 | System.out.println("Saved and loaded parameters are equal: " + model.params().equals(restored.params())); 117 | System.out.println("Saved and loaded configurations are equal: " 118 | + model.getLayerWiseConfigurations().equals(restored.getLayerWiseConfigurations())); 119 | 120 | } 121 | 122 | } -------------------------------------------------------------------------------- /dl4j-deeplearning-iris/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/DeepLearning4J_CSV_Model_Inference.java: -------------------------------------------------------------------------------- 1 | package com.github.megachucky.kafka.streams.machinelearning.models; 2 | 3 | import java.io.File; 4 | 5 | import org.datavec.api.records.reader.RecordReader; 6 | import org.datavec.api.records.reader.impl.csv.CSVRecordReader; 7 | import org.datavec.api.split.FileSplit; 8 | import org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator; 9 | import org.deeplearning4j.eval.Evaluation; 10 | import org.deeplearning4j.nn.conf.MultiLayerConfiguration; 11 | import org.deeplearning4j.nn.conf.NeuralNetConfiguration; 12 | import org.deeplearning4j.nn.conf.layers.DenseLayer; 13 | import org.deeplearning4j.nn.conf.layers.OutputLayer; 14 | import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; 15 | import org.deeplearning4j.nn.weights.WeightInit; 16 | import org.deeplearning4j.optimize.listeners.ScoreIterationListener; 17 | 18 | import org.deeplearning4j.util.ModelSerializer; 19 | import org.nd4j.linalg.activations.Activation; 20 | import org.nd4j.linalg.api.ndarray.INDArray; 21 | import org.nd4j.linalg.dataset.DataSet; 22 | import org.nd4j.linalg.dataset.SplitTestAndTrain; 23 | import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; 24 | import org.nd4j.linalg.dataset.api.preprocessor.DataNormalization; 25 | import org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize; 26 | import org.nd4j.linalg.factory.Nd4j; 27 | import org.nd4j.linalg.lossfunctions.LossFunctions; 28 | import org.slf4j.Logger; 29 | import org.slf4j.LoggerFactory; 30 | 31 | /** 32 | * @author Adam Gibson 33 | */ 34 | public class DeepLearning4J_CSV_Model_Inference { 35 | 36 | private static Logger log = LoggerFactory.getLogger(DeepLearning4J_CSV_Model_Inference.class); 37 | 38 | public static void main(String[] args) throws Exception { 39 | 40 | // Location of the saved model 41 | File locationToSave = new File("src/main/resources/generatedModels/DL4J/DL4J_Iris_Model.zip"); // Where the network was saved.
Note: the file is in .zip format - can be opened externally 42 | 43 | 44 | //Load the model 45 | MultiLayerNetwork model = ModelSerializer.restoreMultiLayerNetwork(locationToSave); 46 | 47 | 48 | // Inference 49 | INDArray input = Nd4j.create(new double[] {5.0,3.5,1.6,0.6}); 50 | INDArray result = model.output(input); 51 | 52 | System.out.println("Probabilities: " + result.toString()); 53 | 54 | } 55 | 56 | } 57 | 58 | -------------------------------------------------------------------------------- /dl4j-deeplearning-iris/src/main/resources/DL4J_Resources/iris.txt: -------------------------------------------------------------------------------- 1 | 5.1,3.5,1.4,0.2,0 2 | 4.9,3.0,1.4,0.2,0 3 | 4.7,3.2,1.3,0.2,0 4 | 4.6,3.1,1.5,0.2,0 5 | 5.0,3.6,1.4,0.2,0 6 | 5.4,3.9,1.7,0.4,0 7 | 4.6,3.4,1.4,0.3,0 8 | 5.0,3.4,1.5,0.2,0 9 | 4.4,2.9,1.4,0.2,0 10 | 4.9,3.1,1.5,0.1,0 11 | 5.4,3.7,1.5,0.2,0 12 | 4.8,3.4,1.6,0.2,0 13 | 4.8,3.0,1.4,0.1,0 14 | 4.3,3.0,1.1,0.1,0 15 | 5.8,4.0,1.2,0.2,0 16 | 5.7,4.4,1.5,0.4,0 17 | 5.4,3.9,1.3,0.4,0 18 | 5.1,3.5,1.4,0.3,0 19 | 5.7,3.8,1.7,0.3,0 20 | 5.1,3.8,1.5,0.3,0 21 | 5.4,3.4,1.7,0.2,0 22 | 5.1,3.7,1.5,0.4,0 23 | 4.6,3.6,1.0,0.2,0 24 | 5.1,3.3,1.7,0.5,0 25 | 4.8,3.4,1.9,0.2,0 26 | 5.0,3.0,1.6,0.2,0 27 | 5.0,3.4,1.6,0.4,0 28 | 5.2,3.5,1.5,0.2,0 29 | 5.2,3.4,1.4,0.2,0 30 | 4.7,3.2,1.6,0.2,0 31 | 4.8,3.1,1.6,0.2,0 32 | 5.4,3.4,1.5,0.4,0 33 | 5.2,4.1,1.5,0.1,0 34 | 5.5,4.2,1.4,0.2,0 35 | 4.9,3.1,1.5,0.1,0 36 | 5.0,3.2,1.2,0.2,0 37 | 5.5,3.5,1.3,0.2,0 38 | 4.9,3.1,1.5,0.1,0 39 | 4.4,3.0,1.3,0.2,0 40 | 5.1,3.4,1.5,0.2,0 41 | 5.0,3.5,1.3,0.3,0 42 | 4.5,2.3,1.3,0.3,0 43 | 4.4,3.2,1.3,0.2,0 44 | 5.0,3.5,1.6,0.6,0 45 | 5.1,3.8,1.9,0.4,0 46 | 4.8,3.0,1.4,0.3,0 47 | 5.1,3.8,1.6,0.2,0 48 | 4.6,3.2,1.4,0.2,0 49 | 5.3,3.7,1.5,0.2,0 50 | 5.0,3.3,1.4,0.2,0 51 | 7.0,3.2,4.7,1.4,1 52 | 6.4,3.2,4.5,1.5,1 53 | 6.9,3.1,4.9,1.5,1 54 | 5.5,2.3,4.0,1.3,1 55 | 6.5,2.8,4.6,1.5,1 56 | 5.7,2.8,4.5,1.3,1 57 | 6.3,3.3,4.7,1.6,1 58 | 4.9,2.4,3.3,1.0,1 59 | 6.6,2.9,4.6,1.3,1 60 | 5.2,2.7,3.9,1.4,1 61 | 5.0,2.0,3.5,1.0,1 62 | 5.9,3.0,4.2,1.5,1 63 | 6.0,2.2,4.0,1.0,1 64 | 6.1,2.9,4.7,1.4,1 65 | 5.6,2.9,3.6,1.3,1 66 | 6.7,3.1,4.4,1.4,1 67 | 5.6,3.0,4.5,1.5,1 68 | 5.8,2.7,4.1,1.0,1 69 | 6.2,2.2,4.5,1.5,1 70 | 5.6,2.5,3.9,1.1,1 71 | 5.9,3.2,4.8,1.8,1 72 | 6.1,2.8,4.0,1.3,1 73 | 6.3,2.5,4.9,1.5,1 74 | 6.1,2.8,4.7,1.2,1 75 | 6.4,2.9,4.3,1.3,1 76 | 6.6,3.0,4.4,1.4,1 77 | 6.8,2.8,4.8,1.4,1 78 | 6.7,3.0,5.0,1.7,1 79 | 6.0,2.9,4.5,1.5,1 80 | 5.7,2.6,3.5,1.0,1 81 | 5.5,2.4,3.8,1.1,1 82 | 5.5,2.4,3.7,1.0,1 83 | 5.8,2.7,3.9,1.2,1 84 | 6.0,2.7,5.1,1.6,1 85 | 5.4,3.0,4.5,1.5,1 86 | 6.0,3.4,4.5,1.6,1 87 | 6.7,3.1,4.7,1.5,1 88 | 6.3,2.3,4.4,1.3,1 89 | 5.6,3.0,4.1,1.3,1 90 | 5.5,2.5,4.0,1.3,1 91 | 5.5,2.6,4.4,1.2,1 92 | 6.1,3.0,4.6,1.4,1 93 | 5.8,2.6,4.0,1.2,1 94 | 5.0,2.3,3.3,1.0,1 95 | 5.6,2.7,4.2,1.3,1 96 | 5.7,3.0,4.2,1.2,1 97 | 5.7,2.9,4.2,1.3,1 98 | 6.2,2.9,4.3,1.3,1 99 | 5.1,2.5,3.0,1.1,1 100 | 5.7,2.8,4.1,1.3,1 101 | 6.3,3.3,6.0,2.5,2 102 | 5.8,2.7,5.1,1.9,2 103 | 7.1,3.0,5.9,2.1,2 104 | 6.3,2.9,5.6,1.8,2 105 | 6.5,3.0,5.8,2.2,2 106 | 7.6,3.0,6.6,2.1,2 107 | 4.9,2.5,4.5,1.7,2 108 | 7.3,2.9,6.3,1.8,2 109 | 6.7,2.5,5.8,1.8,2 110 | 7.2,3.6,6.1,2.5,2 111 | 6.5,3.2,5.1,2.0,2 112 | 6.4,2.7,5.3,1.9,2 113 | 6.8,3.0,5.5,2.1,2 114 | 5.7,2.5,5.0,2.0,2 115 | 5.8,2.8,5.1,2.4,2 116 | 6.4,3.2,5.3,2.3,2 117 | 6.5,3.0,5.5,1.8,2 118 | 7.7,3.8,6.7,2.2,2 119 | 7.7,2.6,6.9,2.3,2 120 | 6.0,2.2,5.0,1.5,2 121 | 6.9,3.2,5.7,2.3,2 122 | 5.6,2.8,4.9,2.0,2 123 | 7.7,2.8,6.7,2.0,2 124 | 6.3,2.7,4.9,1.8,2 125 | 6.7,3.3,5.7,2.1,2 126 | 
7.2,3.2,6.0,1.8,2 127 | 6.2,2.8,4.8,1.8,2 128 | 6.1,3.0,4.9,1.8,2 129 | 6.4,2.8,5.6,2.1,2 130 | 7.2,3.0,5.8,1.6,2 131 | 7.4,2.8,6.1,1.9,2 132 | 7.9,3.8,6.4,2.0,2 133 | 6.4,2.8,5.6,2.2,2 134 | 6.3,2.8,5.1,1.5,2 135 | 6.1,2.6,5.6,1.4,2 136 | 7.7,3.0,6.1,2.3,2 137 | 6.3,3.4,5.6,2.4,2 138 | 6.4,3.1,5.5,1.8,2 139 | 6.0,3.0,4.8,1.8,2 140 | 6.9,3.1,5.4,2.1,2 141 | 6.7,3.1,5.6,2.4,2 142 | 6.9,3.1,5.1,2.3,2 143 | 5.8,2.7,5.1,1.9,2 144 | 6.8,3.2,5.9,2.3,2 145 | 6.7,3.3,5.7,2.5,2 146 | 6.7,3.0,5.2,2.3,2 147 | 6.3,2.5,5.0,1.9,2 148 | 6.5,3.0,5.2,2.0,2 149 | 6.2,3.4,5.4,2.3,2 150 | 5.9,3.0,5.1,1.8,2 151 | -------------------------------------------------------------------------------- /dl4j-deeplearning-iris/src/main/resources/generatedModels/DL4J/DL4J_Iris_Model.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaiwaehner/kafka-streams-machine-learning-examples/3977e3928d2719fc710bb3d202851ee8904be2e7/dl4j-deeplearning-iris/src/main/resources/generatedModels/DL4J/DL4J_Iris_Model.zip -------------------------------------------------------------------------------- /dl4j-deeplearning-iris/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=INFO, stdout 2 | 3 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 4 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.stdout.layout.ConversionPattern=%p %m (%c:%L) %n -------------------------------------------------------------------------------- /dl4j-deeplearning-iris/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java: -------------------------------------------------------------------------------- 1 | package com.github.jukkakarvanen.kafka.streams.integration.utils; 2 | 3 | import org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster; 4 | import org.slf4j.Logger; 5 | import org.slf4j.LoggerFactory; 6 | 7 | import java.util.Properties; 8 | 9 | /** This is a helper class to work around failing stream tests in Windows environments (KAFKA-6647).
10 | * 11 | * @author Jukka Karvanen 12 | * 13 | * The underlying issue is https://issues.apache.org/jira/browse/KAFKA-6647 14 | * Replacing EmbeddedKafkaCluster with TestEmbeddedKafkaCluster will catch and ignore the exception 15 | * happening during the tear down of the test 16 | * The exception does not affect functionality 17 | */ 18 | 19 | public class TestEmbeddedKafkaCluster extends EmbeddedKafkaCluster { 20 | private static final Logger log = LoggerFactory.getLogger(TestEmbeddedKafkaCluster.class); 21 | 22 | public TestEmbeddedKafkaCluster(int numBrokers) { 23 | super(numBrokers); 24 | } 25 | 26 | public TestEmbeddedKafkaCluster(int numBrokers, Properties brokerConfig) { 27 | super(numBrokers, brokerConfig); 28 | } 29 | 30 | public TestEmbeddedKafkaCluster(int numBrokers, Properties brokerConfig, long mockTimeMillisStart) { 31 | super(numBrokers, brokerConfig, mockTimeMillisStart); 32 | } 33 | 34 | public TestEmbeddedKafkaCluster(int numBrokers, Properties brokerConfig, long mockTimeMillisStart, long mockTimeNanoStart) { 35 | super(numBrokers, brokerConfig, mockTimeMillisStart, mockTimeNanoStart); 36 | } 37 | 38 | public void after() { 39 | try { 40 | super.after(); 41 | } catch (RuntimeException e) { 42 | log.warn("Ignoring exception, test failing in Windows due to this exception {}", e); 43 | } 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /dl4j-deeplearning-iris/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java: -------------------------------------------------------------------------------- 1 | package com.github.jukkakarvanen.kafka.streams.integration.utils; 2 | 3 | import org.apache.kafka.common.utils.Time; 4 | import org.apache.kafka.streams.KafkaClientSupplier; 5 | import org.apache.kafka.streams.KafkaStreams; 6 | import org.apache.kafka.streams.Topology; 7 | import org.slf4j.Logger; 8 | import org.slf4j.LoggerFactory; 9 | 10 | import java.util.Properties; 11 | 12 | /** This is a helper class to work around failing stream tests in Windows environments (KAFKA-6647).
13 | * 14 | * @author Jukka Karvanen 15 | * 16 | * The underlying issue is https://issues.apache.org/jira/browse/KAFKA-6647 17 | * Replacing KafkaStreams with TestKafkaStreams will catch and ignore the exception caused by cleanUp 18 | * The exception does not affect functionality 19 | */ 20 | 21 | public class TestKafkaStreams extends KafkaStreams { 22 | private static final Logger log = LoggerFactory.getLogger(TestKafkaStreams.class); 23 | 24 | public TestKafkaStreams(Topology topology, Properties props) { 25 | super(topology, props); 26 | } 27 | 28 | public TestKafkaStreams(Topology topology, Properties props, KafkaClientSupplier clientSupplier) { 29 | super(topology, props, clientSupplier); 30 | } 31 | 32 | public TestKafkaStreams(Topology topology, Properties props, Time time) { 33 | super(topology, props, time); 34 | } 35 | 36 | public TestKafkaStreams(Topology topology, Properties props, KafkaClientSupplier clientSupplier, Time time) { 37 | super(topology, props, clientSupplier, time); 38 | } 39 | 40 | public void cleanUp() { 41 | try { 42 | super.cleanUp(); 43 | } catch (RuntimeException e) { 44 | log.warn("Ignoring exception, test failing in Windows due to this exception {}", e); 45 | } 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /dl4j-deeplearning-iris/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_DL4J_DeepLearning_Iris_IntegrationTest.java: -------------------------------------------------------------------------------- 1 | package com.github.megachucky.kafka.streams.machinelearning.test; 2 | 3 | import static org.assertj.core.api.Assertions.assertThat; 4 | 5 | import java.io.File; 6 | import java.util.Arrays; 7 | import java.util.List; 8 | import java.util.Properties; 9 | import java.util.stream.Stream; 10 | 11 | import com.github.jukkakarvanen.kafka.streams.integration.utils.TestEmbeddedKafkaCluster; 12 | import org.apache.kafka.clients.consumer.ConsumerConfig; 13 | import org.apache.kafka.clients.producer.ProducerConfig; 14 | import org.apache.kafka.common.serialization.Serdes; 15 | import org.apache.kafka.common.serialization.StringDeserializer; 16 | import org.apache.kafka.common.serialization.StringSerializer; 17 | import org.apache.kafka.common.utils.MockTime; 18 | import org.apache.kafka.streams.KafkaStreams; 19 | import org.apache.kafka.streams.KeyValue; 20 | import org.apache.kafka.streams.StreamsBuilder; 21 | import org.apache.kafka.streams.StreamsConfig; 22 | import org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster; 23 | import org.apache.kafka.streams.integration.utils.IntegrationTestUtils; 24 | import org.apache.kafka.streams.kstream.KStream; 25 | import org.apache.kafka.test.TestUtils; 26 | import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; 27 | import org.deeplearning4j.util.ModelSerializer; 28 | import org.junit.BeforeClass; 29 | import org.junit.ClassRule; 30 | import org.junit.Test; 31 | import org.nd4j.linalg.api.ndarray.INDArray; 32 | import org.nd4j.linalg.factory.Nd4j; 33 | 34 | /** 35 | * 36 | * @author Kai Waehner (www.kai-waehner.de) 37 | * 38 | * End-to-end integration test, using an embedded Kafka cluster and a 39 | * DL4J DeepLearning Model. 40 | * 41 | * Prediction of Iris Flower Type 1, 2 or 3. The model returns probabilities 42 | * for all three types, like [0.00/ 0.01/ 0.99].
43 | */ 44 | public class Kafka_Streams_MachineLearning_DL4J_DeepLearning_Iris_IntegrationTest { 45 | 46 | @ClassRule 47 | public static final EmbeddedKafkaCluster CLUSTER = new TestEmbeddedKafkaCluster(1); 48 | 49 | private static final String inputTopic = "IrisInputTopic"; 50 | private static final String outputTopic = "IrisOutputTopic"; 51 | 52 | // Generated DL4J model 53 | private File locationDL4JModel = new File("src/main/resources/generatedModels/DL4J/DL4J_Iris_Model.zip"); 54 | 55 | // Prediction Value 56 | private static String irisPrediction = "unknown"; 57 | 58 | @BeforeClass 59 | public static void startKafkaCluster() throws Exception { 60 | CLUSTER.createTopic(inputTopic); 61 | CLUSTER.createTopic(outputTopic); 62 | } 63 | 64 | @Test 65 | public void shouldPredictIrisFlowerType() throws Exception { 66 | 67 | // Iris input data (the model returns probabilities for input being each of Iris 68 | // Type 1, 2 and 3) 69 | List<String> inputValues = Arrays.asList("5.4,3.9,1.7,0.4", "7.0,3.2,4.7,1.4", "4.6,3.4,1.4,0.3"); 70 | 71 | // Step 1: Configure and start the processor topology. 72 | Properties streamsConfiguration = new Properties(); 73 | streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "kafka-streams-dl4j-iris-integration-test"); 74 | streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers()); 75 | 76 | // The commit interval for flushing records to state stores and 77 | // downstream must be lower than 78 | // this integration test's timeout (30 secs) to ensure we observe the 79 | // expected processing results. 80 | streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 10 * 1000); 81 | streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); 82 | // Use a temporary directory for storing state, which will be 83 | // automatically removed after the test. 84 | streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getAbsolutePath()); 85 | 86 | // Create DL4J object (see DeepLearning4J_CSV_Iris_Model.java) 87 | MultiLayerNetwork model = ModelSerializer.restoreMultiLayerNetwork(locationDL4JModel); 88 | 89 | // Configure Kafka Streams Application 90 | // Specify default (de)serializers for record keys and for record 91 | // values. 92 | streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()); 93 | streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()); 94 | 95 | // In the subsequent lines we define the processing topology of the 96 | // Streams application. 97 | final StreamsBuilder builder = new StreamsBuilder(); 98 | 99 | // Construct a `KStream` from the input topic "IrisInputTopic", where 100 | // message values 101 | // represent lines of text (for the sake of this example, we ignore 102 | // whatever may be stored 103 | // in the message keys). 104 | final KStream<String, String> irisInputLines = builder.stream(inputTopic); 105 | 106 | // Stream Processor (in this case 'foreach' to add custom logic, i.e. apply the 107 | // analytic model) 108 | irisInputLines.foreach((key, value) -> { 109 | 110 | if (value != null && !value.equals("")) { 111 | System.out.println("#####################"); 112 | System.out.println("Iris Input:" + value); 113 | 114 | // TODO Easier way to map from String[] to double[] !!!
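// A shorter alternative for the TODO above (sketch only, not wired into this test):
// double[] irisInput = Arrays.stream(value.split(",")).mapToDouble(Double::parseDouble).toArray();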
115 | String[] stringArray = value.split(","); 116 | Double[] doubleArray = Arrays.stream(stringArray).map(Double::valueOf).toArray(Double[]::new); 117 | double[] irisInput = Stream.of(doubleArray).mapToDouble(Double::doubleValue).toArray(); 118 | 119 | // Inference 120 | INDArray input = Nd4j.create(irisInput); 121 | INDArray result = model.output(input); 122 | 123 | System.out.println("Probabilities: " + result.toString()); 124 | 125 | irisPrediction = result.toString(); 126 | 127 | } 128 | 129 | }); 130 | 131 | // Transform message: Add prediction information 132 | KStream<String, String> transformedMessage = irisInputLines 133 | .mapValues(value -> "Prediction: Iris Probability => " + irisPrediction); 134 | 135 | // Send prediction information to Output Topic 136 | transformedMessage.to(outputTopic); 137 | 138 | // Start Kafka Streams Application to process new incoming messages from 139 | // Input Topic 140 | final KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfiguration); 141 | streams.cleanUp(); 142 | streams.start(); 143 | System.out.println("Iris Prediction Microservice is running..."); 144 | System.out.println("Input to Kafka Topic 'IrisInputTopic'; Output to Kafka Topic 'IrisOutputTopic'"); 145 | 146 | // 147 | // Step 2: Produce some input data to the input topic. 148 | // 149 | Properties producerConfig = new Properties(); 150 | producerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers()); 151 | producerConfig.put(ProducerConfig.ACKS_CONFIG, "all"); 152 | producerConfig.put(ProducerConfig.RETRIES_CONFIG, 0); 153 | producerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class); 154 | producerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class); 155 | IntegrationTestUtils.produceValuesSynchronously(inputTopic, inputValues, producerConfig, new MockTime()); 156 | 157 | // 158 | // Step 3: Verify the application's output data.
159 | // 160 | Properties consumerConfig = new Properties(); 161 | consumerConfig.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers()); 162 | consumerConfig.put(ConsumerConfig.GROUP_ID_CONFIG, 163 | "machine-learning-example-integration-test-standard-consumer"); 164 | consumerConfig.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); 165 | consumerConfig.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class); 166 | consumerConfig.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class); 167 | List<KeyValue<String, String>> response = IntegrationTestUtils 168 | .waitUntilMinKeyValueRecordsReceived(consumerConfig, outputTopic, 3); 169 | streams.close(); 170 | assertThat(response).isNotNull(); 171 | 172 | System.out.println("RESPONSE"); 173 | System.out.println(response.get(0).value); 174 | System.out.println("RESPONSE"); 175 | 176 | assertThat(response.get(0).value) 177 | .isEqualTo("Prediction: Iris Probability => [[ 0.0033, 0.1635, 0.8332]]"); 178 | 179 | assertThat(response.get(1).value) 180 | .isEqualTo("Prediction: Iris Probability => [[ 9.3033e-5, 0.0030, 0.9969]]"); 181 | 182 | assertThat(response.get(2).value) 183 | .isEqualTo("Prediction: Iris Probability => [[ 0.0113, 0.8152, 0.1736]]"); 184 | } 185 | 186 | } 187 | -------------------------------------------------------------------------------- /h2o-gbm/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | com.github.kaiwaehner.kafka.streams.machinelearning 7 | h2o-gbm 8 | CP55_AK25 9 | 10 | 11 | 12 | confluent 13 | http://packages.confluent.io/maven/ 14 | 15 | 16 | 17 | 18 | 1.8 19 | 2.5.0 20 | 2.12 21 | ${kafka.scala.version}.8 22 | 5.5.0 23 | UTF-8 24 | 25 | 26 | 27 | 28 | 31 | 32 | 33 | org.apache.kafka 34 | kafka-streams 35 | ${kafka.version} 36 | 37 | 38 | 39 | 40 | ai.h2o 41 | h2o-genmodel 42 | 3.14.0.1 43 | 44 | 45 | 46 | 47 | 48 | org.apache.kafka 49 | kafka-streams-test-utils 50 | ${kafka.version} 51 | test 52 | 53 | 54 | 55 | junit 56 | junit 57 | 4.12 58 | test 59 | 60 | 61 | org.assertj 62 | assertj-core 63 | 3.3.0 64 | test 65 | 66 | 67 | org.apache.kafka 68 | kafka_${kafka.scala.version} 69 | ${kafka.version} 70 | test 71 | test 72 | 73 | 74 | org.apache.kafka 75 | kafka-clients 76 | ${kafka.version} 77 | test 78 | test 79 | 80 | 81 | org.apache.kafka 82 | kafka-streams 83 | ${kafka.version} 84 | test 85 | test 86 | 87 | 88 | org.apache.curator 89 | curator-test 90 | 2.9.0 91 | test 92 | 93 | 94 | io.confluent 95 | kafka-schema-registry 96 | ${confluent.version} 97 | test 98 | 99 | 100 | io.confluent 101 | kafka-schema-registry 102 | ${confluent.version} 103 | 104 | tests 105 | test 106 | 107 | 108 | org.hamcrest 109 | hamcrest 110 | 2.1 111 | test 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | org.apache.maven.plugins 120 | maven-compiler-plugin 121 | 3.6.1 122 | 123 | 1.8 124 | 1.8 125 | 126 | 127 | 128 | 129 | 130 | org.apache.maven.plugins 131 | maven-assembly-plugin 132 | 2.5.2 133 | 134 | 135 | jar-with-dependencies 136 | 137 | 138 | 139 | true 140 | com.github.megachucky.kafka.streams.machinelearning.Kafka_Streams_MachineLearning_H2O_DeepLearning_Example 141 | 142 | 143 | 144 | 145 | 146 | assemble-all 147 | package 148 | 149 | single 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | -------------------------------------------------------------------------------- /h2o-gbm/readme.md: -------------------------------------------------------------------------------- 1 | # Machine Learning + Kafka Streams
Examples 2 | 3 | General info in main [Readme](../readme.md) 4 | 5 | ## Example 1 - Gradient Boosting with H2O.ai for Prediction of Flight Delays 6 | 7 | ### Use Case 8 | 9 | Gradient Boosting Machine (GBM) to predict flight delays. 10 | An H2O-generated GBM Java model (POJO) is instantiated and used in a Kafka Streams application to do inference on new events (see the inference sketch at the end of this example). 11 | 12 | ### Machine Learning Technology 13 | 14 | * [H2O](https://www.h2o.ai) 15 | * Check the [H2O demo](https://github.com/h2oai/h2o-2/wiki/Hacking-Airline-DataSet-with-H2O) to understand the test and how the model was built 16 | * You can re-use the generated Java model attached to this project ([gbm_pojo_test.java](src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/gbm_pojo_test.java)) or build your own model using R, Python, Flow UI or any other technology supported by the H2O framework. 17 | 18 | ### Source Code 19 | 20 | Business Logic (applying the analytic model to do the prediction): 21 | [Kafka_Streams_MachineLearning_H2O_Application.java](src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_Application.java) 22 | 23 | Specification of the used model: 24 | [Kafka_Streams_MachineLearning_H2O_GBM_Example.java](src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example.java) 25 | 26 | ### Automated Tests 27 | 28 | Unit Test using TopologyTestDriver: 29 | [Kafka_Streams_MachineLearning_H2O_GBM_ExampleTest.java](src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_ExampleTest.java) 30 | 31 | Integration Test using EmbeddedKafkaCluster: 32 | [Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java](src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java) 33 | 34 | ### Manual Testing 35 | 36 | You can easily test this by yourself. Here are the steps: 37 | 38 | * Start Kafka, e.g. with Confluent CLI: 39 | 40 | confluent local start kafka 41 | * Create topics AirlineInputTopic and AirlineOutputTopic 42 | 43 | kafka-topics --bootstrap-server localhost:9092 --create --topic AirlineInputTopic --partitions 3 --replication-factor 1 44 | 45 | kafka-topics --bootstrap-server localhost:9092 --create --topic AirlineOutputTopic --partitions 3 --replication-factor 1 46 | * Start the Kafka Streams app: 47 | 48 | java -cp h2o-gbm/target/h2o-gbm-CP55_AK25-jar-with-dependencies.jar com.github.megachucky.kafka.streams.machinelearning.Kafka_Streams_MachineLearning_H2O_GBM_Example 49 | * Send messages, e.g. with kafkacat: 50 | 51 | echo -e "1987,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES" | kafkacat -b localhost:9092 -P -t AirlineInputTopic 52 | * Consume predictions: 53 | 54 | kafka-console-consumer --bootstrap-server localhost:9092 --topic AirlineOutputTopic --from-beginning 55 | * Find more details in the unit test...
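For reference, the core model invocation inside the stream processor boils down to this minimal sketch (it mirrors [Kafka_Streams_MachineLearning_H2O_Application.java](src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_Application.java); imports from hex.genmodel.easy.* and PredictException handling are omitted, and the feature values are taken from the sample message above):

    // Load the generated H2O POJO by class name and wrap it for easy inference
    hex.genmodel.GenModel rawModel = (hex.genmodel.GenModel) Class
            .forName("com.github.megachucky.kafka.streams.machinelearning.models.gbm_pojo_test").newInstance();
    EasyPredictModelWrapper model = new EasyPredictModelWrapper(rawModel);

    // Fill a RowData with the features the model was trained on
    RowData row = new RowData();
    row.put("Year", "1987");
    row.put("Month", "10");
    row.put("DayofMonth", "14");
    row.put("DayOfWeek", "3");
    row.put("CRSDepTime", "730");
    row.put("UniqueCarrier", "PS");
    row.put("Origin", "SAN");
    row.put("Dest", "SFO");

    BinomialModelPrediction p = model.predictBinomial(row);
    System.out.println("Is flight departure delayed? => " + p.label);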
56 | 57 | ## H2O Deep Learning instead of H2O GBM Model 58 | 59 | The project includes another example with similar code to use an [H2O Deep Learning model](src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/deeplearning_fe7c1f02_08ec_4070_b784_c2531147e451.java) instead of the H2O GBM model: [Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java](src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java) 60 | This shows how you can easily test or replace different analytic models for one use case, or even use them for A/B testing. 61 | 62 | ### Source Code 63 | 64 | Business Logic (applying the analytic model to do the prediction): 65 | [Kafka_Streams_MachineLearning_H2O_Application.java](src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_Application.java) 66 | 67 | Specification of the used model: 68 | [Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.java](src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.java) 69 | 70 | ### Automated Tests 71 | 72 | Unit Test using TopologyTestDriver: 73 | [Kafka_Streams_MachineLearning_H2O_DeepLearning_ExampleTest.java](src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_ExampleTest.java) 74 | 75 | Integration Test using EmbeddedKafkaCluster: [Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java](src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java) 76 | 77 | ### Manual Testing 78 | 79 | Same as above, but change the class used to start the app: 80 | 81 | * Start the Kafka Streams app: 82 | 83 | java -cp h2o-gbm/target/h2o-gbm-CP55_AK25-jar-with-dependencies.jar com.github.megachucky.kafka.streams.machinelearning.Kafka_Streams_MachineLearning_H2O_DeepLearning_Example -------------------------------------------------------------------------------- /h2o-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_Application.java: -------------------------------------------------------------------------------- 1 | package com.github.megachucky.kafka.streams.machinelearning; 2 | 3 | import hex.genmodel.easy.EasyPredictModelWrapper; 4 | import hex.genmodel.easy.RowData; 5 | import hex.genmodel.easy.exception.PredictException; 6 | import hex.genmodel.easy.prediction.BinomialModelPrediction; 7 | import org.apache.kafka.common.serialization.Serdes; 8 | import org.apache.kafka.streams.KafkaStreams; 9 | import org.apache.kafka.streams.StreamsBuilder; 10 | import org.apache.kafka.streams.StreamsConfig; 11 | import org.apache.kafka.streams.Topology; 12 | import org.apache.kafka.streams.kstream.KStream; 13 | import org.apache.kafka.streams.kstream.ValueMapper; 14 | 15 | import java.util.Properties; 16 | 17 | /** 18 | * @author Kai Waehner (www.kai-waehner.de) 19 | * 20 | * Creates a new Kafka Streams application for prediction of flight 21 | * delays. The application uses the GBM model (built with 22 | * H2O.ai) to do inference on messages sent to Kafka topic "AirlineInputTopic". 23 | * The outcome of model inference is sent to Kafka topic 24 | * "AirlineOutputTopic".
25 | * 26 | * Refactored so that all models use the same base class; only the model name and application ID are passed in. 27 | * Used the Kafka_Streams_MachineLearning_H2O_GBM_Example_Refactored class as the baseline for this. 28 | * @author Jukka Karvanen / jukinimi.com 29 | * 30 | */ 31 | public class Kafka_Streams_MachineLearning_H2O_Application { 32 | 33 | public static final String INPUT_TOPIC = "AirlineInputTopic"; 34 | public static final String OUTPUT_TOPIC = "AirlineOutputTopic"; 35 | 36 | 37 | public static void execute(String bootstrapServers, String applicationId, String modelClassName) throws Exception { 38 | 39 | final Properties streamsConfiguration = getStreamConfiguration(bootstrapServers, applicationId); 40 | Topology topology = getStreamTopology(modelClassName); 41 | 42 | // Start Kafka Streams Application to process new incoming messages from Input 43 | // Topic 44 | final KafkaStreams streams = new KafkaStreams(topology, streamsConfiguration); 45 | streams.cleanUp(); 46 | streams.start(); 47 | System.out.println("Airline Delay Prediction Microservice is running..."); 48 | System.out.println("Input to Kafka Topic 'AirlineInputTopic'; Output to Kafka Topic 'AirlineOutputTopic'"); 49 | 50 | // Add shutdown hook to respond to SIGTERM and gracefully close Kafka 51 | // Streams 52 | Runtime.getRuntime().addShutdownHook(new Thread(streams::close)); 53 | 54 | } 55 | 56 | static Properties getStreamConfiguration(String bootstrapServers, String applicationId) { 57 | final Properties streamsConfiguration = new Properties(); 58 | // Give the Streams application a unique name. The name must be unique 59 | // in the Kafka cluster 60 | // against which the application is run. 61 | streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, applicationId); 62 | // Where to find Kafka broker(s). 63 | streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers); 64 | 65 | // Specify default (de)serializers for record keys and for record 66 | // values. 67 | streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()); 68 | streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()); 69 | 70 | // For illustrative purposes we disable record caches 71 | streamsConfiguration.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0); 72 | return streamsConfiguration; 73 | } 74 | 75 | static Topology getStreamTopology(String modelClassName) throws InstantiationException, IllegalAccessException, ClassNotFoundException { 76 | // Create H2O object (see gbm_pojo_test.java) 77 | hex.genmodel.GenModel rawModel; 78 | rawModel = (hex.genmodel.GenModel) Class.forName(modelClassName).newInstance(); 79 | EasyPredictModelWrapper model = new EasyPredictModelWrapper(rawModel); 80 | 81 | // In the subsequent lines we define the processing topology of the 82 | // Streams application. 83 | final StreamsBuilder builder = new StreamsBuilder(); 84 | 85 | // Construct a `KStream` from the input topic "AirlineInputTopic", where 86 | // message values 87 | // represent lines of text (for the sake of this example, we ignore 88 | // whatever may be stored 89 | // in the message keys). 90 | final KStream<String, String> airlineInputLines = builder.stream(INPUT_TOPIC); 91 | 92 | // Stream Processor (in this case 'mapValues' to add custom logic, i.e.
apply 93 | // the analytic model) 94 | KStream<String, String> transformedMessage = 95 | airlineInputLines.mapValues(value -> { 96 | 97 | // Year,Month,DayofMonth,DayOfWeek,DepTime,CRSDepTime,ArrTime,CRSArrTime,UniqueCarrier,FlightNum,TailNum,ActualElapsedTime,CRSElapsedTime,AirTime,ArrDelay,DepDelay,Origin,Dest,Distance,TaxiIn,TaxiOut,Cancelled,CancellationCode,Diverted,CarrierDelay,WeatherDelay,NASDelay,SecurityDelay,LateAircraftDelay,IsArrDelayed,IsDepDelayed 98 | // value: 99 | // YES, probably delayed: 100 | // 1987,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES 101 | // NO, probably not delayed: 102 | // 1999,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES 103 | 104 | if (value != null && !value.equals("")) { 105 | System.out.println("#####################"); 106 | System.out.println("Flight Input:" + value); 107 | 108 | String[] valuesArray = value.split(","); 109 | 110 | RowData row = new RowData(); 111 | row.put("Year", valuesArray[0]); 112 | row.put("Month", valuesArray[1]); 113 | row.put("DayofMonth", valuesArray[2]); 114 | row.put("DayOfWeek", valuesArray[3]); 115 | row.put("CRSDepTime", valuesArray[5]); 116 | row.put("UniqueCarrier", valuesArray[8]); 117 | row.put("Origin", valuesArray[16]); 118 | row.put("Dest", valuesArray[17]); 119 | BinomialModelPrediction p; 120 | try { 121 | p = model.predictBinomial(row); 122 | } catch (PredictException e) { 123 | // Log the failure and emit a marker message instead of dereferencing a null prediction below 124 | e.printStackTrace(); 125 | return "Prediction failed: " + e.getMessage(); 126 | } 127 | System.out.println("Label (aka prediction) is flight departure delayed: " + p.label); 128 | System.out.print("Class probabilities: "); 129 | for (int i = 0; i < p.classProbabilities.length; i++) { 130 | if (i > 0) { 131 | System.out.print(","); 132 | } 133 | System.out.print(p.classProbabilities[i]); 134 | } 135 | System.out.println(""); 136 | System.out.println("#####################"); 137 | return "Prediction: Is Airline delayed? => " + p.label; 138 | } 139 | //No prediction 140 | return null; 141 | }); 142 | 143 | // Send prediction information to Output Topic 144 | transformedMessage.to(OUTPUT_TOPIC); 145 | return builder.build(); 146 | } 147 | } 148 | -------------------------------------------------------------------------------- /h2o-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.java: -------------------------------------------------------------------------------- 1 | package com.github.megachucky.kafka.streams.machinelearning; 2 | 3 | import java.util.Properties; 4 | 5 | import org.apache.kafka.common.serialization.Serdes; 6 | import org.apache.kafka.streams.KafkaStreams; 7 | import org.apache.kafka.streams.StreamsBuilder; 8 | import org.apache.kafka.streams.StreamsConfig; 9 | import org.apache.kafka.streams.kstream.KStream; 10 | 11 | import hex.genmodel.easy.EasyPredictModelWrapper; 12 | import hex.genmodel.easy.RowData; 13 | import hex.genmodel.easy.exception.PredictException; 14 | import hex.genmodel.easy.prediction.BinomialModelPrediction; 15 | 16 | /** 17 | * @author Kai Waehner (www.kai-waehner.de) 18 | * 19 | * Creates a new Kafka Streams application for prediction of flight delays. 20 | * The application uses the Deep Learning model "deeplearning_fe7c1f02_08ec_4070_b784_c2531147e451" (built with H2O.ai) to infer messages 21 | * sent to Kafka topic "AirlineInputTopic". The outcome of model inference is sent to 22 | * Kafka topic "AirlineOutputTopic".
23 | * 24 | * The main logic now lives in the parent class. 25 | * Refactored to utilize the common {@link Kafka_Streams_MachineLearning_H2O_Application} class. 26 | * @author Jukka Karvanen / jukinimi.com 27 | * 28 | */ 29 | public class Kafka_Streams_MachineLearning_H2O_DeepLearning_Example extends Kafka_Streams_MachineLearning_H2O_Application { 30 | 31 | // Name of the generated H2O model 32 | static String modelClassName = "com.github.megachucky.kafka.streams.machinelearning.models.deeplearning_fe7c1f02_08ec_4070_b784_c2531147e451"; 33 | 34 | static final String APPLICATION_ID = "kafka-streams-h2o-deeplearning-example"; 35 | 36 | public static void main(final String[] args) throws Exception { 37 | 38 | // Configure Kafka Streams Application 39 | final String bootstrapServers = args.length > 0 ? args[0] : "localhost:9092"; 40 | execute(bootstrapServers, APPLICATION_ID, modelClassName); 41 | } 42 | 43 | } 44 | -------------------------------------------------------------------------------- /h2o-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example.java: -------------------------------------------------------------------------------- 1 | package com.github.megachucky.kafka.streams.machinelearning; 2 | 3 | import java.util.Properties; 4 | 5 | import org.apache.kafka.common.serialization.Serdes; 6 | import org.apache.kafka.streams.KafkaStreams; 7 | import org.apache.kafka.streams.StreamsBuilder; 8 | import org.apache.kafka.streams.StreamsConfig; 9 | import org.apache.kafka.streams.Topology; 10 | import org.apache.kafka.streams.kstream.KStream; 11 | 12 | import hex.genmodel.easy.EasyPredictModelWrapper; 13 | import hex.genmodel.easy.RowData; 14 | import hex.genmodel.easy.exception.PredictException; 15 | import hex.genmodel.easy.prediction.BinomialModelPrediction; 16 | 17 | /** 18 | * @author Kai Waehner (www.kai-waehner.de) 19 | * 20 | * Creates a new Kafka Streams application for prediction of flight 21 | * delays. The application uses the GBM model "gbm_pojo_test" (built with 22 | * H2O.ai) to infer messages sent to Kafka topic "AirlineInputTopic". 23 | * The outcome of model inference is sent to Kafka topic 24 | * "AirlineOutputTopic". 25 | * 26 | * The main logic now lives in the parent class. 27 | * Refactored to utilize the common {@link Kafka_Streams_MachineLearning_H2O_Application} class. 28 | * @author Jukka Karvanen / jukinimi.com 29 | */ 30 | public class Kafka_Streams_MachineLearning_H2O_GBM_Example extends Kafka_Streams_MachineLearning_H2O_Application { 31 | // Name of the generated H2O model 32 | static String modelClassName = "com.github.megachucky.kafka.streams.machinelearning.models.gbm_pojo_test"; 33 | 34 | static final String APPLICATION_ID = "kafka-streams-h2o-gbm-example"; 35 | 36 | public static void main(final String[] args) throws Exception { 37 | 38 | // Configure Kafka Streams Application 39 | final String bootstrapServers = args.length > 0 ?
args[0] : "localhost:9092"; 40 | execute(bootstrapServers, APPLICATION_ID, modelClassName); 41 | } 42 | 43 | } 44 | -------------------------------------------------------------------------------- /h2o-gbm/src/main/resources/generatedModels/DeepWater_model_python_1503570558230_1.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaiwaehner/kafka-streams-machine-learning-examples/3977e3928d2719fc710bb3d202851ee8904be2e7/h2o-gbm/src/main/resources/generatedModels/DeepWater_model_python_1503570558230_1.zip -------------------------------------------------------------------------------- /h2o-gbm/src/main/resources/generatedModels/GBM_model_python_1503397740678_1.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaiwaehner/kafka-streams-machine-learning-examples/3977e3928d2719fc710bb3d202851ee8904be2e7/h2o-gbm/src/main/resources/generatedModels/GBM_model_python_1503397740678_1.zip -------------------------------------------------------------------------------- /h2o-gbm/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=INFO, stdout 2 | 3 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 4 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.stdout.layout.ConversionPattern=%p %m (%c:%L) %n -------------------------------------------------------------------------------- /h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_ExampleTest.java: -------------------------------------------------------------------------------- 1 | package com.github.megachucky.kafka.streams.machinelearning; 2 | 3 | import org.apache.kafka.clients.producer.ProducerRecord; 4 | import org.apache.kafka.common.serialization.StringDeserializer; 5 | import org.apache.kafka.common.serialization.StringSerializer; 6 | import org.apache.kafka.streams.KeyValue; 7 | import org.apache.kafka.streams.TopologyTestDriver; 8 | import org.apache.kafka.streams.test.ConsumerRecordFactory; 9 | import org.junit.After; 10 | import org.junit.Before; 11 | import org.junit.Test; 12 | 13 | import java.util.Arrays; 14 | import java.util.List; 15 | import java.util.stream.Collectors; 16 | 17 | import static org.assertj.core.api.AssertionsForInterfaceTypes.assertThat; 18 | 19 | /** 20 | * TopologyTestDriver based test about stream processing of 21 | * Kafka_Streams_TensorFlow_Image_Recognition_Example. 22 | * 23 | * @author Jukka Karvanen / jukinimi.com * Unit Test of 24 | * {@link Kafka_Streams_MachineLearning_H2O_DeepLearning_Example}, using 25 | * an TopologyTestDriver and a H2O DeepLearning model. 
26 | * 27 | */ 28 | 29 | public class Kafka_Streams_MachineLearning_H2O_DeepLearning_ExampleTest { 30 | private TopologyTestDriver testDriver; 31 | 32 | private StringDeserializer stringDeserializer = new StringDeserializer(); 33 | private ConsumerRecordFactory recordFactory = new ConsumerRecordFactory<>( 34 | new StringSerializer(), new StringSerializer()); 35 | 36 | @Before 37 | public void setup() throws IllegalAccessException, ClassNotFoundException, InstantiationException { 38 | testDriver = new TopologyTestDriver( 39 | Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.getStreamTopology( 40 | Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.modelClassName), 41 | Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.getStreamConfiguration( 42 | "localhost:9092", 43 | Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.APPLICATION_ID)); 44 | } 45 | 46 | @After 47 | public void tearDown() { 48 | try { 49 | testDriver.close(); 50 | } catch (RuntimeException e) { 51 | // https://issues.apache.org/jira/browse/KAFKA-6647 causes exception when 52 | // executed in Windows, ignoring it 53 | // Logged stacktrace cannot be avoided 54 | System.out.println("Ignoring exception, test failing in Windows due this exception:" 55 | + e.getLocalizedMessage()); 56 | } 57 | } 58 | 59 | private String getOutput() { 60 | ProducerRecord output = testDriver.readOutput( 61 | Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.OUTPUT_TOPIC, stringDeserializer, 62 | stringDeserializer); 63 | assertThat(output).isNotNull(); 64 | return output.value(); 65 | } 66 | 67 | /** 68 | * Simple test validating only the prediction part of the output 69 | */ 70 | @Test 71 | public void testOne() { 72 | testDriver.pipeInput(recordFactory.create( 73 | Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.INPUT_TOPIC, null, 74 | "1987,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES", 75 | 1L)); 76 | assertThat(getOutput()).isEqualTo("Prediction: Is Airline delayed? => YES"); 77 | } 78 | 79 | /** 80 | * Test based on 81 | * Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest 82 | * 83 | */ 84 | @Test 85 | public void testList() { 86 | // Flight data (one single flight) --> We want to predict if it will be 87 | // delayed or not 88 | List inputValues = Arrays.asList( 89 | "1987,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES", 90 | "1999,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES"); 91 | List> records = inputValues.stream() 92 | .map(v -> new KeyValue(null, v)).collect(Collectors.toList()); 93 | 94 | testDriver.pipeInput(recordFactory.create( 95 | Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.INPUT_TOPIC, records, 1L, 100L)); 96 | assertThat(getOutput()).isEqualTo("Prediction: Is Airline delayed? => YES"); 97 | // This model predict also another flight to be delayed 98 | assertThat(getOutput()).isEqualTo("Prediction: Is Airline delayed? 
=> YES"); 99 | } 100 | 101 | } 102 | -------------------------------------------------------------------------------- /h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java: -------------------------------------------------------------------------------- 1 | package com.github.megachucky.kafka.streams.machinelearning; 2 | 3 | import static org.assertj.core.api.Assertions.assertThat; 4 | 5 | import java.util.Arrays; 6 | import java.util.List; 7 | import java.util.Properties; 8 | 9 | import com.github.megachucky.kafka.streams.machinelearning.TestEmbeddedKafkaCluster; 10 | import org.apache.kafka.clients.consumer.ConsumerConfig; 11 | import org.apache.kafka.clients.producer.ProducerConfig; 12 | import org.apache.kafka.common.serialization.Serdes; 13 | import org.apache.kafka.common.serialization.StringDeserializer; 14 | import org.apache.kafka.common.serialization.StringSerializer; 15 | import org.apache.kafka.common.utils.MockTime; 16 | import org.apache.kafka.streams.KafkaStreams; 17 | import org.apache.kafka.streams.KeyValue; 18 | import org.apache.kafka.streams.StreamsBuilder; 19 | import org.apache.kafka.streams.StreamsConfig; 20 | import org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster; 21 | import org.apache.kafka.streams.integration.utils.IntegrationTestUtils; 22 | import org.apache.kafka.streams.kstream.KStream; 23 | import org.apache.kafka.test.TestUtils; 24 | import org.junit.BeforeClass; 25 | import org.junit.ClassRule; 26 | import org.junit.Test; 27 | 28 | import hex.genmodel.easy.EasyPredictModelWrapper; 29 | import hex.genmodel.easy.RowData; 30 | import hex.genmodel.easy.exception.PredictException; 31 | import hex.genmodel.easy.prediction.BinomialModelPrediction; 32 | 33 | /** 34 | * 35 | * @author Kai Waehner (www.kai-waehner.de) 36 | * 37 | * End-to-end integration test, using an embedded Kafka cluster and a 38 | * H2O.ai DeepLearning Model. Mostly identical to the GBM example, but 39 | * uses another Model which was built using H2O's DeepLearning 40 | * implementation. 41 | * 42 | */ 43 | public class Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest { 44 | 45 | @ClassRule 46 | public static final EmbeddedKafkaCluster CLUSTER = new TestEmbeddedKafkaCluster(1); 47 | 48 | private static final String inputTopic = "AirlineInputTopic"; 49 | private static final String outputTopic = "AirlineOutputTopic"; 50 | 51 | // Name of the generated H2O.ai model 52 | private static String modelClassName = "com.github.megachucky.kafka.streams.machinelearning.models.deeplearning_fe7c1f02_08ec_4070_b784_c2531147e451"; 53 | 54 | // Prediction Value 55 | private static String airlineDelayPreduction = "unknown"; 56 | 57 | @BeforeClass 58 | public static void startKafkaCluster() throws Exception { 59 | CLUSTER.createTopic(inputTopic); 60 | CLUSTER.createTopic(outputTopic); 61 | } 62 | 63 | @Test 64 | public void shouldPredictFlightDelay() throws Exception { 65 | 66 | // Flight data (one single flight) --> We want to predict if it will be 67 | // delayed or not 68 | List inputValues = Arrays.asList( 69 | "1987,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES", 70 | "1999,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES"); 71 | 72 | // Step 1: Configure and start the processor topology. 
73 | // 74 | 75 | Properties streamsConfiguration = new Properties(); 76 | streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, 77 | "kafka-streams-h2o-deeplearning-integration-test"); 78 | streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers()); 79 | 80 | // The commit interval for flushing records to state stores and 81 | // downstream must be lower than 82 | // this integration test's timeout (30 secs) to ensure we observe the 83 | // expected processing results. 84 | streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 10 * 1000); 85 | streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); 86 | // Use a temporary directory for storing state, which will be 87 | // automatically removed after the test. 88 | streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getAbsolutePath()); 89 | 90 | // Create H2O object (see 91 | // deeplearning_fe7c1f02_08ec_4070_b784_c2531147e451.java) 92 | hex.genmodel.GenModel rawModel; 93 | rawModel = (hex.genmodel.GenModel) Class.forName(modelClassName).newInstance(); 94 | EasyPredictModelWrapper model = new EasyPredictModelWrapper(rawModel); 95 | 96 | // Configure Kafka Streams Application 97 | // Specify default (de)serializers for record keys and for record 98 | // values. 99 | streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()); 100 | streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()); 101 | 102 | // In the subsequent lines we define the processing topology of the 103 | // Streams application. 104 | final StreamsBuilder builder = new StreamsBuilder(); 105 | 106 | // Construct a `KStream` from the input topic "AirlineInputTopic", where 107 | // message values 108 | // represent lines of text (for the sake of this example, we ignore 109 | // whatever may be stored 110 | // in the message keys). 111 | final KStream airlineInputLines = builder.stream(inputTopic); 112 | 113 | // Stream Processor (in this case 'foreach' to add custom logic, i.e. 
114 | // apply the analytic model) 115 | airlineInputLines.foreach((key, value) -> { 116 | 117 | // Year,Month,DayofMonth,DayOfWeek,DepTime,CRSDepTime,ArrTime,CRSArrTime,UniqueCarrier,FlightNum,TailNum,ActualElapsedTime,CRSElapsedTime,AirTime,ArrDelay,DepDelay,Origin,Dest,Distance,TaxiIn,TaxiOut,Cancelled,CancellationCode,Diverted,CarrierDelay,WeatherDelay,NASDelay,SecurityDelay,LateAircraftDelay,IsArrDelayed,IsDepDelayed 118 | // value: 119 | // 1987,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES 120 | if (value != null && !value.equals("")) { 121 | System.out.println("#####################"); 122 | System.out.println("Flight Input:" + value); 123 | 124 | String[] valuesArray = value.split(","); 125 | 126 | RowData row = new RowData(); 127 | row.put("Year", valuesArray[0]); 128 | row.put("Month", valuesArray[1]); 129 | row.put("DayofMonth", valuesArray[2]); 130 | row.put("DayOfWeek", valuesArray[3]); 131 | row.put("CRSDepTime", valuesArray[5]); 132 | row.put("UniqueCarrier", valuesArray[8]); 133 | row.put("Origin", valuesArray[16]); 134 | row.put("Dest", valuesArray[17]); 135 | BinomialModelPrediction p = null; 136 | try { 137 | p = model.predictBinomial(row); 138 | } catch (PredictException e) { 139 | e.printStackTrace(); 140 | } 141 | 142 | airlineDelayPreduction = p.label; 143 | System.out.println("Label (aka prediction) is flight departure delayed: " + p.label); 144 | System.out.print("Class probabilities: "); 145 | for (int i = 0; i < p.classProbabilities.length; i++) { 146 | if (i > 0) { 147 | System.out.print(","); 148 | } 149 | System.out.print(p.classProbabilities[i]); 150 | } 151 | System.out.println(""); 152 | System.out.println("#####################"); 153 | 154 | } 155 | 156 | }); 157 | 158 | // Transform message: Add prediction information 159 | KStream transformedMessage = airlineInputLines 160 | .mapValues(value -> "Prediction: Is Airline delayed? => " + airlineDelayPreduction); 161 | 162 | // Send prediction information to Output Topic 163 | transformedMessage.to(outputTopic); 164 | 165 | // Start Kafka Streams Application to process new incoming messages from 166 | // Input Topic 167 | final KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfiguration); 168 | streams.cleanUp(); 169 | streams.start(); 170 | System.out.println("Airline Delay Prediction Microservice is running..."); 171 | System.out.println("Input to Kafka Topic 'AirlineInputTopic'; Output to Kafka Topic 'AirlineOutpuTopic'"); 172 | 173 | // 174 | // Step 2: Produce some input data to the input topic. 175 | // 176 | Properties producerConfig = new Properties(); 177 | producerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers()); 178 | producerConfig.put(ProducerConfig.ACKS_CONFIG, "all"); 179 | producerConfig.put(ProducerConfig.RETRIES_CONFIG, 0); 180 | producerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class); 181 | producerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class); 182 | IntegrationTestUtils.produceValuesSynchronously(inputTopic, inputValues, producerConfig, new MockTime()); 183 | 184 | // 185 | // Step 3: Verify the application's output data. 
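// The consumer below polls the output topic until at least two records have arrived
// (or the utility's timeout expires) before the predictions are asserted.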
// 187 | Properties consumerConfig = new Properties(); 188 | consumerConfig.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers()); 189 | consumerConfig.put(ConsumerConfig.GROUP_ID_CONFIG, 190 | "machine-learning-example-integration-test-standard-consumer"); 191 | consumerConfig.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); 192 | consumerConfig.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class); 193 | consumerConfig.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class); 194 | List<KeyValue<String, String>> response = IntegrationTestUtils 195 | .waitUntilMinKeyValueRecordsReceived(consumerConfig, outputTopic, 2); 196 | streams.close(); 197 | assertThat(response).isNotNull(); 198 | assertThat(response.get(0).value).isEqualTo("Prediction: Is Airline delayed? => YES"); 199 | 200 | assertThat(response.get(1).value).isEqualTo("Prediction: Is Airline delayed? => YES"); 201 | } 202 | 203 | } 204 | -------------------------------------------------------------------------------- /h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_ExampleTest.java: -------------------------------------------------------------------------------- 1 | package com.github.megachucky.kafka.streams.machinelearning; 2 | 3 | import org.apache.kafka.clients.producer.ProducerRecord; 4 | import org.apache.kafka.common.serialization.StringDeserializer; 5 | import org.apache.kafka.common.serialization.StringSerializer; 6 | import org.apache.kafka.streams.KeyValue; 7 | import org.apache.kafka.streams.TopologyTestDriver; 8 | import org.apache.kafka.streams.test.ConsumerRecordFactory; 9 | import org.junit.After; 10 | import org.junit.Before; 11 | import org.junit.Test; 12 | 13 | import java.io.IOException; 14 | import java.util.Arrays; 15 | import java.util.List; 16 | import java.util.stream.Collectors; 17 | 18 | import static org.assertj.core.api.AssertionsForInterfaceTypes.assertThat; 19 | 20 | /** 21 | * TopologyTestDriver based test of the stream processing in 22 | * {@link Kafka_Streams_MachineLearning_H2O_GBM_Example}, using 23 | * a TopologyTestDriver and an H2O GBM model. 24 | * 25 | * @author Jukka Karvanen / jukinimi.com
27 | * 28 | */ 29 | 30 | public class Kafka_Streams_MachineLearning_H2O_GBM_ExampleTest { 31 | private TopologyTestDriver testDriver; 32 | 33 | private StringDeserializer stringDeserializer = new StringDeserializer(); 34 | private ConsumerRecordFactory recordFactory = new ConsumerRecordFactory<>( 35 | new StringSerializer(), new StringSerializer()); 36 | 37 | @Before 38 | public void setup() throws IllegalAccessException, ClassNotFoundException, InstantiationException { 39 | testDriver = new TopologyTestDriver( 40 | Kafka_Streams_MachineLearning_H2O_GBM_Example.getStreamTopology( 41 | Kafka_Streams_MachineLearning_H2O_GBM_Example.modelClassName), 42 | Kafka_Streams_MachineLearning_H2O_GBM_Example.getStreamConfiguration("localhost:9092", 43 | Kafka_Streams_MachineLearning_H2O_GBM_Example.APPLICATION_ID)); 44 | } 45 | 46 | @After 47 | public void tearDown() { 48 | try { 49 | testDriver.close(); 50 | } catch (RuntimeException e) { 51 | // https://issues.apache.org/jira/browse/KAFKA-6647 causes exception when 52 | // executed in Windows, ignoring it 53 | // Logged stacktrace cannot be avoided 54 | System.out.println("Ignoring exception, test failing in Windows due this exception:" 55 | + e.getLocalizedMessage()); 56 | } 57 | } 58 | 59 | private String getOutput() { 60 | ProducerRecord output = testDriver.readOutput( 61 | Kafka_Streams_MachineLearning_H2O_GBM_Example.OUTPUT_TOPIC, stringDeserializer, 62 | stringDeserializer); 63 | assertThat(output).isNotNull(); 64 | return output.value(); 65 | } 66 | 67 | /** 68 | * Simple test validating only the prediction part of the output 69 | */ 70 | @Test 71 | public void testOne() { 72 | testDriver.pipeInput(recordFactory.create(Kafka_Streams_MachineLearning_H2O_GBM_Example.INPUT_TOPIC, 73 | null, 74 | "1987,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES", 75 | 1L)); 76 | assertThat(getOutput()).isEqualTo("Prediction: Is Airline delayed? => YES"); 77 | } 78 | 79 | /** 80 | * Test based on 81 | * Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest 82 | * 83 | */ 84 | @Test 85 | public void testList() { 86 | // Flight data (one single flight) --> We want to predict if it will be 87 | // delayed or not 88 | List inputValues = Arrays.asList( 89 | "1987,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES", 90 | "1999,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES"); 91 | List> records = inputValues.stream() 92 | .map(v -> new KeyValue(null, v)).collect(Collectors.toList()); 93 | 94 | testDriver.pipeInput(recordFactory.create(Kafka_Streams_MachineLearning_H2O_GBM_Example.INPUT_TOPIC, 95 | records, 1L, 100L)); 96 | assertThat(getOutput()).isEqualTo("Prediction: Is Airline delayed? => YES"); 97 | assertThat(getOutput()).isEqualTo("Prediction: Is Airline delayed? 
=> NO"); 98 | } 99 | 100 | } 101 | -------------------------------------------------------------------------------- /h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java: -------------------------------------------------------------------------------- 1 | package com.github.megachucky.kafka.streams.machinelearning; 2 | 3 | import static org.assertj.core.api.Assertions.assertThat; 4 | 5 | import java.util.Arrays; 6 | import java.util.List; 7 | import java.util.Properties; 8 | 9 | import com.github.megachucky.kafka.streams.machinelearning.TestEmbeddedKafkaCluster; 10 | import org.apache.kafka.clients.consumer.ConsumerConfig; 11 | import org.apache.kafka.clients.producer.ProducerConfig; 12 | import org.apache.kafka.common.serialization.Serdes; 13 | import org.apache.kafka.common.serialization.StringDeserializer; 14 | import org.apache.kafka.common.serialization.StringSerializer; 15 | import org.apache.kafka.streams.KafkaStreams; 16 | import org.apache.kafka.streams.KeyValue; 17 | import org.apache.kafka.streams.StreamsBuilder; 18 | import org.apache.kafka.streams.StreamsConfig; 19 | import org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster; 20 | import org.apache.kafka.streams.integration.utils.IntegrationTestUtils; 21 | import org.apache.kafka.streams.kstream.KStream; 22 | import org.apache.kafka.test.TestUtils; 23 | import org.junit.BeforeClass; 24 | import org.junit.ClassRule; 25 | import org.junit.Test; 26 | 27 | import com.github.megachucky.kafka.streams.machinelearning.Kafka_Streams_MachineLearning_H2O_GBM_Example; 28 | 29 | import hex.genmodel.easy.EasyPredictModelWrapper; 30 | import hex.genmodel.easy.RowData; 31 | import hex.genmodel.easy.exception.PredictException; 32 | import hex.genmodel.easy.prediction.BinomialModelPrediction; 33 | import kafka.utils.MockTime; 34 | 35 | /** 36 | * 37 | * @author Kai Waehner (www.kai-waehner.de) 38 | * 39 | * End-to-end integration test based on 40 | * {@link Kafka_Streams_MachineLearning_H2O_GBM_Example}, using an 41 | * embedded Kafka cluster and a H2O.ai GBM Model. 42 | * 43 | * See {@link Kafka_Streams_MachineLearning_H2O_GBM_Example} for further 44 | * documentation. 
45 | * 46 | */ 47 | public class Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest { 48 | 49 | @ClassRule 50 | // public static final EmbeddedSingleNodeKafkaCluster CLUSTER = new 51 | // EmbeddedSingleNodeKafkaCluster(); 52 | 53 | public static final EmbeddedKafkaCluster CLUSTER = new TestEmbeddedKafkaCluster(1); 54 | 55 | private static final String inputTopic = "AirlineInputTopic"; 56 | private static final String outputTopic = "AirlineOutputTopic"; 57 | 58 | // Name of the generated H2O.ai model 59 | private static String modelClassName = "com.github.megachucky.kafka.streams.machinelearning.models.gbm_pojo_test"; 60 | 61 | // Prediction Value 62 | private static String airlineDelayPreduction = "unknown"; 63 | 64 | @BeforeClass 65 | public static void startKafkaCluster() throws Exception { 66 | CLUSTER.createTopic(inputTopic); 67 | CLUSTER.createTopic(outputTopic); 68 | } 69 | 70 | @Test 71 | public void shouldPredictFlightDelay() throws Exception { 72 | 73 | // Flight data (one single flight) --> We want to predict if it will be 74 | // delayed or not 75 | List inputValues = Arrays.asList( 76 | "1987,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES", 77 | "1999,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES"); 78 | 79 | // Step 1: Configure and start the processor topology. 80 | // 81 | 82 | Properties streamsConfiguration = new Properties(); 83 | streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "kafka-streams-h2o-gbm-integration-test"); 84 | streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers()); 85 | 86 | // The commit interval for flushing records to state stores and 87 | // downstream must be lower than 88 | // this integration test's timeout (30 secs) to ensure we observe the 89 | // expected processing results. 90 | streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 10 * 1000); 91 | streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); 92 | // Use a temporary directory for storing state, which will be 93 | // automatically removed after the test. 94 | streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getAbsolutePath()); 95 | 96 | // Create H2O object (see gbm_pojo_test.java) 97 | hex.genmodel.GenModel rawModel; 98 | rawModel = (hex.genmodel.GenModel) Class.forName(modelClassName).newInstance(); 99 | EasyPredictModelWrapper model = new EasyPredictModelWrapper(rawModel); 100 | 101 | // Configure Kafka Streams Application 102 | // Specify default (de)serializers for record keys and for record 103 | // values. 104 | streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()); 105 | streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()); 106 | 107 | // In the subsequent lines we define the processing topology of the 108 | // Streams application. 109 | final StreamsBuilder builder = new StreamsBuilder(); 110 | 111 | // Construct a `KStream` from the input topic "AirlineInputTopic", where 112 | // message values 113 | // represent lines of text (for the sake of this example, we ignore 114 | // whatever may be stored 115 | // in the message keys). 116 | final KStream airlineInputLines = builder.stream(inputTopic); 117 | 118 | // Stream Processor (in this case 'foreach' to add custom logic, i.e. 
119 | // apply the analytic model) 120 | 121 | airlineInputLines.foreach((key, value) -> { 122 | 123 | // Year,Month,DayofMonth,DayOfWeek,DepTime,CRSDepTime,ArrTime,CRSArrTime,UniqueCarrier,FlightNum,TailNum,ActualElapsedTime,CRSElapsedTime,AirTime,ArrDelay,DepDelay,Origin,Dest,Distance,TaxiIn,TaxiOut,Cancelled,CancellationCode,Diverted,CarrierDelay,WeatherDelay,NASDelay,SecurityDelay,LateAircraftDelay,IsArrDelayed,IsDepDelayed 124 | // value: 125 | // 1987,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES 126 | if (value != null && !value.equals("")) { 127 | System.out.println("#####################"); 128 | System.out.println("Flight Input:" + value); 129 | 130 | String[] valuesArray = value.split(","); 131 | 132 | RowData row = new RowData(); 133 | row.put("Year", valuesArray[0]); 134 | row.put("Month", valuesArray[1]); 135 | row.put("DayofMonth", valuesArray[2]); 136 | row.put("DayOfWeek", valuesArray[3]); 137 | row.put("CRSDepTime", valuesArray[5]); 138 | row.put("UniqueCarrier", valuesArray[8]); 139 | row.put("Origin", valuesArray[16]); 140 | row.put("Dest", valuesArray[17]); 141 | BinomialModelPrediction p = null; 142 | try { 143 | p = model.predictBinomial(row); 144 | } catch (PredictException e) { 145 | e.printStackTrace(); 146 | } 147 | 148 | airlineDelayPreduction = p.label; 149 | System.out.println("Label (aka prediction) is flight departure delayed: " + p.label); 150 | System.out.print("Class probabilities: "); 151 | for (int i = 0; i < p.classProbabilities.length; i++) { 152 | if (i > 0) { 153 | System.out.print(","); 154 | } 155 | System.out.print(p.classProbabilities[i]); 156 | } 157 | System.out.println(""); 158 | System.out.println("#####################"); 159 | 160 | } 161 | 162 | }); 163 | 164 | // Transform message: Add prediction information 165 | KStream transformedMessage = airlineInputLines 166 | .mapValues(value -> "Prediction: Is Airline delayed? => " + airlineDelayPreduction); 167 | 168 | // Send prediction information to Output Topic 169 | transformedMessage.to(outputTopic); 170 | 171 | // Start Kafka Streams Application to process new incoming messages from 172 | // Input Topic 173 | final KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfiguration); 174 | streams.cleanUp(); 175 | streams.start(); 176 | System.out.println("Airline Delay Prediction Microservice is running..."); 177 | System.out.println("Input to Kafka Topic 'AirlineInputTopic'; Output to Kafka Topic 'AirlineOutpuTopic'"); 178 | 179 | // 180 | // Step 2: Produce some input data to the input topic. 181 | // 182 | Properties producerConfig = new Properties(); 183 | producerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers()); 184 | producerConfig.put(ProducerConfig.ACKS_CONFIG, "all"); 185 | producerConfig.put(ProducerConfig.RETRIES_CONFIG, 0); 186 | producerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class); 187 | producerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class); 188 | IntegrationTestUtils.produceValuesSynchronously(inputTopic, inputValues, producerConfig, 189 | new MockTime()); 190 | 191 | // 192 | // Step 3: Verify the application's output data. 
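// Note: in contrast to the Deep Learning model, which classifies both test flights as
// delayed, the GBM model is expected to return YES for the 1987 flight and NO for the
// 1999 flight - see the two assertions below.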
193 | // 194 | Properties consumerConfig = new Properties(); 195 | consumerConfig.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers()); 196 | consumerConfig.put(ConsumerConfig.GROUP_ID_CONFIG, 197 | "machine-learning-example-integration-test-standard-consumer"); 198 | consumerConfig.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); 199 | consumerConfig.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class); 200 | consumerConfig.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class); 201 | List> response = IntegrationTestUtils 202 | .waitUntilMinKeyValueRecordsReceived(consumerConfig, outputTopic, 2); 203 | streams.close(); 204 | assertThat(response).isNotNull(); 205 | assertThat(response.get(0).value).isEqualTo("Prediction: Is Airline delayed? => YES"); 206 | 207 | assertThat(response.get(1).value).isEqualTo("Prediction: Is Airline delayed? => NO"); 208 | } 209 | 210 | } 211 | -------------------------------------------------------------------------------- /h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/TestEmbeddedKafkaCluster.java: -------------------------------------------------------------------------------- 1 | package com.github.megachucky.kafka.streams.machinelearning; 2 | 3 | import org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster; 4 | import org.slf4j.Logger; 5 | import org.slf4j.LoggerFactory; 6 | 7 | import java.util.Properties; 8 | 9 | /** 10 | * This is helper class to workaround for Failing stream tests in Windows 11 | * environment KAFKA-6647. 12 | * 13 | * @author Jukka Karvanen 14 | * 15 | * The causing issue is https://issues.apache.org/jira/browse/KAFKA-6647 16 | * Replacing EmbeddedKafkaCluster with TestEmbeddedKafkaCluster will 17 | * catch and ignore the exception happening during the tear down of the 18 | * test The exception does not have affect to functionality 19 | */ 20 | 21 | public class TestEmbeddedKafkaCluster extends EmbeddedKafkaCluster { 22 | private static final Logger log = LoggerFactory.getLogger(TestEmbeddedKafkaCluster.class); 23 | 24 | public TestEmbeddedKafkaCluster(int numBrokers) { 25 | super(numBrokers); 26 | } 27 | 28 | public TestEmbeddedKafkaCluster(int numBrokers, Properties brokerConfig) { 29 | super(numBrokers, brokerConfig); 30 | } 31 | 32 | public TestEmbeddedKafkaCluster(int numBrokers, Properties brokerConfig, long mockTimeMillisStart) { 33 | super(numBrokers, brokerConfig, mockTimeMillisStart); 34 | } 35 | 36 | public TestEmbeddedKafkaCluster(int numBrokers, Properties brokerConfig, long mockTimeMillisStart, 37 | long mockTimeNanoStart) { 38 | super(numBrokers, brokerConfig, mockTimeMillisStart, mockTimeNanoStart); 39 | } 40 | 41 | public void after() { 42 | try { 43 | super.after(); 44 | } catch (RuntimeException e) { 45 | log.warn("Ignoring exception, test failing in Windows due this exception {}", e); 46 | } 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/TestKafkaStreams.java: -------------------------------------------------------------------------------- 1 | package com.github.megachucky.kafka.streams.machinelearning; 2 | 3 | import org.apache.kafka.common.utils.Time; 4 | import org.apache.kafka.streams.KafkaClientSupplier; 5 | import org.apache.kafka.streams.KafkaStreams; 6 | import org.apache.kafka.streams.Topology; 7 | import org.slf4j.Logger; 8 | import org.slf4j.LoggerFactory; 9 | 10 | 
import java.util.Properties; 11 | 12 | /** 13 | * This is helper class to workaround for Failing stream tests in Windows 14 | * environment KAFKA-6647. 15 | * 16 | * @author Jukka Karvanen 17 | * 18 | * The causing issue is https://issues.apache.org/jira/browse/KAFKA-6647 19 | * Replacing KafkaStreams with TestKafkaStreams will catch and ignore 20 | * the exception caused by cleanUp The exception does not have affect to 21 | * functionality 22 | */ 23 | 24 | public class TestKafkaStreams extends KafkaStreams { 25 | private static final Logger log = LoggerFactory.getLogger(TestKafkaStreams.class); 26 | 27 | public TestKafkaStreams(Topology topology, Properties props) { 28 | super(topology, props); 29 | } 30 | 31 | public TestKafkaStreams(Topology topology, Properties props, KafkaClientSupplier clientSupplier) { 32 | super(topology, props, clientSupplier); 33 | } 34 | 35 | public TestKafkaStreams(Topology topology, Properties props, Time time) { 36 | super(topology, props, time); 37 | } 38 | 39 | public TestKafkaStreams(Topology topology, Properties props, KafkaClientSupplier clientSupplier, Time time) { 40 | super(topology, props, clientSupplier, time); 41 | } 42 | 43 | public void cleanUp() { 44 | try { 45 | super.cleanUp(); 46 | } catch (RuntimeException e) { 47 | log.warn("Ignoring exception, test failing in Windows due this exception {}", e); 48 | } 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | com.github.kaiwaehner.kafka.streams.machinelearning 7 | kafka-streams-machine-learning-examples 8 | CP53_AK23 9 | pom 10 | 11 | 12 | 13 | h2o-gbm 14 | 15 | tensorflow-image-recognition 16 | 17 | dl4j-deeplearning-iris 18 | 19 | tensorflow-keras 20 | 21 | 22 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # Machine Learning + Kafka Streams Examples 2 | 3 | This project contains **examples which demonstrate how to deploy analytic models to mission-critical, scalable production leveraging [Apache Kafka](https://kafka.apache.org/) and its [Streams API](https://docs.confluent.io/current/streams/index.html).** 4 | Examples will include analytic models built with TensorFlow, Keras, H2O, Python, DeepLearning4J and other technologies. 5 | 6 | ![Kafka Open Source Ecosystem for a Scalable Mission Critical Machine Learning Infrastructure](http://www.kai-waehner.de/blog/wp-content/uploads/2017/10/Apache_Kafka_Ecosystem_Kafka_Streams_Machine_Learning.png "Kafka Open Source Ecosystem for a Scalable Mission Critical Machine Learning Infrastructure") 7 | 8 | ## Material (Blogs Posts, Slides, Videos) 9 | 10 | Here is some material about this topic if you want to read and listen to the theory instead of just doing hands-on: 11 | 12 | - Blog Post: [How to Build and Deploy Scalable Machine Learning in Production with Apache Kafka](https://www.confluent.io/blog/build-deploy-scalable-machine-learning-production-apache-kafka/) 13 | - Slide Deck: [Apache Kafka + Machine Learning => Intelligent Real Time Applications](https://www.slideshare.net/KaiWaehner/apache-kafka-streams-machine-learning-deep-learning) 14 | - Slide Deck: [Deep Learning at Extreme Scale (in the Cloud) 
with the Apache Kafka Open Source Ecosystem](https://www.slideshare.net/KaiWaehner/deep-learning-at-extreme-scale-in-the-cloud-with-the-apache-kafka-open-source-ecosystem) 15 | - Video Recording: [Deep Learning in Mission Critical and Scalable Real Time Applications with Open Source Frameworks](https://vimeo.com/jaxtv/review/256406763/7fbf4213be) 16 | - Blog Post: [Using Apache Kafka to Drive Cutting-Edge Machine Learning - Hybrid ML Architectures, AutoML, and more...](https://www.confluent.io/blog/using-apache-kafka-drive-cutting-edge-machine-learning) 17 | - Blog Post: [Machine Learning with Python, Jupyter, KSQL and TensorFlow](https://www.confluent.io/blog/machine-learning-with-python-jupyter-ksql-tensorflow) 18 | - Blog Post: [Streaming Machine Learning with Tiered Storage and Without a Data Lake](https://www.confluent.io/blog/streaming-machine-learning-with-tiered-storage/) 19 | 20 | ## Use Cases and Technologies 21 | 22 | ##### The following examples are already available including unit tests: 23 | 24 | * Deployment of a H2O GBM model to a Kafka Streams application for prediction of flight delays 25 | * Deployment of a H2O Deep Learning model to a Kafka Streams application for prediction of flight delays 26 | * Deployment of a pre-built TensorFlow CNN model for image recognition 27 | * Deployment of a DL4J model to predict the species of Iris flowers 28 | * Deployment of a Keras model (trained with TensorFlow backend) using the Import Model API from DeepLearning4J 29 | 30 | **More sophisticated use cases around Kafka Streams and other technologies will be added over time in this or related Github project. Some ideas**: 31 | 32 | * Image Recognition with H2O and TensorFlow (to show the difference of using H2O instead of using just low level TensorFlow APIs) 33 | * Anomaly Detection with Autoencoders leveraging DeepLearning4J. 34 | * Cross Selling and Customer Churn Detection using classical Machine Learning algorithms but also Deep Learning 35 | * Stateful Stream Processing to combine different model execution steps into a more powerful workflow instead of "just" inferencing single events (a good example might be a streaming process with sliding or session windows). 36 | * Keras to build different models with Python, TensorFlow, Theano and other Deep Learning frameworks under the hood + Kafka Streams as generic Machine Learning infrastructure to deploy, execute and monitor these different models. 
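
As a flavor of the stateful stream processing idea above, a windowed aggregation over the prediction stream could look roughly like the following sketch (not part of this project; the topic name matches the flight-delay examples, but the carrier-code record key and the five-minute window are illustrative assumptions - the examples in this repository actually use null record keys):

    import java.time.Duration;
    import org.apache.kafka.streams.StreamsBuilder;
    import org.apache.kafka.streams.kstream.KStream;
    import org.apache.kafka.streams.kstream.KTable;
    import org.apache.kafka.streams.kstream.TimeWindows;
    import org.apache.kafka.streams.kstream.Windowed;

    // Count positive delay predictions per carrier in five-minute tumbling windows
    StreamsBuilder builder = new StreamsBuilder();
    KStream<String, String> predictions = builder.stream("AirlineOutputTopic");
    KTable<Windowed<String>, Long> delayedFlightsPerCarrier = predictions
            .filter((carrierCode, prediction) -> prediction.endsWith("=> YES"))
            .groupByKey()
            .windowedBy(TimeWindows.of(Duration.ofMinutes(5)))
            .count();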
37 | 38 | ##### Some other GitHub projects already exist with more ML + Kafka content: 39 | 40 | The most exciting and powerful example first: 41 | [Streaming Machine Learning at Scale from 100000 IoT Devices with HiveMQ, Apache Kafka and TensorFlow](https://github.com/kaiwaehner/hivemq-mqtt-tensorflow-kafka-realtime-iot-machine-learning-training-inference) 42 | 43 | Here are some more demos: 44 | 45 | - Deep Learning UDF for KSQL: [Streaming Anomaly Detection of MQTT IoT Sensor Data using an Autoencoder](https://github.com/kaiwaehner/ksql-udf-deep-learning-mqtt-iot) 46 | - End-to-End ML Integration Demo: [Continuous Health Checks with Anomaly Detection using KSQL, Kafka Connect, Deep Learning and Elasticsearch](https://github.com/kaiwaehner/ksql-fork-with-deep-learning-function) 47 | - TensorFlow Serving + gRPC + Kafka Streams on Github => Stream Processing and RPC / Request-Response concepts combined: [Model inference with Apache Kafka, Kafka Streams and a TensorFlow model deployed on a TensorFlow Serving model server](https://github.com/kaiwaehner/tensorflow-serving-java-grpc-kafka-streams) 48 | - Solving the impedance mismatch between Data Scientist and Production Engineer: [Python, Jupyter, TensorFlow, Keras, Apache Kafka, KSQL](https://github.com/kaiwaehner/python-jupyter-apache-kafka-ksql-tensorflow-keras) 49 | 50 | ## Requirements, Installation and Usage 51 | The code is developed and tested on Mac and Linux operating systems. Since Kafka does not run well on Windows, the examples are not tested there at all. 52 | 53 | Java 8 and Maven 3 are required. Maven will download all required dependencies. 54 | 55 | Just download the project and run 56 | 57 | mvn clean package 58 | 59 | You can do this in the main directory or in each module separately. 60 | 61 | Apache Kafka 2.5 is currently used. The code is also compatible with Kafka and Kafka Streams 1.1 and 2.x. 62 | 63 | **Please make sure to run the Maven build without any changes first.** If it works without errors, you can change library versions, the Java version, etc. and see if it still works or if you need to adjust code. 64 | 65 | Every example includes an implementation and a unit test. The examples are very simple and lightweight. No further configuration is needed to build and run them. The trade-off is that the generated models are also included (which increases the download size of the project). 66 | 67 | The unit tests use some Kafka helper classes like EmbeddedSingleNodeKafkaCluster in package **com.github.megachucky.kafka.streams.machinelearning.test.utils** so that you can run them without any other configuration or Kafka setup. 68 | If you want to run an implementation of a main class in package **com.github.megachucky.kafka.streams.machinelearning**, you need to start a Kafka cluster (with at least one Zookeeper and one Kafka broker running) and also create the required topics. So check out the unit tests first.
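For the flight-delay examples, creating the required topics on a local broker could look like this (just a sketch - depending on your Kafka distribution the script is called kafka-topics or kafka-topics.sh, and partition/replication settings are up to you):

    kafka-topics --create --bootstrap-server localhost:9092 --topic AirlineInputTopic --partitions 1 --replication-factor 1
    kafka-topics --create --bootstrap-server localhost:9092 --topic AirlineOutputTopic --partitions 1 --replication-factor 1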
69 | 70 | 71 | ### Example 1 - Gradient Boosting with H2O.ai for Prediction of Flight Delays 72 | 73 | Detailed info in [h2o-gbm](h2o-gbm/readme.md) 74 | 75 | ### Example 2 - Convolutional Neural Network (CNN) with TensorFlow for Image Recognition 76 | 77 | Detailed info in [tensorflow-image-recognition](tensorflow-image-recognition/readme.md) 78 | 79 | ### Example 3 - Iris Prediction using a Neural Network with DeepLearning4J (DL4J) 80 | 81 | Detailed info in [dl4j-deeplearning-iris](dl4j-deeplearning-iris/readme.md) 82 | 83 | ### Example 4 - Python + Keras + TensorFlow + DeepLearning4j 84 | 85 | Detailed info in [tensorflow-keras](tensorflow-keras/readme.md) 86 | 87 | 88 | 89 | 90 | 91 | -------------------------------------------------------------------------------- /src/main/java/com/github/megachucky/kafka/streams/machinelearning/StreamsStarterApp.java: -------------------------------------------------------------------------------- 1 | package com.github.megachucky.kafka.streams.machinelearning; 2 | 3 | import java.util.Properties; 4 | 5 | import org.apache.kafka.clients.consumer.ConsumerConfig; 6 | import org.apache.kafka.common.serialization.Serdes; 7 | import org.apache.kafka.streams.KafkaStreams; 8 | import org.apache.kafka.streams.StreamsBuilder; 9 | import org.apache.kafka.streams.StreamsConfig; 10 | import org.apache.kafka.streams.kstream.KStream; 11 | 12 | /** 13 | * Demo Kafka Streams app. Foundation for the other ML classes. 14 | * 15 | * @author kai.waehner (www.kai-waehner.de) 16 | * 17 | */ 18 | public class StreamsStarterApp { 19 | 20 | public static void main(String[] args) { 21 | 22 | Properties config = new Properties(); 23 | config.put(StreamsConfig.APPLICATION_ID_CONFIG, "streams-starter-app"); 24 | config.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); 25 | config.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); 26 | config.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass()); 27 | config.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass()); 28 | 29 | StreamsBuilder builder = new StreamsBuilder(); 30 | 31 | KStream<String, String> kStream = builder.stream("streams-file-input"); 32 | // no processing logic yet - records are piped through unchanged 33 | kStream.to("streams-wordcount-output"); 34 | 35 | KafkaStreams streams = new KafkaStreams(builder.build(), config); 36 | streams.cleanUp(); // only do this in dev - not in prod 37 | streams.start(); 38 | 39 | // print the local thread metadata 40 | System.out.println(streams.localThreadsMetadata().toString()); 41 | 42 | // shutdown hook to correctly close the streams application 43 | Runtime.getRuntime().addShutdownHook(new Thread(streams::close)); 44 | 45 | } 46 | 47 | } 48 | -------------------------------------------------------------------------------- /src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=INFO, stdout 2 | 3 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 4 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.stdout.layout.ConversionPattern=%p %m (%c:%L) %n -------------------------------------------------------------------------------- /tensorflow-image-recognition/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | com.github.kaiwaehner.kafka.streams.machinelearning 7 | tensorflow-image-recognition 8 | CP55_AK25 9 | 10 | 11 | 12 | confluent 13 | http://packages.confluent.io/maven/ 14 | 15 | 16 | 17 | 18 | 1.8 19 |
2.5.0 20 | 2.12 21 | ${kafka.scala.version}.8 22 | 5.5.0 23 | UTF-8 24 | 25 | 26 | 27 | 28 | 31 | 32 | 33 | org.apache.kafka 34 | kafka-streams 35 | ${kafka.version} 36 | 37 | 38 | 39 | 40 | org.tensorflow 41 | tensorflow 42 | 1.3.0 43 | 44 | 45 | 46 | 47 | org.apache.kafka 48 | kafka-streams-test-utils 49 | ${kafka.version} 50 | test 51 | 52 | 53 | 54 | junit 55 | junit 56 | 4.12 57 | test 58 | 59 | 60 | org.assertj 61 | assertj-core 62 | 3.3.0 63 | test 64 | 65 | 66 | org.apache.kafka 67 | kafka_${kafka.scala.version} 68 | ${kafka.version} 69 | test 70 | test 71 | 72 | 73 | org.apache.kafka 74 | kafka-clients 75 | ${kafka.version} 76 | test 77 | test 78 | 79 | 80 | org.apache.kafka 81 | kafka-streams 82 | ${kafka.version} 83 | test 84 | test 85 | 86 | 87 | org.apache.curator 88 | curator-test 89 | 2.9.0 90 | test 91 | 92 | 93 | io.confluent 94 | kafka-schema-registry 95 | ${confluent.version} 96 | test 97 | 98 | 99 | io.confluent 100 | kafka-schema-registry 101 | ${confluent.version} 102 | 103 | tests 104 | test 105 | 106 | 107 | org.hamcrest 108 | hamcrest 109 | 2.1 110 | test 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | org.apache.maven.plugins 119 | maven-compiler-plugin 120 | 3.6.1 121 | 122 | 1.8 123 | 1.8 124 | 125 | 126 | 127 | 128 | 129 | org.apache.maven.plugins 130 | maven-assembly-plugin 131 | 2.5.2 132 | 133 | 134 | jar-with-dependencies 135 | 136 | 137 | 138 | true 139 | com.github.megachucky.kafka.streams.machinelearning.Kafka_Streams_TensorFlow_Image_Recognition_Example 140 | 141 | 142 | 143 | 144 | 145 | 146 | assemble-all 147 | package 148 | 149 | single 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | -------------------------------------------------------------------------------- /tensorflow-image-recognition/readme.md: -------------------------------------------------------------------------------- 1 | # Machine Learning + Kafka Streams Examples 2 | 3 | General info in main [Readme](../readme.md) 4 | 5 | ### Example 2 - Convolutional Neural Network (CNN) with TensorFlow for Image Recognition 6 | **Use Case** 7 | 8 | A Convolutional Neural Network (CNN) for image recognition. 9 | A prebuilt TensorFlow CNN model is instantiated and used in a Kafka Streams application to recognize new JPEG images. A Kafka input topic receives the location of a new image (another option would be to send the image itself in the Kafka message instead of just a link to it); the application infers the content of the picture via the TensorFlow model and sends the result to a Kafka output topic. 10 | 11 | **Machine Learning Technology** 12 | * [TensorFlow](https://www.tensorflow.org/) 13 | * Leverages [TensorFlow for Java](https://www.tensorflow.org/install/install_java). These APIs are particularly well-suited for loading models created in Python and executing them within a Java application. Please note: The Java API doesn't yet include convenience functions (which you might know from [Keras](https://keras.io/)), thus a private helper class is used in the example for construction and execution of the pre-built TensorFlow model. 14 | * Check the official TensorFlow demo [LabelImage](https://github.com/kaiwaehner/tensorflow/blob/r1.3/tensorflow/java/src/main/java/org/tensorflow/examples/LabelImage.java) to understand this image recognition example 15 | * You can re-use the pre-trained TensorFlow model attached to this project [tensorflow_inception_graph.pb](http://arxiv.org/abs/1512.00567) or add your own model.
16 | * The 'images' folder contains models which were used for training the model (trained_airplane_1.jpg, trained_airplane_2.jpg, trained_butterfly.jpg) but also a new picture (new_airplane.jpg) which is not known by the model and using a different resolution than the others. Feel free to add your own pictures (they need to be trained, see list of trained pictures in the file: imagenet_comp_graph_label_strings.txt), otherwise the model will return 'unknown'. 17 | 18 | **Source Code** 19 | 20 | [Kafka_Streams_TensorFlow_Image_Recognition_Example.java](src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_TensorFlow_Image_Recognition_Example.java) 21 | 22 | **Unit Test** 23 | 24 | [Kafka_Streams_TensorFlow_Image_Recognition_ExampleTest.java](src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_TensorFlow_Image_Recognition_ExampleTest.java) 25 | [Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest.java](src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest.java) 26 | 27 | -------------------------------------------------------------------------------- /tensorflow-image-recognition/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_TensorFlow_Image_Recognition_Example.java: -------------------------------------------------------------------------------- 1 | package com.github.megachucky.kafka.streams.machinelearning; 2 | 3 | import java.io.IOException; 4 | import java.nio.charset.Charset; 5 | import java.nio.file.Files; 6 | import java.nio.file.Path; 7 | import java.nio.file.Paths; 8 | import java.util.Arrays; 9 | import java.util.List; 10 | import java.util.Properties; 11 | 12 | import org.apache.kafka.common.serialization.Serdes; 13 | import org.apache.kafka.streams.KafkaStreams; 14 | import org.apache.kafka.streams.StreamsBuilder; 15 | import org.apache.kafka.streams.StreamsConfig; 16 | import org.apache.kafka.streams.Topology; 17 | import org.apache.kafka.streams.kstream.KStream; 18 | import org.apache.kafka.streams.kstream.Printed; 19 | import org.tensorflow.DataType; 20 | import org.tensorflow.Graph; 21 | import org.tensorflow.Output; 22 | import org.tensorflow.Session; 23 | import org.tensorflow.Tensor; 24 | 25 | /** 26 | * @author Kai Waehner (www.kai-waehner.de) 27 | * 28 | * Creates a new Kafka Streams application for Image Recognition. The 29 | * application uses the CNN model "inception5h" (built with TensorFlow) 30 | * to infer messages sent to Kafka topic "ImageInputTopic". The outcome 31 | * of model inference is sent to Kafka topic "ImageOutputTopic". 32 | * 33 | */ 34 | public class Kafka_Streams_TensorFlow_Image_Recognition_Example { 35 | 36 | static final String imageInputTopic = "ImageInputTopic"; 37 | static final String imageOutputTopic = "ImageOutputTopic"; 38 | 39 | public static void main(final String[] args) throws Exception { 40 | // Configure Kafka Streams Application 41 | final String bootstrapServers = args.length > 0 ? 
args[0] : "localhost:9092"; 42 | final Properties streamsConfiguration = getStreamConfiguration(bootstrapServers); 43 | Topology topology = getStreamTopology(); 44 | 45 | // Start Kafka Streams Application to process new incoming images from the Input 46 | // Topic 47 | final KafkaStreams streams = new KafkaStreams(topology, streamsConfiguration); 48 | 49 | streams.cleanUp(); 50 | 51 | streams.start(); 52 | 53 | System.out.println("Image Recognition Microservice is running..."); 54 | 55 | System.out.println("Input to Kafka Topic " + imageInputTopic + "; Output to Kafka Topic " + imageOutputTopic); 56 | 57 | // Add shutdown hook to respond to SIGTERM and gracefully close Kafka 58 | // Streams 59 | Runtime.getRuntime().addShutdownHook(new Thread(streams::close)); 60 | 61 | } 62 | 63 | static Properties getStreamConfiguration(String bootstrapServers) { 64 | final Properties streamsConfiguration = new Properties(); 65 | // Give the Streams application a unique name. The name must be unique 66 | // in the Kafka cluster 67 | // against which the application is run. 68 | streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, 69 | "kafka-streams-tensorflow-image-recognition-example"); 70 | // Where to find Kafka broker(s). 71 | streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers); 72 | 73 | // Specify default (de)serializers for record keys and for record 74 | // values. 75 | streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()); 76 | streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()); 77 | return streamsConfiguration; 78 | } 79 | 80 | static Topology getStreamTopology() throws IOException { 81 | // Create TensorFlow object 82 | 83 | String modelDir = "src/main/resources/generatedModels/CNN_inception5h"; 84 | 85 | Path pathGraph = Paths.get(modelDir, "tensorflow_inception_graph.pb"); 86 | byte[] graphDef = Files.readAllBytes(pathGraph); 87 | 88 | Path pathModel = Paths.get(modelDir, "imagenet_comp_graph_label_strings.txt"); 89 | List<String> labels = Files.readAllLines(pathModel, Charset.forName("UTF-8")); 90 | 91 | // In the subsequent lines we define the processing topology of the 92 | // Streams application. 93 | final StreamsBuilder builder = new StreamsBuilder(); 94 | 95 | // Construct a `KStream` from the input topic "ImageInputTopic", where 96 | // message values represent lines of text 97 | final KStream<String, String> imageInputLines = builder.stream(imageInputTopic); 98 | 99 | //imageInputLines.print(Printed.toSysOut()); 100 | 101 | // Stream Processor (in this case inside mapValues to add custom logic, i.e.
apply the 102 | // analytic model) 103 | // Transform message: Add prediction information 104 | KStream<String, String> transformedMessage = 105 | imageInputLines.mapValues(value -> { 106 | 107 | String imageClassification = "unknown"; 108 | String imageProbability = "unknown"; 109 | 110 | String imageFile = value; 111 | 112 | Path pathImage = Paths.get(imageFile); 113 | byte[] imageBytes; 114 | try { 115 | imageBytes = Files.readAllBytes(pathImage); 116 | 117 | try (Tensor image = constructAndExecuteGraphToNormalizeImage(imageBytes)) { 118 | float[] labelProbabilities = executeInceptionGraph(graphDef, image); 119 | int bestLabelIdx = maxIndex(labelProbabilities); 120 | 121 | imageClassification = labels.get(bestLabelIdx); 122 | 123 | imageProbability = Float.toString(labelProbabilities[bestLabelIdx] * 100f); 124 | 125 | System.out.println(String.format("BEST MATCH: %s (%.2f%% likely)", imageClassification, 126 | labelProbabilities[bestLabelIdx] * 100f)); 127 | } 128 | 129 | } catch (IOException e) { 130 | e.printStackTrace(); 131 | } 132 | return "Prediction: What is the content of this picture? => " + imageClassification 133 | + ", probability = " + imageProbability; 134 | }); 135 | 136 | // Send prediction information to Output Topic 137 | transformedMessage.to(imageOutputTopic); 138 | 139 | return builder.build(); 140 | } 141 | 142 | 143 | // ######################################################################################## 144 | // Private helper class for construction and execution of the pre-built 145 | // TensorFlow model 146 | // ######################################################################################## 147 | 148 | private static Tensor constructAndExecuteGraphToNormalizeImage(byte[] imageBytes) { 149 | // Graph construction: using the OperationBuilder class to construct a graph to 150 | // decode, resize and normalize a JPEG image. 151 | 152 | try (Graph g = new Graph()) { 153 | GraphBuilder b = new GraphBuilder(g); 154 | // Some constants specific to the pre-trained model at: 155 | // https://storage.googleapis.com/download.tensorflow.org/models/inception5h.zip 156 | // 157 | // - The model was trained with images scaled to 224x224 pixels. 158 | // - The colors, represented as R, G, B in 1-byte each were 159 | // converted to 160 | // float using (value - Mean)/Scale. 161 | final int H = 224; 162 | final int W = 224; 163 | final float mean = 117f; 164 | final float scale = 1f; 165 | 166 | // Since the graph is being constructed once per execution here, we 167 | // can use a constant for the 168 | // input image. If the graph were to be re-used for multiple input 169 | // images, a placeholder would 170 | // have been more appropriate. 171 | final Output input = b.constant("input", imageBytes); 172 | final Output output = b 173 | .div(b.sub( 174 | b.resizeBilinear(b.expandDims(b.cast(b.decodeJpeg(input, 3), DataType.FLOAT), 175 | b.constant("make_batch", 0)), b.constant("size", new int[] { H, W })), 176 | b.constant("mean", mean)), b.constant("scale", scale)); 177 | try (Session s = new Session(g)) { 178 | return s.runner().fetch(output.op().name()).run().get(0); 179 | } 180 | } 181 | } 182 | 183 | private static float[] executeInceptionGraph(byte[] graphDef, Tensor image) { 184 | try (Graph g = new Graph()) { 185 | 186 | // Model loading: Using Graph.importGraphDef() to load a pre-trained Inception 187 | // model. 188 | g.importGraphDef(graphDef); 189 | 190 | // Graph execution: Using a Session to execute the graphs and find the best 191 | // label for an image.
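// The inception5h graph used here names its input placeholder "input" and its
// softmax result node "output": feeding the normalized image tensor and fetching
// "output" returns a [1, N] tensor of label probabilities, with one entry per
// line of imagenet_comp_graph_label_strings.txt (N = 1001 for this model), as
// the shape check below verifies.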
192 | try (Session s = new Session(g); 193 | Tensor result = s.runner().feed("input", image).fetch("output").run().get(0)) { 194 | final long[] rshape = result.shape(); 195 | if (result.numDimensions() != 2 || rshape[0] != 1) { 196 | throw new RuntimeException(String.format( 197 | "Expected model to produce a [1 N] shaped tensor where N is the number of labels, instead it produced one with shape %s", 198 | Arrays.toString(rshape))); 199 | } 200 | int nlabels = (int) rshape[1]; 201 | return result.copyTo(new float[1][nlabels])[0]; 202 | } 203 | } 204 | } 205 | 206 | private static int maxIndex(float[] probabilities) { 207 | int best = 0; 208 | for (int i = 1; i < probabilities.length; ++i) { 209 | if (probabilities[i] > probabilities[best]) { 210 | best = i; 211 | } 212 | } 213 | return best; 214 | } 215 | 216 | // In the fullness of time, equivalents of the methods of this class should 217 | // be auto-generated from 218 | // the OpDefs linked into libtensorflow_jni.so. That would match what is 219 | // done in other languages 220 | // like Python, C++ and Go. 221 | static class GraphBuilder { 222 | GraphBuilder(Graph g) { 223 | this.g = g; 224 | } 225 | 226 | Output div(Output x, Output y) { 227 | return binaryOp("Div", x, y); 228 | } 229 | 230 | Output sub(Output x, Output y) { 231 | return binaryOp("Sub", x, y); 232 | } 233 | 234 | Output resizeBilinear(Output images, Output size) { 235 | return binaryOp("ResizeBilinear", images, size); 236 | } 237 | 238 | Output expandDims(Output input, Output dim) { 239 | return binaryOp("ExpandDims", input, dim); 240 | } 241 | 242 | Output cast(Output value, DataType dtype) { 243 | return g.opBuilder("Cast", "Cast").addInput(value).setAttr("DstT", dtype).build().output(0); 244 | } 245 | 246 | Output decodeJpeg(Output contents, long channels) { 247 | return g.opBuilder("DecodeJpeg", "DecodeJpeg").addInput(contents).setAttr("channels", channels).build() 248 | .output(0); 249 | } 250 | 251 | Output constant(String name, Object value) { 252 | try (Tensor t = Tensor.create(value)) { 253 | return g.opBuilder("Const", name).setAttr("dtype", t.dataType()).setAttr("value", t).build().output(0); 254 | } 255 | } 256 | 257 | private Output binaryOp(String type, Output in1, Output in2) { 258 | return g.opBuilder(type, type).addInput(in1).addInput(in2).build().output(0); 259 | } 260 | 261 | private Graph g; 262 | } 263 | 264 | } 265 | -------------------------------------------------------------------------------- /tensorflow-image-recognition/src/main/resources/TensorFlow_Images/devil.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaiwaehner/kafka-streams-machine-learning-examples/3977e3928d2719fc710bb3d202851ee8904be2e7/tensorflow-image-recognition/src/main/resources/TensorFlow_Images/devil.png -------------------------------------------------------------------------------- /tensorflow-image-recognition/src/main/resources/TensorFlow_Images/new_airplane.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaiwaehner/kafka-streams-machine-learning-examples/3977e3928d2719fc710bb3d202851ee8904be2e7/tensorflow-image-recognition/src/main/resources/TensorFlow_Images/new_airplane.jpg -------------------------------------------------------------------------------- /tensorflow-image-recognition/src/main/resources/TensorFlow_Images/trained_airplane_1.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/kaiwaehner/kafka-streams-machine-learning-examples/3977e3928d2719fc710bb3d202851ee8904be2e7/tensorflow-image-recognition/src/main/resources/TensorFlow_Images/trained_airplane_1.jpg -------------------------------------------------------------------------------- /tensorflow-image-recognition/src/main/resources/TensorFlow_Images/trained_airplane_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaiwaehner/kafka-streams-machine-learning-examples/3977e3928d2719fc710bb3d202851ee8904be2e7/tensorflow-image-recognition/src/main/resources/TensorFlow_Images/trained_airplane_2.jpg -------------------------------------------------------------------------------- /tensorflow-image-recognition/src/main/resources/TensorFlow_Images/trained_butterfly.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaiwaehner/kafka-streams-machine-learning-examples/3977e3928d2719fc710bb3d202851ee8904be2e7/tensorflow-image-recognition/src/main/resources/TensorFlow_Images/trained_butterfly.jpg -------------------------------------------------------------------------------- /tensorflow-image-recognition/src/main/resources/generatedModels/CNN_inception5h/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2015 The TensorFlow Authors. All rights reserved. 2 | 3 | Apache License 4 | Version 2.0, January 2004 5 | http://www.apache.org/licenses/ 6 | 7 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 8 | 9 | 1. Definitions. 10 | 11 | "License" shall mean the terms and conditions for use, reproduction, 12 | and distribution as defined by Sections 1 through 9 of this document. 13 | 14 | "Licensor" shall mean the copyright owner or entity authorized by 15 | the copyright owner that is granting the License. 16 | 17 | "Legal Entity" shall mean the union of the acting entity and all 18 | other entities that control, are controlled by, or are under common 19 | control with that entity. For the purposes of this definition, 20 | "control" means (i) the power, direct or indirect, to cause the 21 | direction or management of such entity, whether by contract or 22 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 23 | outstanding shares, or (iii) beneficial ownership of such entity. 24 | 25 | "You" (or "Your") shall mean an individual or Legal Entity 26 | exercising permissions granted by this License. 27 | 28 | "Source" form shall mean the preferred form for making modifications, 29 | including but not limited to software source code, documentation 30 | source, and configuration files. 31 | 32 | "Object" form shall mean any form resulting from mechanical 33 | transformation or translation of a Source form, including but 34 | not limited to compiled object code, generated documentation, 35 | and conversions to other media types. 36 | 37 | "Work" shall mean the work of authorship, whether in Source or 38 | Object form, made available under the License, as indicated by a 39 | copyright notice that is included in or attached to the work 40 | (an example is provided in the Appendix below). 41 | 42 | "Derivative Works" shall mean any work, whether in Source or Object 43 | form, that is based on (or derived from) the Work and for which the 44 | editorial revisions, annotations, elaborations, or other modifications 45 | represent, as a whole, an original work of authorship. 
For the purposes 46 | of this License, Derivative Works shall not include works that remain 47 | separable from, or merely link (or bind by name) to the interfaces of, 48 | the Work and Derivative Works thereof. 49 | 50 | "Contribution" shall mean any work of authorship, including 51 | the original version of the Work and any modifications or additions 52 | to that Work or Derivative Works thereof, that is intentionally 53 | submitted to Licensor for inclusion in the Work by the copyright owner 54 | or by an individual or Legal Entity authorized to submit on behalf of 55 | the copyright owner. For the purposes of this definition, "submitted" 56 | means any form of electronic, verbal, or written communication sent 57 | to the Licensor or its representatives, including but not limited to 58 | communication on electronic mailing lists, source code control systems, 59 | and issue tracking systems that are managed by, or on behalf of, the 60 | Licensor for the purpose of discussing and improving the Work, but 61 | excluding communication that is conspicuously marked or otherwise 62 | designated in writing by the copyright owner as "Not a Contribution." 63 | 64 | "Contributor" shall mean Licensor and any individual or Legal Entity 65 | on behalf of whom a Contribution has been received by Licensor and 66 | subsequently incorporated within the Work. 67 | 68 | 2. Grant of Copyright License. Subject to the terms and conditions of 69 | this License, each Contributor hereby grants to You a perpetual, 70 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 71 | copyright license to reproduce, prepare Derivative Works of, 72 | publicly display, publicly perform, sublicense, and distribute the 73 | Work and such Derivative Works in Source or Object form. 74 | 75 | 3. Grant of Patent License. Subject to the terms and conditions of 76 | this License, each Contributor hereby grants to You a perpetual, 77 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 78 | (except as stated in this section) patent license to make, have made, 79 | use, offer to sell, sell, import, and otherwise transfer the Work, 80 | where such license applies only to those patent claims licensable 81 | by such Contributor that are necessarily infringed by their 82 | Contribution(s) alone or by combination of their Contribution(s) 83 | with the Work to which such Contribution(s) was submitted. If You 84 | institute patent litigation against any entity (including a 85 | cross-claim or counterclaim in a lawsuit) alleging that the Work 86 | or a Contribution incorporated within the Work constitutes direct 87 | or contributory patent infringement, then any patent licenses 88 | granted to You under this License for that Work shall terminate 89 | as of the date such litigation is filed. 90 | 91 | 4. Redistribution. 
You may reproduce and distribute copies of the 92 | Work or Derivative Works thereof in any medium, with or without 93 | modifications, and in Source or Object form, provided that You 94 | meet the following conditions: 95 | 96 | (a) You must give any other recipients of the Work or 97 | Derivative Works a copy of this License; and 98 | 99 | (b) You must cause any modified files to carry prominent notices 100 | stating that You changed the files; and 101 | 102 | (c) You must retain, in the Source form of any Derivative Works 103 | that You distribute, all copyright, patent, trademark, and 104 | attribution notices from the Source form of the Work, 105 | excluding those notices that do not pertain to any part of 106 | the Derivative Works; and 107 | 108 | (d) If the Work includes a "NOTICE" text file as part of its 109 | distribution, then any Derivative Works that You distribute must 110 | include a readable copy of the attribution notices contained 111 | within such NOTICE file, excluding those notices that do not 112 | pertain to any part of the Derivative Works, in at least one 113 | of the following places: within a NOTICE text file distributed 114 | as part of the Derivative Works; within the Source form or 115 | documentation, if provided along with the Derivative Works; or, 116 | within a display generated by the Derivative Works, if and 117 | wherever such third-party notices normally appear. The contents 118 | of the NOTICE file are for informational purposes only and 119 | do not modify the License. You may add Your own attribution 120 | notices within Derivative Works that You distribute, alongside 121 | or as an addendum to the NOTICE text from the Work, provided 122 | that such additional attribution notices cannot be construed 123 | as modifying the License. 124 | 125 | You may add Your own copyright statement to Your modifications and 126 | may provide additional or different license terms and conditions 127 | for use, reproduction, or distribution of Your modifications, or 128 | for any such Derivative Works as a whole, provided Your use, 129 | reproduction, and distribution of the Work otherwise complies with 130 | the conditions stated in this License. 131 | 132 | 5. Submission of Contributions. Unless You explicitly state otherwise, 133 | any Contribution intentionally submitted for inclusion in the Work 134 | by You to the Licensor shall be under the terms and conditions of 135 | this License, without any additional terms or conditions. 136 | Notwithstanding the above, nothing herein shall supersede or modify 137 | the terms of any separate license agreement you may have executed 138 | with Licensor regarding such Contributions. 139 | 140 | 6. Trademarks. This License does not grant permission to use the trade 141 | names, trademarks, service marks, or product names of the Licensor, 142 | except as required for reasonable and customary use in describing the 143 | origin of the Work and reproducing the content of the NOTICE file. 144 | 145 | 7. Disclaimer of Warranty. Unless required by applicable law or 146 | agreed to in writing, Licensor provides the Work (and each 147 | Contributor provides its Contributions) on an "AS IS" BASIS, 148 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 149 | implied, including, without limitation, any warranties or conditions 150 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 151 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 152 | appropriateness of using or redistributing the Work and assume any 153 | risks associated with Your exercise of permissions under this License. 154 | 155 | 8. Limitation of Liability. In no event and under no legal theory, 156 | whether in tort (including negligence), contract, or otherwise, 157 | unless required by applicable law (such as deliberate and grossly 158 | negligent acts) or agreed to in writing, shall any Contributor be 159 | liable to You for damages, including any direct, indirect, special, 160 | incidental, or consequential damages of any character arising as a 161 | result of this License or out of the use or inability to use the 162 | Work (including but not limited to damages for loss of goodwill, 163 | work stoppage, computer failure or malfunction, or any and all 164 | other commercial damages or losses), even if such Contributor 165 | has been advised of the possibility of such damages. 166 | 167 | 9. Accepting Warranty or Additional Liability. While redistributing 168 | the Work or Derivative Works thereof, You may choose to offer, 169 | and charge a fee for, acceptance of support, warranty, indemnity, 170 | or other liability obligations and/or rights consistent with this 171 | License. However, in accepting such obligations, You may act only 172 | on Your own behalf and on Your sole responsibility, not on behalf 173 | of any other Contributor, and only if You agree to indemnify, 174 | defend, and hold each Contributor harmless for any liability 175 | incurred by, or claims asserted against, such Contributor by reason 176 | of your accepting any such warranty or additional liability. 177 | 178 | END OF TERMS AND CONDITIONS 179 | 180 | APPENDIX: How to apply the Apache License to your work. 181 | 182 | To apply the Apache License to your work, attach the following 183 | boilerplate notice, with the fields enclosed by brackets "[]" 184 | replaced with your own identifying information. (Don't include 185 | the brackets!) The text should be enclosed in the appropriate 186 | comment syntax for the file format. We also recommend that a 187 | file or class name and description of purpose be included on the 188 | same "printed page" as the copyright notice for easier 189 | identification within third-party archives. 190 | 191 | Copyright 2015, The TensorFlow Authors. 192 | 193 | Licensed under the Apache License, Version 2.0 (the "License"); 194 | you may not use this file except in compliance with the License. 195 | You may obtain a copy of the License at 196 | 197 | http://www.apache.org/licenses/LICENSE-2.0 198 | 199 | Unless required by applicable law or agreed to in writing, software 200 | distributed under the License is distributed on an "AS IS" BASIS, 201 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 202 | See the License for the specific language governing permissions and 203 | limitations under the License. 
204 | -------------------------------------------------------------------------------- /tensorflow-image-recognition/src/main/resources/generatedModels/CNN_inception5h/imagenet_comp_graph_label_strings.txt: -------------------------------------------------------------------------------- 1 | dummy 2 | kit fox 3 | English setter 4 | Siberian husky 5 | Australian terrier 6 | English springer 7 | grey whale 8 | lesser panda 9 | Egyptian cat 10 | ibex 11 | Persian cat 12 | cougar 13 | gazelle 14 | porcupine 15 | sea lion 16 | malamute 17 | badger 18 | Great Dane 19 | Walker hound 20 | Welsh springer spaniel 21 | whippet 22 | Scottish deerhound 23 | killer whale 24 | mink 25 | African elephant 26 | Weimaraner 27 | soft-coated wheaten terrier 28 | Dandie Dinmont 29 | red wolf 30 | Old English sheepdog 31 | jaguar 32 | otterhound 33 | bloodhound 34 | Airedale 35 | hyena 36 | meerkat 37 | giant schnauzer 38 | titi 39 | three-toed sloth 40 | sorrel 41 | black-footed ferret 42 | dalmatian 43 | black-and-tan coonhound 44 | papillon 45 | skunk 46 | Staffordshire bullterrier 47 | Mexican hairless 48 | Bouvier des Flandres 49 | weasel 50 | miniature poodle 51 | Cardigan 52 | malinois 53 | bighorn 54 | fox squirrel 55 | colobus 56 | tiger cat 57 | Lhasa 58 | impala 59 | coyote 60 | Yorkshire terrier 61 | Newfoundland 62 | brown bear 63 | red fox 64 | Norwegian elkhound 65 | Rottweiler 66 | hartebeest 67 | Saluki 68 | grey fox 69 | schipperke 70 | Pekinese 71 | Brabancon griffon 72 | West Highland white terrier 73 | Sealyham terrier 74 | guenon 75 | mongoose 76 | indri 77 | tiger 78 | Irish wolfhound 79 | wild boar 80 | EntleBucher 81 | zebra 82 | ram 83 | French bulldog 84 | orangutan 85 | basenji 86 | leopard 87 | Bernese mountain dog 88 | Maltese dog 89 | Norfolk terrier 90 | toy terrier 91 | vizsla 92 | cairn 93 | squirrel monkey 94 | groenendael 95 | clumber 96 | Siamese cat 97 | chimpanzee 98 | komondor 99 | Afghan hound 100 | Japanese spaniel 101 | proboscis monkey 102 | guinea pig 103 | white wolf 104 | ice bear 105 | gorilla 106 | borzoi 107 | toy poodle 108 | Kerry blue terrier 109 | ox 110 | Scotch terrier 111 | Tibetan mastiff 112 | spider monkey 113 | Doberman 114 | Boston bull 115 | Greater Swiss Mountain dog 116 | Appenzeller 117 | Shih-Tzu 118 | Irish water spaniel 119 | Pomeranian 120 | Bedlington terrier 121 | warthog 122 | Arabian camel 123 | siamang 124 | miniature schnauzer 125 | collie 126 | golden retriever 127 | Irish terrier 128 | affenpinscher 129 | Border collie 130 | hare 131 | boxer 132 | silky terrier 133 | beagle 134 | Leonberg 135 | German short-haired pointer 136 | patas 137 | dhole 138 | baboon 139 | macaque 140 | Chesapeake Bay retriever 141 | bull mastiff 142 | kuvasz 143 | capuchin 144 | pug 145 | curly-coated retriever 146 | Norwich terrier 147 | flat-coated retriever 148 | hog 149 | keeshond 150 | Eskimo dog 151 | Brittany spaniel 152 | standard poodle 153 | Lakeland terrier 154 | snow leopard 155 | Gordon setter 156 | dingo 157 | standard schnauzer 158 | hamster 159 | Tibetan terrier 160 | Arctic fox 161 | wire-haired fox terrier 162 | basset 163 | water buffalo 164 | American black bear 165 | Angora 166 | bison 167 | howler monkey 168 | hippopotamus 169 | chow 170 | giant panda 171 | American Staffordshire terrier 172 | Shetland sheepdog 173 | Great Pyrenees 174 | Chihuahua 175 | tabby 176 | marmoset 177 | Labrador retriever 178 | Saint Bernard 179 | armadillo 180 | Samoyed 181 | bluetick 182 | redbone 183 | polecat 184 | marmot 185 | kelpie 186 | gibbon 187 | llama 
188 | miniature pinscher 189 | wood rabbit 190 | Italian greyhound 191 | lion 192 | cocker spaniel 193 | Irish setter 194 | dugong 195 | Indian elephant 196 | beaver 197 | Sussex spaniel 198 | Pembroke 199 | Blenheim spaniel 200 | Madagascar cat 201 | Rhodesian ridgeback 202 | lynx 203 | African hunting dog 204 | langur 205 | Ibizan hound 206 | timber wolf 207 | cheetah 208 | English foxhound 209 | briard 210 | sloth bear 211 | Border terrier 212 | German shepherd 213 | otter 214 | koala 215 | tusker 216 | echidna 217 | wallaby 218 | platypus 219 | wombat 220 | revolver 221 | umbrella 222 | schooner 223 | soccer ball 224 | accordion 225 | ant 226 | starfish 227 | chambered nautilus 228 | grand piano 229 | laptop 230 | strawberry 231 | airliner 232 | warplane 233 | airship 234 | balloon 235 | space shuttle 236 | fireboat 237 | gondola 238 | speedboat 239 | lifeboat 240 | canoe 241 | yawl 242 | catamaran 243 | trimaran 244 | container ship 245 | liner 246 | pirate 247 | aircraft carrier 248 | submarine 249 | wreck 250 | half track 251 | tank 252 | missile 253 | bobsled 254 | dogsled 255 | bicycle-built-for-two 256 | mountain bike 257 | freight car 258 | passenger car 259 | barrow 260 | shopping cart 261 | motor scooter 262 | forklift 263 | electric locomotive 264 | steam locomotive 265 | amphibian 266 | ambulance 267 | beach wagon 268 | cab 269 | convertible 270 | jeep 271 | limousine 272 | minivan 273 | Model T 274 | racer 275 | sports car 276 | go-kart 277 | golfcart 278 | moped 279 | snowplow 280 | fire engine 281 | garbage truck 282 | pickup 283 | tow truck 284 | trailer truck 285 | moving van 286 | police van 287 | recreational vehicle 288 | streetcar 289 | snowmobile 290 | tractor 291 | mobile home 292 | tricycle 293 | unicycle 294 | horse cart 295 | jinrikisha 296 | oxcart 297 | bassinet 298 | cradle 299 | crib 300 | four-poster 301 | bookcase 302 | china cabinet 303 | medicine chest 304 | chiffonier 305 | table lamp 306 | file 307 | park bench 308 | barber chair 309 | throne 310 | folding chair 311 | rocking chair 312 | studio couch 313 | toilet seat 314 | desk 315 | pool table 316 | dining table 317 | entertainment center 318 | wardrobe 319 | Granny Smith 320 | orange 321 | lemon 322 | fig 323 | pineapple 324 | banana 325 | jackfruit 326 | custard apple 327 | pomegranate 328 | acorn 329 | hip 330 | ear 331 | rapeseed 332 | corn 333 | buckeye 334 | organ 335 | upright 336 | chime 337 | drum 338 | gong 339 | maraca 340 | marimba 341 | steel drum 342 | banjo 343 | cello 344 | violin 345 | harp 346 | acoustic guitar 347 | electric guitar 348 | cornet 349 | French horn 350 | trombone 351 | harmonica 352 | ocarina 353 | panpipe 354 | bassoon 355 | oboe 356 | sax 357 | flute 358 | daisy 359 | yellow lady's slipper 360 | cliff 361 | valley 362 | alp 363 | volcano 364 | promontory 365 | sandbar 366 | coral reef 367 | lakeside 368 | seashore 369 | geyser 370 | hatchet 371 | cleaver 372 | letter opener 373 | plane 374 | power drill 375 | lawn mower 376 | hammer 377 | corkscrew 378 | can opener 379 | plunger 380 | screwdriver 381 | shovel 382 | plow 383 | chain saw 384 | cock 385 | hen 386 | ostrich 387 | brambling 388 | goldfinch 389 | house finch 390 | junco 391 | indigo bunting 392 | robin 393 | bulbul 394 | jay 395 | magpie 396 | chickadee 397 | water ouzel 398 | kite 399 | bald eagle 400 | vulture 401 | great grey owl 402 | black grouse 403 | ptarmigan 404 | ruffed grouse 405 | prairie chicken 406 | peacock 407 | quail 408 | partridge 409 | African grey 410 | macaw 411 | sulphur-crested 
cockatoo 412 | lorikeet 413 | coucal 414 | bee eater 415 | hornbill 416 | hummingbird 417 | jacamar 418 | toucan 419 | drake 420 | red-breasted merganser 421 | goose 422 | black swan 423 | white stork 424 | black stork 425 | spoonbill 426 | flamingo 427 | American egret 428 | little blue heron 429 | bittern 430 | crane 431 | limpkin 432 | American coot 433 | bustard 434 | ruddy turnstone 435 | red-backed sandpiper 436 | redshank 437 | dowitcher 438 | oystercatcher 439 | European gallinule 440 | pelican 441 | king penguin 442 | albatross 443 | great white shark 444 | tiger shark 445 | hammerhead 446 | electric ray 447 | stingray 448 | barracouta 449 | coho 450 | tench 451 | goldfish 452 | eel 453 | rock beauty 454 | anemone fish 455 | lionfish 456 | puffer 457 | sturgeon 458 | gar 459 | loggerhead 460 | leatherback turtle 461 | mud turtle 462 | terrapin 463 | box turtle 464 | banded gecko 465 | common iguana 466 | American chameleon 467 | whiptail 468 | agama 469 | frilled lizard 470 | alligator lizard 471 | Gila monster 472 | green lizard 473 | African chameleon 474 | Komodo dragon 475 | triceratops 476 | African crocodile 477 | American alligator 478 | thunder snake 479 | ringneck snake 480 | hognose snake 481 | green snake 482 | king snake 483 | garter snake 484 | water snake 485 | vine snake 486 | night snake 487 | boa constrictor 488 | rock python 489 | Indian cobra 490 | green mamba 491 | sea snake 492 | horned viper 493 | diamondback 494 | sidewinder 495 | European fire salamander 496 | common newt 497 | eft 498 | spotted salamander 499 | axolotl 500 | bullfrog 501 | tree frog 502 | tailed frog 503 | whistle 504 | wing 505 | paintbrush 506 | hand blower 507 | oxygen mask 508 | snorkel 509 | loudspeaker 510 | microphone 511 | screen 512 | mouse 513 | electric fan 514 | oil filter 515 | strainer 516 | space heater 517 | stove 518 | guillotine 519 | barometer 520 | rule 521 | odometer 522 | scale 523 | analog clock 524 | digital clock 525 | wall clock 526 | hourglass 527 | sundial 528 | parking meter 529 | stopwatch 530 | digital watch 531 | stethoscope 532 | syringe 533 | magnetic compass 534 | binoculars 535 | projector 536 | sunglasses 537 | loupe 538 | radio telescope 539 | bow 540 | cannon [ground] 541 | assault rifle 542 | rifle 543 | projectile 544 | computer keyboard 545 | typewriter keyboard 546 | crane 547 | lighter 548 | abacus 549 | cash machine 550 | slide rule 551 | desktop computer 552 | hand-held computer 553 | notebook 554 | web site 555 | harvester 556 | thresher 557 | printer 558 | slot 559 | vending machine 560 | sewing machine 561 | joystick 562 | switch 563 | hook 564 | car wheel 565 | paddlewheel 566 | pinwheel 567 | potter's wheel 568 | gas pump 569 | carousel 570 | swing 571 | reel 572 | radiator 573 | puck 574 | hard disc 575 | sunglass 576 | pick 577 | car mirror 578 | solar dish 579 | remote control 580 | disk brake 581 | buckle 582 | hair slide 583 | knot 584 | combination lock 585 | padlock 586 | nail 587 | safety pin 588 | screw 589 | muzzle 590 | seat belt 591 | ski 592 | candle 593 | jack-o'-lantern 594 | spotlight 595 | torch 596 | neck brace 597 | pier 598 | tripod 599 | maypole 600 | mousetrap 601 | spider web 602 | trilobite 603 | harvestman 604 | scorpion 605 | black and gold garden spider 606 | barn spider 607 | garden spider 608 | black widow 609 | tarantula 610 | wolf spider 611 | tick 612 | centipede 613 | isopod 614 | Dungeness crab 615 | rock crab 616 | fiddler crab 617 | king crab 618 | American lobster 619 | spiny lobster 620 | crayfish 621 
| hermit crab 622 | tiger beetle 623 | ladybug 624 | ground beetle 625 | long-horned beetle 626 | leaf beetle 627 | dung beetle 628 | rhinoceros beetle 629 | weevil 630 | fly 631 | bee 632 | grasshopper 633 | cricket 634 | walking stick 635 | cockroach 636 | mantis 637 | cicada 638 | leafhopper 639 | lacewing 640 | dragonfly 641 | damselfly 642 | admiral 643 | ringlet 644 | monarch 645 | cabbage butterfly 646 | sulphur butterfly 647 | lycaenid 648 | jellyfish 649 | sea anemone 650 | brain coral 651 | flatworm 652 | nematode 653 | conch 654 | snail 655 | slug 656 | sea slug 657 | chiton 658 | sea urchin 659 | sea cucumber 660 | iron 661 | espresso maker 662 | microwave 663 | Dutch oven 664 | rotisserie 665 | toaster 666 | waffle iron 667 | vacuum 668 | dishwasher 669 | refrigerator 670 | washer 671 | Crock Pot 672 | frying pan 673 | wok 674 | caldron 675 | coffeepot 676 | teapot 677 | spatula 678 | altar 679 | triumphal arch 680 | patio 681 | steel arch bridge 682 | suspension bridge 683 | viaduct 684 | barn 685 | greenhouse 686 | palace 687 | monastery 688 | library 689 | apiary 690 | boathouse 691 | church 692 | mosque 693 | stupa 694 | planetarium 695 | restaurant 696 | cinema 697 | home theater 698 | lumbermill 699 | coil 700 | obelisk 701 | totem pole 702 | castle 703 | prison 704 | grocery store 705 | bakery 706 | barbershop 707 | bookshop 708 | butcher shop 709 | confectionery 710 | shoe shop 711 | tobacco shop 712 | toyshop 713 | fountain 714 | cliff dwelling 715 | yurt 716 | dock 717 | brass 718 | megalith 719 | bannister 720 | breakwater 721 | dam 722 | chainlink fence 723 | picket fence 724 | worm fence 725 | stone wall 726 | grille 727 | sliding door 728 | turnstile 729 | mountain tent 730 | scoreboard 731 | honeycomb 732 | plate rack 733 | pedestal 734 | beacon 735 | mashed potato 736 | bell pepper 737 | head cabbage 738 | broccoli 739 | cauliflower 740 | zucchini 741 | spaghetti squash 742 | acorn squash 743 | butternut squash 744 | cucumber 745 | artichoke 746 | cardoon 747 | mushroom 748 | shower curtain 749 | jean 750 | carton 751 | handkerchief 752 | sandal 753 | ashcan 754 | safe 755 | plate 756 | necklace 757 | croquet ball 758 | fur coat 759 | thimble 760 | pajama 761 | running shoe 762 | cocktail shaker 763 | chest 764 | manhole cover 765 | modem 766 | tub 767 | tray 768 | balance beam 769 | bagel 770 | prayer rug 771 | kimono 772 | hot pot 773 | whiskey jug 774 | knee pad 775 | book jacket 776 | spindle 777 | ski mask 778 | beer bottle 779 | crash helmet 780 | bottlecap 781 | tile roof 782 | mask 783 | maillot 784 | Petri dish 785 | football helmet 786 | bathing cap 787 | teddy bear 788 | holster 789 | pop bottle 790 | photocopier 791 | vestment 792 | crossword puzzle 793 | golf ball 794 | trifle 795 | suit 796 | water tower 797 | feather boa 798 | cloak 799 | red wine 800 | drumstick 801 | shield 802 | Christmas stocking 803 | hoopskirt 804 | menu 805 | stage 806 | bonnet 807 | meat loaf 808 | baseball 809 | face powder 810 | scabbard 811 | sunscreen 812 | beer glass 813 | hen-of-the-woods 814 | guacamole 815 | lampshade 816 | wool 817 | hay 818 | bow tie 819 | mailbag 820 | water jug 821 | bucket 822 | dishrag 823 | soup bowl 824 | eggnog 825 | mortar 826 | trench coat 827 | paddle 828 | chain 829 | swab 830 | mixing bowl 831 | potpie 832 | wine bottle 833 | shoji 834 | bulletproof vest 835 | drilling platform 836 | binder 837 | cardigan 838 | sweatshirt 839 | pot 840 | birdhouse 841 | hamper 842 | ping-pong ball 843 | pencil box 844 | pay-phone 845 | consomme 846 | 
apron 847 | punching bag 848 | backpack 849 | groom 850 | bearskin 851 | pencil sharpener 852 | broom 853 | mosquito net 854 | abaya 855 | mortarboard 856 | poncho 857 | crutch 858 | Polaroid camera 859 | space bar 860 | cup 861 | racket 862 | traffic light 863 | quill 864 | radio 865 | dough 866 | cuirass 867 | military uniform 868 | lipstick 869 | shower cap 870 | monitor 871 | oscilloscope 872 | mitten 873 | brassiere 874 | French loaf 875 | vase 876 | milk can 877 | rugby ball 878 | paper towel 879 | earthstar 880 | envelope 881 | miniskirt 882 | cowboy hat 883 | trolleybus 884 | perfume 885 | bathtub 886 | hotdog 887 | coral fungus 888 | bullet train 889 | pillow 890 | toilet tissue 891 | cassette 892 | carpenter's kit 893 | ladle 894 | stinkhorn 895 | lotion 896 | hair spray 897 | academic gown 898 | dome 899 | crate 900 | wig 901 | burrito 902 | pill bottle 903 | chain mail 904 | theater curtain 905 | window shade 906 | barrel 907 | washbasin 908 | ballpoint 909 | basketball 910 | bath towel 911 | cowboy boot 912 | gown 913 | window screen 914 | agaric 915 | cellular telephone 916 | nipple 917 | barbell 918 | mailbox 919 | lab coat 920 | fire screen 921 | minibus 922 | packet 923 | maze 924 | pole 925 | horizontal bar 926 | sombrero 927 | pickelhaube 928 | rain barrel 929 | wallet 930 | cassette player 931 | comic book 932 | piggy bank 933 | street sign 934 | bell cote 935 | fountain pen 936 | Windsor tie 937 | volleyball 938 | overskirt 939 | sarong 940 | purse 941 | bolo tie 942 | bib 943 | parachute 944 | sleeping bag 945 | television 946 | swimming trunks 947 | measuring cup 948 | espresso 949 | pizza 950 | breastplate 951 | shopping basket 952 | wooden spoon 953 | saltshaker 954 | chocolate sauce 955 | ballplayer 956 | goblet 957 | gyromitra 958 | stretcher 959 | water bottle 960 | dial telephone 961 | soap dispenser 962 | jersey 963 | school bus 964 | jigsaw puzzle 965 | plastic bag 966 | reflex camera 967 | diaper 968 | Band Aid 969 | ice lolly 970 | velvet 971 | tennis ball 972 | gasmask 973 | doormat 974 | Loafer 975 | ice cream 976 | pretzel 977 | quilt 978 | maillot 979 | tape player 980 | clog 981 | iPod 982 | bolete 983 | scuba diver 984 | pitcher 985 | matchstick 986 | bikini 987 | sock 988 | CD player 989 | lens cap 990 | thatch 991 | vault 992 | beaker 993 | bubble 994 | cheeseburger 995 | parallel bars 996 | flagpole 997 | coffee mug 998 | rubber eraser 999 | stole 1000 | carbonara 1001 | dumbbell -------------------------------------------------------------------------------- /tensorflow-image-recognition/src/main/resources/generatedModels/CNN_inception5h/tensorflow_inception_graph.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaiwaehner/kafka-streams-machine-learning-examples/3977e3928d2719fc710bb3d202851ee8904be2e7/tensorflow-image-recognition/src/main/resources/generatedModels/CNN_inception5h/tensorflow_inception_graph.pb -------------------------------------------------------------------------------- /tensorflow-image-recognition/src/main/resources/generatedModels/TensorFlow_Census/saved_model.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaiwaehner/kafka-streams-machine-learning-examples/3977e3928d2719fc710bb3d202851ee8904be2e7/tensorflow-image-recognition/src/main/resources/generatedModels/TensorFlow_Census/saved_model.pb -------------------------------------------------------------------------------- 
/tensorflow-image-recognition/src/main/resources/generatedModels/TensorFlow_Census/test.json: -------------------------------------------------------------------------------- 1 | {"age": 25, "workclass": " Private", "education": " 11th", "education_num": 7, "marital_status": " Never-married", "occupation": " Machine-op-inspct", "relationship": " Own-child", "race": " Black", "gender": " Male", "capital_gain": 0, "capital_loss": 0, "hours_per_week": 40, "native_country": " United-States"} 2 | -------------------------------------------------------------------------------- /tensorflow-image-recognition/src/main/resources/generatedModels/TensorFlow_Census/variables/variables.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaiwaehner/kafka-streams-machine-learning-examples/3977e3928d2719fc710bb3d202851ee8904be2e7/tensorflow-image-recognition/src/main/resources/generatedModels/TensorFlow_Census/variables/variables.data-00000-of-00001 -------------------------------------------------------------------------------- /tensorflow-image-recognition/src/main/resources/generatedModels/TensorFlow_Census/variables/variables.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaiwaehner/kafka-streams-machine-learning-examples/3977e3928d2719fc710bb3d202851ee8904be2e7/tensorflow-image-recognition/src/main/resources/generatedModels/TensorFlow_Census/variables/variables.index -------------------------------------------------------------------------------- /tensorflow-image-recognition/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=INFO, stdout 2 | 3 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 4 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.stdout.layout.ConversionPattern=%p %m (%c:%L) %n -------------------------------------------------------------------------------- /tensorflow-image-recognition/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java: -------------------------------------------------------------------------------- 1 | package com.github.jukkakarvanen.kafka.streams.integration.utils; 2 | 3 | import org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster; 4 | import org.slf4j.Logger; 5 | import org.slf4j.LoggerFactory; 6 | 7 | import java.util.Properties; 8 | 9 | /** This is a helper class to work around failing stream tests in a Windows environment (KAFKA-6647).
10 | * 11 | * @author Jukka Karvanen 12 | * 13 | * The underlying issue is https://issues.apache.org/jira/browse/KAFKA-6647. 14 | * Replacing EmbeddedKafkaCluster with TestEmbeddedKafkaCluster catches and ignores the exception 15 | * thrown during the tear-down of the test. 16 | * The exception does not affect functionality. 17 | */ 18 | 19 | public class TestEmbeddedKafkaCluster extends EmbeddedKafkaCluster { 20 | private static final Logger log = LoggerFactory.getLogger(TestEmbeddedKafkaCluster.class); 21 | 22 | public TestEmbeddedKafkaCluster(int numBrokers) { 23 | super(numBrokers); 24 | } 25 | 26 | public TestEmbeddedKafkaCluster(int numBrokers, Properties brokerConfig) { 27 | super(numBrokers, brokerConfig); 28 | } 29 | 30 | public TestEmbeddedKafkaCluster(int numBrokers, Properties brokerConfig, long mockTimeMillisStart) { 31 | super(numBrokers, brokerConfig, mockTimeMillisStart); 32 | } 33 | 34 | public TestEmbeddedKafkaCluster(int numBrokers, Properties brokerConfig, long mockTimeMillisStart, long mockTimeNanoStart) { 35 | super(numBrokers, brokerConfig, mockTimeMillisStart, mockTimeNanoStart); 36 | } 37 | 38 | public void after() { 39 | try { 40 | super.after(); 41 | } catch (RuntimeException e) { 42 | log.warn("Ignoring exception; test fails in Windows due to this exception: {}", e); 43 | } 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /tensorflow-image-recognition/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java: -------------------------------------------------------------------------------- 1 | package com.github.jukkakarvanen.kafka.streams.integration.utils; 2 | 3 | import org.apache.kafka.common.utils.Time; 4 | import org.apache.kafka.streams.KafkaClientSupplier; 5 | import org.apache.kafka.streams.KafkaStreams; 6 | import org.apache.kafka.streams.Topology; 7 | import org.slf4j.Logger; 8 | import org.slf4j.LoggerFactory; 9 | 10 | import java.util.Properties; 11 | 12 | /** This is a helper class to work around failing stream tests in a Windows environment (KAFKA-6647).
13 | * 14 | * @author Jukka Karvanen 15 | * 16 | * The underlying issue is https://issues.apache.org/jira/browse/KAFKA-6647. 17 | * Replacing KafkaStreams with TestKafkaStreams catches and ignores the exception caused by cleanUp. 18 | * The exception does not affect functionality. 19 | */ 20 | 21 | public class TestKafkaStreams extends KafkaStreams { 22 | private static final Logger log = LoggerFactory.getLogger(TestKafkaStreams.class); 23 | 24 | public TestKafkaStreams(Topology topology, Properties props) { 25 | super(topology, props); 26 | } 27 | 28 | public TestKafkaStreams(Topology topology, Properties props, KafkaClientSupplier clientSupplier) { 29 | super(topology, props, clientSupplier); 30 | } 31 | 32 | public TestKafkaStreams(Topology topology, Properties props, Time time) { 33 | super(topology, props, time); 34 | } 35 | 36 | public TestKafkaStreams(Topology topology, Properties props, KafkaClientSupplier clientSupplier, Time time) { 37 | super(topology, props, clientSupplier, time); 38 | } 39 | 40 | public void cleanUp() { 41 | try { 42 | super.cleanUp(); 43 | } catch (RuntimeException e) { 44 | log.warn("Ignoring exception; test fails in Windows due to this exception: {}", e); 45 | } 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /tensorflow-image-recognition/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_TensorFlow_Image_Recognition_ExampleTest.java: -------------------------------------------------------------------------------- 1 | package com.github.megachucky.kafka.streams.machinelearning; 2 | 3 | import org.apache.kafka.clients.producer.ProducerRecord; 4 | import org.apache.kafka.common.serialization.StringDeserializer; 5 | import org.apache.kafka.common.serialization.StringSerializer; 6 | import org.apache.kafka.streams.KeyValue; 7 | import org.apache.kafka.streams.TopologyTestDriver; 8 | import org.apache.kafka.streams.test.ConsumerRecordFactory; 9 | import org.junit.After; 10 | import org.junit.Before; 11 | import org.junit.Test; 12 | 13 | import java.io.IOException; 14 | import java.util.Arrays; 15 | import java.util.List; 16 | import java.util.stream.Collectors; 17 | 18 | import static org.assertj.core.api.AssertionsForInterfaceTypes.assertThat; 19 | 20 | /** 21 | * TopologyTestDriver-based test of the stream processing in Kafka_Streams_TensorFlow_Image_Recognition_Example. 22 | * 23 | * @author Jukka Karvanen / jukinimi.com 24 | * Unit Test of 25 | * {@link Kafka_Streams_TensorFlow_Image_Recognition_Example}, using a 26 | * TopologyTestDriver and a TensorFlow CNN model.
27 | * 28 | */ 29 | 30 | public class Kafka_Streams_TensorFlow_Image_Recognition_ExampleTest { 31 | private TopologyTestDriver testDriver; 32 | 33 | private StringDeserializer stringDeserializer = new StringDeserializer(); 34 | private ConsumerRecordFactory<String, String> recordFactory = new ConsumerRecordFactory<>(new StringSerializer(), new StringSerializer()); 35 | 36 | @Before 37 | public void setup() throws IOException { 38 | testDriver = new TopologyTestDriver(Kafka_Streams_TensorFlow_Image_Recognition_Example.getStreamTopology(), Kafka_Streams_TensorFlow_Image_Recognition_Example.getStreamConfiguration("localhost:9092")); 39 | } 40 | 41 | @After 42 | public void tearDown() { 43 | try { 44 | testDriver.close(); 45 | } catch (RuntimeException e) { 46 | // https://issues.apache.org/jira/browse/KAFKA-6647 causes exception when executed in Windows, ignoring it 47 | // Logged stacktrace cannot be avoided 48 | System.out.println("Ignoring exception, test failing in Windows due this exception:" + e.getLocalizedMessage()); 49 | } 50 | } 51 | 52 | private String getOutput() { 53 | ProducerRecord<String, String> output = testDriver.readOutput(Kafka_Streams_TensorFlow_Image_Recognition_Example.imageOutputTopic, stringDeserializer, stringDeserializer); 54 | assertThat(output).isNotNull(); 55 | return output.value(); 56 | } 57 | 58 | /** Simple recognition test validating only the recognition part of the output 59 | */ 60 | @Test 61 | public void testOne() { 62 | testDriver.pipeInput(recordFactory.create(Kafka_Streams_TensorFlow_Image_Recognition_Example.imageInputTopic, null, "src/main/resources/TensorFlow_Images/new_airplane.jpg", 1L)); 63 | assertThat(getOutput()).contains("What is the content of this picture? => airliner"); 64 | } 65 | 66 | /** Test based on Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest 67 | * 68 | */ 69 | @Test 70 | public void testList() { 71 | // Images: Airliner, 'unknown', Butterfly 72 | List<String> inputValues = Arrays.asList("src/main/resources/TensorFlow_Images/trained_airplane_2.jpg", 73 | "src/main/resources/TensorFlow_Images/devil.png", 74 | "src/main/resources/TensorFlow_Images/trained_butterfly.jpg"); 75 | List<KeyValue<String, String>> records = inputValues.stream().map(v -> new KeyValue<>(null, v)).collect(Collectors.toList()); 76 | 77 | 78 | testDriver.pipeInput(recordFactory.create(Kafka_Streams_TensorFlow_Image_Recognition_Example.imageInputTopic, records, 1L, 100L)); 79 | assertThat(getOutput()).contains("What is the content of this picture? => airliner"); 80 | assertThat(getOutput()).doesNotContain("What is the content of this picture? => airliner"); 81 | assertThat(getOutput()).contains("What is the content of this picture?
=> cabbage butterfly"); 82 | } 83 | 84 | 85 | } 86 | -------------------------------------------------------------------------------- /tensorflow-image-recognition/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest.java: -------------------------------------------------------------------------------- 1 | package com.github.megachucky.kafka.streams.machinelearning.test; 2 | 3 | import static org.assertj.core.api.Assertions.assertThat; 4 | 5 | import java.io.IOException; 6 | import java.nio.charset.Charset; 7 | import java.nio.file.Files; 8 | import java.nio.file.Path; 9 | import java.nio.file.Paths; 10 | import java.util.Arrays; 11 | import java.util.List; 12 | import java.util.Properties; 13 | 14 | import com.github.jukkakarvanen.kafka.streams.integration.utils.TestEmbeddedKafkaCluster; 15 | import com.github.jukkakarvanen.kafka.streams.integration.utils.TestKafkaStreams; 16 | import org.apache.kafka.clients.consumer.ConsumerConfig; 17 | import org.apache.kafka.clients.producer.ProducerConfig; 18 | import org.apache.kafka.common.serialization.Serdes; 19 | import org.apache.kafka.common.serialization.StringDeserializer; 20 | import org.apache.kafka.common.serialization.StringSerializer; 21 | import org.apache.kafka.common.utils.MockTime; 22 | import org.apache.kafka.streams.KafkaStreams; 23 | import org.apache.kafka.streams.KeyValue; 24 | import org.apache.kafka.streams.StreamsBuilder; 25 | import org.apache.kafka.streams.StreamsConfig; 26 | import org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster; 27 | import org.apache.kafka.streams.integration.utils.IntegrationTestUtils; 28 | import org.apache.kafka.streams.kstream.KStream; 29 | import org.junit.BeforeClass; 30 | import org.junit.ClassRule; 31 | import org.junit.Test; 32 | import org.tensorflow.DataType; 33 | import org.tensorflow.Graph; 34 | import org.tensorflow.Output; 35 | import org.tensorflow.Session; 36 | import org.tensorflow.Tensor; 37 | 38 | import com.github.megachucky.kafka.streams.machinelearning.Kafka_Streams_TensorFlow_Image_Recognition_Example; 39 | 40 | /** 41 | * 42 | * @author Kai Waehner (www.kai-waehner.de) 43 | * 44 | * End-to-end integration test based on 45 | * {@link Kafka_Streams_TensorFlow_Image_Recognition_Example}, using an 46 | * embedded Kafka cluster and a TensorFlow CNN model. 47 | * 48 | * 49 | */ 50 | public class Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest { 51 | 52 | @ClassRule 53 | public static final EmbeddedKafkaCluster CLUSTER = new TestEmbeddedKafkaCluster(1); 54 | 55 | private static final String inputTopic = "ImageInputTopic"; 56 | private static final String outputTopic = "ImageOutputTopic"; 57 | 58 | // Prediction Value 59 | private static String imageClassification = "unknown"; 60 | 61 | @BeforeClass 62 | public static void startKafkaCluster() throws Exception { 63 | CLUSTER.createTopic(inputTopic); 64 | CLUSTER.createTopic(outputTopic); 65 | } 66 | 67 | @Test 68 | public void shouldRecognizeImages() throws Exception { 69 | 70 | // Images: Airliner, 'unknown', Butterfly 71 | List<String> inputValues = Arrays.asList("src/main/resources/TensorFlow_Images/trained_airplane_2.jpg", 72 | "src/main/resources/TensorFlow_Images/devil.png", 73 | "src/main/resources/TensorFlow_Images/trained_butterfly.jpg"); 74 | 75 | // ######################################################## 76 | // Step 1: Configure and start the processor topology.
77 | // ######################################################## 78 | 79 | Properties streamsConfiguration = new Properties(); 80 | streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, 81 | "kafka-streams-tensorflow-image-recognition-integration-test"); 82 | streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers()); 83 | 84 | // Create TensorFlow object 85 | String modelDir = "src/main/resources/generatedModels/CNN_inception5h"; 86 | 87 | Path pathGraph = Paths.get(modelDir, "tensorflow_inception_graph.pb"); 88 | byte[] graphDef = Files.readAllBytes(pathGraph); 89 | 90 | Path pathModel = Paths.get(modelDir, "imagenet_comp_graph_label_strings.txt"); 91 | List<String> labels = Files.readAllLines(pathModel, Charset.forName("UTF-8")); 92 | 93 | // Configure Kafka Streams Application 94 | // Specify default (de)serializers for record keys and for record 95 | // values. 96 | streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()); 97 | streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()); 98 | 99 | // In the subsequent lines we define the processing topology of the 100 | // Streams application. 101 | final StreamsBuilder builder = new StreamsBuilder(); 102 | 103 | // Construct a `KStream` from the input topic "ImageInputTopic", where 104 | // message values 105 | // represent lines of text (for the sake of this example, we ignore 106 | // whatever may be stored 107 | // in the message keys). 108 | final KStream<String, String> imageInputLines = builder.stream(inputTopic); 109 | 110 | // Stream Processor (in this case 'foreach' to add custom logic, i.e. 111 | // apply the analytic model) 112 | imageInputLines.foreach((key, value) -> { 113 | 114 | imageClassification = "unknown"; 115 | 116 | String imageFile = value; 117 | 118 | Path pathImage = Paths.get(imageFile); 119 | byte[] imageBytes; 120 | try { 121 | imageBytes = Files.readAllBytes(pathImage); 122 | 123 | // Load and execute TensorFlow graph 124 | try (Tensor image = constructAndExecuteGraphToNormalizeImage(imageBytes)) { 125 | float[] labelProbabilities = executeInceptionGraph(graphDef, image); 126 | int bestLabelIdx = maxIndex(labelProbabilities); 127 | 128 | imageClassification = labels.get(bestLabelIdx); 129 | 130 | System.out.println(String.format("BEST MATCH: %s (%.2f%% likely)", imageClassification, 131 | labelProbabilities[bestLabelIdx] * 100f)); 132 | } 133 | 134 | } catch (IOException e) { 135 | e.printStackTrace(); 136 | } 137 | 138 | }); 139 | 140 | // Transform message: Add prediction information 141 | KStream<String, String> transformedMessage = imageInputLines 142 | .mapValues(value -> "Image Recognition: What is the content of the picture? => " + imageClassification); 143 | 144 | // Send prediction information to Output Topic 145 | transformedMessage.to(outputTopic); 146 | 147 | // Start Kafka Streams Application to process new incoming messages from 148 | // Input Topic 149 | final KafkaStreams streams = new TestKafkaStreams(builder.build(), streamsConfiguration); 150 | streams.cleanUp(); 151 | streams.start(); 152 | System.out.println("Image Recognition Microservice is running..."); 153 | System.out.println("Input to Kafka Topic " + inputTopic + "; Output to Kafka Topic " + outputTopic); 154 | 155 | // ######################################################## 156 | // Step 2: Produce some input data to the input topic.
157 |         // ########################################################
158 | 
159 |         Properties producerConfig = new Properties();
160 |         producerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
161 |         producerConfig.put(ProducerConfig.ACKS_CONFIG, "all");
162 |         producerConfig.put(ProducerConfig.RETRIES_CONFIG, 0);
163 |         producerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
164 |         producerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
165 |         IntegrationTestUtils.produceValuesSynchronously(inputTopic, inputValues, producerConfig, new MockTime());
166 | 
167 |         // ########################################################
168 |         // Step 3: Verify the application's output data.
169 |         // ########################################################
170 | 
171 |         Properties consumerConfig = new Properties();
172 |         consumerConfig.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
173 |         consumerConfig.put(ConsumerConfig.GROUP_ID_CONFIG,
174 |                 "kafka-streams-tensorflow-image-recognition-integration-test-standard-consumer");
175 |         consumerConfig.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
176 |         consumerConfig.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
177 |         consumerConfig.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
178 |         List<KeyValue<String, String>> response = IntegrationTestUtils
179 |                 .waitUntilMinKeyValueRecordsReceived(consumerConfig, outputTopic, 3);
180 |         streams.close();
181 |         assertThat(response).isNotNull();
182 |         assertThat(response.get(0).value).isEqualTo("Image Recognition: What is content of the picture? => airliner");
183 |         assertThat(response.get(1).value)
184 |                 .isNotEqualTo("Image Recognition: What is content of the picture? => airliner");
185 |         assertThat(response.get(2).value)
186 |                 .isEqualTo("Image Recognition: What is content of the picture? => cabbage butterfly");
187 | 
188 |     }
189 | 
190 |     // ########################################################################################
191 |     // Private helpers for construction and execution of the pre-built
192 |     // TensorFlow model
193 |     // ########################################################################################
194 | 
195 |     private static Tensor constructAndExecuteGraphToNormalizeImage(byte[] imageBytes) {
196 |         try (Graph g = new Graph()) {
197 |             GraphBuilder b = new GraphBuilder(g);
198 |             // Some constants specific to the pre-trained model at:
199 |             // https://storage.googleapis.com/download.tensorflow.org/models/inception5h.zip
200 |             //
201 |             // - The model was trained with images scaled to 224x224 pixels.
202 |             // - The colors, represented as R, G, B in 1 byte each, were
203 |             //   converted to
204 |             //   float using (value - mean) / scale.
205 |             final int H = 224;
206 |             final int W = 224;
207 |             final float mean = 117f;
208 |             final float scale = 1f;
209 | 
210 |             // Since the graph is constructed once per execution here, we
211 |             // can use a constant for the
212 |             // input image. If the graph were to be re-used for multiple input
213 |             // images, a placeholder would
214 |             // have been more appropriate.
215 |             final Output input = b.constant("input", imageBytes);
216 |             final Output output = b
217 |                     .div(b.sub(
218 |                             b.resizeBilinear(b.expandDims(b.cast(b.decodeJpeg(input, 3), DataType.FLOAT),
219 |                                     b.constant("make_batch", 0)), b.constant("size", new int[] { H, W })),
220 |                             b.constant("mean", mean)), b.constant("scale", scale));
221 |             try (Session s = new Session(g)) {
222 |                 return s.runner().fetch(output.op().name()).run().get(0);
223 |             }
224 |         }
225 |     }
226 | 
227 |     private static float[] executeInceptionGraph(byte[] graphDef, Tensor image) {
228 |         try (Graph g = new Graph()) {
229 |             g.importGraphDef(graphDef);
230 |             try (Session s = new Session(g);
231 |                     Tensor result = s.runner().feed("input", image).fetch("output").run().get(0)) {
232 |                 final long[] rshape = result.shape();
233 |                 if (result.numDimensions() != 2 || rshape[0] != 1) {
234 |                     throw new RuntimeException(String.format(
235 |                             "Expected model to produce a [1 N] shaped tensor where N is the number of labels, instead it produced one with shape %s",
236 |                             Arrays.toString(rshape)));
237 |                 }
238 |                 int nlabels = (int) rshape[1];
239 |                 return result.copyTo(new float[1][nlabels])[0];
240 |             }
241 |         }
242 |     }
243 | 
244 |     private static int maxIndex(float[] probabilities) {
245 |         int best = 0;
246 |         for (int i = 1; i < probabilities.length; ++i) {
247 |             if (probabilities[i] > probabilities[best]) {
248 |                 best = i;
249 |             }
250 |         }
251 |         return best;
252 |     }
253 | 
254 |     // In the fullness of time, equivalents of the methods of this class
255 |     // should be auto-generated from
256 |     // the OpDefs linked into libtensorflow_jni.so. That would match what is
257 |     // done in other languages
258 |     // like Python, C++ and Go.
259 |     static class GraphBuilder {
260 |         GraphBuilder(Graph g) {
261 |             this.g = g;
262 |         }
263 | 
264 |         Output div(Output x, Output y) {
265 |             return binaryOp("Div", x, y);
266 |         }
267 | 
268 |         Output sub(Output x, Output y) {
269 |             return binaryOp("Sub", x, y);
270 |         }
271 | 
272 |         Output resizeBilinear(Output images, Output size) {
273 |             return binaryOp("ResizeBilinear", images, size);
274 |         }
275 | 
276 |         Output expandDims(Output input, Output dim) {
277 |             return binaryOp("ExpandDims", input, dim);
278 |         }
279 | 
280 |         Output cast(Output value, DataType dtype) {
281 |             return g.opBuilder("Cast", "Cast").addInput(value).setAttr("DstT", dtype).build().output(0);
282 |         }
283 | 
284 |         Output decodeJpeg(Output contents, long channels) {
285 |             return g.opBuilder("DecodeJpeg", "DecodeJpeg").addInput(contents).setAttr("channels", channels).build()
286 |                     .output(0);
287 |         }
288 | 
289 |         Output constant(String name, Object value) {
290 |             try (Tensor t = Tensor.create(value)) {
291 |                 return g.opBuilder("Const", name).setAttr("dtype", t.dataType()).setAttr("value", t).build().output(0);
292 |             }
293 |         }
294 | 
295 |         private Output binaryOp(String type, Output in1, Output in2) {
296 |             return g.opBuilder(type, type).addInput(in1).addInput(in2).build().output(0);
297 |         }
298 | 
299 |         private Graph g;
300 |     }
301 | 
302 | }
303 | 
--------------------------------------------------------------------------------
/tensorflow-keras/pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
4 |     <modelVersion>4.0.0</modelVersion>
5 | 
6 |     <groupId>com.github.kaiwaehner.kafka.streams.machinelearning</groupId>
7 |     <artifactId>tensorflow-keras</artifactId>
8 |     <version>CP55_AK25</version>
9 | 
10 |     <repositories>
11 |         <repository>
12 |             <id>confluent</id>
13 |             <url>http://packages.confluent.io/maven/</url>
14 |         </repository>
15 |     </repositories>
16 | 
17 |     <properties>
18 |         <java.version>1.8</java.version>
19 |         <kafka.version>2.5.0</kafka.version>
20 |         <kafka.scala.version>2.12</kafka.scala.version>
21 |         <scala.version>${kafka.scala.version}.8</scala.version>
22 |         <confluent.version>5.5.0</confluent.version>
23 |         <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
24 |     </properties>
25 | 
26 |     <dependencies>
27 | 
28 | 
31 | 
32 |         <dependency>
33 |             <groupId>org.apache.kafka</groupId>
34 |             <artifactId>kafka-streams</artifactId>
35 |             <version>${kafka.version}</version>
36 |         </dependency>
37 | 
38 | 
42 | 
43 | 
45 | 
46 |         <dependency>
47 |             <groupId>org.nd4j</groupId>
48 |             <artifactId>nd4j-native-platform</artifactId>
49 |             <version>1.0.0-beta3</version>
50 |         </dependency>
51 | 
52 | 
53 | 
54 |         <dependency>
55 |             <groupId>org.deeplearning4j</groupId>
56 |             <artifactId>deeplearning4j-core</artifactId>
57 |             <version>1.0.0-beta3</version>
58 |         </dependency>
59 | 
60 |         <dependency>
61 |             <groupId>org.deeplearning4j</groupId>
62 |             <artifactId>deeplearning4j-modelimport</artifactId>
63 |             <version>1.0.0-beta3</version>
64 |         </dependency>
65 | 
66 | 
67 | 
68 |         <dependency>
69 |             <groupId>org.tensorflow</groupId>
70 |             <artifactId>tensorflow</artifactId>
71 |             <version>1.3.0</version>
72 |         </dependency>
73 | 
74 | 
75 | 
76 |         <dependency>
77 |             <groupId>junit</groupId>
78 |             <artifactId>junit</artifactId>
79 |             <version>4.12</version>
80 |             <scope>test</scope>
81 |         </dependency>
82 |         <dependency>
83 |             <groupId>org.assertj</groupId>
84 |             <artifactId>assertj-core</artifactId>
85 |             <version>3.3.0</version>
86 |             <scope>test</scope>
87 |         </dependency>
88 |         <dependency>
89 |             <groupId>org.apache.kafka</groupId>
90 |             <artifactId>kafka_${kafka.scala.version}</artifactId>
91 |             <version>${kafka.version}</version>
92 |             <classifier>test</classifier>
93 |             <scope>test</scope>
94 |         </dependency>
95 |         <dependency>
96 |             <groupId>org.apache.kafka</groupId>
97 |             <artifactId>kafka-clients</artifactId>
98 |             <version>${kafka.version}</version>
99 |             <classifier>test</classifier>
100 |             <scope>test</scope>
101 |         </dependency>
102 |         <dependency>
103 |             <groupId>org.apache.kafka</groupId>
104 |             <artifactId>kafka-streams</artifactId>
105 |             <version>${kafka.version}</version>
106 |             <classifier>test</classifier>
107 |             <scope>test</scope>
108 |         </dependency>
109 |         <dependency>
110 |             <groupId>org.apache.curator</groupId>
111 |             <artifactId>curator-test</artifactId>
112 |             <version>2.9.0</version>
113 |             <scope>test</scope>
114 |         </dependency>
115 |         <dependency>
116 |             <groupId>io.confluent</groupId>
117 |             <artifactId>kafka-schema-registry</artifactId>
118 |             <version>${confluent.version}</version>
119 |             <scope>test</scope>
120 |         </dependency>
121 |         <dependency>
122 |             <groupId>io.confluent</groupId>
123 |             <artifactId>kafka-schema-registry</artifactId>
124 |             <version>${confluent.version}</version>
125 | 
126 |             <classifier>tests</classifier>
127 |             <scope>test</scope>
128 |         </dependency>
129 |         <dependency>
130 |             <groupId>org.hamcrest</groupId>
131 |             <artifactId>hamcrest</artifactId>
132 |             <version>2.1</version>
133 |             <scope>test</scope>
134 |         </dependency>
135 | 
136 |     </dependencies>
137 | 
138 |     <build>
139 |         <plugins>
140 |             <plugin>
141 |                 <groupId>org.apache.maven.plugins</groupId>
142 |                 <artifactId>maven-compiler-plugin</artifactId>
143 |                 <version>3.6.1</version>
144 |                 <configuration>
145 |                     <source>1.8</source>
146 |                     <target>1.8</target>
147 |                 </configuration>
148 |             </plugin>
149 | 
150 | 
151 | 
178 | 
179 |         </plugins>
180 |     </build>
181 | </project>
--------------------------------------------------------------------------------
/tensorflow-keras/readme.md:
--------------------------------------------------------------------------------
1 | # Machine Learning + Kafka Streams Examples
2 | 
3 | General info in main [Readme](../readme.md)
4 | 
5 | ### Example 4 - Python + Keras + TensorFlow + DeepLearning4j
6 | 
7 | **Use Case**
8 | 
9 | Development of an analytic model trained with Python, Keras and TensorFlow, and its deployment to the Java and Kafka ecosystem. There is no business case here; this is a technical demonstration of a simple 'Hello World' Keras model. Feel free to replace the model with any other Keras model trained with your backend of choice; you just need to replace the model binary (and use a model which is compatible with DeepLearning4J's model importer).
10 | 
11 | **Machine Learning Technology**
12 | * [Python](https://www.python.org/)
13 | * [DeepLearning4J](https://deeplearning4j.org)
14 | * [Keras](https://keras.io/) - a high-level neural networks API, written in Python and capable of running on top of TensorFlow, CNTK, or Theano.
15 | * [TensorFlow](https://www.tensorflow.org/) - used as the backend under the hood of Keras
16 | * DeepLearning4J's [KerasModelImport feature](https://deeplearning4j.org/docs/latest/keras-import-overview) is used for importing the Keras / TensorFlow model into Java. The model used is the 'Hello World' example from that documentation, imported as shown in the sketch below.
17 | * The Keras model was trained with this [Python script](src/main/resources/generatedModels/Keras/keras-model-script.py).
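
For orientation, here is a minimal sketch (not part of this module's sources; the class name `KerasImportSketch` is hypothetical) of the import step that the integration test below performs, assuming DL4J 1.0.0-beta3 as declared in this module's pom.xml and the model path from this module:

```java
// Minimal sketch: load the Keras 'Hello World' model into Java via DL4J's Keras model import.
import org.deeplearning4j.nn.modelimport.keras.KerasModelImport;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class KerasImportSketch {
    public static void main(String[] args) throws Exception {
        // Import the .h5 file saved by keras-model-script.py as a DL4J network
        MultiLayerNetwork model = KerasModelImport.importKerasSequentialModelAndWeights(
                "src/main/resources/generatedModels/Keras/simple_mlp.h5");

        // The model was built with input_dim=100, so feed a batch of one
        // 100-dimensional feature vector (zeros, purely for illustration)
        INDArray input = Nd4j.zeros(1, 100);
        INDArray output = model.output(input);
        System.out.println(output); // softmax probabilities over 10 classes
    }
}
```

Because the saved model is compiled but never trained, its weights are only randomly initialized, so the softmax output is close to a uniform 0.1 per class; the point is simply that the Keras .h5 file round-trips into a DL4J `MultiLayerNetwork`.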
18 | 
19 | **Unit Test**
20 | 
21 | [Kafka_Streams_TensorFlow_Keras_Example_IntegrationTest.java](src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Keras_Example_IntegrationTest.java)
22 | 
23 | 
24 | 
25 | 
26 | 
27 | 
28 | 
29 | 
--------------------------------------------------------------------------------
/tensorflow-keras/src/main/resources/generatedModels/Keras/keras-model-script.py:
--------------------------------------------------------------------------------
1 | # Simple Keras model (source: https://deeplearning4j.org/docs/latest/keras-import-overview)
2 | 
3 | from keras.models import Sequential
4 | from keras.layers import Dense
5 | 
6 | model = Sequential()
7 | model.add(Dense(units=64, activation='relu', input_dim=100))
8 | model.add(Dense(units=10, activation='softmax'))
9 | model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])
10 | 
11 | model.save('simple_mlp.h5')
--------------------------------------------------------------------------------
/tensorflow-keras/src/main/resources/generatedModels/Keras/simple_mlp.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaiwaehner/kafka-streams-machine-learning-examples/3977e3928d2719fc710bb3d202851ee8904be2e7/tensorflow-keras/src/main/resources/generatedModels/Keras/simple_mlp.h5
--------------------------------------------------------------------------------
/tensorflow-keras/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | log4j.rootLogger=INFO, stdout
2 | 
3 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender
4 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
5 | log4j.appender.stdout.layout.ConversionPattern=%p %m (%c:%L) %n
--------------------------------------------------------------------------------
/tensorflow-keras/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java:
--------------------------------------------------------------------------------
1 | package com.github.jukkakarvanen.kafka.streams.integration.utils;
2 | 
3 | import org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster;
4 | import org.slf4j.Logger;
5 | import org.slf4j.LoggerFactory;
6 | 
7 | import java.util.Properties;
8 | 
9 | /** This is a helper class to work around failing stream tests on Windows (KAFKA-6647).
10 |  * 
11 |  * @author Jukka Karvanen
12 |  * 
13 |  * The underlying issue is https://issues.apache.org/jira/browse/KAFKA-6647.
14 |  * Replacing EmbeddedKafkaCluster with TestEmbeddedKafkaCluster catches and ignores the
15 |  * exception thrown during the tear-down of the test.
16 |  * The exception does not affect functionality.
17 |  */
18 | 
19 | public class TestEmbeddedKafkaCluster extends EmbeddedKafkaCluster {
20 |     private static final Logger log = LoggerFactory.getLogger(TestEmbeddedKafkaCluster.class);
21 | 
22 |     public TestEmbeddedKafkaCluster(int numBrokers) {
23 |         super(numBrokers);
24 |     }
25 | 
26 |     public TestEmbeddedKafkaCluster(int numBrokers, Properties brokerConfig) {
27 |         super(numBrokers, brokerConfig);
28 |     }
29 | 
30 |     public TestEmbeddedKafkaCluster(int numBrokers, Properties brokerConfig, long mockTimeMillisStart) {
31 |         super(numBrokers, brokerConfig, mockTimeMillisStart);
32 |     }
33 | 
34 |     public TestEmbeddedKafkaCluster(int numBrokers, Properties brokerConfig, long mockTimeMillisStart, long mockTimeNanoStart) {
35 |         super(numBrokers, brokerConfig, mockTimeMillisStart, mockTimeNanoStart);
36 |     }
37 | 
38 |     public void after() {
39 |         try {
40 |             super.after();
41 |         } catch (RuntimeException e) {
42 |             log.warn("Ignoring exception that causes test failures on Windows: {}", e);
43 |         }
44 |     }
45 | }
46 | 
--------------------------------------------------------------------------------
/tensorflow-keras/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java:
--------------------------------------------------------------------------------
1 | package com.github.jukkakarvanen.kafka.streams.integration.utils;
2 | 
3 | import org.apache.kafka.common.utils.Time;
4 | import org.apache.kafka.streams.KafkaClientSupplier;
5 | import org.apache.kafka.streams.KafkaStreams;
6 | import org.apache.kafka.streams.Topology;
7 | import org.slf4j.Logger;
8 | import org.slf4j.LoggerFactory;
9 | 
10 | import java.util.Properties;
11 | 
12 | /** This is a helper class to work around failing stream tests on Windows (KAFKA-6647).
13 |  * 
14 |  * @author Jukka Karvanen
15 |  * 
16 |  * The underlying issue is https://issues.apache.org/jira/browse/KAFKA-6647.
17 |  * Replacing KafkaStreams with TestKafkaStreams catches and ignores the exception caused by cleanUp().
18 |  * The exception does not affect functionality.
19 |  */
20 | 
21 | public class TestKafkaStreams extends KafkaStreams {
22 |     private static final Logger log = LoggerFactory.getLogger(TestKafkaStreams.class);
23 | 
24 |     public TestKafkaStreams(Topology topology, Properties props) {
25 |         super(topology, props);
26 |     }
27 | 
28 |     public TestKafkaStreams(Topology topology, Properties props, KafkaClientSupplier clientSupplier) {
29 |         super(topology, props, clientSupplier);
30 |     }
31 | 
32 |     public TestKafkaStreams(Topology topology, Properties props, Time time) {
33 |         super(topology, props, time);
34 |     }
35 | 
36 |     public TestKafkaStreams(Topology topology, Properties props, KafkaClientSupplier clientSupplier, Time time) {
37 |         super(topology, props, clientSupplier, time);
38 |     }
39 | 
40 |     public void cleanUp() {
41 |         try {
42 |             super.cleanUp();
43 |         } catch (RuntimeException e) {
44 |             log.warn("Ignoring exception that causes test failures on Windows: {}", e);
45 |         }
46 |     }
47 | }
48 | 
--------------------------------------------------------------------------------
/tensorflow-keras/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Keras_Example_IntegrationTest.java:
--------------------------------------------------------------------------------
1 | package com.github.megachucky.kafka.streams.machinelearning.test;
2 | 
3 | import static org.assertj.core.api.Assertions.assertThat;
4 | 
5 | import java.util.Arrays;
6 | import java.util.List;
7 | import java.util.Properties;
8 | 
9 | import com.github.jukkakarvanen.kafka.streams.integration.utils.TestEmbeddedKafkaCluster;
10 | import com.github.jukkakarvanen.kafka.streams.integration.utils.TestKafkaStreams;
11 | import org.apache.kafka.clients.consumer.ConsumerConfig;
12 | import org.apache.kafka.clients.producer.ProducerConfig;
13 | import org.apache.kafka.common.serialization.Serdes;
14 | import org.apache.kafka.common.serialization.StringDeserializer;
15 | import org.apache.kafka.common.serialization.StringSerializer;
16 | import org.apache.kafka.common.utils.MockTime;
17 | import org.apache.kafka.streams.KafkaStreams;
18 | import org.apache.kafka.streams.KeyValue;
19 | import org.apache.kafka.streams.StreamsBuilder;
20 | import org.apache.kafka.streams.StreamsConfig;
21 | import org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster;
22 | import org.apache.kafka.streams.integration.utils.IntegrationTestUtils;
23 | import org.apache.kafka.streams.kstream.KStream;
24 | import org.deeplearning4j.nn.modelimport.keras.KerasModelImport;
25 | import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
26 | import org.junit.BeforeClass;
27 | import org.junit.ClassRule;
28 | import org.junit.Test;
29 | import org.nd4j.linalg.api.ndarray.INDArray;
30 | import org.nd4j.linalg.factory.Nd4j;
31 | import org.nd4j.linalg.io.ClassPathResource;
32 | 
33 | /**
34 |  * 
35 |  * @author Kai Waehner -
36 |  *         http://www.kai-waehner.de
37 |  * 
38 |  * End-to-end integration test based on
39 |  * {@link Kafka_Streams_TensorFlow_Keras_Example}, using an embedded
40 |  * Kafka cluster and a Keras model (trained with TensorFlow backend),
41 |  * imported via DL4J's Keras model import support.
42 |  * 
43 |  * 
44 |  */
45 | public class Kafka_Streams_TensorFlow_Keras_Example_IntegrationTest {
46 | 
47 |     @ClassRule
48 |     public static final EmbeddedKafkaCluster CLUSTER = new TestEmbeddedKafkaCluster(1);
49 | 
50 |     private static final String inputTopic = "InputTopic";
51 |     private static final String outputTopic = "OutputTopic";
52 | 
53 |     private static String prediction = "unknown";
54 | 
55 |     private static INDArray output = null;
56 | 
57 |     @BeforeClass
58 |     public static void startKafkaCluster() throws Exception {
59 |         CLUSTER.createTopic(inputTopic);
60 |         CLUSTER.createTopic(outputTopic);
61 |     }
62 | 
63 |     @Test
64 |     public void shouldPredictValues() throws Exception {
65 | 
66 |         // ########################################################
67 |         // Step 1: Load the Keras model using the DeepLearning4J API
68 |         // ########################################################
69 |         String simpleMlp = new ClassPathResource("generatedModels/Keras/simple_mlp.h5").getFile().getPath();
70 |         System.out.println(simpleMlp);
71 | 
72 |         MultiLayerNetwork model = KerasModelImport.importKerasSequentialModelAndWeights(simpleMlp);
73 | 
74 |         // Create test data which is sent from the Kafka producer into the input topic
75 |         List<String> inputValues = Arrays.asList("256,100");
76 | 
77 |         // ####################################################################
78 |         // Step 2: Configure and start the Kafka Streams processor topology.
79 |         // ####################################################################
80 | 
81 |         Properties streamsConfiguration = new Properties();
82 |         streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG,
83 |                 "kafka-streams-tensorflow-keras-integration-test");
84 |         streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
85 | 
86 |         // Configure the Kafka Streams application:
87 |         // specify default (de)serializers for record keys and for record
88 |         // values.
89 |         streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
90 |         streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
91 | 
92 |         // In the subsequent lines we define the processing topology of the
93 |         // Streams application.
94 |         final StreamsBuilder builder = new StreamsBuilder();
95 | 
96 |         // Construct a `KStream` from the input topic, where
97 |         // message values represent lines of text (for the sake of this example, we
98 |         // ignore whatever may be stored in the message keys).
99 |         final KStream<String, String> inputEvents = builder.stream(inputTopic);
100 | 
101 |         // ###############################################################
102 |         // THIS IS WHERE WE DO REAL-TIME MODEL INFERENCE FOR EACH EVENT
103 |         // ###############################################################
104 |         inputEvents.foreach((key, value) -> {
105 | 
106 |             // Parse the two comma-separated integers; note that Nd4j.create(rows, cols)
107 |             // allocates a zero-filled matrix of shape [256, 100], i.e. a batch of 256 zero vectors matching the model's input_dim of 100:
108 |             String[] valuesAsArray = value.split(",");
109 |             INDArray input = Nd4j.create(Integer.parseInt(valuesAsArray[0]), Integer.parseInt(valuesAsArray[1]));
110 | 
111 |             // Apply the analytic model:
112 |             output = model.output(input);
113 |             prediction = output.toString();
114 | 
115 |         });
116 | 
117 |         // Transform message: add prediction result
118 |         KStream<String, String> transformedMessage = inputEvents.mapValues(value -> "Prediction => " + prediction);
119 | 
120 |         // Send prediction result to the output topic
121 |         transformedMessage.to(outputTopic);
122 | 
123 |         // Start the Kafka Streams application to process new incoming messages
124 |         // from the input topic
125 |         final KafkaStreams streams = new TestKafkaStreams(builder.build(), streamsConfiguration);
126 |         streams.cleanUp();
127 |         streams.start();
128 |         System.out.println("Prediction Microservice is running...");
129 |         System.out.println("Input to Kafka Topic " + inputTopic + "; Output to Kafka Topic " + outputTopic);
130 | 
131 |         // ########################################################
132 |         // Step 3: Produce some input data to the input topic.
133 |         // ########################################################
134 | 
135 |         Properties producerConfig = new Properties();
136 |         producerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
137 |         producerConfig.put(ProducerConfig.ACKS_CONFIG, "all");
138 |         producerConfig.put(ProducerConfig.RETRIES_CONFIG, 0);
139 |         producerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
140 |         producerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
141 |         IntegrationTestUtils.produceValuesSynchronously(inputTopic, inputValues, producerConfig, new MockTime());
142 | 
143 |         // ########################################################
144 |         // Step 4: Verify the application's output data.
145 |         // ########################################################
146 | 
147 |         Properties consumerConfig = new Properties();
148 |         consumerConfig.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
149 |         consumerConfig.put(ConsumerConfig.GROUP_ID_CONFIG,
150 |                 "kafka-streams-tensorflow-keras-integration-test-standard-consumer");
151 |         consumerConfig.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
152 |         consumerConfig.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
153 |         consumerConfig.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
154 |         List<KeyValue<String, String>> response = IntegrationTestUtils
155 |                 .waitUntilMinKeyValueRecordsReceived(consumerConfig, outputTopic, 1);
156 |         streams.close();
157 | 
158 |         System.out.println("VALUE: " + response.get(0).value);
159 | 
160 |         assertThat(response).isNotNull();
161 |         assertThat(response.get(0).value).doesNotMatch("Prediction => unknown");
162 |         assertThat(response.get(0).value).contains("0.1000, 0.1000, 0.1000");
163 |     }
164 | 
165 | }
166 | 
--------------------------------------------------------------------------------