├── .gitignore
├── LICENSE
├── dl4j-deeplearning-iris
├── pom.xml
├── readme.md
└── src
│ ├── main
│ ├── java
│ │ └── com
│ │ │ └── github
│ │ │ └── megachucky
│ │ │ └── kafka
│ │ │ └── streams
│ │ │ └── machinelearning
│ │ │ └── models
│ │ │ ├── DeepLearning4J_CSV_Iris_Model.java
│ │ │ └── DeepLearning4J_CSV_Model_Inference.java
│ └── resources
│ │ ├── DL4J_Resources
│ │ └── iris.txt
│ │ ├── generatedModels
│ │ └── DL4J
│ │ │ └── DL4J_Iris_Model.zip
│ │ └── log4j.properties
│ └── test
│ └── java
│ └── com
│ └── github
│ ├── jukkakarvanen
│ └── kafka
│ │ └── streams
│ │ └── integration
│ │ └── utils
│ │ ├── TestEmbeddedKafkaCluster.java
│ │ └── TestKafkaStreams.java
│ └── megachucky
│ └── kafka
│ └── streams
│ └── machinelearning
│ └── test
│ └── Kafka_Streams_MachineLearning_DL4J_DeepLearning_Iris_IntegrationTest.java
├── h2o-gbm
├── pom.xml
├── readme.md
└── src
│ ├── main
│ ├── java
│ │ └── com
│ │ │ └── github
│ │ │ └── megachucky
│ │ │ └── kafka
│ │ │ └── streams
│ │ │ └── machinelearning
│ │ │ ├── Kafka_Streams_MachineLearning_H2O_Application.java
│ │ │ ├── Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.java
│ │ │ ├── Kafka_Streams_MachineLearning_H2O_GBM_Example.java
│ │ │ └── models
│ │ │ ├── deeplearning_fe7c1f02_08ec_4070_b784_c2531147e451.java
│ │ │ └── gbm_pojo_test.java
│ └── resources
│ │ ├── generatedModels
│ │ ├── DeepWater_model_python_1503570558230_1.zip
│ │ └── GBM_model_python_1503397740678_1.zip
│ │ └── log4j.properties
│ └── test
│ └── java
│ └── com
│ └── github
│ └── megachucky
│ └── kafka
│ └── streams
│ └── machinelearning
│ ├── Kafka_Streams_MachineLearning_H2O_DeepLearning_ExampleTest.java
│ ├── Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java
│ ├── Kafka_Streams_MachineLearning_H2O_GBM_ExampleTest.java
│ ├── Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java
│ ├── TestEmbeddedKafkaCluster.java
│ └── TestKafkaStreams.java
├── pom.xml
├── readme.md
├── src
└── main
│ ├── java
│ └── com
│ │ └── github
│ │ └── megachucky
│ │ └── kafka
│ │ └── streams
│ │ └── machinelearning
│ │ └── StreamsStarterApp.java
│ └── resources
│ └── log4j.properties
├── tensorflow-image-recognition
├── pom.xml
├── readme.md
└── src
│ ├── main
│ ├── java
│ │ └── com
│ │ │ └── github
│ │ │ └── megachucky
│ │ │ └── kafka
│ │ │ └── streams
│ │ │ └── machinelearning
│ │ │ └── Kafka_Streams_TensorFlow_Image_Recognition_Example.java
│ └── resources
│ │ ├── TensorFlow_Images
│ │ ├── devil.png
│ │ ├── new_airplane.jpg
│ │ ├── trained_airplane_1.jpg
│ │ ├── trained_airplane_2.jpg
│ │ └── trained_butterfly.jpg
│ │ ├── generatedModels
│ │ ├── CNN_inception5h
│ │ │ ├── LICENSE
│ │ │ ├── imagenet_comp_graph_label_strings.txt
│ │ │ └── tensorflow_inception_graph.pb
│ │ └── TensorFlow_Census
│ │ │ ├── saved_model.pb
│ │ │ ├── test.json
│ │ │ └── variables
│ │ │ ├── variables.data-00000-of-00001
│ │ │ └── variables.index
│ │ └── log4j.properties
│ └── test
│ └── java
│ └── com
│ └── github
│ ├── jukkakarvanen
│ └── kafka
│ │ └── streams
│ │ └── integration
│ │ └── utils
│ │ ├── TestEmbeddedKafkaCluster.java
│ │ └── TestKafkaStreams.java
│ └── megachucky
│ └── kafka
│ └── streams
│ └── machinelearning
│ ├── Kafka_Streams_TensorFlow_Image_Recognition_ExampleTest.java
│ └── test
│ └── Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest.java
└── tensorflow-keras
├── pom.xml
├── readme.md
└── src
├── main
└── resources
│ ├── generatedModels
│ └── Keras
│ │ ├── keras-model-script.py
│ │ └── simple_mlp.h5
│ └── log4j.properties
└── test
└── java
└── com
└── github
├── jukkakarvanen
└── kafka
│ └── streams
│ └── integration
│ └── utils
│ ├── TestEmbeddedKafkaCluster.java
│ └── TestKafkaStreams.java
└── megachucky
└── kafka
└── streams
└── machinelearning
└── test
└── Kafka_Streams_TensorFlow_Keras_Example_IntegrationTest.java
/.gitignore:
--------------------------------------------------------------------------------
1 | # Eclipse
2 | .classpath
3 | .project
4 | .settings/
5 |
6 | # Intellij
7 | .idea/
8 | *.iml
9 | *.iws
10 |
11 | # Mac
12 | .DS_Store
13 |
14 | # Maven
15 | log/
16 | target/
17 |
18 | # Visual Studio Code
19 | .vscode/
20 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "{}"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright {yyyy} {name of copyright owner}
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/dl4j-deeplearning-iris/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
4 | 4.0.0
5 |
6 | com.github.kaiwaehner.kafka.streams.machinelearning
7 | dl4j-deeplearning-iris
8 | CP55_AK25
9 |
10 |
11 |
12 | confluent
13 | http://packages.confluent.io/maven/
14 |
15 |
16 |
17 |
18 | 1.8
19 | 2.5.0
20 | 2.12
21 | ${kafka.scala.version}.8
22 | 5.5.0
23 | UTF-8
24 |
25 |
26 |
27 |
28 |
31 |
32 |
33 | org.apache.kafka
34 | kafka-streams
35 | ${kafka.version}
36 |
37 |
38 |
42 |
43 |
45 |
46 |
47 | org.nd4j
48 | nd4j-native-platform
49 | 1.0.0-beta3
50 |
51 |
52 |
53 |
54 |
55 | org.deeplearning4j
56 | deeplearning4j-core
57 | 1.0.0-beta3
58 |
59 |
60 |
61 | org.deeplearning4j
62 | deeplearning4j-modelimport
63 | 1.0.0-beta3
64 |
65 |
66 |
67 |
68 | junit
69 | junit
70 | 4.12
71 | test
72 |
73 |
74 | org.assertj
75 | assertj-core
76 | 3.3.0
77 | test
78 |
79 |
80 | org.apache.kafka
81 | kafka_${kafka.scala.version}
82 | ${kafka.version}
83 | test
84 | test
85 |
86 |
87 | org.apache.kafka
88 | kafka-clients
89 | ${kafka.version}
90 | test
91 | test
92 |
93 |
94 | org.apache.kafka
95 | kafka-streams
96 | ${kafka.version}
97 | test
98 | test
99 |
100 |
101 | org.apache.curator
102 | curator-test
103 | 2.9.0
104 | test
105 |
106 |
107 | io.confluent
108 | kafka-schema-registry
109 | ${confluent.version}
110 | test
111 |
112 |
113 | io.confluent
114 | kafka-schema-registry
115 | ${confluent.version}
116 |
117 | tests
118 | test
119 |
120 |
121 | org.hamcrest
122 | hamcrest
123 | 2.1
124 | test
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 | org.apache.maven.plugins
133 | maven-compiler-plugin
134 | 3.6.1
135 |
136 | 1.8
137 | 1.8
138 |
139 |
140 |
141 |
142 |
169 |
170 |
171 |
172 |
--------------------------------------------------------------------------------
/dl4j-deeplearning-iris/readme.md:
--------------------------------------------------------------------------------
1 | # Machine Learning + Kafka Streams Examples
2 |
3 | General info in main [Readme](../readme.md)
4 |
5 | ### Example 3 - Iris Prediction using a Neural Network with DeepLearning4J (DL4J)
6 | **Use Case**
7 |
8 | Iris Species Prediction using a Neural Network.
9 | This is a famous example: Prediction of the Iris Species - implemented with many different ML algorithms. Here I use DeepLearning4J (DL4J) to build a neural network using the Iris dataset.
10 |
11 | **Machine Learning Technology**
12 | * [DeepLearning4J](https://deeplearning4j.org)
13 | * Pretty simple example to demo how to build, save and load neural networks with DL4J. [MultiLayerNetwork](https://deeplearning4j.org/doc/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.html) and [INDArray](http://nd4j.org/doc/org/nd4j/linalg/api/ndarray/INDArray.html) are the key APIs to look at if you want to understand the details.
14 | * The model is created via [DeepLearning4J_CSV_Iris_Model.java](src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/DeepLearning4J_CSV_Iris_Model.java) and stored in the resources: [DL4J_Iris_Model.zip](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/tree/master/src/main/resources/generatedModels/DL4J). No need to re-train, just for reference. Kudos to Adam Gibson who created this example as part of the DL4J project.
15 |
16 | **Unit Test**
17 | [Kafka_Streams_MachineLearning_DL4J_DeepLearning_Iris_IntegrationTest.java](src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_DL4J_DeepLearning_Iris_IntegrationTest.java)
18 |
19 |
--------------------------------------------------------------------------------
/dl4j-deeplearning-iris/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/DeepLearning4J_CSV_Iris_Model.java:
--------------------------------------------------------------------------------
1 | package com.github.megachucky.kafka.streams.machinelearning.models;
2 |
3 | import java.io.File;
4 |
5 | import org.datavec.api.records.reader.RecordReader;
6 | import org.datavec.api.records.reader.impl.csv.CSVRecordReader;
7 | import org.datavec.api.split.FileSplit;
8 | import org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator;
9 | import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
10 | import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
11 | import org.deeplearning4j.nn.conf.layers.DenseLayer;
12 | import org.deeplearning4j.nn.conf.layers.OutputLayer;
13 | import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
14 | import org.deeplearning4j.nn.weights.WeightInit;
15 | import org.deeplearning4j.optimize.listeners.ScoreIterationListener;
16 | import org.deeplearning4j.util.ModelSerializer;
17 | import org.nd4j.evaluation.classification.Evaluation;
18 | import org.nd4j.linalg.activations.Activation;
19 | import org.nd4j.linalg.api.ndarray.INDArray;
20 | import org.nd4j.linalg.io.ClassPathResource;
21 | import org.nd4j.linalg.dataset.DataSet;
22 | import org.nd4j.linalg.dataset.SplitTestAndTrain;
23 | import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
24 | import org.nd4j.linalg.dataset.api.preprocessor.DataNormalization;
25 | import org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize;
26 |
27 | import org.nd4j.linalg.learning.config.Sgd;
28 | import org.nd4j.linalg.lossfunctions.LossFunctions;
29 | import org.slf4j.Logger;
30 | import org.slf4j.LoggerFactory;
31 |
32 | public class DeepLearning4J_CSV_Iris_Model {
33 |
34 | private static Logger log = LoggerFactory.getLogger(DeepLearning4J_CSV_Iris_Model.class);
35 |
36 | public static void main(String[] args) throws Exception {
37 |
38 | // First: get the dataset using the record reader. CSVRecordReader handles
39 | // loading/parsing
40 | int numLinesToSkip = 0;
41 | char delimiter = ',';
42 | RecordReader recordReader = new CSVRecordReader(numLinesToSkip, delimiter);
43 | recordReader.initialize(new FileSplit(new ClassPathResource("DL4J_Resources/iris.txt").getFile()));
44 |
45 | // Second: the RecordReaderDataSetIterator handles conversion to DataSet
46 | // objects, ready for use in neural network
47 | int labelIndex = 4; // 5 values in each row of the iris.txt CSV: 4 input features followed by an
48 | // integer label (class) index. Labels are the 5th value (index 4) in each row
49 | int numClasses = 3; // 3 classes (types of iris flowers) in the iris data set. Classes have integer
50 | // values 0, 1 or 2
51 | int batchSize = 150; // Iris data set: 150 examples total. We are loading all of them into one
52 | // DataSet (not recommended for large data sets)
53 |
54 | DataSetIterator iterator = new RecordReaderDataSetIterator(recordReader, batchSize, labelIndex, numClasses);
55 | DataSet allData = iterator.next();
56 | allData.shuffle();
57 | SplitTestAndTrain testAndTrain = allData.splitTestAndTrain(0.65); // Use 65% of data for training
58 |
59 | DataSet trainingData = testAndTrain.getTrain();
60 | DataSet testData = testAndTrain.getTest();
61 |
62 | // We need to normalize our data. We'll use NormalizeStandardize (which gives us
63 | // mean 0, unit variance):
64 | DataNormalization normalizer = new NormalizerStandardize();
65 | normalizer.fit(trainingData); // Collect the statistics (mean/stdev) from the training data. This does not
66 | // modify the input data
67 | normalizer.transform(trainingData); // Apply normalization to the training data
68 | normalizer.transform(testData); // Apply normalization to the test data. This is using statistics calculated
69 | // from the *training* set
70 |
71 | final int numInputs = 4;
72 | int outputNum = 3;
73 | long seed = 6;
74 |
75 | log.info("Build model....");
76 | MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(seed).activation(Activation.TANH)
77 | .weightInit(WeightInit.XAVIER).updater(new Sgd(0.1)).l2(1e-4).list()
78 | .layer(0, new DenseLayer.Builder().nIn(numInputs).nOut(3).build())
79 | .layer(1, new DenseLayer.Builder().nIn(3).nOut(3).build())
80 | .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
81 | .activation(Activation.SOFTMAX).nIn(3).nOut(outputNum).build())
82 | .build();
83 |
84 | // run the model
85 | MultiLayerNetwork model = new MultiLayerNetwork(conf);
86 | model.init();
87 | model.setListeners(new ScoreIterationListener(100));
88 |
89 | for (int i = 0; i < 1000; i++) {
90 | model.fit(trainingData);
91 | }
92 |
93 | // evaluate the model on the test set
94 | Evaluation eval = new Evaluation(3);
95 | INDArray input = testData.getFeatures();
96 | INDArray output = model.output(input);
97 | System.out.println("INPUT:" + input.toString());
98 | eval.eval(testData.getLabels(), output);
99 | log.info(eval.stats());
100 |
101 | // Save the model
102 | File locationToSave = new File("src/main/resources/generatedModels/DL4J/DL4J_Iris_Model.zip"); // Where to save
103 | // the network.
104 | // Note: the file
105 | // is in .zip
106 | // format - can
107 | // be opened
108 | // externally
109 | boolean saveUpdater = true; // Updater: i.e., the state for Momentum, RMSProp, Adagrad etc. Save this if you
110 | // want to train your network more in the future
111 | // ModelSerializer.writeModel(model, locationToSave, saveUpdater);
112 |
113 | // Load the model
114 | MultiLayerNetwork restored = ModelSerializer.restoreMultiLayerNetwork(locationToSave);
115 |
116 | System.out.println("Saved and loaded parameters are equal: " + model.params().equals(restored.params()));
117 | System.out.println("Saved and loaded configurations are equal: "
118 | + model.getLayerWiseConfigurations().equals(restored.getLayerWiseConfigurations()));
119 |
120 | }
121 |
122 | }
--------------------------------------------------------------------------------
/dl4j-deeplearning-iris/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/DeepLearning4J_CSV_Model_Inference.java:
--------------------------------------------------------------------------------
1 | package com.github.megachucky.kafka.streams.machinelearning.models;
2 |
3 | import java.io.File;
4 |
5 | import org.datavec.api.records.reader.RecordReader;
6 | import org.datavec.api.records.reader.impl.csv.CSVRecordReader;
7 | import org.datavec.api.split.FileSplit;
8 | import org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator;
9 | import org.deeplearning4j.eval.Evaluation;
10 | import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
11 | import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
12 | import org.deeplearning4j.nn.conf.layers.DenseLayer;
13 | import org.deeplearning4j.nn.conf.layers.OutputLayer;
14 | import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
15 | import org.deeplearning4j.nn.weights.WeightInit;
16 | import org.deeplearning4j.optimize.listeners.ScoreIterationListener;
17 |
18 | import org.deeplearning4j.util.ModelSerializer;
19 | import org.nd4j.linalg.activations.Activation;
20 | import org.nd4j.linalg.api.ndarray.INDArray;
21 | import org.nd4j.linalg.dataset.DataSet;
22 | import org.nd4j.linalg.dataset.SplitTestAndTrain;
23 | import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
24 | import org.nd4j.linalg.dataset.api.preprocessor.DataNormalization;
25 | import org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize;
26 | import org.nd4j.linalg.factory.Nd4j;
27 | import org.nd4j.linalg.lossfunctions.LossFunctions;
28 | import org.slf4j.Logger;
29 | import org.slf4j.LoggerFactory;
30 |
31 | /**
32 | * @author Adam Gibson
33 | */
34 | public class DeepLearning4J_CSV_Model_Inference {
35 |
36 | private static Logger log = LoggerFactory.getLogger(DeepLearning4J_CSV_Model_Inference.class);
37 |
38 | public static void main(String[] args) throws Exception {
39 |
40 | //Save the model
41 | File locationToSave = new File("src/main/resources/generatedModels/DL4J/DL4J_Iris_Model.zip"); //Where to save the network. Note: the file is in .zip format - can be opened externally
42 |
43 |
44 | //Load the model
45 | MultiLayerNetwork model = ModelSerializer.restoreMultiLayerNetwork(locationToSave);
46 |
47 |
48 | // Inference
49 | INDArray input = Nd4j.create(new double[] {5.0,3.5,1.6,0.6});
50 | INDArray result = model.output(input);
51 |
52 | System.out.println("Probabilities: " + result.toString());
53 |
54 | }
55 |
56 | }
57 |
58 |
--------------------------------------------------------------------------------
/dl4j-deeplearning-iris/src/main/resources/DL4J_Resources/iris.txt:
--------------------------------------------------------------------------------
1 | 5.1,3.5,1.4,0.2,0
2 | 4.9,3.0,1.4,0.2,0
3 | 4.7,3.2,1.3,0.2,0
4 | 4.6,3.1,1.5,0.2,0
5 | 5.0,3.6,1.4,0.2,0
6 | 5.4,3.9,1.7,0.4,0
7 | 4.6,3.4,1.4,0.3,0
8 | 5.0,3.4,1.5,0.2,0
9 | 4.4,2.9,1.4,0.2,0
10 | 4.9,3.1,1.5,0.1,0
11 | 5.4,3.7,1.5,0.2,0
12 | 4.8,3.4,1.6,0.2,0
13 | 4.8,3.0,1.4,0.1,0
14 | 4.3,3.0,1.1,0.1,0
15 | 5.8,4.0,1.2,0.2,0
16 | 5.7,4.4,1.5,0.4,0
17 | 5.4,3.9,1.3,0.4,0
18 | 5.1,3.5,1.4,0.3,0
19 | 5.7,3.8,1.7,0.3,0
20 | 5.1,3.8,1.5,0.3,0
21 | 5.4,3.4,1.7,0.2,0
22 | 5.1,3.7,1.5,0.4,0
23 | 4.6,3.6,1.0,0.2,0
24 | 5.1,3.3,1.7,0.5,0
25 | 4.8,3.4,1.9,0.2,0
26 | 5.0,3.0,1.6,0.2,0
27 | 5.0,3.4,1.6,0.4,0
28 | 5.2,3.5,1.5,0.2,0
29 | 5.2,3.4,1.4,0.2,0
30 | 4.7,3.2,1.6,0.2,0
31 | 4.8,3.1,1.6,0.2,0
32 | 5.4,3.4,1.5,0.4,0
33 | 5.2,4.1,1.5,0.1,0
34 | 5.5,4.2,1.4,0.2,0
35 | 4.9,3.1,1.5,0.1,0
36 | 5.0,3.2,1.2,0.2,0
37 | 5.5,3.5,1.3,0.2,0
38 | 4.9,3.1,1.5,0.1,0
39 | 4.4,3.0,1.3,0.2,0
40 | 5.1,3.4,1.5,0.2,0
41 | 5.0,3.5,1.3,0.3,0
42 | 4.5,2.3,1.3,0.3,0
43 | 4.4,3.2,1.3,0.2,0
44 | 5.0,3.5,1.6,0.6,0
45 | 5.1,3.8,1.9,0.4,0
46 | 4.8,3.0,1.4,0.3,0
47 | 5.1,3.8,1.6,0.2,0
48 | 4.6,3.2,1.4,0.2,0
49 | 5.3,3.7,1.5,0.2,0
50 | 5.0,3.3,1.4,0.2,0
51 | 7.0,3.2,4.7,1.4,1
52 | 6.4,3.2,4.5,1.5,1
53 | 6.9,3.1,4.9,1.5,1
54 | 5.5,2.3,4.0,1.3,1
55 | 6.5,2.8,4.6,1.5,1
56 | 5.7,2.8,4.5,1.3,1
57 | 6.3,3.3,4.7,1.6,1
58 | 4.9,2.4,3.3,1.0,1
59 | 6.6,2.9,4.6,1.3,1
60 | 5.2,2.7,3.9,1.4,1
61 | 5.0,2.0,3.5,1.0,1
62 | 5.9,3.0,4.2,1.5,1
63 | 6.0,2.2,4.0,1.0,1
64 | 6.1,2.9,4.7,1.4,1
65 | 5.6,2.9,3.6,1.3,1
66 | 6.7,3.1,4.4,1.4,1
67 | 5.6,3.0,4.5,1.5,1
68 | 5.8,2.7,4.1,1.0,1
69 | 6.2,2.2,4.5,1.5,1
70 | 5.6,2.5,3.9,1.1,1
71 | 5.9,3.2,4.8,1.8,1
72 | 6.1,2.8,4.0,1.3,1
73 | 6.3,2.5,4.9,1.5,1
74 | 6.1,2.8,4.7,1.2,1
75 | 6.4,2.9,4.3,1.3,1
76 | 6.6,3.0,4.4,1.4,1
77 | 6.8,2.8,4.8,1.4,1
78 | 6.7,3.0,5.0,1.7,1
79 | 6.0,2.9,4.5,1.5,1
80 | 5.7,2.6,3.5,1.0,1
81 | 5.5,2.4,3.8,1.1,1
82 | 5.5,2.4,3.7,1.0,1
83 | 5.8,2.7,3.9,1.2,1
84 | 6.0,2.7,5.1,1.6,1
85 | 5.4,3.0,4.5,1.5,1
86 | 6.0,3.4,4.5,1.6,1
87 | 6.7,3.1,4.7,1.5,1
88 | 6.3,2.3,4.4,1.3,1
89 | 5.6,3.0,4.1,1.3,1
90 | 5.5,2.5,4.0,1.3,1
91 | 5.5,2.6,4.4,1.2,1
92 | 6.1,3.0,4.6,1.4,1
93 | 5.8,2.6,4.0,1.2,1
94 | 5.0,2.3,3.3,1.0,1
95 | 5.6,2.7,4.2,1.3,1
96 | 5.7,3.0,4.2,1.2,1
97 | 5.7,2.9,4.2,1.3,1
98 | 6.2,2.9,4.3,1.3,1
99 | 5.1,2.5,3.0,1.1,1
100 | 5.7,2.8,4.1,1.3,1
101 | 6.3,3.3,6.0,2.5,2
102 | 5.8,2.7,5.1,1.9,2
103 | 7.1,3.0,5.9,2.1,2
104 | 6.3,2.9,5.6,1.8,2
105 | 6.5,3.0,5.8,2.2,2
106 | 7.6,3.0,6.6,2.1,2
107 | 4.9,2.5,4.5,1.7,2
108 | 7.3,2.9,6.3,1.8,2
109 | 6.7,2.5,5.8,1.8,2
110 | 7.2,3.6,6.1,2.5,2
111 | 6.5,3.2,5.1,2.0,2
112 | 6.4,2.7,5.3,1.9,2
113 | 6.8,3.0,5.5,2.1,2
114 | 5.7,2.5,5.0,2.0,2
115 | 5.8,2.8,5.1,2.4,2
116 | 6.4,3.2,5.3,2.3,2
117 | 6.5,3.0,5.5,1.8,2
118 | 7.7,3.8,6.7,2.2,2
119 | 7.7,2.6,6.9,2.3,2
120 | 6.0,2.2,5.0,1.5,2
121 | 6.9,3.2,5.7,2.3,2
122 | 5.6,2.8,4.9,2.0,2
123 | 7.7,2.8,6.7,2.0,2
124 | 6.3,2.7,4.9,1.8,2
125 | 6.7,3.3,5.7,2.1,2
126 | 7.2,3.2,6.0,1.8,2
127 | 6.2,2.8,4.8,1.8,2
128 | 6.1,3.0,4.9,1.8,2
129 | 6.4,2.8,5.6,2.1,2
130 | 7.2,3.0,5.8,1.6,2
131 | 7.4,2.8,6.1,1.9,2
132 | 7.9,3.8,6.4,2.0,2
133 | 6.4,2.8,5.6,2.2,2
134 | 6.3,2.8,5.1,1.5,2
135 | 6.1,2.6,5.6,1.4,2
136 | 7.7,3.0,6.1,2.3,2
137 | 6.3,3.4,5.6,2.4,2
138 | 6.4,3.1,5.5,1.8,2
139 | 6.0,3.0,4.8,1.8,2
140 | 6.9,3.1,5.4,2.1,2
141 | 6.7,3.1,5.6,2.4,2
142 | 6.9,3.1,5.1,2.3,2
143 | 5.8,2.7,5.1,1.9,2
144 | 6.8,3.2,5.9,2.3,2
145 | 6.7,3.3,5.7,2.5,2
146 | 6.7,3.0,5.2,2.3,2
147 | 6.3,2.5,5.0,1.9,2
148 | 6.5,3.0,5.2,2.0,2
149 | 6.2,3.4,5.4,2.3,2
150 | 5.9,3.0,5.1,1.8,2
151 |
--------------------------------------------------------------------------------
/dl4j-deeplearning-iris/src/main/resources/generatedModels/DL4J/DL4J_Iris_Model.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaiwaehner/kafka-streams-machine-learning-examples/3977e3928d2719fc710bb3d202851ee8904be2e7/dl4j-deeplearning-iris/src/main/resources/generatedModels/DL4J/DL4J_Iris_Model.zip
--------------------------------------------------------------------------------
/dl4j-deeplearning-iris/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | log4j.rootLogger=INFO, stdout
2 |
3 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender
4 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
5 | log4j.appender.stdout.layout.ConversionPattern=%p %m (%c:%L) %n
--------------------------------------------------------------------------------
/dl4j-deeplearning-iris/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java:
--------------------------------------------------------------------------------
1 | package com.github.jukkakarvanen.kafka.streams.integration.utils;
2 |
3 | import org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster;
4 | import org.slf4j.Logger;
5 | import org.slf4j.LoggerFactory;
6 |
7 | import java.util.Properties;
8 |
9 | /** This is helper class to workaround for Failing stream tests in Windows environment KAFKA-6647.
10 | *
11 | * @author Jukka Karvanen
12 | *
13 | * The causing issue is https://issues.apache.org/jira/browse/KAFKA-6647
14 | * Replacing EmbeddedKafkaCluster with TestEmbeddedKafkaCluster will catch and ignore the exception
15 | * happening during the tear down of the test
16 | * The exception does not have affect to functionality
17 | */
18 |
19 | public class TestEmbeddedKafkaCluster extends EmbeddedKafkaCluster {
20 | private static final Logger log = LoggerFactory.getLogger(TestEmbeddedKafkaCluster.class);
21 |
22 | public TestEmbeddedKafkaCluster(int numBrokers) {
23 | super(numBrokers);
24 | }
25 |
26 | public TestEmbeddedKafkaCluster(int numBrokers, Properties brokerConfig) {
27 | super(numBrokers, brokerConfig);
28 | }
29 |
30 | public TestEmbeddedKafkaCluster(int numBrokers, Properties brokerConfig, long mockTimeMillisStart) {
31 | super(numBrokers, brokerConfig, mockTimeMillisStart);
32 | }
33 |
34 | public TestEmbeddedKafkaCluster(int numBrokers, Properties brokerConfig, long mockTimeMillisStart, long mockTimeNanoStart) {
35 | super(numBrokers, brokerConfig, mockTimeMillisStart, mockTimeNanoStart);
36 | }
37 |
38 | public void after() {
39 | try {
40 | super.after();
41 | } catch (RuntimeException e) {
42 | log.warn("Ignoring exception, test failing in Windows due this exception {}", e);
43 | }
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
/dl4j-deeplearning-iris/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java:
--------------------------------------------------------------------------------
1 | package com.github.jukkakarvanen.kafka.streams.integration.utils;
2 |
3 | import org.apache.kafka.common.utils.Time;
4 | import org.apache.kafka.streams.KafkaClientSupplier;
5 | import org.apache.kafka.streams.KafkaStreams;
6 | import org.apache.kafka.streams.Topology;
7 | import org.slf4j.Logger;
8 | import org.slf4j.LoggerFactory;
9 |
10 | import java.util.Properties;
11 |
12 | /** This is helper class to workaround for Failing stream tests in Windows environment KAFKA-6647.
13 | *
14 | * @author Jukka Karvanen
15 | *
16 | * The causing issue is https://issues.apache.org/jira/browse/KAFKA-6647
17 | * Replacing KafkaStreams with TestKafkaStreams will catch and ignore the exception caused by cleanUp
18 | * The exception does not have affect to functionality
19 | */
20 |
21 | public class TestKafkaStreams extends KafkaStreams {
22 | private static final Logger log = LoggerFactory.getLogger(TestKafkaStreams.class);
23 |
24 | public TestKafkaStreams(Topology topology, Properties props) {
25 | super(topology, props);
26 | }
27 |
28 | public TestKafkaStreams(Topology topology, Properties props, KafkaClientSupplier clientSupplier) {
29 | super(topology, props, clientSupplier);
30 | }
31 |
32 | public TestKafkaStreams(Topology topology, Properties props, Time time) {
33 | super(topology, props, time);
34 | }
35 |
36 | public TestKafkaStreams(Topology topology, Properties props, KafkaClientSupplier clientSupplier, Time time) {
37 | super(topology, props, clientSupplier, time);
38 | }
39 |
40 | public void cleanUp() {
41 | try {
42 | super.cleanUp();
43 | } catch (RuntimeException e) {
44 | log.warn("Ignoring exception, test failing in Windows due this exception {}", e);
45 | }
46 | }
47 | }
48 |
--------------------------------------------------------------------------------
/dl4j-deeplearning-iris/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_DL4J_DeepLearning_Iris_IntegrationTest.java:
--------------------------------------------------------------------------------
1 | package com.github.megachucky.kafka.streams.machinelearning.test;
2 |
3 | import static org.assertj.core.api.Assertions.assertThat;
4 |
5 | import java.io.File;
6 | import java.util.Arrays;
7 | import java.util.List;
8 | import java.util.Properties;
9 | import java.util.stream.Stream;
10 |
11 | import com.github.jukkakarvanen.kafka.streams.integration.utils.TestEmbeddedKafkaCluster;
12 | import org.apache.kafka.clients.consumer.ConsumerConfig;
13 | import org.apache.kafka.clients.producer.ProducerConfig;
14 | import org.apache.kafka.common.serialization.Serdes;
15 | import org.apache.kafka.common.serialization.StringDeserializer;
16 | import org.apache.kafka.common.serialization.StringSerializer;
17 | import org.apache.kafka.common.utils.MockTime;
18 | import org.apache.kafka.streams.KafkaStreams;
19 | import org.apache.kafka.streams.KeyValue;
20 | import org.apache.kafka.streams.StreamsBuilder;
21 | import org.apache.kafka.streams.StreamsConfig;
22 | import org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster;
23 | import org.apache.kafka.streams.integration.utils.IntegrationTestUtils;
24 | import org.apache.kafka.streams.kstream.KStream;
25 | import org.apache.kafka.test.TestUtils;
26 | import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
27 | import org.deeplearning4j.util.ModelSerializer;
28 | import org.junit.BeforeClass;
29 | import org.junit.ClassRule;
30 | import org.junit.Test;
31 | import org.nd4j.linalg.api.ndarray.INDArray;
32 | import org.nd4j.linalg.factory.Nd4j;
33 |
34 | /**
35 | *
36 | * @author Kai Waehner (www.kai-waehner.de)
37 | *
38 | * End-to-end integration test, using an embedded Kafka cluster and a
39 | * DL4J DeepLearning Model.
40 | *
41 | * Prediction of Iris Flower Type 1, 2 or 3. Model returns probability
42 | * for all three types, like [0.00/ 0.01/ 0.99].
43 | */
44 | public class Kafka_Streams_MachineLearning_DL4J_DeepLearning_Iris_IntegrationTest {
45 |
46 | @ClassRule
47 | public static final EmbeddedKafkaCluster CLUSTER = new TestEmbeddedKafkaCluster(1);
48 |
49 | private static final String inputTopic = "IrisInputTopic";
50 | private static final String outputTopic = "IrisOutputTopic";
51 |
52 | // Generated DL4J model
53 | private File locationDL4JModel = new File("src/main/resources/generatedModels/DL4J/DL4J_Iris_Model.zip");
54 |
55 | // Prediction Value
56 | private static String irisPrediction = "unknown";
57 |
58 | @BeforeClass
59 | public static void startKafkaCluster() throws Exception {
60 | CLUSTER.createTopic(inputTopic);
61 | CLUSTER.createTopic(outputTopic);
62 | }
63 |
64 | @Test
65 | public void shouldPredictIrisFlowerType() throws Exception {
66 |
67 | // Iris input data (the model returns probabilities for input being each of Iris
68 | // Type 1, 2 and 3)
69 | List inputValues = Arrays.asList("5.4,3.9,1.7,0.4", "7.0,3.2,4.7,1.4", "4.6,3.4,1.4,0.3");
70 |
71 | // Step 1: Configure and start the processor topology.
72 | Properties streamsConfiguration = new Properties();
73 | streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "kafka-streams-dl4j-iris-integration-test");
74 | streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
75 |
76 | // The commit interval for flushing records to state stores and
77 | // downstream must be lower than
78 | // this integration test's timeout (30 secs) to ensure we observe the
79 | // expected processing results.
80 | streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 10 * 1000);
81 | streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
82 | // Use a temporary directory for storing state, which will be
83 | // automatically removed after the test.
84 | streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getAbsolutePath());
85 |
86 | // Create DL4J object (see DeepLearning4J_CSV_Model.java)
87 | MultiLayerNetwork model = ModelSerializer.restoreMultiLayerNetwork(locationDL4JModel);
88 |
89 | // Configure Kafka Streams Application
90 | // Specify default (de)serializers for record keys and for record
91 | // values.
92 | streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
93 | streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
94 |
95 | // In the subsequent lines we define the processing topology of the
96 | // Streams application.
97 | final StreamsBuilder builder = new StreamsBuilder();
98 |
99 | // Construct a `KStream` from the input topic "IrisInputTopic", where
100 | // message values
101 | // represent lines of text (for the sake of this example, we ignore
102 | // whatever may be stored
103 | // in the message keys).
104 | final KStream irisInputLines = builder.stream(inputTopic);
105 |
106 | // Stream Processor (in this case 'foreach' to add custom logic, i.e. apply the
107 | // analytic model)
108 | irisInputLines.foreach((key, value) -> {
109 |
110 | if (value != null && !value.equals("")) {
111 | System.out.println("#####################");
112 | System.out.println("Iris Input:" + value);
113 |
114 | // TODO Easier way to map from String[] to double[] !!!
115 | String[] stringArray = value.split(",");
116 | Double[] doubleArray = Arrays.stream(stringArray).map(Double::valueOf).toArray(Double[]::new);
117 | double[] irisInput = Stream.of(doubleArray).mapToDouble(Double::doubleValue).toArray();
118 |
119 | // Inference
120 | INDArray input = Nd4j.create(irisInput);
121 | INDArray result = model.output(input);
122 |
123 | System.out.println("Probabilities: " + result.toString());
124 |
125 | irisPrediction = result.toString();
126 |
127 | }
128 |
129 | });
130 |
131 | // Transform message: Add prediction information
132 | KStream transformedMessage = irisInputLines
133 | .mapValues(value -> "Prediction: Iris Probability => " + irisPrediction);
134 |
135 | // Send prediction information to Output Topic
136 | transformedMessage.to(outputTopic);
137 |
138 | // Start Kafka Streams Application to process new incoming messages from
139 | // Input Topic
140 | final KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfiguration);
141 | streams.cleanUp();
142 | streams.start();
143 | System.out.println("Iris Prediction Microservice is running...");
144 | System.out.println("Input to Kafka Topic 'IrisInputTopic'; Output to Kafka Topic 'IrisOutputTopic'");
145 |
146 | //
147 | // Step 2: Produce some input data to the input topic.
148 | //
149 | Properties producerConfig = new Properties();
150 | producerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
151 | producerConfig.put(ProducerConfig.ACKS_CONFIG, "all");
152 | producerConfig.put(ProducerConfig.RETRIES_CONFIG, 0);
153 | producerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
154 | producerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
155 | IntegrationTestUtils.produceValuesSynchronously(inputTopic, inputValues, producerConfig, new MockTime());
156 |
157 | //
158 | // Step 3: Verify the application's output data.
159 | //
160 | Properties consumerConfig = new Properties();
161 | consumerConfig.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
162 | consumerConfig.put(ConsumerConfig.GROUP_ID_CONFIG,
163 | "machine-learning-example-integration-test-standard-consumer");
164 | consumerConfig.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
165 | consumerConfig.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
166 | consumerConfig.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
167 | List> response = IntegrationTestUtils
168 | .waitUntilMinKeyValueRecordsReceived(consumerConfig, outputTopic, 3);
169 | streams.close();
170 | assertThat(response).isNotNull();
171 |
172 | System.out.println("RESPONSE");
173 | System.out.println(response.get(0).value);
174 | System.out.println("RESPONSE");
175 |
176 | assertThat(response.get(0).value)
177 | .isEqualTo("Prediction: Iris Probability => [[ 0.0033, 0.1635, 0.8332]]");
178 |
179 | assertThat(response.get(1).value)
180 | .isEqualTo("Prediction: Iris Probability => [[ 9.3033e-5, 0.0030, 0.9969]]");
181 |
182 | assertThat(response.get(2).value)
183 | .isEqualTo("Prediction: Iris Probability => [[ 0.0113, 0.8152, 0.1736]]");
184 | }
185 |
186 | }
187 |
--------------------------------------------------------------------------------
/h2o-gbm/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
4 | 4.0.0
5 |
6 | com.github.kaiwaehner.kafka.streams.machinelearning
7 | h2o-gbm
8 | CP55_AK25
9 |
10 |
11 |
12 | confluent
13 | http://packages.confluent.io/maven/
14 |
15 |
16 |
17 |
18 | 1.8
19 | 2.5.0
20 | 2.12
21 | ${kafka.scala.version}.8
22 | 5.5.0
23 | UTF-8
24 |
25 |
26 |
27 |
28 |
31 |
32 |
33 | org.apache.kafka
34 | kafka-streams
35 | ${kafka.version}
36 |
37 |
38 |
39 |
40 | ai.h2o
41 | h2o-genmodel
42 | 3.14.0.1
43 |
44 |
45 |
46 |
47 |
48 | org.apache.kafka
49 | kafka-streams-test-utils
50 | ${kafka.version}
51 | test
52 |
53 |
54 |
55 | junit
56 | junit
57 | 4.12
58 | test
59 |
60 |
61 | org.assertj
62 | assertj-core
63 | 3.3.0
64 | test
65 |
66 |
67 | org.apache.kafka
68 | kafka_${kafka.scala.version}
69 | ${kafka.version}
70 | test
71 | test
72 |
73 |
74 | org.apache.kafka
75 | kafka-clients
76 | ${kafka.version}
77 | test
78 | test
79 |
80 |
81 | org.apache.kafka
82 | kafka-streams
83 | ${kafka.version}
84 | test
85 | test
86 |
87 |
88 | org.apache.curator
89 | curator-test
90 | 2.9.0
91 | test
92 |
93 |
94 | io.confluent
95 | kafka-schema-registry
96 | ${confluent.version}
97 | test
98 |
99 |
100 | io.confluent
101 | kafka-schema-registry
102 | ${confluent.version}
103 |
104 | tests
105 | test
106 |
107 |
108 | org.hamcrest
109 | hamcrest
110 | 2.1
111 | test
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 | org.apache.maven.plugins
120 | maven-compiler-plugin
121 | 3.6.1
122 |
123 | 1.8
124 | 1.8
125 |
126 |
127 |
128 |
129 |
130 | org.apache.maven.plugins
131 | maven-assembly-plugin
132 | 2.5.2
133 |
134 |
135 | jar-with-dependencies
136 |
137 |
138 |
139 | true
140 | com.github.megachucky.kafka.streams.machinelearning.Kafka_Streams_MachineLearning_H2O_DeepLearning_Example
141 |
142 |
143 |
144 |
145 |
146 | assemble-all
147 | package
148 |
149 | single
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 |
--------------------------------------------------------------------------------
/h2o-gbm/readme.md:
--------------------------------------------------------------------------------
1 | # Machine Learning + Kafka Streams Examples
2 |
3 | General info in main [Readme](../readme.md)
4 |
5 | ## Example 1 - Gradient Boosting with H2O.ai for Prediction of Flight Delays
6 |
7 | ### Use Case
8 |
9 | Gradient Boosting Method (GBM) to predict flight delays.
10 | A H2O generated GBM Java model (POJO) is instantiated and used in a Kafka Streams application to do inference on new events.
11 |
12 | ### Machine Learning Technology
13 |
14 | * [H2O](https://www.h2o.ai)
15 | * Check the [H2O demo](https://github.com/h2oai/h2o-2/wiki/Hacking-Airline-DataSet-with-H2O) to understand the test and how the model was built
16 | * You can re-use the generated Java model attached to this project ([gbm_pojo_test.java](src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/gbm_pojo_test.java)) or build your own model using R, Python, Flow UI or any other technologies supported by H2O framework.
17 |
18 | ### Source Code
19 |
20 | Business Logic (applying the analytic model to do the prediction):
21 | [Kafka_Streams_MachineLearning_H2O_Application.java](src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_Application.java)
22 |
23 | Specification of the used model:
24 | [Kafka_Streams_MachineLearning_H2O_GBM_Example.java](src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example.java)
25 |
26 | ### Automated Tests
27 |
28 | Unit Test using TopologyTestDriver:
29 | [Kafka_Streams_MachineLearning_H2O_GBM_ExampleTest.java](src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_ExampleTest.java)
30 |
31 | Integration Test using EmbeddedKafkaCluster:
32 | [Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java](src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java)
33 |
34 | ### Manual Testing
35 |
36 | You can easily test this by yourself. Here are the steps:
37 |
38 | * Start Kafka, e.g. with Confluent CLI:
39 |
40 | confluent local start kafka
41 | * Create topics AirlineInputTopic and AirlineOutputTopic
42 |
43 | kafka-topics --bootstrap-server localhost:9092 --create --topic AirlineInputTopic --partitions 3 --replication-factor 1
44 |
45 | kafka-topics --bootstrap-server localhost:9092 --create --topic AirlineOutputTopic --partitions 3 --replication-factor 1
46 | * Start the Kafka Streams app:
47 |
48 | java -cp h2o-gbm/target/h2o-gbm-CP55_AK25-jar-with-dependencies.jar com.github.megachucky.kafka.streams.machinelearning.Kafka_Streams_MachineLearning_H2O_GBM_Example
49 | * Send messages, e.g. with kafkacat:
50 |
51 | echo -e "1987,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES" | kafkacat -b localhost:9092 -P -t AirlineInputTopic
52 | * Consume predictions:
53 |
54 | kafka-console-consumer --bootstrap-server localhost:9092 --topic AirlineOutputTopic --from-beginning
55 | * Find more details in the unit test...
56 |
57 | ## H2O Deep Learning instead of H2O GBM Model
58 |
59 | The project includes another example with similar code to use a [H2O Deep Learning model](src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/deeplearning_fe7c1f02_08ec_4070_b784_c2531147e451.java) instead of H2O GBM Model: [Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java](src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java)
60 | This shows how you can easily test or replace different analytic models for one use case, or even use them for A/B testing.
61 |
62 | ### Source Code
63 |
64 | Business Logic (applying the analytic model to do the prediction):
65 | [Kafka_Streams_MachineLearning_H2O_Application.java](src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_Application.java)
66 |
67 | Specification of the used model:
68 | [Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.java](src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.java)
69 |
70 | ### Unit Test
71 |
72 | Unit Test using TopologyTestDriver:
73 | [Kafka_Streams_MachineLearning_H2O_DeepLearning_ExampleTest.java](src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_ExampleTest.java)
74 |
75 | Integration Test using EmbeddedKafkaCluster: [Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java](src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java)
76 |
77 | ### Manual Testing
78 |
79 | Same as above but change class to start app:
80 |
81 | * Start the Kafka Streams app:
82 |
83 | java -cp h2o-gbm/target/h2o-gbm-CP55_AK25-jar-with-dependencies.jar com.github.megachucky.kafka.streams.machinelearning.Kafka_Streams_MachineLearning_H2O_DeepLearning_Example
--------------------------------------------------------------------------------
/h2o-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_Application.java:
--------------------------------------------------------------------------------
1 | package com.github.megachucky.kafka.streams.machinelearning;
2 |
3 | import hex.genmodel.easy.EasyPredictModelWrapper;
4 | import hex.genmodel.easy.RowData;
5 | import hex.genmodel.easy.exception.PredictException;
6 | import hex.genmodel.easy.prediction.BinomialModelPrediction;
7 | import org.apache.kafka.common.serialization.Serdes;
8 | import org.apache.kafka.streams.KafkaStreams;
9 | import org.apache.kafka.streams.StreamsBuilder;
10 | import org.apache.kafka.streams.StreamsConfig;
11 | import org.apache.kafka.streams.Topology;
12 | import org.apache.kafka.streams.kstream.KStream;
13 | import org.apache.kafka.streams.kstream.ValueMapper;
14 |
15 | import java.util.Properties;
16 |
17 | /**
18 | * @author Kai Waehner (www.kai-waehner.de)
19 | *
20 | * Creates a new Kafka Streams application for prediction of flight
21 | * delays The application uses the GBM model (built with
22 | * H2O.ai) to infer messages sent to Kafka topic "AirlineInputTopic".
23 | * The outcome of model inference is sent to Kafka topic
24 | * "AirlineOutputTopic".
25 | *
26 | * Refactored that all model use same base class, only modelName and applicationId passed in
27 | * Used Kafka_Streams_MachineLearning_H2O_GBM_Example_Refactored class as baseline for this
28 | * @author Jukka Karvanen / jukinimi.com
29 | *
30 | */
31 | public class Kafka_Streams_MachineLearning_H2O_Application {
32 |
33 | public static final String INPUT_TOPIC = "AirlineInputTopic";
34 | public static final String OUTPUT_TOPIC = "AirlineOutputTopic";
35 |
36 |
37 | public static void execute(String bootstrapServers, String applictionId, String modelClassName) throws Exception {
38 |
39 | final Properties streamsConfiguration = getStreamConfiguration(bootstrapServers, applictionId);
40 | Topology topology = getStreamTopology(modelClassName);
41 |
42 | // Start Kafka Streams Application to process new incoming messages from Input
43 | // Topic
44 | final KafkaStreams streams = new KafkaStreams(topology, streamsConfiguration);
45 | streams.cleanUp();
46 | streams.start();
47 | System.out.println("Airline Delay Prediction Microservice is running...");
48 | System.out.println("Input to Kafka Topic 'AirlineInputTopic'; Output to Kafka Topic 'AirlineOutputTopic'");
49 |
50 | // Add shutdown hook to respond to SIGTERM and gracefully close Kafka
51 | // Streams
52 | Runtime.getRuntime().addShutdownHook(new Thread(streams::close));
53 |
54 | }
55 |
56 | static Properties getStreamConfiguration(String bootstrapServers, String applicationId) {
57 | final Properties streamsConfiguration = new Properties();
58 | // Give the Streams application a unique name. The name must be unique
59 | // in the Kafka cluster
60 | // against which the application is run.
61 | streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, applicationId);
62 | // Where to find Kafka broker(s).
63 | streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
64 |
65 | // Specify default (de)serializers for record keys and for record
66 | // values.
67 | streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
68 | streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
69 |
70 | // For illustrative purposes we disable record caches
71 | streamsConfiguration.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0);
72 | return streamsConfiguration;
73 | }
74 |
75 | static Topology getStreamTopology(String modelClassName) throws InstantiationException, IllegalAccessException, ClassNotFoundException {
76 | // Create H2O object (see gbm_pojo_test.java)
77 | hex.genmodel.GenModel rawModel;
78 | rawModel = (hex.genmodel.GenModel) Class.forName(modelClassName).newInstance();
79 | EasyPredictModelWrapper model = new EasyPredictModelWrapper(rawModel);
80 |
81 | // In the subsequent lines we define the processing topology of the
82 | // Streams application.
83 | final StreamsBuilder builder = new StreamsBuilder();
84 |
85 | // Construct a `KStream` from the input topic "AirlineInputTopic", where
86 | // message values
87 | // represent lines of text (for the sake of this example, we ignore
88 | // whatever may be stored
89 | // in the message keys).
90 | final KStream airlineInputLines = builder.stream(INPUT_TOPIC);
91 |
92 | // Stream Processor (in this case 'mapValues' to add custom logic, i.e. apply
93 | // the analytic model)
94 | KStream transformedMessage =
95 | airlineInputLines.mapValues(value -> {
96 |
97 | // Year,Month,DayofMonth,DayOfWeek,DepTime,CRSDepTime,ArrTime,CRSArrTime,UniqueCarrier,FlightNum,TailNum,ActualElapsedTime,CRSElapsedTime,AirTime,ArrDelay,DepDelay,Origin,Dest,Distance,TaxiIn,TaxiOut,Cancelled,CancellationCode,Diverted,CarrierDelay,WeatherDelay,NASDelay,SecurityDelay,LateAircraftDelay,IsArrDelayed,IsDepDelayed
98 | // value:
99 | // YES, probably delayed:
100 | // 1987,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES
101 | // NO, probably not delayed:
102 | // 1999,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES
103 |
104 | if (value != null && !value.equals("")) {
105 | System.out.println("#####################");
106 | System.out.println("Flight Input:" + value);
107 |
108 | String[] valuesArray = value.split(",");
109 |
110 | RowData row = new RowData();
111 | row.put("Year", valuesArray[0]);
112 | row.put("Month", valuesArray[1]);
113 | row.put("DayofMonth", valuesArray[2]);
114 | row.put("DayOfWeek", valuesArray[3]);
115 | row.put("CRSDepTime", valuesArray[5]);
116 | row.put("UniqueCarrier", valuesArray[8]);
117 | row.put("Origin", valuesArray[16]);
118 | row.put("Dest", valuesArray[17]);
119 | BinomialModelPrediction p = null;
120 | try {
121 | p = model.predictBinomial(row);
122 | } catch (PredictException e) {
123 | e.printStackTrace();
124 | }
125 |
126 | System.out.println("Label (aka prediction) is flight departure delayed: " + p.label);
127 | System.out.print("Class probabilities: ");
128 | for (int i = 0; i < p.classProbabilities.length; i++) {
129 | if (i > 0) {
130 | System.out.print(",");
131 | }
132 | System.out.print(p.classProbabilities[i]);
133 | }
134 | System.out.println("");
135 | System.out.println("#####################");
136 | return "Prediction: Is Airline delayed? => " + p.label;
137 | }
138 | //No prediction
139 | return null;
140 | });
141 |
142 | // Send prediction information to Output Topic
143 | transformedMessage.to(OUTPUT_TOPIC);
144 | return builder.build();
145 | }
146 | }
147 |
--------------------------------------------------------------------------------
/h2o-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.java:
--------------------------------------------------------------------------------
1 | package com.github.megachucky.kafka.streams.machinelearning;
2 |
3 | import java.util.Properties;
4 |
5 | import org.apache.kafka.common.serialization.Serdes;
6 | import org.apache.kafka.streams.KafkaStreams;
7 | import org.apache.kafka.streams.StreamsBuilder;
8 | import org.apache.kafka.streams.StreamsConfig;
9 | import org.apache.kafka.streams.kstream.KStream;
10 |
11 | import hex.genmodel.easy.EasyPredictModelWrapper;
12 | import hex.genmodel.easy.RowData;
13 | import hex.genmodel.easy.exception.PredictException;
14 | import hex.genmodel.easy.prediction.BinomialModelPrediction;
15 |
16 | /**
17 | * @author Kai Waehner (www.kai-waehner.de)
18 | *
19 | * Creates a new Kafka Streams application for prediction of flight delays
20 | * The application uses the GBM model "deeplearning_fe7c1f02_08ec_4070_b784_c2531147e451" (built with H2O.ai) to infer messages
21 | * sent to Kafka topic "AirlineInputTopic". The outcome of model inference is sent to
22 | * Kafka topic "AirlineOutputTopic".
23 | *
24 | * Main logic now in parent Class
25 | * Refactoring to utilize common @link Kafka_Streams_MachineLearning_H2O_Application class
26 | * @author Jukka Karvanen / jukinimi.com
27 | *
28 | */
29 | public class Kafka_Streams_MachineLearning_H2O_DeepLearning_Example extends Kafka_Streams_MachineLearning_H2O_Application {
30 |
31 | // Name of the generated H2O model
32 | static String modelClassName = "com.github.megachucky.kafka.streams.machinelearning.models.deeplearning_fe7c1f02_08ec_4070_b784_c2531147e451";
33 |
34 | static final String APPLICATION_ID = "kafka-streams-h2o-deeplearning-example";
35 |
36 | public static void main(final String[] args) throws Exception {
37 |
38 | // Configure Kafka Streams Application
39 | final String bootstrapServers = args.length > 0 ? args[0] : "localhost:9092";
40 | execute(bootstrapServers, APPLICATION_ID, modelClassName);
41 | }
42 |
43 | }
44 |
--------------------------------------------------------------------------------
/h2o-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example.java:
--------------------------------------------------------------------------------
1 | package com.github.megachucky.kafka.streams.machinelearning;
2 |
3 | import java.util.Properties;
4 |
5 | import org.apache.kafka.common.serialization.Serdes;
6 | import org.apache.kafka.streams.KafkaStreams;
7 | import org.apache.kafka.streams.StreamsBuilder;
8 | import org.apache.kafka.streams.StreamsConfig;
9 | import org.apache.kafka.streams.Topology;
10 | import org.apache.kafka.streams.kstream.KStream;
11 |
12 | import hex.genmodel.easy.EasyPredictModelWrapper;
13 | import hex.genmodel.easy.RowData;
14 | import hex.genmodel.easy.exception.PredictException;
15 | import hex.genmodel.easy.prediction.BinomialModelPrediction;
16 |
17 | /**
18 | * @author Kai Waehner (www.kai-waehner.de)
19 | *
20 | * Creates a new Kafka Streams application for prediction of flight
21 | * delays The application uses the GBM model "gbm_pojo_test" (built with
22 | * H2O.ai) to infer messages sent to Kafka topic "AirlineInputTopic".
23 | * The outcome of model inference is sent to Kafka topic
24 | * "AirlineOutputTopic".
25 | *
26 | * * Main logic now in parent Class
27 | * * Refactoring to utilize common @link Kafka_Streams_MachineLearning_H2O_Application class
28 | * * @author Jukka Karvanen / jukinimi.com
29 | */
30 | public class Kafka_Streams_MachineLearning_H2O_GBM_Example extends Kafka_Streams_MachineLearning_H2O_Application {
31 | // Name of the generated H2O model
32 | static String modelClassName = "com.github.megachucky.kafka.streams.machinelearning.models.gbm_pojo_test";
33 |
34 | static final String APPLICATION_ID = "kafka-streams-h2o-gbm-example";
35 |
36 | public static void main(final String[] args) throws Exception {
37 |
38 | // Configure Kafka Streams Application
39 | final String bootstrapServers = args.length > 0 ? args[0] : "localhost:9092";
40 | execute(bootstrapServers, APPLICATION_ID, modelClassName);
41 | }
42 |
43 | }
44 |
--------------------------------------------------------------------------------
/h2o-gbm/src/main/resources/generatedModels/DeepWater_model_python_1503570558230_1.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaiwaehner/kafka-streams-machine-learning-examples/3977e3928d2719fc710bb3d202851ee8904be2e7/h2o-gbm/src/main/resources/generatedModels/DeepWater_model_python_1503570558230_1.zip
--------------------------------------------------------------------------------
/h2o-gbm/src/main/resources/generatedModels/GBM_model_python_1503397740678_1.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaiwaehner/kafka-streams-machine-learning-examples/3977e3928d2719fc710bb3d202851ee8904be2e7/h2o-gbm/src/main/resources/generatedModels/GBM_model_python_1503397740678_1.zip
--------------------------------------------------------------------------------
/h2o-gbm/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | log4j.rootLogger=INFO, stdout
2 |
3 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender
4 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
5 | log4j.appender.stdout.layout.ConversionPattern=%p %m (%c:%L) %n
--------------------------------------------------------------------------------
/h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_ExampleTest.java:
--------------------------------------------------------------------------------
1 | package com.github.megachucky.kafka.streams.machinelearning;
2 |
3 | import org.apache.kafka.clients.producer.ProducerRecord;
4 | import org.apache.kafka.common.serialization.StringDeserializer;
5 | import org.apache.kafka.common.serialization.StringSerializer;
6 | import org.apache.kafka.streams.KeyValue;
7 | import org.apache.kafka.streams.TopologyTestDriver;
8 | import org.apache.kafka.streams.test.ConsumerRecordFactory;
9 | import org.junit.After;
10 | import org.junit.Before;
11 | import org.junit.Test;
12 |
13 | import java.util.Arrays;
14 | import java.util.List;
15 | import java.util.stream.Collectors;
16 |
17 | import static org.assertj.core.api.AssertionsForInterfaceTypes.assertThat;
18 |
19 | /**
20 | * TopologyTestDriver based test about stream processing of
21 | * Kafka_Streams_TensorFlow_Image_Recognition_Example.
22 | *
23 | * @author Jukka Karvanen / jukinimi.com * Unit Test of
24 | * {@link Kafka_Streams_MachineLearning_H2O_DeepLearning_Example}, using
25 | * an TopologyTestDriver and a H2O DeepLearning model.
26 | *
27 | */
28 |
29 | public class Kafka_Streams_MachineLearning_H2O_DeepLearning_ExampleTest {
30 | private TopologyTestDriver testDriver;
31 |
32 | private StringDeserializer stringDeserializer = new StringDeserializer();
33 | private ConsumerRecordFactory recordFactory = new ConsumerRecordFactory<>(
34 | new StringSerializer(), new StringSerializer());
35 |
36 | @Before
37 | public void setup() throws IllegalAccessException, ClassNotFoundException, InstantiationException {
38 | testDriver = new TopologyTestDriver(
39 | Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.getStreamTopology(
40 | Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.modelClassName),
41 | Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.getStreamConfiguration(
42 | "localhost:9092",
43 | Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.APPLICATION_ID));
44 | }
45 |
46 | @After
47 | public void tearDown() {
48 | try {
49 | testDriver.close();
50 | } catch (RuntimeException e) {
51 | // https://issues.apache.org/jira/browse/KAFKA-6647 causes exception when
52 | // executed in Windows, ignoring it
53 | // Logged stacktrace cannot be avoided
54 | System.out.println("Ignoring exception, test failing in Windows due this exception:"
55 | + e.getLocalizedMessage());
56 | }
57 | }
58 |
59 | private String getOutput() {
60 | ProducerRecord output = testDriver.readOutput(
61 | Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.OUTPUT_TOPIC, stringDeserializer,
62 | stringDeserializer);
63 | assertThat(output).isNotNull();
64 | return output.value();
65 | }
66 |
67 | /**
68 | * Simple test validating only the prediction part of the output
69 | */
70 | @Test
71 | public void testOne() {
72 | testDriver.pipeInput(recordFactory.create(
73 | Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.INPUT_TOPIC, null,
74 | "1987,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES",
75 | 1L));
76 | assertThat(getOutput()).isEqualTo("Prediction: Is Airline delayed? => YES");
77 | }
78 |
79 | /**
80 | * Test based on
81 | * Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest
82 | *
83 | */
84 | @Test
85 | public void testList() {
86 | // Flight data (one single flight) --> We want to predict if it will be
87 | // delayed or not
88 | List inputValues = Arrays.asList(
89 | "1987,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES",
90 | "1999,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES");
91 | List> records = inputValues.stream()
92 | .map(v -> new KeyValue(null, v)).collect(Collectors.toList());
93 |
94 | testDriver.pipeInput(recordFactory.create(
95 | Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.INPUT_TOPIC, records, 1L, 100L));
96 | assertThat(getOutput()).isEqualTo("Prediction: Is Airline delayed? => YES");
97 | // This model predict also another flight to be delayed
98 | assertThat(getOutput()).isEqualTo("Prediction: Is Airline delayed? => YES");
99 | }
100 |
101 | }
102 |
--------------------------------------------------------------------------------
/h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java:
--------------------------------------------------------------------------------
1 | package com.github.megachucky.kafka.streams.machinelearning;
2 |
3 | import static org.assertj.core.api.Assertions.assertThat;
4 |
5 | import java.util.Arrays;
6 | import java.util.List;
7 | import java.util.Properties;
8 |
9 | import com.github.megachucky.kafka.streams.machinelearning.TestEmbeddedKafkaCluster;
10 | import org.apache.kafka.clients.consumer.ConsumerConfig;
11 | import org.apache.kafka.clients.producer.ProducerConfig;
12 | import org.apache.kafka.common.serialization.Serdes;
13 | import org.apache.kafka.common.serialization.StringDeserializer;
14 | import org.apache.kafka.common.serialization.StringSerializer;
15 | import org.apache.kafka.common.utils.MockTime;
16 | import org.apache.kafka.streams.KafkaStreams;
17 | import org.apache.kafka.streams.KeyValue;
18 | import org.apache.kafka.streams.StreamsBuilder;
19 | import org.apache.kafka.streams.StreamsConfig;
20 | import org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster;
21 | import org.apache.kafka.streams.integration.utils.IntegrationTestUtils;
22 | import org.apache.kafka.streams.kstream.KStream;
23 | import org.apache.kafka.test.TestUtils;
24 | import org.junit.BeforeClass;
25 | import org.junit.ClassRule;
26 | import org.junit.Test;
27 |
28 | import hex.genmodel.easy.EasyPredictModelWrapper;
29 | import hex.genmodel.easy.RowData;
30 | import hex.genmodel.easy.exception.PredictException;
31 | import hex.genmodel.easy.prediction.BinomialModelPrediction;
32 |
33 | /**
34 | *
35 | * @author Kai Waehner (www.kai-waehner.de)
36 | *
37 | * End-to-end integration test, using an embedded Kafka cluster and a
38 | * H2O.ai DeepLearning Model. Mostly identical to the GBM example, but
39 | * uses another Model which was built using H2O's DeepLearning
40 | * implementation.
41 | *
42 | */
43 | public class Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest {
44 |
45 | @ClassRule
46 | public static final EmbeddedKafkaCluster CLUSTER = new TestEmbeddedKafkaCluster(1);
47 |
48 | private static final String inputTopic = "AirlineInputTopic";
49 | private static final String outputTopic = "AirlineOutputTopic";
50 |
51 | // Name of the generated H2O.ai model
52 | private static String modelClassName = "com.github.megachucky.kafka.streams.machinelearning.models.deeplearning_fe7c1f02_08ec_4070_b784_c2531147e451";
53 |
54 | // Prediction Value
55 | private static String airlineDelayPreduction = "unknown";
56 |
57 | @BeforeClass
58 | public static void startKafkaCluster() throws Exception {
59 | CLUSTER.createTopic(inputTopic);
60 | CLUSTER.createTopic(outputTopic);
61 | }
62 |
63 | @Test
64 | public void shouldPredictFlightDelay() throws Exception {
65 |
66 | // Flight data (one single flight) --> We want to predict if it will be
67 | // delayed or not
68 | List inputValues = Arrays.asList(
69 | "1987,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES",
70 | "1999,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES");
71 |
72 | // Step 1: Configure and start the processor topology.
73 | //
74 |
75 | Properties streamsConfiguration = new Properties();
76 | streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG,
77 | "kafka-streams-h2o-deeplearning-integration-test");
78 | streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
79 |
80 | // The commit interval for flushing records to state stores and
81 | // downstream must be lower than
82 | // this integration test's timeout (30 secs) to ensure we observe the
83 | // expected processing results.
84 | streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 10 * 1000);
85 | streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
86 | // Use a temporary directory for storing state, which will be
87 | // automatically removed after the test.
88 | streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getAbsolutePath());
89 |
90 | // Create H2O object (see
91 | // deeplearning_fe7c1f02_08ec_4070_b784_c2531147e451.java)
92 | hex.genmodel.GenModel rawModel;
93 | rawModel = (hex.genmodel.GenModel) Class.forName(modelClassName).newInstance();
94 | EasyPredictModelWrapper model = new EasyPredictModelWrapper(rawModel);
95 |
96 | // Configure Kafka Streams Application
97 | // Specify default (de)serializers for record keys and for record
98 | // values.
99 | streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
100 | streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
101 |
102 | // In the subsequent lines we define the processing topology of the
103 | // Streams application.
104 | final StreamsBuilder builder = new StreamsBuilder();
105 |
106 | // Construct a `KStream` from the input topic "AirlineInputTopic", where
107 | // message values
108 | // represent lines of text (for the sake of this example, we ignore
109 | // whatever may be stored
110 | // in the message keys).
111 | final KStream airlineInputLines = builder.stream(inputTopic);
112 |
113 | // Stream Processor (in this case 'foreach' to add custom logic, i.e.
114 | // apply the analytic model)
115 | airlineInputLines.foreach((key, value) -> {
116 |
117 | // Year,Month,DayofMonth,DayOfWeek,DepTime,CRSDepTime,ArrTime,CRSArrTime,UniqueCarrier,FlightNum,TailNum,ActualElapsedTime,CRSElapsedTime,AirTime,ArrDelay,DepDelay,Origin,Dest,Distance,TaxiIn,TaxiOut,Cancelled,CancellationCode,Diverted,CarrierDelay,WeatherDelay,NASDelay,SecurityDelay,LateAircraftDelay,IsArrDelayed,IsDepDelayed
118 | // value:
119 | // 1987,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES
120 | if (value != null && !value.equals("")) {
121 | System.out.println("#####################");
122 | System.out.println("Flight Input:" + value);
123 |
124 | String[] valuesArray = value.split(",");
125 |
126 | RowData row = new RowData();
127 | row.put("Year", valuesArray[0]);
128 | row.put("Month", valuesArray[1]);
129 | row.put("DayofMonth", valuesArray[2]);
130 | row.put("DayOfWeek", valuesArray[3]);
131 | row.put("CRSDepTime", valuesArray[5]);
132 | row.put("UniqueCarrier", valuesArray[8]);
133 | row.put("Origin", valuesArray[16]);
134 | row.put("Dest", valuesArray[17]);
135 | BinomialModelPrediction p = null;
136 | try {
137 | p = model.predictBinomial(row);
138 | } catch (PredictException e) {
139 | e.printStackTrace();
140 | }
141 |
142 | airlineDelayPreduction = p.label;
143 | System.out.println("Label (aka prediction) is flight departure delayed: " + p.label);
144 | System.out.print("Class probabilities: ");
145 | for (int i = 0; i < p.classProbabilities.length; i++) {
146 | if (i > 0) {
147 | System.out.print(",");
148 | }
149 | System.out.print(p.classProbabilities[i]);
150 | }
151 | System.out.println("");
152 | System.out.println("#####################");
153 |
154 | }
155 |
156 | });
157 |
158 | // Transform message: Add prediction information
159 | KStream transformedMessage = airlineInputLines
160 | .mapValues(value -> "Prediction: Is Airline delayed? => " + airlineDelayPreduction);
161 |
162 | // Send prediction information to Output Topic
163 | transformedMessage.to(outputTopic);
164 |
165 | // Start Kafka Streams Application to process new incoming messages from
166 | // Input Topic
167 | final KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfiguration);
168 | streams.cleanUp();
169 | streams.start();
170 | System.out.println("Airline Delay Prediction Microservice is running...");
171 | System.out.println("Input to Kafka Topic 'AirlineInputTopic'; Output to Kafka Topic 'AirlineOutpuTopic'");
172 |
173 | //
174 | // Step 2: Produce some input data to the input topic.
175 | //
176 | Properties producerConfig = new Properties();
177 | producerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
178 | producerConfig.put(ProducerConfig.ACKS_CONFIG, "all");
179 | producerConfig.put(ProducerConfig.RETRIES_CONFIG, 0);
180 | producerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
181 | producerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
182 | IntegrationTestUtils.produceValuesSynchronously(inputTopic, inputValues, producerConfig, new MockTime());
183 |
184 | //
185 | // Step 3: Verify the application's output data.
186 | //
187 | Properties consumerConfig = new Properties();
188 | consumerConfig.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
189 | consumerConfig.put(ConsumerConfig.GROUP_ID_CONFIG,
190 | "machine-learning-example-integration-test-standard-consumer");
191 | consumerConfig.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
192 | consumerConfig.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
193 | consumerConfig.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
194 | List> response = IntegrationTestUtils
195 | .waitUntilMinKeyValueRecordsReceived(consumerConfig, outputTopic, 2);
196 | streams.close();
197 | assertThat(response).isNotNull();
198 | assertThat(response.get(0).value).isEqualTo("Prediction: Is Airline delayed? => YES");
199 |
200 | assertThat(response.get(1).value).isEqualTo("Prediction: Is Airline delayed? => YES");
201 | }
202 |
203 | }
204 |
--------------------------------------------------------------------------------
/h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_ExampleTest.java:
--------------------------------------------------------------------------------
1 | package com.github.megachucky.kafka.streams.machinelearning;
2 |
3 | import org.apache.kafka.clients.producer.ProducerRecord;
4 | import org.apache.kafka.common.serialization.StringDeserializer;
5 | import org.apache.kafka.common.serialization.StringSerializer;
6 | import org.apache.kafka.streams.KeyValue;
7 | import org.apache.kafka.streams.TopologyTestDriver;
8 | import org.apache.kafka.streams.test.ConsumerRecordFactory;
9 | import org.junit.After;
10 | import org.junit.Before;
11 | import org.junit.Test;
12 |
13 | import java.io.IOException;
14 | import java.util.Arrays;
15 | import java.util.List;
16 | import java.util.stream.Collectors;
17 |
18 | import static org.assertj.core.api.AssertionsForInterfaceTypes.assertThat;
19 |
20 | /**
21 | * TopologyTestDriver based test about stream processing of
22 | * Kafka_Streams_TensorFlow_Image_Recognition_Example.
23 | *
24 | * @author Jukka Karvanen / jukinimi.com * Unit Test of
25 | * {@link Kafka_Streams_MachineLearning_H2O_GBM_Example}, using an
26 | * TopologyTestDriver and a H2O GBM model.
27 | *
28 | */
29 |
30 | public class Kafka_Streams_MachineLearning_H2O_GBM_ExampleTest {
31 | private TopologyTestDriver testDriver;
32 |
33 | private StringDeserializer stringDeserializer = new StringDeserializer();
34 | private ConsumerRecordFactory recordFactory = new ConsumerRecordFactory<>(
35 | new StringSerializer(), new StringSerializer());
36 |
37 | @Before
38 | public void setup() throws IllegalAccessException, ClassNotFoundException, InstantiationException {
39 | testDriver = new TopologyTestDriver(
40 | Kafka_Streams_MachineLearning_H2O_GBM_Example.getStreamTopology(
41 | Kafka_Streams_MachineLearning_H2O_GBM_Example.modelClassName),
42 | Kafka_Streams_MachineLearning_H2O_GBM_Example.getStreamConfiguration("localhost:9092",
43 | Kafka_Streams_MachineLearning_H2O_GBM_Example.APPLICATION_ID));
44 | }
45 |
46 | @After
47 | public void tearDown() {
48 | try {
49 | testDriver.close();
50 | } catch (RuntimeException e) {
51 | // https://issues.apache.org/jira/browse/KAFKA-6647 causes exception when
52 | // executed in Windows, ignoring it
53 | // Logged stacktrace cannot be avoided
54 | System.out.println("Ignoring exception, test failing in Windows due this exception:"
55 | + e.getLocalizedMessage());
56 | }
57 | }
58 |
59 | private String getOutput() {
60 | ProducerRecord output = testDriver.readOutput(
61 | Kafka_Streams_MachineLearning_H2O_GBM_Example.OUTPUT_TOPIC, stringDeserializer,
62 | stringDeserializer);
63 | assertThat(output).isNotNull();
64 | return output.value();
65 | }
66 |
67 | /**
68 | * Simple test validating only the prediction part of the output
69 | */
70 | @Test
71 | public void testOne() {
72 | testDriver.pipeInput(recordFactory.create(Kafka_Streams_MachineLearning_H2O_GBM_Example.INPUT_TOPIC,
73 | null,
74 | "1987,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES",
75 | 1L));
76 | assertThat(getOutput()).isEqualTo("Prediction: Is Airline delayed? => YES");
77 | }
78 |
79 | /**
80 | * Test based on
81 | * Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest
82 | *
83 | */
84 | @Test
85 | public void testList() {
86 | // Flight data (one single flight) --> We want to predict if it will be
87 | // delayed or not
88 | List inputValues = Arrays.asList(
89 | "1987,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES",
90 | "1999,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES");
91 | List> records = inputValues.stream()
92 | .map(v -> new KeyValue(null, v)).collect(Collectors.toList());
93 |
94 | testDriver.pipeInput(recordFactory.create(Kafka_Streams_MachineLearning_H2O_GBM_Example.INPUT_TOPIC,
95 | records, 1L, 100L));
96 | assertThat(getOutput()).isEqualTo("Prediction: Is Airline delayed? => YES");
97 | assertThat(getOutput()).isEqualTo("Prediction: Is Airline delayed? => NO");
98 | }
99 |
100 | }
101 |
--------------------------------------------------------------------------------
/h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java:
--------------------------------------------------------------------------------
1 | package com.github.megachucky.kafka.streams.machinelearning;
2 |
3 | import static org.assertj.core.api.Assertions.assertThat;
4 |
5 | import java.util.Arrays;
6 | import java.util.List;
7 | import java.util.Properties;
8 |
9 | import com.github.megachucky.kafka.streams.machinelearning.TestEmbeddedKafkaCluster;
10 | import org.apache.kafka.clients.consumer.ConsumerConfig;
11 | import org.apache.kafka.clients.producer.ProducerConfig;
12 | import org.apache.kafka.common.serialization.Serdes;
13 | import org.apache.kafka.common.serialization.StringDeserializer;
14 | import org.apache.kafka.common.serialization.StringSerializer;
15 | import org.apache.kafka.streams.KafkaStreams;
16 | import org.apache.kafka.streams.KeyValue;
17 | import org.apache.kafka.streams.StreamsBuilder;
18 | import org.apache.kafka.streams.StreamsConfig;
19 | import org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster;
20 | import org.apache.kafka.streams.integration.utils.IntegrationTestUtils;
21 | import org.apache.kafka.streams.kstream.KStream;
22 | import org.apache.kafka.test.TestUtils;
23 | import org.junit.BeforeClass;
24 | import org.junit.ClassRule;
25 | import org.junit.Test;
26 |
27 | import com.github.megachucky.kafka.streams.machinelearning.Kafka_Streams_MachineLearning_H2O_GBM_Example;
28 |
29 | import hex.genmodel.easy.EasyPredictModelWrapper;
30 | import hex.genmodel.easy.RowData;
31 | import hex.genmodel.easy.exception.PredictException;
32 | import hex.genmodel.easy.prediction.BinomialModelPrediction;
33 | import kafka.utils.MockTime;
34 |
35 | /**
36 | *
37 | * @author Kai Waehner (www.kai-waehner.de)
38 | *
39 | * End-to-end integration test based on
40 | * {@link Kafka_Streams_MachineLearning_H2O_GBM_Example}, using an
41 | * embedded Kafka cluster and a H2O.ai GBM Model.
42 | *
43 | * See {@link Kafka_Streams_MachineLearning_H2O_GBM_Example} for further
44 | * documentation.
45 | *
46 | */
47 | public class Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest {
48 |
49 | @ClassRule
50 | // public static final EmbeddedSingleNodeKafkaCluster CLUSTER = new
51 | // EmbeddedSingleNodeKafkaCluster();
52 |
53 | public static final EmbeddedKafkaCluster CLUSTER = new TestEmbeddedKafkaCluster(1);
54 |
55 | private static final String inputTopic = "AirlineInputTopic";
56 | private static final String outputTopic = "AirlineOutputTopic";
57 |
58 | // Name of the generated H2O.ai model
59 | private static String modelClassName = "com.github.megachucky.kafka.streams.machinelearning.models.gbm_pojo_test";
60 |
61 | // Prediction Value
62 | private static String airlineDelayPreduction = "unknown";
63 |
64 | @BeforeClass
65 | public static void startKafkaCluster() throws Exception {
66 | CLUSTER.createTopic(inputTopic);
67 | CLUSTER.createTopic(outputTopic);
68 | }
69 |
70 | @Test
71 | public void shouldPredictFlightDelay() throws Exception {
72 |
73 | // Flight data (one single flight) --> We want to predict if it will be
74 | // delayed or not
75 | List inputValues = Arrays.asList(
76 | "1987,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES",
77 | "1999,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES");
78 |
79 | // Step 1: Configure and start the processor topology.
80 | //
81 |
82 | Properties streamsConfiguration = new Properties();
83 | streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "kafka-streams-h2o-gbm-integration-test");
84 | streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
85 |
86 | // The commit interval for flushing records to state stores and
87 | // downstream must be lower than
88 | // this integration test's timeout (30 secs) to ensure we observe the
89 | // expected processing results.
90 | streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 10 * 1000);
91 | streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
92 | // Use a temporary directory for storing state, which will be
93 | // automatically removed after the test.
94 | streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getAbsolutePath());
95 |
96 | // Create H2O object (see gbm_pojo_test.java)
97 | hex.genmodel.GenModel rawModel;
98 | rawModel = (hex.genmodel.GenModel) Class.forName(modelClassName).newInstance();
99 | EasyPredictModelWrapper model = new EasyPredictModelWrapper(rawModel);
100 |
101 | // Configure Kafka Streams Application
102 | // Specify default (de)serializers for record keys and for record
103 | // values.
104 | streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
105 | streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
106 |
107 | // In the subsequent lines we define the processing topology of the
108 | // Streams application.
109 | final StreamsBuilder builder = new StreamsBuilder();
110 |
111 | // Construct a `KStream` from the input topic "AirlineInputTopic", where
112 | // message values
113 | // represent lines of text (for the sake of this example, we ignore
114 | // whatever may be stored
115 | // in the message keys).
116 | final KStream airlineInputLines = builder.stream(inputTopic);
117 |
118 | // Stream Processor (in this case 'foreach' to add custom logic, i.e.
119 | // apply the analytic model)
120 |
121 | airlineInputLines.foreach((key, value) -> {
122 |
123 | // Year,Month,DayofMonth,DayOfWeek,DepTime,CRSDepTime,ArrTime,CRSArrTime,UniqueCarrier,FlightNum,TailNum,ActualElapsedTime,CRSElapsedTime,AirTime,ArrDelay,DepDelay,Origin,Dest,Distance,TaxiIn,TaxiOut,Cancelled,CancellationCode,Diverted,CarrierDelay,WeatherDelay,NASDelay,SecurityDelay,LateAircraftDelay,IsArrDelayed,IsDepDelayed
124 | // value:
125 | // 1987,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES
126 | if (value != null && !value.equals("")) {
127 | System.out.println("#####################");
128 | System.out.println("Flight Input:" + value);
129 |
130 | String[] valuesArray = value.split(",");
131 |
132 | RowData row = new RowData();
133 | row.put("Year", valuesArray[0]);
134 | row.put("Month", valuesArray[1]);
135 | row.put("DayofMonth", valuesArray[2]);
136 | row.put("DayOfWeek", valuesArray[3]);
137 | row.put("CRSDepTime", valuesArray[5]);
138 | row.put("UniqueCarrier", valuesArray[8]);
139 | row.put("Origin", valuesArray[16]);
140 | row.put("Dest", valuesArray[17]);
141 | BinomialModelPrediction p = null;
142 | try {
143 | p = model.predictBinomial(row);
144 | } catch (PredictException e) {
145 | e.printStackTrace();
146 | }
147 |
148 | airlineDelayPreduction = p.label;
149 | System.out.println("Label (aka prediction) is flight departure delayed: " + p.label);
150 | System.out.print("Class probabilities: ");
151 | for (int i = 0; i < p.classProbabilities.length; i++) {
152 | if (i > 0) {
153 | System.out.print(",");
154 | }
155 | System.out.print(p.classProbabilities[i]);
156 | }
157 | System.out.println("");
158 | System.out.println("#####################");
159 |
160 | }
161 |
162 | });
163 |
164 | // Transform message: Add prediction information
165 | KStream transformedMessage = airlineInputLines
166 | .mapValues(value -> "Prediction: Is Airline delayed? => " + airlineDelayPreduction);
167 |
168 | // Send prediction information to Output Topic
169 | transformedMessage.to(outputTopic);
170 |
171 | // Start Kafka Streams Application to process new incoming messages from
172 | // Input Topic
173 | final KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfiguration);
174 | streams.cleanUp();
175 | streams.start();
176 | System.out.println("Airline Delay Prediction Microservice is running...");
177 | System.out.println("Input to Kafka Topic 'AirlineInputTopic'; Output to Kafka Topic 'AirlineOutpuTopic'");
178 |
179 | //
180 | // Step 2: Produce some input data to the input topic.
181 | //
182 | Properties producerConfig = new Properties();
183 | producerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
184 | producerConfig.put(ProducerConfig.ACKS_CONFIG, "all");
185 | producerConfig.put(ProducerConfig.RETRIES_CONFIG, 0);
186 | producerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
187 | producerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
188 | IntegrationTestUtils.produceValuesSynchronously(inputTopic, inputValues, producerConfig,
189 | new MockTime());
190 |
191 | //
192 | // Step 3: Verify the application's output data.
193 | //
194 | Properties consumerConfig = new Properties();
195 | consumerConfig.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
196 | consumerConfig.put(ConsumerConfig.GROUP_ID_CONFIG,
197 | "machine-learning-example-integration-test-standard-consumer");
198 | consumerConfig.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
199 | consumerConfig.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
200 | consumerConfig.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
201 | List> response = IntegrationTestUtils
202 | .waitUntilMinKeyValueRecordsReceived(consumerConfig, outputTopic, 2);
203 | streams.close();
204 | assertThat(response).isNotNull();
205 | assertThat(response.get(0).value).isEqualTo("Prediction: Is Airline delayed? => YES");
206 |
207 | assertThat(response.get(1).value).isEqualTo("Prediction: Is Airline delayed? => NO");
208 | }
209 |
210 | }
211 |
--------------------------------------------------------------------------------
/h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/TestEmbeddedKafkaCluster.java:
--------------------------------------------------------------------------------
1 | package com.github.megachucky.kafka.streams.machinelearning;
2 |
3 | import org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster;
4 | import org.slf4j.Logger;
5 | import org.slf4j.LoggerFactory;
6 |
7 | import java.util.Properties;
8 |
9 | /**
10 | * This is helper class to workaround for Failing stream tests in Windows
11 | * environment KAFKA-6647.
12 | *
13 | * @author Jukka Karvanen
14 | *
15 | * The causing issue is https://issues.apache.org/jira/browse/KAFKA-6647
16 | * Replacing EmbeddedKafkaCluster with TestEmbeddedKafkaCluster will
17 | * catch and ignore the exception happening during the tear down of the
18 | * test The exception does not have affect to functionality
19 | */
20 |
21 | public class TestEmbeddedKafkaCluster extends EmbeddedKafkaCluster {
22 | private static final Logger log = LoggerFactory.getLogger(TestEmbeddedKafkaCluster.class);
23 |
24 | public TestEmbeddedKafkaCluster(int numBrokers) {
25 | super(numBrokers);
26 | }
27 |
28 | public TestEmbeddedKafkaCluster(int numBrokers, Properties brokerConfig) {
29 | super(numBrokers, brokerConfig);
30 | }
31 |
32 | public TestEmbeddedKafkaCluster(int numBrokers, Properties brokerConfig, long mockTimeMillisStart) {
33 | super(numBrokers, brokerConfig, mockTimeMillisStart);
34 | }
35 |
36 | public TestEmbeddedKafkaCluster(int numBrokers, Properties brokerConfig, long mockTimeMillisStart,
37 | long mockTimeNanoStart) {
38 | super(numBrokers, brokerConfig, mockTimeMillisStart, mockTimeNanoStart);
39 | }
40 |
41 | public void after() {
42 | try {
43 | super.after();
44 | } catch (RuntimeException e) {
45 | log.warn("Ignoring exception, test failing in Windows due this exception {}", e);
46 | }
47 | }
48 | }
49 |
--------------------------------------------------------------------------------
/h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/TestKafkaStreams.java:
--------------------------------------------------------------------------------
1 | package com.github.megachucky.kafka.streams.machinelearning;
2 |
3 | import org.apache.kafka.common.utils.Time;
4 | import org.apache.kafka.streams.KafkaClientSupplier;
5 | import org.apache.kafka.streams.KafkaStreams;
6 | import org.apache.kafka.streams.Topology;
7 | import org.slf4j.Logger;
8 | import org.slf4j.LoggerFactory;
9 |
10 | import java.util.Properties;
11 |
12 | /**
13 | * This is helper class to workaround for Failing stream tests in Windows
14 | * environment KAFKA-6647.
15 | *
16 | * @author Jukka Karvanen
17 | *
18 | * The causing issue is https://issues.apache.org/jira/browse/KAFKA-6647
19 | * Replacing KafkaStreams with TestKafkaStreams will catch and ignore
20 | * the exception caused by cleanUp The exception does not have affect to
21 | * functionality
22 | */
23 |
24 | public class TestKafkaStreams extends KafkaStreams {
25 | private static final Logger log = LoggerFactory.getLogger(TestKafkaStreams.class);
26 |
27 | public TestKafkaStreams(Topology topology, Properties props) {
28 | super(topology, props);
29 | }
30 |
31 | public TestKafkaStreams(Topology topology, Properties props, KafkaClientSupplier clientSupplier) {
32 | super(topology, props, clientSupplier);
33 | }
34 |
35 | public TestKafkaStreams(Topology topology, Properties props, Time time) {
36 | super(topology, props, time);
37 | }
38 |
39 | public TestKafkaStreams(Topology topology, Properties props, KafkaClientSupplier clientSupplier, Time time) {
40 | super(topology, props, clientSupplier, time);
41 | }
42 |
43 | public void cleanUp() {
44 | try {
45 | super.cleanUp();
46 | } catch (RuntimeException e) {
47 | log.warn("Ignoring exception, test failing in Windows due this exception {}", e);
48 | }
49 | }
50 | }
51 |
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
4 | 4.0.0
5 |
6 | com.github.kaiwaehner.kafka.streams.machinelearning
7 | kafka-streams-machine-learning-examples
8 | CP53_AK23
9 | pom
10 |
11 |
12 |
13 | h2o-gbm
14 |
15 | tensorflow-image-recognition
16 |
17 | dl4j-deeplearning-iris
18 |
19 | tensorflow-keras
20 |
21 |
22 |
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | # Machine Learning + Kafka Streams Examples
2 |
3 | This project contains **examples which demonstrate how to deploy analytic models to mission-critical, scalable production leveraging [Apache Kafka](https://kafka.apache.org/) and its [Streams API](https://docs.confluent.io/current/streams/index.html).**
4 | Examples will include analytic models built with TensorFlow, Keras, H2O, Python, DeepLearning4J and other technologies.
5 |
6 | 
7 |
8 | ## Material (Blogs Posts, Slides, Videos)
9 |
10 | Here is some material about this topic if you want to read and listen to the theory instead of just doing hands-on:
11 |
12 | - Blog Post: [How to Build and Deploy Scalable Machine Learning in Production with Apache Kafka](https://www.confluent.io/blog/build-deploy-scalable-machine-learning-production-apache-kafka/)
13 | - Slide Deck: [Apache Kafka + Machine Learning => Intelligent Real Time Applications](https://www.slideshare.net/KaiWaehner/apache-kafka-streams-machine-learning-deep-learning)
14 | - Slide Deck: [Deep Learning at Extreme Scale (in the Cloud)
with the Apache Kafka Open Source Ecosystem](https://www.slideshare.net/KaiWaehner/deep-learning-at-extreme-scale-in-the-cloud-with-the-apache-kafka-open-source-ecosystem)
15 | - Video Recording: [Deep Learning in Mission Critical and Scalable Real Time Applications with Open Source Frameworks](https://vimeo.com/jaxtv/review/256406763/7fbf4213be)
16 | - Blog Post: [Using Apache Kafka to Drive Cutting-Edge Machine Learning - Hybrid ML Architectures, AutoML, and more...](https://www.confluent.io/blog/using-apache-kafka-drive-cutting-edge-machine-learning)
17 | - Blog Post: [Machine Learning with Python, Jupyter, KSQL and TensorFlow](https://www.confluent.io/blog/machine-learning-with-python-jupyter-ksql-tensorflow)
18 | - Blog Post: [Streaming Machine Learning with Tiered Storage and Without a Data Lake](https://www.confluent.io/blog/streaming-machine-learning-with-tiered-storage/)
19 |
20 | ## Use Cases and Technologies
21 |
22 | ##### The following examples are already available including unit tests:
23 |
24 | * Deployment of a H2O GBM model to a Kafka Streams application for prediction of flight delays
25 | * Deployment of a H2O Deep Learning model to a Kafka Streams application for prediction of flight delays
26 | * Deployment of a pre-built TensorFlow CNN model for image recognition
27 | * Deployment of a DL4J model to predict the species of Iris flowers
28 | * Deployment of a Keras model (trained with TensorFlow backend) using the Import Model API from DeepLearning4J
29 |
30 | **More sophisticated use cases around Kafka Streams and other technologies will be added over time in this or related Github project. Some ideas**:
31 |
32 | * Image Recognition with H2O and TensorFlow (to show the difference of using H2O instead of using just low level TensorFlow APIs)
33 | * Anomaly Detection with Autoencoders leveraging DeepLearning4J.
34 | * Cross Selling and Customer Churn Detection using classical Machine Learning algorithms but also Deep Learning
35 | * Stateful Stream Processing to combine different model execution steps into a more powerful workflow instead of "just" inferencing single events (a good example might be a streaming process with sliding or session windows).
36 | * Keras to build different models with Python, TensorFlow, Theano and other Deep Learning frameworks under the hood + Kafka Streams as generic Machine Learning infrastructure to deploy, execute and monitor these different models.
37 |
38 | ##### Some other Github projects exist already with more ML + Kafka content:
39 |
40 | The most exciting and powerful example first:
41 | [Streaming Machine Learning at Scale from 100000 IoT Devices with HiveMQ, Apache Kafka and TensorFLow](https://github.com/kaiwaehner/hivemq-mqtt-tensorflow-kafka-realtime-iot-machine-learning-training-inference)
42 |
43 | Here some more demos:
44 |
45 | - Deep Learning UDF for KSQL: [Streaming Anomaly Detection of MQTT IoT Sensor Data using an Autoencoder](https://github.com/kaiwaehner/ksql-udf-deep-learning-mqtt-iot)
46 | - End-to-End ML Integration Demo: [Continuous Health Checks with Anomaly Detection using KSQL, Kafka Connect, Deep Learning and Elasticsearch](https://github.com/kaiwaehner/ksql-fork-with-deep-learning-function)
47 | - TensorFlow Serving + gRPC + Kafka Streams on Github => Stream Processing and RPC / Request-Response concepts combined: [Model inference with Apache Kafka, Kafka Streams and a TensorFlow model deployed on a TensorFlow Serving model server](https://github.com/kaiwaehner/tensorflow-serving-java-grpc-kafka-streams)
48 | - Solving the impedance mismatch between Data Scientist and Production Engineer: [Python, Jupyter, TensorFlow, Keras, Apache Kafka, KSQL](https://github.com/kaiwaehner/python-jupyter-apache-kafka-ksql-tensorflow-keras)
49 |
50 | ## Requirements, Installation and Usage
51 | The code is developed and tested on Mac and Linux operating systems. As Kafka does not support Windows well, this project is not tested on Windows at all.
52 |
53 | Java 8 and Maven 3 are required. Maven will download all required dependencies.
54 |
55 | Just download the project and run
56 |
57 | mvn clean package
58 |
59 | You can do this in main directory or each module separately.
60 |
61 | Apache Kafka 2.5 is currently used. The code is also compatible with Kafka and Kafka Streams 1.1 and 2.x.
62 |
63 | **Please make sure to run the Maven build without any changes first.** If it works without errors, you can change library versions, Java version, etc. and see if it still works or if you need to adjust code.
64 |
65 | Every example includes an implementation and a unit test. The examples are very simple and lightweight. No further configuration is needed to build and run it. Though, for this reason, the generated models are also included (and increase the download size of the project).
66 |
67 | The unit tests use some Kafka helper classes like EmbeddedSingleNodeKafkaCluster in package **com.github.megachucky.kafka.streams.machinelearning.test.utils** so that you can run it without any other configuration or Kafka setup.
68 | If you want to run an implementation of a main class in package **com.github.megachucky.kafka.streams.machinelearning**, you need to start a Kafka cluster (with at least one Zookeeper and one Kafka broker running) and also create the required topics. So check out the unit tests first.
69 |
70 |
71 | ### Example 1 - Gradient Boosting with H2O.ai for Prediction of Flight Delays
72 |
73 | Detailed info in [h2o-gbm](h2o-gbm/readme.md)
74 |
75 | ### Example 2 - Convolutional Neural Network (CNN) with TensorFlow for Image Recognition
76 |
77 | Detailed info in [tensorflow-image-recognition](tensorflow-image-recognition/readme.md)
78 |
79 | ### Example 3 - Iris Prediction using a Neural Network with DeepLearning4J (DL4J)
80 |
81 | Detailed info in [dl4j-deeplearning-iris](dl4j-deeplearning-iris/readme.md)
82 |
83 | ### Example 4 - Python + Keras + TensorFlow + DeepLearning4j
84 |
85 | Detailed info in [tensorflow-keras](tensorflow-keras/readme.md)
86 |
87 |
88 |
89 |
90 |
91 |
--------------------------------------------------------------------------------
/src/main/java/com/github/megachucky/kafka/streams/machinelearning/StreamsStarterApp.java:
--------------------------------------------------------------------------------
1 | package com.github.megachucky.kafka.streams.machinelearning;
2 |
3 | import java.util.Properties;
4 |
5 | import org.apache.kafka.clients.consumer.ConsumerConfig;
6 | import org.apache.kafka.common.serialization.Serdes;
7 | import org.apache.kafka.streams.KafkaStreams;
8 | import org.apache.kafka.streams.StreamsBuilder;
9 | import org.apache.kafka.streams.StreamsConfig;
10 | import org.apache.kafka.streams.kstream.KStream;
11 |
12 | /**
13 | * Demo Kakfa Streams app. Foundation for the other ML classes.
14 | *
15 | * @author kai.waehner (www.kai-waehner.de)
16 | *
17 | */
18 | public class StreamsStarterApp {
19 |
20 | public static void main(String[] args) {
21 |
22 | Properties config = new Properties();
23 | config.put(StreamsConfig.APPLICATION_ID_CONFIG, "streams-starter-app");
24 | config.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
25 | config.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
26 | config.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
27 | config.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());
28 |
29 | StreamsBuilder builder = new StreamsBuilder();
30 |
31 | KStream kStream = builder.stream("streams-file-input");
32 | // do stuff
33 | kStream.to("streams-wordcount-output");
34 |
35 | KafkaStreams streams = new KafkaStreams(builder.build(), config);
36 | streams.cleanUp(); // only do this in dev - not in prod
37 | streams.start();
38 |
39 | // print the topology
40 | System.out.println(streams.localThreadsMetadata().toString());
41 |
42 | // shutdown hook to correctly close the streams application
43 | Runtime.getRuntime().addShutdownHook(new Thread(streams::close));
44 |
45 | }
46 |
47 | }
48 |
--------------------------------------------------------------------------------
/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
# Root logger: INFO level, writing to the single console appender "stdout".
log4j.rootLogger=INFO, stdout

# Console appender with a compact pattern: priority, message, (category:line), newline.
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%p %m (%c:%L) %n
--------------------------------------------------------------------------------
/tensorflow-image-recognition/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
4 | 4.0.0
5 |
6 | com.github.kaiwaehner.kafka.streams.machinelearning
7 | tensorflow-image-recognition
8 | CP55_AK25
9 |
10 |
11 |
12 | confluent
13 | http://packages.confluent.io/maven/
14 |
15 |
16 |
17 |
18 | 1.8
19 | 2.5.0
20 | 2.12
21 | ${kafka.scala.version}.8
22 | 5.5.0
23 | UTF-8
24 |
25 |
26 |
27 |
28 |
31 |
32 |
33 | org.apache.kafka
34 | kafka-streams
35 | ${kafka.version}
36 |
37 |
38 |
39 |
40 | org.tensorflow
41 | tensorflow
42 | 1.3.0
43 |
44 |
45 |
46 |
47 | org.apache.kafka
48 | kafka-streams-test-utils
49 | ${kafka.version}
50 | test
51 |
52 |
53 |
54 | junit
55 | junit
56 | 4.12
57 | test
58 |
59 |
60 | org.assertj
61 | assertj-core
62 | 3.3.0
63 | test
64 |
65 |
66 | org.apache.kafka
67 | kafka_${kafka.scala.version}
68 | ${kafka.version}
69 | test
70 | test
71 |
72 |
73 | org.apache.kafka
74 | kafka-clients
75 | ${kafka.version}
76 | test
77 | test
78 |
79 |
80 | org.apache.kafka
81 | kafka-streams
82 | ${kafka.version}
83 | test
84 | test
85 |
86 |
87 | org.apache.curator
88 | curator-test
89 | 2.9.0
90 | test
91 |
92 |
93 | io.confluent
94 | kafka-schema-registry
95 | ${confluent.version}
96 | test
97 |
98 |
99 | io.confluent
100 | kafka-schema-registry
101 | ${confluent.version}
102 |
103 | tests
104 | test
105 |
106 |
107 | org.hamcrest
108 | hamcrest
109 | 2.1
110 | test
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 | org.apache.maven.plugins
119 | maven-compiler-plugin
120 | 3.6.1
121 |
122 | 1.8
123 | 1.8
124 |
125 |
126 |
127 |
128 |
129 | org.apache.maven.plugins
130 | maven-assembly-plugin
131 | 2.5.2
132 |
133 |
134 | jar-with-dependencies
135 |
136 |
137 |
138 | true
139 | com.github.megachucky.kafka.streams.machinelearning.Kafka_Streams_TensorFlow_Image_Recognition_Example
140 |
141 |
142 |
143 |
144 |
145 |
146 | assemble-all
147 | package
148 |
149 | single
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 |
--------------------------------------------------------------------------------
/tensorflow-image-recognition/readme.md:
--------------------------------------------------------------------------------
1 | # Machine Learning + Kafka Streams Examples
2 |
3 | General info in main [Readme](../readme.md)
4 |
5 | ### Example 2 - Convolutional Neural Network (CNN) with TensorFlow for Image Recognition
6 | **Use Case**
7 |
8 | Convolutional Neural Network (CNN) for image recognition.
9 | A prebuilt TensorFlow CNN model is instantiated and used in a Kafka Streams application to recognize new JPEG images. A Kafka Input Topic receives the location of a new image (another option would be to send the image in the Kafka message instead of just a link to it), the application infers the content of the picture via the TensorFlow model, and sends the result to a Kafka Output Topic.
10 |
11 | **Machine Learning Technology**
12 | * [TensorFlow](https://www.tensorflow.org/)
13 | * Leverages [TensorFlow for Java](https://www.tensorflow.org/install/install_java). These APIs are particularly well-suited for loading models created in Python and executing them within a Java application. Please note: The Java API doesn't yet include convenience functions (which you might know from [Keras](https://keras.io/)), thus a private helper class is used in the example for construction and execution of the pre-built TensorFlow model.
14 | * Check the official TensorFlow demo [LabelImage](https://github.com/kaiwaehner/tensorflow/blob/r1.3/tensorflow/java/src/main/java/org/tensorflow/examples/LabelImage.java) to understand this image recognition example
15 | * You can re-use the pre-trained TensorFlow model attached to this project [tensorflow_inception_graph.pb](http://arxiv.org/abs/1512.00567) or add your own model.
16 | * The 'images' folder contains models which were used for training the model (trained_airplane_1.jpg, trained_airplane_2.jpg, trained_butterfly.jpg) but also a new picture (new_airplane.jpg) which is not known by the model and using a different resolution than the others. Feel free to add your own pictures (they need to be trained, see list of trained pictures in the file: imagenet_comp_graph_label_strings.txt), otherwise the model will return 'unknown'.
17 |
18 | **Source Code**
19 |
20 | [Kafka_Streams_TensorFlow_Image_Recognition_Example.java](src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_TensorFlow_Image_Recognition_Example.java)
21 |
22 | **Unit Test**
23 |
24 | [Kafka_Streams_TensorFlow_Image_Recognition_ExampleTest.java](src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_TensorFlow_Image_Recognition_ExampleTest.java)
25 | [Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest.java](src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest.java)
26 |
27 |
--------------------------------------------------------------------------------
/tensorflow-image-recognition/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_TensorFlow_Image_Recognition_Example.java:
--------------------------------------------------------------------------------
1 | package com.github.megachucky.kafka.streams.machinelearning;
2 |
3 | import java.io.IOException;
4 | import java.nio.charset.Charset;
5 | import java.nio.file.Files;
6 | import java.nio.file.Path;
7 | import java.nio.file.Paths;
8 | import java.util.Arrays;
9 | import java.util.List;
10 | import java.util.Properties;
11 |
12 | import org.apache.kafka.common.serialization.Serdes;
13 | import org.apache.kafka.streams.KafkaStreams;
14 | import org.apache.kafka.streams.StreamsBuilder;
15 | import org.apache.kafka.streams.StreamsConfig;
16 | import org.apache.kafka.streams.Topology;
17 | import org.apache.kafka.streams.kstream.KStream;
18 | import org.apache.kafka.streams.kstream.Printed;
19 | import org.tensorflow.DataType;
20 | import org.tensorflow.Graph;
21 | import org.tensorflow.Output;
22 | import org.tensorflow.Session;
23 | import org.tensorflow.Tensor;
24 |
25 | /**
26 | * @author Kai Waehner (www.kai-waehner.de)
27 | *
28 | * Creates a new Kafka Streams application for Image Recognition. The
29 | * application uses the CNN model "inception5h" (built with TensorFlow)
30 | * to infer messages sent to Kafka topic "ImageInputTopic". The outcome
31 | * of model inference is sent to Kafka topic "ImageOutputTopic".
32 | *
33 | */
34 | public class Kafka_Streams_TensorFlow_Image_Recognition_Example {
35 |
36 | static final String imageInputTopic = "ImageInputTopic";
37 | static final String imageOutputTopic = "ImageOutputTopic";
38 |
39 | public static void main(final String[] args) throws Exception {
40 | // Configure Kafka Streams Application
41 | final String bootstrapServers = args.length > 0 ? args[0] : "localhost:9092";
42 | final Properties streamsConfiguration = getStreamConfiguration(bootstrapServers);
43 | Topology topology = getStreamTopology();
44 |
45 | // Start Kafka Streams Application to process new incoming images from the Input
46 | // Topic
47 | final KafkaStreams streams = new KafkaStreams(topology, streamsConfiguration);
48 |
49 | streams.cleanUp();
50 |
51 | streams.start();
52 |
53 | System.out.println("Image Recognition Microservice is running...");
54 |
55 | System.out.println("Input to Kafka Topic " + imageInputTopic + "; Output to Kafka Topic " + imageOutputTopic);
56 |
57 | // Add shutdown hook to respond to SIGTERM and gracefully close Kafka
58 | // Streams
59 | Runtime.getRuntime().addShutdownHook(new Thread(streams::close));
60 |
61 | }
62 |
63 | static Properties getStreamConfiguration(String bootstrapServers) {
64 | final Properties streamsConfiguration = new Properties();
65 | // Give the Streams application a unique name. The name must be unique
66 | // in the Kafka cluster
67 | // against which the application is run.
68 | streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG,
69 | "kafka-streams-tensorflow-image-recognition-example");
70 | // Where to find Kafka broker(s).
71 | streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
72 |
73 | // Specify default (de)serializers for record keys and for record
74 | // values.
75 | streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
76 | streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
77 | return streamsConfiguration;
78 | }
79 |
80 | static Topology getStreamTopology() throws IOException {
81 | // Create TensorFlow object
82 |
83 | String modelDir = "src/main/resources/generatedModels/CNN_inception5h";
84 |
85 | Path pathGraph = Paths.get(modelDir, "tensorflow_inception_graph.pb");
86 | byte[] graphDef = Files.readAllBytes(pathGraph);
87 |
88 | Path pathModel = Paths.get(modelDir, "imagenet_comp_graph_label_strings.txt");
89 | List labels = Files.readAllLines(pathModel, Charset.forName("UTF-8"));
90 |
91 | // In the subsequent lines we define the processing topology of the
92 | // Streams application.
93 | final StreamsBuilder builder = new StreamsBuilder();
94 |
95 | // Construct a `KStream` from the input topic "ImageInputTopic", where
96 | // message values represent lines of text
97 | final KStream imageInputLines = builder.stream(imageInputTopic);
98 |
99 | //imageInputLines.print(Printed.toSysOut());
100 |
101 | // Stream Processor (in this case inside mapValues to add custom logic, i.e. apply the
102 | // analytic model)
103 | // Transform message: Add prediction information
104 | KStream transformedMessage =
105 | imageInputLines.mapValues(value -> {
106 |
107 | String imageClassification = "unknown";
108 | String imageProbability = "unknown";
109 |
110 | String imageFile = value;
111 |
112 | Path pathImage = Paths.get(imageFile);
113 | byte[] imageBytes;
114 | try {
115 | imageBytes = Files.readAllBytes(pathImage);
116 |
117 | try (Tensor image = constructAndExecuteGraphToNormalizeImage(imageBytes)) {
118 | float[] labelProbabilities = executeInceptionGraph(graphDef, image);
119 | int bestLabelIdx = maxIndex(labelProbabilities);
120 |
121 | imageClassification = labels.get(bestLabelIdx);
122 |
123 | imageProbability = Float.toString(labelProbabilities[bestLabelIdx] * 100f);
124 |
125 | System.out.println(String.format("BEST MATCH: %s (%.2f%% likely)", imageClassification,
126 | labelProbabilities[bestLabelIdx] * 100f));
127 | }
128 |
129 | } catch (IOException e) {
130 | e.printStackTrace();
131 | }
132 | return "Prediction: What is the content of this picture? => " + imageClassification
133 | + ", probability = " + imageProbability;
134 | });
135 |
136 | // Send prediction information to Output Topic
137 | transformedMessage.to(imageOutputTopic);
138 |
139 | return builder.build();
140 | }
141 |
142 |
143 | // ########################################################################################
144 | // Private helper class for construction and execution of the pre-built
145 | // TensorFlow model
146 | // ########################################################################################
147 |
148 | private static Tensor constructAndExecuteGraphToNormalizeImage(byte[] imageBytes) {
149 | // Graph construction: using the OperationBuilder class to construct a graph to
150 | // decode, resize and normalize a JPEG image.
151 |
152 | try (Graph g = new Graph()) {
153 | GraphBuilder b = new GraphBuilder(g);
154 | // Some constants specific to the pre-trained model at:
155 | // https://storage.googleapis.com/download.tensorflow.org/models/inception5h.zip
156 | //
157 | // - The model was trained with images scaled to 224x224 pixels.
158 | // - The colors, represented as R, G, B in 1-byte each were
159 | // converted to
160 | // float using (value - Mean)/Scale.
161 | final int H = 224;
162 | final int W = 224;
163 | final float mean = 117f;
164 | final float scale = 1f;
165 |
166 | // Since the graph is being constructed once per execution here, we
167 | // can use a constant for the
168 | // input image. If the graph were to be re-used for multiple input
169 | // images, a placeholder would
170 | // have been more appropriate.
171 | final Output input = b.constant("input", imageBytes);
172 | final Output output = b
173 | .div(b.sub(
174 | b.resizeBilinear(b.expandDims(b.cast(b.decodeJpeg(input, 3), DataType.FLOAT),
175 | b.constant("make_batch", 0)), b.constant("size", new int[] { H, W })),
176 | b.constant("mean", mean)), b.constant("scale", scale));
177 | try (Session s = new Session(g)) {
178 | return s.runner().fetch(output.op().name()).run().get(0);
179 | }
180 | }
181 | }
182 |
183 | private static float[] executeInceptionGraph(byte[] graphDef, Tensor image) {
184 | try (Graph g = new Graph()) {
185 |
186 | // Model loading: Using Graph.importGraphDef() to load a pre-trained Inception
187 | // model.
188 | g.importGraphDef(graphDef);
189 |
190 | // Graph execution: Using a Session to execute the graphs and find the best
191 | // label for an image.
192 | try (Session s = new Session(g);
193 | Tensor result = s.runner().feed("input", image).fetch("output").run().get(0)) {
194 | final long[] rshape = result.shape();
195 | if (result.numDimensions() != 2 || rshape[0] != 1) {
196 | throw new RuntimeException(String.format(
197 | "Expected model to produce a [1 N] shaped tensor where N is the number of labels, instead it produced one with shape %s",
198 | Arrays.toString(rshape)));
199 | }
200 | int nlabels = (int) rshape[1];
201 | return result.copyTo(new float[1][nlabels])[0];
202 | }
203 | }
204 | }
205 |
206 | private static int maxIndex(float[] probabilities) {
207 | int best = 0;
208 | for (int i = 1; i < probabilities.length; ++i) {
209 | if (probabilities[i] > probabilities[best]) {
210 | best = i;
211 | }
212 | }
213 | return best;
214 | }
215 |
216 | // In the fullness of time, equivalents of the methods of this class should
217 | // be auto-generated from
218 | // the OpDefs linked into libtensorflow_jni.so. That would match what is
219 | // done in other languages
220 | // like Python, C++ and Go.
221 | static class GraphBuilder {
222 | GraphBuilder(Graph g) {
223 | this.g = g;
224 | }
225 |
226 | Output div(Output x, Output y) {
227 | return binaryOp("Div", x, y);
228 | }
229 |
230 | Output sub(Output x, Output y) {
231 | return binaryOp("Sub", x, y);
232 | }
233 |
234 | Output resizeBilinear(Output images, Output size) {
235 | return binaryOp("ResizeBilinear", images, size);
236 | }
237 |
238 | Output expandDims(Output input, Output dim) {
239 | return binaryOp("ExpandDims", input, dim);
240 | }
241 |
242 | Output cast(Output value, DataType dtype) {
243 | return g.opBuilder("Cast", "Cast").addInput(value).setAttr("DstT", dtype).build().output(0);
244 | }
245 |
246 | Output decodeJpeg(Output contents, long channels) {
247 | return g.opBuilder("DecodeJpeg", "DecodeJpeg").addInput(contents).setAttr("channels", channels).build()
248 | .output(0);
249 | }
250 |
251 | Output constant(String name, Object value) {
252 | try (Tensor t = Tensor.create(value)) {
253 | return g.opBuilder("Const", name).setAttr("dtype", t.dataType()).setAttr("value", t).build().output(0);
254 | }
255 | }
256 |
257 | private Output binaryOp(String type, Output in1, Output in2) {
258 | return g.opBuilder(type, type).addInput(in1).addInput(in2).build().output(0);
259 | }
260 |
261 | private Graph g;
262 | }
263 |
264 | }
265 |
--------------------------------------------------------------------------------
/tensorflow-image-recognition/src/main/resources/TensorFlow_Images/devil.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaiwaehner/kafka-streams-machine-learning-examples/3977e3928d2719fc710bb3d202851ee8904be2e7/tensorflow-image-recognition/src/main/resources/TensorFlow_Images/devil.png
--------------------------------------------------------------------------------
/tensorflow-image-recognition/src/main/resources/TensorFlow_Images/new_airplane.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaiwaehner/kafka-streams-machine-learning-examples/3977e3928d2719fc710bb3d202851ee8904be2e7/tensorflow-image-recognition/src/main/resources/TensorFlow_Images/new_airplane.jpg
--------------------------------------------------------------------------------
/tensorflow-image-recognition/src/main/resources/TensorFlow_Images/trained_airplane_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaiwaehner/kafka-streams-machine-learning-examples/3977e3928d2719fc710bb3d202851ee8904be2e7/tensorflow-image-recognition/src/main/resources/TensorFlow_Images/trained_airplane_1.jpg
--------------------------------------------------------------------------------
/tensorflow-image-recognition/src/main/resources/TensorFlow_Images/trained_airplane_2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaiwaehner/kafka-streams-machine-learning-examples/3977e3928d2719fc710bb3d202851ee8904be2e7/tensorflow-image-recognition/src/main/resources/TensorFlow_Images/trained_airplane_2.jpg
--------------------------------------------------------------------------------
/tensorflow-image-recognition/src/main/resources/TensorFlow_Images/trained_butterfly.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaiwaehner/kafka-streams-machine-learning-examples/3977e3928d2719fc710bb3d202851ee8904be2e7/tensorflow-image-recognition/src/main/resources/TensorFlow_Images/trained_butterfly.jpg
--------------------------------------------------------------------------------
/tensorflow-image-recognition/src/main/resources/generatedModels/CNN_inception5h/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright 2015 The TensorFlow Authors. All rights reserved.
2 |
3 | Apache License
4 | Version 2.0, January 2004
5 | http://www.apache.org/licenses/
6 |
7 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
8 |
9 | 1. Definitions.
10 |
11 | "License" shall mean the terms and conditions for use, reproduction,
12 | and distribution as defined by Sections 1 through 9 of this document.
13 |
14 | "Licensor" shall mean the copyright owner or entity authorized by
15 | the copyright owner that is granting the License.
16 |
17 | "Legal Entity" shall mean the union of the acting entity and all
18 | other entities that control, are controlled by, or are under common
19 | control with that entity. For the purposes of this definition,
20 | "control" means (i) the power, direct or indirect, to cause the
21 | direction or management of such entity, whether by contract or
22 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
23 | outstanding shares, or (iii) beneficial ownership of such entity.
24 |
25 | "You" (or "Your") shall mean an individual or Legal Entity
26 | exercising permissions granted by this License.
27 |
28 | "Source" form shall mean the preferred form for making modifications,
29 | including but not limited to software source code, documentation
30 | source, and configuration files.
31 |
32 | "Object" form shall mean any form resulting from mechanical
33 | transformation or translation of a Source form, including but
34 | not limited to compiled object code, generated documentation,
35 | and conversions to other media types.
36 |
37 | "Work" shall mean the work of authorship, whether in Source or
38 | Object form, made available under the License, as indicated by a
39 | copyright notice that is included in or attached to the work
40 | (an example is provided in the Appendix below).
41 |
42 | "Derivative Works" shall mean any work, whether in Source or Object
43 | form, that is based on (or derived from) the Work and for which the
44 | editorial revisions, annotations, elaborations, or other modifications
45 | represent, as a whole, an original work of authorship. For the purposes
46 | of this License, Derivative Works shall not include works that remain
47 | separable from, or merely link (or bind by name) to the interfaces of,
48 | the Work and Derivative Works thereof.
49 |
50 | "Contribution" shall mean any work of authorship, including
51 | the original version of the Work and any modifications or additions
52 | to that Work or Derivative Works thereof, that is intentionally
53 | submitted to Licensor for inclusion in the Work by the copyright owner
54 | or by an individual or Legal Entity authorized to submit on behalf of
55 | the copyright owner. For the purposes of this definition, "submitted"
56 | means any form of electronic, verbal, or written communication sent
57 | to the Licensor or its representatives, including but not limited to
58 | communication on electronic mailing lists, source code control systems,
59 | and issue tracking systems that are managed by, or on behalf of, the
60 | Licensor for the purpose of discussing and improving the Work, but
61 | excluding communication that is conspicuously marked or otherwise
62 | designated in writing by the copyright owner as "Not a Contribution."
63 |
64 | "Contributor" shall mean Licensor and any individual or Legal Entity
65 | on behalf of whom a Contribution has been received by Licensor and
66 | subsequently incorporated within the Work.
67 |
68 | 2. Grant of Copyright License. Subject to the terms and conditions of
69 | this License, each Contributor hereby grants to You a perpetual,
70 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
71 | copyright license to reproduce, prepare Derivative Works of,
72 | publicly display, publicly perform, sublicense, and distribute the
73 | Work and such Derivative Works in Source or Object form.
74 |
75 | 3. Grant of Patent License. Subject to the terms and conditions of
76 | this License, each Contributor hereby grants to You a perpetual,
77 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
78 | (except as stated in this section) patent license to make, have made,
79 | use, offer to sell, sell, import, and otherwise transfer the Work,
80 | where such license applies only to those patent claims licensable
81 | by such Contributor that are necessarily infringed by their
82 | Contribution(s) alone or by combination of their Contribution(s)
83 | with the Work to which such Contribution(s) was submitted. If You
84 | institute patent litigation against any entity (including a
85 | cross-claim or counterclaim in a lawsuit) alleging that the Work
86 | or a Contribution incorporated within the Work constitutes direct
87 | or contributory patent infringement, then any patent licenses
88 | granted to You under this License for that Work shall terminate
89 | as of the date such litigation is filed.
90 |
91 | 4. Redistribution. You may reproduce and distribute copies of the
92 | Work or Derivative Works thereof in any medium, with or without
93 | modifications, and in Source or Object form, provided that You
94 | meet the following conditions:
95 |
96 | (a) You must give any other recipients of the Work or
97 | Derivative Works a copy of this License; and
98 |
99 | (b) You must cause any modified files to carry prominent notices
100 | stating that You changed the files; and
101 |
102 | (c) You must retain, in the Source form of any Derivative Works
103 | that You distribute, all copyright, patent, trademark, and
104 | attribution notices from the Source form of the Work,
105 | excluding those notices that do not pertain to any part of
106 | the Derivative Works; and
107 |
108 | (d) If the Work includes a "NOTICE" text file as part of its
109 | distribution, then any Derivative Works that You distribute must
110 | include a readable copy of the attribution notices contained
111 | within such NOTICE file, excluding those notices that do not
112 | pertain to any part of the Derivative Works, in at least one
113 | of the following places: within a NOTICE text file distributed
114 | as part of the Derivative Works; within the Source form or
115 | documentation, if provided along with the Derivative Works; or,
116 | within a display generated by the Derivative Works, if and
117 | wherever such third-party notices normally appear. The contents
118 | of the NOTICE file are for informational purposes only and
119 | do not modify the License. You may add Your own attribution
120 | notices within Derivative Works that You distribute, alongside
121 | or as an addendum to the NOTICE text from the Work, provided
122 | that such additional attribution notices cannot be construed
123 | as modifying the License.
124 |
125 | You may add Your own copyright statement to Your modifications and
126 | may provide additional or different license terms and conditions
127 | for use, reproduction, or distribution of Your modifications, or
128 | for any such Derivative Works as a whole, provided Your use,
129 | reproduction, and distribution of the Work otherwise complies with
130 | the conditions stated in this License.
131 |
132 | 5. Submission of Contributions. Unless You explicitly state otherwise,
133 | any Contribution intentionally submitted for inclusion in the Work
134 | by You to the Licensor shall be under the terms and conditions of
135 | this License, without any additional terms or conditions.
136 | Notwithstanding the above, nothing herein shall supersede or modify
137 | the terms of any separate license agreement you may have executed
138 | with Licensor regarding such Contributions.
139 |
140 | 6. Trademarks. This License does not grant permission to use the trade
141 | names, trademarks, service marks, or product names of the Licensor,
142 | except as required for reasonable and customary use in describing the
143 | origin of the Work and reproducing the content of the NOTICE file.
144 |
145 | 7. Disclaimer of Warranty. Unless required by applicable law or
146 | agreed to in writing, Licensor provides the Work (and each
147 | Contributor provides its Contributions) on an "AS IS" BASIS,
148 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
149 | implied, including, without limitation, any warranties or conditions
150 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
151 | PARTICULAR PURPOSE. You are solely responsible for determining the
152 | appropriateness of using or redistributing the Work and assume any
153 | risks associated with Your exercise of permissions under this License.
154 |
155 | 8. Limitation of Liability. In no event and under no legal theory,
156 | whether in tort (including negligence), contract, or otherwise,
157 | unless required by applicable law (such as deliberate and grossly
158 | negligent acts) or agreed to in writing, shall any Contributor be
159 | liable to You for damages, including any direct, indirect, special,
160 | incidental, or consequential damages of any character arising as a
161 | result of this License or out of the use or inability to use the
162 | Work (including but not limited to damages for loss of goodwill,
163 | work stoppage, computer failure or malfunction, or any and all
164 | other commercial damages or losses), even if such Contributor
165 | has been advised of the possibility of such damages.
166 |
167 | 9. Accepting Warranty or Additional Liability. While redistributing
168 | the Work or Derivative Works thereof, You may choose to offer,
169 | and charge a fee for, acceptance of support, warranty, indemnity,
170 | or other liability obligations and/or rights consistent with this
171 | License. However, in accepting such obligations, You may act only
172 | on Your own behalf and on Your sole responsibility, not on behalf
173 | of any other Contributor, and only if You agree to indemnify,
174 | defend, and hold each Contributor harmless for any liability
175 | incurred by, or claims asserted against, such Contributor by reason
176 | of your accepting any such warranty or additional liability.
177 |
178 | END OF TERMS AND CONDITIONS
179 |
180 | APPENDIX: How to apply the Apache License to your work.
181 |
182 | To apply the Apache License to your work, attach the following
183 | boilerplate notice, with the fields enclosed by brackets "[]"
184 | replaced with your own identifying information. (Don't include
185 | the brackets!) The text should be enclosed in the appropriate
186 | comment syntax for the file format. We also recommend that a
187 | file or class name and description of purpose be included on the
188 | same "printed page" as the copyright notice for easier
189 | identification within third-party archives.
190 |
191 | Copyright 2015, The TensorFlow Authors.
192 |
193 | Licensed under the Apache License, Version 2.0 (the "License");
194 | you may not use this file except in compliance with the License.
195 | You may obtain a copy of the License at
196 |
197 | http://www.apache.org/licenses/LICENSE-2.0
198 |
199 | Unless required by applicable law or agreed to in writing, software
200 | distributed under the License is distributed on an "AS IS" BASIS,
201 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
202 | See the License for the specific language governing permissions and
203 | limitations under the License.
204 |
--------------------------------------------------------------------------------
/tensorflow-image-recognition/src/main/resources/generatedModels/CNN_inception5h/imagenet_comp_graph_label_strings.txt:
--------------------------------------------------------------------------------
1 | dummy
2 | kit fox
3 | English setter
4 | Siberian husky
5 | Australian terrier
6 | English springer
7 | grey whale
8 | lesser panda
9 | Egyptian cat
10 | ibex
11 | Persian cat
12 | cougar
13 | gazelle
14 | porcupine
15 | sea lion
16 | malamute
17 | badger
18 | Great Dane
19 | Walker hound
20 | Welsh springer spaniel
21 | whippet
22 | Scottish deerhound
23 | killer whale
24 | mink
25 | African elephant
26 | Weimaraner
27 | soft-coated wheaten terrier
28 | Dandie Dinmont
29 | red wolf
30 | Old English sheepdog
31 | jaguar
32 | otterhound
33 | bloodhound
34 | Airedale
35 | hyena
36 | meerkat
37 | giant schnauzer
38 | titi
39 | three-toed sloth
40 | sorrel
41 | black-footed ferret
42 | dalmatian
43 | black-and-tan coonhound
44 | papillon
45 | skunk
46 | Staffordshire bullterrier
47 | Mexican hairless
48 | Bouvier des Flandres
49 | weasel
50 | miniature poodle
51 | Cardigan
52 | malinois
53 | bighorn
54 | fox squirrel
55 | colobus
56 | tiger cat
57 | Lhasa
58 | impala
59 | coyote
60 | Yorkshire terrier
61 | Newfoundland
62 | brown bear
63 | red fox
64 | Norwegian elkhound
65 | Rottweiler
66 | hartebeest
67 | Saluki
68 | grey fox
69 | schipperke
70 | Pekinese
71 | Brabancon griffon
72 | West Highland white terrier
73 | Sealyham terrier
74 | guenon
75 | mongoose
76 | indri
77 | tiger
78 | Irish wolfhound
79 | wild boar
80 | EntleBucher
81 | zebra
82 | ram
83 | French bulldog
84 | orangutan
85 | basenji
86 | leopard
87 | Bernese mountain dog
88 | Maltese dog
89 | Norfolk terrier
90 | toy terrier
91 | vizsla
92 | cairn
93 | squirrel monkey
94 | groenendael
95 | clumber
96 | Siamese cat
97 | chimpanzee
98 | komondor
99 | Afghan hound
100 | Japanese spaniel
101 | proboscis monkey
102 | guinea pig
103 | white wolf
104 | ice bear
105 | gorilla
106 | borzoi
107 | toy poodle
108 | Kerry blue terrier
109 | ox
110 | Scotch terrier
111 | Tibetan mastiff
112 | spider monkey
113 | Doberman
114 | Boston bull
115 | Greater Swiss Mountain dog
116 | Appenzeller
117 | Shih-Tzu
118 | Irish water spaniel
119 | Pomeranian
120 | Bedlington terrier
121 | warthog
122 | Arabian camel
123 | siamang
124 | miniature schnauzer
125 | collie
126 | golden retriever
127 | Irish terrier
128 | affenpinscher
129 | Border collie
130 | hare
131 | boxer
132 | silky terrier
133 | beagle
134 | Leonberg
135 | German short-haired pointer
136 | patas
137 | dhole
138 | baboon
139 | macaque
140 | Chesapeake Bay retriever
141 | bull mastiff
142 | kuvasz
143 | capuchin
144 | pug
145 | curly-coated retriever
146 | Norwich terrier
147 | flat-coated retriever
148 | hog
149 | keeshond
150 | Eskimo dog
151 | Brittany spaniel
152 | standard poodle
153 | Lakeland terrier
154 | snow leopard
155 | Gordon setter
156 | dingo
157 | standard schnauzer
158 | hamster
159 | Tibetan terrier
160 | Arctic fox
161 | wire-haired fox terrier
162 | basset
163 | water buffalo
164 | American black bear
165 | Angora
166 | bison
167 | howler monkey
168 | hippopotamus
169 | chow
170 | giant panda
171 | American Staffordshire terrier
172 | Shetland sheepdog
173 | Great Pyrenees
174 | Chihuahua
175 | tabby
176 | marmoset
177 | Labrador retriever
178 | Saint Bernard
179 | armadillo
180 | Samoyed
181 | bluetick
182 | redbone
183 | polecat
184 | marmot
185 | kelpie
186 | gibbon
187 | llama
188 | miniature pinscher
189 | wood rabbit
190 | Italian greyhound
191 | lion
192 | cocker spaniel
193 | Irish setter
194 | dugong
195 | Indian elephant
196 | beaver
197 | Sussex spaniel
198 | Pembroke
199 | Blenheim spaniel
200 | Madagascar cat
201 | Rhodesian ridgeback
202 | lynx
203 | African hunting dog
204 | langur
205 | Ibizan hound
206 | timber wolf
207 | cheetah
208 | English foxhound
209 | briard
210 | sloth bear
211 | Border terrier
212 | German shepherd
213 | otter
214 | koala
215 | tusker
216 | echidna
217 | wallaby
218 | platypus
219 | wombat
220 | revolver
221 | umbrella
222 | schooner
223 | soccer ball
224 | accordion
225 | ant
226 | starfish
227 | chambered nautilus
228 | grand piano
229 | laptop
230 | strawberry
231 | airliner
232 | warplane
233 | airship
234 | balloon
235 | space shuttle
236 | fireboat
237 | gondola
238 | speedboat
239 | lifeboat
240 | canoe
241 | yawl
242 | catamaran
243 | trimaran
244 | container ship
245 | liner
246 | pirate
247 | aircraft carrier
248 | submarine
249 | wreck
250 | half track
251 | tank
252 | missile
253 | bobsled
254 | dogsled
255 | bicycle-built-for-two
256 | mountain bike
257 | freight car
258 | passenger car
259 | barrow
260 | shopping cart
261 | motor scooter
262 | forklift
263 | electric locomotive
264 | steam locomotive
265 | amphibian
266 | ambulance
267 | beach wagon
268 | cab
269 | convertible
270 | jeep
271 | limousine
272 | minivan
273 | Model T
274 | racer
275 | sports car
276 | go-kart
277 | golfcart
278 | moped
279 | snowplow
280 | fire engine
281 | garbage truck
282 | pickup
283 | tow truck
284 | trailer truck
285 | moving van
286 | police van
287 | recreational vehicle
288 | streetcar
289 | snowmobile
290 | tractor
291 | mobile home
292 | tricycle
293 | unicycle
294 | horse cart
295 | jinrikisha
296 | oxcart
297 | bassinet
298 | cradle
299 | crib
300 | four-poster
301 | bookcase
302 | china cabinet
303 | medicine chest
304 | chiffonier
305 | table lamp
306 | file
307 | park bench
308 | barber chair
309 | throne
310 | folding chair
311 | rocking chair
312 | studio couch
313 | toilet seat
314 | desk
315 | pool table
316 | dining table
317 | entertainment center
318 | wardrobe
319 | Granny Smith
320 | orange
321 | lemon
322 | fig
323 | pineapple
324 | banana
325 | jackfruit
326 | custard apple
327 | pomegranate
328 | acorn
329 | hip
330 | ear
331 | rapeseed
332 | corn
333 | buckeye
334 | organ
335 | upright
336 | chime
337 | drum
338 | gong
339 | maraca
340 | marimba
341 | steel drum
342 | banjo
343 | cello
344 | violin
345 | harp
346 | acoustic guitar
347 | electric guitar
348 | cornet
349 | French horn
350 | trombone
351 | harmonica
352 | ocarina
353 | panpipe
354 | bassoon
355 | oboe
356 | sax
357 | flute
358 | daisy
359 | yellow lady's slipper
360 | cliff
361 | valley
362 | alp
363 | volcano
364 | promontory
365 | sandbar
366 | coral reef
367 | lakeside
368 | seashore
369 | geyser
370 | hatchet
371 | cleaver
372 | letter opener
373 | plane
374 | power drill
375 | lawn mower
376 | hammer
377 | corkscrew
378 | can opener
379 | plunger
380 | screwdriver
381 | shovel
382 | plow
383 | chain saw
384 | cock
385 | hen
386 | ostrich
387 | brambling
388 | goldfinch
389 | house finch
390 | junco
391 | indigo bunting
392 | robin
393 | bulbul
394 | jay
395 | magpie
396 | chickadee
397 | water ouzel
398 | kite
399 | bald eagle
400 | vulture
401 | great grey owl
402 | black grouse
403 | ptarmigan
404 | ruffed grouse
405 | prairie chicken
406 | peacock
407 | quail
408 | partridge
409 | African grey
410 | macaw
411 | sulphur-crested cockatoo
412 | lorikeet
413 | coucal
414 | bee eater
415 | hornbill
416 | hummingbird
417 | jacamar
418 | toucan
419 | drake
420 | red-breasted merganser
421 | goose
422 | black swan
423 | white stork
424 | black stork
425 | spoonbill
426 | flamingo
427 | American egret
428 | little blue heron
429 | bittern
430 | crane
431 | limpkin
432 | American coot
433 | bustard
434 | ruddy turnstone
435 | red-backed sandpiper
436 | redshank
437 | dowitcher
438 | oystercatcher
439 | European gallinule
440 | pelican
441 | king penguin
442 | albatross
443 | great white shark
444 | tiger shark
445 | hammerhead
446 | electric ray
447 | stingray
448 | barracouta
449 | coho
450 | tench
451 | goldfish
452 | eel
453 | rock beauty
454 | anemone fish
455 | lionfish
456 | puffer
457 | sturgeon
458 | gar
459 | loggerhead
460 | leatherback turtle
461 | mud turtle
462 | terrapin
463 | box turtle
464 | banded gecko
465 | common iguana
466 | American chameleon
467 | whiptail
468 | agama
469 | frilled lizard
470 | alligator lizard
471 | Gila monster
472 | green lizard
473 | African chameleon
474 | Komodo dragon
475 | triceratops
476 | African crocodile
477 | American alligator
478 | thunder snake
479 | ringneck snake
480 | hognose snake
481 | green snake
482 | king snake
483 | garter snake
484 | water snake
485 | vine snake
486 | night snake
487 | boa constrictor
488 | rock python
489 | Indian cobra
490 | green mamba
491 | sea snake
492 | horned viper
493 | diamondback
494 | sidewinder
495 | European fire salamander
496 | common newt
497 | eft
498 | spotted salamander
499 | axolotl
500 | bullfrog
501 | tree frog
502 | tailed frog
503 | whistle
504 | wing
505 | paintbrush
506 | hand blower
507 | oxygen mask
508 | snorkel
509 | loudspeaker
510 | microphone
511 | screen
512 | mouse
513 | electric fan
514 | oil filter
515 | strainer
516 | space heater
517 | stove
518 | guillotine
519 | barometer
520 | rule
521 | odometer
522 | scale
523 | analog clock
524 | digital clock
525 | wall clock
526 | hourglass
527 | sundial
528 | parking meter
529 | stopwatch
530 | digital watch
531 | stethoscope
532 | syringe
533 | magnetic compass
534 | binoculars
535 | projector
536 | sunglasses
537 | loupe
538 | radio telescope
539 | bow
540 | cannon [ground]
541 | assault rifle
542 | rifle
543 | projectile
544 | computer keyboard
545 | typewriter keyboard
546 | crane
547 | lighter
548 | abacus
549 | cash machine
550 | slide rule
551 | desktop computer
552 | hand-held computer
553 | notebook
554 | web site
555 | harvester
556 | thresher
557 | printer
558 | slot
559 | vending machine
560 | sewing machine
561 | joystick
562 | switch
563 | hook
564 | car wheel
565 | paddlewheel
566 | pinwheel
567 | potter's wheel
568 | gas pump
569 | carousel
570 | swing
571 | reel
572 | radiator
573 | puck
574 | hard disc
575 | sunglass
576 | pick
577 | car mirror
578 | solar dish
579 | remote control
580 | disk brake
581 | buckle
582 | hair slide
583 | knot
584 | combination lock
585 | padlock
586 | nail
587 | safety pin
588 | screw
589 | muzzle
590 | seat belt
591 | ski
592 | candle
593 | jack-o'-lantern
594 | spotlight
595 | torch
596 | neck brace
597 | pier
598 | tripod
599 | maypole
600 | mousetrap
601 | spider web
602 | trilobite
603 | harvestman
604 | scorpion
605 | black and gold garden spider
606 | barn spider
607 | garden spider
608 | black widow
609 | tarantula
610 | wolf spider
611 | tick
612 | centipede
613 | isopod
614 | Dungeness crab
615 | rock crab
616 | fiddler crab
617 | king crab
618 | American lobster
619 | spiny lobster
620 | crayfish
621 | hermit crab
622 | tiger beetle
623 | ladybug
624 | ground beetle
625 | long-horned beetle
626 | leaf beetle
627 | dung beetle
628 | rhinoceros beetle
629 | weevil
630 | fly
631 | bee
632 | grasshopper
633 | cricket
634 | walking stick
635 | cockroach
636 | mantis
637 | cicada
638 | leafhopper
639 | lacewing
640 | dragonfly
641 | damselfly
642 | admiral
643 | ringlet
644 | monarch
645 | cabbage butterfly
646 | sulphur butterfly
647 | lycaenid
648 | jellyfish
649 | sea anemone
650 | brain coral
651 | flatworm
652 | nematode
653 | conch
654 | snail
655 | slug
656 | sea slug
657 | chiton
658 | sea urchin
659 | sea cucumber
660 | iron
661 | espresso maker
662 | microwave
663 | Dutch oven
664 | rotisserie
665 | toaster
666 | waffle iron
667 | vacuum
668 | dishwasher
669 | refrigerator
670 | washer
671 | Crock Pot
672 | frying pan
673 | wok
674 | caldron
675 | coffeepot
676 | teapot
677 | spatula
678 | altar
679 | triumphal arch
680 | patio
681 | steel arch bridge
682 | suspension bridge
683 | viaduct
684 | barn
685 | greenhouse
686 | palace
687 | monastery
688 | library
689 | apiary
690 | boathouse
691 | church
692 | mosque
693 | stupa
694 | planetarium
695 | restaurant
696 | cinema
697 | home theater
698 | lumbermill
699 | coil
700 | obelisk
701 | totem pole
702 | castle
703 | prison
704 | grocery store
705 | bakery
706 | barbershop
707 | bookshop
708 | butcher shop
709 | confectionery
710 | shoe shop
711 | tobacco shop
712 | toyshop
713 | fountain
714 | cliff dwelling
715 | yurt
716 | dock
717 | brass
718 | megalith
719 | bannister
720 | breakwater
721 | dam
722 | chainlink fence
723 | picket fence
724 | worm fence
725 | stone wall
726 | grille
727 | sliding door
728 | turnstile
729 | mountain tent
730 | scoreboard
731 | honeycomb
732 | plate rack
733 | pedestal
734 | beacon
735 | mashed potato
736 | bell pepper
737 | head cabbage
738 | broccoli
739 | cauliflower
740 | zucchini
741 | spaghetti squash
742 | acorn squash
743 | butternut squash
744 | cucumber
745 | artichoke
746 | cardoon
747 | mushroom
748 | shower curtain
749 | jean
750 | carton
751 | handkerchief
752 | sandal
753 | ashcan
754 | safe
755 | plate
756 | necklace
757 | croquet ball
758 | fur coat
759 | thimble
760 | pajama
761 | running shoe
762 | cocktail shaker
763 | chest
764 | manhole cover
765 | modem
766 | tub
767 | tray
768 | balance beam
769 | bagel
770 | prayer rug
771 | kimono
772 | hot pot
773 | whiskey jug
774 | knee pad
775 | book jacket
776 | spindle
777 | ski mask
778 | beer bottle
779 | crash helmet
780 | bottlecap
781 | tile roof
782 | mask
783 | maillot
784 | Petri dish
785 | football helmet
786 | bathing cap
787 | teddy bear
788 | holster
789 | pop bottle
790 | photocopier
791 | vestment
792 | crossword puzzle
793 | golf ball
794 | trifle
795 | suit
796 | water tower
797 | feather boa
798 | cloak
799 | red wine
800 | drumstick
801 | shield
802 | Christmas stocking
803 | hoopskirt
804 | menu
805 | stage
806 | bonnet
807 | meat loaf
808 | baseball
809 | face powder
810 | scabbard
811 | sunscreen
812 | beer glass
813 | hen-of-the-woods
814 | guacamole
815 | lampshade
816 | wool
817 | hay
818 | bow tie
819 | mailbag
820 | water jug
821 | bucket
822 | dishrag
823 | soup bowl
824 | eggnog
825 | mortar
826 | trench coat
827 | paddle
828 | chain
829 | swab
830 | mixing bowl
831 | potpie
832 | wine bottle
833 | shoji
834 | bulletproof vest
835 | drilling platform
836 | binder
837 | cardigan
838 | sweatshirt
839 | pot
840 | birdhouse
841 | hamper
842 | ping-pong ball
843 | pencil box
844 | pay-phone
845 | consomme
846 | apron
847 | punching bag
848 | backpack
849 | groom
850 | bearskin
851 | pencil sharpener
852 | broom
853 | mosquito net
854 | abaya
855 | mortarboard
856 | poncho
857 | crutch
858 | Polaroid camera
859 | space bar
860 | cup
861 | racket
862 | traffic light
863 | quill
864 | radio
865 | dough
866 | cuirass
867 | military uniform
868 | lipstick
869 | shower cap
870 | monitor
871 | oscilloscope
872 | mitten
873 | brassiere
874 | French loaf
875 | vase
876 | milk can
877 | rugby ball
878 | paper towel
879 | earthstar
880 | envelope
881 | miniskirt
882 | cowboy hat
883 | trolleybus
884 | perfume
885 | bathtub
886 | hotdog
887 | coral fungus
888 | bullet train
889 | pillow
890 | toilet tissue
891 | cassette
892 | carpenter's kit
893 | ladle
894 | stinkhorn
895 | lotion
896 | hair spray
897 | academic gown
898 | dome
899 | crate
900 | wig
901 | burrito
902 | pill bottle
903 | chain mail
904 | theater curtain
905 | window shade
906 | barrel
907 | washbasin
908 | ballpoint
909 | basketball
910 | bath towel
911 | cowboy boot
912 | gown
913 | window screen
914 | agaric
915 | cellular telephone
916 | nipple
917 | barbell
918 | mailbox
919 | lab coat
920 | fire screen
921 | minibus
922 | packet
923 | maze
924 | pole
925 | horizontal bar
926 | sombrero
927 | pickelhaube
928 | rain barrel
929 | wallet
930 | cassette player
931 | comic book
932 | piggy bank
933 | street sign
934 | bell cote
935 | fountain pen
936 | Windsor tie
937 | volleyball
938 | overskirt
939 | sarong
940 | purse
941 | bolo tie
942 | bib
943 | parachute
944 | sleeping bag
945 | television
946 | swimming trunks
947 | measuring cup
948 | espresso
949 | pizza
950 | breastplate
951 | shopping basket
952 | wooden spoon
953 | saltshaker
954 | chocolate sauce
955 | ballplayer
956 | goblet
957 | gyromitra
958 | stretcher
959 | water bottle
960 | dial telephone
961 | soap dispenser
962 | jersey
963 | school bus
964 | jigsaw puzzle
965 | plastic bag
966 | reflex camera
967 | diaper
968 | Band Aid
969 | ice lolly
970 | velvet
971 | tennis ball
972 | gasmask
973 | doormat
974 | Loafer
975 | ice cream
976 | pretzel
977 | quilt
978 | maillot
979 | tape player
980 | clog
981 | iPod
982 | bolete
983 | scuba diver
984 | pitcher
985 | matchstick
986 | bikini
987 | sock
988 | CD player
989 | lens cap
990 | thatch
991 | vault
992 | beaker
993 | bubble
994 | cheeseburger
995 | parallel bars
996 | flagpole
997 | coffee mug
998 | rubber eraser
999 | stole
1000 | carbonara
1001 | dumbbell
--------------------------------------------------------------------------------
/tensorflow-image-recognition/src/main/resources/generatedModels/CNN_inception5h/tensorflow_inception_graph.pb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaiwaehner/kafka-streams-machine-learning-examples/3977e3928d2719fc710bb3d202851ee8904be2e7/tensorflow-image-recognition/src/main/resources/generatedModels/CNN_inception5h/tensorflow_inception_graph.pb
--------------------------------------------------------------------------------
/tensorflow-image-recognition/src/main/resources/generatedModels/TensorFlow_Census/saved_model.pb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaiwaehner/kafka-streams-machine-learning-examples/3977e3928d2719fc710bb3d202851ee8904be2e7/tensorflow-image-recognition/src/main/resources/generatedModels/TensorFlow_Census/saved_model.pb
--------------------------------------------------------------------------------
/tensorflow-image-recognition/src/main/resources/generatedModels/TensorFlow_Census/test.json:
--------------------------------------------------------------------------------
1 | {"age": 25, "workclass": " Private", "education": " 11th", "education_num": 7, "marital_status": " Never-married", "occupation": " Machine-op-inspct", "relationship": " Own-child", "race": " Black", "gender": " Male", "capital_gain": 0, "capital_loss": 0, "hours_per_week": 40, "native_country": " United-States"}
2 |
--------------------------------------------------------------------------------
/tensorflow-image-recognition/src/main/resources/generatedModels/TensorFlow_Census/variables/variables.data-00000-of-00001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaiwaehner/kafka-streams-machine-learning-examples/3977e3928d2719fc710bb3d202851ee8904be2e7/tensorflow-image-recognition/src/main/resources/generatedModels/TensorFlow_Census/variables/variables.data-00000-of-00001
--------------------------------------------------------------------------------
/tensorflow-image-recognition/src/main/resources/generatedModels/TensorFlow_Census/variables/variables.index:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaiwaehner/kafka-streams-machine-learning-examples/3977e3928d2719fc710bb3d202851ee8904be2e7/tensorflow-image-recognition/src/main/resources/generatedModels/TensorFlow_Census/variables/variables.index
--------------------------------------------------------------------------------
/tensorflow-image-recognition/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | log4j.rootLogger=INFO, stdout
2 |
3 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender
4 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
5 | log4j.appender.stdout.layout.ConversionPattern=%p %m (%c:%L) %n
--------------------------------------------------------------------------------
/tensorflow-image-recognition/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java:
--------------------------------------------------------------------------------
1 | package com.github.jukkakarvanen.kafka.streams.integration.utils;
2 |
3 | import org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster;
4 | import org.slf4j.Logger;
5 | import org.slf4j.LoggerFactory;
6 |
7 | import java.util.Properties;
8 |
9 | /** This is helper class to workaround for Failing stream tests in Windows environment KAFKA-6647.
10 | *
11 | * @author Jukka Karvanen
12 | *
13 | * The causing issue is https://issues.apache.org/jira/browse/KAFKA-6647
14 | * Replacing EmbeddedKafkaCluster with TestEmbeddedKafkaCluster will catch and ignore the exception
15 | * happening during the tear down of the test
16 | * The exception does not have affect to functionality
17 | */
18 |
19 | public class TestEmbeddedKafkaCluster extends EmbeddedKafkaCluster {
20 | private static final Logger log = LoggerFactory.getLogger(TestEmbeddedKafkaCluster.class);
21 |
22 | public TestEmbeddedKafkaCluster(int numBrokers) {
23 | super(numBrokers);
24 | }
25 |
26 | public TestEmbeddedKafkaCluster(int numBrokers, Properties brokerConfig) {
27 | super(numBrokers, brokerConfig);
28 | }
29 |
30 | public TestEmbeddedKafkaCluster(int numBrokers, Properties brokerConfig, long mockTimeMillisStart) {
31 | super(numBrokers, brokerConfig, mockTimeMillisStart);
32 | }
33 |
34 | public TestEmbeddedKafkaCluster(int numBrokers, Properties brokerConfig, long mockTimeMillisStart, long mockTimeNanoStart) {
35 | super(numBrokers, brokerConfig, mockTimeMillisStart, mockTimeNanoStart);
36 | }
37 |
38 | public void after() {
39 | try {
40 | super.after();
41 | } catch (RuntimeException e) {
42 | log.warn("Ignoring exception, test failing in Windows due this exception {}", e);
43 | }
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
/tensorflow-image-recognition/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java:
--------------------------------------------------------------------------------
1 | package com.github.jukkakarvanen.kafka.streams.integration.utils;
2 |
3 | import org.apache.kafka.common.utils.Time;
4 | import org.apache.kafka.streams.KafkaClientSupplier;
5 | import org.apache.kafka.streams.KafkaStreams;
6 | import org.apache.kafka.streams.Topology;
7 | import org.slf4j.Logger;
8 | import org.slf4j.LoggerFactory;
9 |
10 | import java.util.Properties;
11 |
12 | /** This is helper class to workaround for Failing stream tests in Windows environment KAFKA-6647.
13 | *
14 | * @author Jukka Karvanen
15 | *
16 | * The causing issue is https://issues.apache.org/jira/browse/KAFKA-6647
17 | * Replacing KafkaStreams with TestKafkaStreams will catch and ignore the exception caused by cleanUp
18 | * The exception does not have affect to functionality
19 | */
20 |
21 | public class TestKafkaStreams extends KafkaStreams {
22 | private static final Logger log = LoggerFactory.getLogger(TestKafkaStreams.class);
23 |
24 | public TestKafkaStreams(Topology topology, Properties props) {
25 | super(topology, props);
26 | }
27 |
28 | public TestKafkaStreams(Topology topology, Properties props, KafkaClientSupplier clientSupplier) {
29 | super(topology, props, clientSupplier);
30 | }
31 |
32 | public TestKafkaStreams(Topology topology, Properties props, Time time) {
33 | super(topology, props, time);
34 | }
35 |
36 | public TestKafkaStreams(Topology topology, Properties props, KafkaClientSupplier clientSupplier, Time time) {
37 | super(topology, props, clientSupplier, time);
38 | }
39 |
40 | public void cleanUp() {
41 | try {
42 | super.cleanUp();
43 | } catch (RuntimeException e) {
44 | log.warn("Ignoring exception, test failing in Windows due this exception {}", e);
45 | }
46 | }
47 | }
48 |
--------------------------------------------------------------------------------
/tensorflow-image-recognition/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_TensorFlow_Image_Recognition_ExampleTest.java:
--------------------------------------------------------------------------------
1 | package com.github.megachucky.kafka.streams.machinelearning;
2 |
3 | import org.apache.kafka.clients.producer.ProducerRecord;
4 | import org.apache.kafka.common.serialization.StringDeserializer;
5 | import org.apache.kafka.common.serialization.StringSerializer;
6 | import org.apache.kafka.streams.KeyValue;
7 | import org.apache.kafka.streams.TopologyTestDriver;
8 | import org.apache.kafka.streams.test.ConsumerRecordFactory;
9 | import org.junit.After;
10 | import org.junit.Before;
11 | import org.junit.Test;
12 |
13 | import java.io.IOException;
14 | import java.util.Arrays;
15 | import java.util.List;
16 | import java.util.stream.Collectors;
17 |
18 | import static org.assertj.core.api.AssertionsForInterfaceTypes.assertThat;
19 |
20 | /**
21 | * TopologyTestDriver based test about stream processing of Kafka_Streams_TensorFlow_Image_Recognition_Example.
22 | *
23 | * @author Jukka Karvanen / jukinimi.com
24 | * * Unit Test of
25 | * {@link Kafka_Streams_TensorFlow_Image_Recognition_Example}, using an
26 | * TopologyTestDriver and a TensorFlow CNN model.
27 | *
28 | */
29 |
30 | public class Kafka_Streams_TensorFlow_Image_Recognition_ExampleTest {
31 | private TopologyTestDriver testDriver;
32 |
33 | private StringDeserializer stringDeserializer = new StringDeserializer();
34 | private ConsumerRecordFactory recordFactory = new ConsumerRecordFactory<>(new StringSerializer(), new StringSerializer());
35 |
36 | @Before
37 | public void setup() throws IOException {
38 | testDriver = new TopologyTestDriver(Kafka_Streams_TensorFlow_Image_Recognition_Example.getStreamTopology(), Kafka_Streams_TensorFlow_Image_Recognition_Example.getStreamConfiguration("localhost:9092"));
39 | }
40 |
41 | @After
42 | public void tearDown() {
43 | try {
44 | testDriver.close();
45 | } catch (RuntimeException e) {
46 | // https://issues.apache.org/jira/browse/KAFKA-6647 causes exception when executed in Windows, ignoring it
47 | // Logged stacktrace cannot be avoided
48 | System.out.println("Ignoring exception, test failing in Windows due this exception:" + e.getLocalizedMessage());
49 | }
50 | }
51 |
52 | private String getOutput() {
53 | ProducerRecord output = testDriver.readOutput(Kafka_Streams_TensorFlow_Image_Recognition_Example.imageOutputTopic, stringDeserializer, stringDeserializer);
54 | assertThat(output).isNotNull();
55 | return output.value();
56 | }
57 |
58 | /** Simple recognition test validating only the recognition part of the output
59 | */
60 | @Test
61 | public void testOne() {
62 | testDriver.pipeInput(recordFactory.create(Kafka_Streams_TensorFlow_Image_Recognition_Example.imageInputTopic, null, "src/main/resources/TensorFlow_Images/new_airplane.jpg", 1L));
63 | assertThat(getOutput()).contains("What is the content of this picture? => airliner");
64 | }
65 |
66 | /** Test based on Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest
67 | *
68 | */
69 | @Test
70 | public void testList() {
71 | // Images: 'unknown', Airliner, 'unknown', Butterfly
72 | List inputValues = Arrays.asList("src/main/resources/TensorFlow_Images/trained_airplane_2.jpg",
73 | "src/main/resources/TensorFlow_Images/devil.png",
74 | "src/main/resources/TensorFlow_Images/trained_butterfly.jpg");
75 | List> records = inputValues.stream().map(v -> new KeyValue(null, v)).collect(Collectors.toList());
76 |
77 |
78 | testDriver.pipeInput(recordFactory.create(Kafka_Streams_TensorFlow_Image_Recognition_Example.imageInputTopic, records, 1L, 100L));
79 | assertThat(getOutput()).contains("What is the content of this picture? => airliner");
80 | assertThat(getOutput()).doesNotContain("What is the content of this picture? => airliner");
81 | assertThat(getOutput()).contains("What is the content of this picture? => cabbage butterfly");
82 | }
83 |
84 |
85 | }
86 |
--------------------------------------------------------------------------------
/tensorflow-image-recognition/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest.java:
--------------------------------------------------------------------------------
1 | package com.github.megachucky.kafka.streams.machinelearning.test;
2 |
3 | import static org.assertj.core.api.Assertions.assertThat;
4 |
5 | import java.io.IOException;
6 | import java.nio.charset.Charset;
7 | import java.nio.file.Files;
8 | import java.nio.file.Path;
9 | import java.nio.file.Paths;
10 | import java.util.Arrays;
11 | import java.util.List;
12 | import java.util.Properties;
13 |
14 | import com.github.jukkakarvanen.kafka.streams.integration.utils.TestEmbeddedKafkaCluster;
15 | import com.github.jukkakarvanen.kafka.streams.integration.utils.TestKafkaStreams;
16 | import org.apache.kafka.clients.consumer.ConsumerConfig;
17 | import org.apache.kafka.clients.producer.ProducerConfig;
18 | import org.apache.kafka.common.serialization.Serdes;
19 | import org.apache.kafka.common.serialization.StringDeserializer;
20 | import org.apache.kafka.common.serialization.StringSerializer;
21 | import org.apache.kafka.common.utils.MockTime;
22 | import org.apache.kafka.streams.KafkaStreams;
23 | import org.apache.kafka.streams.KeyValue;
24 | import org.apache.kafka.streams.StreamsBuilder;
25 | import org.apache.kafka.streams.StreamsConfig;
26 | import org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster;
27 | import org.apache.kafka.streams.integration.utils.IntegrationTestUtils;
28 | import org.apache.kafka.streams.kstream.KStream;
29 | import org.junit.BeforeClass;
30 | import org.junit.ClassRule;
31 | import org.junit.Test;
32 | import org.tensorflow.DataType;
33 | import org.tensorflow.Graph;
34 | import org.tensorflow.Output;
35 | import org.tensorflow.Session;
36 | import org.tensorflow.Tensor;
37 |
38 | import com.github.megachucky.kafka.streams.machinelearning.Kafka_Streams_TensorFlow_Image_Recognition_Example;
39 |
40 | /**
41 | *
42 | * @author Kai Waehner (www.kai-waehner.de)
43 | *
44 | * End-to-end integration test based on
45 | * {@link Kafka_Streams_TensorFlow_Image_Recognition_Example}, using an
46 | * embedded Kafka cluster and a TensorFlow CNN model.
47 | *
48 | * *
49 | */
50 | public class Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest {
51 |
52 | @ClassRule
53 | public static final EmbeddedKafkaCluster CLUSTER = new TestEmbeddedKafkaCluster(1);
54 |
55 | private static final String inputTopic = "ImageInputTopic";
56 | private static final String outputTopic = "ImageOutputTopic";
57 |
58 | // Prediction Value
59 | private static String imageClassification = "unknown";
60 |
61 | @BeforeClass
62 | public static void startKafkaCluster() throws Exception {
63 | CLUSTER.createTopic(inputTopic);
64 | CLUSTER.createTopic(outputTopic);
65 | }
66 |
67 | @Test
68 | public void shouldRecognizeImages() throws Exception {
69 |
70 | // Images: 'unknown', Airliner, 'unknown', Butterfly
71 | List inputValues = Arrays.asList("src/main/resources/TensorFlow_Images/trained_airplane_2.jpg",
72 | "src/main/resources/TensorFlow_Images/devil.png",
73 | "src/main/resources/TensorFlow_Images/trained_butterfly.jpg");
74 |
75 | // ########################################################
76 | // Step 1: Configure and start the processor topology.
77 | // ########################################################
78 |
79 | Properties streamsConfiguration = new Properties();
80 | streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG,
81 | "kafka-streams-tensorflow-image-recognition-integration-test");
82 | streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
83 |
84 | // Create TensorFlow object
85 | String modelDir = "src/main/resources/generatedModels/CNN_inception5h";
86 |
87 | Path pathGraph = Paths.get(modelDir, "tensorflow_inception_graph.pb");
88 | byte[] graphDef = Files.readAllBytes(pathGraph);
89 |
90 | Path pathModel = Paths.get(modelDir, "imagenet_comp_graph_label_strings.txt");
91 | List labels = Files.readAllLines(pathModel, Charset.forName("UTF-8"));
92 |
93 | // Configure Kafka Streams Application
94 | // Specify default (de)serializers for record keys and for record
95 | // values.
96 | streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
97 | streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
98 |
99 | // In the subsequent lines we define the processing topology of the
100 | // Streams application.
101 | final StreamsBuilder builder = new StreamsBuilder();
102 |
103 | // Construct a `KStream` from the input topic "AirlineInputTopic", where
104 | // message values
105 | // represent lines of text (for the sake of this example, we ignore
106 | // whatever may be stored
107 | // in the message keys).
108 | final KStream imageInputLines = builder.stream(inputTopic);
109 |
110 | // Stream Processor (in this case 'foreach' to add custom logic, i.e.
111 | // apply the analytic model)
112 | imageInputLines.foreach((key, value) -> {
113 |
114 | imageClassification = "unknown";
115 |
116 | String imageFile = value;
117 |
118 | Path pathImage = Paths.get(imageFile);
119 | byte[] imageBytes;
120 | try {
121 | imageBytes = Files.readAllBytes(pathImage);
122 |
123 | // Load and execute TensorFlow graph
124 | try (Tensor image = constructAndExecuteGraphToNormalizeImage(imageBytes)) {
125 | float[] labelProbabilities = executeInceptionGraph(graphDef, image);
126 | int bestLabelIdx = maxIndex(labelProbabilities);
127 |
128 | imageClassification = labels.get(bestLabelIdx);
129 |
130 | System.out.println(String.format("BEST MATCH: %s (%.2f%% likely)", imageClassification,
131 | labelProbabilities[bestLabelIdx] * 100f));
132 | }
133 |
134 | } catch (IOException e) {
135 | e.printStackTrace();
136 | }
137 |
138 | });
139 |
140 | // Transform message: Add prediction information
141 | KStream transformedMessage = imageInputLines
142 | .mapValues(value -> "Image Recognition: What is content of the picture? => " + imageClassification);
143 |
144 | // Send prediction information to Output Topic
145 | transformedMessage.to(outputTopic);
146 |
147 | // Start Kafka Streams Application to process new incoming messages from
148 | // Input Topic
149 | final KafkaStreams streams = new TestKafkaStreams(builder.build(), streamsConfiguration);
150 | streams.cleanUp();
151 | streams.start();
152 | System.out.println("Image Recognition Microservice is running...");
153 | System.out.println("Input to Kafka Topic " + inputTopic + "; Output to Kafka Topic " + outputTopic);
154 |
155 | // ########################################################
156 | // Step 2: Produce some input data to the input topic.
157 | // ########################################################
158 |
159 | Properties producerConfig = new Properties();
160 | producerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
161 | producerConfig.put(ProducerConfig.ACKS_CONFIG, "all");
162 | producerConfig.put(ProducerConfig.RETRIES_CONFIG, 0);
163 | producerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
164 | producerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
165 | IntegrationTestUtils.produceValuesSynchronously(inputTopic, inputValues, producerConfig, new MockTime());
166 |
167 | // ########################################################
168 | // Step 3: Verify the application's output data.
169 | // ########################################################
170 |
171 | Properties consumerConfig = new Properties();
172 | consumerConfig.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
173 | consumerConfig.put(ConsumerConfig.GROUP_ID_CONFIG,
174 | "kafka-streams-tensorflow-image-recognition-integration-test-standard-consumer");
175 | consumerConfig.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
176 | consumerConfig.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
177 | consumerConfig.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
178 | List> response = IntegrationTestUtils
179 | .waitUntilMinKeyValueRecordsReceived(consumerConfig, outputTopic, 3);
180 | streams.close();
181 | assertThat(response).isNotNull();
182 | assertThat(response.get(0).value).isEqualTo("Image Recognition: What is content of the picture? => airliner");
183 | assertThat(response.get(1).value)
184 | .isNotEqualTo("Image Recognition: What is content of the picture? => airliner");
185 | assertThat(response.get(2).value)
186 | .isEqualTo("Image Recognition: What is content of the picture? => cabbage butterfly");
187 |
188 | }
189 |
190 | // ########################################################################################
191 | // Private helper class for construction and execution of the pre-built
192 | // TensorFlow model
193 | // ########################################################################################
194 |
195 | private static Tensor constructAndExecuteGraphToNormalizeImage(byte[] imageBytes) {
196 | try (Graph g = new Graph()) {
197 | GraphBuilder b = new GraphBuilder(g);
198 | // Some constants specific to the pre-trained model at:
199 | // https://storage.googleapis.com/download.tensorflow.org/models/inception5h.zip
200 | //
201 | // - The model was trained with images scaled to 224x224 pixels.
202 | // - The colors, represented as R, G, B in 1-byte each were
203 | // converted to
204 | // float using (value - Mean)/Scale.
205 | final int H = 224;
206 | final int W = 224;
207 | final float mean = 117f;
208 | final float scale = 1f;
209 |
210 | // Since the graph is being constructed once per execution here, we
211 | // can use a constant for the
212 | // input image. If the graph were to be re-used for multiple input
213 | // images, a placeholder would
214 | // have been more appropriate.
215 | final Output input = b.constant("input", imageBytes);
216 | final Output output = b
217 | .div(b.sub(
218 | b.resizeBilinear(b.expandDims(b.cast(b.decodeJpeg(input, 3), DataType.FLOAT),
219 | b.constant("make_batch", 0)), b.constant("size", new int[] { H, W })),
220 | b.constant("mean", mean)), b.constant("scale", scale));
221 | try (Session s = new Session(g)) {
222 | return s.runner().fetch(output.op().name()).run().get(0);
223 | }
224 | }
225 | }
226 |
227 | private static float[] executeInceptionGraph(byte[] graphDef, Tensor image) {
228 | try (Graph g = new Graph()) {
229 | g.importGraphDef(graphDef);
230 | try (Session s = new Session(g);
231 | Tensor result = s.runner().feed("input", image).fetch("output").run().get(0)) {
232 | final long[] rshape = result.shape();
233 | if (result.numDimensions() != 2 || rshape[0] != 1) {
234 | throw new RuntimeException(String.format(
235 | "Expected model to produce a [1 N] shaped tensor where N is the number of labels, instead it produced one with shape %s",
236 | Arrays.toString(rshape)));
237 | }
238 | int nlabels = (int) rshape[1];
239 | return result.copyTo(new float[1][nlabels])[0];
240 | }
241 | }
242 | }
243 |
244 | private static int maxIndex(float[] probabilities) {
245 | int best = 0;
246 | for (int i = 1; i < probabilities.length; ++i) {
247 | if (probabilities[i] > probabilities[best]) {
248 | best = i;
249 | }
250 | }
251 | return best;
252 | }
253 |
254 | // In the fullness of time, equivalents of the methods of this class should
255 | // be auto-generated from
256 | // the OpDefs linked into libtensorflow_jni.so. That would match what is
257 | // done in other languages
258 | // like Python, C++ and Go.
259 | static class GraphBuilder {
260 | GraphBuilder(Graph g) {
261 | this.g = g;
262 | }
263 |
264 | Output div(Output x, Output y) {
265 | return binaryOp("Div", x, y);
266 | }
267 |
268 | Output sub(Output x, Output y) {
269 | return binaryOp("Sub", x, y);
270 | }
271 |
272 | Output resizeBilinear(Output images, Output size) {
273 | return binaryOp("ResizeBilinear", images, size);
274 | }
275 |
276 | Output expandDims(Output input, Output dim) {
277 | return binaryOp("ExpandDims", input, dim);
278 | }
279 |
280 | Output cast(Output value, DataType dtype) {
281 | return g.opBuilder("Cast", "Cast").addInput(value).setAttr("DstT", dtype).build().output(0);
282 | }
283 |
284 | Output decodeJpeg(Output contents, long channels) {
285 | return g.opBuilder("DecodeJpeg", "DecodeJpeg").addInput(contents).setAttr("channels", channels).build()
286 | .output(0);
287 | }
288 |
289 | Output constant(String name, Object value) {
290 | try (Tensor t = Tensor.create(value)) {
291 | return g.opBuilder("Const", name).setAttr("dtype", t.dataType()).setAttr("value", t).build().output(0);
292 | }
293 | }
294 |
295 | private Output binaryOp(String type, Output in1, Output in2) {
296 | return g.opBuilder(type, type).addInput(in1).addInput(in2).build().output(0);
297 | }
298 |
299 | private Graph g;
300 | }
301 |
302 | }
303 |
--------------------------------------------------------------------------------
/tensorflow-keras/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
4 | 4.0.0
5 |
6 | com.github.kaiwaehner.kafka.streams.machinelearning
7 | tensorflow-keras
8 | CP55_AK25
9 |
10 |
11 |
12 | confluent
13 | http://packages.confluent.io/maven/
14 |
15 |
16 |
17 |
18 | 1.8
19 | 2.5.0
20 | 2.12
21 | ${kafka.scala.version}.8
22 | 5.5.0
23 | UTF-8
24 |
25 |
26 |
27 |
28 |
31 |
32 |
33 | org.apache.kafka
34 | kafka-streams
35 | ${kafka.version}
36 |
37 |
38 |
42 |
43 |
45 |
46 |
47 | org.nd4j
48 | nd4j-native-platform
49 | 1.0.0-beta3
50 |
51 |
52 |
53 |
54 |
55 | org.deeplearning4j
56 | deeplearning4j-core
57 | 1.0.0-beta3
58 |
59 |
60 |
61 | org.deeplearning4j
62 | deeplearning4j-modelimport
63 | 1.0.0-beta3
64 |
65 |
66 |
67 |
68 |
69 | org.tensorflow
70 | tensorflow
71 | 1.3.0
72 |
73 |
74 |
75 |
76 |
77 | junit
78 | junit
79 | 4.12
80 | test
81 |
82 |
83 | org.assertj
84 | assertj-core
85 | 3.3.0
86 | test
87 |
88 |
89 | org.apache.kafka
90 | kafka_${kafka.scala.version}
91 | ${kafka.version}
92 | test
93 | test
94 |
95 |
96 | org.apache.kafka
97 | kafka-clients
98 | ${kafka.version}
99 | test
100 | test
101 |
102 |
103 | org.apache.kafka
104 | kafka-streams
105 | ${kafka.version}
106 | test
107 | test
108 |
109 |
110 | org.apache.curator
111 | curator-test
112 | 2.9.0
113 | test
114 |
115 |
116 | io.confluent
117 | kafka-schema-registry
118 | ${confluent.version}
119 | test
120 |
121 |
122 | io.confluent
123 | kafka-schema-registry
124 | ${confluent.version}
125 |
126 | tests
127 | test
128 |
129 |
130 | org.hamcrest
131 | hamcrest
132 | 2.1
133 | test
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 | org.apache.maven.plugins
142 | maven-compiler-plugin
143 | 3.6.1
144 |
145 | 1.8
146 | 1.8
147 |
148 |
149 |
150 |
151 |
178 |
179 |
180 |
181 |
--------------------------------------------------------------------------------
/tensorflow-keras/readme.md:
--------------------------------------------------------------------------------
1 | # Machine Learning + Kafka Streams Examples
2 |
3 | General info in main [Readme](../readme.md)
4 |
5 | ### Example 4 - Python + Keras + TensorFlow + DeepLearning4j
6 |
7 | **Use Case**
8 |
Development of an analytic model trained with Python, Keras and TensorFlow, and deployment to the Java and Kafka ecosystem. No business case, just a technical demonstration of a simple 'Hello World' Keras model. Feel free to replace the model with any other Keras model trained with your backend of choice. You just need to replace the model binary (and use a model which is compatible with DeepLearning4J's model importer).
10 |
11 | **Machine Learning Technology**
12 | * [Python](https://www.python.org/)
13 | * [DeepLearning4J](https://deeplearning4j.org)
14 | * [Keras](https://keras.io/) - a high-level neural networks API, written in Python and capable of running on top of TensorFlow, CNTK, or Theano.
15 | * [TensorFlow](https://www.tensorflow.org/) - used as backend under the hood of Keras
* DeepLearning4J's [KerasModelImport feature](https://deeplearning4j.org/docs/latest/keras-import-overview) is used for importing the Keras / TensorFlow model into Java. The model used is DL4J's 'Hello World' example model.
17 | * The Keras model was trained with this [Python script](src/main/resources/generatedModels/Keras/keras-model-script.py).
18 |
19 | **Unit Test**
20 |
21 | [Kafka_Streams_TensorFlow_Keras_Example_IntegrationTest.java](src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Keras_Example_IntegrationTest.java)
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
--------------------------------------------------------------------------------
/tensorflow-keras/src/main/resources/generatedModels/Keras/keras-model-script.py:
--------------------------------------------------------------------------------
# Simple Keras Model (source: https://deeplearning4j.org/docs/latest/keras-import-overview)
#
# Defines, compiles and saves a minimal multi-layer perceptron. The script does
# not train the model; it only persists the architecture + initial weights to
# HDF5 so the Java side can import it (see the readme's KerasModelImport note).

from keras.models import Sequential
from keras.layers import Dense

# Two-layer MLP: 100 input features -> 64 ReLU units -> 10-way softmax output.
model = Sequential()
model.add(Dense(units=64, activation='relu', input_dim=100))
model.add(Dense(units=10, activation='softmax'))
model.compile(loss='categorical_crossentropy',optimizer='sgd', metrics=['accuracy'])

# Save the (untrained) model in HDF5 format for import from Java.
model.save('simple_mlp.h5')
--------------------------------------------------------------------------------
/tensorflow-keras/src/main/resources/generatedModels/Keras/simple_mlp.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaiwaehner/kafka-streams-machine-learning-examples/3977e3928d2719fc710bb3d202851ee8904be2e7/tensorflow-keras/src/main/resources/generatedModels/Keras/simple_mlp.h5
--------------------------------------------------------------------------------
/tensorflow-keras/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | log4j.rootLogger=INFO, stdout
2 |
3 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender
4 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
5 | log4j.appender.stdout.layout.ConversionPattern=%p %m (%c:%L) %n
--------------------------------------------------------------------------------
/tensorflow-keras/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java:
--------------------------------------------------------------------------------
1 | package com.github.jukkakarvanen.kafka.streams.integration.utils;
2 |
3 | import org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster;
4 | import org.slf4j.Logger;
5 | import org.slf4j.LoggerFactory;
6 |
7 | import java.util.Properties;
8 |
9 | /** This is helper class to workaround for Failing stream tests in Windows environment KAFKA-6647.
10 | *
11 | * @author Jukka Karvanen
12 | *
13 | * The causing issue is https://issues.apache.org/jira/browse/KAFKA-6647
14 | * Replacing EmbeddedKafkaCluster with TestEmbeddedKafkaCluster will catch and ignore the exception
15 | * happening during the tear down of the test
 * The exception does not affect functionality.
17 | */
18 |
19 | public class TestEmbeddedKafkaCluster extends EmbeddedKafkaCluster {
20 | private static final Logger log = LoggerFactory.getLogger(TestEmbeddedKafkaCluster.class);
21 |
22 | public TestEmbeddedKafkaCluster(int numBrokers) {
23 | super(numBrokers);
24 | }
25 |
26 | public TestEmbeddedKafkaCluster(int numBrokers, Properties brokerConfig) {
27 | super(numBrokers, brokerConfig);
28 | }
29 |
30 | public TestEmbeddedKafkaCluster(int numBrokers, Properties brokerConfig, long mockTimeMillisStart) {
31 | super(numBrokers, brokerConfig, mockTimeMillisStart);
32 | }
33 |
34 | public TestEmbeddedKafkaCluster(int numBrokers, Properties brokerConfig, long mockTimeMillisStart, long mockTimeNanoStart) {
35 | super(numBrokers, brokerConfig, mockTimeMillisStart, mockTimeNanoStart);
36 | }
37 |
38 | public void after() {
39 | try {
40 | super.after();
41 | } catch (RuntimeException e) {
42 | log.warn("Ignoring exception, test failing in Windows due this exception {}", e);
43 | }
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
/tensorflow-keras/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java:
--------------------------------------------------------------------------------
1 | package com.github.jukkakarvanen.kafka.streams.integration.utils;
2 |
3 | import org.apache.kafka.common.utils.Time;
4 | import org.apache.kafka.streams.KafkaClientSupplier;
5 | import org.apache.kafka.streams.KafkaStreams;
6 | import org.apache.kafka.streams.Topology;
7 | import org.slf4j.Logger;
8 | import org.slf4j.LoggerFactory;
9 |
10 | import java.util.Properties;
11 |
12 | /** This is helper class to workaround for Failing stream tests in Windows environment KAFKA-6647.
13 | *
14 | * @author Jukka Karvanen
15 | *
16 | * The causing issue is https://issues.apache.org/jira/browse/KAFKA-6647
17 | * Replacing KafkaStreams with TestKafkaStreams will catch and ignore the exception caused by cleanUp
 * The exception does not affect functionality.
19 | */
20 |
21 | public class TestKafkaStreams extends KafkaStreams {
22 | private static final Logger log = LoggerFactory.getLogger(TestKafkaStreams.class);
23 |
24 | public TestKafkaStreams(Topology topology, Properties props) {
25 | super(topology, props);
26 | }
27 |
28 | public TestKafkaStreams(Topology topology, Properties props, KafkaClientSupplier clientSupplier) {
29 | super(topology, props, clientSupplier);
30 | }
31 |
32 | public TestKafkaStreams(Topology topology, Properties props, Time time) {
33 | super(topology, props, time);
34 | }
35 |
36 | public TestKafkaStreams(Topology topology, Properties props, KafkaClientSupplier clientSupplier, Time time) {
37 | super(topology, props, clientSupplier, time);
38 | }
39 |
40 | public void cleanUp() {
41 | try {
42 | super.cleanUp();
43 | } catch (RuntimeException e) {
44 | log.warn("Ignoring exception, test failing in Windows due this exception {}", e);
45 | }
46 | }
47 | }
48 |
--------------------------------------------------------------------------------
/tensorflow-keras/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Keras_Example_IntegrationTest.java:
--------------------------------------------------------------------------------
1 | package com.github.megachucky.kafka.streams.machinelearning.test;
2 |
3 | import static org.assertj.core.api.Assertions.assertThat;
4 |
5 | import java.util.Arrays;
6 | import java.util.List;
7 | import java.util.Properties;
8 |
9 | import com.github.jukkakarvanen.kafka.streams.integration.utils.TestEmbeddedKafkaCluster;
10 | import com.github.jukkakarvanen.kafka.streams.integration.utils.TestKafkaStreams;
11 | import org.apache.kafka.clients.consumer.ConsumerConfig;
12 | import org.apache.kafka.clients.producer.ProducerConfig;
13 | import org.apache.kafka.common.serialization.Serdes;
14 | import org.apache.kafka.common.serialization.StringDeserializer;
15 | import org.apache.kafka.common.serialization.StringSerializer;
16 | import org.apache.kafka.common.utils.MockTime;
17 | import org.apache.kafka.streams.KafkaStreams;
18 | import org.apache.kafka.streams.KeyValue;
19 | import org.apache.kafka.streams.StreamsBuilder;
20 | import org.apache.kafka.streams.StreamsConfig;
21 | import org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster;
22 | import org.apache.kafka.streams.integration.utils.IntegrationTestUtils;
23 | import org.apache.kafka.streams.kstream.KStream;
24 | import org.deeplearning4j.nn.modelimport.keras.KerasModelImport;
25 | import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
26 | import org.junit.BeforeClass;
27 | import org.junit.ClassRule;
28 | import org.junit.Test;
29 | import org.nd4j.linalg.api.ndarray.INDArray;
30 | import org.nd4j.linalg.factory.Nd4j;
31 | import org.nd4j.linalg.io.ClassPathResource;
32 |
33 | /**
34 | *
35 | * @author Kai Waehner -
36 | * http://www.kai-waehner.de
37 | *
38 | * End-to-end integration test based on
39 | * {@link Kafka_Streams_TensorFlow_Keras_Example}, using an embedded
40 | * Kafka cluster and a Keras model (trained with TensorFlow backend).
41 | * Imported via DL4J Keras Java API support.
42 | *
43 | * *
44 | */
45 | public class Kafka_Streams_TensorFlow_Keras_Example_IntegrationTest {
46 |
47 | @ClassRule
48 | public static final EmbeddedKafkaCluster CLUSTER = new TestEmbeddedKafkaCluster(1);
49 |
50 | private static final String inputTopic = "InputTopic";
51 | private static final String outputTopic = "OutputTopic";
52 |
53 | private static String prediction = "unknown";
54 |
55 | private static INDArray output = null;
56 |
    @BeforeClass
    public static void startKafkaCluster() throws Exception {
        // Create the input/output topics once before any test runs; the embedded
        // cluster itself is started/stopped by the CLUSTER @ClassRule.
        CLUSTER.createTopic(inputTopic);
        CLUSTER.createTopic(outputTopic);
    }
62 |
63 | @Test
64 | public void shouldPredictValues() throws Exception {
65 |
66 | // ########################################################
67 | // Step 1: Load Keras Model using DeepLearning4J API
68 | // ########################################################
69 | String simpleMlp = new ClassPathResource("generatedModels/Keras/simple_mlp.h5").getFile().getPath();
70 | System.out.println(simpleMlp.toString());
71 |
72 | MultiLayerNetwork model = KerasModelImport.importKerasSequentialModelAndWeights(simpleMlp);
73 |
74 | // Create test data which is sent from Kafka Producer into Input Topic
75 | List inputValues = Arrays.asList("256,100");
76 |
77 | // ####################################################################
78 | // Step 2: Configure and start the Kafka Streams processor topology.
79 | // ####################################################################
80 |
81 | Properties streamsConfiguration = new Properties();
82 | streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG,
83 | "kafka-streams-tensorflow-keras-integration-test");
84 | streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
85 |
86 | // Configure Kafka Streams Application
87 | // Specify default (de)serializers for record keys and for record
88 | // values.
89 | streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
90 | streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
91 |
92 | // In the subsequent lines we define the processing topology of the
93 | // Streams application.
94 | final StreamsBuilder builder = new StreamsBuilder();
95 |
96 | // Construct a `KStream` from the input topic, where
97 | // message values represent lines of text (for the sake of this example, we
98 | // ignore whatever may be stored in the message keys).
99 | final KStream inputEvents = builder.stream(inputTopic);
100 |
101 | // ###############################################################
102 | // THIS IS WHERE WE DO REAL TIME MODEL INFERENCE FOR EACH EVENT
103 | // ###############################################################
104 | inputEvents.foreach((key, value) -> {
105 |
106 | // Transform input values (list of Strings) to expected DL4J parameters (two
107 | // Integer values):
108 | String[] valuesAsArray = value.split(",");
109 | INDArray input = Nd4j.create(Integer.parseInt(valuesAsArray[0]), Integer.parseInt(valuesAsArray[1]));
110 |
111 | // Apply the analytic model:
112 | output = model.output(input);
113 | prediction = output.toString();
114 |
115 | });
116 |
117 | // Transform message: Add prediction result
118 | KStream transformedMessage = inputEvents.mapValues(value -> "Prediction => " + prediction);
119 |
120 | // Send prediction result to Output Topic
121 | transformedMessage.to(outputTopic);
122 |
123 | // Start Kafka Streams Application to process new incoming messages from
124 | // Input Topic
125 | final KafkaStreams streams = new TestKafkaStreams(builder.build(), streamsConfiguration);
126 | streams.cleanUp();
127 | streams.start();
128 | System.out.println("Prediction Microservice is running...");
129 | System.out.println("Input to Kafka Topic " + inputTopic + "; Output to Kafka Topic " + outputTopic);
130 |
131 | // ########################################################
132 | // Step 3: Produce some input data to the input topic.
133 | // ########################################################
134 |
135 | Properties producerConfig = new Properties();
136 | producerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
137 | producerConfig.put(ProducerConfig.ACKS_CONFIG, "all");
138 | producerConfig.put(ProducerConfig.RETRIES_CONFIG, 0);
139 | producerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
140 | producerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
141 | IntegrationTestUtils.produceValuesSynchronously(inputTopic, inputValues, producerConfig, new MockTime());
142 |
143 | // ########################################################
144 | // Step 4: Verify the application's output data.
145 | // ########################################################
146 |
147 | Properties consumerConfig = new Properties();
148 | consumerConfig.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
149 | consumerConfig.put(ConsumerConfig.GROUP_ID_CONFIG,
150 | "kafka-streams-tensorflow-keras-integration-test-standard-consumer");
151 | consumerConfig.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
152 | consumerConfig.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
153 | consumerConfig.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
154 | List> response = IntegrationTestUtils
155 | .waitUntilMinKeyValueRecordsReceived(consumerConfig, outputTopic, 1);
156 | streams.close();
157 |
158 | System.out.println("VALUE: " + response.get(0).value);
159 |
160 | assertThat(response).isNotNull();
161 | assertThat(response.get(0).value).doesNotMatch("Value => unknown");
162 | assertThat(response.get(0).value).contains("0.1000, 0.1000, 0.1000");
163 | }
164 |
165 | }
166 |
--------------------------------------------------------------------------------