├── .gitignore ├── .scalafmt.conf ├── LICENSE ├── NOTICE ├── README.md ├── build.sbt ├── project ├── Dependencies.scala ├── Versions.scala ├── build.properties └── plugins.sbt ├── scalastyle-config.xml └── src ├── main └── scala │ └── com │ └── lightbend │ └── kafka │ └── scala │ └── streams │ ├── DefaultSerdes.scala │ ├── FunctionConversions.scala │ ├── ImplicitConversions.scala │ ├── KGroupedStreamS.scala │ ├── KGroupedTableS.scala │ ├── KStreamS.scala │ ├── KTableS.scala │ ├── ScalaSerde.scala │ ├── SessionWindowedKStreamS.scala │ ├── StreamsBuilderS.scala │ └── TimeWindowedKStreamS.scala └── test ├── resources ├── log4j.properties └── logback.xml └── scala └── com └── lightbend └── kafka └── scala ├── server ├── KafkaLocalServer.scala ├── MessageListener.scala ├── MessageSender.scala ├── RecordProcessorTrait.scala └── Utils.scala └── streams ├── KafkaStreamsMergeTest.scala ├── KafkaStreamsTest.scala ├── ProbabilisticCountingScalaIntegrationTest.scala ├── PunctuateTest.scala ├── StreamToTableJoinScalaIntegrationTestImplicitSerdes.scala ├── StreamToTableJoinScalaIntegrationTestImplicitSerdesWithAvro.scala ├── StreamToTableJoinTestData.scala └── algebird ├── CMSStore.scala ├── CMSStoreBuilder.scala ├── CMSStoreChangeLogger.scala └── TopCMSSerde.scala /.gitignore: -------------------------------------------------------------------------------- 1 | *.class 2 | *.log 3 | .cache 4 | .history 5 | .DS_Store 6 | .lib/ 7 | app/* 8 | dist/* 9 | target/ 10 | tmp/ 11 | lib_managed/ 12 | src_managed/ 13 | project/boot/ 14 | project/target/ 15 | project/project/ 16 | project/plugins/project/ 17 | #idea 18 | .idea 19 | *.iml 20 | .idea_modules 21 | *.json 22 | *.json-- 23 | tmp/ 24 | local_state_data/ 25 | *.swp 26 | .scala_dependencies 27 | .worksheet 28 | ingest-intrusion-data/ingest-intrusiondata-tmp/* 29 | bigdl/source/lib 30 | release/staging/ 31 | deploy.conf 32 | 33 | kafka-stream-s/project/build.properties 34 | kafka-stream-q/project/build.properties 35 | kafka-stream-q-example-proc/project/build.properties 36 | kafka-stream-q-example-dsl/project/build.properties 37 | 38 | kafka-stream-q-example-proc/src/main/resources/application-proc.conf 39 | kafka-stream-q-example-dsl/src/main/resources/application-dsl.conf 40 | -------------------------------------------------------------------------------- /.scalafmt.conf: -------------------------------------------------------------------------------- 1 | maxColumn = 120 2 | continuationIndent.defnSite = 2 3 | assumeStandardLibraryStripMargin = true 4 | danglingParentheses = true 5 | align = more 6 | rewrite.rules = [SortImports, RedundantBraces, RedundantParens, SortModifiers] 7 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 |
-------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Kafka Streams Scala 2 | Copyright (C) 2018 Lightbend Inc. 3 | Copyright 2017-2018 Alexis Seigneurin. 4 |
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | **Note:** *The Scala API for Kafka Streams has been accepted for inclusion in Apache Kafka. We have been working with the Kafka team over the last couple of months to meet the standards and guidelines for this contribution. Lightbend and Alexis Seigneurin have contributed this library (with some changes) to the Kafka community. It is already available on [Apache Kafka trunk](https://github.com/apache/kafka/tree/trunk/streams/streams-scala) and will be included in the upcoming release of Kafka. Hence this project will no longer be updated on a regular basis; for some time, however, we will continue to provide bug-fix support only.* 2 | 3 | # A Thin Scala Wrapper Around the Kafka Streams Java API 4 | 5 | [![Build Status](https://secure.travis-ci.org/lightbend/kafka-streams-scala.png)](http://travis-ci.org/lightbend/kafka-streams-scala) 6 | 7 | The library wraps the Kafka Streams Java API in Scala, thereby providing: 8 | 9 | 1. much better type inference in Scala 10 | 2. less boilerplate in application code 11 | 3. the usual builder-style composition that developers get with the original Java API 12 | 4. complete compile-time type safety 13 | 14 | The design of the library was inspired by the work started by Alexis Seigneurin in [this repository](https://github.com/aseigneurin/kafka-streams-scala). 15 | 16 | ## Quick Start 17 | 18 | `kafka-streams-scala` is published and cross-built for Scala `2.11` and `2.12`, so you can just add the following to your build: 19 | 20 | ```scala 21 | val kafka_streams_scala_version = "0.2.1" 22 | 23 | libraryDependencies ++= Seq("com.lightbend" %% 24 | "kafka-streams-scala" % kafka_streams_scala_version) 25 | ``` 26 | 27 | > Note: `kafka-streams-scala` supports Kafka Streams `1.0.0` and later. 28 | 29 | The API docs for `kafka-streams-scala` are available [here](https://developer.lightbend.com/docs/api/kafka-streams-scala/0.2.1/com/lightbend/kafka/scala/streams) for Scala 2.12 and [here](https://developer.lightbend.com/docs/api/kafka-streams-scala_2.11/0.2.1/#package) for Scala 2.11. 30 | 31 | ## Running the Tests 32 | 33 | The library comes with an embedded Kafka server. To run the tests, simply run `sbt testOnly` and all tests will run on the local embedded server. 34 | 35 | > The embedded server is started and stopped for every test and consumes quite a bit of resources. Hence it's recommended that you allocate more heap space to `sbt` when running the tests, e.g. `sbt -mem 2000`.
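If you prefer not to pass the flag on every invocation, one build-level alternative (shown here only as a sketch — these settings are not part of this project's `build.sbt`) is to fork a separate JVM for the tests and give it a larger heap:

```scala
// Hypothetical additions to build.sbt (sbt "in" syntax, matching the style of this build):
// run the tests in a forked JVM and give that JVM a 2 GB heap
fork in Test := true
javaOptions in Test += "-Xmx2g"
```

Note that once the tests are forked, their heap is controlled by `javaOptions in Test` rather than by the `-mem` option passed to the sbt launcher.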
36 | 37 | ```bash 38 | $ sbt -mem 2000 39 | > +clean 40 | > +test 41 | ``` 42 | 43 | ## Type Inference and Composition 44 | 45 | Here's a sample code fragment using the Scala wrapper library. Compare this with the Scala code from the same [example](https://github.com/confluentinc/kafka-streams-examples/blob/4.0.0-post/src/test/scala/io/confluent/examples/streams/StreamToTableJoinScalaIntegrationTest.scala) in Confluent's repository. 46 | 47 | ```scala 48 | // Compute the total per region by summing the individual click counts per region. 49 | val clicksPerRegion: KTableS[String, Long] = userClicksStream 50 | 51 | // Join the stream against the table. 52 | .leftJoin(userRegionsTable, (clicks: Long, region: String) => (if (region == null) "UNKNOWN" else region, clicks)) 53 | 54 | // Change the stream from <user> -> <region, clicks> to <region> -> <clicks>. 55 | .map((_, regionWithClicks) => regionWithClicks) 56 | 57 | // Compute the total per region by summing the individual click counts per region. 58 | .groupByKey 59 | .reduce(_ + _) 60 | ``` 61 | 62 | ## Implicit Serdes 63 | 64 | One of the areas where the Java API's verbosity can be reduced is in providing a succinct way to pass serializers and de-serializers to the various functions. The library uses the power of Scala implicits towards this end. The library makes some decisions that help implement more succinct serdes in a type-safe manner: 65 | 66 | 1. No use of configuration-based default serdes. The Java API allows the user to define default key and value serdes as part of the configuration. This configuration, being implemented as `java.util.Properties`, is type-unsafe and can result in runtime errors if the user forgets to specify a serde or plugs in an incorrect one. `kafka-streams-scala` makes this completely type-safe by allowing all serdes to be specified through Scala implicits. 67 | 2. The library offers implicit conversions from serdes to `Serialized`, `Produced`, `Consumed` or `Joined`. Hence, as a user, you just have to bring the implicit serdes into scope, and all conversions to `Serialized`, `Produced`, `Consumed` or `Joined` will be taken care of automatically. 68 | 69 | 70 | ### Default Serdes 71 | 72 | The library offers a module that contains all the default serdes for the primitive types. Importing the object brings all of these serdes into scope and helps reduce implicit hell. 73 | 74 | ```scala 75 | object DefaultSerdes { 76 | implicit val stringSerde: Serde[String] = Serdes.String() 77 | implicit val longSerde: Serde[Long] = Serdes.Long().asInstanceOf[Serde[Long]] 78 | implicit val byteArraySerde: Serde[Array[Byte]] = Serdes.ByteArray() 79 | implicit val bytesSerde: Serde[org.apache.kafka.common.utils.Bytes] = Serdes.Bytes() 80 | implicit val floatSerde: Serde[Float] = Serdes.Float().asInstanceOf[Serde[Float]] 81 | implicit val doubleSerde: Serde[Double] = Serdes.Double().asInstanceOf[Serde[Double]] 82 | implicit val integerSerde: Serde[Int] = Serdes.Integer().asInstanceOf[Serde[Int]] 83 | } 84 | ``` 85 | 86 | ### Compile-Time Type Safety 87 | 88 | Not only the serdes, but `DefaultSerdes` also brings into scope implicit `Serialized`, `Produced`, `Consumed` and `Joined` instances. So all APIs that accept `Serialized`, `Produced`, `Consumed` or `Joined` will get these instances automatically with an `import DefaultSerdes._`. 89 | 90 | With just one import of `DefaultSerdes._`, the following code does not need any `Serialized`, `Produced`, `Consumed` or `Joined` to be specified explicitly or through the default config.
**And the best part is that for any missing instances of these you get a compilation error.** 91 | 92 | ```scala 93 | import DefaultSerdes._ 94 | 95 | val clicksPerRegion: KTableS[String, Long] = 96 | userClicksStream 97 | 98 | // Join the stream against the table. 99 | .leftJoin(userRegionsTable, (clicks: Long, region: String) => (if (region == null) "UNKNOWN" else region, clicks)) 100 | 101 | // Change the stream from <user> -> <region, clicks> to <region> -> <clicks>. 102 | .map((_, regionWithClicks) => regionWithClicks) 103 | 104 | // Compute the total per region by summing the individual click counts per region. 105 | .groupByKey 106 | .reduce(_ + _) 107 | 108 | // Write the (continuously updating) results to the output topic. 109 | clicksPerRegion.toStream.to(outputTopic) 110 | ``` 111 |
-------------------------------------------------------------------------------- /build.sbt: -------------------------------------------------------------------------------- 1 | import Dependencies._ 2 | 3 | name := "kafka-streams-scala" 4 | organization := "com.lightbend" 5 | version := "0.2.1" 6 | scalaVersion := Versions.Scala_2_12_Version 7 | crossScalaVersions := Versions.CrossScalaVersions 8 | scalacOptions := Seq("-Xexperimental", "-unchecked", "-deprecation", "-Ywarn-unused-import") 9 | licenses := Seq("Apache 2" -> new URL("http://www.apache.org/licenses/LICENSE-2.0.txt")) 10 | developers := List( 11 | Developer("debasishg", "Debasish Ghosh", "@debasishg", url("https://github.com/debasishg")), 12 | Developer("blublinsky", "Boris Lublinsky", "@blublinsky", url("https://github.com/blublinsky")), 13 | Developer("maasg", "Gerard Maas", "@maasg", url("https://github.com/maasg")) 14 | ) 15 | organizationName := "lightbend" 16 | organizationHomepage := Option(url("http://lightbend.com/")) 17 | homepage := scmInfo.value map (_.browseUrl) 18 | scmInfo := Option( 19 | ScmInfo(url("https://github.com/lightbend/kafka-streams-scala"), "git@github.com:lightbend/kafka-streams-scala.git") 20 | ) 21 | 22 | parallelExecution in Test := false 23 | testFrameworks += new TestFramework("minitest.runner.Framework") 24 | 25 | libraryDependencies ++= Seq( 26 | kafkaStreams excludeAll (ExclusionRule("org.slf4j", "slf4j-log4j12"), ExclusionRule("org.apache.zookeeper", 27 | "zookeeper")), 28 | scalaLogging % "test", 29 | logback % "test", 30 | kafka % "test" excludeAll (ExclusionRule("org.slf4j", "slf4j-log4j12"), ExclusionRule("org.apache.zookeeper", 31 | "zookeeper")), 32 | curator % "test", 33 | minitest % "test", 34 | minitestLaws % "test", 35 | algebird % "test", 36 | chill % "test", 37 | avro4s % "test" 38 | ) 39 | 40 | credentials += Credentials(Path.userHome / ".ivy2" / ".credentials") 41 | publishTo := { 42 | val nexus = "https://oss.sonatype.org/" 43 | if (isSnapshot.value) Some("snapshots" at nexus + "content/repositories/snapshots") 44 | else Option("releases" at nexus + "service/local/staging/deploy/maven2") 45 | } 46 | publishArtifact in Test := true 47 |
-------------------------------------------------------------------------------- /project/Dependencies.scala: -------------------------------------------------------------------------------- 1 | import sbt._ 2 | import Versions._ 3 | 4 | object Dependencies { 5 | 6 | implicit class Exclude(module: ModuleID) { 7 | def log4jExclude: ModuleID = 8 | module.excludeAll(ExclusionRule("log4j")) 9 | 10 | def driverExclusions: ModuleID = 11 | module.log4jExclude 12 | .exclude("com.google.guava", "guava") 13 | .excludeAll(ExclusionRule("org.slf4j")) 14 | } 15 | 16 | val kafkaStreams = "org.apache.kafka"
% "kafka-streams" % KafkaVersion 17 | val scalaLogging = "com.typesafe.scala-logging" %% "scala-logging" % ScalaLoggingVersion 18 | val logback = "ch.qos.logback" % "logback-classic" % LogbackVersion 19 | val kafka = "org.apache.kafka" %% "kafka" % KafkaVersion 20 | val curator = "org.apache.curator" % "curator-test" % CuratorVersion 21 | val minitest = "io.monix" %% "minitest" % MinitestVersion 22 | val minitestLaws = "io.monix" %% "minitest-laws" % MinitestVersion 23 | val algebird = "com.twitter" %% "algebird-core" % AlgebirdVersion 24 | val chill = "com.twitter" %% "chill" % ChillVersion 25 | val avro4s = "com.sksamuel.avro4s" %% "avro4s-core" % Avro4sVersion 26 | } 27 |
-------------------------------------------------------------------------------- /project/Versions.scala: -------------------------------------------------------------------------------- 1 | object Versions { 2 | val AlgebirdVersion = "0.13.0" 3 | val ChillVersion = "0.9.2" 4 | val LogbackVersion = "1.2.3" 5 | val KafkaVersion = "1.0.0" 6 | val ScalaLoggingVersion = "3.5.0" 7 | val CuratorVersion = "4.0.0" 8 | val MinitestVersion = "2.0.0" 9 | val JDKVersion = "1.8" 10 | val Scala_2_12_Version = "2.12.6" 11 | val Scala_2_11_Version = "2.11.12" 12 | val Avro4sVersion = "1.8.3" 13 | val CrossScalaVersions = Seq(Scala_2_12_Version, Scala_2_11_Version) 14 | } 15 |
-------------------------------------------------------------------------------- /project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.1.4 2 |
-------------------------------------------------------------------------------- /project/plugins.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("org.scalastyle" %% "scalastyle-sbt-plugin" % "1.0.0") 2 | addSbtPlugin("com.geirsson" % "sbt-scalafmt" % "1.5.0") 3 |
-------------------------------------------------------------------------------- /scalastyle-config.xml: -------------------------------------------------------------------------------- [scalastyle-config.xml: a 117-line Scalastyle XML configuration whose element markup was lost during text extraction; the only surviving text is the description "Scalastyle standard configuration".]
-------------------------------------------------------------------------------- /src/main/scala/com/lightbend/kafka/scala/streams/DefaultSerdes.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | * Copyright 2017-2018 Alexis Seigneurin.
4 | */ 5 | package com.lightbend.kafka.scala.streams 6 | 7 | import org.apache.kafka.common.serialization.{Serde, Serdes} 8 | 9 | /** 10 | * Implicit values for default serdes 11 | */ 12 | object DefaultSerdes { 13 | implicit val stringSerde: Serde[String] = Serdes.String() 14 | implicit val longSerde: Serde[Long] = Serdes.Long().asInstanceOf[Serde[Long]] 15 | implicit val byteArraySerde: Serde[Array[Byte]] = Serdes.ByteArray() 16 | implicit val bytesSerde: Serde[org.apache.kafka.common.utils.Bytes] = Serdes.Bytes() 17 | implicit val floatSerde: Serde[Float] = Serdes.Float().asInstanceOf[Serde[Float]] 18 | implicit val doubleSerde: Serde[Double] = Serdes.Double().asInstanceOf[Serde[Double]] 19 | implicit val integerSerde: Serde[Int] = Serdes.Integer().asInstanceOf[Serde[Int]] 20 | } 21 | -------------------------------------------------------------------------------- /src/main/scala/com/lightbend/kafka/scala/streams/FunctionConversions.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | * Copyright 2017-2018 Alexis Seigneurin. 4 | */ 5 | package com.lightbend.kafka.scala.streams 6 | 7 | import org.apache.kafka.streams.KeyValue 8 | import org.apache.kafka.streams.kstream._ 9 | 10 | /** 11 | * Implicit classes that offer conversions of Scala function literals to 12 | * SAM (Single Abstract Method) objects in Java. These make the Scala APIs much 13 | * more expressive, with less boilerplate and more succinct. 14 | */ 15 | object FunctionConversions { 16 | 17 | implicit class PredicateFromFunction[K, V](val test: (K, V) => Boolean) extends AnyVal { 18 | def asPredicate: Predicate[K, V] = test(_, _) 19 | } 20 | 21 | implicit class MapperFromFunction[T, U, V](val f: (T, U) => V) extends AnyVal { 22 | def asKeyValueMapper: KeyValueMapper[T, U, V] = (k: T, v: U) => f(k, v) 23 | def asValueJoiner: ValueJoiner[T, U, V] = (v1, v2) => f(v1, v2) 24 | } 25 | 26 | implicit class KeyValueMapperFromFunction[K, V, KR, VR](val f: (K, V) => (KR, VR)) extends AnyVal { 27 | def asKeyValueMapper: KeyValueMapper[K, V, KeyValue[KR, VR]] = (k, v) => { 28 | val (kr, vr) = f(k, v) 29 | KeyValue.pair(kr, vr) 30 | } 31 | } 32 | 33 | implicit class ValueMapperFromFunction[V, VR](val f: V => VR) extends AnyVal { 34 | def asValueMapper: ValueMapper[V, VR] = v => f(v) 35 | } 36 | 37 | implicit class AggregatorFromFunction[K, V, VR](val f: (K, V, VR) => VR) extends AnyVal { 38 | def asAggregator: Aggregator[K, V, VR] = (k, v, r) => f(k, v, r) 39 | } 40 | 41 | implicit class MergerFromFunction[K, VR](val f: (K, VR, VR) => VR) extends AnyVal { 42 | def asMerger: Merger[K, VR] = (k, v1, v2) => f(k, v1, v2) 43 | } 44 | 45 | implicit class ReducerFromFunction[V](val f: (V, V) => V) extends AnyVal { 46 | def asReducer: Reducer[V] = (v1, v2) => f(v1, v2) 47 | } 48 | 49 | implicit class InitializerFromFunction[T](val f: () => T) extends AnyVal { 50 | def asInitializer: Initializer[T] = () => f() 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /src/main/scala/com/lightbend/kafka/scala/streams/ImplicitConversions.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | * Copyright 2017-2018 Alexis Seigneurin. 
4 | */ 5 | package com.lightbend.kafka.scala.streams 6 | 7 | import org.apache.kafka.streams.kstream._ 8 | import org.apache.kafka.streams.{Consumed, KeyValue} 9 | import org.apache.kafka.common.serialization.Serde 10 | 11 | import scala.language.implicitConversions 12 | 13 | /** 14 | * Implicit conversions between the Scala wrapper objects and the underlying Java 15 | * objects. 16 | */ 17 | object ImplicitConversions { 18 | 19 | implicit def wrapKStream[K, V](inner: KStream[K, V]): KStreamS[K, V] = 20 | new KStreamS[K, V](inner) 21 | 22 | implicit def wrapKGroupedStream[K, V](inner: KGroupedStream[K, V]): KGroupedStreamS[K, V] = 23 | new KGroupedStreamS[K, V](inner) 24 | 25 | implicit def wrapSessionWindowedKStream[K, V](inner: SessionWindowedKStream[K, V]): SessionWindowedKStreamS[K, V] = 26 | new SessionWindowedKStreamS[K, V](inner) 27 | 28 | implicit def wrapTimeWindowedKStream[K, V](inner: TimeWindowedKStream[K, V]): TimeWindowedKStreamS[K, V] = 29 | new TimeWindowedKStreamS[K, V](inner) 30 | 31 | implicit def wrapKTable[K, V](inner: KTable[K, V]): KTableS[K, V] = 32 | new KTableS[K, V](inner) 33 | 34 | implicit def wrapKGroupedTable[K, V](inner: KGroupedTable[K, V]): KGroupedTableS[K, V] = 35 | new KGroupedTableS[K, V](inner) 36 | 37 | implicit def tuple2ToKeyValue[K, V](tuple: (K, V)): KeyValue[K, V] = new KeyValue(tuple._1, tuple._2) 38 | 39 | //scalastyle:on null 40 | // we would also like to allow users implicit serdes 41 | // and these implicits will convert them to `Serialized`, `Produced` or `Consumed` 42 | 43 | implicit def serializedFromSerde[K, V](implicit keySerde: Serde[K], valueSerde: Serde[V]): Serialized[K, V] = 44 | Serialized.`with`(keySerde, valueSerde) 45 | 46 | implicit def consumedFromSerde[K, V](implicit keySerde: Serde[K], valueSerde: Serde[V]): Consumed[K, V] = 47 | Consumed.`with`(keySerde, valueSerde) 48 | 49 | implicit def producedFromSerde[K, V](implicit keySerde: Serde[K], valueSerde: Serde[V]): Produced[K, V] = 50 | Produced.`with`(keySerde, valueSerde) 51 | 52 | implicit def joinedFromKVOSerde[K, V, VO](implicit keySerde: Serde[K], 53 | valueSerde: Serde[V], 54 | otherValueSerde: Serde[VO]): Joined[K, V, VO] = 55 | Joined.`with`(keySerde, valueSerde, otherValueSerde) 56 | } 57 | -------------------------------------------------------------------------------- /src/main/scala/com/lightbend/kafka/scala/streams/KGroupedStreamS.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | * Copyright 2017-2018 Alexis Seigneurin. 4 | */ 5 | package com.lightbend.kafka.scala.streams 6 | 7 | import org.apache.kafka.streams.kstream._ 8 | import org.apache.kafka.streams.state.KeyValueStore 9 | import org.apache.kafka.common.utils.Bytes 10 | import org.apache.kafka.common.serialization.Serde 11 | import ImplicitConversions._ 12 | import FunctionConversions._ 13 | 14 | /** 15 | * Wraps the Java class KGroupedStream and delegates method calls to the underlying Java object. 
16 | */ 17 | class KGroupedStreamS[K, V](inner: KGroupedStream[K, V]) { 18 | 19 | def count(): KTableS[K, Long] = { 20 | val c: KTableS[K, java.lang.Long] = inner.count() 21 | c.mapValues[Long](Long2long _) 22 | } 23 | 24 | def count(store: String, keySerde: Option[Serde[K]] = None): KTableS[K, Long] = { 25 | val materialized = keySerde.foldLeft(Materialized.as[K, java.lang.Long, KeyValueStore[Bytes, Array[Byte]]](store))( 26 | (m, serde) => m.withKeySerde(serde) 27 | ) 28 | 29 | val c: KTableS[K, java.lang.Long] = inner.count(materialized) 30 | c.mapValues[Long](Long2long _) 31 | } 32 | 33 | def reduce(reducer: (V, V) => V): KTableS[K, V] = 34 | inner.reduce((v1, v2) => reducer(v1, v2)) 35 | 36 | def reduce(reducer: (V, V) => V, materialized: Materialized[K, V, KeyValueStore[Bytes, Array[Byte]]]): KTableS[K, V] = 37 | // need this explicit asReducer for Scala 2.11 or else the SAM conversion doesn't take place 38 | // works perfectly with Scala 2.12 though 39 | inner.reduce(((v1: V, v2: V) => reducer(v1, v2)).asReducer, materialized) 40 | 41 | def reduce(reducer: (V, V) => V, storeName: String)(implicit keySerde: Serde[K], 42 | valueSerde: Serde[V]): KTableS[K, V] = 43 | // need this explicit asReducer for Scala 2.11 or else the SAM conversion doesn't take place 44 | // works perfectly with Scala 2.12 though 45 | inner.reduce( 46 | ((v1: V, v2: V) => reducer(v1, v2)).asReducer, 47 | Materialized 48 | .as[K, V, KeyValueStore[Bytes, Array[Byte]]](storeName) 49 | .withKeySerde(keySerde) 50 | .withValueSerde(valueSerde) 51 | ) 52 | 53 | def aggregate[VR](initializer: () => VR, aggregator: (K, V, VR) => VR): KTableS[K, VR] = 54 | inner.aggregate(initializer.asInitializer, aggregator.asAggregator) 55 | 56 | def aggregate[VR](initializer: () => VR, 57 | aggregator: (K, V, VR) => VR, 58 | materialized: Materialized[K, VR, KeyValueStore[Bytes, Array[Byte]]]): KTableS[K, VR] = 59 | inner.aggregate(initializer.asInitializer, aggregator.asAggregator, materialized) 60 | 61 | def windowedBy(windows: SessionWindows): SessionWindowedKStreamS[K, V] = 62 | inner.windowedBy(windows) 63 | 64 | def windowedBy[W <: Window](windows: Windows[W]): TimeWindowedKStreamS[K, V] = 65 | inner.windowedBy(windows) 66 | } 67 | -------------------------------------------------------------------------------- /src/main/scala/com/lightbend/kafka/scala/streams/KGroupedTableS.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | * Copyright 2017-2018 Alexis Seigneurin. 4 | */ 5 | package com.lightbend.kafka.scala.streams 6 | 7 | import ImplicitConversions._ 8 | import org.apache.kafka.streams.kstream._ 9 | import org.apache.kafka.streams.state.KeyValueStore 10 | import org.apache.kafka.common.utils.Bytes 11 | import FunctionConversions._ 12 | 13 | /** 14 | * Wraps the Java class KGroupedTable and delegates method calls to the underlying Java object. 
15 | */ 16 | class KGroupedTableS[K, V](inner: KGroupedTable[K, V]) { 17 | 18 | type ByteArrayKVStore = KeyValueStore[Bytes, Array[Byte]] 19 | 20 | def count(): KTableS[K, Long] = { 21 | val c: KTableS[K, java.lang.Long] = inner.count() 22 | c.mapValues[Long](Long2long(_)) 23 | } 24 | 25 | def count(materialized: Materialized[K, Long, ByteArrayKVStore]): KTableS[K, Long] = 26 | inner.count(materialized) 27 | 28 | def reduce(adder: (V, V) => V, subTractor: (V, V) => V): KTableS[K, V] = 29 | // need this explicit asReducer for Scala 2.11 or else the SAM conversion doesn't take place 30 | // works perfectly with Scala 2.12 though 31 | inner.reduce(((v1, v2) => adder(v1, v2)).asReducer, ((v1, v2) => subTractor(v1, v2)).asReducer) 32 | 33 | def reduce(adder: (V, V) => V, 34 | subtractor: (V, V) => V, 35 | materialized: Materialized[K, V, ByteArrayKVStore]): KTableS[K, V] = 36 | // need this explicit asReducer for Scala 2.11 or else the SAM conversion doesn't take place 37 | // works perfectly with Scala 2.12 though 38 | inner.reduce(((v1, v2) => adder(v1, v2)).asReducer, ((v1, v2) => subtractor(v1, v2)).asReducer, materialized) 39 | 40 | def aggregate[VR](initializer: () => VR, adder: (K, V, VR) => VR, subtractor: (K, V, VR) => VR): KTableS[K, VR] = 41 | inner.aggregate(initializer.asInitializer, adder.asAggregator, subtractor.asAggregator) 42 | 43 | def aggregate[VR](initializer: () => VR, 44 | adder: (K, V, VR) => VR, 45 | subtractor: (K, V, VR) => VR, 46 | materialized: Materialized[K, VR, ByteArrayKVStore]): KTableS[K, VR] = 47 | inner.aggregate(initializer.asInitializer, adder.asAggregator, subtractor.asAggregator, materialized) 48 | } 49 | -------------------------------------------------------------------------------- /src/main/scala/com/lightbend/kafka/scala/streams/KStreamS.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | * Copyright 2017-2018 Alexis Seigneurin. 4 | */ 5 | package com.lightbend.kafka.scala.streams 6 | 7 | import org.apache.kafka.streams.KeyValue 8 | import org.apache.kafka.streams.kstream._ 9 | import org.apache.kafka.streams.processor.{Processor, ProcessorContext, ProcessorSupplier} 10 | import ImplicitConversions._ 11 | import FunctionConversions._ 12 | 13 | import scala.collection.JavaConverters._ 14 | 15 | /** 16 | * Wraps the Java class KStream and delegates method calls to the underlying Java object. 
17 | */ 18 | class KStreamS[K, V](val inner: KStream[K, V]) { 19 | 20 | def filter(predicate: (K, V) => Boolean): KStreamS[K, V] = 21 | inner.filter(predicate(_, _)) 22 | 23 | def filterNot(predicate: (K, V) => Boolean): KStreamS[K, V] = 24 | inner.filterNot(predicate(_, _)) 25 | 26 | def selectKey[KR](mapper: (K, V) => KR): KStreamS[KR, V] = 27 | inner.selectKey[KR]((k: K, v: V) => mapper(k, v)) 28 | 29 | def map[KR, VR](mapper: (K, V) => (KR, VR)): KStreamS[KR, VR] = { 30 | val kvMapper = mapper.tupled andThen tuple2ToKeyValue 31 | inner.map[KR, VR]((k, v) => kvMapper(k, v)) 32 | } 33 | 34 | def mapValues[VR](mapper: V => VR): KStreamS[K, VR] = 35 | inner.mapValues[VR](mapper(_)) 36 | 37 | def flatMap[KR, VR](mapper: (K, V) => Iterable[(KR, VR)]): KStreamS[KR, VR] = { 38 | val kvMapper = mapper.tupled andThen (iter => iter.map(tuple2ToKeyValue).asJava) 39 | inner.flatMap[KR, VR]((k, v) => kvMapper(k, v)) 40 | } 41 | 42 | def flatMapValues[VR](processor: V => Iterable[VR]): KStreamS[K, VR] = 43 | inner.flatMapValues[VR]((v) => processor(v).asJava) 44 | 45 | def print(printed: Printed[K, V]): Unit = inner.print(printed) 46 | 47 | def foreach(action: (K, V) => Unit): Unit = 48 | inner.foreach((k, v) => action(k, v)) 49 | 50 | def branch(predicates: ((K, V) => Boolean)*): Array[KStreamS[K, V]] = 51 | inner.branch(predicates.map(_.asPredicate): _*).map(kstream => wrapKStream(kstream)) 52 | 53 | def through(topic: String)(implicit produced: Produced[K, V]): KStreamS[K, V] = 54 | inner.through(topic, produced) 55 | 56 | def to(topic: String)(implicit produced: Produced[K, V]): Unit = 57 | inner.to(topic, produced) 58 | 59 | //scalastyle:off null 60 | def transform[K1, V1](transformerSupplier: () => Transformer[K, V, (K1, V1)], 61 | stateStoreNames: String*): KStreamS[K1, V1] = { 62 | 63 | val transformerSupplierJ: TransformerSupplier[K, V, KeyValue[K1, V1]] = () => { 64 | val transformerS: Transformer[K, V, (K1, V1)] = transformerSupplier() 65 | new Transformer[K, V, KeyValue[K1, V1]] { 66 | override def transform(key: K, value: V): KeyValue[K1, V1] = 67 | transformerS.transform(key, value) match { 68 | case (k1, v1) => KeyValue.pair(k1, v1) 69 | case _ => null 70 | } 71 | 72 | override def init(context: ProcessorContext): Unit = transformerS.init(context) 73 | 74 | @deprecated( 75 | "Please use Punctuator functional interface at https://kafka.apache.org/10/javadoc/org/apache/kafka/streams/processor/Punctuator.html instead", 76 | "0.1.3" 77 | ) // scalastyle:ignore 78 | override def punctuate(timestamp: Long): KeyValue[K1, V1] = 79 | transformerS.punctuate(timestamp) match { 80 | case (k1, v1) => KeyValue.pair[K1, V1](k1, v1) 81 | case _ => null 82 | } 83 | 84 | override def close(): Unit = transformerS.close() 85 | } 86 | } 87 | inner.transform(transformerSupplierJ, stateStoreNames: _*) 88 | } 89 | //scalastyle:on null 90 | 91 | def transformValues[VR](valueTransformerSupplier: () => ValueTransformer[V, VR], 92 | stateStoreNames: String*): KStreamS[K, VR] = { 93 | 94 | val valueTransformerSupplierJ: ValueTransformerSupplier[V, VR] = () => valueTransformerSupplier() 95 | inner.transformValues[VR](valueTransformerSupplierJ, stateStoreNames: _*) 96 | } 97 | 98 | def process(processorSupplier: () => Processor[K, V], stateStoreNames: String*): Unit = { 99 | 100 | val processorSupplierJ: ProcessorSupplier[K, V] = () => processorSupplier() 101 | inner.process(processorSupplierJ, stateStoreNames: _*) 102 | } 103 | 104 | /** 105 | * If `Serialized[K, V]` is found in the implicit scope, then use it, 
else 106 | * use the API with the default serializers. 107 | * 108 | * Usage Pattern 1: No implicits in scope, use default serializers 109 | * - .groupByKey 110 | * 111 | * Usage Pattern 2: Use implicit `Serialized` in scope 112 | * implicit val serialized = Serialized.`with`(stringSerde, longSerde) 113 | * - .groupByKey 114 | * 115 | * Usage Pattern 3: uses the implicit conversion from the serdes to `Serialized` 116 | * implicit val stringSerde: Serde[String] = Serdes.String() 117 | * implicit val longSerde: Serde[Long] = Serdes.Long().asInstanceOf[Serde[Long]] 118 | * - .groupByKey 119 | */ 120 | def groupByKey(implicit serialized: Serialized[K, V]): KGroupedStreamS[K, V] = 121 | inner.groupByKey(serialized) 122 | 123 | def groupBy[KR](selector: (K, V) => KR)(implicit serialized: Serialized[KR, V]): KGroupedStreamS[KR, V] = 124 | inner.groupBy(selector.asKeyValueMapper, serialized) 125 | 126 | def join[VO, VR](otherStream: KStreamS[K, VO], joiner: (V, VO) => VR, windows: JoinWindows)( 127 | implicit joined: Joined[K, V, VO] 128 | ): KStreamS[K, VR] = 129 | inner.join[VO, VR](otherStream.inner, joiner.asValueJoiner, windows, joined) 130 | 131 | def join[VT, VR](table: KTableS[K, VT], joiner: (V, VT) => VR)(implicit joined: Joined[K, V, VT]): KStreamS[K, VR] = 132 | inner.join[VT, VR](table.inner, joiner.asValueJoiner, joined) 133 | 134 | def join[GK, GV, RV](globalKTable: GlobalKTable[GK, GV], 135 | keyValueMapper: (K, V) => GK, 136 | joiner: (V, GV) => RV): KStreamS[K, RV] = 137 | inner.join[GK, GV, RV](globalKTable, keyValueMapper(_, _), joiner(_, _)) 138 | 139 | def leftJoin[VO, VR](otherStream: KStreamS[K, VO], joiner: (V, VO) => VR, windows: JoinWindows)( 140 | implicit joined: Joined[K, V, VO] 141 | ): KStreamS[K, VR] = 142 | inner.leftJoin[VO, VR](otherStream.inner, joiner.asValueJoiner, windows, joined) 143 | 144 | def leftJoin[VT, VR](table: KTableS[K, VT], 145 | joiner: (V, VT) => VR)(implicit joined: Joined[K, V, VT]): KStreamS[K, VR] = 146 | inner.leftJoin[VT, VR](table.inner, joiner.asValueJoiner, joined) 147 | 148 | def leftJoin[GK, GV, RV](globalKTable: GlobalKTable[GK, GV], 149 | keyValueMapper: (K, V) => GK, 150 | joiner: (V, GV) => RV): KStreamS[K, RV] = 151 | inner.leftJoin[GK, GV, RV](globalKTable, keyValueMapper.asKeyValueMapper, joiner.asValueJoiner) 152 | 153 | def outerJoin[VO, VR](otherStream: KStreamS[K, VO], joiner: (V, VO) => VR, windows: JoinWindows)( 154 | implicit joined: Joined[K, V, VO] 155 | ): KStreamS[K, VR] = 156 | inner.outerJoin[VO, VR](otherStream.inner, joiner.asValueJoiner, windows, joined) 157 | 158 | def merge(stream: KStreamS[K, V]): KStreamS[K, V] = inner.merge(stream.inner) 159 | 160 | def peek(action: (K, V) => Unit): KStreamS[K, V] = 161 | inner.peek(action(_, _)) 162 | 163 | // -- EXTENSIONS TO KAFKA STREAMS -- 164 | 165 | // applies the predicate to know what messages should go to the left stream (predicate == true) 166 | // or to the right stream (predicate == false) 167 | def split(predicate: (K, V) => Boolean): (KStreamS[K, V], KStreamS[K, V]) = 168 | (this.filter(predicate), this.filterNot(predicate)) 169 | 170 | } 171 | -------------------------------------------------------------------------------- /src/main/scala/com/lightbend/kafka/scala/streams/KTableS.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | * Copyright 2017-2018 Alexis Seigneurin. 
4 | */ 5 | package com.lightbend.kafka.scala.streams 6 | 7 | import org.apache.kafka.streams.kstream._ 8 | import org.apache.kafka.streams.state.KeyValueStore 9 | import org.apache.kafka.common.utils.Bytes 10 | import ImplicitConversions._ 11 | import FunctionConversions._ 12 | 13 | /** 14 | * Wraps the Java class KTable and delegates method calls to the underlying Java object. 15 | */ 16 | class KTableS[K, V](val inner: KTable[K, V]) { 17 | 18 | def filter(predicate: (K, V) => Boolean): KTableS[K, V] = 19 | inner.filter(predicate(_, _)) 20 | 21 | def filter(predicate: (K, V) => Boolean, 22 | materialized: Materialized[K, V, KeyValueStore[Bytes, Array[Byte]]]): KTableS[K, V] = 23 | inner.filter(predicate.asPredicate, materialized) 24 | 25 | def filterNot(predicate: (K, V) => Boolean): KTableS[K, V] = 26 | inner.filterNot(predicate(_, _)) 27 | 28 | def filterNot(predicate: (K, V) => Boolean, 29 | materialized: Materialized[K, V, KeyValueStore[Bytes, Array[Byte]]]): KTableS[K, V] = 30 | inner.filterNot(predicate.asPredicate, materialized) 31 | 32 | def mapValues[VR](mapper: V => VR): KTableS[K, VR] = 33 | inner.mapValues[VR](mapper.asValueMapper) 34 | 35 | def mapValues[VR](mapper: V => VR, 36 | materialized: Materialized[K, VR, KeyValueStore[Bytes, Array[Byte]]]): KTableS[K, VR] = 37 | inner.mapValues[VR](mapper.asValueMapper, materialized) 38 | 39 | def toStream: KStreamS[K, V] = inner.toStream 40 | 41 | def toStream[KR](mapper: (K, V) => KR): KStreamS[KR, V] = 42 | inner.toStream[KR](mapper.asKeyValueMapper) 43 | 44 | def groupBy[KR, VR](selector: (K, V) => (KR, VR))(implicit serialized: Serialized[KR, VR]): KGroupedTableS[KR, VR] = 45 | inner.groupBy(selector.asKeyValueMapper, serialized) 46 | 47 | def join[VO, VR](other: KTableS[K, VO], joiner: (V, VO) => VR): KTableS[K, VR] = 48 | inner.join[VO, VR](other.inner, joiner.asValueJoiner) 49 | 50 | def join[VO, VR](other: KTableS[K, VO], 51 | joiner: (V, VO) => VR, 52 | materialized: Materialized[K, VR, KeyValueStore[Bytes, Array[Byte]]]): KTableS[K, VR] = 53 | inner.join[VO, VR](other.inner, joiner.asValueJoiner, materialized) 54 | 55 | def leftJoin[VO, VR](other: KTableS[K, VO], joiner: (V, VO) => VR): KTableS[K, VR] = 56 | inner.leftJoin[VO, VR](other.inner, joiner.asValueJoiner) 57 | 58 | def leftJoin[VO, VR](other: KTableS[K, VO], 59 | joiner: (V, VO) => VR, 60 | materialized: Materialized[K, VR, KeyValueStore[Bytes, Array[Byte]]]): KTableS[K, VR] = 61 | inner.leftJoin[VO, VR](other.inner, joiner.asValueJoiner, materialized) 62 | 63 | def outerJoin[VO, VR](other: KTableS[K, VO], joiner: (V, VO) => VR): KTableS[K, VR] = 64 | inner.outerJoin[VO, VR](other.inner, joiner.asValueJoiner) 65 | 66 | def outerJoin[VO, VR](other: KTableS[K, VO], 67 | joiner: (V, VO) => VR, 68 | materialized: Materialized[K, VR, KeyValueStore[Bytes, Array[Byte]]]): KTableS[K, VR] = 69 | inner.outerJoin[VO, VR](other.inner, joiner.asValueJoiner, materialized) 70 | 71 | def queryableStoreName: String = 72 | inner.queryableStoreName 73 | } 74 | -------------------------------------------------------------------------------- /src/main/scala/com/lightbend/kafka/scala/streams/ScalaSerde.scala: -------------------------------------------------------------------------------- 1 | // adopted from Openshine implementation 2 | package com.lightbend.kafka.scala.streams 3 | 4 | import org.apache.kafka.common.serialization.{Serde, Deserializer => JDeserializer, Serializer => JSerializer} 5 | 6 | trait ScalaSerde[T] extends Serde[T] { 7 | override def deserializer(): 
JDeserializer[T] 8 | 9 | override def serializer(): JSerializer[T] 10 | 11 | override def configure(configs: java.util.Map[String, _], isKey: Boolean): Unit = () 12 | 13 | override def close(): Unit = () 14 | } 15 | 16 | trait StatelessScalaSerde[T >: Null] extends Serde[T] with ScalaSerde[T] { 17 | def serialize(data: T): Array[Byte] 18 | def deserialize(data: Array[Byte]): Option[T] 19 | 20 | override def deserializer(): Deserializer[T] = 21 | (data: Array[Byte]) => deserialize(data) 22 | 23 | override def serializer(): Serializer[T] = 24 | (data: T) => serialize(data) 25 | } 26 | 27 | trait Deserializer[T >: Null] extends JDeserializer[T] { 28 | override def configure(configs: java.util.Map[String, _], isKey: Boolean): Unit = () 29 | 30 | override def close(): Unit = () 31 | 32 | override def deserialize(topic: String, data: Array[Byte]): T = 33 | Option(data).flatMap(deserialize).orNull 34 | 35 | def deserialize(data: Array[Byte]): Option[T] 36 | } 37 | 38 | trait Serializer[T] extends JSerializer[T] { 39 | override def configure(configs: java.util.Map[String, _], isKey: Boolean): Unit = () 40 | 41 | override def close(): Unit = () 42 | 43 | override def serialize(topic: String, data: T): Array[Byte] = 44 | Option(data).map(serialize).orNull 45 | 46 | def serialize(data: T): Array[Byte] 47 | } 48 | -------------------------------------------------------------------------------- /src/main/scala/com/lightbend/kafka/scala/streams/SessionWindowedKStreamS.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | * Copyright 2017-2018 Alexis Seigneurin. 4 | */ 5 | package com.lightbend.kafka.scala.streams 6 | 7 | import org.apache.kafka.streams.kstream._ 8 | import org.apache.kafka.streams.state.SessionStore 9 | import org.apache.kafka.common.utils.Bytes 10 | import FunctionConversions._ 11 | 12 | import ImplicitConversions._ 13 | 14 | /** 15 | * Wraps the Java class SessionWindowedKStream and delegates method calls to the underlying Java object. 
16 | */ 17 | class SessionWindowedKStreamS[K, V](val inner: SessionWindowedKStream[K, V]) { 18 | 19 | def aggregate[VR](initializer: () => VR, 20 | aggregator: (K, V, VR) => VR, 21 | merger: (K, VR, VR) => VR): KTableS[Windowed[K], VR] = 22 | inner.aggregate(initializer.asInitializer, aggregator.asAggregator, merger.asMerger) 23 | 24 | def aggregate[VR](initializer: () => VR, 25 | aggregator: (K, V, VR) => VR, 26 | merger: (K, VR, VR) => VR, 27 | materialized: Materialized[K, VR, SessionStore[Bytes, Array[Byte]]]): KTableS[Windowed[K], VR] = 28 | inner.aggregate(initializer.asInitializer, aggregator.asAggregator, merger.asMerger, materialized) 29 | 30 | def count(): KTableS[Windowed[K], Long] = { 31 | val c: KTableS[Windowed[K], java.lang.Long] = inner.count() 32 | c.mapValues[Long](Long2long(_)) 33 | } 34 | 35 | def count(materialized: Materialized[K, Long, SessionStore[Bytes, Array[Byte]]]): KTableS[Windowed[K], Long] = 36 | inner.count(materialized) 37 | 38 | def reduce(reducer: (V, V) => V): KTableS[Windowed[K], V] = 39 | inner.reduce((v1, v2) => reducer(v1, v2)) 40 | 41 | def reduce(reducer: (V, V) => V, 42 | materialized: Materialized[K, V, SessionStore[Bytes, Array[Byte]]]): KTableS[Windowed[K], V] = 43 | inner.reduce(reducer.asReducer, materialized) 44 | } 45 | -------------------------------------------------------------------------------- /src/main/scala/com/lightbend/kafka/scala/streams/StreamsBuilderS.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | * Copyright 2017-2018 Alexis Seigneurin. 4 | */ 5 | package com.lightbend.kafka.scala.streams 6 | 7 | import java.util.regex.Pattern 8 | 9 | import com.lightbend.kafka.scala.streams.ImplicitConversions._ 10 | import org.apache.kafka.common.utils.Bytes 11 | import org.apache.kafka.streams.kstream.{GlobalKTable, Materialized} 12 | import org.apache.kafka.streams.processor.{ProcessorSupplier, StateStore} 13 | import org.apache.kafka.streams.state.{KeyValueStore, StoreBuilder} 14 | import org.apache.kafka.streams.{Consumed, StreamsBuilder, Topology} 15 | 16 | import scala.collection.JavaConverters._ 17 | 18 | /** 19 | * Wraps the Java class StreamsBuilder and delegates method calls to the underlying Java object. 
20 | */ 21 | class StreamsBuilderS(inner: StreamsBuilder = new StreamsBuilder) { 22 | 23 | def stream[K, V](topic: String)(implicit consumed: Consumed[K, V]): KStreamS[K, V] = 24 | inner.stream[K, V](topic, consumed) 25 | 26 | def stream[K, V](topics: List[String])(implicit consumed: Consumed[K, V]): KStreamS[K, V] = 27 | inner.stream[K, V](topics.asJava, consumed) 28 | 29 | def stream[K, V](topicPattern: Pattern)(implicit consumed: Consumed[K, V]): KStreamS[K, V] = 30 | inner.stream[K, V](topicPattern, consumed) 31 | 32 | def table[K, V](topic: String)(implicit consumed: Consumed[K, V]): KTableS[K, V] = 33 | inner.table[K, V](topic, consumed) 34 | 35 | def table[K, V](topic: String, materialized: Materialized[K, V, KeyValueStore[Bytes, Array[Byte]]])( 36 | implicit consumed: Consumed[K, V] 37 | ): KTableS[K, V] = 38 | inner.table[K, V](topic, consumed, materialized) 39 | 40 | def globalTable[K, V](topic: String)(implicit consumed: Consumed[K, V]): GlobalKTable[K, V] = 41 | inner.globalTable(topic, consumed) 42 | 43 | def globalTable[K, V](topic: String, materialized: Materialized[K, V, KeyValueStore[Bytes, Array[Byte]]])( 44 | implicit consumed: Consumed[K, V] 45 | ): GlobalKTable[K, V] = 46 | inner.globalTable(topic, consumed, materialized) 47 | 48 | def addStateStore(builder: StoreBuilder[_ <: StateStore]): StreamsBuilder = inner.addStateStore(builder) 49 | 50 | def addGlobalStore(storeBuilder: StoreBuilder[_ <: StateStore], 51 | topic: String, 52 | sourceName: String, 53 | consumed: Consumed[_, _], 54 | processorName: String, 55 | stateUpdateSupplier: ProcessorSupplier[_, _]): StreamsBuilder = 56 | inner.addGlobalStore(storeBuilder, topic, sourceName, consumed, processorName, stateUpdateSupplier) 57 | 58 | def build(): Topology = inner.build() 59 | } 60 | -------------------------------------------------------------------------------- /src/main/scala/com/lightbend/kafka/scala/streams/TimeWindowedKStreamS.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | * Copyright 2017-2018 Alexis Seigneurin. 4 | */ 5 | package com.lightbend.kafka.scala.streams 6 | 7 | import org.apache.kafka.streams.kstream._ 8 | import org.apache.kafka.streams.state.WindowStore 9 | import org.apache.kafka.common.utils.Bytes 10 | import org.apache.kafka.common.serialization.Serde 11 | import ImplicitConversions._ 12 | import FunctionConversions._ 13 | 14 | /** 15 | * Wraps the Java class TimeWindowedKStream and delegates method calls to the underlying Java object. 
16 | */ 17 | class TimeWindowedKStreamS[K, V](val inner: TimeWindowedKStream[K, V]) { 18 | 19 | def aggregate[VR](initializer: () => VR, aggregator: (K, V, VR) => VR): KTableS[Windowed[K], VR] = 20 | inner.aggregate(initializer.asInitializer, aggregator.asAggregator) 21 | 22 | def aggregate[VR](initializer: () => VR, 23 | aggregator: (K, V, VR) => VR, 24 | materialized: Materialized[K, VR, WindowStore[Bytes, Array[Byte]]]): KTableS[Windowed[K], VR] = 25 | inner.aggregate(initializer.asInitializer, aggregator.asAggregator, materialized) 26 | 27 | def count(): KTableS[Windowed[K], Long] = { 28 | val c: KTableS[Windowed[K], java.lang.Long] = inner.count() 29 | c.mapValues[Long](Long2long(_)) 30 | } 31 | 32 | def count(store: String, keySerde: Option[Serde[K]] = None): KTableS[Windowed[K], Long] = { 33 | val materialized = { 34 | val m = Materialized.as[K, java.lang.Long, WindowStore[Bytes, Array[Byte]]](store) 35 | keySerde.foldLeft(m)((m, serde) => m.withKeySerde(serde)) 36 | } 37 | val c: KTableS[Windowed[K], java.lang.Long] = inner.count(materialized) 38 | c.mapValues[Long](Long2long(_)) 39 | } 40 | 41 | def reduce(reducer: (V, V) => V): KTableS[Windowed[K], V] = 42 | inner.reduce(reducer.asReducer) 43 | 44 | def reduce(reducer: (V, V) => V, 45 | materialized: Materialized[K, V, WindowStore[Bytes, Array[Byte]]]): KTableS[Windowed[K], V] = 46 | inner.reduce(reducer.asReducer, materialized) 47 | } 48 |
-------------------------------------------------------------------------------- /src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Set root logger level to DEBUG and its only appender to A1. 2 | log4j.rootLogger=ERROR, R 3 | 4 | # A1 is set to be a ConsoleAppender. 5 | log4j.appender.A1=org.apache.log4j.ConsoleAppender 6 | 7 | log4j.appender.R=org.apache.log4j.RollingFileAppender 8 | log4j.appender.R.File=logs/kafka-server.log 9 | 10 | log4j.appender.R.MaxFileSize=100KB 11 | # Keep one backup file 12 | log4j.appender.R.MaxBackupIndex=1 13 | 14 | # A1 uses PatternLayout. 15 | log4j.appender.R.layout=org.apache.log4j.PatternLayout 16 | log4j.appender.R.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n 17 |
-------------------------------------------------------------------------------- /src/test/resources/logback.xml: -------------------------------------------------------------------------------- [logback.xml: a 19-line logback XML configuration whose element markup was lost during text extraction; the surviving values indicate a file appender writing to logs/kstream.log with append=true and the encoder pattern %d{HH:mm:ss.SSS} TKD [%thread] %-5level %logger{36} - %msg%n.]
-------------------------------------------------------------------------------- /src/test/scala/com/lightbend/kafka/scala/server/KafkaLocalServer.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc.
3 | */ 4 | package com.lightbend.kafka.scala.server 5 | 6 | // Loosely based on Lagom implementation at 7 | // https://github.com/lagom/lagom/blob/master/dev/kafka-server/src/main/scala/com/lightbend/lagom/internal/kafka/KafkaLocalServer.scala 8 | 9 | import java.io.{File, IOException} 10 | import java.util.Properties 11 | 12 | import org.apache.curator.test.TestingServer 13 | import com.typesafe.scalalogging.LazyLogging 14 | 15 | import kafka.server.{KafkaConfig, KafkaServerStartable} 16 | 17 | import scala.util.{Failure, Success, Try} 18 | 19 | import kafka.admin.{AdminUtils, RackAwareMode} 20 | import kafka.utils.ZkUtils 21 | 22 | class KafkaLocalServer private (kafkaProperties: Properties, zooKeeperServer: ZooKeeperLocalServer) 23 | extends LazyLogging { 24 | 25 | import KafkaLocalServer._ 26 | 27 | private var broker = null.asInstanceOf[KafkaServerStartable] // scalastyle:ignore 28 | private var zkUtils: ZkUtils = 29 | ZkUtils.apply(s"localhost:${zooKeeperServer.getPort()}", 30 | DEFAULT_ZK_SESSION_TIMEOUT_MS, 31 | DEFAULT_ZK_CONNECTION_TIMEOUT_MS, 32 | false) 33 | 34 | def start(): Unit = { 35 | 36 | broker = KafkaServerStartable.fromProps(kafkaProperties) 37 | broker.startup() 38 | } 39 | 40 | //scalastyle:off null 41 | def stop(): Unit = 42 | if (broker != null) { 43 | broker.shutdown() 44 | zooKeeperServer.stop() 45 | broker = null.asInstanceOf[KafkaServerStartable] 46 | } 47 | //scalastyle:on null 48 | 49 | /** 50 | * Create a Kafka topic with 1 partition and a replication factor of 1. 51 | * 52 | * @param topic The name of the topic. 53 | */ 54 | def createTopic(topic: String): Unit = 55 | createTopic(topic, 1, 1, new Properties) 56 | 57 | /** 58 | * Create a Kafka topic with the given parameters. 59 | * 60 | * @param topic The name of the topic. 61 | * @param partitions The number of partitions for this topic. 62 | * @param replication The replication factor for (the partitions of) this topic. 63 | */ 64 | def createTopic(topic: String, partitions: Int, replication: Int): Unit = 65 | createTopic(topic, partitions, replication, new Properties) 66 | 67 | /** 68 | * Create a Kafka topic with the given parameters. 69 | * 70 | * @param topic The name of the topic. 71 | * @param partitions The number of partitions for this topic. 72 | * @param replication The replication factor for (partitions of) this topic. 73 | * @param topicConfig Additional topic-level configuration settings. 
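 *
 * For example (illustrative only; `server` denotes a running KafkaLocalServer instance):
 * {{{
 * val cfg = new java.util.Properties()
 * cfg.put("cleanup.policy", "compact")
 * server.createTopic("user-regions", partitions = 3, replication = 1, topicConfig = cfg)
 * }}}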
74 | */ 75 | def createTopic(topic: String, partitions: Int, replication: Int, topicConfig: Properties): Unit = 76 | AdminUtils.createTopic(zkUtils, topic, partitions, replication, topicConfig, RackAwareMode.Enforced) 77 | 78 | def deleteTopic(topic: String): Unit = AdminUtils.deleteTopic(zkUtils, topic) 79 | } 80 | 81 | import Utils._ 82 | 83 | object KafkaLocalServer extends LazyLogging { 84 | final val DefaultPort = 9092 85 | final val DefaultResetOnStart = true 86 | private val DEFAULT_ZK_CONNECT = "localhost:2181" 87 | private val DEFAULT_ZK_SESSION_TIMEOUT_MS = 10 * 1000 88 | private val DEFAULT_ZK_CONNECTION_TIMEOUT_MS = 8 * 1000 89 | 90 | final val basDir = "tmp/" 91 | 92 | final private val kafkaDataFolderName = "kafka_data" 93 | 94 | def apply(cleanOnStart: Boolean, localStateDir: Option[String] = None): KafkaLocalServer = 95 | this(DefaultPort, ZooKeeperLocalServer.DefaultPort, cleanOnStart, localStateDir) 96 | 97 | def apply(kafkaPort: Int, 98 | zookeeperServerPort: Int, 99 | cleanOnStart: Boolean, 100 | localStateDir: Option[String]): KafkaLocalServer = { 101 | 102 | // delete kafka data dir on clean start 103 | val kafkaDataDir: File = (for { 104 | kdir <- dataDirectory(basDir, kafkaDataFolderName) 105 | _ <- if (cleanOnStart) deleteDirectory(kdir) else Try(()) 106 | } yield kdir) match { 107 | case Success(d) => d 108 | case Failure(ex) => throw ex 109 | } 110 | 111 | // delete kafka local state dir on clean start 112 | localStateDir.foreach { d => 113 | for { 114 | kdir <- dataDirectory("", d) 115 | _ <- if (cleanOnStart) deleteDirectory(kdir) else Try(()) 116 | } yield (()) 117 | } 118 | 119 | logger.info(s"Kafka data directory is $kafkaDataDir.") 120 | 121 | val kafkaProperties = createKafkaProperties(kafkaPort, zookeeperServerPort, kafkaDataDir) 122 | 123 | val zk = new ZooKeeperLocalServer(zookeeperServerPort, cleanOnStart) 124 | zk.start() 125 | new KafkaLocalServer(kafkaProperties, zk) 126 | } 127 | 128 | /** 129 | * Creates a Properties instance for Kafka customized with values passed in argument. 130 | */ 131 | private def createKafkaProperties(kafkaPort: Int, zookeeperServerPort: Int, dataDir: File): Properties = { 132 | 133 | // TODO: Probably should be externalized into properties. 
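// A possible way to address the TODO (sketch only, not part of the original code): load optional
// overrides from a classpath resource, e.g. a hypothetical kafka-local.properties, and merge them
// over the defaults built below once they are populated:
//   val overrides = new Properties()
//   Option(getClass.getResourceAsStream("/kafka-local.properties")).foreach(in => overrides.load(in))
//   kafkaProperties.putAll(overrides)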
134 | val kafkaProperties = new Properties 135 | kafkaProperties.put(KafkaConfig.ListenersProp, s"PLAINTEXT://localhost:$kafkaPort") 136 | kafkaProperties.put(KafkaConfig.ZkConnectProp, s"localhost:$zookeeperServerPort") 137 | kafkaProperties.put(KafkaConfig.ZkConnectionTimeoutMsProp, "6000") 138 | kafkaProperties.put(KafkaConfig.BrokerIdProp, "0") 139 | kafkaProperties.put(KafkaConfig.NumNetworkThreadsProp, "3") 140 | kafkaProperties.put(KafkaConfig.NumIoThreadsProp, "8") 141 | kafkaProperties.put(KafkaConfig.SocketSendBufferBytesProp, "102400") 142 | kafkaProperties.put(KafkaConfig.SocketReceiveBufferBytesProp, "102400") 143 | kafkaProperties.put(KafkaConfig.SocketRequestMaxBytesProp, "104857600") 144 | kafkaProperties.put(KafkaConfig.NumPartitionsProp, "1") 145 | kafkaProperties.put(KafkaConfig.NumRecoveryThreadsPerDataDirProp, "1") 146 | kafkaProperties.put(KafkaConfig.OffsetsTopicReplicationFactorProp, "1") 147 | kafkaProperties.put(KafkaConfig.TransactionsTopicReplicationFactorProp, "1") 148 | kafkaProperties.put(KafkaConfig.LogRetentionTimeHoursProp, "2") 149 | kafkaProperties.put(KafkaConfig.LogSegmentBytesProp, "1073741824") 150 | kafkaProperties.put(KafkaConfig.LogCleanupIntervalMsProp, "300000") 151 | kafkaProperties.put(KafkaConfig.AutoCreateTopicsEnableProp, "true") 152 | kafkaProperties.put(KafkaConfig.ControlledShutdownEnableProp, "true") 153 | kafkaProperties.put(KafkaConfig.LogDirProp, dataDir.getAbsolutePath) 154 | 155 | kafkaProperties 156 | } 157 | } 158 | 159 | private class ZooKeeperLocalServer(port: Int, cleanOnStart: Boolean) extends LazyLogging { 160 | 161 | import KafkaLocalServer._ 162 | import ZooKeeperLocalServer._ 163 | 164 | private var zooKeeper = null.asInstanceOf[TestingServer] // scalastyle:ignore 165 | 166 | def start(): Unit = { 167 | // delete zookeeper data dir on clean start 168 | val zookeeperDataDir: File = (for { 169 | zdir <- dataDirectory(basDir, zookeeperDataFolderName) 170 | _ <- if (cleanOnStart) deleteDirectory(zdir) else Try(()) 171 | } yield zdir) match { 172 | case Success(d) => d 173 | case Failure(ex) => throw ex 174 | } 175 | logger.info(s"Zookeeper data directory is $zookeeperDataDir.") 176 | 177 | zooKeeper = new TestingServer(port, zookeeperDataDir, false) 178 | 179 | zooKeeper.start() // blocking operation 180 | } 181 | 182 | // scalastyle:off null 183 | def stop(): Unit = 184 | if (zooKeeper != null) 185 | try { 186 | zooKeeper.stop() 187 | zooKeeper = null.asInstanceOf[TestingServer] 188 | } catch { 189 | case _: IOException => () // nothing to do if an exception is thrown while shutting down 190 | } 191 | //scalastyle:on null 192 | 193 | def getPort(): Int = port 194 | } 195 | 196 | object ZooKeeperLocalServer { 197 | final val DefaultPort = 2181 198 | final private val zookeeperDataFolderName = "zookeeper_data" 199 | } 200 | -------------------------------------------------------------------------------- /src/test/scala/com/lightbend/kafka/scala/server/MessageListener.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc.
3 | */ 4 | package com.lightbend.kafka.scala.server 5 | 6 | import org.apache.kafka.clients.consumer.{ConsumerConfig, KafkaConsumer} 7 | import org.apache.kafka.streams.KeyValue 8 | import scala.collection.JavaConverters._ 9 | import scala.collection.mutable.ListBuffer 10 | 11 | object MessageListener { 12 | private val AUTO_COMMIT_INTERVAL_MS_CONFIG = "1000" // Frequency of offset commits 13 | private val SESSION_TIMEOUT_MS_CONFIG = "30000" // The timeout used to detect failures - should be greater then processing time 14 | private val MAX_POLL_RECORDS_CONFIG = "50" // Max number of records consumed in a single poll 15 | 16 | def consumerProperties(brokers: String, 17 | group: String, 18 | keyDeserializer: String, 19 | valueDeserializer: String): Map[String, AnyRef] = 20 | Map[String, AnyRef]( 21 | ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> brokers, 22 | ConsumerConfig.GROUP_ID_CONFIG -> group, 23 | ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG -> "true", 24 | ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG -> AUTO_COMMIT_INTERVAL_MS_CONFIG, 25 | ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG -> SESSION_TIMEOUT_MS_CONFIG, 26 | ConsumerConfig.MAX_POLL_RECORDS_CONFIG -> MAX_POLL_RECORDS_CONFIG, 27 | ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> "latest", 28 | ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG -> keyDeserializer, 29 | ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG -> valueDeserializer 30 | ) 31 | 32 | def apply[K, V](brokers: String, 33 | topic: String, 34 | group: String, 35 | keyDeserializer: String, 36 | valueDeserializer: String, 37 | processor: RecordProcessorTrait[K, V]): MessageListener[K, V] = 38 | new MessageListener[K, V](brokers, topic, group, keyDeserializer, valueDeserializer, processor) 39 | } 40 | 41 | class MessageListener[K, V](brokers: String, 42 | topic: String, 43 | group: String, 44 | keyDeserializer: String, 45 | valueDeserializer: String, 46 | processor: RecordProcessorTrait[K, V]) { 47 | 48 | import MessageListener._ 49 | 50 | def readKeyValues(maxMessages: Int): List[KeyValue[K, V]] = { 51 | val pollIntervalMs = 100 52 | val maxTotalPollTimeMs = 2000 53 | var totalPollTimeMs = 0 54 | 55 | val consumer = 56 | new KafkaConsumer[K, V](consumerProperties(brokers, group, keyDeserializer, valueDeserializer).asJava) 57 | consumer.subscribe(Seq(topic).asJava) 58 | 59 | val consumedValues = ListBuffer.empty[KeyValue[K, V]] 60 | 61 | while (totalPollTimeMs < maxTotalPollTimeMs && continueConsuming(consumedValues.size, maxMessages)) { 62 | totalPollTimeMs = totalPollTimeMs + pollIntervalMs 63 | val records = consumer.poll(pollIntervalMs) 64 | records.asScala.foreach { record => 65 | processor.processRecord(record) 66 | consumedValues += new KeyValue(record.key, record.value) 67 | } 68 | } 69 | consumer.close() 70 | consumedValues.toList 71 | } 72 | 73 | def continueConsuming(messagesConsumed: Int, maxMessages: Int): Boolean = 74 | maxMessages <= 0 || messagesConsumed < maxMessages 75 | 76 | def waitUntilMinKeyValueRecordsReceived( 77 | expectedNumRecords: Int, 78 | waitTime: Long, 79 | startTime: Long = System.currentTimeMillis(), 80 | accumData: ListBuffer[KeyValue[K, V]] = ListBuffer.empty[KeyValue[K, V]] 81 | ): List[KeyValue[K, V]] = { 82 | 83 | val readData = readKeyValues(-1) 84 | accumData ++= readData 85 | 86 | if (accumData.size >= expectedNumRecords) accumData.toList 87 | else if (System.currentTimeMillis() > startTime + waitTime) 88 | throw new AssertionError( 89 | s"Expected $expectedNumRecords but received only ${accumData.size} records before timeout $waitTime ms" 90 | 
) 91 | else { 92 | Thread.sleep(Math.min(waitTime, 1000L)) 93 | waitUntilMinKeyValueRecordsReceived(expectedNumRecords, waitTime, startTime, accumData) 94 | } 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /src/test/scala/com/lightbend/kafka/scala/server/MessageSender.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | */ 4 | package com.lightbend.kafka.scala.server 5 | 6 | import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord, RecordMetadata} 7 | import java.util.Properties 8 | 9 | object MessageSender { 10 | private val ACKS_CONFIG = "all" // Blocking on the full commit of the record 11 | private val RETRIES_CONFIG = "1" // Number of retries on put 12 | private val BATCH_SIZE_CONFIG = "1024" // Buffers for unsent records for each partition - controlls batching 13 | private val LINGER_MS_CONFIG = "1" // Timeout for more records to arive - controlls batching 14 | 15 | private val BUFFER_MEMORY_CONFIG = "1024000" // Controls the total amount of memory available to the producer for buffering. 16 | // If records are sent faster than they can be transmitted to the server then this 17 | // buffer space will be exhausted. When the buffer space is exhausted additional 18 | // send calls will block. The threshold for time to block is determined by max.block.ms 19 | // after which it throws a TimeoutException. 20 | 21 | def providerProperties(brokers: String, keySerializer: String, valueSerializer: String): Properties = { 22 | val props = new Properties 23 | props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers) 24 | props.put(ProducerConfig.ACKS_CONFIG, ACKS_CONFIG) 25 | props.put(ProducerConfig.RETRIES_CONFIG, RETRIES_CONFIG) 26 | props.put(ProducerConfig.BATCH_SIZE_CONFIG, BATCH_SIZE_CONFIG) 27 | props.put(ProducerConfig.LINGER_MS_CONFIG, LINGER_MS_CONFIG) 28 | props.put(ProducerConfig.BUFFER_MEMORY_CONFIG, BUFFER_MEMORY_CONFIG) 29 | props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, keySerializer) 30 | props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, valueSerializer) 31 | props 32 | } 33 | 34 | def apply[K, V](brokers: String, keySerializer: String, valueSerializer: String): MessageSender[K, V] = 35 | new MessageSender[K, V](brokers, keySerializer, valueSerializer) 36 | } 37 | 38 | class MessageSender[K, V](val brokers: String, val keySerializer: String, val valueSerializer: String) { 39 | 40 | import MessageSender._ 41 | val producer = new KafkaProducer[K, V](providerProperties(brokers, keySerializer, valueSerializer)) 42 | 43 | def writeKeyValue(topic: String, key: K, value: V): Unit = { 44 | val result = producer.send(new ProducerRecord[K, V](topic, key, value)).get 45 | producer.flush() 46 | } 47 | 48 | def writeValue(topic: String, value: V): Unit = { 49 | val result = producer.send(new ProducerRecord[K, V](topic, null.asInstanceOf[K], value)).get // scalastyle:ignore 50 | producer.flush() 51 | } 52 | 53 | def batchWriteValue(topic: String, batch: Seq[V]): Seq[RecordMetadata] = { 54 | val result = batch.map(value => producer.send(new ProducerRecord[K, V](topic, null.asInstanceOf[K], value)).get) // scalastyle:ignore 55 | producer.flush() 56 | result 57 | } 58 | 59 | def close(): Unit = 60 | producer.close() 61 | } 62 | -------------------------------------------------------------------------------- /src/test/scala/com/lightbend/kafka/scala/server/RecordProcessorTrait.scala: 
-------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | */ 4 | package com.lightbend.kafka.scala.server 5 | 6 | import org.apache.kafka.clients.consumer.ConsumerRecord 7 | 8 | // A trait, that should be implemented by any listener implementation 9 | 10 | trait RecordProcessorTrait[K, V] { 11 | def processRecord(record: ConsumerRecord[K, V]): Unit 12 | } 13 | -------------------------------------------------------------------------------- /src/test/scala/com/lightbend/kafka/scala/server/Utils.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | */ 4 | package com.lightbend.kafka.scala.server 5 | 6 | import java.io.File 7 | import java.nio.file.{FileVisitOption, Files, Paths} 8 | import java.util.Comparator 9 | 10 | import scala.util.Try 11 | import scala.collection.JavaConverters._ 12 | 13 | object Utils { 14 | def deleteDirectory(directory: File): Try[Unit] = Try { 15 | if (directory.exists()) { 16 | val rootPath = Paths.get(directory.getAbsolutePath) 17 | 18 | val files = 19 | Files.walk(rootPath, FileVisitOption.FOLLOW_LINKS).sorted(Comparator.reverseOrder()).iterator().asScala 20 | files.foreach(Files.delete) 21 | } 22 | } 23 | 24 | def dataDirectory(baseDir: String, directoryName: String): Try[File] = Try { 25 | 26 | val dataDirectory = new File(baseDir + directoryName) 27 | 28 | if (dataDirectory.exists() && !dataDirectory.isDirectory()) 29 | throw new IllegalArgumentException( 30 | s"Cannot use $directoryName as a directory name because a file with that name already exists in $dataDirectory." 31 | ) 32 | dataDirectory 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/test/scala/com/lightbend/kafka/scala/streams/KafkaStreamsMergeTest.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | */ 4 | package com.lightbend.kafka.scala.streams 5 | 6 | import java.util.Properties 7 | import java.util.regex.Pattern 8 | 9 | import com.lightbend.kafka.scala.server.{KafkaLocalServer, MessageListener, MessageSender, RecordProcessorTrait} 10 | import minitest.TestSuite 11 | import org.apache.kafka.clients.consumer.ConsumerRecord 12 | import org.apache.kafka.common.serialization._ 13 | import org.apache.kafka.streams.{KafkaStreams, KeyValue, StreamsConfig} 14 | import ImplicitConversions._ 15 | import com.typesafe.scalalogging.LazyLogging 16 | 17 | object KafkaStreamsMergeTest extends TestSuite[KafkaLocalServer] with WordCountMergeTestData with LazyLogging { 18 | 19 | override def setup(): KafkaLocalServer = { 20 | val s = KafkaLocalServer(cleanOnStart = true, Some(localStateDir)) 21 | s.start() 22 | s 23 | } 24 | 25 | override def tearDown(server: KafkaLocalServer): Unit = 26 | server.stop() 27 | 28 | test("should count words") { server => 29 | server.createTopic(inputTopic1) 30 | server.createTopic(inputTopic2) 31 | server.createTopic(outputTopic) 32 | 33 | // 34 | // Step 1: Configure and start the processor topology. 
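// (The topology below merges the two input streams into one before counting; the same input list
//  is written to both topics in Step 2, which is why the expected counts are doubled.)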
35 | // 36 | import DefaultSerdes._ 37 | 38 | val streamsConfiguration = new Properties() 39 | streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, s"wordcount-${scala.util.Random.nextInt(100)}") 40 | streamsConfiguration.put(StreamsConfig.CLIENT_ID_CONFIG, "wordcountgroup") 41 | 42 | streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, brokers) 43 | streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, localStateDir) 44 | 45 | val builder = new StreamsBuilderS() 46 | 47 | val textLines1 = builder.stream[String, String](inputTopic1) 48 | val textLines2 = builder.stream[String, String](inputTopic2) 49 | 50 | val textLines = textLines1.merge(textLines2) 51 | 52 | val pattern = Pattern.compile("\\W+", Pattern.UNICODE_CHARACTER_CLASS) 53 | 54 | val wordCounts: KTableS[String, Long] = 55 | textLines 56 | .flatMapValues(v => pattern.split(v.toLowerCase)) 57 | .groupBy((k, v) => v) 58 | .count() 59 | 60 | wordCounts.toStream.to(outputTopic) 61 | 62 | val streams = new KafkaStreams(builder.build(), streamsConfiguration) 63 | streams.start() 64 | 65 | // 66 | // Step 2: Produce some input data to the input topics. 67 | // 68 | val sender = 69 | MessageSender[String, String](brokers, classOf[StringSerializer].getName, classOf[StringSerializer].getName) 70 | val mvals1 = sender.batchWriteValue(inputTopic1, inputValues) 71 | val mvals2 = sender.batchWriteValue(inputTopic2, inputValues) 72 | 73 | // 74 | // Step 3: Verify the application's output data. 75 | // 76 | val listener = MessageListener(brokers, 77 | outputTopic, 78 | "wordcountgroup", 79 | classOf[StringDeserializer].getName, 80 | classOf[LongDeserializer].getName, 81 | new RecordProcessor) 82 | 83 | val l = listener.waitUntilMinKeyValueRecordsReceived(expectedWordCounts.size, 30000) 84 | 85 | assertEquals(l.sortBy(_.key), expectedWordCounts.sortBy(_.key)) 86 | 87 | streams.close() 88 | } 89 | 90 | class RecordProcessor extends RecordProcessorTrait[String, Long] { 91 | override def processRecord(record: ConsumerRecord[String, Long]): Unit = { 92 | // logger.info(s"Get Message $record") 93 | } 94 | } 95 | 96 | } 97 | 98 | trait WordCountMergeTestData { 99 | val inputTopic1 = s"inputTopic1.${scala.util.Random.nextInt(100)}" 100 | val inputTopic2 = s"inputTopic2.${scala.util.Random.nextInt(100)}" 101 | val outputTopic = s"outputTpic.${scala.util.Random.nextInt(100)}" 102 | val brokers = "localhost:9092" 103 | val localStateDir = "local_state_data" 104 | 105 | val inputValues = List( 106 | "Hello Kafka Streams", 107 | "All streams lead to Kafka", 108 | "Join Kafka Summit", 109 | "И теперь пошли русские слова" 110 | ) 111 | 112 | val expectedWordCounts: List[KeyValue[String, Long]] = List( 113 | new KeyValue("hello", 2L), 114 | new KeyValue("all", 2L), 115 | new KeyValue("streams", 4L), 116 | new KeyValue("lead", 2L), 117 | new KeyValue("to", 2L), 118 | new KeyValue("join", 2L), 119 | new KeyValue("kafka", 6L), 120 | new KeyValue("summit", 2L), 121 | new KeyValue("и", 2L), 122 | new KeyValue("теперь", 2L), 123 | new KeyValue("пошли", 2L), 124 | new KeyValue("русские", 2L), 125 | new KeyValue("слова", 2L) 126 | ) 127 | } 128 | -------------------------------------------------------------------------------- /src/test/scala/com/lightbend/kafka/scala/streams/KafkaStreamsTest.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 
3 | */ 4 | package com.lightbend.kafka.scala.streams 5 | 6 | import java.util.Properties 7 | import java.util.regex.Pattern 8 | 9 | import com.lightbend.kafka.scala.server.{KafkaLocalServer, MessageListener, MessageSender, RecordProcessorTrait} 10 | import minitest.TestSuite 11 | import org.apache.kafka.clients.consumer.ConsumerRecord 12 | import org.apache.kafka.common.serialization._ 13 | import org.apache.kafka.streams.{KafkaStreams, KeyValue, StreamsConfig} 14 | import ImplicitConversions._ 15 | import com.typesafe.scalalogging.LazyLogging 16 | 17 | object KafkaStreamsTest extends TestSuite[KafkaLocalServer] with WordCountTestData with LazyLogging { 18 | 19 | override def setup(): KafkaLocalServer = { 20 | val s = KafkaLocalServer(cleanOnStart = true, Some(localStateDir)) 21 | s.start() 22 | s 23 | } 24 | 25 | override def tearDown(server: KafkaLocalServer): Unit = 26 | server.stop() 27 | 28 | test("should count words") { server => 29 | server.createTopic(inputTopic) 30 | server.createTopic(outputTopic) 31 | 32 | // 33 | // Step 1: Configure and start the processor topology. 34 | // 35 | import DefaultSerdes._ 36 | 37 | val streamsConfiguration = new Properties() 38 | streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, s"wordcount-${scala.util.Random.nextInt(100)}") 39 | streamsConfiguration.put(StreamsConfig.CLIENT_ID_CONFIG, "wordcountgroup") 40 | 41 | streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, brokers) 42 | streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, localStateDir) 43 | 44 | val builder = new StreamsBuilderS() 45 | 46 | val textLines = builder.stream[String, String](inputTopic) 47 | 48 | val pattern = Pattern.compile("\\W+", Pattern.UNICODE_CHARACTER_CLASS) 49 | 50 | val wordCounts: KTableS[String, Long] = 51 | textLines 52 | .flatMapValues(v => pattern.split(v.toLowerCase)) 53 | .groupBy((k, v) => v) 54 | .count() 55 | 56 | wordCounts.toStream.to(outputTopic) 57 | 58 | val streams = new KafkaStreams(builder.build(), streamsConfiguration) 59 | streams.start() 60 | 61 | // 62 | // Step 2: Produce some input data to the input topic. 63 | // 64 | val sender = 65 | MessageSender[String, String](brokers, classOf[StringSerializer].getName, classOf[StringSerializer].getName) 66 | val mvals = sender.batchWriteValue(inputTopic, inputValues) 67 | 68 | // 69 | // Step 3: Verify the application's output data. 
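// (The listener below polls the output topic until at least expectedWordCounts.size records have
//  been received or the 30-second timeout expires; results are then compared order-independently
//  by sorting on the key.)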
70 | // 71 | val listener = MessageListener(brokers, 72 | outputTopic, 73 | "wordcountgroup", 74 | classOf[StringDeserializer].getName, 75 | classOf[LongDeserializer].getName, 76 | new RecordProcessor) 77 | 78 | val l = listener.waitUntilMinKeyValueRecordsReceived(expectedWordCounts.size, 30000) 79 | 80 | assertEquals(l.sortBy(_.key), expectedWordCounts.sortBy(_.key)) 81 | 82 | streams.close() 83 | } 84 | 85 | class RecordProcessor extends RecordProcessorTrait[String, Long] { 86 | override def processRecord(record: ConsumerRecord[String, Long]): Unit = { 87 | // logger.info(s"Get Message $record") 88 | } 89 | } 90 | 91 | } 92 | 93 | trait WordCountTestData { 94 | val inputTopic = s"inputTopic.${scala.util.Random.nextInt(100)}" 95 | val outputTopic = s"outputTopic.${scala.util.Random.nextInt(100)}" 96 | val brokers = "localhost:9092" 97 | val localStateDir = "local_state_data" 98 | 99 | val inputValues = List( 100 | "Hello Kafka Streams", 101 | "All streams lead to Kafka", 102 | "Join Kafka Summit", 103 | "И теперь пошли русские слова" 104 | ) 105 | 106 | val expectedWordCounts: List[KeyValue[String, Long]] = List( 107 | new KeyValue("hello", 1L), 108 | new KeyValue("all", 1L), 109 | new KeyValue("streams", 2L), 110 | new KeyValue("lead", 1L), 111 | new KeyValue("to", 1L), 112 | new KeyValue("join", 1L), 113 | new KeyValue("kafka", 3L), 114 | new KeyValue("summit", 1L), 115 | new KeyValue("и", 1L), 116 | new KeyValue("теперь", 1L), 117 | new KeyValue("пошли", 1L), 118 | new KeyValue("русские", 1L), 119 | new KeyValue("слова", 1L) 120 | ) 121 | } 122 | -------------------------------------------------------------------------------- /src/test/scala/com/lightbend/kafka/scala/streams/ProbabilisticCountingScalaIntegrationTest.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | * Adapted from Confluent Inc. whose copyright is reproduced below. 4 | */ 5 | /* 6 | * Copyright Confluent Inc. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | package com.lightbend.kafka.scala.streams 21 | 22 | import java.util.Properties 23 | 24 | import com.lightbend.kafka.scala.server.{KafkaLocalServer, MessageListener, MessageSender, RecordProcessorTrait} 25 | import com.lightbend.kafka.scala.streams.algebird.{CMSStore, CMSStoreBuilder} 26 | import minitest.TestSuite 27 | import org.apache.kafka.clients.consumer.ConsumerRecord 28 | import org.apache.kafka.common.serialization._ 29 | import org.apache.kafka.streams.kstream.Transformer 30 | import org.apache.kafka.streams.processor.ProcessorContext 31 | import org.apache.kafka.streams.{KafkaStreams, KeyValue, StreamsConfig} 32 | import ImplicitConversions._ 33 | import com.typesafe.scalalogging.LazyLogging 34 | 35 | /** 36 | * End-to-end integration test that demonstrates how to probabilistically count items in an input stream. 
37 | * 38 | * This example uses a custom state store implementation, [[CMSStore]], that is backed by a 39 | * Count-Min Sketch data structure. 40 | */ 41 | trait ProbabilisticCountingScalaIntegrationTestData extends LazyLogging { 42 | val brokers = "localhost:9092" 43 | val inputTopic = s"inputTopic.${scala.util.Random.nextInt(100)}" 44 | val outputTopic = s"output-topic.${scala.util.Random.nextInt(100)}" 45 | val localStateDir = "local_state_data" 46 | 47 | val inputTextLines: Seq[String] = Seq( 48 | "Hello Kafka Streams", 49 | "All streams lead to Kafka", 50 | "Join Kafka Summit" 51 | ) 52 | 53 | val expectedWordCounts: Seq[KeyValue[String, Long]] = Seq( 54 | ("hello", 1L), 55 | ("kafka", 1L), 56 | ("streams", 1L), 57 | ("all", 1L), 58 | ("streams", 2L), 59 | ("lead", 1L), 60 | ("to", 1L), 61 | ("kafka", 2L), 62 | ("join", 1L), 63 | ("kafka", 3L), 64 | ("summit", 1L) 65 | ) 66 | } 67 | 68 | object ProbabilisticCountingScalaIntegrationTest 69 | extends TestSuite[KafkaLocalServer] 70 | with ProbabilisticCountingScalaIntegrationTestData { 71 | 72 | override def setup(): KafkaLocalServer = { 73 | val s = KafkaLocalServer(cleanOnStart = true, Some(localStateDir)) 74 | s.start() 75 | s 76 | } 77 | 78 | override def tearDown(server: KafkaLocalServer): Unit = 79 | server.stop() 80 | 81 | test("shouldProbabilisticallyCountWords") { server => 82 | server.createTopic(inputTopic) 83 | server.createTopic(outputTopic) 84 | 85 | // 86 | // Step 1: Configure and start the processor topology. 87 | // 88 | val streamsConfiguration: Properties = { 89 | val p = new Properties() 90 | p.put(StreamsConfig.APPLICATION_ID_CONFIG, 91 | s"probabilistic-counting-scala-integration-test-${scala.util.Random.nextInt(100)}") 92 | p.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, brokers) 93 | p.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.ByteArray.getClass.getName) 94 | p.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String.getClass.getName) 95 | p.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, "10000") 96 | p.put(StreamsConfig.STATE_DIR_CONFIG, localStateDir) 97 | p 98 | } 99 | 100 | val builder = new StreamsBuilderS() 101 | 102 | val cmsStoreName = "cms-store" 103 | val cmsStoreBuilder = { 104 | val changelogConfig: java.util.HashMap[String, String] = { 105 | val cfg = new java.util.HashMap[String, String] 106 | val segmentSizeBytes = (20 * 1024 * 1024).toString 107 | cfg.put("segment.bytes", segmentSizeBytes) 108 | cfg 109 | } 110 | new CMSStoreBuilder[String](cmsStoreName, Serdes.String()) 111 | .withLoggingEnabled(changelogConfig) 112 | } 113 | builder.addStateStore(cmsStoreBuilder) 114 | 115 | class ProbabilisticCounter extends Transformer[Array[Byte], String, (String, Long)] { 116 | 117 | private var cmsState: CMSStore[String] = _ 118 | private var processorContext: ProcessorContext = _ 119 | 120 | override def init(processorContext: ProcessorContext): Unit = { 121 | this.processorContext = processorContext 122 | cmsState = this.processorContext.getStateStore(cmsStoreName).asInstanceOf[CMSStore[String]] 123 | } 124 | 125 | override def transform(key: Array[Byte], value: String): (String, Long) = { 126 | // Count the record value, think: "+ 1" 127 | cmsState.put(value, this.processorContext.timestamp()) 128 | 129 | // In this example: emit the latest count estimate for the record value. We could also do 130 | // something different, e.g. periodically output the latest heavy hitters via `punctuate`. 
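// A sketch of that alternative (not part of the original code): assuming CMSStore exposes the
// heavy hitters of its underlying TopCMS (e.g. via a `heavyHitters` accessor) and that
// org.apache.kafka.streams.processor.PunctuationType is imported, one could schedule
//   processorContext.schedule(10000L, PunctuationType.STREAM_TIME, (timestamp: Long) =>
//     cmsState.heavyHitters.foreach(item => processorContext.forward(item, cmsState.get(item))))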
131 | (value, cmsState.get(value)) 132 | } 133 | 134 | //scalastyle:off null 135 | override def punctuate(l: Long): (String, Long) = null 136 | //scalastyle:on null 137 | override def close(): Unit = {} 138 | } 139 | 140 | implicit val stringSerde: Serde[String] = Serdes.String() 141 | implicit val byteArraySerde: Serde[Array[Byte]] = Serdes.ByteArray() 142 | implicit val longSerde: Serde[Long] = Serdes.Long().asInstanceOf[Serde[Long]] 143 | 144 | // Read the input from Kafka. 145 | val textLines: KStreamS[Array[Byte], String] = builder.stream(inputTopic) 146 | 147 | textLines 148 | .flatMapValues(value => value.toLowerCase.split("\\W+").toIterable) 149 | .transform(() => new ProbabilisticCounter, cmsStoreName) 150 | .to(outputTopic) 151 | 152 | val streams: KafkaStreams = new KafkaStreams(builder.build(), streamsConfiguration) 153 | streams.start() 154 | 155 | // Step 2: Publish some input text lines. 156 | val sender = 157 | MessageSender[String, String](brokers, classOf[StringSerializer].getName, classOf[StringSerializer].getName) 158 | sender.batchWriteValue(inputTopic, inputTextLines) 159 | // Step 3: Verify the application's output data. 160 | 161 | val listener = MessageListener(brokers, 162 | outputTopic, 163 | "probwordcountgroup", 164 | classOf[StringDeserializer].getName, 165 | classOf[LongDeserializer].getName, 166 | new RecordProcessor) 167 | 168 | val l = listener.waitUntilMinKeyValueRecordsReceived(expectedWordCounts.size, 30000) 169 | 170 | assertEquals(l.sortBy(_.key), expectedWordCounts.sortBy(_.key)) 171 | streams.close() 172 | } 173 | 174 | class RecordProcessor extends RecordProcessorTrait[String, Long] { 175 | override def processRecord(record: ConsumerRecord[String, Long]): Unit = { 176 | // logger.info(s"Get Message $record") 177 | } 178 | } 179 | 180 | } 181 | -------------------------------------------------------------------------------- /src/test/scala/com/lightbend/kafka/scala/streams/PunctuateTest.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | */ 4 | package com.lightbend.kafka.scala.streams 5 | 6 | import java.util.Properties 7 | 8 | import com.lightbend.kafka.scala.server.{KafkaLocalServer, MessageSender} 9 | import com.typesafe.scalalogging.LazyLogging 10 | import minitest.TestSuite 11 | import org.apache.kafka.common.serialization._ 12 | import org.apache.kafka.streams.processor.{AbstractProcessor, ProcessorContext, PunctuationType} 13 | import org.apache.kafka.streams.{KafkaStreams, StreamsConfig, Topology} 14 | 15 | /** 16 | * This sample is using usage of punctuate, which is significantly changed in version 1.0 and 17 | * Kafka Streams Processor APIs (https://kafka.apache.org/10/documentation/streams/developer-guide/processor-api.html) 18 | * This code is based on the article "Problems With Kafka Streams: 19 | * The Saga Continues" (https://dzone.com/articles/problems-with-kafka-streams-the-saga-continues) 20 | */ 21 | object PunctuateTest extends TestSuite[KafkaLocalServer] with PunctuateTestData with LazyLogging { 22 | 23 | override def setup(): KafkaLocalServer = { 24 | val s = KafkaLocalServer(cleanOnStart = true, Some(localStateDir)) 25 | s.start() 26 | s 27 | } 28 | 29 | override def tearDown(server: KafkaLocalServer): Unit = 30 | server.stop() 31 | 32 | test("should punctuate execution") { server => 33 | server.createTopic(inputTopic) 34 | 35 | // 36 | // Step 1: Configure and start the processor topology. 
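// (Unlike the DSL-based tests above, this test wires a Topology directly through the Processor
//  API: a single source node feeds SampleProcessor, which registers a stream-time punctuator.)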
37 | // 38 | 39 | val streamsConfiguration = new Properties() 40 | streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, s"punctuate-${scala.util.Random.nextInt(100)}") 41 | streamsConfiguration.put(StreamsConfig.CLIENT_ID_CONFIG, "punctuategroup") 42 | 43 | streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, brokers) 44 | streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()) 45 | streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()) 46 | 47 | val topology = new Topology 48 | // Data input streams 49 | topology.addSource("data", inputTopic) 50 | // Processors 51 | topology.addProcessor("data processor", () => new SampleProcessor(5000), "data") 52 | val streams = new KafkaStreams(topology, streamsConfiguration) 53 | streams.start() 54 | // Allow time for the streams to start up 55 | Thread.sleep(5000L) 56 | 57 | // 58 | // Step 2: Produce some input data to the input topic. 59 | // 60 | val sender = 61 | MessageSender[String, String](brokers, classOf[StringSerializer].getName, classOf[StringSerializer].getName) 62 | for (i <- 0 to 15) { 63 | sender.writeValue(inputTopic, i.toString) 64 | Thread.sleep(1000L) // sleep for 1 sec 65 | } 66 | 67 | // End test 68 | Thread.sleep(5000L) // sleep for 5 sec 69 | streams.close() 70 | } 71 | 72 | class SampleProcessor(punctuateTime: Long) extends AbstractProcessor[String, String] { 73 | 74 | var ctx: ProcessorContext = _ 75 | var message = "" 76 | 77 | override def init(context: ProcessorContext): Unit = { 78 | ctx = context 79 | ctx.schedule(punctuateTime, 80 | PunctuationType.STREAM_TIME, 81 | (timestamp: Long) => logger.info(s"Punctuator called at $timestamp, current message $message")) 82 | } 83 | 84 | override def process(key: String, value: String): Unit = { 85 | logger.info(s"Processing new message $value") 86 | message = value 87 | } 88 | } 89 | } 90 | 91 | trait PunctuateTestData { 92 | val inputTopic = s"inputTopic.${scala.util.Random.nextInt(100)}" 93 | val outputTopic = s"outputTopic.${scala.util.Random.nextInt(100)}" 94 | val brokers = "localhost:9092" 95 | val localStateDir = "local_state_data" 96 | } 97 | -------------------------------------------------------------------------------- /src/test/scala/com/lightbend/kafka/scala/streams/StreamToTableJoinScalaIntegrationTestImplicitSerdes.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | * Adapted from Confluent Inc. whose copyright is reproduced below. 4 | */ 5 | /* 6 | * Copyright Confluent Inc. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License.
19 | */ 20 | package com.lightbend.kafka.scala.streams 21 | 22 | import java.util.Properties 23 | 24 | import minitest.TestSuite 25 | import com.lightbend.kafka.scala.server.{KafkaLocalServer, MessageListener, MessageSender, RecordProcessorTrait} 26 | import org.apache.kafka.common.serialization._ 27 | import org.apache.kafka.streams._ 28 | import org.apache.kafka.clients.consumer.ConsumerRecord 29 | import ImplicitConversions._ 30 | import com.typesafe.scalalogging.LazyLogging 31 | 32 | /** 33 | * End-to-end integration test that demonstrates how to perform a join between a KStream and a 34 | * KTable (think: KStream.leftJoin(KTable)), i.e. an example of a stateful computation. 35 | * 36 | * See StreamToTableJoinIntegrationTest for the equivalent Java example. 37 | * 38 | * Note: We intentionally use JUnit4 (wrapped by ScalaTest) for implementing this Scala integration 39 | * test so it is easier to compare this Scala code with the equivalent Java code at 40 | * StreamToTableJoinIntegrationTest. One difference is that, to simplify the Scala/Junit integration, we 41 | * switched from BeforeClass (which must be `static`) to Before as well as from @ClassRule (which 42 | * must be `static` and `public`) to a workaround combination of `@Rule def` and a `private val`. 43 | */ 44 | object StreamToTableJoinScalaIntegrationTestImplicitSerdes 45 | extends TestSuite[KafkaLocalServer] 46 | with StreamToTableJoinTestData 47 | with LazyLogging { 48 | 49 | override def setup(): KafkaLocalServer = { 50 | val s = KafkaLocalServer(cleanOnStart = true, Some(localStateDir)) 51 | s.start() 52 | s 53 | } 54 | 55 | override def tearDown(server: KafkaLocalServer): Unit = 56 | server.stop() 57 | 58 | test("should count clicks per region") { server => 59 | server.createTopic(userClicksTopic) 60 | server.createTopic(userRegionsTopic) 61 | server.createTopic(outputTopic) 62 | 63 | // 64 | // Step 1: Configure and start the processor topology. 65 | // 66 | // DefaultSerdes brings into scope implicit serdes (mostly for primitives) that will set up all Serialized, Produced, 67 | // Consumed and Joined instances. So all APIs below that accept Serialized, Produced, Consumed or Joined will 68 | // get these instances automatically 69 | import DefaultSerdes._ 70 | 71 | // we don't have any serde declared as part of configuration. Even if they are declared here, the 72 | // Scala APIs will ignore them. But it's possible to declare serdes here and use them through 73 | // Java APIs 74 | val streamsConfiguration: Properties = { 75 | val p = new Properties() 76 | p.put(StreamsConfig.APPLICATION_ID_CONFIG, 77 | s"stream-table-join-scala-integration-test-implicit-ser-${scala.util.Random.nextInt(100)}") 78 | p.put(StreamsConfig.CLIENT_ID_CONFIG, "join-scala-integration-test-implicit-ser-standard-consumer") 79 | p.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, brokers) 80 | p.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, "100") 81 | p.put(StreamsConfig.STATE_DIR_CONFIG, localStateDir) 82 | p 83 | } 84 | 85 | val builder = new StreamsBuilderS() 86 | 87 | val userClicksStream: KStreamS[String, Long] = builder.stream(userClicksTopic) 88 | 89 | val userRegionsTable: KTableS[String, String] = builder.table(userRegionsTopic) 90 | 91 | // Compute the total per region by summing the individual click counts per region. 92 | val clicksPerRegion: KTableS[String, Long] = 93 | userClicksStream 94 | 95 | // Join the stream against the table. 
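// (This is a non-windowed stream-table join: every incoming click record is joined against the
//  latest region stored for that user key, and users without a region fall back to "UNKNOWN".)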
96 | .leftJoin(userRegionsTable, 97 | (clicks: Long, region: String) => (if (region == null) "UNKNOWN" else region, clicks)) 98 | 99 | // Change the stream from -> to -> 100 | .map((_, regionWithClicks) => regionWithClicks) 101 | 102 | // Compute the total per region by summing the individual click counts per region. 103 | .groupByKey 104 | .reduce(_ + _) 105 | 106 | // Write the (continuously updating) results to the output topic. 107 | clicksPerRegion.toStream.to(outputTopic) 108 | 109 | val streams: KafkaStreams = new KafkaStreams(builder.build(), streamsConfiguration) 110 | 111 | streams.setUncaughtExceptionHandler( 112 | (_: Thread, e: Throwable) => 113 | try { 114 | logger.error(s"Stream terminated because of uncaught exception .. Shutting down app", e) 115 | e.printStackTrace() 116 | val closed: Unit = streams.close() 117 | logger.info(s"Exiting application after streams close ($closed)") 118 | } catch { 119 | case x: Exception => x.printStackTrace() 120 | } finally { 121 | logger.debug("Exiting application ..") 122 | System.exit(-1) 123 | } 124 | ) 125 | 126 | streams.start() 127 | 128 | // 129 | // Step 2: Publish user-region information. 130 | // 131 | // To keep this code example simple and easier to understand/reason about, we publish all 132 | // user-region records before any user-click records (cf. step 3). In practice though, 133 | // data records would typically be arriving concurrently in both input streams/topics. 134 | // 135 | val sender1 = 136 | MessageSender[String, String](brokers, classOf[StringSerializer].getName, classOf[StringSerializer].getName) 137 | userRegions.foreach(r => sender1.writeKeyValue(userRegionsTopic, r.key, r.value)) 138 | 139 | // 140 | // Step 3: Publish some user click events. 141 | // 142 | val sender2 = 143 | MessageSender[String, Long](brokers, classOf[StringSerializer].getName, classOf[LongSerializer].getName) 144 | userClicks.foreach(r => sender2.writeKeyValue(userClicksTopic, r.key, r.value)) 145 | 146 | // 147 | // Step 4: Verify the application's output data. 148 | // 149 | val listener = MessageListener( 150 | brokers, 151 | outputTopic, 152 | "join-scala-integration-test-standard-consumer", 153 | classOf[StringDeserializer].getName, 154 | classOf[LongDeserializer].getName, 155 | new RecordProcessor 156 | ) 157 | 158 | val l = listener.waitUntilMinKeyValueRecordsReceived(expectedClicksPerRegion.size, 30000) // scalastyle:ignore 159 | streams.close() 160 | assertEquals(l.sortBy(_.key), expectedClicksPerRegion.sortBy(_.key)) 161 | } 162 | 163 | class RecordProcessor extends RecordProcessorTrait[String, Long] { 164 | override def processRecord(record: ConsumerRecord[String, Long]): Unit = { 165 | //logger.info(s"Get Message $record") 166 | } 167 | } 168 | } 169 | -------------------------------------------------------------------------------- /src/test/scala/com/lightbend/kafka/scala/streams/StreamToTableJoinScalaIntegrationTestImplicitSerdesWithAvro.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | * Adapted from Confluent Inc. whose copyright is reproduced below. 4 | */ 5 | /* 6 | * Copyright Confluent Inc. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 
10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | package com.lightbend.kafka.scala.streams 21 | 22 | import java.io.{ByteArrayInputStream, ByteArrayOutputStream} 23 | import java.util.Properties 24 | 25 | import com.lightbend.kafka.scala.server.{KafkaLocalServer, MessageListener, MessageSender, RecordProcessorTrait} 26 | import com.sksamuel.avro4s._ 27 | import minitest.TestSuite 28 | import org.apache.kafka.clients.consumer.ConsumerRecord 29 | import org.apache.kafka.common.serialization._ 30 | import org.apache.kafka.streams._ 31 | import ImplicitConversions._ 32 | 33 | object StreamToTableJoinScalaIntegrationTestImplicitSerdesWithAvro 34 | extends TestSuite[KafkaLocalServer] 35 | with StreamToTableJoinTestData { 36 | 37 | case class UserClicks(clicks: Long) 38 | 39 | // adopted from Openshine implementation 40 | class AvroSerde[T >: Null: SchemaFor: FromRecord: ToRecord] extends StatelessScalaSerde[T] { 41 | 42 | override def serialize(data: T): Array[Byte] = { 43 | val baos = new ByteArrayOutputStream() 44 | val output = AvroOutputStream.binary[T](baos) 45 | output.write(data) 46 | output.close() 47 | baos.toByteArray 48 | } 49 | 50 | override def deserialize(data: Array[Byte]): Option[T] = { 51 | val in = new ByteArrayInputStream(data) 52 | val input = AvroInputStream.binary[T](in) 53 | input.iterator.toSeq.headOption 54 | } 55 | } 56 | 57 | /** Our implicit Serde implementation for the values we want to serialize 58 | * as avro 59 | */ 60 | implicit val userClicksSerde: Serde[UserClicks] = new AvroSerde 61 | 62 | /** 63 | * End-to-end integration test that demonstrates how to perform a join 64 | * between a KStream and a 65 | * KTable (think: KStream.leftJoin(KTable)), i.e. an example of a stateful 66 | * computation. 67 | * 68 | * See StreamToTableJoinIntegrationTest for the equivalent Java example. 69 | * 70 | * Note: We intentionally use JUnit4 (wrapped by ScalaTest) for 71 | * implementing this Scala integration 72 | * test so it is easier to compare this Scala code with the equivalent 73 | * Java code at 74 | * StreamToTableJoinIntegrationTest. One difference is that, to simplify 75 | * the Scala/Junit integration, we 76 | * switched from BeforeClass (which must be `static`) to Before as well as 77 | * from @ClassRule (which 78 | * must be `static` and `public`) to a workaround combination of `@Rule 79 | * def` and a `private val`. 80 | */ 81 | override def setup(): KafkaLocalServer = { 82 | val s = KafkaLocalServer(true, Some(localStateDir)) 83 | s.start() 84 | s 85 | } 86 | 87 | override def tearDown(server: KafkaLocalServer): Unit = 88 | server.stop() 89 | 90 | test("should count clicks per region") { server => 91 | server.createTopic(userClicksTopic) 92 | server.createTopic(userRegionsTopic) 93 | server.createTopic(outputTopic) 94 | 95 | // DefaultSerdes brings into scope implicit serdes (mostly for primitives) that will set up all Serialized, Produced, 96 | // Consumed and Joined instances. 
So all APIs below that accept Serialized, Produced, Consumed or Joined will 97 | // get these instances automatically 98 | import DefaultSerdes._ 99 | 100 | // 101 | // Step 1: Configure and start the processor topology. 102 | // 103 | // we don't have any serde declared as part of configuration. Even if they are declared here, the 104 | // Scala APIs will ignore them. But it's possible to declare serdes here and use them through 105 | // Java APIs 106 | val streamsConfiguration: Properties = { 107 | val p = new Properties() 108 | p.put(StreamsConfig.APPLICATION_ID_CONFIG, 109 | s"stream-table-join-scala-integration-test-implicit-serdes-${scala.util.Random.nextInt(100)}") 110 | p.put(StreamsConfig.CLIENT_ID_CONFIG, "join-scala-integration-test-implicit-serdes-standard-consumer") 111 | p.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, brokers) 112 | p.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, "100") 113 | p.put(StreamsConfig.STATE_DIR_CONFIG, localStateDir) 114 | p 115 | } 116 | 117 | implicit val builder = new StreamsBuilderS() 118 | 119 | val userClicksStream: KStreamS[String, UserClicks] = builder.stream(userClicksTopic) 120 | 121 | val userRegionsTable: KTableS[String, String] = builder.table(userRegionsTopic) 122 | 123 | // Compute the total per region by summing the individual click counts per region. 124 | val clicksPerRegion: KTableS[String, Long] = 125 | userClicksStream 126 | 127 | // Join the stream against the table. 128 | .leftJoin(userRegionsTable, 129 | (clicks: UserClicks, region: String) => (if (region == null) "UNKNOWN" else region, clicks.clicks)) 130 | 131 | // Change the stream from -> to -> 132 | .map((_, regionWithClicks) => regionWithClicks) 133 | 134 | // Compute the total per region by summing the individual click counts per region. 135 | .groupByKey 136 | .reduce(_ + _) 137 | 138 | // Write the (continuously updating) results to the output topic. 139 | clicksPerRegion.toStream.to(outputTopic) 140 | 141 | val streams: KafkaStreams = new KafkaStreams(builder.build(), streamsConfiguration) 142 | 143 | streams 144 | .setUncaughtExceptionHandler(new Thread.UncaughtExceptionHandler() { 145 | override def uncaughtException(t: Thread, e: Throwable): Unit = 146 | try { 147 | println(s"Stream terminated because of uncaught exception .. Shutting " + 148 | s"down app", 149 | e) 150 | e.printStackTrace 151 | val closed = streams.close() 152 | println(s"Exiting application after streams close ($closed)") 153 | } catch { 154 | case x: Exception => x.printStackTrace 155 | } finally { 156 | println("Exiting application ..") 157 | System.exit(-1) 158 | } 159 | }) 160 | 161 | streams.start() 162 | 163 | // 164 | // Step 2: Publish user-region information. 165 | // 166 | // To keep this code example simple and easier to understand/reason 167 | // about, we publish all 168 | // user-region records before any user-click records (cf. step 3). In 169 | // practice though, 170 | // data records would typically be arriving concurrently in both input 171 | // streams/topics. 172 | val sender1 = 173 | MessageSender[String, String](brokers, classOf[StringSerializer].getName, classOf[StringSerializer].getName) 174 | userRegions.foreach(r => sender1.writeKeyValue(userRegionsTopic, r.key, r.value)) 175 | 176 | // 177 | // Step 3: Publish some user click events. 
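// (Click values are serialized explicitly with the AvroSerde defined above and published as raw
//  bytes, which is why sender2 below is created with a ByteArraySerializer for values.)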
178 | // 179 | val sender2 = MessageSender[String, Array[Byte]](brokers, 180 | classOf[StringSerializer].getName, 181 | classOf[ByteArraySerializer].getName) 182 | userClicks 183 | .map( 184 | kv => 185 | new KeyValue[String, Array[Byte]]( 186 | kv.key, 187 | new AvroSerde[UserClicks].serialize(UserClicks(kv.value)) 188 | ) 189 | ) 190 | .foreach(r => sender2.writeKeyValue(userClicksTopic, r.key, r.value)) 191 | 192 | // 193 | // Step 4: Verify the application's output data. 194 | // 195 | val listener = MessageListener( 196 | brokers, 197 | outputTopic, 198 | "join-scala-integration-test-standard-consumer", 199 | classOf[StringDeserializer].getName, 200 | classOf[LongDeserializer].getName, 201 | new RecordProcessor 202 | ) 203 | 204 | val l = listener 205 | .waitUntilMinKeyValueRecordsReceived(expectedClicksPerRegion.size, 30000) 206 | streams.close() 207 | assertEquals(l.sortBy(_.key), expectedClicksPerRegion.sortBy(_.key)) 208 | } 209 | 210 | class RecordProcessor extends RecordProcessorTrait[String, Long] { 211 | override def processRecord(record: ConsumerRecord[String, Long]): Unit = { 212 | // println(s"Get Message $record") 213 | } 214 | } 215 | 216 | } 217 | -------------------------------------------------------------------------------- /src/test/scala/com/lightbend/kafka/scala/streams/StreamToTableJoinTestData.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | * Adapted from Confluent Inc. whose copyright is reproduced below. 4 | */ 5 | /* 6 | * Copyright Confluent Inc. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | package com.lightbend.kafka.scala.streams 21 | 22 | import org.apache.kafka.streams.KeyValue 23 | 24 | trait StreamToTableJoinTestData { 25 | val brokers = "localhost:9092" 26 | 27 | val userClicksTopic = s"user-clicks.${scala.util.Random.nextInt(100)}" 28 | val userRegionsTopic = s"user-regions.${scala.util.Random.nextInt(100)}" 29 | val outputTopic = s"output-topic.${scala.util.Random.nextInt(100)}" 30 | val localStateDir = "local_state_data" 31 | 32 | // Input 1: Clicks per user (multiple records allowed per user). 33 | val userClicks: Seq[KeyValue[String, Long]] = Seq( 34 | new KeyValue("alice", 13L), 35 | new KeyValue("bob", 4L), 36 | new KeyValue("chao", 25L), 37 | new KeyValue("bob", 19L), 38 | new KeyValue("dave", 56L), 39 | new KeyValue("eve", 78L), 40 | new KeyValue("alice", 40L), 41 | new KeyValue("fang", 99L) 42 | ) 43 | 44 | // Input 2: Region per user (multiple records allowed per user). 45 | val userRegions: Seq[KeyValue[String, String]] = Seq( 46 | new KeyValue("alice", "asia"), /* Alice lived in Asia originally... */ 47 | new KeyValue("bob", "americas"), 48 | new KeyValue("chao", "asia"), 49 | new KeyValue("dave", "europe"), 50 | new KeyValue("alice", "europe"), /* ...but moved to Europe some time later. 
*/ 51 | new KeyValue("eve", "americas"), 52 | new KeyValue("fang", "asia") 53 | ) 54 | 55 | val expectedClicksPerRegion: Seq[KeyValue[String, Long]] = Seq( 56 | new KeyValue("americas", 101L), 57 | new KeyValue("europe", 109L), 58 | new KeyValue("asia", 124L) 59 | ) 60 | } 61 | -------------------------------------------------------------------------------- /src/test/scala/com/lightbend/kafka/scala/streams/algebird/CMSStore.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright Confluent Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.lightbend.kafka.scala.streams 17 | package algebird 18 | 19 | import com.twitter.algebird.{CMSHasher, TopCMS, TopPctCMS} 20 | import org.apache.kafka.common.serialization.Serdes 21 | import org.apache.kafka.streams.processor.{ProcessorContext, StateStore} 22 | import org.apache.kafka.streams.state.StateSerdes 23 | 24 | /** 25 | * An in-memory store that leverages the Count-Min Sketch implementation of 26 | * [[https://github.com/twitter/algebird Twitter Algebird]]. 27 | * 28 | * This store allows you to probabilistically count items of type T with a 29 | * [[https://en.wikipedia.org/wiki/Count%E2%80%93min_sketch Count-Min Sketch]] data structure. 30 | * Here, the counts returned by the store will be approximate counts, i.e. estimations, because a 31 | * Count-Min Sketch trades slightly inaccurate counts for greatly reduced space utilization 32 | * (however, the estimation error is mathematically proven to be bounded). 33 | * With probability at least `1 - delta`, this estimate is within `eps * N` of the true frequency 34 | * (i.e., `true frequency <= estimate <= true frequency + eps * N`), where `N` is the total number 35 | * of items counted ("seen" in the input) so far (cf. [[CMSStore#totalCount]]). 36 | * 37 | * A traditional Count-Min Sketch is a fixed-size data structure that is essentially an array of 38 | * counters of a particular width (derived from the parameter `eps`) and depth (derived from the 39 | * parameter `delta`). The CMS variant used in this store, [[TopPctCMS]], additionally tracks the 40 | * so-called "heavy hitters" among the counted items (i.e. the items with the largest counts) based 41 | * on a percentage threshold; the size of heavy hitters is still bounded, however, hence the total 42 | * size of the [[TopPctCMS]] data structure is still fixed. 43 | * 44 | * =Fault-tolerance= 45 | * 46 | * This store supports changelogging its state to Kafka and is thus fault-tolerant. Every time the 47 | * store is flushed (cf. [[org.apache.kafka.streams.StreamsConfig.COMMIT_INTERVAL_MS_CONFIG]]) the 48 | * underlying CMS data structure is written to the store's changelog topic. For many use cases 49 | * this approach should be sufficiently efficient because the absolute size of a CMS is typically 50 | * rather small (a few KBs up to a megabyte, depending on the CMS settings, which are determined by 51 | * e.g. 
your error bound requirements for approximate counts). 52 | * 53 | * =Usage= 54 | * 55 | * Note: Twitter Algebird is best used with Scala, so all the examples below are in Scala, too. 56 | * 57 | * In a Kafka Streams application, you'd typically create this store as such: 58 | * 59 | * {{{ 60 | * val builder: StreamsBuilder = new StreamsBuilder() 61 | * 62 | * // In this example, we create a store for type [[String]]. 63 | * // It's recommended to reduce Kafka's log segment size for the changelogs of CMS stores, which 64 | * // you can do by passing the respective Kafka setting to the CMSStoreBuilder via `withLoggingEnabled()`. 65 | * builder.addStateStore(new CMSStoreBuilder[String]("my-cms-store-name", Serdes.String())) 66 | * }}} 67 | * 68 | * Then you'd use the store within a [[org.apache.kafka.streams.processor.Processor]] or a 69 | * [[org.apache.kafka.streams.kstream.Transformer]] similar to: 70 | * 71 | * {{{ 72 | * class ProbabilisticCounter extends Transformer[Array[Byte], String, KeyValue[String, Long]] { 73 | * 74 | * private var cmsState: CMSStore[String] = _ 75 | * private var processorContext: ProcessorContext = _ 76 | * 77 | * override def init(processorContext: ProcessorContext): Unit = { 78 | * this.processorContext = processorContext 79 | * cmsState = this.processorContext.getStateStore("my-cms-store-name").asInstanceOf[CMSStore[String]] 80 | * } 81 | * 82 | * override def transform(key: Array[Byte], value: String): KeyValue[String, Long] = { 83 | * // Count the record value, think: "+ 1" 84 | * cmsState.put(value) 85 | * 86 | * // Emit the latest count estimate for the record value 87 | * KeyValue.pair[String, Long](value, cmsState.get(value)) 88 | * } 89 | * 90 | * override def punctuate(l: Long): KeyValue[String, Long] = null 91 | * 92 | * override def close(): Unit = {} 93 | * } 94 | * }}} 95 | * 96 | * @param name The name of this store instance 97 | * @param loggingEnabled Whether or not changelogging (fault-tolerance) is enabled for this store. 98 | * @param delta CMS parameter: A bound on the probability that a query estimate does not 99 | * lie within some small interval (an interval that depends on `eps`) around 100 | * the truth. 101 | * See [[TopPctCMS]] and [[com.twitter.algebird.CMSMonoid]]. 102 | * @param eps CMS parameter: One-sided error bound on the error of each point query, 103 | * i.e. frequency estimate. 104 | * See [[TopPctCMS]] and [[com.twitter.algebird.CMSMonoid]]. 105 | * @param seed CMS parameter: A seed to initialize the random number generator used to 106 | * create the pairwise independent hash functions. Typically you do not 107 | * need to change this. 108 | * See [[TopPctCMS]] and [[com.twitter.algebird.CMSMonoid]]. 109 | * @param heavyHittersPct CMS parameter: A threshold for finding heavy hitters, i.e., items that 110 | * appear at least (heavyHittersPct * totalCount) times in the stream. 111 | * Every item that appears at least `(heavyHittersPct * totalCount)` times 112 | * is included, and with probability `p >= 1 - delta`, no item whose count 113 | * is less than `(heavyHittersPct - eps) * totalCount` is included. 114 | * This also means that this parameter is an upper bound on the number of 115 | * heavy hitters that will be tracked: the set of heavy hitters contains at 116 | * most `1 / heavyHittersPct` elements. For example, if 117 | * `heavyHittersPct=0.01` (or 0.25), then at most `1 / 0.01 = 100` items 118 | * or `1 / 0.25 = 4` items) will be tracked/returned as heavy hitters. 
119 | * This parameter can thus control the memory footprint required for 120 | * tracking heavy hitters. 121 | * See [[TopPctCMS]] and [[com.twitter.algebird.TopPctCMSMonoid]]. 122 | * @tparam T The type used to identify the items to be counted with the CMS. For example, if 123 | * you want to count the occurrence of user names, you could use count user names 124 | * directly with `T=String`; alternatively, you could map each username to a unique 125 | * numeric ID expressed as a `Long`, and then count the occurrences of those `Long`s with 126 | * a CMS of type `T=Long`. Note that such a mapping between the items of your problem 127 | * domain and their identifiers used for counting via CMS should be bijective. 128 | * We require a [[CMSHasher]] context bound for `K`, see [[CMSHasher]] for available 129 | * implicits that can be imported. 130 | * See [[com.twitter.algebird.CMSMonoid]] for further information. 131 | */ 132 | class CMSStore[T: CMSHasher](override val name: String, 133 | val loggingEnabled: Boolean = true, 134 | val delta: Double = 1E-10, 135 | val eps: Double = 0.001, 136 | val seed: Int = 1, 137 | val heavyHittersPct: Double = 0.01) 138 | extends StateStore { 139 | 140 | private val cmsMonoid = TopPctCMS.monoid[T](eps, delta, seed, heavyHittersPct) 141 | 142 | /** 143 | * The "storage backend" of this store. 144 | * 145 | * Needs proper initializing in case the store's changelog is empty. 146 | */ 147 | private var cms: TopCMS[T] = cmsMonoid.zero 148 | 149 | private var timestampOfLastStateStoreUpdate: Long = 0L 150 | 151 | private var changeLogger: CMSStoreChangeLogger[Integer, TopCMS[T]] = _ 152 | 153 | /** 154 | * The record key used to write to the state's changelog. 155 | * 156 | * This key can be a constant because: 157 | * 158 | * 1. We always write the full CMS when writing to the changelog. 159 | * 2. A CMS does not retain information about which items were counted, i.e. it does not track 160 | * information about the keyspace (in the case of this store, the only information about the 161 | * keyspace are the heavy hitters); so, unless we opted for a different approach than (1) 162 | * above, we cannot leverage keyspace information anyways. 163 | * 3. We use a [[CMSStoreChangeLogger]] that uses a stream task's 164 | * [[org.apache.kafka.streams.processor.TaskId]] to identify the changelog partition to write to. 165 | * Thus only one particular stream task will ever be writing to that changelog partition. 166 | * 4. When restoring from the changelog, a stream task will read only its own (one) changelog 167 | * partition. 168 | * 169 | * In other words, we can hardcode the changelog key because only the "right" stream task will be 170 | * (a) writing to AND (b) reading from the respective partition of the changelog. 171 | */ 172 | private[algebird] val changelogKey = 42 173 | 174 | /** 175 | * For unit testing 176 | */ 177 | private[algebird] def cmsFrom(items: Seq[T]): TopCMS[T] = cmsMonoid.create(items) 178 | 179 | /** 180 | * For unit testing 181 | */ 182 | private[algebird] def cmsFrom(item: T): TopCMS[T] = cmsMonoid.create(item) 183 | 184 | @volatile private var open: Boolean = false 185 | 186 | /** 187 | * Initializes this store, including restoring the store's state from its changelog. 
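 *
 * During restore, the register callback below swaps in the CMS deserialized from the changelog,
 * or resets it to `cmsMonoid.zero` when the restored value is null.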
188 | */ 189 | override def init(context: ProcessorContext, root: StateStore) { 190 | val serdes = new StateSerdes[Integer, TopCMS[T]](name, Serdes.Integer(), TopCMSSerde[T]) 191 | changeLogger = new CMSStoreChangeLogger[Integer, TopCMS[T]](name, context, serdes) 192 | 193 | // Note: We must manually guard with `loggingEnabled` here because `context.register()` ignores 194 | // that parameter. 195 | if (root != null && loggingEnabled) 196 | context.register(root, 197 | loggingEnabled, 198 | (_, value) => 199 | if (value == null) 200 | cms = cmsMonoid.zero 201 | else 202 | cms = serdes.valueFrom(value)) 203 | 204 | open = true 205 | } 206 | 207 | /** 208 | * Returns the estimated count of the item. 209 | * 210 | * @param item item to be counted 211 | * @return estimated count 212 | */ 213 | def get(item: T): Long = cms.frequency(item).estimate 214 | 215 | /** 216 | * Counts the item. 217 | * 218 | * @param item item to be counted 219 | */ 220 | def put(item: T, timestamp: Long): Unit = { 221 | cms = cms + item 222 | timestampOfLastStateStoreUpdate = timestamp 223 | } 224 | 225 | /** 226 | * The top items counted so far, with the percentage-based cut-off being defined by the CMS 227 | * parameter `heavyHittersPct`. 228 | * 229 | * @return the top items counted so far 230 | */ 231 | def heavyHitters: Set[T] = cms.heavyHitters 232 | 233 | /** 234 | * Returns the total number of items counted ("seen" in the input) so far. 235 | * 236 | * This number is not the same as the total number of unique items counted so far, i.e. 237 | * it is not the cardinality of the set of items. 238 | * 239 | * Example: After having counted the input "foo", "bar", "foo", the return value would be 3. 240 | * 241 | * @return number of count operations so far 242 | */ 243 | def totalCount: Long = cms.totalCount 244 | 245 | override val persistent: Boolean = false 246 | 247 | override def isOpen: Boolean = open 248 | 249 | /** 250 | * Periodically saves the latest CMS state to Kafka. 251 | * 252 | * =Implementation detail= 253 | * 254 | * The changelog records have the form: (hardcodedKey, CMS). That is, we are backing up the 255 | * underlying CMS data structure in its entirety to Kafka. 256 | */ 257 | override def flush() { 258 | if (loggingEnabled) 259 | changeLogger.logChange(changelogKey, cms, timestampOfLastStateStoreUpdate) 260 | } 261 | 262 | override def close() { 263 | open = false 264 | } 265 | 266 | } 267 | -------------------------------------------------------------------------------- /src/test/scala/com/lightbend/kafka/scala/streams/algebird/CMSStoreBuilder.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright Confluent Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.lightbend.kafka.scala.streams 17 | package algebird 18 | 19 | import java.util 20 | 21 | import com.twitter.algebird.CMSHasher 22 | import org.apache.kafka.common.serialization.Serde 23 | import org.apache.kafka.streams.state.StoreBuilder 24 | 25 | /** 26 | * A factory for Kafka Streams to instantiate a [[CMSStore]]. 27 | * 28 | * =Usage= 29 | * 30 | * The [[CMSStore]]'s changelog will typically have rather few and small records per partition. 31 | * To improve efficiency we thus set a smaller log segment size (`segment.bytes`) than Kafka's 32 | * default of 1GB. 33 | * 34 | * {{{ 35 | * val changeloggingEnabled = true 36 | * val changelogConfig = { 37 | * val cfg = new java.util.HashMap[String, String] 38 | * val segmentSizeBytes = (20 * 1024 * 1024).toString 39 | * cfg.put("segment.bytes", segmentSizeBytes) 40 | * cfg 41 | * } 42 | * new CMSStoreSupplier[String](cmsStoreName, Serdes.String(), changeloggingEnabled, changelogConfig) 43 | * }}} 44 | */ 45 | class CMSStoreBuilder[T: CMSHasher](val name: String, val serde: Serde[T]) extends StoreBuilder[CMSStore[T]] { 46 | 47 | var loggingEnabled = false 48 | var logConfig: util.Map[String, String] = new util.HashMap[String, String]() 49 | 50 | override def build(): CMSStore[T] = new CMSStore[T](name, loggingEnabled) 51 | 52 | override def withCachingEnabled(): StoreBuilder[CMSStore[T]] = 53 | throw new UnsupportedOperationException("caching not supported") 54 | 55 | override def withLoggingEnabled(config: util.Map[String, String]): CMSStoreBuilder[T] = { 56 | loggingEnabled = true 57 | logConfig.clear() 58 | logConfig.putAll(config) 59 | this 60 | } 61 | 62 | override def withLoggingDisabled(): CMSStoreBuilder[T] = { 63 | loggingEnabled = false 64 | logConfig.clear() 65 | this 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /src/test/scala/com/lightbend/kafka/scala/streams/algebird/CMSStoreChangeLogger.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright Confluent Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.lightbend.kafka.scala.streams 17 | package algebird 18 | 19 | import org.apache.kafka.streams.processor.ProcessorContext 20 | import org.apache.kafka.streams.processor.internals.{ProcessorStateManager, RecordCollector} 21 | import org.apache.kafka.streams.state.StateSerdes 22 | 23 | /** 24 | * Copied from Kafka's [[org.apache.kafka.streams.state.internals.StoreChangeLogger]]. 25 | * 26 | * If StoreChangeLogger had been public, we would have used it as-is. 27 | * 28 | * Note that the use of array-typed keys is discouraged because they result in incorrect caching 29 | * behavior. If you intend to work on byte arrays as key, for example, you may want to wrap them 30 | * with the [[org.apache.kafka.common.utils.Bytes]] class. 
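 *
 * =Usage=
 *
 * A sketch of how [[CMSStore]] drives this logger (cf. `CMSStore.init` and `CMSStore.flush`); here
 * `context`, `cms` and `timestampOfLastStateStoreUpdate` stand for whatever the calling store holds:
 *
 * {{{
 * val serdes = new StateSerdes[Integer, TopCMS[String]](storeName, Serdes.Integer(), TopCMSSerde[String])
 * val changeLogger = new CMSStoreChangeLogger[Integer, TopCMS[String]](storeName, context, serdes)
 *
 * // On every flush, the full CMS is backed up to the changelog under a constant key.
 * changeLogger.logChange(changelogKey, cms, timestampOfLastStateStoreUpdate)
 * }}}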
31 | */ 32 | class CMSStoreChangeLogger[K, V](val storeName: String, 33 | val context: ProcessorContext, 34 | val partition: Int, 35 | val serialization: StateSerdes[K, V]) { 36 | 37 | private val topic = ProcessorStateManager.storeChangelogTopic(context.applicationId, storeName) 38 | private val collector = context.asInstanceOf[RecordCollector.Supplier].recordCollector 39 | 40 | def this(storeName: String, context: ProcessorContext, serialization: StateSerdes[K, V]) = 41 | this(storeName, context, context.taskId.partition, serialization) 42 | 43 | def logChange(key: K, value: V, timestamp: Long) = 44 | if (collector != null) { 45 | val keySerializer = serialization.keySerializer 46 | val valueSerializer = serialization.valueSerializer 47 | collector.send(this.topic, key, value, this.partition, timestamp, keySerializer, valueSerializer) 48 | } 49 | 50 | } 51 | -------------------------------------------------------------------------------- /src/test/scala/com/lightbend/kafka/scala/streams/algebird/TopCMSSerde.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright Confluent Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.lightbend.kafka.scala.streams 17 | package algebird 18 | 19 | import java.util 20 | 21 | import com.twitter.algebird.TopCMS 22 | import com.twitter.chill.ScalaKryoInstantiator 23 | import org.apache.kafka.common.errors.SerializationException 24 | import org.apache.kafka.common.serialization._ 25 | 26 | class TopCMSSerializer[T] extends Serializer[TopCMS[T]] { 27 | 28 | override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = { 29 | // nothing to do 30 | } 31 | 32 | override def serialize(topic: String, cms: TopCMS[T]): Array[Byte] = 33 | if (cms == null) null // scalastyle:ignore 34 | else ScalaKryoInstantiator.defaultPool.toBytesWithClass(cms) 35 | 36 | override def close(): Unit = { 37 | // nothing to do 38 | } 39 | 40 | } 41 | 42 | class TopCMSDeserializer[T] extends Deserializer[TopCMS[T]] { 43 | 44 | override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = { 45 | // nothing to do 46 | } 47 | 48 | //scalastyle:off null 49 | override def deserialize(topic: String, bytes: Array[Byte]): TopCMS[T] = 50 | if (bytes == null) null 51 | else if (bytes.isEmpty) throw new SerializationException("byte array must not be empty") 52 | else ScalaKryoInstantiator.defaultPool.fromBytes(bytes).asInstanceOf[TopCMS[T]] 53 | //scalastyle:on null 54 | override def close(): Unit = { 55 | // nothing to do 56 | } 57 | 58 | } 59 | 60 | /** 61 | * A [[Serde]] for [[TopCMS]]. 62 | * 63 | * =Usage= 64 | * 65 | * {{{ 66 | * val anyTopic = "any-topic" 67 | * val cms: TopCMS[String] = ??? 
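 * // e.g. (illustrative parameters, mirroring CMSStore's defaults):
 * // TopPctCMS.monoid[String](0.001, 1E-10, 1, 0.01).create(Seq("foo", "bar", "foo"))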
68 | * val serde: Serde[TopCMS[String]] = TopCMSSerde[String] 69 | * 70 | * val bytes: Array[Byte] = serde.serializer().serialize(anyTopic, cms) 71 | * val restoredCms: TopCMS[String] = serde.deserializer().deserialize(anyTopic, bytes) 72 | * }}} 73 | * 74 | * =Future Work= 75 | * 76 | * We could perhaps be more efficient if we serialized not the full [[TopCMS]] instance but only 77 | * its relevant fields. 78 | */ 79 | object TopCMSSerde { 80 | 81 | def apply[T]: Serde[TopCMS[T]] = Serdes.serdeFrom(new TopCMSSerializer[T], new TopCMSDeserializer[T]) 82 | 83 | } 84 | --------------------------------------------------------------------------------
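A minimal end-to-end sketch (not part of the repository) of the serde defined above: it builds a small TopCMS with the same parameters CMSStore uses by default, round-trips it through TopCMSSerde, and checks that the estimates survive. The object name and topic string are illustrative only, and an implicit CMSHasher[String] from Algebird is assumed to be in scope, as in the examples above.

import com.twitter.algebird.{TopCMS, TopPctCMS}
import com.lightbend.kafka.scala.streams.algebird.TopCMSSerde

object TopCMSSerdeRoundTrip extends App {
  // (eps, delta, seed, heavyHittersPct) -- the same defaults CMSStore uses.
  val cms: TopCMS[String] =
    TopPctCMS.monoid[String](0.001, 1E-10, 1, 0.01).create(Seq("foo", "bar", "foo"))

  val serde = TopCMSSerde[String]
  val bytes = serde.serializer().serialize("any-topic", cms)           // Kryo-serialized full CMS
  val restored = serde.deserializer().deserialize("any-topic", bytes)  // ...and back again

  assert(restored.frequency("foo").estimate == cms.frequency("foo").estimate)
  assert(restored.totalCount == 3L) // "foo", "bar", "foo" => 3 count operations
}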