├── .gitignore ├── .scalafmt.conf ├── LICENSE ├── NOTICE ├── README.md ├── build.sbt ├── project ├── Dependencies.scala ├── Versions.scala ├── build.properties └── plugins.sbt ├── scalastyle-config.xml └── src ├── main └── scala │ └── com │ └── lightbend │ └── kafka │ └── scala │ └── streams │ ├── DefaultSerdes.scala │ ├── FunctionConversions.scala │ ├── ImplicitConversions.scala │ ├── KGroupedStreamS.scala │ ├── KGroupedTableS.scala │ ├── KStreamS.scala │ ├── KTableS.scala │ ├── ScalaSerde.scala │ ├── SessionWindowedKStreamS.scala │ ├── StreamsBuilderS.scala │ └── TimeWindowedKStreamS.scala └── test ├── resources ├── log4j.properties └── logback.xml └── scala └── com └── lightbend └── kafka └── scala ├── server ├── KafkaLocalServer.scala ├── MessageListener.scala ├── MessageSender.scala ├── RecordProcessorTrait.scala └── Utils.scala └── streams ├── KafkaStreamsMergeTest.scala ├── KafkaStreamsTest.scala ├── ProbabilisticCountingScalaIntegrationTest.scala ├── PunctuateTest.scala ├── StreamToTableJoinScalaIntegrationTestImplicitSerdes.scala ├── StreamToTableJoinScalaIntegrationTestImplicitSerdesWithAvro.scala ├── StreamToTableJoinTestData.scala └── algebird ├── CMSStore.scala ├── CMSStoreBuilder.scala ├── CMSStoreChangeLogger.scala └── TopCMSSerde.scala /.gitignore: -------------------------------------------------------------------------------- 1 | *.class 2 | *.log 3 | .cache 4 | .history 5 | .DS_Store 6 | .lib/ 7 | app/* 8 | dist/* 9 | target/ 10 | tmp/ 11 | lib_managed/ 12 | src_managed/ 13 | project/boot/ 14 | project/target/ 15 | project/project/ 16 | project/plugins/project/ 17 | #idea 18 | .idea 19 | *.iml 20 | .idea_modules 21 | *.json 22 | *.json-- 23 | tmp/ 24 | local_state_data/ 25 | *.swp 26 | .scala_dependencies 27 | .worksheet 28 | ingest-intrusion-data/ingest-intrusiondata-tmp/* 29 | bigdl/source/lib 30 | release/staging/ 31 | deploy.conf 32 | 33 | kafka-stream-s/project/build.properties 34 | kafka-stream-q/project/build.properties 35 | kafka-stream-q-example-proc/project/build.properties 36 | kafka-stream-q-example-dsl/project/build.properties 37 | 38 | kafka-stream-q-example-proc/src/main/resources/application-proc.conf 39 | kafka-stream-q-example-dsl/src/main/resources/application-dsl.conf 40 | -------------------------------------------------------------------------------- /.scalafmt.conf: -------------------------------------------------------------------------------- 1 | maxColumn = 120 2 | continuationIndent.defnSite = 2 3 | assumeStandardLibraryStripMargin = true 4 | danglingParentheses = true 5 | align = more 6 | rewrite.rules = [SortImports, RedundantBraces, RedundantParens, SortModifiers] 7 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 |
-------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Kafka Streams Scala 2 | Copyright (C) 2018 Lightbend Inc. 3 | Copyright 2017-2018 Alexis Seigneurin. 4 |
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | **Note:** *The Scala API for Kafka Streams has been accepted for inclusion in Apache Kafka. We have been working with the Kafka team over the last couple of months to meet the standards and guidelines for this contribution. Lightbend and Alexis Seigneurin have contributed this library (with some changes) to the Kafka community. It is already available on [Apache Kafka trunk](https://github.com/apache/kafka/tree/trunk/streams/streams-scala) and will be included in the upcoming release of Kafka. Hence this project will no longer be updated on a regular basis; for some time, however, we will continue to provide bug-fix support only.* 2 | 3 | # A Thin Scala Wrapper Around the Kafka Streams Java API 4 | 5 | [![Build Status](https://secure.travis-ci.org/lightbend/kafka-streams-scala.png)](http://travis-ci.org/lightbend/kafka-streams-scala) 6 | 7 | The library wraps the Kafka Streams Java API in Scala, thereby providing: 8 | 9 | 1. much better type inference in Scala 10 | 2. less boilerplate in application code 11 | 3. the usual builder-style composition that developers get with the original Java API 12 | 4. complete compile-time type safety 13 | 14 | The design of the library was inspired by the work started by Alexis Seigneurin in [this repository](https://github.com/aseigneurin/kafka-streams-scala). 15 | 16 | ## Quick Start 17 | 18 | `kafka-streams-scala` is published and cross-built for Scala `2.11` and `2.12`, so you can just add the following to your build: 19 | 20 | ```scala 21 | val kafka_streams_scala_version = "0.2.1" 22 | 23 | libraryDependencies ++= Seq("com.lightbend" %% 24 | "kafka-streams-scala" % kafka_streams_scala_version) 25 | ``` 26 | 27 | > Note: `kafka-streams-scala` supports Kafka Streams `1.0.0` and later. 28 | 29 | The API docs for `kafka-streams-scala` are available [here](https://developer.lightbend.com/docs/api/kafka-streams-scala/0.2.1/com/lightbend/kafka/scala/streams) for Scala 2.12 and [here](https://developer.lightbend.com/docs/api/kafka-streams-scala_2.11/0.2.1/#package) for Scala 2.11. 30 | 31 | ## Running the Tests 32 | 33 | The library comes with an embedded Kafka server. To run the tests, simply run `sbt testOnly` and all tests will run on the local embedded server. 34 | 35 | > The embedded server is started and stopped for every test and consumes quite a bit of resources. Hence it's recommended that you allocate more heap space to `sbt` when running the tests, e.g. `sbt -mem 2000`.
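If you prefer not to pass the flag on every invocation, one build-level alternative (shown here only as a sketch — these settings are not part of this project's `build.sbt`) is to fork a separate JVM for the tests and give it a larger heap:

```scala
// Hypothetical additions to build.sbt (sbt "in" syntax, matching the style of this build):
// run the tests in a forked JVM and give that JVM a 2 GB heap
fork in Test := true
javaOptions in Test += "-Xmx2g"
```

Note that once the tests are forked, their heap is controlled by `javaOptions in Test` rather than by the `-mem` option passed to the sbt launcher.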
36 | 37 | ```bash 38 | $ sbt -mem 2000 39 | > +clean 40 | > +test 41 | ``` 42 | 43 | ## Type Inference and Composition 44 | 45 | Here's a sample code fragment using the Scala wrapper library. Compare this with the Scala code from the same [example](https://github.com/confluentinc/kafka-streams-examples/blob/4.0.0-post/src/test/scala/io/confluent/examples/streams/StreamToTableJoinScalaIntegrationTest.scala) in Confluent's repository. 46 | 47 | ```scala 48 | // Compute the total per region by summing the individual click counts per region. 49 | val clicksPerRegion: KTableS[String, Long] = userClicksStream 50 | 51 | // Join the stream against the table. 52 | .leftJoin(userRegionsTable, (clicks: Long, region: String) => (if (region == null) "UNKNOWN" else region, clicks)) 53 | 54 | // Change the stream from <user> -> <region, clicks> to <region> -> <clicks>. 55 | .map((_, regionWithClicks) => regionWithClicks) 56 | 57 | // Compute the total per region by summing the individual click counts per region. 58 | .groupByKey 59 | .reduce(_ + _) 60 | ``` 61 | 62 | ## Implicit Serdes 63 | 64 | One of the areas where the Java API's verbosity can be reduced is in providing a succinct way to pass serializers and de-serializers to the various functions. The library uses the power of Scala implicits towards this end. The library makes some decisions that help implement more succinct serdes in a type-safe manner: 65 | 66 | 1. No use of configuration-based default serdes. The Java API allows the user to define default key and value serdes as part of the configuration. This configuration, being implemented as `java.util.Properties`, is type-unsafe and can result in runtime errors if the user forgets to specify a serde or plugs in an incorrect one. `kafka-streams-scala` makes this completely type-safe by allowing all serdes to be specified through Scala implicits. 67 | 2. The library offers implicit conversions from serdes to `Serialized`, `Produced`, `Consumed` or `Joined`. Hence, as a user, you just have to bring the implicit serdes into scope, and all conversions to `Serialized`, `Produced`, `Consumed` or `Joined` will be taken care of automatically. 68 | 69 | 70 | ### Default Serdes 71 | 72 | The library offers a module that contains all the default serdes for the primitive types. Importing the object brings all of these serdes into scope and helps reduce implicit hell. 73 | 74 | ```scala 75 | object DefaultSerdes { 76 | implicit val stringSerde: Serde[String] = Serdes.String() 77 | implicit val longSerde: Serde[Long] = Serdes.Long().asInstanceOf[Serde[Long]] 78 | implicit val byteArraySerde: Serde[Array[Byte]] = Serdes.ByteArray() 79 | implicit val bytesSerde: Serde[org.apache.kafka.common.utils.Bytes] = Serdes.Bytes() 80 | implicit val floatSerde: Serde[Float] = Serdes.Float().asInstanceOf[Serde[Float]] 81 | implicit val doubleSerde: Serde[Double] = Serdes.Double().asInstanceOf[Serde[Double]] 82 | implicit val integerSerde: Serde[Int] = Serdes.Integer().asInstanceOf[Serde[Int]] 83 | } 84 | ``` 85 | 86 | ### Compile-Time Type Safety 87 | 88 | Not only the serdes, but `DefaultSerdes` also brings into scope implicit `Serialized`, `Produced`, `Consumed` and `Joined` instances. So all APIs that accept `Serialized`, `Produced`, `Consumed` or `Joined` will get these instances automatically with an `import DefaultSerdes._`. 89 | 90 | With just one import of `DefaultSerdes._`, the following code does not need any `Serialized`, `Produced`, `Consumed` or `Joined` to be specified explicitly or through the default config.
**And the best part is that for any missing instances of these you get a compilation error.** 91 | 92 | ```scala 93 | import DefaultSerdes._ 94 | 95 | val clicksPerRegion: KTableS[String, Long] = 96 | userClicksStream 97 | 98 | // Join the stream against the table. 99 | .leftJoin(userRegionsTable, (clicks: Long, region: String) => (if (region == null) "UNKNOWN" else region, clicks)) 100 | 101 | // Change the stream from <user> -> <region, clicks> to <region> -> <clicks>. 102 | .map((_, regionWithClicks) => regionWithClicks) 103 | 104 | // Compute the total per region by summing the individual click counts per region. 105 | .groupByKey 106 | .reduce(_ + _) 107 | 108 | // Write the (continuously updating) results to the output topic. 109 | clicksPerRegion.toStream.to(outputTopic) 110 | ``` 111 |
-------------------------------------------------------------------------------- /build.sbt: -------------------------------------------------------------------------------- 1 | import Dependencies._ 2 | 3 | name := "kafka-streams-scala" 4 | organization := "com.lightbend" 5 | version := "0.2.1" 6 | scalaVersion := Versions.Scala_2_12_Version 7 | crossScalaVersions := Versions.CrossScalaVersions 8 | scalacOptions := Seq("-Xexperimental", "-unchecked", "-deprecation", "-Ywarn-unused-import") 9 | licenses := Seq("Apache 2" -> new URL("http://www.apache.org/licenses/LICENSE-2.0.txt")) 10 | developers := List( 11 | Developer("debasishg", "Debasish Ghosh", "@debasishg", url("https://github.com/debasishg")), 12 | Developer("blublinsky", "Boris Lublinsky", "@blublinsky", url("https://github.com/blublinsky")), 13 | Developer("maasg", "Gerard Maas", "@maasg", url("https://github.com/maasg")) 14 | ) 15 | organizationName := "lightbend" 16 | organizationHomepage := Option(url("http://lightbend.com/")) 17 | homepage := scmInfo.value map (_.browseUrl) 18 | scmInfo := Option( 19 | ScmInfo(url("https://github.com/lightbend/kafka-streams-scala"), "git@github.com:lightbend/kafka-streams-scala.git") 20 | ) 21 | 22 | parallelExecution in Test := false 23 | testFrameworks += new TestFramework("minitest.runner.Framework") 24 | 25 | libraryDependencies ++= Seq( 26 | kafkaStreams excludeAll (ExclusionRule("org.slf4j", "slf4j-log4j12"), ExclusionRule("org.apache.zookeeper", 27 | "zookeeper")), 28 | scalaLogging % "test", 29 | logback % "test", 30 | kafka % "test" excludeAll (ExclusionRule("org.slf4j", "slf4j-log4j12"), ExclusionRule("org.apache.zookeeper", 31 | "zookeeper")), 32 | curator % "test", 33 | minitest % "test", 34 | minitestLaws % "test", 35 | algebird % "test", 36 | chill % "test", 37 | avro4s % "test" 38 | ) 39 | 40 | credentials += Credentials(Path.userHome / ".ivy2" / ".credentials") 41 | publishTo := { 42 | val nexus = "https://oss.sonatype.org/" 43 | if (isSnapshot.value) Some("snapshots" at nexus + "content/repositories/snapshots") 44 | else Option("releases" at nexus + "service/local/staging/deploy/maven2") 45 | } 46 | publishArtifact in Test := true 47 |
-------------------------------------------------------------------------------- /project/Dependencies.scala: -------------------------------------------------------------------------------- 1 | import sbt._ 2 | import Versions._ 3 | 4 | object Dependencies { 5 | 6 | implicit class Exclude(module: ModuleID) { 7 | def log4jExclude: ModuleID = 8 | module.excludeAll(ExclusionRule("log4j")) 9 | 10 | def driverExclusions: ModuleID = 11 | module.log4jExclude 12 | .exclude("com.google.guava", "guava") 13 | .excludeAll(ExclusionRule("org.slf4j")) 14 | } 15 | 16 | val kafkaStreams = "org.apache.kafka"
% "kafka-streams" % KafkaVersion 17 | val scalaLogging = "com.typesafe.scala-logging" %% "scala-logging" % ScalaLoggingVersion 18 | val logback = "ch.qos.logback" % "logback-classic" % LogbackVersion 19 | val kafka = "org.apache.kafka" %% "kafka" % KafkaVersion 20 | val curator = "org.apache.curator" % "curator-test" % CuratorVersion 21 | val minitest = "io.monix" %% "minitest" % MinitestVersion 22 | val minitestLaws = "io.monix" %% "minitest-laws" % MinitestVersion 23 | val algebird = "com.twitter" %% "algebird-core" % AlgebirdVersion 24 | val chill = "com.twitter" %% "chill" % ChillVersion 25 | val avro4s = "com.sksamuel.avro4s" %% "avro4s-core" % Avro4sVersion 26 | } 27 |
-------------------------------------------------------------------------------- /project/Versions.scala: -------------------------------------------------------------------------------- 1 | object Versions { 2 | val AlgebirdVersion = "0.13.0" 3 | val ChillVersion = "0.9.2" 4 | val LogbackVersion = "1.2.3" 5 | val KafkaVersion = "1.0.0" 6 | val ScalaLoggingVersion = "3.5.0" 7 | val CuratorVersion = "4.0.0" 8 | val MinitestVersion = "2.0.0" 9 | val JDKVersion = "1.8" 10 | val Scala_2_12_Version = "2.12.6" 11 | val Scala_2_11_Version = "2.11.12" 12 | val Avro4sVersion = "1.8.3" 13 | val CrossScalaVersions = Seq(Scala_2_12_Version, Scala_2_11_Version) 14 | } 15 |
-------------------------------------------------------------------------------- /project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.1.4 2 |
-------------------------------------------------------------------------------- /project/plugins.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("org.scalastyle" %% "scalastyle-sbt-plugin" % "1.0.0") 2 | addSbtPlugin("com.geirsson" % "sbt-scalafmt" % "1.5.0") 3 |
-------------------------------------------------------------------------------- /scalastyle-config.xml: -------------------------------------------------------------------------------- [scalastyle-config.xml: a 117-line Scalastyle XML configuration whose element markup was lost during text extraction; the only surviving text is the description "Scalastyle standard configuration".]
-------------------------------------------------------------------------------- /src/main/scala/com/lightbend/kafka/scala/streams/DefaultSerdes.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | * Copyright 2017-2018 Alexis Seigneurin.
4 | */ 5 | package com.lightbend.kafka.scala.streams 6 | 7 | import org.apache.kafka.common.serialization.{Serde, Serdes} 8 | 9 | /** 10 | * Implicit values for default serdes 11 | */ 12 | object DefaultSerdes { 13 | implicit val stringSerde: Serde[String] = Serdes.String() 14 | implicit val longSerde: Serde[Long] = Serdes.Long().asInstanceOf[Serde[Long]] 15 | implicit val byteArraySerde: Serde[Array[Byte]] = Serdes.ByteArray() 16 | implicit val bytesSerde: Serde[org.apache.kafka.common.utils.Bytes] = Serdes.Bytes() 17 | implicit val floatSerde: Serde[Float] = Serdes.Float().asInstanceOf[Serde[Float]] 18 | implicit val doubleSerde: Serde[Double] = Serdes.Double().asInstanceOf[Serde[Double]] 19 | implicit val integerSerde: Serde[Int] = Serdes.Integer().asInstanceOf[Serde[Int]] 20 | } 21 | -------------------------------------------------------------------------------- /src/main/scala/com/lightbend/kafka/scala/streams/FunctionConversions.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | * Copyright 2017-2018 Alexis Seigneurin. 4 | */ 5 | package com.lightbend.kafka.scala.streams 6 | 7 | import org.apache.kafka.streams.KeyValue 8 | import org.apache.kafka.streams.kstream._ 9 | 10 | /** 11 | * Implicit classes that offer conversions of Scala function literals to 12 | * SAM (Single Abstract Method) objects in Java. These make the Scala APIs much 13 | * more expressive, with less boilerplate and more succinct. 14 | */ 15 | object FunctionConversions { 16 | 17 | implicit class PredicateFromFunction[K, V](val test: (K, V) => Boolean) extends AnyVal { 18 | def asPredicate: Predicate[K, V] = test(_, _) 19 | } 20 | 21 | implicit class MapperFromFunction[T, U, V](val f: (T, U) => V) extends AnyVal { 22 | def asKeyValueMapper: KeyValueMapper[T, U, V] = (k: T, v: U) => f(k, v) 23 | def asValueJoiner: ValueJoiner[T, U, V] = (v1, v2) => f(v1, v2) 24 | } 25 | 26 | implicit class KeyValueMapperFromFunction[K, V, KR, VR](val f: (K, V) => (KR, VR)) extends AnyVal { 27 | def asKeyValueMapper: KeyValueMapper[K, V, KeyValue[KR, VR]] = (k, v) => { 28 | val (kr, vr) = f(k, v) 29 | KeyValue.pair(kr, vr) 30 | } 31 | } 32 | 33 | implicit class ValueMapperFromFunction[V, VR](val f: V => VR) extends AnyVal { 34 | def asValueMapper: ValueMapper[V, VR] = v => f(v) 35 | } 36 | 37 | implicit class AggregatorFromFunction[K, V, VR](val f: (K, V, VR) => VR) extends AnyVal { 38 | def asAggregator: Aggregator[K, V, VR] = (k, v, r) => f(k, v, r) 39 | } 40 | 41 | implicit class MergerFromFunction[K, VR](val f: (K, VR, VR) => VR) extends AnyVal { 42 | def asMerger: Merger[K, VR] = (k, v1, v2) => f(k, v1, v2) 43 | } 44 | 45 | implicit class ReducerFromFunction[V](val f: (V, V) => V) extends AnyVal { 46 | def asReducer: Reducer[V] = (v1, v2) => f(v1, v2) 47 | } 48 | 49 | implicit class InitializerFromFunction[T](val f: () => T) extends AnyVal { 50 | def asInitializer: Initializer[T] = () => f() 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /src/main/scala/com/lightbend/kafka/scala/streams/ImplicitConversions.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | * Copyright 2017-2018 Alexis Seigneurin. 
4 | */ 5 | package com.lightbend.kafka.scala.streams 6 | 7 | import org.apache.kafka.streams.kstream._ 8 | import org.apache.kafka.streams.{Consumed, KeyValue} 9 | import org.apache.kafka.common.serialization.Serde 10 | 11 | import scala.language.implicitConversions 12 | 13 | /** 14 | * Implicit conversions between the Scala wrapper objects and the underlying Java 15 | * objects. 16 | */ 17 | object ImplicitConversions { 18 | 19 | implicit def wrapKStream[K, V](inner: KStream[K, V]): KStreamS[K, V] = 20 | new KStreamS[K, V](inner) 21 | 22 | implicit def wrapKGroupedStream[K, V](inner: KGroupedStream[K, V]): KGroupedStreamS[K, V] = 23 | new KGroupedStreamS[K, V](inner) 24 | 25 | implicit def wrapSessionWindowedKStream[K, V](inner: SessionWindowedKStream[K, V]): SessionWindowedKStreamS[K, V] = 26 | new SessionWindowedKStreamS[K, V](inner) 27 | 28 | implicit def wrapTimeWindowedKStream[K, V](inner: TimeWindowedKStream[K, V]): TimeWindowedKStreamS[K, V] = 29 | new TimeWindowedKStreamS[K, V](inner) 30 | 31 | implicit def wrapKTable[K, V](inner: KTable[K, V]): KTableS[K, V] = 32 | new KTableS[K, V](inner) 33 | 34 | implicit def wrapKGroupedTable[K, V](inner: KGroupedTable[K, V]): KGroupedTableS[K, V] = 35 | new KGroupedTableS[K, V](inner) 36 | 37 | implicit def tuple2ToKeyValue[K, V](tuple: (K, V)): KeyValue[K, V] = new KeyValue(tuple._1, tuple._2) 38 | 39 | //scalastyle:on null 40 | // we would also like to allow users implicit serdes 41 | // and these implicits will convert them to `Serialized`, `Produced` or `Consumed` 42 | 43 | implicit def serializedFromSerde[K, V](implicit keySerde: Serde[K], valueSerde: Serde[V]): Serialized[K, V] = 44 | Serialized.`with`(keySerde, valueSerde) 45 | 46 | implicit def consumedFromSerde[K, V](implicit keySerde: Serde[K], valueSerde: Serde[V]): Consumed[K, V] = 47 | Consumed.`with`(keySerde, valueSerde) 48 | 49 | implicit def producedFromSerde[K, V](implicit keySerde: Serde[K], valueSerde: Serde[V]): Produced[K, V] = 50 | Produced.`with`(keySerde, valueSerde) 51 | 52 | implicit def joinedFromKVOSerde[K, V, VO](implicit keySerde: Serde[K], 53 | valueSerde: Serde[V], 54 | otherValueSerde: Serde[VO]): Joined[K, V, VO] = 55 | Joined.`with`(keySerde, valueSerde, otherValueSerde) 56 | } 57 | -------------------------------------------------------------------------------- /src/main/scala/com/lightbend/kafka/scala/streams/KGroupedStreamS.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | * Copyright 2017-2018 Alexis Seigneurin. 4 | */ 5 | package com.lightbend.kafka.scala.streams 6 | 7 | import org.apache.kafka.streams.kstream._ 8 | import org.apache.kafka.streams.state.KeyValueStore 9 | import org.apache.kafka.common.utils.Bytes 10 | import org.apache.kafka.common.serialization.Serde 11 | import ImplicitConversions._ 12 | import FunctionConversions._ 13 | 14 | /** 15 | * Wraps the Java class KGroupedStream and delegates method calls to the underlying Java object. 
16 | */ 17 | class KGroupedStreamS[K, V](inner: KGroupedStream[K, V]) { 18 | 19 | def count(): KTableS[K, Long] = { 20 | val c: KTableS[K, java.lang.Long] = inner.count() 21 | c.mapValues[Long](Long2long _) 22 | } 23 | 24 | def count(store: String, keySerde: Option[Serde[K]] = None): KTableS[K, Long] = { 25 | val materialized = keySerde.foldLeft(Materialized.as[K, java.lang.Long, KeyValueStore[Bytes, Array[Byte]]](store))( 26 | (m, serde) => m.withKeySerde(serde) 27 | ) 28 | 29 | val c: KTableS[K, java.lang.Long] = inner.count(materialized) 30 | c.mapValues[Long](Long2long _) 31 | } 32 | 33 | def reduce(reducer: (V, V) => V): KTableS[K, V] = 34 | inner.reduce((v1, v2) => reducer(v1, v2)) 35 | 36 | def reduce(reducer: (V, V) => V, materialized: Materialized[K, V, KeyValueStore[Bytes, Array[Byte]]]): KTableS[K, V] = 37 | // need this explicit asReducer for Scala 2.11 or else the SAM conversion doesn't take place 38 | // works perfectly with Scala 2.12 though 39 | inner.reduce(((v1: V, v2: V) => reducer(v1, v2)).asReducer, materialized) 40 | 41 | def reduce(reducer: (V, V) => V, storeName: String)(implicit keySerde: Serde[K], 42 | valueSerde: Serde[V]): KTableS[K, V] = 43 | // need this explicit asReducer for Scala 2.11 or else the SAM conversion doesn't take place 44 | // works perfectly with Scala 2.12 though 45 | inner.reduce( 46 | ((v1: V, v2: V) => reducer(v1, v2)).asReducer, 47 | Materialized 48 | .as[K, V, KeyValueStore[Bytes, Array[Byte]]](storeName) 49 | .withKeySerde(keySerde) 50 | .withValueSerde(valueSerde) 51 | ) 52 | 53 | def aggregate[VR](initializer: () => VR, aggregator: (K, V, VR) => VR): KTableS[K, VR] = 54 | inner.aggregate(initializer.asInitializer, aggregator.asAggregator) 55 | 56 | def aggregate[VR](initializer: () => VR, 57 | aggregator: (K, V, VR) => VR, 58 | materialized: Materialized[K, VR, KeyValueStore[Bytes, Array[Byte]]]): KTableS[K, VR] = 59 | inner.aggregate(initializer.asInitializer, aggregator.asAggregator, materialized) 60 | 61 | def windowedBy(windows: SessionWindows): SessionWindowedKStreamS[K, V] = 62 | inner.windowedBy(windows) 63 | 64 | def windowedBy[W <: Window](windows: Windows[W]): TimeWindowedKStreamS[K, V] = 65 | inner.windowedBy(windows) 66 | } 67 | -------------------------------------------------------------------------------- /src/main/scala/com/lightbend/kafka/scala/streams/KGroupedTableS.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | * Copyright 2017-2018 Alexis Seigneurin. 4 | */ 5 | package com.lightbend.kafka.scala.streams 6 | 7 | import ImplicitConversions._ 8 | import org.apache.kafka.streams.kstream._ 9 | import org.apache.kafka.streams.state.KeyValueStore 10 | import org.apache.kafka.common.utils.Bytes 11 | import FunctionConversions._ 12 | 13 | /** 14 | * Wraps the Java class KGroupedTable and delegates method calls to the underlying Java object. 
15 | */ 16 | class KGroupedTableS[K, V](inner: KGroupedTable[K, V]) { 17 | 18 | type ByteArrayKVStore = KeyValueStore[Bytes, Array[Byte]] 19 | 20 | def count(): KTableS[K, Long] = { 21 | val c: KTableS[K, java.lang.Long] = inner.count() 22 | c.mapValues[Long](Long2long(_)) 23 | } 24 | 25 | def count(materialized: Materialized[K, Long, ByteArrayKVStore]): KTableS[K, Long] = 26 | inner.count(materialized) 27 | 28 | def reduce(adder: (V, V) => V, subTractor: (V, V) => V): KTableS[K, V] = 29 | // need this explicit asReducer for Scala 2.11 or else the SAM conversion doesn't take place 30 | // works perfectly with Scala 2.12 though 31 | inner.reduce(((v1, v2) => adder(v1, v2)).asReducer, ((v1, v2) => subTractor(v1, v2)).asReducer) 32 | 33 | def reduce(adder: (V, V) => V, 34 | subtractor: (V, V) => V, 35 | materialized: Materialized[K, V, ByteArrayKVStore]): KTableS[K, V] = 36 | // need this explicit asReducer for Scala 2.11 or else the SAM conversion doesn't take place 37 | // works perfectly with Scala 2.12 though 38 | inner.reduce(((v1, v2) => adder(v1, v2)).asReducer, ((v1, v2) => subtractor(v1, v2)).asReducer, materialized) 39 | 40 | def aggregate[VR](initializer: () => VR, adder: (K, V, VR) => VR, subtractor: (K, V, VR) => VR): KTableS[K, VR] = 41 | inner.aggregate(initializer.asInitializer, adder.asAggregator, subtractor.asAggregator) 42 | 43 | def aggregate[VR](initializer: () => VR, 44 | adder: (K, V, VR) => VR, 45 | subtractor: (K, V, VR) => VR, 46 | materialized: Materialized[K, VR, ByteArrayKVStore]): KTableS[K, VR] = 47 | inner.aggregate(initializer.asInitializer, adder.asAggregator, subtractor.asAggregator, materialized) 48 | } 49 | -------------------------------------------------------------------------------- /src/main/scala/com/lightbend/kafka/scala/streams/KStreamS.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | * Copyright 2017-2018 Alexis Seigneurin. 4 | */ 5 | package com.lightbend.kafka.scala.streams 6 | 7 | import org.apache.kafka.streams.KeyValue 8 | import org.apache.kafka.streams.kstream._ 9 | import org.apache.kafka.streams.processor.{Processor, ProcessorContext, ProcessorSupplier} 10 | import ImplicitConversions._ 11 | import FunctionConversions._ 12 | 13 | import scala.collection.JavaConverters._ 14 | 15 | /** 16 | * Wraps the Java class KStream and delegates method calls to the underlying Java object. 
17 | */ 18 | class KStreamS[K, V](val inner: KStream[K, V]) { 19 | 20 | def filter(predicate: (K, V) => Boolean): KStreamS[K, V] = 21 | inner.filter(predicate(_, _)) 22 | 23 | def filterNot(predicate: (K, V) => Boolean): KStreamS[K, V] = 24 | inner.filterNot(predicate(_, _)) 25 | 26 | def selectKey[KR](mapper: (K, V) => KR): KStreamS[KR, V] = 27 | inner.selectKey[KR]((k: K, v: V) => mapper(k, v)) 28 | 29 | def map[KR, VR](mapper: (K, V) => (KR, VR)): KStreamS[KR, VR] = { 30 | val kvMapper = mapper.tupled andThen tuple2ToKeyValue 31 | inner.map[KR, VR]((k, v) => kvMapper(k, v)) 32 | } 33 | 34 | def mapValues[VR](mapper: V => VR): KStreamS[K, VR] = 35 | inner.mapValues[VR](mapper(_)) 36 | 37 | def flatMap[KR, VR](mapper: (K, V) => Iterable[(KR, VR)]): KStreamS[KR, VR] = { 38 | val kvMapper = mapper.tupled andThen (iter => iter.map(tuple2ToKeyValue).asJava) 39 | inner.flatMap[KR, VR]((k, v) => kvMapper(k, v)) 40 | } 41 | 42 | def flatMapValues[VR](processor: V => Iterable[VR]): KStreamS[K, VR] = 43 | inner.flatMapValues[VR]((v) => processor(v).asJava) 44 | 45 | def print(printed: Printed[K, V]): Unit = inner.print(printed) 46 | 47 | def foreach(action: (K, V) => Unit): Unit = 48 | inner.foreach((k, v) => action(k, v)) 49 | 50 | def branch(predicates: ((K, V) => Boolean)*): Array[KStreamS[K, V]] = 51 | inner.branch(predicates.map(_.asPredicate): _*).map(kstream => wrapKStream(kstream)) 52 | 53 | def through(topic: String)(implicit produced: Produced[K, V]): KStreamS[K, V] = 54 | inner.through(topic, produced) 55 | 56 | def to(topic: String)(implicit produced: Produced[K, V]): Unit = 57 | inner.to(topic, produced) 58 | 59 | //scalastyle:off null 60 | def transform[K1, V1](transformerSupplier: () => Transformer[K, V, (K1, V1)], 61 | stateStoreNames: String*): KStreamS[K1, V1] = { 62 | 63 | val transformerSupplierJ: TransformerSupplier[K, V, KeyValue[K1, V1]] = () => { 64 | val transformerS: Transformer[K, V, (K1, V1)] = transformerSupplier() 65 | new Transformer[K, V, KeyValue[K1, V1]] { 66 | override def transform(key: K, value: V): KeyValue[K1, V1] = 67 | transformerS.transform(key, value) match { 68 | case (k1, v1) => KeyValue.pair(k1, v1) 69 | case _ => null 70 | } 71 | 72 | override def init(context: ProcessorContext): Unit = transformerS.init(context) 73 | 74 | @deprecated( 75 | "Please use Punctuator functional interface at https://kafka.apache.org/10/javadoc/org/apache/kafka/streams/processor/Punctuator.html instead", 76 | "0.1.3" 77 | ) // scalastyle:ignore 78 | override def punctuate(timestamp: Long): KeyValue[K1, V1] = 79 | transformerS.punctuate(timestamp) match { 80 | case (k1, v1) => KeyValue.pair[K1, V1](k1, v1) 81 | case _ => null 82 | } 83 | 84 | override def close(): Unit = transformerS.close() 85 | } 86 | } 87 | inner.transform(transformerSupplierJ, stateStoreNames: _*) 88 | } 89 | //scalastyle:on null 90 | 91 | def transformValues[VR](valueTransformerSupplier: () => ValueTransformer[V, VR], 92 | stateStoreNames: String*): KStreamS[K, VR] = { 93 | 94 | val valueTransformerSupplierJ: ValueTransformerSupplier[V, VR] = () => valueTransformerSupplier() 95 | inner.transformValues[VR](valueTransformerSupplierJ, stateStoreNames: _*) 96 | } 97 | 98 | def process(processorSupplier: () => Processor[K, V], stateStoreNames: String*): Unit = { 99 | 100 | val processorSupplierJ: ProcessorSupplier[K, V] = () => processorSupplier() 101 | inner.process(processorSupplierJ, stateStoreNames: _*) 102 | } 103 | 104 | /** 105 | * If `Serialized[K, V]` is found in the implicit scope, then use it, 
else 106 | * use the API with the default serializers. 107 | * 108 | * Usage Pattern 1: No implicits in scope, use default serializers 109 | * - .groupByKey 110 | * 111 | * Usage Pattern 2: Use implicit `Serialized` in scope 112 | * implicit val serialized = Serialized.`with`(stringSerde, longSerde) 113 | * - .groupByKey 114 | * 115 | * Usage Pattern 3: uses the implicit conversion from the serdes to `Serialized` 116 | * implicit val stringSerde: Serde[String] = Serdes.String() 117 | * implicit val longSerde: Serde[Long] = Serdes.Long().asInstanceOf[Serde[Long]] 118 | * - .groupByKey 119 | */ 120 | def groupByKey(implicit serialized: Serialized[K, V]): KGroupedStreamS[K, V] = 121 | inner.groupByKey(serialized) 122 | 123 | def groupBy[KR](selector: (K, V) => KR)(implicit serialized: Serialized[KR, V]): KGroupedStreamS[KR, V] = 124 | inner.groupBy(selector.asKeyValueMapper, serialized) 125 | 126 | def join[VO, VR](otherStream: KStreamS[K, VO], joiner: (V, VO) => VR, windows: JoinWindows)( 127 | implicit joined: Joined[K, V, VO] 128 | ): KStreamS[K, VR] = 129 | inner.join[VO, VR](otherStream.inner, joiner.asValueJoiner, windows, joined) 130 | 131 | def join[VT, VR](table: KTableS[K, VT], joiner: (V, VT) => VR)(implicit joined: Joined[K, V, VT]): KStreamS[K, VR] = 132 | inner.join[VT, VR](table.inner, joiner.asValueJoiner, joined) 133 | 134 | def join[GK, GV, RV](globalKTable: GlobalKTable[GK, GV], 135 | keyValueMapper: (K, V) => GK, 136 | joiner: (V, GV) => RV): KStreamS[K, RV] = 137 | inner.join[GK, GV, RV](globalKTable, keyValueMapper(_, _), joiner(_, _)) 138 | 139 | def leftJoin[VO, VR](otherStream: KStreamS[K, VO], joiner: (V, VO) => VR, windows: JoinWindows)( 140 | implicit joined: Joined[K, V, VO] 141 | ): KStreamS[K, VR] = 142 | inner.leftJoin[VO, VR](otherStream.inner, joiner.asValueJoiner, windows, joined) 143 | 144 | def leftJoin[VT, VR](table: KTableS[K, VT], 145 | joiner: (V, VT) => VR)(implicit joined: Joined[K, V, VT]): KStreamS[K, VR] = 146 | inner.leftJoin[VT, VR](table.inner, joiner.asValueJoiner, joined) 147 | 148 | def leftJoin[GK, GV, RV](globalKTable: GlobalKTable[GK, GV], 149 | keyValueMapper: (K, V) => GK, 150 | joiner: (V, GV) => RV): KStreamS[K, RV] = 151 | inner.leftJoin[GK, GV, RV](globalKTable, keyValueMapper.asKeyValueMapper, joiner.asValueJoiner) 152 | 153 | def outerJoin[VO, VR](otherStream: KStreamS[K, VO], joiner: (V, VO) => VR, windows: JoinWindows)( 154 | implicit joined: Joined[K, V, VO] 155 | ): KStreamS[K, VR] = 156 | inner.outerJoin[VO, VR](otherStream.inner, joiner.asValueJoiner, windows, joined) 157 | 158 | def merge(stream: KStreamS[K, V]): KStreamS[K, V] = inner.merge(stream.inner) 159 | 160 | def peek(action: (K, V) => Unit): KStreamS[K, V] = 161 | inner.peek(action(_, _)) 162 | 163 | // -- EXTENSIONS TO KAFKA STREAMS -- 164 | 165 | // applies the predicate to know what messages should go to the left stream (predicate == true) 166 | // or to the right stream (predicate == false) 167 | def split(predicate: (K, V) => Boolean): (KStreamS[K, V], KStreamS[K, V]) = 168 | (this.filter(predicate), this.filterNot(predicate)) 169 | 170 | } 171 | -------------------------------------------------------------------------------- /src/main/scala/com/lightbend/kafka/scala/streams/KTableS.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | * Copyright 2017-2018 Alexis Seigneurin. 
4 | */ 5 | package com.lightbend.kafka.scala.streams 6 | 7 | import org.apache.kafka.streams.kstream._ 8 | import org.apache.kafka.streams.state.KeyValueStore 9 | import org.apache.kafka.common.utils.Bytes 10 | import ImplicitConversions._ 11 | import FunctionConversions._ 12 | 13 | /** 14 | * Wraps the Java class KTable and delegates method calls to the underlying Java object. 15 | */ 16 | class KTableS[K, V](val inner: KTable[K, V]) { 17 | 18 | def filter(predicate: (K, V) => Boolean): KTableS[K, V] = 19 | inner.filter(predicate(_, _)) 20 | 21 | def filter(predicate: (K, V) => Boolean, 22 | materialized: Materialized[K, V, KeyValueStore[Bytes, Array[Byte]]]): KTableS[K, V] = 23 | inner.filter(predicate.asPredicate, materialized) 24 | 25 | def filterNot(predicate: (K, V) => Boolean): KTableS[K, V] = 26 | inner.filterNot(predicate(_, _)) 27 | 28 | def filterNot(predicate: (K, V) => Boolean, 29 | materialized: Materialized[K, V, KeyValueStore[Bytes, Array[Byte]]]): KTableS[K, V] = 30 | inner.filterNot(predicate.asPredicate, materialized) 31 | 32 | def mapValues[VR](mapper: V => VR): KTableS[K, VR] = 33 | inner.mapValues[VR](mapper.asValueMapper) 34 | 35 | def mapValues[VR](mapper: V => VR, 36 | materialized: Materialized[K, VR, KeyValueStore[Bytes, Array[Byte]]]): KTableS[K, VR] = 37 | inner.mapValues[VR](mapper.asValueMapper, materialized) 38 | 39 | def toStream: KStreamS[K, V] = inner.toStream 40 | 41 | def toStream[KR](mapper: (K, V) => KR): KStreamS[KR, V] = 42 | inner.toStream[KR](mapper.asKeyValueMapper) 43 | 44 | def groupBy[KR, VR](selector: (K, V) => (KR, VR))(implicit serialized: Serialized[KR, VR]): KGroupedTableS[KR, VR] = 45 | inner.groupBy(selector.asKeyValueMapper, serialized) 46 | 47 | def join[VO, VR](other: KTableS[K, VO], joiner: (V, VO) => VR): KTableS[K, VR] = 48 | inner.join[VO, VR](other.inner, joiner.asValueJoiner) 49 | 50 | def join[VO, VR](other: KTableS[K, VO], 51 | joiner: (V, VO) => VR, 52 | materialized: Materialized[K, VR, KeyValueStore[Bytes, Array[Byte]]]): KTableS[K, VR] = 53 | inner.join[VO, VR](other.inner, joiner.asValueJoiner, materialized) 54 | 55 | def leftJoin[VO, VR](other: KTableS[K, VO], joiner: (V, VO) => VR): KTableS[K, VR] = 56 | inner.leftJoin[VO, VR](other.inner, joiner.asValueJoiner) 57 | 58 | def leftJoin[VO, VR](other: KTableS[K, VO], 59 | joiner: (V, VO) => VR, 60 | materialized: Materialized[K, VR, KeyValueStore[Bytes, Array[Byte]]]): KTableS[K, VR] = 61 | inner.leftJoin[VO, VR](other.inner, joiner.asValueJoiner, materialized) 62 | 63 | def outerJoin[VO, VR](other: KTableS[K, VO], joiner: (V, VO) => VR): KTableS[K, VR] = 64 | inner.outerJoin[VO, VR](other.inner, joiner.asValueJoiner) 65 | 66 | def outerJoin[VO, VR](other: KTableS[K, VO], 67 | joiner: (V, VO) => VR, 68 | materialized: Materialized[K, VR, KeyValueStore[Bytes, Array[Byte]]]): KTableS[K, VR] = 69 | inner.outerJoin[VO, VR](other.inner, joiner.asValueJoiner, materialized) 70 | 71 | def queryableStoreName: String = 72 | inner.queryableStoreName 73 | } 74 | -------------------------------------------------------------------------------- /src/main/scala/com/lightbend/kafka/scala/streams/ScalaSerde.scala: -------------------------------------------------------------------------------- 1 | // adopted from Openshine implementation 2 | package com.lightbend.kafka.scala.streams 3 | 4 | import org.apache.kafka.common.serialization.{Serde, Deserializer => JDeserializer, Serializer => JSerializer} 5 | 6 | trait ScalaSerde[T] extends Serde[T] { 7 | override def deserializer(): 
JDeserializer[T] 8 | 9 | override def serializer(): JSerializer[T] 10 | 11 | override def configure(configs: java.util.Map[String, _], isKey: Boolean): Unit = () 12 | 13 | override def close(): Unit = () 14 | } 15 | 16 | trait StatelessScalaSerde[T >: Null] extends Serde[T] with ScalaSerde[T] { 17 | def serialize(data: T): Array[Byte] 18 | def deserialize(data: Array[Byte]): Option[T] 19 | 20 | override def deserializer(): Deserializer[T] = 21 | (data: Array[Byte]) => deserialize(data) 22 | 23 | override def serializer(): Serializer[T] = 24 | (data: T) => serialize(data) 25 | } 26 | 27 | trait Deserializer[T >: Null] extends JDeserializer[T] { 28 | override def configure(configs: java.util.Map[String, _], isKey: Boolean): Unit = () 29 | 30 | override def close(): Unit = () 31 | 32 | override def deserialize(topic: String, data: Array[Byte]): T = 33 | Option(data).flatMap(deserialize).orNull 34 | 35 | def deserialize(data: Array[Byte]): Option[T] 36 | } 37 | 38 | trait Serializer[T] extends JSerializer[T] { 39 | override def configure(configs: java.util.Map[String, _], isKey: Boolean): Unit = () 40 | 41 | override def close(): Unit = () 42 | 43 | override def serialize(topic: String, data: T): Array[Byte] = 44 | Option(data).map(serialize).orNull 45 | 46 | def serialize(data: T): Array[Byte] 47 | } 48 | -------------------------------------------------------------------------------- /src/main/scala/com/lightbend/kafka/scala/streams/SessionWindowedKStreamS.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | * Copyright 2017-2018 Alexis Seigneurin. 4 | */ 5 | package com.lightbend.kafka.scala.streams 6 | 7 | import org.apache.kafka.streams.kstream._ 8 | import org.apache.kafka.streams.state.SessionStore 9 | import org.apache.kafka.common.utils.Bytes 10 | import FunctionConversions._ 11 | 12 | import ImplicitConversions._ 13 | 14 | /** 15 | * Wraps the Java class SessionWindowedKStream and delegates method calls to the underlying Java object. 
16 | */ 17 | class SessionWindowedKStreamS[K, V](val inner: SessionWindowedKStream[K, V]) { 18 | 19 | def aggregate[VR](initializer: () => VR, 20 | aggregator: (K, V, VR) => VR, 21 | merger: (K, VR, VR) => VR): KTableS[Windowed[K], VR] = 22 | inner.aggregate(initializer.asInitializer, aggregator.asAggregator, merger.asMerger) 23 | 24 | def aggregate[VR](initializer: () => VR, 25 | aggregator: (K, V, VR) => VR, 26 | merger: (K, VR, VR) => VR, 27 | materialized: Materialized[K, VR, SessionStore[Bytes, Array[Byte]]]): KTableS[Windowed[K], VR] = 28 | inner.aggregate(initializer.asInitializer, aggregator.asAggregator, merger.asMerger, materialized) 29 | 30 | def count(): KTableS[Windowed[K], Long] = { 31 | val c: KTableS[Windowed[K], java.lang.Long] = inner.count() 32 | c.mapValues[Long](Long2long(_)) 33 | } 34 | 35 | def count(materialized: Materialized[K, Long, SessionStore[Bytes, Array[Byte]]]): KTableS[Windowed[K], Long] = 36 | inner.count(materialized) 37 | 38 | def reduce(reducer: (V, V) => V): KTableS[Windowed[K], V] = 39 | inner.reduce((v1, v2) => reducer(v1, v2)) 40 | 41 | def reduce(reducer: (V, V) => V, 42 | materialized: Materialized[K, V, SessionStore[Bytes, Array[Byte]]]): KTableS[Windowed[K], V] = 43 | inner.reduce(reducer.asReducer, materialized) 44 | } 45 | -------------------------------------------------------------------------------- /src/main/scala/com/lightbend/kafka/scala/streams/StreamsBuilderS.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | * Copyright 2017-2018 Alexis Seigneurin. 4 | */ 5 | package com.lightbend.kafka.scala.streams 6 | 7 | import java.util.regex.Pattern 8 | 9 | import com.lightbend.kafka.scala.streams.ImplicitConversions._ 10 | import org.apache.kafka.common.utils.Bytes 11 | import org.apache.kafka.streams.kstream.{GlobalKTable, Materialized} 12 | import org.apache.kafka.streams.processor.{ProcessorSupplier, StateStore} 13 | import org.apache.kafka.streams.state.{KeyValueStore, StoreBuilder} 14 | import org.apache.kafka.streams.{Consumed, StreamsBuilder, Topology} 15 | 16 | import scala.collection.JavaConverters._ 17 | 18 | /** 19 | * Wraps the Java class StreamsBuilder and delegates method calls to the underlying Java object. 
20 | */ 21 | class StreamsBuilderS(inner: StreamsBuilder = new StreamsBuilder) { 22 | 23 | def stream[K, V](topic: String)(implicit consumed: Consumed[K, V]): KStreamS[K, V] = 24 | inner.stream[K, V](topic, consumed) 25 | 26 | def stream[K, V](topics: List[String])(implicit consumed: Consumed[K, V]): KStreamS[K, V] = 27 | inner.stream[K, V](topics.asJava, consumed) 28 | 29 | def stream[K, V](topicPattern: Pattern)(implicit consumed: Consumed[K, V]): KStreamS[K, V] = 30 | inner.stream[K, V](topicPattern, consumed) 31 | 32 | def table[K, V](topic: String)(implicit consumed: Consumed[K, V]): KTableS[K, V] = 33 | inner.table[K, V](topic, consumed) 34 | 35 | def table[K, V](topic: String, materialized: Materialized[K, V, KeyValueStore[Bytes, Array[Byte]]])( 36 | implicit consumed: Consumed[K, V] 37 | ): KTableS[K, V] = 38 | inner.table[K, V](topic, consumed, materialized) 39 | 40 | def globalTable[K, V](topic: String)(implicit consumed: Consumed[K, V]): GlobalKTable[K, V] = 41 | inner.globalTable(topic, consumed) 42 | 43 | def globalTable[K, V](topic: String, materialized: Materialized[K, V, KeyValueStore[Bytes, Array[Byte]]])( 44 | implicit consumed: Consumed[K, V] 45 | ): GlobalKTable[K, V] = 46 | inner.globalTable(topic, consumed, materialized) 47 | 48 | def addStateStore(builder: StoreBuilder[_ <: StateStore]): StreamsBuilder = inner.addStateStore(builder) 49 | 50 | def addGlobalStore(storeBuilder: StoreBuilder[_ <: StateStore], 51 | topic: String, 52 | sourceName: String, 53 | consumed: Consumed[_, _], 54 | processorName: String, 55 | stateUpdateSupplier: ProcessorSupplier[_, _]): StreamsBuilder = 56 | inner.addGlobalStore(storeBuilder, topic, sourceName, consumed, processorName, stateUpdateSupplier) 57 | 58 | def build(): Topology = inner.build() 59 | } 60 | -------------------------------------------------------------------------------- /src/main/scala/com/lightbend/kafka/scala/streams/TimeWindowedKStreamS.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | * Copyright 2017-2018 Alexis Seigneurin. 4 | */ 5 | package com.lightbend.kafka.scala.streams 6 | 7 | import org.apache.kafka.streams.kstream._ 8 | import org.apache.kafka.streams.state.WindowStore 9 | import org.apache.kafka.common.utils.Bytes 10 | import org.apache.kafka.common.serialization.Serde 11 | import ImplicitConversions._ 12 | import FunctionConversions._ 13 | 14 | /** 15 | * Wraps the Java class TimeWindowedKStream and delegates method calls to the underlying Java object. 
16 | */ 17 | class TimeWindowedKStreamS[K, V](val inner: TimeWindowedKStream[K, V]) { 18 | 19 | def aggregate[VR](initializer: () => VR, aggregator: (K, V, VR) => VR): KTableS[Windowed[K], VR] = 20 | inner.aggregate(initializer.asInitializer, aggregator.asAggregator) 21 | 22 | def aggregate[VR](initializer: () => VR, 23 | aggregator: (K, V, VR) => VR, 24 | materialized: Materialized[K, VR, WindowStore[Bytes, Array[Byte]]]): KTableS[Windowed[K], VR] = 25 | inner.aggregate(initializer.asInitializer, aggregator.asAggregator, materialized) 26 | 27 | def count(): KTableS[Windowed[K], Long] = { 28 | val c: KTableS[Windowed[K], java.lang.Long] = inner.count() 29 | c.mapValues[Long](Long2long(_)) 30 | } 31 | 32 | def count(store: String, keySerde: Option[Serde[K]] = None): KTableS[Windowed[K], Long] = { 33 | val materialized = { 34 | val m = Materialized.as[K, java.lang.Long, WindowStore[Bytes, Array[Byte]]](store) 35 | keySerde.foldLeft(m)((m, serde) => m.withKeySerde(serde)) 36 | } 37 | val c: KTableS[Windowed[K], java.lang.Long] = inner.count(materialized) 38 | c.mapValues[Long](Long2long(_)) 39 | } 40 | 41 | def reduce(reducer: (V, V) => V): KTableS[Windowed[K], V] = 42 | inner.reduce(reducer.asReducer) 43 | 44 | def reduce(reducer: (V, V) => V, 45 | materialized: Materialized[K, V, WindowStore[Bytes, Array[Byte]]]): KTableS[Windowed[K], V] = 46 | inner.reduce(reducer.asReducer, materialized) 47 | } 48 |
-------------------------------------------------------------------------------- /src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Set root logger level to DEBUG and its only appender to A1. 2 | log4j.rootLogger=ERROR, R 3 | 4 | # A1 is set to be a ConsoleAppender. 5 | log4j.appender.A1=org.apache.log4j.ConsoleAppender 6 | 7 | log4j.appender.R=org.apache.log4j.RollingFileAppender 8 | log4j.appender.R.File=logs/kafka-server.log 9 | 10 | log4j.appender.R.MaxFileSize=100KB 11 | # Keep one backup file 12 | log4j.appender.R.MaxBackupIndex=1 13 | 14 | # A1 uses PatternLayout. 15 | log4j.appender.R.layout=org.apache.log4j.PatternLayout 16 | log4j.appender.R.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n 17 |
-------------------------------------------------------------------------------- /src/test/resources/logback.xml: -------------------------------------------------------------------------------- [logback.xml: a 19-line logback XML configuration whose element markup was lost during text extraction; the surviving values indicate a file appender writing to logs/kstream.log with append=true and the encoder pattern %d{HH:mm:ss.SSS} TKD [%thread] %-5level %logger{36} - %msg%n.]
-------------------------------------------------------------------------------- /src/test/scala/com/lightbend/kafka/scala/server/KafkaLocalServer.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc.
3 | */ 4 | package com.lightbend.kafka.scala.server 5 | 6 | // Loosely based on Lagom implementation at 7 | // https://github.com/lagom/lagom/blob/master/dev/kafka-server/src/main/scala/com/lightbend/lagom/internal/kafka/KafkaLocalServer.scala 8 | 9 | import java.io.{File, IOException} 10 | import java.util.Properties 11 | 12 | import org.apache.curator.test.TestingServer 13 | import com.typesafe.scalalogging.LazyLogging 14 | 15 | import kafka.server.{KafkaConfig, KafkaServerStartable} 16 | 17 | import scala.util.{Failure, Success, Try} 18 | 19 | import kafka.admin.{AdminUtils, RackAwareMode} 20 | import kafka.utils.ZkUtils 21 | 22 | class KafkaLocalServer private (kafkaProperties: Properties, zooKeeperServer: ZooKeeperLocalServer) 23 | extends LazyLogging { 24 | 25 | import KafkaLocalServer._ 26 | 27 | private var broker = null.asInstanceOf[KafkaServerStartable] // scalastyle:ignore 28 | private var zkUtils: ZkUtils = 29 | ZkUtils.apply(s"localhost:${zooKeeperServer.getPort()}", 30 | DEFAULT_ZK_SESSION_TIMEOUT_MS, 31 | DEFAULT_ZK_CONNECTION_TIMEOUT_MS, 32 | false) 33 | 34 | def start(): Unit = { 35 | 36 | broker = KafkaServerStartable.fromProps(kafkaProperties) 37 | broker.startup() 38 | } 39 | 40 | //scalastyle:off null 41 | def stop(): Unit = 42 | if (broker != null) { 43 | broker.shutdown() 44 | zooKeeperServer.stop() 45 | broker = null.asInstanceOf[KafkaServerStartable] 46 | } 47 | //scalastyle:on null 48 | 49 | /** 50 | * Create a Kafka topic with 1 partition and a replication factor of 1. 51 | * 52 | * @param topic The name of the topic. 53 | */ 54 | def createTopic(topic: String): Unit = 55 | createTopic(topic, 1, 1, new Properties) 56 | 57 | /** 58 | * Create a Kafka topic with the given parameters. 59 | * 60 | * @param topic The name of the topic. 61 | * @param partitions The number of partitions for this topic. 62 | * @param replication The replication factor for (the partitions of) this topic. 63 | */ 64 | def createTopic(topic: String, partitions: Int, replication: Int): Unit = 65 | createTopic(topic, partitions, replication, new Properties) 66 | 67 | /** 68 | * Create a Kafka topic with the given parameters. 69 | * 70 | * @param topic The name of the topic. 71 | * @param partitions The number of partitions for this topic. 72 | * @param replication The replication factor for (partitions of) this topic. 73 | * @param topicConfig Additional topic-level configuration settings. 
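 *
 * For example (illustrative only; `server` denotes a running KafkaLocalServer instance):
 * {{{
 * val cfg = new java.util.Properties()
 * cfg.put("cleanup.policy", "compact")
 * server.createTopic("user-regions", partitions = 3, replication = 1, topicConfig = cfg)
 * }}}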
74 | */ 75 | def createTopic(topic: String, partitions: Int, replication: Int, topicConfig: Properties): Unit = 76 | AdminUtils.createTopic(zkUtils, topic, partitions, replication, topicConfig, RackAwareMode.Enforced) 77 | 78 | def deleteTopic(topic: String): Unit = AdminUtils.deleteTopic(zkUtils, topic) 79 | } 80 | 81 | import Utils._ 82 | 83 | object KafkaLocalServer extends LazyLogging { 84 | final val DefaultPort = 9092 85 | final val DefaultResetOnStart = true 86 | private val DEFAULT_ZK_CONNECT = "localhost:2181" 87 | private val DEFAULT_ZK_SESSION_TIMEOUT_MS = 10 * 1000 88 | private val DEFAULT_ZK_CONNECTION_TIMEOUT_MS = 8 * 1000 89 | 90 | final val basDir = "tmp/" 91 | 92 | final private val kafkaDataFolderName = "kafka_data" 93 | 94 | def apply(cleanOnStart: Boolean, localStateDir: Option[String] = None): KafkaLocalServer = 95 | this(DefaultPort, ZooKeeperLocalServer.DefaultPort, cleanOnStart, localStateDir) 96 | 97 | def apply(kafkaPort: Int, 98 | zookeeperServerPort: Int, 99 | cleanOnStart: Boolean, 100 | localStateDir: Option[String]): KafkaLocalServer = { 101 | 102 | // delete kafka data dir on clean start 103 | val kafkaDataDir: File = (for { 104 | kdir <- dataDirectory(basDir, kafkaDataFolderName) 105 | _ <- if (cleanOnStart) deleteDirectory(kdir) else Try(()) 106 | } yield kdir) match { 107 | case Success(d) => d 108 | case Failure(ex) => throw ex 109 | } 110 | 111 | // delete kafka local state dir on clean start 112 | localStateDir.foreach { d => 113 | for { 114 | kdir <- dataDirectory("", d) 115 | _ <- if (cleanOnStart) deleteDirectory(kdir) else Try(()) 116 | } yield (()) 117 | } 118 | 119 | logger.info(s"Kafka data directory is $kafkaDataDir.") 120 | 121 | val kafkaProperties = createKafkaProperties(kafkaPort, zookeeperServerPort, kafkaDataDir) 122 | 123 | val zk = new ZooKeeperLocalServer(zookeeperServerPort, cleanOnStart) 124 | zk.start() 125 | new KafkaLocalServer(kafkaProperties, zk) 126 | } 127 | 128 | /** 129 | * Creates a Properties instance for Kafka customized with values passed in argument. 130 | */ 131 | private def createKafkaProperties(kafkaPort: Int, zookeeperServerPort: Int, dataDir: File): Properties = { 132 | 133 | // TODO: Probably should be externalized into properties. 
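// A possible way to address the TODO (sketch only, not part of the original code): load optional
// overrides from a classpath resource, e.g. a hypothetical kafka-local.properties, and merge them
// over the defaults built below once they are populated:
//   val overrides = new Properties()
//   Option(getClass.getResourceAsStream("/kafka-local.properties")).foreach(in => overrides.load(in))
//   kafkaProperties.putAll(overrides)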
134 | val kafkaProperties = new Properties 135 | kafkaProperties.put(KafkaConfig.ListenersProp, s"PLAINTEXT://localhost:$kafkaPort") 136 | kafkaProperties.put(KafkaConfig.ZkConnectProp, s"localhost:$zookeeperServerPort") 137 | kafkaProperties.put(KafkaConfig.ZkConnectionTimeoutMsProp, "6000") 138 | kafkaProperties.put(KafkaConfig.BrokerIdProp, "0") 139 | kafkaProperties.put(KafkaConfig.NumNetworkThreadsProp, "3") 140 | kafkaProperties.put(KafkaConfig.NumIoThreadsProp, "8") 141 | kafkaProperties.put(KafkaConfig.SocketSendBufferBytesProp, "102400") 142 | kafkaProperties.put(KafkaConfig.SocketReceiveBufferBytesProp, "102400") 143 | kafkaProperties.put(KafkaConfig.SocketRequestMaxBytesProp, "104857600") 144 | kafkaProperties.put(KafkaConfig.NumPartitionsProp, "1") 145 | kafkaProperties.put(KafkaConfig.NumRecoveryThreadsPerDataDirProp, "1") 146 | kafkaProperties.put(KafkaConfig.OffsetsTopicReplicationFactorProp, "1") 147 | kafkaProperties.put(KafkaConfig.TransactionsTopicReplicationFactorProp, "1") 148 | kafkaProperties.put(KafkaConfig.LogRetentionTimeHoursProp, "2") 149 | kafkaProperties.put(KafkaConfig.LogSegmentBytesProp, "1073741824") 150 | kafkaProperties.put(KafkaConfig.LogCleanupIntervalMsProp, "300000") 151 | kafkaProperties.put(KafkaConfig.AutoCreateTopicsEnableProp, "true") 152 | kafkaProperties.put(KafkaConfig.ControlledShutdownEnableProp, "true") 153 | kafkaProperties.put(KafkaConfig.LogDirProp, dataDir.getAbsolutePath) 154 | 155 | kafkaProperties 156 | } 157 | } 158 | 159 | private class ZooKeeperLocalServer(port: Int, cleanOnStart: Boolean) extends LazyLogging { 160 | 161 | import KafkaLocalServer._ 162 | import ZooKeeperLocalServer._ 163 | 164 | private var zooKeeper = null.asInstanceOf[TestingServer] // scalastyle:ignore 165 | 166 | def start(): Unit = { 167 | // delete zookeeper data dir on clean start 168 | val zookeeperDataDir: File = (for { 169 | zdir <- dataDirectory(basDir, zookeeperDataFolderName) 170 | _ <- if (cleanOnStart) deleteDirectory(zdir) else Try(()) 171 | } yield zdir) match { 172 | case Success(d) => d 173 | case Failure(ex) => throw ex 174 | } 175 | logger.info(s"Zookeeper data directory is $zookeeperDataDir.") 176 | 177 | zooKeeper = new TestingServer(port, zookeeperDataDir, false) 178 | 179 | zooKeeper.start() // blocking operation 180 | } 181 | 182 | // scalastyle:off null 183 | def stop(): Unit = 184 | if (zooKeeper != null) 185 | try { 186 | zooKeeper.stop() 187 | zooKeeper = null.asInstanceOf[TestingServer] 188 | } catch { 189 | case _: IOException => () // nothing to do if an exception is thrown while shutting down 190 | } 191 | //scalastyle:on null 192 | 193 | def getPort(): Int = port 194 | } 195 | 196 | object ZooKeeperLocalServer { 197 | final val DefaultPort = 2181 198 | final private val zookeeperDataFolderName = "zookeeper_data" 199 | } 200 | -------------------------------------------------------------------------------- /src/test/scala/com/lightbend/kafka/scala/server/MessageListener.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc.
3 | */ 4 | package com.lightbend.kafka.scala.server 5 | 6 | import org.apache.kafka.clients.consumer.{ConsumerConfig, KafkaConsumer} 7 | import org.apache.kafka.streams.KeyValue 8 | import scala.collection.JavaConverters._ 9 | import scala.collection.mutable.ListBuffer 10 | 11 | object MessageListener { 12 | private val AUTO_COMMIT_INTERVAL_MS_CONFIG = "1000" // Frequency of offset commits 13 | private val SESSION_TIMEOUT_MS_CONFIG = "30000" // The timeout used to detect failures - should be greater then processing time 14 | private val MAX_POLL_RECORDS_CONFIG = "50" // Max number of records consumed in a single poll 15 | 16 | def consumerProperties(brokers: String, 17 | group: String, 18 | keyDeserializer: String, 19 | valueDeserializer: String): Map[String, AnyRef] = 20 | Map[String, AnyRef]( 21 | ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> brokers, 22 | ConsumerConfig.GROUP_ID_CONFIG -> group, 23 | ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG -> "true", 24 | ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG -> AUTO_COMMIT_INTERVAL_MS_CONFIG, 25 | ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG -> SESSION_TIMEOUT_MS_CONFIG, 26 | ConsumerConfig.MAX_POLL_RECORDS_CONFIG -> MAX_POLL_RECORDS_CONFIG, 27 | ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> "latest", 28 | ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG -> keyDeserializer, 29 | ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG -> valueDeserializer 30 | ) 31 | 32 | def apply[K, V](brokers: String, 33 | topic: String, 34 | group: String, 35 | keyDeserializer: String, 36 | valueDeserializer: String, 37 | processor: RecordProcessorTrait[K, V]): MessageListener[K, V] = 38 | new MessageListener[K, V](brokers, topic, group, keyDeserializer, valueDeserializer, processor) 39 | } 40 | 41 | class MessageListener[K, V](brokers: String, 42 | topic: String, 43 | group: String, 44 | keyDeserializer: String, 45 | valueDeserializer: String, 46 | processor: RecordProcessorTrait[K, V]) { 47 | 48 | import MessageListener._ 49 | 50 | def readKeyValues(maxMessages: Int): List[KeyValue[K, V]] = { 51 | val pollIntervalMs = 100 52 | val maxTotalPollTimeMs = 2000 53 | var totalPollTimeMs = 0 54 | 55 | val consumer = 56 | new KafkaConsumer[K, V](consumerProperties(brokers, group, keyDeserializer, valueDeserializer).asJava) 57 | consumer.subscribe(Seq(topic).asJava) 58 | 59 | val consumedValues = ListBuffer.empty[KeyValue[K, V]] 60 | 61 | while (totalPollTimeMs < maxTotalPollTimeMs && continueConsuming(consumedValues.size, maxMessages)) { 62 | totalPollTimeMs = totalPollTimeMs + pollIntervalMs 63 | val records = consumer.poll(pollIntervalMs) 64 | records.asScala.foreach { record => 65 | processor.processRecord(record) 66 | consumedValues += new KeyValue(record.key, record.value) 67 | } 68 | } 69 | consumer.close() 70 | consumedValues.toList 71 | } 72 | 73 | def continueConsuming(messagesConsumed: Int, maxMessages: Int): Boolean = 74 | maxMessages <= 0 || messagesConsumed < maxMessages 75 | 76 | def waitUntilMinKeyValueRecordsReceived( 77 | expectedNumRecords: Int, 78 | waitTime: Long, 79 | startTime: Long = System.currentTimeMillis(), 80 | accumData: ListBuffer[KeyValue[K, V]] = ListBuffer.empty[KeyValue[K, V]] 81 | ): List[KeyValue[K, V]] = { 82 | 83 | val readData = readKeyValues(-1) 84 | accumData ++= readData 85 | 86 | if (accumData.size >= expectedNumRecords) accumData.toList 87 | else if (System.currentTimeMillis() > startTime + waitTime) 88 | throw new AssertionError( 89 | s"Expected $expectedNumRecords but received only ${accumData.size} records before timeout $waitTime ms" 90 | 
) 91 | else { 92 | Thread.sleep(Math.min(waitTime, 1000L)) 93 | waitUntilMinKeyValueRecordsReceived(expectedNumRecords, waitTime, startTime, accumData) 94 | } 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /src/test/scala/com/lightbend/kafka/scala/server/MessageSender.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | */ 4 | package com.lightbend.kafka.scala.server 5 | 6 | import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord, RecordMetadata} 7 | import java.util.Properties 8 | 9 | object MessageSender { 10 | private val ACKS_CONFIG = "all" // Blocking on the full commit of the record 11 | private val RETRIES_CONFIG = "1" // Number of retries on put 12 | private val BATCH_SIZE_CONFIG = "1024" // Buffers for unsent records for each partition - controlls batching 13 | private val LINGER_MS_CONFIG = "1" // Timeout for more records to arive - controlls batching 14 | 15 | private val BUFFER_MEMORY_CONFIG = "1024000" // Controls the total amount of memory available to the producer for buffering. 16 | // If records are sent faster than they can be transmitted to the server then this 17 | // buffer space will be exhausted. When the buffer space is exhausted additional 18 | // send calls will block. The threshold for time to block is determined by max.block.ms 19 | // after which it throws a TimeoutException. 20 | 21 | def providerProperties(brokers: String, keySerializer: String, valueSerializer: String): Properties = { 22 | val props = new Properties 23 | props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers) 24 | props.put(ProducerConfig.ACKS_CONFIG, ACKS_CONFIG) 25 | props.put(ProducerConfig.RETRIES_CONFIG, RETRIES_CONFIG) 26 | props.put(ProducerConfig.BATCH_SIZE_CONFIG, BATCH_SIZE_CONFIG) 27 | props.put(ProducerConfig.LINGER_MS_CONFIG, LINGER_MS_CONFIG) 28 | props.put(ProducerConfig.BUFFER_MEMORY_CONFIG, BUFFER_MEMORY_CONFIG) 29 | props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, keySerializer) 30 | props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, valueSerializer) 31 | props 32 | } 33 | 34 | def apply[K, V](brokers: String, keySerializer: String, valueSerializer: String): MessageSender[K, V] = 35 | new MessageSender[K, V](brokers, keySerializer, valueSerializer) 36 | } 37 | 38 | class MessageSender[K, V](val brokers: String, val keySerializer: String, val valueSerializer: String) { 39 | 40 | import MessageSender._ 41 | val producer = new KafkaProducer[K, V](providerProperties(brokers, keySerializer, valueSerializer)) 42 | 43 | def writeKeyValue(topic: String, key: K, value: V): Unit = { 44 | val result = producer.send(new ProducerRecord[K, V](topic, key, value)).get 45 | producer.flush() 46 | } 47 | 48 | def writeValue(topic: String, value: V): Unit = { 49 | val result = producer.send(new ProducerRecord[K, V](topic, null.asInstanceOf[K], value)).get // scalastyle:ignore 50 | producer.flush() 51 | } 52 | 53 | def batchWriteValue(topic: String, batch: Seq[V]): Seq[RecordMetadata] = { 54 | val result = batch.map(value => producer.send(new ProducerRecord[K, V](topic, null.asInstanceOf[K], value)).get) // scalastyle:ignore 55 | producer.flush() 56 | result 57 | } 58 | 59 | def close(): Unit = 60 | producer.close() 61 | } 62 | -------------------------------------------------------------------------------- /src/test/scala/com/lightbend/kafka/scala/server/RecordProcessorTrait.scala: 
-------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | */ 4 | package com.lightbend.kafka.scala.server 5 | 6 | import org.apache.kafka.clients.consumer.ConsumerRecord 7 | 8 | // A trait, that should be implemented by any listener implementation 9 | 10 | trait RecordProcessorTrait[K, V] { 11 | def processRecord(record: ConsumerRecord[K, V]): Unit 12 | } 13 | -------------------------------------------------------------------------------- /src/test/scala/com/lightbend/kafka/scala/server/Utils.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | */ 4 | package com.lightbend.kafka.scala.server 5 | 6 | import java.io.File 7 | import java.nio.file.{FileVisitOption, Files, Paths} 8 | import java.util.Comparator 9 | 10 | import scala.util.Try 11 | import scala.collection.JavaConverters._ 12 | 13 | object Utils { 14 | def deleteDirectory(directory: File): Try[Unit] = Try { 15 | if (directory.exists()) { 16 | val rootPath = Paths.get(directory.getAbsolutePath) 17 | 18 | val files = 19 | Files.walk(rootPath, FileVisitOption.FOLLOW_LINKS).sorted(Comparator.reverseOrder()).iterator().asScala 20 | files.foreach(Files.delete) 21 | } 22 | } 23 | 24 | def dataDirectory(baseDir: String, directoryName: String): Try[File] = Try { 25 | 26 | val dataDirectory = new File(baseDir + directoryName) 27 | 28 | if (dataDirectory.exists() && !dataDirectory.isDirectory()) 29 | throw new IllegalArgumentException( 30 | s"Cannot use $directoryName as a directory name because a file with that name already exists in $dataDirectory." 31 | ) 32 | dataDirectory 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/test/scala/com/lightbend/kafka/scala/streams/KafkaStreamsMergeTest.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | */ 4 | package com.lightbend.kafka.scala.streams 5 | 6 | import java.util.Properties 7 | import java.util.regex.Pattern 8 | 9 | import com.lightbend.kafka.scala.server.{KafkaLocalServer, MessageListener, MessageSender, RecordProcessorTrait} 10 | import minitest.TestSuite 11 | import org.apache.kafka.clients.consumer.ConsumerRecord 12 | import org.apache.kafka.common.serialization._ 13 | import org.apache.kafka.streams.{KafkaStreams, KeyValue, StreamsConfig} 14 | import ImplicitConversions._ 15 | import com.typesafe.scalalogging.LazyLogging 16 | 17 | object KafkaStreamsMergeTest extends TestSuite[KafkaLocalServer] with WordCountMergeTestData with LazyLogging { 18 | 19 | override def setup(): KafkaLocalServer = { 20 | val s = KafkaLocalServer(cleanOnStart = true, Some(localStateDir)) 21 | s.start() 22 | s 23 | } 24 | 25 | override def tearDown(server: KafkaLocalServer): Unit = 26 | server.stop() 27 | 28 | test("should count words") { server => 29 | server.createTopic(inputTopic1) 30 | server.createTopic(inputTopic2) 31 | server.createTopic(outputTopic) 32 | 33 | // 34 | // Step 1: Configure and start the processor topology. 
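// (The topology below merges the two input streams into one before counting; the same input list
//  is written to both topics in Step 2, which is why the expected counts are doubled.)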
35 | // 36 | import DefaultSerdes._ 37 | 38 | val streamsConfiguration = new Properties() 39 | streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, s"wordcount-${scala.util.Random.nextInt(100)}") 40 | streamsConfiguration.put(StreamsConfig.CLIENT_ID_CONFIG, "wordcountgroup") 41 | 42 | streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, brokers) 43 | streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, localStateDir) 44 | 45 | val builder = new StreamsBuilderS() 46 | 47 | val textLines1 = builder.stream[String, String](inputTopic1) 48 | val textLines2 = builder.stream[String, String](inputTopic2) 49 | 50 | val textLines = textLines1.merge(textLines2) 51 | 52 | val pattern = Pattern.compile("\\W+", Pattern.UNICODE_CHARACTER_CLASS) 53 | 54 | val wordCounts: KTableS[String, Long] = 55 | textLines 56 | .flatMapValues(v => pattern.split(v.toLowerCase)) 57 | .groupBy((k, v) => v) 58 | .count() 59 | 60 | wordCounts.toStream.to(outputTopic) 61 | 62 | val streams = new KafkaStreams(builder.build(), streamsConfiguration) 63 | streams.start() 64 | 65 | // 66 | // Step 2: Produce some input data to the input topics. 67 | // 68 | val sender = 69 | MessageSender[String, String](brokers, classOf[StringSerializer].getName, classOf[StringSerializer].getName) 70 | val mvals1 = sender.batchWriteValue(inputTopic1, inputValues) 71 | val mvals2 = sender.batchWriteValue(inputTopic2, inputValues) 72 | 73 | // 74 | // Step 3: Verify the application's output data. 75 | // 76 | val listener = MessageListener(brokers, 77 | outputTopic, 78 | "wordcountgroup", 79 | classOf[StringDeserializer].getName, 80 | classOf[LongDeserializer].getName, 81 | new RecordProcessor) 82 | 83 | val l = listener.waitUntilMinKeyValueRecordsReceived(expectedWordCounts.size, 30000) 84 | 85 | assertEquals(l.sortBy(_.key), expectedWordCounts.sortBy(_.key)) 86 | 87 | streams.close() 88 | } 89 | 90 | class RecordProcessor extends RecordProcessorTrait[String, Long] { 91 | override def processRecord(record: ConsumerRecord[String, Long]): Unit = { 92 | // logger.info(s"Get Message $record") 93 | } 94 | } 95 | 96 | } 97 | 98 | trait WordCountMergeTestData { 99 | val inputTopic1 = s"inputTopic1.${scala.util.Random.nextInt(100)}" 100 | val inputTopic2 = s"inputTopic2.${scala.util.Random.nextInt(100)}" 101 | val outputTopic = s"outputTpic.${scala.util.Random.nextInt(100)}" 102 | val brokers = "localhost:9092" 103 | val localStateDir = "local_state_data" 104 | 105 | val inputValues = List( 106 | "Hello Kafka Streams", 107 | "All streams lead to Kafka", 108 | "Join Kafka Summit", 109 | "И теперь пошли русские слова" 110 | ) 111 | 112 | val expectedWordCounts: List[KeyValue[String, Long]] = List( 113 | new KeyValue("hello", 2L), 114 | new KeyValue("all", 2L), 115 | new KeyValue("streams", 4L), 116 | new KeyValue("lead", 2L), 117 | new KeyValue("to", 2L), 118 | new KeyValue("join", 2L), 119 | new KeyValue("kafka", 6L), 120 | new KeyValue("summit", 2L), 121 | new KeyValue("и", 2L), 122 | new KeyValue("теперь", 2L), 123 | new KeyValue("пошли", 2L), 124 | new KeyValue("русские", 2L), 125 | new KeyValue("слова", 2L) 126 | ) 127 | } 128 | -------------------------------------------------------------------------------- /src/test/scala/com/lightbend/kafka/scala/streams/KafkaStreamsTest.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 
3 | */ 4 | package com.lightbend.kafka.scala.streams 5 | 6 | import java.util.Properties 7 | import java.util.regex.Pattern 8 | 9 | import com.lightbend.kafka.scala.server.{KafkaLocalServer, MessageListener, MessageSender, RecordProcessorTrait} 10 | import minitest.TestSuite 11 | import org.apache.kafka.clients.consumer.ConsumerRecord 12 | import org.apache.kafka.common.serialization._ 13 | import org.apache.kafka.streams.{KafkaStreams, KeyValue, StreamsConfig} 14 | import ImplicitConversions._ 15 | import com.typesafe.scalalogging.LazyLogging 16 | 17 | object KafkaStreamsTest extends TestSuite[KafkaLocalServer] with WordCountTestData with LazyLogging { 18 | 19 | override def setup(): KafkaLocalServer = { 20 | val s = KafkaLocalServer(cleanOnStart = true, Some(localStateDir)) 21 | s.start() 22 | s 23 | } 24 | 25 | override def tearDown(server: KafkaLocalServer): Unit = 26 | server.stop() 27 | 28 | test("should count words") { server => 29 | server.createTopic(inputTopic) 30 | server.createTopic(outputTopic) 31 | 32 | // 33 | // Step 1: Configure and start the processor topology. 34 | // 35 | import DefaultSerdes._ 36 | 37 | val streamsConfiguration = new Properties() 38 | streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, s"wordcount-${scala.util.Random.nextInt(100)}") 39 | streamsConfiguration.put(StreamsConfig.CLIENT_ID_CONFIG, "wordcountgroup") 40 | 41 | streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, brokers) 42 | streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, localStateDir) 43 | 44 | val builder = new StreamsBuilderS() 45 | 46 | val textLines = builder.stream[String, String](inputTopic) 47 | 48 | val pattern = Pattern.compile("\\W+", Pattern.UNICODE_CHARACTER_CLASS) 49 | 50 | val wordCounts: KTableS[String, Long] = 51 | textLines 52 | .flatMapValues(v => pattern.split(v.toLowerCase)) 53 | .groupBy((k, v) => v) 54 | .count() 55 | 56 | wordCounts.toStream.to(outputTopic) 57 | 58 | val streams = new KafkaStreams(builder.build(), streamsConfiguration) 59 | streams.start() 60 | 61 | // 62 | // Step 2: Produce some input data to the input topic. 63 | // 64 | val sender = 65 | MessageSender[String, String](brokers, classOf[StringSerializer].getName, classOf[StringSerializer].getName) 66 | val mvals = sender.batchWriteValue(inputTopic, inputValues) 67 | 68 | // 69 | // Step 3: Verify the application's output data. 
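// (The listener below polls the output topic until at least expectedWordCounts.size records have
//  been received or the 30-second timeout expires; results are then compared order-independently
//  by sorting on the key.)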
70 | // 71 | val listener = MessageListener(brokers, 72 | outputTopic, 73 | "wordcountgroup", 74 | classOf[StringDeserializer].getName, 75 | classOf[LongDeserializer].getName, 76 | new RecordProcessor) 77 | 78 | val l = listener.waitUntilMinKeyValueRecordsReceived(expectedWordCounts.size, 30000) 79 | 80 | assertEquals(l.sortBy(_.key), expectedWordCounts.sortBy(_.key)) 81 | 82 | streams.close() 83 | } 84 | 85 | class RecordProcessor extends RecordProcessorTrait[String, Long] { 86 | override def processRecord(record: ConsumerRecord[String, Long]): Unit = { 87 | // logger.info(s"Get Message $record") 88 | } 89 | } 90 | 91 | } 92 | 93 | trait WordCountTestData { 94 | val inputTopic = s"inputTopic.${scala.util.Random.nextInt(100)}" 95 | val outputTopic = s"outputTopic.${scala.util.Random.nextInt(100)}" 96 | val brokers = "localhost:9092" 97 | val localStateDir = "local_state_data" 98 | 99 | val inputValues = List( 100 | "Hello Kafka Streams", 101 | "All streams lead to Kafka", 102 | "Join Kafka Summit", 103 | "И теперь пошли русские слова" 104 | ) 105 | 106 | val expectedWordCounts: List[KeyValue[String, Long]] = List( 107 | new KeyValue("hello", 1L), 108 | new KeyValue("all", 1L), 109 | new KeyValue("streams", 2L), 110 | new KeyValue("lead", 1L), 111 | new KeyValue("to", 1L), 112 | new KeyValue("join", 1L), 113 | new KeyValue("kafka", 3L), 114 | new KeyValue("summit", 1L), 115 | new KeyValue("и", 1L), 116 | new KeyValue("теперь", 1L), 117 | new KeyValue("пошли", 1L), 118 | new KeyValue("русские", 1L), 119 | new KeyValue("слова", 1L) 120 | ) 121 | } 122 | -------------------------------------------------------------------------------- /src/test/scala/com/lightbend/kafka/scala/streams/ProbabilisticCountingScalaIntegrationTest.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | * Adapted from Confluent Inc. whose copyright is reproduced below. 4 | */ 5 | /* 6 | * Copyright Confluent Inc. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | package com.lightbend.kafka.scala.streams 21 | 22 | import java.util.Properties 23 | 24 | import com.lightbend.kafka.scala.server.{KafkaLocalServer, MessageListener, MessageSender, RecordProcessorTrait} 25 | import com.lightbend.kafka.scala.streams.algebird.{CMSStore, CMSStoreBuilder} 26 | import minitest.TestSuite 27 | import org.apache.kafka.clients.consumer.ConsumerRecord 28 | import org.apache.kafka.common.serialization._ 29 | import org.apache.kafka.streams.kstream.Transformer 30 | import org.apache.kafka.streams.processor.ProcessorContext 31 | import org.apache.kafka.streams.{KafkaStreams, KeyValue, StreamsConfig} 32 | import ImplicitConversions._ 33 | import com.typesafe.scalalogging.LazyLogging 34 | 35 | /** 36 | * End-to-end integration test that demonstrates how to probabilistically count items in an input stream. 
37 | * 38 | * This example uses a custom state store implementation, [[CMSStore]], that is backed by a 39 | * Count-Min Sketch data structure. 40 | */ 41 | trait ProbabilisticCountingScalaIntegrationTestData extends LazyLogging { 42 | val brokers = "localhost:9092" 43 | val inputTopic = s"inputTopic.${scala.util.Random.nextInt(100)}" 44 | val outputTopic = s"output-topic.${scala.util.Random.nextInt(100)}" 45 | val localStateDir = "local_state_data" 46 | 47 | val inputTextLines: Seq[String] = Seq( 48 | "Hello Kafka Streams", 49 | "All streams lead to Kafka", 50 | "Join Kafka Summit" 51 | ) 52 | 53 | val expectedWordCounts: Seq[KeyValue[String, Long]] = Seq( 54 | ("hello", 1L), 55 | ("kafka", 1L), 56 | ("streams", 1L), 57 | ("all", 1L), 58 | ("streams", 2L), 59 | ("lead", 1L), 60 | ("to", 1L), 61 | ("kafka", 2L), 62 | ("join", 1L), 63 | ("kafka", 3L), 64 | ("summit", 1L) 65 | ) 66 | } 67 | 68 | object ProbabilisticCountingScalaIntegrationTest 69 | extends TestSuite[KafkaLocalServer] 70 | with ProbabilisticCountingScalaIntegrationTestData { 71 | 72 | override def setup(): KafkaLocalServer = { 73 | val s = KafkaLocalServer(cleanOnStart = true, Some(localStateDir)) 74 | s.start() 75 | s 76 | } 77 | 78 | override def tearDown(server: KafkaLocalServer): Unit = 79 | server.stop() 80 | 81 | test("shouldProbabilisticallyCountWords") { server => 82 | server.createTopic(inputTopic) 83 | server.createTopic(outputTopic) 84 | 85 | // 86 | // Step 1: Configure and start the processor topology. 87 | // 88 | val streamsConfiguration: Properties = { 89 | val p = new Properties() 90 | p.put(StreamsConfig.APPLICATION_ID_CONFIG, 91 | s"probabilistic-counting-scala-integration-test-${scala.util.Random.nextInt(100)}") 92 | p.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, brokers) 93 | p.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.ByteArray.getClass.getName) 94 | p.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String.getClass.getName) 95 | p.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, "10000") 96 | p.put(StreamsConfig.STATE_DIR_CONFIG, localStateDir) 97 | p 98 | } 99 | 100 | val builder = new StreamsBuilderS() 101 | 102 | val cmsStoreName = "cms-store" 103 | val cmsStoreBuilder = { 104 | val changelogConfig: java.util.HashMap[String, String] = { 105 | val cfg = new java.util.HashMap[String, String] 106 | val segmentSizeBytes = (20 * 1024 * 1024).toString 107 | cfg.put("segment.bytes", segmentSizeBytes) 108 | cfg 109 | } 110 | new CMSStoreBuilder[String](cmsStoreName, Serdes.String()) 111 | .withLoggingEnabled(changelogConfig) 112 | } 113 | builder.addStateStore(cmsStoreBuilder) 114 | 115 | class ProbabilisticCounter extends Transformer[Array[Byte], String, (String, Long)] { 116 | 117 | private var cmsState: CMSStore[String] = _ 118 | private var processorContext: ProcessorContext = _ 119 | 120 | override def init(processorContext: ProcessorContext): Unit = { 121 | this.processorContext = processorContext 122 | cmsState = this.processorContext.getStateStore(cmsStoreName).asInstanceOf[CMSStore[String]] 123 | } 124 | 125 | override def transform(key: Array[Byte], value: String): (String, Long) = { 126 | // Count the record value, think: "+ 1" 127 | cmsState.put(value, this.processorContext.timestamp()) 128 | 129 | // In this example: emit the latest count estimate for the record value. We could also do 130 | // something different, e.g. periodically output the latest heavy hitters via `punctuate`. 
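// A sketch of that alternative (not part of the original code): assuming CMSStore exposes the
// heavy hitters of its underlying TopCMS (e.g. via a `heavyHitters` accessor) and that
// org.apache.kafka.streams.processor.PunctuationType is imported, one could schedule
//   processorContext.schedule(10000L, PunctuationType.STREAM_TIME, (timestamp: Long) =>
//     cmsState.heavyHitters.foreach(item => processorContext.forward(item, cmsState.get(item))))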
131 | (value, cmsState.get(value)) 132 | } 133 | 134 | //scalastyle:off null 135 | override def punctuate(l: Long): (String, Long) = null 136 | //scalastyle:on null 137 | override def close(): Unit = {} 138 | } 139 | 140 | implicit val stringSerde: Serde[String] = Serdes.String() 141 | implicit val byteArraySerde: Serde[Array[Byte]] = Serdes.ByteArray() 142 | implicit val longSerde: Serde[Long] = Serdes.Long().asInstanceOf[Serde[Long]] 143 | 144 | // Read the input from Kafka. 145 | val textLines: KStreamS[Array[Byte], String] = builder.stream(inputTopic) 146 | 147 | textLines 148 | .flatMapValues(value => value.toLowerCase.split("\\W+").toIterable) 149 | .transform(() => new ProbabilisticCounter, cmsStoreName) 150 | .to(outputTopic) 151 | 152 | val streams: KafkaStreams = new KafkaStreams(builder.build(), streamsConfiguration) 153 | streams.start() 154 | 155 | // Step 2: Publish some input text lines. 156 | val sender = 157 | MessageSender[String, String](brokers, classOf[StringSerializer].getName, classOf[StringSerializer].getName) 158 | sender.batchWriteValue(inputTopic, inputTextLines) 159 | // Step 3: Verify the application's output data. 160 | 161 | val listener = MessageListener(brokers, 162 | outputTopic, 163 | "probwordcountgroup", 164 | classOf[StringDeserializer].getName, 165 | classOf[LongDeserializer].getName, 166 | new RecordProcessor) 167 | 168 | val l = listener.waitUntilMinKeyValueRecordsReceived(expectedWordCounts.size, 30000) 169 | 170 | assertEquals(l.sortBy(_.key), expectedWordCounts.sortBy(_.key)) 171 | streams.close() 172 | } 173 | 174 | class RecordProcessor extends RecordProcessorTrait[String, Long] { 175 | override def processRecord(record: ConsumerRecord[String, Long]): Unit = { 176 | // logger.info(s"Get Message $record") 177 | } 178 | } 179 | 180 | } 181 | -------------------------------------------------------------------------------- /src/test/scala/com/lightbend/kafka/scala/streams/PunctuateTest.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | */ 4 | package com.lightbend.kafka.scala.streams 5 | 6 | import java.util.Properties 7 | 8 | import com.lightbend.kafka.scala.server.{KafkaLocalServer, MessageSender} 9 | import com.typesafe.scalalogging.LazyLogging 10 | import minitest.TestSuite 11 | import org.apache.kafka.common.serialization._ 12 | import org.apache.kafka.streams.processor.{AbstractProcessor, ProcessorContext, PunctuationType} 13 | import org.apache.kafka.streams.{KafkaStreams, StreamsConfig, Topology} 14 | 15 | /** 16 | * This sample is using usage of punctuate, which is significantly changed in version 1.0 and 17 | * Kafka Streams Processor APIs (https://kafka.apache.org/10/documentation/streams/developer-guide/processor-api.html) 18 | * This code is based on the article "Problems With Kafka Streams: 19 | * The Saga Continues" (https://dzone.com/articles/problems-with-kafka-streams-the-saga-continues) 20 | */ 21 | object PunctuateTest extends TestSuite[KafkaLocalServer] with PunctuateTestData with LazyLogging { 22 | 23 | override def setup(): KafkaLocalServer = { 24 | val s = KafkaLocalServer(cleanOnStart = true, Some(localStateDir)) 25 | s.start() 26 | s 27 | } 28 | 29 | override def tearDown(server: KafkaLocalServer): Unit = 30 | server.stop() 31 | 32 | test("should punctuate execution") { server => 33 | server.createTopic(inputTopic) 34 | 35 | // 36 | // Step 1: Configure and start the processor topology. 
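// (Unlike the DSL-based tests above, this test wires a Topology directly through the Processor
//  API: a single source node feeds SampleProcessor, which registers a stream-time punctuator.)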
37 | // 38 | 39 | val streamsConfiguration = new Properties() 40 | streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, s"punctuate-${scala.util.Random.nextInt(100)}") 41 | streamsConfiguration.put(StreamsConfig.CLIENT_ID_CONFIG, "punctuategroup") 42 | 43 | streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, brokers) 44 | streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()) 45 | streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()) 46 | 47 | val topology = new Topology 48 | // Data input streams 49 | topology.addSource("data", inputTopic) 50 | // Processors 51 | topology.addProcessor("data processor", () => new SampleProcessor(5000), "data") 52 | val streams = new KafkaStreams(topology, streamsConfiguration) 53 | streams.start() 54 | // Allow time for the streams to start up 55 | Thread.sleep(5000L) 56 | 57 | // 58 | // Step 2: Produce some input data to the input topic. 59 | // 60 | val sender = 61 | MessageSender[String, String](brokers, classOf[StringSerializer].getName, classOf[StringSerializer].getName) 62 | for (i <- 0 to 15) { 63 | sender.writeValue(inputTopic, i.toString) 64 | Thread.sleep(1000L) // sleep for 1 sec 65 | } 66 | 67 | // End test 68 | Thread.sleep(5000L) // sleep for 5 sec 69 | streams.close() 70 | } 71 | 72 | class SampleProcessor(punctuateTime: Long) extends AbstractProcessor[String, String] { 73 | 74 | var ctx: ProcessorContext = _ 75 | var message = "" 76 | 77 | override def init(context: ProcessorContext): Unit = { 78 | ctx = context 79 | ctx.schedule(punctuateTime, 80 | PunctuationType.STREAM_TIME, 81 | (timestamp: Long) => logger.info(s"Punctuator called at $timestamp, current message $message")) 82 | } 83 | 84 | override def process(key: String, value: String): Unit = { 85 | logger.info(s"Processing new message $value") 86 | message = value 87 | } 88 | } 89 | } 90 | 91 | trait PunctuateTestData { 92 | val inputTopic = s"inputTopic.${scala.util.Random.nextInt(100)}" 93 | val outputTopic = s"outputTopic.${scala.util.Random.nextInt(100)}" 94 | val brokers = "localhost:9092" 95 | val localStateDir = "local_state_data" 96 | } 97 | -------------------------------------------------------------------------------- /src/test/scala/com/lightbend/kafka/scala/streams/StreamToTableJoinScalaIntegrationTestImplicitSerdes.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | * Adapted from Confluent Inc. whose copyright is reproduced below. 4 | */ 5 | /* 6 | * Copyright Confluent Inc. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License.
19 | */ 20 | package com.lightbend.kafka.scala.streams 21 | 22 | import java.util.Properties 23 | 24 | import minitest.TestSuite 25 | import com.lightbend.kafka.scala.server.{KafkaLocalServer, MessageListener, MessageSender, RecordProcessorTrait} 26 | import org.apache.kafka.common.serialization._ 27 | import org.apache.kafka.streams._ 28 | import org.apache.kafka.clients.consumer.ConsumerRecord 29 | import ImplicitConversions._ 30 | import com.typesafe.scalalogging.LazyLogging 31 | 32 | /** 33 | * End-to-end integration test that demonstrates how to perform a join between a KStream and a 34 | * KTable (think: KStream.leftJoin(KTable)), i.e. an example of a stateful computation. 35 | * 36 | * See StreamToTableJoinIntegrationTest for the equivalent Java example. 37 | * 38 | * Note: We intentionally use JUnit4 (wrapped by ScalaTest) for implementing this Scala integration 39 | * test so it is easier to compare this Scala code with the equivalent Java code at 40 | * StreamToTableJoinIntegrationTest. One difference is that, to simplify the Scala/Junit integration, we 41 | * switched from BeforeClass (which must be `static`) to Before as well as from @ClassRule (which 42 | * must be `static` and `public`) to a workaround combination of `@Rule def` and a `private val`. 43 | */ 44 | object StreamToTableJoinScalaIntegrationTestImplicitSerdes 45 | extends TestSuite[KafkaLocalServer] 46 | with StreamToTableJoinTestData 47 | with LazyLogging { 48 | 49 | override def setup(): KafkaLocalServer = { 50 | val s = KafkaLocalServer(cleanOnStart = true, Some(localStateDir)) 51 | s.start() 52 | s 53 | } 54 | 55 | override def tearDown(server: KafkaLocalServer): Unit = 56 | server.stop() 57 | 58 | test("should count clicks per region") { server => 59 | server.createTopic(userClicksTopic) 60 | server.createTopic(userRegionsTopic) 61 | server.createTopic(outputTopic) 62 | 63 | // 64 | // Step 1: Configure and start the processor topology. 65 | // 66 | // DefaultSerdes brings into scope implicit serdes (mostly for primitives) that will set up all Serialized, Produced, 67 | // Consumed and Joined instances. So all APIs below that accept Serialized, Produced, Consumed or Joined will 68 | // get these instances automatically 69 | import DefaultSerdes._ 70 | 71 | // we don't have any serde declared as part of configuration. Even if they are declared here, the 72 | // Scala APIs will ignore them. But it's possible to declare serdes here and use them through 73 | // Java APIs 74 | val streamsConfiguration: Properties = { 75 | val p = new Properties() 76 | p.put(StreamsConfig.APPLICATION_ID_CONFIG, 77 | s"stream-table-join-scala-integration-test-implicit-ser-${scala.util.Random.nextInt(100)}") 78 | p.put(StreamsConfig.CLIENT_ID_CONFIG, "join-scala-integration-test-implicit-ser-standard-consumer") 79 | p.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, brokers) 80 | p.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, "100") 81 | p.put(StreamsConfig.STATE_DIR_CONFIG, localStateDir) 82 | p 83 | } 84 | 85 | val builder = new StreamsBuilderS() 86 | 87 | val userClicksStream: KStreamS[String, Long] = builder.stream(userClicksTopic) 88 | 89 | val userRegionsTable: KTableS[String, String] = builder.table(userRegionsTopic) 90 | 91 | // Compute the total per region by summing the individual click counts per region. 92 | val clicksPerRegion: KTableS[String, Long] = 93 | userClicksStream 94 | 95 | // Join the stream against the table. 
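// (This is a non-windowed stream-table join: every incoming click record is joined against the
//  latest region stored for that user key, and users without a region fall back to "UNKNOWN".)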
96 | .leftJoin(userRegionsTable, 97 | (clicks: Long, region: String) => (if (region == null) "UNKNOWN" else region, clicks)) 98 | 99 | // Change the stream from -> to -> 100 | .map((_, regionWithClicks) => regionWithClicks) 101 | 102 | // Compute the total per region by summing the individual click counts per region. 103 | .groupByKey 104 | .reduce(_ + _) 105 | 106 | // Write the (continuously updating) results to the output topic. 107 | clicksPerRegion.toStream.to(outputTopic) 108 | 109 | val streams: KafkaStreams = new KafkaStreams(builder.build(), streamsConfiguration) 110 | 111 | streams.setUncaughtExceptionHandler( 112 | (_: Thread, e: Throwable) => 113 | try { 114 | logger.error(s"Stream terminated because of uncaught exception .. Shutting down app", e) 115 | e.printStackTrace() 116 | val closed: Unit = streams.close() 117 | logger.info(s"Exiting application after streams close ($closed)") 118 | } catch { 119 | case x: Exception => x.printStackTrace() 120 | } finally { 121 | logger.debug("Exiting application ..") 122 | System.exit(-1) 123 | } 124 | ) 125 | 126 | streams.start() 127 | 128 | // 129 | // Step 2: Publish user-region information. 130 | // 131 | // To keep this code example simple and easier to understand/reason about, we publish all 132 | // user-region records before any user-click records (cf. step 3). In practice though, 133 | // data records would typically be arriving concurrently in both input streams/topics. 134 | // 135 | val sender1 = 136 | MessageSender[String, String](brokers, classOf[StringSerializer].getName, classOf[StringSerializer].getName) 137 | userRegions.foreach(r => sender1.writeKeyValue(userRegionsTopic, r.key, r.value)) 138 | 139 | // 140 | // Step 3: Publish some user click events. 141 | // 142 | val sender2 = 143 | MessageSender[String, Long](brokers, classOf[StringSerializer].getName, classOf[LongSerializer].getName) 144 | userClicks.foreach(r => sender2.writeKeyValue(userClicksTopic, r.key, r.value)) 145 | 146 | // 147 | // Step 4: Verify the application's output data. 148 | // 149 | val listener = MessageListener( 150 | brokers, 151 | outputTopic, 152 | "join-scala-integration-test-standard-consumer", 153 | classOf[StringDeserializer].getName, 154 | classOf[LongDeserializer].getName, 155 | new RecordProcessor 156 | ) 157 | 158 | val l = listener.waitUntilMinKeyValueRecordsReceived(expectedClicksPerRegion.size, 30000) // scalastyle:ignore 159 | streams.close() 160 | assertEquals(l.sortBy(_.key), expectedClicksPerRegion.sortBy(_.key)) 161 | } 162 | 163 | class RecordProcessor extends RecordProcessorTrait[String, Long] { 164 | override def processRecord(record: ConsumerRecord[String, Long]): Unit = { 165 | //logger.info(s"Get Message $record") 166 | } 167 | } 168 | } 169 | -------------------------------------------------------------------------------- /src/test/scala/com/lightbend/kafka/scala/streams/StreamToTableJoinScalaIntegrationTestImplicitSerdesWithAvro.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | * Adapted from Confluent Inc. whose copyright is reproduced below. 4 | */ 5 | /* 6 | * Copyright Confluent Inc. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 
10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | package com.lightbend.kafka.scala.streams 21 | 22 | import java.io.{ByteArrayInputStream, ByteArrayOutputStream} 23 | import java.util.Properties 24 | 25 | import com.lightbend.kafka.scala.server.{KafkaLocalServer, MessageListener, MessageSender, RecordProcessorTrait} 26 | import com.sksamuel.avro4s._ 27 | import minitest.TestSuite 28 | import org.apache.kafka.clients.consumer.ConsumerRecord 29 | import org.apache.kafka.common.serialization._ 30 | import org.apache.kafka.streams._ 31 | import ImplicitConversions._ 32 | 33 | object StreamToTableJoinScalaIntegrationTestImplicitSerdesWithAvro 34 | extends TestSuite[KafkaLocalServer] 35 | with StreamToTableJoinTestData { 36 | 37 | case class UserClicks(clicks: Long) 38 | 39 | // adopted from Openshine implementation 40 | class AvroSerde[T >: Null: SchemaFor: FromRecord: ToRecord] extends StatelessScalaSerde[T] { 41 | 42 | override def serialize(data: T): Array[Byte] = { 43 | val baos = new ByteArrayOutputStream() 44 | val output = AvroOutputStream.binary[T](baos) 45 | output.write(data) 46 | output.close() 47 | baos.toByteArray 48 | } 49 | 50 | override def deserialize(data: Array[Byte]): Option[T] = { 51 | val in = new ByteArrayInputStream(data) 52 | val input = AvroInputStream.binary[T](in) 53 | input.iterator.toSeq.headOption 54 | } 55 | } 56 | 57 | /** Our implicit Serde implementation for the values we want to serialize 58 | * as avro 59 | */ 60 | implicit val userClicksSerde: Serde[UserClicks] = new AvroSerde 61 | 62 | /** 63 | * End-to-end integration test that demonstrates how to perform a join 64 | * between a KStream and a 65 | * KTable (think: KStream.leftJoin(KTable)), i.e. an example of a stateful 66 | * computation. 67 | * 68 | * See StreamToTableJoinIntegrationTest for the equivalent Java example. 69 | * 70 | * Note: We intentionally use JUnit4 (wrapped by ScalaTest) for 71 | * implementing this Scala integration 72 | * test so it is easier to compare this Scala code with the equivalent 73 | * Java code at 74 | * StreamToTableJoinIntegrationTest. One difference is that, to simplify 75 | * the Scala/Junit integration, we 76 | * switched from BeforeClass (which must be `static`) to Before as well as 77 | * from @ClassRule (which 78 | * must be `static` and `public`) to a workaround combination of `@Rule 79 | * def` and a `private val`. 80 | */ 81 | override def setup(): KafkaLocalServer = { 82 | val s = KafkaLocalServer(true, Some(localStateDir)) 83 | s.start() 84 | s 85 | } 86 | 87 | override def tearDown(server: KafkaLocalServer): Unit = 88 | server.stop() 89 | 90 | test("should count clicks per region") { server => 91 | server.createTopic(userClicksTopic) 92 | server.createTopic(userRegionsTopic) 93 | server.createTopic(outputTopic) 94 | 95 | // DefaultSerdes brings into scope implicit serdes (mostly for primitives) that will set up all Serialized, Produced, 96 | // Consumed and Joined instances. 
So all APIs below that accept Serialized, Produced, Consumed or Joined will 97 | // get these instances automatically 98 | import DefaultSerdes._ 99 | 100 | // 101 | // Step 1: Configure and start the processor topology. 102 | // 103 | // we don't have any serde declared as part of configuration. Even if they are declared here, the 104 | // Scala APIs will ignore them. But it's possible to declare serdes here and use them through 105 | // Java APIs 106 | val streamsConfiguration: Properties = { 107 | val p = new Properties() 108 | p.put(StreamsConfig.APPLICATION_ID_CONFIG, 109 | s"stream-table-join-scala-integration-test-implicit-serdes-${scala.util.Random.nextInt(100)}") 110 | p.put(StreamsConfig.CLIENT_ID_CONFIG, "join-scala-integration-test-implicit-serdes-standard-consumer") 111 | p.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, brokers) 112 | p.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, "100") 113 | p.put(StreamsConfig.STATE_DIR_CONFIG, localStateDir) 114 | p 115 | } 116 | 117 | implicit val builder = new StreamsBuilderS() 118 | 119 | val userClicksStream: KStreamS[String, UserClicks] = builder.stream(userClicksTopic) 120 | 121 | val userRegionsTable: KTableS[String, String] = builder.table(userRegionsTopic) 122 | 123 | // Compute the total per region by summing the individual click counts per region. 124 | val clicksPerRegion: KTableS[String, Long] = 125 | userClicksStream 126 | 127 | // Join the stream against the table. 128 | .leftJoin(userRegionsTable, 129 | (clicks: UserClicks, region: String) => (if (region == null) "UNKNOWN" else region, clicks.clicks)) 130 | 131 | // Change the stream from -> to -> 132 | .map((_, regionWithClicks) => regionWithClicks) 133 | 134 | // Compute the total per region by summing the individual click counts per region. 135 | .groupByKey 136 | .reduce(_ + _) 137 | 138 | // Write the (continuously updating) results to the output topic. 139 | clicksPerRegion.toStream.to(outputTopic) 140 | 141 | val streams: KafkaStreams = new KafkaStreams(builder.build(), streamsConfiguration) 142 | 143 | streams 144 | .setUncaughtExceptionHandler(new Thread.UncaughtExceptionHandler() { 145 | override def uncaughtException(t: Thread, e: Throwable): Unit = 146 | try { 147 | println(s"Stream terminated because of uncaught exception .. Shutting " + 148 | s"down app", 149 | e) 150 | e.printStackTrace 151 | val closed = streams.close() 152 | println(s"Exiting application after streams close ($closed)") 153 | } catch { 154 | case x: Exception => x.printStackTrace 155 | } finally { 156 | println("Exiting application ..") 157 | System.exit(-1) 158 | } 159 | }) 160 | 161 | streams.start() 162 | 163 | // 164 | // Step 2: Publish user-region information. 165 | // 166 | // To keep this code example simple and easier to understand/reason 167 | // about, we publish all 168 | // user-region records before any user-click records (cf. step 3). In 169 | // practice though, 170 | // data records would typically be arriving concurrently in both input 171 | // streams/topics. 172 | val sender1 = 173 | MessageSender[String, String](brokers, classOf[StringSerializer].getName, classOf[StringSerializer].getName) 174 | userRegions.foreach(r => sender1.writeKeyValue(userRegionsTopic, r.key, r.value)) 175 | 176 | // 177 | // Step 3: Publish some user click events. 
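// (Click values are serialized explicitly with the AvroSerde defined above and published as raw
//  bytes, which is why sender2 below is created with a ByteArraySerializer for values.)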
178 | // 179 | val sender2 = MessageSender[String, Array[Byte]](brokers, 180 | classOf[StringSerializer].getName, 181 | classOf[ByteArraySerializer].getName) 182 | userClicks 183 | .map( 184 | kv => 185 | new KeyValue[String, Array[Byte]]( 186 | kv.key, 187 | new AvroSerde[UserClicks].serialize(UserClicks(kv.value)) 188 | ) 189 | ) 190 | .foreach(r => sender2.writeKeyValue(userClicksTopic, r.key, r.value)) 191 | 192 | // 193 | // Step 4: Verify the application's output data. 194 | // 195 | val listener = MessageListener( 196 | brokers, 197 | outputTopic, 198 | "join-scala-integration-test-standard-consumer", 199 | classOf[StringDeserializer].getName, 200 | classOf[LongDeserializer].getName, 201 | new RecordProcessor 202 | ) 203 | 204 | val l = listener 205 | .waitUntilMinKeyValueRecordsReceived(expectedClicksPerRegion.size, 30000) 206 | streams.close() 207 | assertEquals(l.sortBy(_.key), expectedClicksPerRegion.sortBy(_.key)) 208 | } 209 | 210 | class RecordProcessor extends RecordProcessorTrait[String, Long] { 211 | override def processRecord(record: ConsumerRecord[String, Long]): Unit = { 212 | // println(s"Get Message $record") 213 | } 214 | } 215 | 216 | } 217 | -------------------------------------------------------------------------------- /src/test/scala/com/lightbend/kafka/scala/streams/StreamToTableJoinTestData.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | * Adapted from Confluent Inc. whose copyright is reproduced below. 4 | */ 5 | /* 6 | * Copyright Confluent Inc. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | package com.lightbend.kafka.scala.streams 21 | 22 | import org.apache.kafka.streams.KeyValue 23 | 24 | trait StreamToTableJoinTestData { 25 | val brokers = "localhost:9092" 26 | 27 | val userClicksTopic = s"user-clicks.${scala.util.Random.nextInt(100)}" 28 | val userRegionsTopic = s"user-regions.${scala.util.Random.nextInt(100)}" 29 | val outputTopic = s"output-topic.${scala.util.Random.nextInt(100)}" 30 | val localStateDir = "local_state_data" 31 | 32 | // Input 1: Clicks per user (multiple records allowed per user). 33 | val userClicks: Seq[KeyValue[String, Long]] = Seq( 34 | new KeyValue("alice", 13L), 35 | new KeyValue("bob", 4L), 36 | new KeyValue("chao", 25L), 37 | new KeyValue("bob", 19L), 38 | new KeyValue("dave", 56L), 39 | new KeyValue("eve", 78L), 40 | new KeyValue("alice", 40L), 41 | new KeyValue("fang", 99L) 42 | ) 43 | 44 | // Input 2: Region per user (multiple records allowed per user). 45 | val userRegions: Seq[KeyValue[String, String]] = Seq( 46 | new KeyValue("alice", "asia"), /* Alice lived in Asia originally... */ 47 | new KeyValue("bob", "americas"), 48 | new KeyValue("chao", "asia"), 49 | new KeyValue("dave", "europe"), 50 | new KeyValue("alice", "europe"), /* ...but moved to Europe some time later. 
*/ 51 | new KeyValue("eve", "americas"), 52 | new KeyValue("fang", "asia") 53 | ) 54 | 55 | val expectedClicksPerRegion: Seq[KeyValue[String, Long]] = Seq( 56 | new KeyValue("americas", 101L), 57 | new KeyValue("europe", 109L), 58 | new KeyValue("asia", 124L) 59 | ) 60 | } 61 | -------------------------------------------------------------------------------- /src/test/scala/com/lightbend/kafka/scala/streams/algebird/CMSStore.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright Confluent Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.lightbend.kafka.scala.streams 17 | package algebird 18 | 19 | import com.twitter.algebird.{CMSHasher, TopCMS, TopPctCMS} 20 | import org.apache.kafka.common.serialization.Serdes 21 | import org.apache.kafka.streams.processor.{ProcessorContext, StateStore} 22 | import org.apache.kafka.streams.state.StateSerdes 23 | 24 | /** 25 | * An in-memory store that leverages the Count-Min Sketch implementation of 26 | * [[https://github.com/twitter/algebird Twitter Algebird]]. 27 | * 28 | * This store allows you to probabilistically count items of type T with a 29 | * [[https://en.wikipedia.org/wiki/Count%E2%80%93min_sketch Count-Min Sketch]] data structure. 30 | * Here, the counts returned by the store will be approximate counts, i.e. estimations, because a 31 | * Count-Min Sketch trades slightly inaccurate counts for greatly reduced space utilization 32 | * (however, the estimation error is mathematically proven to be bounded). 33 | * With probability at least `1 - delta`, this estimate is within `eps * N` of the true frequency 34 | * (i.e., `true frequency <= estimate <= true frequency + eps * N`), where `N` is the total number 35 | * of items counted ("seen" in the input) so far (cf. [[CMSStore#totalCount]]). 36 | * 37 | * A traditional Count-Min Sketch is a fixed-size data structure that is essentially an array of 38 | * counters of a particular width (derived from the parameter `eps`) and depth (derived from the 39 | * parameter `delta`). The CMS variant used in this store, [[TopPctCMS]], additionally tracks the 40 | * so-called "heavy hitters" among the counted items (i.e. the items with the largest counts) based 41 | * on a percentage threshold; the size of heavy hitters is still bounded, however, hence the total 42 | * size of the [[TopPctCMS]] data structure is still fixed. 43 | * 44 | * =Fault-tolerance= 45 | * 46 | * This store supports changelogging its state to Kafka and is thus fault-tolerant. Every time the 47 | * store is flushed (cf. [[org.apache.kafka.streams.StreamsConfig.COMMIT_INTERVAL_MS_CONFIG]]) the 48 | * underlying CMS data structure is written to the store's changelog topic. For many use cases 49 | * this approach should be sufficiently efficient because the absolute size of a CMS is typically 50 | * rather small (a few KBs up to a megabyte, depending on the CMS settings, which are determined by 51 | * e.g. 
your error bound requirements for approximate counts). 52 | * 53 | * =Usage= 54 | * 55 | * Note: Twitter Algebird is best used with Scala, so all the examples below are in Scala, too. 56 | * 57 | * In a Kafka Streams application, you'd typically create this store as such: 58 | * 59 | * {{{ 60 | * val builder: StreamsBuilder = new StreamsBuilder() 61 | * 62 | * // In this example, we create a store for type [[String]]. 63 | * // It's recommended to reduce Kafka's log segment size for the changelogs of CMS stores, which 64 | * // you can do by passing the respective Kafka setting to the CMSStoreBuilder via `withLoggingEnabled()`. 65 | * builder.addStateStore(new CMSStoreBuilder[String]("my-cms-store-name", Serdes.String())) 66 | * }}} 67 | * 68 | * Then you'd use the store within a [[org.apache.kafka.streams.processor.Processor]] or a 69 | * [[org.apache.kafka.streams.kstream.Transformer]] similar to: 70 | * 71 | * {{{ 72 | * class ProbabilisticCounter extends Transformer[Array[Byte], String, KeyValue[String, Long]] { 73 | * 74 | * private var cmsState: CMSStore[String] = _ 75 | * private var processorContext: ProcessorContext = _ 76 | * 77 | * override def init(processorContext: ProcessorContext): Unit = { 78 | * this.processorContext = processorContext 79 | * cmsState = this.processorContext.getStateStore("my-cms-store-name").asInstanceOf[CMSStore[String]] 80 | * } 81 | * 82 | * override def transform(key: Array[Byte], value: String): KeyValue[String, Long] = { 83 | * // Count the record value, think: "+ 1" 84 | * cmsState.put(value) 85 | * 86 | * // Emit the latest count estimate for the record value 87 | * KeyValue.pair[String, Long](value, cmsState.get(value)) 88 | * } 89 | * 90 | * override def punctuate(l: Long): KeyValue[String, Long] = null 91 | * 92 | * override def close(): Unit = {} 93 | * } 94 | * }}} 95 | * 96 | * @param name The name of this store instance 97 | * @param loggingEnabled Whether or not changelogging (fault-tolerance) is enabled for this store. 98 | * @param delta CMS parameter: A bound on the probability that a query estimate does not 99 | * lie within some small interval (an interval that depends on `eps`) around 100 | * the truth. 101 | * See [[TopPctCMS]] and [[com.twitter.algebird.CMSMonoid]]. 102 | * @param eps CMS parameter: One-sided error bound on the error of each point query, 103 | * i.e. frequency estimate. 104 | * See [[TopPctCMS]] and [[com.twitter.algebird.CMSMonoid]]. 105 | * @param seed CMS parameter: A seed to initialize the random number generator used to 106 | * create the pairwise independent hash functions. Typically you do not 107 | * need to change this. 108 | * See [[TopPctCMS]] and [[com.twitter.algebird.CMSMonoid]]. 109 | * @param heavyHittersPct CMS parameter: A threshold for finding heavy hitters, i.e., items that 110 | * appear at least (heavyHittersPct * totalCount) times in the stream. 111 | * Every item that appears at least `(heavyHittersPct * totalCount)` times 112 | * is included, and with probability `p >= 1 - delta`, no item whose count 113 | * is less than `(heavyHittersPct - eps) * totalCount` is included. 114 | * This also means that this parameter is an upper bound on the number of 115 | * heavy hitters that will be tracked: the set of heavy hitters contains at 116 | * most `1 / heavyHittersPct` elements. For example, if 117 | * `heavyHittersPct=0.01` (or 0.25), then at most `1 / 0.01 = 100` items 118 | * or `1 / 0.25 = 4` items) will be tracked/returned as heavy hitters. 
119 | * This parameter can thus control the memory footprint required for 120 | * tracking heavy hitters. 121 | * See [[TopPctCMS]] and [[com.twitter.algebird.TopPctCMSMonoid]]. 122 | * @tparam T The type used to identify the items to be counted with the CMS. For example, if 123 | * you want to count the occurrence of user names, you could use count user names 124 | * directly with `T=String`; alternatively, you could map each username to a unique 125 | * numeric ID expressed as a `Long`, and then count the occurrences of those `Long`s with 126 | * a CMS of type `T=Long`. Note that such a mapping between the items of your problem 127 | * domain and their identifiers used for counting via CMS should be bijective. 128 | * We require a [[CMSHasher]] context bound for `K`, see [[CMSHasher]] for available 129 | * implicits that can be imported. 130 | * See [[com.twitter.algebird.CMSMonoid]] for further information. 131 | */ 132 | class CMSStore[T: CMSHasher](override val name: String, 133 | val loggingEnabled: Boolean = true, 134 | val delta: Double = 1E-10, 135 | val eps: Double = 0.001, 136 | val seed: Int = 1, 137 | val heavyHittersPct: Double = 0.01) 138 | extends StateStore { 139 | 140 | private val cmsMonoid = TopPctCMS.monoid[T](eps, delta, seed, heavyHittersPct) 141 | 142 | /** 143 | * The "storage backend" of this store. 144 | * 145 | * Needs proper initializing in case the store's changelog is empty. 146 | */ 147 | private var cms: TopCMS[T] = cmsMonoid.zero 148 | 149 | private var timestampOfLastStateStoreUpdate: Long = 0L 150 | 151 | private var changeLogger: CMSStoreChangeLogger[Integer, TopCMS[T]] = _ 152 | 153 | /** 154 | * The record key used to write to the state's changelog. 155 | * 156 | * This key can be a constant because: 157 | * 158 | * 1. We always write the full CMS when writing to the changelog. 159 | * 2. A CMS does not retain information about which items were counted, i.e. it does not track 160 | * information about the keyspace (in the case of this store, the only information about the 161 | * keyspace are the heavy hitters); so, unless we opted for a different approach than (1) 162 | * above, we cannot leverage keyspace information anyways. 163 | * 3. We use a [[CMSStoreChangeLogger]] that uses a stream task's 164 | * [[org.apache.kafka.streams.processor.TaskId]] to identify the changelog partition to write to. 165 | * Thus only one particular stream task will ever be writing to that changelog partition. 166 | * 4. When restoring from the changelog, a stream task will read only its own (one) changelog 167 | * partition. 168 | * 169 | * In other words, we can hardcode the changelog key because only the "right" stream task will be 170 | * (a) writing to AND (b) reading from the respective partition of the changelog. 171 | */ 172 | private[algebird] val changelogKey = 42 173 | 174 | /** 175 | * For unit testing 176 | */ 177 | private[algebird] def cmsFrom(items: Seq[T]): TopCMS[T] = cmsMonoid.create(items) 178 | 179 | /** 180 | * For unit testing 181 | */ 182 | private[algebird] def cmsFrom(item: T): TopCMS[T] = cmsMonoid.create(item) 183 | 184 | @volatile private var open: Boolean = false 185 | 186 | /** 187 | * Initializes this store, including restoring the store's state from its changelog. 
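 *
 * During restore, the register callback below swaps in the CMS deserialized from the changelog,
 * or resets it to `cmsMonoid.zero` when the restored value is null.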
188 | */ 189 | override def init(context: ProcessorContext, root: StateStore) { 190 | val serdes = new StateSerdes[Integer, TopCMS[T]](name, Serdes.Integer(), TopCMSSerde[T]) 191 | changeLogger = new CMSStoreChangeLogger[Integer, TopCMS[T]](name, context, serdes) 192 | 193 | // Note: We must manually guard with `loggingEnabled` here because `context.register()` ignores 194 | // that parameter. 195 | if (root != null && loggingEnabled) 196 | context.register(root, 197 | loggingEnabled, 198 | (_, value) => 199 | if (value == null) 200 | cms = cmsMonoid.zero 201 | else 202 | cms = serdes.valueFrom(value)) 203 | 204 | open = true 205 | } 206 | 207 | /** 208 | * Returns the estimated count of the item. 209 | * 210 | * @param item item to be counted 211 | * @return estimated count 212 | */ 213 | def get(item: T): Long = cms.frequency(item).estimate 214 | 215 | /** 216 | * Counts the item. 217 | * 218 | * @param item item to be counted 219 | */ 220 | def put(item: T, timestamp: Long): Unit = { 221 | cms = cms + item 222 | timestampOfLastStateStoreUpdate = timestamp 223 | } 224 | 225 | /** 226 | * The top items counted so far, with the percentage-based cut-off being defined by the CMS 227 | * parameter `heavyHittersPct`. 228 | * 229 | * @return the top items counted so far 230 | */ 231 | def heavyHitters: Set[T] = cms.heavyHitters 232 | 233 | /** 234 | * Returns the total number of items counted ("seen" in the input) so far. 235 | * 236 | * This number is not the same as the total number of unique items counted so far, i.e. 237 | * it is not the cardinality of the set of items. 238 | * 239 | * Example: After having counted the input "foo", "bar", "foo", the return value would be 3. 240 | * 241 | * @return number of count operations so far 242 | */ 243 | def totalCount: Long = cms.totalCount 244 | 245 | override val persistent: Boolean = false 246 | 247 | override def isOpen: Boolean = open 248 | 249 | /** 250 | * Periodically saves the latest CMS state to Kafka. 251 | * 252 | * =Implementation detail= 253 | * 254 | * The changelog records have the form: (hardcodedKey, CMS). That is, we are backing up the 255 | * underlying CMS data structure in its entirety to Kafka. 256 | */ 257 | override def flush() { 258 | if (loggingEnabled) 259 | changeLogger.logChange(changelogKey, cms, timestampOfLastStateStoreUpdate) 260 | } 261 | 262 | override def close() { 263 | open = false 264 | } 265 | 266 | } 267 | -------------------------------------------------------------------------------- /src/test/scala/com/lightbend/kafka/scala/streams/algebird/CMSStoreBuilder.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright Confluent Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.lightbend.kafka.scala.streams 17 | package algebird 18 | 19 | import java.util 20 | 21 | import com.twitter.algebird.CMSHasher 22 | import org.apache.kafka.common.serialization.Serde 23 | import org.apache.kafka.streams.state.StoreBuilder 24 | 25 | /** 26 | * A factory for Kafka Streams to instantiate a [[CMSStore]]. 27 | * 28 | * =Usage= 29 | * 30 | * The [[CMSStore]]'s changelog will typically have rather few and small records per partition. 31 | * To improve efficiency we thus set a smaller log segment size (`segment.bytes`) than Kafka's 32 | * default of 1GB. 33 | * 34 | * {{{ 35 | * val changeloggingEnabled = true 36 | * val changelogConfig = { 37 | * val cfg = new java.util.HashMap[String, String] 38 | * val segmentSizeBytes = (20 * 1024 * 1024).toString 39 | * cfg.put("segment.bytes", segmentSizeBytes) 40 | * cfg 41 | * } 42 | * new CMSStoreSupplier[String](cmsStoreName, Serdes.String(), changeloggingEnabled, changelogConfig) 43 | * }}} 44 | */ 45 | class CMSStoreBuilder[T: CMSHasher](val name: String, val serde: Serde[T]) extends StoreBuilder[CMSStore[T]] { 46 | 47 | var loggingEnabled = false 48 | var logConfig: util.Map[String, String] = new util.HashMap[String, String]() 49 | 50 | override def build(): CMSStore[T] = new CMSStore[T](name, loggingEnabled) 51 | 52 | override def withCachingEnabled(): StoreBuilder[CMSStore[T]] = 53 | throw new UnsupportedOperationException("caching not supported") 54 | 55 | override def withLoggingEnabled(config: util.Map[String, String]): CMSStoreBuilder[T] = { 56 | loggingEnabled = true 57 | logConfig.clear() 58 | logConfig.putAll(config) 59 | this 60 | } 61 | 62 | override def withLoggingDisabled(): CMSStoreBuilder[T] = { 63 | loggingEnabled = false 64 | logConfig.clear() 65 | this 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /src/test/scala/com/lightbend/kafka/scala/streams/algebird/CMSStoreChangeLogger.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright Confluent Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.lightbend.kafka.scala.streams 17 | package algebird 18 | 19 | import org.apache.kafka.streams.processor.ProcessorContext 20 | import org.apache.kafka.streams.processor.internals.{ProcessorStateManager, RecordCollector} 21 | import org.apache.kafka.streams.state.StateSerdes 22 | 23 | /** 24 | * Copied from Kafka's [[org.apache.kafka.streams.state.internals.StoreChangeLogger]]. 25 | * 26 | * If StoreChangeLogger had been public, we would have used it as-is. 27 | * 28 | * Note that the use of array-typed keys is discouraged because they result in incorrect caching 29 | * behavior. If you intend to work on byte arrays as key, for example, you may want to wrap them 30 | * with the [[org.apache.kafka.common.utils.Bytes]] class. 
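 *
 * =Usage=
 *
 * A sketch of how [[CMSStore]] drives this logger (cf. `CMSStore.init` and `CMSStore.flush`); here
 * `context`, `cms` and `timestampOfLastStateStoreUpdate` stand for whatever the calling store holds:
 *
 * {{{
 * val serdes = new StateSerdes[Integer, TopCMS[String]](storeName, Serdes.Integer(), TopCMSSerde[String])
 * val changeLogger = new CMSStoreChangeLogger[Integer, TopCMS[String]](storeName, context, serdes)
 *
 * // On every flush, the full CMS is backed up to the changelog under a constant key.
 * changeLogger.logChange(changelogKey, cms, timestampOfLastStateStoreUpdate)
 * }}}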
31 | */ 32 | class CMSStoreChangeLogger[K, V](val storeName: String, 33 | val context: ProcessorContext, 34 | val partition: Int, 35 | val serialization: StateSerdes[K, V]) { 36 | 37 | private val topic = ProcessorStateManager.storeChangelogTopic(context.applicationId, storeName) 38 | private val collector = context.asInstanceOf[RecordCollector.Supplier].recordCollector 39 | 40 | def this(storeName: String, context: ProcessorContext, serialization: StateSerdes[K, V]) = 41 | this(storeName, context, context.taskId.partition, serialization) 42 | 43 | def logChange(key: K, value: V, timestamp: Long) = 44 | if (collector != null) { 45 | val keySerializer = serialization.keySerializer 46 | val valueSerializer = serialization.valueSerializer 47 | collector.send(this.topic, key, value, this.partition, timestamp, keySerializer, valueSerializer) 48 | } 49 | 50 | } 51 | -------------------------------------------------------------------------------- /src/test/scala/com/lightbend/kafka/scala/streams/algebird/TopCMSSerde.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright Confluent Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.lightbend.kafka.scala.streams 17 | package algebird 18 | 19 | import java.util 20 | 21 | import com.twitter.algebird.TopCMS 22 | import com.twitter.chill.ScalaKryoInstantiator 23 | import org.apache.kafka.common.errors.SerializationException 24 | import org.apache.kafka.common.serialization._ 25 | 26 | class TopCMSSerializer[T] extends Serializer[TopCMS[T]] { 27 | 28 | override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = { 29 | // nothing to do 30 | } 31 | 32 | override def serialize(topic: String, cms: TopCMS[T]): Array[Byte] = 33 | if (cms == null) null // scalastyle:ignore 34 | else ScalaKryoInstantiator.defaultPool.toBytesWithClass(cms) 35 | 36 | override def close(): Unit = { 37 | // nothing to do 38 | } 39 | 40 | } 41 | 42 | class TopCMSDeserializer[T] extends Deserializer[TopCMS[T]] { 43 | 44 | override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = { 45 | // nothing to do 46 | } 47 | 48 | //scalastyle:off null 49 | override def deserialize(topic: String, bytes: Array[Byte]): TopCMS[T] = 50 | if (bytes == null) null 51 | else if (bytes.isEmpty) throw new SerializationException("byte array must not be empty") 52 | else ScalaKryoInstantiator.defaultPool.fromBytes(bytes).asInstanceOf[TopCMS[T]] 53 | //scalastyle:on null 54 | override def close(): Unit = { 55 | // nothing to do 56 | } 57 | 58 | } 59 | 60 | /** 61 | * A [[Serde]] for [[TopCMS]]. 62 | * 63 | * =Usage= 64 | * 65 | * {{{ 66 | * val anyTopic = "any-topic" 67 | * val cms: TopCMS[String] = ??? 
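 * // e.g. (illustrative parameters, mirroring CMSStore's defaults):
 * // TopPctCMS.monoid[String](0.001, 1E-10, 1, 0.01).create(Seq("foo", "bar", "foo"))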
68 | * val serde: Serde[TopCMS[String]] = TopCMSSerde[String] 69 | * 70 | * val bytes: Array[Byte] = serde.serializer().serialize(anyTopic, cms) 71 | * val restoredCms: TopCMS[String] = serde.deserializer().deserialize(anyTopic, bytes) 72 | * }}} 73 | * 74 | * =Future Work= 75 | * 76 | * We could perhaps be more efficient if we serialized not the full [[TopCMS]] instance but only 77 | * its relevant fields. 78 | */ 79 | object TopCMSSerde { 80 | 81 | def apply[T]: Serde[TopCMS[T]] = Serdes.serdeFrom(new TopCMSSerializer[T], new TopCMSDeserializer[T]) 82 | 83 | } 84 | --------------------------------------------------------------------------------
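A minimal end-to-end sketch (not part of the repository) of the serde defined above: it builds a small TopCMS with the same parameters CMSStore uses by default, round-trips it through TopCMSSerde, and checks that the estimates survive. The object name and topic string are illustrative only, and an implicit CMSHasher[String] from Algebird is assumed to be in scope, as in the examples above.

import com.twitter.algebird.{TopCMS, TopPctCMS}
import com.lightbend.kafka.scala.streams.algebird.TopCMSSerde

object TopCMSSerdeRoundTrip extends App {
  // (eps, delta, seed, heavyHittersPct) -- the same defaults CMSStore uses.
  val cms: TopCMS[String] =
    TopPctCMS.monoid[String](0.001, 1E-10, 1, 0.01).create(Seq("foo", "bar", "foo"))

  val serde = TopCMSSerde[String]
  val bytes = serde.serializer().serialize("any-topic", cms)           // Kryo-serialized full CMS
  val restored = serde.deserializer().deserialize("any-topic", bytes)  // ...and back again

  assert(restored.frequency("foo").estimate == cms.frequency("foo").estimate)
  assert(restored.totalCount == 3L) // "foo", "bar", "foo" => 3 count operations
}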