├── lib
├── project
│ ├── build.properties
│ ├── Versions.scala
│ └── Dependencies.scala
├── src
│ └── main
│ │ └── scala
│ │ └── com
│ │ └── lightbend
│ │ └── kafka
│ │ └── scala
│ │ ├── iq
│ │ ├── serializers
│ │ │ ├── ModelSerializer.scala
│ │ │ ├── SpecificAvroSerDeserializer.scala
│ │ │ ├── SpecificAvroSerde.scala
│ │ │ └── Serializers.scala
│ │ ├── http
│ │ │ ├── HttpRequester.scala
│ │ │ ├── InteractiveQueryHttpService.scala
│ │ │ └── KeyValueFetcher.scala
│ │ └── services
│ │ │ ├── MetadataService.scala
│ │ │ └── LocalStateStoreQuery.scala
│ │ └── package.scala
├── build.sbt
└── README.md
├── examples
├── project
│ ├── build.properties
│ ├── plugins.sbt
│ ├── Versions.scala
│ ├── Dependencies.scala
│ └── Common.scala
├── kafka-local-server
│ └── src
│ │ └── main
│ │ └── scala
│ │ └── com
│ │ └── lightbend
│ │ └── kafka
│ │ └── scala
│ │ └── server
│ │ ├── RecordProcessorTrait.scala
│ │ ├── Utils.scala
│ │ ├── MessageSender.scala
│ │ ├── MessageListener.scala
│ │ └── KafkaLocalServer.scala
├── example-dsl
│ ├── src
│ │ └── main
│ │ │ ├── scala
│ │ │ └── com
│ │ │ │ └── lightbend
│ │ │ │ └── kafka
│ │ │ │ └── scala
│ │ │ │ └── iq
│ │ │ │ ├── example
│ │ │ │ ├── models
│ │ │ │ │ ├── LogRecord.scala
│ │ │ │ │ └── LogParseUtil.scala
│ │ │ │ ├── serializers
│ │ │ │ │ ├── Tuple2Serializer.scala
│ │ │ │ │ ├── SpecificAvroSerdeWithSchemaRegistry.scala
│ │ │ │ │ ├── SpecificAvroDeserializerWithSchemaRegistry.scala
│ │ │ │ │ ├── SpecificAvroSerializerWithSchemaRegistry.scala
│ │ │ │ │ └── AppSerializers.scala
│ │ │ │ ├── http
│ │ │ │ │ ├── SummaryInfoFetcher.scala
│ │ │ │ │ └── WeblogDSLHttpService.scala
│ │ │ │ ├── ingestion
│ │ │ │ │ └── DataIngestion.scala
│ │ │ │ ├── config
│ │ │ │ │ └── KStreamConfig.scala
│ │ │ │ ├── WeblogWorkflow.scala
│ │ │ │ ├── WeblogProcessing.scala
│ │ │ │ └── package.scala
│ │ │ └── resources
│ │ │ ├── log4j.properties
│ │ │ ├── com
│ │ │ └── lightbend
│ │ │ │ └── kafka
│ │ │ │ └── scala
│ │ │ │ └── iq
│ │ │ │ └── example
│ │ │ │ └── LogRecord.avsc
│ │ │ ├── logback-dsl.xml
│ │ │ └── application-dsl.conf.template
│ └── README.md
├── example-proc
│ ├── src
│ │ └── main
│ │ │ ├── scala
│ │ │ └── com
│ │ │ │ └── lightbend
│ │ │ │ └── kafka
│ │ │ │ └── scala
│ │ │ │ └── iq
│ │ │ │ ├── example
│ │ │ │ ├── models
│ │ │ │ │ ├── LogRecord.scala
│ │ │ │ │ └── LogParseUtil.scala
│ │ │ │ ├── serializers
│ │ │ │ │ ├── AppSerializers.scala
│ │ │ │ │ └── Tuple2Serializer.scala
│ │ │ │ ├── services
│ │ │ │ │ └── LocalStateStoreQuery.scala
│ │ │ │ ├── processor
│ │ │ │ │ ├── BFStoreSupplier.scala
│ │ │ │ │ ├── BFStoreType.scala
│ │ │ │ │ ├── BFStoreBuilder.scala
│ │ │ │ │ ├── BFStoreChangeLogger.scala
│ │ │ │ │ ├── WeblogProcessor.scala
│ │ │ │ │ ├── BFSerde.scala
│ │ │ │ │ └── BFStore.scala
│ │ │ │ ├── http
│ │ │ │ │ ├── WeblogProcHttpService.scala
│ │ │ │ │ └── BFValueFetcher.scala
│ │ │ │ ├── ingestion
│ │ │ │ │ └── DataIngestion.scala
│ │ │ │ ├── config
│ │ │ │ │ └── KStreamConfig.scala
│ │ │ │ ├── WeblogDriver.scala
│ │ │ │ └── WeblogWorkflow.scala
│ │ │ │ └── package.scala
│ │ │ └── resources
│ │ │ ├── log4j.properties
│ │ │ ├── logback-proc.xml
│ │ │ └── application-proc.conf.template
│ └── README.md
└── build.sbt
├── .travis.yml
├── .gitignore
├── README.md
└── LICENSE
/lib/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.1.0 2 | -------------------------------------------------------------------------------- /examples/project/build.properties:
-------------------------------------------------------------------------------- 1 | sbt.version=1.1.0 2 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: scala 2 | sudo: false 3 | jdk: oraclejdk8 4 | cache: 5 | directories: 6 | - "$HOME/.ivy2/cache" 7 | - "$HOME/.sbt/launchers" 8 | before_cache: 9 | - find $HOME/.sbt -name "*.lock" | xargs rm 10 | - find $HOME/.ivy2 -name "ivydata-*.properties" | xargs rm 11 | matrix: 12 | include: 13 | - env: PROJECT="lib" 14 | script: 15 | - cd "${PROJECT}" 16 | - sbt +test 17 | -------------------------------------------------------------------------------- /examples/kafka-local-server/src/main/scala/com/lightbend/kafka/scala/server/RecordProcessorTrait.scala: -------------------------------------------------------------------------------- 1 | package com.lightbend.kafka.scala.server 2 | 3 | import org.apache.kafka.clients.consumer.ConsumerRecord 4 | 5 | // A trait, that should be implemented by any listener implementation 6 | 7 | trait RecordProcessorTrait[K, V] { 8 | def processRecord(record: ConsumerRecord[K, V]): Unit 9 | } 10 | -------------------------------------------------------------------------------- /examples/project/plugins.sbt: -------------------------------------------------------------------------------- 1 | resolvers += "Bintray Repository" at "https://dl.bintray.com/shmishleniy/" 2 | 3 | resolvers += "JAnalyse Repository" at "http://www.janalyse.fr/repository/" 4 | 5 | addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.5") 6 | addSbtPlugin("net.virtual-void" % "sbt-dependency-graph" % "0.9.0") 7 | addSbtPlugin("com.typesafe.sbt" % "sbt-native-packager" % "1.3.2") 8 | 9 | addSbtPlugin("com.cavorite" % "sbt-avro-1-8" % "1.1.3") 10 | 11 | -------------------------------------------------------------------------------- /examples/example-dsl/src/main/scala/com/lightbend/kafka/scala/iq/example/models/LogRecord.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | */ 4 | 5 | package com.lightbend.kafka.scala.iq.example 6 | package models 7 | 8 | import java.time.OffsetDateTime 9 | 10 | case class LogRecord( 11 | host: String, 12 | clientId: String, 13 | user: String, 14 | timestamp: OffsetDateTime, 15 | method: String, 16 | endpoint: String, 17 | protocol: String, 18 | httpReplyCode: Int, 19 | payloadSize: Long 20 | ) 21 | -------------------------------------------------------------------------------- /examples/example-proc/src/main/scala/com/lightbend/kafka/scala/iq/example/models/LogRecord.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 
3 | */ 4 | 5 | package com.lightbend.kafka.scala.iq.example 6 | package models 7 | 8 | import java.time.OffsetDateTime 9 | 10 | case class LogRecord( 11 | host: String, 12 | clientId: String, 13 | user: String, 14 | timestamp: OffsetDateTime, 15 | method: String, 16 | endpoint: String, 17 | protocol: String, 18 | httpReplyCode: Int, 19 | payloadSize: Long 20 | ) 21 | -------------------------------------------------------------------------------- /lib/project/Versions.scala: -------------------------------------------------------------------------------- 1 | object Versions { 2 | val algebirdVersion = "0.13.0" 3 | val chillVersion = "0.9.2" 4 | val logbackVersion = "1.2.3" 5 | val kafkaVersion = "1.0.0" 6 | val scalaLoggingVersion = "3.5.0" 7 | val curatorVersion = "4.0.0" 8 | val minitestVersion = "2.0.0" 9 | val JDKVersion = "1.8" 10 | val scalaVersion = "2.12.4" 11 | val crossScalaVersions = Seq(scalaVersion, "2.11.11") 12 | val circeVersion = "0.8.0" 13 | val akkaVersion = "2.5.3" 14 | val akkaHttpVersion = "10.0.11" 15 | val akkaHttpCirceVersion = "1.17.0" 16 | val bijectionVersion = "0.9.5" 17 | } 18 | -------------------------------------------------------------------------------- /examples/example-dsl/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Set root logger level to DEBUG and its only appender to A1. 2 | log4j.rootLogger=ERROR, R 3 | 4 | # A1 is set to be a ConsoleAppender. 5 | log4j.appender.A1=org.apache.log4j.ConsoleAppender 6 | 7 | log4j.appender.R=org.apache.log4j.RollingFileAppender 8 | log4j.appender.R.File=logs/kafka-server.log 9 | 10 | log4j.appender.R.MaxFileSize=100KB 11 | # Keep one backup file 12 | log4j.appender.R.MaxBackupIndex=1 13 | 14 | # A1 uses PatternLayout. 15 | log4j.appender.R.layout=org.apache.log4j.PatternLayout 16 | log4j.appender.R.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n 17 | -------------------------------------------------------------------------------- /examples/example-proc/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Set root logger level to DEBUG and its only appender to A1. 2 | log4j.rootLogger=ERROR, R 3 | 4 | # A1 is set to be a ConsoleAppender. 5 | log4j.appender.A1=org.apache.log4j.ConsoleAppender 6 | 7 | log4j.appender.R=org.apache.log4j.RollingFileAppender 8 | log4j.appender.R.File=logs/kafka-server.log 9 | 10 | log4j.appender.R.MaxFileSize=100KB 11 | # Keep one backup file 12 | log4j.appender.R.MaxBackupIndex=1 13 | 14 | # A1 uses PatternLayout. 15 | log4j.appender.R.layout=org.apache.log4j.PatternLayout 16 | log4j.appender.R.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n 17 | -------------------------------------------------------------------------------- /examples/example-proc/src/main/scala/com/lightbend/kafka/scala/iq/example/serializers/AppSerializers.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 
3 | */ 4 | 5 | package com.lightbend.kafka.scala.iq.example 6 | package serializers 7 | 8 | import models.LogRecord 9 | import org.apache.kafka.common.serialization.Serdes 10 | import com.lightbend.kafka.scala.iq.serializers._ 11 | 12 | trait AppSerializers extends Serializers { 13 | final val ts = new Tuple2Serializer[String, String]() 14 | final val ms = new ModelSerializer[LogRecord]() 15 | final val logRecordSerde = Serdes.serdeFrom(ms, ms) 16 | final val tuple2StringSerde = Serdes.serdeFrom(ts, ts) 17 | } 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.class 2 | *.log 3 | .cache 4 | .history 5 | .DS_Store 6 | .lib/ 7 | app/* 8 | dist/* 9 | target/ 10 | tmp/ 11 | logs/ 12 | build/ 13 | lib_managed/ 14 | src_managed/ 15 | project/boot/ 16 | project/target/ 17 | project/project/ 18 | project/plugins/project/ 19 | #idea 20 | .idea 21 | *.iml 22 | .idea_modules 23 | *.json 24 | *.json-- 25 | tmp/ 26 | local_state_data/ 27 | *.swp 28 | .scala_dependencies 29 | .worksheet 30 | release/staging/ 31 | 32 | lib/project/build.properties 33 | examples/project/build.properties 34 | examples/kafka-local-server/project/build.properties 35 | 36 | examples/example-proc/src/main/resources/application-proc.conf 37 | examples/example-dsl/src/main/resources/application-dsl.conf 38 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # This library is not maintained anymore. 2 | 3 | 4 | ### HTTP Endpoints for Interactive Queries for Kafka Streams 5 | 6 | [![Build Status](https://secure.travis-ci.org/lightbend/kafka-streams-query.png)](http://travis-ci.org/lightbend/kafka-streams-query) 7 | 8 | Library offering http based query on top of Kafka Streams Interactive Queries. The project has 2 parts: 9 | 10 | 1. The core library, as described [here](lib/README.md) 11 | 2. A couple of example implementations, as described [here](examples/example-dsl/README.md) and [here](examples/example-proc/README.md) 12 | 13 | Please go through the above links to learn more about the library. 14 | 15 | > **NOTE:** This functionality may now exist in Kafka Streams itself. Check its current capabilities before introducing this library to your projects. 
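To give a concrete feel for "http based query": the sketch below is not part of the repository; it assumes a running example-dsl instance, and the object name, host, port and host key are placeholders. The path mirrors the requery paths used in SummaryInfoFetcher further down, so check the example READMEs and WeblogDSLHttpService for the exact routes and port your instance exposes.

```scala
import akka.actor.ActorSystem
import akka.http.scaladsl.Http
import akka.http.scaladsl.model.HttpRequest
import akka.http.scaladsl.unmarshalling.Unmarshal
import akka.stream.ActorMaterializer
import scala.concurrent.Future

object InteractiveQueryClientSketch extends App {
  implicit val system: ActorSystem = ActorSystem("iq-client")
  implicit val mat: ActorMaterializer = ActorMaterializer()
  import system.dispatcher

  // fetch the access count the DSL example keeps per host key
  // (localhost:7070 and the key are placeholders)
  val accessCount: Future[String] =
    Http()
      .singleRequest(HttpRequest(uri = "http://localhost:7070/weblog/access/world.std.com"))
      .flatMap(resp => Unmarshal(resp.entity).to[String])

  accessCount.foreach(println)
}
```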
16 | -------------------------------------------------------------------------------- /examples/project/Versions.scala: -------------------------------------------------------------------------------- 1 | object Versions { 2 | val ksVersion = "0.1.2" 3 | val kqVersion = "0.1.1" 4 | val scala2_12Version = "2.12.4" 5 | val scala2_11Version = "2.11.11" 6 | val scalaVersion = scala2_12Version 7 | val crossScalaVersions = Seq(scala2_12Version, scala2_11Version) 8 | val algebirdVersion = "0.13.0" 9 | val chillVersion = "0.9.2" 10 | val bijectionVersion = "0.9.5" 11 | val alpakkaFileVersion = "0.16" 12 | val reactiveKafkaVersion = "0.18" 13 | val confluentPlatformVersion = "3.3.0" 14 | val akkaVersion = "2.5.3" 15 | val akkaHttpVersion = "10.0.11" 16 | val akkaHttpCirceVersion = "1.17.0" 17 | val circeVersion = "0.8.0" 18 | val scalaLoggingVersion = "3.5.0" 19 | val logbackVersion = "1.2.3" 20 | val curatorVersion = "4.0.0" 21 | val kafkaVersion = "1.0.0" 22 | } 23 | 24 | -------------------------------------------------------------------------------- /lib/src/main/scala/com/lightbend/kafka/scala/iq/serializers/ModelSerializer.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | */ 4 | 5 | package com.lightbend.kafka.scala.iq 6 | package serializers 7 | 8 | import java.util.Map 9 | 10 | import io.circe._, io.circe.parser._, io.circe.syntax._ 11 | 12 | 13 | class ModelSerializer[T : Encoder : Decoder] extends SerDeserializer[T] { 14 | 15 | override def configure(configs: Map[String, _], isKey: Boolean): Unit = () 16 | 17 | override def serialize(topic: String, t: T): Array[Byte] = 18 | t.asJson.noSpaces.getBytes(CHARSET) 19 | 20 | override def deserialize(topic: String, bytes: Array[Byte]): T = 21 | decode[T](new String(bytes, CHARSET)) match { 22 | case Right(t) => t 23 | case Left(err) => throw new IllegalArgumentException(err.toString) 24 | } 25 | 26 | override def close(): Unit = () 27 | } 28 | -------------------------------------------------------------------------------- /lib/src/main/scala/com/lightbend/kafka/scala/iq/serializers/SpecificAvroSerDeserializer.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | */ 4 | 5 | package com.lightbend.kafka.scala.iq 6 | package serializers 7 | 8 | import com.twitter.bijection.Injection 9 | import org.apache.avro.specific.SpecificRecordBase 10 | import java.util.{Map => JMap} 11 | 12 | import scala.util.Try 13 | 14 | class SpecificAvroSerDeserializer[T <: SpecificRecordBase](injection: Injection[T, Array[Byte]]) extends SerDeserializer[T] { 15 | val inverted: Array[Byte] => Try[T] = injection.invert _ 16 | 17 | override def configure(configs: JMap[String, _], isKey: Boolean): Unit = () 18 | 19 | override def serialize(topic: String, record: T): Array[Byte] = injection(record) 20 | 21 | override def deserialize(s: String, bytes: Array[Byte]): T = inverted(bytes).get 22 | 23 | override def close(): Unit = () 24 | } 25 | -------------------------------------------------------------------------------- /examples/example-proc/src/main/scala/com/lightbend/kafka/scala/iq/example/services/LocalStateStoreQuery.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 
3 | */ 4 | 5 | package com.lightbend.kafka.scala.iq.example 6 | package services 7 | 8 | import org.apache.kafka.streams.KafkaStreams 9 | 10 | import scala.concurrent.{Future, ExecutionContext} 11 | import akka.actor.ActorSystem 12 | 13 | import processor.BFStoreType 14 | import com.twitter.algebird.Hash128 15 | 16 | import com.lightbend.kafka.scala.iq.services.LocalStateStoreQuery 17 | 18 | class AppStateStoreQuery[K, V] extends LocalStateStoreQuery[K, V] { 19 | 20 | def queryBFStore(streams: KafkaStreams, store: String, value: K) 21 | (implicit ex: ExecutionContext, mk: Hash128[K], as: ActorSystem): Future[Boolean] = { 22 | 23 | val q = new BFStoreType[K]()(mk) 24 | retry(streams.store(store, q), DelayBetweenRetries, MaxRetryCount)(ex, as.scheduler).map(_.read(value)) 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /examples/example-dsl/src/main/resources/com/lightbend/kafka/scala/iq/example/LogRecord.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "namespace": "com.lightbend.kafka.scala.iq.example", 3 | "type": "record", 4 | "name": "LogRecordAvro", 5 | "fields":[ 6 | { 7 | "name": "host", "type": "string" 8 | }, 9 | { 10 | "name": "clientId", "type": "string" 11 | }, 12 | { 13 | "name": "user", "type": "string" 14 | }, 15 | { 16 | "name": "timestamp", "type": "string" 17 | }, 18 | { 19 | "name": "method", "type": "string" 20 | }, 21 | { 22 | "name": "endpoint", "type": "string" 23 | }, 24 | { 25 | "name": "protocol", "type": "string" 26 | }, 27 | { 28 | "name": "httpReplyCode", "type": "int" 29 | }, 30 | { 31 | "name": "payloadSize", "type": "long" 32 | } 33 | ] 34 | } 35 | 36 | -------------------------------------------------------------------------------- /lib/src/main/scala/com/lightbend/kafka/scala/iq/serializers/SpecificAvroSerde.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | */ 4 | 5 | package com.lightbend.kafka.scala.iq 6 | package serializers 7 | 8 | import org.apache.kafka.common.serialization.{ Deserializer, Serde, Serializer } 9 | 10 | import org.apache.avro.Schema 11 | 12 | import com.twitter.bijection.Injection 13 | import com.twitter.bijection.avro.SpecificAvroCodecs 14 | 15 | import java.util.Map 16 | 17 | class SpecificAvroSerde[T <: org.apache.avro.specific.SpecificRecordBase](schema: Schema) extends Serde[T] { 18 | 19 | val recordInjection: Injection[T, Array[Byte]] = SpecificAvroCodecs.toBinary(schema) 20 | val avroSerde = new SpecificAvroSerDeserializer(recordInjection) 21 | 22 | override def serializer(): Serializer[T] = avroSerde 23 | 24 | override def deserializer(): Deserializer[T] = avroSerde 25 | 26 | override def configure(configs: Map[String, _], isKey: Boolean): Unit = () 27 | 28 | override def close(): Unit = () 29 | } 30 | -------------------------------------------------------------------------------- /examples/example-dsl/src/main/scala/com/lightbend/kafka/scala/iq/example/serializers/Tuple2Serializer.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 
3 | */ 4 | 5 | package com.lightbend.kafka.scala.iq.example 6 | package serializers 7 | 8 | import org.apache.kafka.common.serialization.{ Deserializer, Serializer } 9 | 10 | import io.circe._, io.circe.generic.auto._, io.circe.parser._, io.circe.syntax._ 11 | 12 | class Tuple2Serializer[T : Encoder : Decoder, 13 | U : Encoder : Decoder] extends Serializer[(T, U)] with Deserializer[(T, U)] { 14 | 15 | override def configure(configs: java.util.Map[String, _], isKey: Boolean) = {} 16 | 17 | override def serialize(topic: String, data: (T, U)) = 18 | data.asJson.noSpaces.getBytes(CHARSET) 19 | 20 | override def deserialize(topic: String, bytes: Array[Byte]) = { 21 | decode[(T, U)](new String(bytes, CHARSET)) match { 22 | case Right(t) => t 23 | case Left(err) => throw new IllegalArgumentException(err.toString) 24 | } 25 | } 26 | 27 | override def close() = {} 28 | } 29 | 30 | -------------------------------------------------------------------------------- /examples/example-proc/src/main/scala/com/lightbend/kafka/scala/iq/example/serializers/Tuple2Serializer.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | */ 4 | 5 | package com.lightbend.kafka.scala.iq.example 6 | package serializers 7 | 8 | import org.apache.kafka.common.serialization.{ Deserializer, Serializer } 9 | 10 | import io.circe._, io.circe.generic.auto._, io.circe.parser._, io.circe.syntax._ 11 | 12 | class Tuple2Serializer[T : Encoder : Decoder, 13 | U : Encoder : Decoder] extends Serializer[(T, U)] with Deserializer[(T, U)] { 14 | 15 | override def configure(configs: java.util.Map[String, _], isKey: Boolean) = {} 16 | 17 | override def serialize(topic: String, data: (T, U)) = 18 | data.asJson.noSpaces.getBytes(CHARSET) 19 | 20 | override def deserialize(topic: String, bytes: Array[Byte]) = { 21 | decode[(T, U)](new String(bytes, CHARSET)) match { 22 | case Right(t) => t 23 | case Left(err) => throw new IllegalArgumentException(err.toString) 24 | } 25 | } 26 | 27 | override def close() = {} 28 | } 29 | 30 | -------------------------------------------------------------------------------- /examples/kafka-local-server/src/main/scala/com/lightbend/kafka/scala/server/Utils.scala: -------------------------------------------------------------------------------- 1 | package com.lightbend.kafka.scala.server 2 | 3 | 4 | import java.io.File 5 | import java.nio.file.{ FileVisitOption, Files, Paths } 6 | import java.util.Comparator 7 | 8 | import scala.util.{ Try, Success, Failure } 9 | import scala.collection.JavaConverters._ 10 | 11 | object Utils { 12 | def deleteDirectory(directory: File): Try[Unit] = Try { 13 | if (directory.exists()) { 14 | val rootPath = Paths.get(directory.getAbsolutePath) 15 | 16 | val files = Files.walk(rootPath, FileVisitOption.FOLLOW_LINKS).sorted(Comparator.reverseOrder()).iterator().asScala 17 | files.foreach(Files.delete) 18 | } 19 | } 20 | 21 | def dataDirectory(baseDir: String, directoryName: String): Try[File] = Try { 22 | 23 | val dataDirectory = new File(baseDir + directoryName) 24 | 25 | if (dataDirectory.exists() && !dataDirectory.isDirectory()) 26 | throw new IllegalArgumentException( 27 | s"Cannot use $directoryName as a directory name because a file with that name already exists in $dataDirectory.") 28 | 29 | dataDirectory 30 | } 31 | } 32 | -------------------------------------------------------------------------------- 
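As a side note on the Tuple2Serializer shown above: the sketch below is not part of the repository (the object name, topic string and values are made up); it round-trips a pair through the serializer the same way AppSerializers builds its serdes with `Serdes.serdeFrom(ts, ts)`.

```scala
import org.apache.kafka.common.serialization.Serdes
import com.lightbend.kafka.scala.iq.example.serializers.Tuple2Serializer

object Tuple2SerdeRoundTrip extends App {
  // circe supplies Encoder/Decoder instances for String and Long,
  // from which the tuple Encoder/Decoder is derived
  val ts = new Tuple2Serializer[String, Long]()

  // the same instance acts as both Serializer and Deserializer
  val tupleSerde = Serdes.serdeFrom(ts, ts)

  val bytes   = tupleSerde.serializer().serialize("demo-topic", ("world.std.com", 42L))
  val decoded = tupleSerde.deserializer().deserialize("demo-topic", bytes)

  assert(decoded == ("world.std.com", 42L)) // JSON round trip preserves the pair
}
```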
/examples/example-dsl/src/main/scala/com/lightbend/kafka/scala/iq/example/serializers/SpecificAvroSerdeWithSchemaRegistry.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | */ 4 | 5 | package com.lightbend.kafka.scala.iq.example 6 | package serializers 7 | 8 | import org.apache.kafka.common.serialization.{ Deserializer, Serde, Serdes, Serializer } 9 | 10 | import java.util.Map 11 | 12 | class SpecificAvroSerdeWithSchemaRegistry[T <: org.apache.avro.specific.SpecificRecord] extends Serde[T] { 13 | 14 | val inner: Serde[T] = Serdes.serdeFrom(new SpecificAvroSerializerWithSchemaRegistry[T](), new SpecificAvroDeserializerWithSchemaRegistry[T]()) 15 | 16 | override def serializer(): Serializer[T] = inner.serializer() 17 | 18 | override def deserializer(): Deserializer[T] = inner.deserializer() 19 | 20 | override def configure(configs: Map[String, _], isKey: Boolean): Unit = { 21 | inner.serializer().configure(configs, isKey) 22 | inner.deserializer().configure(configs, isKey) 23 | } 24 | 25 | override def close(): Unit = { 26 | inner.serializer().close() 27 | inner.deserializer().close() 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /examples/example-proc/src/main/scala/com/lightbend/kafka/scala/iq/example/processor/BFStoreSupplier.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | */ 4 | 5 | package com.lightbend.kafka.scala.iq.example 6 | package processor 7 | 8 | import org.apache.kafka.common.serialization.Serde 9 | import org.apache.kafka.streams.state.StoreSupplier 10 | import com.twitter.algebird.Hash128 11 | 12 | class BFStoreSupplier[T: Hash128](val name: String, 13 | val serde: Serde[T], 14 | val loggingEnabled: Boolean, 15 | val logConfig: java.util.Map[String, String]) extends StoreSupplier[BFStore[T]] { 16 | 17 | def this(name: String, serde: Serde[T]) { 18 | this(name, serde, true, new java.util.HashMap[String, String]) 19 | } 20 | 21 | def this(name: String, serde: Serde[T], loggingEnabled: Boolean) { 22 | this(name, serde, loggingEnabled, new java.util.HashMap[String, String]) 23 | } 24 | 25 | override def get(): BFStore[T] = new BFStore[T](name, width = 1048576) 26 | 27 | override def metricsScope(): String = "" 28 | 29 | } 30 | 31 | -------------------------------------------------------------------------------- /examples/example-proc/src/main/scala/com/lightbend/kafka/scala/iq/example/http/WeblogProcHttpService.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 
3 | */ 4 | 5 | package com.lightbend.kafka.scala.iq.example 6 | package http 7 | 8 | import akka.actor.ActorSystem 9 | 10 | import akka.stream.ActorMaterializer 11 | 12 | import io.circe.syntax._ 13 | 14 | import org.apache.kafka.streams.state.HostInfo 15 | 16 | import scala.concurrent.ExecutionContext 17 | import com.lightbend.kafka.scala.iq.http.InteractiveQueryHttpService 18 | 19 | 20 | class WeblogProcHttpService( 21 | hostInfo: HostInfo, 22 | bfValueFetcher: BFValueFetcher, 23 | actorSystem: ActorSystem, 24 | actorMaterializer: ActorMaterializer, 25 | ec: ExecutionContext 26 | ) extends InteractiveQueryHttpService(hostInfo, actorSystem, actorMaterializer, ec) { 27 | 28 | // define the routes 29 | val routes = handleExceptions(myExceptionHandler) { 30 | pathPrefix("weblog") { 31 | (get & pathPrefix("access" / "check") & path(Segment)) { hostKey => 32 | complete { 33 | bfValueFetcher.checkIfPresent(hostKey).map(_.asJson) 34 | } 35 | } 36 | } 37 | } 38 | } 39 | 40 | -------------------------------------------------------------------------------- /examples/example-dsl/src/main/scala/com/lightbend/kafka/scala/iq/example/serializers/SpecificAvroDeserializerWithSchemaRegistry.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | */ 4 | 5 | package com.lightbend.kafka.scala.iq.example 6 | package serializers 7 | 8 | import org.apache.kafka.common.serialization.Deserializer 9 | 10 | import scala.collection.JavaConverters._ 11 | 12 | import java.util.{ Map => JMap } 13 | 14 | import io.confluent.kafka.serializers.KafkaAvroDeserializer 15 | 16 | import io.confluent.kafka.serializers.KafkaAvroDeserializerConfig.SPECIFIC_AVRO_READER_CONFIG 17 | 18 | class SpecificAvroDeserializerWithSchemaRegistry[T <: org.apache.avro.specific.SpecificRecord] extends Deserializer[T] { 19 | 20 | val inner: KafkaAvroDeserializer = new KafkaAvroDeserializer() 21 | 22 | override def configure(configs: JMap[String, _], isKey: Boolean): Unit = { 23 | val effectiveConfigs = Map(SPECIFIC_AVRO_READER_CONFIG -> true) ++ configs.asScala 24 | inner.configure(effectiveConfigs.asJava, isKey) 25 | } 26 | 27 | override def deserialize(s: String, bytes: Array[Byte]): T = inner.deserialize(s, bytes).asInstanceOf[T] 28 | 29 | override def close(): Unit = inner.close() 30 | } 31 | -------------------------------------------------------------------------------- /examples/example-dsl/src/main/resources/logback-dsl.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | logs/example-dsl.log 4 | true 5 | 6 | %d{HH:mm:ss.SSS} TKD [%thread] %-5level %logger{36} - %msg%n 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /examples/example-proc/src/main/resources/logback-proc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | logs/example-proc.log 4 | true 5 | 6 | %d{HH:mm:ss.SSS} TKD [%thread] %-5level %logger{36} - %msg%n 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /examples/example-dsl/src/main/scala/com/lightbend/kafka/scala/iq/example/serializers/SpecificAvroSerializerWithSchemaRegistry.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 
3 | */ 4 | 5 | package com.lightbend.kafka.scala.iq.example 6 | package serializers 7 | 8 | import org.apache.kafka.common.serialization.Serializer 9 | 10 | import scala.collection.JavaConverters._ 11 | import scala.collection.immutable.Map 12 | 13 | import java.util.{ Map => JMap } 14 | 15 | import io.confluent.kafka.serializers.KafkaAvroSerializer 16 | 17 | import io.confluent.kafka.serializers.KafkaAvroDeserializerConfig.SPECIFIC_AVRO_READER_CONFIG 18 | 19 | class SpecificAvroSerializerWithSchemaRegistry[T <: org.apache.avro.specific.SpecificRecord] extends Serializer[T] { 20 | 21 | val inner: KafkaAvroSerializer = new KafkaAvroSerializer() 22 | 23 | override def configure(configs: JMap[String, _], isKey: Boolean): Unit = { 24 | val effectiveConfigs = Map(SPECIFIC_AVRO_READER_CONFIG -> true) ++ configs.asScala 25 | inner.configure(effectiveConfigs.asJava, isKey) 26 | } 27 | 28 | override def serialize(topic: String, record: T): Array[Byte] = 29 | inner.serialize(topic, record) 30 | 31 | override def close(): Unit = inner.close() 32 | } 33 | -------------------------------------------------------------------------------- /examples/example-dsl/src/main/scala/com/lightbend/kafka/scala/iq/example/models/LogParseUtil.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | */ 4 | 5 | package com.lightbend.kafka.scala.iq.example 6 | package models 7 | 8 | import java.time.OffsetDateTime 9 | import java.time.format.DateTimeFormatter 10 | 11 | import scala.util.Try 12 | 13 | object LogParseUtil { 14 | final val logRegex = """^(\S+) (\S+) (\S+) \[([\w:/]+\s[+\-]\d{4})\] "(\S+) (\S+)\s*(\S*)" (\d{3}) (\S+)""".r 15 | 16 | def parseLine(line: String): Try[LogRecord] = Try { 17 | logRegex.findFirstIn(line) match { 18 | case Some(logRegex(host, clientId, user, timestamp, method, endpoint, protocol, httpReplyCode, bytes)) => 19 | LogRecord(host, clientId, user, parseTimestamp(timestamp), method, endpoint, protocol, httpReplyCode.toInt, toSafeInt(bytes)) 20 | case _ => throw new IllegalArgumentException(s"Cannot parse line $line") 21 | } 22 | } 23 | 24 | private def parseTimestamp(s: String): OffsetDateTime = { 25 | val f = DateTimeFormatter.ofPattern("dd/MMM/yyyy:HH:mm:ss Z") 26 | OffsetDateTime.from(f.parse(s)) 27 | } 28 | 29 | private def toSafeInt(s: String): Int = try { 30 | s.toInt 31 | } catch { 32 | case _: Exception => 0 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /examples/example-proc/src/main/scala/com/lightbend/kafka/scala/iq/example/models/LogParseUtil.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 
3 | */ 4 | 5 | package com.lightbend.kafka.scala.iq.example 6 | package models 7 | 8 | import java.time.OffsetDateTime 9 | import java.time.format.DateTimeFormatter 10 | 11 | import scala.util.Try 12 | 13 | object LogParseUtil { 14 | final val logRegex = """^(\S+) (\S+) (\S+) \[([\w:/]+\s[+\-]\d{4})\] "(\S+) (\S+)\s*(\S*)" (\d{3}) (\S+)""".r 15 | 16 | def parseLine(line: String): Try[LogRecord] = Try { 17 | logRegex.findFirstIn(line) match { 18 | case Some(logRegex(host, clientId, user, timestamp, method, endpoint, protocol, httpReplyCode, bytes)) => 19 | LogRecord(host, clientId, user, parseTimestamp(timestamp), method, endpoint, protocol, httpReplyCode.toInt, toSafeInt(bytes)) 20 | case _ => throw new IllegalArgumentException(s"Cannot parse line $line") 21 | } 22 | } 23 | 24 | private def parseTimestamp(s: String): OffsetDateTime = { 25 | val f = DateTimeFormatter.ofPattern("dd/MMM/yyyy:HH:mm:ss Z") 26 | OffsetDateTime.from(f.parse(s)) 27 | } 28 | 29 | private def toSafeInt(s: String): Int = try { 30 | s.toInt 31 | } catch { 32 | case _: Exception => 0 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /examples/example-proc/src/main/scala/com/lightbend/kafka/scala/iq/example/processor/BFStoreType.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | */ 4 | 5 | package com.lightbend.kafka.scala.iq.example 6 | package processor 7 | 8 | import org.apache.kafka.streams.state.QueryableStoreType 9 | import org.apache.kafka.streams.processor.StateStore 10 | import org.apache.kafka.streams.state.internals.StateStoreProvider 11 | 12 | import com.twitter.algebird.Hash128 13 | 14 | import scala.collection.JavaConverters._ 15 | 16 | class BFStoreType[T: Hash128] extends QueryableStoreType[ReadableBFStore[T]] { 17 | def accepts(stateStore: StateStore) = stateStore.isInstanceOf[BFStore[T]] 18 | 19 | def create(storeProvider: StateStoreProvider, storeName: String): BFStoreTypeWrapper[T] = 20 | new BFStoreTypeWrapper[T](storeProvider, storeName, this) 21 | } 22 | 23 | class BFStoreTypeWrapper[T: Hash128](val provider: StateStoreProvider, val storeName: String, 24 | val bfStoreType: QueryableStoreType[ReadableBFStore[T]]) extends ReadableBFStore[T] { 25 | 26 | def read(value: T): Boolean = { 27 | val stores: List[ReadableBFStore[T]] = provider.stores(storeName, bfStoreType).asScala.toList 28 | stores.map(store => store.read(value)).exists(_ == true) 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /examples/example-proc/src/main/scala/com/lightbend/kafka/scala/iq/example/processor/BFStoreBuilder.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 
3 | */ 4 | 5 | package com.lightbend.kafka.scala.iq.example 6 | package processor 7 | 8 | import org.apache.kafka.streams.state.StoreBuilder 9 | import com.twitter.algebird.Hash128 10 | 11 | class BFStoreBuilder[T: Hash128](val storeSupplier: BFStoreSupplier[T]) extends StoreBuilder[BFStore[T]] { 12 | 13 | override def name(): String = storeSupplier.name 14 | 15 | override def build(): BFStore[T] = storeSupplier.get() 16 | 17 | override def logConfig: java.util.Map[String, String] = storeSupplier.logConfig 18 | 19 | override def loggingEnabled(): Boolean = storeSupplier.loggingEnabled 20 | 21 | override def withCachingEnabled(): BFStoreBuilder[T] = this 22 | 23 | override def withLoggingDisabled(): BFStoreBuilder[T] = { 24 | storeSupplier.logConfig.clear() 25 | this 26 | } 27 | 28 | override def withLoggingEnabled(config: java.util.Map[String, String]): BFStoreBuilder[T] = { 29 | new BFStoreBuilder[T]( 30 | new BFStoreSupplier( 31 | storeSupplier.name, 32 | storeSupplier.serde, 33 | storeSupplier.loggingEnabled, 34 | config 35 | ) 36 | ) 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /lib/src/main/scala/com/lightbend/kafka/scala/package.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | */ 4 | 5 | package com.lightbend.kafka.scala 6 | 7 | import java.nio.charset.Charset 8 | 9 | import scala.concurrent.duration._ 10 | import scala.concurrent.ExecutionContext 11 | import scala.concurrent.Future 12 | import akka.pattern.after 13 | import akka.actor.Scheduler 14 | 15 | package object iq { 16 | final val CHARSET = Charset.forName("UTF-8") 17 | 18 | def translateHostInterface(host: String) = { 19 | if (host == "0.0.0.0") { 20 | java.net.InetAddress.getLocalHost.getHostAddress 21 | } else { 22 | host 23 | } 24 | } 25 | 26 | /** 27 | * Given an operation that produces a T, returns a Future containing the result of T, unless an exception is thrown, 28 | * in which case the operation will be retried after _delay_ time, if there are more possible retries, which is configured through 29 | * the _retries_ parameter. If the operation does not succeed and there is no retries left, the resulting Future will 30 | * contain the last failure. 31 | **/ 32 | // https://gist.github.com/viktorklang/9414163 33 | def retry[T](op: => T, delay: FiniteDuration, retries: Int)(implicit ec: ExecutionContext, s: Scheduler): Future[T] = 34 | Future(op) recoverWith { case _ if retries > 0 => after(delay, s)(retry(op, delay, retries - 1)) } 35 | } 36 | -------------------------------------------------------------------------------- /examples/example-proc/src/main/scala/com/lightbend/kafka/scala/iq/example/processor/BFStoreChangeLogger.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 
3 | */ 4 | 5 | package com.lightbend.kafka.scala.iq.example 6 | package processor 7 | 8 | import org.apache.kafka.streams.processor.ProcessorContext 9 | import org.apache.kafka.streams.processor.internals.{ProcessorStateManager, RecordCollector} 10 | import org.apache.kafka.streams.state.StateSerdes 11 | 12 | class BFStoreChangeLogger[K, V](val storeName: String, 13 | val context: ProcessorContext, 14 | val partition: Int, 15 | val serialization: StateSerdes[K, V]) { 16 | 17 | private val topic = ProcessorStateManager.storeChangelogTopic(context.applicationId, storeName) 18 | private val collector = context.asInstanceOf[RecordCollector.Supplier].recordCollector 19 | 20 | def this(storeName: String, context: ProcessorContext, serialization: StateSerdes[K, V]) { 21 | this(storeName, context, context.taskId.partition, serialization) 22 | } 23 | 24 | def logChange(key: K, value: V): Unit = { 25 | if (collector != null) { 26 | val keySerializer = serialization.keySerializer 27 | val valueSerializer = serialization.valueSerializer 28 | collector.send(this.topic, key, value, this.partition, context.timestamp, keySerializer, valueSerializer) 29 | } 30 | } 31 | } 32 | 33 | -------------------------------------------------------------------------------- /examples/example-dsl/src/main/scala/com/lightbend/kafka/scala/iq/example/serializers/AppSerializers.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | */ 4 | 5 | package com.lightbend.kafka.scala.iq.example 6 | package serializers 7 | 8 | import models.LogRecord 9 | import org.apache.kafka.common.serialization.Serdes 10 | import io.confluent.kafka.serializers.AbstractKafkaAvroSerDeConfig 11 | import com.lightbend.kafka.scala.iq.serializers._ 12 | 13 | trait AppSerializers extends Serializers { 14 | final val ts = new Tuple2Serializer[String, String]() 15 | final val ms = new ModelSerializer[LogRecord]() 16 | final val logRecordSerde = Serdes.serdeFrom(ms, ms) 17 | final val tuple2StringSerde = Serdes.serdeFrom(ts, ts) 18 | 19 | /** 20 | * The Serde instance varies depending on whether we are using Schema Registry. If we are using 21 | * schema registry, we use the serde provided by Confluent, else we use Avro serialization backed by 22 | * Twitter's bijection library 23 | */ 24 | def logRecordAvroSerde(maybeSchemaRegistryUrl: Option[String]) = maybeSchemaRegistryUrl.map { url => 25 | val serde = new SpecificAvroSerdeWithSchemaRegistry[LogRecordAvro]() 26 | serde.configure( 27 | java.util.Collections.singletonMap(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, url), 28 | false) 29 | serde 30 | }.getOrElse { 31 | new SpecificAvroSerde[LogRecordAvro](LogRecordAvro.SCHEMA$) 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /examples/example-proc/src/main/scala/com/lightbend/kafka/scala/iq/example/processor/WeblogProcessor.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 
3 | */ 4 | 5 | package com.lightbend.kafka.scala.iq.example 6 | package processor 7 | 8 | import scala.util.{ Success, Failure } 9 | import org.apache.kafka.streams.processor.{ AbstractProcessor, ProcessorContext, PunctuationType, Punctuator } 10 | import models.LogParseUtil 11 | import com.typesafe.scalalogging.LazyLogging 12 | 13 | class WeblogProcessor extends AbstractProcessor[String, String] with LazyLogging { 14 | private var bfStore: BFStore[String] = _ 15 | 16 | override def init(context: ProcessorContext): Unit = { 17 | super.init(context) 18 | this.context.schedule( 19 | 1000, 20 | PunctuationType.WALL_CLOCK_TIME, 21 | new Punctuator() { 22 | override def punctuate(timestamp: Long): Unit = () 23 | } 24 | ) 25 | bfStore = this.context.getStateStore(WeblogDriver.LOG_COUNT_STATE_STORE).asInstanceOf[BFStore[String]] 26 | } 27 | 28 | override def process(dummy: String, record: String): Unit = LogParseUtil.parseLine(record) match { 29 | case Success(r) => { 30 | bfStore + r.host 31 | bfStore.changeLogger.logChange(bfStore.changelogKey, bfStore.bf) 32 | } 33 | case Failure(ex) => { 34 | logger.warn(s"Error processing record $record .. skipping", ex) 35 | } 36 | } 37 | 38 | override def punctuate(timestamp: Long): Unit = super.punctuate(timestamp) 39 | override def close(): Unit = {} 40 | } 41 | -------------------------------------------------------------------------------- /examples/example-proc/src/main/scala/com/lightbend/kafka/scala/iq/example/processor/BFSerde.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | */ 4 | 5 | package com.lightbend.kafka.scala.iq.example 6 | package processor 7 | 8 | import java.util 9 | 10 | import com.twitter.algebird.BF 11 | import com.twitter.chill.ScalaKryoInstantiator 12 | import org.apache.kafka.common.errors.SerializationException 13 | import org.apache.kafka.common.serialization._ 14 | 15 | class BFSerializer[T] extends Serializer[BF[T]] { 16 | 17 | override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = { 18 | // nothing to do 19 | } 20 | 21 | override def serialize(topic: String, bf: BF[T]): Array[Byte] = 22 | if (bf == null) null 23 | else ScalaKryoInstantiator.defaultPool.toBytesWithClass(bf) 24 | 25 | override def close(): Unit = { 26 | // nothing to do 27 | } 28 | 29 | } 30 | 31 | class BFDeserializer[T] extends Deserializer[BF[T]] { 32 | 33 | override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = { 34 | // nothing to do 35 | } 36 | 37 | override def deserialize(topic: String, bytes: Array[Byte]): BF[T] = 38 | if (bytes == null) null 39 | else if (bytes.isEmpty) throw new SerializationException("byte array must not be empty") 40 | else ScalaKryoInstantiator.defaultPool.fromBytes(bytes).asInstanceOf[BF[T]] 41 | 42 | override def close(): Unit = { 43 | // nothing to do 44 | } 45 | 46 | } 47 | 48 | object BFSerde { 49 | 50 | def apply[T]: Serde[BF[T]] = Serdes.serdeFrom(new BFSerializer[T], new BFDeserializer[T]) 51 | 52 | } 53 | 54 | -------------------------------------------------------------------------------- /examples/example-proc/src/main/resources/application-proc.conf.template: -------------------------------------------------------------------------------- 1 | akka { 2 | loglevel = INFO 3 | log-config-on-start = on 4 | loggers = ["akka.event.slf4j.Slf4jLogger"] 5 | logging-filter = "akka.event.slf4j.Slf4jLoggingFilter" 6 | event-handlers = ["akka.event.slf4j.Slf4jEventHandler"] 7 | } 8 | 9 | kafka { 10 
| # true if use local kafka server 11 | # false otherwise 12 | # if true, then setting of brokers below is ignored and set to that of KafkaLocalServer 13 | localserver = true 14 | 15 | ## bootstrap servers for Kafka 16 | brokers = "localhost:9092" 17 | brokers = ${?KAFKA_BROKERS} 18 | 19 | ## consumer group 20 | group = "group-proc" 21 | group = ${?KAFKA_GROUP_PROC} 22 | 23 | ## the source topic - processing starts with 24 | ## data in this topic (to be loaded by ingestion) 25 | fromtopic = "server-log-proc" 26 | fromtopic = ${?KAFKA_FROM_TOPIC_PROC} 27 | 28 | ## error topic for the initial processing 29 | errortopic = "logerr-proc" 30 | errortopic = ${?KAFKA_ERROR_TOPIC_PROC} 31 | 32 | ## folder where state stores are created by Kafka Streams 33 | statestoredir = "/tmp/kafka-streams" 34 | statestoredir = ${?STATESTOREDIR} 35 | 36 | ## settings for data ingestion 37 | loader { 38 | sourcetopic = ${kafka.fromtopic} 39 | sourcetopic = ${?KAFKA_FROM_TOPIC_PROC} 40 | 41 | directorytowatch = "/Users/myhome/ClarkNet-HTTP" 42 | directorytowatch = ${?DIRECTORY_TO_WATCH} 43 | 44 | pollinterval = 1 second 45 | } 46 | } 47 | 48 | # http endpoints of the weblog microservice 49 | http { 50 | # The port the dashboard listens on 51 | port = 7071 52 | port = ${?PORT0} 53 | 54 | # The interface the dashboard listens on 55 | interface = "localhost" 56 | interface = ${?INTERFACE_PROC} 57 | } 58 | 59 | -------------------------------------------------------------------------------- /lib/src/main/scala/com/lightbend/kafka/scala/iq/serializers/Serializers.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | */ 4 | 5 | package com.lightbend.kafka.scala.iq 6 | package serializers 7 | 8 | import org.apache.kafka.streams.kstream.Windowed 9 | import org.apache.kafka.common.serialization._ 10 | import org.apache.kafka.streams.kstream.internals.{WindowedDeserializer, WindowedSerializer} 11 | 12 | trait SerDeserializer[T] extends Serializer[T] with Deserializer[T] 13 | 14 | trait Serializers { 15 | final val stringSerializer = new StringSerializer() 16 | final val stringDeserializer = new StringDeserializer() 17 | final val byteArraySerializer = new ByteArraySerializer() 18 | final val byteArrayDeserializer = new ByteArrayDeserializer() 19 | 20 | final val windowedStringSerializer: WindowedSerializer[String] = new WindowedSerializer[String](stringSerializer) 21 | final val windowedStringDeserializer: WindowedDeserializer[String] = new WindowedDeserializer[String](stringDeserializer) 22 | final val windowedStringSerde: Serde[Windowed[String]] = Serdes.serdeFrom(windowedStringSerializer, windowedStringDeserializer) 23 | 24 | final val windowedByteArraySerializer: WindowedSerializer[Array[Byte]] = new WindowedSerializer[Array[Byte]](byteArraySerializer) 25 | final val windowedByteArrayDeserializer: WindowedDeserializer[Array[Byte]] = new WindowedDeserializer[Array[Byte]](byteArrayDeserializer) 26 | final val windowedByteArraySerde: Serde[Windowed[Array[Byte]]] = Serdes.serdeFrom(windowedByteArraySerializer, windowedByteArrayDeserializer) 27 | 28 | final val stringSerde = Serdes.String() 29 | final val longSerde: Serde[Long] = Serdes.Long().asInstanceOf[Serde[Long]] 30 | final val byteArraySerde = Serdes.ByteArray() 31 | } 32 | -------------------------------------------------------------------------------- /lib/build.sbt: -------------------------------------------------------------------------------- 1 | import Dependencies._ 2 | 3 | 
name := "kafka-streams-query" 4 | 5 | organization := "com.lightbend" 6 | 7 | version := "0.1.1" 8 | 9 | scalaVersion := Versions.scalaVersion 10 | 11 | crossScalaVersions := Versions.crossScalaVersions 12 | 13 | scalacOptions := Seq("-Xexperimental", "-unchecked", "-deprecation", "-Ywarn-unused-import") 14 | 15 | parallelExecution in Test := false 16 | 17 | libraryDependencies ++= Seq( 18 | kafkaStreams excludeAll(ExclusionRule("org.slf4j", "slf4j-log4j12"), ExclusionRule("org.apache.zookeeper", "zookeeper")), 19 | scalaLogging, 20 | circeCore, 21 | circeGeneric, 22 | circeParser, 23 | akkaHttp, 24 | akkaStreams, 25 | akkaHttpCirce, 26 | akkaSlf4j, 27 | bijection 28 | ) 29 | 30 | licenses := Seq("Apache 2" -> new URL("http://www.apache.org/licenses/LICENSE-2.0.txt")) 31 | 32 | developers := List( 33 | Developer("debasishg", "Debasish Ghosh", "@debasishg", url("https://github.com/debasishg")), 34 | Developer("blublinsky", "Boris Lublinsky", "@blublinsky", url("https://github.com/blublinsky")), 35 | Developer("maasg", "Gerard Maas", "@maasg", url("https://github.com/maasg")), 36 | Developer("seglo", "Sean Glover", "@seglo", url("https://github.com/seglo")) 37 | ) 38 | 39 | organizationName := "lightbend" 40 | 41 | organizationHomepage := Some(url("http://lightbend.com/")) 42 | 43 | homepage := scmInfo.value map (_.browseUrl) 44 | 45 | scmInfo := Some(ScmInfo(url("https://github.com/lightbend/kafka-streams-query"), "git@github.com:lightbend/kafka-streams-query.git")) 46 | 47 | credentials += Credentials(Path.userHome / ".ivy2" / ".credentials") 48 | 49 | publishTo := { 50 | val nexus = "https://oss.sonatype.org/" 51 | if (isSnapshot.value) Some("snapshots" at nexus + "content/repositories/snapshots") 52 | else Some("releases" at nexus + "service/local/staging/deploy/maven2") 53 | } 54 | 55 | publishMavenStyle := true 56 | 57 | publishArtifact in Test := false 58 | -------------------------------------------------------------------------------- /lib/project/Dependencies.scala: -------------------------------------------------------------------------------- 1 | import sbt._ 2 | import Versions._ 3 | 4 | object Dependencies { 5 | 6 | implicit class Exclude(module: ModuleID) { 7 | def log4jExclude: ModuleID = 8 | module excludeAll(ExclusionRule("log4j")) 9 | 10 | def driverExclusions: ModuleID = 11 | module.log4jExclude.exclude("com.google.guava", "guava") 12 | .excludeAll(ExclusionRule("org.slf4j")) 13 | } 14 | 15 | val kafkaStreams = "org.apache.kafka" % "kafka-streams" % kafkaVersion 16 | val scalaLogging = "com.typesafe.scala-logging" %% "scala-logging" % scalaLoggingVersion 17 | val logback = "ch.qos.logback" % "logback-classic" % logbackVersion 18 | val kafka = "org.apache.kafka" %% "kafka" % kafkaVersion 19 | val curator = "org.apache.curator" % "curator-test" % curatorVersion 20 | val minitest = "io.monix" %% "minitest" % minitestVersion 21 | val minitestLaws = "io.monix" %% "minitest-laws" % minitestVersion 22 | val algebird = "com.twitter" %% "algebird-core" % algebirdVersion 23 | val chill = "com.twitter" %% "chill" % chillVersion 24 | val circeCore = "io.circe" %% "circe-core" % circeVersion 25 | val circeGeneric = "io.circe" %% "circe-generic" % circeVersion 26 | val circeParser = "io.circe" %% "circe-parser" % circeVersion 27 | val akkaSlf4j = "com.typesafe.akka" %% "akka-slf4j" % akkaVersion 28 | val akkaStreams = "com.typesafe.akka" %% "akka-stream" % akkaVersion 29 | val akkaHttp = "com.typesafe.akka" %% "akka-http" % akkaHttpVersion 30 | val akkaHttpCirce = "de.heikoseeberger" 
%% "akka-http-circe" % akkaHttpCirceVersion 31 | val bijection = "com.twitter" %% "bijection-avro" % bijectionVersion 32 | } 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /examples/example-dsl/src/main/scala/com/lightbend/kafka/scala/iq/package.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | */ 4 | 5 | package com.lightbend.kafka.scala.iq 6 | 7 | import java.nio.charset.Charset 8 | import cats.syntax.either._ 9 | import java.time.OffsetDateTime 10 | import io.circe._, io.circe.generic.semiauto._ 11 | import example.models.LogRecord 12 | 13 | import scala.concurrent.duration._ 14 | import scala.concurrent.ExecutionContext 15 | import scala.concurrent.Future 16 | import akka.pattern.after 17 | import akka.actor.Scheduler 18 | 19 | package object example { 20 | final val CHARSET = Charset.forName("UTF-8") 21 | 22 | implicit val encodeOffsetDateTime: Encoder[OffsetDateTime] = Encoder.encodeString.contramap[OffsetDateTime](_.toString) 23 | 24 | implicit val decodeInstant: Decoder[OffsetDateTime] = Decoder.decodeString.emap { str => 25 | Either.catchNonFatal(OffsetDateTime.parse(str)).leftMap(t => "OffsetDateTime") 26 | } 27 | 28 | implicit val logRecordDecoder: Decoder[LogRecord] = deriveDecoder[LogRecord] 29 | implicit val logRecordEncoder: Encoder[LogRecord] = deriveEncoder[LogRecord] 30 | 31 | implicit def asFiniteDuration(d: java.time.Duration) = 32 | scala.concurrent.duration.Duration.fromNanos(d.toNanos) 33 | 34 | def translateHostInterface(host: String) = host match { 35 | case "0.0.0.0" => java.net.InetAddress.getLocalHost.getHostAddress 36 | case x => x 37 | } 38 | 39 | /** 40 | * Given an operation that produces a T, returns a Future containing the result of T, unless an exception is thrown, 41 | * in which case the operation will be retried after _delay_ time, if there are more possible retries, which is configured through 42 | * the _retries_ parameter. If the operation does not succeed and there is no retries left, the resulting Future will 43 | * contain the last failure. 44 | **/ 45 | // https://gist.github.com/viktorklang/9414163 46 | def retry[T](op: => T, delay: FiniteDuration, retries: Int)(implicit ec: ExecutionContext, s: Scheduler): Future[T] = 47 | Future(op) recoverWith { case _ if retries > 0 => after(delay, s)(retry(op, delay, retries - 1)) } 48 | } 49 | -------------------------------------------------------------------------------- /examples/example-proc/src/main/scala/com/lightbend/kafka/scala/iq/package.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 
3 | */ 4 | 5 | package com.lightbend.kafka.scala.iq 6 | 7 | import java.nio.charset.Charset 8 | import cats.syntax.either._ 9 | import java.time.OffsetDateTime 10 | import io.circe._, io.circe.generic.semiauto._ 11 | import example.models.LogRecord 12 | 13 | import scala.concurrent.duration._ 14 | import scala.concurrent.ExecutionContext 15 | import scala.concurrent.Future 16 | import akka.pattern.after 17 | import akka.actor.Scheduler 18 | 19 | package object example { 20 | final val CHARSET = Charset.forName("UTF-8") 21 | 22 | implicit val encodeOffsetDateTime: Encoder[OffsetDateTime] = Encoder.encodeString.contramap[OffsetDateTime](_.toString) 23 | 24 | implicit val decodeInstant: Decoder[OffsetDateTime] = Decoder.decodeString.emap { str => 25 | Either.catchNonFatal(OffsetDateTime.parse(str)).leftMap(t => "OffsetDateTime") 26 | } 27 | 28 | implicit val logRecordDecoder: Decoder[LogRecord] = deriveDecoder[LogRecord] 29 | implicit val logRecordEncoder: Encoder[LogRecord] = deriveEncoder[LogRecord] 30 | 31 | implicit def asFiniteDuration(d: java.time.Duration) = 32 | scala.concurrent.duration.Duration.fromNanos(d.toNanos) 33 | 34 | def translateHostInterface(host: String) = host match { 35 | case "0.0.0.0" => java.net.InetAddress.getLocalHost.getHostAddress 36 | case x => x 37 | } 38 | 39 | /** 40 | * Given an operation that produces a T, returns a Future containing the result of T, unless an exception is thrown, 41 | * in which case the operation will be retried after _delay_ time, if there are more possible retries, which is configured through 42 | * the _retries_ parameter. If the operation does not succeed and there is no retries left, the resulting Future will 43 | * contain the last failure. 44 | **/ 45 | // https://gist.github.com/viktorklang/9414163 46 | def retry[T](op: => T, delay: FiniteDuration, retries: Int)(implicit ec: ExecutionContext, s: Scheduler): Future[T] = 47 | Future(op) recoverWith { case _ if retries > 0 => after(delay, s)(retry(op, delay, retries - 1)) } 48 | } 49 | -------------------------------------------------------------------------------- /lib/src/main/scala/com/lightbend/kafka/scala/iq/http/HttpRequester.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | */ 4 | 5 | package com.lightbend.kafka.scala.iq 6 | package http 7 | 8 | import akka.actor.ActorSystem 9 | import akka.http.scaladsl.Http 10 | 11 | import akka.http.scaladsl.model.{ HttpResponse, HttpRequest, ResponseEntity } 12 | import akka.http.scaladsl.model.StatusCodes._ 13 | import akka.http.scaladsl.unmarshalling.{ Unmarshal, Unmarshaller } 14 | 15 | import akka.stream.ActorMaterializer 16 | 17 | import scala.concurrent.{ Future, ExecutionContext} 18 | 19 | import com.typesafe.scalalogging.LazyLogging 20 | import services.HostStoreInfo 21 | import java.io.IOException 22 | 23 | /** 24 | * Provides a generic API over HTTP to query from a host and a store. The result is 25 | * returned as a Future. 
26 | */ 27 | class HttpRequester(val actorSystem: ActorSystem, val mat: ActorMaterializer, 28 | val executionContext: ExecutionContext) extends LazyLogging { 29 | 30 | private implicit val as: ActorSystem = actorSystem 31 | private implicit val mt: ActorMaterializer = mat 32 | private implicit val ec: ExecutionContext = executionContext 33 | 34 | private def apiRequest(path: String, host: HostStoreInfo): Future[HttpResponse] = 35 | Http().singleRequest(HttpRequest(uri = s"http://${host.host}:${host.port}$path")) 36 | 37 | def queryFromHost[V](host: HostStoreInfo, 38 | path: String)(implicit u: Unmarshaller[ResponseEntity, V]): Future[V] = { 39 | apiRequest(path, host).flatMap { response => 40 | response.status match { 41 | case OK => Unmarshal(response.entity).to[V] 42 | 43 | case BadRequest => { 44 | logger.error(s"$path: incorrect path") 45 | Future.failed(new IOException(s"$path: incorrect path")) 46 | } 47 | 48 | case otherStatus => Unmarshal(response.entity).to[String].flatMap { entity => 49 | val error = s"state fetch request failed with status code ${otherStatus} and entity $entity" 50 | logger.error(error) 51 | Future.failed(new IOException(error)) 52 | } 53 | } 54 | } 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /examples/example-proc/src/main/scala/com/lightbend/kafka/scala/iq/example/http/BFValueFetcher.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | */ 4 | 5 | package com.lightbend.kafka.scala.iq.example 6 | package http 7 | 8 | import akka.actor.ActorSystem 9 | 10 | import org.apache.kafka.streams.{ KafkaStreams } 11 | import org.apache.kafka.streams.state.HostInfo 12 | 13 | import scala.concurrent.{ Future, ExecutionContext} 14 | import scala.util.{ Success, Failure } 15 | 16 | import com.typesafe.scalalogging.LazyLogging 17 | import com.lightbend.kafka.scala.iq.services.{ MetadataService, HostStoreInfo } 18 | import services.AppStateStoreQuery 19 | import com.lightbend.kafka.scala.iq.http.HttpRequester 20 | import de.heikoseeberger.akkahttpcirce.FailFastCirceSupport 21 | import serializers.AppSerializers 22 | 23 | class BFValueFetcher( 24 | metadataService: MetadataService, 25 | localStateStoreQuery: AppStateStoreQuery[String, Long], 26 | httpRequester: HttpRequester, 27 | streams: KafkaStreams, 28 | executionContext: ExecutionContext, 29 | hostInfo: HostInfo)(implicit actorSystem: ActorSystem) extends LazyLogging with FailFastCirceSupport with AppSerializers { 30 | 31 | private implicit val ec: ExecutionContext = executionContext 32 | 33 | def checkIfPresent(hostKey: String): Future[Boolean] = { 34 | 35 | val store = WeblogDriver.LOG_COUNT_STATE_STORE 36 | val path = s"/weblog/access/check/$hostKey" 37 | 38 | metadataService.streamsMetadataForStoreAndKey(store, hostKey, stringSerializer) match { 39 | case Success(host) => { 40 | // hostKey is on another instance. call the other instance to fetch the data. 
41 | if (!thisHost(host)) { 42 | logger.warn(s"Key $hostKey is on another instance not on ${translateHostInterface(hostInfo.host)}:${hostInfo.port} - requerying ..") 43 | httpRequester.queryFromHost[Boolean](host, path) 44 | } else { 45 | // hostKey is on this instance 46 | localStateStoreQuery.queryBFStore(streams, store, hostKey) 47 | } 48 | } 49 | case Failure(ex) => Future.failed(ex) 50 | } 51 | } 52 | 53 | private def thisHost(host: HostStoreInfo): Boolean = 54 | host.host.equals(translateHostInterface(hostInfo.host)) && host.port == hostInfo.port 55 | } 56 | 57 | 58 | -------------------------------------------------------------------------------- /examples/example-dsl/src/main/scala/com/lightbend/kafka/scala/iq/example/http/SummaryInfoFetcher.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | */ 4 | 5 | package com.lightbend.kafka.scala.iq.example 6 | package http 7 | 8 | import com.lightbend.kafka.scala.iq.http.KeyValueFetcher 9 | import scala.concurrent.Future 10 | 11 | class SummaryInfoFetcher(kvf: KeyValueFetcher[String, Long]) { 12 | def fetchAccessCountSummary(hostKey: String): Future[Long] = 13 | kvf.fetch(hostKey, WeblogProcessing.ACCESS_COUNT_PER_HOST_STORE, "/weblog/access/" + hostKey) 14 | 15 | def fetchPayloadSizeSummary(hostKey: String): Future[Long] = 16 | kvf.fetch(hostKey, WeblogProcessing.PAYLOAD_SIZE_PER_HOST_STORE, "/weblog/bytes/" + hostKey) 17 | 18 | def fetchRangeAccessCountSummary(fromKey: String, toKey: String): Future[List[(String, Long)]] = 19 | kvf.fetchRange(fromKey, toKey, WeblogProcessing.ACCESS_COUNT_PER_HOST_STORE, "/weblog/access/range/") 20 | 21 | def fetchRangePayloadSizeSummary(fromKey: String, toKey: String): Future[List[(String, Long)]] = 22 | kvf.fetchRange(fromKey, toKey, WeblogProcessing.PAYLOAD_SIZE_PER_HOST_STORE, "/weblog/bytes/range/") 23 | 24 | def fetchAllAccessCountSummary: Future[List[(String, Long)]] = 25 | kvf.fetchAll(WeblogProcessing.ACCESS_COUNT_PER_HOST_STORE, "/weblog/access/ALL") 26 | 27 | def fetchAllPayloadSizeSummary: Future[List[(String, Long)]] = 28 | kvf.fetchAll(WeblogProcessing.PAYLOAD_SIZE_PER_HOST_STORE, "/weblog/bytes/ALL") 29 | 30 | def fetchApproxAccessCountNumEntries: Future[Long] = 31 | kvf.fetchApproxNumEntries(WeblogProcessing.ACCESS_COUNT_PER_HOST_STORE, "/weblog/access/COUNT") 32 | 33 | def fetchApproxPayloadNumEntries: Future[Long] = 34 | kvf.fetchApproxNumEntries(WeblogProcessing.PAYLOAD_SIZE_PER_HOST_STORE, "/weblog/bytes/COUNT") 35 | 36 | def fetchWindowedAccessCountSummary(hostKey: String, fromTime: Long, toTime: Long): Future[List[(Long, Long)]] = 37 | kvf.fetchWindowed(hostKey, WeblogProcessing.WINDOWED_ACCESS_COUNT_PER_HOST_STORE, "/weblog/access/win/", fromTime, toTime) 38 | 39 | def fetchWindowedPayloadSizeSummary(hostKey: String, fromTime: Long, toTime: Long): Future[List[(Long, Long)]] = 40 | kvf.fetchWindowed(hostKey, WeblogProcessing.WINDOWED_PAYLOAD_SIZE_PER_HOST_STORE, "/weblog/bytes/win/", fromTime, toTime) 41 | 42 | } 43 | -------------------------------------------------------------------------------- /examples/example-dsl/src/main/scala/com/lightbend/kafka/scala/iq/example/ingestion/DataIngestion.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 
3 | */ 4 | 5 | package com.lightbend.kafka.scala.iq.example 6 | package ingestion 7 | 8 | import java.nio.file.{ Path, FileSystems } 9 | 10 | import akka.{ NotUsed, Done } 11 | import akka.util.ByteString 12 | import akka.actor.ActorSystem 13 | 14 | import akka.stream.ActorMaterializer 15 | import akka.stream.scaladsl.{ Framing, Source } 16 | import akka.stream.alpakka.file.DirectoryChange._ 17 | import akka.stream.alpakka.file.scaladsl._ 18 | 19 | import akka.kafka.ProducerSettings 20 | import akka.kafka.scaladsl.Producer 21 | 22 | import org.apache.kafka.clients.producer.ProducerRecord 23 | 24 | import scala.concurrent.duration._ 25 | import scala.concurrent.Future 26 | 27 | import config.KStreamConfig._ 28 | import serializers.AppSerializers 29 | import com.typesafe.scalalogging.LazyLogging 30 | 31 | object DataIngestion extends LazyLogging with AppSerializers { 32 | def registerForIngestion(config: ConfigData) 33 | (implicit system: ActorSystem, materializer: ActorMaterializer): Future[Done] = { 34 | 35 | val fs = FileSystems.getDefault 36 | 37 | config.directoryToWatch.map { dir => 38 | DirectoryChangesSource(fs.getPath(dir), 39 | config.pollInterval, 40 | maxBufferSize = 1024).runForeach { 41 | 42 | case (path, _@(Creation | Modification)) => { 43 | val _ = produce(path, config) 44 | () 45 | } 46 | case (_, Deletion) => () 47 | } 48 | }.getOrElse(Future.failed(new IllegalArgumentException("No directoryToWatch set in data ingestion module"))) 49 | } 50 | 51 | private def produce(path: Path, config: ConfigData) 52 | (implicit system: ActorSystem, materializer: ActorMaterializer): NotUsed = { 53 | 54 | val MAX_CHUNK_SIZE = 25000 55 | val POLLING_INTERVAL = 250 millis 56 | 57 | val producerSettings = ProducerSettings(system, byteArraySerde.serializer, stringSerializer).withBootstrapServers(config.brokers) 58 | 59 | val logLines: Source[String, NotUsed] = 60 | FileTailSource(path, MAX_CHUNK_SIZE, 0, POLLING_INTERVAL) 61 | .via(Framing.delimiter(ByteString.fromString("\n"), MAX_CHUNK_SIZE)) 62 | .map(_.utf8String) 63 | 64 | logLines 65 | .map(new ProducerRecord[Array[Byte], String](config.sourceTopic, _)) 66 | .to(Producer.plainSink(producerSettings)) 67 | .run() 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /examples/example-proc/src/main/scala/com/lightbend/kafka/scala/iq/example/ingestion/DataIngestion.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 
3 | */ 4 | 5 | package com.lightbend.kafka.scala.iq.example 6 | package ingestion 7 | 8 | import java.nio.file.{ Path, FileSystems } 9 | 10 | import akka.{ NotUsed, Done } 11 | import akka.util.ByteString 12 | import akka.actor.ActorSystem 13 | 14 | import akka.stream.ActorMaterializer 15 | import akka.stream.scaladsl.{ Framing, Source } 16 | import akka.stream.alpakka.file.DirectoryChange._ 17 | import akka.stream.alpakka.file.scaladsl._ 18 | 19 | import akka.kafka.ProducerSettings 20 | import akka.kafka.scaladsl.Producer 21 | 22 | import org.apache.kafka.clients.producer.ProducerRecord 23 | 24 | import scala.concurrent.duration._ 25 | import scala.concurrent.Future 26 | 27 | import config.KStreamConfig._ 28 | import serializers.AppSerializers 29 | import com.typesafe.scalalogging.LazyLogging 30 | 31 | object DataIngestion extends LazyLogging with AppSerializers { 32 | def registerForIngestion(config: ConfigData) 33 | (implicit system: ActorSystem, materializer: ActorMaterializer): Future[Done] = { 34 | 35 | val fs = FileSystems.getDefault 36 | 37 | config.directoryToWatch.map { dir => 38 | DirectoryChangesSource(fs.getPath(dir), 39 | config.pollInterval, 40 | maxBufferSize = 1024).runForeach { 41 | 42 | case (path, _@(Creation | Modification)) => { 43 | val _ = produce(path, config) 44 | () 45 | } 46 | case (_, Deletion) => () 47 | } 48 | }.getOrElse(Future.failed(new IllegalArgumentException("No directoryToWatch set in data ingestion module"))) 49 | } 50 | 51 | private def produce(path: Path, config: ConfigData) 52 | (implicit system: ActorSystem, materializer: ActorMaterializer): NotUsed = { 53 | 54 | val MAX_CHUNK_SIZE = 25000 55 | val POLLING_INTERVAL = 250 millis 56 | 57 | val producerSettings = ProducerSettings(system, byteArraySerde.serializer, stringSerializer) 58 | .withBootstrapServers(config.brokers) 59 | 60 | val logLines: Source[String, NotUsed] = 61 | FileTailSource(path, MAX_CHUNK_SIZE, 0, POLLING_INTERVAL) 62 | .via(Framing.delimiter(ByteString.fromString("\n"), MAX_CHUNK_SIZE)) 63 | .map(_.utf8String) 64 | 65 | logLines 66 | .map(new ProducerRecord[Array[Byte], String](config.sourceTopic, _)) 67 | .to(Producer.plainSink(producerSettings)) 68 | .run() 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /lib/src/main/scala/com/lightbend/kafka/scala/iq/http/InteractiveQueryHttpService.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | */ 4 | 5 | package com.lightbend.kafka.scala.iq 6 | package http 7 | 8 | import akka.actor.ActorSystem 9 | 10 | import akka.http.scaladsl.server.Directives 11 | import akka.http.scaladsl.Http 12 | 13 | import akka.http.scaladsl.model.{ HttpRequest, HttpResponse } 14 | import akka.http.scaladsl.model.StatusCodes._ 15 | import akka.http.scaladsl.server.ExceptionHandler 16 | import de.heikoseeberger.akkahttpcirce.FailFastCirceSupport 17 | 18 | import akka.stream.ActorMaterializer 19 | import akka.stream.scaladsl.Flow 20 | 21 | import org.apache.kafka.streams.state.HostInfo 22 | 23 | import scala.concurrent.{ Future, ExecutionContext} 24 | import scala.util.{ Success, Failure } 25 | 26 | import com.typesafe.scalalogging.LazyLogging 27 | 28 | 29 | /** 30 | * The interactive http query service. Offers APIs to start and stop the service. 
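 * A concrete subclass supplies the `routes`; a typical lifecycle, sketched here with the
 * DSL example's `WeblogDSLHttpService` (variable names are illustrative), is:
 * {{{
 *   val service = new WeblogDSLHttpService(hostInfo, summaryInfoFetcher, system, materializer, executionContext)
 *   service.start()
 *   sys.addShutdownHook(service.stop())
 * }}}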
31 | */ 32 | abstract class InteractiveQueryHttpService(hostInfo: HostInfo, 33 | actorSystem: ActorSystem, 34 | actorMaterializer: ActorMaterializer, 35 | ec: ExecutionContext) 36 | extends Directives with FailFastCirceSupport with LazyLogging { 37 | 38 | implicit val _actorSystem = actorSystem 39 | implicit val _actorMaterializer = actorMaterializer 40 | implicit val _ec = ec 41 | 42 | val myExceptionHandler = ExceptionHandler { 43 | case ex: Exception => 44 | extractUri { uri => 45 | logger.error(s"Request to $uri could not be handled normally", ex) 46 | complete(HttpResponse(InternalServerError, entity = "Request Failed!")) 47 | } 48 | } 49 | 50 | // define the routes 51 | val routes: Flow[HttpRequest, HttpResponse, Any] 52 | var bindingFuture: Future[Http.ServerBinding] = _ 53 | 54 | 55 | // start the http server 56 | def start(): Unit = { 57 | bindingFuture = Http().bindAndHandle(routes, hostInfo.host, hostInfo.port) 58 | 59 | bindingFuture.onComplete { 60 | case Success(serverBinding) => 61 | logger.info(s"Server bound to ${serverBinding.localAddress} ") 62 | 63 | case Failure(ex) => 64 | logger.error(s"Failed to bind to ${hostInfo.host}:${hostInfo.port}!", ex) 65 | actorSystem.terminate() 66 | } 67 | } 68 | 69 | 70 | // stop the http server 71 | def stop(): Unit = { 72 | logger.info("Stopping the http server") 73 | bindingFuture 74 | .flatMap(_.unbind()) 75 | .onComplete(_ => actorSystem.terminate()) 76 | } 77 | } 78 | 79 | -------------------------------------------------------------------------------- /examples/example-dsl/src/main/resources/application-dsl.conf.template: -------------------------------------------------------------------------------- 1 | akka { 2 | loglevel = INFO 3 | log-config-on-start = on 4 | loggers = ["akka.event.slf4j.Slf4jLogger"] 5 | logging-filter = "akka.event.slf4j.Slf4jLoggingFilter" 6 | event-handlers = ["akka.event.slf4j.Slf4jEventHandler"] 7 | } 8 | 9 | kafka { 10 | # true if use local kafka server 11 | # false otherwise 12 | # if true, then setting of brokers below is ignored and set to that of KafkaLocalServer 13 | localserver = true 14 | 15 | ## bootstrap servers for Kafka 16 | brokers = "localhost:9092" 17 | brokers = ${?KAFKA_BROKERS} 18 | 19 | ## consumer group 20 | group = "group-dsl" 21 | group = ${?KAFKA_GROUP_DSL} 22 | 23 | ## the source topic - processing starts with 24 | ## data in this topic (to be loaded by ingestion) 25 | fromtopic = "server-log-dsl" 26 | fromtopic = ${?KAFKA_FROM_TOPIC_DSL} 27 | 28 | ## processed records goes here in json of LogRecord 29 | totopic = "processed-log" 30 | totopic = ${?KAFKA_TO_TOPIC_DSL} 31 | 32 | ## this gets the avro serialized data from totopic for processing by Kafka Connect 33 | ## HDFS sink connector 34 | avrotopic = "avro-topic" 35 | avrotopic = ${?KAFKA_AVRO_TOPIC_DSL} 36 | 37 | ## summary access information gets pushed here 38 | summaryaccesstopic = "summary-access-log" 39 | summaryaccesstopic = ${?KAFKA_SUMMARY_ACCESS_TOPIC_DSL} 40 | 41 | ## windowed summary access information gets pushed here 42 | windowedsummaryaccesstopic = "windowed-summary-access-log" 43 | windowedsummaryaccesstopic = ${?KAFKA_WINDOWED_SUMMARY_ACCESS_TOPIC_DSL} 44 | 45 | ## summary payload information gets pushed here 46 | summarypayloadtopic = "summary-payload-log" 47 | summarypayloadtopic = ${?KAFKA_SUMMARY_PAYLOAD_TOPIC_DSL} 48 | 49 | ## windowed summary payload information gets pushed here 50 | windowedsummarypayloadtopic = "windowed-summary-payload-log" 51 | windowedsummarypayloadtopic = 
${?KAFKA_WINDOWED_SUMMARY_PAYLOAD_TOPIC_DSL} 52 | 53 | ## error topic for the initial processing 54 | errortopic = "logerr-dsl" 55 | errortopic = ${?KAFKA_ERROR_TOPIC_DSL} 56 | 57 | # schemaregistryurl = "http://localhost:8081" 58 | # schemaregistryurl = ${?SCHEMA_REGISTRY_URL} 59 | 60 | ## folder where state stores are created by Kafka Streams 61 | statestoredir = "/tmp/kafka-streams" 62 | statestoredir = ${?STATESTOREDIR} 63 | 64 | ## settings for data ingestion 65 | loader { 66 | sourcetopic = ${kafka.fromtopic} 67 | sourcetopic = ${?KAFKA_FROM_TOPIC_DSL} 68 | 69 | directorytowatch = "/Users/myhome/ClarkNet-HTTP" 70 | directorytowatch = ${?DIRECTORY_TO_WATCH} 71 | 72 | pollinterval = 1 second 73 | } 74 | } 75 | 76 | # http endpoints of the weblog microservice 77 | http { 78 | # The port the dashboard listens on 79 | port = 7070 80 | port = ${?PORT0} 81 | 82 | # The interface the dashboard listens on 83 | interface = "localhost" 84 | interface = ${?INTERFACE_DSL} 85 | } 86 | 87 | -------------------------------------------------------------------------------- /examples/kafka-local-server/src/main/scala/com/lightbend/kafka/scala/server/MessageSender.scala: -------------------------------------------------------------------------------- 1 | package com.lightbend.kafka.scala.server 2 | 3 | import org.apache.kafka.clients.producer.{ KafkaProducer, ProducerConfig, ProducerRecord, RecordMetadata } 4 | import java.util.Properties 5 | 6 | object MessageSender { 7 | private val ACKS_CONFIG = "all" // Blocking on the full commit of the record 8 | private val RETRIES_CONFIG = "1" // Number of retries on put 9 | private val BATCH_SIZE_CONFIG = "1024" // Buffers for unsent records for each partition - controls batching 10 | private val LINGER_MS_CONFIG = "1" // Timeout for more records to arrive - controls batching 11 | 12 | private val BUFFER_MEMORY_CONFIG = "1024000" // Controls the total amount of memory available to the producer for buffering. 13 | // If records are sent faster than they can be transmitted to the server then this 14 | // buffer space will be exhausted. When the buffer space is exhausted additional 15 | // send calls will block. The threshold for time to block is determined by max.block.ms 16 | // after which it throws a TimeoutException.
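  // Usage sketch (assumes a broker at localhost:9092 and Kafka's standard StringSerializer
  // for both key and value; the topic name and message are illustrative):
  //
  //   val sender = MessageSender[String, String](
  //     "localhost:9092",
  //     "org.apache.kafka.common.serialization.StringSerializer",
  //     "org.apache.kafka.common.serialization.StringSerializer")
  //   sender.writeValue("some-topic", "some log line")
  //   sender.close()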
17 | 18 | def providerProperties(brokers: String, keySerializer: String, valueSerializer: String): Properties = { 19 | val props = new Properties 20 | props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers) 21 | props.put(ProducerConfig.ACKS_CONFIG, ACKS_CONFIG) 22 | props.put(ProducerConfig.RETRIES_CONFIG, RETRIES_CONFIG) 23 | props.put(ProducerConfig.BATCH_SIZE_CONFIG, BATCH_SIZE_CONFIG) 24 | props.put(ProducerConfig.LINGER_MS_CONFIG, LINGER_MS_CONFIG) 25 | props.put(ProducerConfig.BUFFER_MEMORY_CONFIG, BUFFER_MEMORY_CONFIG) 26 | props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, keySerializer) 27 | props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, valueSerializer) 28 | props 29 | } 30 | 31 | def apply[K, V](brokers: String, keySerializer: String, valueSerializer: String): MessageSender[K, V] = 32 | new MessageSender[K, V](brokers, keySerializer, valueSerializer) 33 | } 34 | 35 | class MessageSender[K, V](val brokers: String, val keySerializer: String, val valueSerializer: String) { 36 | 37 | import MessageSender._ 38 | val producer = new KafkaProducer[K, V](providerProperties(brokers, keySerializer, valueSerializer)) 39 | 40 | def writeKeyValue(topic: String, key: K, value: V): Unit = { 41 | producer.send(new ProducerRecord[K, V](topic, key, value)).get 42 | producer.flush() 43 | } 44 | 45 | def writeValue(topic: String, value: V): Unit = { 46 | producer.send(new ProducerRecord[K, V](topic, null.asInstanceOf[K], value)).get 47 | producer.flush() 48 | } 49 | 50 | def batchWriteValue(topic: String, batch: Seq[V]): Seq[RecordMetadata] = { 51 | val result = batch.map(value => { 52 | producer.send(new ProducerRecord[K, V](topic, null.asInstanceOf[K], value)).get}) 53 | producer.flush() 54 | result 55 | } 56 | 57 | def close(): Unit = { 58 | producer.close() 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /examples/example-dsl/src/main/scala/com/lightbend/kafka/scala/iq/example/http/WeblogDSLHttpService.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 
3 | */ 4 | 5 | package com.lightbend.kafka.scala.iq.example 6 | package http 7 | 8 | import akka.actor.ActorSystem 9 | 10 | import akka.stream.ActorMaterializer 11 | 12 | import io.circe.generic.auto._ 13 | import io.circe.syntax._ 14 | 15 | import org.apache.kafka.streams.state.HostInfo 16 | 17 | import scala.concurrent.ExecutionContext 18 | import com.lightbend.kafka.scala.iq.http.InteractiveQueryHttpService 19 | 20 | 21 | class WeblogDSLHttpService( 22 | hostInfo: HostInfo, 23 | summaryInfoFetcher: SummaryInfoFetcher, 24 | actorSystem: ActorSystem, 25 | actorMaterializer: ActorMaterializer, 26 | ec: ExecutionContext 27 | ) extends InteractiveQueryHttpService(hostInfo, actorSystem, actorMaterializer, ec) { 28 | 29 | 30 | // define the routes 31 | val routes = handleExceptions(myExceptionHandler) { 32 | pathPrefix("weblog") { 33 | (get & pathPrefix("access" / "win") & path(Segment)) { hostKey => 34 | complete { 35 | summaryInfoFetcher.fetchWindowedAccessCountSummary(hostKey, 0, System.currentTimeMillis).map(_.asJson) 36 | } 37 | } ~ 38 | (get & pathPrefix("bytes" / "win") & path(Segment)) { hostKey => 39 | complete { 40 | summaryInfoFetcher.fetchWindowedPayloadSizeSummary(hostKey, 0, System.currentTimeMillis).map(_.asJson) 41 | } 42 | } ~ 43 | (get & pathPrefix("access" / "win" / Segment / LongNumber / LongNumber)) { (hostKey, fromTime, toTime) => 44 | complete { 45 | summaryInfoFetcher.fetchWindowedAccessCountSummary(hostKey, fromTime, toTime).map(_.asJson) 46 | } 47 | } ~ 48 | (get & pathPrefix("bytes" / "win" / Segment / LongNumber / LongNumber)) { (hostKey, fromTime, toTime) => 49 | complete { 50 | summaryInfoFetcher.fetchWindowedPayloadSizeSummary(hostKey, fromTime, toTime).map(_.asJson) 51 | } 52 | } ~ 53 | (get & pathPrefix("access" / "range" / Segment / Segment)) { (fromKey, toKey) => 54 | complete { 55 | summaryInfoFetcher.fetchRangeAccessCountSummary(fromKey, toKey).map(_.asJson) 56 | } 57 | } ~ 58 | (get & pathPrefix("bytes" / "range" / Segment / Segment)) { (fromKey, toKey) => 59 | complete { 60 | summaryInfoFetcher.fetchRangePayloadSizeSummary(fromKey, toKey).map(_.asJson) 61 | } 62 | } ~ 63 | (get & pathPrefix("access") & path(Segment)) { hostKey => 64 | complete { 65 | if (hostKey == "ALL") summaryInfoFetcher.fetchAllAccessCountSummary.map(_.asJson) 66 | else if (hostKey == "COUNT") summaryInfoFetcher.fetchApproxAccessCountNumEntries.map(_.asJson) 67 | else summaryInfoFetcher.fetchAccessCountSummary(hostKey).map(_.asJson) 68 | } 69 | } ~ 70 | (get & pathPrefix("bytes") & path(Segment)) { hostKey => 71 | complete { 72 | if (hostKey == "ALL") summaryInfoFetcher.fetchAllPayloadSizeSummary.map(_.asJson) 73 | else if (hostKey == "COUNT") summaryInfoFetcher.fetchApproxPayloadNumEntries.map(_.asJson) 74 | else summaryInfoFetcher.fetchPayloadSizeSummary(hostKey).map(_.asJson) 75 | } 76 | } 77 | } 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /lib/src/main/scala/com/lightbend/kafka/scala/iq/services/MetadataService.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | */ 4 | 5 | /* 6 | * Copyright Confluent Inc. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 
10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package com.lightbend.kafka.scala.iq 22 | package services 23 | 24 | import org.apache.kafka.common.serialization.Serializer 25 | import org.apache.kafka.streams.KafkaStreams 26 | import org.apache.kafka.streams.state.StreamsMetadata 27 | 28 | import scala.collection.JavaConverters._ 29 | import scala.util.{Failure, Success, Try} 30 | import com.typesafe.scalalogging.LazyLogging 31 | 32 | case class HostStoreInfo(host: String, port: Int, storeNames: Set[String]) 33 | 34 | /** 35 | * Looks up StreamsMetadata from KafkaStreams 36 | * Adapted from https://github.com/confluentinc/kafka-streams-examples/blob/4.0.0-post/src/main/java/io/confluent/examples/streams/interactivequeries/MetadataService.java 37 | */ 38 | class MetadataService(val streams: KafkaStreams) extends LazyLogging { 39 | 40 | /** 41 | * Get the metadata for all of the instances of this Kafka Streams application 42 | * @return List of {@link HostStoreInfo} 43 | */ 44 | def streamsMetadata(): List[HostStoreInfo] = { 45 | // Get metadata for all of the instances of this Kafka Streams application 46 | streams.allMetadata().asScala.toList.map(streamsMetadataToHostStoreInfo) 47 | } 48 | 49 | /** 50 | * Get the metadata for all instances of this Kafka Streams application that currently 51 | * has the provided store. 52 | * @param store The store to locate 53 | * @return List of {@link HostStoreInfo} 54 | */ 55 | def streamsMetadataForStore(store: String): List[HostStoreInfo] = { 56 | // Get metadata for all of the instances of this Kafka Streams application hosting the store 57 | streams.allMetadataForStore(store).asScala.toList.map(streamsMetadataToHostStoreInfo) 58 | } 59 | 60 | /** 61 | * Find the metadata for the instance of this Kafka Streams Application that has the given 62 | * store and would have the given key if it exists. 
63 | * @param store Store to find 64 | * @param key The key to find 65 | * @return {@link HostStoreInfo} 66 | */ 67 | def streamsMetadataForStoreAndKey[K](store: String, key: K, serializer: Serializer[K]): Try[HostStoreInfo] = { 68 | // Get metadata for the instances of this Kafka Streams application hosting the store and 69 | // potentially the value for key 70 | logger.info(s"Finding streams metadata for $store, $key, $serializer") 71 | streams.metadataForKey(store, key, serializer) match { 72 | case null => Failure(new IllegalArgumentException(s"Metadata for key $key not found in $store")) 73 | case metadata => Success(new HostStoreInfo(metadata.host, metadata.port, metadata.stateStoreNames.asScala.toSet)) 74 | } 75 | } 76 | 77 | private[services] val streamsMetadataToHostStoreInfo: StreamsMetadata => HostStoreInfo = metadata => { 78 | HostStoreInfo(metadata.host(), metadata.port(), metadata.stateStoreNames().asScala.toSet) 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /examples/project/Dependencies.scala: -------------------------------------------------------------------------------- 1 | import sbt._ 2 | import Keys._ 3 | import Versions._ 4 | 5 | object Dependencies { 6 | 7 | object Common { 8 | 9 | val ks = "com.lightbend" %% "kafka-streams-scala" % ksVersion exclude("org.slf4j", "slf4j-log4j12") 10 | val kq = "com.lightbend" %% "kafka-streams-query" % kqVersion exclude("org.slf4j", "slf4j-log4j12") 11 | val alpakka = "com.lightbend.akka" %% "akka-stream-alpakka-file" % alpakkaFileVersion 12 | val reactiveKafka = "com.typesafe.akka" %% "akka-stream-kafka" % reactiveKafkaVersion 13 | val akkaSlf4j = "com.typesafe.akka" %% "akka-slf4j" % akkaVersion 14 | val akkaStreams = "com.typesafe.akka" %% "akka-stream" % akkaVersion 15 | val akkaHttp = "com.typesafe.akka" %% "akka-http" % akkaHttpVersion 16 | val akkaHttpCirce = "de.heikoseeberger" %% "akka-http-circe" % akkaHttpCirceVersion 17 | val circeCore = "io.circe" %% "circe-core" % circeVersion 18 | val circeGeneric = "io.circe" %% "circe-generic" % circeVersion 19 | val circeParser = "io.circe" %% "circe-parser" % circeVersion 20 | val logback = "ch.qos.logback" % "logback-classic" % logbackVersion 21 | val scalaLogging = "com.typesafe.scala-logging" %% "scala-logging" % scalaLoggingVersion 22 | } 23 | 24 | object Dsl { 25 | 26 | val bijection = "com.twitter" %% "bijection-avro" % bijectionVersion 27 | val confluentAvro = "io.confluent" % "kafka-avro-serializer" % confluentPlatformVersion exclude("org.slf4j", "slf4j-log4j12") 28 | val kafka = "org.apache.kafka" %% "kafka" % kafkaVersion excludeAll(ExclusionRule("org.slf4j", "slf4j-log4j12"), ExclusionRule("org.apache.zookeeper", "zookeeper")) 29 | } 30 | 31 | object Proc { 32 | val algebird = "com.twitter" %% "algebird-core" % algebirdVersion 33 | val chill = "com.twitter" %% "chill" % chillVersion 34 | } 35 | 36 | object Server { 37 | val scalaLogging = "com.typesafe.scala-logging" %% "scala-logging" % scalaLoggingVersion 38 | val curator = "org.apache.curator" % "curator-test" % curatorVersion 39 | val kafkaStreams = "org.apache.kafka" % "kafka-streams" % kafkaVersion 40 | val kafka = "org.apache.kafka" %% "kafka" % kafkaVersion excludeAll(ExclusionRule("org.slf4j", "slf4j-log4j12"), ExclusionRule("org.apache.zookeeper", "zookeeper")) 41 | } 42 | 43 | val commonDependencies: Seq[ModuleID] = Seq(Common.ks, 44 | Common.kq, 45 | Common.alpakka, 46 | Common.reactiveKafka, 47 | Common.akkaSlf4j, 48 | Common.akkaStreams, 49 | 
Common.akkaHttp, 50 | Common.akkaHttpCirce, 51 | Common.circeCore, 52 | Common.circeGeneric, 53 | Common.circeParser, 54 | Common.logback, 55 | Common.scalaLogging 56 | ) 57 | 58 | val dslDependencies: Seq[ModuleID] = commonDependencies ++ Seq(Dsl.bijection, 59 | Dsl.confluentAvro, 60 | Dsl.kafka 61 | ) 62 | 63 | val procDependencies: Seq[ModuleID] = commonDependencies ++ Seq(Proc.algebird, Proc.chill) 64 | val serverDependencies: Seq[ModuleID] = Seq(Server.scalaLogging, Server.curator, Server.kafkaStreams, Server.kafka) 65 | } 66 | -------------------------------------------------------------------------------- /examples/example-proc/src/main/scala/com/lightbend/kafka/scala/iq/example/processor/BFStore.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | */ 4 | 5 | package com.lightbend.kafka.scala.iq.example 6 | package processor 7 | 8 | import com.twitter.algebird.{BloomFilterMonoid, BF, Hash128, Approximate} 9 | import org.apache.kafka.common.serialization.Serdes 10 | import org.apache.kafka.streams.processor.{ProcessorContext, StateStore} 11 | import org.apache.kafka.streams.state.StateSerdes 12 | 13 | /** 14 | * Bloom Filter as a StateStore. The only query it supports is membership. 15 | */ 16 | class BFStore[T: Hash128](override val name: String, 17 | val loggingEnabled: Boolean = true, 18 | val numHashes: Int = 6, 19 | val width: Int = 32, 20 | val seed: Int = 1) extends WriteableBFStore[T] with StateStore { 21 | 22 | private val bfMonoid = new BloomFilterMonoid[T](numHashes, width) 23 | 24 | /** 25 | * The "storage backend" of this store. 26 | * 27 | * Needs proper initializing in case the store's changelog is empty. 28 | */ 29 | private[processor] var bf: BF[T] = bfMonoid.zero 30 | 31 | private[processor] var changeLogger: BFStoreChangeLogger[Integer, BF[T]] = _ 32 | 33 | private[processor] val changelogKey = 42 34 | private final val ACCEPTABLE_PROBABILITY = 0.75 35 | 36 | private[processor] def bfFrom(items: Seq[T]): BF[T] = bfMonoid.create(items:_*) 37 | 38 | private[processor] def bfFrom(item: T): BF[T] = bfMonoid.create(item) 39 | 40 | @volatile private var open: Boolean = false 41 | 42 | /** 43 | * Initializes this store, including restoring the store's state from its changelog. 44 | */ 45 | override def init(context: ProcessorContext, root: StateStore): Unit = { 46 | val serdes = new StateSerdes[Integer, BF[T]]( 47 | name, 48 | Serdes.Integer(), 49 | BFSerde[T]) 50 | 51 | changeLogger = new BFStoreChangeLogger[Integer, BF[T]](name, context, serdes) 52 | 53 | // Note: We must manually guard with `loggingEnabled` here because `context.register()` ignores 54 | // that parameter. 55 | if (root != null && loggingEnabled) { 56 | context.register(root, loggingEnabled, (_, value) => { 57 | if (value == null) { 58 | bf = bfMonoid.zero 59 | } 60 | else { 61 | bf = serdes.valueFrom(value) 62 | } 63 | }) 64 | } 65 | 66 | open = true 67 | } 68 | 69 | def +(item: T): Unit = bf = bf + item 70 | 71 | def contains(item: T): Boolean = { 72 | val v = bf.contains(item) 73 | v.isTrue && v.withProb > ACCEPTABLE_PROBABILITY 74 | } 75 | 76 | def maybeContains(item: T): Boolean = bf.maybeContains(item) 77 | def size: Approximate[Long] = bf.size 78 | 79 | 80 | override val persistent: Boolean = false 81 | 82 | override def isOpen: Boolean = open 83 | 84 | /** 85 | * Periodically saves the latest BF state to Kafka. 
86 | * 87 | * =Implementation detail= 88 | * 89 | * The changelog records have the form: (hardcodedKey, BF). That is, we are backing up the 90 | * underlying CMS data structure in its entirety to Kafka. 91 | */ 92 | override def flush(): Unit = { 93 | // if (loggingEnabled) { 94 | // changeLogger.logChange(changelogKey, bf) 95 | // } 96 | } 97 | 98 | override def close(): Unit = { 99 | open = false 100 | } 101 | 102 | override def read(value: T): Boolean = contains(value) 103 | 104 | override def write(value: T): Unit = this + value 105 | 106 | } 107 | 108 | abstract class ReadableBFStore[T: Hash128] { 109 | def read(value: T): Boolean 110 | } 111 | 112 | abstract class WriteableBFStore[T: Hash128] extends ReadableBFStore[T] { 113 | def write(value: T): Unit 114 | } 115 | -------------------------------------------------------------------------------- /examples/kafka-local-server/src/main/scala/com/lightbend/kafka/scala/server/MessageListener.scala: -------------------------------------------------------------------------------- 1 | package com.lightbend.kafka.scala.server 2 | 3 | import org.apache.kafka.clients.consumer.{ ConsumerConfig, KafkaConsumer } 4 | import org.apache.kafka.streams.KeyValue 5 | import scala.collection.JavaConverters._ 6 | import scala.collection.mutable.ListBuffer 7 | 8 | 9 | object MessageListener { 10 | private val AUTO_COMMIT_INTERVAL_MS_CONFIG = "1000" // Frequency of offset commits 11 | private val SESSION_TIMEOUT_MS_CONFIG = "30000" // The timeout used to detect failures - should be greater then processing time 12 | private val MAX_POLL_RECORDS_CONFIG = "50" // Max number of records consumed in a single poll 13 | 14 | def consumerProperties(brokers: String, group: String, keyDeserializer: String, valueDeserializer: String): Map[String, AnyRef] = { 15 | Map[String, AnyRef]( 16 | ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> brokers, 17 | ConsumerConfig.GROUP_ID_CONFIG -> group, 18 | ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG -> "true", 19 | ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG -> AUTO_COMMIT_INTERVAL_MS_CONFIG, 20 | ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG -> SESSION_TIMEOUT_MS_CONFIG, 21 | ConsumerConfig.MAX_POLL_RECORDS_CONFIG -> MAX_POLL_RECORDS_CONFIG, 22 | ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> "latest", 23 | ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG -> keyDeserializer, 24 | ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG -> valueDeserializer 25 | ) 26 | } 27 | 28 | def apply[K, V](brokers: String, topic: String, group: String, keyDeserializer: String, valueDeserializer: String, 29 | processor: RecordProcessorTrait[K, V]): MessageListener[K, V] = 30 | new MessageListener[K, V](brokers, topic, group, keyDeserializer, valueDeserializer, processor) 31 | } 32 | 33 | class MessageListener[K, V]( 34 | brokers: String, 35 | topic: String, 36 | group: String, 37 | keyDeserializer: String, 38 | valueDeserializer: String, 39 | processor: RecordProcessorTrait[K, V]) { 40 | 41 | import MessageListener._ 42 | 43 | def readKeyValues(maxMessages: Int): List[KeyValue[K, V]] = { 44 | val pollIntervalMs = 100 45 | val maxTotalPollTimeMs = 2000 46 | var totalPollTimeMs = 0 47 | 48 | val consumer = new KafkaConsumer[K, V](consumerProperties(brokers, group, keyDeserializer, valueDeserializer).asJava) 49 | consumer.subscribe(Seq(topic).asJava) 50 | 51 | val consumedValues = ListBuffer.empty[KeyValue[K, V]] 52 | 53 | while (totalPollTimeMs < maxTotalPollTimeMs && continueConsuming(consumedValues.size, maxMessages)) { 54 | totalPollTimeMs = totalPollTimeMs + 
pollIntervalMs 55 | val records = consumer.poll(pollIntervalMs) 56 | records.asScala.foreach { record => 57 | processor.processRecord(record) 58 | consumedValues += new KeyValue(record.key, record.value) 59 | } 60 | } 61 | consumer.close() 62 | consumedValues.toList 63 | } 64 | 65 | def continueConsuming(messagesConsumed: Int, maxMessages: Int): Boolean = { 66 | maxMessages <= 0 || messagesConsumed < maxMessages 67 | } 68 | 69 | def waitUntilMinKeyValueRecordsReceived(expectedNumRecords: Int, waitTime: Long, 70 | startTime: Long = System.currentTimeMillis(), 71 | accumData: ListBuffer[KeyValue[K, V]] = ListBuffer.empty[KeyValue[K, V]]): List[KeyValue[K, V]] = { 72 | 73 | val readData = readKeyValues(-1) 74 | accumData ++= readData 75 | 76 | if (accumData.size >= expectedNumRecords) accumData.toList 77 | else if (System.currentTimeMillis() > startTime + waitTime) { 78 | throw new AssertionError( 79 | s"Expected $expectedNumRecords but received only ${accumData.size} records before timeout $waitTime ms") 80 | } else { 81 | Thread.sleep(Math.min(waitTime, 1000L)) 82 | waitUntilMinKeyValueRecordsReceived(expectedNumRecords, waitTime, startTime, accumData) 83 | } 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /examples/example-proc/src/main/scala/com/lightbend/kafka/scala/iq/example/config/KStreamConfig.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | */ 4 | 5 | package com.lightbend.kafka.scala.iq.example 6 | package config 7 | 8 | import cats.data._ 9 | import cats.instances.all._ 10 | 11 | import scala.util.Try 12 | import com.typesafe.config.Config 13 | import scala.concurrent.duration._ 14 | import com.lightbend.kafka.scala.server._ 15 | 16 | 17 | /** 18 | * This object wraps the native Java config APIs into a monadic 19 | * interpreter 20 | */ 21 | object KStreamConfig { 22 | 23 | private[KStreamConfig] case class KafkaSettings( 24 | serverSettings: ServerSettings, 25 | topicSettings: TopicSettings 26 | ) 27 | 28 | private[KStreamConfig] case class ServerSettings( 29 | localServer: Boolean, 30 | brokers: String, 31 | stateStoreDir: String 32 | ) 33 | 34 | private[KStreamConfig] case class TopicSettings( 35 | fromTopic: String, 36 | errorTopic: String 37 | ) 38 | 39 | private[KStreamConfig] case class HttpSettings( 40 | interface: String, 41 | port: Int 42 | ) 43 | 44 | private[KStreamConfig] case class DataLoaderSettings( 45 | sourceTopic: String, 46 | directoryToWatch: Option[String], 47 | pollInterval: FiniteDuration 48 | ) 49 | 50 | case class ConfigData(ks: KafkaSettings, hs: HttpSettings, dls: DataLoaderSettings) { 51 | def localServer = ks.serverSettings.localServer 52 | def brokers = ks.serverSettings.brokers 53 | def fromTopic = ks.topicSettings.fromTopic 54 | def errorTopic = ks.topicSettings.errorTopic 55 | def stateStoreDir = ks.serverSettings.stateStoreDir 56 | def httpInterface = hs.interface 57 | def httpPort = hs.port 58 | def sourceTopic = dls.sourceTopic 59 | def directoryToWatch = dls.directoryToWatch 60 | def pollInterval = dls.pollInterval 61 | } 62 | 63 | type ConfigReader[A] = ReaderT[Try, Config, A] 64 | 65 | private def getStringMaybe(config: Config, key: String): Option[String] = try { 66 | val str = config.getString(key) 67 | if (str.trim.isEmpty) None else Some(str) 68 | } catch { 69 | case _: Exception => None 70 | } 71 | 72 | private def fromKafkaConfig: ConfigReader[KafkaSettings] = Kleisli { (config: Config) => 73 | 
Try { 74 | val local = config.getBoolean("kafka.localserver") 75 | val serverSettings = 76 | if (local) { 77 | ServerSettings( 78 | local, 79 | s"localhost:${KafkaLocalServer.DefaultPort}", 80 | config.getString("kafka.statestoredir") 81 | ) 82 | } else { 83 | ServerSettings( 84 | local, 85 | config.getString("kafka.brokers"), 86 | config.getString("kafka.statestoredir") 87 | ) 88 | } 89 | KafkaSettings( 90 | serverSettings, 91 | TopicSettings( 92 | config.getString("kafka.fromtopic"), 93 | config.getString("kafka.errortopic") 94 | ) 95 | ) 96 | } 97 | } 98 | 99 | private def fromHttpConfig: ConfigReader[HttpSettings] = Kleisli { (config: Config) => 100 | Try { 101 | HttpSettings( 102 | config.getString("http.interface"), 103 | config.getInt("http.port") 104 | ) 105 | } 106 | } 107 | 108 | private def fromDataLoaderConfig: ConfigReader[DataLoaderSettings] = Kleisli { (config: Config) => 109 | Try { 110 | DataLoaderSettings( 111 | config.getString("kafka.loader.sourcetopic"), 112 | getStringMaybe(config, "kafka.loader.directorytowatch"), 113 | config.getDuration("kafka.loader.pollinterval") 114 | ) 115 | } 116 | } 117 | 118 | def fromConfig: ConfigReader[ConfigData] = for { 119 | k <- fromKafkaConfig 120 | h <- fromHttpConfig 121 | d <- fromDataLoaderConfig 122 | } yield ConfigData(k, h, d) 123 | } 124 | 125 | -------------------------------------------------------------------------------- /lib/src/main/scala/com/lightbend/kafka/scala/iq/services/LocalStateStoreQuery.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | */ 4 | 5 | package com.lightbend.kafka.scala.iq 6 | package services 7 | 8 | import org.apache.kafka.streams.KafkaStreams 9 | import org.apache.kafka.streams.state.{QueryableStoreType, QueryableStoreTypes, ReadOnlyKeyValueStore, ReadOnlyWindowStore} 10 | 11 | import scala.collection.JavaConverters._ 12 | import scala.concurrent.{ExecutionContext, Future} 13 | import scala.concurrent.duration._ 14 | import com.typesafe.scalalogging.LazyLogging 15 | import akka.actor.ActorSystem 16 | 17 | /** 18 | * Abstraction that supports query from a local state store. The query supports retry semantics if 19 | * invoked during Kafka Streams' rebalancing act when states may migrate across stores. 20 | */ 21 | class LocalStateStoreQuery[K, V] extends LazyLogging { 22 | 23 | final val MaxRetryCount = 10 24 | final val DelayBetweenRetries = 1.second 25 | 26 | /** 27 | * For all the following query methods, we need to implement a retry semantics when we invoke 28 | * `streams.store()`. This is because if the application is run in a distributed mode (multiple 29 | * instances), this function call can throw `InvalidStateStoreException` if state stores are being 30 | * migrated when the call is made. And migration is done when new instances of the application come up 31 | * or Kafka Streams does a rebalancing. 32 | * 33 | * In such cases we need to retry till the rebalancing is complete or we run out of retry count. 
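 * For example, a point query from an application service looks like this (sketch; the store
 * name and key are illustrative, and an implicit `ExecutionContext` and `ActorSystem` must be
 * in scope):
 * {{{
 *   val count: Future[Long] =
 *     localStateStoreQuery.queryStateStore(streams, "access-count-per-host", "world.std.com")
 * }}}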
34 | */ 35 | private def _retry[T](op: => T )(implicit ec: ExecutionContext, as: ActorSystem): Future[T] = { 36 | retry(op, DelayBetweenRetries, MaxRetryCount)(ec, as.scheduler) 37 | } 38 | 39 | /** 40 | * Query for a key 41 | */ 42 | def queryStateStore(streams: KafkaStreams, store: String, key: K) 43 | (implicit ex: ExecutionContext, as: ActorSystem): Future[V] = { 44 | 45 | val q: QueryableStoreType[ReadOnlyKeyValueStore[K, V]] = QueryableStoreTypes.keyValueStore() 46 | _retry(streams.store(store, q)).map(_.get(key)) 47 | } 48 | 49 | /** 50 | * Query all 51 | */ 52 | def queryStateStoreForAll(streams: KafkaStreams, store: String) 53 | (implicit ex: ExecutionContext, as: ActorSystem): Future[List[(K, V)]] = { 54 | 55 | def fetchNClose(rs: ReadOnlyKeyValueStore[K, V]) = { 56 | val kvi = rs.all 57 | val kvs = kvi.asScala.toList.map(kv => (kv.key, kv.value)) 58 | kvi.close() 59 | kvs 60 | } 61 | 62 | val q: QueryableStoreType[ReadOnlyKeyValueStore[K, V]] = QueryableStoreTypes.keyValueStore() 63 | _retry(streams.store(store, q)).map(fetchNClose) 64 | } 65 | 66 | /** 67 | * Query for a range of keys 68 | */ 69 | def queryStateStoreForRange(streams: KafkaStreams, store: String, fromKey: K, toKey: K) 70 | (implicit ex: ExecutionContext, as: ActorSystem): Future[List[(K, V)]] = { 71 | 72 | def fetchNClose(rs: ReadOnlyKeyValueStore[K, V]) = { 73 | val kvi = rs.range(fromKey, toKey) 74 | val kvs = kvi.asScala.toList.map(kv => (kv.key, kv.value)) 75 | kvi.close() 76 | kvs 77 | } 78 | 79 | val q: QueryableStoreType[ReadOnlyKeyValueStore[K, V]] = QueryableStoreTypes.keyValueStore() 80 | _retry(streams.store(store, q)).map(fetchNClose) 81 | } 82 | 83 | /** 84 | * Query approximate num entries 85 | */ 86 | def queryStateStoreForApproxNumEntries(streams: KafkaStreams, store: String) 87 | (implicit ex: ExecutionContext, as: ActorSystem): Future[Long] = { 88 | 89 | val q: QueryableStoreType[ReadOnlyKeyValueStore[K, V]] = QueryableStoreTypes.keyValueStore() 90 | _retry(streams.store(store, q)).map(_.approximateNumEntries) 91 | } 92 | 93 | /** 94 | * Query for a window 95 | */ 96 | def queryWindowedStateStore(streams: KafkaStreams, store: String, key: K, fromTime: Long, toTime: Long) 97 | (implicit ex: ExecutionContext, as: ActorSystem): Future[List[(Long, V)]] = { 98 | 99 | val q: QueryableStoreType[ReadOnlyWindowStore[K, V]] = QueryableStoreTypes.windowStore() 100 | 101 | _retry(streams.store(store, q)).map( 102 | _.fetch(key, fromTime, toTime) 103 | .asScala 104 | .toList 105 | .map(kv => (Long2long(kv.key), kv.value))) 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /examples/project/Common.scala: -------------------------------------------------------------------------------- 1 | import sbt._ 2 | import Keys._ 3 | 4 | object Common { 5 | 6 | val settings: Seq[Def.Setting[_]] = Seq( 7 | resolvers += "confluent" at "http://packages.confluent.io/maven/", 8 | scalaVersion := Versions.scalaVersion, 9 | scalacOptions ++= Seq( 10 | "-deprecation", // Emit warning and location for usages of deprecated APIs. 11 | "-encoding", "utf-8", // Specify character encoding used by source files. 12 | "-explaintypes", // Explain type errors in more detail. 13 | "-feature", // Emit warning and location for usages of features that should be imported explicitly. 
14 | "-language:existentials", // Existential types (besides wildcard types) can be written and inferred 15 | "-language:experimental.macros", // Allow macro definition (besides implementation and application) 16 | "-language:higherKinds", // Allow higher-kinded types 17 | "-language:implicitConversions", // Allow definition of implicit functions called views 18 | "-language:postfixOps", // Allow postfix operator 19 | "-unchecked", // Enable additional warnings where generated code depends on assumptions. 20 | "-Xcheckinit", // Wrap field accessors to throw an exception on uninitialized access. 21 | "-Xfatal-warnings", // Fail the compilation if there are any warnings. 22 | "-Xfuture", // Turn on future language features. 23 | "-Xlint:adapted-args", // Warn if an argument list is modified to match the receiver. 24 | "-Xlint:by-name-right-associative", // By-name parameter of right associative operator. 25 | "-Xlint:constant", // Evaluation of a constant arithmetic expression results in an error. 26 | "-Xlint:delayedinit-select", // Selecting member of DelayedInit. 27 | "-Xlint:doc-detached", // A Scaladoc comment appears to be detached from its element. 28 | "-Xlint:inaccessible", // Warn about inaccessible types in method signatures. 29 | "-Xlint:infer-any", // Warn when a type argument is inferred to be `Any`. 30 | "-Xlint:missing-interpolator", // A string literal appears to be missing an interpolator id. 31 | "-Xlint:nullary-override", // Warn when non-nullary `def f()' overrides nullary `def f'. 32 | "-Xlint:nullary-unit", // Warn when nullary methods return Unit. 33 | "-Xlint:option-implicit", // Option.apply used implicit view. 34 | "-Xlint:package-object-classes", // Class or object defined in package object. 35 | "-Xlint:poly-implicit-overload", // Parameterized overloaded implicit methods are not visible as view bounds. 36 | "-Xlint:private-shadow", // A private field (or class parameter) shadows a superclass field. 37 | "-Xlint:stars-align", // Pattern sequence wildcard must align with sequence component. 38 | "-Xlint:type-parameter-shadow", // A local type parameter shadows a type already in scope. 39 | "-Xlint:unsound-match", // Pattern match may not be typesafe. 40 | "-Yno-adapted-args", // Do not adapt an argument list (either by inserting () or creating a tuple) to match the receiver. 41 | "-Ypartial-unification", // Enable partial unification in type constructor inference 42 | "-Ywarn-dead-code", // Warn when dead code is identified. 43 | "-Ywarn-extra-implicit", // Warn when more than one implicit parameter section is defined. 44 | "-Ywarn-inaccessible", // Warn about inaccessible types in method signatures. 45 | "-Ywarn-infer-any", // Warn when a type argument is inferred to be `Any`. 46 | "-Ywarn-nullary-override", // Warn when non-nullary `def f()' overrides nullary `def f'. 47 | "-Ywarn-nullary-unit", // Warn when nullary methods return Unit. 48 | "-Ywarn-unused:implicits", // Warn if an implicit parameter is unused. 49 | "-Ywarn-unused:locals", // Warn if a local definition is unused. 50 | "-Ywarn-unused:params", // Warn if a value parameter is unused. 51 | "-Ywarn-unused:patvars", // Warn if a variable bound in a pattern is unused. 52 | "-Ywarn-unused:privates", // Warn if a private member is unused. 53 | "-Ywarn-value-discard" // Warn when non-Unit expression results are unused. 
54 | ) 55 | ) 56 | } 57 | -------------------------------------------------------------------------------- /examples/example-proc/src/main/scala/com/lightbend/kafka/scala/iq/example/WeblogDriver.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | */ 4 | 5 | package com.lightbend.kafka.scala.iq.example 6 | 7 | import java.util.Properties 8 | import java.util.concurrent.Executors 9 | 10 | import scala.concurrent.ExecutionContext 11 | 12 | import akka.actor.ActorSystem 13 | import akka.stream.ActorMaterializer 14 | 15 | import org.apache.kafka.streams.Topology 16 | import org.apache.kafka.streams.state.HostInfo 17 | import org.apache.kafka.streams.{ StreamsConfig, KafkaStreams } 18 | import org.apache.kafka.common.serialization.Serdes 19 | import org.apache.kafka.clients.consumer.ConsumerConfig; 20 | 21 | import config.KStreamConfig._ 22 | import http.{ WeblogProcHttpService, BFValueFetcher } 23 | import services.AppStateStoreQuery 24 | import processor.{ BFStoreSupplier, BFStoreBuilder, WeblogProcessor } 25 | 26 | import com.lightbend.kafka.scala.iq.services.MetadataService 27 | import com.lightbend.kafka.scala.iq.http.HttpRequester 28 | 29 | object WeblogDriver extends WeblogWorkflow { 30 | 31 | final val LOG_COUNT_STATE_STORE = "log-counts" 32 | 33 | def main(args: Array[String]): Unit = workflow() 34 | 35 | override def startRestProxy(streams: KafkaStreams, hostInfo: HostInfo, 36 | actorSystem: ActorSystem, materializer: ActorMaterializer): WeblogProcHttpService = { 37 | 38 | implicit val system = actorSystem 39 | 40 | lazy val defaultParallelism: Int = { 41 | val rt = Runtime.getRuntime() 42 | rt.availableProcessors() * 4 43 | } 44 | 45 | def defaultExecutionContext(parallelism: Int = defaultParallelism): ExecutionContext = 46 | ExecutionContext.fromExecutor(Executors.newFixedThreadPool(parallelism)) 47 | 48 | val executionContext = defaultExecutionContext() 49 | 50 | // service for fetching metadata information 51 | val metadataService = new MetadataService(streams) 52 | 53 | // service for fetching from local state store 54 | val localStateStoreQuery = new AppStateStoreQuery[String, Long] 55 | 56 | // http service for request handling 57 | val httpRequester = new HttpRequester(system, materializer, executionContext) 58 | 59 | val restService = new WeblogProcHttpService( 60 | hostInfo, 61 | new BFValueFetcher(metadataService, localStateStoreQuery, httpRequester, streams, executionContext, hostInfo), 62 | system, materializer, executionContext 63 | ) 64 | restService.start() 65 | restService 66 | } 67 | 68 | override def createStreams(config: ConfigData): KafkaStreams = { 69 | val changelogConfig = { 70 | val cfg = new java.util.HashMap[String, String] 71 | val segmentSizeBytes = (20 * 1024 * 1024).toString 72 | cfg.put("segment.bytes", segmentSizeBytes) 73 | cfg 74 | } 75 | 76 | // Kafka stream configuration 77 | val streamingConfig = { 78 | val settings = new Properties 79 | settings.put(StreamsConfig.APPLICATION_ID_CONFIG, "kstream-log-count") 80 | settings.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, config.brokers) 81 | settings.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String.getClass.getName) 82 | settings.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String.getClass.getName) 83 | 84 | // setting offset reset to earliest so that we can re-run the demo code with the same pre-loaded data 85 | // Note: To re-run the demo, you need to use the offset reset tool: 86 | // 
https://cwiki.apache.org/confluence/display/KAFKA/Kafka+Streams+Application+Reset+Tool 87 | settings.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest") 88 | 89 | // need this for query service 90 | val endpointHostName = translateHostInterface(config.httpInterface) 91 | logger.info(s"Endpoint host name $endpointHostName") 92 | 93 | settings.put(StreamsConfig.APPLICATION_SERVER_CONFIG, s"$endpointHostName:${config.httpPort}") 94 | 95 | // default is /tmp/kafka-streams 96 | settings.put(StreamsConfig.STATE_DIR_CONFIG, config.stateStoreDir) 97 | 98 | // Set the commit interval to 500ms so that any changes are flushed frequently and the summary 99 | // data are updated with low latency. 100 | settings.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, "500"); 101 | 102 | settings 103 | } 104 | 105 | val topology: Topology = new Topology() 106 | topology.addSource("Source", config.fromTopic) 107 | .addProcessor("Process", WeblogProcessorSupplier, "Source") 108 | .addStateStore( 109 | new BFStoreBuilder[String](new BFStoreSupplier[String](LOG_COUNT_STATE_STORE, stringSerde, true, changelogConfig)), 110 | "Process" 111 | ) 112 | 113 | new KafkaStreams(topology, streamingConfig) 114 | } 115 | } 116 | 117 | import org.apache.kafka.streams.processor.ProcessorSupplier 118 | object WeblogProcessorSupplier extends ProcessorSupplier[String, String] { 119 | override def get(): WeblogProcessor = new WeblogProcessor() 120 | } 121 | -------------------------------------------------------------------------------- /lib/README.md: -------------------------------------------------------------------------------- 1 | # HTTP Layer for Interactive Queries in Kafka Streams 2 | 3 | Kafka Streams' stateful streaming creates and uses local state information in the node where the application is running. If the application runs in a distributed mode on multiple nodes, then each node contains the respective state information. Kafka Streams does not publish any unifying API that allows you to query across all the nodes for the state information. However it has a set of infrastructure components that can be used to implement a query service based on your favorite end points. 4 | 5 | Interactive Queries were introduced on version `0.10.1` and the main goal is stated as follows: 6 | 7 | > This feature allows you to treat the stream processing layer as a lightweight embedded database and, more concretely, to directly query the latest state of your stream processing application, without needing to materialize that state to external databases or external storage first. 8 | 9 | However Kafka Streams documentation also makes it clear that the query layer for the global state of your application does not come out of the box. 10 | 11 | > Kafka Streams provides all the required functionality for interactively querying your application’s state out of the box, with but one exception: if you want to expose your application’s full state via interactive queries, then – for reasons we explain further down below – it is your responsibility to add an appropriate RPC layer to your application that allows application instances to communicate over the network. If, however, you only need to let your application instances access their own local state, then you do not need to add such an RPC layer at all. 12 | 13 | The goal of this small library is to offer such a query layer based on [akka-http](https://doc.akka.io/docs/akka-http/current/scala/http/). 
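For context, querying an application instance's *local* state with the raw Kafka Streams API looks roughly like the sketch below (the store name and the key/value types are made up for illustration); the `services` and `http` layers of this library wrap calls like this and add the metadata lookup, HTTP endpoints and retry machinery on top:

```scala
import org.apache.kafka.streams.KafkaStreams
import org.apache.kafka.streams.state.{ QueryableStoreTypes, ReadOnlyKeyValueStore }

// Query this instance's local slice of a key-value state store.
// `streams` is an already-started KafkaStreams instance; `get` returns null
// if the key is not hosted on this instance.
def localAccessCount(streams: KafkaStreams, hostKey: String): java.lang.Long = {
  val store: ReadOnlyKeyValueStore[String, java.lang.Long] =
    streams.store("access-count-per-host", QueryableStoreTypes.keyValueStore[String, java.lang.Long]())
  store.get(hostKey)
}
```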
14 | 15 | ## Quick Start 16 | 17 | `kafka-streams-query` is published and cross-built for Scala `2.11` and `2.12`, so you can just add the following to your build: 18 | 19 | ```scala 20 | val kafka_streams_query_version = "0.1.1" 21 | 22 | libraryDependencies ++= Seq("com.lightbend" %% 23 | "kafka-streams-query" % kafka_streams_query_version) 24 | ``` 25 | 26 | > Note: `kafka-streams-query` supports Kafka Streams `1.0.0`. 27 | 28 | The API docs for `kafka-streams-query` are available [here](https://developer.lightbend.com/docs/api/kafka-streams-query/0.1.1/com/lightbend/kafka/scala/iq) for Scala 2.12 and [here](https://developer.lightbend.com/docs/api/kafka-streams-query_2.11/0.1.1/#package) for Scala 2.11. 29 | 30 | ## The Library 31 | 32 | The library is organized around 3 main packages: 33 | 34 | 1. `http`: The main end point implementations, including the class `InteractiveQueryHttpService`, which provides methods for starting and stopping the HTTP service. The other classes provided are `HttpRequester`, which handles the request, does some validations and forwards the request to `KeyValueFetcher`, which invokes the actual service for fetching the state information. 35 | 2. `services`: This layer interacts with the underlying Kafka Streams APIs to fetch data from the local state. The 2 classes in this layer are (a) `MetadataService`, which uses the Kafka Streams API to fetch the metadata for the state, and (b) `LocalStateStoreQuery`, which does the actual query for the state. 36 | 3. `serializers`: A set of serializers useful for application development that help you serialize your model structures. 37 | 38 | ## Distributed Query 39 | 40 | If the application is run in a distributed mode across multiple physical nodes, local state information is spread across all the nodes. The `http` services that the library offers can handle this and provide a unified view of the global application state. 41 | 42 | Consider the following scenario: 43 | 44 | 1. The application is deployed on 3 nodes with IPs `ip1`, `ip2` and `ip3`. Assuming the application uses this library, the HTTP services run on port `7070` on each of the nodes. 45 | 2. The user queries for some information (say, the value for a specific key) from the HTTP endpoint at `http://ip1:7070`. 46 | 47 | It may so happen that the key she is looking for does not reside on host `ip1`. The query service handles this situation by interacting with the `MetadataService` as follows: 48 | 49 | 1. User queries from host `ip1` 50 | 2. Check `MetadataService` to get information about the `key` that the user is looking for 51 | 3. If the metadata for the key indicates that the data is part of the local state in `ip1`, then we are done. Return the query result 52 | 4. Otherwise, get the host information from the metadata where this state resides 53 | 5. Query the appropriate node by reissuing the HTTP request to get the state information 54 | 55 | ## Handling Rebalancing of Partitions 56 | 57 | It may so happen that when the user issues the query, Kafka Streams is in the middle of a partition rebalancing, during which state may migrate from one store (node) to another. In such a situation Kafka Streams throws `InvalidStateStoreException`. 58 | 59 | Migration is typically done when new instances of the application come up or Kafka Streams does a rebalancing. The library handles such situations through retry semantics. The query API will continue to retry until the rebalancing is complete or the retry count is exhausted.
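Putting the pieces together, an application-side fetcher that implements the five steps above typically composes `MetadataService`, `LocalStateStoreQuery` and `HttpRequester` roughly as in the sketch below. This is a minimal illustration rather than library code: the class name `CountFetcher`, the `String`-key/`Long`-value store layout and the URL scheme are assumptions (the bundled examples implement the same flow in `KeyValueFetcher`/`SummaryInfoFetcher` and `BFValueFetcher`):

```scala
import akka.actor.ActorSystem
import de.heikoseeberger.akkahttpcirce.FailFastCirceSupport._
import org.apache.kafka.common.serialization.Serdes
import org.apache.kafka.streams.KafkaStreams
import org.apache.kafka.streams.state.HostInfo

import scala.concurrent.{ ExecutionContext, Future }
import scala.util.{ Failure, Success }

import com.lightbend.kafka.scala.iq.http.HttpRequester
import com.lightbend.kafka.scala.iq.services.{ HostStoreInfo, LocalStateStoreQuery, MetadataService }

class CountFetcher(
  metadataService: MetadataService,
  localQuery: LocalStateStoreQuery[String, Long],
  httpRequester: HttpRequester,
  streams: KafkaStreams,
  hostInfo: HostInfo)(implicit system: ActorSystem, ec: ExecutionContext) {

  private val stringSerializer = Serdes.String().serializer()

  private def isThisHost(host: HostStoreInfo): Boolean =
    host.host == hostInfo.host && host.port == hostInfo.port

  // Steps 2-5: locate the instance owning the key via the streams metadata, then
  // either query the local state store or re-issue the request to the owning instance.
  def fetchCount(store: String, key: String): Future[Long] =
    metadataService.streamsMetadataForStoreAndKey(store, key, stringSerializer) match {
      case Success(host) if isThisHost(host) => localQuery.queryStateStore(streams, store, key)
      case Success(host)                     => httpRequester.queryFromHost[Long](host, s"/count/$store/$key")
      case Failure(ex)                       => Future.failed(ex)
    }
}
```

Note that the bundled examples additionally translate a `0.0.0.0` bind address to the actual host address (see `translateHostInterface`) before comparing host info, which a production fetcher should do as well.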
60 | -------------------------------------------------------------------------------- /examples/example-dsl/src/main/scala/com/lightbend/kafka/scala/iq/example/config/KStreamConfig.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | */ 4 | 5 | package com.lightbend.kafka.scala.iq.example 6 | package config 7 | 8 | import cats.data._ 9 | import cats.instances.all._ 10 | 11 | import scala.util.Try 12 | import com.typesafe.config.Config 13 | import scala.concurrent.duration._ 14 | import com.lightbend.kafka.scala.server._ 15 | 16 | 17 | /** 18 | * This object wraps the native Java config APIs into a monadic 19 | * interpreter 20 | */ 21 | object KStreamConfig { 22 | 23 | private[KStreamConfig] case class KafkaSettings( 24 | serverSettings: ServerSettings, 25 | topicSettings: TopicSettings 26 | ) 27 | 28 | private[KStreamConfig] case class ServerSettings( 29 | localServer: Boolean, 30 | brokers: String, 31 | schemaRegistryUrl: Option[String], 32 | stateStoreDir: String 33 | ) 34 | 35 | private[KStreamConfig] case class TopicSettings( 36 | fromTopic: String, 37 | errorTopic: String, 38 | toTopic: String, 39 | avroTopic: String, 40 | summaryAccessTopic: String, 41 | windowedSummaryAccessTopic: String, 42 | summaryPayloadTopic: String, 43 | windowedSummaryPayloadTopic: String 44 | ) 45 | 46 | private[KStreamConfig] case class HttpSettings( 47 | interface: String, 48 | port: Int 49 | ) 50 | 51 | private[KStreamConfig] case class DataLoaderSettings( 52 | sourceTopic: String, 53 | directoryToWatch: Option[String], 54 | pollInterval: FiniteDuration 55 | ) 56 | 57 | case class ConfigData(ks: KafkaSettings, hs: HttpSettings, dls: DataLoaderSettings) { 58 | def localServer = ks.serverSettings.localServer 59 | def brokers = ks.serverSettings.brokers 60 | def schemaRegistryUrl = ks.serverSettings.schemaRegistryUrl 61 | def fromTopic = ks.topicSettings.fromTopic 62 | def toTopic = ks.topicSettings.toTopic 63 | def avroTopic = ks.topicSettings.avroTopic 64 | def summaryAccessTopic = ks.topicSettings.summaryAccessTopic 65 | def windowedSummaryAccessTopic = ks.topicSettings.windowedSummaryAccessTopic 66 | def summaryPayloadTopic = ks.topicSettings.summaryPayloadTopic 67 | def windowedSummaryPayloadTopic = ks.topicSettings.windowedSummaryPayloadTopic 68 | def errorTopic = ks.topicSettings.errorTopic 69 | def stateStoreDir = ks.serverSettings.stateStoreDir 70 | def httpInterface = hs.interface 71 | def httpPort = hs.port 72 | def sourceTopic = dls.sourceTopic 73 | def directoryToWatch = dls.directoryToWatch 74 | def pollInterval = dls.pollInterval 75 | } 76 | 77 | type ConfigReader[A] = ReaderT[Try, Config, A] 78 | 79 | private def getStringMaybe(config: Config, key: String): Option[String] = try { 80 | val str = config.getString(key) 81 | if (str.trim.isEmpty) None else Some(str) 82 | } catch { 83 | case _: Exception => None 84 | } 85 | 86 | private def fromKafkaConfig: ConfigReader[KafkaSettings] = Kleisli { (config: Config) => 87 | Try { 88 | val local = config.getBoolean("kafka.localserver") 89 | val serverSettings = 90 | if (local) { 91 | ServerSettings( 92 | local, 93 | s"localhost:${KafkaLocalServer.DefaultPort}", 94 | getStringMaybe(config, "kafka.schemaregistryurl"), 95 | config.getString("kafka.statestoredir") 96 | ) 97 | } else { 98 | ServerSettings( 99 | local, 100 | config.getString("kafka.brokers"), 101 | getStringMaybe(config, "kafka.schemaregistryurl"), 102 | config.getString("kafka.statestoredir") 103 | ) 
104 | } 105 | 106 | KafkaSettings( 107 | serverSettings, 108 | TopicSettings( 109 | config.getString("kafka.fromtopic"), 110 | config.getString("kafka.errortopic"), 111 | config.getString("kafka.totopic"), 112 | config.getString("kafka.avrotopic"), 113 | config.getString("kafka.summaryaccesstopic"), 114 | config.getString("kafka.windowedsummaryaccesstopic"), 115 | config.getString("kafka.summarypayloadtopic"), 116 | config.getString("kafka.windowedsummarypayloadtopic") 117 | ) 118 | ) 119 | } 120 | } 121 | 122 | private def fromHttpConfig: ConfigReader[HttpSettings] = Kleisli { (config: Config) => 123 | Try { 124 | HttpSettings( 125 | config.getString("http.interface"), 126 | config.getInt("http.port") 127 | ) 128 | } 129 | } 130 | 131 | private def fromDataLoaderConfig: ConfigReader[DataLoaderSettings] = Kleisli { (config: Config) => 132 | Try { 133 | DataLoaderSettings( 134 | config.getString("kafka.loader.sourcetopic"), 135 | getStringMaybe(config, "kafka.loader.directorytowatch"), 136 | config.getDuration("kafka.loader.pollinterval") 137 | ) 138 | } 139 | } 140 | 141 | def fromConfig: ConfigReader[ConfigData] = for { 142 | k <- fromKafkaConfig 143 | h <- fromHttpConfig 144 | d <- fromDataLoaderConfig 145 | } yield ConfigData(k, h, d) 146 | } 147 | 148 | -------------------------------------------------------------------------------- /examples/build.sbt: -------------------------------------------------------------------------------- 1 | import sbtassembly.MergeStrategy 2 | import NativePackagerHelper._ 3 | 4 | name := "QueryExampleProject-root" 5 | 6 | version in ThisBuild := "0.1.1" 7 | 8 | scalaVersion := Versions.scalaVersion 9 | 10 | def appProject(id: String)(base:String = id) = Project(id, base = file(base)) 11 | .enablePlugins(JavaAppPackaging) 12 | 13 | // standalone run of the dsl example application 14 | lazy val dslRun = (project in file("./example-dsl")) 15 | .settings(Common.settings: _*) 16 | .settings(libraryDependencies ++= Dependencies.dslDependencies) 17 | .settings ( 18 | fork in run := true, 19 | mainClass in Compile := Some("com.lightbend.kafka.scala.iq.example.WeblogProcessing"), 20 | scalacOptions := Seq("-Xexperimental", "-unchecked", "-deprecation", "-Ywarn-unused-import"), 21 | javaOptions in run ++= Seq( 22 | "-Dconfig.file=" + (resourceDirectory in Compile).value / "application-dsl.conf", 23 | "-Dlogback.configurationFile=" + (resourceDirectory in Compile).value / "logback-dsl.xml", 24 | "-Dlog4j.configurationFile=" + (resourceDirectory in Compile).value / "log4j.properties"), 25 | (sourceDirectory in AvroConfig) := baseDirectory.value / "src/main/resources/com/lightbend/kafka/scala/iq/example", 26 | (stringType in AvroConfig) := "String", 27 | addCommandAlias("dsl", "dslRun/run") 28 | ) 29 | .dependsOn(server) 30 | 31 | // packaged run of the dsl example application 32 | lazy val dslPackage = appProject("dslPackage")("build/dsl") 33 | .settings( 34 | scalaVersion := Versions.scalaVersion, 35 | resourceDirectory in Compile := (resourceDirectory in (dslRun, Compile)).value, 36 | mappings in Universal ++= { 37 | Seq(((resourceDirectory in Compile).value / "application-dsl.conf") -> "conf/application.conf") ++ 38 | Seq(((resourceDirectory in Compile).value / "logback-dsl.xml") -> "conf/logback.xml") ++ 39 | Seq(((resourceDirectory in Compile).value / "log4j.properties") -> "conf/log4j.properties") 40 | }, 41 | assemblyMergeStrategy in assembly := { 42 | case PathList("application-dsl.conf") => MergeStrategy.discard 43 | case PathList("logback-dsl.xml") => 
MergeStrategy.discard 44 | case PathList("META-INF", "MANIFEST.MF") => MergeStrategy.discard 45 | case PathList("META-INF", xs @ _*) => MergeStrategy.last 46 | case PathList("META-INF", "io.netty.versions.properties") => MergeStrategy.last 47 | case x => 48 | val oldStrategy = (assemblyMergeStrategy in assembly).value 49 | oldStrategy(x) 50 | }, 51 | scriptClasspath := Seq("../conf/") ++ scriptClasspath.value, 52 | mainClass in Compile := Some("com.lightbend.kafka.scala.iq.example.WeblogProcessing") 53 | ) 54 | .dependsOn(server, dslRun) 55 | 56 | // standalone run of the proc example application 57 | lazy val procRun = (project in file("./example-proc")) 58 | .settings(Common.settings: _*) 59 | .settings(libraryDependencies ++= Dependencies.procDependencies) 60 | .settings ( 61 | fork in run := true, 62 | mainClass in Compile := Some("com.lightbend.kafka.scala.iq.example.WeblogDriver"), 63 | scalacOptions := Seq("-Xexperimental", "-unchecked", "-deprecation", "-Ywarn-unused-import"), 64 | javaOptions in run ++= Seq( 65 | "-Dconfig.file=" + (resourceDirectory in Compile).value / "application-proc.conf", 66 | "-Dlogback.configurationFile=" + (resourceDirectory in Compile).value / "logback-proc.xml", 67 | "-Dlog4j.configurationFile=" + (resourceDirectory in Compile).value / "log4j.properties"), 68 | addCommandAlias("proc", "procRun/run") 69 | ) 70 | .dependsOn(server) 71 | 72 | // packaged run of the proc example application 73 | lazy val procPackage = appProject("procPackage")("build/proc") 74 | .settings( 75 | scalaVersion := Versions.scalaVersion, 76 | resourceDirectory in Compile := (resourceDirectory in (procRun, Compile)).value, 77 | mappings in Universal ++= { 78 | Seq(((resourceDirectory in Compile).value / "application-proc.conf") -> "conf/application.conf") ++ 79 | Seq(((resourceDirectory in Compile).value / "logback-proc.xml") -> "conf/logback.xml") ++ 80 | Seq(((resourceDirectory in Compile).value / "log4j.properties") -> "conf/log4j.properties") 81 | }, 82 | assemblyMergeStrategy in assembly := { 83 | case PathList("application-proc.conf") => MergeStrategy.discard 84 | case PathList("logback-proc.xml") => MergeStrategy.discard 85 | case PathList("META-INF", "MANIFEST.MF") => MergeStrategy.discard 86 | case PathList("META-INF", xs @ _*) => MergeStrategy.last 87 | case PathList("META-INF", "io.netty.versions.properties") => MergeStrategy.last 88 | case x => 89 | val oldStrategy = (assemblyMergeStrategy in assembly).value 90 | oldStrategy(x) 91 | }, 92 | scriptClasspath := Seq("../conf/") ++ scriptClasspath.value, 93 | mainClass in Compile := Some("com.lightbend.kafka.scala.iq.example.WeblogDriver") 94 | ) 95 | .dependsOn(server, procRun) 96 | 97 | lazy val server = (project in file("./kafka-local-server")). 98 | settings(Common.settings: _*). 99 | settings(libraryDependencies ++= Dependencies.serverDependencies) 100 | 101 | lazy val root = (project in file(".")). 102 | aggregate(dslRun, dslPackage, procRun, procPackage, server) 103 | -------------------------------------------------------------------------------- /examples/example-proc/src/main/scala/com/lightbend/kafka/scala/iq/example/WeblogWorkflow.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 
3 | */ 4 | 5 | package com.lightbend.kafka.scala.iq.example 6 | 7 | import com.typesafe.config.ConfigFactory 8 | 9 | import java.util.concurrent.TimeUnit 10 | 11 | import akka.actor.ActorSystem 12 | import akka.stream.ActorMaterializer 13 | 14 | import org.apache.kafka.streams.KafkaStreams 15 | import org.apache.kafka.streams.state.HostInfo 16 | 17 | import scala.util.{ Success, Failure } 18 | import scala.concurrent.duration._ 19 | import sys.process._ 20 | import com.typesafe.scalalogging.LazyLogging 21 | 22 | import config.KStreamConfig._ 23 | import serializers._ 24 | 25 | import com.lightbend.kafka.scala.iq.http.InteractiveQueryHttpService 26 | 27 | import ingestion.DataIngestion 28 | import com.lightbend.kafka.scala.server._ 29 | 30 | trait WeblogWorkflow extends LazyLogging with AppSerializers { 31 | 32 | def workflow(): Unit = { 33 | 34 | // get config info 35 | val config: ConfigData = fromConfig(ConfigFactory.load()) match { 36 | case Success(c) => c 37 | case Failure(ex) => throw ex 38 | } 39 | 40 | logger.info(s"config = $config") 41 | val maybeServer = startLocalServerIfSetInConfig(config) 42 | 43 | // setup REST endpoints 44 | val restEndpointPort = config.httpPort 45 | val restEndpointHostName = config.httpInterface 46 | val restEndpoint = new HostInfo(restEndpointHostName, restEndpointPort) 47 | 48 | logger.info("Connecting to Kafka cluster via bootstrap servers " + config.brokers) 49 | logger.warn("REST endpoint at http://" + restEndpointHostName + ":" + restEndpointPort) 50 | println("Connecting to Kafka cluster via bootstrap servers " + config.brokers) 51 | println("REST endpoint at http://" + restEndpointHostName + ":" + restEndpointPort) 52 | 53 | implicit val system = ActorSystem() 54 | implicit val materializer = ActorMaterializer() 55 | 56 | import system.dispatcher 57 | 58 | // register for data ingestion 59 | // whenever we find new / changed files in the configured location, we run data loading 60 | // However `directoryToWatch` may not be set if we are trying to run the application in 61 | // distributed mode with multiple instances. In that case only one instance will do the ingestion 62 | // and for subsequent instances of the application, we don't need to do the ingestion. 63 | // Ingestion can be done only from one instance 64 | config.directoryToWatch.foreach { d => 65 | DataIngestion.registerForIngestion(config) 66 | 67 | // schedule a run by touching the data folder 68 | system.scheduler.scheduleOnce(1 minute) { 69 | Seq("/bin/sh", "-c", s"touch $d/*").! 70 | () 71 | } 72 | } 73 | 74 | // set up the topology 75 | val streams: KafkaStreams = createStreams(config) 76 | 77 | // Need to be done for running the application after resetting the state store 78 | // should not be done in production 79 | streams.cleanUp() 80 | 81 | // Start the Restful proxy for servicing remote access to state stores 82 | val restService = startRestProxy(streams, restEndpoint, system, materializer) 83 | 84 | // need to exit for any stream exception 85 | // mesos will restart the application 86 | streams.setUncaughtExceptionHandler(new Thread.UncaughtExceptionHandler() { 87 | override def uncaughtException(t: Thread, e: Throwable): Unit = try { 88 | logger.error(s"Stream terminated because of uncaught exception .. 
Shutting down app", e) 89 | restService.stop() 90 | logger.error(s"Stopping streams service ..") 91 | val closed = streams.close(1, TimeUnit.MINUTES) 92 | logger.error(s"Exiting application after streams close ($closed)") 93 | } catch { 94 | case x: Exception => x.printStackTrace 95 | } finally { 96 | logger.error("Exiting application ..") 97 | logger.error(s"Stopping kafka server ..") 98 | maybeServer.foreach(_.stop()) 99 | System.exit(-1) 100 | } 101 | }) 102 | 103 | // Now that we have finished the definition of the processing topology we can actually run 104 | // it via `start()`. The Streams application as a whole can be launched just like any 105 | // normal Java application that has a `main()` method. 106 | streams.start() 107 | 108 | // Add shutdown hook to respond to SIGTERM and gracefully close Kafka Streams 109 | Runtime.getRuntime().addShutdownHook(new Thread(() => try { 110 | restService.stop() 111 | val closed = streams.close(1, TimeUnit.MINUTES) 112 | logger.error(s"Exiting application after streams close ($closed)") 113 | maybeServer.foreach(_.stop()) 114 | } catch { 115 | case _: Exception => // ignored 116 | })) 117 | } 118 | 119 | private def createTopics(config: ConfigData, server: KafkaLocalServer) = { 120 | import config._ 121 | List(fromTopic, errorTopic).foreach(server.createTopic(_)) 122 | } 123 | 124 | private def startLocalServerIfSetInConfig(config: ConfigData): Option[KafkaLocalServer] = if (config.localServer) { 125 | val s = KafkaLocalServer(true, Some(config.stateStoreDir)) 126 | s.start() 127 | createTopics(config, s) 128 | Some(s) 129 | } else None 130 | 131 | def createStreams(config: ConfigData): KafkaStreams 132 | def startRestProxy(streams: KafkaStreams, hostInfo: HostInfo, 133 | actorSystem: ActorSystem, materializer: ActorMaterializer): InteractiveQueryHttpService 134 | } 135 | -------------------------------------------------------------------------------- /examples/example-dsl/src/main/scala/com/lightbend/kafka/scala/iq/example/WeblogWorkflow.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 
3 | */ 4 | 5 | package com.lightbend.kafka.scala.iq.example 6 | 7 | import com.typesafe.config.ConfigFactory 8 | 9 | import java.util.concurrent.TimeUnit 10 | 11 | import akka.actor.ActorSystem 12 | import akka.stream.ActorMaterializer 13 | 14 | import org.apache.kafka.streams.KafkaStreams 15 | import org.apache.kafka.streams.state.HostInfo 16 | 17 | import scala.util.{ Success, Failure } 18 | import scala.concurrent.duration._ 19 | import sys.process._ 20 | import com.typesafe.scalalogging.LazyLogging 21 | 22 | import config.KStreamConfig._ 23 | import serializers._ 24 | 25 | import com.lightbend.kafka.scala.iq.http.InteractiveQueryHttpService 26 | import com.lightbend.kafka.scala.server._ 27 | 28 | import ingestion.DataIngestion 29 | 30 | trait WeblogWorkflow extends LazyLogging with AppSerializers { 31 | 32 | def workflow(): Unit = { 33 | 34 | // get config info 35 | val config: ConfigData = fromConfig(ConfigFactory.load()) match { 36 | case Success(c) => c 37 | case Failure(ex) => throw ex 38 | } 39 | 40 | logger.info(s"config = $config") 41 | config.schemaRegistryUrl.foreach { url => 42 | logger.info(s"Schema Registry will be used - please ensure schema registry service is up and running at $url") 43 | } 44 | 45 | val maybeServer = startLocalServerIfSetInConfig(config) 46 | 47 | // setup REST endpoints 48 | val restEndpointPort = config.httpPort 49 | val restEndpointHostName = config.httpInterface 50 | val restEndpoint = new HostInfo(restEndpointHostName, restEndpointPort) 51 | 52 | logger.info("Connecting to Kafka cluster via bootstrap servers " + config.brokers) 53 | logger.warn("REST endpoint at http://" + restEndpointHostName + ":" + restEndpointPort) 54 | println("REST endpoint at http://" + restEndpointHostName + ":" + restEndpointPort) 55 | 56 | implicit val system = ActorSystem() 57 | implicit val materializer = ActorMaterializer() 58 | 59 | import system.dispatcher 60 | 61 | // register for data ingestion 62 | // whenever we find new / changed files in the configured location, we run data loading 63 | // However `directoryToWatch` may not be set if we are trying to run the application in 64 | // distributed mode with multiple instances. In that case only one instance will do the ingestion 65 | // and for subsequent instances of the application, we don't need to do the ingestion. 66 | // Ingestion can be done only from one instance 67 | config.directoryToWatch.foreach { d => 68 | DataIngestion.registerForIngestion(config) 69 | 70 | // schedule a run by touching the data folder 71 | system.scheduler.scheduleOnce(1 minute) { 72 | Seq("/bin/sh", "-c", s"touch $d/*").! 73 | () 74 | } 75 | } 76 | 77 | // set up the topology 78 | val streams: KafkaStreams = createStreams(config) 79 | 80 | // Need to be done for running the application after resetting the state store 81 | // should not be done in production 82 | streams.cleanUp() 83 | 84 | // Start the Restful proxy for servicing remote access to state stores 85 | val restService = startRestProxy(streams, restEndpoint, system, materializer) 86 | 87 | // need to exit for any stream exception 88 | // mesos will restart the application 89 | streams.setUncaughtExceptionHandler(new Thread.UncaughtExceptionHandler() { 90 | override def uncaughtException(t: Thread, e: Throwable): Unit = try { 91 | logger.error(s"Stream terminated because of uncaught exception .. 
Shutting down app", e) 92 | restService.stop() 93 | logger.error(s"Stopping streams service ..") 94 | val closed = streams.close(1, TimeUnit.MINUTES) 95 | logger.error(s"Exiting application after streams close ($closed)") 96 | } catch { 97 | case x: Exception => x.printStackTrace 98 | } finally { 99 | logger.error("Exiting application ..") 100 | logger.error(s"Stopping kafka server ..") 101 | maybeServer.foreach(_.stop()) 102 | System.exit(-1) 103 | } 104 | }) 105 | 106 | // Now that we have finished the definition of the processing topology we can actually run 107 | // it via `start()`. The Streams application as a whole can be launched just like any 108 | // normal Java application that has a `main()` method. 109 | streams.start() 110 | 111 | // Add shutdown hook to respond to SIGTERM and gracefully close Kafka Streams 112 | Runtime.getRuntime().addShutdownHook(new Thread(() => try { 113 | restService.stop() 114 | val closed = streams.close(1, TimeUnit.MINUTES) 115 | logger.error(s"Exiting application after streams close ($closed)") 116 | maybeServer.foreach(_.stop()) 117 | } catch { 118 | case _: Exception => // ignored 119 | })) 120 | } 121 | 122 | private def createTopics(config: ConfigData, server: KafkaLocalServer) = { 123 | import config._ 124 | List(fromTopic, 125 | errorTopic, 126 | toTopic, 127 | avroTopic, 128 | summaryAccessTopic, 129 | windowedSummaryAccessTopic, 130 | summaryPayloadTopic, 131 | windowedSummaryPayloadTopic).foreach(server.createTopic(_)) 132 | } 133 | 134 | private def startLocalServerIfSetInConfig(config: ConfigData): Option[KafkaLocalServer] = if (config.localServer) { 135 | val s = KafkaLocalServer(true, Some(config.stateStoreDir)) 136 | s.start() 137 | createTopics(config, s) 138 | Some(s) 139 | } else None 140 | 141 | def createStreams(config: ConfigData): KafkaStreams 142 | def startRestProxy(streams: KafkaStreams, hostInfo: HostInfo, 143 | actorSystem: ActorSystem, materializer: ActorMaterializer): InteractiveQueryHttpService 144 | } 145 | -------------------------------------------------------------------------------- /lib/src/main/scala/com/lightbend/kafka/scala/iq/http/KeyValueFetcher.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc. 3 | */ 4 | 5 | package com.lightbend.kafka.scala.iq 6 | package http 7 | 8 | import akka.actor.ActorSystem 9 | 10 | import org.apache.kafka.streams.{ KafkaStreams } 11 | import org.apache.kafka.streams.state.HostInfo 12 | import org.apache.kafka.common.serialization.Serializer 13 | 14 | import scala.concurrent.{ Future, ExecutionContext} 15 | import scala.util.{ Success, Failure } 16 | 17 | import com.typesafe.scalalogging.LazyLogging 18 | import services.{ MetadataService, HostStoreInfo, LocalStateStoreQuery } 19 | import de.heikoseeberger.akkahttpcirce.FailFastCirceSupport 20 | import akka.http.scaladsl.model.ResponseEntity 21 | import akka.http.scaladsl.unmarshalling.Unmarshaller 22 | import serializers.Serializers 23 | import io.circe.Decoder 24 | 25 | /** 26 | * Abstraction for fetching information from a key/value state store based on the 27 | * key and the store name passed in the API. 28 | * 29 | * Supports basic fetch as well as fetch over a time window. 30 | * 31 | * The fetch APIs support retry semantics in case the key is not available in the local state store. It 32 | * then fetches the store information from the MetadataService and then requeries that store 33 | * to get the information. 
34 | */ 35 | class KeyValueFetcher[K: Decoder, V: Decoder]( 36 | metadataService: MetadataService, 37 | localStateStoreQuery: LocalStateStoreQuery[K, V], 38 | httpRequester: HttpRequester, 39 | streams: KafkaStreams, 40 | executionContext: ExecutionContext, 41 | hostInfo: HostInfo)(implicit actorSystem: ActorSystem, keySerializer: Serializer[K], u: Unmarshaller[ResponseEntity, V]) 42 | 43 | extends LazyLogging 44 | with FailFastCirceSupport with Serializers { 45 | 46 | private implicit val ec: ExecutionContext = executionContext 47 | 48 | /** 49 | * Query for a key 50 | */ 51 | def fetch(key: K, store: String, path: String): Future[V] = { 52 | 53 | metadataService.streamsMetadataForStoreAndKey(store, key, keySerializer) match { 54 | case Success(host) => { 55 | // key is on another instance. call the other instance to fetch the data. 56 | if (!thisHost(host)) { 57 | logger.warn(s"Key $key is on another instance not on $host - requerying ..") 58 | httpRequester.queryFromHost[V](host, path) 59 | } else { 60 | // key is on this instance 61 | localStateStoreQuery.queryStateStore(streams, store, key) 62 | } 63 | } 64 | case Failure(ex) => Future.failed(ex) 65 | } 66 | } 67 | 68 | /** 69 | * Query all: Warning - this may be large depending on the data set 70 | */ 71 | def fetchAll(store: String, path: String): Future[List[(K, V)]] = { 72 | 73 | def fetchAllKVs(host: HostStoreInfo): Future[List[(K, V)]] = { 74 | if (!thisHost(host)) { 75 | 76 | // host is remote - need to requery 77 | httpRequester.queryFromHost[List[(K, V)]](host, path) 78 | } else { 79 | 80 | // fetch all kvs for this local store 81 | localStateStoreQuery.queryStateStoreForAll(streams, store) 82 | } 83 | } 84 | 85 | fetchKVs(store, fetchAllKVs) 86 | } 87 | 88 | /** 89 | * Query for a range of keys 90 | */ 91 | def fetchRange(fromKey: K, toKey: K, store: String, path: String): Future[List[(K, V)]] = { 92 | 93 | def fetchKVsInRange(host: HostStoreInfo): Future[List[(K, V)]] = { 94 | if (!thisHost(host)) { 95 | 96 | // host is remote - need to requery 97 | httpRequester.queryFromHost[List[(K, V)]](host, path) 98 | } else { 99 | 100 | // fetch all kvs in range for this local store 101 | localStateStoreQuery.queryStateStoreForRange(streams, store, fromKey, toKey) 102 | } 103 | } 104 | 105 | fetchKVs(store, fetchKVsInRange) 106 | } 107 | 108 | private def fetchKVs(store: String, fn: HostStoreInfo => Future[List[(K, V)]]): Future[List[(K, V)]] = 109 | metadataService.streamsMetadataForStore(store) match { 110 | 111 | // metadata could not be found for this store 112 | case Nil => Future.failed(new Exception(s"No metadata found for $store")) 113 | 114 | // all hosts that have this store with the same application id 115 | case hosts => Future.traverse(hosts)(fn).map(_.flatten) 116 | } 117 | 118 | /** 119 | * Query all hosts to find the sum of approximate number of entries 120 | */ 121 | def fetchApproxNumEntries(store: String, path: String): Future[Long] = { 122 | 123 | def fetchApproxNumEntries(host: HostStoreInfo): Future[Long] = { 124 | if (!thisHost(host)) { 125 | 126 | // host is remote - need to requery 127 | httpRequester.queryFromHost[Long](host, path) 128 | } else { 129 | 130 | // fetch approx num entries for this local store 131 | localStateStoreQuery.queryStateStoreForApproxNumEntries(streams, store) 132 | } 133 | } 134 | 135 | metadataService.streamsMetadataForStore(store) match { 136 | 137 | // metadata could not be found for this store 138 | case Nil => Future.failed(new Exception(s"No metadata found for $store")) 139 | 
140 | // all hosts that have this store with the same application id 141 | case hosts => Future.traverse(hosts)(fetchApproxNumEntries).map(_.sum) 142 | } 143 | } 144 | 145 | /** 146 | * Query for a window 147 | */ 148 | def fetchWindowed(key: K, store: String, path: String, 149 | fromTime: Long, toTime: Long): Future[List[(Long, V)]] = 150 | 151 | metadataService.streamsMetadataForStoreAndKey(store, key, keySerializer) match { 152 | case Success(host) => { 153 | // key is on another instance. call the other instance to fetch the data. 154 | if (!thisHost(host)) { 155 | logger.warn(s"Key $key is on another instance not on $host - requerying ..") 156 | httpRequester.queryFromHost[List[(Long, V)]](host, path) 157 | } else { 158 | // key is on this instance 159 | localStateStoreQuery.queryWindowedStateStore(streams, store, key, fromTime, toTime) 160 | } 161 | } 162 | case Failure(ex) => Future.failed(ex) 163 | } 164 | 165 | private def thisHost(host: HostStoreInfo): Boolean = 166 | host.host.equals(translateHostInterface(hostInfo.host)) && host.port == hostInfo.port 167 | } 168 | 169 | -------------------------------------------------------------------------------- /examples/kafka-local-server/src/main/scala/com/lightbend/kafka/scala/server/KafkaLocalServer.scala: -------------------------------------------------------------------------------- 1 | package com.lightbend.kafka.scala.server 2 | 3 | // Loosely based on Lagom implementation at 4 | // https://github.com/lagom/lagom/blob/master/dev/kafka-server/src/main/scala/com/lightbend/lagom/internal/kafka/KafkaLocalServer.scala 5 | 6 | import java.io.{ IOException, File } 7 | import java.nio.file.{ FileVisitOption, Files, Paths } 8 | import java.util.Properties 9 | 10 | import org.apache.curator.test.TestingServer 11 | import com.typesafe.scalalogging.LazyLogging 12 | 13 | import kafka.server.{KafkaConfig, KafkaServerStartable} 14 | 15 | import scala.collection.JavaConverters._ 16 | import scala.util.{ Try, Success, Failure } 17 | import java.util.Comparator 18 | 19 | import kafka.admin.{AdminUtils, RackAwareMode} 20 | import kafka.utils.ZkUtils 21 | 22 | class KafkaLocalServer private (kafkaProperties: Properties, zooKeeperServer: ZooKeeperLocalServer) 23 | extends LazyLogging { 24 | 25 | import KafkaLocalServer._ 26 | 27 | private var broker = null.asInstanceOf[KafkaServerStartable] 28 | private val zkUtils : ZkUtils = 29 | ZkUtils.apply(s"localhost:${zooKeeperServer.getPort()}", DEFAULT_ZK_SESSION_TIMEOUT_MS, DEFAULT_ZK_CONNECTION_TIMEOUT_MS, false) 30 | 31 | def start(): Unit = { 32 | 33 | broker = KafkaServerStartable.fromProps(kafkaProperties) 34 | broker.startup() 35 | } 36 | 37 | def stop(): Unit = { 38 | if (broker != null) { 39 | broker.shutdown() 40 | zooKeeperServer.stop() 41 | broker = null.asInstanceOf[KafkaServerStartable] 42 | } 43 | } 44 | 45 | /** 46 | * Create a Kafka topic with 1 partition and a replication factor of 1. 47 | * 48 | * @param topic The name of the topic. 49 | */ 50 | def createTopic(topic: String): Unit = { 51 | createTopic(topic, 1, 1, new Properties) 52 | } 53 | 54 | /** 55 | * Create a Kafka topic with the given parameters. 56 | * 57 | * @param topic The name of the topic. 58 | * @param partitions The number of partitions for this topic. 59 | * @param replication The replication factor for (the partitions of) this topic. 
60 | */ 61 | def createTopic(topic: String, partitions: Int, replication: Int): Unit = { 62 | createTopic(topic, partitions, replication, new Properties) 63 | } 64 | 65 | /** 66 | * Create a Kafka topic with the given parameters. 67 | * 68 | * @param topic The name of the topic. 69 | * @param partitions The number of partitions for this topic. 70 | * @param replication The replication factor for (partitions of) this topic. 71 | * @param topicConfig Additional topic-level configuration settings. 72 | */ 73 | def createTopic(topic: String, partitions: Int, replication: Int, topicConfig: Properties): Unit = { 74 | AdminUtils.createTopic(zkUtils, topic, partitions, replication, topicConfig, RackAwareMode.Enforced) 75 | } 76 | 77 | def deleteTopic(topic: String) = AdminUtils.deleteTopic(zkUtils, topic) 78 | } 79 | 80 | import Utils._ 81 | 82 | object KafkaLocalServer extends LazyLogging { 83 | final val DefaultPort = 9092 84 | final val DefaultResetOnStart = true 85 | private val DEFAULT_ZK_SESSION_TIMEOUT_MS = 10 * 1000 86 | private val DEFAULT_ZK_CONNECTION_TIMEOUT_MS = 8 * 1000 87 | 88 | final val basDir = "tmp/" 89 | 90 | private final val kafkaDataFolderName = "kafka_data" 91 | 92 | def apply(cleanOnStart: Boolean, localStateDir: Option[String] = None): KafkaLocalServer = 93 | this(DefaultPort, ZooKeeperLocalServer.DefaultPort, cleanOnStart, localStateDir) 94 | 95 | def apply(kafkaPort: Int, zookeeperServerPort: Int, cleanOnStart: Boolean, localStateDir: Option[String]): KafkaLocalServer = { 96 | 97 | // delete kafka data dir on clean start 98 | val kafkaDataDir: File = (for { 99 | kdir <- dataDirectory(basDir, kafkaDataFolderName) 100 | _ <- if (cleanOnStart) deleteDirectory(kdir) else Try(()) 101 | } yield kdir) match { 102 | case Success(d) => d 103 | case Failure(ex) => throw ex 104 | } 105 | 106 | // delete kafka local state dir on clean start 107 | localStateDir.foreach { d => 108 | for { 109 | kdir <- dataDirectory("", d) 110 | _ <- if (cleanOnStart) deleteDirectory(kdir) else Try(()) 111 | } yield (()) 112 | } 113 | 114 | logger.info(s"Kafka data directory is $kafkaDataDir.") 115 | 116 | val kafkaProperties = createKafkaProperties(kafkaPort, zookeeperServerPort, kafkaDataDir) 117 | 118 | val zk = new ZooKeeperLocalServer(zookeeperServerPort, cleanOnStart) 119 | zk.start() 120 | new KafkaLocalServer(kafkaProperties, zk) 121 | } 122 | 123 | /** 124 | * Creates a Properties instance for Kafka customized with values passed in argument. 125 | */ 126 | private def createKafkaProperties(kafkaPort: Int, zookeeperServerPort: Int, dataDir: File): Properties = { 127 | 128 | // TODO: Probably should be externalized into properties. 
Was rushing this in 129 | val kafkaProperties = new Properties 130 | kafkaProperties.put(KafkaConfig.ListenersProp, s"PLAINTEXT://localhost:$kafkaPort") 131 | kafkaProperties.put(KafkaConfig.ZkConnectProp, s"localhost:$zookeeperServerPort") 132 | kafkaProperties.put(KafkaConfig.ZkConnectionTimeoutMsProp, "6000") 133 | kafkaProperties.put(KafkaConfig.BrokerIdProp, "0") 134 | kafkaProperties.put(KafkaConfig.NumNetworkThreadsProp, "3") 135 | kafkaProperties.put(KafkaConfig.NumIoThreadsProp, "8") 136 | kafkaProperties.put(KafkaConfig.SocketSendBufferBytesProp, "102400") 137 | kafkaProperties.put(KafkaConfig.SocketReceiveBufferBytesProp, "102400") 138 | kafkaProperties.put(KafkaConfig.SocketRequestMaxBytesProp, "104857600") 139 | kafkaProperties.put(KafkaConfig.NumPartitionsProp, "1") 140 | kafkaProperties.put(KafkaConfig.NumRecoveryThreadsPerDataDirProp, "1") 141 | kafkaProperties.put(KafkaConfig.OffsetsTopicReplicationFactorProp, "1") 142 | kafkaProperties.put(KafkaConfig.TransactionsTopicReplicationFactorProp, "1") 143 | kafkaProperties.put(KafkaConfig.LogRetentionTimeHoursProp, "2") 144 | kafkaProperties.put(KafkaConfig.LogSegmentBytesProp, "1073741824") 145 | kafkaProperties.put(KafkaConfig.LogCleanupIntervalMsProp, "300000") 146 | kafkaProperties.put(KafkaConfig.AutoCreateTopicsEnableProp, "true") 147 | kafkaProperties.put(KafkaConfig.ControlledShutdownEnableProp, "true") 148 | kafkaProperties.put(KafkaConfig.LogDirProp, dataDir.getAbsolutePath) 149 | 150 | kafkaProperties 151 | } 152 | } 153 | 154 | private class ZooKeeperLocalServer(port: Int, cleanOnStart: Boolean) extends LazyLogging { 155 | 156 | import KafkaLocalServer._ 157 | import ZooKeeperLocalServer._ 158 | 159 | private var zooKeeper = null.asInstanceOf[TestingServer] 160 | 161 | def start(): Unit = { 162 | // delete zookeeper data dir on clean start 163 | val zookeeperDataDir: File = (for { 164 | zdir <- dataDirectory(basDir, zookeeperDataFolderName) 165 | _ <- if (cleanOnStart) deleteDirectory(zdir) else Try(()) 166 | } yield zdir) match { 167 | case Success(d) => d 168 | case Failure(ex) => throw ex 169 | } 170 | logger.info(s"Zookeeper data directory is $zookeeperDataDir.") 171 | 172 | zooKeeper = new TestingServer(port, zookeeperDataDir, false) 173 | 174 | zooKeeper.start() // blocking operation 175 | } 176 | 177 | def stop(): Unit = { 178 | if (zooKeeper != null) 179 | try { 180 | zooKeeper.stop() 181 | zooKeeper = null.asInstanceOf[TestingServer] 182 | } 183 | catch { 184 | case _: IOException => () // nothing to do if an exception is thrown while shutting down 185 | } 186 | } 187 | 188 | def getPort(): Int = port 189 | } 190 | 191 | object ZooKeeperLocalServer { 192 | final val DefaultPort = 2181 193 | private final val zookeeperDataFolderName = "zookeeper_data" 194 | } 195 | -------------------------------------------------------------------------------- /examples/example-proc/README.md: -------------------------------------------------------------------------------- 1 | ## Example implementation of an HTTP-based Interactive Query Service 2 | 3 | The current implementation demonstrates the following usages of Kafka Streams along with an HTTP-based interactive query service: 4 | 5 | 1. Data ingestion 6 | 2. Data transformation using a Kafka Streams Processor-based implementation 7 | 3. Implementing a custom state store (based on a bloom filter) 8 | 4. Managing local state with a custom state store 9 | 5.
Interactive query service with HTTP endpoints 10 | 11 | The implementation is based on the [ClarkNet dataset](http://ita.ee.lbl.gov/html/contrib/ClarkNet-HTTP.html), which has to be downloaded to a local folder. 12 | 13 | ## Build and Run Locally 14 | 15 | By default the application runs with an embedded local Kafka server. If you want to run separate Kafka and ZooKeeper servers, change `kafka.localserver` to `false` in `application.conf`. 16 | 17 | To run the application, follow these steps. 18 | 19 | ### Build the Libraries 20 | 21 | This example application depends on [kafka-streams-scala](https://github.com/lightbend/kafka-streams-scala) and [kafka-streams-query](https://github.com/lightbend/kafka-streams-query/tree/develop/lib). Ensure that you have the proper versions of these libraries in your classpath. Note that in this example Scala 2.12.4 and Kafka 1.0.0 are used. 22 | 23 | ### Start ZooKeeper and Kafka 24 | 25 | > This is only required if the setting of `kafka.localserver` is `false` in `application.conf`. If this is set to `true`, the application runs with an embedded local Kafka server. However, note that if you want to run the application in distributed mode (see below for details of running in distributed mode), you need to run separate Kafka and ZooKeeper servers. 26 | 27 | Start ZooKeeper and Kafka, if not already running. You can download Kafka 1.0.0 for Scala 2.12 [here](https://kafka.apache.org/documentation/#quickstart), then follow the [Quick Start](https://kafka.apache.org/documentation/#quickstart) instructions for running ZooKeeper and Kafka, steps 1 and 2. 28 | 29 | ### Download the ClarkNet dataset 30 | 31 | Download the [ClarkNet dataset](http://ita.ee.lbl.gov/html/contrib/ClarkNet-HTTP.html) and put it in a convenient local folder. 32 | 33 | ### Configure the Application Properties 34 | 35 | Copy `src/main/resources/application-proc.conf.template` to `src/main/resources/application-proc.conf`. 36 | 37 | Edit `src/main/resources/application-proc.conf` and set the entry for `directorytowatch` to match the folder name where you installed the ClarkNet dataset. 38 | 39 | Note that you can run the application with a bundled local Kafka server by setting `kafka.localserver` to `true` in the `application.conf` file. 40 | 41 | ### Create the Kafka Topics 42 | 43 | > This is only required if the setting of `kafka.localserver` is `false` in `application.conf`. If this is set to `true`, the application runs with an embedded local Kafka server and creates all necessary topics on its own. However, note that if you want to run the application in distributed mode (see below for details of running in distributed mode), you need to run separate Kafka and ZooKeeper servers. 44 | 45 | Create the topics using the `kafka-topics.sh` command that comes with the Kafka distribution. We'll refer to the directory where you installed Kafka as `$KAFKA_HOME`. Run the following commands: 46 | 47 | ```bash 48 | $KAFKA_HOME/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic logerr-proc 49 | $KAFKA_HOME/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic server-log-proc 50 | ``` 51 | 52 | ### Run the Application! 53 | 54 | Now run the application as follows: 55 | 56 | ```bash 57 | $ sbt 58 | > clean 59 | > compile 60 | > proc 61 | ``` 62 | 63 | This will start the application.
Now you can query the global state using `curl`: 64 | 65 | ```bash 66 | $ ## We are querying against a bloom filter based store which checks membership. 67 | $ ## Since world.std.com is a hostkey present in the dataset, we get true here. 68 | $ curl http://localhost:7071/weblog/access/check/world.std.com 69 | true 70 | $ 71 | $ ## We are querying against a bloom filter based store which checks membership. 72 | $ ## Since world.stx.co is not a valid hostkey in the dataset, we get false 73 | $ ## here. 74 | $ curl http://localhost:7071/weblog/access/check/world.stx.co 75 | false 76 | ``` 77 | 78 | ## Run in Distributed Mode 79 | 80 | The HTTP query layer is designed to work even when your application runs in distributed mode. Running your Kafka Streams application in distributed mode means that all the instances must have the same application id. 81 | 82 | > In order to run the application in distributed mode, you need to run external Kafka and ZooKeeper servers. Set `kafka.localserver` to `false` to enable this mode. 83 | 84 | Here are the steps that you need to follow to run the application in distributed mode. We assume here that you are running both instances on the same node with different port numbers. It's fairly easy to scale this out to different nodes. 85 | 86 | ### Step 1: Build and configure for distribution 87 | 88 | ```bash 89 | $ sbt 90 | > procPackage/universal:packageZipTarball 91 | ``` 92 | 93 | This creates a distribution under a folder `/build`. 94 | 95 | ```bash 96 | $ pwd 97 | 98 | $ cd build/proc/target/universal 99 | $ ls 100 | procpackage-0.0.1.tgz 101 | ## unpack the distribution 102 | $ tar xvfz procpackage-0.0.1.tgz 103 | $ cd procpackage-0.0.1 104 | $ ls 105 | bin conf lib 106 | $ cd conf 107 | $ ls 108 | application.conf logback.xml 109 | ## change the above 2 files based on your requirements. 110 | $ cd .. 111 | $ pwd 112 | <...>/procpackage-0.0.1 113 | ``` 114 | 115 | ### Step 2: Run the first instance of the application 116 | Ensure the following: 117 | 118 | 1. ZooKeeper and Kafka are running 119 | 2. All topics mentioned above are created 120 | 3. The folder mentioned in `directoryToWatch` in `application.conf` has the data file 121 | 122 | ```bash 123 | $ pwd 124 | <...>/procpackage-0.0.1 125 | $ bin/procpackage 126 | ``` 127 | 128 | This starts the first instance of the application. After some time you will see the host access information from the data file printed to the console. 129 | 130 | In the log file, created under `<...>/procpackage-0.0.1/logs`, check if the REST service has started and note the host and port details. It should be something like `localhost:7070` (the default setting in `application.conf`). 131 | 132 | ### Step 3: Run the second instance of the application 133 | 134 | If you decide to run multiple instances of the application you may choose to split the dataset into 2 parts and keep them in different folders. You also need to copy the current distribution to some other folder and start the second instance from there, since you need to run it with changed settings in `application.conf`. Say we want to copy it to a folder named `clarknet-2`.
135 | 136 | ```bash 137 | $ cp /build/proc/target/universal/procpackage-0.0.1.tgz clarknet-2 138 | $ cd clarknet-2 139 | $ tar xvfz procpackage-0.0.1.tgz 140 | ## unpack the distribution 141 | $ cd procpackage-0.0.1 142 | $ ls 143 | bin conf lib 144 | $ cd conf 145 | $ ls 146 | application.conf logback.xml 147 | ## change the above 2 files based on your requirements. 148 | $ cd .. 149 | $ pwd 150 | <...>/procpackage-0.0.1 151 | ``` 152 | 153 | The following settings need to be changed in `application.conf` before you can run the second instance: 154 | 155 | 1. `dcos.kafka.statestoredir` - This is the folder where the local state information gets persisted by Kafka Streams. It has to be different for every new instance you set up. 156 | 2. `dcos.kafka.loader.directorytowatch` - The data folder to watch, since we want the 2 instances to ingest different data. 157 | 3. `dcos.http.interface` and `dcos.http.port` - The REST service endpoints. If both instances run on the same node, the interface can be `localhost` for both. 158 | 159 | ```bash 160 | $ pwd 161 | <...>/procpackage-0.0.1 162 | $ bin/procpackage 163 | ``` 164 | 165 | This will start the second instance. Check the log file to verify that the REST endpoints are properly started. 166 | 167 | ### Step 4: Query 168 | 169 | The idea of a distributed interactive query interface is to allow the user to query for *all* keys using *any* of the endpoints where the REST services are running. Assume that the 2 instances are running at `localhost:7070` and `localhost:7071`. 170 | 171 | Here are a few examples: 172 | 173 | ```bash 174 | ## world.std.com was loaded by the first instance of the app 175 | ## Querying using the endpoint of the first instance gives the correct result 176 | $ curl localhost:7070/weblog/access/check/world.std.com 177 | true 178 | 179 | ## we get the correct result even if we query using the endpoint of the second instance 180 | $ curl localhost:7071/weblog/access/check/world.std.com 181 | true 182 | 183 | ## ppp19.glas.apc.org was loaded by the second instance of the app 184 | ## Querying using the endpoint of the first instance also gives the correct result 185 | $ curl localhost:7070/weblog/access/check/ppp19.glas.apc.org 186 | true 187 | ``` 188 | -------------------------------------------------------------------------------- /examples/example-dsl/src/main/scala/com/lightbend/kafka/scala/iq/example/WeblogProcessing.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2018 Lightbend Inc.
3 | */ 4 | 5 | package com.lightbend.kafka.scala.iq.example 6 | 7 | import java.io.StringWriter 8 | import java.time.format.DateTimeFormatter 9 | import java.util.Properties 10 | import java.util.concurrent.Executors 11 | 12 | import io.confluent.kafka.serializers.AbstractKafkaAvroSerDeConfig 13 | 14 | import akka.actor.ActorSystem 15 | import akka.stream.ActorMaterializer 16 | 17 | import com.lightbend.kafka.scala.iq.http.{HttpRequester, KeyValueFetcher } 18 | import com.lightbend.kafka.scala.iq.services.{ MetadataService, LocalStateStoreQuery } 19 | 20 | import config.KStreamConfig._ 21 | import http.{ WeblogDSLHttpService, SummaryInfoFetcher } 22 | import models.{LogParseUtil, LogRecord} 23 | 24 | import de.heikoseeberger.akkahttpcirce.FailFastCirceSupport 25 | 26 | import org.apache.kafka.clients.consumer.ConsumerConfig 27 | import org.apache.kafka.common.serialization.Serdes 28 | import org.apache.kafka.streams.kstream._ 29 | import org.apache.kafka.streams.Consumed 30 | import org.apache.kafka.streams.state.HostInfo 31 | import org.apache.kafka.streams.{KafkaStreams, StreamsConfig} 32 | 33 | import com.lightbend.kafka.scala.streams._ 34 | 35 | import scala.concurrent.ExecutionContext 36 | import scala.util.{Failure, Success} 37 | 38 | import serializers.AppSerializers 39 | 40 | object WeblogProcessing extends WeblogWorkflow with AppSerializers with FailFastCirceSupport { 41 | 42 | final val ACCESS_COUNT_PER_HOST_STORE = "access-count-per-host" 43 | final val PAYLOAD_SIZE_PER_HOST_STORE = "payload-size-per-host" 44 | final val WINDOWED_ACCESS_COUNT_PER_HOST_STORE = "windowed-access-count-per-host" 45 | final val WINDOWED_PAYLOAD_SIZE_PER_HOST_STORE = "windowed-payload-size-per-host" 46 | 47 | def main(args: Array[String]): Unit = workflow() 48 | 49 | override def startRestProxy(streams: KafkaStreams, hostInfo: HostInfo, 50 | actorSystem: ActorSystem, materializer: ActorMaterializer): WeblogDSLHttpService = { 51 | 52 | implicit val system = actorSystem 53 | 54 | lazy val defaultParallelism: Int = { 55 | val rt = Runtime.getRuntime 56 | rt.availableProcessors() * 4 57 | } 58 | 59 | def defaultExecutionContext(parallelism: Int = defaultParallelism): ExecutionContext = 60 | ExecutionContext.fromExecutor(Executors.newFixedThreadPool(parallelism)) 61 | 62 | val executionContext = defaultExecutionContext() 63 | 64 | // service for fetching metadata information 65 | val metadataService = new MetadataService(streams) 66 | 67 | // service for fetching from local state store 68 | val localStateStoreQuery = new LocalStateStoreQuery[String, Long] 69 | 70 | // http service for request handling 71 | val httpRequester = new HttpRequester(system, materializer, executionContext) 72 | 73 | implicit val ss = stringSerializer 74 | val restService = new WeblogDSLHttpService( 75 | hostInfo, 76 | new SummaryInfoFetcher( 77 | new KeyValueFetcher(metadataService, localStateStoreQuery, httpRequester, streams, executionContext, hostInfo) 78 | ), 79 | system, materializer, executionContext 80 | ) 81 | restService.start() 82 | restService 83 | } 84 | 85 | override def createStreams(config: ConfigData): KafkaStreams = { 86 | // Kafka stream configuration 87 | val streamingConfig = { 88 | val settings = new Properties 89 | settings.put(StreamsConfig.APPLICATION_ID_CONFIG, "kstream-weblog-processing") 90 | settings.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, config.brokers) 91 | 92 | config.schemaRegistryUrl.foreach{ url => 93 | settings.put(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, url) 94 | } 95 | 96 
| settings.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.ByteArray.getClass.getName) 97 | settings.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String.getClass.getName) 98 | 99 | // setting offset reset to earliest so that we can re-run the demo code with the same pre-loaded data 100 | // Note: To re-run the demo, you need to use the offset reset tool: 101 | // https://cwiki.apache.org/confluence/display/KAFKA/Kafka+Streams+Application+Reset+Tool 102 | settings.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest") 103 | 104 | // need this for query service 105 | val endpointHostName = translateHostInterface(config.httpInterface) 106 | logger.info(s"Endpoint host name $endpointHostName") 107 | 108 | settings.put(StreamsConfig.APPLICATION_SERVER_CONFIG, s"$endpointHostName:${config.httpPort}") 109 | 110 | // default is /tmp/kafka-streams 111 | settings.put(StreamsConfig.STATE_DIR_CONFIG, config.stateStoreDir) 112 | 113 | // Set the commit interval to 500ms so that any changes are flushed frequently and the summary 114 | // data are updated with low latency. 115 | settings.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, "500") 116 | 117 | settings 118 | } 119 | 120 | implicit val builder = new StreamsBuilderS 121 | 122 | generateLogRecords(config) 123 | 124 | // 125 | // assumption : the topic contains serialized records of LogRecord (serialized through logRecordSerde) 126 | val logRecords = 127 | builder.stream(List(config.toTopic), Consumed.`with`(byteArraySerde, logRecordSerde)) 128 | 129 | generateAvro(logRecords, config) 130 | hostCountSummary(logRecords, config) 131 | totalPayloadPerHostSummary(logRecords, config) 132 | 133 | new KafkaStreams(builder.build(), streamingConfig) 134 | } 135 | 136 | /** 137 | * Clean and format input data. Redirect records that cause a parsing error to the error topic. 138 | */ 139 | def generateLogRecords(config: ConfigData)(implicit builder: StreamsBuilderS): Unit = { 140 | 141 | // will read network data from `fromTopic` 142 | val logs = builder.stream[Array[Byte], String](config.fromTopic) 143 | 144 | def predicateValid: (Array[Byte], Extracted) => Boolean = { (_, value) => 145 | value match { 146 | case ValidLogRecord(_) => true 147 | case _ => false 148 | } 149 | } 150 | 151 | def predicateError: (Array[Byte], Extracted) => Boolean = { (_, value) => 152 | value match { 153 | case ValueError(_, _) => true 154 | case _ => false 155 | } 156 | } 157 | 158 | // extract values after transformation 159 | val filtered = logs.mapValues { record => 160 | LogParseUtil.parseLine(record) match { 161 | case Success(r) => ValidLogRecord(r) 162 | case Failure(ex) => ValueError(ex, record) 163 | } 164 | }.branch(predicateValid, predicateError) 165 | 166 | // push the labelled data 167 | filtered(0).mapValues { 168 | case ValidLogRecord(r) => r 169 | case _ => ??? // should never happen since we pre-emptively filtered with `branch` 170 | }.to(config.toTopic, Produced.`with`(byteArraySerde, logRecordSerde)) 171 | 172 | // push the extraction errors 173 | filtered(1).mapValues { 174 | case ValueError(_, v) => 175 | val writer = new StringWriter() 176 | (writer.toString, v) 177 | case _ => ??? 
// should never happen since we pre-emptively filtered with `branch` 178 | }.to(config.errorTopic, Produced.`with`(byteArraySerde, tuple2StringSerde)) 179 | } 180 | 181 | sealed abstract class Extracted { } 182 | final case class ValidLogRecord(record: LogRecord) extends Extracted 183 | final case class ValueError(exception: Throwable, originalRecord: String) extends Extracted 184 | 185 | def generateAvro(logRecords: KStreamS[Array[Byte], LogRecord], config: ConfigData): Unit = { 186 | logRecords.mapValues(makeAvro) 187 | .to(config.avroTopic, Produced.`with`(byteArraySerde, logRecordAvroSerde(config.schemaRegistryUrl))) 188 | } 189 | 190 | /** 191 | * Transform a LogRecord into an Avro SpecificRecord, LogRecordAvro, generated by the Avro compiler 192 | */ 193 | def makeAvro(record: LogRecord): LogRecordAvro = 194 | LogRecordAvro.newBuilder() 195 | .setHost(record.host) 196 | .setClientId(record.clientId) 197 | .setUser(record.user) 198 | .setTimestamp(record.timestamp.format(DateTimeFormatter.ofPattern("yyyy MM dd"))) 199 | .setMethod(record.method) 200 | .setEndpoint(record.endpoint) 201 | .setProtocol(record.protocol) 202 | .setHttpReplyCode(record.httpReplyCode) 203 | .setPayloadSize(record.payloadSize) 204 | .build() 205 | 206 | /** 207 | * Summary count of number of times each host has been accessed 208 | */ 209 | def hostCountSummary(logRecords: KStreamS[Array[Byte], LogRecord], config: ConfigData)(implicit builder: StreamsBuilderS): Unit = { 210 | 211 | val groupedStream = 212 | logRecords.mapValues(_.host) 213 | .map((_, value) => (value, value)) 214 | .groupByKey(Serialized.`with`(stringSerde, stringSerde)) 215 | 216 | // since this is a KTable (changelog stream), only the latest summarized information 217 | // for a host will be the correct one - all earlier records will be considered out of date 218 | // 219 | // materialize the summarized information into a topic 220 | groupedStream.count(ACCESS_COUNT_PER_HOST_STORE, Some(stringSerde)) 221 | .toStream.to(config.summaryAccessTopic, Produced.`with`(stringSerde, longSerde)) 222 | 223 | groupedStream.windowedBy(TimeWindows.of(60000)) 224 | .count(WINDOWED_ACCESS_COUNT_PER_HOST_STORE, Some(stringSerde)) 225 | .toStream.to(config.windowedSummaryAccessTopic, Produced.`with`(windowedStringSerde, longSerde)) 226 | 227 | // print the topic info (for debugging) 228 | builder.stream(List(config.summaryAccessTopic), Consumed.`with`(stringSerde, longSerde)) 229 | .print(Printed.toSysOut[String, Long].withKeyValueMapper { new KeyValueMapper[String, Long, String]() { 230 | def apply(key: String, value: Long) = s"""$key / $value""" 231 | }}) 232 | } 233 | 234 | /** 235 | * Aggregate value of payloadSize per host 236 | */ 237 | def totalPayloadPerHostSummary(logRecords: KStreamS[Array[Byte], LogRecord], config: ConfigData)(implicit builder: StreamsBuilderS): Unit = { 238 | val groupedStream = 239 | logRecords.mapValues(record => (record.host, record.payloadSize)) 240 | .map { case (_, (host, size)) => (host, size) } 241 | .groupByKey(Serialized.`with`(stringSerde, longSerde)) 242 | 243 | // materialize the summarized information into a topic 244 | groupedStream 245 | .aggregate( 246 | () => 0L, 247 | (_: String, s: Long, agg: Long) => s + agg, 248 | Materialized.as(PAYLOAD_SIZE_PER_HOST_STORE) 249 | .withKeySerde(stringSerde) 250 | .withValueSerde(longSerde) 251 | ) 252 | .toStream.to(config.summaryPayloadTopic, Produced.`with`(stringSerde, longSerde)) 253 | 254 | groupedStream 255 | .windowedBy(TimeWindows.of(60000)) 256 | .aggregate( 257 | () 
=> 0L, 258 | (_: String, s: Long, agg: Long) => s + agg, 259 | Materialized.as(WINDOWED_PAYLOAD_SIZE_PER_HOST_STORE) 260 | .withKeySerde(stringSerde) 261 | .withValueSerde(longSerde) 262 | ) 263 | .toStream.to(config.windowedSummaryPayloadTopic, Produced.`with`(windowedStringSerde, longSerde)) 264 | 265 | builder.stream(List(config.summaryPayloadTopic), Consumed.`with`(stringSerde, longSerde)) 266 | .print(Printed.toSysOut[String, Long].withKeyValueMapper { new KeyValueMapper[String, Long, String]() { 267 | def apply(key: String, value: Long) = s"""$key / $value""" 268 | }}) 269 | } 270 | } 271 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /examples/example-dsl/README.md: -------------------------------------------------------------------------------- 1 | ## Example Implementation of HTTP-based Interactive Query Service 2 | 3 | This example demonstrates the following features in Kafka Streams along with an HTTP based interactive query service: 4 | 5 | 1. Data ingestion 6 | 2. Data transformation using a Kafka Streams DSL-based implementation 7 | 3. Managing local state with key-value stores 8 | 4. 
Interactive query service with HTTP endpoints 9 | 10 | The implementation is based on the [ClarkNet dataset](http://ita.ee.lbl.gov/html/contrib/ClarkNet-HTTP.html), which must be downloaded to a local folder. 11 | 12 | ## Build and Run Locally 13 | 14 | By default, the application runs with an embedded local Kafka server. If you want to run separate Kafka and ZooKeeper servers, change `kafka.localserver` to `false` in `application.conf`. 15 | 16 | To run the application, follow these steps. 17 | 18 | ### Build the Libraries 19 | 20 | This example application depends on [kafka-streams-scala](https://github.com/lightbend/kafka-streams-scala) and [kafka-streams-query](https://github.com/lightbend/kafka-streams-query/tree/develop/lib). Ensure that you have the proper versions of these libraries on your classpath. This example uses Scala 2.12.4 and Kafka 1.0.0. 21 | 22 | If you've made local changes to `kafka-streams-query`, you'll need to publish them to your local Ivy repository using `sbt publishLocal` from within the `./lib/` directory. 23 | 24 | ### Start ZooKeeper and Kafka 25 | 26 | > This is only required if the setting of `kafka.localserver` is `false` in `application.conf`. If this is set to `true`, the application runs with an embedded local Kafka server. However, note that if you want to run the application in distributed mode (see below for details), you need to run separate Kafka and ZooKeeper servers. 27 | 28 | Start ZooKeeper and Kafka, if not already running. You can download Kafka 1.0.0 for Scala 2.12 [here](https://kafka.apache.org/documentation/#quickstart), then follow the [Quick Start](https://kafka.apache.org/documentation/#quickstart) instructions for running ZooKeeper and Kafka, steps 1 and 2. 29 | 30 | ### Download the ClarkNet dataset 31 | 32 | Download the [ClarkNet dataset](http://ita.ee.lbl.gov/html/contrib/ClarkNet-HTTP.html) and put it in a convenient local folder. 33 | 34 | ### Configure the Application Properties 35 | 36 | Copy `src/main/resources/application-dsl.conf.template` to `src/main/resources/application-dsl.conf`. 37 | 38 | Edit `src/main/resources/application-dsl.conf` and set the entry for `directorytowatch` to match the folder name where you installed the ClarkNet dataset. 39 | 40 | Note that you can run the application with a bundled local Kafka server by setting `kafka.localserver` to `true` in the `application.conf` file. 41 | 42 | ### Create the Kafka Topics 43 | 44 | > This is only required if the setting of `kafka.localserver` is `false` in `application.conf`. If this is set to 45 | `true`, the application runs with an embedded local Kafka server and creates all necessary topics on its own. However, 46 | note that if you want to run the application in distributed mode (see below for details), you need to run separate 47 | Kafka and ZooKeeper servers. If you're running in distributed mode, topics should 48 | have more than one partition. 49 | 50 | Create the topics using the `kafka-topics.sh` command that comes with the Kafka distribution. We'll refer to the directory where you installed Kafka as `$KAFKA_HOME`.
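If you prefer to create the topics programmatically rather than from the shell, here is a minimal, illustrative sketch using Kafka's `AdminClient` (available in Kafka 1.0.0). It is not part of the example project; the topic names, partition count, and replication factor simply mirror the shell commands that follow.

```scala
import java.util.Properties
import org.apache.kafka.clients.admin.{AdminClient, AdminClientConfig, NewTopic}
import scala.collection.JavaConverters._

object CreateExampleTopics extends App {
  // assumes a broker reachable at localhost:9092, as elsewhere in this README
  val props = new Properties()
  props.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092")
  val admin = AdminClient.create(props)

  // same topics, partition count and replication factor as the kafka-topics.sh commands below
  val topics = List(
    "logerr-dsl", "server-log-dsl", "processed-log",
    "summary-access-log", "windowed-summary-access-log",
    "summary-payload-log", "windowed-summary-payload-log", "avro-topic"
  ).map(new NewTopic(_, 3, 1.toShort))

  admin.createTopics(topics.asJava).all().get() // block until the topics exist
  admin.close()
}
```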
Run the following commands: 51 | 52 | ```bash 53 | $KAFKA_HOME/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 3 --topic logerr-dsl 54 | $KAFKA_HOME/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 3 --topic server-log-dsl 55 | $KAFKA_HOME/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 3 --topic processed-log 56 | $KAFKA_HOME/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 3 --topic summary-access-log 57 | $KAFKA_HOME/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 3 --topic windowed-summary-access-log 58 | $KAFKA_HOME/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 3 --topic summary-payload-log 59 | $KAFKA_HOME/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 3 --topic windowed-summary-payload-log 60 | $KAFKA_HOME/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 3 --topic avro-topic 61 | ``` 62 | 63 | ### Run the Application! 64 | 65 | Now run the application as follows: 66 | 67 | ```bash 68 | $ sbt 69 | > clean 70 | > compile 71 | > dsl 72 | ``` 73 | 74 | This will start the application. Now you can query the global state using `curl`: 75 | 76 | ```bash 77 | $ ## The example application has a timer to `touch` the files in the watched 78 | $ ## directory 1 minute after the app starts to trigger the streaming to begin. Touch 79 | $ ## the ClarkNet dataset again, or add new files, to stream more entries. 80 | $ 81 | $ ## Fetch the number of accesses made to the host world.std.com as per the downloaded 82 | $ ## data file 83 | $ curl http://localhost:7070/weblog/access/world.std.com 84 | 15 85 | $ 86 | $ ## If you specify ALL as the key name then it will fetch a list of all key-values 87 | $ ## from all the stores that have the access information for the same application id 88 | $ curl http://localhost:7070/weblog/access/ALL 89 | [["204.249.225.59",1],["access9.accsyst.com",2],["cssu24.cs.ust.hk",1],["cyclom1-1-6.intersource.com",1],["d24-1.cpe.Brisbane.aone.net.au",1],["er6.rutgers.edu",1],["world.std.com",3]] 90 | $ 91 | $ ## If you specify COUNT as the key name then it will fetch the approximate total 92 | $ ## number of entries across all the stores that have the access information 93 | $ ## for the same application id 94 | $ curl http://localhost:7070/weblog/access/COUNT 95 | 7 96 | $ ## Query access counts by a range of keys. The "from" key must be less than the "to" 97 | $ ## key. For example, "a.com" < "z.org" 98 | $ curl http://localhost:7070/weblog/access/range/a.com/z.org 99 | [["access9.accsyst.com",4],["cssu24.cs.ust.hk",2],["cyclom1-1-6.intersource.com",2],["d24-1.cpe.Brisbane.aone.net.au",2],["er6.rutgers.edu",2],["reddit.com",2],["world.std.com",6]] 100 | $ 101 | $ ## Query a time window for a key. The "from" and "to" parameters must be represented 102 | $ ## as milliseconds since epoch (a long number). The "from" time must be less than the 103 | $ ## "to" time. Stream elements are windowed using ingest time and not event time. For 104 | $ ## example, get all time windows for world.std.com from epoch 0 to current epoch. 105 | $ curl http://localhost:7070/weblog/access/win/world.std.com/0/$(date +%s%3N) 106 | [[1517518200000,6],[1517518260000,3]] 107 | $ ## 108 | $ ## Fetch the number of bytes in the reply for queries to the host 109 | $ ## world.std.com as per the downloaded data file 110 | $ curl http://localhost:7070/weblog/bytes/world.std.com 111 | 124532 112 | ``` 113 |
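For reference, here is a minimal, illustrative sketch of what a windowed query such as `/weblog/access/win/<host>/<from>/<to>` does under the hood: it fetches all time windows for a key from a windowed state store through the Kafka Streams interactive-query API. This is not taken from the example sources; the store name and the way a `KafkaStreams` handle is obtained are assumptions.

```scala
import org.apache.kafka.streams.KafkaStreams
import org.apache.kafka.streams.state.QueryableStoreTypes

// Hypothetical helper: return (windowStartMs, count) pairs for a host key,
// e.g. List((1517518200000L, 6L), (1517518260000L, 3L)) as in the curl output above.
def windowedAccessCounts(streams: KafkaStreams, storeName: String,
                         host: String, fromMs: Long, toMs: Long): List[(Long, Long)] = {
  val store = streams.store(storeName, QueryableStoreTypes.windowStore[String, java.lang.Long]())
  val iter  = store.fetch(host, fromMs, toMs)   // windows with start time in [fromMs, toMs]
  try {
    var acc = List.empty[(Long, Long)]
    while (iter.hasNext) {
      val kv = iter.next()                      // key = window start timestamp, value = count
      acc = (kv.key.longValue(), kv.value.longValue()) :: acc
    }
    acc.reverse
  } finally iter.close()
}
```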
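This works because each instance can use Kafka Streams metadata to discover which instance hosts a given key, and then either answer from its own local state store or forward the request to the owner. Below is a minimal, illustrative sketch of that mechanism; it is not the library's actual implementation, and the store name, host, and port wiring are assumptions.

```scala
import org.apache.kafka.common.serialization.Serdes
import org.apache.kafka.streams.KafkaStreams
import org.apache.kafka.streams.state.QueryableStoreTypes

// Hypothetical helper: decide whether a key can be answered from this instance's
// local store, or which endpoint owns it, using Kafka Streams metadata.
class DistributedAccessCountLookup(streams: KafkaStreams, storeName: String,
                                   thisHost: String, thisPort: Int) {

  // Right(count) if the key is local, Left(url) pointing at the owning instance otherwise
  def lookup(hostKey: String): Either[String, Long] = {
    val meta = streams.metadataForKey(storeName, hostKey, Serdes.String().serializer())
    if (meta.host() == thisHost && meta.port() == thisPort) {
      val store = streams.store(storeName, QueryableStoreTypes.keyValueStore[String, java.lang.Long]())
      Right(Option(store.get(hostKey)).map(_.longValue()).getOrElse(0L))
    } else {
      // the REST layer would forward the HTTP call to the owning instance
      Left(s"http://${meta.host()}:${meta.port()}/weblog/access/$hostKey")
    }
  }
}
```

Since any instance can resolve the owner of any key, a `curl` against either endpoint returns the same answer, as the examples below show.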
207 | Here are a few examples: 208 | 209 | ```bash 210 | ## world.std.com was loaded by the first instance of the app 211 | ## Querying the endpoint of the first instance gives the correct result 212 | $ curl localhost:7070/weblog/access/world.std.com 213 | 14 214 | 215 | ## we get the correct result even if we query using the endpoint of the second instance 216 | $ curl localhost:7071/weblog/access/world.std.com 217 | 14 218 | 219 | ## ppp19.glas.apc.org was loaded by the second instance of the app 220 | ## Querying the endpoint of the first instance also gives the correct result 221 | $ curl localhost:7070/weblog/access/ppp19.glas.apc.org 222 | 17 223 | ``` 224 | 225 | ### Step 5: Clean up the application's Kafka Streams internal topics 226 | 227 | When running in distributed mode, Kafka Streams state stores are backed by internal Kafka Streams topics so that state 228 | can be restored on a different instance of the app if there's a failure. To reset to a clean state you can use the 229 | [Kafka Streams Application Reset tool](https://cwiki.apache.org/confluence/display/KAFKA/Kafka+Streams+Application+Reset+Tool). 230 | This will delete the internal Kafka Streams topics associated with a specified application id. Note that you must have 231 | `delete.topic.enable` set to `true` in your broker configuration to delete topics. 232 | 233 | An example run of this tool: 234 | 235 | ``` 236 | $ ./kafka-streams-application-reset.sh \ 237 | --application-id kstream-weblog-processing \ 238 | --bootstrap-servers kafka-0-broker:9092 \ 239 | --zookeeper localhost:2181 240 | No input or intermediate topics specified. Skipping seek. 241 | Deleting all internal/auto-created topics for application kstream-weblog-processing 242 | Topic kstream-weblog-processing-windowed-access-count-per-host-changelog is marked for deletion.
251 | Note: This will have no impact if delete.topic.enable is not set to true. 252 | Topic kstream-weblog-processing-windowed-access-count-per-host-repartition is marked for deletion. 253 | Note: This will have no impact if delete.topic.enable is not set to true. 254 | Topic kstream-weblog-processing-payload-size-per-host-changelog is marked for deletion. 255 | Note: This will have no impact if delete.topic.enable is not set to true. 256 | Topic kstream-weblog-processing-access-count-per-host-repartition is marked for deletion. 257 | Note: This will have no impact if delete.topic.enable is not set to true. 258 | Done. 259 | ``` --------------------------------------------------------------------------------