├── .gitignore
├── .scalafmt.conf
├── build.sbt
├── project
│   ├── Dependencies.scala
│   ├── build.properties
│   └── plugins.sbt
└── src
    ├── it
    │   └── scala
    │       ├── KafkaSettings.scala
    │       ├── KeyValueStoreSpec.scala
    │       ├── ReadOnlyTableSpec.scala
    │       ├── RecordStreamSpec.scala
    │       └── UnitSpec.scala
    └── main
        └── scala
            └── com
                └── iravid
                    └── fs2
                        └── kafka
                            ├── EnvT.scala
                            ├── client
                            │   ├── CommitQueue.scala
                            │   ├── ConsumerAlgebra.scala
                            │   ├── Producer.scala
                            │   ├── RecordStream.scala
                            │   ├── Subscription.scala
                            │   ├── commands.scala
                            │   ├── package.scala
                            │   └── settings.scala
                            ├── codecs
                            │   ├── KafkaDecoder.scala
                            │   └── KafkaEncoder.scala
                            ├── model
                            │   └── package.scala
                            └── streams
                                ├── KVStore.scala
                                ├── KVStores.scala
                                ├── Key.scala
                                ├── PolyKVStore.scala
                                ├── Table.scala
                                └── Tables.scala

/.gitignore:
--------------------------------------------------------------------------------
 1 | # Created by https://www.gitignore.io/api/sbt,scala
 2 |
 3 | ### SBT ###
 4 | # Simple Build Tool
 5 | # http://www.scala-sbt.org/release/docs/Getting-Started/Directories.html#configuring-version-control
 6 |
 7 | dist/*
 8 | target/
 9 | lib_managed/
10 | src_managed/
11 | project/boot/
12 | project/plugins/project/
13 | .history
14 | .cache
15 | .lib/
16 |
17 | ### Scala ###
18 | *.class
19 | *.log
20 |
21 |
22 | # End of https://www.gitignore.io/api/sbt,scala
--------------------------------------------------------------------------------
/.scalafmt.conf:
--------------------------------------------------------------------------------
 1 | maxColumn = 100
 2 |
 3 | continuationIndent {
 4 |   callSite = 2
 5 |   defnSite = 2
 6 | }
 7 |
 8 | spaces {
 9 |   inImportCurlyBraces = true
10 | }
11 |
12 | align {
13 |   arrowEnumeratorGenerator = true
14 |   openParenCallSite = false
15 |
16 |   tokens = [
17 |     { code = "<-", owner = "Enumerator.Generator" }
18 |     { code = "=", owner = "(Enumerator.Val|Term.Arg.Named)" }
19 |     { code = "%", owner = "Term.ApplyInfix" }
20 |     { code = "%%", owner = "Term.ApplyInfix" }
21 |     { code = "=>", owner = "Case" }
22 |     { code = "->", owner = "Term.Apply" }
23 |   ]
24 | }
25 |
26 | binPack {
27 |   parentConstructors = true
28 | }
29 |
30 | verticalMultilineAtDefinitionSite = false
31 |
32 | newlines {
33 |   sometimesBeforeColonInMethodReturnType = true
34 |   afterImplicitKWInVerticalMultiline = true
35 | }
36 |
37 | assumeStandardLibraryStripMargin = true
38 |
39 | rewrite.rules = [RedundantBraces, PreferCurlyFors, SortImports]
--------------------------------------------------------------------------------
/build.sbt:
--------------------------------------------------------------------------------
 1 | import Dependencies._
 2 |
 3 | lazy val root = (project in file("."))
 4 |   .configs(IntegrationTest)
 5 |   .settings(Defaults.itSettings)
 6 |   .settings(inConfig(IntegrationTest)(ScalafmtPlugin.scalafmtConfigSettings))
 7 |   .settings(
 8 |     inThisBuild(
 9 |       List(
10 |         organization := "com.iravid",
11 |         scalaVersion := "2.12.6",
12 |         version := "0.1.0-SNAPSHOT"
13 |       )),
14 |     name := "fs2-kafka-streams",
15 |     autoCompilerPlugins := true,
16 |     addCompilerPlugin("org.spire-math" %% "kind-projector" % "0.9.4"),
17 |     addCompilerPlugin("com.lihaoyi" %% "acyclic" % "0.1.7"),
18 |     addCompilerPlugin("com.olegpy" %% "better-monadic-for" % "0.2.4"),
19 |     scalafmtOnCompile := true,
20 |     libraryDependencies ++= Seq(
21 |       "org.apache.kafka" % "kafka-clients" % "2.0.0",
22 |       "org.typelevel" %% "cats-core" % "1.4.0",
23 |       "org.typelevel" %% "cats-effect" % "1.0.0",
24 |       "co.fs2" %% "fs2-core" % "1.0.0-RC2",
25 |       "org.scodec" %% "scodec-bits" % "1.1.5",
26 |       "org.scodec" %% "scodec-core" % "1.10.3",
27 | "org.rocksdb" % "rocksdbjni" % "5.13.2", 28 | "com.lihaoyi" %% "acyclic" % "0.1.7" % "provided", 29 | "org.scalatest" %% "scalatest" % "3.0.4" % "it,test", 30 | "org.scalacheck" %% "scalacheck" % "1.14.0" % "it,test" 31 | ), 32 | scalacOptions ++= Seq("-P:acyclic:force") 33 | ) 34 | -------------------------------------------------------------------------------- /project/Dependencies.scala: -------------------------------------------------------------------------------- 1 | import sbt._ 2 | 3 | object Dependencies { 4 | lazy val scalaTest = "org.scalatest" %% "scalatest" % "3.0.5" 5 | } 6 | -------------------------------------------------------------------------------- /project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.1.6 2 | -------------------------------------------------------------------------------- /project/plugins.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("com.geirsson" % "sbt-scalafmt" % "1.5.1") 2 | addSbtPlugin("io.github.davidgregory084" % "sbt-tpolecat" % "0.1.4") 3 | -------------------------------------------------------------------------------- /src/it/scala/KafkaSettings.scala: -------------------------------------------------------------------------------- 1 | package com.iravid.fs2.kafka.client 2 | 3 | import cats.effect.{ IO, Resource } 4 | import cats.implicits._ 5 | import com.iravid.fs2.kafka.codecs.{ KafkaDecoder, KafkaEncoder } 6 | import org.apache.kafka.clients.admin.{ AdminClient, AdminClientConfig, NewTopic } 7 | import org.scalacheck.{ Gen, Shrink } 8 | import org.scalatest.Suite 9 | 10 | import scala.collection.JavaConverters._ 11 | import scala.concurrent.ExecutionContext 12 | import scala.concurrent.duration._ 13 | 14 | trait KafkaSettings { self: Suite => 15 | def mkConsumerSettings(groupId: String, bufferSize: Int) = ConsumerSettings( 16 | Map( 17 | "bootstrap.servers" -> s"localhost:9092", 18 | "group.id" -> groupId, 19 | "auto.offset.reset" -> "earliest" 20 | ), 21 | 10, 22 | 10, 23 | bufferSize, 24 | bufferSize, 25 | 50.millis, 26 | 50.millis, 27 | 1.second 28 | ) 29 | 30 | def mkProducerSettings = ProducerSettings( 31 | Map( 32 | "bootstrap.servers" -> s"localhost:9092", 33 | ), 34 | 5.seconds 35 | ) 36 | 37 | implicit def decoder: KafkaDecoder[String] = 38 | KafkaDecoder.instance(rec => Right(new String(rec.value, "UTF-8"))) 39 | 40 | implicit def encoder: KafkaEncoder[String] = 41 | KafkaEncoder.instance(str => (None, KafkaEncoder.Value(str.getBytes))) 42 | 43 | def produce(settings: ProducerSettings, topic: String, data: List[(Int, String)])( 44 | implicit ec: ExecutionContext) = 45 | Producer.create[IO](settings) use { producer => 46 | implicit val cs = IO.contextShift(ec) 47 | data parTraverse { 48 | case (partition, msg) => 49 | Producer.produce[IO, String](producer, msg, topic, partition, None) 50 | } 51 | } 52 | 53 | def nonEmptyStr = Gen.nonEmptyListOf(Gen.alphaLowerChar).map(_.mkString) 54 | 55 | implicit def noShrink[T]: Shrink[T] = Shrink.shrinkAny 56 | 57 | def createCustomTopic(topic: String, 58 | topicConfig: Map[String, String] = Map.empty, 59 | partitions: Int = 1, 60 | replicationFactor: Int = 1): Resource[IO, Unit] = 61 | Resource 62 | .make( 63 | IO( 64 | AdminClient.create(Map[String, Object]( 65 | AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG -> s"localhost:9092", 66 | ).asJava)))(client => IO(client.close)) 67 | .flatMap { client => 68 | Resource.make(IO { 69 | client 70 | 
.createTopics(Seq(new NewTopic(topic, partitions, replicationFactor.toShort) 71 | .configs(topicConfig.asJava)).asJava) 72 | .all 73 | .get(5, SECONDS) 74 | 75 | () 76 | })(_ => 77 | IO { 78 | client.deleteTopics(Seq(topic).asJava).all.get(5, SECONDS) 79 | () 80 | }) 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /src/it/scala/KeyValueStoreSpec.scala: -------------------------------------------------------------------------------- 1 | package com.iravid.fs2.kafka.streams 2 | 3 | import cats.effect.{ IO, Resource } 4 | import cats.implicits._ 5 | import com.iravid.fs2.kafka.UnitSpec 6 | import com.iravid.fs2.kafka.client.KafkaSettings 7 | import java.io.IOException 8 | import java.nio.file.FileVisitResult 9 | import java.nio.file.attribute.BasicFileAttributes 10 | import java.nio.file.{ Files, Path, SimpleFileVisitor } 11 | import scodec._, codecs._, codecs.implicits._ 12 | 13 | case class Record(i: String, k: Int, bla: Boolean) 14 | object Record { 15 | implicit val codec: Codec[Record] = 16 | (variableSizeBytes(uint16, utf8) :: int32 :: bool).as[Record] 17 | } 18 | 19 | class KeyValueStoreSpec extends UnitSpec with KafkaSettings { 20 | val kvStores = new RocksDBKVStores[IO] 21 | val testData = Record("hello", 15, true) 22 | 23 | def tempDir: Resource[IO, Path] = 24 | Resource.make(IO(Files.createTempDirectory("rocksdbtests")))(dir => 25 | IO { 26 | Files.walkFileTree( 27 | dir, 28 | new SimpleFileVisitor[Path] { 29 | override def visitFile(file: Path, attrs: BasicFileAttributes) = { 30 | Files.delete(file) 31 | FileVisitResult.CONTINUE 32 | } 33 | 34 | override def postVisitDirectory(dir: Path, ex: IOException) = { 35 | Files.delete(dir) 36 | FileVisitResult.CONTINUE 37 | } 38 | } 39 | ) 40 | 41 | () 42 | }) 43 | 44 | "The RocksDBKeyValueStore" must { 45 | implicit val key: Key.Aux[Codec, String, Record] = Key.instance 46 | 47 | "work properly" in { 48 | val storeResource: Resource[IO, KVStore[IO, String, Record]] = for { 49 | dir <- tempDir 50 | store <- kvStores.open(dir) 51 | } yield store.monomorphize[String, Record] 52 | 53 | val (first, second, third, values) = (storeResource use { store => 54 | for { 55 | first <- store.get("hello") 56 | _ <- store.put("hello", testData) 57 | second <- store.get("hello") 58 | _ <- store.delete("hello") 59 | third <- store.get("hello") 60 | _ <- store.put("hello", testData) 61 | _ <- store.put("hello2", testData) 62 | values <- store.scan.compile.toVector 63 | } yield (first, second, third, values) 64 | }).unsafeRunSync() 65 | 66 | first shouldBe empty 67 | second.value shouldBe testData 68 | third shouldBe empty 69 | values should contain allOf ("hello" -> testData, "hello2" -> testData) 70 | } 71 | 72 | "reopen databases properly" in { 73 | val program = for { 74 | databaseDir <- tempDir 75 | firstRecord <- Resource.liftF { 76 | kvStores.open(databaseDir) use { firstStore => 77 | firstStore.get("hello") <* 78 | firstStore.put("hello", testData) 79 | } 80 | } 81 | secondRecord <- Resource.liftF { 82 | kvStores.open(databaseDir) use { secondStore => 83 | secondStore.get("hello") 84 | } 85 | } 86 | } yield (firstRecord, secondRecord) 87 | val (first, second) = program.use(IO(_)).unsafeRunSync 88 | 89 | first shouldBe empty 90 | second.value shouldBe testData 91 | } 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /src/it/scala/ReadOnlyTableSpec.scala: -------------------------------------------------------------------------------- 1 | package 
com.iravid.fs2.kafka.streams 2 | 3 | import cats.effect.Resource 4 | import cats.implicits._ 5 | import cats.effect.IO 6 | import com.iravid.fs2.kafka.UnitSpec 7 | import com.iravid.fs2.kafka.client._ 8 | import com.iravid.fs2.kafka.codecs.{ KafkaDecoder, KafkaEncoder } 9 | import fs2.Stream 10 | import fs2.concurrent.SignallingRef 11 | import java.nio.charset.StandardCharsets 12 | import org.scalacheck.Gen 13 | 14 | import scala.concurrent.ExecutionContext.Implicits.global 15 | import scala.concurrent.duration._ 16 | 17 | case class Customer(userId: String, name: String) 18 | object Customer { 19 | implicit val kafkaEncoder: KafkaEncoder[Customer] = 20 | KafkaEncoder.instance { customer => 21 | val key = KafkaEncoder.Key(customer.userId.getBytes(StandardCharsets.UTF_8)).some 22 | val value = 23 | KafkaEncoder.Value(s"${customer.userId},${customer.name}".getBytes(StandardCharsets.UTF_8)) 24 | 25 | (key, value) 26 | } 27 | 28 | implicit val kafkaDecoder: KafkaDecoder[Customer] = 29 | KafkaDecoder.instance { byteRecord => 30 | val Array(userId, name) = 31 | new String(byteRecord.value, StandardCharsets.UTF_8).split(",") 32 | 33 | Right(Customer(userId, name)) 34 | } 35 | } 36 | 37 | class ReadOnlyTableSpec extends UnitSpec with KafkaSettings { 38 | implicit val timer = IO.timer(global) 39 | implicit val shift = IO.contextShift(global) 40 | 41 | val userIdGen = Gen.oneOf("bob", "alice", "joe", "anyref") 42 | 43 | val customerGen = for { 44 | userId <- userIdGen 45 | name <- Gen.identifier 46 | } yield Customer(userId, name) 47 | 48 | def customersProducer(producer: ByteProducer, interrupt: SignallingRef[IO, Boolean]) = 49 | Stream 50 | .awakeEvery[IO](1.second) 51 | .evalMap(_ => IO(customerGen.sample.get)) 52 | .evalTap(customer => IO(println(s"Customer: ${customer}"))) 53 | .interruptWhen(interrupt) 54 | .evalMap(Producer.produce[IO, Customer](producer, _, "customers", 0, None)) 55 | 56 | def customersTable = { 57 | val consumerSettings = mkConsumerSettings("customers_consumer", 1000) 58 | 59 | for { 60 | consumer <- KafkaConsumer[IO](consumerSettings) 61 | recordStream <- RecordStream.plain[IO, Customer]( 62 | consumerSettings, 63 | consumer, 64 | Subscription.Topics(List("customers")), 65 | None 66 | ) 67 | table <- Resource.liftF(Tables.inMemory.plain(recordStream)(_.userId)) 68 | } yield table 69 | } 70 | 71 | def userClickStream(interrupt: SignallingRef[IO, Boolean]) = 72 | Stream 73 | .awakeEvery[IO](1.second) 74 | .evalMap(_ => IO(userIdGen.sample.get)) 75 | .interruptWhen(interrupt) 76 | 77 | def joinWith[A, K, V](stream: Stream[IO, A], table: ReadOnlyTable[IO, K, V])(key: A => K) = 78 | stream.evalMap(a => table.get(key(a)).tupleLeft(a)) 79 | 80 | def program = 81 | for { 82 | signal <- Resource.liftF(SignallingRef[IO, Boolean](false)) 83 | producer <- Producer.create[IO](mkProducerSettings) 84 | customersFiber <- Resource.liftF { 85 | customersProducer(producer, signal).compile.drain.start 86 | } 87 | table <- customersTable 88 | printerFiber <- Resource.liftF( 89 | joinWith(userClickStream(signal), table)(identity) 90 | .evalTap(pair => IO(println(s"Join: ${pair}"))) 91 | .compile 92 | .drain 93 | .start 94 | ) 95 | } yield signal 96 | 97 | "A table-based program" must { 98 | "work properly" in { 99 | val r = program use { signal => 100 | timer.sleep(10.seconds) >> 101 | signal.set(true) 102 | } 103 | 104 | r.unsafeRunSync() 105 | } 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /src/it/scala/RecordStreamSpec.scala: 
-------------------------------------------------------------------------------- 1 | package com.iravid.fs2.kafka.client 2 | 3 | import cats.implicits._ 4 | import cats.effect.IO 5 | import com.iravid.fs2.kafka.UnitSpec 6 | import fs2.Stream 7 | import org.scalacheck.Gen 8 | 9 | import scala.concurrent.ExecutionContext.Implicits.global 10 | import scala.concurrent.duration._ 11 | 12 | class RecordStreamIntegrationSpec extends UnitSpec with KafkaSettings { 13 | implicit val shift = IO.contextShift(global) 14 | implicit val timer = IO.timer(global) 15 | 16 | def partitionedProgram(consumerSettings: ConsumerSettings, 17 | producerSettings: ProducerSettings, 18 | topic: String, 19 | data: List[(Int, String)]) = { 20 | val recordStream = 21 | for { 22 | consumer <- KafkaConsumer[IO](consumerSettings) 23 | recordStream <- RecordStream.partitioned[IO, String]( 24 | consumerSettings, 25 | consumer, 26 | Subscription.Topics(List(topic)), 27 | None) 28 | } yield recordStream 29 | 30 | for { 31 | _ <- produce(producerSettings, topic, data) 32 | results <- recordStream use { stream => 33 | stream.records 34 | .map { 35 | case (_, stream) => stream 36 | } 37 | .parJoinUnbounded 38 | .chunkN(data.length, true) 39 | .evalMap { recs => 40 | stream.commitQueue 41 | .requestCommit(recs.foldMap(rec => 42 | CommitRequest(rec.env.topic, rec.env.partition, rec.env.offset))) 43 | .as(recs.map(_.fa)) 44 | } 45 | .flatMap(Stream.chunk(_).covary[IO]) 46 | .take(data.length.toLong) 47 | .compile 48 | .toVector 49 | .timeout(30.seconds) 50 | } 51 | } yield results 52 | } 53 | 54 | def plainProgram(consumerSettings: ConsumerSettings, 55 | producerSettings: ProducerSettings, 56 | topic: String, 57 | data: List[String]) = { 58 | val recordStream = 59 | for { 60 | consumer <- KafkaConsumer[IO](consumerSettings) 61 | recordStream <- RecordStream 62 | .plain[IO, String]( 63 | consumerSettings, 64 | consumer, 65 | Subscription.Topics(List(topic)), 66 | None) 67 | } yield recordStream 68 | 69 | for { 70 | _ <- produce(producerSettings, topic, data.tupleLeft(0)) 71 | results <- recordStream use { recordStream => 72 | recordStream.records 73 | .chunkN(data.length, true) 74 | .evalMap { records => 75 | val commitReq = 76 | records.foldMap(record => 77 | CommitRequest(record.env.topic, record.env.partition, record.env.offset)) 78 | recordStream.commitQueue 79 | .requestCommit(commitReq) 80 | .as(records.map(_.fa)) 81 | } 82 | .flatMap(Stream.chunk(_).covary[IO]) 83 | .take(data.length.toLong) 84 | .compile 85 | .toVector 86 | .timeout(30.seconds) 87 | } 88 | } yield results 89 | } 90 | 91 | "The plain consumer" should { 92 | "work properly" in { 93 | forAll((nonEmptyStr, "groupId"), (nonEmptyStr, "topic"), (Gen.listOf(Gen.alphaStr), "data")) { 94 | (groupId: String, topic: String, data: List[String]) => 95 | val consumerSettings = 96 | mkConsumerSettings(groupId, 100) 97 | val producerSettings = mkProducerSettings 98 | val results = 99 | plainProgram(consumerSettings, producerSettings, topic, data) 100 | .unsafeRunSync() 101 | 102 | results.collect { case Right(a) => a } should contain theSameElementsAs data 103 | } 104 | } 105 | 106 | "handle data lengths bigger than the buffer size" in { 107 | forAll((nonEmptyStr, "groupId"), (nonEmptyStr, "topic"), (Gen.listOf(Gen.alphaStr), "data")) { 108 | (groupId: String, topic: String, data: List[String]) => 109 | val consumerSettings = 110 | mkConsumerSettings(groupId, (data.length / 2) max 1) 111 | val producerSettings = mkProducerSettings 112 | val results = 113 | 
plainProgram(consumerSettings, producerSettings, topic, data) 114 | .unsafeRunSync() 115 | 116 | results.collect { case Right(a) => a } should contain theSameElementsAs data 117 | } 118 | } 119 | } 120 | 121 | "The partitioned consumer" should { 122 | "work properly" in { 123 | val dataGen = for { 124 | partitions <- Gen.chooseNum(1, 8) 125 | data <- Gen.listOf(Gen.zip(Gen.chooseNum(0, partitions - 1), Gen.alphaStr)) 126 | } yield (partitions, data) 127 | 128 | forAll((nonEmptyStr, "topic"), (nonEmptyStr, "groupId"), (dataGen, "data")) { 129 | case (topic, groupId, (partitions, data)) => 130 | val results = createCustomTopic(topic, partitions = partitions) use { _ => 131 | val consumerSettings = mkConsumerSettings(groupId, 100) 132 | val producerSettings = mkProducerSettings 133 | 134 | partitionedProgram(consumerSettings, producerSettings, topic, data) 135 | } 136 | 137 | results 138 | .unsafeRunSync() 139 | .collect { case Right(a) => a } should contain theSameElementsAs 140 | (data.map(_._2)) 141 | } 142 | } 143 | } 144 | } 145 | -------------------------------------------------------------------------------- /src/it/scala/UnitSpec.scala: -------------------------------------------------------------------------------- 1 | package com.iravid.fs2.kafka 2 | 3 | import org.scalatest.prop.GeneratorDrivenPropertyChecks 4 | import org.scalatest._ 5 | 6 | abstract class UnitSpec 7 | extends WordSpec with Matchers with GeneratorDrivenPropertyChecks with OptionValues 8 | -------------------------------------------------------------------------------- /src/main/scala/com/iravid/fs2/kafka/EnvT.scala: -------------------------------------------------------------------------------- 1 | package com.iravid.fs2.kafka 2 | 3 | import cats.{ Applicative, Eval, Traverse } 4 | 5 | case class EnvT[E, F[_], A](env: E, fa: F[A]) 6 | 7 | object EnvT { 8 | implicit def traverse[E, F[_]: Traverse]: Traverse[EnvT[E, F, ?]] = 9 | new Traverse[EnvT[E, F, ?]] { 10 | def traverse[G[_], A, B](fa: EnvT[E, F, A])(f: A => G[B])( 11 | implicit G: Applicative[G]): G[EnvT[E, F, B]] = 12 | G.map(Traverse[F].traverse(fa.fa)(f))(EnvT(fa.env, _)) 13 | 14 | def foldLeft[A, B](fa: EnvT[E, F, A], b: B)(f: (B, A) => B): B = 15 | Traverse[F].foldLeft(fa.fa, b)(f) 16 | 17 | def foldRight[A, B](fa: EnvT[E, F, A], b: Eval[B])(f: (A, Eval[B]) => Eval[B]): Eval[B] = 18 | Traverse[F].foldRight(fa.fa, b)(f) 19 | 20 | override def map[A, B](fa: EnvT[E, F, A])(f: A => B): EnvT[E, F, B] = 21 | fa.copy(fa = Traverse[F].map(fa.fa)(f)) 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/main/scala/com/iravid/fs2/kafka/client/CommitQueue.scala: -------------------------------------------------------------------------------- 1 | package com.iravid.fs2.kafka.client 2 | 3 | import cats.effect.Concurrent 4 | import cats.effect.concurrent.Deferred 5 | import cats.implicits._ 6 | import fs2.concurrent.Queue 7 | import fs2.Stream 8 | 9 | case class CommitQueue[F[_]](queue: Queue[F, (Deferred[F, Either[Throwable, Unit]], CommitRequest)], 10 | batchSize: Int) { 11 | def requestCommit(request: CommitRequest)(implicit F: Concurrent[F]): F[Unit] = 12 | for { 13 | deferred <- Deferred[F, Either[Throwable, Unit]] 14 | _ <- queue.enqueue1((deferred, request)) 15 | _ <- deferred.get.rethrow 16 | } yield () 17 | 18 | def batchedDequeue( 19 | implicit F: Concurrent[F]): Stream[F, (Deferred[F, Either[Throwable, Unit]], CommitRequest)] = 20 | Stream 21 | .repeatEval(queue.dequeueChunk1(batchSize)) 22 | .evalMap { 
chunk => 23 | val (defs, reqs) = chunk.toList.unzip 24 | 25 | Deferred[F, Either[Throwable, Unit]].map { deferred => 26 | (new Deferred[F, Either[Throwable, Unit]] { 27 | def get = deferred.get 28 | def complete(a: Either[Throwable, Unit]) = 29 | defs.traverse_(_.complete(a)) 30 | }, reqs.foldMap(identity)) 31 | } 32 | } 33 | } 34 | 35 | object CommitQueue { 36 | def create[F[_]: Concurrent](size: Int, batchSize: Int) = 37 | Queue 38 | .bounded[F, (Deferred[F, Either[Throwable, Unit]], CommitRequest)](size) 39 | .map(CommitQueue(_, batchSize)) 40 | } 41 | -------------------------------------------------------------------------------- /src/main/scala/com/iravid/fs2/kafka/client/ConsumerAlgebra.scala: -------------------------------------------------------------------------------- 1 | package com.iravid.fs2.kafka.client 2 | 3 | import cats.effect.{ ContextShift, ExitCase, Resource, Sync } 4 | import cats.implicits._ 5 | import cats.effect.{ ConcurrentEffect, Timer } 6 | import com.iravid.fs2.kafka.model.ByteRecord 7 | import java.util.{ Collection => JCollection, Properties } 8 | import org.apache.kafka.clients.consumer.{ ConsumerRebalanceListener, ConsumerRecords } 9 | import org.apache.kafka.common.TopicPartition 10 | import org.apache.kafka.common.errors.WakeupException 11 | import org.apache.kafka.common.serialization.ByteArrayDeserializer 12 | import java.time.{ Duration => JDuration } 13 | 14 | import scala.concurrent.duration.FiniteDuration 15 | import scala.collection.JavaConverters._ 16 | 17 | trait Consumer[F[_]] { 18 | def commit(data: OffsetMap): F[Unit] 19 | 20 | def poll(pollTimeout: FiniteDuration, 21 | wakeupTimeout: FiniteDuration): F[Map[TopicPartition, List[ByteRecord]]] 22 | 23 | def subscribe(subscription: Subscription, listener: Rebalance.Listener[F]): F[Unit] 24 | 25 | def unsubscribe: F[Unit] 26 | 27 | def pause(partitions: List[TopicPartition]): F[Unit] 28 | 29 | def resume(partitions: List[TopicPartition]): F[Unit] 30 | 31 | def seek(partition: TopicPartition, offset: Long): F[Unit] 32 | 33 | def seekToBeginning(partitions: List[TopicPartition]): F[Unit] 34 | 35 | def seekToEnd(partitions: List[TopicPartition]): F[Unit] 36 | } 37 | 38 | class KafkaConsumer[F[_]](consumer: ByteConsumer)(implicit F: ConcurrentEffect[F], timer: Timer[F]) 39 | extends Consumer[F] { 40 | def commit(data: OffsetMap): F[Unit] = 41 | F.delay(consumer.commitSync(data.asJava)) 42 | 43 | def adaptConsumerRecords( 44 | records: ConsumerRecords[Array[Byte], Array[Byte]]): F[Map[TopicPartition, List[ByteRecord]]] = 45 | F.delay { 46 | val builder = Map.newBuilder[TopicPartition, List[ByteRecord]] 47 | val partitions = records.partitions().iterator() 48 | 49 | while (partitions.hasNext()) { 50 | val partition = partitions.next() 51 | val recordList = records.records(partition).iterator() 52 | val recordsBuilder = List.newBuilder[ByteRecord] 53 | 54 | while (recordList.hasNext()) { 55 | recordsBuilder += recordList.next() 56 | } 57 | 58 | builder += partition -> recordsBuilder.result() 59 | } 60 | 61 | builder.result() 62 | } 63 | 64 | def poll(pollTimeout: FiniteDuration, 65 | wakeupTimeout: FiniteDuration): F[Map[TopicPartition, List[ByteRecord]]] = 66 | F.race( 67 | timer.sleep(wakeupTimeout), 68 | F.bracketCase(F.start(F.delay(consumer.poll(JDuration.ofMillis(pollTimeout.toMillis)))))( 69 | _.join.flatMap(adaptConsumerRecords)) { 70 | case (_, ExitCase.Completed) => F.delay(()) 71 | case (_, ExitCase.Error(_)) => F.delay(consumer.wakeup()) 72 | case (_, ExitCase.Canceled) => 
F.delay(consumer.wakeup()) 73 | } 74 | ) flatMap { 75 | case Left(_) => F.raiseError(new WakeupException) 76 | case Right(result) => F.pure(result) 77 | } 78 | 79 | def subscribe(subscription: Subscription, listener: Rebalance.Listener[F]): F[Unit] = { 80 | val rebalanceListener = new ConsumerRebalanceListener { 81 | def onPartitionsAssigned(jpartitions: JCollection[TopicPartition]): Unit = 82 | F.toIO(listener(Rebalance.Assign(jpartitions.asScala.toList))).unsafeRunSync 83 | def onPartitionsRevoked(jpartitions: JCollection[TopicPartition]): Unit = 84 | F.toIO(listener(Rebalance.Revoke(jpartitions.asScala.toList))).unsafeRunSync 85 | } 86 | 87 | subscription match { 88 | case Subscription.Topics(topics) => 89 | F.delay(consumer.subscribe(topics.asJava, rebalanceListener)) 90 | case Subscription.Pattern(pattern) => 91 | for { 92 | pattern <- F.delay(java.util.regex.Pattern.compile(pattern)) 93 | _ <- F.delay(consumer.subscribe(pattern, rebalanceListener)) 94 | } yield () 95 | } 96 | } 97 | 98 | def unsubscribe: F[Unit] = F.delay(consumer.unsubscribe()) 99 | 100 | def pause(partitions: List[TopicPartition]): F[Unit] = 101 | F.delay(consumer.pause(partitions.asJava)) 102 | 103 | def resume(partitions: List[TopicPartition]): F[Unit] = 104 | F.delay(consumer.resume(partitions.asJava)) 105 | 106 | def seek(partition: TopicPartition, offset: Long): F[Unit] = 107 | F.delay(consumer.seek(partition, offset)) 108 | 109 | def seekToBeginning(partitions: List[TopicPartition]): F[Unit] = 110 | F.delay(consumer.seekToBeginning(partitions.asJava)) 111 | 112 | def seekToEnd(partitions: List[TopicPartition]): F[Unit] = 113 | F.delay(consumer.seekToEnd(partitions.asJava)) 114 | } 115 | 116 | object KafkaConsumer { 117 | def consumer[F[_]](settings: Properties)(implicit F: Sync[F]) = 118 | Resource.make( 119 | F.delay(new ByteConsumer(settings, new ByteArrayDeserializer, new ByteArrayDeserializer)) 120 | )(consumer => F.delay(consumer.close())) 121 | 122 | def apply[F[_]: ConcurrentEffect: Timer: ContextShift]( 123 | settings: ConsumerSettings): Resource[F, Consumer[F]] = 124 | consumer(settings.driverProperties).map(new KafkaConsumer(_)) 125 | } 126 | -------------------------------------------------------------------------------- /src/main/scala/com/iravid/fs2/kafka/client/Producer.scala: -------------------------------------------------------------------------------- 1 | package com.iravid.fs2.kafka.client 2 | 3 | import cats.Id 4 | import cats.effect.{ Async, Resource, Sync } 5 | import com.iravid.fs2.kafka.EnvT 6 | import com.iravid.fs2.kafka.codecs.KafkaEncoder 7 | import com.iravid.fs2.kafka.model.{ ByteProducerRecord, ProducerResult } 8 | import java.util.concurrent.TimeUnit 9 | import org.apache.kafka.clients.producer.{ Callback, RecordMetadata } 10 | import org.apache.kafka.common.header.Header 11 | import org.apache.kafka.common.serialization.ByteArraySerializer 12 | 13 | import scala.collection.JavaConverters._ 14 | 15 | object Producer { 16 | def create[F[_]: Sync](settings: ProducerSettings): Resource[F, ByteProducer] = 17 | Resource.make(Sync[F].delay { 18 | new ByteProducer(settings.driverProperties, new ByteArraySerializer, new ByteArraySerializer) 19 | })(producer => 20 | Sync[F].delay(producer.close(settings.closeTimeout.toMillis, TimeUnit.MILLISECONDS))) 21 | 22 | def toProducerRecord[T: KafkaEncoder](t: T, 23 | topic: String, 24 | partition: Int, 25 | timestamp: Option[Long]): ByteProducerRecord = { 26 | val (key, value) = KafkaEncoder[T].encode(t) 27 | 28 | new ByteProducerRecord( 29 | 
topic, 30 | partition, 31 | timestamp.map(new java.lang.Long(_)).orNull, 32 | key.map(_.data).orNull, 33 | value.data, 34 | List.empty[Header].asJava) 35 | } 36 | 37 | def produce[F[_]: Async, T: KafkaEncoder](producer: ByteProducer, 38 | data: T, 39 | topic: String, 40 | partition: Int, 41 | timestamp: Option[Long]): F[ProducerResult[T]] = 42 | Async[F].async { cb => 43 | val record = toProducerRecord(data, topic, partition, timestamp) 44 | 45 | producer.send( 46 | record, 47 | new Callback { 48 | override def onCompletion(metadata: RecordMetadata, exception: Exception): Unit = 49 | if (exception eq null) cb(Right(EnvT[RecordMetadata, Id, T](metadata, data))) 50 | else cb(Left(exception)) 51 | } 52 | ) 53 | 54 | () 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/main/scala/com/iravid/fs2/kafka/client/RecordStream.scala: -------------------------------------------------------------------------------- 1 | package com.iravid.fs2.kafka.client 2 | 3 | import cats.{ Applicative, Apply, Functor, MonadError } 4 | import cats.effect._, cats.effect.implicits._, cats.implicits._ 5 | import cats.effect.concurrent.{ Deferred, Ref } 6 | import com.iravid.fs2.kafka.EnvT 7 | import com.iravid.fs2.kafka.codecs.KafkaDecoder 8 | import com.iravid.fs2.kafka.model.{ ByteRecord, ConsumerMessage, Result } 9 | import fs2._ 10 | import fs2.concurrent.Queue 11 | import org.apache.kafka.common.TopicPartition 12 | 13 | object RecordStream { 14 | case class Partitioned[F[_], T]( 15 | commitQueue: CommitQueue[F], 16 | records: Stream[F, (TopicPartition, Stream[F, ConsumerMessage[Result, T]])]) 17 | case class Plain[F[_], T](commitQueue: CommitQueue[F], 18 | records: Stream[F, ConsumerMessage[Result, T]]) 19 | 20 | case class InconsistentPartitionState(operation: String, 21 | partition: TopicPartition, 22 | tracker: Set[TopicPartition]) 23 | extends Exception(s"""|Inconsistent partition state while executing ${operation}! 
24 | |Suspicious partition: ${partition} 25 | |Partition tracker: ${tracker}""".stripMargin) 26 | 27 | case class PartitionHandle[F[_]](recordCount: Ref[F, Int], 28 | data: Queue[F, Option[Chunk[ByteRecord]]]) { 29 | def enqueue(chunk: Chunk[ByteRecord])(implicit F: Apply[F]): F[Unit] = 30 | recordCount.update(_ + chunk.size) *> 31 | data.enqueue1(chunk.some) 32 | 33 | def complete: F[Unit] = data.enqueue1(none) 34 | 35 | def dequeue(implicit F: Functor[F]): Stream[F, ByteRecord] = 36 | data.dequeue.unNoneTerminate 37 | .evalMap { chunk => 38 | recordCount.update(_ - chunk.size).as(chunk) 39 | } 40 | .flatMap(Stream.chunk(_)) 41 | } 42 | 43 | object PartitionHandle { 44 | def create[F[_]: Concurrent]: F[PartitionHandle[F]] = 45 | for { 46 | recordCount <- Ref[F].of(0) 47 | queue <- Queue 48 | .unbounded[F, Option[Chunk[ByteRecord]]] 49 | } yield PartitionHandle(recordCount, queue) 50 | } 51 | 52 | case class Resources[F[_], T]( 53 | consumer: Consumer[F], 54 | polls: Stream[F, Poll.type], 55 | commits: CommitQueue[F], 56 | shutdownQueue: Queue[F, None.type], 57 | partitionTracker: Ref[F, Map[TopicPartition, PartitionHandle[F]]], 58 | pausedPartitions: Ref[F, Set[TopicPartition]], 59 | pendingRebalances: Ref[F, List[Rebalance]], 60 | partitionsQueue: Queue[ 61 | F, 62 | Either[Throwable, Option[(TopicPartition, Stream[F, ConsumerMessage[Result, T]])]]] 63 | ) { 64 | def commandStream(implicit F: Concurrent[F]) 65 | : Stream[F, Either[(Deferred[F, Either[Throwable, Unit]], CommitRequest), Poll.type]] = 66 | shutdownQueue.dequeue 67 | .mergeHaltL(commits.batchedDequeue.either(polls).map(_.some)) 68 | .unNoneTerminate 69 | } 70 | 71 | def applyRebalanceEvents[F[_], T: KafkaDecoder]( 72 | partitionTracker: Map[TopicPartition, PartitionHandle[F]], 73 | partitionsQueue: Queue[ 74 | F, 75 | Either[Throwable, Option[(TopicPartition, Stream[F, ConsumerMessage[Result, T]])]]], 76 | rebalances: List[Rebalance])( 77 | implicit F: Concurrent[F]): F[Map[TopicPartition, PartitionHandle[F]]] = 78 | rebalances.foldLeftM(partitionTracker) { 79 | case (tracker, Rebalance.Assign(partitions)) => 80 | partitions 81 | .traverse { tp => 82 | partitionTracker.get(tp) match { 83 | case None => 84 | for { 85 | h <- PartitionHandle.create 86 | _ <- partitionsQueue.enqueue1( 87 | (tp, h.dequeue through deserialize[F, T]).some.asRight) 88 | } yield (tp, h) 89 | case Some(_) => 90 | F.raiseError[(TopicPartition, PartitionHandle[F])]( 91 | InconsistentPartitionState("Rebalance.Assign", tp, tracker.keySet)) 92 | } 93 | } 94 | .map(tracker ++ _) 95 | 96 | case (tracker, Rebalance.Revoke(partitions)) => 97 | partitions 98 | .traverse_ { tp => 99 | partitionTracker.get(tp) match { 100 | case Some(h) => 101 | h.complete 102 | case None => 103 | F.raiseError[Unit]( 104 | InconsistentPartitionState("Rebalance.Revoke", tp, tracker.keySet)) 105 | } 106 | } 107 | .as(tracker -- partitions) 108 | } 109 | 110 | def resumePartitions[F[_]]( 111 | settings: ConsumerSettings, 112 | pausedPartitions: Set[TopicPartition], 113 | partitionTracker: Map[TopicPartition, PartitionHandle[F]] 114 | )(implicit F: Concurrent[F]): F[List[TopicPartition]] = 115 | pausedPartitions.toList.flatTraverse { tp => 116 | partitionTracker.get(tp) match { 117 | case Some(handle) => 118 | handle.recordCount.get.map { count => 119 | if (count <= settings.partitionOutputBufferSize) 120 | List(tp) 121 | else Nil 122 | } 123 | case None => 124 | F.raiseError[List[TopicPartition]]( 125 | InconsistentPartitionState("resumePartitions", tp, partitionTracker.keySet)) 126 
| } 127 | } 128 | 129 | def distributeRecords[F[_]]( 130 | settings: ConsumerSettings, 131 | partitionTracker: Map[TopicPartition, PartitionHandle[F]], 132 | records: Map[TopicPartition, List[ByteRecord]])(implicit F: MonadError[F, Throwable]) = 133 | records.toList 134 | .flatTraverse { 135 | case (tp, records) => 136 | partitionTracker.get(tp) match { 137 | case Some(handle) => 138 | for { 139 | _ <- handle.enqueue(Chunk.seq(records)) 140 | recordCount <- handle.recordCount.get 141 | shouldPause = if (recordCount <= settings.partitionOutputBufferSize) 142 | Nil 143 | else 144 | List(tp) 145 | } yield shouldPause 146 | case None => 147 | F.raiseError[List[TopicPartition]]( 148 | InconsistentPartitionState("distributeRecords", tp, partitionTracker.keySet)) 149 | } 150 | } 151 | 152 | def commandHandler[F[_], T: KafkaDecoder]( 153 | resources: Resources[F, T], 154 | settings: ConsumerSettings, 155 | command: Either[(Deferred[F, Either[Throwable, Unit]], CommitRequest), Poll.type])( 156 | implicit F: Concurrent[F]): F[Unit] = 157 | command match { 158 | case Left((deferred, req)) => 159 | (resources.consumer 160 | .commit(req.offsets) 161 | .void 162 | .attempt >>= deferred.complete).void 163 | case Right(Poll) => 164 | for { 165 | resumablePartitions <- for { 166 | paused <- resources.pausedPartitions.get 167 | tracker <- resources.partitionTracker.get 168 | resumable <- resumePartitions(settings, paused, tracker) 169 | } yield resumable 170 | _ <- resources.consumer.resume(resumablePartitions) *> 171 | resources.pausedPartitions.update(_ -- resumablePartitions) 172 | 173 | records <- resources.consumer.poll(settings.pollTimeout, settings.wakeupTimeout) 174 | rebalances <- resources.pendingRebalances.getAndSet(Nil).map(_.reverse) 175 | 176 | _ <- resources.partitionTracker.get >>= 177 | (applyRebalanceEvents(_, resources.partitionsQueue, rebalances)) >>= 178 | (resources.partitionTracker.set(_)) 179 | 180 | partitionsToPause <- for { 181 | tracker <- resources.partitionTracker.get 182 | partitionsToPause <- distributeRecords(settings, tracker, records) 183 | } yield partitionsToPause 184 | _ <- resources.consumer.pause(partitionsToPause) *> 185 | resources.pausedPartitions.update(_ ++ partitionsToPause) 186 | } yield () 187 | } 188 | 189 | def pollingLoop[F[_], T: KafkaDecoder](resources: Resources[F, T], settings: ConsumerSettings)( 190 | implicit F: Concurrent[F]) = 191 | resources.commandStream 192 | .evalMap(commandHandler(resources, settings, _)) 193 | 194 | def recoverOffsets[F[_]](rebalance: Rebalance, recoveryFn: TopicPartition => F[Long])( 195 | implicit F: Applicative[F]): F[List[(TopicPartition, Long)]] = 196 | rebalance match { 197 | case Rebalance.Revoke(_) => F.pure(List()) 198 | case Rebalance.Assign(tps) => 199 | tps.traverse(tp => recoveryFn(tp).tupleLeft(tp)) 200 | } 201 | 202 | def partitioned[F[_], T: KafkaDecoder]( 203 | settings: ConsumerSettings, 204 | consumer: Consumer[F], 205 | subscription: Subscription, 206 | offsetRecoveryFn: Option[TopicPartition => F[Long]] 207 | )(implicit F: ConcurrentEffect[F], timer: Timer[F]): Resource[F, Partitioned[F, T]] = 208 | for { 209 | pendingRebalances <- Resource.liftF(Ref[F].of(List[Rebalance]())) 210 | rebalanceListener: Rebalance.Listener[F] = rebalance => 211 | for { 212 | _ <- offsetRecoveryFn.traverse { fn => 213 | for { 214 | offsets <- recoverOffsets(rebalance, fn) 215 | _ <- offsets.traverse_(tp => consumer.seek(tp._1, tp._2)) 216 | } yield () 217 | } 218 | _ <- pendingRebalances.update(rebalance :: _) 219 | } yield () 
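// On every rebalance, the listener above first runs the optional offsetRecoveryFn for the newly
// assigned partitions and seeks the consumer to the recovered offsets, then prepends the event to
// pendingRebalances so the polling loop can update its partition tracker on the next Poll.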
220 | 221 | _ <- Resource.make(consumer.subscribe(subscription, rebalanceListener))(_ => 222 | consumer.unsubscribe) 223 | 224 | resources <- Resource.liftF { 225 | for { 226 | partitionTracker <- Ref[F].of(Map.empty[TopicPartition, PartitionHandle[F]]) 227 | partitionsQueue <- Queue.unbounded[ 228 | F, 229 | Either[Throwable, 230 | Option[(TopicPartition, 231 | Stream[F, ConsumerMessage[Result, T]])]]] 232 | pausedPartitions <- Ref[F].of(Set.empty[TopicPartition]) 233 | commitQueue <- CommitQueue.create[F]( 234 | settings.maxPendingCommits, 235 | settings.commitBatchSize 236 | ) 237 | shutdownQueue <- Queue.bounded[F, None.type](1) 238 | polls = Stream(Poll) ++ Stream.fixedRate(settings.pollInterval).as(Poll) 239 | 240 | } yield 241 | Resources( 242 | consumer, 243 | polls, 244 | commitQueue, 245 | shutdownQueue, 246 | partitionTracker, 247 | pausedPartitions, 248 | pendingRebalances, 249 | partitionsQueue) 250 | } 251 | 252 | partitionsOut = resources.partitionsQueue.dequeue.rethrow.unNoneTerminate 253 | 254 | _ <- Resource.make { 255 | pollingLoop(resources, settings).compile.drain 256 | .handleErrorWith(e => resources.partitionsQueue.enqueue1(e.asLeft)) 257 | .start 258 | }(fiber => resources.shutdownQueue.enqueue1(None) *> fiber.join) 259 | 260 | } yield Partitioned(resources.commits, partitionsOut) 261 | 262 | def plain[F[_]: ConcurrentEffect: Timer, T: KafkaDecoder]( 263 | settings: ConsumerSettings, 264 | consumer: Consumer[F], 265 | subscription: Subscription, 266 | offsetRecoveryFn: Option[TopicPartition => F[Long]]): Resource[F, Plain[F, T]] = 267 | partitioned[F, T](settings, consumer, subscription, offsetRecoveryFn).map { 268 | case Partitioned(commitQueue, records) => 269 | Plain( 270 | commitQueue, 271 | records.map { 272 | case (_, stream) => stream 273 | }.parJoinUnbounded 274 | ) 275 | } 276 | 277 | def deserialize[F[_], T: KafkaDecoder]: Pipe[F, ByteRecord, ConsumerMessage[Result, T]] = 278 | _.map(rec => EnvT(rec, KafkaDecoder[T].decode(rec))) 279 | } 280 | -------------------------------------------------------------------------------- /src/main/scala/com/iravid/fs2/kafka/client/Subscription.scala: -------------------------------------------------------------------------------- 1 | package com.iravid.fs2.kafka.client 2 | 3 | sealed trait Subscription 4 | object Subscription { 5 | case class Topics(topics: List[String]) extends Subscription 6 | case class Pattern(pattern: String) extends Subscription 7 | } 8 | -------------------------------------------------------------------------------- /src/main/scala/com/iravid/fs2/kafka/client/commands.scala: -------------------------------------------------------------------------------- 1 | package com.iravid.fs2.kafka.client 2 | 3 | import cats.kernel.{ Monoid, Semigroup } 4 | import cats.implicits._ 5 | import org.apache.kafka.clients.consumer.OffsetAndMetadata 6 | import org.apache.kafka.common.TopicPartition 7 | 8 | case class CommitRequest(offsets: OffsetMap) 9 | object CommitRequest { 10 | def apply(topic: String, partition: Int, offset: Long): CommitRequest = 11 | CommitRequest(Map(new TopicPartition(topic, partition) -> new OffsetAndMetadata(offset))) 12 | 13 | implicit val offsetAndMetadataSemigroup: Semigroup[OffsetAndMetadata] = 14 | Semigroup.instance { (x, y) => 15 | if (x.offset >= y.offset) x 16 | else y 17 | } 18 | 19 | implicit val commitRequestMonoid: Monoid[CommitRequest] = 20 | new Monoid[CommitRequest] { 21 | def empty: CommitRequest = CommitRequest(Map.empty) 22 | def combine(x: CommitRequest, y: 
CommitRequest): CommitRequest = 23 | CommitRequest(x.offsets |+| y.offsets) 24 | } 25 | } 26 | 27 | case object Poll 28 | 29 | sealed trait Rebalance 30 | object Rebalance { 31 | type Listener[F[_]] = Rebalance => F[Unit] 32 | 33 | case class Assign(partitions: List[TopicPartition]) extends Rebalance 34 | case class Revoke(partitions: List[TopicPartition]) extends Rebalance 35 | } 36 | -------------------------------------------------------------------------------- /src/main/scala/com/iravid/fs2/kafka/client/package.scala: -------------------------------------------------------------------------------- 1 | package com.iravid.fs2.kafka 2 | 3 | import java.util.{ Map => JMap } 4 | import org.apache.kafka.clients.consumer.{ KafkaConsumer => JKafkaConsumer, OffsetAndMetadata } 5 | import org.apache.kafka.clients.producer.{ KafkaProducer => JKafkaProducer } 6 | import org.apache.kafka.common.TopicPartition 7 | 8 | package object client { 9 | type ByteConsumer = JKafkaConsumer[Array[Byte], Array[Byte]] 10 | 11 | type ByteProducer = JKafkaProducer[Array[Byte], Array[Byte]] 12 | 13 | type OffsetMap = Map[TopicPartition, OffsetAndMetadata] 14 | type JOffsetMap = JMap[TopicPartition, OffsetAndMetadata] 15 | } 16 | -------------------------------------------------------------------------------- /src/main/scala/com/iravid/fs2/kafka/client/settings.scala: -------------------------------------------------------------------------------- 1 | package com.iravid.fs2.kafka.client 2 | 3 | import scala.concurrent.duration.FiniteDuration 4 | import java.util.Properties 5 | 6 | import scala.collection.JavaConverters._ 7 | 8 | case class ConsumerSettings(driverSettings: Map[String, String], 9 | maxPendingCommits: Int, 10 | commitBatchSize: Int, 11 | outputBufferSize: Int, 12 | partitionOutputBufferSize: Int, 13 | pollTimeout: FiniteDuration, 14 | pollInterval: FiniteDuration, 15 | wakeupTimeout: FiniteDuration) { 16 | def driverProperties: Properties = { 17 | val props = new java.util.Properties() 18 | props.putAll(driverSettings.asJava) 19 | props 20 | } 21 | } 22 | 23 | case class ProducerSettings(driverSettings: Map[String, String], closeTimeout: FiniteDuration) { 24 | def driverProperties: Properties = { 25 | val props = new java.util.Properties() 26 | props.putAll(driverSettings.asJava) 27 | props 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /src/main/scala/com/iravid/fs2/kafka/codecs/KafkaDecoder.scala: -------------------------------------------------------------------------------- 1 | package com.iravid.fs2.kafka.codecs 2 | 3 | import com.iravid.fs2.kafka.model.{ ByteRecord, Result } 4 | import cats.MonadError 5 | import scala.annotation.tailrec 6 | 7 | object KafkaDecoder { 8 | def apply[T: KafkaDecoder]: KafkaDecoder[T] = implicitly 9 | 10 | def instance[T](f: ByteRecord => Result[T]): KafkaDecoder[T] = 11 | new KafkaDecoder[T] { 12 | def decode(record: ByteRecord) = f(record) 13 | } 14 | 15 | implicit val monad: MonadError[KafkaDecoder, Throwable] = 16 | new MonadError[KafkaDecoder, Throwable] { 17 | def pure[A](a: A): KafkaDecoder[A] = instance(_ => Right(a)) 18 | 19 | def handleErrorWith[A](fa: KafkaDecoder[A])(f: Throwable => KafkaDecoder[A]) = 20 | instance { record => 21 | fa.decode(record).fold(e => f(e).decode(record), Right(_)) 22 | } 23 | 24 | def raiseError[A](e: Throwable): KafkaDecoder[A] = instance(_ => Left(e)) 25 | 26 | def flatMap[A, B](fa: KafkaDecoder[A])(f: A => KafkaDecoder[B]): KafkaDecoder[B] = 27 | instance { record => 28 | 
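// Note: both decoders observe the same underlying ByteRecord here: `fa` decodes it first, and the
// decoder produced by `f` re-reads the very same record, so flatMap composes decoders over a
// shared input rather than chaining one decoder's output into the next.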
fa.decode(record).flatMap(f(_).decode(record)) 29 | } 30 | 31 | def tailRecM[A, B](a: A)(f: A => KafkaDecoder[Either[A, B]]): KafkaDecoder[B] = { 32 | @tailrec 33 | def go(data: ByteRecord, a: A): Result[B] = 34 | f(a).decode(data) match { 35 | case Right(Left(a)) => go(data, a) 36 | case Right(Right(b)) => Right(b) 37 | case Left(e) => Left(e) 38 | } 39 | 40 | instance { record => 41 | go(record, a) 42 | } 43 | } 44 | } 45 | 46 | implicit val decoderForByteRecord: KafkaDecoder[ByteRecord] = 47 | instance(Right(_)) 48 | } 49 | 50 | trait KafkaDecoder[T] { 51 | def decode(record: ByteRecord): Result[T] 52 | } 53 | -------------------------------------------------------------------------------- /src/main/scala/com/iravid/fs2/kafka/codecs/KafkaEncoder.scala: -------------------------------------------------------------------------------- 1 | package com.iravid.fs2.kafka.codecs 2 | 3 | import cats.Contravariant 4 | 5 | object KafkaEncoder { 6 | def apply[T: KafkaEncoder]: KafkaEncoder[T] = implicitly 7 | 8 | def instance[T](f: T => (Option[Key], Value)): KafkaEncoder[T] = 9 | new KafkaEncoder[T] { 10 | def encode(t: T) = f(t) 11 | } 12 | 13 | implicit val contravariant: Contravariant[KafkaEncoder] = new Contravariant[KafkaEncoder] { 14 | def contramap[A, B](fa: KafkaEncoder[A])(f: B => A): KafkaEncoder[B] = 15 | instance { b => 16 | fa.encode(f(b)) 17 | } 18 | } 19 | 20 | case class Key(data: Array[Byte]) extends AnyVal 21 | case class Value(data: Array[Byte]) extends AnyVal 22 | } 23 | 24 | trait KafkaEncoder[T] { 25 | import KafkaEncoder._ 26 | 27 | def encode(t: T): (Option[Key], Value) 28 | } 29 | -------------------------------------------------------------------------------- /src/main/scala/com/iravid/fs2/kafka/model/package.scala: -------------------------------------------------------------------------------- 1 | package com.iravid.fs2.kafka 2 | 3 | import cats.Id 4 | import org.apache.kafka.clients.consumer.ConsumerRecord 5 | import org.apache.kafka.clients.producer.{ ProducerRecord, RecordMetadata } 6 | 7 | package object model { 8 | type ByteRecord = ConsumerRecord[Array[Byte], Array[Byte]] 9 | 10 | type ByteProducerRecord = ProducerRecord[Array[Byte], Array[Byte]] 11 | 12 | type ConsumerMessage[F[_], A] = EnvT[ByteRecord, F, A] 13 | 14 | type ProducerResult[A] = EnvT[RecordMetadata, Id, A] 15 | 16 | type Result[A] = Either[Throwable, A] 17 | } 18 | -------------------------------------------------------------------------------- /src/main/scala/com/iravid/fs2/kafka/streams/KVStore.scala: -------------------------------------------------------------------------------- 1 | package com.iravid.fs2.kafka.streams 2 | 3 | import fs2.Stream 4 | 5 | trait KVStore[F[_], K, V] { 6 | type ColumnFamilyHandle 7 | 8 | def columnFamilies: F[List[ColumnFamilyHandle]] 9 | def createColumnFamily(name: String): F[ColumnFamilyHandle] 10 | def getColumnFamily(name: String): F[Option[ColumnFamilyHandle]] 11 | def dropColumnFamily(handle: ColumnFamilyHandle): F[Unit] 12 | 13 | def get(k: K): F[Option[V]] 14 | def get(columnFamily: ColumnFamilyHandle, k: K): F[Option[V]] 15 | 16 | def getAll(ks: List[K]): F[Map[K, V]] 17 | def getAll(columnFamily: ColumnFamilyHandle, ks: List[K]): F[Map[K, V]] 18 | 19 | def put(k: K, v: V): F[Unit] 20 | def put(columnFamily: ColumnFamilyHandle, k: K, v: V): F[Unit] 21 | 22 | def delete(k: K): F[Unit] 23 | def delete(columnFamily: ColumnFamilyHandle, k: K): F[Unit] 24 | 25 | def scan: Stream[F, (K, V)] 26 | def scan(columnFamily: ColumnFamilyHandle): Stream[F, (K, V)] 27 | } 
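A brief usage sketch for this interface, mirroring the round trip in KeyValueStoreSpec.scala above; the RocksDB-backed implementation comes from KVStores.scala and PolyKVStore.scala below, and the object name and store path here are illustrative assumptions only:

import cats.effect.IO
import cats.implicits._
import com.iravid.fs2.kafka.streams._
import java.nio.file.Paths
import scodec.Codec
import scodec.codecs.implicits._

object KVStoreUsageSketch {
  // Key.Aux ties the String key type to the Record value type via their scodec codecs,
  // exactly as done in KeyValueStoreSpec.scala.
  implicit val key: Key.Aux[Codec, String, Record] = Key.instance

  val kvStores = new RocksDBKVStores[IO]

  // Open the polymorphic RocksDB store, narrow it to KVStore[IO, String, Record],
  // and run a put/get round trip inside the Resource scope. The path is a placeholder.
  val roundTrip: IO[Option[Record]] =
    kvStores.open(Paths.get("/tmp/kvstore-example")).use { polyStore =>
      val store = polyStore.monomorphize[String, Record]
      store.put("user-1", Record("hello", 15, true)) *> store.get("user-1")
    }
}

The same opened store can also be used through the PolyKVStore API directly when several key/value pairings with their own Key instances need to share one RocksDB instance.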
28 | -------------------------------------------------------------------------------- /src/main/scala/com/iravid/fs2/kafka/streams/KVStores.scala: -------------------------------------------------------------------------------- 1 | package com.iravid.fs2.kafka.streams 2 | 3 | import cats.effect.concurrent.Ref 4 | import cats.effect.{ Resource, Sync } 5 | import cats.implicits._ 6 | import java.nio.file.Path 7 | import org.rocksdb._ 8 | import scodec.Codec 9 | 10 | import scala.collection.JavaConverters._ 11 | 12 | trait KVStores[C[_], P, F[_]] { 13 | def open(storeKey: P): Resource[F, PolyKVStore[F, C]] 14 | } 15 | 16 | class RocksDBKVStores[F[_]](implicit F: Sync[F]) extends KVStores[Codec, Path, F] { 17 | def listColumnFamilies(storeKey: Path): F[List[ColumnFamilyDescriptor]] = 18 | F.delay( 19 | RocksDB 20 | .listColumnFamilies(new Options(), storeKey.toAbsolutePath.toString) 21 | .asScala 22 | .toList 23 | .map(new ColumnFamilyDescriptor(_)) 24 | ) 25 | 26 | def open(storeKey: Path): Resource[F, PolyKVStore[F, Codec]] = 27 | Resource 28 | .make { 29 | for { 30 | cfDescs <- listColumnFamilies(storeKey) 31 | (store, handles) <- F.delay { 32 | val handles = new java.util.ArrayList[ColumnFamilyHandle]() 33 | val path = storeKey.toAbsolutePath.toString 34 | val store = 35 | if (cfDescs.nonEmpty) 36 | RocksDB.open(path, cfDescs.asJava, handles) 37 | else 38 | RocksDB.open( 39 | new DBOptions() 40 | .setCreateIfMissing(true) 41 | .setCreateMissingColumnFamilies(true), 42 | path, 43 | List(new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY)).asJava, 44 | handles 45 | ) 46 | 47 | (store, handles.asScala.toList) 48 | } 49 | 50 | (default, rest) <- { 51 | val (before, after) = 52 | handles.span(h => !java.util.Arrays.equals(h.getName, RocksDB.DEFAULT_COLUMN_FAMILY)) 53 | val default = after.headOption 54 | val rest = before ++ after.drop(1) 55 | 56 | default match { 57 | case Some(d) => F.pure((d, rest)) 58 | case None => 59 | F.raiseError[(ColumnFamilyHandle, List[ColumnFamilyHandle])]( 60 | new Exception("Could not locate default column family!")) 61 | } 62 | } 63 | 64 | handlesRef <- Ref[F].of(rest.map(h => h.getID -> h).toMap) 65 | } yield (store, handlesRef, default) 66 | } { 67 | case (rocksdb, columnFamilyHandles, defaultColumnFamily) => 68 | for { 69 | handles <- columnFamilyHandles.get 70 | _ <- handles.values.toList.traverse_ { handle => 71 | F.delay(handle.close()) 72 | } 73 | _ <- F.delay(defaultColumnFamily.close()) 74 | _ <- F.delay(rocksdb.close()) 75 | } yield () 76 | } 77 | .map { 78 | case (rocksdb, columnFamilyHandles, defaultColumnFamily) => 79 | new RocksDBPolyKVStore( 80 | rocksdb, 81 | columnFamilyHandles, 82 | defaultColumnFamily 83 | ) 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /src/main/scala/com/iravid/fs2/kafka/streams/Key.scala: -------------------------------------------------------------------------------- 1 | package com.iravid.fs2.kafka.streams 2 | 3 | trait Key[C[_], K] { 4 | type Value 5 | def KeyTC: C[K] 6 | def ValueTC: C[Value] 7 | } 8 | 9 | object Key { 10 | type Aux[C[_], K, V] = Key[C, K] { type Value = V } 11 | 12 | def instance[C[_], K: C, V: C]: Aux[C, K, V] = 13 | new Key[C, K] { 14 | type Value = V 15 | def KeyTC: C[K] = implicitly 16 | def ValueTC: C[V] = implicitly 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /src/main/scala/com/iravid/fs2/kafka/streams/PolyKVStore.scala: 
-------------------------------------------------------------------------------- 1 | package com.iravid.fs2.kafka.streams 2 | 3 | import cats.effect.Sync 4 | import cats.effect.concurrent.Ref 5 | import cats.implicits._ 6 | import fs2.Stream 7 | import java.nio.charset.StandardCharsets 8 | import org.rocksdb.{ 9 | ColumnFamilyDescriptor, 10 | ColumnFamilyHandle => RocksDBColFHandle, 11 | RocksDB, 12 | WriteBatch, 13 | WriteOptions 14 | } 15 | import scodec.Codec 16 | import scodec.bits.BitVector 17 | 18 | import scala.collection.JavaConverters._ 19 | 20 | trait PolyKVStore[F[_], C[_]] { self => 21 | type ColumnFamilyHandle 22 | 23 | def columnFamilies: F[List[ColumnFamilyHandle]] 24 | def createColumnFamily(name: String): F[ColumnFamilyHandle] 25 | def getColumnFamily(name: String): F[Option[ColumnFamilyHandle]] 26 | def dropColumnFamily(handle: ColumnFamilyHandle): F[Unit] 27 | 28 | def get[K](k: K)(implicit K: Key[C, K]): F[Option[K.Value]] 29 | def get[K](columnFamily: ColumnFamilyHandle, k: K)(implicit K: Key[C, K]): F[Option[K.Value]] 30 | 31 | def getAll[K](ks: List[K])(implicit K: Key[C, K]): F[Map[K, K.Value]] 32 | def getAll[K](columnFamily: ColumnFamilyHandle, ks: List[K])( 33 | implicit K: Key[C, K]): F[Map[K, K.Value]] 34 | 35 | def put[K, V](k: K, v: V)(implicit K: Key.Aux[C, K, V]): F[Unit] 36 | def put[K, V](columnFamily: ColumnFamilyHandle, k: K, v: V)(implicit K: Key.Aux[C, K, V]): F[Unit] 37 | 38 | def putAll[K, V](data: List[(K, V)])(implicit K: Key.Aux[C, K, V]): F[Unit] 39 | def putAll[K, V](columnFamily: ColumnFamilyHandle, data: List[(K, V)])( 40 | implicit K: Key.Aux[C, K, V]): F[Unit] 41 | 42 | def delete[K](k: K)(implicit K: Key[C, K]): F[Unit] 43 | def delete[K](columnFamily: ColumnFamilyHandle, k: K)(implicit K: Key[C, K]): F[Unit] 44 | 45 | def scan[K, V](implicit K: Key.Aux[C, K, V]): Stream[F, (K, V)] 46 | def scan[K, V](columnFamily: ColumnFamilyHandle)(implicit K: Key.Aux[C, K, V]): Stream[F, (K, V)] 47 | 48 | def monomorphize[K, V](implicit K: Key.Aux[C, K, V]) 49 | : KVStore[F, K, V] { type ColumnFamilyHandle = self.ColumnFamilyHandle } = 50 | new KVStore[F, K, V] { 51 | type ColumnFamilyHandle = self.ColumnFamilyHandle 52 | 53 | def columnFamilies = self.columnFamilies 54 | def createColumnFamily(name: String) = self.createColumnFamily(name) 55 | def getColumnFamily(name: String) = self.getColumnFamily(name) 56 | def dropColumnFamily(handle: ColumnFamilyHandle) = self.dropColumnFamily(handle) 57 | 58 | def get(k: K) = self.get(k) 59 | def get(cf: ColumnFamilyHandle, k: K) = self.get(cf, k) 60 | 61 | def getAll(ks: List[K]) = self.getAll(ks) 62 | def getAll(cf: ColumnFamilyHandle, ks: List[K]) = self.getAll(cf, ks) 63 | 64 | def put(k: K, v: V) = self.put(k, v) 65 | def put(cf: ColumnFamilyHandle, k: K, v: V) = self.put(cf, k, v) 66 | 67 | def delete(k: K) = self.delete(k) 68 | def delete(cf: ColumnFamilyHandle, k: K) = self.delete(cf, k) 69 | 70 | def scan = self.scan 71 | def scan(cf: ColumnFamilyHandle) = self.scan(cf) 72 | } 73 | } 74 | 75 | object PolyKVStore { 76 | type Aux[F[_], C[_], CF] = PolyKVStore[F, C] { type ColumnFamilyHandle = CF } 77 | } 78 | 79 | class RocksDBPolyKVStore[F[_]](rocksdb: RocksDB, 80 | rocksDbColumnFamilies: Ref[F, Map[Int, RocksDBColFHandle]], 81 | defaultColumnFamily: RocksDBColFHandle)(implicit F: Sync[F]) 82 | extends PolyKVStore[F, Codec] { 83 | type ColumnFamilyHandle = Int 84 | 85 | def columnFamilies: F[List[ColumnFamilyHandle]] = 86 | rocksDbColumnFamilies.get.map(_.values.map(_.getID).toList) 87 | def 
createColumnFamily(name: String) = 88 | F.delay( 89 | rocksdb.createColumnFamily( 90 | new ColumnFamilyDescriptor(name.getBytes(StandardCharsets.UTF_8)))) 91 | .flatMap { handle => 92 | rocksDbColumnFamilies.update(_ + (handle.getID -> handle)).as(handle.getID) 93 | } 94 | def getColumnFamily(name: String) = 95 | rocksDbColumnFamilies.get.map { cfMap => 96 | cfMap 97 | .find { 98 | case (_, handle) => 99 | new String(handle.getName, StandardCharsets.UTF_8) == name 100 | } 101 | .map(_._1) 102 | } 103 | def dropColumnFamily(handle: ColumnFamilyHandle): F[Unit] = 104 | rocksDbColumnFamilies.get 105 | .flatMap(_.get(handle).traverse_(h => F.delay(rocksdb.dropColumnFamily(h)))) *> 106 | rocksDbColumnFamilies.update(_ - handle) 107 | 108 | def get0[K](h: RocksDBColFHandle, k: K)(implicit K: Key[Codec, K]) = 109 | F.delay { 110 | Option(rocksdb.get(h, K.KeyTC.encode(k).require.toByteArray)) 111 | .map(bytes => K.ValueTC.decodeValue(BitVector.view(bytes)).require) 112 | } 113 | 114 | def get[K](k: K)(implicit K: Key[Codec, K]): F[Option[K.Value]] = get0(defaultColumnFamily, k) 115 | def get[K](columnFamily: ColumnFamilyHandle, k: K)( 116 | implicit K: Key[Codec, K]): F[Option[K.Value]] = 117 | rocksDbColumnFamilies.get 118 | .flatMap( 119 | _.get(columnFamily) 120 | .flatTraverse(get0(_, k))) 121 | 122 | def getAll0[K](h: RocksDBColFHandle, ks: List[K])(implicit K: Key[Codec, K]): F[Map[K, K.Value]] = 123 | F.delay { 124 | val serializedKeys = ks.map(K.KeyTC.encode(_).require.toByteArray) 125 | val result = 126 | rocksdb.multiGet(List.fill(ks.size)(h).asJava, serializedKeys.asJava).asScala.toMap 127 | 128 | result.map { 129 | case (k, v) => 130 | K.KeyTC.decodeValue(BitVector.view(k)).require -> 131 | K.ValueTC.decodeValue(BitVector.view(v)).require 132 | } 133 | } 134 | def getAll[K: Key[Codec, ?]](ks: List[K]) = getAll0(defaultColumnFamily, ks) 135 | def getAll[K](columnFamily: ColumnFamilyHandle, ks: List[K])( 136 | implicit K: Key[Codec, K]): F[Map[K, K.Value]] = 137 | rocksDbColumnFamilies.get.flatMap(_.get(columnFamily) match { 138 | case Some(h) => getAll0(h, ks) 139 | case None => F.pure(Map()) 140 | }) 141 | 142 | def delete0[K](h: RocksDBColFHandle, k: K)(implicit K: Key[Codec, K]) = F.delay { 143 | rocksdb.delete(h, K.KeyTC.encode(k).require.toByteArray) 144 | } 145 | def delete[K: Key[Codec, ?]](k: K) = delete0(defaultColumnFamily, k) 146 | def delete[K: Key[Codec, ?]](columnFamily: ColumnFamilyHandle, k: K) = 147 | rocksDbColumnFamilies.get.flatMap( 148 | _.get(columnFamily) 149 | .traverse_(delete0(_, k))) 150 | 151 | def put0[K, V](h: RocksDBColFHandle, k: K, v: V)(implicit K: Key.Aux[Codec, K, V]) = 152 | F.delay { 153 | rocksdb.put( 154 | h, 155 | K.KeyTC.encode(k).require.toByteArray, 156 | K.ValueTC.encode(v).require.toByteArray 157 | ) 158 | } 159 | def put[K, V](k: K, v: V)(implicit K: Key.Aux[Codec, K, V]) = put0(defaultColumnFamily, k, v) 160 | def put[K, V](columnFamily: ColumnFamilyHandle, k: K, v: V)(implicit K: Key.Aux[Codec, K, V]) = 161 | rocksDbColumnFamilies.get.flatMap( 162 | _.get(columnFamily) 163 | .traverse_(put0(_, k, v))) 164 | 165 | def putAll0[K, V](h: RocksDBColFHandle, data: List[(K, V)])(implicit K: Key.Aux[Codec, K, V]) = 166 | F.bracket(F.delay(new WriteBatch())) { writeBatch => 167 | F.bracket(F.delay(new WriteOptions())) { writeOptions => 168 | F.delay { 169 | data.foreach { kv => 170 | writeBatch.put( 171 | h, 172 | K.KeyTC.encode(kv._1).require.toByteArray, 173 | K.ValueTC.encode(kv._2).require.toByteArray 174 | ) 175 | } 176 | 177 | 
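// The puts above are only staged in the WriteBatch; the single write below applies the whole
// batch to RocksDB atomically, after which the surrounding brackets close the WriteOptions
// and WriteBatch handles.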
rocksdb.write(writeOptions, writeBatch) 178 | } 179 | }(wo => F.delay(wo.close())) 180 | }(wb => F.delay(wb.close())) 181 | 182 | def putAll[K, V](data: List[(K, V)])(implicit K: Key.Aux[Codec, K, V]): F[Unit] = 183 | putAll0(defaultColumnFamily, data) 184 | def putAll[K, V](columnFamily: ColumnFamilyHandle, data: List[(K, V)])( 185 | implicit K: Key.Aux[Codec, K, V]): F[Unit] = 186 | rocksDbColumnFamilies.get.flatMap(_.get(columnFamily).traverse_(putAll0(_, data))) 187 | 188 | def scan0[K, V](h: RocksDBColFHandle)(implicit K: Key.Aux[Codec, K, V]): Stream[F, (K, V)] = 189 | Stream 190 | .bracket { 191 | F.delay { 192 | val iterator = rocksdb.newIterator(h) 193 | iterator.seekToFirst() 194 | iterator 195 | } 196 | }(iterator => F.delay(iterator.close())) 197 | .flatMap { iterator => 198 | Stream.repeatEval { 199 | F.delay { 200 | if (iterator.isValid()) { 201 | val key = K.KeyTC.decodeValue(BitVector.view(iterator.key())).require 202 | val value = K.ValueTC.decodeValue(BitVector.view(iterator.value())).require 203 | 204 | iterator.next() 205 | 206 | Some((key, value)) 207 | } else None 208 | } 209 | }.unNoneTerminate 210 | } 211 | 212 | def scan[K, V](implicit K: Key.Aux[Codec, K, V]): Stream[F, (K, V)] = scan0(defaultColumnFamily) 213 | def scan[K, V](columnFamily: ColumnFamilyHandle)( 214 | implicit K: Key.Aux[Codec, K, V]): Stream[F, (K, V)] = 215 | Stream.eval(rocksDbColumnFamilies.get.map(_.get(columnFamily))) flatMap { 216 | case None => Stream.empty 217 | case Some(h) => scan0(h) 218 | } 219 | } 220 | -------------------------------------------------------------------------------- /src/main/scala/com/iravid/fs2/kafka/streams/Table.scala: -------------------------------------------------------------------------------- 1 | package com.iravid.fs2.kafka.streams 2 | 3 | import cats.{ Functor, Monad } 4 | import cats.effect.Sync 5 | import cats.implicits._ 6 | import cats.effect.concurrent.Ref 7 | import fs2.Stream 8 | import scodec.{ Attempt, Codec, Err } 9 | 10 | trait ReadOnlyTable[F[_], K, V] { 11 | def get(k: K): F[Option[V]] 12 | 13 | def getAll(ks: List[K]): F[Map[K, Option[V]]] 14 | 15 | def scan: Stream[F, (K, V)] 16 | } 17 | 18 | trait Table[F[_], K, V] { table => 19 | def put(k: K, v: V): F[Unit] 20 | 21 | def putAll(data: List[(K, V)]): F[Unit] 22 | 23 | def get(k: K): F[Option[V]] 24 | 25 | def getAll(ks: List[K]): F[Map[K, Option[V]]] 26 | 27 | def commit(offset: Long): F[Unit] 28 | 29 | def lastCommittedOffset: F[Long] 30 | 31 | def delete(k: K): F[Unit] 32 | 33 | def scan: Stream[F, (K, V)] 34 | 35 | def view: ReadOnlyTable[F, K, V] = 36 | new ReadOnlyTable[F, K, V] { 37 | def get(k: K): F[Option[V]] = table.get(k) 38 | def getAll(ks: List[K]): F[Map[K, Option[V]]] = table.getAll(ks) 39 | def scan: Stream[F, (K, V)] = table.scan 40 | } 41 | } 42 | 43 | object InMemoryTable { 44 | case class State[K, V](data: Map[K, V], offset: Long) 45 | 46 | def create[F[_]: Functor: Sync, K, V]: F[InMemoryTable[F, K, V]] = 47 | Ref[F].of(State(Map.empty[K, V], 0L)).map(new InMemoryTable(_)) 48 | } 49 | 50 | class InMemoryTable[F[_]: Functor, K, V](ref: Ref[F, InMemoryTable.State[K, V]]) 51 | extends Table[F, K, V] { 52 | def put(k: K, v: V): F[Unit] = 53 | ref.update(state => state.copy(data = state.data + (k -> v))) 54 | 55 | def putAll(data: List[(K, V)]): F[Unit] = 56 | ref.update(state => state.copy(data = state.data ++ data)) 57 | 58 | def get(k: K): F[Option[V]] = 59 | ref.get.map(_.data.get(k)) 60 | 61 | def getAll(k: List[K]): F[Map[K, Option[V]]] = 62 | ref.get.map(state => 
k.fproduct(state.data.get).toMap) 63 | 64 | def commit(offset: Long): F[Unit] = 65 | ref.update(state => state.copy(offset = offset)) 66 | 67 | def lastCommittedOffset: F[Long] = 68 | ref.get.map(_.offset) 69 | 70 | def delete(k: K): F[Unit] = 71 | ref.update(state => state.copy(data = state.data - k)) 72 | 73 | def scan: Stream[F, (K, V)] = 74 | Stream.eval(ref.get).flatMap(state => Stream.emits(state.data.toList)) 75 | } 76 | 77 | object RocksDBTable { 78 | case object CommitKey { 79 | import scodec.codecs._ 80 | 81 | val value = "CommitKey" 82 | 83 | implicit val codec = utf8_32.exmap[CommitKey]( 84 | { 85 | case `value` => Attempt.successful(CommitKey) 86 | case other => Attempt.failure(Err.General(s"Bad value for CommitKey: ${other}", Nil)) 87 | }, 88 | _ => Attempt.successful(value) 89 | ) 90 | 91 | implicit val K: Key.Aux[Codec, CommitKey.type, Long] = 92 | Key.instance(codec, ulong(63)) 93 | } 94 | 95 | type CommitKey = CommitKey.type 96 | 97 | val DataCF = "data" 98 | val OffsetsCF = "offsets" 99 | 100 | def create[F[_]: Monad, K: Codec, V: Codec, CF](store: PolyKVStore.Aux[F, Codec, CF]) = { 101 | val dataCF = store.getColumnFamily(DataCF).flatMap { 102 | case Some(cf) => cf.pure[F] 103 | case None => store.createColumnFamily(DataCF) 104 | } 105 | 106 | val offsetsCF = store.getColumnFamily(OffsetsCF).flatMap { 107 | case Some(cf) => cf.pure[F] 108 | case None => store.createColumnFamily(OffsetsCF) 109 | } 110 | 111 | (dataCF, offsetsCF).mapN(new RocksDBTable[F, K, V, CF](store, _, _)) 112 | } 113 | } 114 | 115 | class RocksDBTable[F[_]: Functor, K: Codec, V: Codec, CF](store: PolyKVStore.Aux[F, Codec, CF], 116 | dataCF: CF, 117 | offsetsCF: CF) 118 | extends Table[F, K, V] { 119 | import RocksDBTable._ 120 | 121 | implicit val K: Key.Aux[Codec, K, V] = Key.instance 122 | 123 | def put(k: K, v: V): F[Unit] = store.put(dataCF, k, v) 124 | 125 | def putAll(data: List[(K, V)]): F[Unit] = store.putAll(dataCF, data) 126 | 127 | def get(k: K): F[Option[V]] = store.get(dataCF, k) 128 | 129 | def getAll(ks: List[K]): F[Map[K, Option[V]]] = 130 | store.getAll(dataCF, ks).map { retrievedMap => 131 | ks.fproduct(retrievedMap.get).toMap 132 | } 133 | 134 | def delete(k: K): F[Unit] = store.delete(dataCF, k) 135 | 136 | def commit(offset: Long): F[Unit] = 137 | store.put(offsetsCF, CommitKey, offset) 138 | 139 | def lastCommittedOffset: F[Long] = 140 | store.get(offsetsCF, CommitKey).map(_.getOrElse(0L)) 141 | 142 | def scan: Stream[F, (K, V)] = 143 | store.scan(dataCF) 144 | } 145 | -------------------------------------------------------------------------------- /src/main/scala/com/iravid/fs2/kafka/streams/Tables.scala: -------------------------------------------------------------------------------- 1 | package com.iravid.fs2.kafka.streams 2 | 3 | import cats.effect.{ Concurrent, Resource } 4 | import cats.effect.implicits._ 5 | import cats.implicits._ 6 | import cats.kernel.Order 7 | import com.iravid.fs2.kafka.EnvT 8 | import com.iravid.fs2.kafka.client.{ CommitRequest, RecordStream } 9 | import com.iravid.fs2.kafka.model.ByteRecord 10 | import fs2.Stream 11 | import java.nio.file.Path 12 | import org.apache.kafka.common.TopicPartition 13 | import scodec.Codec 14 | 15 | object Tables { 16 | object inMemory { 17 | def partitioned[F[_]: Concurrent, K, T](recordStream: RecordStream.Partitioned[F, T])( 18 | key: T => K): Stream[F, (TopicPartition, ReadOnlyTable[F, K, T])] = 19 | recordStream.records.flatMap { 20 | case (tp, stream) => 21 | Stream.eval(InMemoryTable.create[F, K, T]).flatMap { 
table => 22 | val updateFiber = stream.chunks 23 | .evalMap { recordChunk => 24 | val (offsets, data) = recordChunk.toList.collect { 25 | case EnvT(metadata, Right(t)) => 26 | (metadata.offset, (key(t), t)) 27 | }.unzip 28 | 29 | val commitOffset = offsets.maximumOption 30 | 31 | table.putAll(data) *> 32 | commitOffset.traverse_ { offset => 33 | table.commit(offset) *> 34 | recordStream.commitQueue.requestCommit( 35 | CommitRequest(tp.topic, tp.partition, offset)) 36 | } 37 | } 38 | .compile 39 | .drain 40 | .start 41 | 42 | Stream.eval(updateFiber).as(tp -> table.view) 43 | } 44 | } 45 | 46 | def plain[F[_]: Concurrent, K, T](recordStream: RecordStream.Plain[F, T])( 47 | key: T => K): F[ReadOnlyTable[F, K, T]] = 48 | InMemoryTable.create[F, K, T].flatMap { table => 49 | val updateFiber = recordStream.records.chunks 50 | .evalMap { recordChunk => 51 | val (offsets, data) = recordChunk.toList.collect { 52 | case EnvT(metadata, Right(t)) => 53 | (metadata, (key(t), t)) 54 | }.unzip 55 | 56 | val commitOffset = offsets.maximumOption(Order.by((t: ByteRecord) => t.offset)) 57 | 58 | table.putAll(data) *> 59 | commitOffset.traverse_ { metadata => 60 | table.commit(metadata.offset) *> 61 | recordStream.commitQueue.requestCommit( 62 | CommitRequest(metadata.topic, metadata.partition, metadata.offset)) 63 | } 64 | } 65 | .compile 66 | .drain 67 | .start 68 | 69 | updateFiber.as(table.view) 70 | } 71 | } 72 | 73 | object persistent { 74 | def partitioned[F[_]: Concurrent, K: Codec, V: Codec]( 75 | stores: KVStores[Codec, Path, F], 76 | storeKey: TopicPartition => Path, 77 | recordStream: RecordStream.Partitioned[F, V])( 78 | key: V => K): Stream[F, (TopicPartition, ReadOnlyTable[F, K, V])] = 79 | recordStream.records.flatMap { 80 | case (tp, records) => 81 | Stream.resource(stores.open(storeKey(tp))).flatMap { store => 82 | Stream.eval(RocksDBTable.create[F, K, V, store.ColumnFamilyHandle](store)).flatMap { 83 | table => 84 | val updateFiber = records.chunks 85 | .evalMap { recordChunk => 86 | val (offsets, data) = recordChunk.toList.collect { 87 | case EnvT(metadata, Right(t)) => 88 | (metadata.offset, (key(t), t)) 89 | }.unzip 90 | 91 | val commitOffset = offsets.maximumOption 92 | 93 | table.putAll(data) *> 94 | commitOffset.traverse_ { offset => 95 | table.commit(offset) *> 96 | recordStream.commitQueue.requestCommit( 97 | CommitRequest(tp.topic, tp.partition, offset)) 98 | } 99 | 100 | } 101 | .compile 102 | .drain 103 | .start 104 | 105 | Stream.eval(updateFiber.as(tp -> table.view)) 106 | } 107 | } 108 | } 109 | 110 | def plain[F[_]: Concurrent, K: Codec, V: Codec]( 111 | stores: KVStores[Codec, Path, F], 112 | storeKey: Path, 113 | recordStream: RecordStream.Plain[F, V])(key: V => K): Resource[F, ReadOnlyTable[F, K, V]] = 114 | for { 115 | store <- stores.open(storeKey) 116 | table <- Resource.liftF(RocksDBTable.create[F, K, V, store.ColumnFamilyHandle](store)) 117 | _ <- Resource.liftF { 118 | recordStream.records.chunks 119 | .evalMap { recordChunk => 120 | val (offsets, data) = recordChunk.toList.collect { 121 | case EnvT(metadata, Right(t)) => 122 | (metadata, (key(t), t)) 123 | }.unzip 124 | 125 | val commitOffset = offsets.maximumOption(Order.by((t: ByteRecord) => t.offset)) 126 | 127 | table.putAll(data) *> 128 | commitOffset.traverse_ { metadata => 129 | table.commit(metadata.offset) *> 130 | recordStream.commitQueue.requestCommit( 131 | CommitRequest(metadata.topic, metadata.partition, metadata.offset)) 132 | } 133 | } 134 | .compile 135 | .drain 136 | .start 137 | } 138 | } yield 
table.view 139 | } 140 | } 141 | --------------------------------------------------------------------------------
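
Usage note (not part of the repository sources above): the sketch below shows how the InMemoryTable / ReadOnlyTable API defined in Table.scala can be exercised end to end. It assumes cats-effect's IO for the F[_] parameter and only calls methods shown in the sources; the object and value names (InMemoryTableExample, program) are illustrative, and the committed offset 42L is an arbitrary placeholder.

    import cats.effect.IO
    import com.iravid.fs2.kafka.streams.InMemoryTable

    object InMemoryTableExample {
      // Builds a table, mutates it through the full Table API, and then reads it
      // back through the ReadOnlyTable view that downstream code would receive.
      val program: IO[(Option[Long], List[(String, Long)])] =
        for {
          table  <- InMemoryTable.create[IO, String, Long]
          _      <- table.put("clicks", 1L)
          _      <- table.putAll(List("views" -> 10L, "clicks" -> 2L))
          _      <- table.commit(42L)        // record an (illustrative) last processed offset
          view    = table.view               // read-only projection of the table
          clicks <- view.get("clicks")       // Some(2L): the later putAll wins
          all    <- view.scan.compile.toList // snapshot of all key/value pairs
        } yield (clicks, all)
    }

Handing out only the view keeps mutation (put, putAll, commit, delete) confined to the code that populates the table, which mirrors how Tables.inMemory.plain and Tables.persistent.plain return a ReadOnlyTable to callers while the update fiber retains the writable handle.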