├── .gitignore
├── README.md
├── pom.xml
└── src
├── main
├── resources
│ └── avro
│ │ ├── Client.avsc
│ │ ├── Coupon.avsc
│ │ ├── Purchase.avsc
│ │ ├── Sale.avsc
│ │ ├── SaleAndStore.avsc
│ │ └── Song.avsc
└── scala
│ └── org
│ └── ardlema
│ ├── aggregating
│ └── AggregateTopologyBuilder.scala
│ ├── enrichment
│ └── EnrichmentTopologyBuilder.scala
│ ├── filtering
│ └── FilterTopologyBuilder.scala
│ ├── joining
│ └── JoinTopologyBuilder.scala
│ └── solutions
│ ├── aggregating
│ └── AggregateTopologyBuilder.scala
│ ├── enrichment
│ └── EnrichmentTopologyBuilder.scala
│ ├── filtering
│ └── FilterTopologyBuilder.scala
│ └── joining
│ └── JoinTopologyBuilder.scala
└── test
└── scala
└── org
└── ardlema
├── aggregating
└── AggregateTopologySpec.scala
├── enrichment
└── EnrichmentTopologySpec.scala
├── filtering
└── FilterTopologySpec.scala
├── infra
├── KafkaGlobalProperties.scala
└── KafkaInfra.scala
└── joining
├── Clock.scala
├── GenericTimeStampExtractor.scala
├── JoinTopologySpec.scala
└── SystemClock.scala
/.gitignore:
--------------------------------------------------------------------------------
1 | target/
2 | 
3 | #IntelliJ IDEA
4 | *.iml
5 | .idea/
6 | 
7 | #Maven
8 | dependency-reduced-pom.xml
9 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Learn Kafka Streams by making the tests pass
2 | 
3 | The goal of this project is to provide a set of tests with challenges to be completed by anyone who wants to improve their Kafka Streams skills.
4 | 
5 | The tests will start up a whole Kafka ecosystem for you (Apache Kafka, Apache ZooKeeper and Schema Registry), so you only need to focus on the fun part: playing around with the Kafka Streams API!
6 | 
7 | I will be adding more challenges, so please stay tuned. Collaborations, ideas on new challenges & PRs are more than welcome.
8 | 
9 | 
10 | # The challenges
11 | 
12 | ## 1. Filtering VIP clients
13 | 
14 | 
15 |
16 | Click to expand!
17 | 
18 | In this challenge you will need to filter a stream of client events. These events have the following structure:
19 | 
20 | ```
21 | {"name": "name", "type": "string"},
22 | {"name": "age", "type": "int"},
23 | {"name": "vip", "type": "boolean"}
24 | ```
25 | 
26 | To make the test pass you must get rid of all the clients who are not VIPs (vip field equal to false).
27 | 
28 | ### Instructions
29 | 
30 | 1. Start by executing the test (FilterTopologySpec) to make sure that the test runs smoothly. You can execute the test in your IDE or by running the following console command:
31 | 
32 | ```
33 | mvn test
34 | ```
35 | 
36 | The test should execute properly but it should also fail (do not worry, you need to make it pass).
37 | 
38 | 2. Now it is time to make the test pass! To do so, go to the FilterTopologyBuilder class (within the filtering package)
39 | 3. Add your code to the filterVIPClients method
40 | 4. Execute the test again until you get the green flag ;)
41 |
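Before moving on, here is the shape of the operator this challenge exercises. This is only a minimal sketch of `KStream#filter` using the kafka-streams-scala DSL (the same DSL the solutions package uses); the `Person` case class and `keepPremium` helper are hypothetical stand-ins, not part of the project:

```
import org.apache.kafka.streams.scala.kstream.KStream

// Hypothetical record type, standing in for the generated Client Avro class
case class Person(name: String, premium: Boolean)

// filter() keeps only the records for which the predicate returns true
def keepPremium(people: KStream[String, Person]): KStream[String, Person] =
  people.filter((_, person) => person.premium)
```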
42 | 43 | ## 2. Enriching sale events 44 | 45 |
46 | Click to expand!
47 | 
48 | In this challenge you will be facing a pretty common use case when working with streaming systems: enriching the events with information from a third-party system. In this scenario,
49 | for the sake of simplicity, we will be using an in-memory hashmap to enrich events that simulate sales. The challenge is quite simple; the events have the following structure:
50 | 
51 | ```
52 | {"name": "amount", "type": "float"},
53 | {"name": "product", "type": "string"},
54 | {"name": "storeid", "type": "int"}
55 | ```
56 | 
57 | On receiving an event you should check whether the storeid exists within the storesInformation hashmap. If so, you should pick up the storeAddress and storeCity values from the hashmap and create a
58 | new event with the following structure:
59 | 
60 | ```
61 | {"name": "amount", "type": "float"},
62 | {"name": "product", "type": "string"},
63 | {"name": "storeaddress", "type": "string"},
64 | {"name": "storecity", "type": "string"}
65 | ```
66 | 
67 | and send this event to the output topic. If the hashmap does not include the storeid of the event you should redirect the event to an error output topic.
68 | 
69 | If you have any further questions we strongly recommend having a look at the EnrichmentTopologySpec test.
70 | 
71 | ### Instructions
72 | 
73 | 1. Start by executing the EnrichmentTopologySpec and check that the tests run properly and fail
74 | 2. Go to the EnrichmentTopologyBuilder class (the one within the enrichment package)
75 | 3. Add your code within the createTopology method
76 | 4. Execute the test again until it passes
77 |
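The solutions package tackles this kind of split with `KStream#branch`, which divides a stream into an array of sub-streams by predicate. A minimal sketch under assumed names (`Order`, `EnrichedOrder` and the `shops` map are hypothetical stand-ins for Sale, SaleAndStore and storesInformation):

```
import org.apache.kafka.streams.scala.kstream.KStream

// Hypothetical stand-ins for Sale, SaleAndStore and storesInformation
case class Order(amount: Float, shopId: Int)
case class EnrichedOrder(amount: Float, shopAddress: String)
val shops: Map[Int, String] = Map(1 -> "C/ Mayor, 1")

def enrich(orders: KStream[String, Order]): (KStream[String, EnrichedOrder], KStream[String, Order]) = {
  // branch() splits the stream: branch 0 holds the records matching the first predicate
  val branches = orders.branch(
    (_, order) => shops.contains(order.shopId),
    (_, order) => !shops.contains(order.shopId))
  // Enrich the matching branch, pass the non-matching one through untouched
  (branches(0).mapValues(order => EnrichedOrder(order.amount, shops(order.shopId))), branches(1))
}
```

Each resulting branch can then be sent to its own topic with `to`, which is how the success and error topics are fed.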
78 | 79 | 80 | ## 3. Joining coupons and purchase events 81 | 82 |
83 | Click to expand!
84 | 
85 | Let's say that you are working for an on-line retail company. The company wants to launch an innovative marketing campaign to apply discounts to products. The idea is to emit coupons or offers that will be available
86 | for just 5 minutes; this way, the clients that want to take advantage of the discount must buy the product within the 5 minutes after the coupon has been launched.
87 | 
88 | To pass this challenge you have to join the coupon and purchase streams (taking into account the 5-minute slot!!)
89 | 
90 | The coupon events have the following structure:
91 | 
92 | ```
93 | {"name": "timestamp", "type": "long"},
94 | {"name": "productid", "type": "string"},
95 | {"name": "discount", "type": "float"}
96 | ```
97 | 
98 | And this is the purchase events structure:
99 | 
100 | ```
101 | {"name": "timestamp", "type": "long"},
102 | {"name": "productid", "type": "string"},
103 | {"name": "productdescription", "type": "string"},
104 | {"name": "amount", "type": "float"}
105 | ```
106 | 
107 | When you are dealing with this challenge take into account that the streams are joined by their keys!
108 | 
109 | ### Instructions
110 | 
111 | 1. Start by executing the JoinTopologySpec and check that the tests run properly and fail
112 | 2. Have a look at the GenericTimeStampExtractor that I have prepared for you (to better understand how we deal with message event times)
113 | 3. Go to the JoinTopologyBuilder class (the one within the joining package)
114 | 4. Add your code within the createTopology method (there are some tips that might help you)
115 | 5. Execute the test again until it passes
116 |
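As a hint at the moving parts (mirroring what the solutions package does), a windowed join needs three things: both streams re-keyed to a common key via `selectKey`, a value joiner, and a `JoinWindows` built from the 5-minute slot. A minimal sketch with hypothetical `Offer`/`Order` types; the serdes are left to the caller through the implicit `Joined`:

```
import org.apache.kafka.streams.kstream.{JoinWindows, Joined}
import org.apache.kafka.streams.scala.kstream.KStream
import scala.concurrent.duration._

// Hypothetical event types sharing a product id
case class Offer(productId: String, discount: Float)
case class Order(productId: String, amount: Float)

def joinWithinFiveMinutes(offers: KStream[String, Offer], orders: KStream[String, Order])
                         (implicit joined: Joined[String, Offer, Order]): KStream[String, Float] = {
  // Re-key both streams so they share the join key
  val offersById = offers.selectKey((_, offer) => offer.productId)
  val ordersById = orders.selectKey((_, order) => order.productId)
  // Only pairs whose event timestamps fall within the window get joined
  offersById.join(ordersById)(
    (offer, order) => order.amount * (1 - offer.discount / 100),
    JoinWindows.of(5.minutes.toMillis))
}
```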
117 | 118 | ## 4. Aggregating songs data 119 | 120 | 121 |
122 | Click to expand!
123 | 
124 | In this challenge you will need to aggregate a stream of song events. These events have the following structure:
125 | 
126 | ```
127 | {"name": "title", "type": "string"},
128 | {"name": "duration", "type": "int"},
129 | {"name": "artist", "type": "string"},
130 | {"name": "album", "type": "string"},
131 | {"name": "genre", "type": "string"}
132 | ```
133 | 
134 | To make the test pass you must aggregate the songs by artist. Let's say that our stream has just received the following songs:
135 | 
136 | ```
137 | new Song("Closer", 122, "Nine Inch Nails", "The Downward Spiral", "rock")
138 | new Song("Heresy", 98, "Nine Inch Nails", "The Downward Spiral", "rock")
139 | new Song("Wide Awake", 265, "Audioslave", "Revelations", "rock")
140 | new Song("Wish", 112, "Nine Inch Nails", "Broken", "rock")
141 | new Song("Until we fall", 215, "Audioslave", "Revelations", "rock")
142 | ```
143 | 
144 | As we said, our streaming app should aggregate the songs by artist (internally generating a KTable) and output the results to our output topic. On receiving the previous songs our output should be:
145 | 
146 | ```
147 | Nine Inch Nails 3
148 | Audioslave 2
149 | ```
150 | 
151 | ### Instructions
152 | 
153 | 1. Start by executing the test (AggregateTopologySpec) to make sure that the test runs smoothly. The test should execute properly but it should also fail (do not worry, you need to make it pass).
154 | 2. Now it is time to make the test pass! To do so, go to the AggregateTopologyBuilder class (within the aggregating package)
155 | 3. Add your code to the createTopology(...) method
156 | 4. Execute the test again until you get the green flag ;)
157 |
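The pattern involved here is map → group → count, which is also what the proposed solution does. A minimal sketch with a hypothetical `Track` case class standing in for the generated Song Avro class:

```
import org.apache.kafka.streams.scala.ImplicitConversions._
import org.apache.kafka.streams.scala.Serdes._
import org.apache.kafka.streams.scala.kstream.{KStream, KTable}

// Hypothetical value type standing in for the generated Song Avro class
case class Track(title: String, artist: String)

// Re-key each record by artist, group by that key and count;
// the resulting KTable keeps one running total per artist
def countByArtist(tracks: KStream[String, Track]): KTable[String, Long] =
  tracks
    .map((_, track) => (track.artist, track.title))
    .groupByKey
    .count()
```

Every incoming song updates the KTable, and with record caching disabled each update is emitted downstream, which is why the test expects one output record per input song.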
158 | 159 | # Solutions 160 | 161 | There is a solutions package where you will be able to find my proposed solution for the challenges. Try not to cheat and make the test pass on your own. 162 | 163 | Do please let me know if you find a better solution by submitting your PRs. -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | ardlema 8 | kafka-streams-workshop 9 | 1.0 10 | 11 | 12 | 2 13 | 11 14 | 11 15 | 3.5.3 16 | 1.8 17 | 1.2.3 18 | 4.12 19 | 1.3.0 20 | 3.0.4 21 | 3.7.2 22 | 2.0.0 23 | 5.0.0 24 | 1.7.7 25 | 26 | 27 | 28 | 29 | confluent 30 | http://packages.confluent.io/maven/ 31 | 32 | 33 | 34 | 35 | 36 | org.scala-lang 37 | scala-library 38 | ${scala.majorVersion}.${scala.minorVersion}.${scala.patchVersion} 39 | 40 | 41 | 42 | ch.qos.logback 43 | logback-classic 44 | ${logback.version} 45 | 46 | 47 | com.typesafe.scala-logging 48 | scala-logging_${scala.majorVersion}.${scala.minorVersion} 49 | ${scala-logging.version} 50 | 51 | 52 | 53 | org.apache.kafka 54 | kafka-clients 55 | ${kafka.version} 56 | 57 | 58 | org.apache.kafka 59 | kafka_${scala.majorVersion}.${scala.minorVersion} 60 | ${kafka.version} 61 | 62 | 63 | org.apache.kafka 64 | kafka-streams 65 | ${kafka.version} 66 | 67 | 68 | org.apache.kafka 69 | kafka-streams-scala_${scala.majorVersion}.${scala.minorVersion} 70 | ${kafka.version} 71 | 72 | 73 | io.confluent 74 | kafka-avro-serializer 75 | ${confluent.version} 76 | 77 | 78 | io.confluent 79 | kafka-schema-registry 80 | ${confluent.version} 81 | 82 | 83 | io.confluent 84 | kafka-streams-avro-serde 85 | ${confluent.version} 86 | 87 | 88 | org.json4s 89 | json4s-jackson_${scala.majorVersion}.${scala.minorVersion} 90 | ${json4s.version} 91 | 92 | 93 | 94 | org.apache.avro 95 | avro 96 | ${avro.version} 97 | 98 | 99 | org.apache.avro 100 | avro-maven-plugin 101 | ${avro.version} 102 | 103 | 104 | 105 | junit 106 | junit 107 | ${junit.version} 108 | 109 | 110 | org.scalatest 111 | scalatest_${scala.majorVersion}.${scala.minorVersion} 112 | ${scalatest.version} 113 | test 114 | 115 | 116 | org.apache.kafka 117 | kafka-clients 118 | ${kafka.version} 119 | test 120 | 121 | 122 | org.apache.kafka 123 | kafka-streams-test-utils 124 | ${kafka.version} 125 | test 126 | 127 | 128 | org.apache.curator 129 | curator-test 130 | 2.12.0 131 | 132 | 133 | 134 | 135 | src/main/scala 136 | 137 | 138 | net.alchim31.maven 139 | scala-maven-plugin 140 | 3.2.2 141 | 142 | false 143 | modified-only 144 | 145 | 146 | 147 | default 148 | 149 | add-source 150 | compile 151 | testCompile 152 | doc 153 | doc-jar 154 | 155 | 156 | 157 | 158 | 159 | org.apache.maven.plugins 160 | maven-compiler-plugin 161 | 3.5.1 162 | 163 | 1.8 164 | 1.8 165 | 166 | 167 | 168 | org.apache.avro 169 | avro-maven-plugin 170 | ${avro.version} 171 | 172 | 173 | generate-sources 174 | 175 | schema 176 | protocol 177 | idl-protocol 178 | 179 | 180 | ${project.basedir}/src/main/resources/avro 181 | 182 | 183 | 184 | 185 | 186 | org.apache.maven.plugins 187 | maven-shade-plugin 188 | 2.4.3 189 | 190 | 191 | package 192 | 193 | shade 194 | 195 | 196 | 197 | 199 | reference.conf 200 | 201 | 203 | 204 | org.ust.ga.AnalyticsAPI 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | org.apache.maven.plugins 214 | maven-surefire-plugin 215 | 2.7 216 | 217 | false 218 | 219 | 220 | 221 | org.scalatest 222 | scalatest-maven-plugin 223 | 1.0 224 | 225 | 
${project.build.directory}/surefire-reports 226 | . 227 | WDF TestSuite.txt 228 | 229 | 230 | 231 | test 232 | 233 | test 234 | 235 | 236 | 237 | 238 | 239 | org.scoverage 240 | scoverage-maven-plugin 241 | ${scoverage.plugin.version} 242 | 243 | true 244 | ${scala.majorVersion}.${scala.minorVersion}.8 245 | 246 | 247 | 248 | 249 | 250 | -------------------------------------------------------------------------------- /src/main/resources/avro/Client.avsc: -------------------------------------------------------------------------------- 1 | {"namespace": "JavaSessionize.avro", 2 | "type": "record", 3 | "name": "Client", 4 | "fields": [ 5 | {"name": "name", "type": "string"}, 6 | {"name": "age", "type": "int"}, 7 | {"name": "vip", "type": "boolean"} 8 | ] 9 | } -------------------------------------------------------------------------------- /src/main/resources/avro/Coupon.avsc: -------------------------------------------------------------------------------- 1 | {"namespace": "JavaSessionize.avro", 2 | "type": "record", 3 | "name": "Coupon", 4 | "fields": [ 5 | {"name": "timestamp", "type": "long"}, 6 | {"name": "productid", "type": "string"}, 7 | {"name": "discount", "type": "float"} 8 | ] 9 | } -------------------------------------------------------------------------------- /src/main/resources/avro/Purchase.avsc: -------------------------------------------------------------------------------- 1 | {"namespace": "JavaSessionize.avro", 2 | "type": "record", 3 | "name": "Purchase", 4 | "fields": [ 5 | {"name": "timestamp", "type": "long"}, 6 | {"name": "productid", "type": "string"}, 7 | {"name": "productdescription", "type": "string"}, 8 | {"name": "amount", "type": "float"} 9 | ] 10 | } -------------------------------------------------------------------------------- /src/main/resources/avro/Sale.avsc: -------------------------------------------------------------------------------- 1 | {"namespace": "JavaSessionize.avro", 2 | "type": "record", 3 | "name": "Sale", 4 | "fields": [ 5 | {"name": "amount", "type": "float"}, 6 | {"name": "product", "type": "string"}, 7 | {"name": "storeid", "type": "int"} 8 | ] 9 | } -------------------------------------------------------------------------------- /src/main/resources/avro/SaleAndStore.avsc: -------------------------------------------------------------------------------- 1 | {"namespace": "JavaSessionize.avro", 2 | "type": "record", 3 | "name": "SaleAndStore", 4 | "fields": [ 5 | {"name": "amount", "type": "float"}, 6 | {"name": "product", "type": "string"}, 7 | {"name": "storeaddress", "type": "string"}, 8 | {"name": "storecity", "type": "string"} 9 | ] 10 | } -------------------------------------------------------------------------------- /src/main/resources/avro/Song.avsc: -------------------------------------------------------------------------------- 1 | {"namespace": "JavaSessionize.avro", 2 | "type": "record", 3 | "name": "Song", 4 | "fields": [ 5 | {"name": "title", "type": "string"}, 6 | {"name": "duration", "type": "int"}, 7 | {"name": "artist", "type": "string"}, 8 | {"name": "album", "type": "string"}, 9 | {"name": "genre", "type": "string"} 10 | ] 11 | } -------------------------------------------------------------------------------- /src/main/scala/org/ardlema/aggregating/AggregateTopologyBuilder.scala: -------------------------------------------------------------------------------- 1 | package org.ardlema.aggregating 2 | 3 | import java.lang.{Long => JLong} 4 | import java.util.Collections 5 | 6 | import JavaSessionize.avro.Song 7 | 
import io.confluent.kafka.serializers.AbstractKafkaAvroSerDeConfig 8 | import io.confluent.kafka.streams.serdes.avro.SpecificAvroSerde 9 | import org.apache.kafka.common.serialization.Serdes 10 | import org.apache.kafka.streams.kstream._ 11 | import org.apache.kafka.streams.{StreamsBuilder, Topology} 12 | 13 | object AggregateTopologyBuilder { 14 | 15 | def getAvroSongSerde(schemaRegistryHost: String, schemaRegistryPort: String) = { 16 | val specificAvroSerde = new SpecificAvroSerde[Song]() 17 | specificAvroSerde.configure( 18 | Collections.singletonMap(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, s"""http://$schemaRegistryHost:$schemaRegistryPort/"""), 19 | false) 20 | specificAvroSerde 21 | } 22 | 23 | def createTopology(schemaRegistryHost: String, 24 | schemaRegistryPort: String, 25 | inputTopic: String, 26 | outputTopic: String): Topology = { 27 | 28 | val builder = new StreamsBuilder() 29 | val initialStream = builder.stream(inputTopic, Consumed.`with`(Serdes.String(), getAvroSongSerde(schemaRegistryHost, schemaRegistryPort))) 30 | 31 | //TODO: Add your code here to aggregate the songs to get the total count of songs by artist 32 | initialStream.to(outputTopic) 33 | builder.build() 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/main/scala/org/ardlema/enrichment/EnrichmentTopologyBuilder.scala: -------------------------------------------------------------------------------- 1 | package org.ardlema.enrichment 2 | 3 | import java.util.Collections 4 | 5 | import JavaSessionize.avro.{Sale, SaleAndStore} 6 | import io.confluent.kafka.serializers.AbstractKafkaAvroSerDeConfig 7 | import io.confluent.kafka.streams.serdes.avro.SpecificAvroSerde 8 | import org.apache.kafka.common.serialization.Serdes 9 | import org.apache.kafka.streams.kstream.Consumed 10 | import org.apache.kafka.streams.{StreamsBuilder, Topology} 11 | 12 | object EnrichmentTopologyBuilder { 13 | 14 | case class StoreInformation(storeAddress: String, storeCity: String) 15 | 16 | val storesInformation = Map(1234 -> StoreInformation("C/ Narvaez, 78", "Madrid"), 17 | 5678 -> StoreInformation("C/ Pradillo, 33", "Madrid")) 18 | 19 | def getAvroSaleSerde(schemaRegistryHost: String, schemaRegistryPort: String) = { 20 | val specificAvroSerde = new SpecificAvroSerde[Sale]() 21 | specificAvroSerde.configure( 22 | Collections.singletonMap(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, s"""http://$schemaRegistryHost:$schemaRegistryPort/"""), 23 | false) 24 | specificAvroSerde 25 | } 26 | 27 | def getAvroSaleAndStoreSerde(schemaRegistryHost: String, schemaRegistryPort: String) = { 28 | val specificAvroSerde = new SpecificAvroSerde[SaleAndStore]() 29 | specificAvroSerde.configure( 30 | Collections.singletonMap(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, s"""http://$schemaRegistryHost:$schemaRegistryPort/"""), 31 | false) 32 | specificAvroSerde 33 | } 34 | 35 | def createTopology(schemaRegistryHost: String, 36 | schemaRegistryPort: String, 37 | inputTopic: String, 38 | outputTopic: String, 39 | outputTopicError: String): Topology = { 40 | 41 | 42 | val builder = new StreamsBuilder() 43 | val initialStream = builder.stream(inputTopic, Consumed.`with`(Serdes.String(), getAvroSaleSerde(schemaRegistryHost, schemaRegistryPort))) 44 | 45 | //TODO: Check out whether the store id from the sales event exists within the storesInformation hashmap. 
If it exists you should modify the event, 46 | // convert it to a SaleAndStore object and redirect it to the outputTopic topic. If it does not exist you should redirect the event to the outputTopicError topic. 47 | 48 | initialStream.to(outputTopic) 49 | initialStream.to(outputTopicError) 50 | 51 | builder.build() 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/main/scala/org/ardlema/filtering/FilterTopologyBuilder.scala: -------------------------------------------------------------------------------- 1 | package org.ardlema.filtering 2 | 3 | import java.util.Collections 4 | 5 | import JavaSessionize.avro.Client 6 | import io.confluent.kafka.serializers.AbstractKafkaAvroSerDeConfig 7 | import io.confluent.kafka.streams.serdes.avro.SpecificAvroSerde 8 | import org.apache.kafka.common.serialization.Serdes 9 | import org.apache.kafka.streams.kstream.{Consumed, KStream} 10 | import org.apache.kafka.streams.{StreamsBuilder, Topology} 11 | 12 | object FilterTopologyBuilder { 13 | 14 | def getAvroSerde() = { 15 | val specificAvroSerde = new SpecificAvroSerde[Client]() 16 | specificAvroSerde.configure( 17 | Collections.singletonMap(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, "http://localhost:8081/"), 18 | false) 19 | specificAvroSerde 20 | } 21 | 22 | def createTopology(): Topology = { 23 | 24 | val builder = new StreamsBuilder() 25 | val initialStream = builder.stream("input-topic", Consumed.`with`(Serdes.String(), getAvroSerde())) 26 | val vipClients = filterVIPClients(initialStream) 27 | vipClients.to("output-topic") 28 | builder.build() 29 | } 30 | 31 | //TODO: Make the proper transformations to the clientStream to get rid of the non VIP clients to make the test pass!! 32 | def filterVIPClients(clientStream: KStream[String, Client]): KStream[String, Client] = { 33 | clientStream 34 | } 35 | 36 | } 37 | -------------------------------------------------------------------------------- /src/main/scala/org/ardlema/joining/JoinTopologyBuilder.scala: -------------------------------------------------------------------------------- 1 | package org.ardlema.joining 2 | 3 | import java.util.Collections 4 | 5 | import JavaSessionize.avro.{Coupon, Purchase} 6 | import io.confluent.kafka.serializers.AbstractKafkaAvroSerDeConfig 7 | import io.confluent.kafka.streams.serdes.avro.SpecificAvroSerde 8 | import org.apache.kafka.streams.{StreamsBuilder, Topology} 9 | 10 | object JoinTopologyBuilder { 11 | 12 | def getAvroPurchaseSerde(schemaRegistryHost: String, schemaRegistryPort: String) = { 13 | val specificAvroSerde = new SpecificAvroSerde[Purchase]() 14 | specificAvroSerde.configure( 15 | Collections.singletonMap(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, s"""http://$schemaRegistryHost:$schemaRegistryPort/"""), 16 | false) 17 | specificAvroSerde 18 | } 19 | 20 | def getAvroCouponSerde(schemaRegistryHost: String, schemaRegistryPort: String) = { 21 | val specificAvroSerde = new SpecificAvroSerde[Coupon]() 22 | specificAvroSerde.configure( 23 | Collections.singletonMap(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, s"""http://$schemaRegistryHost:$schemaRegistryPort/"""), 24 | false) 25 | specificAvroSerde 26 | } 27 | 28 | def createTopology(schemaRegistryHost: String, 29 | schemaRegistryPort: String, 30 | couponInputTopic: String, 31 | purchaseInputTopic: String, 32 | outputTopic: String): Topology = { 33 | 34 | val builder = new StreamsBuilder() 35 | 36 | //TODO: You receive the coupon and purchase input topics and you 
should join them using a 5 minutes window. 37 | // Tip 1: take into account that the streams always join by its key so you should think about this before joining them! 38 | // Tip 2: use getAvroPurchaseSerde and getAvroCouponSerde to provide the proper serdes when consuming the input topics 39 | 40 | builder.build() 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/main/scala/org/ardlema/solutions/aggregating/AggregateTopologyBuilder.scala: -------------------------------------------------------------------------------- 1 | package org.ardlema.solutions.aggregating 2 | 3 | import java.util.Collections 4 | 5 | import JavaSessionize.avro.Song 6 | import io.confluent.kafka.serializers.AbstractKafkaAvroSerDeConfig 7 | import io.confluent.kafka.streams.serdes.avro.SpecificAvroSerde 8 | import org.apache.kafka.common.serialization.Serde 9 | import org.apache.kafka.streams.Topology 10 | import org.apache.kafka.streams.scala.ImplicitConversions._ 11 | import org.apache.kafka.streams.scala.kstream.{KGroupedStream, KStream, KTable} 12 | import org.apache.kafka.streams.scala.{Serdes, StreamsBuilder} 13 | 14 | 15 | object AggregateTopologyBuilder { 16 | 17 | def getAvroSongSerde(schemaRegistryHost: String, schemaRegistryPort: String) = { 18 | val specificAvroSerde = new SpecificAvroSerde[Song]() 19 | specificAvroSerde.configure( 20 | Collections.singletonMap(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, s"""http://$schemaRegistryHost:$schemaRegistryPort/"""), 21 | false) 22 | specificAvroSerde 23 | } 24 | 25 | def createTopology(schemaRegistryHost: String, 26 | schemaRegistryPort: String, 27 | inputTopic: String, 28 | outputTopic: String): Topology = { 29 | 30 | 31 | implicit val stringSerde: Serde[String] = Serdes.String 32 | implicit val longSerde: Serde[Long] = Serdes.Long 33 | implicit val avroSongSerde: SpecificAvroSerde[Song] = getAvroSongSerde(schemaRegistryHost, schemaRegistryPort) 34 | 35 | 36 | val builder: StreamsBuilder = new StreamsBuilder() 37 | val initialStream: KStream[String, Song] = builder.stream(inputTopic) 38 | 39 | val songsMappedByArtistStream: KStream[String, Long] = initialStream.map((_, song) => (song.getArtist.toString, 1L)) 40 | 41 | val songsGroupByArtistStream: KGroupedStream[String, Long] = songsMappedByArtistStream.groupByKey 42 | 43 | val songsByArtistTable: KTable[String, Long] = songsGroupByArtistStream.count() 44 | 45 | songsByArtistTable.toStream.to(outputTopic) 46 | 47 | builder.build() 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/main/scala/org/ardlema/solutions/enrichment/EnrichmentTopologyBuilder.scala: -------------------------------------------------------------------------------- 1 | package org.ardlema.solutions.enrichment 2 | 3 | import java.util.Collections 4 | 5 | import JavaSessionize.avro.{Sale, SaleAndStore} 6 | import io.confluent.kafka.serializers.AbstractKafkaAvroSerDeConfig 7 | import io.confluent.kafka.streams.serdes.avro.SpecificAvroSerde 8 | import org.apache.kafka.common.serialization.{Serde, Serdes} 9 | import org.apache.kafka.streams.Topology 10 | import org.apache.kafka.streams.scala.ImplicitConversions._ 11 | import org.apache.kafka.streams.scala.StreamsBuilder 12 | import org.apache.kafka.streams.scala.kstream.KStream 13 | 14 | object EnrichmentTopologyBuilder { 15 | 16 | case class StoreInformation(storeAddress: String, storeCity: String) 17 | 18 | val storesInformation = Map(1234 -> StoreInformation("C/ Narvaez, 
78", "Madrid"), 19 | 5678 -> StoreInformation("C/ Pradillo, 33", "Madrid")) 20 | 21 | def getAvroSaleSerde(schemaRegistryHost: String, schemaRegistryPort: String) = { 22 | val specificAvroSerde = new SpecificAvroSerde[Sale]() 23 | specificAvroSerde.configure( 24 | Collections.singletonMap(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, s"""http://$schemaRegistryHost:$schemaRegistryPort/"""), 25 | false) 26 | specificAvroSerde 27 | } 28 | 29 | def getAvroSaleAndStoreSerde(schemaRegistryHost: String, schemaRegistryPort: String) = { 30 | val specificAvroSerde = new SpecificAvroSerde[SaleAndStore]() 31 | specificAvroSerde.configure( 32 | Collections.singletonMap(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, s"""http://$schemaRegistryHost:$schemaRegistryPort/"""), 33 | false) 34 | specificAvroSerde 35 | } 36 | 37 | def createTopology(schemaRegistryHost: String, 38 | schemaRegistryPort: String, 39 | inputTopic: String, 40 | outputTopic: String, 41 | outputTopicError: String): Topology = { 42 | 43 | implicit val serdes: Serde[String] = Serdes.String() 44 | implicit val avroSaleSerde: SpecificAvroSerde[Sale] = getAvroSaleSerde(schemaRegistryHost, schemaRegistryPort) 45 | implicit val avroSaleAndStoreSerde: SpecificAvroSerde[SaleAndStore] = getAvroSaleAndStoreSerde(schemaRegistryHost, schemaRegistryPort) 46 | 47 | val existsStoreId: (String, Sale) => Boolean = (_, sale) => storesInformation.contains(sale.getStoreid) 48 | 49 | val notExistsStoreId: (String, Sale) => Boolean = (_, sale) => !storesInformation.contains(sale.getStoreid) 50 | 51 | val saleToStore: Sale => SaleAndStore = (sale: Sale) => { 52 | val storeInfo = storesInformation(sale.getStoreid) 53 | new SaleAndStore( 54 | sale.getAmount, 55 | sale.getProduct, 56 | storeInfo.storeAddress, 57 | storeInfo.storeCity) 58 | } 59 | 60 | val builder = new StreamsBuilder() 61 | val initialStream: KStream[String, Sale] = builder.stream(inputTopic) 62 | val splittedStream: Array[KStream[String, Sale]] = initialStream.branch(existsStoreId, notExistsStoreId) 63 | val saleAndStoreStream: KStream[String, SaleAndStore] = splittedStream(0) 64 | .mapValues[SaleAndStore](saleToStore) 65 | val errorStream = splittedStream(1) 66 | 67 | saleAndStoreStream.to(outputTopic) 68 | errorStream.to(outputTopicError) 69 | 70 | builder.build() 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /src/main/scala/org/ardlema/solutions/filtering/FilterTopologyBuilder.scala: -------------------------------------------------------------------------------- 1 | package org.ardlema.solutions.filtering 2 | 3 | import java.util.Collections 4 | 5 | import JavaSessionize.avro.Client 6 | import io.confluent.kafka.serializers.AbstractKafkaAvroSerDeConfig 7 | import io.confluent.kafka.streams.serdes.avro.SpecificAvroSerde 8 | import org.apache.kafka.common.serialization.Serde 9 | import org.apache.kafka.streams.Topology 10 | import org.apache.kafka.streams.scala.ImplicitConversions._ 11 | import org.apache.kafka.streams.scala.kstream.KStream 12 | import org.apache.kafka.streams.scala.{Serdes, StreamsBuilder} 13 | 14 | object FilterTopologyBuilder { 15 | 16 | def getAvroSerde() = { 17 | val specificAvroSerde = new SpecificAvroSerde[Client]() 18 | specificAvroSerde.configure( 19 | Collections.singletonMap(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, "http://localhost:8081/"), 20 | false) 21 | specificAvroSerde 22 | } 23 | 24 | def createTopology(): Topology = { 25 | implicit val keySerde: Serde[String] = 
Serdes.String 26 | implicit val valueSerde: SpecificAvroSerde[Client] = getAvroSerde() 27 | 28 | val builder = new StreamsBuilder() 29 | val initialStream: KStream[String, Client] = builder.stream("input-topic") 30 | val vipClientsStream = filterVIPClients(initialStream) 31 | vipClientsStream.to("output-topic") 32 | builder.build() 33 | } 34 | 35 | def filterVIPClients(clientStream: KStream[String, Client]): KStream[String, Client] = { 36 | clientStream.filter((_, client) => client.getVip) 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/main/scala/org/ardlema/solutions/joining/JoinTopologyBuilder.scala: -------------------------------------------------------------------------------- 1 | package org.ardlema.solutions.joining 2 | 3 | import java.util.Collections 4 | 5 | import JavaSessionize.avro.{Coupon, Purchase} 6 | import io.confluent.kafka.serializers.AbstractKafkaAvroSerDeConfig 7 | import io.confluent.kafka.streams.serdes.avro.SpecificAvroSerde 8 | import org.apache.kafka.common.serialization.Serde 9 | import org.apache.kafka.streams.Topology 10 | import org.apache.kafka.streams.kstream.JoinWindows 11 | import org.apache.kafka.streams.scala.ImplicitConversions._ 12 | import org.apache.kafka.streams.scala.kstream.KStream 13 | import org.apache.kafka.streams.scala.{Serdes, StreamsBuilder} 14 | 15 | import scala.concurrent.duration._ 16 | 17 | object JoinTopologyBuilder { 18 | 19 | def getAvroPurchaseSerde(schemaRegistryHost: String, schemaRegistryPort: String) = { 20 | val specificAvroSerde = new SpecificAvroSerde[Purchase]() 21 | specificAvroSerde.configure( 22 | Collections.singletonMap(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, s"""http://$schemaRegistryHost:$schemaRegistryPort/"""), 23 | false) 24 | specificAvroSerde 25 | } 26 | 27 | def getAvroCouponSerde(schemaRegistryHost: String, schemaRegistryPort: String) = { 28 | val specificAvroSerde = new SpecificAvroSerde[Coupon]() 29 | specificAvroSerde.configure( 30 | Collections.singletonMap(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, s"""http://$schemaRegistryHost:$schemaRegistryPort/"""), 31 | false) 32 | specificAvroSerde 33 | } 34 | 35 | def createTopology(schemaRegistryHost: String, 36 | schemaRegistryPort: String, 37 | couponInputTopic: String, 38 | purchaseInputTopic: String, 39 | outputTopic: String): Topology = { 40 | 41 | implicit val stringSerde: Serde[String] = Serdes.String 42 | implicit val avroPurchaseSerde: SpecificAvroSerde[Purchase] = getAvroPurchaseSerde(schemaRegistryHost, schemaRegistryPort) 43 | implicit val avroCouponSerde: SpecificAvroSerde[Coupon] = getAvroCouponSerde(schemaRegistryHost, schemaRegistryPort) 44 | 45 | val couponPurchaseValueJoiner: (Coupon, Purchase) => Purchase = (coupon: Coupon, purchase: Purchase) => { 46 | val discount = coupon.getDiscount * purchase.getAmount / 100 47 | new Purchase(purchase.getTimestamp, purchase.getProductid, purchase.getProductdescription, purchase.getAmount - discount) 48 | } 49 | 50 | val fiveMinuteWindow: JoinWindows = JoinWindows.of(5.minutes.toMillis).after(5.minutes.toMillis) 51 | 52 | val builder = new StreamsBuilder() 53 | 54 | val initialCouponStream: KStream[String, Coupon] = builder.stream(couponInputTopic) 55 | val initialPurchaseStream: KStream[String, Purchase] = builder.stream(purchaseInputTopic) 56 | 57 | val couponStreamKeyedByProductId: KStream[String, Coupon] = initialCouponStream 58 | .selectKey((_, coupon) => coupon.getProductid.toString) 59 | val 
purchaseStreamKeyedByProductId: KStream[String, Purchase] = initialPurchaseStream 60 | .selectKey((_, purchase) => purchase.getProductid.toString) 61 | 62 | couponStreamKeyedByProductId 63 | .join(purchaseStreamKeyedByProductId)(couponPurchaseValueJoiner, fiveMinuteWindow) 64 | .to(outputTopic) 65 | 66 | builder.build() 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /src/test/scala/org/ardlema/aggregating/AggregateTopologySpec.scala: -------------------------------------------------------------------------------- 1 | package org.ardlema.aggregating 2 | 3 | import java.lang.{Long => JLong} 4 | import java.util.Properties 5 | 6 | import JavaSessionize.avro.Song 7 | import io.confluent.kafka.schemaregistry.rest.SchemaRegistryConfig 8 | import io.confluent.kafka.serializers.AbstractKafkaAvroSerDeConfig 9 | import kafka.server.KafkaConfig 10 | import org.apache.kafka.clients.consumer.ConsumerConfig 11 | import org.apache.kafka.clients.producer.ProducerRecord 12 | import org.apache.kafka.common.serialization._ 13 | 14 | import scala.util.Random 15 | import org.apache.kafka.streams._ 16 | import org.apache.kafka.streams.test.{ConsumerRecordFactory, OutputVerifier} 17 | import org.ardlema.infra.{KafkaGlobalProperties, KafkaInfra} 18 | import org.scalatest.{BeforeAndAfter, FunSpec} 19 | 20 | 21 | trait KafkaPropertiesAggregate { 22 | 23 | //The clean-up method does not work for us so we generate a new app key for each test to overcome this issue 24 | def generateRandomApplicationKey: String = { 25 | val randomSeed = Random.alphanumeric 26 | randomSeed.take(12).mkString 27 | } 28 | 29 | val zookeeperHost = "localhost" 30 | val zookeeperPort = "2186" 31 | val zookeeperPortAsInt = zookeeperPort.toInt 32 | val kafkaHost = "localhost" 33 | val kafkaPort = "9097" 34 | val applicationKey = generateRandomApplicationKey 35 | val schemaRegistryHost = "localhost" 36 | val schemaRegistryPort = "8086" 37 | val logDir = "/tmp/kafka-aggregate-logs" 38 | val inputTopic = "song-input-topic" 39 | val outputTopic = "song-output-topic" 40 | } 41 | 42 | trait KafkaConfiguration extends KafkaPropertiesAggregate with KafkaGlobalProperties { 43 | val kafkaConfig = new Properties() 44 | kafkaConfig.put(bootstrapServerKey, s"""$kafkaHost:$kafkaPort""") 45 | kafkaConfig.put("zookeeper.host", zookeeperHost) 46 | kafkaConfig.put("zookeeper.port", zookeeperPort) 47 | kafkaConfig.put(schemaRegistryUrlKey, s"""http://$schemaRegistryHost:$schemaRegistryPort""") 48 | kafkaConfig.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()) 49 | kafkaConfig.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, 50 | AggregateTopologyBuilder.getAvroSongSerde(schemaRegistryHost, schemaRegistryPort).getClass.getName) 51 | kafkaConfig.put(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, s"""http://$schemaRegistryHost:$schemaRegistryPort""") 52 | kafkaConfig.put(groupIdKey, groupIdValue) 53 | kafkaConfig.put(KafkaConfig.BrokerIdProp, defaultBrokerIdProp) 54 | kafkaConfig.put(KafkaConfig.HostNameProp, kafkaHost) 55 | kafkaConfig.put(KafkaConfig.PortProp, kafkaPort) 56 | kafkaConfig.put(KafkaConfig.NumPartitionsProp, defaultPartitions) 57 | kafkaConfig.put(KafkaConfig.AutoCreateTopicsEnableProp, defaultAutoCreateTopics) 58 | kafkaConfig.put(applicationIdKey, applicationKey) 59 | kafkaConfig.put(bootstrapServerKey, s"""$kafkaHost:$kafkaPort""") 60 | kafkaConfig.put(KafkaConfig.HostNameProp, kafkaHost) 61 | kafkaConfig.put(KafkaConfig.PortProp, kafkaPort) 62 
| kafkaConfig.put(cacheMaxBytesBufferingKey, "0") 63 | kafkaConfig.put("offsets.topic.replication.factor", "1") 64 | kafkaConfig.put("log.dir", logDir) 65 | kafkaConfig.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, "1000") 66 | kafkaConfig.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest") 67 | } 68 | 69 | class AggregateTopologySpec 70 | extends FunSpec 71 | with KafkaInfra 72 | with BeforeAndAfter 73 | with KafkaConfiguration { 74 | 75 | val topology = AggregateTopologyBuilder.createTopology( 76 | schemaRegistryHost, 77 | schemaRegistryPort, 78 | inputTopic, 79 | outputTopic) 80 | val testDriver = new TopologyTestDriver(topology, kafkaConfig) 81 | 82 | after { 83 | testDriver.close() 84 | } 85 | 86 | describe("The topology") { 87 | 88 | it("should aggregate the songs by artists") { 89 | val schemaRegistryConfig = new Properties() 90 | schemaRegistryConfig.put(SchemaRegistryConfig.KAFKASTORE_BOOTSTRAP_SERVERS_CONFIG, s"""PLAINTEXT://$kafkaHost:$kafkaPort""") 91 | schemaRegistryConfig.put(SchemaRegistryConfig.KAFKASTORE_TOPIC_CONFIG, "schemaregistrytopic") 92 | schemaRegistryConfig.put("port", schemaRegistryPort) 93 | 94 | 95 | withKafkaServerAndSchemaRegistry(kafkaConfig, schemaRegistryConfig, zookeeperPortAsInt) { () => 96 | val recordFactory = new ConsumerRecordFactory(new StringSerializer(), 97 | AggregateTopologyBuilder.getAvroSongSerde(schemaRegistryHost, schemaRegistryPort).serializer()) 98 | 99 | val song1 = new Song("Closer", 122, "Nine Inch Nails", "The Downward Spiral", "rock") 100 | val song2 = new Song("Heresy", 98, "Nine Inch Nails", "The Downward Spiral", "rock") 101 | val song3 = new Song("Wide Awake", 265, "Audioslave", "Revelations", "rock") 102 | val song4 = new Song("Wish", 112, "Nine Inch Nails", "Broken", "rock") 103 | val song5 = new Song("Until we fall", 215, "Audioslave", "Revelations", "rock") 104 | 105 | val consumerRecordFactory1 = recordFactory.create(inputTopic, "a", song1) 106 | val consumerRecordFactory2 = recordFactory.create(inputTopic, "a", song2) 107 | val consumerRecordFactory3 = recordFactory.create(inputTopic, "a", song3) 108 | val consumerRecordFactory4 = recordFactory.create(inputTopic, "a", song4) 109 | val consumerRecordFactory5 = recordFactory.create(inputTopic, "a", song5) 110 | testDriver.pipeInput(consumerRecordFactory1) 111 | val outputRecord1: ProducerRecord[String, JLong] = testDriver.readOutput(outputTopic, 112 | new StringDeserializer(), 113 | new LongDeserializer()) 114 | OutputVerifier.compareKeyValue(outputRecord1, "Nine Inch Nails", new JLong(1)) 115 | testDriver.pipeInput(consumerRecordFactory2) 116 | val outputRecord2: ProducerRecord[String, JLong] = testDriver.readOutput(outputTopic, 117 | new StringDeserializer(), 118 | new LongDeserializer()) 119 | OutputVerifier.compareKeyValue(outputRecord2, "Nine Inch Nails", new JLong(2)) 120 | testDriver.pipeInput(consumerRecordFactory3) 121 | val outputRecord3: ProducerRecord[String, JLong] = testDriver.readOutput(outputTopic, 122 | new StringDeserializer(), 123 | new LongDeserializer()) 124 | OutputVerifier.compareKeyValue(outputRecord3, "Audioslave", new JLong(1)) 125 | testDriver.pipeInput(consumerRecordFactory4) 126 | val outputRecord4: ProducerRecord[String, JLong] = testDriver.readOutput(outputTopic, 127 | new StringDeserializer(), 128 | new LongDeserializer()) 129 | OutputVerifier.compareKeyValue(outputRecord4, "Nine Inch Nails", new JLong(3)) 130 | testDriver.pipeInput(consumerRecordFactory5) 131 | val outputRecord5: ProducerRecord[String, JLong] = 
testDriver.readOutput(outputTopic,
132 | new StringDeserializer(),
133 | new LongDeserializer())
134 | OutputVerifier.compareKeyValue(outputRecord5, "Audioslave", new JLong(2))
135 | }
136 | }
137 | }
138 | }
139 | 
--------------------------------------------------------------------------------
/src/test/scala/org/ardlema/enrichment/EnrichmentTopologySpec.scala:
--------------------------------------------------------------------------------
1 | package org.ardlema.enrichment
2 | 
3 | import java.util.Properties
4 | 
5 | import JavaSessionize.avro.{Sale, SaleAndStore}
6 | import io.confluent.kafka.schemaregistry.rest.SchemaRegistryConfig
7 | import io.confluent.kafka.serializers.AbstractKafkaAvroSerDeConfig
8 | import kafka.server.KafkaConfig
9 | import org.apache.kafka.common.serialization.{Serdes, StringDeserializer, StringSerializer}
10 | import org.apache.kafka.streams.test.{ConsumerRecordFactory, OutputVerifier}
11 | import org.apache.kafka.streams.{StreamsConfig, TopologyTestDriver}
12 | import org.ardlema.infra.{KafkaGlobalProperties, KafkaInfra}
13 | import org.scalatest.{FunSpec, Matchers}
14 | 
15 | trait KafkaPropertiesEnrichment {
16 | 
17 | val zookeeperHost = "localhost"
18 | val zookeeperPort = "2182"
19 | val zookeeperPortAsInt = zookeeperPort.toInt
20 | val kafkaHost = "localhost"
21 | val kafkaPort = "9093"
22 | val applicationKey = "enrichmentapp"
23 | val schemaRegistryHost = "localhost"
24 | val schemaRegistryPort = "8082"
25 | val inputTopic = "input-topic-enrichment"
26 | val outputTopic = "output-topic-enrichment-saleandstore"
27 | val outputTopicError = "output-topic-enrichment-error"
28 | val logDir = "/tmp/kafka-enrichment-logs"
29 | }
30 | 
31 | class EnrichmentTopologySpec extends FunSpec with Matchers with KafkaGlobalProperties with KafkaPropertiesEnrichment with KafkaInfra {
32 | 
33 | describe("The topology") {
34 | 
35 | it("should enrich the sales events") {
36 | val kafkaConfig = new Properties()
37 | kafkaConfig.put(bootstrapServerKey, s"""$kafkaHost:$kafkaPort""")
38 | kafkaConfig.put("zookeeper.host", zookeeperHost)
39 | kafkaConfig.put("zookeeper.port", zookeeperPort)
40 | kafkaConfig.put(schemaRegistryUrlKey, s"""http://$schemaRegistryHost:$schemaRegistryPort""")
41 | kafkaConfig.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName())
42 | kafkaConfig.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, EnrichmentTopologyBuilder.getAvroSaleSerde(
43 | schemaRegistryHost,
44 | schemaRegistryPort).getClass.getName)
45 | kafkaConfig.put(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, s"""http://$schemaRegistryHost:$schemaRegistryPort""")
46 | kafkaConfig.put(groupIdKey, groupIdValue)
47 | kafkaConfig.put(KafkaConfig.BrokerIdProp, defaultBrokerIdProp)
48 | kafkaConfig.put(KafkaConfig.HostNameProp, kafkaHost)
49 | kafkaConfig.put(KafkaConfig.PortProp, kafkaPort)
50 | kafkaConfig.put(KafkaConfig.NumPartitionsProp, defaultPartitions)
51 | kafkaConfig.put(KafkaConfig.AutoCreateTopicsEnableProp, defaultAutoCreateTopics)
52 | kafkaConfig.put(applicationIdKey, applicationKey)
53 | kafkaConfig.put(bootstrapServerKey, s"""$kafkaHost:$kafkaPort""")
54 | kafkaConfig.put(KafkaConfig.HostNameProp, kafkaHost)
55 | kafkaConfig.put(KafkaConfig.PortProp, kafkaPort)
56 | kafkaConfig.put(cacheMaxBytesBufferingKey, "0")
57 | kafkaConfig.put("offsets.topic.replication.factor", "1")
58 | kafkaConfig.put("log.dir", logDir)
59 | 
60 | val schemaRegistryConfig = new Properties()
61 |
schemaRegistryConfig.put(SchemaRegistryConfig.KAFKASTORE_BOOTSTRAP_SERVERS_CONFIG, s"""PLAINTEXT://$kafkaHost:$kafkaPort""") 62 | schemaRegistryConfig.put(SchemaRegistryConfig.KAFKASTORE_TOPIC_CONFIG, "schemaregistrytopic") 63 | schemaRegistryConfig.put("port", schemaRegistryPort) 64 | schemaRegistryConfig.put("avro.compatibility.level", "none") 65 | 66 | 67 | withKafkaServerAndSchemaRegistry(kafkaConfig, schemaRegistryConfig, zookeeperPortAsInt) { () => 68 | val testDriver = new TopologyTestDriver(EnrichmentTopologyBuilder.createTopology(schemaRegistryHost, 69 | schemaRegistryPort, 70 | inputTopic, 71 | outputTopic, 72 | outputTopicError), kafkaConfig) 73 | val recordFactory = new ConsumerRecordFactory(new StringSerializer(), 74 | EnrichmentTopologyBuilder.getAvroSaleSerde(schemaRegistryHost, schemaRegistryPort).serializer()) 75 | 76 | val sale1 = new Sale(1250.85F, "Jurassic Park T-shirt", 1234) 77 | val saleAndStore1 = new SaleAndStore(1250.85F, "Jurassic Park T-shirt", "C/ Narvaez, 78", "Madrid") 78 | val saleRecordFactory1 = recordFactory.create(inputTopic, "a", sale1) 79 | testDriver.pipeInput(saleRecordFactory1) 80 | val outputRecord1 = testDriver.readOutput(outputTopic, 81 | new StringDeserializer(), 82 | EnrichmentTopologyBuilder.getAvroSaleAndStoreSerde(schemaRegistryHost, schemaRegistryPort).deserializer()) 83 | OutputVerifier.compareKeyValue(outputRecord1, "a", saleAndStore1) 84 | 85 | val sale2 = new Sale(3434.85F, "Hat", 6666) 86 | val saleRecordFactory2 = recordFactory.create(inputTopic, "a", sale2) 87 | testDriver.pipeInput(saleRecordFactory2) 88 | val outputRecord2 = testDriver.readOutput(outputTopicError, 89 | new StringDeserializer(), 90 | EnrichmentTopologyBuilder.getAvroSaleSerde(schemaRegistryHost, schemaRegistryPort).deserializer()) 91 | OutputVerifier.compareKeyValue(outputRecord2, "a", sale2) 92 | } 93 | } 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /src/test/scala/org/ardlema/filtering/FilterTopologySpec.scala: -------------------------------------------------------------------------------- 1 | package org.ardlema.filtering 2 | 3 | import java.util.Properties 4 | 5 | import JavaSessionize.avro.Client 6 | import io.confluent.kafka.schemaregistry.rest.SchemaRegistryConfig 7 | import io.confluent.kafka.serializers.AbstractKafkaAvroSerDeConfig 8 | import kafka.server.KafkaConfig 9 | import org.apache.kafka.common.serialization.{Serdes, StringDeserializer, StringSerializer} 10 | import org.apache.kafka.streams.test.{ConsumerRecordFactory, OutputVerifier} 11 | import org.apache.kafka.streams.{StreamsConfig, TopologyTestDriver} 12 | import org.ardlema.infra.{KafkaGlobalProperties, KafkaInfra} 13 | import org.junit.Assert 14 | import org.scalatest.FunSpec 15 | 16 | trait KafkaPropertiesFilter { 17 | 18 | val zookeeperHost = "localhost" 19 | val zookeeperPort = "2181" 20 | val zookeeperPortAsInt = zookeeperPort.toInt 21 | val kafkaHost = "localhost" 22 | val kafkaPort = "9092" 23 | val applicationKey = "filtertapp" 24 | val schemaRegistryHost = "localhost" 25 | val schemaRegistryPort = "8081" 26 | val logDir = "/tmp/kafka-filter-logs" 27 | } 28 | 29 | class FilterTopologySpec extends FunSpec with KafkaGlobalProperties with KafkaPropertiesFilter with KafkaInfra { 30 | 31 | describe("The topology") { 32 | 33 | it("should filter the VIP clients") { 34 | val kafkaConfig = new Properties() 35 | kafkaConfig.put(bootstrapServerKey, s"""$kafkaHost:$kafkaPort""") 36 | kafkaConfig.put("zookeeper.host", zookeeperHost) 37 | 
kafkaConfig.put("zookeeper.port", zookeeperPort) 38 | kafkaConfig.put(schemaRegistryUrlKey, s"""http://$schemaRegistryHost:$schemaRegistryPort""") 39 | kafkaConfig.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()) 40 | kafkaConfig.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, FilterTopologyBuilder.getAvroSerde().getClass.getName) 41 | kafkaConfig.put(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, s"""http://$schemaRegistryHost:$schemaRegistryPort""") 42 | kafkaConfig.put(groupIdKey, groupIdValue) 43 | kafkaConfig.put(KafkaConfig.BrokerIdProp, defaultBrokerIdProp) 44 | kafkaConfig.put(KafkaConfig.HostNameProp, kafkaHost) 45 | kafkaConfig.put(KafkaConfig.PortProp, kafkaPort) 46 | kafkaConfig.put(KafkaConfig.NumPartitionsProp, defaultPartitions) 47 | kafkaConfig.put(KafkaConfig.AutoCreateTopicsEnableProp, defaultAutoCreateTopics) 48 | kafkaConfig.put(applicationIdKey, applicationKey) 49 | kafkaConfig.put(bootstrapServerKey, s"""$kafkaHost:$kafkaPort""") 50 | kafkaConfig.put(KafkaConfig.HostNameProp, kafkaHost) 51 | kafkaConfig.put(KafkaConfig.PortProp, kafkaPort) 52 | kafkaConfig.put(cacheMaxBytesBufferingKey, "0") 53 | kafkaConfig.put("offsets.topic.replication.factor", "1") 54 | kafkaConfig.put("log.dir", logDir) 55 | 56 | val schemaRegistryConfig = new Properties() 57 | schemaRegistryConfig.put(SchemaRegistryConfig.KAFKASTORE_BOOTSTRAP_SERVERS_CONFIG, s"""PLAINTEXT://$kafkaHost:$kafkaPort""") 58 | schemaRegistryConfig.put(SchemaRegistryConfig.KAFKASTORE_TOPIC_CONFIG, "schemaregistrytopic") 59 | schemaRegistryConfig.put("port", schemaRegistryPort) 60 | 61 | 62 | withKafkaServerAndSchemaRegistry(kafkaConfig, schemaRegistryConfig, zookeeperPortAsInt) { () => 63 | val testDriver = new TopologyTestDriver(FilterTopologyBuilder.createTopology(), kafkaConfig) 64 | val recordFactory = new ConsumerRecordFactory(new StringSerializer(), FilterTopologyBuilder.getAvroSerde().serializer()) 65 | 66 | val client1 = new Client("alberto", 39, true) 67 | val consumerRecordFactory1 = recordFactory.create("input-topic", "a", client1, 9999L) 68 | testDriver.pipeInput(consumerRecordFactory1) 69 | val outputRecord1 = testDriver.readOutput("output-topic", new StringDeserializer(), FilterTopologyBuilder.getAvroSerde().deserializer()) 70 | OutputVerifier.compareKeyValue(outputRecord1, "a", client1) 71 | 72 | val client2 = new Client("fran", 35, false) 73 | val consumerRecordFactory2 = recordFactory.create("input-topic", "b", client2, 9999L) 74 | testDriver.pipeInput(consumerRecordFactory2) 75 | val outputRecord2 = testDriver.readOutput("output-topic", new StringDeserializer(), FilterTopologyBuilder.getAvroSerde().deserializer()) 76 | Assert.assertNull(outputRecord2) 77 | 78 | val client3 = new Client("maria", 37, true) 79 | val consumerRecordFactory3 = recordFactory.create("input-topic", "c", client3, 9999L) 80 | testDriver.pipeInput(consumerRecordFactory3) 81 | val outputRecord3 = testDriver.readOutput("output-topic", new StringDeserializer(), FilterTopologyBuilder.getAvroSerde().deserializer()) 82 | OutputVerifier.compareKeyValue(outputRecord3, "c", client3) 83 | } 84 | } 85 | } 86 | } -------------------------------------------------------------------------------- /src/test/scala/org/ardlema/infra/KafkaGlobalProperties.scala: -------------------------------------------------------------------------------- 1 | package org.ardlema.infra 2 | 3 | trait KafkaGlobalProperties { 4 | 5 | val defaultAutoCreateTopics = "true" 6 | val defaultPartitions = "1" 7 | val 
defaultBrokerIdProp = "0" 8 | val bootstrapServerKey = "bootstrap.servers" 9 | val schemaRegistryUrlKey = "schema.registry.url" 10 | val keySerializerKey = "key.serializer" 11 | val keyDeserializerKey = "key.deserializer" 12 | val listenersKey = "listeners" 13 | val groupIdKey = "group.id" 14 | val groupIdValue = "prove_group" 15 | val valueSerializerKey = "value.serializer" 16 | val valueDeserializerKey = "value.deserializer" 17 | val applicationIdKey = "application.id" 18 | val autoCreateTopicsKey = "auto.create.topics.enable" 19 | val zookeeperPortConfig = "zookeeper.port" 20 | val zookeeperHostConfig = "zookeeper.host" 21 | val cacheMaxBytesBufferingKey = "cache.max.bytes.buffering" 22 | } 23 | -------------------------------------------------------------------------------- /src/test/scala/org/ardlema/infra/KafkaInfra.scala: -------------------------------------------------------------------------------- 1 | package org.ardlema.infra 2 | 3 | import java.util.Properties 4 | 5 | import io.confluent.kafka.schemaregistry.rest.{SchemaRegistryConfig, SchemaRegistryRestApplication} 6 | import kafka.server.{KafkaConfig, KafkaServer} 7 | import org.apache.curator.test.TestingServer 8 | 9 | 10 | trait KafkaInfra { 11 | 12 | def withKafkaServerAndSchemaRegistry(kafkaProperties: Properties, schemaRegistryProps: Properties, zookeeperPort: Int) (testFunction: () => Any): Unit = { 13 | val zookeeperServer = new TestingServer(zookeeperPort) 14 | zookeeperServer.start() 15 | kafkaProperties.put(KafkaConfig.ZkConnectProp, zookeeperServer.getConnectString) 16 | val kafkaConfig: KafkaConfig = new KafkaConfig(kafkaProperties) 17 | val kafkaServer = new KafkaServer(kafkaConfig) 18 | kafkaServer.startup() 19 | schemaRegistryProps.put(SchemaRegistryConfig.KAFKASTORE_CONNECTION_URL_CONFIG, zookeeperServer.getConnectString) 20 | val schemaRegistryConfig = new SchemaRegistryConfig(schemaRegistryProps) 21 | val restApp = new SchemaRegistryRestApplication(schemaRegistryConfig) 22 | val restServer = restApp.createServer() 23 | restServer.start() 24 | testFunction() 25 | kafkaServer.shutdown() 26 | zookeeperServer.stop() 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/test/scala/org/ardlema/joining/Clock.scala: -------------------------------------------------------------------------------- 1 | package org.ardlema.joining 2 | 3 | import java.time.Instant 4 | 5 | trait Clock { 6 | 7 | def now(): Instant 8 | 9 | } 10 | -------------------------------------------------------------------------------- /src/test/scala/org/ardlema/joining/GenericTimeStampExtractor.scala: -------------------------------------------------------------------------------- 1 | package org.ardlema.joining 2 | 3 | import JavaSessionize.avro.{Coupon, Purchase} 4 | import org.apache.kafka.clients.consumer.ConsumerRecord 5 | import org.apache.kafka.streams.processor.TimestampExtractor 6 | 7 | class GenericTimeStampExtractor extends TimestampExtractor { 8 | 9 | override def extract(record: ConsumerRecord[AnyRef, AnyRef], l: Long): Long = { 10 | val topic = record.topic() 11 | 12 | topic match { 13 | case "purchase-input" | "purchase-notjoin-input" => { 14 | val purchase = record.value().asInstanceOf[Purchase] 15 | purchase.getTimestamp 16 | } 17 | case "coupon-input" | "coupon-notjoin-input" => { 18 | val coupon = record.value().asInstanceOf[Coupon] 19 | coupon.getTimestamp 20 | } 21 | } 22 | 23 | } 24 | } 25 | -------------------------------------------------------------------------------- 
/src/test/scala/org/ardlema/joining/JoinTopologySpec.scala: -------------------------------------------------------------------------------- 1 | package org.ardlema.joining 2 | 3 | import java.time.Duration 4 | import java.util.Properties 5 | 6 | import JavaSessionize.avro.{Coupon, Purchase} 7 | import io.confluent.kafka.schemaregistry.rest.SchemaRegistryConfig 8 | import io.confluent.kafka.serializers.AbstractKafkaAvroSerDeConfig 9 | import kafka.server.KafkaConfig 10 | import org.apache.kafka.clients.consumer.ConsumerConfig 11 | import org.apache.kafka.clients.producer.ProducerRecord 12 | import org.apache.kafka.common.serialization.{Serdes, StringDeserializer, StringSerializer} 13 | import org.apache.kafka.streams._ 14 | import org.apache.kafka.streams.test.ConsumerRecordFactory 15 | import org.ardlema.infra.{KafkaGlobalProperties, KafkaInfra} 16 | import org.junit.Assert 17 | import org.scalatest.{FlatSpec, Matchers} 18 | 19 | 20 | trait KafkaPropertiesJsonConfigurer extends KafkaGlobalProperties { 21 | 22 | def getKafkaConfigProps(kafkaHost: String, 23 | kafkaPort: String, 24 | zookeeperHost: String, 25 | zookeeperPort: String, 26 | schemaRegistryHost: String, 27 | schemaRegistryPort: String, 28 | applicationKey: String, 29 | logDir: String): Properties = { 30 | val kafkaConfig = new Properties() 31 | kafkaConfig.put(bootstrapServerKey, s"""$kafkaHost:$kafkaPort""") 32 | kafkaConfig.put("zookeeper.host", zookeeperHost) 33 | kafkaConfig.put("zookeeper.port", zookeeperPort) 34 | kafkaConfig.put(schemaRegistryUrlKey, s"""http://$schemaRegistryHost:$schemaRegistryPort""") 35 | kafkaConfig.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()) 36 | kafkaConfig.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, JoinTopologyBuilder.getAvroPurchaseSerde(schemaRegistryHost, schemaRegistryPort).getClass.getName) 37 | kafkaConfig.put(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, s"""http://$schemaRegistryHost:$schemaRegistryPort""") 38 | kafkaConfig.put(groupIdKey, groupIdValue) 39 | kafkaConfig.put(KafkaConfig.BrokerIdProp, defaultBrokerIdProp) 40 | kafkaConfig.put(KafkaConfig.HostNameProp, kafkaHost) 41 | kafkaConfig.put(KafkaConfig.PortProp, kafkaPort) 42 | kafkaConfig.put(KafkaConfig.NumPartitionsProp, defaultPartitions) 43 | kafkaConfig.put(KafkaConfig.AutoCreateTopicsEnableProp, defaultAutoCreateTopics) 44 | kafkaConfig.put(applicationIdKey, applicationKey) 45 | kafkaConfig.put(bootstrapServerKey, s"""$kafkaHost:$kafkaPort""") 46 | kafkaConfig.put(KafkaConfig.HostNameProp, kafkaHost) 47 | kafkaConfig.put(KafkaConfig.PortProp, kafkaPort) 48 | kafkaConfig.put(cacheMaxBytesBufferingKey, "0") 49 | kafkaConfig.put("offsets.topic.replication.factor", "1") 50 | kafkaConfig.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, "1000") 51 | kafkaConfig.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest") 52 | kafkaConfig.put(StreamsConfig.DEFAULT_TIMESTAMP_EXTRACTOR_CLASS_CONFIG, new GenericTimeStampExtractor().getClass.getName) 53 | kafkaConfig.put("log.dir", logDir) 54 | kafkaConfig 55 | } 56 | 57 | def getSchemaRegistryProps(kafkaHost: String, kafkaPort: String, schemaRegistryPort: String): Properties = { 58 | val schemaRegistryConfig = new Properties() 59 | schemaRegistryConfig.put(SchemaRegistryConfig.KAFKASTORE_BOOTSTRAP_SERVERS_CONFIG, s"""PLAINTEXT://$kafkaHost:$kafkaPort""") 60 | schemaRegistryConfig.put(SchemaRegistryConfig.KAFKASTORE_TOPIC_CONFIG, "schemaregistrytopic") 61 | schemaRegistryConfig.put("port", schemaRegistryPort) 62 | 63 | 
schemaRegistryConfig 64 | } 65 | } 66 | 67 | trait KafkaPropertiesJoin { 68 | 69 | val zookeeperHost = "localhost" 70 | val zookeeperPort = "2183" 71 | val zookeeperPortAsInt = zookeeperPort.toInt 72 | val kafkaHost = "localhost" 73 | val kafkaPort = "9094" 74 | val applicationKey = "joinapp" 75 | val schemaRegistryHost = "localhost" 76 | val schemaRegistryPort = "8083" 77 | val purchaseInputTopic = "purchase-input" 78 | val couponInputTopic = "coupon-input" 79 | val outputTopic = "joined-output" 80 | val logDir = "/tmp/kafka-join-logs" 81 | } 82 | 83 | trait KafkaPropertiesNotJoin { 84 | 85 | val zookeeperHost = "localhost" 86 | val zookeeperPort = "2185" 87 | val zookeeperPortAsInt = zookeeperPort.toInt 88 | val kafkaHost = "localhost" 89 | val kafkaPort = "9096" 90 | val applicationKey = "notjoinapp" 91 | val schemaRegistryHost = "localhost" 92 | val schemaRegistryPort = "8085" 93 | val purchaseInputTopic = "purchase-notjoin-input" 94 | val couponInputTopic = "coupon-notjoin-input" 95 | val outputTopic = "not-joined-output" 96 | val logDir = "/tmp/kafka-not-join-logs" 97 | } 98 | 99 | class JoinTopologySpec 100 | extends FlatSpec 101 | with KafkaPropertiesJoin 102 | with KafkaInfra 103 | with SystemClock 104 | with Matchers 105 | with KafkaPropertiesJsonConfigurer { 106 | 107 | "The topology" should "join sale events with the promo ones and apply the discounts" in new KafkaPropertiesJoin { 108 | val kafkaConfig = getKafkaConfigProps(kafkaHost, kafkaPort, zookeeperHost, zookeeperPort, schemaRegistryHost, schemaRegistryPort, applicationKey, logDir) 109 | 110 | val schemaRegistryConfig = getSchemaRegistryProps(kafkaHost, kafkaPort, schemaRegistryPort) 111 | 112 | withKafkaServerAndSchemaRegistry(kafkaConfig, schemaRegistryConfig, zookeeperPortAsInt) { () => 113 | val testDriver = new TopologyTestDriver(JoinTopologyBuilder.createTopology(schemaRegistryHost, 114 | schemaRegistryPort, 115 | couponInputTopic, 116 | purchaseInputTopic, 117 | outputTopic), kafkaConfig) 118 | val purchaseRecordFactory = new ConsumerRecordFactory(new StringSerializer(), 119 | JoinTopologyBuilder.getAvroPurchaseSerde( 120 | schemaRegistryHost, 121 | schemaRegistryPort).serializer()) 122 | val couponRecordFactory = new ConsumerRecordFactory(new StringSerializer(), 123 | JoinTopologyBuilder.getAvroCouponSerde( 124 | schemaRegistryHost, 125 | schemaRegistryPort).serializer()) 126 | 127 | 128 | val coupon1Time = now() 129 | val coupon1 = new Coupon(coupon1Time.toEpochMilli, "1234", 10F) 130 | // Purchase within the five minutes after the coupon - The discount should be applied 131 | val coupon1TimePlusThreeMinutes = coupon1Time.plus(Duration.ofMinutes(3)) 132 | val purchase1 = new Purchase(coupon1TimePlusThreeMinutes.toEpochMilli, "1234", "Red Glass", 25.00F) 133 | val couponRecordFactory1 = couponRecordFactory.create(couponInputTopic, "c1", coupon1) 134 | val purchaseRecordFactory1 = purchaseRecordFactory.create(purchaseInputTopic, "p1", purchase1) 135 | 136 | testDriver.pipeInput(couponRecordFactory1) 137 | testDriver.pipeInput(purchaseRecordFactory1) 138 | val outputRecord1: ProducerRecord[String, Purchase] = testDriver.readOutput(outputTopic, 139 | new StringDeserializer(), 140 | JoinTopologyBuilder.getAvroPurchaseSerde( 141 | schemaRegistryHost, 142 | schemaRegistryPort).deserializer()) 143 | outputRecord1.value().getAmount should be(22.50F) 144 | 145 | testDriver.close() 146 | } 147 | } 148 | 149 | "The topology" should "not join sale events when the purchase exceeds the timeout" in new KafkaPropertiesNotJoin { 
150 | val kafkaConfig = getKafkaConfigProps(kafkaHost, kafkaPort, zookeeperHost, zookeeperPort, schemaRegistryHost, schemaRegistryPort, applicationKey, logDir) 151 | 152 | val schemaRegistryConfig = getSchemaRegistryProps(kafkaHost, kafkaPort, schemaRegistryPort) 153 | 154 | 155 | withKafkaServerAndSchemaRegistry(kafkaConfig, schemaRegistryConfig, zookeeperPortAsInt) { () => 156 | val testDriver = new TopologyTestDriver(JoinTopologyBuilder.createTopology(schemaRegistryHost, 157 | schemaRegistryPort, 158 | couponInputTopic, 159 | purchaseInputTopic, 160 | outputTopic), kafkaConfig) 161 | val purchaseRecordFactory = new ConsumerRecordFactory(new StringSerializer(), 162 | JoinTopologyBuilder.getAvroPurchaseSerde( 163 | schemaRegistryHost, 164 | schemaRegistryPort).serializer()) 165 | val couponRecordFactory = new ConsumerRecordFactory(new StringSerializer(), 166 | JoinTopologyBuilder.getAvroCouponSerde( 167 | schemaRegistryHost, 168 | schemaRegistryPort).serializer()) 169 | 170 | 171 | val coupon2Time = now() 172 | val coupon2 = new Coupon(coupon2Time.toEpochMilli, "5678", 10F) 173 | // Purchase after the five minutes of the coupon release - The discount should NOT be applied 174 | val coupon2TimePlusEightMinutes = coupon2Time.plus(Duration.ofMinutes(8)) 175 | val purchase2 = new Purchase(coupon2TimePlusEightMinutes.toEpochMilli, "5678", "White Glass", 25.00F) 176 | val couponRecordFactory2 = couponRecordFactory.create(couponInputTopic, "c2", coupon2) 177 | val purchaseRecordFactory2 = purchaseRecordFactory.create(purchaseInputTopic, "p2", purchase2) 178 | testDriver.pipeInput(couponRecordFactory2) 179 | testDriver.pipeInput(purchaseRecordFactory2) 180 | val outputRecord2 = testDriver.readOutput(outputTopic, 181 | new StringDeserializer(), 182 | JoinTopologyBuilder.getAvroPurchaseSerde( 183 | schemaRegistryHost, 184 | schemaRegistryPort).deserializer()) 185 | Assert.assertNull(outputRecord2) 186 | 187 | testDriver.close() 188 | } 189 | } 190 | } 191 | 192 | 193 | 194 | 195 | -------------------------------------------------------------------------------- /src/test/scala/org/ardlema/joining/SystemClock.scala: -------------------------------------------------------------------------------- 1 | package org.ardlema.joining 2 | 3 | import java.time.Instant 4 | 5 | trait SystemClock extends Clock { 6 | 7 | def now(): Instant = Instant.now() 8 | 9 | } 10 | --------------------------------------------------------------------------------