├── README.md
├── docs
│   └── zookeeper_tutorial.pdf
└── labs
    ├── 01-starting-kafka.md
    ├── 02-create-topic.md
    ├── 03-producer-consumer.md
    └── 04-kafka-examples
        ├── bin
        │   ├── java-producer-consumer-demo.sh
        │   └── kafka-run-class.sh
        ├── kafka-examples.iml
        ├── pom.xml
        └── src
            └── main
                └── java
                    └── kafka
                        └── examples
                            ├── Consumer.java
                            ├── KafkaConsumerProducerDemo.java
                            ├── KafkaProperties.java
                            └── Producer.java

/README.md:
--------------------------------------------------------------------------------
# kafka-learning

## Overview
- [Home](https://kafka.apache.org/)
- [QuickStart](https://kafka.apache.org/quickstart)

## Tutorials
- [Cloudurable Kafka Tutorial](http://cloudurable.com/blog/kafka-tutorial/index.html)
- [Learning Journal Kafka Tutorial](https://www.youtube.com/playlist?list=PLkz1SCf5iB4enAR00Z46JwY9GGkaS2NON)
- [Udemy Apache Kafka Series - Learning Apache Kafka for Beginners](https://goo.gl/GaCKQN) (All Levels)
- [Udemy Apache Kafka Series - Kafka Connect Hands-on Learning](https://goo.gl/wLLLY9) (Intermediate)
- [Udemy Apache Kafka Series - Kafka Streams for Data Processing](https://goo.gl/bro314) (Intermediate)
- [Udemy Apache Kafka Series - Kafka Cluster Setup & Administration](https://goo.gl/1uYAuU) (Expert)
- [Udemy Apache Kafka Series - Confluent Schema Registry & REST Proxy](https://goo.gl/XgWcVz) (Intermediate)

## Books
- [Kafka: The Definitive Guide: Real-Time Data and Stream Processing at Scale](https://www.amazon.com/Kafka-Definitive-Real-Time-Stream-Processing/dp/1491936169)
- [Kafka: The Definitive Guide (Preview Edition)](https://www.confluent.io/resources/kafka-definitive-guide-preview-edition/)

## Videos
- [ETL Is Dead, Long Live Streams: real-time streams w/ Apache Kafka](https://www.youtube.com/watch?v=I32hmY4diFY) (2017.02)
- [Airstream: Spark Streaming At Airbnb](https://youtu.be/tJ1uIHQtoNc) (2016.07)
- [Developing Real-Time Data Pipelines with Apache Kafka](https://www.youtube.com/watch?v=GRPLRONVDWY) (2016.03)
- [Building Realtime Data Pipelines with Kafka Connect and Spark Streaming](https://youtu.be/wMLAlJimPzk) (2016.02)
- [Putting Apache Kafka to Use for Event Streams](https://www.youtube.com/watch?v=el-SqcZLZlI) (2015.04)
- [Introducing Exactly Once Semantics in Apache Kafka with Matthias J. Sax](https://www.youtube.com/watch?v=Wo9jlaz8h0k) (2017.06)
- [Exactly Once Streaming from Kafka - Cody Koeninger (Kixer)](https://www.youtube.com/watch?v=fXnNEq1v3VA) (2015.06)

## Articles
- [Apache Kafka Goes 1.0](https://www.confluent.io/blog/apache-kafka-goes-1-0/)
- [Exactly-once Semantics are Possible: Here’s How Kafka Does it](https://www.confluent.io/blog/exactly-once-semantics-are-possible-heres-how-apache-kafka-does-it/)
- [Broker Config](https://kafka.apache.org/documentation/#brokerconfigs)
- [How to choose the number of topics/partitions in a Kafka cluster?](https://www.confluent.io/blog/how-to-choose-the-number-of-topicspartitions-in-a-kafka-cluster/) (2015.03)
- [min.insync.replicas gotchas: What does it actually do?](https://logallthethings.com/2016/07/11/min-insync-replicas-what-does-it-actually-do/) (2016.07)
- [How Kafka’s Storage Internals Work](https://thehoard.blog/how-kafkas-storage-internals-work-3a29b02e026) (2016.10)
- Getting Started with Apache Kafka for the Baffled: [Part 1](http://www.shayne.me/blog/2015/2015-06-16-everything-about-kafka-part-1/), [Part 2](http://www.shayne.me/blog/2015/2015-06-25-everything-about-kafka-part-2/) (2015.06)

## Tools
- [Kafka cluster as Kubernetes StatefulSet](https://github.com/Yolean/kubernetes-kafka)
- [Kafka Manager](https://github.com/yahoo/kafka-manager)
- [Kafka Docker](https://github.com/wurstmeister/kafka-docker)

## Links
- [Wiki](https://cwiki.apache.org/confluence/display/KAFKA/Index)
- [FAQ](https://cwiki.apache.org/confluence/display/KAFKA/FAQ)
- [Committers](http://kafka.apache.org/committers)
- [Ecosystem](https://cwiki.apache.org/confluence/display/KAFKA/Ecosystem)
- [Papers & Talks](https://cwiki.apache.org/confluence/display/KAFKA/Kafka+papers+and+presentations)
- [Performance Testing](https://cwiki.apache.org/confluence/display/KAFKA/Performance+testing)
- [Awesome Kafka](https://github.com/infoslack/awesome-kafka)

--------------------------------------------------------------------------------
/docs/zookeeper_tutorial.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ziwon/kafka-learning/76128b32c88f057d06584528fee04c0965d033c0/docs/zookeeper_tutorial.pdf
--------------------------------------------------------------------------------
/labs/01-starting-kafka.md:
--------------------------------------------------------------------------------
Docker for Mac >= 1.12, Linux, Docker for Windows 10:

    docker run --rm -it \
        -p 2181:2181 -p 3030:3030 -p 8081:8081 \
        -p 8082:8082 -p 8083:8083 -p 9092:9092 \
        -e ADV_HOST=127.0.0.1 \
        landoop/fast-data-dev

Docker Toolbox:

    docker run --rm -it \
        -p 2181:2181 -p 3030:3030 -p 8081:8081 \
        -p 8082:8082 -p 8083:8083 -p 9092:9092 \
        -e ADV_HOST=192.168.99.100 \
        landoop/fast-data-dev

Kafka command-line tools:

    docker run --rm -it --net=host landoop/fast-data-dev bash

Results:

    docker run --rm -it \
        -p 2181:2181 -p 3030:3030 -p 8081:8081 \
        -p 8082:8082 -p 8083:8083 -p 9092:9092 \
        -e ADV_HOST=127.0.0.1 \
        landoop/fast-data-dev
    Setting advertised host to 127.0.0.1.
    Operating system RAM available is 1755 MiB, which is less than the lowest
    recommended of 5120 MiB. Your system performance may be seriously impacted.
    Starting services.
    This is landoop’s fast-data-dev. Kafka 0.10.2.1, Confluent OSS 3.2.1.
    You may visit http://127.0.0.1:3030 in about a minute.
    2017-05-20 02:23:39,015 CRIT Supervisor running as root (no user in config file)
    2017-05-20 02:23:39,015 WARN No file matches via include "/etc/supervisord.d/*.conf"
    2017-05-20 02:23:39,020 INFO supervisord started with pid 6
    2017-05-20 02:23:40,028 INFO spawned: 'sample-data' with pid 94
    2017-05-20 02:23:40,032 INFO spawned: 'zookeeper' with pid 95
    2017-05-20 02:23:40,041 INFO spawned: 'caddy' with pid 97
    2017-05-20 02:23:40,044 INFO spawned: 'broker' with pid 99
    2017-05-20 02:23:40,047 INFO spawned: 'smoke-tests' with pid 103
    2017-05-20 02:23:40,050 INFO spawned: 'connect-distributed' with pid 106
    2017-05-20 02:23:40,053 INFO spawned: 'logs-to-kafka' with pid 107
    2017-05-20 02:23:40,057 INFO spawned: 'schema-registry' with pid 109
    2017-05-20 02:23:40,060 INFO spawned: 'rest-proxy' with pid 110
    2017-05-20 02:23:41,112 INFO success: sample-data entered RUNNING state, process has stayed up for > than 1 seconds (startsecs)
    2017-05-20 02:23:41,112 INFO success: zookeeper entered RUNNING state, process has stayed up for > than 1 seconds (startsecs)
    2017-05-20 02:23:41,113 INFO success: caddy entered RUNNING state, process has stayed up for > than 1 seconds (startsecs)
    2017-05-20 02:23:41,113 INFO success: broker entered RUNNING state, process has stayed up for > than 1 seconds (startsecs)
    2017-05-20 02:23:41,113 INFO success: smoke-tests entered RUNNING state, process has stayed up for > than 1 seconds (startsecs)
    2017-05-20 02:23:41,113 INFO success: connect-distributed entered RUNNING state, process has stayed up for > than 1 seconds (startsecs)
    2017-05-20 02:23:41,113 INFO success: logs-to-kafka entered RUNNING state, process has stayed up for > than 1 seconds (startsecs)
    2017-05-20 02:23:41,114 INFO success: schema-registry entered RUNNING state, process has stayed up for > than 1 seconds (startsecs)
    2017-05-20 02:23:41,114 INFO success: rest-proxy entered RUNNING state, process has stayed up for > than 1 seconds (startsecs)
    2017-05-20 02:23:42,757 INFO exited: schema-registry (exit status 1; not expected)
    2017-05-20 02:23:42,765 INFO spawned: 'schema-registry' with pid 291
    2017-05-20 02:23:43,055 INFO exited: rest-proxy (exit status 1; not expected)
    2017-05-20 02:23:43,537 INFO spawned: 'rest-proxy' with pid 330
    2017-05-20 02:23:44,147 INFO success: schema-registry entered RUNNING state, process has stayed up for > than 1 seconds (startsecs)
    2017-05-20 02:23:44,562 INFO success: rest-proxy entered RUNNING state, process has stayed up for > than 1 seconds (startsecs)

--------------------------------------------------------------------------------
/labs/02-create-topic.md:
--------------------------------------------------------------------------------
To get the Kafka command-line tools:

    docker run --rm -it --net=host landoop/fast-data-dev bash

List all the Kafka commands available in this box:

    root@fast-data-dev / $ kafka-
    kafka-acls                          kafka-replica-verification
    kafka-avro-console-consumer         kafka-rest-run-class
    kafka-avro-console-producer         kafka-rest-start
    kafka-configs                       kafka-rest-stop
    kafka-console-consumer              kafka-rest-stop-service
    kafka-console-producer              kafka-run-class
    kafka-consumer-groups               kafka-server-start
    kafka-consumer-offset-checker       kafka-server-stop
kafka-consumer-perf-test kafka-simple-consumer-shell 17 | kafka-mirror-maker kafka-streams-application-reset 18 | kafka-preferred-replica-election kafka-topics 19 | kafka-producer-perf-test kafka-verifiable-consumer 20 | kafka-reassign-partitions kafka-verifiable-producer 21 | kafka-replay-log-producer 22 | 23 | Options for `kafta-topics`: 24 | 25 | root@fast-data-dev / $ kafka-topics 26 | Create, delete, describe, or change a topic. 27 | Option Description 28 | ------ ----------- 29 | --alter Alter the number of partitions, 30 | replica assignment, and/or 31 | configuration for the topic. 32 | --config A topic configuration override for the 33 | topic being created or altered.The 34 | following is a list of valid 35 | configurations: 36 | cleanup.policy 37 | compression.type 38 | delete.retention.ms 39 | file.delete.delay.ms 40 | flush.messages 41 | flush.ms 42 | follower.replication.throttled. 43 | replicas 44 | index.interval.bytes 45 | leader.replication.throttled.replicas 46 | max.message.bytes 47 | message.format.version 48 | message.timestamp.difference.max.ms 49 | message.timestamp.type 50 | min.cleanable.dirty.ratio 51 | min.compaction.lag.ms 52 | min.insync.replicas 53 | preallocate 54 | retention.bytes 55 | retention.ms 56 | segment.bytes 57 | segment.index.bytes 58 | segment.jitter.ms 59 | segment.ms 60 | unclean.leader.election.enable 61 | See the Kafka documentation for full 62 | details on the topic configs. 63 | --create Create a new topic. 64 | --delete Delete a topic 65 | --delete-config A topic configuration override to be 66 | removed for an existing topic (see 67 | the list of configurations under the 68 | --config option). 69 | --describe List details for the given topics. 70 | --disable-rack-aware Disable rack aware replica assignment 71 | --force Suppress console prompts 72 | --help Print usage information. 73 | --if-exists if set when altering or deleting 74 | topics, the action will only execute 75 | if the topic exists 76 | --if-not-exists if set when creating topics, the 77 | action will only execute if the 78 | topic does not already exist 79 | --list List all available topics. 80 | --partitions The number of partitions for the topic 81 | being created or altered (WARNING: 82 | If partitions are increased for a 83 | topic that has a key, the partition 84 | logic or ordering of the messages 85 | will be affected 86 | --replica-assignment 91 | --replication-factor partition in the topic being created. 93 | --topic The topic to be create, alter or 94 | describe. Can also accept a regular 95 | expression except for --create option 96 | --topics-with-overrides if set when describing topics, only 97 | show topics that have overridden 98 | configs 99 | --unavailable-partitions if set when describing topics, only 100 | show partitions whose leader is not 101 | available 102 | --under-replicated-partitions if set when describing topics, only 103 | show under replicated partitions 104 | --zookeeper REQUIRED: The connection string for 105 | the zookeeper connection in the form 106 | host:port. Multiple URLS can be 107 | given to allow fail-over. 108 | 109 | 110 | Create a topic: 111 | 112 | root@fast-data-dev / $ kafka-topics --zookeeper 127.0.0.1:2181 --create --topic first_topic --partition 3 --replication-factor 1 113 | WARNING: Due to limitations in metric names, topics with a period ('.') or underscore ('_') could collide. To avoid issues it is best to use either, but not both. 114 | Created topic "first_topic". 
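The CLI is the quickest way to create a topic, but the same operation can also be scripted from Java. Below is a minimal sketch using the `AdminClient` API; note that `AdminClient` only ships with Kafka 0.11+ client libraries (newer than the 0.10.2.1 broker bundled in this image), and the class name, bootstrap address, and topic settings are illustrative assumptions rather than part of this repo.

```java
import java.util.Collections;
import java.util.Properties;
import java.util.concurrent.ExecutionException;

import org.apache.kafka.clients.admin.AdminClient;
import org.apache.kafka.clients.admin.AdminClientConfig;
import org.apache.kafka.clients.admin.NewTopic;

public class CreateTopicDemo {
    public static void main(String[] args) throws ExecutionException, InterruptedException {
        Properties props = new Properties();
        // Assumes the lab broker is reachable on 127.0.0.1:9092 (ADV_HOST=127.0.0.1).
        props.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, "127.0.0.1:9092");

        try (AdminClient admin = AdminClient.create(props)) {
            // Same settings as the CLI example: 3 partitions, replication factor 1.
            NewTopic topic = new NewTopic("first_topic", 3, (short) 1);
            admin.createTopics(Collections.singletonList(topic)).all().get();
            System.out.println("Created topic " + topic.name());
        }
    }
}
```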
115 | 116 | Describe a topic: 117 | 118 | root@fast-data-dev / $ kafka-topics --zookeeper 127.0.0.1:2181 --describe --topic first_topic 119 | Topic:first_topic PartitionCount:3 ReplicationFactor:1 Configs: 120 | Topic: first_topic Partition: 0 Leader: 0 Replicas: 0 Isr: 0 121 | Topic: first_topic Partition: 1 Leader: 0 Replicas: 0 Isr: 0 122 | Topic: first_topic Partition: 2 Leader: 0 Replicas: 0 Isr: 0 123 | 124 | Delete a topic: 125 | 126 | root@fast-data-dev / kafka-topics --zookeeper 127.0.0.1:2181 --topic second_topic --delete 127 | Topic second_topic is marked for deletion. 128 | Note: This will have no impact if delete.topic.enable is not set to true. 129 | -------------------------------------------------------------------------------- /labs/03-producer-consumer.md: -------------------------------------------------------------------------------- 1 | Options for `kafka-console-producer`: 2 | 3 | Read data from standard input and publish it to Kafka. 4 | Option Description 5 | ------ ----------- 6 | --batch-size Number of messages to send in a single 7 | batch if they are not being sent 8 | synchronously. (default: 200) 9 | --broker-list REQUIRED: The broker list string in 10 | the form HOST1:PORT1,HOST2:PORT2. 11 | --compression-codec [String: The compression codec: either 'none', 12 | compression-codec] 'gzip', 'snappy', or 'lz4'.If 13 | specified without value, then it 14 | defaults to 'gzip' 15 | --key-serializer implementation to use for 17 | serializing keys. (default: kafka. 18 | serializer.DefaultEncoder) 19 | --line-reader The class name of the class to use for 20 | reading lines from standard in. By 21 | default each line is read as a 22 | separate message. (default: kafka. 23 | tools. 24 | ConsoleProducer$LineMessageReader) 25 | --max-block-ms block for during a send request 27 | (default: 60000) 28 | --max-memory-bytes to buffer records waiting to be sent 30 | to the server. (default: 33554432) 31 | --max-partition-memory-bytes partition. When records are received 33 | which are smaller than this size the 34 | producer will attempt to 35 | optimistically group them together 36 | until this size is reached. 37 | (default: 16384) 38 | --message-send-max-retries Brokers can fail receiving the message 39 | for multiple reasons, and being 40 | unavailable transiently is just one 41 | of them. This property specifies the 42 | number of retires before the 43 | producer give up and drop this 44 | message. (default: 3) 45 | --metadata-expiry-ms after which we force a refresh of 47 | metadata even if we haven't seen any 48 | leadership changes. (default: 300000) 49 | --old-producer Use the old producer implementation. 50 | --producer-property properties in the form key=value to 52 | the producer. 53 | --producer.config Producer config properties file. Note 54 | that [producer-property] takes 55 | precedence over this config. 56 | --property A mechanism to pass user-defined 57 | properties in the form key=value to 58 | the message reader. This allows 59 | custom configuration for a user- 60 | defined message reader. 61 | --queue-enqueuetimeout-ms 2147483647) 63 | --queue-size If set and the producer is running in 64 | asynchronous mode, this gives the 65 | maximum amount of messages will 66 | queue awaiting sufficient batch 67 | size. (default: 10000) 68 | --request-required-acks requests (default: 1) 70 | --request-timeout-ms requests. 
Value must be non-negative 72 | and non-zero (default: 1500) 73 | --retry-backoff-ms Before each retry, the producer 74 | refreshes the metadata of relevant 75 | topics. Since leader election takes 76 | a bit of time, this property 77 | specifies the amount of time that 78 | the producer waits before refreshing 79 | the metadata. (default: 100) 80 | --socket-buffer-size The size of the tcp RECV size. 81 | (default: 102400) 82 | --sync If set message send requests to the 83 | brokers are synchronously, one at a 84 | time as they arrive. 85 | --timeout If set and the producer is running in 86 | asynchronous mode, this gives the 87 | maximum amount of time a message 88 | will queue awaiting sufficient batch 89 | size. The value is given in ms. 90 | (default: 1000) 91 | --topic REQUIRED: The topic id to produce 92 | messages to. 93 | --value-serializer implementation to use for 95 | serializing values. (default: kafka. 96 | serializer.DefaultEncoder) 97 | 98 | 99 | Publish data to a topic: 100 | 101 | root@fast-data-dev / $ echo "your message" | kafka-console-producer --broker-list 127.0.0.1:9092 --topic first_topic 102 | 103 | Options for `kafka-console-consumer`: 104 | 105 | The console consumer is a tool that reads data from Kafka and outputs it to standard output. 106 | Option Description 107 | ------ ----------- 108 | --blacklist Blacklist of topics to exclude from 109 | consumption. 110 | --bootstrap-server used): The server to connect to. 112 | --consumer-property properties in the form key=value to 114 | the consumer. 115 | --consumer.config Consumer config properties file. Note 116 | that [consumer-property] takes 117 | precedence over this config. 118 | --csv-reporter-enabled If set, the CSV metrics reporter will 119 | be enabled 120 | --delete-consumer-offsets If specified, the consumer path in 121 | zookeeper is deleted when starting up 122 | --enable-systest-events Log lifecycle events of the consumer 123 | in addition to logging consumed 124 | messages. (This is specific for 125 | system tests.) 126 | --formatter The name of a class to use for 127 | formatting kafka messages for 128 | display. (default: kafka.tools. 129 | DefaultMessageFormatter) 130 | --from-beginning If the consumer does not already have 131 | an established offset to consume 132 | from, start with the earliest 133 | message present in the log rather 134 | than the latest message. 135 | --key-deserializer 137 | --max-messages The maximum number of messages to 138 | consume before exiting. If not set, 139 | consumption is continual. 140 | --metrics-dir this parameter isset, the csv 142 | metrics will be outputed here 143 | --new-consumer Use the new consumer implementation. 144 | This is the default. 145 | --offset The offset id to consume from (a non- 146 | negative number), or 'earliest' 147 | which means from beginning, or 148 | 'latest' which means from end 149 | (default: latest) 150 | --partition The partition to consume from. 151 | --property The properties to initialize the 152 | message formatter. 153 | --skip-message-on-error If there is an error when processing a 154 | message, skip it instead of halt. 155 | --timeout-ms If specified, exit if no message is 156 | available for consumption for the 157 | specified interval. 158 | --topic The topic id to consume on. 159 | --value-deserializer 161 | --whitelist Whitelist of topics to include for 162 | consumption. 
163 | --zookeeper REQUIRED (only when using old 164 | consumer): The connection string for 165 | the zookeeper connection in the form 166 | host:port. Multiple URLS can be 167 | given to allow fail-over. 168 | 169 | 170 | Consuming data from a topic: 171 | 172 | root@fast-data-dev / $ kafka-console-consumer --bootstrap-server 127.0.0.1:9092 --topic first_topic --from-beginning 173 | 174 | 175 | 176 | Consuming data from a topic using `consumer-property`: 177 | 178 | root@fast-data-dev / $ kafka-console-consumer --bootstrap-server 127.0.0.1:9092 --topic first_topic --consumer-property group.id=testgroup --from-beginning 179 | 180 | 181 | Java Producer Demo: 182 | 183 | 184 | ```java 185 | import org.apache.kafka.clients.producer.Producer; 186 | import org.apache.kafka.clients.producer.ProducerRecord; 187 | import org.apache.kafka.common.serialization.IntegerDeserializer; 188 | import org.apache.kafka.common.serialization.StringSerializer; 189 | 190 | import java.util.Properties; 191 | 192 | public class KafkaProducerDemo { 193 | public static void main(String[] args) { 194 | Properties properties = new Properties(); 195 | 196 | // kafka bootstrap server 197 | properties.setProperty("bootstrap.servers", "127.0.0.1:9092"); 198 | properties.setProperty("key.serializer", StringSerializer.class.getName()); 199 | properties.setProperty("value.serializer", StringSerializer.class.getName()); 200 | // producer acks 201 | properties.setProperty("acks", "1"); 202 | properties.setProperty("retries", "3"); 203 | properties.setProperty("linger.ms", "1"); 204 | 205 | Producer producer = new org.apache.kafka.clients.producer.KafkaProducer(properties); 206 | 207 | 208 | for (int key=0; key < 10; key++){ 209 | ProducerRecord producerRecord = 210 | new ProducerRecord("second_topic", Integer.toString(key), "message that has key: " + Integer.toString(key)); 211 | producer.send(producerRecord); 212 | } 213 | 214 | 215 | 216 | producer.close(); 217 | } 218 | } 219 | ``` 220 | 221 | 222 | Java Consumer Demo: 223 | 224 | ```java 225 | import org.apache.kafka.clients.consumer.ConsumerRecord; 226 | import org.apache.kafka.clients.consumer.ConsumerRecords; 227 | import org.apache.kafka.clients.consumer.KafkaConsumer; 228 | import org.apache.kafka.common.serialization.StringDeserializer; 229 | 230 | import java.util.Arrays; 231 | import java.util.Properties; 232 | 233 | public class KafkaConsumerDemo { 234 | public static void main(String[] args) { 235 | Properties properties = new Properties(); 236 | 237 | // kafka bootstrap server 238 | properties.setProperty("bootstrap.servers", "127.0.0.1:9092"); 239 | properties.setProperty("key.deserializer", StringDeserializer.class.getName()); 240 | properties.setProperty("value.deserializer", StringDeserializer.class.getName()); 241 | 242 | properties.setProperty("group.id", "test"); 243 | properties.setProperty("enable.auto.commit", "false"); 244 | // properties.setProperty("auto.commit.interval.ms", "1000"); 245 | properties.setProperty("auto.offset.reset", "earliest"); 246 | 247 | KafkaConsumer kafkaConsumer = new KafkaConsumer(properties); 248 | kafkaConsumer.subscribe(Arrays.asList("second_topic")); 249 | 250 | while(true) { 251 | ConsumerRecords consumerRecords = kafkaConsumer.poll(100); 252 | for (ConsumerRecord consumerRecord : consumerRecords) { 253 | // consumerRecord.value(); 254 | // consumerRecord.key(); 255 | // consumerRecord.offset(); 256 | // consumerRecord.partition(); 257 | // consumerRecord.topic(); 258 | // consumerRecord.timestamp(); 259 | 260 | 
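                // Each ConsumerRecord carries its topic, partition, offset, timestamp,
                // key and value (see the accessors above). Because enable.auto.commit is
                // false, offsets only advance when commitSync() runs after this loop,
                // giving at-least-once delivery: records from an uncommitted poll are
                // re-read if the consumer restarts.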
System.out.println("Partition: " + consumerRecord.partition() + 261 | ", Offset: " + consumerRecord.offset() + 262 | ", Key: " + consumerRecord.key() + 263 | ", Value: " + consumerRecord.value()); 264 | 265 | } 266 | kafkaConsumer.commitSync(); 267 | } 268 | } 269 | } 270 | ``` 271 | -------------------------------------------------------------------------------- /labs/04-kafka-examples/bin/java-producer-consumer-demo.sh: -------------------------------------------------------------------------------- 1 | 2 | #!/bin/bash 3 | # Licensed to the Apache Software Foundation (ASF) under one or more 4 | # contributor license agreements. See the NOTICE file distributed with 5 | # this work for additional information regarding copyright ownership. 6 | # The ASF licenses this file to You under the Apache License, Version 2.0 7 | # (the "License"); you may not use this file except in compliance with 8 | # the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | base_dir=$(dirname $0)/.. 19 | 20 | if [ "x$KAFKA_HEAP_OPTS" = "x" ]; then 21 | export KAFKA_HEAP_OPTS="-Xmx512M" 22 | fi 23 | exec $base_dir/bin/kafka-run-class.sh kafka.examples.KafkaConsumerProducerDemo $@ 24 | -------------------------------------------------------------------------------- /labs/04-kafka-examples/bin/kafka-run-class.sh: -------------------------------------------------------------------------------- 1 | 2 | #!/bin/bash 3 | # Licensed to the Apache Software Foundation (ASF) under one or more 4 | # contributor license agreements. See the NOTICE file distributed with 5 | # this work for additional information regarding copyright ownership. 6 | # The ASF licenses this file to You under the Apache License, Version 2.0 7 | # (the "License"); you may not use this file except in compliance with 8 | # the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | if [ $# -lt 1 ]; 19 | then 20 | echo "USAGE: $0 [-daemon] [-name servicename] [-loggc] classname [opts]" 21 | exit 1 22 | fi 23 | 24 | # CYGINW == 1 if Cygwin is detected, else 0. 25 | if [[ $(uname -a) =~ "CYGWIN" ]]; then 26 | CYGWIN=1 27 | else 28 | CYGWIN=0 29 | fi 30 | 31 | if [ -z "$INCLUDE_TEST_JARS" ]; then 32 | INCLUDE_TEST_JARS=false 33 | fi 34 | 35 | # Exclude jars not necessary for running commands. 36 | regex="(-(test|src|scaladoc|javadoc)\.jar|jar.asc)$" 37 | should_include_file() { 38 | if [ "$INCLUDE_TEST_JARS" = true ]; then 39 | return 0 40 | fi 41 | file=$1 42 | if [ -z "$(echo "$file" | egrep "$regex")" ] ; then 43 | return 0 44 | else 45 | return 1 46 | fi 47 | } 48 | 49 | base_dir=$(dirname $0)/.. 
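# Note: the classpath loops below expect either a Kafka source checkout built with
# './gradlew jar' (core/build, clients/build, ...) or a release layout with a libs/
# directory next to bin/; if neither is present, CLASSPATH stays empty and the
# script exits with the "Classpath is empty" error further down.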
50 | 51 | if [ -z "$SCALA_VERSION" ]; then 52 | SCALA_VERSION=2.11.11 53 | fi 54 | 55 | if [ -z "$SCALA_BINARY_VERSION" ]; then 56 | SCALA_BINARY_VERSION=$(echo $SCALA_VERSION | cut -f 1-2 -d '.') 57 | fi 58 | 59 | # run ./gradlew copyDependantLibs to get all dependant jars in a local dir 60 | shopt -s nullglob 61 | for dir in "$base_dir"/core/build/dependant-libs-${SCALA_VERSION}*; 62 | do 63 | if [ -z "$CLASSPATH" ] ; then 64 | CLASSPATH="$dir/*" 65 | else 66 | CLASSPATH="$CLASSPATH:$dir/*" 67 | fi 68 | done 69 | 70 | for file in "$base_dir"/examples/build/libs/kafka-examples*.jar; 71 | do 72 | if should_include_file "$file"; then 73 | CLASSPATH="$CLASSPATH":"$file" 74 | fi 75 | done 76 | 77 | for file in "$base_dir"/clients/build/libs/kafka-clients*.jar; 78 | do 79 | if should_include_file "$file"; then 80 | CLASSPATH="$CLASSPATH":"$file" 81 | fi 82 | done 83 | 84 | for file in "$base_dir"/streams/build/libs/kafka-streams*.jar; 85 | do 86 | if should_include_file "$file"; then 87 | CLASSPATH="$CLASSPATH":"$file" 88 | fi 89 | done 90 | 91 | for file in "$base_dir"/streams/examples/build/libs/kafka-streams-examples*.jar; 92 | do 93 | if should_include_file "$file"; then 94 | CLASSPATH="$CLASSPATH":"$file" 95 | fi 96 | done 97 | 98 | for file in "$base_dir"/streams/build/dependant-libs-${SCALA_VERSION}/rocksdb*.jar; 99 | do 100 | CLASSPATH="$CLASSPATH":"$file" 101 | done 102 | 103 | for file in "$base_dir"/tools/build/libs/kafka-tools*.jar; 104 | do 105 | if should_include_file "$file"; then 106 | CLASSPATH="$CLASSPATH":"$file" 107 | fi 108 | done 109 | 110 | for dir in "$base_dir"/tools/build/dependant-libs-${SCALA_VERSION}*; 111 | do 112 | CLASSPATH="$CLASSPATH:$dir/*" 113 | done 114 | 115 | for cc_pkg in "api" "transforms" "runtime" "file" "json" "tools" 116 | do 117 | for file in "$base_dir"/connect/${cc_pkg}/build/libs/connect-${cc_pkg}*.jar; 118 | do 119 | if should_include_file "$file"; then 120 | CLASSPATH="$CLASSPATH":"$file" 121 | fi 122 | done 123 | if [ -d "$base_dir/connect/${cc_pkg}/build/dependant-libs" ] ; then 124 | CLASSPATH="$CLASSPATH:$base_dir/connect/${cc_pkg}/build/dependant-libs/*" 125 | fi 126 | done 127 | 128 | # classpath addition for release 129 | for file in "$base_dir"/libs/*; 130 | do 131 | if should_include_file "$file"; then 132 | CLASSPATH="$CLASSPATH":"$file" 133 | fi 134 | done 135 | 136 | for file in "$base_dir"/core/build/libs/kafka_${SCALA_BINARY_VERSION}*.jar; 137 | do 138 | if should_include_file "$file"; then 139 | CLASSPATH="$CLASSPATH":"$file" 140 | fi 141 | done 142 | shopt -u nullglob 143 | 144 | if [ -z "$CLASSPATH" ] ; then 145 | echo "Classpath is empty. Please build the project first e.g. by running './gradlew jar -Pscala_version=$SCALA_VERSION'" 146 | exit 1 147 | fi 148 | 149 | # JMX settings 150 | if [ -z "$KAFKA_JMX_OPTS" ]; then 151 | KAFKA_JMX_OPTS="-Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false " 152 | fi 153 | 154 | # JMX port to use 155 | if [ $JMX_PORT ]; then 156 | KAFKA_JMX_OPTS="$KAFKA_JMX_OPTS -Dcom.sun.management.jmxremote.port=$JMX_PORT " 157 | fi 158 | 159 | # Log directory to use 160 | if [ "x$LOG_DIR" = "x" ]; then 161 | LOG_DIR="$base_dir/logs" 162 | fi 163 | 164 | # Log4j settings 165 | if [ -z "$KAFKA_LOG4J_OPTS" ]; then 166 | # Log to console. This is a tool. 167 | LOG4J_DIR="$base_dir/config/tools-log4j.properties" 168 | # If Cygwin is detected, LOG4J_DIR is converted to Windows format. 
169 | (( CYGWIN )) && LOG4J_DIR=$(cygpath --path --mixed "${LOG4J_DIR}") 170 | KAFKA_LOG4J_OPTS="-Dlog4j.configuration=file:${LOG4J_DIR}" 171 | else 172 | # create logs directory 173 | if [ ! -d "$LOG_DIR" ]; then 174 | mkdir -p "$LOG_DIR" 175 | fi 176 | fi 177 | 178 | # If Cygwin is detected, LOG_DIR is converted to Windows format. 179 | (( CYGWIN )) && LOG_DIR=$(cygpath --path --mixed "${LOG_DIR}") 180 | KAFKA_LOG4J_OPTS="-Dkafka.logs.dir=$LOG_DIR $KAFKA_LOG4J_OPTS" 181 | 182 | # Generic jvm settings you want to add 183 | if [ -z "$KAFKA_OPTS" ]; then 184 | KAFKA_OPTS="" 185 | fi 186 | 187 | # Set Debug options if enabled 188 | if [ "x$KAFKA_DEBUG" != "x" ]; then 189 | 190 | # Use default ports 191 | DEFAULT_JAVA_DEBUG_PORT="5005" 192 | 193 | if [ -z "$JAVA_DEBUG_PORT" ]; then 194 | JAVA_DEBUG_PORT="$DEFAULT_JAVA_DEBUG_PORT" 195 | fi 196 | 197 | # Use the defaults if JAVA_DEBUG_OPTS was not set 198 | DEFAULT_JAVA_DEBUG_OPTS="-agentlib:jdwp=transport=dt_socket,server=y,suspend=${DEBUG_SUSPEND_FLAG:-n},address=$JAVA_DEBUG_PORT" 199 | if [ -z "$JAVA_DEBUG_OPTS" ]; then 200 | JAVA_DEBUG_OPTS="$DEFAULT_JAVA_DEBUG_OPTS" 201 | fi 202 | 203 | echo "Enabling Java debug options: $JAVA_DEBUG_OPTS" 204 | KAFKA_OPTS="$JAVA_DEBUG_OPTS $KAFKA_OPTS" 205 | fi 206 | 207 | # Which java to use 208 | if [ -z "$JAVA_HOME" ]; then 209 | JAVA="java" 210 | else 211 | JAVA="$JAVA_HOME/bin/java" 212 | fi 213 | 214 | # Memory options 215 | if [ -z "$KAFKA_HEAP_OPTS" ]; then 216 | KAFKA_HEAP_OPTS="-Xmx256M" 217 | fi 218 | 219 | # JVM performance options 220 | if [ -z "$KAFKA_JVM_PERFORMANCE_OPTS" ]; then 221 | KAFKA_JVM_PERFORMANCE_OPTS="-server -XX:+UseG1GC -XX:MaxGCPauseMillis=20 -XX:InitiatingHeapOccupancyPercent=35 -XX:+ExplicitGCInvokesConcurrent -Djava.awt.headless=true" 222 | fi 223 | 224 | 225 | while [ $# -gt 0 ]; do 226 | COMMAND=$1 227 | case $COMMAND in 228 | -name) 229 | DAEMON_NAME=$2 230 | CONSOLE_OUTPUT_FILE=$LOG_DIR/$DAEMON_NAME.out 231 | shift 2 232 | ;; 233 | -loggc) 234 | if [ -z "$KAFKA_GC_LOG_OPTS" ]; then 235 | GC_LOG_ENABLED="true" 236 | fi 237 | shift 238 | ;; 239 | -daemon) 240 | DAEMON_MODE="true" 241 | shift 242 | ;; 243 | *) 244 | break 245 | ;; 246 | esac 247 | done 248 | 249 | # GC options 250 | GC_FILE_SUFFIX='-gc.log' 251 | GC_LOG_FILE_NAME='' 252 | if [ "x$GC_LOG_ENABLED" = "xtrue" ]; then 253 | GC_LOG_FILE_NAME=$DAEMON_NAME$GC_FILE_SUFFIX 254 | # the first segment of the version number, which is '1' for releases before Java 9 255 | # it then becomes '9', '10', ... 256 | JAVA_MAJOR_VERSION=$($JAVA -version 2>&1 | sed -E -n 's/.* version "([^.-]*).*"/\1/p') 257 | if [[ "$JAVA_MAJOR_VERSION" -ge "9" ]] ; then 258 | KAFKA_GC_LOG_OPTS="-Xlog:gc*:file=$LOG_DIR/$GC_LOG_FILE_NAME:time,tags:filecount=10,filesize=102400" 259 | else 260 | KAFKA_GC_LOG_OPTS="-Xloggc:$LOG_DIR/$GC_LOG_FILE_NAME -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -XX:+PrintGCTimeStamps -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=10 -XX:GCLogFileSize=100M" 261 | fi 262 | fi 263 | 264 | # If Cygwin is detected, classpath is converted to Windows format. 
265 | (( CYGWIN )) && CLASSPATH=$(cygpath --path --mixed "${CLASSPATH}") 266 | 267 | # Launch mode 268 | if [ "x$DAEMON_MODE" = "xtrue" ]; then 269 | nohup $JAVA $KAFKA_HEAP_OPTS $KAFKA_JVM_PERFORMANCE_OPTS $KAFKA_GC_LOG_OPTS $KAFKA_JMX_OPTS $KAFKA_LOG4J_OPTS -cp $CLASSPATH $KAFKA_OPTS "$@" > "$CONSOLE_OUTPUT_FILE" 2>&1 < /dev/null & 270 | else 271 | exec $JAVA $KAFKA_HEAP_OPTS $KAFKA_JVM_PERFORMANCE_OPTS $KAFKA_GC_LOG_OPTS $KAFKA_JMX_OPTS $KAFKA_LOG4J_OPTS -cp $CLASSPATH $KAFKA_OPTS "$@" 272 | fi 273 | -------------------------------------------------------------------------------- /labs/04-kafka-examples/kafka-examples.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /labs/04-kafka-examples/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | ziwon.github.io 8 | kafka-examples 9 | 1.0-SNAPSHOT 10 | 11 | 12 | 13 | org.apache.kafka 14 | kafka_2.11 15 | 0.10.0.0 16 | 17 | 18 | 19 | 20 | 21 | 22 | org.apache.maven.plugins 23 | maven-compiler-plugin 24 | 25 | 1.7 26 | 1.7 27 | 28 | 29 | 30 | maven-assembly-plugin 31 | 2.4 32 | 33 | 34 | 35 | kafka.examples.KafkaConsumerProducerDemo 36 | 37 | 38 | 39 | jar-with-dependencies 40 | 41 | 42 | 43 | 44 | make-assembly 45 | package 46 | 47 | single 48 | 49 | 50 | 51 | 52 | 53 | org.skife.maven 54 | really-executable-jar-maven-plugin 55 | 1.1.0 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | kafka-example 64 | 65 | 66 | 67 | 68 | package 69 | 70 | really-executable-jar 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | -------------------------------------------------------------------------------- /labs/04-kafka-examples/src/main/java/kafka/examples/Consumer.java: -------------------------------------------------------------------------------- 1 | package kafka.examples; 2 | 3 | import kafka.utils.ShutdownableThread; 4 | import org.apache.kafka.clients.consumer.ConsumerConfig; 5 | import org.apache.kafka.clients.consumer.ConsumerRecord; 6 | import org.apache.kafka.clients.consumer.ConsumerRecords; 7 | import org.apache.kafka.clients.consumer.KafkaConsumer; 8 | 9 | import java.util.Collections; 10 | import java.util.Properties; 11 | 12 | /** 13 | * Created by randy on 17/08/2017. 
14 | */ 15 | public class Consumer extends ShutdownableThread { 16 | private final KafkaConsumer consumer; 17 | private final String topic; 18 | 19 | public Consumer(String topic) { 20 | super("KafkaConsumerExample", false); 21 | Properties props = new Properties(); 22 | props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, KafkaProperties.KAFKA_SERVER_URL + ":" + KafkaProperties.KAFKA_SERVER_PORT); 23 | props.put(ConsumerConfig.GROUP_ID_CONFIG, "DemoConsumer"); 24 | props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "true"); 25 | props.put(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "1000"); 26 | props.put(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG, "30000"); 27 | props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.IntegerDeserializer"); 28 | props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer"); 29 | 30 | consumer = new KafkaConsumer<>(props); 31 | this.topic = topic; 32 | } 33 | 34 | @Override 35 | public void doWork() { 36 | consumer.subscribe(Collections.singletonList(this.topic)); 37 | ConsumerRecords records = consumer.poll(1000); 38 | for (ConsumerRecord record : records) { 39 | System.out.println("Received message: (" + record.key() + ", " + record.value() + ") at offset " + record.offset()); 40 | } 41 | } 42 | 43 | @Override 44 | public String name() { 45 | return null; 46 | } 47 | 48 | @Override 49 | public boolean isInterruptible() { 50 | return false; 51 | } 52 | } 53 | 54 | -------------------------------------------------------------------------------- /labs/04-kafka-examples/src/main/java/kafka/examples/KafkaConsumerProducerDemo.java: -------------------------------------------------------------------------------- 1 | package kafka.examples; 2 | 3 | /** 4 | * Created by randy on 17/08/2017. 5 | */ 6 | public class KafkaConsumerProducerDemo { 7 | public static void main(String[] args) { 8 | boolean isAsync = args.length == 0 || !args[0].trim().equalsIgnoreCase("sync"); 9 | Producer producerThread = new Producer(KafkaProperties.TOPIC, isAsync); 10 | producerThread.start(); 11 | 12 | Consumer consumerThread = new Consumer(KafkaProperties.TOPIC); 13 | consumerThread.start(); 14 | 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /labs/04-kafka-examples/src/main/java/kafka/examples/KafkaProperties.java: -------------------------------------------------------------------------------- 1 | package kafka.examples; 2 | 3 | /** 4 | * Created by randy on 17/08/2017. 
5 | */ 6 | public class KafkaProperties { 7 | public static final String TOPIC = "topic1"; 8 | public static final String KAFKA_SERVER_URL = "localhost"; 9 | public static final int KAFKA_SERVER_PORT = 9092; 10 | public static final int KAFKA_PRODUCER_BUFFER_SIZE = 64 * 1024; 11 | public static final int CONNECTION_TIMEOUT = 100000; 12 | public static final String TOPIC2 = "topic2"; 13 | public static final String TOPIC3 = "topic3"; 14 | public static final String CLIENT_ID = "SimpleConsumerDemoClient"; 15 | 16 | private KafkaProperties() {} 17 | } 18 | -------------------------------------------------------------------------------- /labs/04-kafka-examples/src/main/java/kafka/examples/Producer.java: -------------------------------------------------------------------------------- 1 | package kafka.examples; 2 | 3 | import org.apache.kafka.clients.producer.Callback; 4 | import org.apache.kafka.clients.producer.KafkaProducer; 5 | import org.apache.kafka.clients.producer.ProducerRecord; 6 | import org.apache.kafka.clients.producer.RecordMetadata; 7 | 8 | import java.util.Properties; 9 | import java.util.concurrent.ExecutionException; 10 | 11 | /** 12 | * Created by randy on 17/08/2017. 13 | */ 14 | 15 | public class Producer extends Thread { 16 | private final KafkaProducer producer; 17 | private final String topic; 18 | private final Boolean isAsync; 19 | 20 | public Producer(String topic, Boolean isAsync) { 21 | Properties props = new Properties(); 22 | props.put("bootstrap.servers", KafkaProperties.KAFKA_SERVER_URL + ":" + KafkaProperties.KAFKA_SERVER_PORT); 23 | props.put("client.id", "DemoProducer"); 24 | props.put("key.serializer", "org.apache.kafka.common.serialization.IntegerSerializer"); 25 | props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer"); 26 | producer = new KafkaProducer<>(props); 27 | this.topic = topic; 28 | this.isAsync = isAsync; 29 | } 30 | 31 | public void run() { 32 | int messageNo = 1; 33 | while (true) { 34 | String messageStr = "Message_" + messageNo; 35 | long startTime = System.currentTimeMillis(); 36 | if (isAsync) { // Send asynchronously 37 | producer.send(new ProducerRecord<>(topic, messageNo, messageStr), new DemoCallBack(startTime, messageNo, messageStr)); 38 | } else { // Send synchronously 39 | try { 40 | producer.send(new ProducerRecord<>(topic, messageNo, messageStr)).get(); 41 | System.out.println("Sent message: (" + messageNo + ", " + messageStr + ")"); 42 | } catch (InterruptedException | ExecutionException e) { 43 | e.printStackTrace(); 44 | } 45 | } 46 | ++messageNo; 47 | } 48 | } 49 | } 50 | 51 | class DemoCallBack implements Callback { 52 | 53 | private final long startTime; 54 | private final int key; 55 | private final String message; 56 | 57 | public DemoCallBack(long startTime, int key, String message) { 58 | this.startTime = startTime; 59 | this.key = key; 60 | this.message = message; 61 | } 62 | 63 | public void onCompletion(RecordMetadata metadata, Exception exception) { 64 | long elapsedTime = System.currentTimeMillis() - startTime; 65 | if (metadata != null) { 66 | System.out.println("message(" + key + ", " + message + ") sent to partition(" + metadata.partition() + "), " + "offset(" + metadata.offset() + ") in " + elapsedTime + " ms"); 67 | } else { 68 | exception.printStackTrace(); 69 | } 70 | } 71 | } 72 | --------------------------------------------------------------------------------
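To build the `04-kafka-examples` module and run the demo against the lab broker on `localhost:9092`, a plain Maven build should be enough. The jar name below is an assumption derived from the pom (artifactId `kafka-examples`, version `1.0-SNAPSHOT`, assembly descriptor `jar-with-dependencies`); adjust it if your build output differs. The bundled `bin/` scripts mirror Kafka's own launcher and expect a Kafka source or release layout, so invoking the shaded jar directly is the simpler route:

    cd labs/04-kafka-examples
    mvn clean package
    java -cp target/kafka-examples-1.0-SNAPSHOT-jar-with-dependencies.jar \
        kafka.examples.KafkaConsumerProducerDemo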