├── README.md
├── docs
└── zookeeper_tutorial.pdf
└── labs
├── 01-starting-kafka.md
├── 02-create-topic.md
├── 03-producer-consumer.md
└── 04-kafka-examples
├── bin
├── java-producer-consumer-demo.sh
└── kafka-run-class.sh
├── kafka-examples.iml
├── pom.xml
└── src
└── main
└── java
└── kafka
└── examples
├── Consumer.java
├── KafkaConsumerProducerDemo.java
├── KafkaProperties.java
└── Producer.java
/README.md:
--------------------------------------------------------------------------------
1 | # kafka-learning
2 |
3 |
4 |
5 |
6 |
7 | ## Overview
8 | - [Home](https://kafka.apache.org/)
9 | - [QuickStart](https://kafka.apache.org/quickstart)
10 |
11 | ## Tutorials
12 | - [Cloudurable Kafka Tutorial](http://cloudurable.com/blog/kafka-tutorial/index.html)
13 | - [Learning Journal Kafka Tutorial](https://www.youtube.com/playlist?list=PLkz1SCf5iB4enAR00Z46JwY9GGkaS2NON)
14 | - [Udemy Apache Kafka Series - Learning Apache Kafka for Beginners](https://goo.gl/GaCKQN) (All Levels)
15 | - [Udemy Apache Kafka Series - Kafka Connect Hands-on Learning](https://goo.gl/wLLLY9) (Intermediate)
16 | - [Udemy Apache Kafka Series - Kafka Streams for Data Processing](https://goo.gl/bro314) (Intermediate)
17 | - [Udemy Apache Kafka Series - Kafka Cluster Setup & Administration](https://goo.gl/1uYAuU) (Expert)
18 | - [Udemy Apache Kafka Series - Confluent Schema Registry & REST Proxy](https://goo.gl/XgWcVz) (Intermediate)
19 |
20 |
21 | ## Books
22 | - [Kafka: The Definitive Guide: Real-Time Data and Stream Processing at Scale](https://www.amazon.com/Kafka-Definitive-Real-Time-Stream-Processing/dp/1491936169)
23 | - [Kafka: The Definitive Guide (Preview Edition)](https://www.confluent.io/resources/kafka-definitive-guide-preview-edition/)
24 |
25 | ## Videos
26 | - [ETL Is Dead, Long Live Streams: real-time streams w/ Apache Kafka](https://www.youtube.com/watch?v=I32hmY4diFY) (2017.02)
27 | - [Airstream: Spark Streaming At Airbnb](https://youtu.be/tJ1uIHQtoNc) (2016.07)
28 | - [Developing Real-Time Data Pipelines with Apache Kafka](https://www.youtube.com/watch?v=GRPLRONVDWY) (2016.03)
29 | - [Building Realtime Data Pipelines with Kafka Connect and Spark Streaming](https://youtu.be/wMLAlJimPzk) (2016.02)
30 | - [Putting Apache Kafka to Use for Event Streams](https://www.youtube.com/watch?v=el-SqcZLZlI) (2015.04)
31 | - [Introducing Exactly Once Semantics in Apache Kafka with Matthias J. Sax](https://www.youtube.com/watch?v=Wo9jlaz8h0k) (2017.06)
32 | - [Exactly Once Streaming from Kafka - Cody Koeninger (Kixer)](https://www.youtube.com/watch?v=fXnNEq1v3VA) (2015.06)
33 |
34 | ## Articles
35 | - [Apache Kafka Goes 1.0](https://www.confluent.io/blog/apache-kafka-goes-1-0/)
36 | - [Exactly-once Semantics are Possible: Here’s How Kafka Does it](https://www.confluent.io/blog/exactly-once-semantics-are-possible-heres-how-apache-kafka-does-it/)
37 | - [Broker Config](https://kafka.apache.org/documentation/#brokerconfigs)
38 | - [How to choose the number of topics/partitions in a Kafka cluster?](https://www.confluent.io/blog/how-to-choose-the-number-of-topicspartitions-in-a-kafka-cluster/) (2015.03)
39 | - [min.insync.replicas gotchas: What does it actually do?](https://logallthethings.com/2016/07/11/min-insync-replicas-what-does-it-actually-do/) (2016.07)
40 | - [How Kafka’s Storage Internals Work](https://thehoard.blog/how-kafkas-storage-internals-work-3a29b02e026) (2016.10)
41 | - Getting Started with Apache Kafka for the Baffled [Part 1](http://www.shayne.me/blog/2015/2015-06-16-everything-about-kafka-part-1/), [Part 2](http://www.shayne.me/blog/2015/2015-06-25-everything-about-kafka-part-2/) (2015.06)
42 |
43 | ## Tools
44 | - [Kafka cluster as Kubernetes StatefulSet](https://github.com/Yolean/kubernetes-kafka)
45 | - [Kafka Manager](https://github.com/yahoo/kafka-manager)
46 | - [Kafka Docker](https://github.com/wurstmeister/kafka-docker)
47 |
48 | ## Links
49 | - [Wiki](https://cwiki.apache.org/confluence/display/KAFKA/Index)
50 | - [FAQ](https://cwiki.apache.org/confluence/display/KAFKA/FAQ)
51 | - [Committers](http://kafka.apache.org/committers)
52 | - [Ecosystem](https://cwiki.apache.org/confluence/display/KAFKA/Ecosystem)
53 | - [Papers & Talks](https://cwiki.apache.org/confluence/display/KAFKA/Kafka+papers+and+presentations)
54 | - [Performance Testing](https://cwiki.apache.org/confluence/display/KAFKA/Performance+testing)
55 | - [Awesome Kafka](https://github.com/infoslack/awesome-kafka)
56 |
--------------------------------------------------------------------------------
/docs/zookeeper_tutorial.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ziwon/kafka-learning/76128b32c88f057d06584528fee04c0965d033c0/docs/zookeeper_tutorial.pdf
--------------------------------------------------------------------------------
/labs/01-starting-kafka.md:
--------------------------------------------------------------------------------
1 | Docker for Mac (>= 1.12), Linux, or Docker for Windows 10:
2 |
3 | docker run --rm -it \
4 | -p 2181:2181 -p 3030:3030 -p 8081:8081 \
5 | -p 8082:8082 -p 8083:8083 -p 9092:9092 \
6 | -e ADV_HOST=127.0.0.1 \
7 | landoop/fast-data-dev
8 |
9 | Docker toolbox:
10 |
11 | docker run --rm -it \
12 | -p 2181:2181 -p 3030:3030 -p 8081:8081 \
13 | -p 8082:8082 -p 8083:8083 -p 9092:9092 \
14 | -e ADV_HOST=192.168.99.100 \
15 | landoop/fast-data-dev
16 |
17 | Kafka command-line tools:
18 |
19 | docker run --rm -it --net=host landoop/fast-data-dev bash
20 |
21 |
22 | Example output:
23 |
24 | docker run --rm -it \
25 | -p 2181:2181 -p 3030:3030 -p 8081:8081 \
26 | -p 8082:8082 -p 8083:8083 -p 9092:9092 \
27 | -e ADV_HOST=127.0.0.1 \
28 | landoop/fast-data-dev
29 | Setting advertised host to 127.0.0.1.
30 | Operating system RAM available is 1755 MiB, which is less than the lowest
31 | recommended of 5120 MiB. Your system performance may be seriously impacted.
32 | Starting services.
33 | This is landoop’s fast-data-dev. Kafka 0.10.2.1, Confluent OSS 3.2.1.
34 | You may visit http://127.0.0.1:3030 in about a minute.
35 | 2017-05-20 02:23:39,015 CRIT Supervisor running as root (no user in config file)
36 | 2017-05-20 02:23:39,015 WARN No file matches via include "/etc/supervisord.d/*.conf"
37 | 2017-05-20 02:23:39,020 INFO supervisord started with pid 6
38 | 2017-05-20 02:23:40,028 INFO spawned: 'sample-data' with pid 94
39 | 2017-05-20 02:23:40,032 INFO spawned: 'zookeeper' with pid 95
40 | 2017-05-20 02:23:40,041 INFO spawned: 'caddy' with pid 97
41 | 2017-05-20 02:23:40,044 INFO spawned: 'broker' with pid 99
42 | 2017-05-20 02:23:40,047 INFO spawned: 'smoke-tests' with pid 103
43 | 2017-05-20 02:23:40,050 INFO spawned: 'connect-distributed' with pid 106
44 | 2017-05-20 02:23:40,053 INFO spawned: 'logs-to-kafka' with pid 107
45 | 2017-05-20 02:23:40,057 INFO spawned: 'schema-registry' with pid 109
46 | 2017-05-20 02:23:40,060 INFO spawned: 'rest-proxy' with pid 110
47 | 2017-05-20 02:23:41,112 INFO success: sample-data entered RUNNING state, process has stayed up for > than 1 seconds (startsecs)
48 | 2017-05-20 02:23:41,112 INFO success: zookeeper entered RUNNING state, process has stayed up for > than 1 seconds (startsecs)
49 | 2017-05-20 02:23:41,113 INFO success: caddy entered RUNNING state, process has stayed up for > than 1 seconds (startsecs)
50 | 2017-05-20 02:23:41,113 INFO success: broker entered RUNNING state, process has stayed up for > than 1 seconds (startsecs)
51 | 2017-05-20 02:23:41,113 INFO success: smoke-tests entered RUNNING state, process has stayed up for > than 1 seconds (startsecs)
52 | 2017-05-20 02:23:41,113 INFO success: connect-distributed entered RUNNING state, process has stayed up for > than 1 seconds (startsecs)
53 | 2017-05-20 02:23:41,113 INFO success: logs-to-kafka entered RUNNING state, process has stayed up for > than 1 seconds (startsecs)
54 | 2017-05-20 02:23:41,114 INFO success: schema-registry entered RUNNING state, process has stayed up for > than 1 seconds (startsecs)
55 | 2017-05-20 02:23:41,114 INFO success: rest-proxy entered RUNNING state, process has stayed up for > than 1 seconds (startsecs)
56 | 2017-05-20 02:23:42,757 INFO exited: schema-registry (exit status 1; not expected)
57 | 2017-05-20 02:23:42,765 INFO spawned: 'schema-registry' with pid 291
58 | 2017-05-20 02:23:43,055 INFO exited: rest-proxy (exit status 1; not expected)
59 | 2017-05-20 02:23:43,537 INFO spawned: 'rest-proxy' with pid 330
60 | 2017-05-20 02:23:44,147 INFO success: schema-registry entered RUNNING state, process has stayed up for > than 1 seconds (startsecs)
61 | 2017-05-20 02:23:44,562 INFO success: rest-proxy entered RUNNING state, process has stayed up for > than 1 seconds (startsecs)
62 |
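Once the services report RUNNING, you can sanity-check that the broker is reachable from the host with a few lines of Java. This sketch is not part of the lab; it assumes the `kafka-clients` library is on the classpath, that the container was started with `ADV_HOST=127.0.0.1` as above, and the class name `BrokerCheck` is only for illustration:

```java
import java.util.List;
import java.util.Map;
import java.util.Properties;

import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.PartitionInfo;
import org.apache.kafka.common.serialization.StringDeserializer;

public class BrokerCheck {
    public static void main(String[] args) {
        Properties props = new Properties();
        // Clients must use the address the broker advertises (ADV_HOST), not the container-internal IP.
        props.setProperty("bootstrap.servers", "127.0.0.1:9092");
        props.setProperty("key.deserializer", StringDeserializer.class.getName());
        props.setProperty("value.deserializer", StringDeserializer.class.getName());

        try (KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props)) {
            // listTopics() forces a metadata round-trip, so it fails fast if the broker is unreachable.
            Map<String, List<PartitionInfo>> topics = consumer.listTopics();
            System.out.println("Connected. Topics visible to this client: " + topics.keySet());
        }
    }
}
```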
--------------------------------------------------------------------------------
/labs/02-create-topic.md:
--------------------------------------------------------------------------------
1 | To get the Kafka command-line tools:
2 |
3 | docker run --rm -it --net=host landoop/fast-data-dev bash
4 |
5 | List all the Kafka commands available in this box:
6 |
7 | root@fast-data-dev / $ kafka-
8 | kafka-acls kafka-replica-verification
9 | kafka-avro-console-consumer kafka-rest-run-class
10 | kafka-avro-console-producer kafka-rest-start
11 | kafka-configs kafka-rest-stop
12 | kafka-console-consumer kafka-rest-stop-service
13 | kafka-console-producer kafka-run-class
14 | kafka-consumer-groups kafka-server-start
15 | kafka-consumer-offset-checker kafka-server-stop
16 | kafka-consumer-perf-test kafka-simple-consumer-shell
17 | kafka-mirror-maker kafka-streams-application-reset
18 | kafka-preferred-replica-election kafka-topics
19 | kafka-producer-perf-test kafka-verifiable-consumer
20 | kafka-reassign-partitions kafka-verifiable-producer
21 | kafka-replay-log-producer
22 |
23 | Options for `kafka-topics`:
24 |
25 | root@fast-data-dev / $ kafka-topics
26 | Create, delete, describe, or change a topic.
27 | Option Description
28 | ------ -----------
29 | --alter Alter the number of partitions,
30 | replica assignment, and/or
31 | configuration for the topic.
32 | --config A topic configuration override for the
33 | topic being created or altered.The
34 | following is a list of valid
35 | configurations:
36 | cleanup.policy
37 | compression.type
38 | delete.retention.ms
39 | file.delete.delay.ms
40 | flush.messages
41 | flush.ms
42 | follower.replication.throttled.
43 | replicas
44 | index.interval.bytes
45 | leader.replication.throttled.replicas
46 | max.message.bytes
47 | message.format.version
48 | message.timestamp.difference.max.ms
49 | message.timestamp.type
50 | min.cleanable.dirty.ratio
51 | min.compaction.lag.ms
52 | min.insync.replicas
53 | preallocate
54 | retention.bytes
55 | retention.ms
56 | segment.bytes
57 | segment.index.bytes
58 | segment.jitter.ms
59 | segment.ms
60 | unclean.leader.election.enable
61 | See the Kafka documentation for full
62 | details on the topic configs.
63 | --create Create a new topic.
64 | --delete Delete a topic
65 | --delete-config A topic configuration override to be
66 | removed for an existing topic (see
67 | the list of configurations under the
68 | --config option).
69 | --describe List details for the given topics.
70 | --disable-rack-aware Disable rack aware replica assignment
71 | --force Suppress console prompts
72 | --help Print usage information.
73 | --if-exists if set when altering or deleting
74 | topics, the action will only execute
75 | if the topic exists
76 | --if-not-exists if set when creating topics, the
77 | action will only execute if the
78 | topic does not already exist
79 | --list List all available topics.
80 | --partitions The number of partitions for the topic
81 | being created or altered (WARNING:
82 | If partitions are increased for a
83 | topic that has a key, the partition
84 | logic or ordering of the messages
85 | will be affected
86 | --replica-assignment A list of manual partition-to-broker assignments for the topic being created or altered.
91 | --replication-factor The replication factor for each partition in the topic being created.
93 | --topic The topic to be create, alter or
94 | describe. Can also accept a regular
95 | expression except for --create option
96 | --topics-with-overrides if set when describing topics, only
97 | show topics that have overridden
98 | configs
99 | --unavailable-partitions if set when describing topics, only
100 | show partitions whose leader is not
101 | available
102 | --under-replicated-partitions if set when describing topics, only
103 | show under replicated partitions
104 | --zookeeper REQUIRED: The connection string for
105 | the zookeeper connection in the form
106 | host:port. Multiple URLS can be
107 | given to allow fail-over.
108 |
109 |
110 | Create a topic:
111 |
112 | root@fast-data-dev / $ kafka-topics --zookeeper 127.0.0.1:2181 --create --topic first_topic --partitions 3 --replication-factor 1
113 | WARNING: Due to limitations in metric names, topics with a period ('.') or underscore ('_') could collide. To avoid issues it is best to use either, but not both.
114 | Created topic "first_topic".
115 |
116 | Describe a topic:
117 |
118 | root@fast-data-dev / $ kafka-topics --zookeeper 127.0.0.1:2181 --describe --topic first_topic
119 | Topic:first_topic PartitionCount:3 ReplicationFactor:1 Configs:
120 | Topic: first_topic Partition: 0 Leader: 0 Replicas: 0 Isr: 0
121 | Topic: first_topic Partition: 1 Leader: 0 Replicas: 0 Isr: 0
122 | Topic: first_topic Partition: 2 Leader: 0 Replicas: 0 Isr: 0
123 |
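The same information that `--describe` prints is also available from a client. A minimal sketch, assuming the `kafka-clients` library and the broker from the previous lab at `127.0.0.1:9092` (the class name `DescribeTopic` is only for illustration):

```java
import java.util.Properties;

import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.PartitionInfo;
import org.apache.kafka.common.serialization.StringDeserializer;

public class DescribeTopic {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.setProperty("bootstrap.servers", "127.0.0.1:9092");
        props.setProperty("key.deserializer", StringDeserializer.class.getName());
        props.setProperty("value.deserializer", StringDeserializer.class.getName());

        try (KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props)) {
            // One PartitionInfo per partition, mirroring the --describe output above.
            for (PartitionInfo info : consumer.partitionsFor("first_topic")) {
                System.out.println("Partition: " + info.partition()
                        + ", Leader: " + info.leader().id()
                        + ", Replicas: " + info.replicas().length
                        + ", Isr: " + info.inSyncReplicas().length);
            }
        }
    }
}
```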
124 | Delete a topic:
125 |
126 | root@fast-data-dev / $ kafka-topics --zookeeper 127.0.0.1:2181 --topic second_topic --delete
127 | Topic second_topic is marked for deletion.
128 | Note: This will have no impact if delete.topic.enable is not set to true.
129 |
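Topics can also be created programmatically. One caveat: the `AdminClient` used below was only added in `kafka-clients` 0.11, so it is newer than the 0.10.x stack this lab runs; treat it as a sketch of the newer API rather than part of the lab (the topic and class names are illustrative):

```java
import java.util.Collections;
import java.util.Properties;

import org.apache.kafka.clients.admin.AdminClient;
import org.apache.kafka.clients.admin.NewTopic;

public class CreateTopicExample {
    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        props.setProperty("bootstrap.servers", "127.0.0.1:9092");

        try (AdminClient admin = AdminClient.create(props)) {
            // 3 partitions, replication factor 1 -- the same layout as the CLI example above.
            NewTopic topic = new NewTopic("third_topic", 3, (short) 1);
            admin.createTopics(Collections.singletonList(topic)).all().get();
            System.out.println("Topics now visible: " + admin.listTopics().names().get());
        }
    }
}
```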
--------------------------------------------------------------------------------
/labs/03-producer-consumer.md:
--------------------------------------------------------------------------------
1 | Options for `kafka-console-producer`:
2 |
3 | Read data from standard input and publish it to Kafka.
4 | Option Description
5 | ------ -----------
6 | --batch-size Number of messages to send in a single
7 | batch if they are not being sent
8 | synchronously. (default: 200)
9 | --broker-list REQUIRED: The broker list string in
10 | the form HOST1:PORT1,HOST2:PORT2.
11 | --compression-codec [String: The compression codec: either 'none',
12 | compression-codec] 'gzip', 'snappy', or 'lz4'.If
13 | specified without value, then it
14 | defaults to 'gzip'
16 | --key-serializer The class name of the message encoder implementation to use for
17 | serializing keys. (default: kafka.
18 | serializer.DefaultEncoder)
19 | --line-reader The class name of the class to use for
20 | reading lines from standard in. By
21 | default each line is read as a
22 | separate message. (default: kafka.
23 | tools.
24 | ConsoleProducer$LineMessageReader)
25 | --max-block-ms The max time that the producer will block for during a send request
27 | (default: 60000)
28 | --max-memory-bytes The total memory used by the producer to buffer records waiting to be sent
30 | to the server. (default: 33554432)
31 | --max-partition-memory-bytes The buffer size allocated for a partition. When records are received
33 | which are smaller than this size the
34 | producer will attempt to
35 | optimistically group them together
36 | until this size is reached.
37 | (default: 16384)
38 | --message-send-max-retries Brokers can fail receiving the message
39 | for multiple reasons, and being
40 | unavailable transiently is just one
41 | of them. This property specifies the
42 | number of retries before the
43 | producer gives up and drops this
44 | message. (default: 3)
45 | --metadata-expiry-ms The period of time in milliseconds after which we force a refresh of
47 | metadata even if we haven't seen any
48 | leadership changes. (default: 300000)
49 | --old-producer Use the old producer implementation.
50 | --producer-property A mechanism to pass user-defined properties in the form key=value to
52 | the producer.
53 | --producer.config Producer config properties file. Note
54 | that [producer-property] takes
55 | precedence over this config.
56 | --property A mechanism to pass user-defined
57 | properties in the form key=value to
58 | the message reader. This allows
59 | custom configuration for a user-
60 | defined message reader.
61 | --queue-enqueuetimeout-ms Timeout for event enqueue (default: 2147483647)
63 | --queue-size If set and the producer is running in
64 | asynchronous mode, this gives the
65 | maximum amount of messages will
66 | queue awaiting sufficient batch
67 | size. (default: 10000)
68 | --request-required-acks The required acks of the producer requests (default: 1)
70 | --request-timeout-ms The ack timeout of the producer requests. Value must be non-negative
72 | and non-zero (default: 1500)
73 | --retry-backoff-ms Before each retry, the producer
74 | refreshes the metadata of relevant
75 | topics. Since leader election takes
76 | a bit of time, this property
77 | specifies the amount of time that
78 | the producer waits before refreshing
79 | the metadata. (default: 100)
80 | --socket-buffer-size The size of the tcp RECV size.
81 | (default: 102400)
82 | --sync If set message send requests to the
83 | brokers are synchronously, one at a
84 | time as they arrive.
85 | --timeout If set and the producer is running in
86 | asynchronous mode, this gives the
87 | maximum amount of time a message
88 | will queue awaiting sufficient batch
89 | size. The value is given in ms.
90 | (default: 1000)
91 | --topic REQUIRED: The topic id to produce
92 | messages to.
93 | --value-serializer The class name of the message encoder implementation to use for
95 | serializing values. (default: kafka.
96 | serializer.DefaultEncoder)
97 |
98 |
99 | Publish data to a topic:
100 |
101 | root@fast-data-dev / $ echo "your message" | kafka-console-producer --broker-list 127.0.0.1:9092 --topic first_topic
102 |
103 | Options for `kafka-console-consumer`:
104 |
105 | The console consumer is a tool that reads data from Kafka and outputs it to standard output.
106 | Option Description
107 | ------ -----------
108 | --blacklist Blacklist of topics to exclude from
109 | consumption.
110 | --bootstrap-server REQUIRED (unless old consumer is used): The server to connect to.
112 | --consumer-property A mechanism to pass user-defined properties in the form key=value to
114 | the consumer.
115 | --consumer.config Consumer config properties file. Note
116 | that [consumer-property] takes
117 | precedence over this config.
118 | --csv-reporter-enabled If set, the CSV metrics reporter will
119 | be enabled
120 | --delete-consumer-offsets If specified, the consumer path in
121 | zookeeper is deleted when starting up
122 | --enable-systest-events Log lifecycle events of the consumer
123 | in addition to logging consumed
124 | messages. (This is specific for
125 | system tests.)
126 | --formatter The name of a class to use for
127 | formatting kafka messages for
128 | display. (default: kafka.tools.
129 | DefaultMessageFormatter)
130 | --from-beginning If the consumer does not already have
131 | an established offset to consume
132 | from, start with the earliest
133 | message present in the log rather
134 | than the latest message.
135 | --key-deserializer
137 | --max-messages The maximum number of messages to
138 | consume before exiting. If not set,
139 | consumption is continual.
140 | --metrics-dir If csv-reporter-enabled is set, and this parameter is set, the csv
142 | metrics will be output here
143 | --new-consumer Use the new consumer implementation.
144 | This is the default.
145 | --offset The offset id to consume from (a non-
146 | negative number), or 'earliest'
147 | which means from beginning, or
148 | 'latest' which means from end
149 | (default: latest)
150 | --partition The partition to consume from.
151 | --property The properties to initialize the
152 | message formatter.
153 | --skip-message-on-error If there is an error when processing a
154 | message, skip it instead of halt.
155 | --timeout-ms If specified, exit if no message is
156 | available for consumption for the
157 | specified interval.
158 | --topic The topic id to consume on.
159 | --value-deserializer
161 | --whitelist Whitelist of topics to include for
162 | consumption.
163 | --zookeeper REQUIRED (only when using old
164 | consumer): The connection string for
165 | the zookeeper connection in the form
166 | host:port. Multiple URLS can be
167 | given to allow fail-over.
168 |
169 |
170 | Consuming data from a topic:
171 |
172 | root@fast-data-dev / $ kafka-console-consumer --bootstrap-server 127.0.0.1:9092 --topic first_topic --from-beginning
173 |
174 |
175 |
176 | Consuming data from a topic using `consumer-property`:
177 |
178 | root@fast-data-dev / $ kafka-console-consumer --bootstrap-server 127.0.0.1:9092 --topic first_topic --consumer-property group.id=testgroup --from-beginning
179 |
180 |
181 | Java Producer Demo:
182 |
183 |
184 | ```java
185 | import org.apache.kafka.clients.producer.KafkaProducer;
186 | import org.apache.kafka.clients.producer.Producer;
187 | import org.apache.kafka.clients.producer.ProducerRecord;
188 | import org.apache.kafka.common.serialization.StringSerializer;
189 |
190 | import java.util.Properties;
191 |
192 | public class KafkaProducerDemo {
193 | public static void main(String[] args) {
194 | Properties properties = new Properties();
195 |
196 | // kafka bootstrap server
197 | properties.setProperty("bootstrap.servers", "127.0.0.1:9092");
198 | properties.setProperty("key.serializer", StringSerializer.class.getName());
199 | properties.setProperty("value.serializer", StringSerializer.class.getName());
200 | // producer acks
201 | properties.setProperty("acks", "1");
202 | properties.setProperty("retries", "3");
203 | properties.setProperty("linger.ms", "1");
204 |
205 | Producer<String, String> producer = new KafkaProducer<>(properties);
206 |
207 |
208 | for (int key = 0; key < 10; key++) {
209 | ProducerRecord<String, String> producerRecord =
210 | new ProducerRecord<>("second_topic", Integer.toString(key), "message that has key: " + Integer.toString(key));
211 | producer.send(producerRecord);
212 | }
213 |
214 |
215 |
216 | producer.close();
217 | }
218 | }
219 | ```
220 |
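The demo above is fire-and-forget: `send()` is asynchronous and its result is never checked. If you want confirmation of where each record landed, you can block on the `Future` that `send()` returns. A minimal variant (the class name `KafkaSyncProducerDemo` is just for illustration):

```java
import java.util.Properties;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.clients.producer.RecordMetadata;
import org.apache.kafka.common.serialization.StringSerializer;

public class KafkaSyncProducerDemo {
    public static void main(String[] args) throws Exception {
        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", "127.0.0.1:9092");
        properties.setProperty("key.serializer", StringSerializer.class.getName());
        properties.setProperty("value.serializer", StringSerializer.class.getName());
        properties.setProperty("acks", "1");

        try (Producer<String, String> producer = new KafkaProducer<>(properties)) {
            for (int key = 0; key < 10; key++) {
                ProducerRecord<String, String> record = new ProducerRecord<>(
                        "second_topic", Integer.toString(key), "message that has key: " + key);
                // get() blocks until the broker acknowledges the write (or the send fails).
                RecordMetadata metadata = producer.send(record).get();
                System.out.println("partition=" + metadata.partition() + ", offset=" + metadata.offset());
            }
        }
    }
}
```

Blocking on every record is slow compared with batched asynchronous sends, so treat this as a debugging aid rather than a throughput pattern.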
221 |
222 | Java Consumer Demo:
223 |
224 | ```java
225 | import org.apache.kafka.clients.consumer.ConsumerRecord;
226 | import org.apache.kafka.clients.consumer.ConsumerRecords;
227 | import org.apache.kafka.clients.consumer.KafkaConsumer;
228 | import org.apache.kafka.common.serialization.StringDeserializer;
229 |
230 | import java.util.Arrays;
231 | import java.util.Properties;
232 |
233 | public class KafkaConsumerDemo {
234 | public static void main(String[] args) {
235 | Properties properties = new Properties();
236 |
237 | // kafka bootstrap server
238 | properties.setProperty("bootstrap.servers", "127.0.0.1:9092");
239 | properties.setProperty("key.deserializer", StringDeserializer.class.getName());
240 | properties.setProperty("value.deserializer", StringDeserializer.class.getName());
241 |
242 | properties.setProperty("group.id", "test");
243 | properties.setProperty("enable.auto.commit", "false");
244 | // properties.setProperty("auto.commit.interval.ms", "1000");
245 | properties.setProperty("auto.offset.reset", "earliest");
246 |
247 | KafkaConsumer<String, String> kafkaConsumer = new KafkaConsumer<>(properties);
248 | kafkaConsumer.subscribe(Arrays.asList("second_topic"));
249 |
250 | while(true) {
251 | ConsumerRecords<String, String> consumerRecords = kafkaConsumer.poll(100);
252 | for (ConsumerRecord<String, String> consumerRecord : consumerRecords) {
253 | // consumerRecord.value();
254 | // consumerRecord.key();
255 | // consumerRecord.offset();
256 | // consumerRecord.partition();
257 | // consumerRecord.topic();
258 | // consumerRecord.timestamp();
259 |
260 | System.out.println("Partition: " + consumerRecord.partition() +
261 | ", Offset: " + consumerRecord.offset() +
262 | ", Key: " + consumerRecord.key() +
263 | ", Value: " + consumerRecord.value());
264 |
265 | }
266 | kafkaConsumer.commitSync();
267 | }
268 | }
269 | }
270 | ```
271 |
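The consumer demo loops forever. A common way to stop such a loop cleanly (a sketch, not part of this repo; the class name is illustrative) is to call `wakeup()` from a JVM shutdown hook, which makes the blocked `poll()` throw `WakeupException`, and then close the consumer in a `finally` block:

```java
import java.util.Arrays;
import java.util.Properties;

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.errors.WakeupException;
import org.apache.kafka.common.serialization.StringDeserializer;

public class ShutdownFriendlyConsumer {
    public static void main(String[] args) {
        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", "127.0.0.1:9092");
        properties.setProperty("key.deserializer", StringDeserializer.class.getName());
        properties.setProperty("value.deserializer", StringDeserializer.class.getName());
        properties.setProperty("group.id", "test");
        properties.setProperty("auto.offset.reset", "earliest");

        final KafkaConsumer<String, String> consumer = new KafkaConsumer<>(properties);
        final Thread mainThread = Thread.currentThread();

        // wakeup() is the one consumer method that is safe to call from another thread;
        // it makes the blocked poll() throw WakeupException so the loop can exit cleanly.
        Runtime.getRuntime().addShutdownHook(new Thread() {
            public void run() {
                consumer.wakeup();
                try {
                    mainThread.join();
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                }
            }
        });

        try {
            consumer.subscribe(Arrays.asList("second_topic"));
            while (true) {
                ConsumerRecords<String, String> records = consumer.poll(100);
                for (ConsumerRecord<String, String> record : records) {
                    System.out.println("Offset: " + record.offset() + ", Value: " + record.value());
                }
            }
        } catch (WakeupException e) {
            // Expected on shutdown; nothing to do.
        } finally {
            consumer.close(); // leave the consumer group cleanly
        }
    }
}
```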
--------------------------------------------------------------------------------
/labs/04-kafka-examples/bin/java-producer-consumer-demo.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Licensed to the Apache Software Foundation (ASF) under one or more
4 | # contributor license agreements. See the NOTICE file distributed with
5 | # this work for additional information regarding copyright ownership.
6 | # The ASF licenses this file to You under the Apache License, Version 2.0
7 | # (the "License"); you may not use this file except in compliance with
8 | # the License. You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | base_dir=$(dirname $0)/..
19 |
20 | if [ "x$KAFKA_HEAP_OPTS" = "x" ]; then
21 | export KAFKA_HEAP_OPTS="-Xmx512M"
22 | fi
23 | exec $base_dir/bin/kafka-run-class.sh kafka.examples.KafkaConsumerProducerDemo $@
24 |
--------------------------------------------------------------------------------
/labs/04-kafka-examples/bin/kafka-run-class.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Licensed to the Apache Software Foundation (ASF) under one or more
4 | # contributor license agreements. See the NOTICE file distributed with
5 | # this work for additional information regarding copyright ownership.
6 | # The ASF licenses this file to You under the Apache License, Version 2.0
7 | # (the "License"); you may not use this file except in compliance with
8 | # the License. You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | if [ $# -lt 1 ];
19 | then
20 | echo "USAGE: $0 [-daemon] [-name servicename] [-loggc] classname [opts]"
21 | exit 1
22 | fi
23 |
24 | # CYGWIN == 1 if Cygwin is detected, else 0.
25 | if [[ $(uname -a) =~ "CYGWIN" ]]; then
26 | CYGWIN=1
27 | else
28 | CYGWIN=0
29 | fi
30 |
31 | if [ -z "$INCLUDE_TEST_JARS" ]; then
32 | INCLUDE_TEST_JARS=false
33 | fi
34 |
35 | # Exclude jars not necessary for running commands.
36 | regex="(-(test|src|scaladoc|javadoc)\.jar|jar.asc)$"
37 | should_include_file() {
38 | if [ "$INCLUDE_TEST_JARS" = true ]; then
39 | return 0
40 | fi
41 | file=$1
42 | if [ -z "$(echo "$file" | egrep "$regex")" ] ; then
43 | return 0
44 | else
45 | return 1
46 | fi
47 | }
48 |
49 | base_dir=$(dirname $0)/..
50 |
51 | if [ -z "$SCALA_VERSION" ]; then
52 | SCALA_VERSION=2.11.11
53 | fi
54 |
55 | if [ -z "$SCALA_BINARY_VERSION" ]; then
56 | SCALA_BINARY_VERSION=$(echo $SCALA_VERSION | cut -f 1-2 -d '.')
57 | fi
58 |
59 | # run ./gradlew copyDependantLibs to get all dependant jars in a local dir
60 | shopt -s nullglob
61 | for dir in "$base_dir"/core/build/dependant-libs-${SCALA_VERSION}*;
62 | do
63 | if [ -z "$CLASSPATH" ] ; then
64 | CLASSPATH="$dir/*"
65 | else
66 | CLASSPATH="$CLASSPATH:$dir/*"
67 | fi
68 | done
69 |
70 | for file in "$base_dir"/examples/build/libs/kafka-examples*.jar;
71 | do
72 | if should_include_file "$file"; then
73 | CLASSPATH="$CLASSPATH":"$file"
74 | fi
75 | done
76 |
77 | for file in "$base_dir"/clients/build/libs/kafka-clients*.jar;
78 | do
79 | if should_include_file "$file"; then
80 | CLASSPATH="$CLASSPATH":"$file"
81 | fi
82 | done
83 |
84 | for file in "$base_dir"/streams/build/libs/kafka-streams*.jar;
85 | do
86 | if should_include_file "$file"; then
87 | CLASSPATH="$CLASSPATH":"$file"
88 | fi
89 | done
90 |
91 | for file in "$base_dir"/streams/examples/build/libs/kafka-streams-examples*.jar;
92 | do
93 | if should_include_file "$file"; then
94 | CLASSPATH="$CLASSPATH":"$file"
95 | fi
96 | done
97 |
98 | for file in "$base_dir"/streams/build/dependant-libs-${SCALA_VERSION}/rocksdb*.jar;
99 | do
100 | CLASSPATH="$CLASSPATH":"$file"
101 | done
102 |
103 | for file in "$base_dir"/tools/build/libs/kafka-tools*.jar;
104 | do
105 | if should_include_file "$file"; then
106 | CLASSPATH="$CLASSPATH":"$file"
107 | fi
108 | done
109 |
110 | for dir in "$base_dir"/tools/build/dependant-libs-${SCALA_VERSION}*;
111 | do
112 | CLASSPATH="$CLASSPATH:$dir/*"
113 | done
114 |
115 | for cc_pkg in "api" "transforms" "runtime" "file" "json" "tools"
116 | do
117 | for file in "$base_dir"/connect/${cc_pkg}/build/libs/connect-${cc_pkg}*.jar;
118 | do
119 | if should_include_file "$file"; then
120 | CLASSPATH="$CLASSPATH":"$file"
121 | fi
122 | done
123 | if [ -d "$base_dir/connect/${cc_pkg}/build/dependant-libs" ] ; then
124 | CLASSPATH="$CLASSPATH:$base_dir/connect/${cc_pkg}/build/dependant-libs/*"
125 | fi
126 | done
127 |
128 | # classpath addition for release
129 | for file in "$base_dir"/libs/*;
130 | do
131 | if should_include_file "$file"; then
132 | CLASSPATH="$CLASSPATH":"$file"
133 | fi
134 | done
135 |
136 | for file in "$base_dir"/core/build/libs/kafka_${SCALA_BINARY_VERSION}*.jar;
137 | do
138 | if should_include_file "$file"; then
139 | CLASSPATH="$CLASSPATH":"$file"
140 | fi
141 | done
142 | shopt -u nullglob
143 |
144 | if [ -z "$CLASSPATH" ] ; then
145 | echo "Classpath is empty. Please build the project first e.g. by running './gradlew jar -Pscala_version=$SCALA_VERSION'"
146 | exit 1
147 | fi
148 |
149 | # JMX settings
150 | if [ -z "$KAFKA_JMX_OPTS" ]; then
151 | KAFKA_JMX_OPTS="-Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false "
152 | fi
153 |
154 | # JMX port to use
155 | if [ $JMX_PORT ]; then
156 | KAFKA_JMX_OPTS="$KAFKA_JMX_OPTS -Dcom.sun.management.jmxremote.port=$JMX_PORT "
157 | fi
158 |
159 | # Log directory to use
160 | if [ "x$LOG_DIR" = "x" ]; then
161 | LOG_DIR="$base_dir/logs"
162 | fi
163 |
164 | # Log4j settings
165 | if [ -z "$KAFKA_LOG4J_OPTS" ]; then
166 | # Log to console. This is a tool.
167 | LOG4J_DIR="$base_dir/config/tools-log4j.properties"
168 | # If Cygwin is detected, LOG4J_DIR is converted to Windows format.
169 | (( CYGWIN )) && LOG4J_DIR=$(cygpath --path --mixed "${LOG4J_DIR}")
170 | KAFKA_LOG4J_OPTS="-Dlog4j.configuration=file:${LOG4J_DIR}"
171 | else
172 | # create logs directory
173 | if [ ! -d "$LOG_DIR" ]; then
174 | mkdir -p "$LOG_DIR"
175 | fi
176 | fi
177 |
178 | # If Cygwin is detected, LOG_DIR is converted to Windows format.
179 | (( CYGWIN )) && LOG_DIR=$(cygpath --path --mixed "${LOG_DIR}")
180 | KAFKA_LOG4J_OPTS="-Dkafka.logs.dir=$LOG_DIR $KAFKA_LOG4J_OPTS"
181 |
182 | # Generic jvm settings you want to add
183 | if [ -z "$KAFKA_OPTS" ]; then
184 | KAFKA_OPTS=""
185 | fi
186 |
187 | # Set Debug options if enabled
188 | if [ "x$KAFKA_DEBUG" != "x" ]; then
189 |
190 | # Use default ports
191 | DEFAULT_JAVA_DEBUG_PORT="5005"
192 |
193 | if [ -z "$JAVA_DEBUG_PORT" ]; then
194 | JAVA_DEBUG_PORT="$DEFAULT_JAVA_DEBUG_PORT"
195 | fi
196 |
197 | # Use the defaults if JAVA_DEBUG_OPTS was not set
198 | DEFAULT_JAVA_DEBUG_OPTS="-agentlib:jdwp=transport=dt_socket,server=y,suspend=${DEBUG_SUSPEND_FLAG:-n},address=$JAVA_DEBUG_PORT"
199 | if [ -z "$JAVA_DEBUG_OPTS" ]; then
200 | JAVA_DEBUG_OPTS="$DEFAULT_JAVA_DEBUG_OPTS"
201 | fi
202 |
203 | echo "Enabling Java debug options: $JAVA_DEBUG_OPTS"
204 | KAFKA_OPTS="$JAVA_DEBUG_OPTS $KAFKA_OPTS"
205 | fi
206 |
207 | # Which java to use
208 | if [ -z "$JAVA_HOME" ]; then
209 | JAVA="java"
210 | else
211 | JAVA="$JAVA_HOME/bin/java"
212 | fi
213 |
214 | # Memory options
215 | if [ -z "$KAFKA_HEAP_OPTS" ]; then
216 | KAFKA_HEAP_OPTS="-Xmx256M"
217 | fi
218 |
219 | # JVM performance options
220 | if [ -z "$KAFKA_JVM_PERFORMANCE_OPTS" ]; then
221 | KAFKA_JVM_PERFORMANCE_OPTS="-server -XX:+UseG1GC -XX:MaxGCPauseMillis=20 -XX:InitiatingHeapOccupancyPercent=35 -XX:+ExplicitGCInvokesConcurrent -Djava.awt.headless=true"
222 | fi
223 |
224 |
225 | while [ $# -gt 0 ]; do
226 | COMMAND=$1
227 | case $COMMAND in
228 | -name)
229 | DAEMON_NAME=$2
230 | CONSOLE_OUTPUT_FILE=$LOG_DIR/$DAEMON_NAME.out
231 | shift 2
232 | ;;
233 | -loggc)
234 | if [ -z "$KAFKA_GC_LOG_OPTS" ]; then
235 | GC_LOG_ENABLED="true"
236 | fi
237 | shift
238 | ;;
239 | -daemon)
240 | DAEMON_MODE="true"
241 | shift
242 | ;;
243 | *)
244 | break
245 | ;;
246 | esac
247 | done
248 |
249 | # GC options
250 | GC_FILE_SUFFIX='-gc.log'
251 | GC_LOG_FILE_NAME=''
252 | if [ "x$GC_LOG_ENABLED" = "xtrue" ]; then
253 | GC_LOG_FILE_NAME=$DAEMON_NAME$GC_FILE_SUFFIX
254 | # the first segment of the version number, which is '1' for releases before Java 9
255 | # it then becomes '9', '10', ...
256 | JAVA_MAJOR_VERSION=$($JAVA -version 2>&1 | sed -E -n 's/.* version "([^.-]*).*"/\1/p')
257 | if [[ "$JAVA_MAJOR_VERSION" -ge "9" ]] ; then
258 | KAFKA_GC_LOG_OPTS="-Xlog:gc*:file=$LOG_DIR/$GC_LOG_FILE_NAME:time,tags:filecount=10,filesize=102400"
259 | else
260 | KAFKA_GC_LOG_OPTS="-Xloggc:$LOG_DIR/$GC_LOG_FILE_NAME -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -XX:+PrintGCTimeStamps -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=10 -XX:GCLogFileSize=100M"
261 | fi
262 | fi
263 |
264 | # If Cygwin is detected, classpath is converted to Windows format.
265 | (( CYGWIN )) && CLASSPATH=$(cygpath --path --mixed "${CLASSPATH}")
266 |
267 | # Launch mode
268 | if [ "x$DAEMON_MODE" = "xtrue" ]; then
269 | nohup $JAVA $KAFKA_HEAP_OPTS $KAFKA_JVM_PERFORMANCE_OPTS $KAFKA_GC_LOG_OPTS $KAFKA_JMX_OPTS $KAFKA_LOG4J_OPTS -cp $CLASSPATH $KAFKA_OPTS "$@" > "$CONSOLE_OUTPUT_FILE" 2>&1 < /dev/null &
270 | else
271 | exec $JAVA $KAFKA_HEAP_OPTS $KAFKA_JVM_PERFORMANCE_OPTS $KAFKA_GC_LOG_OPTS $KAFKA_JMX_OPTS $KAFKA_LOG4J_OPTS -cp $CLASSPATH $KAFKA_OPTS "$@"
272 | fi
273 |
--------------------------------------------------------------------------------
/labs/04-kafka-examples/kafka-examples.iml:
--------------------------------------------------------------------------------
1 | (IntelliJ IDEA module descriptor; its XML content did not survive this export.)
--------------------------------------------------------------------------------
/labs/04-kafka-examples/pom.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
 3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
 4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
 5 |     <modelVersion>4.0.0</modelVersion>
 6 |
 7 |     <groupId>ziwon.github.io</groupId>
 8 |     <artifactId>kafka-examples</artifactId>
 9 |     <version>1.0-SNAPSHOT</version>
10 |
11 |     <dependencies>
12 |         <dependency>
13 |             <groupId>org.apache.kafka</groupId>
14 |             <artifactId>kafka_2.11</artifactId>
15 |             <version>0.10.0.0</version>
16 |         </dependency>
17 |     </dependencies>
18 |
19 |     <build>
20 |         <plugins>
21 |             <plugin>
22 |                 <groupId>org.apache.maven.plugins</groupId>
23 |                 <artifactId>maven-compiler-plugin</artifactId>
24 |                 <configuration>
25 |                     <source>1.7</source>
26 |                     <target>1.7</target>
27 |                 </configuration>
28 |             </plugin>
29 |             <plugin>
30 |                 <artifactId>maven-assembly-plugin</artifactId>
31 |                 <version>2.4</version>
32 |                 <configuration>
33 |                     <archive>
34 |                         <manifest>
35 |                             <mainClass>kafka.examples.KafkaConsumerProducerDemo</mainClass>
36 |                         </manifest>
37 |                     </archive>
38 |                     <descriptorRefs>
39 |                         <descriptorRef>jar-with-dependencies</descriptorRef>
40 |                     </descriptorRefs>
41 |                 </configuration>
42 |                 <executions>
43 |                     <execution>
44 |                         <id>make-assembly</id>
45 |                         <phase>package</phase>
46 |                         <goals>
47 |                             <goal>single</goal>
48 |                         </goals>
49 |                     </execution>
50 |                 </executions>
51 |             </plugin>
52 |             <plugin>
53 |                 <groupId>org.skife.maven</groupId>
54 |                 <artifactId>really-executable-jar-maven-plugin</artifactId>
55 |                 <version>1.1.0</version>
56 |                 <configuration>
57 |                     <programFile>kafka-example</programFile>
58 |                 </configuration>
59 |                 <executions>
60 |                     <execution>
61 |                         <phase>package</phase>
62 |                         <goals>
63 |                             <goal>really-executable-jar</goal>
64 |                         </goals>
65 |                     </execution>
66 |                 </executions>
67 |             </plugin>
68 |         </plugins>
69 |     </build>
70 | </project>
--------------------------------------------------------------------------------
/labs/04-kafka-examples/src/main/java/kafka/examples/Consumer.java:
--------------------------------------------------------------------------------
1 | package kafka.examples;
2 |
3 | import kafka.utils.ShutdownableThread;
4 | import org.apache.kafka.clients.consumer.ConsumerConfig;
5 | import org.apache.kafka.clients.consumer.ConsumerRecord;
6 | import org.apache.kafka.clients.consumer.ConsumerRecords;
7 | import org.apache.kafka.clients.consumer.KafkaConsumer;
8 |
9 | import java.util.Collections;
10 | import java.util.Properties;
11 |
12 | /**
13 | * Created by randy on 17/08/2017.
14 | */
15 | public class Consumer extends ShutdownableThread {
16 | private final KafkaConsumer<Integer, String> consumer;
17 | private final String topic;
18 |
19 | public Consumer(String topic) {
20 | super("KafkaConsumerExample", false);
21 | Properties props = new Properties();
22 | props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, KafkaProperties.KAFKA_SERVER_URL + ":" + KafkaProperties.KAFKA_SERVER_PORT);
23 | props.put(ConsumerConfig.GROUP_ID_CONFIG, "DemoConsumer");
24 | props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "true");
25 | props.put(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "1000");
26 | props.put(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG, "30000");
27 | props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.IntegerDeserializer");
28 | props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
29 |
30 | consumer = new KafkaConsumer<>(props);
31 | this.topic = topic;
32 | }
33 |
34 | @Override
35 | public void doWork() {
36 | consumer.subscribe(Collections.singletonList(this.topic));
37 | ConsumerRecords<Integer, String> records = consumer.poll(1000);
38 | for (ConsumerRecord<Integer, String> record : records) {
39 | System.out.println("Received message: (" + record.key() + ", " + record.value() + ") at offset " + record.offset());
40 | }
41 | }
42 |
43 | @Override
44 | public String name() {
45 | return null;
46 | }
47 |
48 | @Override
49 | public boolean isInterruptible() {
50 | return false;
51 | }
52 | }
53 |
54 |
--------------------------------------------------------------------------------
/labs/04-kafka-examples/src/main/java/kafka/examples/KafkaConsumerProducerDemo.java:
--------------------------------------------------------------------------------
1 | package kafka.examples;
2 |
3 | /**
4 | * Created by randy on 17/08/2017.
5 | */
6 | public class KafkaConsumerProducerDemo {
7 | public static void main(String[] args) {
8 | boolean isAsync = args.length == 0 || !args[0].trim().equalsIgnoreCase("sync");
9 | Producer producerThread = new Producer(KafkaProperties.TOPIC, isAsync);
10 | producerThread.start();
11 |
12 | Consumer consumerThread = new Consumer(KafkaProperties.TOPIC);
13 | consumerThread.start();
14 |
15 | }
16 | }
17 |
--------------------------------------------------------------------------------
/labs/04-kafka-examples/src/main/java/kafka/examples/KafkaProperties.java:
--------------------------------------------------------------------------------
1 | package kafka.examples;
2 |
3 | /**
4 | * Created by randy on 17/08/2017.
5 | */
6 | public class KafkaProperties {
7 | public static final String TOPIC = "topic1";
8 | public static final String KAFKA_SERVER_URL = "localhost";
9 | public static final int KAFKA_SERVER_PORT = 9092;
10 | public static final int KAFKA_PRODUCER_BUFFER_SIZE = 64 * 1024;
11 | public static final int CONNECTION_TIMEOUT = 100000;
12 | public static final String TOPIC2 = "topic2";
13 | public static final String TOPIC3 = "topic3";
14 | public static final String CLIENT_ID = "SimpleConsumerDemoClient";
15 |
16 | private KafkaProperties() {}
17 | }
18 |
--------------------------------------------------------------------------------
/labs/04-kafka-examples/src/main/java/kafka/examples/Producer.java:
--------------------------------------------------------------------------------
1 | package kafka.examples;
2 |
3 | import org.apache.kafka.clients.producer.Callback;
4 | import org.apache.kafka.clients.producer.KafkaProducer;
5 | import org.apache.kafka.clients.producer.ProducerRecord;
6 | import org.apache.kafka.clients.producer.RecordMetadata;
7 |
8 | import java.util.Properties;
9 | import java.util.concurrent.ExecutionException;
10 |
11 | /**
12 | * Created by randy on 17/08/2017.
13 | */
14 |
15 | public class Producer extends Thread {
16 | private final KafkaProducer<Integer, String> producer;
17 | private final String topic;
18 | private final Boolean isAsync;
19 |
20 | public Producer(String topic, Boolean isAsync) {
21 | Properties props = new Properties();
22 | props.put("bootstrap.servers", KafkaProperties.KAFKA_SERVER_URL + ":" + KafkaProperties.KAFKA_SERVER_PORT);
23 | props.put("client.id", "DemoProducer");
24 | props.put("key.serializer", "org.apache.kafka.common.serialization.IntegerSerializer");
25 | props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
26 | producer = new KafkaProducer<>(props);
27 | this.topic = topic;
28 | this.isAsync = isAsync;
29 | }
30 |
31 | public void run() {
32 | int messageNo = 1;
33 | while (true) {
34 | String messageStr = "Message_" + messageNo;
35 | long startTime = System.currentTimeMillis();
36 | if (isAsync) { // Send asynchronously
37 | producer.send(new ProducerRecord<>(topic, messageNo, messageStr), new DemoCallBack(startTime, messageNo, messageStr));
38 | } else { // Send synchronously
39 | try {
40 | producer.send(new ProducerRecord<>(topic, messageNo, messageStr)).get();
41 | System.out.println("Sent message: (" + messageNo + ", " + messageStr + ")");
42 | } catch (InterruptedException | ExecutionException e) {
43 | e.printStackTrace();
44 | }
45 | }
46 | ++messageNo;
47 | }
48 | }
49 | }
50 |
51 | class DemoCallBack implements Callback {
52 |
53 | private final long startTime;
54 | private final int key;
55 | private final String message;
56 |
57 | public DemoCallBack(long startTime, int key, String message) {
58 | this.startTime = startTime;
59 | this.key = key;
60 | this.message = message;
61 | }
62 |
63 | public void onCompletion(RecordMetadata metadata, Exception exception) {
64 | long elapsedTime = System.currentTimeMillis() - startTime;
65 | if (metadata != null) {
66 | System.out.println("message(" + key + ", " + message + ") sent to partition(" + metadata.partition() + "), " + "offset(" + metadata.offset() + ") in " + elapsedTime + " ms");
67 | } else {
68 | exception.printStackTrace();
69 | }
70 | }
71 | }
72 |
--------------------------------------------------------------------------------