├── .gitignore ├── Diagrams.html ├── EXAMPLES ├── 01 Multiple Partitions │ └── basic-kafka-commands.txt ├── 02 Multiple Brokers │ ├── basic-kafka-commands.txt │ └── config │ │ ├── server0.properties │ │ ├── server1.properties │ │ └── server2.properties ├── 03 Multiple Brokers and Replication │ ├── basic-kafka-commands.txt │ └── config │ │ ├── server0.properties │ │ ├── server1.properties │ │ └── server2.properties ├── 04 Consumer Groups │ └── basic-kafka-commands.txt └── 05 Performance Testing │ ├── basic-kafka-commands.txt │ └── config │ ├── server0.properties │ ├── server1.properties │ └── server2.properties ├── PROJECTS ├── Java │ └── Kafka │ │ ├── .idea │ │ ├── compiler.xml │ │ ├── dictionaries │ │ │ └── bogdan.xml │ │ ├── misc.xml │ │ └── workspace.xml │ │ ├── Kafka.iml │ │ ├── pom.xml │ │ ├── src │ │ └── main │ │ │ └── java │ │ │ ├── ConsumerWithAutoCommit.java │ │ │ ├── ConsumerWithManualCommit.java │ │ │ ├── ConsumerWithPartitionsAssignment.java │ │ │ └── Producer.java │ │ └── target │ │ └── classes │ │ ├── ConsumerWithAutoCommit.class │ │ ├── ConsumerWithManualCommit.class │ │ ├── ConsumerWithPartitionsAssignment.class │ │ ├── META-INF │ │ └── Kafka.kotlin_module │ │ └── Producer.class ├── Node │ └── kafka │ │ ├── consumer.js │ │ ├── package-lock.json │ │ ├── package.json │ │ └── producer.js └── Python │ └── kafka │ ├── consumer.py │ └── producer.py ├── README.md └── basic-kafka-commands.txt /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | 3 | node_modules/ -------------------------------------------------------------------------------- /Diagrams.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Kafka 6 | 7 | 8 |
9 | 10 | 11 | -------------------------------------------------------------------------------- /EXAMPLES/01 Multiple Partitions/basic-kafka-commands.txt: -------------------------------------------------------------------------------- 1 | Basic KAFKA Commands 2 | 3 | START ZOOKEEPER 4 | bin/zookeeper-server-start.sh config/zookeeper.properties 5 | 6 | START KAFKA BROKER 7 | bin/kafka-server-start.sh config/server.properties 8 | 9 | CREATE TOPIC 10 | bin/kafka-topics.sh \ 11 | --bootstrap-server localhost:9092 \ 12 | --create \ 13 | --replication-factor 1 \ 14 | --partitions 3 \ 15 | --topic animals 16 | 17 | LIST TOPICS 18 | bin/kafka-topics.sh \ 19 | --bootstrap-server localhost:9092 \ 20 | --list 21 | 22 | TOPIC DETAILS 23 | bin/kafka-topics.sh \ 24 | --bootstrap-server localhost:9092 \ 25 | --describe \ 26 | --topic animals 27 | 28 | START CONSOLE PRODUCER 29 | bin/kafka-console-producer.sh \ 30 | --broker-list localhost:9092 \ 31 | --topic animals 32 | 33 | START CONSOLE CONSUMER 34 | bin/kafka-console-consumer.sh \ 35 | --bootstrap-server localhost:9092 \ 36 | --topic test 37 | 38 | START CONSOLE CONSUMER AND READ MESSAGES FROM BEGINNING 39 | bin/kafka-console-consumer.sh \ 40 | --bootstrap-server localhost:9092 \ 41 | --topic animals \ 42 | --from-beginning 43 | 44 | START CONSOLE CONSUMER AND READ MESSAGES FROM BEGINNING FROM SPECIFIC PARTITION 45 | bin/kafka-console-consumer.sh \ 46 | --bootstrap-server localhost:9092 \ 47 | --partition 2 \ 48 | --topic animals \ 49 | --from-beginning 50 | 51 | START CONSOLE CONSUMER AND READ MESSAGES FROM SPECIFIC OFFSET FROM SPECIFIC PARTITION 52 | bin/kafka-console-consumer.sh \ 53 | --bootstrap-server localhost:9092 \ 54 | --partition 2 \ 55 | --topic animals \ 56 | --offset 0 57 | 58 | START CONSOLE CONSUMER WITH SPECIFIC CONSUMER GROUP 59 | bin/kafka-console-consumer.sh \ 60 | --bootstrap-server localhost:9092 \ 61 | --topic test \ 62 | --group test \ 63 | --from-beginning 64 | 65 | LIST CONSUMER GROUPS 66 | bin/kafka-consumer-groups.sh \ 67 | --bootstrap-server localhost:9092 \ 68 | --list 69 | 70 | CONSUMER GROUP DETAILS 71 | bin/kafka-consumer-groups.sh \ 72 | --bootstrap-server localhost:9092 \ 73 | --group test \ 74 | --describe 75 | 76 | -------------------------------------------------------------------------------- /EXAMPLES/02 Multiple Brokers/basic-kafka-commands.txt: -------------------------------------------------------------------------------- 1 | Basic KAFKA Commands 2 | 3 | START ZOOKEEPER 4 | bin/zookeeper-server-start.sh config/zookeeper.properties 5 | 6 | START KAFKA BROKER 7 | bin/kafka-server-start.sh config/server0.properties 8 | bin/kafka-server-start.sh config/server1.properties 9 | bin/kafka-server-start.sh config/server2.properties 10 | 11 | GET INFORMATION FROM ZOOKEEPER ABOUT ACTIVE BROKER IDS 12 | bin/zookeeper-shell.sh localhost:2181 ls /brokers/ids 13 | 14 | GET INFORMATION FROM ZOOKEEPER ABOUT SPECIFIC BROKER BY ID 15 | bin/zookeeper-shell.sh localhost:2181 get /brokers/ids/0 16 | 17 | CREATE TOPIC 18 | bin/kafka-topics.sh \ 19 | --bootstrap-server localhost:9092,localhost:9093,localhost:9094 \ 20 | --create \ 21 | --replication-factor 3 \ 22 | --partitions 5 \ 23 | --topic animals 24 | 25 | LIST TOPICS 26 | bin/kafka-topics.sh \ 27 | --bootstrap-server localhost:9092,localhost:9093,localhost:9094 \ 28 | --list 29 | 30 | TOPIC DETAILS 31 | bin/kafka-topics.sh \ 32 | --bootstrap-server localhost:9092,localhost:9093,localhost:9094 \ 33 | --describe \ 34 | --topic cars 35 | 36 | START CONSOLE PRODUCER 37 | 
bin/kafka-console-producer.sh \ 38 | --broker-list localhost:9092,localhost:9093,localhost:9094 \ 39 | --topic cars 40 | 41 | START CONSOLE CONSUMER 42 | bin/kafka-console-consumer.sh \ 43 | --bootstrap-server localhost:9092,localhost:9093,localhost:9094 \ 44 | --topic cars 45 | 46 | START CONSOLE CONSUMER AND READ MESSAGES FROM BEGINNING 47 | bin/kafka-console-consumer.sh \ 48 | --bootstrap-server localhost:9092,localhost:9093,localhost:9094 \ 49 | --topic test-topic \ 50 | --from-beginning 51 | 52 | 53 | -------------------------------------------------------------------------------- /EXAMPLES/02 Multiple Brokers/config/server0.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # see kafka.server.KafkaConfig for additional details and defaults 17 | 18 | ############################# Server Basics ############################# 19 | 20 | # The id of the broker. This must be set to a unique integer for each broker. 21 | broker.id=0 22 | 23 | ############################# Socket Server Settings ############################# 24 | 25 | # The address the socket server listens on. It will get the value returned from 26 | # java.net.InetAddress.getCanonicalHostName() if not configured. 27 | # FORMAT: 28 | # listeners = listener_name://host_name:port 29 | # EXAMPLE: 30 | # listeners = PLAINTEXT://your.host.name:9092 31 | #listeners=PLAINTEXT://:9092 32 | 33 | # Hostname and port the broker will advertise to producers and consumers. If not set, 34 | # it uses the value for "listeners" if configured. Otherwise, it will use the value 35 | # returned from java.net.InetAddress.getCanonicalHostName(). 36 | #advertised.listeners=PLAINTEXT://your.host.name:9092 37 | 38 | # Maps listener names to security protocols, the default is for them to be the same. 
See the config documentation for more details 39 | #listener.security.protocol.map=PLAINTEXT:PLAINTEXT,SSL:SSL,SASL_PLAINTEXT:SASL_PLAINTEXT,SASL_SSL:SASL_SSL 40 | 41 | # The number of threads that the server uses for receiving requests from the network and sending responses to the network 42 | num.network.threads=3 43 | 44 | # The number of threads that the server uses for processing requests, which may include disk I/O 45 | num.io.threads=8 46 | 47 | # The send buffer (SO_SNDBUF) used by the socket server 48 | socket.send.buffer.bytes=102400 49 | 50 | # The receive buffer (SO_RCVBUF) used by the socket server 51 | socket.receive.buffer.bytes=102400 52 | 53 | # The maximum size of a request that the socket server will accept (protection against OOM) 54 | socket.request.max.bytes=104857600 55 | 56 | 57 | ############################# Log Basics ############################# 58 | 59 | # A comma separated list of directories under which to store log files 60 | log.dirs=/tmp/kafka-logs-0 61 | 62 | # The default number of log partitions per topic. More partitions allow greater 63 | # parallelism for consumption, but this will also result in more files across 64 | # the brokers. 65 | num.partitions=1 66 | 67 | # The number of threads per data directory to be used for log recovery at startup and flushing at shutdown. 68 | # This value is recommended to be increased for installations with data dirs located in RAID array. 69 | num.recovery.threads.per.data.dir=1 70 | 71 | ############################# Internal Topic Settings ############################# 72 | # The replication factor for the group metadata internal topics "__consumer_offsets" and "__transaction_state" 73 | # For anything other than development testing, a value greater than 1 is recommended to ensure availability such as 3. 74 | offsets.topic.replication.factor=1 75 | transaction.state.log.replication.factor=1 76 | transaction.state.log.min.isr=1 77 | 78 | ############################# Log Flush Policy ############################# 79 | 80 | # Messages are immediately written to the filesystem but by default we only fsync() to sync 81 | # the OS cache lazily. The following configurations control the flush of data to disk. 82 | # There are a few important trade-offs here: 83 | # 1. Durability: Unflushed data may be lost if you are not using replication. 84 | # 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. 85 | # 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to excessive seeks. 86 | # The settings below allow one to configure the flush policy to flush data after a period of time or 87 | # every N messages (or both). This can be done globally and overridden on a per-topic basis. 88 | 89 | # The number of messages to accept before forcing a flush of data to disk 90 | #log.flush.interval.messages=10000 91 | 92 | # The maximum amount of time a message can sit in a log before we force a flush 93 | #log.flush.interval.ms=1000 94 | 95 | ############################# Log Retention Policy ############################# 96 | 97 | # The following configurations control the disposal of log segments. The policy can 98 | # be set to delete segments after a period of time, or after a given size has accumulated. 99 | # A segment will be deleted whenever *either* of these criteria are met. Deletion always happens 100 | # from the end of the log. 
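# Note: if log.retention.ms or log.retention.minutes is set, it takes precedence over
# log.retention.hours. As an illustrative (commented-out) alternative, a one-day
# retention could be expressed as:
#log.retention.ms=86400000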
101 | 102 | # The minimum age of a log file to be eligible for deletion due to age 103 | log.retention.hours=168 104 | 105 | # A size-based retention policy for logs. Segments are pruned from the log unless the remaining 106 | # segments drop below log.retention.bytes. Functions independently of log.retention.hours. 107 | #log.retention.bytes=1073741824 108 | 109 | # The maximum size of a log segment file. When this size is reached a new log segment will be created. 110 | log.segment.bytes=1073741824 111 | 112 | # The interval at which log segments are checked to see if they can be deleted according 113 | # to the retention policies 114 | log.retention.check.interval.ms=300000 115 | 116 | ############################# Zookeeper ############################# 117 | 118 | # Zookeeper connection string (see zookeeper docs for details). 119 | # This is a comma separated host:port pairs, each corresponding to a zk 120 | # server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". 121 | # You can also append an optional chroot string to the urls to specify the 122 | # root directory for all kafka znodes. 123 | zookeeper.connect=localhost:2181 124 | 125 | # Timeout in ms for connecting to zookeeper 126 | zookeeper.connection.timeout.ms=6000 127 | 128 | 129 | ############################# Group Coordinator Settings ############################# 130 | 131 | # The following configuration specifies the time, in milliseconds, that the GroupCoordinator will delay the initial consumer rebalance. 132 | # The rebalance will be further delayed by the value of group.initial.rebalance.delay.ms as new members join the group, up to a maximum of max.poll.interval.ms. 133 | # The default value for this is 3 seconds. 134 | # We override this to 0 here as it makes for a better out-of-the-box experience for development and testing. 135 | # However, in production environments the default value of 3 seconds is more suitable as this will help to avoid unnecessary, and potentially expensive, rebalances during application startup. 136 | group.initial.rebalance.delay.ms=0 137 | -------------------------------------------------------------------------------- /EXAMPLES/02 Multiple Brokers/config/server1.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # see kafka.server.KafkaConfig for additional details and defaults 17 | 18 | ############################# Server Basics ############################# 19 | 20 | # The id of the broker. This must be set to a unique integer for each broker. 21 | broker.id=1 22 | 23 | ############################# Socket Server Settings ############################# 24 | 25 | # The address the socket server listens on. 
It will get the value returned from 26 | # java.net.InetAddress.getCanonicalHostName() if not configured. 27 | # FORMAT: 28 | # listeners = listener_name://host_name:port 29 | # EXAMPLE: 30 | # listeners = PLAINTEXT://your.host.name:9092 31 | listeners=PLAINTEXT://:9093 32 | 33 | # Hostname and port the broker will advertise to producers and consumers. If not set, 34 | # it uses the value for "listeners" if configured. Otherwise, it will use the value 35 | # returned from java.net.InetAddress.getCanonicalHostName(). 36 | #advertised.listeners=PLAINTEXT://your.host.name:9092 37 | 38 | # Maps listener names to security protocols, the default is for them to be the same. See the config documentation for more details 39 | #listener.security.protocol.map=PLAINTEXT:PLAINTEXT,SSL:SSL,SASL_PLAINTEXT:SASL_PLAINTEXT,SASL_SSL:SASL_SSL 40 | 41 | # The number of threads that the server uses for receiving requests from the network and sending responses to the network 42 | num.network.threads=3 43 | 44 | # The number of threads that the server uses for processing requests, which may include disk I/O 45 | num.io.threads=8 46 | 47 | # The send buffer (SO_SNDBUF) used by the socket server 48 | socket.send.buffer.bytes=102400 49 | 50 | # The receive buffer (SO_RCVBUF) used by the socket server 51 | socket.receive.buffer.bytes=102400 52 | 53 | # The maximum size of a request that the socket server will accept (protection against OOM) 54 | socket.request.max.bytes=104857600 55 | 56 | 57 | ############################# Log Basics ############################# 58 | 59 | # A comma separated list of directories under which to store log files 60 | log.dirs=/tmp/kafka-logs-1 61 | 62 | # The default number of log partitions per topic. More partitions allow greater 63 | # parallelism for consumption, but this will also result in more files across 64 | # the brokers. 65 | num.partitions=1 66 | 67 | # The number of threads per data directory to be used for log recovery at startup and flushing at shutdown. 68 | # This value is recommended to be increased for installations with data dirs located in RAID array. 69 | num.recovery.threads.per.data.dir=1 70 | 71 | ############################# Internal Topic Settings ############################# 72 | # The replication factor for the group metadata internal topics "__consumer_offsets" and "__transaction_state" 73 | # For anything other than development testing, a value greater than 1 is recommended to ensure availability such as 3. 74 | offsets.topic.replication.factor=1 75 | transaction.state.log.replication.factor=1 76 | transaction.state.log.min.isr=1 77 | 78 | ############################# Log Flush Policy ############################# 79 | 80 | # Messages are immediately written to the filesystem but by default we only fsync() to sync 81 | # the OS cache lazily. The following configurations control the flush of data to disk. 82 | # There are a few important trade-offs here: 83 | # 1. Durability: Unflushed data may be lost if you are not using replication. 84 | # 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. 85 | # 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to excessive seeks. 86 | # The settings below allow one to configure the flush policy to flush data after a period of time or 87 | # every N messages (or both). This can be done globally and overridden on a per-topic basis. 
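# As a rough sketch of such a per-topic override (assuming the kafka-configs.sh tool
# bundled with this Kafka release and the "cars" topic used elsewhere in this example),
# the topic-level flush.messages and flush.ms settings could be altered at runtime:
#   bin/kafka-configs.sh --bootstrap-server localhost:9092 \
#     --entity-type topics --entity-name cars \
#     --alter --add-config flush.messages=10000,flush.ms=1000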
88 | 89 | # The number of messages to accept before forcing a flush of data to disk 90 | #log.flush.interval.messages=10000 91 | 92 | # The maximum amount of time a message can sit in a log before we force a flush 93 | #log.flush.interval.ms=1000 94 | 95 | ############################# Log Retention Policy ############################# 96 | 97 | # The following configurations control the disposal of log segments. The policy can 98 | # be set to delete segments after a period of time, or after a given size has accumulated. 99 | # A segment will be deleted whenever *either* of these criteria are met. Deletion always happens 100 | # from the end of the log. 101 | 102 | # The minimum age of a log file to be eligible for deletion due to age 103 | log.retention.hours=168 104 | 105 | # A size-based retention policy for logs. Segments are pruned from the log unless the remaining 106 | # segments drop below log.retention.bytes. Functions independently of log.retention.hours. 107 | #log.retention.bytes=1073741824 108 | 109 | # The maximum size of a log segment file. When this size is reached a new log segment will be created. 110 | log.segment.bytes=1073741824 111 | 112 | # The interval at which log segments are checked to see if they can be deleted according 113 | # to the retention policies 114 | log.retention.check.interval.ms=300000 115 | 116 | ############################# Zookeeper ############################# 117 | 118 | # Zookeeper connection string (see zookeeper docs for details). 119 | # This is a comma separated host:port pairs, each corresponding to a zk 120 | # server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". 121 | # You can also append an optional chroot string to the urls to specify the 122 | # root directory for all kafka znodes. 123 | zookeeper.connect=localhost:2181 124 | 125 | # Timeout in ms for connecting to zookeeper 126 | zookeeper.connection.timeout.ms=6000 127 | 128 | 129 | ############################# Group Coordinator Settings ############################# 130 | 131 | # The following configuration specifies the time, in milliseconds, that the GroupCoordinator will delay the initial consumer rebalance. 132 | # The rebalance will be further delayed by the value of group.initial.rebalance.delay.ms as new members join the group, up to a maximum of max.poll.interval.ms. 133 | # The default value for this is 3 seconds. 134 | # We override this to 0 here as it makes for a better out-of-the-box experience for development and testing. 135 | # However, in production environments the default value of 3 seconds is more suitable as this will help to avoid unnecessary, and potentially expensive, rebalances during application startup. 136 | group.initial.rebalance.delay.ms=0 137 | -------------------------------------------------------------------------------- /EXAMPLES/02 Multiple Brokers/config/server2.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. 
You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # see kafka.server.KafkaConfig for additional details and defaults 17 | 18 | ############################# Server Basics ############################# 19 | 20 | # The id of the broker. This must be set to a unique integer for each broker. 21 | broker.id=2 22 | 23 | ############################# Socket Server Settings ############################# 24 | 25 | # The address the socket server listens on. It will get the value returned from 26 | # java.net.InetAddress.getCanonicalHostName() if not configured. 27 | # FORMAT: 28 | # listeners = listener_name://host_name:port 29 | # EXAMPLE: 30 | # listeners = PLAINTEXT://your.host.name:9092 31 | listeners=PLAINTEXT://:9094 32 | 33 | # Hostname and port the broker will advertise to producers and consumers. If not set, 34 | # it uses the value for "listeners" if configured. Otherwise, it will use the value 35 | # returned from java.net.InetAddress.getCanonicalHostName(). 36 | #advertised.listeners=PLAINTEXT://your.host.name:9092 37 | 38 | # Maps listener names to security protocols, the default is for them to be the same. See the config documentation for more details 39 | #listener.security.protocol.map=PLAINTEXT:PLAINTEXT,SSL:SSL,SASL_PLAINTEXT:SASL_PLAINTEXT,SASL_SSL:SASL_SSL 40 | 41 | # The number of threads that the server uses for receiving requests from the network and sending responses to the network 42 | num.network.threads=3 43 | 44 | # The number of threads that the server uses for processing requests, which may include disk I/O 45 | num.io.threads=8 46 | 47 | # The send buffer (SO_SNDBUF) used by the socket server 48 | socket.send.buffer.bytes=102400 49 | 50 | # The receive buffer (SO_RCVBUF) used by the socket server 51 | socket.receive.buffer.bytes=102400 52 | 53 | # The maximum size of a request that the socket server will accept (protection against OOM) 54 | socket.request.max.bytes=104857600 55 | 56 | 57 | ############################# Log Basics ############################# 58 | 59 | # A comma separated list of directories under which to store log files 60 | log.dirs=/tmp/kafka-logs-2 61 | 62 | # The default number of log partitions per topic. More partitions allow greater 63 | # parallelism for consumption, but this will also result in more files across 64 | # the brokers. 65 | num.partitions=1 66 | 67 | # The number of threads per data directory to be used for log recovery at startup and flushing at shutdown. 68 | # This value is recommended to be increased for installations with data dirs located in RAID array. 69 | num.recovery.threads.per.data.dir=1 70 | 71 | ############################# Internal Topic Settings ############################# 72 | # The replication factor for the group metadata internal topics "__consumer_offsets" and "__transaction_state" 73 | # For anything other than development testing, a value greater than 1 is recommended to ensure availability such as 3. 
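# Since this example starts three brokers, a less development-oriented choice would be,
# for instance (commented out here):
#offsets.topic.replication.factor=3
#transaction.state.log.replication.factor=3
#transaction.state.log.min.isr=2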
74 | offsets.topic.replication.factor=1 75 | transaction.state.log.replication.factor=1 76 | transaction.state.log.min.isr=1 77 | 78 | ############################# Log Flush Policy ############################# 79 | 80 | # Messages are immediately written to the filesystem but by default we only fsync() to sync 81 | # the OS cache lazily. The following configurations control the flush of data to disk. 82 | # There are a few important trade-offs here: 83 | # 1. Durability: Unflushed data may be lost if you are not using replication. 84 | # 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. 85 | # 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to excessive seeks. 86 | # The settings below allow one to configure the flush policy to flush data after a period of time or 87 | # every N messages (or both). This can be done globally and overridden on a per-topic basis. 88 | 89 | # The number of messages to accept before forcing a flush of data to disk 90 | #log.flush.interval.messages=10000 91 | 92 | # The maximum amount of time a message can sit in a log before we force a flush 93 | #log.flush.interval.ms=1000 94 | 95 | ############################# Log Retention Policy ############################# 96 | 97 | # The following configurations control the disposal of log segments. The policy can 98 | # be set to delete segments after a period of time, or after a given size has accumulated. 99 | # A segment will be deleted whenever *either* of these criteria are met. Deletion always happens 100 | # from the end of the log. 101 | 102 | # The minimum age of a log file to be eligible for deletion due to age 103 | log.retention.hours=168 104 | 105 | # A size-based retention policy for logs. Segments are pruned from the log unless the remaining 106 | # segments drop below log.retention.bytes. Functions independently of log.retention.hours. 107 | #log.retention.bytes=1073741824 108 | 109 | # The maximum size of a log segment file. When this size is reached a new log segment will be created. 110 | log.segment.bytes=1073741824 111 | 112 | # The interval at which log segments are checked to see if they can be deleted according 113 | # to the retention policies 114 | log.retention.check.interval.ms=300000 115 | 116 | ############################# Zookeeper ############################# 117 | 118 | # Zookeeper connection string (see zookeeper docs for details). 119 | # This is a comma separated host:port pairs, each corresponding to a zk 120 | # server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". 121 | # You can also append an optional chroot string to the urls to specify the 122 | # root directory for all kafka znodes. 123 | zookeeper.connect=localhost:2181 124 | 125 | # Timeout in ms for connecting to zookeeper 126 | zookeeper.connection.timeout.ms=6000 127 | 128 | 129 | ############################# Group Coordinator Settings ############################# 130 | 131 | # The following configuration specifies the time, in milliseconds, that the GroupCoordinator will delay the initial consumer rebalance. 132 | # The rebalance will be further delayed by the value of group.initial.rebalance.delay.ms as new members join the group, up to a maximum of max.poll.interval.ms. 133 | # The default value for this is 3 seconds. 134 | # We override this to 0 here as it makes for a better out-of-the-box experience for development and testing. 
135 | # However, in production environments the default value of 3 seconds is more suitable as this will help to avoid unnecessary, and potentially expensive, rebalances during application startup. 136 | group.initial.rebalance.delay.ms=0 137 | -------------------------------------------------------------------------------- /EXAMPLES/03 Multiple Brokers and Replication/basic-kafka-commands.txt: -------------------------------------------------------------------------------- 1 | Basic KAFKA Commands 2 | 3 | START ZOOKEEPER 4 | bin/zookeeper-server-start.sh config/zookeeper.properties 5 | 6 | START KAFKA BROKER 7 | bin/kafka-server-start.sh config/server0.properties 8 | bin/kafka-server-start.sh config/server1.properties 9 | bin/kafka-server-start.sh config/server2.properties 10 | 11 | GET INFORMATION FROM ZOOKEEPER ABOUT ACTIVE BROKER IDS 12 | bin/zookeeper-shell.sh localhost:2181 ls /brokers/ids 13 | 14 | GET INFORMATION FROM ZOOKEEPER ABOUT SPECIFIC BROKER BY ID 15 | bin/zookeeper-shell.sh localhost:2181 get /brokers/ids/0 16 | 17 | CREATE TOPIC 18 | bin/kafka-topics.sh \ 19 | --bootstrap-server localhost:9092,localhost:9093,localhost:9094 \ 20 | --create \ 21 | --replication-factor 3 \ 22 | --partitions 7 \ 23 | --topic months 24 | 25 | LIST TOPICS 26 | bin/kafka-topics.sh \ 27 | --bootstrap-server localhost:9092,localhost:9093,localhost:9094 \ 28 | --list 29 | 30 | TOPIC DETAILS 31 | bin/kafka-topics.sh \ 32 | --bootstrap-server localhost:9092,localhost:9093,localhost:9094 \ 33 | --describe \ 34 | --topic months 35 | 36 | START CONSOLE PRODUCER 37 | bin/kafka-console-producer.sh \ 38 | --broker-list localhost:9092,localhost:9093,localhost:9094 \ 39 | --topic months 40 | 41 | START CONSOLE CONSUMER 42 | bin/kafka-console-consumer.sh \ 43 | --bootstrap-server localhost:9092,localhost:9093,localhost:9094 \ 44 | --topic months 45 | 46 | START CONSOLE CONSUMER AND READ MESSAGES FROM BEGINNING 47 | bin/kafka-console-consumer.sh \ 48 | --bootstrap-server localhost:9092,localhost:9093,localhost:9094 \ 49 | --topic months \ 50 | --from-beginning 51 | 52 | START CONSOLE CONSUMER AND READ MESSAGES FROM SPECIFIC PARTITION 53 | bin/kafka-console-consumer.sh \ 54 | --bootstrap-server localhost:9092,localhost:9093,localhost:9094 \ 55 | --topic months \ 56 | --partition 6 \ 57 | --from-beginning 58 | 59 | START CONSOLE CONSUMER AND READ MESSAGES FROM SPECIFIC PARTITION AND SPECIFIC OFFSET 60 | bin/kafka-console-consumer.sh \ 61 | --bootstrap-server localhost:9092,localhost:9093,localhost:9094 \ 62 | --topic months \ 63 | --partition 3 \ 64 | --offset 2 65 | 66 | 67 | -------------------------------------------------------------------------------- /EXAMPLES/03 Multiple Brokers and Replication/config/server0.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. 
You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # see kafka.server.KafkaConfig for additional details and defaults 17 | 18 | ############################# Server Basics ############################# 19 | 20 | # The id of the broker. This must be set to a unique integer for each broker. 21 | broker.id=0 22 | 23 | ############################# Socket Server Settings ############################# 24 | 25 | # The address the socket server listens on. It will get the value returned from 26 | # java.net.InetAddress.getCanonicalHostName() if not configured. 27 | # FORMAT: 28 | # listeners = listener_name://host_name:port 29 | # EXAMPLE: 30 | # listeners = PLAINTEXT://your.host.name:9092 31 | #listeners=PLAINTEXT://:9092 32 | 33 | # Hostname and port the broker will advertise to producers and consumers. If not set, 34 | # it uses the value for "listeners" if configured. Otherwise, it will use the value 35 | # returned from java.net.InetAddress.getCanonicalHostName(). 36 | #advertised.listeners=PLAINTEXT://your.host.name:9092 37 | 38 | # Maps listener names to security protocols, the default is for them to be the same. See the config documentation for more details 39 | #listener.security.protocol.map=PLAINTEXT:PLAINTEXT,SSL:SSL,SASL_PLAINTEXT:SASL_PLAINTEXT,SASL_SSL:SASL_SSL 40 | 41 | # The number of threads that the server uses for receiving requests from the network and sending responses to the network 42 | num.network.threads=3 43 | 44 | # The number of threads that the server uses for processing requests, which may include disk I/O 45 | num.io.threads=8 46 | 47 | # The send buffer (SO_SNDBUF) used by the socket server 48 | socket.send.buffer.bytes=102400 49 | 50 | # The receive buffer (SO_RCVBUF) used by the socket server 51 | socket.receive.buffer.bytes=102400 52 | 53 | # The maximum size of a request that the socket server will accept (protection against OOM) 54 | socket.request.max.bytes=104857600 55 | 56 | 57 | ############################# Log Basics ############################# 58 | 59 | # A comma separated list of directories under which to store log files 60 | log.dirs=/tmp/kafka-logs-0 61 | 62 | # The default number of log partitions per topic. More partitions allow greater 63 | # parallelism for consumption, but this will also result in more files across 64 | # the brokers. 65 | num.partitions=1 66 | 67 | # The number of threads per data directory to be used for log recovery at startup and flushing at shutdown. 68 | # This value is recommended to be increased for installations with data dirs located in RAID array. 69 | num.recovery.threads.per.data.dir=1 70 | 71 | ############################# Internal Topic Settings ############################# 72 | # The replication factor for the group metadata internal topics "__consumer_offsets" and "__transaction_state" 73 | # For anything other than development testing, a value greater than 1 is recommended to ensure availability such as 3. 
74 | offsets.topic.replication.factor=1 75 | transaction.state.log.replication.factor=1 76 | transaction.state.log.min.isr=1 77 | 78 | ############################# Log Flush Policy ############################# 79 | 80 | # Messages are immediately written to the filesystem but by default we only fsync() to sync 81 | # the OS cache lazily. The following configurations control the flush of data to disk. 82 | # There are a few important trade-offs here: 83 | # 1. Durability: Unflushed data may be lost if you are not using replication. 84 | # 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. 85 | # 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to excessive seeks. 86 | # The settings below allow one to configure the flush policy to flush data after a period of time or 87 | # every N messages (or both). This can be done globally and overridden on a per-topic basis. 88 | 89 | # The number of messages to accept before forcing a flush of data to disk 90 | #log.flush.interval.messages=10000 91 | 92 | # The maximum amount of time a message can sit in a log before we force a flush 93 | #log.flush.interval.ms=1000 94 | 95 | ############################# Log Retention Policy ############################# 96 | 97 | # The following configurations control the disposal of log segments. The policy can 98 | # be set to delete segments after a period of time, or after a given size has accumulated. 99 | # A segment will be deleted whenever *either* of these criteria are met. Deletion always happens 100 | # from the end of the log. 101 | 102 | # The minimum age of a log file to be eligible for deletion due to age 103 | log.retention.hours=168 104 | 105 | # A size-based retention policy for logs. Segments are pruned from the log unless the remaining 106 | # segments drop below log.retention.bytes. Functions independently of log.retention.hours. 107 | #log.retention.bytes=1073741824 108 | 109 | # The maximum size of a log segment file. When this size is reached a new log segment will be created. 110 | log.segment.bytes=1073741824 111 | 112 | # The interval at which log segments are checked to see if they can be deleted according 113 | # to the retention policies 114 | log.retention.check.interval.ms=300000 115 | 116 | ############################# Zookeeper ############################# 117 | 118 | # Zookeeper connection string (see zookeeper docs for details). 119 | # This is a comma separated host:port pairs, each corresponding to a zk 120 | # server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". 121 | # You can also append an optional chroot string to the urls to specify the 122 | # root directory for all kafka znodes. 123 | zookeeper.connect=localhost:2181 124 | 125 | # Timeout in ms for connecting to zookeeper 126 | zookeeper.connection.timeout.ms=6000 127 | 128 | 129 | ############################# Group Coordinator Settings ############################# 130 | 131 | # The following configuration specifies the time, in milliseconds, that the GroupCoordinator will delay the initial consumer rebalance. 132 | # The rebalance will be further delayed by the value of group.initial.rebalance.delay.ms as new members join the group, up to a maximum of max.poll.interval.ms. 133 | # The default value for this is 3 seconds. 134 | # We override this to 0 here as it makes for a better out-of-the-box experience for development and testing. 
135 | # However, in production environments the default value of 3 seconds is more suitable as this will help to avoid unnecessary, and potentially expensive, rebalances during application startup. 136 | group.initial.rebalance.delay.ms=0 137 | -------------------------------------------------------------------------------- /EXAMPLES/03 Multiple Brokers and Replication/config/server1.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # see kafka.server.KafkaConfig for additional details and defaults 17 | 18 | ############################# Server Basics ############################# 19 | 20 | # The id of the broker. This must be set to a unique integer for each broker. 21 | broker.id=1 22 | 23 | ############################# Socket Server Settings ############################# 24 | 25 | # The address the socket server listens on. It will get the value returned from 26 | # java.net.InetAddress.getCanonicalHostName() if not configured. 27 | # FORMAT: 28 | # listeners = listener_name://host_name:port 29 | # EXAMPLE: 30 | # listeners = PLAINTEXT://your.host.name:9092 31 | listeners=PLAINTEXT://:9093 32 | 33 | # Hostname and port the broker will advertise to producers and consumers. If not set, 34 | # it uses the value for "listeners" if configured. Otherwise, it will use the value 35 | # returned from java.net.InetAddress.getCanonicalHostName(). 36 | #advertised.listeners=PLAINTEXT://your.host.name:9092 37 | 38 | # Maps listener names to security protocols, the default is for them to be the same. See the config documentation for more details 39 | #listener.security.protocol.map=PLAINTEXT:PLAINTEXT,SSL:SSL,SASL_PLAINTEXT:SASL_PLAINTEXT,SASL_SSL:SASL_SSL 40 | 41 | # The number of threads that the server uses for receiving requests from the network and sending responses to the network 42 | num.network.threads=3 43 | 44 | # The number of threads that the server uses for processing requests, which may include disk I/O 45 | num.io.threads=8 46 | 47 | # The send buffer (SO_SNDBUF) used by the socket server 48 | socket.send.buffer.bytes=102400 49 | 50 | # The receive buffer (SO_RCVBUF) used by the socket server 51 | socket.receive.buffer.bytes=102400 52 | 53 | # The maximum size of a request that the socket server will accept (protection against OOM) 54 | socket.request.max.bytes=104857600 55 | 56 | 57 | ############################# Log Basics ############################# 58 | 59 | # A comma separated list of directories under which to store log files 60 | log.dirs=/tmp/kafka-logs-1 61 | 62 | # The default number of log partitions per topic. 
More partitions allow greater 63 | # parallelism for consumption, but this will also result in more files across 64 | # the brokers. 65 | num.partitions=1 66 | 67 | # The number of threads per data directory to be used for log recovery at startup and flushing at shutdown. 68 | # This value is recommended to be increased for installations with data dirs located in RAID array. 69 | num.recovery.threads.per.data.dir=1 70 | 71 | ############################# Internal Topic Settings ############################# 72 | # The replication factor for the group metadata internal topics "__consumer_offsets" and "__transaction_state" 73 | # For anything other than development testing, a value greater than 1 is recommended to ensure availability such as 3. 74 | offsets.topic.replication.factor=1 75 | transaction.state.log.replication.factor=1 76 | transaction.state.log.min.isr=1 77 | 78 | ############################# Log Flush Policy ############################# 79 | 80 | # Messages are immediately written to the filesystem but by default we only fsync() to sync 81 | # the OS cache lazily. The following configurations control the flush of data to disk. 82 | # There are a few important trade-offs here: 83 | # 1. Durability: Unflushed data may be lost if you are not using replication. 84 | # 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. 85 | # 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to excessive seeks. 86 | # The settings below allow one to configure the flush policy to flush data after a period of time or 87 | # every N messages (or both). This can be done globally and overridden on a per-topic basis. 88 | 89 | # The number of messages to accept before forcing a flush of data to disk 90 | #log.flush.interval.messages=10000 91 | 92 | # The maximum amount of time a message can sit in a log before we force a flush 93 | #log.flush.interval.ms=1000 94 | 95 | ############################# Log Retention Policy ############################# 96 | 97 | # The following configurations control the disposal of log segments. The policy can 98 | # be set to delete segments after a period of time, or after a given size has accumulated. 99 | # A segment will be deleted whenever *either* of these criteria are met. Deletion always happens 100 | # from the end of the log. 101 | 102 | # The minimum age of a log file to be eligible for deletion due to age 103 | log.retention.hours=168 104 | 105 | # A size-based retention policy for logs. Segments are pruned from the log unless the remaining 106 | # segments drop below log.retention.bytes. Functions independently of log.retention.hours. 107 | #log.retention.bytes=1073741824 108 | 109 | # The maximum size of a log segment file. When this size is reached a new log segment will be created. 110 | log.segment.bytes=1073741824 111 | 112 | # The interval at which log segments are checked to see if they can be deleted according 113 | # to the retention policies 114 | log.retention.check.interval.ms=300000 115 | 116 | ############################# Zookeeper ############################# 117 | 118 | # Zookeeper connection string (see zookeeper docs for details). 119 | # This is a comma separated host:port pairs, each corresponding to a zk 120 | # server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". 121 | # You can also append an optional chroot string to the urls to specify the 122 | # root directory for all kafka znodes. 
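# For illustration, a connection string with a chroot could look like
# "localhost:2181/kafka", where the "/kafka" path is just a hypothetical example.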
123 | zookeeper.connect=localhost:2181 124 | 125 | # Timeout in ms for connecting to zookeeper 126 | zookeeper.connection.timeout.ms=6000 127 | 128 | 129 | ############################# Group Coordinator Settings ############################# 130 | 131 | # The following configuration specifies the time, in milliseconds, that the GroupCoordinator will delay the initial consumer rebalance. 132 | # The rebalance will be further delayed by the value of group.initial.rebalance.delay.ms as new members join the group, up to a maximum of max.poll.interval.ms. 133 | # The default value for this is 3 seconds. 134 | # We override this to 0 here as it makes for a better out-of-the-box experience for development and testing. 135 | # However, in production environments the default value of 3 seconds is more suitable as this will help to avoid unnecessary, and potentially expensive, rebalances during application startup. 136 | group.initial.rebalance.delay.ms=0 137 | -------------------------------------------------------------------------------- /EXAMPLES/03 Multiple Brokers and Replication/config/server2.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # see kafka.server.KafkaConfig for additional details and defaults 17 | 18 | ############################# Server Basics ############################# 19 | 20 | # The id of the broker. This must be set to a unique integer for each broker. 21 | broker.id=2 22 | 23 | ############################# Socket Server Settings ############################# 24 | 25 | # The address the socket server listens on. It will get the value returned from 26 | # java.net.InetAddress.getCanonicalHostName() if not configured. 27 | # FORMAT: 28 | # listeners = listener_name://host_name:port 29 | # EXAMPLE: 30 | # listeners = PLAINTEXT://your.host.name:9092 31 | listeners=PLAINTEXT://:9094 32 | 33 | # Hostname and port the broker will advertise to producers and consumers. If not set, 34 | # it uses the value for "listeners" if configured. Otherwise, it will use the value 35 | # returned from java.net.InetAddress.getCanonicalHostName(). 36 | #advertised.listeners=PLAINTEXT://your.host.name:9092 37 | 38 | # Maps listener names to security protocols, the default is for them to be the same. 
See the config documentation for more details 39 | #listener.security.protocol.map=PLAINTEXT:PLAINTEXT,SSL:SSL,SASL_PLAINTEXT:SASL_PLAINTEXT,SASL_SSL:SASL_SSL 40 | 41 | # The number of threads that the server uses for receiving requests from the network and sending responses to the network 42 | num.network.threads=3 43 | 44 | # The number of threads that the server uses for processing requests, which may include disk I/O 45 | num.io.threads=8 46 | 47 | # The send buffer (SO_SNDBUF) used by the socket server 48 | socket.send.buffer.bytes=102400 49 | 50 | # The receive buffer (SO_RCVBUF) used by the socket server 51 | socket.receive.buffer.bytes=102400 52 | 53 | # The maximum size of a request that the socket server will accept (protection against OOM) 54 | socket.request.max.bytes=104857600 55 | 56 | 57 | ############################# Log Basics ############################# 58 | 59 | # A comma separated list of directories under which to store log files 60 | log.dirs=/tmp/kafka-logs-2 61 | 62 | # The default number of log partitions per topic. More partitions allow greater 63 | # parallelism for consumption, but this will also result in more files across 64 | # the brokers. 65 | num.partitions=1 66 | 67 | # The number of threads per data directory to be used for log recovery at startup and flushing at shutdown. 68 | # This value is recommended to be increased for installations with data dirs located in RAID array. 69 | num.recovery.threads.per.data.dir=1 70 | 71 | ############################# Internal Topic Settings ############################# 72 | # The replication factor for the group metadata internal topics "__consumer_offsets" and "__transaction_state" 73 | # For anything other than development testing, a value greater than 1 is recommended to ensure availability such as 3. 74 | offsets.topic.replication.factor=1 75 | transaction.state.log.replication.factor=1 76 | transaction.state.log.min.isr=1 77 | 78 | ############################# Log Flush Policy ############################# 79 | 80 | # Messages are immediately written to the filesystem but by default we only fsync() to sync 81 | # the OS cache lazily. The following configurations control the flush of data to disk. 82 | # There are a few important trade-offs here: 83 | # 1. Durability: Unflushed data may be lost if you are not using replication. 84 | # 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. 85 | # 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to excessive seeks. 86 | # The settings below allow one to configure the flush policy to flush data after a period of time or 87 | # every N messages (or both). This can be done globally and overridden on a per-topic basis. 88 | 89 | # The number of messages to accept before forcing a flush of data to disk 90 | #log.flush.interval.messages=10000 91 | 92 | # The maximum amount of time a message can sit in a log before we force a flush 93 | #log.flush.interval.ms=1000 94 | 95 | ############################# Log Retention Policy ############################# 96 | 97 | # The following configurations control the disposal of log segments. The policy can 98 | # be set to delete segments after a period of time, or after a given size has accumulated. 99 | # A segment will be deleted whenever *either* of these criteria are met. Deletion always happens 100 | # from the end of the log. 
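# Retention is applied to whole log segments, so with log.segment.bytes set to 1 GiB
# below, messages can remain somewhat beyond the configured limits until the active
# segment is rolled.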
101 | 102 | # The minimum age of a log file to be eligible for deletion due to age 103 | log.retention.hours=168 104 | 105 | # A size-based retention policy for logs. Segments are pruned from the log unless the remaining 106 | # segments drop below log.retention.bytes. Functions independently of log.retention.hours. 107 | #log.retention.bytes=1073741824 108 | 109 | # The maximum size of a log segment file. When this size is reached a new log segment will be created. 110 | log.segment.bytes=1073741824 111 | 112 | # The interval at which log segments are checked to see if they can be deleted according 113 | # to the retention policies 114 | log.retention.check.interval.ms=300000 115 | 116 | ############################# Zookeeper ############################# 117 | 118 | # Zookeeper connection string (see zookeeper docs for details). 119 | # This is a comma separated host:port pairs, each corresponding to a zk 120 | # server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". 121 | # You can also append an optional chroot string to the urls to specify the 122 | # root directory for all kafka znodes. 123 | zookeeper.connect=localhost:2181 124 | 125 | # Timeout in ms for connecting to zookeeper 126 | zookeeper.connection.timeout.ms=6000 127 | 128 | 129 | ############################# Group Coordinator Settings ############################# 130 | 131 | # The following configuration specifies the time, in milliseconds, that the GroupCoordinator will delay the initial consumer rebalance. 132 | # The rebalance will be further delayed by the value of group.initial.rebalance.delay.ms as new members join the group, up to a maximum of max.poll.interval.ms. 133 | # The default value for this is 3 seconds. 134 | # We override this to 0 here as it makes for a better out-of-the-box experience for development and testing. 135 | # However, in production environments the default value of 3 seconds is more suitable as this will help to avoid unnecessary, and potentially expensive, rebalances during application startup. 
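# In a production deployment the delay would typically be left at (or restored to) the
# 3 second default, i.e.:
#group.initial.rebalance.delay.ms=3000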
136 | group.initial.rebalance.delay.ms=0 137 | -------------------------------------------------------------------------------- /EXAMPLES/04 Consumer Groups/basic-kafka-commands.txt: -------------------------------------------------------------------------------- 1 | Basic KAFKA Commands 2 | 3 | START ZOOKEEPER 4 | bin/zookeeper-server-start.sh config/zookeeper.properties 5 | 6 | START KAFKA BROKER 7 | bin/kafka-server-start.sh config/server.properties 8 | 9 | CREATE TOPIC 10 | bin/kafka-topics.sh \ 11 | --bootstrap-server localhost:9092 \ 12 | --create \ 13 | --replication-factor 1 \ 14 | --partitions 5 \ 15 | --topic numbers 16 | 17 | LIST TOPICS 18 | bin/kafka-topics.sh \ 19 | --bootstrap-server localhost:9092 \ 20 | --list 21 | 22 | TOPIC DETAILS 23 | bin/kafka-topics.sh \ 24 | --bootstrap-server localhost:9092 \ 25 | --describe \ 26 | --topic numbers 27 | 28 | START CONSOLE PRODUCER 29 | bin/kafka-console-producer.sh \ 30 | --broker-list localhost:9092 \ 31 | --topic numbers 32 | 33 | START CONSOLE CONSUMER 34 | bin/kafka-console-consumer.sh \ 35 | --bootstrap-server localhost:9092 \ 36 | --topic numbers 37 | 38 | START CONSOLE CONSUMER AND READ FROM SPECIFIC PARTITION 39 | bin/kafka-console-consumer.sh \ 40 | --bootstrap-server localhost:9092 \ 41 | --partition 4 \ 42 | --from-beginning \ 43 | --topic numbers 44 | 45 | START CONSOLE CONSUMER AND READ MESSAGES FROM BEGINNING 46 | bin/kafka-console-consumer.sh \ 47 | --bootstrap-server localhost:9092 \ 48 | --topic numbers \ 49 | --from-beginning 50 | 51 | START CONSOLE CONSUMER WITH SPECIFIC CONSUMER GROUP 52 | bin/kafka-console-consumer.sh \ 53 | --bootstrap-server localhost:9092 \ 54 | --topic numbers \ 55 | --group nums \ 56 | --from-beginning 57 | 58 | LIST CONSUMER GROUPS 59 | bin/kafka-consumer-groups.sh \ 60 | --bootstrap-server localhost:9092 \ 61 | --list 62 | 63 | CONSUMER GROUP DETAILS 64 | bin/kafka-consumer-groups.sh \ 65 | --bootstrap-server localhost:9092 \ 66 | --group nums \ 67 | --describe 68 | 69 | -------------------------------------------------------------------------------- /EXAMPLES/05 Performance Testing/basic-kafka-commands.txt: -------------------------------------------------------------------------------- 1 | Basic KAFKA Commands 2 | 3 | START ZOOKEEPER 4 | bin/zookeeper-server-start.sh config/zookeeper.properties 5 | 6 | START KAFKA BROKER 7 | bin/kafka-server-start.sh config/server0.properties 8 | bin/kafka-server-start.sh config/server1.properties 9 | bin/kafka-server-start.sh config/server2.properties 10 | 11 | CREATE TOPIC 12 | bin/kafka-topics.sh \ 13 | --bootstrap-server localhost:9092 \ 14 | --create \ 15 | --replication-factor 3 \ 16 | --partitions 100 \ 17 | --topic perf 18 | 19 | LIST TOPICS 20 | bin/kafka-topics.sh \ 21 | --bootstrap-server localhost:9092 \ 22 | --list 23 | 24 | TOPIC DETAILS 25 | bin/kafka-topics.sh \ 26 | --bootstrap-server localhost:9092 \ 27 | --describe \ 28 | --topic perf 29 | 30 | START CONSOLE PRODUCER 31 | bin/kafka-console-producer.sh \ 32 | --broker-list localhost:9092 \ 33 | --topic perf 34 | 35 | START CONSOLE CONSUMER 36 | bin/kafka-console-consumer.sh \ 37 | --bootstrap-server localhost:9092 \ 38 | --topic perf 39 | 40 | START CONSOLE CONSUMER AND READ FROM SPECIFIC PARTITION 41 | bin/kafka-console-consumer.sh \ 42 | --bootstrap-server localhost:9092 \ 43 | --partition 4 \ 44 | --from-beginning \ 45 | --topic perf 46 | 47 | START CONSOLE CONSUMER AND READ MESSAGES FROM BEGINNING 48 | bin/kafka-console-consumer.sh \ 49 | --bootstrap-server localhost:9092 \ 50 | 
--topic perf \ 51 | --from-beginning 52 | 53 | START CONSOLE CONSUMER WITH SPECIFIC CONSUMER GROUP 54 | bin/kafka-console-consumer.sh \ 55 | --bootstrap-server localhost:9092 \ 56 | --topic perf2 \ 57 | --group perf \ 58 | --from-beginning 59 | 60 | LIST CONSUMER GROUPS 61 | bin/kafka-consumer-groups.sh \ 62 | --bootstrap-server localhost:9092 \ 63 | --list 64 | 65 | CONSUMER GROUP DETAILS 66 | bin/kafka-consumer-groups.sh \ 67 | --bootstrap-server localhost:9092 \ 68 | --group perf \ 69 | --describe 70 | 71 | PRODUCER PERFORMANCE TEST 72 | bin/kafka-producer-perf-test.sh \ 73 | --topic perf2 \ 74 | --num-records 1000 \ 75 | --throughput 10 \ 76 | --record-size 1000 \ 77 | --producer-props \ 78 | bootstrap.servers=localhost:9092 79 | 80 | 81 | CONSUMER PERFORMANCE TEST 82 | bin/kafka-consumer-perf-test.sh \ 83 | --broker-list localhost:9092 \ 84 | --topic perf \ 85 | --messages 1000000 86 | 87 | -------------------------------------------------------------------------------- /EXAMPLES/05 Performance Testing/config/server0.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # see kafka.server.KafkaConfig for additional details and defaults 17 | 18 | ############################# Server Basics ############################# 19 | 20 | # The id of the broker. This must be set to a unique integer for each broker. 21 | broker.id=0 22 | 23 | ############################# Socket Server Settings ############################# 24 | 25 | # The address the socket server listens on. It will get the value returned from 26 | # java.net.InetAddress.getCanonicalHostName() if not configured. 27 | # FORMAT: 28 | # listeners = listener_name://host_name:port 29 | # EXAMPLE: 30 | # listeners = PLAINTEXT://your.host.name:9092 31 | #listeners=PLAINTEXT://:9092 32 | 33 | # Hostname and port the broker will advertise to producers and consumers. If not set, 34 | # it uses the value for "listeners" if configured. Otherwise, it will use the value 35 | # returned from java.net.InetAddress.getCanonicalHostName(). 36 | #advertised.listeners=PLAINTEXT://your.host.name:9092 37 | 38 | # Maps listener names to security protocols, the default is for them to be the same. 
See the config documentation for more details 39 | #listener.security.protocol.map=PLAINTEXT:PLAINTEXT,SSL:SSL,SASL_PLAINTEXT:SASL_PLAINTEXT,SASL_SSL:SASL_SSL 40 | 41 | # The number of threads that the server uses for receiving requests from the network and sending responses to the network 42 | num.network.threads=3 43 | 44 | # The number of threads that the server uses for processing requests, which may include disk I/O 45 | num.io.threads=8 46 | 47 | # The send buffer (SO_SNDBUF) used by the socket server 48 | socket.send.buffer.bytes=102400 49 | 50 | # The receive buffer (SO_RCVBUF) used by the socket server 51 | socket.receive.buffer.bytes=102400 52 | 53 | # The maximum size of a request that the socket server will accept (protection against OOM) 54 | socket.request.max.bytes=104857600 55 | 56 | 57 | ############################# Log Basics ############################# 58 | 59 | # A comma separated list of directories under which to store log files 60 | log.dirs=/tmp/kafka-logs-0 61 | 62 | # The default number of log partitions per topic. More partitions allow greater 63 | # parallelism for consumption, but this will also result in more files across 64 | # the brokers. 65 | num.partitions=1 66 | 67 | # The number of threads per data directory to be used for log recovery at startup and flushing at shutdown. 68 | # This value is recommended to be increased for installations with data dirs located in RAID array. 69 | num.recovery.threads.per.data.dir=1 70 | 71 | ############################# Internal Topic Settings ############################# 72 | # The replication factor for the group metadata internal topics "__consumer_offsets" and "__transaction_state" 73 | # For anything other than development testing, a value greater than 1 is recommended to ensure availability such as 3. 74 | offsets.topic.replication.factor=1 75 | transaction.state.log.replication.factor=1 76 | transaction.state.log.min.isr=1 77 | 78 | ############################# Log Flush Policy ############################# 79 | 80 | # Messages are immediately written to the filesystem but by default we only fsync() to sync 81 | # the OS cache lazily. The following configurations control the flush of data to disk. 82 | # There are a few important trade-offs here: 83 | # 1. Durability: Unflushed data may be lost if you are not using replication. 84 | # 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. 85 | # 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to excessive seeks. 86 | # The settings below allow one to configure the flush policy to flush data after a period of time or 87 | # every N messages (or both). This can be done globally and overridden on a per-topic basis. 88 | 89 | # The number of messages to accept before forcing a flush of data to disk 90 | #log.flush.interval.messages=10000 91 | 92 | # The maximum amount of time a message can sit in a log before we force a flush 93 | #log.flush.interval.ms=1000 94 | 95 | ############################# Log Retention Policy ############################# 96 | 97 | # The following configurations control the disposal of log segments. The policy can 98 | # be set to delete segments after a period of time, or after a given size has accumulated. 99 | # A segment will be deleted whenever *either* of these criteria are met. Deletion always happens 100 | # from the end of the log. 
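# (Illustrative note, not part of the stock configuration: with log.retention.hours=168 and
# log.segment.bytes=1073741824 (1 GiB) as set below, a rolled segment becomes eligible for deletion
# once all of its messages are older than 7 days; if log.retention.bytes were also uncommented at
# 1 GiB, each partition would additionally be trimmed back to roughly that size, whichever limit is
# reached first.)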
101 | 102 | # The minimum age of a log file to be eligible for deletion due to age 103 | log.retention.hours=168 104 | 105 | # A size-based retention policy for logs. Segments are pruned from the log unless the remaining 106 | # segments drop below log.retention.bytes. Functions independently of log.retention.hours. 107 | #log.retention.bytes=1073741824 108 | 109 | # The maximum size of a log segment file. When this size is reached a new log segment will be created. 110 | log.segment.bytes=1073741824 111 | 112 | # The interval at which log segments are checked to see if they can be deleted according 113 | # to the retention policies 114 | log.retention.check.interval.ms=300000 115 | 116 | ############################# Zookeeper ############################# 117 | 118 | # Zookeeper connection string (see zookeeper docs for details). 119 | # This is a comma separated host:port pairs, each corresponding to a zk 120 | # server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". 121 | # You can also append an optional chroot string to the urls to specify the 122 | # root directory for all kafka znodes. 123 | zookeeper.connect=localhost:2181 124 | 125 | # Timeout in ms for connecting to zookeeper 126 | zookeeper.connection.timeout.ms=6000 127 | 128 | 129 | ############################# Group Coordinator Settings ############################# 130 | 131 | # The following configuration specifies the time, in milliseconds, that the GroupCoordinator will delay the initial consumer rebalance. 132 | # The rebalance will be further delayed by the value of group.initial.rebalance.delay.ms as new members join the group, up to a maximum of max.poll.interval.ms. 133 | # The default value for this is 3 seconds. 134 | # We override this to 0 here as it makes for a better out-of-the-box experience for development and testing. 135 | # However, in production environments the default value of 3 seconds is more suitable as this will help to avoid unnecessary, and potentially expensive, rebalances during application startup. 136 | group.initial.rebalance.delay.ms=0 137 | -------------------------------------------------------------------------------- /EXAMPLES/05 Performance Testing/config/server1.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # see kafka.server.KafkaConfig for additional details and defaults 17 | 18 | ############################# Server Basics ############################# 19 | 20 | # The id of the broker. This must be set to a unique integer for each broker. 21 | broker.id=1 22 | 23 | ############################# Socket Server Settings ############################# 24 | 25 | # The address the socket server listens on. 
It will get the value returned from 26 | # java.net.InetAddress.getCanonicalHostName() if not configured. 27 | # FORMAT: 28 | # listeners = listener_name://host_name:port 29 | # EXAMPLE: 30 | # listeners = PLAINTEXT://your.host.name:9092 31 | listeners=PLAINTEXT://:9093 32 | 33 | # Hostname and port the broker will advertise to producers and consumers. If not set, 34 | # it uses the value for "listeners" if configured. Otherwise, it will use the value 35 | # returned from java.net.InetAddress.getCanonicalHostName(). 36 | #advertised.listeners=PLAINTEXT://your.host.name:9092 37 | 38 | # Maps listener names to security protocols, the default is for them to be the same. See the config documentation for more details 39 | #listener.security.protocol.map=PLAINTEXT:PLAINTEXT,SSL:SSL,SASL_PLAINTEXT:SASL_PLAINTEXT,SASL_SSL:SASL_SSL 40 | 41 | # The number of threads that the server uses for receiving requests from the network and sending responses to the network 42 | num.network.threads=3 43 | 44 | # The number of threads that the server uses for processing requests, which may include disk I/O 45 | num.io.threads=8 46 | 47 | # The send buffer (SO_SNDBUF) used by the socket server 48 | socket.send.buffer.bytes=102400 49 | 50 | # The receive buffer (SO_RCVBUF) used by the socket server 51 | socket.receive.buffer.bytes=102400 52 | 53 | # The maximum size of a request that the socket server will accept (protection against OOM) 54 | socket.request.max.bytes=104857600 55 | 56 | 57 | ############################# Log Basics ############################# 58 | 59 | # A comma separated list of directories under which to store log files 60 | log.dirs=/tmp/kafka-logs-1 61 | 62 | # The default number of log partitions per topic. More partitions allow greater 63 | # parallelism for consumption, but this will also result in more files across 64 | # the brokers. 65 | num.partitions=1 66 | 67 | # The number of threads per data directory to be used for log recovery at startup and flushing at shutdown. 68 | # This value is recommended to be increased for installations with data dirs located in RAID array. 69 | num.recovery.threads.per.data.dir=1 70 | 71 | ############################# Internal Topic Settings ############################# 72 | # The replication factor for the group metadata internal topics "__consumer_offsets" and "__transaction_state" 73 | # For anything other than development testing, a value greater than 1 is recommended to ensure availability such as 3. 74 | offsets.topic.replication.factor=1 75 | transaction.state.log.replication.factor=1 76 | transaction.state.log.min.isr=1 77 | 78 | ############################# Log Flush Policy ############################# 79 | 80 | # Messages are immediately written to the filesystem but by default we only fsync() to sync 81 | # the OS cache lazily. The following configurations control the flush of data to disk. 82 | # There are a few important trade-offs here: 83 | # 1. Durability: Unflushed data may be lost if you are not using replication. 84 | # 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. 85 | # 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to excessive seeks. 86 | # The settings below allow one to configure the flush policy to flush data after a period of time or 87 | # every N messages (or both). This can be done globally and overridden on a per-topic basis. 
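# (Illustrative note, not part of the stock configuration: uncommenting log.flush.interval.messages=10000
# below would force an explicit flush roughly every 10,000 messages per partition, trading some
# throughput for durability on an unreplicated broker; with replication, relying on the OS background
# flush is normally sufficient.)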
88 | 89 | # The number of messages to accept before forcing a flush of data to disk 90 | #log.flush.interval.messages=10000 91 | 92 | # The maximum amount of time a message can sit in a log before we force a flush 93 | #log.flush.interval.ms=1000 94 | 95 | ############################# Log Retention Policy ############################# 96 | 97 | # The following configurations control the disposal of log segments. The policy can 98 | # be set to delete segments after a period of time, or after a given size has accumulated. 99 | # A segment will be deleted whenever *either* of these criteria are met. Deletion always happens 100 | # from the end of the log. 101 | 102 | # The minimum age of a log file to be eligible for deletion due to age 103 | log.retention.hours=168 104 | 105 | # A size-based retention policy for logs. Segments are pruned from the log unless the remaining 106 | # segments drop below log.retention.bytes. Functions independently of log.retention.hours. 107 | #log.retention.bytes=1073741824 108 | 109 | # The maximum size of a log segment file. When this size is reached a new log segment will be created. 110 | log.segment.bytes=1073741824 111 | 112 | # The interval at which log segments are checked to see if they can be deleted according 113 | # to the retention policies 114 | log.retention.check.interval.ms=300000 115 | 116 | ############################# Zookeeper ############################# 117 | 118 | # Zookeeper connection string (see zookeeper docs for details). 119 | # This is a comma separated host:port pairs, each corresponding to a zk 120 | # server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". 121 | # You can also append an optional chroot string to the urls to specify the 122 | # root directory for all kafka znodes. 123 | zookeeper.connect=localhost:2181 124 | 125 | # Timeout in ms for connecting to zookeeper 126 | zookeeper.connection.timeout.ms=6000 127 | 128 | 129 | ############################# Group Coordinator Settings ############################# 130 | 131 | # The following configuration specifies the time, in milliseconds, that the GroupCoordinator will delay the initial consumer rebalance. 132 | # The rebalance will be further delayed by the value of group.initial.rebalance.delay.ms as new members join the group, up to a maximum of max.poll.interval.ms. 133 | # The default value for this is 3 seconds. 134 | # We override this to 0 here as it makes for a better out-of-the-box experience for development and testing. 135 | # However, in production environments the default value of 3 seconds is more suitable as this will help to avoid unnecessary, and potentially expensive, rebalances during application startup. 136 | group.initial.rebalance.delay.ms=0 137 | -------------------------------------------------------------------------------- /EXAMPLES/05 Performance Testing/config/server2.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. 
You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # see kafka.server.KafkaConfig for additional details and defaults 17 | 18 | ############################# Server Basics ############################# 19 | 20 | # The id of the broker. This must be set to a unique integer for each broker. 21 | broker.id=2 22 | 23 | ############################# Socket Server Settings ############################# 24 | 25 | # The address the socket server listens on. It will get the value returned from 26 | # java.net.InetAddress.getCanonicalHostName() if not configured. 27 | # FORMAT: 28 | # listeners = listener_name://host_name:port 29 | # EXAMPLE: 30 | # listeners = PLAINTEXT://your.host.name:9092 31 | listeners=PLAINTEXT://:9094 32 | 33 | # Hostname and port the broker will advertise to producers and consumers. If not set, 34 | # it uses the value for "listeners" if configured. Otherwise, it will use the value 35 | # returned from java.net.InetAddress.getCanonicalHostName(). 36 | #advertised.listeners=PLAINTEXT://your.host.name:9092 37 | 38 | # Maps listener names to security protocols, the default is for them to be the same. See the config documentation for more details 39 | #listener.security.protocol.map=PLAINTEXT:PLAINTEXT,SSL:SSL,SASL_PLAINTEXT:SASL_PLAINTEXT,SASL_SSL:SASL_SSL 40 | 41 | # The number of threads that the server uses for receiving requests from the network and sending responses to the network 42 | num.network.threads=3 43 | 44 | # The number of threads that the server uses for processing requests, which may include disk I/O 45 | num.io.threads=8 46 | 47 | # The send buffer (SO_SNDBUF) used by the socket server 48 | socket.send.buffer.bytes=102400 49 | 50 | # The receive buffer (SO_RCVBUF) used by the socket server 51 | socket.receive.buffer.bytes=102400 52 | 53 | # The maximum size of a request that the socket server will accept (protection against OOM) 54 | socket.request.max.bytes=104857600 55 | 56 | 57 | ############################# Log Basics ############################# 58 | 59 | # A comma separated list of directories under which to store log files 60 | log.dirs=/tmp/kafka-logs-2 61 | 62 | # The default number of log partitions per topic. More partitions allow greater 63 | # parallelism for consumption, but this will also result in more files across 64 | # the brokers. 65 | num.partitions=1 66 | 67 | # The number of threads per data directory to be used for log recovery at startup and flushing at shutdown. 68 | # This value is recommended to be increased for installations with data dirs located in RAID array. 69 | num.recovery.threads.per.data.dir=1 70 | 71 | ############################# Internal Topic Settings ############################# 72 | # The replication factor for the group metadata internal topics "__consumer_offsets" and "__transaction_state" 73 | # For anything other than development testing, a value greater than 1 is recommended to ensure availability such as 3. 
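# (Illustrative note, not part of the stock configuration: since this example runs three brokers, a
# production-style setup could instead use offsets.topic.replication.factor=3,
# transaction.state.log.replication.factor=3 and transaction.state.log.min.isr=2; the values below are
# kept at 1 for simplicity.)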
74 | offsets.topic.replication.factor=1 75 | transaction.state.log.replication.factor=1 76 | transaction.state.log.min.isr=1 77 | 78 | ############################# Log Flush Policy ############################# 79 | 80 | # Messages are immediately written to the filesystem but by default we only fsync() to sync 81 | # the OS cache lazily. The following configurations control the flush of data to disk. 82 | # There are a few important trade-offs here: 83 | # 1. Durability: Unflushed data may be lost if you are not using replication. 84 | # 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. 85 | # 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to excessive seeks. 86 | # The settings below allow one to configure the flush policy to flush data after a period of time or 87 | # every N messages (or both). This can be done globally and overridden on a per-topic basis. 88 | 89 | # The number of messages to accept before forcing a flush of data to disk 90 | #log.flush.interval.messages=10000 91 | 92 | # The maximum amount of time a message can sit in a log before we force a flush 93 | #log.flush.interval.ms=1000 94 | 95 | ############################# Log Retention Policy ############################# 96 | 97 | # The following configurations control the disposal of log segments. The policy can 98 | # be set to delete segments after a period of time, or after a given size has accumulated. 99 | # A segment will be deleted whenever *either* of these criteria are met. Deletion always happens 100 | # from the end of the log. 101 | 102 | # The minimum age of a log file to be eligible for deletion due to age 103 | log.retention.hours=168 104 | 105 | # A size-based retention policy for logs. Segments are pruned from the log unless the remaining 106 | # segments drop below log.retention.bytes. Functions independently of log.retention.hours. 107 | #log.retention.bytes=1073741824 108 | 109 | # The maximum size of a log segment file. When this size is reached a new log segment will be created. 110 | log.segment.bytes=1073741824 111 | 112 | # The interval at which log segments are checked to see if they can be deleted according 113 | # to the retention policies 114 | log.retention.check.interval.ms=300000 115 | 116 | ############################# Zookeeper ############################# 117 | 118 | # Zookeeper connection string (see zookeeper docs for details). 119 | # This is a comma separated host:port pairs, each corresponding to a zk 120 | # server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". 121 | # You can also append an optional chroot string to the urls to specify the 122 | # root directory for all kafka znodes. 123 | zookeeper.connect=localhost:2181 124 | 125 | # Timeout in ms for connecting to zookeeper 126 | zookeeper.connection.timeout.ms=6000 127 | 128 | 129 | ############################# Group Coordinator Settings ############################# 130 | 131 | # The following configuration specifies the time, in milliseconds, that the GroupCoordinator will delay the initial consumer rebalance. 132 | # The rebalance will be further delayed by the value of group.initial.rebalance.delay.ms as new members join the group, up to a maximum of max.poll.interval.ms. 133 | # The default value for this is 3 seconds. 134 | # We override this to 0 here as it makes for a better out-of-the-box experience for development and testing. 
135 | # However, in production environments the default value of 3 seconds is more suitable as this will help to avoid unnecessary, and potentially expensive, rebalances during application startup. 136 | group.initial.rebalance.delay.ms=0 137 | -------------------------------------------------------------------------------- /PROJECTS/Java/Kafka/.idea/compiler.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /PROJECTS/Java/Kafka/.idea/dictionaries/bogdan.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | acks 5 | 6 | 7 | -------------------------------------------------------------------------------- /PROJECTS/Java/Kafka/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /PROJECTS/Java/Kafka/.idea/workspace.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 10 | 11 | 16 | 17 | 18 | 23 | 24 | 25 | 26 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 42 | 43 | 49 | 50 | 56 | 57 | 63 | 64 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 1582090995389 94 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | -------------------------------------------------------------------------------- /PROJECTS/Java/Kafka/Kafka.iml: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /PROJECTS/Java/Kafka/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | com.stashchuk 8 | Kafka 9 | 1.0-SNAPSHOT 10 | 11 | 12 | org.apache.kafka 13 | kafka-clients 14 | 2.4.0 15 | 16 | 17 | org.slf4j 18 | slf4j-simple 19 | 1.7.30 20 | 21 | 22 | 23 | 24 | 25 | 26 | 1.8 27 | 1.8 28 | 29 | 30 | -------------------------------------------------------------------------------- /PROJECTS/Java/Kafka/src/main/java/ConsumerWithAutoCommit.java: -------------------------------------------------------------------------------- 1 | import org.apache.kafka.clients.consumer.*; 2 | 3 | import java.time.Duration; 4 | import java.util.*; 5 | 6 | public class ConsumerWithAutoCommit { 7 | public static void main(String[] args) { 8 | Properties props = new Properties(); 9 | props.put("bootstrap.servers", "localhost:9092, localhost:9093, localhost:9094"); 10 | props.put("group.id", "first-group"); 11 | props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); 12 | props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); 13 | props.put("enable.auto.commit", "true"); 14 | props.put("auto.commit.interval.ms", "1000"); 15 | 16 | String topics[] = {"numbers"}; 17 | 18 | KafkaConsumer consumer = new KafkaConsumer(props); 19 | consumer.subscribe(Arrays.asList(topics)); 20 | 21 | try { 22 | while (true) { 23 | ConsumerRecords records = consumer.poll(Duration.ofMillis(100)); 24 | for (ConsumerRecord record : records) { 25 | String message = 
String.format("offset = %d, key = %s, value = %s, partition = %s%n", record.offset(), record.key(), record.value(), record.partition()); 26 | System.out.println(message); 27 | } 28 | } 29 | } catch (Exception e) { 30 | e.printStackTrace(); 31 | } finally { 32 | consumer.close(); 33 | } 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /PROJECTS/Java/Kafka/src/main/java/ConsumerWithManualCommit.java: -------------------------------------------------------------------------------- 1 | import org.apache.kafka.clients.consumer.*; 2 | 3 | import java.io.FileWriter; 4 | import java.io.IOException; 5 | import java.time.Duration; 6 | import java.util.*; 7 | 8 | public class ConsumerWithManualCommit { 9 | public static void main(String[] args) throws IOException { 10 | Properties props = new Properties(); 11 | props.put("bootstrap.servers", "localhost:9092, localhost:9093, localhost:9094"); 12 | props.put("group.id", "second-group"); 13 | props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); 14 | props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); 15 | 16 | String topics[] = {"numbers"}; 17 | 18 | KafkaConsumer consumer = new KafkaConsumer(props); 19 | consumer.subscribe(Arrays.asList(topics)); 20 | 21 | final int minBatchSize = 200; 22 | List> buffer = new ArrayList<>(); 23 | // Change path here to match path on your computer 24 | FileWriter fileWriter = new FileWriter("/Users/bogdan/Desktop/numbers.txt",true); 25 | 26 | try { 27 | while (true) { 28 | ConsumerRecords records = consumer.poll(Duration.ofMillis(100)); 29 | for (ConsumerRecord record : records) { 30 | buffer.add(record); 31 | String message = String.format("offset = %d, key = %s, value = %s, partition = %s%n", record.offset(), record.key(), record.value(), record.partition()); 32 | System.out.println(message); 33 | } 34 | if (buffer.size() >= minBatchSize) { 35 | // Write to file 36 | fileWriter.append(buffer.toString()); 37 | consumer.commitSync(); 38 | buffer.clear(); 39 | } 40 | } 41 | } catch (Exception e) { 42 | e.printStackTrace(); 43 | } finally { 44 | consumer.close(); 45 | fileWriter.close(); 46 | } 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /PROJECTS/Java/Kafka/src/main/java/ConsumerWithPartitionsAssignment.java: -------------------------------------------------------------------------------- 1 | import org.apache.kafka.clients.consumer.*; 2 | import org.apache.kafka.common.TopicPartition; 3 | 4 | import java.time.Duration; 5 | import java.util.*; 6 | 7 | public class ConsumerWithPartitionsAssignment { 8 | public static void main(String[] args) { 9 | Properties props = new Properties(); 10 | props.put("bootstrap.servers", "localhost:9092, localhost:9093, localhost:9094"); 11 | props.put("group.id", "third-group"); 12 | props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); 13 | props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); 14 | props.put("enable.auto.commit", "true"); 15 | props.put("auto.commit.interval.ms", "1000"); 16 | 17 | String topic = "numbers"; 18 | TopicPartition partitions[] = { 19 | new TopicPartition(topic, 2), 20 | new TopicPartition(topic,4) 21 | }; 22 | 23 | KafkaConsumer consumer = new KafkaConsumer(props); 24 | consumer.assign(Arrays.asList(partitions)); 25 | 26 | try { 27 | while (true) { 28 | ConsumerRecords records = 
consumer.poll(Duration.ofMillis(100)); 29 | for (ConsumerRecord record : records) { 30 | String message = String.format("offset = %d, key = %s, value = %s, partition = %s%n", record.offset(), record.key(), record.value(), record.partition()); 31 | System.out.println(message); 32 | } 33 | } 34 | } catch (Exception e) { 35 | e.printStackTrace(); 36 | } finally { 37 | consumer.close(); 38 | } 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /PROJECTS/Java/Kafka/src/main/java/Producer.java: -------------------------------------------------------------------------------- 1 | import org.apache.kafka.clients.producer.*; 2 | 3 | import java.util.Date; 4 | import java.util.Properties; 5 | 6 | public class Producer { 7 | public static void main(String[] args) { 8 | String clientId = "my-producer"; 9 | 10 | Properties props = new Properties(); 11 | props.put("bootstrap.servers", "localhost:9092, localhost:9093, localhost:9094"); 12 | props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer"); 13 | props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer"); 14 | props.put("acks", "all"); 15 | props.put("client.id", clientId); 16 | 17 | KafkaProducer producer = new KafkaProducer(props); 18 | 19 | int numOfRecords = 100; 20 | String topic = "numbers"; 21 | 22 | // EXAMPLE 1 - Numbers as strings for key and value without any delay 23 | for (int i = 0; i < numOfRecords; i++) { 24 | System.out.println("Message " + i + " was just sent"); 25 | producer.send(new ProducerRecord<>(topic, Integer.toString(i), Integer.toString(i))); 26 | } 27 | producer.close(); 28 | 29 | // // EXAMPLE 2 - Formatted string as message and messages are sent with 300ms delay (3 messages / second) 30 | // try { 31 | // for (int i = 0; i < numOfRecords; i++) { 32 | // String message = String.format("Producer %s has sent message %s at %s", clientId, i, new Date()); 33 | // System.out.println(message); 34 | // producer.send(new ProducerRecord<>(topic, Integer.toString(i), message)); 35 | // Thread.sleep(300); 36 | // } 37 | // } catch (Exception e) { 38 | // e.printStackTrace(); 39 | // } finally { 40 | // producer.close(); 41 | // } 42 | 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /PROJECTS/Java/Kafka/target/classes/ConsumerWithAutoCommit.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bstashchuk/apache-kafka-course/3831b82d56f471b0b35edacac2c33b61520ce5a6/PROJECTS/Java/Kafka/target/classes/ConsumerWithAutoCommit.class -------------------------------------------------------------------------------- /PROJECTS/Java/Kafka/target/classes/ConsumerWithManualCommit.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bstashchuk/apache-kafka-course/3831b82d56f471b0b35edacac2c33b61520ce5a6/PROJECTS/Java/Kafka/target/classes/ConsumerWithManualCommit.class -------------------------------------------------------------------------------- /PROJECTS/Java/Kafka/target/classes/ConsumerWithPartitionsAssignment.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bstashchuk/apache-kafka-course/3831b82d56f471b0b35edacac2c33b61520ce5a6/PROJECTS/Java/Kafka/target/classes/ConsumerWithPartitionsAssignment.class -------------------------------------------------------------------------------- 
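A note on running the Java examples above: assuming the project has been built with the pom.xml shown
earlier and a multi-broker cluster is listening on localhost:9092-9094, the classes can be started with
the exec-maven-plugin (an illustrative invocation, not part of the repository):

RUN PRODUCER
mvn compile exec:java -Dexec.mainClass=Producer

RUN CONSUMER (AUTO COMMIT)
mvn compile exec:java -Dexec.mainClass=ConsumerWithAutoCommit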
/PROJECTS/Java/Kafka/target/classes/META-INF/Kafka.kotlin_module: -------------------------------------------------------------------------------- 1 |  -------------------------------------------------------------------------------- /PROJECTS/Java/Kafka/target/classes/Producer.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bstashchuk/apache-kafka-course/3831b82d56f471b0b35edacac2c33b61520ce5a6/PROJECTS/Java/Kafka/target/classes/Producer.class -------------------------------------------------------------------------------- /PROJECTS/Node/kafka/consumer.js: -------------------------------------------------------------------------------- 1 | const { Kafka } = require('kafkajs') 2 | 3 | const kafka = new Kafka({ 4 | clientId: 'my-consumer', 5 | brokers: ['localhost:9092', 'localhost:9093', 'localhost:9094'] 6 | }) 7 | 8 | const consumer = kafka.consumer({ groupId: 'consumer-group' }) 9 | const topic = 'animals' 10 | 11 | const run = async () => { 12 | // Consuming 13 | await consumer.connect() 14 | 15 | await consumer.subscribe({ topic }) 16 | 17 | await consumer.run({ 18 | eachMessage: async ({ partition, message }) => { 19 | console.log({ 20 | partition, 21 | offset: message.offset, 22 | value: message.value.toString(), 23 | }) 24 | }, 25 | }) 26 | } 27 | 28 | run().catch(console.error) -------------------------------------------------------------------------------- /PROJECTS/Node/kafka/package-lock.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "kafka", 3 | "version": "1.0.0", 4 | "lockfileVersion": 1, 5 | "requires": true, 6 | "dependencies": { 7 | "chance": { 8 | "version": "1.1.4", 9 | "resolved": "https://registry.npmjs.org/chance/-/chance-1.1.4.tgz", 10 | "integrity": "sha512-pXPDSu3knKlb6H7ahQfpq//J9mSOxYK8SMtp8MV/nRJh8aLRDIl0ipLH8At8+nVogVwtvPZzyIzY/EbcY/cLuQ==" 11 | }, 12 | "kafkajs": { 13 | "version": "1.12.0", 14 | "resolved": "https://registry.npmjs.org/kafkajs/-/kafkajs-1.12.0.tgz", 15 | "integrity": "sha512-Izkd9iFRgeeKaHEgVpGQH08ygzCbHSxTbnu8W3G3uiNaVjGibUTmTwjv1Qf2M8NORXcPfzwVyg6bBlVj4SKr9g==", 16 | "requires": { 17 | "long": "^4.0.0" 18 | } 19 | }, 20 | "long": { 21 | "version": "4.0.0", 22 | "resolved": "https://registry.npmjs.org/long/-/long-4.0.0.tgz", 23 | "integrity": "sha512-XsP+KhQif4bjX1kbuSiySJFNAehNxgLb6hPRGJ9QsUr8ajHkuXGdrHmFUTUUXhDwVX2R5bY4JNZEwbUiMhV+MA==" 24 | } 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /PROJECTS/Node/kafka/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "kafka", 3 | "version": "1.0.0", 4 | "description": "", 5 | "main": "index.js", 6 | "scripts": { 7 | "test": "echo \"Error: no test specified\" && exit 1" 8 | }, 9 | "author": "", 10 | "license": "ISC", 11 | "dependencies": { 12 | "chance": "1.1.4", 13 | "kafkajs": "1.12.0" 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /PROJECTS/Node/kafka/producer.js: -------------------------------------------------------------------------------- 1 | const { Kafka } = require('kafkajs') 2 | const Chance = require('chance') 3 | 4 | const chance = new Chance() 5 | 6 | const kafka = new Kafka({ 7 | clientId: 'my-producer', 8 | brokers: ['localhost:9092', 'localhost:9093', 'localhost:9094'] 9 | }) 10 | 11 | const producer = kafka.producer() 12 | const topic = 'animals' 13 | 14 | const produceMessage = async 
() => { 15 | const value = chance.animal(); 16 | console.log(value); 17 | 18 | try { 19 | await producer.send({ 20 | topic, 21 | messages: [ 22 | { value }, 23 | ], 24 | }) 25 | } catch (error) { 26 | console.log(error); 27 | } 28 | } 29 | 30 | const run = async () => { 31 | // Producing 32 | await producer.connect() 33 | setInterval(produceMessage, 1000) 34 | } 35 | 36 | run().catch(console.error) -------------------------------------------------------------------------------- /PROJECTS/Python/kafka/consumer.py: -------------------------------------------------------------------------------- 1 | from kafka import KafkaConsumer 2 | consumer = KafkaConsumer( 3 | 'names', 4 | bootstrap_servers=['localhost:9092', 'localhost:9093', 'localhost:9094'], 5 | group_id='names-consumer-group' 6 | ) 7 | 8 | for message in consumer: 9 | print(message) -------------------------------------------------------------------------------- /PROJECTS/Python/kafka/producer.py: -------------------------------------------------------------------------------- 1 | import time 2 | from kafka import KafkaProducer 3 | from faker import Faker 4 | fake = Faker() 5 | producer = KafkaProducer(bootstrap_servers=['localhost:9092', 'localhost:9093', 'localhost:9094']) 6 | for _ in range(100): 7 | name = fake.name() 8 | producer.send('names', name.encode('utf-8')) 9 | print(name) 10 | time.sleep(20) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # apache-kafka-course 2 | This repository is for the Apache Kafka Course 3 | -------------------------------------------------------------------------------- /basic-kafka-commands.txt: -------------------------------------------------------------------------------- 1 | Basic KAFKA Commands 2 | 3 | START ZOOKEEPER 4 | bin/zookeeper-server-start.sh config/zookeeper.properties 5 | 6 | START KAFKA BROKER 7 | bin/kafka-server-start.sh config/server.properties 8 | 9 | CREATE TOPIC 10 | bin/kafka-topics.sh \ 11 | --bootstrap-server localhost:9092 \ 12 | --create \ 13 | --replication-factor 1 \ 14 | --partitions 3 \ 15 | --topic test 16 | 17 | LIST TOPICS 18 | bin/kafka-topics.sh \ 19 | --bootstrap-server localhost:9092 \ 20 | --list 21 | 22 | TOPIC DETAILS 23 | bin/kafka-topics.sh \ 24 | --bootstrap-server localhost:9092 \ 25 | --describe \ 26 | --topic test 27 | 28 | START CONSOLE PRODUCER 29 | bin/kafka-console-producer.sh \ 30 | --broker-list localhost:9092 \ 31 | --topic test 32 | 33 | START CONSOLE CONSUMER 34 | bin/kafka-console-consumer.sh \ 35 | --bootstrap-server localhost:9092 \ 36 | --topic test 37 | 38 | START CONSOLE CONSUMER AND READ MESSAGES FROM BEGINNING 39 | bin/kafka-console-consumer.sh \ 40 | --bootstrap-server localhost:9092 \ 41 | --topic test \ 42 | --from-beginning 43 | 44 | START CONSOLE CONSUMER WITH SPECIFIC CONSUMER GROUP 45 | bin/kafka-console-consumer.sh \ 46 | --bootstrap-server localhost:9092 \ 47 | --topic test \ 48 | --group test \ 49 | --from-beginning 50 | 51 | LIST CONSUMER GROUPS 52 | bin/kafka-consumer-groups.sh \ 53 | --bootstrap-server localhost:9092 \ 54 | --list 55 | 56 | CONSUMER GROUP DETAILS 57 | bin/kafka-consumer-groups.sh \ 58 | --bootstrap-server localhost:9092 \ 59 | --group test \ 60 | --describe 61 | 62 | --------------------------------------------------------------------------------
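Two additional topic-management commands that pair naturally with the cheat sheet above (not part of
the original file; shown as optional extras against the same local broker):

DELETE TOPIC
bin/kafka-topics.sh \
--bootstrap-server localhost:9092 \
--delete \
--topic test

INCREASE NUMBER OF PARTITIONS FOR AN EXISTING TOPIC
bin/kafka-topics.sh \
--bootstrap-server localhost:9092 \
--alter \
--topic test \
--partitions 6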