        partitionInfos = consumer.partitionsFor(topic);
        // Set the offset of every partition to 10
        int resetOffsetValue = 10;
        partitionInfos.forEach(partitionInfo -> currentOffsets.put(
            new TopicPartition(partitionInfo.topic(), partitionInfo.partition()),
            new OffsetAndMetadata(resetOffsetValue)));
        consumer.commitSync(currentOffsets);
    }
}
--------------------------------------------------------------------------------
/Kafka-Utils-Java/src/main/java/com/yhyr/comsumer/CommitSpecialOffsetTriggerRebalance.java:
--------------------------------------------------------------------------------
package com.yhyr.comsumer;

import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.common.PartitionInfo;
import org.apache.kafka.common.TopicPartition;

import java.util.*;

/**
 * Class Subject: modify the offset of each partition of a given topic for a given group;
 * by triggering a rebalance of the group, the consuming position of the original service
 * is changed without stopping that service.
 * The Java API's default client.id takes the form consumer-xxx; given how a rebalance
 * assigns partitions, a carefully chosen client.id lets this tool control the assignment
 * result and thereby rewrite the offsets
 *
 * Tips: offsets are always modified per partition, never per topic
 *
 * @author yhyr
 * @since 2018/12/30 18:25
 */
public class CommitSpecialOffsetTriggerRebalance {
    private static Map<TopicPartition, OffsetAndMetadata> currentOffsets = new HashMap<>();

    public static void main(String[] args) {
        String brokers = "localhost:9092";
        String group = "group_test";
        String topic = "topic_demo";

        Properties props = new Properties();
        // Set a custom client.id that sorts first during assignment
        String clientId = "aaa";
        props.put("client.id", clientId);
        props.put("bootstrap.servers", brokers);
        props.put("group.id", group);
        props.put("enable.auto.commit", "true");
        props.put("auto.offset.reset", "latest");
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);

        // Trigger Rebalance
        consumer.subscribe(Collections.singletonList(topic));
        consumer.poll(0);

        // Fetch the topic's partition info
        List<PartitionInfo> partitionInfos = consumer.partitionsFor(topic);
        // Set the offset of every partition to 10
        int resetOffsetValue = 10;
        partitionInfos.forEach(partitionInfo -> currentOffsets.put(
            new TopicPartition(partitionInfo.topic(), partitionInfo.partition()),
            new OffsetAndMetadata(resetOffsetValue)));
        consumer.commitSync(currentOffsets);
    }
}
--------------------------------------------------------------------------------
/Kafka-Utils-Java/src/main/java/com/yhyr/comsumer/CommonConsumer.java:
--------------------------------------------------------------------------------
package com.yhyr.comsumer;

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;

import java.util.Collections;
import java.util.Properties;

/**
 * @author yhyr
 * @since 2018/12/30 18:01
 */
public class CommonConsumer {
    public static void main(String[] args) {
        String brokers = "localhost:9092";
        String group = "group_test";
        String topic = "topic_demo";

        Properties props = new Properties();
        props.put("bootstrap.servers", brokers);
        props.put("group.id", group);
        props.put("enable.auto.commit", "true");
        props.put("auto.offset.reset", "latest");
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");

        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
        consumer.subscribe(Collections.singletonList(topic));

        while (true) {
            ConsumerRecords<String, String> records = consumer.poll(100);

            for (ConsumerRecord<String, String> record : records) {
                System.out.println(record.toString());
            }
        }
    }
}
--------------------------------------------------------------------------------
/Kafka-Utils-Java/src/main/java/com/yhyr/comsumer/ConsumerGroupOffset.java:
--------------------------------------------------------------------------------
package com.yhyr.comsumer;

import kafka.coordinator.group.GroupMetadataManager;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.TopicPartition;

import java.util.Collections;
import java.util.Properties;

/**
 * Class Subject: consume the offset records stored in __consumer_offsets for a given group
 *
 * @author yhyr
 * @since 2019/01/08 10:48
 */
public class ConsumerGroupOffset {
    public static void main(String[] args) {
        String brokers = "localhost:9092";
        String group = "group_test";
        String topic = "__consumer_offsets";

        Properties props = new Properties();
        props.put("bootstrap.servers", brokers);
        props.put("group.id", group);
        props.put("enable.auto.commit", "false");
        props.put("auto.offset.reset", "earliest");
        props.put("max.poll.records", 1);
        props.put("key.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");

        KafkaConsumer<byte[], byte[]> consumer = new KafkaConsumer<>(props);
        // Compute the __consumer_offsets partition that holds this group's offsets
        // (mirrors the broker's groupId.hashCode % offsets.topic.num.partitions, 50 by default)
        int partitionId = Math.abs(group.hashCode() % 50);
        // Consume the offset records of that partition only
        consumer.assign(Collections.singletonList(new TopicPartition(topic, partitionId)));

        GroupMetadataManager.OffsetsMessageFormatter formatter = new GroupMetadataManager.OffsetsMessageFormatter();
        while (true) {
            ConsumerRecords<byte[], byte[]> records = consumer.poll(100);
            for (ConsumerRecord<byte[], byte[]> record : records) {
                formatter.writeTo(record, System.out);
            }
        }
    }
}
--------------------------------------------------------------------------------
/Kafka-Utils-Java/src/main/java/com/yhyr/comsumer/ConsumerSpecialOffset.java:
--------------------------------------------------------------------------------
package com.yhyr.comsumer;

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;

import java.util.Collections;
import java.util.Properties;

/**
 * Class Subject: start consuming from a specified offset
 *
 * Tips: by default the consumer commits offsets automatically on your behalf
 * (enable.auto.commit=true); to sniff data from a specific offset without changing the offsets
 * stored on the broker side, set enable.auto.commit=false
 *
 * @author yhyr
 * @since 2018/12/30 18:24
 */
public class ConsumerSpecialOffset {
    public static void main(String[] args) {
        String brokers = "localhost:9092";
        String topic = "topic_demo";
        String group = "group_test";

        int customPartitionOffset = 500;

        Properties props = new Properties();
        props.put("bootstrap.servers", brokers);
        props.put("group.id", group);
        props.put("enable.auto.commit", "false");
        props.put("auto.offset.reset", "latest");
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);

        /*
         * KafkaConsumer.seek is a client-side operation; after subscribing, poll must therefore
         * be called once so that the client receives its partition assignment, and only then can
         * seek reposition the consumption
         */
        consumer.subscribe(Collections.singletonList(topic));
        consumer.poll(0);
        consumer.assignment().forEach(topicPartition -> consumer.seek(topicPartition, customPartitionOffset));

        while (true) {
            ConsumerRecords<String, String> records = consumer.poll(100);
            for (ConsumerRecord<String, String> record : records) {
                System.out.println(record.toString());
            }
        }
    }
}
--------------------------------------------------------------------------------
/Kafka-Utils-Java/src/main/java/com/yhyr/comsumer/GetEffectiveOffset.java:
--------------------------------------------------------------------------------
package com.yhyr.comsumer;

import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.TopicPartition;

import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.stream.Collectors;

/**
 * Class Subject: get the smallest and largest offset of each partition of a given topic; applies
 * to Kafka 1.0 and later
 *
 * Tips: what is obtained is essentially the offset range of the topic's valid data set; it is
 * unrelated to whether the data has been consumed
 *
 * @author yhyr
 * @since 2018/12/30 18:32
 */
public class GetEffectiveOffset {
    public static void main(String[] args) {

        String brokers = "localhost:9092";
        String topic = "topic_demo";

        Properties props = new Properties();
        props.put("bootstrap.servers", brokers);
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);

        List<TopicPartition> topicPartitions = consumer.partitionsFor(topic).stream()
            .map(partitionInfo -> new TopicPartition(partitionInfo.topic(), partitionInfo.partition()))
            .collect(Collectors.toList());

        Map<TopicPartition, Long> beginningOffsets = consumer.beginningOffsets(topicPartitions);
        Map<TopicPartition, Long> endOffsets = consumer.endOffsets(topicPartitions);

        beginningOffsets.forEach((tp, offset) -> System.out.println(String
            .format("%s => beginning Offset = %s; end Offset = %s", tp, offset, endOffsets.get(tp))));
    }
}
--------------------------------------------------------------------------------
/Kafka-Utils-Java/src/main/java/com/yhyr/comsumer/GetOffsetWithTimestamp.java:
--------------------------------------------------------------------------------
package com.yhyr.comsumer;

import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.consumer.OffsetAndTimestamp;
import org.apache.kafka.common.TopicPartition;

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;

/**
 * Class Subject: query the offset range of the messages the broker wrote within a given time
 * window; applies to Kafka 0.10.1.0 and later
 *
 * Tips: KafkaConsumer.offsetsForTimes looks up the offset of the first message whose timestamp
 * is at or after the given time; if no such message exists, it returns null
 *
 * @author yhyr
 * @since 2018/12/30 18:28
 */
public class GetOffsetWithTimestamp {
    private static final String DATE_PATTERN = "yyyy-MM-dd HH:mm:ss";

    public static void main(String[] args) {
        String brokers = "localhost:9092";
        String topic = "topic_demo";

        String beginTime = "2018-12-30 16:32:49";
        String endTime = "2018-12-30 16:35:51";

        Properties props = new Properties();
        props.put("bootstrap.servers", brokers);
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);

        // Find Begin Offset
        Map<TopicPartition, Long> beginOffsetSearchMap = new HashMap<>();
        consumer.partitionsFor(topic).forEach(
            partitionInfo -> beginOffsetSearchMap.put(
                new TopicPartition(partitionInfo.topic(), partitionInfo.partition()),
                stringToTimestamp(beginTime, DATE_PATTERN)));
        Map<TopicPartition, OffsetAndTimestamp> resultOfBeginOffset = consumer.offsetsForTimes(beginOffsetSearchMap);

        // Find End Offset
        Map<TopicPartition, Long> endOffsetSearchMap = new HashMap<>();
        consumer.partitionsFor(topic).forEach(
            partitionInfo -> endOffsetSearchMap.put(
                new TopicPartition(partitionInfo.topic(), partitionInfo.partition()),
                stringToTimestamp(endTime, DATE_PATTERN)));
        Map<TopicPartition, OffsetAndTimestamp> resultOfEndOffset = consumer.offsetsForTimes(endOffsetSearchMap);

        // Format Print
        resultOfBeginOffset.forEach((key, value) -> System.out.println(String.format(
            "From %s to %s, %s offset range = [%s, %s]", beginTime, endTime, key, formatPrint(value),
            formatPrint(resultOfEndOffset.get(key)))));
    }

    private static Object formatPrint(OffsetAndTimestamp offsetAndTimestamp) {
        return (offsetAndTimestamp == null) ? "null" : offsetAndTimestamp.offset();
    }

    private static Long stringToTimestamp(String date, String pattern) {
        SimpleDateFormat sdf = new SimpleDateFormat(pattern);
        try {
            return sdf.parse(date).getTime();
        } catch (ParseException e) {
            e.printStackTrace();
            return null;
        }
    }
}
--------------------------------------------------------------------------------
/Kafka-Utils-Java/src/main/java/com/yhyr/comsumer/ListenerRebalance.java:
--------------------------------------------------------------------------------
package com.yhyr.comsumer;

import org.apache.kafka.clients.consumer.*;
import org.apache.kafka.common.TopicPartition;

import java.util.*;

/**
 * Class Subject: listen on the client side for the group's rebalance behaviour;
 * before a rebalance is triggered, record the offset each assigned partition is currently
 * processing; after the rebalance, record the newly assigned partitions
 *
 * @author yhyr
 * @since 2018/12/30 18:25
 */
public class ListenerRebalance {
    private static Map<TopicPartition, OffsetAndMetadata> currentOffsets = new HashMap<>();

    private static String brokers = "localhost:9092";
    private static String group = "group_test";
    private static String topic = "topic_demo";

    private static KafkaConsumer<String, String> consumer;

    static {
        Properties props = new Properties();
        props.put("bootstrap.servers", brokers);
        props.put("group.id", group);
        props.put("enable.auto.commit", "true");
        props.put("auto.commit.interval.ms", "1000");
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        consumer = new KafkaConsumer<>(props);
    }

    private class CustomHandleRebalance implements ConsumerRebalanceListener {
        @Override
        public void onPartitionsRevoked(Collection<TopicPartition> collection) {
            System.out.println(String.format(
                "Before Rebalance, Assignment partitions is: %s; Current each partition's latest offset is: %s",
                collection.toString(), currentOffsets.toString()));
        }

        @Override
        public void onPartitionsAssigned(Collection<TopicPartition> collection) {
            System.out.println(String.format(
                "After Rebalance, Assignment partitions is: %s; Current each partition's latest offset is: %s",
                collection.toString(), currentOffsets.toString()));
        }
    }

    private void consumer() {
        try {
            consumer.subscribe(Collections.singletonList(topic), new CustomHandleRebalance());
            while (true) {
                ConsumerRecords<String, String> records = consumer.poll(100);
                for (ConsumerRecord<String, String> record : records) {
                    // record the position after the current msg (a committed offset names the
                    // next message to consume, hence offset + 1)
                    currentOffsets.put(new TopicPartition(record.topic(), record.partition()),
                        new OffsetAndMetadata(record.offset() + 1));
                    // processing msg
                    System.out.println("Processing msg : " + record.toString());
                }
            }
        } catch (Exception e) {
            System.out.println("Unexpected error: " + e.getMessage());
        } finally {
            consumer.commitSync(currentOffsets);
        }
    }

    public static void main(String[] args) {
        ListenerRebalance action = new ListenerRebalance();
        action.consumer();
    }
}
--------------------------------------------------------------------------------
/Kafka-Utils-Java/src/main/java/com/yhyr/producer/CommonProducer.java:
--------------------------------------------------------------------------------
package com.yhyr.producer;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;

import java.util.Properties;
import java.util.stream.IntStream;

/**
 * @author yhyr
 * @since 2018/12/30 18:02
 */
public class CommonProducer {
    public static void main(String[] args) {
        String topic = "topic_demo";
        String brokerList = "localhost:9092";

        Properties props = new Properties();
        props.put("bootstrap.servers", brokerList);
        props.put("acks", "all");
        props.put("retries", 0);
        props.put("batch.size", 16384);
        props.put("linger.ms", 1);
        props.put("buffer.memory", 33554432);
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

        Producer<String, String> producer = new KafkaProducer<>(props);

        IntStream.range(0, 1000).<ProducerRecord<String, String>>mapToObj(
            i -> new ProducerRecord<>(topic, "msg -> " + i)).forEach(producer::send);

        // send() is asynchronous; close() flushes any buffered records before the JVM exits
        producer.close();
        System.out.println("send message over");
    }
}
--------------------------------------------------------------------------------
/Kafka-Utils-Python/consumer/__init__.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
@Author YH YR
@Time 2018/12/31 13:17
"""
--------------------------------------------------------------------------------
/Kafka-Utils-Python/consumer/commit_special_offset.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Subject: modify the offset of each partition of a given topic for a given group; does not
    trigger a rebalance of the group
Tips: offsets are always modified per partition, never per topic

@Author YH YR
@Time 2018/12/31 14:10
"""
from kafka import KafkaConsumer, TopicPartition, OffsetAndMetadata


class CommitSpecialOffset:
    def __init__(self, broker_list, group_name, topic):
        self.topic = topic
        self.consumer = KafkaConsumer(group_id=group_name, bootstrap_servers=broker_list)

    def reset_offset(self, reset_offset_value):
        partitions_offset = {}
        for partition_id in self.consumer.partitions_for_topic(self.topic):
            # use self.topic, not the module-level variable of the same name
            partitions_offset[TopicPartition(self.topic, partition_id)] = OffsetAndMetadata(reset_offset_value, '')

        self.consumer.commit(partitions_offset)


if __name__ == '__main__':
    broker_list = 'localhost:9092'
    group_name = 'group_test'
    topic = 'topic_demo'

    action = CommitSpecialOffset(broker_list, group_name, topic)
    action.reset_offset(500)
--------------------------------------------------------------------------------
/Kafka-Utils-Python/consumer/commit_special_offset_trigger_rebalance.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Subject: modify the offset of each partition of a given topic for a given group;
    by triggering a rebalance of the group, the consuming position of the original service
    is changed without stopping that service.
    kafka-python's client.id is prefixed with 'kafka-python-'; given how a rebalance assigns
    partitions, a carefully chosen client.id lets this tool control the assignment result
    and thereby rewrite the offsets
Tips: offsets are always modified per partition, never per topic

@Author YH YR
@Time 2018/12/31 14:10
"""
from kafka import KafkaConsumer, TopicPartition, OffsetAndMetadata


class CommitSpecialOffset:
    def __init__(self, broker_list, group_name, topic, client_id=None):
        self.topic = topic
        self.consumer = KafkaConsumer(group_id=group_name, bootstrap_servers=broker_list, client_id=client_id)

    def reset_offset(self, reset_offset_value):
        # Trigger Rebalance
        self.consumer.subscribe(self.topic)
        self.consumer.poll(0)
        # Reset Offset
        partitions_offset = {}
        for partition_id in self.consumer.partitions_for_topic(self.topic):
            # use self.topic, not the module-level variable of the same name
            partitions_offset[TopicPartition(self.topic, partition_id)] = OffsetAndMetadata(reset_offset_value, '')

        self.consumer.commit(partitions_offset)


if __name__ == '__main__':
    broker_list = 'localhost:9092'
    group_name = 'group_test'
    topic = 'topic_demo'
    client_id = 'aaa'

    action = CommitSpecialOffset(broker_list, group_name, topic, client_id)
    action.reset_offset(10)
--------------------------------------------------------------------------------
/Kafka-Utils-Python/consumer/common_consumer.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
@Author YH YR
@Time 2018/12/31 13:35
"""

from kafka import KafkaConsumer


class KafkaConsumerUtil:
    def __init__(self, broker_list, group_name, topic, enable_auto_commit=True, auto_offset_reset='latest'):
        self.broker_list = broker_list
        self.topic = topic
        self.group_name = group_name
        self.enable_auto_commit = enable_auto_commit
        self.auto_offset_reset = auto_offset_reset

    def consumer(self, process_msg):
        consumer = KafkaConsumer(*self.topic, group_id=self.group_name, bootstrap_servers=self.broker_list,
                                 enable_auto_commit=self.enable_auto_commit, auto_offset_reset=self.auto_offset_reset)
        # iterating the consumer blocks and yields messages indefinitely
        for msg in consumer:
            process_msg(msg)


def print_msg(msg_dic):
    print(msg_dic)


if __name__ == '__main__':
    broker_list = 'localhost:9092'
    group_name = 'group_test'
    topic = ['topic_demo']

    action = KafkaConsumerUtil(broker_list, group_name, topic)
    action.consumer(print_msg)
--------------------------------------------------------------------------------
/Kafka-Utils-Python/consumer/consumer_special_offset.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Subject: start consuming from a specified offset
Tips:
    Kafka commits offsets automatically by default (enable_auto_commit=True);
    to sniff data from a specific offset without changing the offsets stored on the broker side,
    set enable_auto_commit=False

    Consumption happens per concrete partition, so the given offset is applied to one or more
    specific partitions rather than to the topic as a whole

@Author YH YR
@Time 2018/12/31 14:09
"""

from kafka import KafkaConsumer


class ConsumerSpecialOffset:
    def __init__(self, broker_list, group_name, topic, enable_auto_commit=True, auto_offset_reset='latest'):
        self.broker_list = broker_list
        self.topic = topic
        self.group_name = group_name
        self.enable_auto_commit = enable_auto_commit
        self.auto_offset_reset = auto_offset_reset

    def consumer_from_offset(self, process_msg, offset):
        consumer = KafkaConsumer(group_id=self.group_name, bootstrap_servers=self.broker_list,
                                 enable_auto_commit=self.enable_auto_commit, auto_offset_reset=self.auto_offset_reset)
        # poll once after subscribing so the client receives its partition assignment,
        # then seek each assigned partition to the requested offset
        consumer.subscribe(self.topic)
        consumer.poll(0)
        for topic_partition in consumer.assignment():
            consumer.seek(topic_partition, offset)
        while True:
            consumer_records = consumer.poll(100)
            for partition_info, records in consumer_records.items():
                for record in records:
                    process_msg(record)


def print_msg(msg_dic):
    print(msg_dic)


if __name__ == '__main__':
    broker_list = 'localhost:9092'
    group_name = 'group_test'
    topic = ['topic_demo']

    action = ConsumerSpecialOffset(broker_list, group_name, topic)
    action.consumer_from_offset(print_msg, 1000)
--------------------------------------------------------------------------------
/Kafka-Utils-Python/consumer/consumer_timestamp_window.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Subject: consume the messages produced within a given time window

@Author YH YR
@Time 2019/01/23 21:23
"""
import time
from kafka import KafkaConsumer, TopicPartition


class ConsumerTimeStampWindow:
    def __init__(self, broker_list, group_name, topic, enable_auto_commit=True, auto_offset_reset='latest'):
        self.topic = topic
        self.consumer = KafkaConsumer(group_id=group_name, bootstrap_servers=broker_list,
                                      enable_auto_commit=enable_auto_commit, auto_offset_reset=auto_offset_reset)

    def consumer_from_offset_window(self, process_msg, begin_time, end_time):
        self.consumer.subscribe(self.topic)
        self.consumer.poll(0)

        # seek every partition to the first offset inside the window
        begin_offset_dic, end_offset_dic = self.get_offset_time_window(begin_time, end_time)
        for topic_partition, offset_and_timestamp in begin_offset_dic.items():
            self.consumer.seek(topic_partition, offset_and_timestamp[0])

        # one finish flag per partition; stop once every partition has moved past its end offset
        topic_partition_info = self.consumer.assignment()
        partition_consumer_finish_flag = dict(zip(topic_partition_info, [False] * len(topic_partition_info)))

        while True:
            if False not in partition_consumer_finish_flag.values():
                return
            consumer_records = self.consumer.poll(100)
            for partition_info, records in consumer_records.items():
                if partition_consumer_finish_flag[partition_info]:
                    print('-------------- {0} consumer finish --------------'.format(partition_info))
                    break
                for record in records:
                    if record.offset <= end_offset_dic[partition_info][0]:
                        process_msg(record)
                    else:
                        partition_consumer_finish_flag[partition_info] = True

    def get_offset_time_window(self, begin_time, end_time):
        partitions_structs = []

        for partition_id in self.consumer.partitions_for_topic(self.topic):
            partitions_structs.append(TopicPartition(self.topic, partition_id))

        begin_search = {}
        for partition in partitions_structs:
            begin_search[partition] = begin_time if isinstance(begin_time, int) else self.__str_to_timestamp(begin_time)
        begin_offset = self.consumer.offsets_for_times(begin_search)

        end_search = {}
        for partition in partitions_structs:
            end_search[partition] = end_time if isinstance(end_time, int) else self.__str_to_timestamp(end_time)
        end_offset = self.consumer.offsets_for_times(end_search)

        for topic_partition, offset_and_timestamp in begin_offset.items():
            b_offset = 'null' if offset_and_timestamp is None else offset_and_timestamp[0]
            e_offset = 'null' if end_offset[topic_partition] is None else end_offset[topic_partition][0]
            print('Between {0} and {1}, {2} offset range = [{3}, {4}]'.format(begin_time, end_time, topic_partition,
                                                                              b_offset, e_offset))
        return begin_offset, end_offset

    @staticmethod
    def __str_to_timestamp(str_time, format_type='%Y-%m-%d %H:%M:%S'):
        time_array = time.strptime(str_time, format_type)
        return int(time.mktime(time_array)) * 1000


def print_msg(msg_dic):
    print(msg_dic)


if __name__ == '__main__':
    broker_list = 'localhost:9092'
    group_name = 'group_test'
    topic = 'topic_demo'

    action = ConsumerTimeStampWindow(broker_list, group_name, topic)
    action.consumer_from_offset_window(print_msg, '2019-01-23 21:30:00', '2019-01-23 21:36:00')
--------------------------------------------------------------------------------
/Kafka-Utils-Python/consumer/get_effective_offset.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Subject: get the smallest and largest offset of each partition of a given topic; applies to
    Kafka 1.0 and later
Tips: what is obtained is essentially the offset range of the topic's valid data set; it is
    unrelated to whether the data has been consumed

@Author YH YR
@Time 2018/12/31 14:12
"""
from kafka import KafkaConsumer, TopicPartition


class GetEffectiveOffset:
    def __init__(self, broker_list, group_name, topic):
        self.topic = topic
        self.consumer = KafkaConsumer(group_id=group_name, bootstrap_servers=broker_list)

    def get_offset(self):
        partitions_structs = []

        for partition_id in self.consumer.partitions_for_topic(self.topic):
            partitions_structs.append(TopicPartition(self.topic, partition_id))

        beginning_offset = self.consumer.beginning_offsets(partitions_structs)
        end_offset = self.consumer.end_offsets(partitions_structs)

        for partition, offset in beginning_offset.items():
            print('{0} => beginning offset = {1}; end offset = {2}'.format(partition, offset,
                                                                           end_offset[partition]))


if __name__ == '__main__':
    broker_list = 'localhost:9092'
    group_name = 'group_test'
    topic = 'topic_demo'

    action = GetEffectiveOffset(broker_list, group_name, topic)
    action.get_offset()
--------------------------------------------------------------------------------
/Kafka-Utils-Python/consumer/get_offset_with_timestamp.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Subject: query the offset range of the messages the broker wrote within a given time window;
    applies to Kafka 0.10.1.0 and later
Tips:
    offsets_for_times looks up the offset of the first message whose timestamp is at or after
    the given time; if no such message exists, it returns None
    Timestamps are in milliseconds

@Author YH YR
@Time 2018/12/31 14:13
"""
import time
from kafka import KafkaConsumer, TopicPartition


class GetOffsetWithTimestamp:
    def __init__(self, broker_list, topic):
        self.topic = topic
        self.consumer = KafkaConsumer(bootstrap_servers=broker_list)

    def get_offset_time_window(self, begin_time, end_time):
        partitions_structs = []

        for partition_id in self.consumer.partitions_for_topic(self.topic):
            partitions_structs.append(TopicPartition(self.topic, partition_id))

        begin_search = {}
        for partition in partitions_structs:
            begin_search[partition] = begin_time if isinstance(begin_time, int) else self.__str_to_timestamp(begin_time)
        begin_offset = self.consumer.offsets_for_times(begin_search)

        end_search = {}
        for partition in partitions_structs:
            end_search[partition] = end_time if isinstance(end_time, int) else self.__str_to_timestamp(end_time)
        end_offset = self.consumer.offsets_for_times(end_search)

        for topic_partition, offset_and_timestamp in begin_offset.items():
            b_offset = 'null' if offset_and_timestamp is None else offset_and_timestamp[0]
            e_offset = 'null' if end_offset[topic_partition] is None else end_offset[topic_partition][0]
            print('Between {0} and {1}, {2} offset range = [{3}, {4}]'.format(begin_time, end_time, topic_partition,
                                                                              b_offset, e_offset))

    @staticmethod
    def __str_to_timestamp(str_time, format_type='%Y-%m-%d %H:%M:%S'):
        time_array = time.strptime(str_time, format_type)
        return int(time.mktime(time_array)) * 1000


if __name__ == '__main__':
    broker_list = 'localhost:9092'
    topic = 'topic_demo'

    action = GetOffsetWithTimestamp(broker_list, topic)
    action.get_offset_time_window('2018-12-30 17:00:00', '2018-12-30 20:00:00')
--------------------------------------------------------------------------------
/Kafka-Utils-Python/consumer/listener_rebalance.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Subject: listen on the client side for the group's rebalance behaviour
Tips:
    A custom consumer client_id can be used to control the partition assignment after a rebalance
    The Python consumer's default client_id follows the pattern: kafka-python-{version}

@Author YH YR
@Time 2018/12/31 14:13
"""
import time
from kafka import KafkaConsumer, ConsumerRebalanceListener


class CustomHandleRebalance(ConsumerRebalanceListener):
    def on_partitions_revoked(self, revoked):
        print('Before Rebalance, Assignment partitions is: {0}'.format(revoked))

    def on_partitions_assigned(self, assigned):
        print('After Rebalance, Assignment partition is: {0}'.format(assigned))


class ListenerRebalance:
    def __init__(self, broker_list, group_name, topic, enable_auto_commit=True, auto_offset_reset='latest',
                 client_id=None):
        self.broker_list = broker_list
        self.topic = topic
        self.group_name = group_name
        self.enable_auto_commit = enable_auto_commit
        self.auto_offset_reset = auto_offset_reset
        self.client_id = client_id

    def consumer(self, process_msg):
        consumer = KafkaConsumer(group_id=self.group_name, bootstrap_servers=self.broker_list,
                                 client_id=self.client_id, enable_auto_commit=self.enable_auto_commit,
                                 auto_offset_reset=self.auto_offset_reset)

        consumer.subscribe(self.topic, listener=CustomHandleRebalance())

        while True:
            consumer_records = consumer.poll(100, max_records=1)
            for partition_info, records in consumer_records.items():
                for record in records:
                    process_msg(record)
            time.sleep(2)


def print_msg(msg_dic):
    print(msg_dic)


if __name__ == '__main__':
    broker_list = 'localhost:9092'
    group_name = 'group_test'
    topic = ['topic_demo']

    action = ListenerRebalance(broker_list, group_name, topic)
    action.consumer(print_msg)
--------------------------------------------------------------------------------
/Kafka-Utils-Python/producer/__init__.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
@Author YH YR
@Time 2018/12/31 13:17
"""
--------------------------------------------------------------------------------
/Kafka-Utils-Python/producer/common_producer.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
@Author YH YR
@Time 2018/12/31 13:21
"""

from kafka import KafkaProducer


class CommonKafkaProducer:
    def __init__(self, broker_list):
        self.producer = KafkaProducer(bootstrap_servers=broker_list)

    def produce(self, topic, msg):
        self.producer.send(topic, bytes(msg, encoding='utf8'))

    def __del__(self):
        # close() flushes any records still buffered by the async sender
        self.producer.close()


if __name__ == '__main__':
    broker_list = 'localhost:9092'
    topic = 'topic_demo'

    action = CommonKafkaProducer(broker_list)
    for i in range(1000):
        action.produce(topic, 'msg -> {0}'.format(i))
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Kafka-Utils

## Dependencies

> Kafka 1.0.1
>
> JDK 1.8
>
> kafka-clients 1.0.1
>
> Python 3.6.5
>
> kafka-python 1.4.3

Java and Python implementations of several common Kafka utility classes; supplemented and improved from time to time.

Currently included:

+ Start consuming from a specified offset
+ Rewrite offsets without stopping the consuming service
+ Get the valid offset range of each partition
+ Look up offsets by timestamp
+ Monitor rebalances
+ Consume the messages in `__consumer_offsets`
+ Consume all messages produced within a given time window
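
As a quick taste of the Python utilities, the snippet below prints the valid offset range of every partition of a topic. This is a minimal sketch: the broker address, group, and topic are the placeholder values used throughout the examples, and it assumes you run it from the `Kafka-Utils-Python` directory so the `consumer` package is importable.

```python
from consumer.get_effective_offset import GetEffectiveOffset

# Placeholder connection values; point these at your own cluster
action = GetEffectiveOffset('localhost:9092', 'group_test', 'topic_demo')
action.get_offset()  # prints "<TopicPartition> => beginning offset = x; end offset = y" per partition
```

The Java classes are run the same way: each file carries a `main` method with the same placeholder broker, group, and topic constants.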
--------------------------------------------------------------------------------