├── .gitignore
├── Kafka-Utils-Java
│   ├── pom.xml
│   └── src
│       └── main
│           └── java
│               └── com
│                   └── yhyr
│                       ├── comsumer
│                       │   ├── CommitSpecialOffset.java
│                       │   ├── CommitSpecialOffsetTriggerRebalance.java
│                       │   ├── CommonConsumer.java
│                       │   ├── ConsumerGroupOffset.java
│                       │   ├── ConsumerSpecialOffset.java
│                       │   ├── GetEffectiveOffset.java
│                       │   ├── GetOffsetWithTimestamp.java
│                       │   └── ListenerRebalance.java
│                       └── producer
│                           └── CommonProducer.java
├── Kafka-Utils-Python
│   ├── consumer
│   │   ├── __init__.py
│   │   ├── commit_special_offset.py
│   │   ├── commit_special_offset_trigger_rebalance.py
│   │   ├── common_consumer.py
│   │   ├── consumer_special_offset.py
│   │   ├── consumer_timestamp_window.py
│   │   ├── get_effective_offset.py
│   │   ├── get_offset_with_timestamp.py
│   │   └── listener_rebalance.py
│   └── producer
│       ├── __init__.py
│       └── common_producer.py
└── README.md

/.gitignore:
--------------------------------------------------------------------------------
.idea/
.git/
*.iml

*.pyc

*target/
logs/
gitpull.sh
/.settings/
.classpath
.project
bin/
--------------------------------------------------------------------------------

/Kafka-Utils-Java/pom.xml:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.yhyr</groupId>
    <artifactId>kafka-utils</artifactId>
    <version>1.0-SNAPSHOT</version>

    <dependencies>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka_2.11</artifactId>
            <version>1.0.1</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>
            <plugin>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>3.0.0</version>
                <configuration>
                    <archive>
                        <manifest>
                            <mainClass>streams.WordCount</mainClass>
                        </manifest>
                    </archive>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
--------------------------------------------------------------------------------

/Kafka-Utils-Java/src/main/java/com/yhyr/comsumer/CommitSpecialOffset.java:
--------------------------------------------------------------------------------
package com.yhyr.comsumer;

import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.common.PartitionInfo;
import org.apache.kafka.common.TopicPartition;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;

/**
 * Class Subject: Reset the offset of every partition of a given topic for a given group,
 * without triggering a rebalance of the group.
 *
 * Tips: Offsets are always reset per partition, never per topic.
 *
 * @author yhyr
 * @since 2018/12/30 18:25
 */
public class CommitSpecialOffset {
    private static Map<TopicPartition, OffsetAndMetadata> currentOffsets = new HashMap<>();

    public static void main(String[] args) {
        String brokers = "localhost:9092";
        String group = "group_test";
        String topic = "topic_demo";

        Properties props = new Properties();
        props.put("bootstrap.servers", brokers);
        props.put("group.id", group);
        props.put("enable.auto.commit", "true");
        props.put("auto.offset.reset", "latest");
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);

        // Look up the topic's partitions
        List<PartitionInfo> partitionInfos = consumer.partitionsFor(topic);
        // Set every partition's offset to 10
        // Note: committing from outside the group only succeeds while the group has no
        // active members; see CommitSpecialOffsetTriggerRebalance for the live-group variant
        int resetOffsetValue = 10;
        partitionInfos.forEach(partitionInfo -> currentOffsets.put(
            new TopicPartition(partitionInfo.topic(), partitionInfo.partition()),
            new OffsetAndMetadata(resetOffsetValue)));
        consumer.commitSync(currentOffsets);
    }
}
--------------------------------------------------------------------------------
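
To double-check that a reset like the one above has taken effect, the committed offsets can simply be read back (Kafka's bundled kafka-consumer-groups.sh --describe shows the same information). A minimal sketch with kafka-python, using the same placeholder broker/group/topic names as the sources:

from kafka import KafkaConsumer, TopicPartition

# Sketch: read back the committed offset of every partition of a topic.
consumer = KafkaConsumer(group_id='group_test', bootstrap_servers='localhost:9092')
for partition_id in consumer.partitions_for_topic('topic_demo'):
    tp = TopicPartition('topic_demo', partition_id)
    # committed() returns the last committed offset, or None if nothing was committed yet
    print('{0} => committed offset = {1}'.format(tp, consumer.committed(tp)))
consumer.close()
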

/Kafka-Utils-Java/src/main/java/com/yhyr/comsumer/CommitSpecialOffsetTriggerRebalance.java:
--------------------------------------------------------------------------------
package com.yhyr.comsumer;

import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.common.PartitionInfo;
import org.apache.kafka.common.TopicPartition;

import java.util.*;

/**
 * Class Subject: Reset the offset of every partition of a given topic for a given group.
 * By deliberately triggering a rebalance of the group, the consume position of a running
 * service can be changed without stopping that service.
 * The Java client's default client.id is prefixed with "consumer-"; because of how the
 * rebalance assigns partitions, a carefully chosen client.id lets this tool steer the
 * partition assignment to itself and then commit the new offsets.
 *
 * Tips: Offsets are always reset per partition, never per topic.
 *
 * @author yhyr
 * @since 2018/12/30 18:25
 */
public class CommitSpecialOffsetTriggerRebalance {
    private static Map<TopicPartition, OffsetAndMetadata> currentOffsets = new HashMap<>();

    public static void main(String[] args) {
        String brokers = "localhost:9092";
        String group = "group_test";
        String topic = "topic_demo";

        Properties props = new Properties();
        // Set client.id explicitly so that this consumer sorts first during assignment
        String clientId = "aaa";
        props.put("client.id", clientId);
        props.put("bootstrap.servers", brokers);
        props.put("group.id", group);
        props.put("enable.auto.commit", "true");
        props.put("auto.offset.reset", "latest");
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);

        // Trigger a rebalance by joining the group
        consumer.subscribe(Collections.singletonList(topic));
        consumer.poll(0);

        // Look up the topic's partitions
        List<PartitionInfo> partitionInfos = consumer.partitionsFor(topic);
        // Set every partition's offset to 10
        int resetOffsetValue = 10;
        partitionInfos.forEach(partitionInfo -> currentOffsets.put(
            new TopicPartition(partitionInfo.topic(), partitionInfo.partition()),
            new OffsetAndMetadata(resetOffsetValue)));
        consumer.commitSync(currentOffsets);
    }
}
--------------------------------------------------------------------------------

/Kafka-Utils-Java/src/main/java/com/yhyr/comsumer/CommonConsumer.java:
--------------------------------------------------------------------------------
package com.yhyr.comsumer;

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;

import java.util.Collections;
import java.util.Properties;

/**
 * @author yhyr
 * @since 2018/12/30 18:01
 */
public class CommonConsumer {
    public static void main(String[] args) {
        String brokers = "localhost:9092";
        String group = "group_test";
        String topic = "topic_demo";

        Properties props = new Properties();
        props.put("bootstrap.servers", brokers);
        props.put("group.id", group);
        props.put("enable.auto.commit", "true");
        props.put("auto.offset.reset", "latest");
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");

        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
        consumer.subscribe(Collections.singletonList(topic));

        while (true) {
            ConsumerRecords<String, String> records = consumer.poll(100);

            for (ConsumerRecord<String, String> record : records) {
                System.out.println(record.toString());
            }
        }
    }
}
--------------------------------------------------------------------------------

/Kafka-Utils-Java/src/main/java/com/yhyr/comsumer/ConsumerGroupOffset.java:
--------------------------------------------------------------------------------
package com.yhyr.comsumer;

import kafka.coordinator.group.GroupMetadataManager;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.TopicPartition;

import java.util.Collections;
import java.util.Properties;

/**
 * Class Subject: Consume the offset records that the internal __consumer_offsets topic
 * stores for a given group.
 *
 * @author yhyr
 * @since 2019/01/08 10:48
 */
public class ConsumerGroupOffset {
    public static void main(String[] args) {
        String brokers = "localhost:9092";
        String group = "group_test";
        String topic = "__consumer_offsets";

        Properties props = new Properties();
        props.put("bootstrap.servers", brokers);
        props.put("group.id", group);
        props.put("enable.auto.commit", "false");
        props.put("auto.offset.reset", "earliest");
        props.put("max.poll.records", 1);
        props.put("key.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");

        KafkaConsumer<byte[], byte[]> consumer = new KafkaConsumer<>(props);
        // Compute the __consumer_offsets partition that holds this group's commits
        // (50 is the default value of offsets.topic.num.partitions)
        int partitionId = Math.abs(group.hashCode() % 50);
        // Consume only that partition
        consumer.assign(Collections.singletonList(new TopicPartition(topic, partitionId)));

        while (true) {
            ConsumerRecords<byte[], byte[]> records = consumer.poll(100);
            for (ConsumerRecord<byte[], byte[]> record : records) {
                GroupMetadataManager.OffsetsMessageFormatter formatter =
                    new GroupMetadataManager.OffsetsMessageFormatter();
                formatter.writeTo(record, System.out);
            }
        }
    }
}
--------------------------------------------------------------------------------
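
The partition id computed above mirrors the broker's mapping of a group to a partition of __consumer_offsets: Java's String.hashCode of the group id, modulo the partition count of that topic. Python's built-in hash() uses a different algorithm, so for the Python half of this repo the Java hash has to be reproduced by hand. A sketch, assuming the cluster keeps the default 50 partitions for __consumer_offsets:

def java_string_hashcode(s):
    """Reproduce Java's String.hashCode (32-bit signed overflow semantics)."""
    h = 0
    for ch in s:
        h = (31 * h + ord(ch)) & 0xFFFFFFFF
    # reinterpret the accumulated value as a signed 32-bit int
    return h - 0x100000000 if h >= 0x80000000 else h

def offsets_partition_for_group(group, num_partitions=50):
    # 50 is the default offsets.topic.num.partitions; adjust if the cluster overrides it
    return abs(java_string_hashcode(group)) % num_partitions

print(offsets_partition_for_group('group_test'))
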

/Kafka-Utils-Java/src/main/java/com/yhyr/comsumer/ConsumerSpecialOffset.java:
--------------------------------------------------------------------------------
package com.yhyr.comsumer;

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;

import java.util.Collections;
import java.util.Properties;

/**
 * Class Subject: Start consuming from a specified offset.
 *
 * Tips: By default the consumer commits offsets automatically (enable.auto.commit=true).
 * To sniff data from a particular offset without changing the offsets stored on the
 * broker, set enable.auto.commit=false.
 *
 * @author yhyr
 * @since 2018/12/30 18:24
 */
public class ConsumerSpecialOffset {
    public static void main(String[] args) {
        String brokers = "localhost:9092";
        String topic = "topic_demo";
        String group = "group_test";

        int customPartitionOffset = 500;

        Properties props = new Properties();
        props.put("bootstrap.servers", brokers);
        props.put("group.id", group);
        props.put("enable.auto.commit", "false");
        props.put("auto.offset.reset", "latest");
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);

        /*
         * KafkaConsumer.seek is a client-side operation: after subscribing, poll once so
         * the client learns which partitions it has been assigned; only then can seek
         * reposition those partitions.
         */
        consumer.subscribe(Collections.singletonList(topic));
        consumer.poll(0);
        consumer.assignment().forEach(topicPartition -> consumer.seek(topicPartition, customPartitionOffset));

        while (true) {
            ConsumerRecords<String, String> records = consumer.poll(100);
            for (ConsumerRecord<String, String> record : records) {
                System.out.println(record.toString());
            }
        }
    }
}
--------------------------------------------------------------------------------

/Kafka-Utils-Java/src/main/java/com/yhyr/comsumer/GetEffectiveOffset.java:
--------------------------------------------------------------------------------
package com.yhyr.comsumer;

import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.TopicPartition;

import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.stream.Collectors;

/**
 * Class Subject: Get the earliest and latest offset of every partition of a given topic;
 * applicable to Kafka 1.0 and later.
 *
 * Tips: This is the offset range of the data the topic actually retains; it is unrelated
 * to whether the data has been consumed.
 *
 * @author yhyr
 * @since 2018/12/30 18:32
 */
public class GetEffectiveOffset {
    public static void main(String[] args) {

        String brokers = "localhost:9092";
        String topic = "topic_demo";

        Properties props = new Properties();
        props.put("bootstrap.servers", brokers);
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);

        List<TopicPartition> topicPartitions = consumer.partitionsFor(topic).stream()
            .map(partitionInfo -> new TopicPartition(partitionInfo.topic(), partitionInfo.partition()))
            .collect(Collectors.toList());

        Map<TopicPartition, Long> beginningOffsets = consumer.beginningOffsets(topicPartitions);
        Map<TopicPartition, Long> endOffsets = consumer.endOffsets(topicPartitions);

        beginningOffsets.forEach((tp, offset) -> System.out.println(String
            .format("%s => beginning Offset = %s; end Offset = %s", tp, offset, endOffsets.get(tp))));
    }
}
--------------------------------------------------------------------------------
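
Combining the end offsets above with a group's committed offsets yields the usual consumer-lag figure. A kafka-python sketch with the same placeholder names:

from kafka import KafkaConsumer, TopicPartition

consumer = KafkaConsumer(group_id='group_test', bootstrap_servers='localhost:9092')
partitions = [TopicPartition('topic_demo', p) for p in consumer.partitions_for_topic('topic_demo')]
end_offsets = consumer.end_offsets(partitions)
for tp in partitions:
    committed = consumer.committed(tp)
    # lag is undefined until the group has committed at least once
    lag = None if committed is None else end_offsets[tp] - committed
    print('{0} => lag = {1}'.format(tp, lag))
consumer.close()
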

/Kafka-Utils-Java/src/main/java/com/yhyr/comsumer/GetOffsetWithTimestamp.java:
--------------------------------------------------------------------------------
package com.yhyr.comsumer;

import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.consumer.OffsetAndTimestamp;
import org.apache.kafka.common.TopicPartition;

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;

/**
 * Class Subject: Find the offset range of the messages the broker received within a given
 * time window; applicable to Kafka 0.10.1.0 and later.
 *
 * Tips: KafkaConsumer.offsetsForTimes looks up the offset of the first message whose
 * timestamp is greater than or equal to the given one; it returns null if no such
 * message exists.
 *
 * @author yhyr
 * @since 2018/12/30 18:28
 */
public class GetOffsetWithTimestamp {
    private static final String DATE_PATTERN = "yyyy-MM-dd HH:mm:ss";

    public static void main(String[] args) {
        String brokers = "localhost:9092";
        String topic = "topic_demo";

        String beginTime = "2018-12-30 16:32:49";
        String endTime = "2018-12-30 16:35:51";

        Properties props = new Properties();
        props.put("bootstrap.servers", brokers);
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);

        // Find Begin Offset
        Map<TopicPartition, Long> beginOffsetSearchMap = new HashMap<>();
        consumer.partitionsFor(topic).forEach(
            partitionInfo -> beginOffsetSearchMap.put(
                new TopicPartition(partitionInfo.topic(), partitionInfo.partition()),
                stringToTimestamp(beginTime, DATE_PATTERN)));
        Map<TopicPartition, OffsetAndTimestamp> resultOfBeginOffset = consumer.offsetsForTimes(beginOffsetSearchMap);

        // Find End Offset
        Map<TopicPartition, Long> endOffsetSearchMap = new HashMap<>();
        consumer.partitionsFor(topic).forEach(
            partitionInfo -> endOffsetSearchMap.put(
                new TopicPartition(partitionInfo.topic(), partitionInfo.partition()),
                stringToTimestamp(endTime, DATE_PATTERN)));
        Map<TopicPartition, OffsetAndTimestamp> resultOfEndOffset = consumer.offsetsForTimes(endOffsetSearchMap);

        // Format Print
        resultOfBeginOffset.forEach((key, value) -> System.out.println(String.format(
            "From %s to %s, %s offset range = [%s, %s]", beginTime, endTime, key, formatPrint(value),
            formatPrint(resultOfEndOffset.get(key)))));
    }

    private static Object formatPrint(OffsetAndTimestamp offsetAndTimestamp) {
        return (offsetAndTimestamp == null) ? "null" : offsetAndTimestamp.offset();
    }

    private static Long stringToTimestamp(String date, String pattern) {
        SimpleDateFormat sdf = new SimpleDateFormat(pattern);
        try {
            return sdf.parse(date).getTime();
        } catch (ParseException e) {
            e.printStackTrace();
            return null;
        }
    }
}
--------------------------------------------------------------------------------
"null" : offsetAndTimestamp.offset(); 61 | } 62 | 63 | private static Long stringToTimestamp(String date, String pattern) { 64 | SimpleDateFormat sdf = new SimpleDateFormat(pattern); 65 | try { 66 | return sdf.parse(date).getTime(); 67 | } catch (ParseException e) { 68 | e.printStackTrace(); 69 | return null; 70 | } 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /Kafka-Utils-Java/src/main/java/com/yhyr/comsumer/ListenerRebalance.java: -------------------------------------------------------------------------------- 1 | package com.yhyr.comsumer; 2 | 3 | import org.apache.kafka.clients.consumer.*; 4 | import org.apache.kafka.common.TopicPartition; 5 | 6 | import java.util.*; 7 | 8 | /** 9 | * Class Subject: 在客户端监听Group的Rebalance行为; 10 | * 11 | * * 在触发Rebalance前 记录当前正在处理各Partition的offset信息; 在Rebalance后记录被重新分配的Partition信息 12 | * 13 | * @author yhyr 14 | * @since 2018/12/30 18:25 15 | */ 16 | public class ListenerRebalance { 17 | private static Map currentOffsets = new HashMap<>(); 18 | 19 | private static String brokers = "localhost:9092"; 20 | private static String group = "group_test"; 21 | private static String topic = "topic_demo"; 22 | 23 | private static KafkaConsumer consumer; 24 | 25 | static { 26 | Properties props = new Properties(); 27 | props.put("bootstrap.servers", brokers); 28 | props.put("group.id", group); 29 | props.put("enable.auto.commit", "true"); 30 | props.put("auto.commit.interval.ms", "1000"); 31 | props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); 32 | props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); 33 | consumer = new KafkaConsumer<>(props); 34 | } 35 | 36 | private class CustomHandleRebalance implements ConsumerRebalanceListener { 37 | @Override 38 | public void onPartitionsRevoked(Collection collection) { 39 | System.out.println(String.format( 40 | "Before Rebalance, Assignment partitions is: %s; Current each partition's latest offset is: %s", 41 | collection.toString(), currentOffsets.toString())); 42 | } 43 | 44 | @Override 45 | public void onPartitionsAssigned(Collection collection) { 46 | System.out.println(String.format( 47 | "After Rebalance, Assignment partitions is: %s; Current each partition's latest offset is: %s", 48 | collection.toString(), currentOffsets.toString())); 49 | } 50 | } 51 | 52 | private void consumer() { 53 | try { 54 | consumer.subscribe(Collections.singletonList(topic), new CustomHandleRebalance()); 55 | while (true) { 56 | ConsumerRecords records = consumer.poll(100); 57 | for (ConsumerRecord record : records) { 58 | // record current msg's offset 59 | currentOffsets.put(new TopicPartition(record.topic(), record.partition()), new OffsetAndMetadata( 60 | record.offset())); 61 | // processing msg 62 | System.out.println("Processing msg : " + record.toString()); 63 | } 64 | } 65 | } catch (Exception e) { 66 | System.out.println("Unexpected error: " + e.getMessage()); 67 | } finally { 68 | consumer.commitSync(currentOffsets); 69 | } 70 | } 71 | 72 | public static void main(String[] args) { 73 | ListenerRebalance action = new ListenerRebalance(); 74 | action.consumer(); 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /Kafka-Utils-Java/src/main/java/com/yhyr/producer/CommonProducer.java: -------------------------------------------------------------------------------- 1 | package com.yhyr.producer; 2 | 3 | import 

/Kafka-Utils-Java/src/main/java/com/yhyr/producer/CommonProducer.java:
--------------------------------------------------------------------------------
package com.yhyr.producer;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;

import java.util.Properties;
import java.util.stream.IntStream;

/**
 * @author yhyr
 * @since 2018/12/30 18:02
 */
public class CommonProducer {
    public static void main(String[] args) {
        String topic = "topic_demo";
        String brokerList = "localhost:9092";

        Properties props = new Properties();
        props.put("bootstrap.servers", brokerList);
        props.put("acks", "all");
        props.put("retries", 0);
        props.put("batch.size", 16384);
        props.put("linger.ms", 1);
        props.put("buffer.memory", 33554432);
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

        Producer<String, String> producer = new KafkaProducer<>(props);

        IntStream.range(0, 1000)
            .mapToObj(i -> new ProducerRecord<String, String>(topic, "msg -> " + i))
            .forEach(producer::send);

        // close() flushes the buffered records and stops the client's non-daemon
        // sender thread, which would otherwise keep the JVM alive
        producer.close();
        System.out.println("send message over");
    }
}
--------------------------------------------------------------------------------
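
send() is asynchronous in both clients: the call returns a future while the record may still sit in the client's buffer. When delivery must be confirmed per message, block on the future and flush before exiting. A kafka-python sketch:

from kafka import KafkaProducer
from kafka.errors import KafkaError

producer = KafkaProducer(bootstrap_servers='localhost:9092')
future = producer.send('topic_demo', b'msg -> 0')
try:
    # block until the broker acknowledges the write (or the timeout expires)
    metadata = future.get(timeout=10)
    print('written to {0}-{1} @ offset {2}'.format(metadata.topic, metadata.partition, metadata.offset))
except KafkaError as e:
    print('send failed: {0}'.format(e))
finally:
    producer.flush()
    producer.close()
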

/Kafka-Utils-Python/consumer/__init__.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
@Author YH YR
@Time 2018/12/31 13:17
"""
--------------------------------------------------------------------------------

/Kafka-Utils-Python/consumer/commit_special_offset.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Subject: Reset the offset of each partition of a given topic for a given group,
    without triggering a rebalance of the group
Tips: Offsets are always reset per partition, never per topic

@Author YH YR
@Time 2018/12/31 14:10
"""
from kafka import KafkaConsumer, TopicPartition, OffsetAndMetadata


class CommitSpecialOffset:
    def __init__(self, broker_list, group_name, topic):
        self.topic = topic
        self.consumer = KafkaConsumer(group_id=group_name, bootstrap_servers=broker_list)

    def reset_offset(self, reset_offset_value):
        partitions_offset = {}
        for partition_id in self.consumer.partitions_for_topic(self.topic):
            # use self.topic, not the module-level variable of the same name
            partitions_offset[TopicPartition(self.topic, partition_id)] = OffsetAndMetadata(reset_offset_value, '')

        self.consumer.commit(partitions_offset)


if __name__ == '__main__':
    broker_list = 'localhost:9092'
    group_name = 'group_test'
    topic = 'topic_demo'

    action = CommitSpecialOffset(broker_list, group_name, topic)
    action.reset_offset(500)
--------------------------------------------------------------------------------

/Kafka-Utils-Python/consumer/commit_special_offset_trigger_rebalance.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Subject: Reset the offset of each partition of a given topic for a given group
    By deliberately triggering a rebalance of the group, the consume position of a
    running service can be changed without stopping that service
    kafka-python's default client.id is prefixed with 'kafka-python-'; because of how
    the rebalance assigns partitions, a carefully chosen client.id can steer the
    partition assignment and thereby make the offset reset possible
Tips: Offsets are always reset per partition, never per topic

@Author YH YR
@Time 2018/12/31 14:10
"""
from kafka import KafkaConsumer, TopicPartition, OffsetAndMetadata


class CommitSpecialOffset:
    def __init__(self, broker_list, group_name, topic, client_id=None):
        self.topic = topic
        self.consumer = KafkaConsumer(group_id=group_name, bootstrap_servers=broker_list, client_id=client_id)

    def reset_offset(self, reset_offset_value):
        # Trigger Rebalance
        self.consumer.subscribe(self.topic)
        self.consumer.poll(0)
        # Reset Offset
        partitions_offset = {}
        for partition_id in self.consumer.partitions_for_topic(self.topic):
            # use self.topic, not the module-level variable of the same name
            partitions_offset[TopicPartition(self.topic, partition_id)] = OffsetAndMetadata(reset_offset_value, '')

        self.consumer.commit(partitions_offset)


if __name__ == '__main__':
    broker_list = 'localhost:9092'
    group_name = 'group_test'
    topic = 'topic_demo'
    client_id = 'aaa'

    action = CommitSpecialOffset(broker_list, group_name, topic, client_id)
    action.reset_offset(10)
--------------------------------------------------------------------------------

/Kafka-Utils-Python/consumer/common_consumer.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
@Author YH YR
@Time 2018/12/31 13:35
"""

from kafka import KafkaConsumer


class KafkaConsumerUtil:
    def __init__(self, broker_list, group_name, topic, enable_auto_commit=True, auto_offset_reset='latest'):
        self.broker_list = broker_list
        self.topic = topic
        self.group_name = group_name
        self.enable_auto_commit = enable_auto_commit
        self.auto_offset_reset = auto_offset_reset

    def consumer(self, process_msg):
        consumer = KafkaConsumer(*self.topic, group_id=self.group_name, bootstrap_servers=self.broker_list,
                                 enable_auto_commit=self.enable_auto_commit, auto_offset_reset=self.auto_offset_reset)
        for msg in consumer:
            process_msg(msg)


def print_msg(msg_dic):
    print(msg_dic)


if __name__ == '__main__':
    broker_list = 'localhost:9092'
    group_name = 'group_test'
    topic = ['topic_demo']

    action = KafkaConsumerUtil(broker_list, group_name, topic)
    action.consumer(print_msg)
--------------------------------------------------------------------------------

/Kafka-Utils-Python/consumer/consumer_special_offset.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Subject: Start consuming from a specified offset
Tips:
    By default the consumer auto-commits offsets (enable_auto_commit=True);
    to sniff data from a particular offset without changing the offsets stored
    on the broker, set enable_auto_commit=False

    Consumption happens per partition, so the given offset is applied to one or
    more concrete partitions, not to the topic as a whole

@Author YH YR
@Time 2018/12/31 14:09
"""

from kafka import KafkaConsumer


class ConsumerSpecialOffset:
    def __init__(self, broker_list, group_name, topic, enable_auto_commit=True, auto_offset_reset='latest'):
        self.broker_list = broker_list
        self.topic = topic
        self.group_name = group_name
        self.enable_auto_commit = enable_auto_commit
        self.auto_offset_reset = auto_offset_reset

    def consumer_from_offset(self, process_msg, offset):
        consumer = KafkaConsumer(group_id=self.group_name, bootstrap_servers=self.broker_list,
                                 enable_auto_commit=self.enable_auto_commit, auto_offset_reset=self.auto_offset_reset)
        # poll once after subscribing so the client learns its assigned partitions;
        # only then can seek() reposition them
        consumer.subscribe(self.topic)
        consumer.poll(0)
        for topic_partition in consumer.assignment():
            consumer.seek(topic_partition, offset)
        while True:
            consumer_records = consumer.poll(100)
            for partition_info, records in consumer_records.items():
                for record in records:
                    process_msg(record)


def print_msg(msg_dic):
    print(msg_dic)


if __name__ == '__main__':
    broker_list = 'localhost:9092'
    group_name = 'group_test'
    topic = ['topic_demo']

    action = ConsumerSpecialOffset(broker_list, group_name, topic)
    action.consumer_from_offset(print_msg, 1000)
--------------------------------------------------------------------------------

/Kafka-Utils-Python/consumer/consumer_timestamp_window.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Subject: Consume the messages produced within a given time window

@Author YH YR
@Time 2019/01/23 21:23
"""
import time
from kafka import KafkaConsumer, TopicPartition


class ConsumerTimeStampWindow:
    def __init__(self, broker_list, group_name, topic, enable_auto_commit=True, auto_offset_reset='latest'):
        self.topic = topic
        self.consumer = KafkaConsumer(group_id=group_name, bootstrap_servers=broker_list,
                                      enable_auto_commit=enable_auto_commit, auto_offset_reset=auto_offset_reset)

    def consumer_from_offset_window(self, process_msg, begin_time, end_time):
        self.consumer.subscribe(self.topic)
        self.consumer.poll(0)

        begin_offset_dic, end_offset_dic = self.get_offset_time_window(begin_time, end_time)

        topic_partition_info = self.consumer.assignment()
        partition_consumer_finish_flag = dict(zip(topic_partition_info, [False] * len(topic_partition_info)))

        for topic_partition, offset_and_timestamp in begin_offset_dic.items():
            if offset_and_timestamp is None:
                # no message at or after begin_time on this partition: nothing to replay
                partition_consumer_finish_flag[topic_partition] = True
            else:
                self.consumer.seek(topic_partition, offset_and_timestamp[0])

        while True:
            if False not in partition_consumer_finish_flag.values():
                return
            consumer_records = self.consumer.poll(100)
            for partition_info, records in consumer_records.items():
                if partition_consumer_finish_flag[partition_info]:
                    print('-------------- {0} consumer finish --------------'.format(partition_info))
                    # skip this partition but keep handling the others in this poll
                    continue
                end = end_offset_dic[partition_info]
                # offsets_for_times returns None when no message exists at or after
                # end_time; then every remaining message still falls inside the window
                end_limit = float('inf') if end is None else end[0]
                for record in records:
                    if record.offset <= end_limit:
                        process_msg(record)
                    else:
                        partition_consumer_finish_flag[partition_info] = True

    def get_offset_time_window(self, begin_time, end_time):
        partitions_structs = []

        for partition_id in self.consumer.partitions_for_topic(self.topic):
            partitions_structs.append(TopicPartition(self.topic, partition_id))

        begin_search = {}
        for partition in partitions_structs:
            begin_search[partition] = begin_time if isinstance(begin_time, int) else self.__str_to_timestamp(begin_time)
        begin_offset = self.consumer.offsets_for_times(begin_search)

        end_search = {}
        for partition in partitions_structs:
            end_search[partition] = end_time if isinstance(end_time, int) else self.__str_to_timestamp(end_time)
        end_offset = self.consumer.offsets_for_times(end_search)

        for topic_partition, offset_and_timestamp in begin_offset.items():
            b_offset = 'null' if offset_and_timestamp is None else offset_and_timestamp[0]
            e_offset = 'null' if end_offset[topic_partition] is None else end_offset[topic_partition][0]
            print('Between {0} and {1}, {2} offset range = [{3}, {4}]'.format(begin_time, end_time, topic_partition,
                                                                              b_offset, e_offset))
        return begin_offset, end_offset

    @staticmethod
    def __str_to_timestamp(str_time, format_type='%Y-%m-%d %H:%M:%S'):
        time_array = time.strptime(str_time, format_type)
        return int(time.mktime(time_array)) * 1000


def print_msg(msg_dic):
    print(msg_dic)


if __name__ == '__main__':
    broker_list = 'localhost:9092'
    group_name = 'group_test'
    topic = 'topic_demo'

    action = ConsumerTimeStampWindow(broker_list, group_name, topic)
    action.consumer_from_offset_window(print_msg, '2019-01-23 21:30:00', '2019-01-23 21:36:00')
--------------------------------------------------------------------------------

/Kafka-Utils-Python/consumer/get_effective_offset.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Subject: Get the earliest and latest offset of every partition of a given topic; applicable to Kafka 1.0 and later
Tips: This is the offset range of the data the topic actually retains; it is unrelated to whether the data has been consumed

@Author YH YR
@Time 2018/12/31 14:12
"""
from kafka import KafkaConsumer, TopicPartition


class GetEffectiveOffset:
    def __init__(self, broker_list, group_name, topic):
        self.topic = topic
        self.consumer = KafkaConsumer(group_id=group_name, bootstrap_servers=broker_list)

    def get_offset(self):
        partitions_structs = []

        for partition_id in self.consumer.partitions_for_topic(self.topic):
            partitions_structs.append(TopicPartition(self.topic, partition_id))

        beginning_offset = self.consumer.beginning_offsets(partitions_structs)
        end_offset = self.consumer.end_offsets(partitions_structs)

        for partition, offset in beginning_offset.items():
            print('{0} => beginning offset = {1}; end offset = {2}'.format(partition, offset,
                                                                           end_offset[partition]))


if __name__ == '__main__':
    broker_list = 'localhost:9092'
    group_name = 'group_test'
    topic = 'topic_demo'

    action = GetEffectiveOffset(broker_list, group_name, topic)
    action.get_offset()
--------------------------------------------------------------------------------

/Kafka-Utils-Python/consumer/get_offset_with_timestamp.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Subject: Find the offset range of the messages the broker received within a given time window; applicable to Kafka 0.10.1.0 and later
Tips:
    KafkaConsumer.offsets_for_times looks up the offset of the first message whose timestamp is >= the given one; returns None if no such message exists
    Timestamps are in milliseconds

@Author YH YR
@Time 2018/12/31 14:13
"""
import time
from kafka import KafkaConsumer, TopicPartition


class GetOffsetWithTimestamp:
    def __init__(self, broker_list, topic):
        self.topic = topic
        self.consumer = KafkaConsumer(bootstrap_servers=broker_list)

    def get_offset_time_window(self, begin_time, end_time):
        partitions_structs = []

        for partition_id in self.consumer.partitions_for_topic(self.topic):
            partitions_structs.append(TopicPartition(self.topic, partition_id))

        begin_search = {}
        for partition in partitions_structs:
            begin_search[partition] = begin_time if isinstance(begin_time, int) else self.__str_to_timestamp(begin_time)
        begin_offset = self.consumer.offsets_for_times(begin_search)

        end_search = {}
        for partition in partitions_structs:
            end_search[partition] = end_time if isinstance(end_time, int) else self.__str_to_timestamp(end_time)
        end_offset = self.consumer.offsets_for_times(end_search)

        for topic_partition, offset_and_timestamp in begin_offset.items():
            b_offset = 'null' if offset_and_timestamp is None else offset_and_timestamp[0]
            e_offset = 'null' if end_offset[topic_partition] is None else end_offset[topic_partition][0]
            print('Between {0} and {1}, {2} offset range = [{3}, {4}]'.format(begin_time, end_time, topic_partition,
                                                                              b_offset, e_offset))

    @staticmethod
    def __str_to_timestamp(str_time, format_type='%Y-%m-%d %H:%M:%S'):
        time_array = time.strptime(str_time, format_type)
        return int(time.mktime(time_array)) * 1000


if __name__ == '__main__':
    broker_list = 'localhost:9092'
    topic = 'topic_demo'

    action = GetOffsetWithTimestamp(broker_list, topic)
    action.get_offset_time_window('2018-12-30 17:00:00', '2018-12-30 20:00:00')
--------------------------------------------------------------------------------
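
One caveat on the __str_to_timestamp helper used by the two files above: time.mktime interprets the parsed struct in the machine's local timezone. When the window is specified in UTC, a sketch like this avoids the local-time dependency:

import calendar
import time

def utc_str_to_timestamp_ms(str_time, format_type='%Y-%m-%d %H:%M:%S'):
    # calendar.timegm treats the struct_time as UTC, where time.mktime would use local time
    return calendar.timegm(time.strptime(str_time, format_type)) * 1000

print(utc_str_to_timestamp_ms('2018-12-30 17:00:00'))
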

/Kafka-Utils-Python/consumer/listener_rebalance.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Subject: Listen for the group's rebalance events on the client side
Tips:
    A custom consumer client_id can be used to influence how partitions are assigned after a rebalance
    The Python consumer's default client_id follows the pattern: kafka-python-{version}


@Author YH YR
@Time 2018/12/31 14:13
"""
import time
from kafka import KafkaConsumer, ConsumerRebalanceListener


class CustomHandleRebalance(ConsumerRebalanceListener):
    def on_partitions_revoked(self, revoked):
        print('Before Rebalance, Assignment partitions is: {0}'.format(revoked))

    def on_partitions_assigned(self, assigned):
        print('After Rebalance, Assignment partition is: {0}'.format(assigned))


class ListenerRebalance:
    def __init__(self, broker_list, group_name, topic, enable_auto_commit=True, auto_offset_reset='latest',
                 client_id=None):
        self.broker_list = broker_list
        self.topic = topic
        self.group_name = group_name
        self.enable_auto_commit = enable_auto_commit
        self.auto_offset_reset = auto_offset_reset
        self.client_id = client_id

    def consumer(self, process_msg):
        consumer = KafkaConsumer(group_id=self.group_name, bootstrap_servers=self.broker_list,
                                 client_id=self.client_id, enable_auto_commit=self.enable_auto_commit,
                                 auto_offset_reset=self.auto_offset_reset)

        consumer.subscribe(self.topic, listener=CustomHandleRebalance())

        while True:
            consumer_records = consumer.poll(100, max_records=1)
            for partition_info, records in consumer_records.items():
                for record in records:
                    process_msg(record)
            time.sleep(2)


def print_msg(msg_dic):
    print(msg_dic)


if __name__ == '__main__':
    broker_list = 'localhost:9092'
    group_name = 'group_test'
    topic = ['topic_demo']

    action = ListenerRebalance(broker_list, group_name, topic)
    action.consumer(print_msg)
--------------------------------------------------------------------------------
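
The listeners above only log rebalance events. A common extension is to commit the offsets of records processed so far inside on_partitions_revoked, so the consumer that takes over a partition does not reprocess them. A sketch of that pattern with the same placeholder names (the Java client's ConsumerRebalanceListener supports the same approach):

from kafka import KafkaConsumer, ConsumerRebalanceListener, TopicPartition, OffsetAndMetadata

consumer = KafkaConsumer(group_id='group_test', bootstrap_servers='localhost:9092',
                         enable_auto_commit=False)
current_offsets = {}  # TopicPartition -> OffsetAndMetadata, updated as records are processed


class CommitOnRevoke(ConsumerRebalanceListener):
    def on_partitions_revoked(self, revoked):
        # commit what has been processed so far before the partitions move away
        if current_offsets:
            consumer.commit(current_offsets)

    def on_partitions_assigned(self, assigned):
        print('Newly assigned: {0}'.format(assigned))


consumer.subscribe(['topic_demo'], listener=CommitOnRevoke())
for record in consumer:
    # +1: a committed offset is the position of the next record to read
    current_offsets[TopicPartition(record.topic, record.partition)] = \
        OffsetAndMetadata(record.offset + 1, '')
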

/Kafka-Utils-Python/producer/__init__.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
@Author YH YR
@Time 2018/12/31 13:17
"""
--------------------------------------------------------------------------------

/Kafka-Utils-Python/producer/common_producer.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
@Author YH YR
@Time 2018/12/31 13:21
"""

from kafka import KafkaProducer


class CommonKafkaProducer:
    def __init__(self, broker_list):
        self.producer = KafkaProducer(bootstrap_servers=broker_list)

    def produce(self, topic, msg):
        self.producer.send(topic, bytes(msg, encoding='utf8'))

    def __del__(self):
        # close() flushes any buffered messages
        self.producer.close()


if __name__ == '__main__':
    broker_list = 'localhost:9092'
    topic = 'topic_demo'

    action = CommonKafkaProducer(broker_list)
    for i in range(1000):
        action.produce(topic, 'msg -> {0}'.format(i))
--------------------------------------------------------------------------------

/README.md:
--------------------------------------------------------------------------------
# Kafka-Utils

## Environment

> Kafka 1.0.1
>
> JDK 1.8
>
> kafka-clients 1.0.1
>
> Python 3.6.5
>
> kafka-python 1.4.3

Common Kafka utilities implemented in both Java and Python; extended and polished from time to time.

Currently included:

+ Start consuming from a given offset
+ Reset offsets without stopping the consuming service
+ Get the valid offset range of each partition
+ Look up offsets by timestamp
+ Monitor group rebalances
+ Consume the records stored in __consumer_offsets
+ Consume all messages produced within a given time window
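
## Quick start

A minimal end-to-end smoke test with the Python half. This is a sketch: it assumes a broker at localhost:9092, the placeholder topic/group names used throughout the sources, and that it is run from the Kafka-Utils-Python directory. The Java half builds with `mvn package` (the assembly plugin produces a jar-with-dependencies).

```python
# Produce some test data, then print the valid offset range of each partition.
from producer.common_producer import CommonKafkaProducer
from consumer.get_effective_offset import GetEffectiveOffset

producer = CommonKafkaProducer('localhost:9092')
for i in range(100):
    producer.produce('topic_demo', 'msg -> {0}'.format(i))
del producer  # triggers close(), which flushes the buffer before we read offsets back

GetEffectiveOffset('localhost:9092', 'group_test', 'topic_demo').get_offset()
```
--------------------------------------------------------------------------------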