├── .covrc ├── .gitignore ├── .gitmodules ├── .travis.yml ├── AUTHORS.md ├── CHANGES.md ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.rst ├── benchmarks ├── README ├── consumer_performance.py ├── load_example.py ├── producer_performance.py ├── record_batch_compose.py ├── record_batch_read.py └── varint_speed.py ├── build_integration.sh ├── docs ├── Makefile ├── apidoc │ ├── BrokerConnection.rst │ ├── ClusterMetadata.rst │ ├── KafkaAdmin.rst │ ├── KafkaClient.rst │ ├── KafkaConsumer.rst │ ├── KafkaProducer.rst │ ├── SimpleProducer.rst │ ├── kafka.consumer.rst │ ├── kafka.coordinator.assignors.rst │ ├── kafka.coordinator.rst │ ├── kafka.partitioner.rst │ ├── kafka.producer.rst │ ├── kafka.protocol.rst │ ├── kafka.rst │ └── modules.rst ├── changelog.rst ├── compatibility.rst ├── conf.py ├── index.rst ├── install.rst ├── license.rst ├── make.bat ├── requirements.txt ├── simple.rst ├── support.rst ├── tests.rst └── usage.rst ├── example.py ├── kafka ├── __init__.py ├── admin │ ├── __init__.py │ ├── config_resource.py │ ├── kafka.py │ ├── new_partitions.py │ └── new_topic.py ├── client.py ├── client_async.py ├── cluster.py ├── codec.py ├── common.py ├── conn.py ├── consumer │ ├── __init__.py │ ├── base.py │ ├── fetcher.py │ ├── group.py │ ├── multiprocess.py │ ├── simple.py │ └── subscription_state.py ├── context.py ├── coordinator │ ├── __init__.py │ ├── assignors │ │ ├── __init__.py │ │ ├── abstract.py │ │ ├── range.py │ │ └── roundrobin.py │ ├── base.py │ ├── consumer.py │ ├── heartbeat.py │ └── protocol.py ├── errors.py ├── future.py ├── metrics │ ├── __init__.py │ ├── compound_stat.py │ ├── dict_reporter.py │ ├── kafka_metric.py │ ├── measurable.py │ ├── measurable_stat.py │ ├── metric_config.py │ ├── metric_name.py │ ├── metrics.py │ ├── metrics_reporter.py │ ├── quota.py │ ├── stat.py │ └── stats │ │ ├── __init__.py │ │ ├── avg.py │ │ ├── count.py │ │ ├── histogram.py │ │ ├── max_stat.py │ │ ├── min_stat.py │ │ ├── percentile.py │ │ ├── percentiles.py │ │ ├── rate.py │ │ ├── sampled_stat.py │ │ ├── sensor.py │ │ └── total.py ├── partitioner │ ├── __init__.py │ ├── base.py │ ├── default.py │ ├── hashed.py │ └── roundrobin.py ├── producer │ ├── __init__.py │ ├── base.py │ ├── buffer.py │ ├── future.py │ ├── kafka.py │ ├── keyed.py │ ├── record_accumulator.py │ ├── sender.py │ └── simple.py ├── protocol │ ├── __init__.py │ ├── abstract.py │ ├── admin.py │ ├── api.py │ ├── commit.py │ ├── fetch.py │ ├── frame.py │ ├── group.py │ ├── legacy.py │ ├── message.py │ ├── metadata.py │ ├── offset.py │ ├── parser.py │ ├── pickle.py │ ├── produce.py │ ├── struct.py │ └── types.py ├── record │ ├── README │ ├── __init__.py │ ├── _crc32c.py │ ├── abc.py │ ├── default_records.py │ ├── legacy_records.py │ ├── memory_records.py │ └── util.py ├── serializer │ ├── __init__.py │ └── abstract.py ├── structs.py ├── util.py ├── vendor │ ├── __init__.py │ ├── enum34.py │ ├── selectors34.py │ ├── six.py │ └── socketpair.py └── version.py ├── pylint.rc ├── requirements-dev.txt ├── servers ├── 0.10.0.0 │ └── resources │ │ ├── kafka.properties │ │ ├── log4j.properties │ │ └── zookeeper.properties ├── 0.10.0.1 │ └── resources │ │ ├── kafka.properties │ │ ├── log4j.properties │ │ └── zookeeper.properties ├── 0.10.1.1 │ └── resources │ │ ├── kafka.properties │ │ ├── log4j.properties │ │ └── zookeeper.properties ├── 0.10.2.1 │ └── resources │ │ ├── kafka.properties │ │ ├── log4j.properties │ │ └── zookeeper.properties ├── 0.11.0.0 │ └── resources │ │ ├── kafka.properties │ │ ├── log4j.properties │ │ └── 
zookeeper.properties ├── 0.11.0.1 │ └── resources │ │ ├── kafka.properties │ │ ├── log4j.properties │ │ └── zookeeper.properties ├── 0.11.0.2 │ └── resources │ │ ├── kafka.properties │ │ ├── log4j.properties │ │ └── zookeeper.properties ├── 0.8.0 │ └── resources │ │ ├── kafka.properties │ │ ├── log4j.properties │ │ └── zookeeper.properties ├── 0.8.1.1 │ └── resources │ │ ├── kafka.properties │ │ ├── log4j.properties │ │ └── zookeeper.properties ├── 0.8.1 │ └── resources │ │ ├── kafka.properties │ │ ├── log4j.properties │ │ └── zookeeper.properties ├── 0.8.2.0 │ └── resources │ │ ├── kafka.properties │ │ ├── log4j.properties │ │ └── zookeeper.properties ├── 0.8.2.1 │ └── resources │ │ ├── kafka.properties │ │ ├── log4j.properties │ │ └── zookeeper.properties ├── 0.8.2.2 │ └── resources │ │ ├── kafka.properties │ │ ├── log4j.properties │ │ └── zookeeper.properties ├── 0.9.0.0 │ └── resources │ │ ├── kafka.properties │ │ ├── log4j.properties │ │ └── zookeeper.properties ├── 0.9.0.1 │ └── resources │ │ ├── kafka.properties │ │ ├── log4j.properties │ │ └── zookeeper.properties ├── 1.0.0 │ └── resources │ │ ├── kafka.properties │ │ ├── log4j.properties │ │ └── zookeeper.properties ├── 1.0.1 │ └── resources │ │ ├── kafka.properties │ │ ├── log4j.properties │ │ └── zookeeper.properties └── trunk │ └── resources │ ├── kafka.properties │ ├── log4j.properties │ └── zookeeper.properties ├── setup.cfg ├── setup.py ├── test ├── __init__.py ├── conftest.py ├── fixtures.py ├── record │ ├── test_default_records.py │ ├── test_legacy_records.py │ ├── test_records.py │ └── test_util.py ├── service.py ├── test_admin.py ├── test_assignors.py ├── test_client.py ├── test_client_async.py ├── test_client_integration.py ├── test_cluster.py ├── test_codec.py ├── test_conn.py ├── test_consumer.py ├── test_consumer_group.py ├── test_consumer_integration.py ├── test_context.py ├── test_coordinator.py ├── test_failover_integration.py ├── test_fetcher.py ├── test_metrics.py ├── test_package.py ├── test_partitioner.py ├── test_producer.py ├── test_producer_integration.py ├── test_producer_legacy.py ├── test_protocol.py ├── test_protocol_legacy.py ├── test_sender.py ├── test_subscription_state.py ├── test_util.py └── testutil.py └── tox.ini /.covrc: -------------------------------------------------------------------------------- 1 | [run] 2 | omit = 3 | kafka/vendor/* 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.egg-info 2 | *.pyc 3 | .tox 4 | build 5 | dist 6 | MANIFEST 7 | env 8 | servers/*/kafka-bin* 9 | servers/*/resources/ssl* 10 | .coverage* 11 | .noseids 12 | docs/_build 13 | .cache* 14 | .idea/ 15 | integration-test/ 16 | tests-env/ 17 | .pytest_cache/ 18 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robinhood/kafka-python/3689da3d5c02e362d872cf1fb2d65201419c4b93/.gitmodules -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | python: 4 | - 2.7 5 | - 3.4 6 | - 3.5 7 | - 3.6 8 | - pypy 9 | 10 | env: 11 | - KAFKA_VERSION=0.8.2.2 12 | - KAFKA_VERSION=0.9.0.1 13 | - KAFKA_VERSION=0.10.2.1 14 | - KAFKA_VERSION=0.11.0.2 15 | - KAFKA_VERSION=1.0.1 16 | 17 | sudo: false 18 | 
19 | addons: 20 | apt: 21 | packages: 22 | - libsnappy-dev 23 | 24 | cache: 25 | directories: 26 | - $HOME/.cache/pip 27 | - servers/ 28 | 29 | before_install: 30 | - ./build_integration.sh 31 | 32 | install: 33 | - pip install tox coveralls 34 | - pip install . 35 | 36 | script: 37 | - tox -e `if [ "$TRAVIS_PYTHON_VERSION" == "pypy" ]; then echo pypy; else echo py${TRAVIS_PYTHON_VERSION/./}; fi` 38 | 39 | after_success: 40 | - coveralls 41 | -------------------------------------------------------------------------------- /AUTHORS.md: -------------------------------------------------------------------------------- 1 | # Current Maintainer 2 | * Dana Powers, [@dpkp](https://github.com/dpkp) 3 | 4 | # Original Author and First Commit 5 | * David Arthur, [@mumrah](https://github.com/mumrah) 6 | 7 | # Contributors - 2015 (alpha by username) 8 | * Alex Couture-Beil, [@alexcb](https://github.com/alexcb) 9 | * Ali-Akber Saifee, [@alisaifee](https://github.com/alisaifee) 10 | * Christophe-Marie Duquesne, [@chmduquesne](https://github.com/chmduquesne) 11 | * Thomas Dimson, [@cosbynator](https://github.com/cosbynator) 12 | * Kasper Jacobsen, [@Dinoshauer](https://github.com/Dinoshauer) 13 | * Ross Duggan, [@duggan](https://github.com/duggan) 14 | * Enrico Canzonieri, [@ecanzonieri](https://github.com/ecanzonieri) 15 | * haosdent, [@haosdent](https://github.com/haosdent) 16 | * Arturo Filastò, [@hellais](https://github.com/hellais) 17 | * Job Evers‐Meltzer, [@jobevers](https://github.com/jobevers) 18 | * Martin Olveyra, [@kalessin](https://github.com/kalessin) 19 | * Kubilay Kocak, [@koobs](https://github.com/koobs) 20 | * Matthew L Daniel 21 | * Eric Hewitt, [@meandthewallaby](https://github.com/meandthewallaby) 22 | * Oliver Jowett [@mutability](https://github.com/mutability) 23 | * Shaolei Zhou, [@reAsOn2010](https://github.com/reAsOn2010) 24 | * Oskari Saarenmaa, [@saaros](https://github.com/saaros) 25 | * John Anderson, [@sontek](https://github.com/sontek) 26 | * Eduard Iskandarov, [@toidi](https://github.com/toidi) 27 | * Todd Palino, [@toddpalino](https://github.com/toddpalino) 28 | * trbs, [@trbs](https://github.com/trbs) 29 | * Viktor Shlapakov, [@vshlapakov](https://github.com/vshlapakov) 30 | * Will Daly, [@wedaly](https://github.com/wedaly) 31 | * Warren Kiser, [@wkiser](https://github.com/wkiser) 32 | * William Ting, [@wting](https://github.com/wting) 33 | * Zack Dever, [@zackdever](https://github.com/zackdever) 34 | 35 | # More Contributors 36 | * Bruno Renié, [@brutasse](https://github.com/brutasse) 37 | * Thomas Dimson, [@cosbynator](https://github.com/cosbynator) 38 | * Jesse Myers, [@jessemyers](https://github.com/jessemyers) 39 | * Mahendra M, [@mahendra](https://github.com/mahendra) 40 | * Miguel Eduardo Gil Biraud, [@mgilbir](https://github.com/mgilbir) 41 | * Marc Labbé, [@mrtheb](https://github.com/mrtheb) 42 | * Patrick Lucas, [@patricklucas](https://github.com/patricklucas) 43 | * Omar Ghishan, [@rdiomar](https://github.com/rdiomar) - RIP, Omar. 2014 44 | * Ivan Pouzyrevsky, [@sandello](https://github.com/sandello) 45 | * Lou Marvin Caraig, [@se7entyse7en](https://github.com/se7entyse7en) 46 | * waliaashish85, [@waliaashish85](https://github.com/waliaashish85) 47 | * Mark Roberts, [@wizzat](https://github.com/wizzat) 48 | * Christophe Lecointe [@christophelec](https://github.com/christophelec) 49 | * Mohamed Helmi Hichri [@hellich](https://github.com/hellich) 50 | 51 | Thanks to all who have contributed! 
52 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include kafka *.py 2 | include README.rst 3 | include LICENSE 4 | include AUTHORS.md 5 | include CHANGES.md 6 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Some simple testing tasks (sorry, UNIX only). 2 | 3 | FLAGS= 4 | KAFKA_VERSION=0.11.0.2 5 | SCALA_VERSION=2.12 6 | 7 | setup: 8 | pip install -r requirements-dev.txt 9 | pip install -Ue . 10 | 11 | servers/$(KAFKA_VERSION)/kafka-bin: 12 | KAFKA_VERSION=$(KAFKA_VERSION) SCALA_VERSION=$(SCALA_VERSION) ./build_integration.sh 13 | 14 | build-integration: servers/$(KAFKA_VERSION)/kafka-bin 15 | 16 | # Test and produce coverage using tox. This is the same as is run on Travis 17 | test36: build-integration 18 | KAFKA_VERSION=$(KAFKA_VERSION) SCALA_VERSION=$(SCALA_VERSION) tox -e py36 -- $(FLAGS) 19 | 20 | test27: build-integration 21 | KAFKA_VERSION=$(KAFKA_VERSION) SCALA_VERSION=$(SCALA_VERSION) tox -e py27 -- $(FLAGS) 22 | 23 | # Test using py.test directly if you want to use local python. Useful for other 24 | # platforms that require manual installation for C libraries, ie. Windows. 25 | test-local: build-integration 26 | KAFKA_VERSION=$(KAFKA_VERSION) SCALA_VERSION=$(SCALA_VERSION) py.test \ 27 | --pylint --pylint-rcfile=pylint.rc --pylint-error-types=EF $(FLAGS) kafka test 28 | 29 | cov-local: build-integration 30 | KAFKA_VERSION=$(KAFKA_VERSION) SCALA_VERSION=$(SCALA_VERSION) py.test \ 31 | --pylint --pylint-rcfile=pylint.rc --pylint-error-types=EF --cov=kafka \ 32 | --cov-config=.covrc --cov-report html $(FLAGS) kafka test 33 | @echo "open file://`pwd`/htmlcov/index.html" 34 | 35 | # Check the readme for syntax errors, which can lead to invalid formatting on 36 | # PyPi homepage (https://pypi.python.org/pypi/kafka-python) 37 | check-readme: 38 | python setup.py check -rms 39 | 40 | clean: 41 | rm -rf `find . -name __pycache__` 42 | rm -f `find . -type f -name '*.py[co]' ` 43 | rm -f `find . -type f -name '*~' ` 44 | rm -f `find . -type f -name '.*~' ` 45 | rm -f `find . -type f -name '@*' ` 46 | rm -f `find . -type f -name '#*#' ` 47 | rm -f `find . -type f -name '*.orig' ` 48 | rm -f `find . -type f -name '*.rej' ` 49 | rm -f .coverage 50 | rm -rf htmlcov 51 | rm -rf docs/_build/ 52 | rm -rf cover 53 | rm -rf dist 54 | 55 | doc: 56 | make -C docs html 57 | @echo "open file://`pwd`/docs/_build/html/index.html" 58 | 59 | .PHONY: all test36 test27 test-local cov-local clean doc 60 | -------------------------------------------------------------------------------- /benchmarks/README: -------------------------------------------------------------------------------- 1 | The `record_batch_*` benchmarks in this section are written using 2 | ``perf`` library, created by Viktor Stinner. For more information on how to get 3 | reliable results of test runs please consult 4 | https://perf.readthedocs.io/en/latest/run_benchmark.html. 
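As a minimal sketch of the shape these benchmarks share (mirroring the `record_batch_*.py` scripts below; the benchmark name and the empty loop body here are placeholders, not part of the original scripts), a `perf`-based benchmark registers a timed function with a module-level Runner:

    import perf

    def bench_noop(loops):
        # perf passes the calibrated loop count; the function times its own
        # inner loop and returns the elapsed seconds. Replace the empty body
        # with the code under measurement.
        t0 = perf.perf_counter()
        for _ in range(loops):
            pass
        return perf.perf_counter() - t0

    runner = perf.Runner()
    runner.bench_time_func('noop', bench_noop)

Running such a script directly (for example ``python benchmarks/record_batch_read.py``) lets ``perf`` handle calibration, warmup and worker processes; see the URL above for how to get stable numbers.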
5 | -------------------------------------------------------------------------------- /benchmarks/load_example.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import threading, logging, time 3 | 4 | from kafka import KafkaConsumer, KafkaProducer 5 | 6 | msg_size = 524288 7 | 8 | producer_stop = threading.Event() 9 | consumer_stop = threading.Event() 10 | 11 | class Producer(threading.Thread): 12 | big_msg = b'1' * msg_size 13 | 14 | def run(self): 15 | producer = KafkaProducer(bootstrap_servers='localhost:9092') 16 | self.sent = 0 17 | 18 | while not producer_stop.is_set(): 19 | producer.send('my-topic', self.big_msg) 20 | self.sent += 1 21 | producer.flush() 22 | 23 | 24 | class Consumer(threading.Thread): 25 | 26 | def run(self): 27 | consumer = KafkaConsumer(bootstrap_servers='localhost:9092', 28 | auto_offset_reset='earliest') 29 | consumer.subscribe(['my-topic']) 30 | self.valid = 0 31 | self.invalid = 0 32 | 33 | for message in consumer: 34 | if len(message.value) == msg_size: 35 | self.valid += 1 36 | else: 37 | self.invalid += 1 38 | 39 | if consumer_stop.is_set(): 40 | break 41 | 42 | consumer.close() 43 | 44 | def main(): 45 | threads = [ 46 | Producer(), 47 | Consumer() 48 | ] 49 | 50 | for t in threads: 51 | t.start() 52 | 53 | time.sleep(10) 54 | producer_stop.set() 55 | consumer_stop.set() 56 | print 'Messages sent: %d' % threads[0].sent 57 | print 'Messages recvd: %d' % threads[1].valid 58 | print 'Messages invalid: %d' % threads[1].invalid 59 | 60 | if __name__ == "__main__": 61 | logging.basicConfig( 62 | format='%(asctime)s.%(msecs)s:%(name)s:%(thread)d:%(levelname)s:%(process)d:%(message)s', 63 | level=logging.INFO 64 | ) 65 | main() 66 | -------------------------------------------------------------------------------- /benchmarks/record_batch_compose.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from __future__ import print_function 3 | import hashlib 4 | import itertools 5 | import os 6 | import random 7 | 8 | import perf 9 | 10 | from kafka.record.memory_records import MemoryRecordsBuilder 11 | 12 | 13 | DEFAULT_BATCH_SIZE = 1600 * 1024 14 | KEY_SIZE = 6 15 | VALUE_SIZE = 60 16 | TIMESTAMP_RANGE = [1505824130000, 1505824140000] 17 | 18 | # With values above v1 record is 100 bytes, so 10 000 bytes for 100 messages 19 | MESSAGES_PER_BATCH = 100 20 | 21 | 22 | def random_bytes(length): 23 | buffer = bytearray(length) 24 | for i in range(length): 25 | buffer[i] = random.randint(0, 255) 26 | return bytes(buffer) 27 | 28 | 29 | def prepare(): 30 | return iter(itertools.cycle([ 31 | (random_bytes(KEY_SIZE), 32 | random_bytes(VALUE_SIZE), 33 | random.randint(*TIMESTAMP_RANGE) 34 | ) 35 | for _ in range(int(MESSAGES_PER_BATCH * 1.94)) 36 | ])) 37 | 38 | 39 | def finalize(results): 40 | # Just some strange code to make sure PyPy does execute the main code 41 | # properly, without optimizing it away 42 | hash_val = hashlib.md5() 43 | for buf in results: 44 | hash_val.update(buf) 45 | print(hash_val, file=open(os.devnull, "w")) 46 | 47 | 48 | def func(loops, magic): 49 | # Jit can optimize out the whole function if the result is the same each 50 | # time, so we need some randomized input data ) 51 | precomputed_samples = prepare() 52 | results = [] 53 | 54 | # Main benchmark code. 
55 | t0 = perf.perf_counter() 56 | for _ in range(loops): 57 | batch = MemoryRecordsBuilder( 58 | magic, batch_size=DEFAULT_BATCH_SIZE, compression_type=0) 59 | for _ in range(MESSAGES_PER_BATCH): 60 | key, value, timestamp = next(precomputed_samples) 61 | size = batch.append( 62 | timestamp=timestamp, key=key, value=value) 63 | assert size 64 | batch.close() 65 | results.append(batch.buffer()) 66 | 67 | res = perf.perf_counter() - t0 68 | 69 | finalize(results) 70 | 71 | return res 72 | 73 | 74 | runner = perf.Runner() 75 | runner.bench_time_func('batch_append_v0', func, 0) 76 | runner.bench_time_func('batch_append_v1', func, 1) 77 | runner.bench_time_func('batch_append_v2', func, 2) 78 | -------------------------------------------------------------------------------- /benchmarks/record_batch_read.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import print_function 3 | import hashlib 4 | import itertools 5 | import os 6 | import random 7 | 8 | import perf 9 | 10 | from kafka.record.memory_records import MemoryRecords, MemoryRecordsBuilder 11 | 12 | 13 | DEFAULT_BATCH_SIZE = 1600 * 1024 14 | KEY_SIZE = 6 15 | VALUE_SIZE = 60 16 | TIMESTAMP_RANGE = [1505824130000, 1505824140000] 17 | 18 | BATCH_SAMPLES = 5 19 | MESSAGES_PER_BATCH = 100 20 | 21 | 22 | def random_bytes(length): 23 | buffer = bytearray(length) 24 | for i in range(length): 25 | buffer[i] = random.randint(0, 255) 26 | return bytes(buffer) 27 | 28 | 29 | def prepare(magic): 30 | samples = [] 31 | for _ in range(BATCH_SAMPLES): 32 | batch = MemoryRecordsBuilder( 33 | magic, batch_size=DEFAULT_BATCH_SIZE, compression_type=0) 34 | for _ in range(MESSAGES_PER_BATCH): 35 | size = batch.append( 36 | random.randint(*TIMESTAMP_RANGE), 37 | random_bytes(KEY_SIZE), 38 | random_bytes(VALUE_SIZE), 39 | headers=[]) 40 | assert size 41 | batch.close() 42 | samples.append(bytes(batch.buffer())) 43 | 44 | return iter(itertools.cycle(samples)) 45 | 46 | 47 | def finalize(results): 48 | # Just some strange code to make sure PyPy does execute the code above 49 | # properly 50 | hash_val = hashlib.md5() 51 | for buf in results: 52 | hash_val.update(buf) 53 | print(hash_val, file=open(os.devnull, "w")) 54 | 55 | 56 | def func(loops, magic): 57 | # Jit can optimize out the whole function if the result is the same each 58 | # time, so we need some randomized input data ) 59 | precomputed_samples = prepare(magic) 60 | results = [] 61 | 62 | # Main benchmark code. 
63 | batch_data = next(precomputed_samples) 64 | t0 = perf.perf_counter() 65 | for _ in range(loops): 66 | records = MemoryRecords(batch_data) 67 | while records.has_next(): 68 | batch = records.next_batch() 69 | batch.validate_crc() 70 | for record in batch: 71 | results.append(record.value) 72 | 73 | res = perf.perf_counter() - t0 74 | finalize(results) 75 | 76 | return res 77 | 78 | 79 | runner = perf.Runner() 80 | runner.bench_time_func('batch_read_v0', func, 0) 81 | runner.bench_time_func('batch_read_v1', func, 1) 82 | runner.bench_time_func('batch_read_v2', func, 2) 83 | -------------------------------------------------------------------------------- /build_integration.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | : ${ALL_RELEASES:="0.8.2.2 0.9.0.1 0.10.1.1 0.10.2.1 0.11.0.2 1.0.1"} 4 | : ${SCALA_VERSION:=2.11} 5 | : ${DIST_BASE_URL:=https://archive.apache.org/dist/kafka/} 6 | : ${KAFKA_SRC_GIT:=https://github.com/apache/kafka.git} 7 | 8 | # On travis CI, empty KAFKA_VERSION means skip integration tests 9 | # so we don't try to get binaries 10 | # Otherwise it means test all official releases, so we get all of them! 11 | if [ -z "$KAFKA_VERSION" -a -z "$TRAVIS" ]; then 12 | KAFKA_VERSION=$ALL_RELEASES 13 | fi 14 | 15 | pushd servers 16 | mkdir -p dist 17 | pushd dist 18 | for kafka in $KAFKA_VERSION; do 19 | if [ "$kafka" == "trunk" ]; then 20 | if [ ! -d "$kafka" ]; then 21 | git clone $KAFKA_SRC_GIT $kafka 22 | fi 23 | pushd $kafka 24 | git pull 25 | ./gradlew -PscalaVersion=$SCALA_VERSION -Pversion=$kafka releaseTarGz -x signArchives 26 | popd 27 | # Not sure how to construct the .tgz name accurately, so use a wildcard (ugh) 28 | tar xzvf $kafka/core/build/distributions/kafka_*.tgz -C ../$kafka/ 29 | rm $kafka/core/build/distributions/kafka_*.tgz 30 | rm -rf ../$kafka/kafka-bin 31 | mv ../$kafka/kafka_* ../$kafka/kafka-bin 32 | else 33 | echo "-------------------------------------" 34 | echo "Checking kafka binaries for ${kafka}" 35 | echo 36 | # kafka 0.8.0 is only available w/ scala 2.8.0 37 | if [ "$kafka" == "0.8.0" ]; then 38 | KAFKA_ARTIFACT="kafka_2.8.0-${kafka}.tar.gz" 39 | else 40 | KAFKA_ARTIFACT="kafka_${SCALA_VERSION}-${kafka}.tgz" 41 | fi 42 | if [ ! -f "../$kafka/kafka-bin/bin/kafka-run-class.sh" ]; then 43 | if [ -f "${KAFKA_ARTIFACT}" ]; then 44 | echo "Using cached artifact: ${KAFKA_ARTIFACT}" 45 | else 46 | echo "Downloading kafka ${kafka} tarball" 47 | TARBALL=${DIST_BASE_URL}${kafka}/${KAFKA_ARTIFACT} 48 | if command -v wget 2>/dev/null; then 49 | wget -N $TARBALL 50 | else 51 | echo "wget not found... using curl" 52 | curl -f $TARBALL -o ${KAFKA_ARTIFACT} 53 | fi 54 | fi 55 | echo 56 | echo "Extracting kafka ${kafka} binaries" 57 | tar xzvf ${KAFKA_ARTIFACT} -C ../$kafka/ 58 | rm -rf ../$kafka/kafka-bin 59 | mv ../$kafka/${KAFKA_ARTIFACT/%.t*/} ../$kafka/kafka-bin 60 | if [ ! -f "../$kafka/kafka-bin/bin/kafka-run-class.sh" ]; then 61 | echo "Extraction Failed ($kafka/kafka-bin/bin/kafka-run-class.sh does not exist)!" 62 | exit 1 63 | fi 64 | else 65 | echo "$kafka is already installed in servers/$kafka/ -- skipping" 66 | fi 67 | fi 68 | echo 69 | done 70 | popd 71 | popd 72 | -------------------------------------------------------------------------------- /docs/apidoc/BrokerConnection.rst: -------------------------------------------------------------------------------- 1 | BrokerConnection 2 | ================ 3 | 4 | .. 
autoclass:: kafka.BrokerConnection 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/apidoc/ClusterMetadata.rst: -------------------------------------------------------------------------------- 1 | ClusterMetadata 2 | =========== 3 | 4 | .. autoclass:: kafka.cluster.ClusterMetadata 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/apidoc/KafkaAdmin.rst: -------------------------------------------------------------------------------- 1 | KafkaAdmin 2 | =========== 3 | 4 | .. autoclass:: kafka.admin.KafkaAdmin 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/apidoc/KafkaClient.rst: -------------------------------------------------------------------------------- 1 | KafkaClient 2 | =========== 3 | 4 | .. autoclass:: kafka.client.KafkaClient 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/apidoc/KafkaConsumer.rst: -------------------------------------------------------------------------------- 1 | KafkaConsumer 2 | ============= 3 | 4 | .. autoclass:: kafka.KafkaConsumer 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/apidoc/KafkaProducer.rst: -------------------------------------------------------------------------------- 1 | KafkaProducer 2 | ============= 3 | 4 | .. autoclass:: kafka.KafkaProducer 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/apidoc/SimpleProducer.rst: -------------------------------------------------------------------------------- 1 | SimpleProducer 2 | ============== 3 | 4 | .. autoclass:: kafka.producer.SimpleProducer 5 | :members: 6 | :show-inheritance: 7 | 8 | .. autoclass:: kafka.producer.KeyedProducer 9 | :members: 10 | :show-inheritance: 11 | 12 | .. automodule:: kafka.producer.base 13 | :members: 14 | :show-inheritance: 15 | -------------------------------------------------------------------------------- /docs/apidoc/kafka.consumer.rst: -------------------------------------------------------------------------------- 1 | kafka.consumer package 2 | ====================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | kafka.consumer.base module 8 | -------------------------- 9 | 10 | .. automodule:: kafka.consumer.base 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | kafka.consumer.kafka module 16 | --------------------------- 17 | 18 | .. automodule:: kafka.consumer.kafka 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | kafka.consumer.multiprocess module 24 | ---------------------------------- 25 | 26 | .. automodule:: kafka.consumer.multiprocess 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | kafka.consumer.simple module 32 | ---------------------------- 33 | 34 | .. automodule:: kafka.consumer.simple 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | 40 | Module contents 41 | --------------- 42 | 43 | .. 
automodule:: kafka.consumer 44 | :members: 45 | :undoc-members: 46 | :show-inheritance: 47 | -------------------------------------------------------------------------------- /docs/apidoc/kafka.coordinator.assignors.rst: -------------------------------------------------------------------------------- 1 | kafka.coordinator.assignors package 2 | =================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | kafka.coordinator.assignors.abstract module 8 | ------------------------------------------- 9 | 10 | .. automodule:: kafka.coordinator.assignors.abstract 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | kafka.coordinator.assignors.roundrobin module 16 | --------------------------------------------- 17 | 18 | .. automodule:: kafka.coordinator.assignors.roundrobin 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | Module contents 25 | --------------- 26 | 27 | .. automodule:: kafka.coordinator.assignors 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | -------------------------------------------------------------------------------- /docs/apidoc/kafka.coordinator.rst: -------------------------------------------------------------------------------- 1 | kafka.coordinator package 2 | ========================= 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | kafka.coordinator.assignors 10 | 11 | Submodules 12 | ---------- 13 | 14 | kafka.coordinator.base module 15 | ----------------------------- 16 | 17 | .. automodule:: kafka.coordinator.base 18 | :members: 19 | :undoc-members: 20 | :show-inheritance: 21 | 22 | kafka.coordinator.consumer module 23 | --------------------------------- 24 | 25 | .. automodule:: kafka.coordinator.consumer 26 | :members: 27 | :undoc-members: 28 | :show-inheritance: 29 | 30 | kafka.coordinator.heartbeat module 31 | ---------------------------------- 32 | 33 | .. automodule:: kafka.coordinator.heartbeat 34 | :members: 35 | :undoc-members: 36 | :show-inheritance: 37 | 38 | 39 | Module contents 40 | --------------- 41 | 42 | .. automodule:: kafka.coordinator 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | -------------------------------------------------------------------------------- /docs/apidoc/kafka.partitioner.rst: -------------------------------------------------------------------------------- 1 | kafka.partitioner package 2 | ========================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | kafka.partitioner.base module 8 | ----------------------------- 9 | 10 | .. automodule:: kafka.partitioner.base 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | kafka.partitioner.hashed module 16 | ------------------------------- 17 | 18 | .. automodule:: kafka.partitioner.hashed 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | kafka.partitioner.roundrobin module 24 | ----------------------------------- 25 | 26 | .. automodule:: kafka.partitioner.roundrobin 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | 32 | Module contents 33 | --------------- 34 | 35 | .. automodule:: kafka.partitioner 36 | :members: 37 | :undoc-members: 38 | :show-inheritance: 39 | -------------------------------------------------------------------------------- /docs/apidoc/kafka.producer.rst: -------------------------------------------------------------------------------- 1 | kafka.producer package 2 | ====================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | kafka.producer.base module 8 | -------------------------- 9 | 10 | .. 
automodule:: kafka.producer.base 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | kafka.producer.keyed module 16 | --------------------------- 17 | 18 | .. automodule:: kafka.producer.keyed 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | kafka.producer.simple module 24 | ---------------------------- 25 | 26 | .. automodule:: kafka.producer.simple 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | 32 | Module contents 33 | --------------- 34 | 35 | .. automodule:: kafka.producer 36 | :members: 37 | :undoc-members: 38 | :show-inheritance: 39 | -------------------------------------------------------------------------------- /docs/apidoc/kafka.protocol.rst: -------------------------------------------------------------------------------- 1 | kafka.protocol package 2 | ====================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | kafka.protocol.abstract module 8 | ------------------------------ 9 | 10 | .. automodule:: kafka.protocol.abstract 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | kafka.protocol.admin module 16 | --------------------------- 17 | 18 | .. automodule:: kafka.protocol.admin 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | kafka.protocol.api module 24 | ------------------------- 25 | 26 | .. automodule:: kafka.protocol.api 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | kafka.protocol.commit module 32 | ---------------------------- 33 | 34 | .. automodule:: kafka.protocol.commit 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | kafka.protocol.fetch module 40 | --------------------------- 41 | 42 | .. automodule:: kafka.protocol.fetch 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | kafka.protocol.group module 48 | --------------------------- 49 | 50 | .. automodule:: kafka.protocol.group 51 | :members: 52 | :undoc-members: 53 | :show-inheritance: 54 | 55 | kafka.protocol.legacy module 56 | ---------------------------- 57 | 58 | .. automodule:: kafka.protocol.legacy 59 | :members: 60 | :undoc-members: 61 | :show-inheritance: 62 | 63 | kafka.protocol.message module 64 | ----------------------------- 65 | 66 | .. automodule:: kafka.protocol.message 67 | :members: 68 | :undoc-members: 69 | :show-inheritance: 70 | 71 | kafka.protocol.metadata module 72 | ------------------------------ 73 | 74 | .. automodule:: kafka.protocol.metadata 75 | :members: 76 | :undoc-members: 77 | :show-inheritance: 78 | 79 | kafka.protocol.offset module 80 | ---------------------------- 81 | 82 | .. automodule:: kafka.protocol.offset 83 | :members: 84 | :undoc-members: 85 | :show-inheritance: 86 | 87 | kafka.protocol.pickle module 88 | ---------------------------- 89 | 90 | .. automodule:: kafka.protocol.pickle 91 | :members: 92 | :undoc-members: 93 | :show-inheritance: 94 | 95 | kafka.protocol.produce module 96 | ----------------------------- 97 | 98 | .. automodule:: kafka.protocol.produce 99 | :members: 100 | :undoc-members: 101 | :show-inheritance: 102 | 103 | kafka.protocol.struct module 104 | ---------------------------- 105 | 106 | .. automodule:: kafka.protocol.struct 107 | :members: 108 | :undoc-members: 109 | :show-inheritance: 110 | 111 | kafka.protocol.types module 112 | --------------------------- 113 | 114 | .. automodule:: kafka.protocol.types 115 | :members: 116 | :undoc-members: 117 | :show-inheritance: 118 | 119 | 120 | Module contents 121 | --------------- 122 | 123 | .. 
automodule:: kafka.protocol 124 | :members: 125 | :undoc-members: 126 | :show-inheritance: 127 | -------------------------------------------------------------------------------- /docs/apidoc/kafka.rst: -------------------------------------------------------------------------------- 1 | kafka package 2 | ============= 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | kafka.cluster 10 | kafka.consumer 11 | kafka.partitioner 12 | kafka.producer 13 | 14 | Submodules 15 | ---------- 16 | 17 | kafka.cluster module 18 | -------------------- 19 | 20 | .. automodule:: kafka.cluster 21 | :members: 22 | :undoc-members: 23 | :show-inheritance: 24 | 25 | 26 | kafka.client module 27 | ------------------- 28 | 29 | .. automodule:: kafka.client 30 | :members: 31 | :undoc-members: 32 | :show-inheritance: 33 | 34 | kafka.codec module 35 | ------------------ 36 | 37 | .. automodule:: kafka.codec 38 | :members: 39 | :undoc-members: 40 | :show-inheritance: 41 | 42 | kafka.common module 43 | ------------------- 44 | 45 | .. automodule:: kafka.common 46 | :members: 47 | :undoc-members: 48 | :show-inheritance: 49 | 50 | kafka.conn module 51 | ----------------- 52 | 53 | .. automodule:: kafka.conn 54 | :members: 55 | :undoc-members: 56 | :show-inheritance: 57 | 58 | kafka.context module 59 | -------------------- 60 | 61 | .. automodule:: kafka.context 62 | :members: 63 | :undoc-members: 64 | :show-inheritance: 65 | 66 | kafka.protocol module 67 | --------------------- 68 | 69 | .. automodule:: kafka.protocol 70 | :members: 71 | :undoc-members: 72 | :show-inheritance: 73 | 74 | kafka.util module 75 | ----------------- 76 | 77 | .. automodule:: kafka.util 78 | :members: 79 | :undoc-members: 80 | :show-inheritance: 81 | 82 | 83 | Module contents 84 | --------------- 85 | 86 | .. automodule:: kafka 87 | :members: 88 | :undoc-members: 89 | :show-inheritance: 90 | -------------------------------------------------------------------------------- /docs/apidoc/modules.rst: -------------------------------------------------------------------------------- 1 | kafka-python API 2 | **************** 3 | 4 | .. toctree:: 5 | 6 | KafkaConsumer 7 | KafkaProducer 8 | KafkaAdmin 9 | KafkaClient 10 | BrokerConnection 11 | ClusterMetadata 12 | -------------------------------------------------------------------------------- /docs/compatibility.rst: -------------------------------------------------------------------------------- 1 | Compatibility 2 | ------------- 3 | 4 | .. image:: https://img.shields.io/badge/kafka-1.0%2C%200.11%2C%200.10%2C%200.9%2C%200.8-brightgreen.svg 5 | :target: https://kafka-python.readthedocs.io/compatibility.html 6 | .. image:: https://img.shields.io/pypi/pyversions/kafka-python.svg 7 | :target: https://pypi.python.org/pypi/kafka-python 8 | 9 | kafka-python is compatible with (and tested against) broker versions 1.0 10 | through 0.8.0 . kafka-python is not compatible with the 0.8.2-beta release. 11 | 12 | kafka-python is tested on python 2.7, 3.4, 3.5, 3.6 and pypy. 13 | 14 | Builds and tests via Travis-CI. See https://travis-ci.org/dpkp/kafka-python 15 | -------------------------------------------------------------------------------- /docs/install.rst: -------------------------------------------------------------------------------- 1 | Install 2 | ####### 3 | 4 | Install with your favorite package manager 5 | 6 | Latest Release 7 | ************** 8 | Pip: 9 | 10 | .. 
code:: bash 11 | 12 | pip install kafka-python 13 | 14 | Releases are also listed at https://github.com/dpkp/kafka-python/releases 15 | 16 | 17 | Bleeding-Edge 18 | ************* 19 | 20 | .. code:: bash 21 | 22 | git clone https://github.com/dpkp/kafka-python 23 | pip install ./kafka-python 24 | 25 | 26 | Optional LZ4 install 27 | ******************** 28 | 29 | To enable LZ4 compression/decompression, install python-lz4: 30 | 31 | >>> pip install lz4 32 | 33 | 34 | Optional Snappy install 35 | *********************** 36 | 37 | Install Development Libraries 38 | ============================= 39 | 40 | Download and build Snappy from https://google.github.io/snappy/ 41 | 42 | Ubuntu: 43 | 44 | .. code:: bash 45 | 46 | apt-get install libsnappy-dev 47 | 48 | OSX: 49 | 50 | .. code:: bash 51 | 52 | brew install snappy 53 | 54 | From Source: 55 | 56 | .. code:: bash 57 | 58 | wget https://github.com/google/snappy/releases/download/1.1.3/snappy-1.1.3.tar.gz 59 | tar xzvf snappy-1.1.3.tar.gz 60 | cd snappy-1.1.3 61 | ./configure 62 | make 63 | sudo make install 64 | 65 | Install Python Module 66 | ===================== 67 | 68 | Install the `python-snappy` module 69 | 70 | .. code:: bash 71 | 72 | pip install python-snappy 73 | 74 | 75 | Optional crc32c install 76 | *********************** 77 | Highly recommended if you are using Kafka 11+ brokers. For those `kafka-python` 78 | uses a new message protocol version, that requires calculation of `crc32c`, 79 | which differs from `zlib.crc32` hash implementation. By default `kafka-python` 80 | calculates it in pure python, which is quite slow. To speed it up we optionally 81 | support https://pypi.python.org/pypi/crc32c package if it's installed. 82 | 83 | .. code:: bash 84 | 85 | pip install crc32c 86 | -------------------------------------------------------------------------------- /docs/license.rst: -------------------------------------------------------------------------------- 1 | License 2 | ------- 3 | 4 | .. image:: https://img.shields.io/badge/license-Apache%202-blue.svg 5 | :target: https://github.com/dpkp/kafka-python/blob/master/LICENSE 6 | 7 | Apache License, v2.0. See `LICENSE `_. 8 | 9 | Copyright 2016, Dana Powers, David Arthur, and Contributors 10 | (See `AUTHORS `_). 11 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx 2 | sphinxcontrib-napoleon 3 | sphinx_rtd_theme 4 | 5 | # Install kafka-python in editable mode 6 | # This allows the sphinx autodoc module 7 | # to load the Python modules and extract docstrings. 8 | # -e .. 9 | -------------------------------------------------------------------------------- /docs/support.rst: -------------------------------------------------------------------------------- 1 | Support 2 | ------- 3 | 4 | For support, see github issues at https://github.com/dpkp/kafka-python 5 | 6 | Limited IRC chat at #kafka-python on freenode (general chat is #apache-kafka). 7 | 8 | For information about Apache Kafka generally, see https://kafka.apache.org/ 9 | 10 | For general discussion of kafka-client design and implementation (not python 11 | specific), see https://groups.google.com/forum/m/#!forum/kafka-clients 12 | -------------------------------------------------------------------------------- /docs/tests.rst: -------------------------------------------------------------------------------- 1 | Tests 2 | ===== 3 | 4 | .. 
image:: https://coveralls.io/repos/dpkp/kafka-python/badge.svg?branch=master&service=github 5 | :target: https://coveralls.io/github/dpkp/kafka-python?branch=master 6 | .. image:: https://travis-ci.org/dpkp/kafka-python.svg?branch=master 7 | :target: https://travis-ci.org/dpkp/kafka-python 8 | 9 | Test environments are managed via tox. The test suite is run via pytest. 10 | Individual tests are written using unittest, pytest, and in some cases, 11 | doctest. 12 | 13 | Linting is run via pylint, but is generally skipped on pypy due to pylint 14 | compatibility / performance issues. 15 | 16 | For test coverage details, see https://coveralls.io/github/dpkp/kafka-python 17 | 18 | The test suite includes unit tests that mock network interfaces, as well as 19 | integration tests that setup and teardown kafka broker (and zookeeper) 20 | fixtures for client / consumer / producer testing. 21 | 22 | 23 | Unit tests 24 | ------------------ 25 | 26 | To run the tests locally, install tox: 27 | 28 | .. code:: bash 29 | 30 | pip install tox 31 | 32 | For more details, see https://tox.readthedocs.io/en/latest/install.html 33 | 34 | Then simply run tox, optionally setting the python environment. 35 | If unset, tox will loop through all environments. 36 | 37 | .. code:: bash 38 | 39 | tox -e py27 40 | tox -e py35 41 | 42 | # run protocol tests only 43 | tox -- -v test.test_protocol 44 | 45 | # re-run the last failing test, dropping into pdb 46 | tox -e py27 -- --lf --pdb 47 | 48 | # see available (pytest) options 49 | tox -e py27 -- --help 50 | 51 | 52 | Integration tests 53 | ----------------- 54 | 55 | .. code:: bash 56 | 57 | KAFKA_VERSION=0.8.2.2 tox -e py27 58 | KAFKA_VERSION=1.0.1 tox -e py36 59 | 60 | 61 | Integration tests start Kafka and Zookeeper fixtures. This requires downloading 62 | kafka server binaries: 63 | 64 | .. code:: bash 65 | 66 | ./build_integration.sh 67 | 68 | By default, this will install the broker versions listed in build_integration.sh's `ALL_RELEASES` 69 | into the servers/ directory. To install a specific version, set the `KAFKA_VERSION` variable: 70 | 71 | .. code:: bash 72 | 73 | KAFKA_VERSION=1.0.1 ./build_integration.sh 74 | 75 | Then to run the tests against a specific Kafka version, simply set the `KAFKA_VERSION` 76 | env variable to the server build you want to use for testing: 77 | 78 | .. code:: bash 79 | 80 | KAFKA_VERSION=1.0.1 tox -e py36 81 | 82 | To test against the kafka source tree, set KAFKA_VERSION=trunk 83 | [optionally set SCALA_VERSION (defaults to the value set in `build_integration.sh`)] 84 | 85 | .. code:: bash 86 | 87 | SCALA_VERSION=2.12 KAFKA_VERSION=trunk ./build_integration.sh 88 | KAFKA_VERSION=trunk tox -e py36 89 | -------------------------------------------------------------------------------- /docs/usage.rst: -------------------------------------------------------------------------------- 1 | Usage 2 | ***** 3 | 4 | 5 | KafkaConsumer 6 | ============= 7 | 8 | .. code:: python 9 | 10 | from kafka import KafkaConsumer 11 | 12 | # To consume latest messages and auto-commit offsets 13 | consumer = KafkaConsumer('my-topic', 14 | group_id='my-group', 15 | bootstrap_servers=['localhost:9092']) 16 | for message in consumer: 17 | # message value and key are raw bytes -- decode if necessary! 
18 | # e.g., for unicode: `message.value.decode('utf-8')` 19 | print ("%s:%d:%d: key=%s value=%s" % (message.topic, message.partition, 20 | message.offset, message.key, 21 | message.value)) 22 | 23 | # consume earliest available messages, don't commit offsets 24 | KafkaConsumer(auto_offset_reset='earliest', enable_auto_commit=False) 25 | 26 | # consume json messages 27 | KafkaConsumer(value_deserializer=lambda m: json.loads(m.decode('ascii'))) 28 | 29 | # consume msgpack 30 | KafkaConsumer(value_deserializer=msgpack.unpackb) 31 | 32 | # StopIteration if no message after 1sec 33 | KafkaConsumer(consumer_timeout_ms=1000) 34 | 35 | # Subscribe to a regex topic pattern 36 | consumer = KafkaConsumer() 37 | consumer.subscribe(pattern='^awesome.*') 38 | 39 | # Use multiple consumers in parallel w/ 0.9 kafka brokers 40 | # typically you would run each on a different server / process / CPU 41 | consumer1 = KafkaConsumer('my-topic', 42 | group_id='my-group', 43 | bootstrap_servers='my.server.com') 44 | consumer2 = KafkaConsumer('my-topic', 45 | group_id='my-group', 46 | bootstrap_servers='my.server.com') 47 | 48 | 49 | There are many configuration options for the consumer class. See 50 | :class:`~kafka.KafkaConsumer` API documentation for more details. 51 | 52 | 53 | KafkaProducer 54 | ============== 55 | 56 | .. code:: python 57 | 58 | from kafka import KafkaProducer 59 | from kafka.errors import KafkaError 60 | 61 | producer = KafkaProducer(bootstrap_servers=['broker1:1234']) 62 | 63 | # Asynchronous by default 64 | future = producer.send('my-topic', b'raw_bytes') 65 | 66 | # Block for 'synchronous' sends 67 | try: 68 | record_metadata = future.get(timeout=10) 69 | except KafkaError: 70 | # Decide what to do if produce request failed... 71 | log.exception() 72 | pass 73 | 74 | # Successful result returns assigned partition and offset 75 | print (record_metadata.topic) 76 | print (record_metadata.partition) 77 | print (record_metadata.offset) 78 | 79 | # produce keyed messages to enable hashed partitioning 80 | producer.send('my-topic', key=b'foo', value=b'bar') 81 | 82 | # encode objects via msgpack 83 | producer = KafkaProducer(value_serializer=msgpack.dumps) 84 | producer.send('msgpack-topic', {'key': 'value'}) 85 | 86 | # produce json messages 87 | producer = KafkaProducer(value_serializer=lambda m: json.dumps(m).encode('ascii')) 88 | producer.send('json-topic', {'key': 'value'}) 89 | 90 | # produce asynchronously 91 | for _ in range(100): 92 | producer.send('my-topic', b'msg') 93 | 94 | def on_send_success(record_metadata): 95 | print(record_metadata.topic) 96 | print(record_metadata.partition) 97 | print(record_metadata.offset) 98 | 99 | def on_send_error(excp): 100 | log.error('I am an errback', exc_info=excp) 101 | # handle exception 102 | 103 | # produce asynchronously with callbacks 104 | producer.send('my-topic', b'raw_bytes').add_callback(on_send_success).add_errback(on_send_error) 105 | 106 | # block until all async messages are sent 107 | producer.flush() 108 | 109 | # configure multiple retries 110 | producer = KafkaProducer(retries=5) 111 | -------------------------------------------------------------------------------- /example.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import threading, logging, time 3 | import multiprocessing 4 | 5 | from kafka import KafkaConsumer, KafkaProducer 6 | 7 | 8 | class Producer(threading.Thread): 9 | def __init__(self): 10 | threading.Thread.__init__(self) 11 | self.stop_event = 
threading.Event() 12 | 13 | def stop(self): 14 | self.stop_event.set() 15 | 16 | def run(self): 17 | producer = KafkaProducer(bootstrap_servers='localhost:9092') 18 | 19 | while not self.stop_event.is_set(): 20 | producer.send('my-topic', b"test") 21 | producer.send('my-topic', b"\xc2Hola, mundo!") 22 | time.sleep(1) 23 | 24 | producer.close() 25 | 26 | class Consumer(multiprocessing.Process): 27 | def __init__(self): 28 | multiprocessing.Process.__init__(self) 29 | self.stop_event = multiprocessing.Event() 30 | 31 | def stop(self): 32 | self.stop_event.set() 33 | 34 | def run(self): 35 | consumer = KafkaConsumer(bootstrap_servers='localhost:9092', 36 | auto_offset_reset='earliest', 37 | consumer_timeout_ms=1000) 38 | consumer.subscribe(['my-topic']) 39 | 40 | while not self.stop_event.is_set(): 41 | for message in consumer: 42 | print(message) 43 | if self.stop_event.is_set(): 44 | break 45 | 46 | consumer.close() 47 | 48 | 49 | def main(): 50 | tasks = [ 51 | Producer(), 52 | Consumer() 53 | ] 54 | 55 | for t in tasks: 56 | t.start() 57 | 58 | time.sleep(10) 59 | 60 | for task in tasks: 61 | task.stop() 62 | 63 | for task in tasks: 64 | task.join() 65 | 66 | 67 | if __name__ == "__main__": 68 | logging.basicConfig( 69 | format='%(asctime)s.%(msecs)s:%(name)s:%(thread)d:%(levelname)s:%(process)d:%(message)s', 70 | level=logging.INFO 71 | ) 72 | main() 73 | -------------------------------------------------------------------------------- /kafka/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | __title__ = 'kafka' 4 | from kafka.version import __version__ 5 | __author__ = 'Dana Powers' 6 | __license__ = 'Apache License 2.0' 7 | __copyright__ = 'Copyright 2016 Dana Powers, David Arthur, and Contributors' 8 | 9 | # Set default logging handler to avoid "No handler found" warnings. 
10 | import logging 11 | try: # Python 2.7+ 12 | from logging import NullHandler 13 | except ImportError: 14 | class NullHandler(logging.Handler): 15 | def emit(self, record): 16 | pass 17 | 18 | logging.getLogger(__name__).addHandler(NullHandler()) 19 | 20 | 21 | from kafka.admin import KafkaAdmin 22 | from kafka.consumer import KafkaConsumer 23 | from kafka.consumer.subscription_state import ConsumerRebalanceListener 24 | from kafka.producer import KafkaProducer 25 | from kafka.conn import BrokerConnection 26 | from kafka.protocol import ( 27 | create_message, create_gzip_message, create_snappy_message) 28 | from kafka.partitioner import RoundRobinPartitioner, HashedPartitioner, Murmur2Partitioner 29 | from kafka.serializer import Serializer, Deserializer 30 | from kafka.structs import TopicPartition, OffsetAndMetadata 31 | 32 | # To be deprecated when KafkaProducer interface is released 33 | from kafka.client import SimpleClient 34 | from kafka.producer import SimpleProducer, KeyedProducer 35 | 36 | # deprecated in favor of KafkaConsumer 37 | from kafka.consumer import SimpleConsumer, MultiProcessConsumer 38 | 39 | 40 | import warnings 41 | class KafkaClient(SimpleClient): 42 | def __init__(self, *args, **kwargs): 43 | warnings.warn('The legacy KafkaClient interface has been moved to' 44 | ' kafka.SimpleClient - this import will break in a' 45 | ' future release', DeprecationWarning) 46 | super(KafkaClient, self).__init__(*args, **kwargs) 47 | 48 | 49 | __all__ = [ 50 | 'KafkaAdmin', 51 | 'KafkaConsumer', 'KafkaProducer', 'KafkaClient', 'BrokerConnection', 52 | 'SimpleClient', 'SimpleProducer', 'KeyedProducer', 53 | 'RoundRobinPartitioner', 'HashedPartitioner', 54 | 'create_message', 'create_gzip_message', 'create_snappy_message', 55 | 'SimpleConsumer', 'MultiProcessConsumer', 'ConsumerRebalanceListener', 56 | ] 57 | -------------------------------------------------------------------------------- /kafka/admin/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from kafka.admin.config_resource import ConfigResource, ConfigResourceType 4 | from kafka.admin.kafka import KafkaAdmin 5 | from kafka.admin.new_topic import NewTopic 6 | from kafka.admin.new_partitions import NewPartitions 7 | 8 | __all__ = [ 9 | 'ConfigResource', 'ConfigResourceType', 'KafkaAdmin', 'NewTopic', 'NewPartitions' 10 | ] 11 | -------------------------------------------------------------------------------- /kafka/admin/config_resource.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | # enum in stdlib as of py3.4 4 | try: 5 | from enum import IntEnum # pylint: disable=import-error 6 | except ImportError: 7 | # vendored backport module 8 | from kafka.vendor.enum34 import IntEnum 9 | 10 | 11 | class ConfigResourceType(IntEnum): 12 | """An enumerated type of config resources""" 13 | 14 | BROKER = 4, 15 | TOPIC = 2 16 | 17 | 18 | class ConfigResource(object): 19 | """A class for specifying config resources. 20 | Arguments: 21 | resource_type (ConfigResourceType): the type of kafka resource 22 | name (string): The name of the kafka resource 23 | configs ({key : value}): A maps of config keys to values. 
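        Example (an illustrative sketch only; the config key shown is just a sample topic setting):
            ConfigResource(ConfigResourceType.TOPIC, 'my-topic', configs={'retention.ms': '3600000'})
            ConfigResource('topic', 'my-topic')  # string resource types are upper-cased to the enum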
24 | """ 25 | 26 | def __init__( 27 | self, 28 | resource_type, 29 | name, 30 | configs=None 31 | ): 32 | if not isinstance(resource_type, (ConfigResourceType)): 33 | resource_type = ConfigResourceType[str(resource_type).upper()] # pylint: disable-msg=unsubscriptable-object 34 | self.resource_type = resource_type 35 | self.name = name 36 | self.configs = configs 37 | -------------------------------------------------------------------------------- /kafka/admin/new_partitions.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | 4 | class NewPartitions(object): 5 | """A class for new partition creation on existing topics. Note that the length of new_assignments, if specified, 6 | must be the difference between the new total number of partitions and the existing number of partitions. 7 | Arguments: 8 | total_count (int): the total number of partitions that should exist on the topic 9 | new_assignments ([[int]]): an array of arrays of replica assignments for new partitions. 10 | If not set, broker assigns replicas per an internal algorithm. 11 | """ 12 | 13 | def __init__( 14 | self, 15 | total_count, 16 | new_assignments=None 17 | ): 18 | self.total_count = total_count 19 | self.new_assignments = new_assignments 20 | -------------------------------------------------------------------------------- /kafka/admin/new_topic.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from kafka.errors import IllegalArgumentError 4 | 5 | 6 | class NewTopic(object): 7 | """ A class for new topic creation 8 | Arguments: 9 | name (string): name of the topic 10 | num_partitions (int): number of partitions 11 | or -1 if replica_assignment has been specified 12 | replication_factor (int): replication factor or -1 if 13 | replica assignment is specified 14 | replica_assignment (dict of int: [int]): A mapping containing 15 | partition id and replicas to assign to it. 16 | topic_configs (dict of str: str): A mapping of config key 17 | and value for the topic. 
18 | """ 19 | 20 | def __init__( 21 | self, 22 | name, 23 | num_partitions, 24 | replication_factor, 25 | replica_assignments=None, 26 | topic_configs=None, 27 | ): 28 | if not (num_partitions == -1 or replication_factor == -1) ^ (replica_assignments is None): 29 | raise IllegalArgumentError('either num_partitions/replication_factor or replica_assignment must be specified') 30 | self.name = name 31 | self.num_partitions = num_partitions 32 | self.replication_factor = replication_factor 33 | self.replica_assignments = replica_assignments or {} 34 | self.topic_configs = topic_configs or {} 35 | -------------------------------------------------------------------------------- /kafka/common.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from kafka.structs import * 4 | from kafka.errors import * 5 | -------------------------------------------------------------------------------- /kafka/consumer/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from kafka.consumer.simple import SimpleConsumer 4 | from kafka.consumer.multiprocess import MultiProcessConsumer 5 | from kafka.consumer.group import KafkaConsumer 6 | 7 | __all__ = [ 8 | 'SimpleConsumer', 'MultiProcessConsumer', 'KafkaConsumer' 9 | ] 10 | -------------------------------------------------------------------------------- /kafka/coordinator/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robinhood/kafka-python/3689da3d5c02e362d872cf1fb2d65201419c4b93/kafka/coordinator/__init__.py -------------------------------------------------------------------------------- /kafka/coordinator/assignors/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robinhood/kafka-python/3689da3d5c02e362d872cf1fb2d65201419c4b93/kafka/coordinator/assignors/__init__.py -------------------------------------------------------------------------------- /kafka/coordinator/assignors/abstract.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import abc 4 | import logging 5 | 6 | log = logging.getLogger(__name__) 7 | 8 | 9 | class AbstractPartitionAssignor(object): 10 | """ 11 | Abstract assignor implementation which does some common grunt work (in particular collecting 12 | partition counts which are always needed in assignors). 13 | """ 14 | 15 | @abc.abstractproperty 16 | def name(self): 17 | """.name should be a string identifying the assignor""" 18 | pass 19 | 20 | @abc.abstractmethod 21 | def assign(self, cluster, members): 22 | """Perform group assignment given cluster metadata and member subscriptions 23 | 24 | Arguments: 25 | cluster (ClusterMetadata): metadata for use in assignment 26 | members (dict of {member_id: MemberMetadata}): decoded metadata for 27 | each member in the group. 28 | 29 | Returns: 30 | dict: {member_id: MemberAssignment} 31 | """ 32 | pass 33 | 34 | @abc.abstractmethod 35 | def metadata(self, topics): 36 | """Generate ProtocolMetadata to be submitted via JoinGroupRequest. 37 | 38 | Arguments: 39 | topics (set): a member's subscribed topics 40 | 41 | Returns: 42 | MemberMetadata struct 43 | """ 44 | pass 45 | 46 | @abc.abstractmethod 47 | def on_assignment(self, assignment): 48 | """Callback that runs on each assignment. 
49 | 50 | This method can be used to update internal state, if any, of the 51 | partition assignor. 52 | 53 | Arguments: 54 | assignment (MemberAssignment): the member's assignment 55 | """ 56 | pass 57 | -------------------------------------------------------------------------------- /kafka/coordinator/assignors/range.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import collections 4 | import logging 5 | 6 | from kafka.vendor import six 7 | 8 | from kafka.coordinator.assignors.abstract import AbstractPartitionAssignor 9 | from kafka.coordinator.protocol import ConsumerProtocolMemberMetadata, ConsumerProtocolMemberAssignment 10 | 11 | log = logging.getLogger(__name__) 12 | 13 | 14 | class RangePartitionAssignor(AbstractPartitionAssignor): 15 | """ 16 | The range assignor works on a per-topic basis. For each topic, we lay out 17 | the available partitions in numeric order and the consumers in 18 | lexicographic order. We then divide the number of partitions by the total 19 | number of consumers to determine the number of partitions to assign to each 20 | consumer. If it does not evenly divide, then the first few consumers will 21 | have one extra partition. 22 | 23 | For example, suppose there are two consumers C0 and C1, two topics t0 and 24 | t1, and each topic has 3 partitions, resulting in partitions t0p0, t0p1, 25 | t0p2, t1p0, t1p1, and t1p2. 26 | 27 | The assignment will be: 28 | C0: [t0p0, t0p1, t1p0, t1p1] 29 | C1: [t0p2, t1p2] 30 | """ 31 | name = 'range' 32 | version = 0 33 | 34 | @classmethod 35 | def assign(cls, cluster, member_metadata): 36 | consumers_per_topic = collections.defaultdict(list) 37 | for member, metadata in six.iteritems(member_metadata): 38 | for topic in metadata.subscription: 39 | consumers_per_topic[topic].append(member) 40 | 41 | # construct {member_id: {topic: [partition, ...]}} 42 | assignment = collections.defaultdict(dict) 43 | 44 | for topic, consumers_for_topic in six.iteritems(consumers_per_topic): 45 | partitions = cluster.partitions_for_topic(topic) 46 | if partitions is None: 47 | log.warning('No partition metadata for topic %s', topic) 48 | continue 49 | partitions = sorted(list(partitions)) 50 | partitions_for_topic = len(partitions) 51 | consumers_for_topic.sort() 52 | 53 | partitions_per_consumer = len(partitions) // len(consumers_for_topic) 54 | consumers_with_extra = len(partitions) % len(consumers_for_topic) 55 | 56 | for i in range(len(consumers_for_topic)): 57 | start = partitions_per_consumer * i 58 | start += min(i, consumers_with_extra) 59 | length = partitions_per_consumer 60 | if not i + 1 > consumers_with_extra: 61 | length += 1 62 | member = consumers_for_topic[i] 63 | assignment[member][topic] = partitions[start:start+length] 64 | 65 | protocol_assignment = {} 66 | for member_id in member_metadata: 67 | protocol_assignment[member_id] = ConsumerProtocolMemberAssignment( 68 | cls.version, 69 | sorted(assignment[member_id].items()), 70 | b'') 71 | return protocol_assignment 72 | 73 | @classmethod 74 | def metadata(cls, topics): 75 | return ConsumerProtocolMemberMetadata(cls.version, list(topics), b'') 76 | 77 | @classmethod 78 | def on_assignment(cls, assignment): 79 | pass 80 | -------------------------------------------------------------------------------- /kafka/coordinator/assignors/roundrobin.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import 
collections 4 | import itertools 5 | import logging 6 | 7 | from kafka.vendor import six 8 | 9 | from kafka.coordinator.assignors.abstract import AbstractPartitionAssignor 10 | from kafka.coordinator.protocol import ConsumerProtocolMemberMetadata, ConsumerProtocolMemberAssignment 11 | from kafka.structs import TopicPartition 12 | 13 | log = logging.getLogger(__name__) 14 | 15 | 16 | class RoundRobinPartitionAssignor(AbstractPartitionAssignor): 17 | """ 18 | The roundrobin assignor lays out all the available partitions and all the 19 | available consumers. It then proceeds to do a roundrobin assignment from 20 | partition to consumer. If the subscriptions of all consumer instances are 21 | identical, then the partitions will be uniformly distributed. (i.e., the 22 | partition ownership counts will be within a delta of exactly one across all 23 | consumers.) 24 | 25 | For example, suppose there are two consumers C0 and C1, two topics t0 and 26 | t1, and each topic has 3 partitions, resulting in partitions t0p0, t0p1, 27 | t0p2, t1p0, t1p1, and t1p2. 28 | 29 | The assignment will be: 30 | C0: [t0p0, t0p2, t1p1] 31 | C1: [t0p1, t1p0, t1p2] 32 | 33 | When subscriptions differ across consumer instances, the assignment process 34 | still considers each consumer instance in round robin fashion but skips 35 | over an instance if it is not subscribed to the topic. Unlike the case when 36 | subscriptions are identical, this can result in imbalanced assignments. 37 | 38 | For example, suppose we have three consumers C0, C1, C2, and three topics 39 | t0, t1, t2, with unbalanced partitions t0p0, t1p0, t1p1, t2p0, t2p1, t2p2, 40 | where C0 is subscribed to t0; C1 is subscribed to t0, t1; and C2 is 41 | subscribed to t0, t1, t2. 42 | 43 | The assignment will be: 44 | C0: [t0p0] 45 | C1: [t1p0] 46 | C2: [t1p1, t2p0, t2p1, t2p2] 47 | """ 48 | name = 'roundrobin' 49 | version = 0 50 | 51 | @classmethod 52 | def assign(cls, cluster, member_metadata): 53 | all_topics = set() 54 | for metadata in six.itervalues(member_metadata): 55 | all_topics.update(metadata.subscription) 56 | 57 | all_topic_partitions = [] 58 | for topic in all_topics: 59 | partitions = cluster.partitions_for_topic(topic) 60 | if partitions is None: 61 | log.warning('No partition metadata for topic %s', topic) 62 | continue 63 | for partition in partitions: 64 | all_topic_partitions.append(TopicPartition(topic, partition)) 65 | all_topic_partitions.sort() 66 | 67 | # construct {member_id: {topic: [partition, ...]}} 68 | assignment = collections.defaultdict(lambda: collections.defaultdict(list)) 69 | 70 | member_iter = itertools.cycle(sorted(member_metadata.keys())) 71 | for partition in all_topic_partitions: 72 | member_id = next(member_iter) 73 | 74 | # Because we constructed all_topic_partitions from the set of 75 | # member subscribed topics, we should be safe assuming that 76 | # each topic in all_topic_partitions is in at least one member 77 | # subscription; otherwise this could yield an infinite loop 78 | while partition.topic not in member_metadata[member_id].subscription: 79 | member_id = next(member_iter) 80 | assignment[member_id][partition.topic].append(partition.partition) 81 | 82 | protocol_assignment = {} 83 | for member_id in member_metadata: 84 | protocol_assignment[member_id] = ConsumerProtocolMemberAssignment( 85 | cls.version, 86 | sorted(assignment[member_id].items()), 87 | b'') 88 | return protocol_assignment 89 | 90 | @classmethod 91 | def metadata(cls, topics): 92 | return ConsumerProtocolMemberMetadata(cls.version, 
list(topics), b'') 93 | 94 | @classmethod 95 | def on_assignment(cls, assignment): 96 | pass 97 | -------------------------------------------------------------------------------- /kafka/coordinator/heartbeat.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division 2 | 3 | import copy 4 | import time 5 | 6 | 7 | class Heartbeat(object): 8 | DEFAULT_CONFIG = { 9 | 'group_id': None, 10 | 'heartbeat_interval_ms': 3000, 11 | 'session_timeout_ms': 10000, 12 | 'max_poll_interval_ms': 300000, 13 | 'retry_backoff_ms': 100, 14 | } 15 | 16 | def __init__(self, **configs): 17 | self.config = copy.copy(self.DEFAULT_CONFIG) 18 | for key in self.config: 19 | if key in configs: 20 | self.config[key] = configs[key] 21 | 22 | if self.config['group_id'] is not None: 23 | assert (self.config['heartbeat_interval_ms'] 24 | <= self.config['session_timeout_ms']), ( 25 | 'Heartbeat interval must be lower than the session timeout') 26 | 27 | self.last_send = -1 * float('inf') 28 | self.last_receive = -1 * float('inf') 29 | self.last_poll = -1 * float('inf') 30 | self.last_reset = time.time() 31 | self.heartbeat_failed = None 32 | 33 | def poll(self): 34 | self.last_poll = time.time() 35 | 36 | def sent_heartbeat(self): 37 | self.last_send = time.time() 38 | self.heartbeat_failed = False 39 | 40 | def fail_heartbeat(self): 41 | self.heartbeat_failed = True 42 | 43 | def received_heartbeat(self): 44 | self.last_receive = time.time() 45 | 46 | def time_to_next_heartbeat(self): 47 | """Returns seconds (float) remaining before next heartbeat should be sent""" 48 | time_since_last_heartbeat = time.time() - max(self.last_send, self.last_reset) 49 | if self.heartbeat_failed: 50 | delay_to_next_heartbeat = self.config['retry_backoff_ms'] / 1000 51 | else: 52 | delay_to_next_heartbeat = self.config['heartbeat_interval_ms'] / 1000 53 | return max(0, delay_to_next_heartbeat - time_since_last_heartbeat) 54 | 55 | def should_heartbeat(self): 56 | return self.time_to_next_heartbeat() == 0 57 | 58 | def session_timeout_expired(self): 59 | last_recv = max(self.last_receive, self.last_reset) 60 | return (time.time() - last_recv) > (self.config['session_timeout_ms'] / 1000) 61 | 62 | def reset_timeouts(self): 63 | self.last_reset = time.time() 64 | self.last_poll = time.time() 65 | self.heartbeat_failed = False 66 | 67 | def poll_timeout_expired(self): 68 | return (time.time() - self.last_poll) > (self.config['max_poll_interval_ms'] / 1000) 69 | -------------------------------------------------------------------------------- /kafka/coordinator/protocol.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from kafka.protocol.struct import Struct 4 | from kafka.protocol.types import Array, Bytes, Int16, Int32, Schema, String 5 | from kafka.structs import TopicPartition 6 | 7 | 8 | class ConsumerProtocolMemberMetadata(Struct): 9 | SCHEMA = Schema( 10 | ('version', Int16), 11 | ('subscription', Array(String('utf-8'))), 12 | ('user_data', Bytes)) 13 | 14 | 15 | class ConsumerProtocolMemberAssignment(Struct): 16 | SCHEMA = Schema( 17 | ('version', Int16), 18 | ('assignment', Array( 19 | ('topic', String('utf-8')), 20 | ('partitions', Array(Int32)))), 21 | ('user_data', Bytes)) 22 | 23 | def partitions(self): 24 | return [TopicPartition(topic, partition) 25 | for topic, partitions in self.assignment # pylint: disable-msg=no-member 26 | for partition in partitions] 27 | 28 | 29 | 
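# Illustrative sketch of how an assignor's output is consumed (the topic and
# partition numbers here are placeholders):
#
#     assignment = ConsumerProtocolMemberAssignment(
#         0,                          # version
#         [('my-topic', [0, 1, 2])],  # (topic, partitions) pairs
#         b'')                        # opaque user_data
#     assignment.partitions()
#     # -> [TopicPartition(topic='my-topic', partition=0),
#     #     TopicPartition(topic='my-topic', partition=1),
#     #     TopicPartition(topic='my-topic', partition=2)]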
class ConsumerProtocol(object): 30 | PROTOCOL_TYPE = 'consumer' 31 | ASSIGNMENT_STRATEGIES = ('range', 'roundrobin') 32 | METADATA = ConsumerProtocolMemberMetadata 33 | ASSIGNMENT = ConsumerProtocolMemberAssignment 34 | -------------------------------------------------------------------------------- /kafka/future.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import functools 4 | import logging 5 | 6 | log = logging.getLogger(__name__) 7 | 8 | 9 | class Future(object): 10 | error_on_callbacks = False # and errbacks 11 | 12 | def __init__(self): 13 | self.is_done = False 14 | self.value = None 15 | self.exception = None 16 | self._callbacks = [] 17 | self._errbacks = [] 18 | 19 | def succeeded(self): 20 | return self.is_done and not bool(self.exception) 21 | 22 | def failed(self): 23 | return self.is_done and bool(self.exception) 24 | 25 | def retriable(self): 26 | try: 27 | return self.exception.retriable 28 | except AttributeError: 29 | return False 30 | 31 | def success(self, value): 32 | assert not self.is_done, 'Future is already complete' 33 | self.value = value 34 | self.is_done = True 35 | if self._callbacks: 36 | self._call_backs('callback', self._callbacks, self.value) 37 | return self 38 | 39 | def failure(self, e): 40 | assert not self.is_done, 'Future is already complete' 41 | self.exception = e if type(e) is not type else e() 42 | assert isinstance(self.exception, BaseException), ( 43 | 'future failed without an exception') 44 | self.is_done = True 45 | self._call_backs('errback', self._errbacks, self.exception) 46 | return self 47 | 48 | def add_callback(self, f, *args, **kwargs): 49 | if args or kwargs: 50 | f = functools.partial(f, *args, **kwargs) 51 | if self.is_done and not self.exception: 52 | self._call_backs('callback', [f], self.value) 53 | else: 54 | self._callbacks.append(f) 55 | return self 56 | 57 | def add_errback(self, f, *args, **kwargs): 58 | if args or kwargs: 59 | f = functools.partial(f, *args, **kwargs) 60 | if self.is_done and self.exception: 61 | self._call_backs('errback', [f], self.exception) 62 | else: 63 | self._errbacks.append(f) 64 | return self 65 | 66 | def add_both(self, f, *args, **kwargs): 67 | self.add_callback(f, *args, **kwargs) 68 | self.add_errback(f, *args, **kwargs) 69 | return self 70 | 71 | def chain(self, future): 72 | self.add_callback(future.success) 73 | self.add_errback(future.failure) 74 | return self 75 | 76 | def _call_backs(self, back_type, backs, value): 77 | for f in backs: 78 | try: 79 | f(value) 80 | except Exception as e: 81 | log.exception('Error processing %s', back_type) 82 | if self.error_on_callbacks: 83 | raise e 84 | -------------------------------------------------------------------------------- /kafka/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from kafka.metrics.compound_stat import NamedMeasurable 4 | from kafka.metrics.dict_reporter import DictReporter 5 | from kafka.metrics.kafka_metric import KafkaMetric 6 | from kafka.metrics.measurable import AnonMeasurable 7 | from kafka.metrics.metric_config import MetricConfig 8 | from kafka.metrics.metric_name import MetricName 9 | from kafka.metrics.metrics import Metrics 10 | from kafka.metrics.quota import Quota 11 | 12 | __all__ = [ 13 | 'AnonMeasurable', 'DictReporter', 'KafkaMetric', 'MetricConfig', 14 | 'MetricName', 'Metrics', 'NamedMeasurable', 'Quota' 15 | ] 16 | 
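# Usage sketch wiring the classes above together (sensor and metric names are
# placeholders; see kafka.metrics.metrics and kafka.metrics.stats for the
# underlying Metrics/Sensor API):
#
#     from kafka.metrics import DictReporter, MetricConfig, Metrics
#     from kafka.metrics.stats import Avg, Max
#
#     reporter = DictReporter(prefix='kafka')
#     metrics = Metrics(default_config=MetricConfig(samples=2),
#                       reporters=[reporter])
#     sensor = metrics.sensor('message-sizes')
#     sensor.add(metrics.metric_name('message-size-avg', 'producer-metrics',
#                                    'average message size'), Avg())
#     sensor.add(metrics.metric_name('message-size-max', 'producer-metrics',
#                                    'maximum message size'), Max())
#     sensor.record(512)
#     reporter.snapshot()
#     # -> {'kafka.producer-metrics': {'message-size-avg': 512.0,
#     #                                'message-size-max': 512.0}}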
-------------------------------------------------------------------------------- /kafka/metrics/compound_stat.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import abc 4 | 5 | from kafka.metrics.stat import AbstractStat 6 | 7 | 8 | class AbstractCompoundStat(AbstractStat): 9 | """ 10 | A compound stat is a stat where a single measurement and associated 11 | data structure feeds many metrics. This is the example for a 12 | histogram which has many associated percentiles. 13 | """ 14 | __metaclass__ = abc.ABCMeta 15 | 16 | def stats(self): 17 | """ 18 | Return list of NamedMeasurable 19 | """ 20 | raise NotImplementedError 21 | 22 | 23 | class NamedMeasurable(object): 24 | def __init__(self, metric_name, measurable_stat): 25 | self._name = metric_name 26 | self._stat = measurable_stat 27 | 28 | @property 29 | def name(self): 30 | return self._name 31 | 32 | @property 33 | def stat(self): 34 | return self._stat 35 | -------------------------------------------------------------------------------- /kafka/metrics/dict_reporter.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import logging 4 | import threading 5 | 6 | from kafka.metrics.metrics_reporter import AbstractMetricsReporter 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | class DictReporter(AbstractMetricsReporter): 12 | """A basic dictionary based metrics reporter. 13 | 14 | Store all metrics in a two level dictionary of category > name > metric. 15 | """ 16 | def __init__(self, prefix=''): 17 | self._lock = threading.Lock() 18 | self._prefix = prefix if prefix else '' # never allow None 19 | self._store = {} 20 | 21 | def snapshot(self): 22 | """ 23 | Return a nested dictionary snapshot of all metrics and their 24 | values at this time. Example: 25 | { 26 | 'category': { 27 | 'metric1_name': 42.0, 28 | 'metric2_name': 'foo' 29 | } 30 | } 31 | """ 32 | return dict((category, dict((name, metric.value()) 33 | for name, metric in list(metrics.items()))) 34 | for category, metrics in 35 | list(self._store.items())) 36 | 37 | def init(self, metrics): 38 | for metric in metrics: 39 | self.metric_change(metric) 40 | 41 | def metric_change(self, metric): 42 | with self._lock: 43 | category = self.get_category(metric) 44 | if category not in self._store: 45 | self._store[category] = {} 46 | self._store[category][metric.metric_name.name] = metric 47 | 48 | def metric_removal(self, metric): 49 | with self._lock: 50 | category = self.get_category(metric) 51 | metrics = self._store.get(category, {}) 52 | removed = metrics.pop(metric.metric_name.name, None) 53 | if not metrics: 54 | self._store.pop(category, None) 55 | return removed 56 | 57 | def get_category(self, metric): 58 | """ 59 | Return a string category for the metric. 60 | 61 | The category is made up of this reporter's prefix and the 62 | metric's group and tags. 
63 | 64 | Examples: 65 | prefix = 'foo', group = 'bar', tags = {'a': 1, 'b': 2} 66 | returns: 'foo.bar.a=1,b=2' 67 | 68 | prefix = 'foo', group = 'bar', tags = None 69 | returns: 'foo.bar' 70 | 71 | prefix = None, group = 'bar', tags = None 72 | returns: 'bar' 73 | """ 74 | tags = ','.join('%s=%s' % (k, v) for k, v in 75 | sorted(metric.metric_name.tags.items())) 76 | return '.'.join(x for x in 77 | [self._prefix, metric.metric_name.group, tags] if x) 78 | 79 | def configure(self, configs): 80 | pass 81 | 82 | def close(self): 83 | pass 84 | -------------------------------------------------------------------------------- /kafka/metrics/kafka_metric.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import time 4 | 5 | 6 | class KafkaMetric(object): 7 | # NOTE java constructor takes a lock instance 8 | def __init__(self, metric_name, measurable, config): 9 | if not metric_name: 10 | raise ValueError('metric_name must be non-empty') 11 | if not measurable: 12 | raise ValueError('measurable must be non-empty') 13 | self._metric_name = metric_name 14 | self._measurable = measurable 15 | self._config = config 16 | 17 | @property 18 | def metric_name(self): 19 | return self._metric_name 20 | 21 | @property 22 | def measurable(self): 23 | return self._measurable 24 | 25 | @property 26 | def config(self): 27 | return self._config 28 | 29 | @config.setter 30 | def config(self, config): 31 | self._config = config 32 | 33 | def value(self, time_ms=None): 34 | if time_ms is None: 35 | time_ms = time.time() * 1000 36 | return self.measurable.measure(self.config, time_ms) 37 | -------------------------------------------------------------------------------- /kafka/metrics/measurable.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import abc 4 | 5 | 6 | class AbstractMeasurable(object): 7 | """A measurable quantity that can be registered as a metric""" 8 | @abc.abstractmethod 9 | def measure(self, config, now): 10 | """ 11 | Measure this quantity and return the result 12 | 13 | Arguments: 14 | config (MetricConfig): The configuration for this metric 15 | now (int): The POSIX time in milliseconds the measurement 16 | is being taken 17 | 18 | Returns: 19 | The measured value 20 | """ 21 | raise NotImplementedError 22 | 23 | 24 | class AnonMeasurable(AbstractMeasurable): 25 | def __init__(self, measure_fn): 26 | self._measure_fn = measure_fn 27 | 28 | def measure(self, config, now): 29 | return float(self._measure_fn(config, now)) 30 | -------------------------------------------------------------------------------- /kafka/metrics/measurable_stat.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import abc 4 | 5 | from kafka.metrics.measurable import AbstractMeasurable 6 | from kafka.metrics.stat import AbstractStat 7 | 8 | 9 | class AbstractMeasurableStat(AbstractStat, AbstractMeasurable): 10 | """ 11 | An AbstractMeasurableStat is an AbstractStat that is also 12 | an AbstractMeasurable (i.e. can produce a single floating point value). 13 | This is the interface used for most of the simple statistics such 14 | as Avg, Max, Count, etc. 
15 | """ 16 | __metaclass__ = abc.ABCMeta 17 | -------------------------------------------------------------------------------- /kafka/metrics/metric_config.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import sys 4 | 5 | 6 | class MetricConfig(object): 7 | """Configuration values for metrics""" 8 | def __init__(self, quota=None, samples=2, event_window=sys.maxsize, 9 | time_window_ms=30 * 1000, tags=None): 10 | """ 11 | Arguments: 12 | quota (Quota, optional): Upper or lower bound of a value. 13 | samples (int, optional): Max number of samples kept per metric. 14 | event_window (int, optional): Max number of values per sample. 15 | time_window_ms (int, optional): Max age of an individual sample. 16 | tags (dict of {str: str}, optional): Tags for each metric. 17 | """ 18 | self.quota = quota 19 | self._samples = samples 20 | self.event_window = event_window 21 | self.time_window_ms = time_window_ms 22 | # tags should be OrderedDict (not supported in py26) 23 | self.tags = tags if tags else {} 24 | 25 | @property 26 | def samples(self): 27 | return self._samples 28 | 29 | @samples.setter 30 | def samples(self, value): 31 | if value < 1: 32 | raise ValueError('The number of samples must be at least 1.') 33 | self._samples = value 34 | -------------------------------------------------------------------------------- /kafka/metrics/metric_name.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import copy 4 | 5 | 6 | class MetricName(object): 7 | """ 8 | This class encapsulates a metric's name, logical group and its 9 | related attributes (tags). 10 | 11 | group, tags parameters can be used to create unique metric names. 12 | e.g. domainName:type=group,key1=val1,key2=val2 13 | 14 | Usage looks something like this: 15 | 16 | # set up metrics: 17 | metric_tags = {'client-id': 'producer-1', 'topic': 'topic'} 18 | metric_config = MetricConfig(tags=metric_tags) 19 | 20 | # metrics is the global repository of metrics and sensors 21 | metrics = Metrics(metric_config) 22 | 23 | sensor = metrics.sensor('message-sizes') 24 | metric_name = metrics.metric_name('message-size-avg', 25 | 'producer-metrics', 26 | 'average message size') 27 | sensor.add(metric_name, Avg()) 28 | 29 | metric_name = metrics.metric_name('message-size-max', 30 | sensor.add(metric_name, Max()) 31 | 32 | tags = {'client-id': 'my-client', 'topic': 'my-topic'} 33 | metric_name = metrics.metric_name('message-size-min', 34 | 'producer-metrics', 35 | 'message minimum size', tags) 36 | sensor.add(metric_name, Min()) 37 | 38 | # as messages are sent we record the sizes 39 | sensor.record(message_size) 40 | """ 41 | 42 | def __init__(self, name, group, description=None, tags=None): 43 | """ 44 | Arguments: 45 | name (str): The name of the metric. 46 | group (str): The logical group name of the metrics to which this 47 | metric belongs. 48 | description (str, optional): A human-readable description to 49 | include in the metric. 50 | tags (dict, optional): Additional key/val attributes of the metric. 
51 | """ 52 | if not (name and group): 53 | raise ValueError('name and group must be non-empty.') 54 | if tags is not None and not isinstance(tags, dict): 55 | raise ValueError('tags must be a dict if present.') 56 | 57 | self._name = name 58 | self._group = group 59 | self._description = description 60 | self._tags = copy.copy(tags) 61 | self._hash = 0 62 | 63 | @property 64 | def name(self): 65 | return self._name 66 | 67 | @property 68 | def group(self): 69 | return self._group 70 | 71 | @property 72 | def description(self): 73 | return self._description 74 | 75 | @property 76 | def tags(self): 77 | return copy.copy(self._tags) 78 | 79 | def __hash__(self): 80 | if self._hash != 0: 81 | return self._hash 82 | prime = 31 83 | result = 1 84 | result = prime * result + hash(self.group) 85 | result = prime * result + hash(self.name) 86 | tags_hash = hash(frozenset(self.tags.items())) if self.tags else 0 87 | result = prime * result + tags_hash 88 | self._hash = result 89 | return result 90 | 91 | def __eq__(self, other): 92 | if self is other: 93 | return True 94 | if other is None: 95 | return False 96 | return (type(self) == type(other) and 97 | self.group == other.group and 98 | self.name == other.name and 99 | self.tags == other.tags) 100 | 101 | def __ne__(self, other): 102 | return not self.__eq__(other) 103 | 104 | def __str__(self): 105 | return 'MetricName(name=%s, group=%s, description=%s, tags=%s)' % ( 106 | self.name, self.group, self.description, self.tags) 107 | -------------------------------------------------------------------------------- /kafka/metrics/metrics_reporter.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import abc 4 | 5 | 6 | class AbstractMetricsReporter(object): 7 | """ 8 | An abstract class to allow things to listen as new metrics 9 | are created so they can be reported. 
10 | """ 11 | __metaclass__ = abc.ABCMeta 12 | 13 | @abc.abstractmethod 14 | def init(self, metrics): 15 | """ 16 | This is called when the reporter is first registered 17 | to initially register all existing metrics 18 | 19 | Arguments: 20 | metrics (list of KafkaMetric): All currently existing metrics 21 | """ 22 | raise NotImplementedError 23 | 24 | @abc.abstractmethod 25 | def metric_change(self, metric): 26 | """ 27 | This is called whenever a metric is updated or added 28 | 29 | Arguments: 30 | metric (KafkaMetric) 31 | """ 32 | raise NotImplementedError 33 | 34 | @abc.abstractmethod 35 | def metric_removal(self, metric): 36 | """ 37 | This is called whenever a metric is removed 38 | 39 | Arguments: 40 | metric (KafkaMetric) 41 | """ 42 | raise NotImplementedError 43 | 44 | @abc.abstractmethod 45 | def configure(self, configs): 46 | """ 47 | Configure this class with the given key-value pairs 48 | 49 | Arguments: 50 | configs (dict of {str, ?}) 51 | """ 52 | raise NotImplementedError 53 | 54 | @abc.abstractmethod 55 | def close(self): 56 | """Called when the metrics repository is closed.""" 57 | raise NotImplementedError 58 | -------------------------------------------------------------------------------- /kafka/metrics/quota.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | 4 | class Quota(object): 5 | """An upper or lower bound for metrics""" 6 | def __init__(self, bound, is_upper): 7 | self._bound = bound 8 | self._upper = is_upper 9 | 10 | @staticmethod 11 | def upper_bound(upper_bound): 12 | return Quota(upper_bound, True) 13 | 14 | @staticmethod 15 | def lower_bound(lower_bound): 16 | return Quota(lower_bound, False) 17 | 18 | def is_upper_bound(self): 19 | return self._upper 20 | 21 | @property 22 | def bound(self): 23 | return self._bound 24 | 25 | def is_acceptable(self, value): 26 | return ((self.is_upper_bound() and value <= self.bound) or 27 | (not self.is_upper_bound() and value >= self.bound)) 28 | 29 | def __hash__(self): 30 | prime = 31 31 | result = prime + self.bound 32 | return prime * result + self.is_upper_bound() 33 | 34 | def __eq__(self, other): 35 | if self is other: 36 | return True 37 | return (type(self) == type(other) and 38 | self.bound == other.bound and 39 | self.is_upper_bound() == other.is_upper_bound()) 40 | 41 | def __ne__(self, other): 42 | return not self.__eq__(other) 43 | -------------------------------------------------------------------------------- /kafka/metrics/stat.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import abc 4 | 5 | 6 | class AbstractStat(object): 7 | """ 8 | An AbstractStat is a quantity such as average, max, etc that is computed 9 | off the stream of updates to a sensor 10 | """ 11 | __metaclass__ = abc.ABCMeta 12 | 13 | @abc.abstractmethod 14 | def record(self, config, value, time_ms): 15 | """ 16 | Record the given value 17 | 18 | Arguments: 19 | config (MetricConfig): The configuration to use for this metric 20 | value (float): The value to record 21 | timeMs (int): The POSIX time in milliseconds this value occurred 22 | """ 23 | raise NotImplementedError 24 | -------------------------------------------------------------------------------- /kafka/metrics/stats/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from kafka.metrics.stats.avg import Avg 4 | 
from kafka.metrics.stats.count import Count 5 | from kafka.metrics.stats.histogram import Histogram 6 | from kafka.metrics.stats.max_stat import Max 7 | from kafka.metrics.stats.min_stat import Min 8 | from kafka.metrics.stats.percentile import Percentile 9 | from kafka.metrics.stats.percentiles import Percentiles 10 | from kafka.metrics.stats.rate import Rate 11 | from kafka.metrics.stats.sensor import Sensor 12 | from kafka.metrics.stats.total import Total 13 | 14 | __all__ = [ 15 | 'Avg', 'Count', 'Histogram', 'Max', 'Min', 'Percentile', 'Percentiles', 16 | 'Rate', 'Sensor', 'Total' 17 | ] 18 | -------------------------------------------------------------------------------- /kafka/metrics/stats/avg.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from kafka.metrics.stats.sampled_stat import AbstractSampledStat 4 | 5 | 6 | class Avg(AbstractSampledStat): 7 | """ 8 | An AbstractSampledStat that maintains a simple average over its samples. 9 | """ 10 | def __init__(self): 11 | super(Avg, self).__init__(0.0) 12 | 13 | def update(self, sample, config, value, now): 14 | sample.value += value 15 | 16 | def combine(self, samples, config, now): 17 | total_sum = 0 18 | total_count = 0 19 | for sample in samples: 20 | total_sum += sample.value 21 | total_count += sample.event_count 22 | if not total_count: 23 | return 0 24 | return float(total_sum) / total_count 25 | -------------------------------------------------------------------------------- /kafka/metrics/stats/count.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from kafka.metrics.stats.sampled_stat import AbstractSampledStat 4 | 5 | 6 | class Count(AbstractSampledStat): 7 | """ 8 | An AbstractSampledStat that maintains a simple count of what it has seen. 
9 | """ 10 | def __init__(self): 11 | super(Count, self).__init__(0.0) 12 | 13 | def update(self, sample, config, value, now): 14 | sample.value += 1.0 15 | 16 | def combine(self, samples, config, now): 17 | return float(sum(sample.value for sample in samples)) 18 | -------------------------------------------------------------------------------- /kafka/metrics/stats/histogram.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import math 4 | 5 | 6 | class Histogram(object): 7 | def __init__(self, bin_scheme): 8 | self._hist = [0.0] * bin_scheme.bins 9 | self._count = 0.0 10 | self._bin_scheme = bin_scheme 11 | 12 | def record(self, value): 13 | self._hist[self._bin_scheme.to_bin(value)] += 1.0 14 | self._count += 1.0 15 | 16 | def value(self, quantile): 17 | if self._count == 0.0: 18 | return float('NaN') 19 | _sum = 0.0 20 | quant = float(quantile) 21 | for i, value in enumerate(self._hist[:-1]): 22 | _sum += value 23 | if _sum / self._count > quant: 24 | return self._bin_scheme.from_bin(i) 25 | return float('inf') 26 | 27 | @property 28 | def counts(self): 29 | return self._hist 30 | 31 | def clear(self): 32 | for i in range(self._hist): 33 | self._hist[i] = 0.0 34 | self._count = 0 35 | 36 | def __str__(self): 37 | values = ['%.10f:%.0f' % (self._bin_scheme.from_bin(i), value) for 38 | i, value in enumerate(self._hist[:-1])] 39 | values.append('%s:%s' % (float('inf'), self._hist[-1])) 40 | return '{%s}' % ','.join(values) 41 | 42 | class ConstantBinScheme(object): 43 | def __init__(self, bins, min_val, max_val): 44 | if bins < 2: 45 | raise ValueError('Must have at least 2 bins.') 46 | self._min = float(min_val) 47 | self._max = float(max_val) 48 | self._bins = int(bins) 49 | self._bucket_width = (max_val - min_val) / (bins - 2) 50 | 51 | @property 52 | def bins(self): 53 | return self._bins 54 | 55 | def from_bin(self, b): 56 | if b == 0: 57 | return float('-inf') 58 | elif b == self._bins - 1: 59 | return float('inf') 60 | else: 61 | return self._min + (b - 1) * self._bucket_width 62 | 63 | def to_bin(self, x): 64 | if x < self._min: 65 | return 0 66 | elif x > self._max: 67 | return self._bins - 1 68 | else: 69 | return int(((x - self._min) / self._bucket_width) + 1) 70 | 71 | class LinearBinScheme(object): 72 | def __init__(self, num_bins, max_val): 73 | self._bins = num_bins 74 | self._max = max_val 75 | self._scale = max_val / (num_bins * (num_bins - 1) / 2) 76 | 77 | @property 78 | def bins(self): 79 | return self._bins 80 | 81 | def from_bin(self, b): 82 | if b == self._bins - 1: 83 | return float('inf') 84 | else: 85 | unscaled = (b * (b + 1.0)) / 2.0 86 | return unscaled * self._scale 87 | 88 | def to_bin(self, x): 89 | if x < 0.0: 90 | raise ValueError('Values less than 0.0 not accepted.') 91 | elif x > self._max: 92 | return self._bins - 1 93 | else: 94 | scaled = x / self._scale 95 | return int(-0.5 + math.sqrt(2.0 * scaled + 0.25)) 96 | -------------------------------------------------------------------------------- /kafka/metrics/stats/max_stat.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from kafka.metrics.stats.sampled_stat import AbstractSampledStat 4 | 5 | 6 | class Max(AbstractSampledStat): 7 | """An AbstractSampledStat that gives the max over its samples.""" 8 | def __init__(self): 9 | super(Max, self).__init__(float('-inf')) 10 | 11 | def update(self, sample, config, value, now): 12 | 
sample.value = max(sample.value, value) 13 | 14 | def combine(self, samples, config, now): 15 | if not samples: 16 | return float('-inf') 17 | return float(max(sample.value for sample in samples)) 18 | -------------------------------------------------------------------------------- /kafka/metrics/stats/min_stat.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import sys 4 | 5 | from kafka.metrics.stats.sampled_stat import AbstractSampledStat 6 | 7 | 8 | class Min(AbstractSampledStat): 9 | """An AbstractSampledStat that gives the min over its samples.""" 10 | def __init__(self): 11 | super(Min, self).__init__(float(sys.maxsize)) 12 | 13 | def update(self, sample, config, value, now): 14 | sample.value = min(sample.value, value) 15 | 16 | def combine(self, samples, config, now): 17 | if not samples: 18 | return float(sys.maxsize) 19 | return float(min(sample.value for sample in samples)) 20 | -------------------------------------------------------------------------------- /kafka/metrics/stats/percentile.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | 4 | class Percentile(object): 5 | def __init__(self, metric_name, percentile): 6 | self._metric_name = metric_name 7 | self._percentile = float(percentile) 8 | 9 | @property 10 | def name(self): 11 | return self._metric_name 12 | 13 | @property 14 | def percentile(self): 15 | return self._percentile 16 | -------------------------------------------------------------------------------- /kafka/metrics/stats/percentiles.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from kafka.metrics import AnonMeasurable, NamedMeasurable 4 | from kafka.metrics.compound_stat import AbstractCompoundStat 5 | from kafka.metrics.stats import Histogram 6 | from kafka.metrics.stats.sampled_stat import AbstractSampledStat 7 | 8 | 9 | class BucketSizing(object): 10 | CONSTANT = 0 11 | LINEAR = 1 12 | 13 | 14 | class Percentiles(AbstractSampledStat, AbstractCompoundStat): 15 | """A compound stat that reports one or more percentiles""" 16 | def __init__(self, size_in_bytes, bucketing, max_val, min_val=0.0, 17 | percentiles=None): 18 | super(Percentiles, self).__init__(0.0) 19 | self._percentiles = percentiles or [] 20 | self._buckets = int(size_in_bytes / 4) 21 | if bucketing == BucketSizing.CONSTANT: 22 | self._bin_scheme = Histogram.ConstantBinScheme(self._buckets, 23 | min_val, max_val) 24 | elif bucketing == BucketSizing.LINEAR: 25 | if min_val != 0.0: 26 | raise ValueError('Linear bucket sizing requires min_val' 27 | ' to be 0.0.') 28 | self.bin_scheme = Histogram.LinearBinScheme(self._buckets, max_val) 29 | else: 30 | ValueError('Unknown bucket type: %s' % bucketing) 31 | 32 | def stats(self): 33 | measurables = [] 34 | 35 | def make_measure_fn(pct): 36 | return lambda config, now: self.value(config, now, 37 | pct / 100.0) 38 | 39 | for percentile in self._percentiles: 40 | measure_fn = make_measure_fn(percentile.percentile) 41 | stat = NamedMeasurable(percentile.name, AnonMeasurable(measure_fn)) 42 | measurables.append(stat) 43 | return measurables 44 | 45 | def value(self, config, now, quantile): 46 | self.purge_obsolete_samples(config, now) 47 | count = sum(sample.event_count for sample in self._samples) 48 | if count == 0.0: 49 | return float('NaN') 50 | sum_val = 0.0 51 | quant = float(quantile) 52 | for 
b in range(self._buckets): 53 | for sample in self._samples: 54 | assert type(sample) is self.HistogramSample 55 | hist = sample.histogram.counts 56 | sum_val += hist[b] 57 | if sum_val / count > quant: 58 | return self._bin_scheme.from_bin(b) 59 | return float('inf') 60 | 61 | def combine(self, samples, config, now): 62 | return self.value(config, now, 0.5) 63 | 64 | def new_sample(self, time_ms): 65 | return Percentiles.HistogramSample(self._bin_scheme, time_ms) 66 | 67 | def update(self, sample, config, value, time_ms): 68 | assert type(sample) is self.HistogramSample 69 | sample.histogram.record(value) 70 | 71 | class HistogramSample(AbstractSampledStat.Sample): 72 | def __init__(self, scheme, now): 73 | super(Percentiles.HistogramSample, self).__init__(0.0, now) 74 | self.histogram = Histogram(scheme) 75 | -------------------------------------------------------------------------------- /kafka/metrics/stats/sampled_stat.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import abc 4 | 5 | from kafka.metrics.measurable_stat import AbstractMeasurableStat 6 | 7 | 8 | class AbstractSampledStat(AbstractMeasurableStat): 9 | """ 10 | An AbstractSampledStat records a single scalar value measured over 11 | one or more samples. Each sample is recorded over a configurable 12 | window. The window can be defined by number of events or elapsed 13 | time (or both, if both are given the window is complete when 14 | *either* the event count or elapsed time criterion is met). 15 | 16 | All the samples are combined to produce the measurement. When a 17 | window is complete the oldest sample is cleared and recycled to 18 | begin recording the next sample. 19 | 20 | Subclasses of this class define different statistics measured 21 | using this basic pattern. 
22 | """ 23 | __metaclass__ = abc.ABCMeta 24 | 25 | def __init__(self, initial_value): 26 | self._initial_value = initial_value 27 | self._samples = [] 28 | self._current = 0 29 | 30 | @abc.abstractmethod 31 | def update(self, sample, config, value, time_ms): 32 | raise NotImplementedError 33 | 34 | @abc.abstractmethod 35 | def combine(self, samples, config, now): 36 | raise NotImplementedError 37 | 38 | def record(self, config, value, time_ms): 39 | sample = self.current(time_ms) 40 | if sample.is_complete(time_ms, config): 41 | sample = self._advance(config, time_ms) 42 | self.update(sample, config, float(value), time_ms) 43 | sample.event_count += 1 44 | 45 | def new_sample(self, time_ms): 46 | return self.Sample(self._initial_value, time_ms) 47 | 48 | def measure(self, config, now): 49 | self.purge_obsolete_samples(config, now) 50 | return float(self.combine(self._samples, config, now)) 51 | 52 | def current(self, time_ms): 53 | if not self._samples: 54 | self._samples.append(self.new_sample(time_ms)) 55 | return self._samples[self._current] 56 | 57 | def oldest(self, now): 58 | if not self._samples: 59 | self._samples.append(self.new_sample(now)) 60 | oldest = self._samples[0] 61 | for sample in self._samples[1:]: 62 | if sample.last_window_ms < oldest.last_window_ms: 63 | oldest = sample 64 | return oldest 65 | 66 | def purge_obsolete_samples(self, config, now): 67 | """ 68 | Timeout any windows that have expired in the absence of any events 69 | """ 70 | expire_age = config.samples * config.time_window_ms 71 | for sample in self._samples: 72 | if now - sample.last_window_ms >= expire_age: 73 | sample.reset(now) 74 | 75 | def _advance(self, config, time_ms): 76 | self._current = (self._current + 1) % config.samples 77 | if self._current >= len(self._samples): 78 | sample = self.new_sample(time_ms) 79 | self._samples.append(sample) 80 | return sample 81 | else: 82 | sample = self.current(time_ms) 83 | sample.reset(time_ms) 84 | return sample 85 | 86 | class Sample(object): 87 | 88 | def __init__(self, initial_value, now): 89 | self.initial_value = initial_value 90 | self.event_count = 0 91 | self.last_window_ms = now 92 | self.value = initial_value 93 | 94 | def reset(self, now): 95 | self.event_count = 0 96 | self.last_window_ms = now 97 | self.value = self.initial_value 98 | 99 | def is_complete(self, time_ms, config): 100 | return (time_ms - self.last_window_ms >= config.time_window_ms or 101 | self.event_count >= config.event_window) 102 | -------------------------------------------------------------------------------- /kafka/metrics/stats/total.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from kafka.metrics.measurable_stat import AbstractMeasurableStat 4 | 5 | 6 | class Total(AbstractMeasurableStat): 7 | """An un-windowed cumulative total maintained over all time.""" 8 | def __init__(self, value=0.0): 9 | self._total = value 10 | 11 | def record(self, config, value, now): 12 | self._total += value 13 | 14 | def measure(self, config, now): 15 | return float(self._total) 16 | -------------------------------------------------------------------------------- /kafka/partitioner/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from kafka.partitioner.default import DefaultPartitioner 4 | from kafka.partitioner.hashed import HashedPartitioner, Murmur2Partitioner, LegacyPartitioner 5 | from 
kafka.partitioner.roundrobin import RoundRobinPartitioner 6 | 7 | __all__ = [ 8 | 'DefaultPartitioner', 'RoundRobinPartitioner', 'HashedPartitioner', 9 | 'Murmur2Partitioner', 'LegacyPartitioner' 10 | ] 11 | -------------------------------------------------------------------------------- /kafka/partitioner/base.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | 4 | class Partitioner(object): 5 | """ 6 | Base class for a partitioner 7 | """ 8 | def __init__(self, partitions=None): 9 | """ 10 | Initialize the partitioner 11 | 12 | Arguments: 13 | partitions: A list of available partitions (during startup) OPTIONAL. 14 | """ 15 | self.partitions = partitions 16 | 17 | def __call__(self, key, all_partitions=None, available_partitions=None): 18 | """ 19 | Takes a string key, num_partitions and available_partitions as argument and returns 20 | a partition to be used for the message 21 | 22 | Arguments: 23 | key: the key to use for partitioning. 24 | all_partitions: a list of the topic's partitions. 25 | available_partitions: a list of the broker's currently avaliable partitions(optional). 26 | """ 27 | raise NotImplementedError('partition function has to be implemented') 28 | -------------------------------------------------------------------------------- /kafka/partitioner/default.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import random 4 | 5 | from kafka.partitioner.hashed import murmur2 6 | 7 | 8 | class DefaultPartitioner(object): 9 | """Default partitioner. 10 | 11 | Hashes key to partition using murmur2 hashing (from java client) 12 | If key is None, selects partition randomly from available, 13 | or from all partitions if none are currently available 14 | """ 15 | @classmethod 16 | def __call__(cls, key, all_partitions, available): 17 | """ 18 | Get the partition corresponding to key 19 | :param key: partitioning key 20 | :param all_partitions: list of all partitions sorted by partition ID 21 | :param available: list of available partitions in no particular order 22 | :return: one of the values from all_partitions or available 23 | """ 24 | if key is None: 25 | if available: 26 | return random.choice(available) 27 | return random.choice(all_partitions) 28 | 29 | idx = murmur2(key) 30 | idx &= 0x7fffffff 31 | idx %= len(all_partitions) 32 | return all_partitions[idx] 33 | -------------------------------------------------------------------------------- /kafka/partitioner/hashed.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from kafka.vendor import six 4 | 5 | from kafka.partitioner.base import Partitioner 6 | 7 | 8 | class Murmur2Partitioner(Partitioner): 9 | """ 10 | Implements a partitioner which selects the target partition based on 11 | the hash of the key. Attempts to apply the same hashing 12 | function as mainline java client. 
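    Example (illustrative; the key and partition list are placeholders):

        partitioner = Murmur2Partitioner(partitions=[0, 1, 2])
        partitioner.partition(b'some-key')  # deterministic member of [0, 1, 2]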
13 | """ 14 | def __call__(self, key, partitions=None, available=None): 15 | if available: 16 | return self.partition(key, available) 17 | return self.partition(key, partitions) 18 | 19 | def partition(self, key, partitions=None): 20 | if not partitions: 21 | partitions = self.partitions 22 | 23 | # https://github.com/apache/kafka/blob/0.8.2/clients/src/main/java/org/apache/kafka/clients/producer/internals/Partitioner.java#L69 24 | idx = (murmur2(key) & 0x7fffffff) % len(partitions) 25 | 26 | return partitions[idx] 27 | 28 | 29 | class LegacyPartitioner(object): 30 | """DEPRECATED -- See Issue 374 31 | 32 | Implements a partitioner which selects the target partition based on 33 | the hash of the key 34 | """ 35 | def __init__(self, partitions): 36 | self.partitions = partitions 37 | 38 | def partition(self, key, partitions=None): 39 | if not partitions: 40 | partitions = self.partitions 41 | size = len(partitions) 42 | idx = hash(key) % size 43 | 44 | return partitions[idx] 45 | 46 | 47 | # Default will change to Murmur2 in 0.10 release 48 | HashedPartitioner = LegacyPartitioner 49 | 50 | 51 | # https://github.com/apache/kafka/blob/0.8.2/clients/src/main/java/org/apache/kafka/common/utils/Utils.java#L244 52 | def murmur2(data): 53 | """Pure-python Murmur2 implementation. 54 | 55 | Based on java client, see org.apache.kafka.common.utils.Utils.murmur2 56 | 57 | Args: 58 | data (bytes): opaque bytes 59 | 60 | Returns: MurmurHash2 of data 61 | """ 62 | # Python2 bytes is really a str, causing the bitwise operations below to fail 63 | # so convert to bytearray. 64 | if six.PY2: 65 | data = bytearray(bytes(data)) 66 | 67 | length = len(data) 68 | seed = 0x9747b28c 69 | # 'm' and 'r' are mixing constants generated offline. 70 | # They're not really 'magic', they just happen to work well. 
71 | m = 0x5bd1e995 72 | r = 24 73 | 74 | # Initialize the hash to a random value 75 | h = seed ^ length 76 | length4 = length // 4 77 | 78 | for i in range(length4): 79 | i4 = i * 4 80 | k = ((data[i4 + 0] & 0xff) + 81 | ((data[i4 + 1] & 0xff) << 8) + 82 | ((data[i4 + 2] & 0xff) << 16) + 83 | ((data[i4 + 3] & 0xff) << 24)) 84 | k &= 0xffffffff 85 | k *= m 86 | k &= 0xffffffff 87 | k ^= (k % 0x100000000) >> r # k ^= k >>> r 88 | k &= 0xffffffff 89 | k *= m 90 | k &= 0xffffffff 91 | 92 | h *= m 93 | h &= 0xffffffff 94 | h ^= k 95 | h &= 0xffffffff 96 | 97 | # Handle the last few bytes of the input array 98 | extra_bytes = length % 4 99 | if extra_bytes >= 3: 100 | h ^= (data[(length & ~3) + 2] & 0xff) << 16 101 | h &= 0xffffffff 102 | if extra_bytes >= 2: 103 | h ^= (data[(length & ~3) + 1] & 0xff) << 8 104 | h &= 0xffffffff 105 | if extra_bytes >= 1: 106 | h ^= (data[length & ~3] & 0xff) 107 | h &= 0xffffffff 108 | h *= m 109 | h &= 0xffffffff 110 | 111 | h ^= (h % 0x100000000) >> 13 # h >>> 13; 112 | h &= 0xffffffff 113 | h *= m 114 | h &= 0xffffffff 115 | h ^= (h % 0x100000000) >> 15 # h >>> 15; 116 | h &= 0xffffffff 117 | 118 | return h 119 | -------------------------------------------------------------------------------- /kafka/partitioner/roundrobin.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from kafka.partitioner.base import Partitioner 4 | 5 | 6 | class RoundRobinPartitioner(Partitioner): 7 | def __init__(self, partitions=None): 8 | self.partitions_iterable = CachedPartitionCycler(partitions) 9 | if partitions: 10 | self._set_partitions(partitions) 11 | else: 12 | self.partitions = None 13 | 14 | def __call__(self, key, all_partitions=None, available_partitions=None): 15 | if available_partitions: 16 | cur_partitions = available_partitions 17 | else: 18 | cur_partitions = all_partitions 19 | if not self.partitions: 20 | self._set_partitions(cur_partitions) 21 | elif cur_partitions != self.partitions_iterable.partitions and cur_partitions is not None: 22 | self._set_partitions(cur_partitions) 23 | return next(self.partitions_iterable) 24 | 25 | def _set_partitions(self, available_partitions): 26 | self.partitions = available_partitions 27 | self.partitions_iterable.set_partitions(available_partitions) 28 | 29 | def partition(self, key, all_partitions=None, available_partitions=None): 30 | return self.__call__(key, all_partitions, available_partitions) 31 | 32 | 33 | class CachedPartitionCycler(object): 34 | def __init__(self, partitions=None): 35 | self.partitions = partitions 36 | if partitions: 37 | assert type(partitions) is list 38 | self.cur_pos = None 39 | 40 | def __next__(self): 41 | return self.next() 42 | 43 | @staticmethod 44 | def _index_available(cur_pos, partitions): 45 | return cur_pos < len(partitions) 46 | 47 | def set_partitions(self, partitions): 48 | if self.cur_pos: 49 | if not self._index_available(self.cur_pos, partitions): 50 | self.cur_pos = 0 51 | self.partitions = partitions 52 | return None 53 | 54 | self.partitions = partitions 55 | next_item = self.partitions[self.cur_pos] 56 | if next_item in partitions: 57 | self.cur_pos = partitions.index(next_item) 58 | else: 59 | self.cur_pos = 0 60 | return None 61 | self.partitions = partitions 62 | 63 | def next(self): 64 | assert self.partitions is not None 65 | if self.cur_pos is None or not self._index_available(self.cur_pos, self.partitions): 66 | self.cur_pos = 1 67 | return self.partitions[0] 68 | cur_item = 
self.partitions[self.cur_pos] 69 | self.cur_pos += 1 70 | return cur_item 71 | -------------------------------------------------------------------------------- /kafka/producer/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from kafka.producer.kafka import KafkaProducer 4 | from kafka.producer.simple import SimpleProducer 5 | from kafka.producer.keyed import KeyedProducer 6 | 7 | __all__ = [ 8 | 'KafkaProducer', 9 | 'SimpleProducer', 'KeyedProducer' # deprecated 10 | ] 11 | -------------------------------------------------------------------------------- /kafka/producer/future.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import collections 4 | import threading 5 | 6 | from kafka import errors as Errors 7 | from kafka.future import Future 8 | 9 | 10 | class FutureProduceResult(Future): 11 | def __init__(self, topic_partition): 12 | super(FutureProduceResult, self).__init__() 13 | self.topic_partition = topic_partition 14 | self._latch = threading.Event() 15 | 16 | def success(self, value): 17 | ret = super(FutureProduceResult, self).success(value) 18 | self._latch.set() 19 | return ret 20 | 21 | def failure(self, error): 22 | ret = super(FutureProduceResult, self).failure(error) 23 | self._latch.set() 24 | return ret 25 | 26 | def wait(self, timeout=None): 27 | # wait() on python2.6 returns None instead of the flag value 28 | return self._latch.wait(timeout) or self._latch.is_set() 29 | 30 | 31 | class FutureRecordMetadata(Future): 32 | def __init__(self, produce_future, relative_offset, timestamp_ms, checksum, serialized_key_size, serialized_value_size, serialized_header_size): 33 | super(FutureRecordMetadata, self).__init__() 34 | self._produce_future = produce_future 35 | # packing args as a tuple is a minor speed optimization 36 | self.args = (relative_offset, timestamp_ms, checksum, serialized_key_size, serialized_value_size, serialized_header_size) 37 | produce_future.add_callback(self._produce_success) 38 | produce_future.add_errback(self.failure) 39 | 40 | def _produce_success(self, offset_and_timestamp): 41 | offset, produce_timestamp_ms = offset_and_timestamp 42 | 43 | # Unpacking from args tuple is minor speed optimization 44 | (relative_offset, timestamp_ms, checksum, 45 | serialized_key_size, serialized_value_size, serialized_header_size) = self.args 46 | 47 | # None is when Broker does not support the API (<0.10) and 48 | # -1 is when the broker is configured for CREATE_TIME timestamps 49 | if produce_timestamp_ms is not None and produce_timestamp_ms != -1: 50 | timestamp_ms = produce_timestamp_ms 51 | if offset != -1 and relative_offset is not None: 52 | offset += relative_offset 53 | tp = self._produce_future.topic_partition 54 | metadata = RecordMetadata(tp[0], tp[1], tp, offset, timestamp_ms, 55 | checksum, serialized_key_size, 56 | serialized_value_size, serialized_header_size) 57 | self.success(metadata) 58 | 59 | def get(self, timeout=None): 60 | if not self.is_done and not self._produce_future.wait(timeout): 61 | raise Errors.KafkaTimeoutError( 62 | "Timeout after waiting for %s secs." 
% timeout) 63 | assert self.is_done 64 | if self.failed(): 65 | raise self.exception # pylint: disable-msg=raising-bad-type 66 | return self.value 67 | 68 | 69 | RecordMetadata = collections.namedtuple( 70 | 'RecordMetadata', ['topic', 'partition', 'topic_partition', 'offset', 'timestamp', 71 | 'checksum', 'serialized_key_size', 'serialized_value_size', 'serialized_header_size']) 72 | -------------------------------------------------------------------------------- /kafka/producer/keyed.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import logging 4 | import warnings 5 | 6 | from kafka.producer.base import Producer 7 | from kafka.partitioner import HashedPartitioner 8 | 9 | 10 | log = logging.getLogger(__name__) 11 | 12 | 13 | class KeyedProducer(Producer): 14 | """ 15 | A producer which distributes messages to partitions based on the key 16 | 17 | See Producer class for Arguments 18 | 19 | Additional Arguments: 20 | partitioner: A partitioner class that will be used to get the partition 21 | to send the message to. Must be derived from Partitioner. 22 | Defaults to HashedPartitioner. 23 | """ 24 | def __init__(self, *args, **kwargs): 25 | self.partitioner_class = kwargs.pop('partitioner', HashedPartitioner) 26 | self.partitioners = {} 27 | super(KeyedProducer, self).__init__(*args, **kwargs) 28 | 29 | def _next_partition(self, topic, key): 30 | if topic not in self.partitioners: 31 | if not self.client.has_metadata_for_topic(topic): 32 | self.client.load_metadata_for_topics(topic, ignore_leadernotavailable=True) 33 | 34 | self.partitioners[topic] = self.partitioner_class(self.client.get_partition_ids_for_topic(topic)) 35 | 36 | partitioner = self.partitioners[topic] 37 | return partitioner.partition(key) 38 | 39 | def send_messages(self, topic, key, *msg): 40 | partition = self._next_partition(topic, key) 41 | return self._send_messages(topic, partition, *msg, key=key) 42 | 43 | # DEPRECATED 44 | def send(self, topic, key, msg): 45 | warnings.warn("KeyedProducer.send is deprecated in favor of send_messages", DeprecationWarning) 46 | return self.send_messages(topic, key, msg) 47 | 48 | def __repr__(self): 49 | return '' % self.async_send 50 | -------------------------------------------------------------------------------- /kafka/producer/simple.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from itertools import cycle 4 | import logging 5 | import random 6 | 7 | from kafka.vendor.six.moves import range 8 | 9 | from kafka.producer.base import Producer 10 | 11 | 12 | log = logging.getLogger(__name__) 13 | 14 | 15 | class SimpleProducer(Producer): 16 | """A simple, round-robin producer. 17 | 18 | See Producer class for Base Arguments 19 | 20 | Additional Arguments: 21 | random_start (bool, optional): randomize the initial partition which 22 | the first message block will be published to, otherwise 23 | if false, the first message block will always publish 24 | to partition 0 before cycling through each partition, 25 | defaults to True. 
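    Example (illustrative; assumes a broker reachable at localhost:9092):

        from kafka import SimpleClient, SimpleProducer

        client = SimpleClient('localhost:9092')
        producer = SimpleProducer(client, random_start=True)
        producer.send_messages('my-topic', b'message 1', b'message 2')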
26 | """ 27 | def __init__(self, *args, **kwargs): 28 | self.partition_cycles = {} 29 | self.random_start = kwargs.pop('random_start', True) 30 | super(SimpleProducer, self).__init__(*args, **kwargs) 31 | 32 | def _next_partition(self, topic): 33 | if topic not in self.partition_cycles: 34 | if not self.client.has_metadata_for_topic(topic): 35 | self.client.ensure_topic_exists(topic) 36 | 37 | self.partition_cycles[topic] = cycle(self.client.get_partition_ids_for_topic(topic)) 38 | 39 | # Randomize the initial partition that is returned 40 | if self.random_start: 41 | num_partitions = len(self.client.get_partition_ids_for_topic(topic)) 42 | for _ in range(random.randint(0, num_partitions-1)): 43 | next(self.partition_cycles[topic]) 44 | 45 | return next(self.partition_cycles[topic]) 46 | 47 | def send_messages(self, topic, *msg): 48 | partition = self._next_partition(topic) 49 | return super(SimpleProducer, self).send_messages( 50 | topic, partition, *msg 51 | ) 52 | 53 | def __repr__(self): 54 | return '' % self.async_send 55 | -------------------------------------------------------------------------------- /kafka/protocol/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from kafka.protocol.legacy import ( 4 | create_message, create_gzip_message, 5 | create_snappy_message, create_message_set, 6 | CODEC_NONE, CODEC_GZIP, CODEC_SNAPPY, ALL_CODECS, 7 | ATTRIBUTE_CODEC_MASK, KafkaProtocol, 8 | ) 9 | 10 | API_KEYS = { 11 | 0: 'Produce', 12 | 1: 'Fetch', 13 | 2: 'ListOffsets', 14 | 3: 'Metadata', 15 | 4: 'LeaderAndIsr', 16 | 5: 'StopReplica', 17 | 6: 'UpdateMetadata', 18 | 7: 'ControlledShutdown', 19 | 8: 'OffsetCommit', 20 | 9: 'OffsetFetch', 21 | 10: 'FindCoordinator', 22 | 11: 'JoinGroup', 23 | 12: 'Heartbeat', 24 | 13: 'LeaveGroup', 25 | 14: 'SyncGroup', 26 | 15: 'DescribeGroups', 27 | 16: 'ListGroups', 28 | 17: 'SaslHandshake', 29 | 18: 'ApiVersions', 30 | 19: 'CreateTopics', 31 | 20: 'DeleteTopics', 32 | 21: 'DeleteRecords', 33 | 22: 'InitProducerId', 34 | 23: 'OffsetForLeaderEpoch', 35 | 24: 'AddPartitionsToTxn', 36 | 25: 'AddOffsetsToTxn', 37 | 26: 'EndTxn', 38 | 27: 'WriteTxnMarkers', 39 | 28: 'TxnOffsetCommit', 40 | 29: 'DescribeAcls', 41 | 30: 'CreateAcls', 42 | 31: 'DeleteAcls', 43 | 32: 'DescribeConfigs', 44 | 33: 'AlterConfigs', 45 | 36: 'SaslAuthenticate', 46 | 37: 'CreatePartitions', 47 | 38: 'CreateDelegationToken', 48 | 39: 'RenewDelegationToken', 49 | 40: 'ExpireDelegationToken', 50 | 41: 'DescribeDelegationToken', 51 | 42: 'DeleteGroups', 52 | } 53 | -------------------------------------------------------------------------------- /kafka/protocol/abstract.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import abc 4 | 5 | 6 | class AbstractType(object): 7 | __metaclass__ = abc.ABCMeta 8 | 9 | @abc.abstractmethod 10 | def encode(cls, value): # pylint: disable=no-self-argument 11 | pass 12 | 13 | @abc.abstractmethod 14 | def decode(cls, data): # pylint: disable=no-self-argument 15 | pass 16 | 17 | @classmethod 18 | def repr(cls, value): 19 | return repr(value) 20 | -------------------------------------------------------------------------------- /kafka/protocol/api.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import abc 4 | 5 | from kafka.protocol.struct import Struct 6 | from kafka.protocol.types import 
Int16, Int32, String, Schema 7 | 8 | 9 | class RequestHeader(Struct): 10 | SCHEMA = Schema( 11 | ('api_key', Int16), 12 | ('api_version', Int16), 13 | ('correlation_id', Int32), 14 | ('client_id', String('utf-8')) 15 | ) 16 | 17 | def __init__(self, request, correlation_id=0, client_id='kafka-python'): 18 | super(RequestHeader, self).__init__( 19 | request.API_KEY, request.API_VERSION, correlation_id, client_id 20 | ) 21 | 22 | 23 | class Request(Struct): 24 | __metaclass__ = abc.ABCMeta 25 | 26 | @abc.abstractproperty 27 | def API_KEY(self): 28 | """Integer identifier for api request""" 29 | pass 30 | 31 | @abc.abstractproperty 32 | def API_VERSION(self): 33 | """Integer of api request version""" 34 | pass 35 | 36 | @abc.abstractproperty 37 | def SCHEMA(self): 38 | """An instance of Schema() representing the request structure""" 39 | pass 40 | 41 | @abc.abstractproperty 42 | def RESPONSE_TYPE(self): 43 | """The Response class associated with the api request""" 44 | pass 45 | 46 | def expect_response(self): 47 | """Override this method if an api request does not always generate a response""" 48 | return True 49 | 50 | 51 | class Response(Struct): 52 | __metaclass__ = abc.ABCMeta 53 | 54 | @abc.abstractproperty 55 | def API_KEY(self): 56 | """Integer identifier for api request/response""" 57 | pass 58 | 59 | @abc.abstractproperty 60 | def API_VERSION(self): 61 | """Integer of api request/response version""" 62 | pass 63 | 64 | @abc.abstractproperty 65 | def SCHEMA(self): 66 | """An instance of Schema() representing the response structure""" 67 | pass 68 | -------------------------------------------------------------------------------- /kafka/protocol/frame.py: -------------------------------------------------------------------------------- 1 | class KafkaBytes(bytearray): 2 | def __init__(self, size): 3 | super(KafkaBytes, self).__init__(size) 4 | self._idx = 0 5 | 6 | def read(self, nbytes=None): 7 | if nbytes is None: 8 | nbytes = len(self) - self._idx 9 | start = self._idx 10 | self._idx += nbytes 11 | if self._idx > len(self): 12 | self._idx = len(self) 13 | return bytes(self[start:self._idx]) 14 | 15 | def write(self, data): 16 | start = self._idx 17 | self._idx += len(data) 18 | self[start:self._idx] = data 19 | 20 | def seek(self, idx): 21 | self._idx = idx 22 | 23 | def tell(self): 24 | return self._idx 25 | 26 | def __str__(self): 27 | return 'KafkaBytes(%d)' % len(self) 28 | 29 | def __repr__(self): 30 | return str(self) 31 | -------------------------------------------------------------------------------- /kafka/protocol/offset.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from kafka.protocol.api import Request, Response 4 | from kafka.protocol.types import Array, Int8, Int16, Int32, Int64, Schema, String 5 | 6 | UNKNOWN_OFFSET = -1 7 | 8 | 9 | class OffsetResetStrategy(object): 10 | LATEST = -1 11 | EARLIEST = -2 12 | NONE = 0 13 | 14 | 15 | class OffsetResponse_v0(Response): 16 | API_KEY = 2 17 | API_VERSION = 0 18 | SCHEMA = Schema( 19 | ('topics', Array( 20 | ('topic', String('utf-8')), 21 | ('partitions', Array( 22 | ('partition', Int32), 23 | ('error_code', Int16), 24 | ('offsets', Array(Int64)))))) 25 | ) 26 | 27 | class OffsetResponse_v1(Response): 28 | API_KEY = 2 29 | API_VERSION = 1 30 | SCHEMA = Schema( 31 | ('topics', Array( 32 | ('topic', String('utf-8')), 33 | ('partitions', Array( 34 | ('partition', Int32), 35 | ('error_code', Int16), 36 | ('timestamp', Int64), 37 | ('offset', 
Int64))))) 38 | ) 39 | 40 | 41 | class OffsetResponse_v2(Response): 42 | API_KEY = 2 43 | API_VERSION = 2 44 | SCHEMA = Schema( 45 | ('throttle_time_ms', Int32), 46 | ('topics', Array( 47 | ('topic', String('utf-8')), 48 | ('partitions', Array( 49 | ('partition', Int32), 50 | ('error_code', Int16), 51 | ('timestamp', Int64), 52 | ('offset', Int64))))) 53 | ) 54 | 55 | 56 | class OffsetRequest_v0(Request): 57 | API_KEY = 2 58 | API_VERSION = 0 59 | RESPONSE_TYPE = OffsetResponse_v0 60 | SCHEMA = Schema( 61 | ('replica_id', Int32), 62 | ('topics', Array( 63 | ('topic', String('utf-8')), 64 | ('partitions', Array( 65 | ('partition', Int32), 66 | ('timestamp', Int64), 67 | ('max_offsets', Int32))))) 68 | ) 69 | DEFAULTS = { 70 | 'replica_id': -1 71 | } 72 | 73 | class OffsetRequest_v1(Request): 74 | API_KEY = 2 75 | API_VERSION = 1 76 | RESPONSE_TYPE = OffsetResponse_v1 77 | SCHEMA = Schema( 78 | ('replica_id', Int32), 79 | ('topics', Array( 80 | ('topic', String('utf-8')), 81 | ('partitions', Array( 82 | ('partition', Int32), 83 | ('timestamp', Int64))))) 84 | ) 85 | DEFAULTS = { 86 | 'replica_id': -1 87 | } 88 | 89 | 90 | class OffsetRequest_v2(Request): 91 | API_KEY = 2 92 | API_VERSION = 2 93 | RESPONSE_TYPE = OffsetResponse_v2 94 | SCHEMA = Schema( 95 | ('replica_id', Int32), 96 | ('isolation_level', Int8), # <- added isolation_level 97 | ('topics', Array( 98 | ('topic', String('utf-8')), 99 | ('partitions', Array( 100 | ('partition', Int32), 101 | ('timestamp', Int64))))) 102 | ) 103 | DEFAULTS = { 104 | 'replica_id': -1 105 | } 106 | 107 | 108 | OffsetRequest = [OffsetRequest_v0, OffsetRequest_v1, OffsetRequest_v2] 109 | OffsetResponse = [OffsetResponse_v0, OffsetResponse_v1, OffsetResponse_v2] 110 | -------------------------------------------------------------------------------- /kafka/protocol/pickle.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | try: 4 | import copyreg # pylint: disable=import-error 5 | except ImportError: 6 | import copy_reg as copyreg # pylint: disable=import-error 7 | 8 | import types 9 | 10 | 11 | def _pickle_method(method): 12 | try: 13 | func_name = method.__func__.__name__ 14 | obj = method.__self__ 15 | cls = method.__self__.__class__ 16 | except AttributeError: 17 | func_name = method.im_func.__name__ 18 | obj = method.im_self 19 | cls = method.im_class 20 | 21 | return _unpickle_method, (func_name, obj, cls) 22 | 23 | 24 | def _unpickle_method(func_name, obj, cls): 25 | for cls in cls.mro(): 26 | try: 27 | func = cls.__dict__[func_name] 28 | except KeyError: 29 | pass 30 | else: 31 | break 32 | return func.__get__(obj, cls) 33 | 34 | # https://bytes.com/topic/python/answers/552476-why-cant-you-pickle-instancemethods 35 | copyreg.pickle(types.MethodType, _pickle_method, _unpickle_method) 36 | -------------------------------------------------------------------------------- /kafka/protocol/struct.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from io import BytesIO 4 | 5 | from kafka.protocol.abstract import AbstractType 6 | from kafka.protocol.types import Schema 7 | 8 | from kafka.util import WeakMethod 9 | 10 | 11 | class Struct(AbstractType): 12 | SCHEMA = Schema() 13 | 14 | def __init__(self, *args, **kwargs): 15 | if len(args) == len(self.SCHEMA.fields): 16 | for i, name in enumerate(self.SCHEMA.names): 17 | self.__dict__[name] = args[i] 18 | elif len(args) > 0: 19 | raise 
ValueError('Args must be empty or mirror schema') 20 | else: 21 | for name in self.SCHEMA.names: 22 | self.__dict__[name] = kwargs.pop(name, None) 23 | if kwargs: 24 | raise ValueError('Keyword(s) not in schema %s: %s' 25 | % (list(self.SCHEMA.names), 26 | ', '.join(kwargs.keys()))) 27 | 28 | # overloading encode() to support both class and instance 29 | # Without WeakMethod() this creates circular ref, which 30 | # causes instances to "leak" to garbage 31 | self.encode = WeakMethod(self._encode_self) 32 | 33 | @classmethod 34 | def encode(cls, item): # pylint: disable=E0202 35 | bits = [] 36 | for i, field in enumerate(cls.SCHEMA.fields): 37 | bits.append(field.encode(item[i])) 38 | return b''.join(bits) 39 | 40 | def _encode_self(self): 41 | return self.SCHEMA.encode( 42 | [self.__dict__[name] for name in self.SCHEMA.names] 43 | ) 44 | 45 | @classmethod 46 | def decode(cls, data): 47 | if isinstance(data, bytes): 48 | data = BytesIO(data) 49 | return cls(*[field.decode(data) for field in cls.SCHEMA.fields]) 50 | 51 | def __repr__(self): 52 | key_vals = [] 53 | for name, field in zip(self.SCHEMA.names, self.SCHEMA.fields): 54 | key_vals.append('%s=%s' % (name, field.repr(self.__dict__[name]))) 55 | return self.__class__.__name__ + '(' + ', '.join(key_vals) + ')' 56 | 57 | def __hash__(self): 58 | return hash(self.encode()) 59 | 60 | def __eq__(self, other): 61 | if self.SCHEMA != other.SCHEMA: 62 | return False 63 | for attr in self.SCHEMA.names: 64 | if self.__dict__[attr] != other.__dict__[attr]: 65 | return False 66 | return True 67 | 68 | """ 69 | class MetaStruct(type): 70 | def __new__(cls, clsname, bases, dct): 71 | nt = namedtuple(clsname, [name for (name, _) in dct['SCHEMA']]) 72 | bases = tuple([Struct, nt] + list(bases)) 73 | return super(MetaStruct, cls).__new__(cls, clsname, bases, dct) 74 | """ 75 | -------------------------------------------------------------------------------- /kafka/record/README: -------------------------------------------------------------------------------- 1 | Module structured mostly based on 2 | kafka/clients/src/main/java/org/apache/kafka/common/record/ module of Java 3 | Client. 4 | 5 | See abc.py for abstract declarations. `ABCRecords` is used as a facade to hide 6 | version differences. `ABCRecordBatch` subclasses will implement actual parsers 7 | for different versions (v0/v1 as LegacyBatch and v2 as DefaultBatch. Names 8 | taken from Java). 
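The Struct/Schema plumbing in kafka/protocol/struct.py above is what turns the request and response classes (for example those in kafka/protocol/offset.py) into wire bytes. A minimal sketch, assuming an illustrative topic name; this is not taken from the repository's tests:

from kafka.protocol.offset import OffsetRequest_v0

# Field values are supplied in SCHEMA order or by name; nested Array/Schema
# fields are plain lists of tuples.
request = OffsetRequest_v0(
    replica_id=-1,
    topics=[('example-topic', [(0, -1, 1)])])  # (partition, timestamp, max_offsets)

wire_bytes = request.encode()  # bound instance encode() -> Struct._encode_self()
print(request)                 # __repr__ walks SCHEMA names and field repr()s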
9 | -------------------------------------------------------------------------------- /kafka/record/__init__.py: -------------------------------------------------------------------------------- 1 | from kafka.record.memory_records import MemoryRecords, MemoryRecordsBuilder 2 | 3 | __all__ = ["MemoryRecords", "MemoryRecordsBuilder"] 4 | -------------------------------------------------------------------------------- /kafka/record/abc.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import abc 3 | 4 | 5 | class ABCRecord(object): 6 | __metaclass__ = abc.ABCMeta 7 | 8 | @abc.abstractproperty 9 | def offset(self): 10 | """ Absolute offset of record 11 | """ 12 | 13 | @abc.abstractproperty 14 | def timestamp(self): 15 | """ Epoch milliseconds 16 | """ 17 | 18 | @abc.abstractproperty 19 | def timestamp_type(self): 20 | """ CREATE_TIME(0) or APPEND_TIME(1) 21 | """ 22 | 23 | @abc.abstractproperty 24 | def key(self): 25 | """ Bytes key or None 26 | """ 27 | 28 | @abc.abstractproperty 29 | def value(self): 30 | """ Bytes value or None 31 | """ 32 | 33 | @abc.abstractproperty 34 | def checksum(self): 35 | """ Prior to v2 format CRC was contained in every message. This will 36 | be the checksum for v0 and v1 and None for v2 and above. 37 | """ 38 | 39 | @abc.abstractproperty 40 | def headers(self): 41 | """ If supported by version list of key-value tuples, or empty list if 42 | not supported by format. 43 | """ 44 | 45 | 46 | class ABCRecordBatchBuilder(object): 47 | __metaclass__ = abc.ABCMeta 48 | 49 | @abc.abstractmethod 50 | def append(self, offset, timestamp, key, value, headers=None): 51 | """ Writes record to internal buffer. 52 | 53 | Arguments: 54 | offset (int): Relative offset of record, starting from 0 55 | timestamp (int or None): Timestamp in milliseconds since beginning 56 | of the epoch (midnight Jan 1, 1970 (UTC)). If omitted, will be 57 | set to current time. 58 | key (bytes or None): Key of the record 59 | value (bytes or None): Value of the record 60 | headers (List[Tuple[str, bytes]]): Headers of the record. Header 61 | keys can not be ``None``. 62 | 63 | Returns: 64 | (bytes, int): Checksum of the written record (or None for v2 and 65 | above) and size of the written record. 66 | """ 67 | 68 | @abc.abstractmethod 69 | def size_in_bytes(self, offset, timestamp, key, value, headers): 70 | """ Return the expected size change on buffer (uncompressed) if we add 71 | this message. This will account for varint size changes and give a 72 | reliable size. 73 | """ 74 | 75 | @abc.abstractmethod 76 | def build(self): 77 | """ Close for append, compress if needed, write size and header and 78 | return a ready to send buffer object. 79 | 80 | Return: 81 | bytearray: finished batch, ready to send. 82 | """ 83 | 84 | 85 | class ABCRecordBatch(object): 86 | """ For v2 incapsulates a RecordBatch, for v0/v1 a single (maybe 87 | compressed) message. 88 | """ 89 | __metaclass__ = abc.ABCMeta 90 | 91 | @abc.abstractmethod 92 | def __iter__(self): 93 | """ Return iterator over records (ABCRecord instances). Will decompress 94 | if needed. 95 | """ 96 | 97 | 98 | class ABCRecords(object): 99 | __metaclass__ = abc.ABCMeta 100 | 101 | @abc.abstractmethod 102 | def __init__(self, buffer): 103 | """ Initialize with bytes-like object conforming to the buffer 104 | interface (ie. bytes, bytearray, memoryview etc.). 
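# Aside: the concrete implementation of the ABCRecords interface declared
# here lives in kafka/record/memory_records.py. A minimal consumption sketch,
# assuming `raw` already holds record-batch bytes (e.g. from a FetchResponse):
from kafka.record import MemoryRecords

def iter_records(raw):
    records = MemoryRecords(raw)       # any bytes-like buffer
    while records.has_next():
        batch = records.next_batch()   # ABCRecordBatch: legacy (v0/v1) or default (v2)
        for record in batch:           # ABCRecord instances
            yield record.offset, record.timestamp, record.key, record.value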
105 | """ 106 | 107 | @abc.abstractmethod 108 | def size_in_bytes(self): 109 | """ Returns the size of inner buffer. 110 | """ 111 | 112 | @abc.abstractmethod 113 | def next_batch(self): 114 | """ Return next batch of records (ABCRecordBatch instances). 115 | """ 116 | 117 | @abc.abstractmethod 118 | def has_next(self): 119 | """ True if there are more batches to read, False otherwise. 120 | """ 121 | -------------------------------------------------------------------------------- /kafka/serializer/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from kafka.serializer.abstract import Serializer, Deserializer 4 | -------------------------------------------------------------------------------- /kafka/serializer/abstract.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import abc 4 | 5 | 6 | class Serializer(object): 7 | __meta__ = abc.ABCMeta 8 | 9 | def __init__(self, **config): 10 | pass 11 | 12 | @abc.abstractmethod 13 | def serialize(self, topic, value): 14 | pass 15 | 16 | def close(self): 17 | pass 18 | 19 | 20 | class Deserializer(object): 21 | __meta__ = abc.ABCMeta 22 | 23 | def __init__(self, **config): 24 | pass 25 | 26 | @abc.abstractmethod 27 | def deserialize(self, topic, bytes_): 28 | pass 29 | 30 | def close(self): 31 | pass 32 | -------------------------------------------------------------------------------- /kafka/structs.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from collections import namedtuple 4 | 5 | 6 | # SimpleClient Payload Structs - Deprecated 7 | 8 | # https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-MetadataAPI 9 | MetadataRequest = namedtuple("MetadataRequest", 10 | ["topics"]) 11 | 12 | MetadataResponse = namedtuple("MetadataResponse", 13 | ["brokers", "topics"]) 14 | 15 | # https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-ConsumerMetadataRequest 16 | ConsumerMetadataRequest = namedtuple("ConsumerMetadataRequest", 17 | ["groups"]) 18 | 19 | ConsumerMetadataResponse = namedtuple("ConsumerMetadataResponse", 20 | ["error", "nodeId", "host", "port"]) 21 | 22 | # https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-ProduceAPI 23 | ProduceRequestPayload = namedtuple("ProduceRequestPayload", 24 | ["topic", "partition", "messages"]) 25 | 26 | ProduceResponsePayload = namedtuple("ProduceResponsePayload", 27 | ["topic", "partition", "error", "offset"]) 28 | 29 | # https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-FetchAPI 30 | FetchRequestPayload = namedtuple("FetchRequestPayload", 31 | ["topic", "partition", "offset", "max_bytes"]) 32 | 33 | FetchResponsePayload = namedtuple("FetchResponsePayload", 34 | ["topic", "partition", "error", "highwaterMark", "messages"]) 35 | 36 | # https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-OffsetAPI 37 | OffsetRequestPayload = namedtuple("OffsetRequestPayload", 38 | ["topic", "partition", "time", "max_offsets"]) 39 | 40 | ListOffsetRequestPayload = namedtuple("ListOffsetRequestPayload", 41 | ["topic", "partition", "time"]) 42 | 43 | OffsetResponsePayload = namedtuple("OffsetResponsePayload", 44 
| ["topic", "partition", "error", "offsets"]) 45 | 46 | ListOffsetResponsePayload = namedtuple("ListOffsetResponsePayload", 47 | ["topic", "partition", "error", "timestamp", "offset"]) 48 | 49 | # https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-OffsetCommit/FetchAPI 50 | OffsetCommitRequestPayload = namedtuple("OffsetCommitRequestPayload", 51 | ["topic", "partition", "offset", "metadata"]) 52 | 53 | OffsetCommitResponsePayload = namedtuple("OffsetCommitResponsePayload", 54 | ["topic", "partition", "error"]) 55 | 56 | OffsetFetchRequestPayload = namedtuple("OffsetFetchRequestPayload", 57 | ["topic", "partition"]) 58 | 59 | OffsetFetchResponsePayload = namedtuple("OffsetFetchResponsePayload", 60 | ["topic", "partition", "offset", "metadata", "error"]) 61 | 62 | 63 | 64 | # Other useful structs 65 | TopicPartition = namedtuple("TopicPartition", 66 | ["topic", "partition"]) 67 | 68 | BrokerMetadata = namedtuple("BrokerMetadata", 69 | ["nodeId", "host", "port", "rack"]) 70 | 71 | PartitionMetadata = namedtuple("PartitionMetadata", 72 | ["topic", "partition", "leader", "replicas", "isr", "error"]) 73 | 74 | OffsetAndMetadata = namedtuple("OffsetAndMetadata", 75 | ["offset", "metadata"]) 76 | 77 | OffsetAndTimestamp = namedtuple("OffsetAndTimestamp", 78 | ["offset", "timestamp"]) 79 | 80 | 81 | # Deprecated structs 82 | OffsetAndMessage = namedtuple("OffsetAndMessage", 83 | ["offset", "message"]) 84 | 85 | Message = namedtuple("Message", 86 | ["magic", "attributes", "key", "value"]) 87 | 88 | KafkaMessage = namedtuple("KafkaMessage", 89 | ["topic", "partition", "offset", "key", "value"]) 90 | 91 | 92 | # Define retry policy for async producer 93 | # Limit value: int >= 0, 0 means no retries 94 | RetryOptions = namedtuple("RetryOptions", 95 | ["limit", "backoff_ms", "retry_on_timeouts"]) 96 | -------------------------------------------------------------------------------- /kafka/vendor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robinhood/kafka-python/3689da3d5c02e362d872cf1fb2d65201419c4b93/kafka/vendor/__init__.py -------------------------------------------------------------------------------- /kafka/vendor/socketpair.py: -------------------------------------------------------------------------------- 1 | # pylint: skip-file 2 | # vendored from https://github.com/mhils/backports.socketpair 3 | from __future__ import absolute_import 4 | 5 | import sys 6 | import socket 7 | import errno 8 | 9 | _LOCALHOST = '127.0.0.1' 10 | _LOCALHOST_V6 = '::1' 11 | 12 | if not hasattr(socket, "socketpair"): 13 | # Origin: https://gist.github.com/4325783, by Geert Jansen. Public domain. 14 | def socketpair(family=socket.AF_INET, type=socket.SOCK_STREAM, proto=0): 15 | if family == socket.AF_INET: 16 | host = _LOCALHOST 17 | elif family == socket.AF_INET6: 18 | host = _LOCALHOST_V6 19 | else: 20 | raise ValueError("Only AF_INET and AF_INET6 socket address families " 21 | "are supported") 22 | if type != socket.SOCK_STREAM: 23 | raise ValueError("Only SOCK_STREAM socket type is supported") 24 | if proto != 0: 25 | raise ValueError("Only protocol zero is supported") 26 | 27 | # We create a connected TCP socket. Note the trick with 28 | # setblocking(False) that prevents us from having to create a thread. 
29 | lsock = socket.socket(family, type, proto) 30 | try: 31 | lsock.bind((host, 0)) 32 | lsock.listen(min(socket.SOMAXCONN, 128)) 33 | # On IPv6, ignore flow_info and scope_id 34 | addr, port = lsock.getsockname()[:2] 35 | csock = socket.socket(family, type, proto) 36 | try: 37 | csock.setblocking(False) 38 | if sys.version_info >= (3, 0): 39 | try: 40 | csock.connect((addr, port)) 41 | except (BlockingIOError, InterruptedError): 42 | pass 43 | else: 44 | try: 45 | csock.connect((addr, port)) 46 | except socket.error as e: 47 | if e.errno != errno.WSAEWOULDBLOCK: 48 | raise 49 | csock.setblocking(True) 50 | ssock, _ = lsock.accept() 51 | except Exception: 52 | csock.close() 53 | raise 54 | finally: 55 | lsock.close() 56 | return (ssock, csock) 57 | 58 | socket.socketpair = socketpair 59 | -------------------------------------------------------------------------------- /kafka/version.py: -------------------------------------------------------------------------------- 1 | __version__ = '1.4.4.dev' 2 | -------------------------------------------------------------------------------- /pylint.rc: -------------------------------------------------------------------------------- 1 | [TYPECHECK] 2 | ignored-classes=SyncManager,_socketobject 3 | ignored-modules=kafka.vendor.six.moves 4 | generated-members=py.* 5 | 6 | [MESSAGES CONTROL] 7 | disable=E1129 8 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | flake8==3.4.1 2 | pytest==3.4.0 3 | pytest-cov==2.5.1 4 | docker-py==1.10.6 5 | coveralls==1.2.0 6 | Sphinx==1.6.4 7 | lz4==0.19.1 8 | xxhash==1.0.1 9 | python-snappy==0.5.1 10 | tox==2.9.1 11 | pylint==1.8.2 12 | pytest-pylint==0.7.1 13 | pytest-mock==1.6.3 14 | sphinx-rtd-theme==0.2.4 15 | crc32c==1.2 16 | -------------------------------------------------------------------------------- /servers/0.10.0.0/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | log4j.rootLogger=INFO, stdout, logfile 17 | 18 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 19 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 20 | log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n 21 | 22 | log4j.appender.logfile=org.apache.log4j.FileAppender 23 | log4j.appender.logfile.File=${kafka.logs.dir}/server.log 24 | log4j.appender.logfile.layout=org.apache.log4j.PatternLayout 25 | log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n 26 | -------------------------------------------------------------------------------- /servers/0.10.0.0/resources/zookeeper.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # the directory where the snapshot is stored. 16 | dataDir={tmp_dir} 17 | # the port at which the clients will connect 18 | clientPort={port} 19 | clientPortAddress={host} 20 | # disable the per-ip limit on the number of connections since this is a non-production config 21 | maxClientCnxns=0 22 | -------------------------------------------------------------------------------- /servers/0.10.0.1/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | log4j.rootLogger=INFO, stdout, logfile 17 | 18 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 19 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 20 | log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n 21 | 22 | log4j.appender.logfile=org.apache.log4j.FileAppender 23 | log4j.appender.logfile.File=${kafka.logs.dir}/server.log 24 | log4j.appender.logfile.layout=org.apache.log4j.PatternLayout 25 | log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n 26 | -------------------------------------------------------------------------------- /servers/0.10.0.1/resources/zookeeper.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # the directory where the snapshot is stored. 16 | dataDir={tmp_dir} 17 | # the port at which the clients will connect 18 | clientPort={port} 19 | clientPortAddress={host} 20 | # disable the per-ip limit on the number of connections since this is a non-production config 21 | maxClientCnxns=0 22 | -------------------------------------------------------------------------------- /servers/0.10.1.1/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | log4j.rootLogger=INFO, stdout, logfile 17 | 18 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 19 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 20 | log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n 21 | 22 | log4j.appender.logfile=org.apache.log4j.FileAppender 23 | log4j.appender.logfile.File=${kafka.logs.dir}/server.log 24 | log4j.appender.logfile.layout=org.apache.log4j.PatternLayout 25 | log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n 26 | -------------------------------------------------------------------------------- /servers/0.10.1.1/resources/zookeeper.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # the directory where the snapshot is stored. 16 | dataDir={tmp_dir} 17 | # the port at which the clients will connect 18 | clientPort={port} 19 | clientPortAddress={host} 20 | # disable the per-ip limit on the number of connections since this is a non-production config 21 | maxClientCnxns=0 22 | -------------------------------------------------------------------------------- /servers/0.10.2.1/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | log4j.rootLogger=INFO, stdout, logfile 17 | 18 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 19 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 20 | log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n 21 | 22 | log4j.appender.logfile=org.apache.log4j.FileAppender 23 | log4j.appender.logfile.File=${kafka.logs.dir}/server.log 24 | log4j.appender.logfile.layout=org.apache.log4j.PatternLayout 25 | log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n 26 | -------------------------------------------------------------------------------- /servers/0.10.2.1/resources/zookeeper.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # the directory where the snapshot is stored. 16 | dataDir={tmp_dir} 17 | # the port at which the clients will connect 18 | clientPort={port} 19 | clientPortAddress={host} 20 | # disable the per-ip limit on the number of connections since this is a non-production config 21 | maxClientCnxns=0 22 | -------------------------------------------------------------------------------- /servers/0.11.0.0/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | log4j.rootLogger=INFO, stdout, logfile 17 | 18 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 19 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 20 | log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n 21 | 22 | log4j.appender.logfile=org.apache.log4j.FileAppender 23 | log4j.appender.logfile.File=${kafka.logs.dir}/server.log 24 | log4j.appender.logfile.layout=org.apache.log4j.PatternLayout 25 | log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n 26 | -------------------------------------------------------------------------------- /servers/0.11.0.0/resources/zookeeper.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # the directory where the snapshot is stored. 16 | dataDir={tmp_dir} 17 | # the port at which the clients will connect 18 | clientPort={port} 19 | clientPortAddress={host} 20 | # disable the per-ip limit on the number of connections since this is a non-production config 21 | maxClientCnxns=0 22 | -------------------------------------------------------------------------------- /servers/0.11.0.1/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | log4j.rootLogger=INFO, stdout, logfile 17 | 18 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 19 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 20 | log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n 21 | 22 | log4j.appender.logfile=org.apache.log4j.FileAppender 23 | log4j.appender.logfile.File=${kafka.logs.dir}/server.log 24 | log4j.appender.logfile.layout=org.apache.log4j.PatternLayout 25 | log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n 26 | -------------------------------------------------------------------------------- /servers/0.11.0.1/resources/zookeeper.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # the directory where the snapshot is stored. 16 | dataDir={tmp_dir} 17 | # the port at which the clients will connect 18 | clientPort={port} 19 | clientPortAddress={host} 20 | # disable the per-ip limit on the number of connections since this is a non-production config 21 | maxClientCnxns=0 22 | -------------------------------------------------------------------------------- /servers/0.11.0.2/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | log4j.rootLogger=INFO, stdout, logfile 17 | 18 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 19 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 20 | log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n 21 | 22 | log4j.appender.logfile=org.apache.log4j.FileAppender 23 | log4j.appender.logfile.File=${kafka.logs.dir}/server.log 24 | log4j.appender.logfile.layout=org.apache.log4j.PatternLayout 25 | log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n 26 | -------------------------------------------------------------------------------- /servers/0.11.0.2/resources/zookeeper.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # the directory where the snapshot is stored. 16 | dataDir={tmp_dir} 17 | # the port at which the clients will connect 18 | clientPort={port} 19 | clientPortAddress={host} 20 | # disable the per-ip limit on the number of connections since this is a non-production config 21 | maxClientCnxns=0 22 | -------------------------------------------------------------------------------- /servers/0.8.0/resources/kafka.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | ############################# Server Basics ############################# 17 | 18 | broker.id={broker_id} 19 | 20 | ############################# Socket Server Settings ############################# 21 | 22 | port={port} 23 | host.name={host} 24 | 25 | num.network.threads=2 26 | num.io.threads=2 27 | 28 | socket.send.buffer.bytes=1048576 29 | socket.receive.buffer.bytes=1048576 30 | socket.request.max.bytes=104857600 31 | 32 | ############################# Log Basics ############################# 33 | 34 | log.dirs={tmp_dir}/data 35 | num.partitions={partitions} 36 | default.replication.factor={replicas} 37 | 38 | ## Short Replica Lag -- Drops failed brokers out of ISR 39 | replica.lag.time.max.ms=1000 40 | replica.socket.timeout.ms=1000 41 | 42 | ############################# Log Flush Policy ############################# 43 | 44 | log.flush.interval.messages=10000 45 | log.flush.interval.ms=1000 46 | 47 | ############################# Log Retention Policy ############################# 48 | 49 | log.retention.hours=168 50 | log.segment.bytes=536870912 51 | log.cleanup.interval.mins=1 52 | 53 | ############################# Zookeeper ############################# 54 | 55 | zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} 56 | 57 | # Timeout in ms for connecting to zookeeper 58 | zookeeper.connection.timeout.ms=1000000 59 | # We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly 60 | zookeeper.session.timeout.ms=500 61 | 62 | kafka.metrics.polling.interval.secs=5 63 | kafka.metrics.reporters=kafka.metrics.KafkaCSVMetricsReporter 64 | kafka.csv.metrics.dir={tmp_dir} 65 | kafka.csv.metrics.reporter.enabled=false 66 | 67 | log.cleanup.policy=delete 68 | -------------------------------------------------------------------------------- /servers/0.8.0/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
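The {named} fields in the kafka.properties template above look like Python str.format placeholders filled in by the integration-test fixtures. A hypothetical rendering step follows; the actual fixture code (test/fixtures.py) is not shown here, so the parameter values are illustrative assumptions:

with open('servers/0.8.0/resources/kafka.properties') as f:
    template = f.read()

# Hypothetical values; the real ones come from the test fixtures.
rendered = template.format(
    broker_id=0, host='localhost', port=9092,
    tmp_dir='/tmp/kafka-test', partitions=2, replicas=1,
    zk_host='localhost', zk_port=2181, zk_chroot='kafka-python')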
15 | 16 | log4j.rootLogger=INFO, stdout, logfile 17 | 18 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 19 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 20 | log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n 21 | 22 | log4j.appender.logfile=org.apache.log4j.FileAppender 23 | log4j.appender.logfile.File=${kafka.logs.dir}/server.log 24 | log4j.appender.logfile.layout=org.apache.log4j.PatternLayout 25 | log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n 26 | -------------------------------------------------------------------------------- /servers/0.8.0/resources/zookeeper.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | dataDir={tmp_dir} 17 | clientPortAddress={host} 18 | clientPort={port} 19 | maxClientCnxns=0 20 | -------------------------------------------------------------------------------- /servers/0.8.1.1/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | log4j.rootLogger=INFO, stdout, logfile 17 | 18 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 19 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 20 | log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n 21 | 22 | log4j.appender.logfile=org.apache.log4j.FileAppender 23 | log4j.appender.logfile.File=${kafka.logs.dir}/server.log 24 | log4j.appender.logfile.layout=org.apache.log4j.PatternLayout 25 | log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n 26 | -------------------------------------------------------------------------------- /servers/0.8.1.1/resources/zookeeper.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. 
See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # the directory where the snapshot is stored. 16 | dataDir={tmp_dir} 17 | # the port at which the clients will connect 18 | clientPort={port} 19 | clientPortAddress={host} 20 | # disable the per-ip limit on the number of connections since this is a non-production config 21 | maxClientCnxns=0 22 | -------------------------------------------------------------------------------- /servers/0.8.1/resources/kafka.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | ############################# Server Basics ############################# 17 | 18 | broker.id={broker_id} 19 | 20 | ############################# Socket Server Settings ############################# 21 | 22 | port={port} 23 | host.name={host} 24 | 25 | num.network.threads=2 26 | num.io.threads=2 27 | 28 | socket.send.buffer.bytes=1048576 29 | socket.receive.buffer.bytes=1048576 30 | socket.request.max.bytes=104857600 31 | 32 | ############################# Log Basics ############################# 33 | 34 | log.dirs={tmp_dir}/data 35 | num.partitions={partitions} 36 | default.replication.factor={replicas} 37 | 38 | ## Short Replica Lag -- Drops failed brokers out of ISR 39 | replica.lag.time.max.ms=1000 40 | replica.socket.timeout.ms=1000 41 | 42 | ############################# Log Flush Policy ############################# 43 | 44 | log.flush.interval.messages=10000 45 | log.flush.interval.ms=1000 46 | 47 | ############################# Log Retention Policy ############################# 48 | 49 | log.retention.hours=168 50 | log.segment.bytes=536870912 51 | log.retention.check.interval.ms=60000 52 | log.cleanup.interval.mins=1 53 | log.cleaner.enable=false 54 | 55 | ############################# Zookeeper ############################# 56 | 57 | # Zookeeper connection string (see zookeeper docs for details). 58 | # This is a comma separated host:port pairs, each corresponding to a zk 59 | # server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". 
60 | # You can also append an optional chroot string to the urls to specify the 61 | # root directory for all kafka znodes. 62 | zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} 63 | 64 | # Timeout in ms for connecting to zookeeper 65 | zookeeper.connection.timeout.ms=1000000 66 | # We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly 67 | zookeeper.session.timeout.ms=500 68 | -------------------------------------------------------------------------------- /servers/0.8.1/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | log4j.rootLogger=INFO, stdout, logfile 17 | 18 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 19 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 20 | log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n 21 | 22 | log4j.appender.logfile=org.apache.log4j.FileAppender 23 | log4j.appender.logfile.File=${kafka.logs.dir}/server.log 24 | log4j.appender.logfile.layout=org.apache.log4j.PatternLayout 25 | log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n 26 | -------------------------------------------------------------------------------- /servers/0.8.1/resources/zookeeper.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | dataDir={tmp_dir} 17 | clientPortAddress={host} 18 | clientPort={port} 19 | maxClientCnxns=0 20 | -------------------------------------------------------------------------------- /servers/0.8.2.0/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. 
See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | log4j.rootLogger=INFO, stdout, logfile 17 | 18 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 19 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 20 | log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n 21 | 22 | log4j.appender.logfile=org.apache.log4j.FileAppender 23 | log4j.appender.logfile.File=${kafka.logs.dir}/server.log 24 | log4j.appender.logfile.layout=org.apache.log4j.PatternLayout 25 | log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n 26 | -------------------------------------------------------------------------------- /servers/0.8.2.0/resources/zookeeper.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # the directory where the snapshot is stored. 16 | dataDir={tmp_dir} 17 | # the port at which the clients will connect 18 | clientPort={port} 19 | clientPortAddress={host} 20 | # disable the per-ip limit on the number of connections since this is a non-production config 21 | maxClientCnxns=0 22 | -------------------------------------------------------------------------------- /servers/0.8.2.1/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | log4j.rootLogger=INFO, stdout, logfile 17 | 18 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 19 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 20 | log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n 21 | 22 | log4j.appender.logfile=org.apache.log4j.FileAppender 23 | log4j.appender.logfile.File=${kafka.logs.dir}/server.log 24 | log4j.appender.logfile.layout=org.apache.log4j.PatternLayout 25 | log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n 26 | -------------------------------------------------------------------------------- /servers/0.8.2.1/resources/zookeeper.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # the directory where the snapshot is stored. 16 | dataDir={tmp_dir} 17 | # the port at which the clients will connect 18 | clientPort={port} 19 | clientPortAddress={host} 20 | # disable the per-ip limit on the number of connections since this is a non-production config 21 | maxClientCnxns=0 22 | -------------------------------------------------------------------------------- /servers/0.8.2.2/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | log4j.rootLogger=INFO, stdout, logfile 17 | 18 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 19 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 20 | log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n 21 | 22 | log4j.appender.logfile=org.apache.log4j.FileAppender 23 | log4j.appender.logfile.File=${kafka.logs.dir}/server.log 24 | log4j.appender.logfile.layout=org.apache.log4j.PatternLayout 25 | log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n 26 | -------------------------------------------------------------------------------- /servers/0.8.2.2/resources/zookeeper.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # the directory where the snapshot is stored. 16 | dataDir={tmp_dir} 17 | # the port at which the clients will connect 18 | clientPort={port} 19 | clientPortAddress={host} 20 | # disable the per-ip limit on the number of connections since this is a non-production config 21 | maxClientCnxns=0 22 | -------------------------------------------------------------------------------- /servers/0.9.0.0/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | log4j.rootLogger=INFO, stdout, logfile 17 | 18 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 19 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 20 | log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n 21 | 22 | log4j.appender.logfile=org.apache.log4j.FileAppender 23 | log4j.appender.logfile.File=${kafka.logs.dir}/server.log 24 | log4j.appender.logfile.layout=org.apache.log4j.PatternLayout 25 | log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n 26 | -------------------------------------------------------------------------------- /servers/0.9.0.0/resources/zookeeper.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # the directory where the snapshot is stored. 16 | dataDir={tmp_dir} 17 | # the port at which the clients will connect 18 | clientPort={port} 19 | clientPortAddress={host} 20 | # disable the per-ip limit on the number of connections since this is a non-production config 21 | maxClientCnxns=0 22 | -------------------------------------------------------------------------------- /servers/0.9.0.1/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | log4j.rootLogger=INFO, stdout, logfile 17 | 18 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 19 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 20 | log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n 21 | 22 | log4j.appender.logfile=org.apache.log4j.FileAppender 23 | log4j.appender.logfile.File=${kafka.logs.dir}/server.log 24 | log4j.appender.logfile.layout=org.apache.log4j.PatternLayout 25 | log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n 26 | -------------------------------------------------------------------------------- /servers/0.9.0.1/resources/zookeeper.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # the directory where the snapshot is stored. 16 | dataDir={tmp_dir} 17 | # the port at which the clients will connect 18 | clientPort={port} 19 | clientPortAddress={host} 20 | # disable the per-ip limit on the number of connections since this is a non-production config 21 | maxClientCnxns=0 22 | -------------------------------------------------------------------------------- /servers/1.0.0/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | log4j.rootLogger=INFO, stdout, logfile 17 | 18 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 19 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 20 | log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n 21 | 22 | log4j.appender.logfile=org.apache.log4j.FileAppender 23 | log4j.appender.logfile.File=${kafka.logs.dir}/server.log 24 | log4j.appender.logfile.layout=org.apache.log4j.PatternLayout 25 | log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n 26 | -------------------------------------------------------------------------------- /servers/1.0.0/resources/zookeeper.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # the directory where the snapshot is stored. 16 | dataDir={tmp_dir} 17 | # the port at which the clients will connect 18 | clientPort={port} 19 | clientPortAddress={host} 20 | # disable the per-ip limit on the number of connections since this is a non-production config 21 | maxClientCnxns=0 22 | -------------------------------------------------------------------------------- /servers/1.0.1/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | log4j.rootLogger=INFO, stdout, logfile 17 | 18 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 19 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 20 | log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n 21 | 22 | log4j.appender.logfile=org.apache.log4j.FileAppender 23 | log4j.appender.logfile.File=${kafka.logs.dir}/server.log 24 | log4j.appender.logfile.layout=org.apache.log4j.PatternLayout 25 | log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n 26 | -------------------------------------------------------------------------------- /servers/1.0.1/resources/zookeeper.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # the directory where the snapshot is stored. 16 | dataDir={tmp_dir} 17 | # the port at which the clients will connect 18 | clientPort={port} 19 | clientPortAddress={host} 20 | # disable the per-ip limit on the number of connections since this is a non-production config 21 | maxClientCnxns=0 22 | -------------------------------------------------------------------------------- /servers/trunk/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | log4j.rootLogger=INFO, stdout 17 | 18 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 19 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 20 | log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n 21 | 22 | log4j.logger.kafka=DEBUG, stdout 23 | log4j.logger.org.I0Itec.zkclient.ZkClient=INFO, stdout 24 | log4j.logger.org.apache.zookeeper=INFO, stdout 25 | -------------------------------------------------------------------------------- /servers/trunk/resources/zookeeper.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # the directory where the snapshot is stored. 16 | dataDir={tmp_dir} 17 | # the port at which the clients will connect 18 | clientPort={port} 19 | clientPortAddress={host} 20 | # disable the per-ip limit on the number of connections since this is a non-production config 21 | maxClientCnxns=0 22 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal=1 3 | 4 | [metadata] 5 | license_file = LICENSE 6 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | from setuptools import setup, Command, find_packages 5 | 6 | # Pull version from source without importing 7 | # since we can't import something we haven't built yet :) 8 | exec(open('kafka/version.py').read()) 9 | 10 | class Tox(Command): 11 | 12 | user_options = [] 13 | 14 | def initialize_options(self): 15 | pass 16 | 17 | def finalize_options(self): 18 | pass 19 | 20 | @classmethod 21 | def run(cls): 22 | import tox 23 | sys.exit(tox.cmdline([])) 24 | 25 | 26 | test_require = ['tox', 'mock'] 27 | if sys.version_info < (2, 7): 28 | test_require.append('unittest2') 29 | 30 | here = os.path.abspath(os.path.dirname(__file__)) 31 | 32 | with open(os.path.join(here, 'README.rst')) as f: 33 | README = f.read() 34 | 35 | setup( 36 | name="kafka-python", 37 | version=__version__, 38 | 39 | tests_require=test_require, 40 | cmdclass={"test": Tox}, 41 | packages=find_packages(exclude=['test']), 42 | author="Dana Powers", 43 | author_email="dana.powers@gmail.com", 44 | url="https://github.com/dpkp/kafka-python", 45 | license="Apache License 2.0", 46 | description="Pure Python client for Apache Kafka", 47 | long_description=README, 48 | keywords="apache kafka", 49 | classifiers=[ 50 | "Development Status :: 5 - Production/Stable", 51 | "Intended Audience :: Developers", 52 | "License :: OSI Approved :: Apache Software License", 
53 | "Programming Language :: Python", 54 | "Programming Language :: Python :: 2", 55 | "Programming Language :: Python :: 2.7", 56 | "Programming Language :: Python :: 3", 57 | "Programming Language :: Python :: 3.4", 58 | "Programming Language :: Python :: 3.5", 59 | "Programming Language :: Python :: 3.6", 60 | "Programming Language :: Python :: Implementation :: PyPy", 61 | "Topic :: Software Development :: Libraries :: Python Modules", 62 | ] 63 | ) 64 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import sys 4 | 5 | if sys.version_info < (2, 7): 6 | import unittest2 as unittest # pylint: disable=import-error 7 | else: 8 | import unittest 9 | 10 | # Set default logging handler to avoid "No handler found" warnings. 11 | import logging 12 | try: # Python 2.7+ 13 | from logging import NullHandler 14 | except ImportError: 15 | class NullHandler(logging.Handler): 16 | def emit(self, record): 17 | pass 18 | 19 | logging.getLogger(__name__).addHandler(NullHandler()) 20 | 21 | from kafka.future import Future 22 | Future.error_on_callbacks = True # always fail during testing 23 | -------------------------------------------------------------------------------- /test/record/test_util.py: -------------------------------------------------------------------------------- 1 | import struct 2 | import pytest 3 | from kafka.record import util 4 | 5 | 6 | varint_data = [ 7 | (b"\x00", 0), 8 | (b"\x01", -1), 9 | (b"\x02", 1), 10 | (b"\x7E", 63), 11 | (b"\x7F", -64), 12 | (b"\x80\x01", 64), 13 | (b"\x81\x01", -65), 14 | (b"\xFE\x7F", 8191), 15 | (b"\xFF\x7F", -8192), 16 | (b"\x80\x80\x01", 8192), 17 | (b"\x81\x80\x01", -8193), 18 | (b"\xFE\xFF\x7F", 1048575), 19 | (b"\xFF\xFF\x7F", -1048576), 20 | (b"\x80\x80\x80\x01", 1048576), 21 | (b"\x81\x80\x80\x01", -1048577), 22 | (b"\xFE\xFF\xFF\x7F", 134217727), 23 | (b"\xFF\xFF\xFF\x7F", -134217728), 24 | (b"\x80\x80\x80\x80\x01", 134217728), 25 | (b"\x81\x80\x80\x80\x01", -134217729), 26 | (b"\xFE\xFF\xFF\xFF\x7F", 17179869183), 27 | (b"\xFF\xFF\xFF\xFF\x7F", -17179869184), 28 | (b"\x80\x80\x80\x80\x80\x01", 17179869184), 29 | (b"\x81\x80\x80\x80\x80\x01", -17179869185), 30 | (b"\xFE\xFF\xFF\xFF\xFF\x7F", 2199023255551), 31 | (b"\xFF\xFF\xFF\xFF\xFF\x7F", -2199023255552), 32 | (b"\x80\x80\x80\x80\x80\x80\x01", 2199023255552), 33 | (b"\x81\x80\x80\x80\x80\x80\x01", -2199023255553), 34 | (b"\xFE\xFF\xFF\xFF\xFF\xFF\x7F", 281474976710655), 35 | (b"\xFF\xFF\xFF\xFF\xFF\xFF\x7F", -281474976710656), 36 | (b"\x80\x80\x80\x80\x80\x80\x80\x01", 281474976710656), 37 | (b"\x81\x80\x80\x80\x80\x80\x80\x01", -281474976710657), 38 | (b"\xFE\xFF\xFF\xFF\xFF\xFF\xFF\x7F", 36028797018963967), 39 | (b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x7F", -36028797018963968), 40 | (b"\x80\x80\x80\x80\x80\x80\x80\x80\x01", 36028797018963968), 41 | (b"\x81\x80\x80\x80\x80\x80\x80\x80\x01", -36028797018963969), 42 | (b"\xFE\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x7F", 4611686018427387903), 43 | (b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x7F", -4611686018427387904), 44 | (b"\x80\x80\x80\x80\x80\x80\x80\x80\x80\x01", 4611686018427387904), 45 | (b"\x81\x80\x80\x80\x80\x80\x80\x80\x80\x01", -4611686018427387905), 46 | ] 47 | 48 | 49 | @pytest.mark.parametrize("encoded, decoded", varint_data) 50 | def test_encode_varint(encoded, decoded): 51 | res = bytearray() 52 | util.encode_varint(decoded, res.append) 53 | assert res == encoded 54 | 55 
| 56 | @pytest.mark.parametrize("encoded, decoded", varint_data) 57 | def test_decode_varint(encoded, decoded): 58 | # We add a bit of bytes around just to check position is calculated 59 | # correctly 60 | value, pos = util.decode_varint( 61 | bytearray(b"\x01\xf0" + encoded + b"\xff\x01"), 2) 62 | assert value == decoded 63 | assert pos - 2 == len(encoded) 64 | 65 | 66 | @pytest.mark.parametrize("encoded, decoded", varint_data) 67 | def test_size_of_varint(encoded, decoded): 68 | assert util.size_of_varint(decoded) == len(encoded) 69 | 70 | 71 | @pytest.mark.parametrize("crc32_func", [util.crc32c_c, util.crc32c_py]) 72 | def test_crc32c(crc32_func): 73 | def make_crc(data): 74 | crc = crc32_func(data) 75 | return struct.pack(">I", crc) 76 | assert make_crc(b"") == b"\x00\x00\x00\x00" 77 | assert make_crc(b"a") == b"\xc1\xd0\x43\x30" 78 | 79 | # Took from librdkafka testcase 80 | long_text = b"""\ 81 | This software is provided 'as-is', without any express or implied 82 | warranty. In no event will the author be held liable for any damages 83 | arising from the use of this software. 84 | 85 | Permission is granted to anyone to use this software for any purpose, 86 | including commercial applications, and to alter it and redistribute it 87 | freely, subject to the following restrictions: 88 | 89 | 1. The origin of this software must not be misrepresented; you must not 90 | claim that you wrote the original software. If you use this software 91 | in a product, an acknowledgment in the product documentation would be 92 | appreciated but is not required. 93 | 2. Altered source versions must be plainly marked as such, and must not be 94 | misrepresented as being the original software. 95 | 3. This notice may not be removed or altered from any source distribution.""" 96 | assert make_crc(long_text) == b"\x7d\xcd\xe1\x13" 97 | -------------------------------------------------------------------------------- /test/service.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import logging 4 | import os 5 | import re 6 | import select 7 | import subprocess 8 | import sys 9 | import threading 10 | import time 11 | 12 | __all__ = [ 13 | 'ExternalService', 14 | 'SpawnedService', 15 | ] 16 | 17 | log = logging.getLogger(__name__) 18 | 19 | 20 | class ExternalService(object): 21 | def __init__(self, host, port): 22 | log.info("Using already running service at %s:%d", host, port) 23 | self.host = host 24 | self.port = port 25 | 26 | def open(self): 27 | pass 28 | 29 | def close(self): 30 | pass 31 | 32 | 33 | class SpawnedService(threading.Thread): 34 | def __init__(self, args=None, env=None): 35 | super(SpawnedService, self).__init__() 36 | 37 | if args is None: 38 | raise TypeError("args parameter is required") 39 | self.args = args 40 | self.env = env 41 | self.captured_stdout = [] 42 | self.captured_stderr = [] 43 | 44 | self.should_die = threading.Event() 45 | self.child = None 46 | self.alive = False 47 | self.daemon = True 48 | 49 | def _spawn(self): 50 | if self.alive: return 51 | if self.child and self.child.poll() is None: return 52 | 53 | self.child = subprocess.Popen( 54 | self.args, 55 | preexec_fn=os.setsid, # to avoid propagating signals 56 | env=self.env, 57 | bufsize=1, 58 | stdout=subprocess.PIPE, 59 | stderr=subprocess.PIPE) 60 | self.alive = True 61 | 62 | def _despawn(self): 63 | if self.child.poll() is None: 64 | self.child.terminate() 65 | self.alive = False 66 | for _ in range(50): 67 | if 
self.child.poll() is not None: 68 | self.child = None 69 | break 70 | time.sleep(0.1) 71 | else: 72 | self.child.kill() 73 | 74 | def run(self): 75 | self._spawn() 76 | while True: 77 | try: 78 | (rds, _, _) = select.select([self.child.stdout, self.child.stderr], [], [], 1) 79 | except select.error as ex: 80 | if ex.args[0] == 4: 81 | continue 82 | else: 83 | raise 84 | 85 | if self.child.stdout in rds: 86 | line = self.child.stdout.readline() 87 | self.captured_stdout.append(line.decode('utf-8').rstrip()) 88 | 89 | if self.child.stderr in rds: 90 | line = self.child.stderr.readline() 91 | self.captured_stderr.append(line.decode('utf-8').rstrip()) 92 | 93 | if self.child.poll() is not None: 94 | self.dump_logs() 95 | break 96 | 97 | if self.should_die.is_set(): 98 | self._despawn() 99 | break 100 | 101 | def dump_logs(self): 102 | sys.stderr.write('\n'.join(self.captured_stderr)) 103 | sys.stdout.write('\n'.join(self.captured_stdout)) 104 | 105 | def wait_for(self, pattern, timeout=30): 106 | start = time.time() 107 | while True: 108 | elapsed = time.time() - start 109 | if elapsed >= timeout: 110 | log.error("Waiting for %r timed out after %d seconds", pattern, timeout) 111 | return False 112 | 113 | if re.search(pattern, '\n'.join(self.captured_stdout), re.IGNORECASE) is not None: 114 | log.info("Found pattern %r in %d seconds via stdout", pattern, elapsed) 115 | return True 116 | if re.search(pattern, '\n'.join(self.captured_stderr), re.IGNORECASE) is not None: 117 | log.info("Found pattern %r in %d seconds via stderr", pattern, elapsed) 118 | return True 119 | time.sleep(0.1) 120 | 121 | def stop(self): 122 | self.should_die.set() 123 | self.join() 124 | -------------------------------------------------------------------------------- /test/test_admin.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import kafka.admin 4 | from kafka.errors import IllegalArgumentError 5 | 6 | 7 | def test_config_resource(): 8 | with pytest.raises(KeyError): 9 | bad_resource = kafka.admin.ConfigResource('something', 'foo') 10 | good_resource = kafka.admin.ConfigResource('broker', 'bar') 11 | assert(good_resource.resource_type == kafka.admin.ConfigResourceType.BROKER) 12 | assert(good_resource.name == 'bar') 13 | assert(good_resource.configs is None) 14 | good_resource = kafka.admin.ConfigResource(kafka.admin.ConfigResourceType.TOPIC, 'baz', {'frob' : 'nob'}) 15 | assert(good_resource.resource_type == kafka.admin.ConfigResourceType.TOPIC) 16 | assert(good_resource.name == 'baz') 17 | assert(good_resource.configs == {'frob' : 'nob'}) 18 | 19 | 20 | def test_new_partitions(): 21 | good_partitions = kafka.admin.NewPartitions(6) 22 | assert(good_partitions.total_count == 6) 23 | assert(good_partitions.new_assignments is None) 24 | good_partitions = kafka.admin.NewPartitions(7, [[1, 2, 3]]) 25 | assert(good_partitions.total_count == 7) 26 | assert(good_partitions.new_assignments == [[1, 2, 3]]) 27 | 28 | 29 | def test_new_topic(): 30 | with pytest.raises(IllegalArgumentError): 31 | bad_topic = kafka.admin.NewTopic('foo', -1, -1) 32 | with pytest.raises(IllegalArgumentError): 33 | bad_topic = kafka.admin.NewTopic('foo', 1, -1) 34 | with pytest.raises(IllegalArgumentError): 35 | bad_topic = kafka.admin.NewTopic('foo', 1, 1, {1 : [1, 1, 1]}) 36 | good_topic = kafka.admin.NewTopic('foo', 1, 2) 37 | assert(good_topic.name == 'foo') 38 | assert(good_topic.num_partitions == 1) 39 | assert(good_topic.replication_factor == 2) 40 | 
assert(good_topic.replica_assignments == {}) 41 | assert(good_topic.topic_configs == {}) 42 | good_topic = kafka.admin.NewTopic('bar', -1, -1, {1 : [1, 2, 3]}, {'key' : 'value'}) 43 | assert(good_topic.name == 'bar') 44 | assert(good_topic.num_partitions == -1) 45 | assert(good_topic.replication_factor == -1) 46 | assert(good_topic.replica_assignments == {1: [1, 2, 3]}) 47 | assert(good_topic.topic_configs == {'key' : 'value'}) 48 | -------------------------------------------------------------------------------- /test/test_assignors.py: -------------------------------------------------------------------------------- 1 | # pylint: skip-file 2 | from __future__ import absolute_import 3 | 4 | import pytest 5 | 6 | from kafka.coordinator.assignors.range import RangePartitionAssignor 7 | from kafka.coordinator.assignors.roundrobin import RoundRobinPartitionAssignor 8 | from kafka.coordinator.protocol import ( 9 | ConsumerProtocolMemberMetadata, ConsumerProtocolMemberAssignment) 10 | 11 | 12 | @pytest.fixture 13 | def cluster(mocker): 14 | cluster = mocker.MagicMock() 15 | cluster.partitions_for_topic.return_value = set([0, 1, 2]) 16 | return cluster 17 | 18 | 19 | def test_assignor_roundrobin(cluster): 20 | assignor = RoundRobinPartitionAssignor 21 | 22 | member_metadata = { 23 | 'C0': assignor.metadata(set(['t0', 't1'])), 24 | 'C1': assignor.metadata(set(['t0', 't1'])), 25 | } 26 | 27 | ret = assignor.assign(cluster, member_metadata) 28 | expected = { 29 | 'C0': ConsumerProtocolMemberAssignment( 30 | assignor.version, [('t0', [0, 2]), ('t1', [1])], b''), 31 | 'C1': ConsumerProtocolMemberAssignment( 32 | assignor.version, [('t0', [1]), ('t1', [0, 2])], b'') 33 | } 34 | assert ret == expected 35 | assert set(ret) == set(expected) 36 | for member in ret: 37 | assert ret[member].encode() == expected[member].encode() 38 | 39 | 40 | def test_assignor_range(cluster): 41 | assignor = RangePartitionAssignor 42 | 43 | member_metadata = { 44 | 'C0': assignor.metadata(set(['t0', 't1'])), 45 | 'C1': assignor.metadata(set(['t0', 't1'])), 46 | } 47 | 48 | ret = assignor.assign(cluster, member_metadata) 49 | expected = { 50 | 'C0': ConsumerProtocolMemberAssignment( 51 | assignor.version, [('t0', [0, 1]), ('t1', [0, 1])], b''), 52 | 'C1': ConsumerProtocolMemberAssignment( 53 | assignor.version, [('t0', [2]), ('t1', [2])], b'') 54 | } 55 | assert ret == expected 56 | assert set(ret) == set(expected) 57 | for member in ret: 58 | assert ret[member].encode() == expected[member].encode() 59 | -------------------------------------------------------------------------------- /test/test_client_integration.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from kafka.errors import KafkaTimeoutError 4 | from kafka.protocol import create_message 5 | from kafka.structs import ( 6 | FetchRequestPayload, OffsetCommitRequestPayload, OffsetFetchRequestPayload, 7 | ProduceRequestPayload) 8 | 9 | from test.fixtures import ZookeeperFixture, KafkaFixture 10 | from test.testutil import KafkaIntegrationTestCase, kafka_versions 11 | 12 | 13 | class TestKafkaClientIntegration(KafkaIntegrationTestCase): 14 | @classmethod 15 | def setUpClass(cls): # noqa 16 | if not os.environ.get('KAFKA_VERSION'): 17 | return 18 | 19 | cls.zk = ZookeeperFixture.instance() 20 | cls.server = KafkaFixture.instance(0, cls.zk) 21 | 22 | @classmethod 23 | def tearDownClass(cls): # noqa 24 | if not os.environ.get('KAFKA_VERSION'): 25 | return 26 | 27 | cls.server.close() 28 | cls.zk.close() 29 | 30 | def 
test_consume_none(self): 31 | fetch = FetchRequestPayload(self.topic, 0, 0, 1024) 32 | 33 | fetch_resp, = self.client.send_fetch_request([fetch]) 34 | self.assertEqual(fetch_resp.error, 0) 35 | self.assertEqual(fetch_resp.topic, self.topic) 36 | self.assertEqual(fetch_resp.partition, 0) 37 | 38 | messages = list(fetch_resp.messages) 39 | self.assertEqual(len(messages), 0) 40 | 41 | def test_ensure_topic_exists(self): 42 | 43 | # assume that self.topic was created by setUp 44 | # if so, this should succeed 45 | self.client.ensure_topic_exists(self.topic, timeout=1) 46 | 47 | # ensure_topic_exists should fail with KafkaTimeoutError 48 | with self.assertRaises(KafkaTimeoutError): 49 | self.client.ensure_topic_exists('this_topic_doesnt_exist', timeout=0) 50 | 51 | def test_send_produce_request_maintains_request_response_order(self): 52 | 53 | self.client.ensure_topic_exists('foo') 54 | self.client.ensure_topic_exists('bar') 55 | 56 | requests = [ 57 | ProduceRequestPayload( 58 | 'foo', 0, 59 | [create_message(b'a'), create_message(b'b')]), 60 | ProduceRequestPayload( 61 | 'bar', 1, 62 | [create_message(b'a'), create_message(b'b')]), 63 | ProduceRequestPayload( 64 | 'foo', 1, 65 | [create_message(b'a'), create_message(b'b')]), 66 | ProduceRequestPayload( 67 | 'bar', 0, 68 | [create_message(b'a'), create_message(b'b')]), 69 | ] 70 | 71 | responses = self.client.send_produce_request(requests) 72 | while len(responses): 73 | request = requests.pop() 74 | response = responses.pop() 75 | self.assertEqual(request.topic, response.topic) 76 | self.assertEqual(request.partition, response.partition) 77 | 78 | 79 | #################### 80 | # Offset Tests # 81 | #################### 82 | 83 | @kafka_versions('>=0.8.1') 84 | def test_commit_fetch_offsets(self): 85 | req = OffsetCommitRequestPayload(self.topic, 0, 42, 'metadata') 86 | (resp,) = self.client.send_offset_commit_request('group', [req]) 87 | self.assertEqual(resp.error, 0) 88 | 89 | req = OffsetFetchRequestPayload(self.topic, 0) 90 | (resp,) = self.client.send_offset_fetch_request('group', [req]) 91 | self.assertEqual(resp.error, 0) 92 | self.assertEqual(resp.offset, 42) 93 | self.assertEqual(resp.metadata, '') # Metadata isn't stored for now 94 | -------------------------------------------------------------------------------- /test/test_cluster.py: -------------------------------------------------------------------------------- 1 | # pylint: skip-file 2 | from __future__ import absolute_import 3 | 4 | import pytest 5 | 6 | from kafka.cluster import ClusterMetadata 7 | from kafka.protocol.metadata import MetadataResponse 8 | 9 | 10 | def test_empty_broker_list(): 11 | cluster = ClusterMetadata() 12 | assert len(cluster.brokers()) == 0 13 | 14 | cluster.update_metadata(MetadataResponse[0]( 15 | [(0, 'foo', 12), (1, 'bar', 34)], [])) 16 | assert len(cluster.brokers()) == 2 17 | 18 | # empty broker list response should be ignored 19 | cluster.update_metadata(MetadataResponse[0]( 20 | [], # empty brokers 21 | [(17, 'foo', []), (17, 'bar', [])])) # topics w/ error 22 | assert len(cluster.brokers()) == 2 23 | -------------------------------------------------------------------------------- /test/test_package.py: -------------------------------------------------------------------------------- 1 | from . 
import unittest 2 | 3 | 4 | class TestPackage(unittest.TestCase): 5 | def test_top_level_namespace(self): 6 | import kafka as kafka1 7 | self.assertEqual(kafka1.KafkaConsumer.__name__, "KafkaConsumer") 8 | self.assertEqual(kafka1.consumer.__name__, "kafka.consumer") 9 | self.assertEqual(kafka1.codec.__name__, "kafka.codec") 10 | 11 | def test_submodule_namespace(self): 12 | import kafka.client as client1 13 | self.assertEqual(client1.__name__, "kafka.client") 14 | 15 | from kafka import client as client2 16 | self.assertEqual(client2.__name__, "kafka.client") 17 | 18 | from kafka.client import SimpleClient as SimpleClient1 19 | self.assertEqual(SimpleClient1.__name__, "SimpleClient") 20 | 21 | from kafka.codec import gzip_encode as gzip_encode1 22 | self.assertEqual(gzip_encode1.__name__, "gzip_encode") 23 | 24 | from kafka import SimpleClient as SimpleClient2 25 | self.assertEqual(SimpleClient2.__name__, "SimpleClient") 26 | 27 | from kafka.codec import snappy_encode 28 | self.assertEqual(snappy_encode.__name__, "snappy_encode") 29 | -------------------------------------------------------------------------------- /test/test_partitioner.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import pytest 4 | 5 | from kafka.partitioner import DefaultPartitioner, Murmur2Partitioner, RoundRobinPartitioner 6 | from kafka.partitioner.hashed import murmur2 7 | 8 | 9 | def test_default_partitioner(): 10 | partitioner = DefaultPartitioner() 11 | all_partitions = available = list(range(100)) 12 | # partitioner should return the same partition for the same key 13 | p1 = partitioner(b'foo', all_partitions, available) 14 | p2 = partitioner(b'foo', all_partitions, available) 15 | assert p1 == p2 16 | assert p1 in all_partitions 17 | 18 | # when key is None, choose one of available partitions 19 | assert partitioner(None, all_partitions, [123]) == 123 20 | 21 | # with fallback to all_partitions 22 | assert partitioner(None, all_partitions, []) in all_partitions 23 | 24 | 25 | def test_roundrobin_partitioner(): 26 | partitioner = RoundRobinPartitioner() 27 | all_partitions = available = list(range(100)) 28 | # partitioner should cycle between partitions 29 | i = 0 30 | max_partition = all_partitions[len(all_partitions) - 1] 31 | while i <= max_partition: 32 | assert i == partitioner(None, all_partitions, available) 33 | i += 1 34 | 35 | i = 0 36 | while i <= int(max_partition / 2): 37 | assert i == partitioner(None, all_partitions, available) 38 | i += 1 39 | 40 | # test dynamic partition re-assignment 41 | available = available[:-25] 42 | 43 | while i <= max(available): 44 | assert i == partitioner(None, all_partitions, available) 45 | i += 1 46 | 47 | all_partitions = list(range(200)) 48 | available = all_partitions 49 | 50 | max_partition = all_partitions[len(all_partitions) - 1] 51 | while i <= max_partition: 52 | assert i == partitioner(None, all_partitions, available) 53 | i += 1 54 | 55 | 56 | @pytest.mark.parametrize("bytes_payload,partition_number", [ 57 | (b'', 681), (b'a', 524), (b'ab', 434), (b'abc', 107), (b'123456789', 566), 58 | (b'\x00 ', 742) 59 | ]) 60 | def test_murmur2_java_compatibility(bytes_payload, partition_number): 61 | p = Murmur2Partitioner(range(1000)) 62 | # compare with output from Kafka's org.apache.kafka.clients.producer.Partitioner 63 | assert p.partition(bytes_payload) == partition_number 64 | 65 | 66 | def test_murmur2_not_ascii(): 67 | # Verify no regression of murmur2() bug encoding py2 
bytes that don't ascii encode 68 | murmur2(b'\xa4') 69 | murmur2(b'\x81' * 1000) 70 | -------------------------------------------------------------------------------- /test/test_sender.py: -------------------------------------------------------------------------------- 1 | # pylint: skip-file 2 | from __future__ import absolute_import 3 | 4 | import pytest 5 | import io 6 | 7 | from kafka.client_async import KafkaClient 8 | from kafka.cluster import ClusterMetadata 9 | from kafka.metrics import Metrics 10 | from kafka.protocol.produce import ProduceRequest 11 | from kafka.producer.record_accumulator import RecordAccumulator, ProducerBatch 12 | from kafka.producer.sender import Sender 13 | from kafka.record.memory_records import MemoryRecordsBuilder 14 | from kafka.structs import TopicPartition 15 | 16 | 17 | @pytest.fixture 18 | def client(mocker): 19 | _cli = mocker.Mock(spec=KafkaClient(bootstrap_servers=(), api_version=(0, 9))) 20 | _cli.cluster = mocker.Mock(spec=ClusterMetadata()) 21 | return _cli 22 | 23 | 24 | @pytest.fixture 25 | def accumulator(): 26 | return RecordAccumulator() 27 | 28 | 29 | @pytest.fixture 30 | def metrics(): 31 | return Metrics() 32 | 33 | 34 | @pytest.fixture 35 | def sender(client, accumulator, metrics): 36 | return Sender(client, client.cluster, accumulator, metrics) 37 | 38 | 39 | @pytest.mark.parametrize(("api_version", "produce_version"), [ 40 | ((0, 10), 2), 41 | ((0, 9), 1), 42 | ((0, 8), 0) 43 | ]) 44 | def test_produce_request(sender, mocker, api_version, produce_version): 45 | sender.config['api_version'] = api_version 46 | tp = TopicPartition('foo', 0) 47 | buffer = io.BytesIO() 48 | records = MemoryRecordsBuilder( 49 | magic=1, compression_type=0, batch_size=100000) 50 | batch = ProducerBatch(tp, records, buffer) 51 | records.close() 52 | produce_request = sender._produce_request(0, 0, 0, [batch]) 53 | assert isinstance(produce_request, ProduceRequest[produce_version]) 54 | -------------------------------------------------------------------------------- /test/test_subscription_state.py: -------------------------------------------------------------------------------- 1 | # pylint: skip-file 2 | from __future__ import absolute_import 3 | 4 | import pytest 5 | 6 | from kafka.consumer.subscription_state import SubscriptionState 7 | 8 | @pytest.mark.parametrize(('topic_name', 'expectation'), [ 9 | (0, pytest.raises(TypeError)), 10 | (None, pytest.raises(TypeError)), 11 | ('', pytest.raises(ValueError)), 12 | ('.', pytest.raises(ValueError)), 13 | ('..', pytest.raises(ValueError)), 14 | ('a' * 250, pytest.raises(ValueError)), 15 | ('abc/123', pytest.raises(ValueError)), 16 | ('/abc/123', pytest.raises(ValueError)), 17 | ('/abc123', pytest.raises(ValueError)), 18 | ('name with space', pytest.raises(ValueError)), 19 | ('name*with*stars', pytest.raises(ValueError)), 20 | ('name+with+plus', pytest.raises(ValueError)), 21 | ]) 22 | def test_topic_name_validation(topic_name, expectation): 23 | state = SubscriptionState() 24 | with expectation: 25 | state._ensure_valid_topic_name(topic_name) 26 | -------------------------------------------------------------------------------- /test/test_util.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import struct 3 | 4 | from kafka.vendor import six 5 | from . 
import unittest 6 | 7 | import kafka.errors 8 | import kafka.structs 9 | import kafka.util 10 | 11 | 12 | class UtilTest(unittest.TestCase): 13 | @unittest.skip("Unwritten") 14 | def test_relative_unpack(self): 15 | pass 16 | 17 | def test_write_int_string(self): 18 | self.assertEqual( 19 | kafka.util.write_int_string(b'some string'), 20 | b'\x00\x00\x00\x0bsome string' 21 | ) 22 | 23 | def test_write_int_string__unicode(self): 24 | with self.assertRaises(TypeError) as cm: 25 | kafka.util.write_int_string(u'unicode') 26 | #: :type: TypeError 27 | te = cm.exception 28 | if six.PY2: 29 | self.assertIn('unicode', str(te)) 30 | else: 31 | self.assertIn('str', str(te)) 32 | self.assertIn('to be bytes', str(te)) 33 | 34 | def test_write_int_string__empty(self): 35 | self.assertEqual( 36 | kafka.util.write_int_string(b''), 37 | b'\x00\x00\x00\x00' 38 | ) 39 | 40 | def test_write_int_string__null(self): 41 | self.assertEqual( 42 | kafka.util.write_int_string(None), 43 | b'\xff\xff\xff\xff' 44 | ) 45 | 46 | def test_read_short_string(self): 47 | self.assertEqual(kafka.util.read_short_string(b'\xff\xff', 0), (None, 2)) 48 | self.assertEqual(kafka.util.read_short_string(b'\x00\x00', 0), (b'', 2)) 49 | self.assertEqual(kafka.util.read_short_string(b'\x00\x0bsome string', 0), (b'some string', 13)) 50 | 51 | def test_relative_unpack2(self): 52 | self.assertEqual( 53 | kafka.util.relative_unpack('>hh', b'\x00\x01\x00\x00\x02', 0), 54 | ((1, 0), 4) 55 | ) 56 | 57 | def test_relative_unpack3(self): 58 | with self.assertRaises(kafka.errors.BufferUnderflowError): 59 | kafka.util.relative_unpack('>hh', '\x00', 0) 60 | 61 | def test_group_by_topic_and_partition(self): 62 | t = kafka.structs.TopicPartition 63 | 64 | l = [ 65 | t("a", 1), 66 | t("a", 2), 67 | t("a", 3), 68 | t("b", 3), 69 | ] 70 | 71 | self.assertEqual(kafka.util.group_by_topic_and_partition(l), { 72 | "a": { 73 | 1: t("a", 1), 74 | 2: t("a", 2), 75 | 3: t("a", 3), 76 | }, 77 | "b": { 78 | 3: t("b", 3), 79 | } 80 | }) 81 | 82 | # should not be able to group duplicate topic-partitions 83 | t1 = t("a", 1) 84 | with self.assertRaises(AssertionError): 85 | kafka.util.group_by_topic_and_partition([t1, t1]) 86 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py{26,27,34,35,36,py}, docs 3 | 4 | [pytest] 5 | testpaths = kafka test 6 | doctest_optionflags = modules 7 | addopts = --durations=10 8 | log_format = %(created)f %(filename)-23s %(threadName)s %(message)s 9 | 10 | [testenv] 11 | deps = 12 | pytest 13 | pytest-cov 14 | py{27,34,35,36,py}: pylint 15 | py{27,34,35,36,py}: pytest-pylint 16 | pytest-mock 17 | mock 18 | python-snappy 19 | lz4 20 | xxhash 21 | crc32c 22 | py26: unittest2 23 | decorator 24 | commands = 25 | py.test {posargs:--pylint --pylint-rcfile=pylint.rc --pylint-error-types=EF --cov=kafka --cov-config=.covrc} 26 | setenv = 27 | PROJECT_ROOT = {toxinidir} 28 | passenv = KAFKA_VERSION 29 | 30 | [testenv:py26] 31 | # pylint doesn't support python2.6 32 | commands = py.test {posargs:--cov=kafka --cov-config=.covrc} 33 | 34 | [testenv:pypy] 35 | # pylint is super slow on pypy... 
36 | commands = py.test {posargs:--cov=kafka --cov-config=.covrc} 37 | 38 | [testenv:docs] 39 | deps = 40 | sphinx_rtd_theme 41 | sphinx 42 | 43 | commands = 44 | sphinx-apidoc -o docs/apidoc/ kafka/ 45 | sphinx-build -b html docs/ docs/_build 46 | --------------------------------------------------------------------------------
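A note on the tox.ini above: the envlist covers py26 through py36 plus pypy and a docs build, and KAFKA_VERSION is passed through to the test environment, so an integration run against a specific broker release presumably looks something like `KAFKA_VERSION=1.0.1 tox -e py36` (an illustrative invocation, not a documented one). Without KAFKA_VERSION set, the integration test cases shown above (e.g. test/test_client_integration.py) simply skip their fixture setup and only the unit-level tests exercise anything.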
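The servers/*/resources files above are templates rather than ready-to-run configs: values such as dataDir={tmp_dir}, clientPort={port} and broker.id={broker_id} are str.format-style placeholders. Below is a minimal sketch of how one of the zookeeper.properties templates could be rendered, assuming it is run from the repository root; the template path, helper name and default values are illustrative assumptions for this example only, not what test/fixtures.py actually does.

# Minimal sketch: fill the {tmp_dir}/{host}/{port} placeholders with str.format().
# TEMPLATE and the defaults below are illustrative assumptions, not fixture defaults.
TEMPLATE = "servers/1.0.1/resources/zookeeper.properties"

def render_zookeeper_properties(tmp_dir, host="127.0.0.1", port=2181):
    with open(TEMPLATE) as f:
        template = f.read()
    # This particular template only references tmp_dir, host and port,
    # so a plain format() call is enough to produce a usable config.
    return template.format(tmp_dir=tmp_dir, host=host, port=port)

if __name__ == "__main__":
    print(render_zookeeper_properties(tmp_dir="/tmp/zookeeper-snapshots"))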
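The varint_data table in test/record/test_util.py above pins down the record wire format: 64-bit values are zigzag-mapped (0, -1, 1, -2, ... become 0, 1, 2, 3, ...) and then written as little-endian base-128 varints with a continuation bit. The sketch below illustrates that scheme, mirroring the encode_varint(value, write) and decode_varint(buffer, pos) call shapes exercised by the tests; it is an illustration of the encoding only, not a copy of kafka/record/util.py.

def encode_varint(value, write):
    """Zigzag-encode a signed 64-bit value and emit varint bytes via write()."""
    value = (value << 1) ^ (value >> 63)    # zigzag: small |n| -> small code
    while value > 0x7F:
        write((value & 0x7F) | 0x80)        # low 7 bits, continuation bit set
        value >>= 7
    write(value)                            # final byte, continuation bit clear


def decode_varint(buffer, pos=0):
    """Read one varint from a bytearray at pos; return (value, next_pos)."""
    result = shift = 0
    while True:
        byte = buffer[pos]                  # bytearray indexing yields an int
        pos += 1
        result |= (byte & 0x7F) << shift
        shift += 7
        if not byte & 0x80:                 # continuation bit clear -> done
            break
    return (result >> 1) ^ -(result & 1), pos   # undo the zigzag mapping

The zigzag step is what keeps small negative numbers short: -1 maps to 1 and fits in a single byte, exactly as the first rows of varint_data show, and size_of_varint is then just a count of how many 7-bit groups the zigzag-mapped value needs.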