├── .gitignore ├── .travis.yml ├── LICENSE ├── MANIFEST.in ├── README.rst ├── docs ├── clients.rst ├── clients │ ├── producer.rst │ ├── single.rst │ └── zkgrouped.rst ├── code │ ├── clients.rst │ ├── cluster.rst │ ├── compression.rst │ ├── constants.rst │ ├── exceptions.rst │ ├── modules │ │ ├── client.rst │ │ ├── cluster.rst │ │ ├── compression.gzip.rst │ │ ├── compression.snappy.rst │ │ ├── connection.rst │ │ ├── constants.rst │ │ ├── consumer.rst │ │ ├── events.rst │ │ ├── exc.rst │ │ ├── grouped.rst │ │ ├── iterables.rst │ │ ├── producer.rst │ │ ├── protocol.coordinator.rst │ │ ├── protocol.describe_groups.rst │ │ ├── protocol.fetch.rst │ │ ├── protocol.heartbeat.rst │ │ ├── protocol.join_group.rst │ │ ├── protocol.leave_group.rst │ │ ├── protocol.list_groups.rst │ │ ├── protocol.messages.rst │ │ ├── protocol.metadata.rst │ │ ├── protocol.offset.rst │ │ ├── protocol.offset_commit.rst │ │ ├── protocol.offset_fetch.rst │ │ ├── protocol.part.rst │ │ ├── protocol.primitives.rst │ │ ├── protocol.produce.rst │ │ ├── protocol.request.rst │ │ ├── protocol.response.rst │ │ ├── protocol.sync_group.rst │ │ ├── single.rst │ │ ├── zookeeper.allocator.rst │ │ ├── zookeeper.party.rst │ │ └── zookeeper.shared_set.rst │ ├── protocol_basics.rst │ ├── protocol_definition.rst │ ├── utils.rst │ └── zookeeper.rst ├── conf.py ├── index.rst ├── releases.rst ├── releases │ ├── 0.9.0.rst │ ├── 0.9.1.rst │ ├── 0.9.2.rst │ └── 0.9.3.rst ├── requirements.txt ├── source_docs.rst ├── spelling_wordlist.txt ├── static │ ├── custom.css │ └── ship.png └── templates │ └── page.html ├── examples ├── counter.py ├── maker.py └── worker.py ├── kiel ├── __init__.py ├── clients │ ├── __init__.py │ ├── client.py │ ├── consumer.py │ ├── grouped.py │ ├── producer.py │ └── single.py ├── cluster.py ├── compression │ ├── __init__.py │ ├── gzip.py │ └── snappy.py ├── connection.py ├── constants.py ├── events.py ├── exc.py ├── iterables.py ├── protocol │ ├── __init__.py │ ├── coordinator.py │ ├── describe_groups.py │ ├── errors.py │ ├── fetch.py │ ├── heartbeat.py │ ├── join_group.py │ ├── leave_group.py │ ├── list_groups.py │ ├── messages.py │ ├── metadata.py │ ├── offset.py │ ├── offset_commit.py │ ├── offset_fetch.py │ ├── part.py │ ├── primitives.py │ ├── produce.py │ ├── request.py │ ├── response.py │ └── sync_group.py └── zookeeper │ ├── __init__.py │ ├── allocator.py │ ├── party.py │ └── shared_set.py ├── setup.cfg ├── setup.py ├── tests ├── __init__.py ├── cases │ ├── __init__.py │ ├── async.py │ └── client.py ├── clients │ ├── __init__.py │ ├── test_client.py │ ├── test_consumer.py │ ├── test_grouped.py │ ├── test_producer.py │ └── test_single.py ├── compression │ ├── __init__.py │ ├── test_gzip.py │ └── test_snappy.py ├── protocol │ ├── __init__.py │ ├── test_messages.py │ └── test_primitives.py ├── test_cluster.py ├── test_connection.py ├── test_constants.py ├── test_docstrings.py ├── test_events.py ├── test_exc.py ├── test_iterables.py └── zookeeper │ ├── __init__.py │ ├── test_allocator.py │ └── test_party.py └── tox.ini /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.egg-info 3 | .coverage 4 | .tox 5 | .docbuild 6 | *.egg 7 | build 8 | dist 9 | __pycache__ 10 | .cache 11 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3.5 3 | envs: 4 | - TOXENV=py27 5 | - TOXENV=py35 6 | - TOXENV=pypy 7 | 
before_install: 8 | - pip install codeclimate-test-reporter 9 | - sudo apt-get -qq update 10 | - sudo apt-get install -y libsnappy-dev 11 | - pip install python-snappy 12 | install: pip install tox 13 | script: tox 14 | after_success: 15 | - codeclimate-test-reporter 16 | deploy: 17 | - provider: pypi 18 | user: wglass 19 | password: 20 | secure: tc7INI/VL+YIQU22VD+XogrN++48mA/HHxsnVNJooDypOfk6kSr0HT0VmuizXY+eL2aS7WbI3RPQI/kDJjxpz8XnuW7JIvAOJ+EGzVXSDNprdg1Rtlg3GTxRgtTaJteiXge7DOrHuewpJVq8JifofsFW76uz3yvoFq/GaXA1wLgUPqyUQ7VqSRCDbzoBtmL7EBlHWwprXCPcgYAEP6PCKHBpmuXo3pIdPoo5rRme3Rd4MP2SRhoI7QheKhvP0p/EDzfFRt/3qfNeXZ2QT1QEdXgtAYF3uuJRzScgGIRI0TwRLCEqkqaWhFD/0g0YQwVvb+diQILv15zzdz4kIMA2xpXF1o9RsQyQFVAsRisAjJo2uV7oC7JtvqMcBzUq+S39Dl6KRKQdbcpFn739UauwmIgb27OqoPrkOiFne0I07Mbv6KA3aDv8V60YRTLMWaO0WnKEYK+enI4z2gbgmCpQ7zLQ8h2MJpHCSLraUlyJ0gf4umPBxNasGN1IVIX/8XH8zaweGG1DbVmiG8WKdgNHkZ0ljHDgK6YRTrTFkrYE7BU1UVlI+4+KQWiFwb9kzd5UMUxDCBazw5v+9/nWagquME0G035k9eO7vwhGkFvcAOS9Yk5h5H5DNVvk2+Kj62v7yDFYDRufZROB5RF+lmWENKMfv9/nfK1WjsaKFgCR1L8= 21 | on: 22 | tags: true 23 | distributions: sdist bdist_wheel 24 | repo: wglass/kiel 25 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE README.rst 2 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | ========================== 2 | Kiel: Kafka Tornado Client 3 | ========================== 4 | 5 | .. image:: 6 | https://img.shields.io/pypi/v/kiel.svg 7 | :target: http://pypi.python.org/pypi/kiel 8 | :alt: Python Package Version 9 | .. image:: 10 | https://readthedocs.org/projects/kiel/badge/?version=latest 11 | :alt: Documentation Status 12 | :target: http://kiel.readthedocs.org/en/latest/ 13 | .. image:: 14 | https://travis-ci.org/wglass/kiel.svg?branch=master 15 | :alt: Build Status 16 | :target: https://travis-ci.org/wglass/kiel 17 | .. image:: 18 | https://codeclimate.com/github/wglass/kiel/badges/gpa.svg 19 | :alt: Code Climate 20 | :target: https://codeclimate.com/github/wglass/kiel 21 | .. image:: 22 | https://codeclimate.com/github/wglass/kiel/badges/coverage.svg 23 | :alt: Test Coverage 24 | :target: https://codeclimate.com/github/wglass/kiel/coverage 25 | 26 | 27 | Kiel is a pure python Kafka_ client library for use with Tornado_ 28 | applications. 29 | 30 | 31 | Installation 32 | ------------ 33 | 34 | Pip 35 | ~~~ 36 | 37 | Kiel is available via PyPI_, installation is as easy as:: 38 | 39 | pip install kiel 40 | 41 | 42 | Manual 43 | ~~~~~~ 44 | 45 | To install manually, first clone this here repo and: 46 | 47 | .. parsed-literal:: 48 | 49 | cd kiel 50 | python setup.py install 51 | 52 | 53 | Documentation 54 | ------------- 55 | 56 | More detailed information can be found on `Read The Docs`_. 57 | 58 | 59 | Quick Consumer Example 60 | ~~~~~~~~~~~~~~~~~~~~~~ 61 | 62 | .. 
code-block:: python 63 | 64 | from kiel import clients 65 | from tornado import gen, ioloop 66 | 67 | 68 | @gen.coroutine 69 | def consume(): 70 | c = clients.SingleConsumer(brokers=["localhost"]) 71 | 72 | yield c.connect() 73 | 74 | while True: 75 | msgs = yield c.consume("examples.colors") 76 | for msg in msgs: 77 | print(msg["color"]) 78 | 79 | 80 | def run(): 81 | loop = ioloop.IOloop.instance() 82 | 83 | loop.add_callback(consume) 84 | 85 | try: 86 | loop.start() 87 | except KeyboardInterrupt: 88 | loop.stop() 89 | 90 | 91 | Development 92 | ~~~~~~~~~~~ 93 | 94 | The code is hosted on GitHub_ 95 | 96 | To file a bug or possible enhancement see the `Issue Tracker`_, also found 97 | on GitHub. 98 | 99 | 100 | License 101 | ~~~~~~~ 102 | \(c\) 2015-2016 William Glass 103 | 104 | Kiel is licensed under the terms of the Apache License (2.0). See the LICENSE_ 105 | file for more details. 106 | 107 | 108 | .. _Kafka: http://kafka.apache.org/ 109 | .. _Tornado: http://tornadoweb.org/ 110 | .. _PyPI: http://pypi.python.org/pypi/kiel 111 | .. _`Read The Docs`: http://kiel.readthedocs.org/ 112 | .. _GitHub: https://github.com/wglass/kiel 113 | .. _`Issue Tracker`: https://github.com/wglass/kiel/issues 114 | .. _LICENSE: https://github.com/wglass/kiel/blob/master/LICENSE 115 | -------------------------------------------------------------------------------- /docs/clients.rst: -------------------------------------------------------------------------------- 1 | Clients 2 | ======= 3 | 4 | There are three client classes available: 5 | 6 | * :doc:`Producer ` 7 | * :doc:`SingleConsumer ` 8 | * :doc:`GroupedConsumer ` 9 | 10 | .. toctree:: 11 | :hidden: 12 | :titlesonly: 13 | 14 | clients/producer 15 | clients/single 16 | clients/zkgrouped 17 | -------------------------------------------------------------------------------- /docs/clients/producer.rst: -------------------------------------------------------------------------------- 1 | =================== 2 | The Producer Client 3 | =================== 4 | 5 | The producer client is used to *produce* messages to any number of Kafka 6 | topics. The ``Producer`` class can be imported via the ``kiel.clients`` 7 | module, the API for the ``Producer`` class is very simple and mostly a matter 8 | of instantiating the class with a list of bootstrap brokers plus the 9 | serializer/key maker combo that fits the use-case then yielding to the 10 | ``connect()`` method then producing via the ``produce()`` method: 11 | 12 | .. code-block:: python 13 | 14 | from kiel import clients 15 | from tornado import gen 16 | 17 | producer = clients.Producer( 18 | ["kafka01", "kafka02"], 19 | key_maker=None, 20 | partitioner=None, 21 | serializer=None, 22 | compression=None, 23 | batch_size=1, 24 | required_acks=1, 25 | ack_timeout=500, # milliseconds 26 | ) 27 | 28 | @gen.coroutine 29 | def run(): 30 | yield producer.connect() 31 | yield producer.produce("example.topic", {"my": "message"}) 32 | 33 | 34 | The only *required* constructor parameter is the list of bootstrap broker 35 | hosts. These will be used to retrieve broker and topic metadata, each bootstrap 36 | host is attempted one at a time in order until a successful metadata response. 37 | 38 | 39 | Which Message Goes Where 40 | ~~~~~~~~~~~~~~~~~~~~~~~~ 41 | 42 | In any non-trivial setup the producer will need to be configured with a 43 | *partitioning strategy*. That is, the producer will need to be configured to 44 | send messages to their intended Kafka topic partition. 
This is done by 45 | specifying `The Key Maker`_ and `The Partitioner`_. 46 | 47 | The Key Maker 48 | ------------- 49 | 50 | The "key maker" is a function that takes a message as a single argument and 51 | returns a "key" value. This key value will then in turn be used by `The Partitioner`_ 52 | 53 | For example, messages tracking user activity might have something like a ``user_id`` 54 | field that would be handy to key off of: 55 | 56 | .. code-block:: python 57 | 58 | def key_maker(msg): 59 | return msg.get("user_id") 60 | 61 | This key value would be stored in Kafka along with the full message value. 62 | 63 | .. warning:: 64 | 65 | It's important to have the ``key_maker`` function be as resilient as 66 | possible (e.g. the example used ``.get()`` rather than indexing). If a 67 | message came through without a ``user_id`` the ``produce()`` call would fail 68 | with a ``KeyError``. 69 | 70 | If no key maker is given, the default function generates ``None`` for all messages. 71 | 72 | The Partitioner 73 | --------------- 74 | 75 | The "partitioner" is a function that takes two arguments: the key value of a 76 | message as generated by `The Key Maker`_ and a list of the partition numbers of 77 | the target Kafka topic, and is expected to return one of the partition numbers. 78 | 79 | A simple example that expects a numeric key value and uses the modulo operator: 80 | 81 | .. code-block:: python 82 | 83 | def partitioner(key, partitions): 84 | return partitions[key % len(partitions)] 85 | 86 | A modulo strategy used along with an incrementing key value is a good way to 87 | spread messages across partitions evenly. 88 | 89 | .. note:: 90 | 91 | The number of partitions for a topic can change over time, if you rely on 92 | messages with the same key always winding up in the same place you may want 93 | to look into a `consistent hashing`_ strategy (e.g. with the 94 | `hash_ring module`_). 95 | 96 | If no partitioner is given, the default function chooses a random partition. 97 | 98 | 99 | Modulo Strategy Example 100 | ----------------------- 101 | 102 | This strategy spreads messages as evenly as possible, using a counter variable 103 | so that the messages' key space isn't polluted with a counter. 104 | 105 | .. code-block:: python 106 | 107 | from tornado import gen 108 | from kiel import clients 109 | 110 | counter = 0 111 | 112 | def key_maker(msg): 113 | global counter 114 | 115 | counter += 1 116 | return counter 117 | 118 | def partitioner(key, partitions): 119 | return partitions[key % len(partitions)] 120 | 121 | @gen.coroutine 122 | def produce(): 123 | p = clients.Producer( 124 | ["kafka01"], key_maker=key_maker, partitioner=partitioner 125 | ) 126 | 127 | yield p.connect() 128 | 129 | while True: 130 | yield p.produce("example.topic", {"how": "now", "brown": "cow"} 131 | 132 | 133 | Consistent Hashing Example 134 | -------------------------- 135 | 136 | This strategy attempts to consistently choose the same partition based 137 | on the key value (in this case a ``user_id``). 138 | 139 | .. 
code-block:: python 140 | 141 | from hash_ring import HashRing 142 | from tornado import gen 143 | from kiel import clients 144 | 145 | # this could be simplified to a simple operator.itemgetter("user_id") 146 | def key_maker(msg): 147 | return msg["user_id"] 148 | 149 | def partitioner(key, partitions): 150 | ring = HashRing(partitions) 151 | return ring.get_node(key) 152 | 153 | p = clients.Producer( 154 | ["kafka01"], key_maker=key_maker, partitioner=partitioner 155 | ) 156 | 157 | # meanwhile in some handler somewhere... 158 | @gen.coroutine 159 | def get(): 160 | # do some stuff... 161 | yield p.produce("activity.logins", {"user_id": self.user_id}) 162 | 163 | 164 | Compression and Serialization 165 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 166 | 167 | There are two options that determine *what* exactly gets copied into Kafka: 168 | the ``serializer`` function and the ``compression`` choice. 169 | 170 | The Serializer 171 | -------------- 172 | 173 | This simple function takes a single message object and returns a string 174 | representation of the message. 175 | 176 | Messages don't need to be dictionaries, but they *do* need to be serializeable 177 | in order to be passed onto Kafka. 178 | 179 | Custom message example: 180 | 181 | .. code-block:: python 182 | 183 | import json 184 | from kiel import clients 185 | 186 | 187 | class Thing(object): 188 | 189 | def __init__(self, stuff): 190 | self.stuff = stuff 191 | 192 | # etc... 193 | 194 | def serialize(self): 195 | return json.dumps({"stuff": self.stuff}) 196 | 197 | 198 | def serializer(msg): 199 | return msg.serializer() 200 | 201 | p = clients.Producer(["broker01"], serializer=serializer) 202 | 203 | 204 | @gen.coroutine 205 | def produce(): 206 | yield p.connect() 207 | 208 | thing = Thing(stuff="foo") 209 | 210 | yield p.produce("example.things", thing) 211 | 212 | 213 | .. note:: 214 | 215 | The default serializer is a ``json_serializer` that merely calls 216 | ``json.dumps(msg)``. Note that this assumes messages that are json 217 | serializeable (i.e. dictonaries). 218 | 219 | Compression Choices 220 | ------------------- 221 | 222 | There are three total compression options available: 223 | 224 | * Gzip_ 225 | * Snappy_ 226 | * No Compression 227 | 228 | These are specified via special constants, found in the `kiel.constants` module: 229 | 230 | .. code-block:: python 231 | 232 | from kiel import clients, constants 233 | 234 | # with gzip 235 | p = clients.Producer(["kafka01"], compression=constants.GZIP) 236 | 237 | # with snappy 238 | p = clients.Producer(["kafka01"], compression=constants.SNAPPY) 239 | 240 | 241 | The gzip option has no dependencies as the python standard library includes a 242 | ``gzip`` module. The snappy module however requires `python-snappy`_ to be 243 | installed (which in turn requires the snappy library and the ``cffi`` module to 244 | be installed). 245 | 246 | By default no compression scheme is used. 247 | 248 | .. note:: 249 | 250 | If you use the snappy compression option, any consumer clients of your 251 | messages must *also* have the snappy dependencies installed. 252 | 253 | 254 | Batch Size and ACKs 255 | ~~~~~~~~~~~~~~~~~~~ 256 | 257 | Options relating to batching and ACKs (i.e. how many brokers ACKnowledge a 258 | message before returning) can have a big effect on throughput. No system is 259 | created equal so the best strategy is to start with a baseline and tweak the 260 | options until a happy throughput/latency ratio is met. 
Batch Size
----------

This simple integer option determines how many messages to "flush" to the
brokers at a time. Kafka allows a single request to contain an arbitrary number
of messages targeting any number of topics/partitions.

.. code-block:: python

    import random
    from tornado import gen
    from kiel import clients, constants

    # send batches of 10 messages, gzip'ed
    p = clients.Producer(["kafka01"], batch_size=10, compression=constants.GZIP)

    @gen.coroutine
    def send():
        while True:
            yield p.produce(
                "topics.colors", {"color": random.choice(["red", "blue", "white"])}
            )

Batching is very useful when used in conjunction with the `Compression Choices`_,
as sets of messages sent to the same partition are compressed **together**,
which is much more efficient.

.. warning::

    Brokers limit the maximum size of accepted requests (via the
    ``message.max.bytes`` option, see the `broker config docs`_). At this time the
    producer isn't smart enough to split up such requests, but will continually
    log an error message each time the error response is received from the broker.

Required ACKs
-------------

The ``required_acks`` option determines how "durable" the storage of a message
is. There are two useful values: ``1`` and ``-1``.

``1``: This more or less means "consider the message committed once the target
broker has it". It can increase throughput, but at a greater risk of data loss
if brokers go down.

``-1``: This value tells Kafka to not consider a message "committed" until **all**
in-sync replicas acknowledge it.

.. warning::

    Up until Kafka 0.9 it has been possible to use other values for this option,
    but that is changing and in future versions of Kafka a "required acks" value
    greater than ``1`` will cause an exception. See `KIP-1`_ for details.

ACK Timeout
-----------

The ``ack_timeout`` value tells the receiving brokers how long they can wait for
other acknowledgements. The timeout is not exact; from the protocol docs:

(1) it does not include network latency,
(2) the timer begins at the beginning of the processing of this request so if
    many requests are queued due to server overload that wait time will not be
    included,
(3) we will not terminate a local write so if the local write time exceeds
    this timeout it will not be respected.

.. _`consistent hashing`: https://en.wikipedia.org/wiki/Consistent_hashing
.. _`hash_ring module`: https://pypi.python.org/pypi/hash_ring
.. _Gzip: https://www.gnu.org/software/gzip/
.. _Snappy: http://google.github.io/snappy/
.. _`python-snappy`: https://github.com/andrix/python-snappy
.. _`broker config docs`: http://kafka.apache.org/documentation.html#brokerconfigs
..
_KIP-1: https://cwiki.apache.org/confluence/display/KAFKA/KIP-1+-+Remove+support+of+request.required.acks 335 | -------------------------------------------------------------------------------- /docs/clients/single.rst: -------------------------------------------------------------------------------- 1 | ========================== 2 | The Single Consumer Client 3 | ========================== 4 | 5 | The ``SingleConsumer`` client class is used when you want to consume messages 6 | but don't need to coordinate consumer instances amongst themselves. It's 7 | importable via the ``kiel.clients`` module and provides a ``consume()`` 8 | method capable of starting at the beginning or end of a topic *or* a given 9 | ``datetime`` or ``timedelta``. 10 | 11 | .. code-block:: python 12 | 13 | from kiel import clients 14 | from tornado import gen 15 | 16 | consumer = clients.SingleConsumer( 17 | ["kafka01", "kafka02"], 18 | deserializer=None, 19 | max_wait_time=1000, # in milliseconds 20 | min_bytes=1, 21 | max_bytes=(1024 * 1024), 22 | ) 23 | 24 | @gen.coroutine 25 | def run(): 26 | yield consumer.connect() 27 | msgs = yield consumer.consume("example.topic") 28 | for msg in msgs: 29 | print(msg) 30 | 31 | 32 | The only *required* constructor parameter is the list of bootstrap broker 33 | hosts. 34 | 35 | 36 | Where to Start 37 | -------------- 38 | 39 | Other than the topic to consume, the ``consume()`` method also takes an optional 40 | parameter of where in the topic's history to start. 41 | 42 | .. note:: 43 | 44 | The ``start`` parameter is honored in only two cases 45 | 46 | * when consuming from a topic for the first time 47 | * an "offset out of range" error is encountered. 48 | 49 | There are four different possible kinds of values: 50 | 51 | * ``SingleConsumer.END`` **(default)** 52 | 53 | This denotes the tail end of the topic, the ``consume()`` call will return 54 | messages once some are available. 55 | 56 | * ``SingleConsumer.BEGINNING`` 57 | 58 | The very beginning of a topic (often 0). Useful for re-processing topics. 59 | 60 | * ``datetime`` 61 | 62 | Starts consuming a topic at roughly the point it was at a given time (in 63 | UTC). 64 | 65 | * ``timedelta`` 66 | 67 | Starts consuming a topic at roughly the point it was at a *reliative* 68 | time. 69 | 70 | 71 | .. warning:: 72 | 73 | The time-based options rely on epoch seconds and are vulnerable to clock 74 | skew between brokers and client servers. 75 | 76 | 77 | The Deserializer 78 | ---------------- 79 | 80 | The JSON Default 81 | ~~~~~~~~~~~~~~~~ 82 | 83 | By default ``json.dumps`` is used as a deserializer. This works in conjunction 84 | with the default serializer on the ``Producer`` class: 85 | 86 | .. code-block:: python 87 | 88 | import random 89 | 90 | from kiel import clients 91 | from tornado import gen 92 | 93 | producer = clients.Producer(["kafka01"]) 94 | consumer = clients.SingleConsumer(["kafka01"]) 95 | 96 | @gen.coroutine 97 | def produce(): 98 | yield producer.connect() 99 | while True: 100 | yield producer.produce( 101 | "example.colors", {"color": random.choice(["blue", "red"])} 102 | ) 103 | 104 | @gen.coroutine 105 | def consume(): 106 | yield consumer.connect() 107 | while True: 108 | msgs = yield consumer.consume("example.colors") 109 | for msg in msgs: 110 | print(msg["color"]) 111 | 112 | Customizing 113 | ~~~~~~~~~~~ 114 | 115 | Deserializing can be customized via the ``deserializer`` constructor parameter. 116 | The given callable will be passed a message's value as a single argument. 
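The deserializer is also a convenient place to guard against malformed
payloads. A minimal sketch (assuming JSON-encoded values; the fallback of
returning the raw value unchanged is purely illustrative):

.. code-block:: python

    import json
    import logging

    from kiel import clients

    log = logging.getLogger(__name__)


    def tolerant_deserialize(value):
        # parse JSON-encoded values, falling back to the raw value rather
        # than raising if a payload turns out to be malformed
        try:
            return json.loads(value)
        except (TypeError, ValueError):
            log.warning("could not deserialize message value: %r", value)
            return value

    consumer = clients.SingleConsumer(
        ["kafka01"], deserializer=tolerant_deserialize
    )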
117 | 118 | A trivial example where messages are rot-13 encoded: 119 | 120 | .. code-block:: python 121 | 122 | import codecs 123 | 124 | from kiel import clients 125 | from tornado import gen 126 | 127 | 128 | def deserialize(value): 129 | return codecs.decode(value, "rot_13") 130 | 131 | consumer = clients.SingleConsumer(["kafka01"], deserializer=deserialize) 132 | 133 | @gen.coroutine 134 | def consume(): 135 | yield consumer.connect() 136 | while True: 137 | msgs = yield consumer.consume("example.colors") 138 | for msg in msgs: 139 | print(msg["color"]) 140 | 141 | 142 | Limiting Responses 143 | ------------------ 144 | 145 | Max and Min Bytes 146 | ~~~~~~~~~~~~~~~~~ 147 | 148 | The size window of responses can be controlled via the ``min_bytes`` and 149 | ``max_bytes`` constructor arguments. These direct the Kafka brokers to 150 | not respond until *at least* ``min_bytes`` of data is present and to 151 | construct responses *no greater* ``max_bytes``. 152 | 153 | .. note:: 154 | 155 | The ``max_bytes`` directive isn't *exact* as it only limits the data in 156 | the partition clauses of responses, there will still be other overhead. 157 | The Kafka protocol does not recognize an overal "max bytes" setting but 158 | has a *per partition* maximum, which the consumer calculates as 159 | ``max_bytes`` / number of partitions. 160 | 161 | This can be helpful for consumers starting from the beginning of a large topic 162 | and must throttle the otherwise-massive initial responses. 163 | 164 | .. code-block:: python 165 | 166 | from kiel import clients 167 | from tornado import gen 168 | 169 | consumer = clients.SingleConsumer( 170 | ["kafka01"], 171 | min_bytes=1024, 172 | max_bytes=(10 * 1024 * 1024) 173 | ) 174 | 175 | @gen.coroutine 176 | def start_from_beginning(): 177 | yield consumer.connect() 178 | 179 | msgs = yield consumer.consume("example.topic", start=consumer.BEGINNING) 180 | while msgs: 181 | # process msgs, etc. 182 | msgs = yield consumer.consume("example.topic", start=consumer.BEGINNING) 183 | 184 | Response Wait Time 185 | ~~~~~~~~~~~~~~~~~~ 186 | 187 | The ``max_wait_time`` constructor argument can be used to tell brokers how long 188 | the consumer is willing to wait for data. If the ``max_wait_time`` is reached 189 | before data is available the broker will respond with a retriable "timeout" error 190 | code and the ``consume()`` call will return with an empty list. 191 | 192 | 193 | Compression 194 | ----------- 195 | 196 | Kafka bakes compression into the wire protocol itself so the consumer classes 197 | take care of decompression for you. 198 | 199 | .. warning:: 200 | 201 | Naturally, if you're using compression schemes with external dependencies 202 | (i.e. non-gzip schemes) when producing messages your consumers must *also* 203 | have those dependencies installed! 204 | -------------------------------------------------------------------------------- /docs/clients/zkgrouped.rst: -------------------------------------------------------------------------------- 1 | =========================== 2 | The Grouped Consumer Client 3 | =========================== 4 | 5 | The ``GroupedConsumer`` client class is used in cases where a set of consumers 6 | must coordinate which partitions to consume amongst themselves. The class 7 | uses an "allocator" function to dole out partitions and Zookeeper_ to store 8 | the resulting allocation. Like the other client classes it's importable via 9 | ``kiel.clients``. 10 | 11 | .. 
code-block:: python

    from kiel import clients
    from tornado import gen

    consumer = clients.GroupedConsumer(
        ["kafka01", "kafka02"],
        "my-consumer-group",
        ["zookeeper01", "zookeeper02", "zookeeper03"],
        deserializer=None,
        partition_allocator=None,
        autocommit=True,
        max_wait_time=1000,  # in milliseconds
        min_bytes=1,
        max_bytes=(1024 * 1024),
    )

    @gen.coroutine
    def run():
        yield consumer.connect()
        msgs = yield consumer.consume("example.task.queue")
        for msg in msgs:
            process(msg)


The bootstrap broker hosts, group name and zookeeper ensemble hosts are all
required constructor arguments.

.. note::

    The list of Zookeeper hosts should include *all* of the hosts in the
    ensemble. The Kafka brokers relay data about other brokers, whereas zookeeper
    hosts do not.


Allocation
----------

Allocation works via the "partition allocator" function, customizable via the
``partition_allocator`` constructor argument. The allocator will assign *all*
known partitions, regardless of planned use. This is so that we don't need to
check for needed redistributions whenever a new topic is consumed.

.. warning::

    During a re-allocation it is entirely possible for a message to be consumed
    twice; that is, delivery is "at least once" rather than "`at most once`_".
    If using the client as a job queue worker, make sure to either design the
    jobs to be idempotent or to track completion state in a separate data store.

The Default Naive Function
~~~~~~~~~~~~~~~~~~~~~~~~~~

The default allocator function uses a simple round-robin algorithm. Each member
in the group is cycled over and given a single partition until there are no
partitions left.

This ensures a relatively even number of partitions spread over the group, but
does not account for some members having more capacity than others. It also
does not account for differences in partition counts between topics. It is
entirely possible for a group member to inadvertently wind up with all of the
partitions of a certain topic while other members get none.

Customizing
~~~~~~~~~~~

The allocator can be customized to be any stable function that meets the following
requirements:

* takes two arguments: a sorted list of member names and a sorted list of strings
  denoting partitions with the format ``<topic>:<partition_id>``:

  .. code-block:: python

      members = ["client01:434533", "client02:12345"]
      partitions = [
          "example.topic.1:0",
          "example.topic.1:1",
          "example.topic.1:2",
          "example.topic.1:3",
          "example.topic.2:0",
          "example.topic.2:1",
      ]

* returns a dictionary keyed on member name, with nested dictionaries as values
  keyed on topic name with a list of partition ids as values:

  .. code-block:: python

      {
          "client01:434533": {
              "example.topic.1": [0, 3],
              "example.topic.2": [1],
          },
          "client02:12345": {
              "example.topic.1": [1, 2],
              "example.topic.2": [0],
          }
      }


Some examples would be to account for CPU count or available memory so that
more powerful members take on more work.

.. note::

    It is very important that any allocation function be *stable*.
That is, each 118 | member should always get the same result from the function if the same 119 | argument values are given. 120 | 121 | The Deserializer 122 | ---------------- 123 | 124 | The JSON Default 125 | ~~~~~~~~~~~~~~~~ 126 | 127 | By default ``json.dumps`` is used as a deserializer. This works in conjunction 128 | with the default serializer on the ``Producer`` class: 129 | 130 | .. code-block:: python 131 | 132 | import random 133 | 134 | from kiel import clients 135 | from tornado import gen 136 | 137 | producer = clients.Producer(["kafka01"]) 138 | consumer = clients.GroupedConsumer( 139 | ["kafka01"], "work-group", ["zk01", "zk02", "zk03"] 140 | ) 141 | 142 | @gen.coroutine 143 | def produce(): 144 | yield producer.connect() 145 | while True: 146 | yield producer.produce( 147 | "example.colors", {"color": random.choice(["blue", "red"])} 148 | ) 149 | 150 | @gen.coroutine 151 | def consume(): 152 | yield consumer.connect() 153 | while True: 154 | msgs = yield consumer.consume("example.colors") 155 | for msg in msgs: 156 | print(msg["color"]) 157 | 158 | Customizing 159 | ~~~~~~~~~~~ 160 | 161 | Deserializing can be customized via the ``deserializer`` constructor parameter. 162 | The given callable will be passed a message's value as a single argument. 163 | 164 | A trivial example where messages are rot-13 encoded: 165 | 166 | .. code-block:: python 167 | 168 | import codecs 169 | 170 | from kiel import clients 171 | from tornado import gen 172 | 173 | 174 | def deserialize(value): 175 | return codecs.decode(value, "rot_13") 176 | 177 | consumer = clients.GroupedConsumer( 178 | ["kafka01"], "work-group", ["zk01", "zk02", "zk03"], 179 | deserializer=deserialize 180 | ) 181 | 182 | @gen.coroutine 183 | def consume(): 184 | yield consumer.connect() 185 | while True: 186 | msgs = yield consumer.consume("example.colors") 187 | for msg in msgs: 188 | print(msg["color"]) 189 | 190 | 191 | Limiting Responses 192 | ------------------ 193 | 194 | Max and Min Bytes 195 | ~~~~~~~~~~~~~~~~~ 196 | 197 | The size window of responses can be controlled via the ``min_bytes`` and 198 | ``max_bytes`` constructor arguments. These direct the Kafka brokers to 199 | not respond until *at least* ``min_bytes`` of data is present and to 200 | construct responses *no greater* ``max_bytes``. 201 | 202 | .. note:: 203 | 204 | The ``max_bytes`` directive isn't *exact* as it only limits the data in 205 | the partition clauses of responses, there will still be other overhead. 206 | The Kafka protocol does not recognize an overal "max bytes" setting but 207 | has a *per partition* maximum, which the consumer calculates as 208 | ``max_bytes`` / number of partitions. 209 | 210 | This can be helpful for consumers starting from the beginning of a large topic 211 | and must throttle the otherwise-massive initial responses. 212 | 213 | .. code-block:: python 214 | 215 | from kiel import clients 216 | from tornado import gen 217 | 218 | consumer = clients.GroupedConsumer( 219 | ["kafka01"], "work-group", ["zk01", "zk02", "zk03"], 220 | min_bytes=1024, 221 | max_bytes=(10 * 1024 * 1024) 222 | ) 223 | 224 | @gen.coroutine 225 | def start_from_beginning(): 226 | yield consumer.connect() 227 | 228 | msgs = yield consumer.consume("example.topic") 229 | while msgs: 230 | # process msgs, etc. 
231 | msgs = yield consumer.consume("example.topic") 232 | 233 | Response Wait Time 234 | ~~~~~~~~~~~~~~~~~~ 235 | 236 | The ``max_wait_time`` constructor argument can be used to tell brokers how long 237 | the consumer is willing to wait for data. If the ``max_wait_time`` is reached 238 | before data is available the broker will respond with a retriable "timeout" error 239 | code and the ``consume()`` call will return with an empty list. 240 | 241 | 242 | Compression 243 | ----------- 244 | 245 | Kafka bakes compression into the wire protocol itself so the consumer classes 246 | take care of decompression for you. 247 | 248 | .. warning:: 249 | 250 | Naturally, if you're using compression schemes with external dependencies 251 | (i.e. non-gzip schemes) when producing messages your consumers must *also* 252 | have those dependencies installed! 253 | 254 | 255 | .. _Zookeeper: https://zookeeper.apache.org/ 256 | .. _`at most once`: http://kafka.apache.org/documentation.html#semantics 257 | -------------------------------------------------------------------------------- /docs/code/clients.rst: -------------------------------------------------------------------------------- 1 | Clients 2 | ======= 3 | 4 | .. toctree:: 5 | 6 | modules/client 7 | modules/producer 8 | modules/consumer 9 | modules/single 10 | modules/grouped 11 | -------------------------------------------------------------------------------- /docs/code/cluster.rst: -------------------------------------------------------------------------------- 1 | Cluster Management 2 | ================== 3 | 4 | .. toctree:: 5 | 6 | modules/cluster 7 | modules/connection 8 | -------------------------------------------------------------------------------- /docs/code/compression.rst: -------------------------------------------------------------------------------- 1 | Compression 2 | =========== 3 | 4 | .. toctree:: 5 | 6 | modules/compression.gzip 7 | modules/compression.snappy 8 | -------------------------------------------------------------------------------- /docs/code/constants.rst: -------------------------------------------------------------------------------- 1 | Constants 2 | ========= 3 | 4 | .. toctree:: 5 | 6 | modules/constants 7 | -------------------------------------------------------------------------------- /docs/code/exceptions.rst: -------------------------------------------------------------------------------- 1 | Exceptions 2 | ========== 3 | 4 | .. toctree:: 5 | 6 | modules/exc 7 | -------------------------------------------------------------------------------- /docs/code/modules/client.rst: -------------------------------------------------------------------------------- 1 | ``kiel.clients.client`` 2 | ======================= 3 | 4 | .. automodule:: kiel.clients.client 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/code/modules/cluster.rst: -------------------------------------------------------------------------------- 1 | ``kiel.cluster`` 2 | ================ 3 | 4 | .. automodule:: kiel.cluster 5 | :members: 6 | :special-members: 7 | :undoc-members: 8 | :show-inheritance: 9 | :exclude-members: __dict__, __weakref__, __module__, __init__ 10 | -------------------------------------------------------------------------------- /docs/code/modules/compression.gzip.rst: -------------------------------------------------------------------------------- 1 | ``kiel.compression.gzip`` 2 | ========================= 3 | 4 | .. 
automodule:: kiel.compression.gzip 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/code/modules/compression.snappy.rst: -------------------------------------------------------------------------------- 1 | ``kiel.compression.snappy`` 2 | =========================== 3 | 4 | .. automodule:: kiel.compression.snappy 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/code/modules/connection.rst: -------------------------------------------------------------------------------- 1 | ``kiel.connection`` 2 | =================== 3 | 4 | .. automodule:: kiel.connection 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/code/modules/constants.rst: -------------------------------------------------------------------------------- 1 | ``kiel.constants`` 2 | ================== 3 | 4 | .. automodule:: kiel.constants 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/code/modules/consumer.rst: -------------------------------------------------------------------------------- 1 | ``kiel.clients.consumer`` 2 | ========================= 3 | 4 | .. automodule:: kiel.clients.consumer 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/code/modules/events.rst: -------------------------------------------------------------------------------- 1 | ``kiel.events`` 2 | =============== 3 | 4 | .. automodule:: kiel.events 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/code/modules/exc.rst: -------------------------------------------------------------------------------- 1 | ``kiel.exc`` 2 | ============ 3 | 4 | .. automodule:: kiel.exc 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/code/modules/grouped.rst: -------------------------------------------------------------------------------- 1 | ``kiel.clients.grouped`` 2 | ======================== 3 | 4 | .. automodule:: kiel.clients.grouped 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/code/modules/iterables.rst: -------------------------------------------------------------------------------- 1 | ``kiel.iterables`` 2 | ================== 3 | 4 | .. automodule:: kiel.iterables 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/code/modules/producer.rst: -------------------------------------------------------------------------------- 1 | ``kiel.clients.producer`` 2 | ========================= 3 | 4 | .. automodule:: kiel.clients.producer 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/code/modules/protocol.coordinator.rst: -------------------------------------------------------------------------------- 1 | ``kiel.protocol.coordinator`` 2 | ============================= 3 | 4 | .. 
automodule:: kiel.protocol.coordinator 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/code/modules/protocol.describe_groups.rst: -------------------------------------------------------------------------------- 1 | ``kiel.protocol.describe_groups`` 2 | ================================= 3 | 4 | .. automodule:: kiel.protocol.describe_groups 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/code/modules/protocol.fetch.rst: -------------------------------------------------------------------------------- 1 | ``kiel.protocol.fetch`` 2 | ======================= 3 | 4 | .. automodule:: kiel.protocol.fetch 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/code/modules/protocol.heartbeat.rst: -------------------------------------------------------------------------------- 1 | ``kiel.protocol.heartbeat`` 2 | =========================== 3 | 4 | .. automodule:: kiel.protocol.heartbeat 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/code/modules/protocol.join_group.rst: -------------------------------------------------------------------------------- 1 | ``kiel.protocol.join_group`` 2 | ============================ 3 | 4 | .. automodule:: kiel.protocol.join_group 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/code/modules/protocol.leave_group.rst: -------------------------------------------------------------------------------- 1 | ``kiel.protocol.leave_group`` 2 | ============================= 3 | 4 | .. automodule:: kiel.protocol.leave_group 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/code/modules/protocol.list_groups.rst: -------------------------------------------------------------------------------- 1 | ``kiel.protocol.list_groups`` 2 | ============================= 3 | 4 | .. automodule:: kiel.protocol.list_groups 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/code/modules/protocol.messages.rst: -------------------------------------------------------------------------------- 1 | ``kiel.protocol.messages`` 2 | ========================== 3 | 4 | .. automodule:: kiel.protocol.messages 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/code/modules/protocol.metadata.rst: -------------------------------------------------------------------------------- 1 | ``kiel.protocol.metadata`` 2 | ========================== 3 | 4 | .. automodule:: kiel.protocol.metadata 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/code/modules/protocol.offset.rst: -------------------------------------------------------------------------------- 1 | ``kiel.protocol.offset`` 2 | ======================== 3 | 4 | .. automodule:: kiel.protocol.offset 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/code/modules/protocol.offset_commit.rst: -------------------------------------------------------------------------------- 1 | ``kiel.protocol.offset_commit`` 2 | =============================== 3 | 4 | .. 
automodule:: kiel.protocol.offset_commit 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/code/modules/protocol.offset_fetch.rst: -------------------------------------------------------------------------------- 1 | ``kiel.protocol.offset_fetch`` 2 | ============================== 3 | 4 | .. automodule:: kiel.protocol.offset_fetch 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/code/modules/protocol.part.rst: -------------------------------------------------------------------------------- 1 | ``kiel.protocol.part`` 2 | ====================== 3 | 4 | .. automodule:: kiel.protocol.part 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/code/modules/protocol.primitives.rst: -------------------------------------------------------------------------------- 1 | ``kiel.protocol.primitives`` 2 | ============================ 3 | 4 | .. automodule:: kiel.protocol.primitives 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/code/modules/protocol.produce.rst: -------------------------------------------------------------------------------- 1 | ``kiel.protocol.produce`` 2 | ========================= 3 | 4 | .. automodule:: kiel.protocol.produce 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/code/modules/protocol.request.rst: -------------------------------------------------------------------------------- 1 | ``kiel.protocol.request`` 2 | ========================= 3 | 4 | .. automodule:: kiel.protocol.request 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/code/modules/protocol.response.rst: -------------------------------------------------------------------------------- 1 | ``kiel.protocol.response`` 2 | ========================== 3 | 4 | .. automodule:: kiel.protocol.response 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/code/modules/protocol.sync_group.rst: -------------------------------------------------------------------------------- 1 | ``kiel.protocol.sync_group`` 2 | ============================ 3 | 4 | .. automodule:: kiel.protocol.sync_group 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/code/modules/single.rst: -------------------------------------------------------------------------------- 1 | ``kiel.clients.single`` 2 | ======================= 3 | 4 | .. automodule:: kiel.clients.single 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/code/modules/zookeeper.allocator.rst: -------------------------------------------------------------------------------- 1 | ``kiel.zookeeper.allocator`` 2 | ============================ 3 | 4 | .. 
automodule:: kiel.zookeeper.allocator 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/code/modules/zookeeper.party.rst: -------------------------------------------------------------------------------- 1 | ``kiel.zookeeper.party`` 2 | ======================== 3 | 4 | .. automodule:: kiel.zookeeper.party 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/code/modules/zookeeper.shared_set.rst: -------------------------------------------------------------------------------- 1 | ``kiel.zookeeper.shared_set`` 2 | ============================= 3 | 4 | .. automodule:: kiel.zookeeper.shared_set 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/code/protocol_basics.rst: -------------------------------------------------------------------------------- 1 | Protocol Basics 2 | =============== 3 | 4 | .. toctree:: 5 | 6 | modules/protocol.primitives 7 | modules/protocol.part 8 | modules/protocol.request 9 | modules/protocol.response 10 | modules/protocol.messages 11 | -------------------------------------------------------------------------------- /docs/code/protocol_definition.rst: -------------------------------------------------------------------------------- 1 | Protocol Definition 2 | =================== 3 | 4 | .. toctree:: 5 | 6 | modules/protocol.metadata 7 | modules/protocol.fetch 8 | modules/protocol.produce 9 | modules/protocol.offset 10 | modules/protocol.offset_commit 11 | modules/protocol.offset_fetch 12 | modules/protocol.coordinator 13 | modules/protocol.heartbeat 14 | modules/protocol.join_group 15 | modules/protocol.sync_group 16 | modules/protocol.leave_group 17 | modules/protocol.list_groups 18 | modules/protocol.describe_groups 19 | -------------------------------------------------------------------------------- /docs/code/utils.rst: -------------------------------------------------------------------------------- 1 | Utility Modules 2 | =============== 3 | 4 | .. toctree:: 5 | 6 | modules/iterables 7 | modules/events 8 | -------------------------------------------------------------------------------- /docs/code/zookeeper.rst: -------------------------------------------------------------------------------- 1 | Zookeeper Helper Modules 2 | ======================== 3 | 4 | .. toctree:: 5 | 6 | modules/zookeeper.party 7 | modules/zookeeper.shared_set 8 | modules/zookeeper.allocator 9 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. title:: Kiel: Kafka Tornado Client 2 | 3 | ========================== 4 | Kiel: Kafka Tornado Client 5 | ========================== 6 | 7 | Kiel is a pure python Kafka_ client library for use with Tornado_ 8 | applications, built with ease-of-use in mind. 9 | 10 | There are three client classes available: 11 | 12 | * :doc:`Producer ` 13 | * :doc:`SingleConsumer ` 14 | * :doc:`GroupedConsumer ` 15 | 16 | 17 | Installation 18 | ------------ 19 | 20 | Pip 21 | ~~~ 22 | 23 | Kiel is available via PyPI_, installation is as easy as:: 24 | 25 | pip install kiel 26 | 27 | 28 | Manual 29 | ~~~~~~ 30 | 31 | To install manually, first download and unzip the :current_tarball:`z`, then: 32 | 33 | .. 
parsed-literal:: 34 | 35 | tar -zxvf kiel-|version|.tar.gz 36 | cd kiel-|version| 37 | python setup.py install 38 | 39 | 40 | Examples 41 | -------- 42 | 43 | Example scripts can be found in the `examples directory`_ in the repo. 44 | 45 | Quick Consumer Example 46 | ~~~~~~~~~~~~~~~~~~~~~~ 47 | 48 | .. code-block:: python 49 | 50 | from kiel import clients 51 | from tornado import gen, ioloop 52 | 53 | 54 | @gen.coroutine 55 | def consume(): 56 | c = clients.SingleConsumer(brokers=["localhost"]) 57 | 58 | yield c.connect() 59 | 60 | while True: 61 | msgs = yield c.consume("examples.colors") 62 | for msg in msgs: 63 | print(msg["color"]) 64 | 65 | 66 | def run(): 67 | loop = ioloop.IOloop.instance() 68 | 69 | loop.add_callback(consume) 70 | 71 | try: 72 | loop.start() 73 | except KeyboardInterrupt: 74 | loop.stop() 75 | 76 | 77 | Development 78 | ----------- 79 | 80 | The code is hosted on GitHub_ 81 | 82 | To file a bug or possible enhancement see the `Issue Tracker`_, also found 83 | on GitHub. 84 | 85 | 86 | License 87 | ------- 88 | 89 | Kiel is licensed under the terms of the Apache License (2.0). See the LICENSE_ 90 | file for more details. 91 | 92 | 93 | .. _Kafka: http://kafka.apache.org/ 94 | .. _Tornado: http://tornadoweb.org/ 95 | .. _`current zipfile`: https://github.com/wglass/kiel/archive/master.zip 96 | .. _PyPI: http://pypi.python.org/pypi/kiel 97 | .. _`examples directory`: https://github.com/wglass/kiel/tree/master/examples 98 | .. _GitHub: https://github.com/wglass/kiel 99 | .. _`Issue Tracker`: https://github.com/wglass/kiel/issues 100 | .. _LICENSE: https://github.com/wglass/kiel/blob/master/LICENSE 101 | 102 | 103 | .. toctree:: 104 | :hidden: 105 | :titlesonly: 106 | :maxdepth: 3 107 | 108 | clients 109 | releases 110 | source_docs 111 | -------------------------------------------------------------------------------- /docs/releases.rst: -------------------------------------------------------------------------------- 1 | ============= 2 | Release Notes 3 | ============= 4 | 5 | .. toctree:: 6 | :maxdepth: 2 7 | :glob: 8 | 9 | releases/* 10 | -------------------------------------------------------------------------------- /docs/releases/0.9.0.rst: -------------------------------------------------------------------------------- 1 | 0.9.0 2 | ~~~~~ 3 | 4 | * Initial public release. 5 | -------------------------------------------------------------------------------- /docs/releases/0.9.1.rst: -------------------------------------------------------------------------------- 1 | 0.9.1 2 | ~~~~~ 3 | 4 | * Fix bug where Producer would prematurely discard pending messages if two flush() 5 | calls happened before a response was processed. 6 | 7 | * Added preliminary definitions of the new group-related APIs in Kafka 0.9.0 and 8 | newer (along with the new error codes). 9 | 10 | * Updated protocol objects' repr() methods to show their attribute values. 11 | -------------------------------------------------------------------------------- /docs/releases/0.9.2.rst: -------------------------------------------------------------------------------- 1 | 0.9.2 2 | ~~~~~ 3 | 4 | * Fix issues when serializing compressed message sets 5 | 6 | * Log unexpected exceptions when reading or writing to/from the IOStream 7 | 8 | * Update Travis CI config to automatically deploy to pypi (hopefully) 9 | 10 | * Handle closed connections gracefully. 
11 | -------------------------------------------------------------------------------- /docs/releases/0.9.3.rst: -------------------------------------------------------------------------------- 1 | 0.9.3 2 | ~~~~~ 3 | 4 | * Simple version bump to get CI working. 5 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | Sphinx 2 | sphinx-bootstrap-theme 3 | sphinxcontrib-spelling 4 | pyenchant 5 | -------------------------------------------------------------------------------- /docs/source_docs.rst: -------------------------------------------------------------------------------- 1 | Source Docs 2 | =========== 3 | 4 | .. toctree:: 5 | :titlesonly: 6 | 7 | code/clients 8 | code/cluster 9 | code/protocol_basics 10 | code/protocol_definition 11 | code/compression 12 | code/zookeeper 13 | code/exceptions 14 | code/constants 15 | code/utils 16 | -------------------------------------------------------------------------------- /docs/spelling_wordlist.txt: -------------------------------------------------------------------------------- 1 | Kafka 2 | -------------------------------------------------------------------------------- /docs/static/custom.css: -------------------------------------------------------------------------------- 1 | /* 2 | * colors: 3 | * #474350 4 | * #5c6d70 5 | * #c1b4ae 6 | * #ffffff 7 | * #92140c 8 | */ 9 | 10 | .navbar-default { 11 | background-color: #474350; 12 | } 13 | .navbar-default a { 14 | text-decoration: none; 15 | } 16 | .navbar-default .navbar-brand { 17 | color: #ffffff; 18 | } 19 | .navbar-default .navbar-brand:hover { 20 | color: #5c6d70; 21 | } 22 | .navbar-default .navbar-nav>li>a { 23 | color: #ffffff; 24 | } 25 | .navbar-default .navbar-nav>li>a:hover { 26 | color: #5c6d70; 27 | } 28 | .navbar-form .form-control { 29 | color: #ffffff; 30 | } 31 | .navbar-form .form-control:focus { 32 | box-shadow: inset 0 -2px 0 #92140c; 33 | } 34 | .alert-info { 35 | background-color: #5c6d70; 36 | } 37 | .alert-warning { 38 | background-color: #92140c; 39 | } 40 | .alert-warning code { 41 | color: #ffffff; 42 | } 43 | .alert { 44 | color: #ffffff; 45 | } 46 | .alert a:not(.close) { 47 | color: #92140c; 48 | } 49 | .alert-warning a:not(.close) { 50 | color: #ffffff; 51 | font-style: italic; 52 | } 53 | a { 54 | color: #5c6d70; 55 | font-weight: bold; 56 | } 57 | a:hover { 58 | color: #92140c; 59 | } 60 | code { 61 | color: #92140c; 62 | } 63 | dt:target, .highlighted { 64 | background-color: #c1b4ae; 65 | } 66 | -------------------------------------------------------------------------------- /docs/static/ship.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wglass/kiel/12664fae522d95536c977bc3868951e7dce3daa0/docs/static/ship.png -------------------------------------------------------------------------------- /docs/templates/page.html: -------------------------------------------------------------------------------- 1 | {# Import the theme's layout. 
#} 2 | {% extends "!page.html" %} 3 | 4 | {# Custom CSS overrides #} 5 | {% set bootswatch_css_custom = ['_static/custom.css'] %} 6 | -------------------------------------------------------------------------------- /examples/counter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import argparse 4 | import collections 5 | import logging 6 | 7 | from tornado import gen, ioloop 8 | 9 | from kiel.clients import SingleConsumer 10 | 11 | 12 | log = logging.getLogger() 13 | 14 | 15 | parser = argparse.ArgumentParser( 16 | description="Example script that consumes messages of a given topic." 17 | ) 18 | parser.add_argument( 19 | "brokers", type=lambda v: v.split(","), 20 | help="Comma-separated list of bootstrap broker servers" 21 | ) 22 | parser.add_argument( 23 | "topic", type=str, 24 | help="Topic to consume" 25 | ) 26 | parser.add_argument( 27 | "--status_interval", type=int, default=5, 28 | help="Interval (in seconds) to print the current status." 29 | ) 30 | parser.add_argument( 31 | "--debug", type=bool, default=False, 32 | help="Sets the logging level to DEBUG" 33 | ) 34 | 35 | 36 | color_counter = collections.Counter() 37 | 38 | 39 | @gen.coroutine 40 | def run(c, args): 41 | yield c.connect() 42 | 43 | while True: 44 | msgs = yield c.consume(args.topic) 45 | 46 | color_counter.update([msg["color"] for msg in msgs]) 47 | 48 | 49 | def show_status(): 50 | print ( 51 | "counts: \n%s" % "\n".join([ 52 | "\t%s: %s" % (color, count) 53 | for color, count in color_counter.most_common() 54 | ]) 55 | ) 56 | 57 | 58 | def main(): 59 | args = parser.parse_args() 60 | loop = ioloop.IOLoop.instance() 61 | 62 | if args.debug: 63 | log.setLevel(logging.DEBUG) 64 | 65 | c = SingleConsumer(brokers=args.brokers) 66 | 67 | loop.add_callback(run, c, args) 68 | status_callback = ioloop.PeriodicCallback( 69 | show_status, args.status_interval * 1000 70 | ) 71 | 72 | def wind_down(_): 73 | status_callback.stop() 74 | loop.stop() 75 | 76 | try: 77 | status_callback.start() 78 | loop.start() 79 | except KeyboardInterrupt: 80 | c.close().add_done_callback(wind_down) 81 | 82 | 83 | if __name__ == "__main__": 84 | main() 85 | -------------------------------------------------------------------------------- /examples/maker.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import argparse 4 | import logging 5 | import random 6 | import time 7 | 8 | from tornado import gen, ioloop 9 | 10 | from kiel.clients import Producer 11 | 12 | 13 | log = logging.getLogger() 14 | 15 | 16 | parser = argparse.ArgumentParser( 17 | description="Example script that produces messages to a given topic." 18 | ) 19 | parser.add_argument( 20 | "brokers", type=lambda v: v.split(","), 21 | help="Comma-separated list of bootstrap broker servers" 22 | ) 23 | parser.add_argument( 24 | "topic", type=str, 25 | help="Topic to publish to" 26 | ) 27 | parser.add_argument( 28 | "--compression", type=str, default=None, 29 | choices=(None, "gzip", "snappy"), 30 | help="Which compression to use for messages (gzip, snappy or None)" 31 | ) 32 | parser.add_argument( 33 | "--batch_size", type=int, default=1, 34 | help="Number of messages to batch into single server requests." 35 | ) 36 | parser.add_argument( 37 | "--status_interval", type=int, default=5, 38 | help="Interval (in seconds) to print the current status." 
39 | ) 40 | parser.add_argument( 41 | "--debug", type=bool, default=False, 42 | help="Sets the logging level to DEBUG" 43 | ) 44 | 45 | 46 | counter = 0 47 | last_count = 0 48 | last_status = 0 49 | colors = [ 50 | "red", "green", "blue", "yellow", "orange", "purple", "white", "black" 51 | ] 52 | 53 | 54 | def key_maker(msg): 55 | return msg["counter"] 56 | 57 | 58 | def partitioner(key, partitions): 59 | return partitions[key % len(partitions)] 60 | 61 | 62 | @gen.coroutine 63 | def run(p, args): 64 | global counter 65 | 66 | yield p.connect() 67 | 68 | while True: 69 | counter += 1 70 | 71 | yield p.produce( 72 | args.topic, {"counter": counter, "color": random.choice(colors)} 73 | ) 74 | 75 | 76 | def show_status(): 77 | global last_count 78 | global last_status 79 | 80 | now = time.time() 81 | 82 | if not last_status: 83 | last_status = now 84 | 85 | count_since_last_status = counter - last_count 86 | time_since_last_status = (now - last_status) or 1 87 | 88 | print( 89 | "%s events (%s/sec)" % ( 90 | count_since_last_status, 91 | count_since_last_status / time_since_last_status 92 | ) 93 | ) 94 | 95 | last_count = counter 96 | last_status = now 97 | 98 | 99 | def main(): 100 | args = parser.parse_args() 101 | loop = ioloop.IOLoop.instance() 102 | 103 | if args.debug: 104 | log.setLevel(logging.DEBUG) 105 | 106 | p = Producer( 107 | brokers=args.brokers, 108 | key_maker=key_maker, 109 | partitioner=partitioner, 110 | batch_size=args.batch_size, 111 | compression=args.compression 112 | ) 113 | 114 | loop.add_callback(run, p, args) 115 | status_callback = ioloop.PeriodicCallback( 116 | show_status, args.status_interval * 1000 117 | ) 118 | 119 | def stop_loop(_): 120 | status_callback.stop() 121 | loop.stop() 122 | 123 | try: 124 | status_callback.start() 125 | loop.start() 126 | except KeyboardInterrupt: 127 | p.close().add_done_callback(stop_loop) 128 | 129 | 130 | if __name__ == "__main__": 131 | main() 132 | -------------------------------------------------------------------------------- /examples/worker.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import argparse 4 | import logging 5 | 6 | from tornado import gen, ioloop 7 | 8 | from kiel.clients import GroupedConsumer 9 | 10 | 11 | log = logging.getLogger() 12 | 13 | 14 | parser = argparse.ArgumentParser( 15 | description="Example grouped consumer that prints out messages it gets." 16 | ) 17 | parser.add_argument( 18 | "brokers", type=lambda v: v.split(","), 19 | help="Comma-separated list of bootstrap broker servers" 20 | ) 21 | parser.add_argument( 22 | "zk_hosts", type=lambda v: v.split(","), 23 | help="Comma-separated list of zookeeper servers." 
24 | ) 25 | parser.add_argument( 26 | "topic", type=str, 27 | help="Topic to publish to" 28 | ) 29 | parser.add_argument( 30 | "--debug", type=bool, default=False, 31 | help="Sets the logging level to DEBUG" 32 | ) 33 | 34 | 35 | def process_message(msg): 36 | print(msg) 37 | 38 | 39 | @gen.coroutine 40 | def run(c, args): 41 | yield c.connect() 42 | 43 | while True: 44 | msgs = yield c.consume(args.topic) 45 | 46 | for msg in msgs: 47 | process_message(msg) 48 | 49 | if msgs: 50 | c.commit_offsets() 51 | 52 | 53 | if __name__ == "__main__": 54 | args = parser.parse_args() 55 | loop = ioloop.IOLoop.instance() 56 | 57 | if args.debug: 58 | log.setLevel(logging.DEBUG) 59 | 60 | c = GroupedConsumer( 61 | brokers=args.brokers, 62 | group="worker-group", 63 | zk_hosts=args.zk_hosts, 64 | autocommit=False 65 | ) 66 | 67 | loop.add_callback(run, c, args) 68 | 69 | try: 70 | loop.start() 71 | except KeyboardInterrupt: 72 | c.close().add_done_callback(lambda f: loop.stop()) 73 | -------------------------------------------------------------------------------- /kiel/__init__.py: -------------------------------------------------------------------------------- 1 | version_info = (0, 9, 4) 2 | 3 | __version__ = ".".join(map(str, version_info)) + "-dev" 4 | -------------------------------------------------------------------------------- /kiel/clients/__init__.py: -------------------------------------------------------------------------------- 1 | from .single import SingleConsumer # noqa 2 | from .grouped import GroupedConsumer # noqa 3 | from .producer import Producer # noqa 4 | -------------------------------------------------------------------------------- /kiel/clients/client.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import six 4 | from tornado import gen, iostream 5 | 6 | from kiel.exc import BrokerConnectionError, UnhandledResponseError 7 | from kiel.cluster import Cluster 8 | 9 | 10 | log = logging.getLogger(__name__) 11 | 12 | 13 | class Client(object): 14 | """ 15 | Base class for all client classes. 16 | 17 | Handles basic cluster management and request sending. 18 | """ 19 | def __init__(self, brokers): 20 | super(Client, self).__init__() 21 | 22 | self.cluster = Cluster(brokers) 23 | 24 | self.heal_cluster = False 25 | self.closing = False 26 | 27 | @gen.coroutine 28 | def connect(self): 29 | """ 30 | Starts the underlying cluster, connecting and gathering metadata. 31 | """ 32 | yield self.cluster.start() 33 | 34 | @gen.coroutine 35 | def close(self): 36 | """ 37 | Marks a client as closing and winds down connections. 38 | 39 | Calls the ``wind_down()`` coroutine that subclasses must implement. 40 | """ 41 | self.closing = True 42 | 43 | yield self.wind_down() 44 | 45 | self.cluster.stop() 46 | 47 | @gen.coroutine 48 | def wind_down(self): 49 | """ 50 | Cleanup method left to subclasses to define. 51 | 52 | Called by ``close()``, should clean up any subclass-specific resources. 53 | """ 54 | raise NotImplementedError 55 | 56 | @gen.coroutine 57 | def send(self, request_by_broker): 58 | """ 59 | Sends a dict of requests keyed on broker ID and handles responses. 60 | 61 | Returns a dictionary of the results of 62 | ``handle__response`` method calls, keyed to the 63 | corresponding broker ID. 64 | 65 | Raises ``UnhandledResponseError`` if the client subclass does not have 66 | a ``handle__response`` method available to handle an 67 | incoming response object. 
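        For example (a sketch only, not a complete handler), a subclass that
        sends metadata requests would define a correspondingly named method::

            def handle_metadata_response(self, response):
                # inspect response.brokers / response.topics here
                ...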
68 | 69 | If an error occurs in a response, the ``heal_cluster`` flag is set 70 | and the ``heal()`` method on the cluster is called after processing 71 | each response. 72 | 73 | Responses are handled in the order they come in, but this method does 74 | not yield a value until all responses are handled. 75 | """ 76 | iterator = gen.WaitIterator(**{ 77 | str(broker_id): self.cluster[broker_id].send(request) 78 | for broker_id, request in six.iteritems(request_by_broker) 79 | }) 80 | 81 | results = {} 82 | while not iterator.done(): 83 | try: 84 | response = yield iterator.next() 85 | except BrokerConnectionError as e: 86 | log.info("Connection to %s:%s lost", e.host, e.port) 87 | self.heal_cluster = True 88 | continue 89 | except iostream.StreamClosedError: 90 | log.info("Connection to broker lost.") 91 | continue 92 | except Exception: 93 | log.exception("Error sending request.") 94 | self.heal_cluster = True 95 | continue 96 | 97 | handler = getattr(self, "handle_%s_response" % response.api, None) 98 | if handler is None: 99 | raise UnhandledResponseError(response.api) 100 | 101 | result = yield gen.maybe_future(handler(response)) 102 | results[int(iterator.current_index)] = result 103 | 104 | if self.heal_cluster: 105 | yield self.cluster.heal() 106 | self.heal_cluster = False 107 | 108 | raise gen.Return(results) 109 | -------------------------------------------------------------------------------- /kiel/clients/consumer.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import logging 3 | import json 4 | import socket 5 | 6 | import six 7 | from tornado import gen 8 | 9 | from kiel.exc import NoOffsetsError 10 | from kiel.protocol import fetch, errors 11 | from kiel.constants import CONSUMER_REPLICA_ID, ERROR_CODES 12 | 13 | from .client import Client 14 | 15 | 16 | log = logging.getLogger(__name__) 17 | 18 | 19 | class BaseConsumer(Client): 20 | """ 21 | Base class for consumers, provides `consume()` but no parition allocation. 22 | 23 | Allows for customizing the ``deserialier`` used. Default is a JSON 24 | deserializer. 25 | """ 26 | def __init__( 27 | self, 28 | brokers, 29 | deserializer=None, 30 | max_wait_time=1000, # in milliseconds 31 | min_bytes=1, 32 | max_bytes=(1024 * 1024), 33 | ): 34 | super(BaseConsumer, self).__init__(brokers) 35 | 36 | self.name = ":".join([socket.gethostname(), str(id(self))]) 37 | 38 | self.deserializer = deserializer or json.loads 39 | 40 | self.max_wait_time = max_wait_time 41 | self.min_bytes = min_bytes 42 | self.max_bytes = max_bytes 43 | 44 | self.offsets = collections.defaultdict( 45 | lambda: collections.defaultdict(int) 46 | ) 47 | self.synced_offsets = set() 48 | 49 | @property 50 | def allocation(self): 51 | """ 52 | Property meant to denote which topics and partitions this consumer 53 | should be aware of. 54 | 55 | This is left to subclasses to implement, as it is one of the main 56 | behavioral differences between a single consumer and a grouped 57 | consumer. 58 | """ 59 | raise NotImplementedError 60 | 61 | @gen.coroutine 62 | def determine_offsets(self, topic, start=None): 63 | """ 64 | Subclass coroutine function for setting values in ``self.offsets``. 65 | 66 | Kafka offers a simple "offset" api as well as a more involved set 67 | of offset fetch and commit apis. Determining which ones to use and 68 | how is left to the subclasses. 
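        A trivial sketch of an override (illustrative only) would start every
        known partition at offset zero::

            @gen.coroutine
            def determine_offsets(self, topic, start=None):
                for partition_id in self.allocation[topic]:
                    self.offsets[topic][partition_id] = 0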
69 | """ 70 | raise NotImplementedError 71 | 72 | @gen.coroutine 73 | def consume(self, topic, start=None): 74 | """ 75 | Fetches from a given topics returns a list of deserialized values. 76 | 77 | If the given topic is not known to have synced offsets, a call to 78 | `determine_offsets()` is made first. 79 | 80 | If a topic is unknown entirely the cluster's ``heal()`` method is 81 | called and the check retried. 82 | 83 | Since error codes and deserialization are taken care of by 84 | `handle_fetch_response` this method merely yields to wait on the 85 | deserialized results and returns a flattened list. 86 | """ 87 | if self.closing: 88 | return 89 | 90 | if topic not in self.synced_offsets: 91 | try: 92 | yield self.determine_offsets(topic, start) 93 | except NoOffsetsError: 94 | log.error("Unable to determine offsets for topic %s", topic) 95 | raise gen.Return([]) 96 | self.synced_offsets.add(topic) 97 | 98 | if topic not in self.allocation or not self.allocation[topic]: 99 | log.debug("Consuming unknown topic %s, reloading metadata", topic) 100 | yield self.cluster.heal() 101 | 102 | if topic not in self.allocation or not self.allocation[topic]: 103 | log.error("Consuming unknown topic %s and not auto-created", topic) 104 | raise gen.Return([]) 105 | 106 | ordered = collections.defaultdict(list) 107 | for partition_id in self.allocation[topic]: 108 | leader = self.cluster.get_leader(topic, partition_id) 109 | ordered[leader].append(partition_id) 110 | 111 | requests = {} 112 | for leader, partitions in six.iteritems(ordered): 113 | max_partition_bytes = int(self.max_bytes / len(partitions)) 114 | requests[leader] = fetch.FetchRequest( 115 | replica_id=CONSUMER_REPLICA_ID, 116 | max_wait_time=self.max_wait_time, 117 | min_bytes=self.min_bytes, 118 | topics=[ 119 | fetch.TopicRequest(name=topic, partitions=[ 120 | fetch.PartitionRequest( 121 | partition_id=partition_id, 122 | offset=self.offsets[topic][partition_id], 123 | max_bytes=max_partition_bytes, 124 | ) 125 | for partition_id in partitions 126 | ]) 127 | ] 128 | ) 129 | 130 | results = yield self.send(requests) 131 | raise gen.Return([ 132 | msg for messageset in results.values() for msg in messageset 133 | if messageset 134 | ]) 135 | 136 | def handle_fetch_response(self, response): 137 | """ 138 | Handler for responses from the message "fetch" api. 139 | 140 | Messages returned with the "no error" code are deserialized and 141 | collected, the full resulting list is returned. 142 | 143 | A retriable error code will cause the cluster "heal" flag to be set. 144 | 145 | An error indicating that the offset used for the partition was out 146 | of range will cause the offending topic's offsets to be redetermined 147 | on the next call to `consume()`. 148 | 149 | .. note:: 150 | This class and its subclasses assume that fetch requests are made 151 | on one topic at a time, so this handler only deals with the first 152 | topic returned. 
153 | """ 154 | messages = [] 155 | 156 | # we only fetch one topic so we can assume only one comes back 157 | topic = response.topics[0].name 158 | for partition in response.topics[0].partitions: 159 | code = partition.error_code 160 | if code == errors.no_error: 161 | messages.extend(self.deserialize_messages(topic, partition)) 162 | elif code in errors.retriable: 163 | self.heal_cluster = True 164 | elif code == errors.offset_out_of_range: 165 | log.warn("Offset out of range for topic %s", topic) 166 | self.synced_offsets.discard(topic) 167 | else: 168 | log.error( 169 | "Got error %s for topic %s partition %s", 170 | ERROR_CODES[code], topic, partition.partition_id 171 | ) 172 | 173 | return messages 174 | 175 | def deserialize_messages(self, topic_name, partition): 176 | """ 177 | Calls the ``deserializer`` on each ``Message`` value and gives the 178 | result. 179 | 180 | If an error is encountered when deserializing it is logged and the 181 | offending message is skipped. 182 | 183 | After each successful deserialization the ``self.offsets`` entry for 184 | the particular topic/partition pair is incremented. 185 | """ 186 | messages = [] 187 | for offset, msg in partition.message_set.messages: 188 | try: 189 | value = self.deserializer(msg.value) 190 | except Exception: 191 | log.exception( 192 | "Error deserializing message: '%r'", 193 | getattr(msg, "value", "No value on msg!") 194 | ) 195 | continue 196 | 197 | messages.append(value) 198 | self.offsets[topic_name][partition.partition_id] = offset + 1 199 | 200 | return messages 201 | -------------------------------------------------------------------------------- /kiel/clients/producer.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import logging 3 | import json 4 | import random 5 | 6 | import six 7 | from tornado import gen 8 | 9 | from kiel.protocol import produce as produce_api, messages, errors 10 | from kiel.constants import SUPPORTED_COMPRESSION, ERROR_CODES 11 | from kiel.iterables import drain 12 | 13 | from .client import Client 14 | 15 | 16 | log = logging.getLogger(__name__) 17 | 18 | 19 | class Producer(Client): 20 | """ 21 | Client class used to "produce" messages to Kafka topics. 22 | 23 | Allows for customizing the ``serializer``, ``key_maker`` and 24 | ``partitioner`` functions. By default a JSON serializer is used, along 25 | with a no-op key maker and a partitioner that chooses at random. 
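    A minimal usage sketch (illustrative, assuming a broker on ``localhost``
    and an ``examples.colors`` topic)::

        from tornado import gen

        from kiel.clients import Producer

        @gen.coroutine
        def send_one():
            p = Producer(brokers=["localhost"])
            yield p.connect()
            yield p.produce("examples.colors", {"color": "blue"})
            yield p.close()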
26 | """ 27 | def __init__( 28 | self, 29 | brokers, 30 | serializer=None, 31 | key_maker=None, 32 | partitioner=None, 33 | batch_size=1, 34 | compression=None, 35 | required_acks=-1, 36 | ack_timeout=500, # milliseconds 37 | ): 38 | super(Producer, self).__init__(brokers) 39 | 40 | if compression not in SUPPORTED_COMPRESSION: 41 | raise ValueError( 42 | "Invalid compression value %s,must be one of %s", 43 | compression, ", ".join(map(str, SUPPORTED_COMPRESSION)) 44 | ) 45 | self.compression = compression 46 | 47 | def json_serializer(message): 48 | return json.dumps(message, sort_keys=True) 49 | 50 | def null_key_maker(_): 51 | return None 52 | 53 | def random_partitioner(_, partitions): 54 | return random.choice(partitions) 55 | 56 | self.serializer = serializer or json_serializer 57 | self.key_maker = key_maker or null_key_maker 58 | self.partitioner = partitioner or random_partitioner 59 | 60 | self.batch_size = batch_size 61 | self.required_acks = required_acks 62 | self.ack_timeout = ack_timeout 63 | 64 | # dictionary of topic -> messages 65 | self.unsent = collections.defaultdict(list) 66 | # dictionary of correlation id -> topic -> partition -> messages 67 | self.sent = collections.defaultdict( 68 | lambda: collections.defaultdict(dict) 69 | ) 70 | 71 | @property 72 | def unsent_count(self): 73 | """ 74 | Property representing the sum total of pending messages to be sent. 75 | """ 76 | return sum([len(unsent) for unsent in self.unsent.values()]) 77 | 78 | @gen.coroutine 79 | def produce(self, topic, message): 80 | """ 81 | Primary method that queues messages up to be flushed to the brokers. 82 | 83 | Performs sanity checks to make sure we're not closing and that the 84 | topic given is known. 85 | 86 | If the topic given is *not* known, the ``heal()`` method on the cluster 87 | is called and the check is performed again. 88 | 89 | Depending on the ``batch_size`` attribute this call may not actually 90 | send any requests and merely keeps the pending messages in the 91 | ``unsent`` structure. 92 | """ 93 | if self.closing: 94 | log.warn("Producing to %s topic while closing.", topic) 95 | return 96 | 97 | if topic not in self.cluster.topics: 98 | log.debug("Producing to unknown topic %s, loading metadata", topic) 99 | yield self.cluster.heal() 100 | 101 | if topic not in self.cluster.topics: 102 | log.error("Unknown topic %s and not auto-created", topic) 103 | return 104 | 105 | self.unsent[topic].append( 106 | messages.Message( 107 | magic=0, 108 | attributes=0, 109 | key=self.key_maker(message), 110 | value=self.serializer(message) 111 | ) 112 | ) 113 | 114 | if not self.batch_size or self.unsent_count >= self.batch_size: 115 | yield self.flush() 116 | 117 | def queue_retries(self, topic, msgs): 118 | """ 119 | Re-inserts the given messages into the ``unsent`` structure. 120 | 121 | This also sets the flag to denote that a cluster "heal" is necessary. 122 | """ 123 | log.debug("Queueing %d messages for retry", len(msgs)) 124 | self.unsent[topic].extend(msgs) 125 | self.heal_cluster = True 126 | 127 | @gen.coroutine 128 | def flush(self): 129 | """ 130 | Transforms the ``unsent`` structure to produce requests and sends them. 131 | 132 | The first order of business is to order the pending messages in 133 | ``unsent`` based on partition leader. If a message's partition leader 134 | is not a know broker, the message is queued up to be retried and the 135 | flag denoting that a cluster ``heal()`` call is needed is set. 
136 | 137 | Once the legitimate messages are ordered, instances of ProduceRequest 138 | are created for each broker and sent. 139 | """ 140 | if not self.unsent: 141 | return 142 | 143 | # leader -> topic -> partition -> message list 144 | ordered = collections.defaultdict( 145 | lambda: collections.defaultdict( 146 | lambda: collections.defaultdict(list) 147 | ) 148 | ) 149 | 150 | to_retry = collections.defaultdict(list) 151 | 152 | for topic, msgs in drain(self.unsent): 153 | for msg in msgs: 154 | partition = self.partitioner( 155 | msg.key, self.cluster.topics[topic] 156 | ) 157 | leader = self.cluster.get_leader(topic, partition) 158 | if leader not in self.cluster: 159 | to_retry[topic].append(msg) 160 | continue 161 | ordered[leader][topic][partition].append(msg) 162 | 163 | requests = {} 164 | for leader, topics in six.iteritems(ordered): 165 | requests[leader] = produce_api.ProduceRequest( 166 | required_acks=self.required_acks, 167 | timeout=self.ack_timeout, 168 | topics=[] 169 | ) 170 | for topic, partitions in six.iteritems(topics): 171 | requests[leader].topics.append( 172 | produce_api.TopicRequest(name=topic, partitions=[]) 173 | ) 174 | for partition_id, msgs in six.iteritems(partitions): 175 | requests[leader].topics[-1].partitions.append( 176 | produce_api.PartitionRequest( 177 | partition_id=partition_id, 178 | message_set=messages.MessageSet.compressed( 179 | self.compression, msgs 180 | ) 181 | ) 182 | ) 183 | self.sent[ 184 | requests[leader].correlation_id 185 | ][topic][partition_id] = msgs 186 | 187 | for topic, msgs in six.iteritems(to_retry): 188 | self.queue_retries(topic, msgs) 189 | 190 | yield self.send(requests) 191 | 192 | def handle_produce_response(self, response): 193 | """ 194 | Handler for produce api responses, discards or retries as needed. 195 | 196 | For the "no error" result, the corresponding messages are discarded 197 | from the ``sent`` structure. 198 | 199 | For retriable error codes the affected messages are queued up to be 200 | retried. 201 | 202 | .. warning:: 203 | For fatal error codes the error is logged and no further action is 204 | taken. The affected messages are not retried and effectively written 205 | over with the next call to `produce()`. 206 | """ 207 | for topic in response.topics: 208 | for partition in topic.partitions: 209 | code = partition.error_code 210 | if code == errors.no_error: 211 | pass 212 | elif code in errors.retriable: 213 | msgs = self.sent[response.correlation_id][topic.name].pop( 214 | partition.partition_id 215 | ) 216 | self.queue_retries(topic.name, msgs) 217 | else: 218 | log.error( 219 | "Got error %s for topic %s partition %s", 220 | ERROR_CODES[code], topic.name, partition.partition_id 221 | ) 222 | 223 | self.sent.pop(response.correlation_id) 224 | 225 | @gen.coroutine 226 | def wind_down(self): 227 | """ 228 | Flushes the unsent messages so that none are lost when closing down. 
229 | """ 230 | yield self.flush() 231 | -------------------------------------------------------------------------------- /kiel/clients/single.py: -------------------------------------------------------------------------------- 1 | import calendar 2 | import collections 3 | import datetime 4 | import logging 5 | import time 6 | 7 | from tornado import gen 8 | 9 | from kiel.exc import NoOffsetsError 10 | from kiel.protocol import offset, errors 11 | from kiel.constants import ERROR_CODES, CONSUMER_REPLICA_ID 12 | 13 | from .consumer import BaseConsumer 14 | 15 | 16 | EPOCH = datetime.datetime(1970, 1, 1) 17 | 18 | 19 | log = logging.getLogger(__name__) 20 | 21 | 22 | class SingleConsumer(BaseConsumer): 23 | """ 24 | Usable consumer class for isolated-consumer use cases. 25 | 26 | By "isolated" consumer, that means that the consumer runs independently 27 | of other consumers and does not need to apportion work among others. 28 | 29 | Uses the basic ``offset`` api to determine topic/partition offsets. 30 | """ 31 | #: special offset api value for 'beginning offset' 32 | BEGINNING = -2 33 | #: special offset api value for 'very latest offset' 34 | END = -1 35 | 36 | @property 37 | def allocation(self): 38 | """ 39 | For single consumers the allocation is all topics and partitions. 40 | """ 41 | return self.cluster.topics 42 | 43 | @gen.coroutine 44 | def determine_offsets(self, topic, start=None): 45 | """ 46 | Sends OffsetRequests to the cluster for a given topic and start point. 47 | 48 | The ``start`` parameter can be any of of ``datetime.datetime``, 49 | ``datetime.timedelta`` or one of `SingleConsumer.BEGINNING` or 50 | `SingleConsumer.END`. The value is translated into epoch seconds 51 | if need be and used for the "time" parameter for the offset requests. 52 | 53 | An offset request is sent to each of the leader brokers for the given 54 | topic. 55 | """ 56 | log.info("Getting offsets for topic %s with start %s", topic, start) 57 | 58 | if start is None: 59 | start = self.END 60 | 61 | offset_time = start_to_timestamp(start) 62 | 63 | def request_factory(): 64 | return offset.OffsetRequest( 65 | replica_id=CONSUMER_REPLICA_ID, 66 | topics=[ 67 | offset.TopicRequest(name=topic, partitions=[]) 68 | ] 69 | ) 70 | 71 | requests = collections.defaultdict(request_factory) 72 | 73 | for partition_id in self.allocation[topic]: 74 | leader = self.cluster.get_leader(topic, partition_id) 75 | requests[leader].topics[0].partitions.append( 76 | offset.PartitionRequest( 77 | partition_id=partition_id, 78 | time=offset_time, 79 | max_offsets=1 80 | ) 81 | ) 82 | 83 | log.debug( 84 | "Sending offset request to %d leaders.", len(requests.keys()) 85 | ) 86 | yield self.send(requests) 87 | 88 | raise gen.Return(True) 89 | 90 | def handle_offset_response(self, response): 91 | """ 92 | Handles responses from the offset api and sets ``self.offsets`` values. 93 | 94 | A succesful response will update the topic/partition pair entry in 95 | the ``self.offsets`` structure. 96 | 97 | A retriable error code response will cause the cluster's ``heal()`` 98 | method to be called at the end of processing and the offending topic's 99 | offsets to be re-evaluated on the next `consume()` call. 
100 | """ 101 | # we only fetch one topic so we can assume only one comes back 102 | topic = response.topics[0].name 103 | for partition in response.topics[0].partitions: 104 | code = partition.error_code 105 | if code == errors.no_error: 106 | offset = partition.offsets[0] 107 | self.offsets[topic][partition.partition_id] = offset 108 | elif code in errors.retriable: 109 | self.heal_cluster = True 110 | self.synced_offsets.discard(topic) 111 | else: 112 | log.error( 113 | "Got error %s for topic %s partition %s", 114 | ERROR_CODES[code], topic, partition.partition_id 115 | ) 116 | raise NoOffsetsError 117 | 118 | @gen.coroutine 119 | def wind_down(self): 120 | """ 121 | The single consumer keeps little to no state so wind down is a no-op. 122 | """ 123 | pass 124 | 125 | 126 | def start_to_timestamp(start): 127 | """ 128 | Helper method for translating "start" values into offset api values. 129 | 130 | Valid values are instances of ``datetime.datetime``, ``datetime.timedelta`` 131 | or one of `SingleConsumer.BEGINNING` or `SingleConsumer.END`. 132 | """ 133 | if isinstance(start, datetime.datetime): 134 | offset_time = (start - EPOCH).total_seconds() 135 | elif isinstance(start, datetime.timedelta): 136 | now = calendar.timegm(time.gmtime()) 137 | offset_time = now - start.total_seconds() 138 | else: 139 | offset_time = start 140 | 141 | return offset_time 142 | -------------------------------------------------------------------------------- /kiel/cluster.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import logging 3 | 4 | from tornado import gen, iostream 5 | 6 | from kiel.protocol import metadata, errors 7 | from kiel.constants import DEFAULT_KAFKA_PORT 8 | from kiel.exc import BrokerConnectionError, NoBrokersError 9 | 10 | from .connection import Connection 11 | 12 | 13 | log = logging.getLogger(__name__) 14 | 15 | 16 | class Cluster(object): 17 | """ 18 | Class representing a Kafka cluster. 19 | 20 | Handles a dict of ``Connection`` objects, one for each known broker 21 | and keyed off of the broker ID. 22 | 23 | Also keeps metadata information for topics, their partitions, and the 24 | partition leader brokers. 25 | """ 26 | def __init__(self, bootstrap_hosts): 27 | self.bootstrap_hosts = bootstrap_hosts 28 | 29 | self.conns = {} 30 | self.topics = collections.defaultdict(list) 31 | self.leaders = collections.defaultdict(dict) 32 | 33 | def __getitem__(self, broker_id): 34 | """ 35 | Proxies to the ``__getitem__`` of the underlying conns dictionary. 36 | 37 | Allows for the client to say 38 | 39 | ``self.cluster[broker_id].send()`` 40 | 41 | and such. 42 | """ 43 | return self.conns[broker_id] 44 | 45 | def __contains__(self, broker_id): 46 | """ 47 | Proxies the ``__contains__`` method of the conns dictionary. 48 | 49 | Allows for the client to test if a broker is present via 50 | 51 | ``broker_id in self.cluster`` 52 | """ 53 | return broker_id in self.conns 54 | 55 | def __iter__(self): 56 | """ 57 | Procies the ``__iter__`` method of the conns dictionary. 58 | 59 | In effect allows for iterating over known broker_id values: 60 | 61 | ``for broker_id in self.cluster:`` 62 | """ 63 | return iter(self.conns) 64 | 65 | def get_leader(self, topic, partition_id): 66 | """ 67 | Returns the leader broker ID for a given topic/partition combo. 
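        A typical pattern in client code (a sketch; ``request`` stands in for
        any protocol request object)::

            leader = self.cluster.get_leader(topic, partition_id)
            if leader in self.cluster:
                response = yield self.cluster[leader].send(request)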
68 | """ 69 | return self.leaders[topic][partition_id] 70 | 71 | @gen.coroutine 72 | def start(self): 73 | """ 74 | Establishes connections to the brokers in a cluster as well as 75 | gathers topic/partition metadata. 76 | 77 | Cycles through each bootstrap host and attempts to send a metadata 78 | request. Once a metadata request is successful the `heal()` method 79 | is called. 80 | """ 81 | response = None 82 | 83 | for host in self.bootstrap_hosts: 84 | if ":" in host: 85 | host, port = host.split(":") 86 | else: 87 | port = DEFAULT_KAFKA_PORT 88 | 89 | conn = Connection(host, int(port)) 90 | 91 | log.info("Using bootstrap host '%s'", host) 92 | 93 | try: 94 | yield conn.connect() 95 | except (iostream.StreamClosedError, BrokerConnectionError): 96 | log.warn("Could not connect to bootstrap %s:%s", host, port) 97 | continue 98 | except Exception: 99 | log.exception("Error connecting to bootstrap host '%s'", host) 100 | continue 101 | 102 | response = yield conn.send(metadata.MetadataRequest(topics=[])) 103 | 104 | conn.close() 105 | break 106 | 107 | if not response: 108 | raise NoBrokersError 109 | 110 | log.info("Metadata gathered, setting up connections.") 111 | yield self.heal(response) 112 | 113 | @gen.coroutine 114 | def heal(self, response=None): 115 | """ 116 | Syncs the state of the cluster with metadata retrieved from a broker. 117 | 118 | If not response argument is given, a call to `get_metatadata()` fetches 119 | fresh information. 120 | 121 | As a first step this will cull any closing/aborted connections from the 122 | cluster. This is followed by repeated calls to `process_brokers()` and 123 | `process_topics()` until both signal that there are no missing brokers 124 | or topics. 125 | """ 126 | if not response: 127 | response = yield self.get_metadata() 128 | 129 | broker_ids = list(self.conns.keys()) 130 | for broker_id in broker_ids: 131 | if self.conns[broker_id].closing: 132 | log.debug( 133 | "Removing %s:%s from cluster", 134 | self.conns[broker_id].host, self.conns[broker_id].port 135 | ) 136 | self.conns.pop(broker_id) 137 | 138 | missing_conns = yield self.process_brokers(response.brokers) 139 | missing_topics = self.process_topics(response.topics) 140 | while missing_conns or missing_topics: 141 | response = yield self.get_metadata(topics=list(missing_topics)) 142 | missing_conns = yield self.process_brokers(response.brokers) 143 | missing_topics = self.process_topics(response.topics) 144 | 145 | @gen.coroutine 146 | def get_metadata(self, topics=None): 147 | """ 148 | Retrieves metadata from a broker in the cluster, optionally limited 149 | to a set of topics. 150 | 151 | Each connection in the cluster is tried until one works. If no 152 | connection in the cluster responds, a ``NoBrokersError`` is raised. 153 | """ 154 | log.debug("Gathering metadata (topics=%s)", topics) 155 | if topics is None: 156 | topics = [] 157 | 158 | response = None 159 | for conn in self.conns.values(): 160 | try: 161 | response = yield conn.send( 162 | metadata.MetadataRequest(topics=topics) 163 | ) 164 | break 165 | except (iostream.StreamClosedError, BrokerConnectionError): 166 | continue 167 | 168 | if not response: 169 | raise NoBrokersError 170 | 171 | raise gen.Return(response) 172 | 173 | @gen.coroutine 174 | def process_brokers(self, brokers): 175 | """ 176 | Syncs the ``self.conn`` connection dictionary with given broker 177 | metadata, returning a set of broker IDs that were in the metadata but 178 | had failing connections. 
179 | 180 | Known connections that are not present in the given metadata will have 181 | ``abort()`` called on them. 182 | """ 183 | to_drop = set(self.conns.keys()) - set([b.broker_id for b in brokers]) 184 | 185 | missing = set() 186 | 187 | for broker in brokers: 188 | if broker.broker_id in self.conns: 189 | continue 190 | 191 | try: 192 | conn = Connection(broker.host, broker.port) 193 | yield conn.connect() 194 | self.conns[broker.broker_id] = conn 195 | except iostream.StreamClosedError: 196 | log.warn( 197 | "Could not add broker %s (%s:%s)", 198 | broker.broker_id, broker.host, broker.port, 199 | ) 200 | missing.add(broker.broker_id) 201 | continue 202 | except Exception: 203 | log.exception( 204 | "Error adding broker %s (%s:%s)", 205 | broker.broker_id, broker.host, broker.port, 206 | ) 207 | missing.add(broker.broker_id) 208 | continue 209 | 210 | for broker_id in to_drop: 211 | self.conns[broker_id].abort() 212 | 213 | raise gen.Return(missing) 214 | 215 | def process_topics(self, response_topics): 216 | """ 217 | Syncs the cluster's topic/partition metadata with a given response. 218 | Returns a set of topic names that were either missing data or had 219 | unknown leader IDs. 220 | 221 | Works by iterating over the topic metadatas and their partitions, 222 | checking for error codes and a connection matching the leader ID. 223 | 224 | Once complete the ``self.topics`` and ``self.leaders`` dictonaries are 225 | set with the newly validated information. 226 | """ 227 | topics = collections.defaultdict(list) 228 | leaders = collections.defaultdict(dict) 229 | 230 | missing = set() 231 | 232 | for topic in response_topics: 233 | if topic.error_code == errors.unknown_topic_or_partition: 234 | log.error("Unknown topic %s", topic.name) 235 | continue 236 | if topic.error_code == errors.replica_not_available: 237 | missing.add(topic.name) 238 | continue 239 | 240 | for partition in topic.partitions: 241 | if partition.error_code == errors.leader_not_available: 242 | log.warn( 243 | "Leader not available for %s|%s, election in progress", 244 | topic.name, partition.partition_id 245 | ) 246 | missing.add(topic.name) 247 | continue 248 | if partition.leader not in self.conns: 249 | log.warn( 250 | "Leader for %s|%s not in current connections.", 251 | topic.name, partition.partition_id 252 | ) 253 | missing.add(topic.name) 254 | continue 255 | 256 | topics[topic.name].append(partition.partition_id) 257 | leaders[topic.name][partition.partition_id] = partition.leader 258 | 259 | self.topics = topics 260 | self.leaders = leaders 261 | 262 | return missing 263 | 264 | def stop(self): 265 | """ 266 | Simple method that calls ``close()`` on each connection. 267 | """ 268 | for conn in self.conns.values(): 269 | conn.close() 270 | -------------------------------------------------------------------------------- /kiel/compression/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wglass/kiel/12664fae522d95536c977bc3868951e7dce3daa0/kiel/compression/__init__.py -------------------------------------------------------------------------------- /kiel/compression/gzip.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import gzip 4 | 5 | from six import BytesIO 6 | 7 | 8 | def compress(data): 9 | """ 10 | Compresses a given bit of data via the ``gzip`` stdlib module. 11 | 12 | .. 
note:: 13 | 14 | This assumes the given data is a byte string, already decoded. 15 | """ 16 | buff = BytesIO() 17 | 18 | with gzip.GzipFile(fileobj=buff, mode='w') as fd: 19 | fd.write(data) 20 | 21 | buff.seek(0) 22 | result = buff.read() 23 | 24 | buff.close() 25 | 26 | return result 27 | 28 | 29 | def decompress(data): 30 | """ 31 | Decompresses given data via the ``gzip`` module. 32 | 33 | Decoding is left as an exercise for the client code. 34 | """ 35 | buff = BytesIO(data) 36 | 37 | with gzip.GzipFile(fileobj=buff, mode='r') as fd: 38 | result = fd.read() 39 | 40 | buff.close() 41 | 42 | return result 43 | -------------------------------------------------------------------------------- /kiel/compression/snappy.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import struct 4 | 5 | try: 6 | import snappy as snappy 7 | snappy_available = True 8 | except ImportError: # pragma: no cover 9 | snappy_available = False 10 | 11 | from six import BytesIO 12 | 13 | 14 | DEFAULT_VERSION = 1 15 | MIN_COMPAT_VERSION = 1 16 | 17 | MAGIC_HEADER = ( 18 | -126, b'S', b'N', b'A', b'P', b'P', b'Y', 0, 19 | DEFAULT_VERSION, MIN_COMPAT_VERSION 20 | ) 21 | 22 | BLOCK_SIZE = 32 * 1024 # 32kb, in bytes 23 | 24 | raw_header = struct.pack("!bccccccbii", *MAGIC_HEADER) 25 | 26 | 27 | def compress(data): 28 | """ 29 | Compresses given data via the snappy algorithm. 30 | 31 | The result is preceded with a header containing the string 'SNAPPY' and the 32 | default and min-compat versions (both ``1``). 33 | 34 | The block size for the compression is hard-coded at 32kb. 35 | 36 | If ``python-snappy`` is not installed a ``RuntimeError`` is raised. 37 | """ 38 | if not snappy_available: 39 | raise RuntimeError("Snappy compression unavailable.") 40 | 41 | buff = BytesIO() 42 | buff.write(raw_header) 43 | 44 | for block_num in range(0, len(data), BLOCK_SIZE): 45 | block = data[block_num:block_num + BLOCK_SIZE] 46 | compressed = snappy.compress(block) 47 | 48 | buff.write(struct.pack("!i", len(compressed))) 49 | buff.write(compressed) 50 | 51 | result = buff.getvalue() 52 | 53 | buff.close() 54 | 55 | return result 56 | 57 | 58 | def decompress(data): 59 | """ 60 | Decompresses the given data via the snappy algorithm. 61 | 62 | If ``python-snappy`` is not installed a ``RuntimeError`` is raised. 
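    A round-trip sketch (illustrative, assuming ``python-snappy`` is
    installed)::

        from kiel.compression import snappy

        payload = b"a message payload"
        assert snappy.decompress(snappy.compress(payload)) == payload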
63 | """ 64 | if not snappy_available: 65 | raise RuntimeError("Snappy compression unavailable.") 66 | 67 | buff_offset = len(raw_header) # skip the header 68 | length = len(data) - len(raw_header) 69 | 70 | output = BytesIO() 71 | 72 | while buff_offset <= length: 73 | block_size = struct.unpack_from("!i", data, buff_offset)[0] 74 | buff_offset += struct.calcsize("!i") 75 | 76 | block = struct.unpack_from("!%ds" % block_size, data, buff_offset)[0] 77 | buff_offset += block_size 78 | 79 | output.write(snappy.uncompress(block)) 80 | 81 | result = output.getvalue() 82 | 83 | output.close() 84 | 85 | return result 86 | -------------------------------------------------------------------------------- /kiel/connection.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import logging 3 | import socket 4 | import struct 5 | import sys 6 | 7 | from tornado import ioloop, iostream, gen, concurrent 8 | 9 | from kiel.exc import BrokerConnectionError 10 | from kiel.protocol import ( 11 | metadata, coordinator, 12 | produce, fetch, 13 | offset, offset_commit, offset_fetch 14 | ) 15 | 16 | 17 | log = logging.getLogger(__name__) 18 | 19 | # all messages start with a 4-byte signed integer representing raw payload size 20 | size_struct = struct.Struct("!i") 21 | # all responses start with a 4-byte correlation ID to match with the request 22 | correlation_struct = struct.Struct("!i") 23 | 24 | response_classes = { 25 | "metadata": metadata.MetadataResponse, 26 | "produce": produce.ProduceResponse, 27 | "fetch": fetch.FetchResponse, 28 | "offset": offset.OffsetResponse, 29 | "offset_commit": offset_commit.OffsetCommitResponse, 30 | "offset_fetch": offset_fetch.OffsetFetchResponse, 31 | "group_coordinator": coordinator.GroupCoordinatorResponse 32 | } 33 | 34 | 35 | class Connection(object): 36 | """ 37 | This class represents a single connection to a single broker host. 38 | 39 | Does not protect against any exceptions when connecting, those are expected 40 | to be handled by the cluster object. 41 | 42 | The main use of this class is the `send()` method, used to send protocol 43 | request classes over the wire. 44 | 45 | .. note:: 46 | This is the only class where the ``correlation_id`` should be used. 47 | These IDs are used to correlate requests and responses over a single 48 | connection and are meaningless outside said connection. 49 | """ 50 | def __init__(self, host, port): 51 | self.host = host 52 | self.port = int(port) 53 | 54 | self.stream = None 55 | self.closing = False 56 | 57 | self.api_correlation = {} 58 | self.pending = {} 59 | 60 | @gen.coroutine 61 | def connect(self): 62 | """ 63 | Connects to the broker host and fires the ``read_loop`` callback. 64 | 65 | The socket is wrapped in a tornado ``iostream.IOStream`` to take 66 | advantage of its handy async methods. 67 | """ 68 | sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0) 69 | self.stream = iostream.IOStream(sock) 70 | 71 | log.info("Connecting to broker %s:%d", self.host, self.port) 72 | yield self.stream.connect((self.host, self.port)) 73 | 74 | ioloop.IOLoop.current().add_callback(self.read_loop) 75 | 76 | def close(self): 77 | """ 78 | Sets the ``closing`` attribute to ``True`` and calls ``close()`` on the 79 | underlying stream. 80 | """ 81 | self.closing = True 82 | self.stream.close() 83 | 84 | @contextlib.contextmanager 85 | def socket_error_handling(self, failure_message): 86 | """ 87 | helper contextmanager for handling errors during IOStream operations. 
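        For instance, the ``send()`` method wraps its stream writes like so
        (``payload`` and ``handle_write`` as defined in ``send()``)::

            with self.socket_error_handling("Error writing to socket."):
                self.stream.write(payload).add_done_callback(handle_write)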
88 | 89 | Handles the StreamClosedError case by setting the ``closing`` flag, 90 | logs any unexpected exceptions with a failure message. 91 | """ 92 | try: 93 | yield 94 | except iostream.StreamClosedError: 95 | self.closing = True 96 | except Exception: 97 | if not self.closing: 98 | log.exception(failure_message) 99 | self.abort() 100 | 101 | def send(self, message): 102 | """ 103 | Sends a serialized request to the broker and returns a pending future. 104 | 105 | If any error occurs when writing immediately or asynchronously, the 106 | `abort()` method is called. 107 | 108 | The retured ``Future`` is stored in the ``self.pending`` dictionary 109 | keyed on correlation id, so that clients can say 110 | 111 | ``response = yield conn.send(message)`` 112 | 113 | and expect the correctly correlated response (or a raised exception) 114 | regardless of when the broker responds. 115 | """ 116 | f = concurrent.Future() 117 | 118 | if self.closing: 119 | f.set_exception(BrokerConnectionError(self.host, self.port)) 120 | return f 121 | 122 | payload = message.serialize() 123 | payload = size_struct.pack(len(payload)) + payload 124 | 125 | self.api_correlation[message.correlation_id] = message.api 126 | self.pending[message.correlation_id] = f 127 | 128 | def handle_write(write_future): 129 | with self.socket_error_handling("Error writing to socket."): 130 | write_future.result() 131 | 132 | with self.socket_error_handling("Error writing to socket."): 133 | self.stream.write(payload).add_done_callback(handle_write) 134 | 135 | return f 136 | 137 | @gen.coroutine 138 | def read_loop(self): 139 | """ 140 | Infinite loop that reads messages off of the socket while not closed. 141 | 142 | When a message is received its corresponding pending Future is set 143 | to have the message as its result. 144 | 145 | This is never used directly and is fired as a separate callback on the 146 | I/O loop via the `connect()` method. 147 | """ 148 | while not self.closing: 149 | with self.socket_error_handling("Error reading from socket."): 150 | message = yield self.read_message() 151 | self.pending.pop(message.correlation_id).set_result(message) 152 | 153 | def abort(self): 154 | """ 155 | Aborts a connection and puts all pending futures into an error state. 156 | 157 | If ``sys.exc_info()`` is set (i.e. this is being called in an exception 158 | handler) then pending futures will have that exc info set. Otherwise 159 | a ``BrokerConnectionError`` is used. 160 | """ 161 | if self.closing: 162 | return 163 | 164 | log.warn("Aborting connection to %s:%s", self.host, self.port) 165 | 166 | self.close() 167 | self.api_correlation.clear() 168 | while self.pending: 169 | _, pending = self.pending.popitem() 170 | exc_info = sys.exc_info() 171 | if any(exc_info): 172 | pending.set_exc_info(sys.exc_info()) 173 | else: 174 | pending.set_exception( 175 | BrokerConnectionError(self.host, self.port) 176 | ) 177 | 178 | @gen.coroutine 179 | def read_message(self): 180 | """ 181 | Constructs a response class instance from bytes on the stream. 182 | 183 | Works by leveraging the ``IOStream.read_bytes()`` method. 
Steps: 184 | 185 | 1) first the size of the entire payload is pulled 186 | 2) then the correlation id so that we can match this response to the 187 | corresponding pending Future 188 | 3) the api of the resonse is looked up via the correlation id 189 | 4) the corresponding response class's deserialize() method is used to 190 | decipher the raw payload 191 | """ 192 | raw_size = yield self.stream.read_bytes(size_struct.size) 193 | size = size_struct.unpack(raw_size)[0] 194 | 195 | raw_correlation = yield self.stream.read_bytes(correlation_struct.size) 196 | correlation_id = correlation_struct.unpack_from(raw_correlation)[0] 197 | 198 | size -= correlation_struct.size 199 | 200 | raw_payload = yield self.stream.read_bytes(size) 201 | api = self.api_correlation.pop(correlation_id) 202 | 203 | response = response_classes[api].deserialize(raw_payload) 204 | response.correlation_id = correlation_id 205 | 206 | raise gen.Return(response) 207 | -------------------------------------------------------------------------------- /kiel/constants.py: -------------------------------------------------------------------------------- 1 | DEFAULT_KAFKA_PORT = 9092 2 | 3 | #: Compression flag value denoting ``gzip`` was used 4 | GZIP = 1 5 | #: Compression flag value denoting ``snappy`` was used 6 | SNAPPY = 2 7 | #: This set denotes the compression schemes currently supported by Kiel 8 | SUPPORTED_COMPRESSION = (None, GZIP, SNAPPY) 9 | 10 | CLIENT_ID = "kiel" 11 | 12 | #: The "api version" value sent over the wire. Currently always 0 13 | API_VERSION = 0 14 | #: Mapping of response api codes and their names 15 | API_KEYS = { 16 | "produce": 0, 17 | "fetch": 1, 18 | "offset": 2, 19 | "metadata": 3, 20 | "offset_commit": 8, 21 | "offset_fetch": 9, 22 | "group_coordinator": 10, 23 | "join_group": 11, 24 | "heartbeat": 12, 25 | "leave_group": 13, 26 | "sync_group": 14, 27 | "describe_groups": 15, 28 | "list_groups": 16, 29 | } 30 | 31 | 32 | #: All consumers use replica id -1, other values are meant to be 33 | #: used by Kafka itself. 34 | CONSUMER_REPLICA_ID = -1 35 | 36 | #: A mapping of known error codes to their string values 37 | ERROR_CODES = { 38 | 0: "no_error", 39 | -1: "unknown", 40 | 1: "offset_out_of_range", 41 | 2: "invalid_message", 42 | 3: "unknown_topic_or_partition", 43 | 4: "invalid_message_size", 44 | 5: "leader_not_available", 45 | 6: "not_partition_leader", 46 | 7: "request_timed_out", 47 | 8: "broker_not_available", 48 | 9: "replica_not_available", 49 | 10: "message_size_too_large", 50 | 11: "stale_controller_epoch", 51 | 12: "offset_metadata_too_large", 52 | 14: "offsets_load_in_progress", 53 | 15: "coordinator_not_available", 54 | 16: "not_coordinator", 55 | 17: "invalid_topic", 56 | 18: "record_list_too_large", 57 | 19: "not_enough_replicas", 58 | 20: "not_enough_replicas_after_append", 59 | 21: "invalid_required_acks", 60 | 22: "illegal_generation", 61 | 23: "inconsistent_group_protocol", 62 | 24: "invalid_group_id", 63 | 25: "unknown_member_id", 64 | 26: "invalid_session_timeout", 65 | 27: "rebalance_in_progress", 66 | 28: "invalid_commit_offset_size", 67 | 29: "topic_authorization_failed", 68 | 30: "group_authorization_failed", 69 | 31: "cluster_authorization_failed", 70 | } 71 | #: Set of error codes marked "retryable" by the Kafka docs. 
72 | RETRIABLE_CODES = set([ 73 | "invalid_message", 74 | "unknown_topic_or_partition", 75 | "leader_not_available", 76 | "not_partition_leader", 77 | "request_timed_out", 78 | "offsets_load_in_progress", 79 | "coordinator_not_available", 80 | "not_coordinator", 81 | "not_enough_replicas", 82 | "not_enough_replicas_after_append", 83 | ]) 84 | -------------------------------------------------------------------------------- /kiel/events.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import six 4 | 5 | 6 | def wait_on_event(event, timeout=None): 7 | """ 8 | Waits on a single threading Event, with an optional timeout. 9 | 10 | This is here for compatibility reasons as python 2 can't reliably wait 11 | on an event without a timeout and python 3 doesn't define a ``maxint``. 12 | """ 13 | if timeout is not None: 14 | event.wait(timeout) 15 | return 16 | 17 | if six.PY2: 18 | # Thanks to a bug in python 2's threading lib, we can't simply call 19 | # .wait() with no timeout since it would wind up ignoring signals. 20 | while not event.is_set(): 21 | event.wait(sys.maxint) 22 | else: 23 | event.wait() 24 | -------------------------------------------------------------------------------- /kiel/exc.py: -------------------------------------------------------------------------------- 1 | class KielError(Exception): 2 | """ 3 | Base exception for all Kiel-specific errors. 4 | """ 5 | pass 6 | 7 | 8 | class NoBrokersError(KielError): 9 | """ 10 | Error raised when a ``Cluster`` has no available connections. 11 | """ 12 | pass 13 | 14 | 15 | class NoOffsetsError(KielError): 16 | """ 17 | Error raised when requests fetching offsets fail fatally. 18 | """ 19 | pass 20 | 21 | 22 | class BrokerConnectionError(KielError): 23 | """ 24 | This error is raised when a single broker ``Connection`` goes bad. 25 | """ 26 | def __init__(self, host, port, broker_id=None): 27 | self.host = host 28 | self.port = port 29 | self.broker_id = broker_id 30 | 31 | def __str__(self): 32 | return "Error connecting to %s:%s" % (self.host, self.port) 33 | 34 | 35 | class UnhandledResponseError(KielError): 36 | """ 37 | Error raised when a client recieves a response but has no handler method. 38 | 39 | Any client that sends a request for an api is expected to define a 40 | corresponding ``handle__response`` method. 41 | """ 42 | def __init__(self, api): 43 | self.api = api 44 | 45 | def __str__(self): 46 | return "No handler method for '%s' api" % self.api 47 | -------------------------------------------------------------------------------- /kiel/iterables.py: -------------------------------------------------------------------------------- 1 | def drain(iterable): 2 | """ 3 | Helper method that empties an iterable as it is iterated over. 
4 | 5 | Works for: 6 | 7 | * ``dict`` 8 | * ``collections.deque`` 9 | * ``list`` 10 | * ``set`` 11 | """ 12 | if getattr(iterable, "popleft", False): 13 | def next_item(coll): 14 | return coll.popleft() 15 | elif getattr(iterable, "popitem", False): 16 | def next_item(coll): 17 | return coll.popitem() 18 | else: 19 | def next_item(coll): 20 | return coll.pop() 21 | 22 | while True: 23 | try: 24 | yield next_item(iterable) 25 | except (IndexError, KeyError): 26 | raise StopIteration 27 | -------------------------------------------------------------------------------- /kiel/protocol/__init__.py: -------------------------------------------------------------------------------- 1 | from .errors import errors # noqa 2 | -------------------------------------------------------------------------------- /kiel/protocol/coordinator.py: -------------------------------------------------------------------------------- 1 | from .request import Request 2 | from .response import Response 3 | from .primitives import String, Int16, Int32 4 | 5 | 6 | api_name = "group_coordinator" 7 | 8 | __all__ = [ 9 | "GroupCoordinatorRequest", 10 | "GroupCoordinatorResponse", 11 | ] 12 | 13 | 14 | class GroupCoordinatorRequest(Request): 15 | """ 16 | :: 17 | 18 | GroupCoordinatorRequest => 19 | group_id => String 20 | """ 21 | api = "group_coordinator" 22 | 23 | parts = ( 24 | ("group", String), 25 | ) 26 | 27 | 28 | class GroupCoordinatorResponse(Response): 29 | """ 30 | :: 31 | 32 | GroupCoordinatorResponse => 33 | error_code => Int16 34 | coordinator_id => Int32 35 | coordinator_host => String 36 | coordinator_port => Int32 37 | """ 38 | api = "group_coordinator" 39 | 40 | parts = ( 41 | ("error_code", Int16), 42 | ("coordinator_id", Int32), 43 | ("coordinator_host", String), 44 | ("coordinator_port", Int32), 45 | ) 46 | -------------------------------------------------------------------------------- /kiel/protocol/describe_groups.py: -------------------------------------------------------------------------------- 1 | from .part import Part 2 | from .request import Request 3 | from .response import Response 4 | from .primitives import Array, String, Int16, Int32, Bytes 5 | 6 | 7 | api_name = "describe_groups" 8 | 9 | 10 | __all__ = [ 11 | "DescribeGroupsRequest", 12 | "DescribeGroupsResponse", 13 | "GroupDescription", 14 | "MemberDescription", 15 | "Assignment", 16 | "TopicAssignment", 17 | ] 18 | 19 | 20 | class DescribeGroupsRequest(Request): 21 | """ 22 | :: 23 | 24 | DescribeGroupRequest => 25 | groups => [String] 26 | """ 27 | api = "describe_groups" 28 | 29 | parts = ( 30 | ("groups", Array.of(String)), 31 | ) 32 | 33 | 34 | class TopicAssignment(Part): 35 | """ 36 | :: 37 | 38 | TopicAssignment => 39 | name => String 40 | partitions => [Int32] 41 | """ 42 | parts = ( 43 | ("name", String), 44 | ("partitions", Array.of(Int32)), 45 | ) 46 | 47 | 48 | class Assignment(Part): 49 | """ 50 | :: 51 | 52 | Assignment => 53 | version => Int16 54 | topics => [TopicAssignment] 55 | user_data => Bytes 56 | """ 57 | parts = ( 58 | ("version", Int16), 59 | ("topics", Array.of(TopicAssignment)), 60 | ("user_data", Bytes), 61 | ) 62 | 63 | 64 | class MemberDescription(Part): 65 | """ 66 | :: 67 | 68 | MemberDescription => 69 | member_id => String 70 | client_id => String 71 | client_host => String 72 | metadata => Bytes 73 | assignment => Assignment 74 | """ 75 | parts = ( 76 | ("member_id", String), 77 | ("client_id", String), 78 | ("client_host", String), 79 | ("metadata", Bytes), 80 | ("assignment", Assignment), 81 | ) 82 | 
83 | 84 | class GroupDescription(Part): 85 | """ 86 | :: 87 | 88 | GroupDescription => 89 | error_code => Int16 90 | group_id => String 91 | state => String 92 | protocol_type => String 93 | protocol => String 94 | members => [MemberDescription] 95 | """ 96 | parts = ( 97 | ("error_code", Int16), 98 | ("group_id", String), 99 | ("state", String), 100 | ("protocol_type", String), 101 | ("protocol", String), 102 | ("members", Array.of(MemberDescription)), 103 | ) 104 | 105 | 106 | class DescribeGroupsResponse(Response): 107 | """ 108 | :: 109 | 110 | DescribeGroupResponse => 111 | groups => [GroupDescription] 112 | """ 113 | api = "describe_groups" 114 | 115 | parts = ( 116 | ("groups", Array.of(GroupDescription)), 117 | ) 118 | -------------------------------------------------------------------------------- /kiel/protocol/errors.py: -------------------------------------------------------------------------------- 1 | import six 2 | 3 | from kiel.constants import ERROR_CODES, RETRIABLE_CODES 4 | 5 | 6 | class Errors(object): 7 | pass 8 | 9 | 10 | errors = Errors() 11 | errors.retriable = set() 12 | 13 | for code, error in six.iteritems(ERROR_CODES): 14 | setattr(errors, error, code) 15 | if error in RETRIABLE_CODES: 16 | errors.retriable.add(code) 17 | -------------------------------------------------------------------------------- /kiel/protocol/fetch.py: -------------------------------------------------------------------------------- 1 | from .part import Part 2 | from .request import Request 3 | from .response import Response 4 | from .messages import MessageSet 5 | from .primitives import Array, String, Int16, Int32, Int64 6 | 7 | 8 | api_name = "fetch" 9 | 10 | 11 | __all__ = [ 12 | "FetchRequest", 13 | "TopicRequest", 14 | "PartitionRequest", 15 | "FetchResponse", 16 | "TopicResponse", 17 | "PartitionResponse", 18 | ] 19 | 20 | 21 | class PartitionRequest(Part): 22 | """ 23 | :: 24 | 25 | PartitionRequest => 26 | partition_id => Int32 27 | offset => Int64 28 | max_bytes => Int32 29 | """ 30 | parts = ( 31 | ("partition_id", Int32), 32 | ("offset", Int64), 33 | ("max_bytes", Int32), 34 | ) 35 | 36 | 37 | class TopicRequest(Part): 38 | """ 39 | :: 40 | 41 | TopicRequest => 42 | name => String 43 | partitions => [PartitionRequest] 44 | """ 45 | parts = ( 46 | ("name", String), 47 | ("partitions", Array.of(PartitionRequest)), 48 | ) 49 | 50 | 51 | class FetchRequest(Request): 52 | """ 53 | :: 54 | 55 | FetchRequest => 56 | replica_id => Int32 57 | max_wait_time => Int32 58 | min_bytes => Int32 59 | topics => [TopicRequest] 60 | """ 61 | api = "fetch" 62 | 63 | parts = ( 64 | ("replica_id", Int32), 65 | ("max_wait_time", Int32), 66 | ("min_bytes", Int32), 67 | ("topics", Array.of(TopicRequest)), 68 | ) 69 | 70 | 71 | class PartitionResponse(Part): 72 | """ 73 | :: 74 | 75 | PartitionResponse => 76 | partition_id => Int32 77 | error_code => Int16 78 | highwater_mark_offset => Int64 79 | message_set => MessageSet 80 | """ 81 | parts = ( 82 | ("partition_id", Int32), 83 | ("error_code", Int16), 84 | ("highwater_mark_offset", Int64), 85 | ("message_set", MessageSet), 86 | ) 87 | 88 | 89 | class TopicResponse(Part): 90 | """ 91 | :: 92 | 93 | TopicResponse => 94 | name => String 95 | partitions => [PartitionResponse] 96 | """ 97 | parts = ( 98 | ("name", String), 99 | ("partitions", Array.of(PartitionResponse)), 100 | ) 101 | 102 | 103 | class FetchResponse(Response): 104 | """ 105 | :: 106 | 107 | FetchResponse => 108 | topics => [TopicResponse] 109 | """ 110 | api = "fetch" 111 | 112 | parts = 
( 113 | ("topics", Array.of(TopicResponse)), 114 | ) 115 | -------------------------------------------------------------------------------- /kiel/protocol/heartbeat.py: -------------------------------------------------------------------------------- 1 | from .request import Request 2 | from .response import Response 3 | from .primitives import String, Int16, Int32 4 | 5 | 6 | api_name = "heartbeat" 7 | 8 | 9 | __all__ = [ 10 | "HeartbeatRequest", 11 | "HeartbeatResponse" 12 | ] 13 | 14 | 15 | class HeartbeatRequest(Request): 16 | """ 17 | :: 18 | 19 | HeartbeatRequest => 20 | group_id => String 21 | generation_id = Int32 22 | member_id => String 23 | """ 24 | api = "heartbeat" 25 | 26 | parts = ( 27 | "group_id", String, 28 | "generation_id", Int32, 29 | "member_id", String, 30 | ) 31 | 32 | 33 | class HeartbeatResponse(Response): 34 | """ 35 | :: 36 | 37 | HeartbeatResponse => 38 | error_code => Int16 39 | """ 40 | api = "heartbeat" 41 | 42 | parts = ( 43 | "error_code", Int16 44 | ) 45 | -------------------------------------------------------------------------------- /kiel/protocol/join_group.py: -------------------------------------------------------------------------------- 1 | from .part import Part 2 | from .request import Request 3 | from .response import Response 4 | from .primitives import Array, String, Bytes, Int16, Int32 5 | 6 | 7 | api_name = "join_group" 8 | 9 | 10 | __all__ = [ 11 | "JoinGroupRequest", 12 | "JoinGroupResponse", 13 | "GroupProtocol", 14 | "Member", 15 | ] 16 | 17 | 18 | class GroupProtocol(Part): 19 | """ 20 | :: 21 | 22 | GroupProtocol => 23 | name => String 24 | version => Int16 25 | subscription => Array.of(String) 26 | user_data => Bytes 27 | """ 28 | parts = ( 29 | ("name", String), 30 | ("version", Int16), 31 | ("subscription", Array.of(String)), 32 | ("user_data", Bytes), 33 | ) 34 | 35 | 36 | class JoinGroupRequest(Request): 37 | """ 38 | :: 39 | 40 | JoinGroupRequest => 41 | group_id => String 42 | session_timeout => Int32 43 | member_id => String 44 | protocol_type => String 45 | group_protocols => [GroupProtocol] 46 | """ 47 | api = "join_group" 48 | 49 | parts = ( 50 | ("group_id", String), 51 | ("session_timeout", Int32), 52 | ("member_id", String), 53 | ("protocol_type", String), 54 | ("group_protocols", Array.of(GroupProtocol)), 55 | ) 56 | 57 | 58 | class Member(Part): 59 | """ 60 | :: 61 | 62 | Member => 63 | member_id => String 64 | metadata => Bytes 65 | """ 66 | parts = ( 67 | ("member_id", String), 68 | ("metadata", Bytes), 69 | ) 70 | 71 | 72 | class JoinGroupResponse(Response): 73 | """ 74 | :: 75 | 76 | JoinGroupResponse => 77 | error_code => Int16 78 | generation_id => Int32 79 | protocol => String 80 | leader_id => String 81 | member_id => String 82 | members => [Member] 83 | """ 84 | api = "join_group" 85 | 86 | parts = ( 87 | ("error_code", Int16), 88 | ("generation_id", Int32), 89 | ("protocol", String), 90 | ("leader_id", String), 91 | ("member_id", String), 92 | ("members", Array.of(Member)), 93 | ) 94 | -------------------------------------------------------------------------------- /kiel/protocol/leave_group.py: -------------------------------------------------------------------------------- 1 | from .request import Request 2 | from .response import Response 3 | from .primitives import String, Int16 4 | 5 | 6 | api_name = "leave_group" 7 | 8 | 9 | __all__ = [ 10 | "LeaveGroupRequest", 11 | "LeaveGroupResponse", 12 | ] 13 | 14 | 15 | class LeaveGroupRequest(Request): 16 | """ 17 | :: 18 | 19 | LeaveGroupRequest => 20 | group_id 
=> String 21 | member_id => String 22 | """ 23 | api = "leave_group" 24 | 25 | parts = ( 26 | ("group_id", String), 27 | ("member_id", String), 28 | ) 29 | 30 | 31 | class LeaveGroupResponse(Response): 32 | """ 33 | :: 34 | 35 | LeaveGroupResponse => 36 | error_code => Int16 37 | """ 38 | api = "leave_group" 39 | 40 | parts = ( 41 | ("error_code", Int16), 42 | ) 43 | -------------------------------------------------------------------------------- /kiel/protocol/list_groups.py: -------------------------------------------------------------------------------- 1 | from .part import Part 2 | from .request import Request 3 | from .response import Response 4 | from .primitives import Array, Int16, String 5 | 6 | 7 | api_name = "list_groups" 8 | 9 | 10 | __all__ = [ 11 | "ListGroupsRequest", 12 | "ListGroupsResponse", 13 | "Group", 14 | ] 15 | 16 | 17 | class ListGroupsRequest(Request): 18 | """ 19 | :: 20 | 21 | ListGroupsRequest => 22 | """ 23 | api = "list_groups" 24 | 25 | parts = () 26 | 27 | 28 | class Group(Part): 29 | """ 30 | :: 31 | 32 | Group => 33 | group_id => String 34 | protocol_type = > String 35 | """ 36 | parts = ( 37 | ("group_id", String), 38 | ("protocol_type", String), 39 | ) 40 | 41 | 42 | class ListGroupsResponse(Response): 43 | """ 44 | :: 45 | 46 | ListGroupsResponse => 47 | error_code => Int16 48 | groups => [Group] 49 | """ 50 | api = "list_groups" 51 | 52 | parts = ( 53 | ("error_code", Int16), 54 | ("groups", Array.of(Group)), 55 | ) 56 | -------------------------------------------------------------------------------- /kiel/protocol/messages.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import struct 3 | import zlib 4 | 5 | from kiel.constants import GZIP, SNAPPY 6 | from kiel.compression import gzip, snappy 7 | 8 | from .part import Part 9 | from .primitives import Int8, Int32, Int64, Bytes 10 | 11 | 12 | log = logging.getLogger(__name__) 13 | 14 | 15 | class MessageSet(object): 16 | """ 17 | Class representing a set of `Message` instances. 18 | 19 | Kafka's compression scheme works by taking a set of messages, compressing 20 | them with the chosen compression scheme, and then wrapping the result as 21 | the value of an envelope message, called the "message set". 22 | """ 23 | def __init__(self, messages): 24 | self.messages = messages 25 | 26 | @classmethod 27 | def compressed(cls, compression, msgs): 28 | """ 29 | Returns a `MessageSet` with the given messages optionally compressed. 30 | 31 | If compression is used, the message set is rendered, compressed, and 32 | then a *new* message set is created with the raw output as a value 33 | in a single message. 34 | 35 | If no compression is set, the returned instance will have a 36 | (, ) tuple for each given message, where the offset 37 | is -1. 38 | """ 39 | if not compression: 40 | return cls([(-1, msg) for msg in msgs]) 41 | 42 | set_format, set_data = cls([(-1, msg) for msg in msgs]).render() 43 | 44 | # compressed message sets are nested and don't include the size 45 | set_format = set_format[1:] 46 | set_data.pop(0) 47 | 48 | raw_set = struct.pack("!" 
+ set_format, *set_data) 49 | 50 | if compression == GZIP: 51 | compressed_set = gzip.compress(raw_set) 52 | elif compression == SNAPPY: 53 | compressed_set = snappy.compress(raw_set) 54 | 55 | container_msg = Message( 56 | magic=0, 57 | attributes=compression, 58 | key=None, 59 | value=compressed_set 60 | ) 61 | 62 | return cls([(-1, container_msg)]) 63 | 64 | def render(self): 65 | """ 66 | Returns a tuple of format and data suitable for ``struct.pack``. 67 | 68 | Each (, ) tuple in ``self.messages`` is `render()`-ed 69 | and the output collected int a single format and data list, prefaced 70 | with a single integer denoting the size of the message set. 71 | """ 72 | fmt = ["i"] 73 | data = [] 74 | total_size = 0 75 | 76 | for offset, message in self.messages: 77 | offset_format, offset_data = Int64(offset).render() 78 | message_format, message_data = message.render() 79 | 80 | message_size = struct.calcsize(message_format) 81 | size_format, size_data = Int32(message_size).render() 82 | 83 | message_format = "".join([ 84 | offset_format, size_format, message_format 85 | ]) 86 | total_size += struct.calcsize("!" + message_format) 87 | 88 | fmt.append(message_format) 89 | data.extend(offset_data) 90 | data.extend(size_data) 91 | data.extend(message_data) 92 | 93 | data.insert(0, total_size) 94 | 95 | return "".join(fmt), data 96 | 97 | def __eq__(self, other): 98 | """ 99 | Tests equivalence of message sets. 100 | 101 | Merely checks the equivalence of the ``messages`` attributes. 102 | Compression is handled implicitly since messages containing the 103 | compressed value of the same sub-messages should still be equivalent. 104 | """ 105 | return self.messages == other.messages 106 | 107 | def __repr__(self): 108 | return "[%s]" % ", ".join([str(m) for _, m in self.messages]) 109 | 110 | @classmethod 111 | def parse(cls, buff, offset, size=None): 112 | """ 113 | Given a buffer and offset, returns the parsed `MessageSet` and offset. 114 | 115 | Starts by determining the size of the raw payload to parse, and 116 | continuously parses the ``Int64`` offset and ``Int32`` size of a 117 | message then the `Message` itself. 118 | 119 | If a parsed message's flags denote compression, `parse()` is called 120 | recursively on the message's value in order to unpack the compressed 121 | nested messages. 122 | """ 123 | if size is None: 124 | size, offset = Int32.parse(buff, offset) 125 | 126 | end = offset + size 127 | 128 | messages = [] 129 | while not offset == end: 130 | try: 131 | message_offset, offset = Int64.parse(buff, offset) 132 | _, offset = Int32.parse(buff, offset) # message size 133 | message, offset = Message.parse(buff, offset) 134 | except struct.error: 135 | # ending messages can sometimes be cut off 136 | break 137 | 138 | if message.attributes: # compression involved, set is nested 139 | set_size = len(message.value) 140 | nested_set, _ = cls.parse(message.value, 0, size=set_size) 141 | messages.extend(nested_set.messages) 142 | else: 143 | messages.append((message_offset, message)) 144 | 145 | return cls(messages), offset 146 | 147 | 148 | class Message(Part): 149 | """ 150 | Basic ``Part`` subclass representing a single Kafka message. 
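# Illustrative sketch (not part of the kiel source) of the envelope scheme
# described above: two made-up messages wrapped in a ``MessageSet``, once
# plain and once gzip-compressed, then rendered to wire bytes.
import struct

from kiel.constants import GZIP
from kiel.protocol.messages import Message, MessageSet

msgs = [
    Message(magic=0, attributes=0, key=None, value='{"cowtype": "jersey"}'),
    Message(magic=0, attributes=0, key=None, value='{"cowtype": "angus"}'),
]

plain = MessageSet.compressed(None, msgs)
assert len(plain.messages) == 2      # one (offset, message) pair per message

zipped = MessageSet.compressed(GZIP, msgs)
assert len(zipped.messages) == 1     # a single envelope message

fmt, data = zipped.render()
raw = struct.pack("!" + fmt, *data)  # bytes as they would appear on the wire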
151 | :: 152 | 153 | Message => 154 | crc => Int32 155 | magic => Int8 156 | attributes => Int8 157 | key => Bytes 158 | value => Bytes 159 | """ 160 | parts = ( 161 | ("crc", Int32), 162 | ("magic", Int8), 163 | ("attributes", Int8), 164 | ("key", Bytes), 165 | ("value", Bytes), 166 | ) 167 | 168 | def render(self, parts=None): 169 | """ 170 | Renders just like the base ``Part`` class, but with CRC32 verification. 171 | """ 172 | fmt, data = super(Message, self).render(self.parts[1:]) 173 | 174 | payload = struct.pack("!" + fmt, *data) 175 | 176 | crc = zlib.crc32(payload) 177 | if crc > (2**31): 178 | crc -= 2**32 179 | 180 | fmt = "i%ds" % len(payload) 181 | 182 | return fmt, [crc, payload] 183 | 184 | @classmethod 185 | def parse(cls, buff, offset): 186 | """ 187 | Given a buffer and offset, returns the parsed `Message` and new offset. 188 | 189 | Parses via the basic ``Part`` parse method, but with added 190 | decompression support. 191 | 192 | If a parsed message's attributes denote that compression has been used, 193 | the value is run through the corresponding ``decompress()`` method. 194 | """ 195 | message, offset = super(Message, cls).parse(buff, offset) 196 | 197 | # the compression scheme is stored in the lowest 2 bits of 'attributes' 198 | compression = message.attributes & 0b00000011 199 | 200 | if not compression: 201 | return message, offset 202 | 203 | if compression == GZIP: 204 | message.value = gzip.decompress(message.value) 205 | elif compression == SNAPPY: 206 | message.value = snappy.decompress(message.value) 207 | 208 | return message, offset 209 | 210 | def __eq__(self, other): 211 | """ 212 | Tests equivalency of two messages by comparing the ``key`` and 213 | ``value``. 214 | """ 215 | return self.key == other.key and self.value == other.value 216 | 217 | def __repr__(self): 218 | return "%s => %s" % (self.key, self.value) 219 | -------------------------------------------------------------------------------- /kiel/protocol/metadata.py: -------------------------------------------------------------------------------- 1 | from .part import Part 2 | from .request import Request 3 | from .response import Response 4 | from .primitives import Int16, Int32, Array, String 5 | 6 | 7 | api_name = "metadata" 8 | 9 | __all__ = [ 10 | "MetadataRequest", 11 | "MetadataResponse", 12 | "TopicMetadata", 13 | "PartitionMetadata", 14 | "Broker", 15 | ] 16 | 17 | 18 | class MetadataRequest(Request): 19 | """ 20 | :: 21 | 22 | MetadataRequest => 23 | topics => [String] 24 | """ 25 | api = "metadata" 26 | 27 | parts = ( 28 | ("topics", Array.of(String)), 29 | ) 30 | 31 | 32 | class Broker(Part): 33 | """ 34 | :: 35 | 36 | Broker => 37 | broker_id => Int32 38 | host => String 39 | port => Int32 40 | """ 41 | parts = ( 42 | ("broker_id", Int32), 43 | ("host", String), 44 | ("port", Int32), 45 | ) 46 | 47 | 48 | class PartitionMetadata(Part): 49 | """ 50 | :: 51 | 52 | PartitionMetadata => 53 | error_code => Int16 54 | partition_id => Int32 55 | leader => Int32 56 | replicas => [Int32] 57 | isrs => [Int32] 58 | """ 59 | parts = ( 60 | ("error_code", Int16), 61 | ("partition_id", Int32), 62 | ("leader", Int32), 63 | ("replicas", Array.of(Int32)), 64 | ("isrs", Array.of(Int32)), 65 | ) 66 | 67 | 68 | class TopicMetadata(Part): 69 | """ 70 | :: 71 | 72 | TopicMetadata => 73 | error_code => Int16 74 | name => String 75 | partitions => [PartitionMetadata] 76 | """ 77 | parts = ( 78 | ("error_code", Int16), 79 | ("name", String), 80 | ("partitions", Array.of(PartitionMetadata)), 81 | ) 82 | 83 
| 84 | class MetadataResponse(Response): 85 | """ 86 | :: 87 | 88 | MetadataResponse => 89 | brokers => [Broker] 90 | topics => [TopicMetadata] 91 | """ 92 | api = "metadata" 93 | 94 | parts = ( 95 | ("brokers", Array.of(Broker)), 96 | ("topics", Array.of(TopicMetadata)), 97 | ) 98 | -------------------------------------------------------------------------------- /kiel/protocol/offset.py: -------------------------------------------------------------------------------- 1 | from .part import Part 2 | from .request import Request 3 | from .response import Response 4 | from .primitives import Int16, Int32, Int64, String, Array 5 | 6 | 7 | api_name = "offset" 8 | 9 | __all__ = [ 10 | "OffsetRequest", 11 | "TopicRequest", 12 | "PartitionRequest", 13 | "OffsetResponse", 14 | "TopicResponse", 15 | "PartitionResponse", 16 | ] 17 | 18 | 19 | class PartitionRequest(Part): 20 | """ 21 | :: 22 | 23 | PartitionRequeset => 24 | partition_id => Int32 25 | time => Int64 26 | max_offsets => Int32 27 | """ 28 | parts = ( 29 | ("partition_id", Int32), 30 | ("time", Int64), 31 | ("max_offsets", Int32), 32 | ) 33 | 34 | 35 | class TopicRequest(Part): 36 | """ 37 | :: 38 | 39 | TopicRequest => 40 | name => String 41 | partitions => [PartitionRequest] 42 | """ 43 | parts = ( 44 | ("name", String), 45 | ("partitions", Array.of(PartitionRequest)), 46 | ) 47 | 48 | 49 | class OffsetRequest(Request): 50 | """ 51 | :: 52 | 53 | OffsetRequest => 54 | replica_id => Int32 55 | topics => [TopicRequest] 56 | """ 57 | api = "offset" 58 | 59 | parts = ( 60 | ("replica_id", Int32), 61 | ("topics", Array.of(TopicRequest)), 62 | ) 63 | 64 | 65 | class PartitionResponse(Part): 66 | """ 67 | :: 68 | 69 | PartitionResponse => 70 | partition_id => Int32 71 | error_code => Int16 72 | offsets => [Int64] 73 | """ 74 | parts = ( 75 | ("partition_id", Int32), 76 | ("error_code", Int16), 77 | ("offsets", Array.of(Int64)), 78 | ) 79 | 80 | 81 | class TopicResponse(Part): 82 | """ 83 | :: 84 | 85 | TopicResponse => 86 | name => String 87 | partitions => [PartitionResponse] 88 | """ 89 | parts = ( 90 | ("name", String), 91 | ("partitions", Array.of(PartitionResponse)), 92 | ) 93 | 94 | 95 | class OffsetResponse(Response): 96 | """ 97 | :: 98 | 99 | OffsetResponse => 100 | topics => [TopicResponse] 101 | """ 102 | api = "offset" 103 | 104 | parts = ( 105 | ("topics", Array.of(TopicResponse)), 106 | ) 107 | -------------------------------------------------------------------------------- /kiel/protocol/offset_commit.py: -------------------------------------------------------------------------------- 1 | from .part import Part 2 | from .request import Request 3 | from .response import Response 4 | from .primitives import Array, String, Int16, Int32, Int64 5 | 6 | 7 | api_name = "offset_commit" 8 | 9 | __all__ = [ 10 | "OffsetCommitV2Request", 11 | "OffsetCommitV1Request", 12 | "OffsetCommitV0Request", 13 | "TopicV1Request", 14 | "TopicRequest", 15 | "PartitionV1Request", 16 | "PartitionRequest", 17 | "OffsetCommitResponse", 18 | "TopicResponse", 19 | "PartitionResponse", 20 | ] 21 | 22 | 23 | class PartitionRequest(Part): 24 | """ 25 | :: 26 | 27 | PartitionV1Request => 28 | partition_id => Int32 29 | offset => Int64 30 | metadata => String 31 | """ 32 | parts = ( 33 | ("partition_id", Int32), 34 | ("offset", Int64), 35 | ("metadata", String), 36 | ) 37 | 38 | 39 | class PartitionV1Request(Part): 40 | """ 41 | :: 42 | 43 | PartitionV1Request => 44 | partition_id => Int32 45 | offset => Int64 46 | timestamp => Int64 47 | metadata => String 48 | 
""" 49 | parts = ( 50 | ("partition_id", Int32), 51 | ("offset", Int64), 52 | ("timestamp", Int64), 53 | ("metadata", String), 54 | ) 55 | 56 | 57 | class TopicRequest(Part): 58 | """ 59 | :: 60 | 61 | TopicRequest => 62 | name => String 63 | partitions => [PartitionRequest] 64 | """ 65 | parts = ( 66 | ("name", String), 67 | ("partitions", Array.of(PartitionRequest)), 68 | ) 69 | 70 | 71 | class TopicV1Request(Part): 72 | """ 73 | :: 74 | 75 | TopicRequest => 76 | name => String 77 | partitions => [PartitionV1Request] 78 | """ 79 | parts = ( 80 | ("name", String), 81 | ("partitions", PartitionV1Request), 82 | ) 83 | 84 | 85 | class OffsetCommitV0Request(Request): 86 | """ 87 | :: 88 | 89 | OffsetCommitV0Request => 90 | group => String 91 | topics => [TopicRequest] 92 | """ 93 | api = "offset_commit" 94 | 95 | parts = ( 96 | ("group", String), 97 | ("topics", Array.of(TopicRequest)), 98 | ) 99 | 100 | 101 | class OffsetCommitV1Request(Request): 102 | """ 103 | :: 104 | 105 | OffsetCommitV1Request => 106 | group => String 107 | generation => Int32 108 | consumer_id => Int32 109 | topics => [TopicV1Request] 110 | """ 111 | api = "offset_commit" 112 | 113 | parts = ( 114 | ("group", String), 115 | ("generation", Int32), 116 | ("consumer_id", Int32), 117 | ("topics", Array.of(TopicV1Request)), 118 | ) 119 | 120 | 121 | class OffsetCommitV2Request(Request): 122 | """ 123 | :: 124 | 125 | OffsetCommitV2Request => 126 | group => String 127 | generation => Int32 128 | consumer_id => Int32 129 | retention_time => Int64 130 | topics => [TopicRequest] 131 | """ 132 | api = "offset_commit" 133 | 134 | parts = ( 135 | ("group", String), 136 | ("generation", Int32), 137 | ("consumer_id", Int32), 138 | ("retention_time", Int64), 139 | ("topics", Array.of(TopicRequest)), 140 | ) 141 | 142 | 143 | class PartitionResponse(Part): 144 | """ 145 | :: 146 | 147 | PartitionResponse => 148 | partition_id => Int32 149 | error_code => Int16 150 | """ 151 | api = "offset_commit" 152 | 153 | parts = ( 154 | ("partition_id", Int32), 155 | ("error_code", Int16), 156 | ) 157 | 158 | 159 | class TopicResponse(Part): 160 | """ 161 | :: 162 | 163 | TopicResponse => 164 | name => String 165 | partitions => [PartitionResponse] 166 | """ 167 | parts = ( 168 | ("name", String), 169 | ("partitions", Array.of(PartitionResponse)), 170 | ) 171 | 172 | 173 | class OffsetCommitResponse(Response): 174 | """ 175 | :: 176 | 177 | OffsetCommitResponse => 178 | topics => [TopicResponse] 179 | """ 180 | api = "offset_commit" 181 | 182 | parts = ( 183 | ("topics", Array.of(TopicResponse)), 184 | ) 185 | -------------------------------------------------------------------------------- /kiel/protocol/offset_fetch.py: -------------------------------------------------------------------------------- 1 | from .part import Part 2 | from .request import Request 3 | from .response import Response 4 | from .primitives import Array, String, Int16, Int32, Int64 5 | 6 | 7 | api_name = "offset_fetch" 8 | 9 | __all__ = [ 10 | "OffsetFetchRequest", 11 | "TopicRequest", 12 | "OffsetFetchResponse", 13 | "TopicResponse", 14 | "PartitionResponse", 15 | ] 16 | 17 | 18 | class TopicRequest(Part): 19 | """ 20 | :: 21 | 22 | TopicRequest => 23 | name => String 24 | partitions => [In32] 25 | """ 26 | parts = ( 27 | ("name", String), 28 | ("partitions", Array.of(Int32)), 29 | ) 30 | 31 | 32 | class OffsetFetchRequest(Request): 33 | """ 34 | :: 35 | 36 | OffsetFetchRequest => 37 | group_name => String 38 | topics => [TopicRequest] 39 | """ 40 | api = "offset_fetch" 
41 | 42 | parts = ( 43 | ("group_name", String), 44 | ("topics", Array.of(TopicRequest)), 45 | ) 46 | 47 | 48 | class PartitionResponse(Part): 49 | """ 50 | :: 51 | 52 | PartitionResponse => 53 | partition_id => Int32 54 | offset => Int64 55 | metadata => String 56 | error_code => Int16 57 | """ 58 | parts = ( 59 | ("partition_id", Int32), 60 | ("offset", Int64), 61 | ("metadata", String), 62 | ("error_code", Int16), 63 | ) 64 | 65 | 66 | class TopicResponse(Part): 67 | """ 68 | :: 69 | 70 | TopicResponse => 71 | name => String 72 | partitions => [PartitionResponse] 73 | """ 74 | parts = ( 75 | ("name", String), 76 | ("partitions", Array.of(PartitionResponse)), 77 | ) 78 | 79 | 80 | class OffsetFetchResponse(Response): 81 | """ 82 | :: 83 | 84 | OffsetFetchResponse => 85 | topics => [TopicResponse] 86 | """ 87 | api = "offset_fetch" 88 | 89 | parts = ( 90 | ("topics", Array.of(TopicResponse)), 91 | ) 92 | -------------------------------------------------------------------------------- /kiel/protocol/part.py: -------------------------------------------------------------------------------- 1 | from .primitives import Primitive 2 | 3 | 4 | class Part(object): 5 | """ 6 | Composable building block used to define Kafka protocol parts. 7 | 8 | Behaves much like the `Primitive` class but has named "sub parts" 9 | stored in a ``parts`` class attribute, that can hold any `Part` or 10 | `Primitive` subclass. 11 | """ 12 | parts = () 13 | 14 | def __init__(self, **kwargs): 15 | part_names = [item[0] for item in self.parts] 16 | 17 | for name, value in kwargs.items(): 18 | if name not in part_names: 19 | raise ValueError("Unknown part name: '%s'" % name) 20 | 21 | setattr(self, name, value) 22 | 23 | def render(self, parts=None): 24 | """ 25 | Returns a two-element tuple with the ``struct`` format and values. 26 | 27 | Iterates over the applicable sub-parts and calls `render()` on them, 28 | accumulating the format string and values. 29 | 30 | Optionally takes a subset of parts to render, default behavior is to 31 | render all sub-parts belonging to the class. 32 | """ 33 | if not parts: 34 | parts = self.parts 35 | 36 | fmt = [] 37 | data = [] 38 | 39 | for name, part_class in parts: 40 | if issubclass(part_class, Primitive): 41 | part = part_class(getattr(self, name, None)) 42 | else: 43 | part = getattr(self, name, None) 44 | 45 | part_format, part_data = part.render() 46 | 47 | fmt.extend(part_format) 48 | data.extend(part_data) 49 | 50 | return "".join(fmt), data 51 | 52 | @classmethod 53 | def parse(cls, buff, offset): 54 | """ 55 | Given a buffer and offset, returns the parsed value and new offset. 56 | 57 | Calls `parse()` on the given buffer for each sub-part in order and 58 | creates a new instance with the results. 59 | """ 60 | values = {} 61 | 62 | for name, part in cls.parts: 63 | value, new_offset = part.parse(buff, offset) 64 | 65 | values[name] = value 66 | offset = new_offset 67 | 68 | return cls(**values), offset 69 | 70 | def __eq__(self, other): 71 | """ 72 | `Part` instances are equal if all of their sub-parts are also equal. 
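# Illustrative round-trip sketch (not part of the kiel source): because
# ``render()`` and ``parse()`` above are symmetric, a hypothetical ``Part``
# subclass can be packed with ``struct`` and parsed back out unchanged.
import struct

from kiel.protocol.part import Part
from kiel.protocol.primitives import Array, Int32, String


class ExamplePart(Part):
    parts = (
        ("name", String),
        ("partitions", Array.of(Int32)),
    )


original = ExamplePart(name="example.topic", partitions=[0, 1, 2])

fmt, data = original.render()
raw = struct.pack("!" + fmt, *data)

parsed, offset = ExamplePart.parse(raw, offset=0)
assert parsed == original and offset == len(raw)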
73 | """ 74 | try: 75 | return all([ 76 | getattr(self, part_name) == getattr(other, part_name) 77 | for part_name, part_class in self.parts 78 | ]) 79 | except AttributeError: 80 | return False 81 | 82 | def __repr__(self): 83 | 84 | def subpart_string(part_info): 85 | part_name, part_class = part_info 86 | 87 | if not part_class.__name__.startswith("ArrayOf"): 88 | return "%s=%s" % (part_name, getattr(self, part_name, None)) 89 | 90 | return "%s=[%s]" % ( 91 | part_name, 92 | ", ".join([ 93 | str(item) for item in getattr(self, part_name, []) 94 | ]) 95 | ) 96 | 97 | return "%s(%s)" % ( 98 | self.__class__.__name__, 99 | ", ".join([subpart_string(part) for part in self.parts]) 100 | ) 101 | -------------------------------------------------------------------------------- /kiel/protocol/primitives.py: -------------------------------------------------------------------------------- 1 | import struct 2 | 3 | import six 4 | 5 | 6 | class Primitive(object): 7 | """ 8 | The most basic structure of the protocol. Subclassed, never used directly. 9 | 10 | Used as a building block for the various actually-used primitives outlined 11 | in the Kafka wire protcol docs: 12 | 13 | https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol 14 | """ 15 | fmt = None 16 | 17 | def __init__(self, value): 18 | self.value = value 19 | 20 | def render(self): 21 | """ 22 | Returns a two-element tuple with the ``struct`` format and list value. 23 | 24 | The value is wrapped in a list, as there are some primitives that deal 25 | with multiple values. Any caller of `render()` should expect a list. 26 | """ 27 | return self.fmt, [self.value] 28 | 29 | @classmethod 30 | def parse(cls, buff, offset): 31 | """ 32 | Given a buffer and offset, returns the parsed value and new offset. 33 | 34 | Uses the ``fmt`` class attribute to unpack the data from the buffer 35 | and determine the used up number of bytes. 36 | """ 37 | primitive_struct = struct.Struct("!" + cls.fmt) 38 | 39 | value = primitive_struct.unpack_from(buff, offset)[0] 40 | offset += primitive_struct.size 41 | 42 | return value, offset 43 | 44 | def __eq__(self, other): 45 | """ 46 | Basic equality method that tests equality of the ``value`` attributes. 47 | """ 48 | return self.value == other.value 49 | 50 | def __repr__(self): 51 | return "%s(%s)" % (self.__class__.__name__, self.value) 52 | 53 | 54 | class VariablePrimitive(Primitive): 55 | """ 56 | Base primitive for variable-length scalar primitives (strings and bytes). 57 | """ 58 | size_primitive = None 59 | 60 | def render(self): 61 | """ 62 | Returns the ``struct`` format and list of the size and value. 63 | 64 | The format is derived from the size primitive and the length of the 65 | resulting encoded value (e.g. the format for a string of 'foo' ends 66 | up as 'h3s'. 67 | 68 | .. note :: 69 | The value is expected to be string-able (wrapped in ``str()``) and is 70 | then encoded as UTF-8. 71 | """ 72 | size_format = self.size_primitive.fmt 73 | 74 | if self.value is None: 75 | return size_format, [-1] 76 | 77 | value = self.value 78 | 79 | if not isinstance(value, six.binary_type): 80 | if not isinstance(value, six.string_types): 81 | value = str(value) 82 | 83 | value = value.encode("utf-8") 84 | 85 | size = len(value) 86 | 87 | fmt = "%s%ds" % (size_format, size) 88 | 89 | return fmt, [size, value] 90 | 91 | @classmethod 92 | def parse(cls, buff, offset): 93 | """ 94 | Given a buffer and offset, returns the parsed value and new offset. 
95 | 96 | Parses the ``size_primitive`` first to determine how many more bytes to 97 | consume to extract the value. 98 | """ 99 | size, offset = cls.size_primitive.parse(buff, offset) 100 | if size == -1: 101 | return None, offset 102 | 103 | var_struct = struct.Struct("!%ds" % size) 104 | 105 | value = var_struct.unpack_from(buff, offset)[0] 106 | 107 | try: 108 | value = value.decode("utf-8") 109 | except UnicodeDecodeError: 110 | pass 111 | 112 | offset += var_struct.size 113 | 114 | return value, offset 115 | 116 | 117 | class Int8(Primitive): 118 | """ 119 | Represents an 8-bit signed integer. 120 | """ 121 | fmt = "b" 122 | 123 | 124 | class Int16(Primitive): 125 | """ 126 | Represents an 16-bit signed integer. 127 | """ 128 | fmt = "h" 129 | 130 | 131 | class Int32(Primitive): 132 | """ 133 | Represents an 32-bit signed integer. 134 | """ 135 | fmt = "i" 136 | 137 | 138 | class Int64(Primitive): 139 | """ 140 | Represents an 64-bit signed integer. 141 | """ 142 | fmt = "q" 143 | 144 | 145 | class String(VariablePrimitive): 146 | """ 147 | Represents a string value, length denoted by a 16-bit signed integer. 148 | """ 149 | size_primitive = Int16 150 | 151 | def __repr__(self): 152 | return repr(self.value) 153 | 154 | 155 | class Bytes(VariablePrimitive): 156 | """ 157 | Represents a bytestring value, length denoted by a 32-bit signed integer. 158 | """ 159 | size_primitive = Int32 160 | 161 | 162 | class Array(Primitive): 163 | """ 164 | Represents an array of any arbitrary `Primitive` or ``Part``. 165 | 166 | Not used directly but rather by its ``of()`` classmethod to denote an 167 | ``Array.of()``. 168 | """ 169 | item_class = None 170 | 171 | @classmethod 172 | def of(cls, part_class): 173 | """ 174 | Creates a new class with the ``item_class`` attribute properly set. 175 | """ 176 | copy = type( 177 | "ArrayOf%s" % part_class.__name__, 178 | cls.__bases__, dict(cls.__dict__) 179 | ) 180 | copy.item_class = part_class 181 | 182 | return copy 183 | 184 | def render(self): 185 | """ 186 | Creates a composite ``struct`` format and the data to render with it. 187 | 188 | The format and data are prefixed with a 32-bit integer denoting the 189 | number of elements, after which each of the items in the array value 190 | are ``render()``-ed and added to the format and data as well. 191 | """ 192 | value = self.value 193 | if value is None: 194 | value = [] 195 | 196 | fmt = [Int32.fmt] 197 | data = [len(value)] 198 | 199 | for item_value in value: 200 | if issubclass(self.item_class, Primitive): 201 | item = self.item_class(item_value) 202 | else: 203 | item = item_value 204 | 205 | item_format, item_data = item.render() 206 | fmt.extend(item_format) 207 | data.extend(item_data) 208 | 209 | return "".join(fmt), data 210 | 211 | @classmethod 212 | def parse(cls, buff, offset): 213 | """ 214 | Parses a raw buffer at offset and returns the resulting array value. 215 | 216 | Starts off by `parse()`-ing the 32-bit element count, followed by 217 | parsing items out of the buffer "count" times. 
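# Worked example (not part of the kiel source): ``Array.of`` builds a one-off
# class for a homogeneous array, and rendering prefixes an Int32 element
# count before rendering each element in turn.
from kiel.protocol.primitives import Array, Int32, String

fmt, data = Array.of(Int32)([4, 8, 15]).render()
assert fmt == "iiii"           # count plus three elements
assert data == [3, 4, 8, 15]

fmt, data = Array.of(String)(["a", "bc"]).render()
assert fmt == "ih1sh2s"        # count, then (length, bytes) per string
assert data == [2, 1, b"a", 2, b"bc"]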
218 | """ 219 | count, offset = Int32.parse(buff, offset) 220 | 221 | values = [] 222 | for _ in range(count): 223 | value, new_offset = cls.item_class.parse(buff, offset) 224 | 225 | values.append(value) 226 | offset = new_offset 227 | 228 | return values, offset 229 | 230 | def __repr__(self): 231 | return "[%s]" % ", ".join(map(repr, self.value)) 232 | -------------------------------------------------------------------------------- /kiel/protocol/produce.py: -------------------------------------------------------------------------------- 1 | from .part import Part 2 | from .request import Request 3 | from .response import Response 4 | from .messages import MessageSet 5 | from .primitives import Int16, Int32, Int64, Array, String 6 | 7 | 8 | api_name = "produce" 9 | 10 | __all__ = [ 11 | "ProduceRequest", 12 | "TopicRequest", 13 | "PartitionRequest", 14 | "ProduceResponse", 15 | "TopicResponse", 16 | "PartitionResponse", 17 | ] 18 | 19 | 20 | class PartitionRequest(Part): 21 | """ 22 | :: 23 | 24 | PartitionRequest => 25 | partition_id => Int32 26 | message_set => MessageSet 27 | """ 28 | parts = ( 29 | ("partition_id", Int32), 30 | ("message_set", MessageSet), 31 | ) 32 | 33 | 34 | class TopicRequest(Part): 35 | """ 36 | :: 37 | 38 | TopicRequest => 39 | name => String 40 | partitions => [PartitionRequest] 41 | """ 42 | parts = ( 43 | ("name", String), 44 | ("partitions", Array.of(PartitionRequest)), 45 | ) 46 | 47 | 48 | class ProduceRequest(Request): 49 | """ 50 | :: 51 | 52 | ProduceRequest => 53 | required_acs => Int16 54 | timeout => Int32 55 | topics => [TopicRequest] 56 | """ 57 | api = "produce" 58 | 59 | parts = ( 60 | ("required_acks", Int16), 61 | ("timeout", Int32), 62 | ("topics", Array.of(TopicRequest)), 63 | ) 64 | 65 | 66 | class PartitionResponse(Part): 67 | """ 68 | :: 69 | 70 | PartitionResponse => 71 | partition_id => Int32 72 | error_code => Int16 73 | offset => Int64 74 | """ 75 | parts = ( 76 | ("partition_id", Int32), 77 | ("error_code", Int16), 78 | ("offset", Int64), 79 | ) 80 | 81 | 82 | class TopicResponse(Part): 83 | """ 84 | :: 85 | 86 | TopicResponse => 87 | name => String 88 | partitions => [PartitionResponse] 89 | """ 90 | parts = ( 91 | ("name", String), 92 | ("partitions", Array.of(PartitionResponse)), 93 | ) 94 | 95 | 96 | class ProduceResponse(Response): 97 | """ 98 | :: 99 | 100 | ProduceResponse => 101 | topics => [TopicResponse] 102 | """ 103 | api = "produce" 104 | 105 | parts = ( 106 | ("topics", Array.of(TopicResponse)), 107 | ) 108 | -------------------------------------------------------------------------------- /kiel/protocol/request.py: -------------------------------------------------------------------------------- 1 | import binascii 2 | import hashlib 3 | import os 4 | import socket 5 | import struct 6 | 7 | from six import BytesIO 8 | 9 | from kiel.constants import CLIENT_ID, API_VERSION, API_KEYS 10 | 11 | from .part import Part 12 | from .primitives import Int16, Int32, String 13 | 14 | 15 | machine_hash = hashlib.md5() 16 | machine_hash.update(socket.gethostname().encode("UTF-8")) 17 | machine_bytes = machine_hash.digest()[0:4] 18 | 19 | #: Seed value for correlation IDs, based on the machine name and PID 20 | last_id = (int(binascii.hexlify(machine_bytes), 16) + os.getpid() & 0xffffff) 21 | 22 | 23 | def generate_correlation_id(): 24 | """ 25 | Creates a new ``correlation_id`` for requests. 26 | 27 | Increments the ``last_id`` value so each generated ID is unique for 28 | this machine and process. 
29 | """ 30 | global last_id 31 | 32 | last_id += 1 33 | 34 | return last_id 35 | 36 | 37 | class Request(Part): 38 | """ 39 | Base class for all requests sent to brokers. 40 | 41 | A specialized subclass of ``Part`` with attributes for correlating 42 | responses and prefacing payloads with client/api metadata. 43 | """ 44 | api = None 45 | 46 | def __init__(self, **kwargs): 47 | super(Request, self).__init__(**kwargs) 48 | 49 | self.client_id = CLIENT_ID 50 | self.api_key = API_KEYS[self.api] 51 | self.api_version = API_VERSION 52 | self.correlation_id = generate_correlation_id() 53 | 54 | def serialize(self): 55 | """ 56 | Returns a bytesring representation of the request instance. 57 | 58 | Prefaces the output with certain information:: 59 | 60 | api_key => Int16 61 | api_version => Int16 62 | correlation_id => Int32 63 | client_id => String 64 | 65 | Since this is a ``Part`` subclass the rest is a matter of 66 | appending the result of a ``render()`` call. 67 | """ 68 | buff = BytesIO() 69 | 70 | preamble_parts = ( 71 | ("api_key", Int16), 72 | ("api_version", Int16), 73 | ("correlation_id", Int32), 74 | ("client_id", String), 75 | ) 76 | 77 | preamble_format, data = self.render(preamble_parts) 78 | 79 | payload_format, payload_data = self.render() 80 | 81 | fmt = "".join(["!", preamble_format, payload_format]) 82 | data.extend(payload_data) 83 | 84 | buff.write(struct.pack(fmt, *data)) 85 | 86 | return buff.getvalue() 87 | -------------------------------------------------------------------------------- /kiel/protocol/response.py: -------------------------------------------------------------------------------- 1 | from .part import Part 2 | 3 | 4 | class Response(Part): 5 | """ 6 | Base class for all api response classes. 7 | 8 | A simple class, has only an ``api`` attribute expected to be defined by 9 | subclasses, and a `deserialize()` classmethod. 10 | """ 11 | api = None 12 | 13 | @classmethod 14 | def deserialize(cls, raw_bytes): 15 | """ 16 | Deserializes the given raw bytes into an instance. 17 | 18 | Since this is a subclass of ``Part`` but a top-level one (i.e. no other 19 | subclass of ``Part`` would have a ``Response`` as a part) this merely 20 | has to parse the raw bytes and discard the resulting offset. 
21 | """ 22 | instance, _ = cls.parse(raw_bytes, offset=0) 23 | 24 | return instance 25 | -------------------------------------------------------------------------------- /kiel/protocol/sync_group.py: -------------------------------------------------------------------------------- 1 | from .part import Part 2 | from .request import Request 3 | from .response import Response 4 | from .primitives import Array, String, Bytes, Int16, Int32 5 | 6 | 7 | api_name = "sync_group" 8 | 9 | 10 | __all__ = [ 11 | "SyncGroupRequest", 12 | "SyncGroupResponse", 13 | "MemberAssignment", 14 | "TopicAssignment", 15 | ] 16 | 17 | 18 | class TopicAssignment(Part): 19 | """ 20 | :: 21 | 22 | TopicAssignment => 23 | name => String 24 | partitions => [Int32] 25 | """ 26 | parts = ( 27 | ("name", String), 28 | ("partitions", Array.of(Int32)), 29 | ) 30 | 31 | 32 | class Assignment(Part): 33 | """ 34 | :: 35 | 36 | Assignment => 37 | version => Int16 38 | topics => [TopicAssignment] 39 | user_data => Bytes 40 | """ 41 | parts = ( 42 | ("version", Int16), 43 | ("topics", Array.of(TopicAssignment)), 44 | ("user_data", Bytes), 45 | ) 46 | 47 | 48 | class MemberAssignment(Part): 49 | """ 50 | :: 51 | 52 | MemberAssignment => 53 | member_id => String 54 | assignment => Assignment 55 | """ 56 | parts = ( 57 | ("member_id", String), 58 | ("assignment", Assignment), 59 | ) 60 | 61 | 62 | class SyncGroupRequest(Request): 63 | """ 64 | :: 65 | 66 | SyncGroupRequest => 67 | group_id => String 68 | generation_id => Int32 69 | member_id => String 70 | assignments => [MemberAssignment] 71 | """ 72 | api = "sync_group" 73 | 74 | parts = ( 75 | ("group_id", String), 76 | ("generation_id", Int32), 77 | ("member_id", String), 78 | ("assignments", Array.of(MemberAssignment)), 79 | ) 80 | 81 | 82 | class SyncGroupResponse(Response): 83 | """ 84 | :: 85 | 86 | SyncGroupResponse => 87 | error_code => Int16 88 | assignments => [MemberAssignment] 89 | """ 90 | api = "sync_group" 91 | 92 | parts = ( 93 | ("error_code", Int16), 94 | ("assignments", Array.of(MemberAssignment)), 95 | ) 96 | -------------------------------------------------------------------------------- /kiel/zookeeper/__init__.py: -------------------------------------------------------------------------------- 1 | from .party import Party # noqa 2 | from .shared_set import SharedSet # noqa 3 | -------------------------------------------------------------------------------- /kiel/zookeeper/allocator.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import threading 3 | 4 | import six 5 | from tornado import concurrent 6 | from kazoo import client 7 | 8 | from kiel.zookeeper import Party, SharedSet 9 | from kiel.events import wait_on_event 10 | 11 | 12 | log = logging.getLogger(__name__) 13 | 14 | 15 | class PartitionAllocator(object): 16 | """ 17 | Helper class that uses Zookeeper to allocate partitions among consumers. 18 | 19 | Uses a ``Party`` instance to represent the group membership and a 20 | ``SharedSet`` instance to handle the set of partitions to be allocated. 21 | 22 | The ``allocator_fn`` argument is a callable that is passed a sorted list 23 | of members and partitions whenever change to either happens. 24 | 25 | .. note:: 26 | 27 | It is *incredibly* important that the ``allocator_fn`` be stable! All 28 | all of the instances of the allocator must agree on what partitions go 29 | where or all hell will break loose. 
30 | """ 31 | def __init__( 32 | self, 33 | zk_hosts, 34 | group_name, 35 | consumer_name, 36 | allocator_fn, 37 | on_rebalance=None 38 | ): 39 | self.zk_hosts = zk_hosts 40 | self.group_name = group_name 41 | self.consumer_name = consumer_name 42 | 43 | self.allocator_fn = allocator_fn 44 | self.on_rebalance = on_rebalance 45 | 46 | self.conn = client.KazooClient(hosts=",".join(self.zk_hosts)) 47 | self.connected = threading.Event() 48 | 49 | self.members = set() 50 | self.members_collected = threading.Event() 51 | self.party = Party( 52 | self.conn, self.consumer_name, self.members_path, 53 | on_change=self.on_group_members_change 54 | ) 55 | 56 | self.partitions = set() 57 | self.partitions_collected = threading.Event() 58 | self.shared_set = SharedSet( 59 | self.conn, self.partition_path, 60 | on_change=self.on_partition_change 61 | ) 62 | 63 | self.mapping = {} 64 | 65 | @property 66 | def allocation(self): 67 | """ 68 | Property representing the topics allocated for a specific consumer. 69 | """ 70 | return self.mapping[self.consumer_name] 71 | 72 | @property 73 | def members_path(self): 74 | """ 75 | Property representing the znode path of the member ``Party``. 76 | """ 77 | return "/kiel/groups/%s/members" % self.group_name 78 | 79 | @property 80 | def partition_path(self): 81 | """ 82 | Property representing the znode path of the ``SharedSet``. 83 | """ 84 | return "/kiel/groups/%s/partitions" % self.group_name 85 | 86 | def start(self, seed_partitions): 87 | """ 88 | Connects to zookeeper and collects member and partition data. 89 | 90 | Leverages the `create_attempt()` and ``wait_on_event()`` helper 91 | functions in order to bridge the gap between threaded async 92 | and tornado async. 93 | 94 | Returns a ``Future`` instance once done so that coroutine 95 | methods may yield to it. 96 | """ 97 | log.info("Starting partitioner for group '%s'", self.group_name) 98 | f = concurrent.Future() 99 | 100 | attempt = create_attempter(f) 101 | 102 | attempt(self.connect) 103 | wait_on_event(self.connected) 104 | 105 | attempt(self.party.start) 106 | attempt(self.shared_set.start) 107 | attempt(self.party.join) 108 | attempt(self.add_partitions, seed_partitions) 109 | 110 | if f.done(): 111 | return f 112 | 113 | wait_on_event(self.members_collected) 114 | wait_on_event(self.partitions_collected) 115 | 116 | f.set_result(None) 117 | 118 | return f 119 | 120 | def stop(self): 121 | """ 122 | Signals the ``Party`` that this member has left and closes connections. 123 | 124 | This method returns a ``Future`` so that it can be yielded to in 125 | coroutines. 126 | """ 127 | log.info("Stopping partitioner for group '%s'", self.group_name) 128 | f = concurrent.Future() 129 | 130 | attempt = create_attempter(f) 131 | 132 | attempt(self.party.leave) 133 | attempt(self.conn.stop) 134 | attempt(self.conn.close) 135 | 136 | if not f.done(): 137 | f.set_result(None) 138 | 139 | return f 140 | 141 | def connect(self): 142 | """ 143 | Establishes the kazoo connection and registers the connection handler. 144 | """ 145 | self.conn.add_listener(self.handle_connection_change) 146 | self.conn.start_async() 147 | 148 | def handle_connection_change(self, state): 149 | """ 150 | Handler for changes to the kazoo client's connection's state. 151 | 152 | Responsible for updating the ``connected`` threading event such that 153 | it is only set if/when the kazoo connection is live. 
154 | """ 155 | if state == client.KazooState.LOST: 156 | log.info("Zookeeper session lost!") 157 | self.connected.clear() 158 | elif state == client.KazooState.SUSPENDED: 159 | log.info("Zookeeper connection suspended!") 160 | self.connected.clear() 161 | else: 162 | log.info("Zookeeper connection (re)established.") 163 | self.connected.set() 164 | 165 | def on_group_members_change(self, new_members): 166 | """ 167 | Callback for when membership of the ``Party`` changes. 168 | 169 | Sets the ``self.members`` attribute if membership actually 170 | changed, calling `rebalance()` if so. 171 | 172 | Sets the ``members_collected`` threading event when done. 173 | """ 174 | log.info("Consumer group '%s' members changed.", self.group_name) 175 | 176 | new_members = set(new_members) 177 | if new_members != self.members: 178 | self.members = new_members 179 | self.rebalance() 180 | 181 | self.members_collected.set() 182 | 183 | def on_partition_change(self, new_partitions): 184 | """ 185 | Callback for when data in the ``SharedSet`` changes. 186 | 187 | If ``new_partitions`` is ``None`` it means we're the first to 188 | use the ``SharedSet`` so we populate it with our known partitions. 189 | 190 | If the data has been altered in any way the ``self.partitions`` 191 | attribute is updated and `rebalance()` called. 192 | 193 | Sets the `partitions_collected` threading event when done. 194 | """ 195 | if new_partitions is None: 196 | self.conn.create(self.partition_path, value=self.partitions) 197 | return 198 | 199 | if new_partitions != self.partitions: 200 | self.partitions = new_partitions 201 | self.rebalance() 202 | 203 | self.partitions_collected.set() 204 | 205 | def add_partitions(self, partitions): 206 | """ 207 | Ensures that the ``SharedSet`` contains the given partitions. 208 | 209 | The ``partitions`` argument should be a dictionary keyed on 210 | topic names who's values are lists of associated partition IDs. 211 | """ 212 | new_partitions = set() 213 | for topic, partition_ids in six.iteritems(partitions): 214 | new_partitions.update(set([ 215 | ":".join([topic, str(partition_id)]) 216 | for partition_id in partition_ids 217 | ])) 218 | 219 | log.info( 220 | "Attempting to add %d partitions to consumer group '%s'", 221 | len(new_partitions), self.group_name 222 | ) 223 | 224 | wait_on_event(self.connected) 225 | 226 | self.shared_set.add_items(new_partitions) 227 | 228 | def remove_partitions(self, old_partitions): 229 | """ 230 | Ensures that the ``SharedSet`` does *not* contain the given partitions. 231 | 232 | The ``partitions`` argument should be a dictionary keyed on 233 | topic names who's values are lists of associated partition IDs. 234 | """ 235 | log.info( 236 | "Attempting to remove %d partitions from consumer group '%s'", 237 | len(old_partitions), self.group_name 238 | ) 239 | wait_on_event(self.connected) 240 | 241 | self.shared_set.remove_items(set([ 242 | ":".join([topic, partition_id]) 243 | for topic, partition_id in six.iteritems(old_partitions) 244 | ])) 245 | 246 | def rebalance(self): 247 | """ 248 | Callback fired when membership or partition data changes. 249 | 250 | The ``allocator_fn`` is called on the new ``self.members`` and 251 | ``self.partitions`` lists to determine the mapping of members 252 | to partitions. 253 | 254 | If an ``on_rebalance`` callback is configured it is called once 255 | done. 
256 | """ 257 | log.info("Rebalancing partitions for group '%s'", self.group_name) 258 | members = sorted(self.members) 259 | partitions = sorted(self.partitions) 260 | 261 | self.mapping = self.allocator_fn(members, partitions) 262 | 263 | for topic in self.allocation: 264 | log.debug( 265 | "Allocation for topic '%s': partitions %s", 266 | topic, ", ".join(map(str, self.allocation[topic])) 267 | ) 268 | 269 | if self.on_rebalance: 270 | self.on_rebalance() 271 | 272 | 273 | def create_attempter(f): 274 | """ 275 | Helper method for methods that call others and use ``Future`` directly. 276 | 277 | Returns a wrapper function that will set the given ``Future``'s exception 278 | state if the inner function call fails. 279 | """ 280 | def attempt(fn, *args, **kwargs): 281 | if f.done(): 282 | return 283 | 284 | try: 285 | fn(*args, **kwargs) 286 | except Exception as e: 287 | f.set_exception(e) 288 | 289 | return attempt 290 | -------------------------------------------------------------------------------- /kiel/zookeeper/party.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from kazoo import exceptions 4 | 5 | 6 | log = logging.getLogger(__name__) 7 | 8 | 9 | class Party(object): 10 | """ 11 | Represents a "party" recipe in Zookeeper. 12 | 13 | A party is a recipe where various clients "join" or "leave" (a loss of 14 | connection constituting a "leave") and each member is notified when 15 | membership changes. 16 | 17 | This is used in the Zookeeper-based ``GroupedConsumer`` in order to 18 | determine who and how many hosts to divvy up partitions to. 19 | """ 20 | def __init__(self, client, member_name, path, on_change): 21 | self.client = client 22 | 23 | self.member_name = member_name 24 | self.path = path 25 | 26 | self.on_change = on_change 27 | 28 | def start(self): 29 | """ 30 | Simple method that sets up the membership change callback. 31 | 32 | Expected to be called by potential members before the `join()` method. 33 | """ 34 | self.client.ensure_path(self.path) 35 | 36 | @self.client.ChildrenWatch(self.path) 37 | def member_change(members): 38 | self.on_change(members) 39 | 40 | def join(self): 41 | """ 42 | Establishes the client as a "member" of the party. 43 | 44 | This is done by creating an ephemeral child node of the party's root 45 | path unique to this member. If the path of the child node exists but 46 | this client isn't the owner, the node is re-created in a transaction 47 | to establish ownership. 48 | 49 | .. note:: 50 | It is important that the child node is *ephemeral* so that lost 51 | connections are indistinguishable from "leaving" the party. 52 | """ 53 | log.info("Joining %s party as %s", self.path, self.member_name) 54 | 55 | path = "/".join([self.path, self.member_name]) 56 | 57 | znode = self.client.exists(path) 58 | 59 | if not znode: 60 | log.debug("ZNode at %s does not exist, creating new one.", path) 61 | self.client.create(path, ephemeral=True, makepath=True) 62 | elif znode.owner_session_id != self.client.client_id[0]: 63 | log.debug("ZNode at %s not owned by us, recreating.", path) 64 | txn = self.client.transaction() 65 | txn.delete(path) 66 | txn.create(path, ephemeral=True) 67 | txn.commit() 68 | 69 | def leave(self): 70 | """ 71 | Simple method that marks the client as having left the party. 72 | 73 | A simple matter of deleting the ephemeral child node associated with 74 | the client. If the node is already deleted this becomes a no-op. 
75 | """ 76 | log.info("Leaving %s party as %s", self.path, self.member_name) 77 | path = "/".join([self.path, self.member_name]) 78 | 79 | try: 80 | self.client.delete(path) 81 | except exceptions.NoNodeError: 82 | pass 83 | -------------------------------------------------------------------------------- /kiel/zookeeper/shared_set.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | 4 | 5 | log = logging.getLogger(__name__) 6 | 7 | 8 | class SharedSet(object): 9 | """ 10 | A simple "set" construct in Zookeeper with locking and change callbacks. 11 | 12 | Used by the Zookeeper-based ``GroupedConsumer`` to represent the shared set 13 | of topic partitions divvied up among the group. 14 | """ 15 | def __init__(self, client, path, on_change): 16 | self.client = client 17 | 18 | self.path = path 19 | 20 | self.on_change = on_change 21 | 22 | @property 23 | def lock_path(self): 24 | """ 25 | Property representing the znode path of the shared lock. 26 | """ 27 | return self.path + "/lock" 28 | 29 | def start(self): 30 | """ 31 | Creates the set's znode path and attaches the data-change callback. 32 | """ 33 | self.client.ensure_path(self.path) 34 | 35 | @self.client.DataWatch(self.path) 36 | def set_changed(data, _): 37 | if data is not None: 38 | data = self.deserialize(data) 39 | 40 | self.on_change(data) 41 | 42 | def add_items(self, new_items): 43 | """ 44 | Updates the shared set's data with the given new items added. 45 | 46 | If all of the given items are already present, no data is updated. 47 | 48 | Works entirely behind a zookeeper lock to combat resource contention 49 | among sharers of the set. 50 | """ 51 | with self.client.Lock(self.lock_path): 52 | existing_items = self.deserialize(self.client.get(self.path)[0]) 53 | if not existing_items: 54 | existing_items = set() 55 | 56 | if new_items.issubset(existing_items): 57 | return 58 | 59 | existing_items.update(new_items) 60 | 61 | self.client.set( 62 | self.path, self.serialize(existing_items) 63 | ) 64 | 65 | def remove_items(self, old_items): 66 | """ 67 | Updates the shared set's data with the given items removed. 68 | 69 | If none of the given items are present, no data is updated. 70 | 71 | Works entirely behind a zookeeper lock to combat resource contention 72 | among sharers of the set. 73 | """ 74 | with self.client.Lock(self.lock_path): 75 | existing_items = self.deserialize(self.client.get(self.path)[0]) 76 | 77 | if old_items.isdisjoint(existing_items): 78 | return 79 | 80 | existing_items.difference_update(old_items) 81 | 82 | self.client.set(self.path, self.serialize(existing_items)) 83 | 84 | def serialize(self, data): 85 | """ 86 | Serializes the set data as a list in a JSON string. 87 | """ 88 | return json.dumps(list(data)) 89 | 90 | def deserialize(self, data): 91 | """ 92 | Parses a given JSON string as a list, converts to a python set. 
93 | """ 94 | return set(json.loads(data or "[]")) 95 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [build_sphinx] 2 | source_dir = docs 3 | build_dir = .docbuild 4 | all_files = 1 5 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | from kiel import __version__ 4 | 5 | 6 | setup( 7 | name="kiel", 8 | version=__version__, 9 | description="Kafka client for tornado async applications.", 10 | author="William Glass", 11 | author_email="william.glass@gmail.com", 12 | url="http://github.com/wglass/kiel", 13 | license="Apache", 14 | keywords=["kafka", "tornado", "async"], 15 | packages=find_packages(exclude=["tests", "tests.*"]), 16 | install_requires=[ 17 | "tornado>=4.1", 18 | "kazoo", 19 | "six", 20 | ], 21 | extras_require={ 22 | "snappy": [ 23 | "python-snappy" 24 | ] 25 | }, 26 | tests_require=[ 27 | "nose", 28 | "mock", 29 | "coverage", 30 | "flake8", 31 | ], 32 | classifiers=[ 33 | "Development Status :: 4 - Beta", 34 | "Intended Audience :: Developers", 35 | "License :: OSI Approved :: Apache Software License", 36 | "Operating System :: MacOS", 37 | "Operating System :: MacOS :: MacOS X", 38 | "Operating System :: POSIX", 39 | "Operating System :: POSIX :: Linux", 40 | "Operating System :: Unix", 41 | "Programming Language :: Python", 42 | "Programming Language :: Python :: 2.7", 43 | "Programming Language :: Python :: 3.4", 44 | "Programming Language :: Python :: 3.5", 45 | "Programming Language :: Python :: Implementation", 46 | "Programming Language :: Python :: Implementation :: CPython", 47 | "Programming Language :: Python :: Implementation :: PyPy", 48 | "Topic :: Software Development", 49 | "Topic :: Software Development :: Libraries", 50 | "Topic :: Software Development :: Libraries :: Python Modules", 51 | ], 52 | ) 53 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wglass/kiel/12664fae522d95536c977bc3868951e7dce3daa0/tests/__init__.py -------------------------------------------------------------------------------- /tests/cases/__init__.py: -------------------------------------------------------------------------------- 1 | from .async import AsyncTestCase # noqa 2 | from .client import ClientTestCase # noqa 3 | -------------------------------------------------------------------------------- /tests/cases/async.py: -------------------------------------------------------------------------------- 1 | from tornado import testing, concurrent 2 | 3 | 4 | class AsyncTestCase(testing.AsyncTestCase): 5 | 6 | def future_value(self, value): 7 | f = concurrent.Future() 8 | f.set_result(value) 9 | return f 10 | 11 | def future_error(self, exc, instance=None, tb=None): 12 | f = concurrent.Future() 13 | if instance is None or tb is None: 14 | f.set_exception(exc) 15 | else: 16 | f.set_exc_info((exc, instance, tb)) 17 | 18 | return f 19 | -------------------------------------------------------------------------------- /tests/cases/client.py: -------------------------------------------------------------------------------- 1 | import collections 2 | 3 | import six 4 | from tornado import gen 5 | from mock import patch, 
Mock 6 | 7 | from .async import AsyncTestCase 8 | 9 | 10 | class ClientTestCase(AsyncTestCase): 11 | 12 | def setUp(self): 13 | super(ClientTestCase, self).setUp() 14 | 15 | self.topic_leaders = {} 16 | self.mock_brokers = {} 17 | 18 | self.responses = collections.defaultdict(dict) 19 | self.requests_by_broker = collections.defaultdict(list) 20 | 21 | cluster_patcher = patch("kiel.clients.client.Cluster") 22 | MockCluster = cluster_patcher.start() 23 | self.addCleanup(cluster_patcher.stop) 24 | 25 | cluster = MockCluster.return_value 26 | cluster.topics = collections.defaultdict(list) 27 | cluster.leaders = collections.defaultdict(dict) 28 | 29 | def check_known_broker(broker_id): 30 | return broker_id in self.mock_brokers 31 | 32 | cluster.__contains__.side_effect = check_known_broker 33 | 34 | def get_mock_broker(broker_id): 35 | return self.mock_brokers[broker_id] 36 | 37 | cluster.__getitem__.side_effect = get_mock_broker 38 | 39 | def iterate_broker_ids(): 40 | return iter(self.mock_brokers) 41 | 42 | cluster.__iter__.side_effect = iterate_broker_ids 43 | 44 | def get_leader(topic, partition): 45 | return cluster.leaders[topic][partition] 46 | 47 | cluster.get_leader.side_effect = get_leader 48 | 49 | @gen.coroutine 50 | def refresh_metadata(): 51 | cluster.topics.clear() 52 | cluster.leaders.clear() 53 | for topic, leaders in six.iteritems(self.topic_leaders): 54 | for partition in list(range(len(leaders))): 55 | cluster.topics[topic].append(partition) 56 | cluster.leaders[topic][partition] = leaders[partition] 57 | 58 | cluster.start.side_effect = refresh_metadata 59 | cluster.heal.side_effect = refresh_metadata 60 | cluster.stop.return_value = self.future_value(None) 61 | 62 | def add_broker(self, host, port, broker_id): 63 | broker = Mock() 64 | 65 | @gen.coroutine 66 | def mock_send(request): 67 | self.requests_by_broker[broker_id].append(request) 68 | response = self.responses[broker_id][request.api].pop(0) 69 | if isinstance(response, Exception): 70 | raise response 71 | response.correlation_id = request.correlation_id 72 | raise gen.Return(response) 73 | 74 | broker.send.side_effect = mock_send 75 | 76 | self.mock_brokers[broker_id] = broker 77 | 78 | def add_topic(self, topic_name, leaders): 79 | self.topic_leaders[topic_name] = leaders 80 | 81 | def set_responses(self, broker_id, api, responses): 82 | self.responses[broker_id][api] = responses 83 | 84 | def assert_sent(self, broker_id, request): 85 | for sent in self.requests_by_broker[broker_id]: 86 | if request == sent: 87 | return 88 | 89 | raise AssertionError( 90 | "Request not sent to broker %s: %s" % (broker_id, request) 91 | ) 92 | -------------------------------------------------------------------------------- /tests/clients/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wglass/kiel/12664fae522d95536c977bc3868951e7dce3daa0/tests/clients/__init__.py -------------------------------------------------------------------------------- /tests/clients/test_client.py: -------------------------------------------------------------------------------- 1 | from tests import cases 2 | 3 | from mock import Mock 4 | from tornado import testing, iostream 5 | 6 | from kiel import exc 7 | from kiel.clients import client 8 | 9 | 10 | class ClientTests(cases.ClientTestCase): 11 | 12 | def setUp(self): 13 | super(ClientTests, self).setUp() 14 | 15 | self.add_broker("kafka01", 9202, broker_id=1) 16 | self.add_broker("kafka02", 9202, broker_id=8) 17 | 18 | def 
test_instantiation(self): 19 | c = client.Client(["kafka01", "kafka02"]) 20 | 21 | self.assertEqual(c.closing, False) 22 | self.assertEqual(c.heal_cluster, False) 23 | 24 | @testing.gen_test 25 | def test_wind_down_must_be_implemented(self): 26 | c = client.Client([]) 27 | 28 | error = None 29 | try: 30 | yield c.wind_down() 31 | except Exception as e: 32 | error = e 33 | 34 | self.assertIsInstance(error, NotImplementedError) 35 | 36 | @testing.gen_test 37 | def test_connect_starts_cluster(self): 38 | c = client.Client([]) 39 | 40 | yield c.connect() 41 | 42 | c.cluster.start.assert_called_once_with() 43 | 44 | @testing.gen_test 45 | def test_close(self): 46 | c = client.Client([]) 47 | c.wind_down = Mock() 48 | c.wind_down.return_value = self.future_value(None) 49 | 50 | yield c.close() 51 | 52 | self.assertEqual(c.closing, True) 53 | c.wind_down.assert_called_once_with() 54 | c.cluster.stop.assert_called_once_with() 55 | 56 | @testing.gen_test 57 | def test_send_connection_error(self): 58 | self.set_responses( 59 | broker_id=1, api="metadata", 60 | responses=[ 61 | exc.BrokerConnectionError("kafka01", 1234) 62 | ] 63 | ) 64 | 65 | c = client.Client(["kafka01", "kafka02"]) 66 | 67 | request = Mock(api="metadata") 68 | 69 | results = yield c.send({1: request}) 70 | 71 | self.assert_sent(1, request) 72 | 73 | c.cluster.heal.assert_called_once_with() 74 | 75 | self.assertEqual(results, {}) 76 | 77 | @testing.gen_test 78 | def test_send_stream_closed(self): 79 | self.set_responses( 80 | broker_id=1, api="metadata", 81 | responses=[ 82 | iostream.StreamClosedError(), 83 | ] 84 | ) 85 | 86 | c = client.Client(["kafka01", "kafka02"]) 87 | 88 | request = Mock(api="metadata") 89 | 90 | results = yield c.send({1: request}) 91 | 92 | self.assert_sent(1, request) 93 | 94 | self.assertEqual(c.cluster.heal.called, False) 95 | 96 | self.assertEqual(results, {}) 97 | 98 | @testing.gen_test 99 | def test_send_error_on_one_broker(self): 100 | metadata_response = Mock(api="group_coordinator") 101 | self.set_responses( 102 | broker_id=1, api="metadata", 103 | responses=[Exception()], 104 | ) 105 | self.set_responses( 106 | broker_id=8, api="group_coordinator", 107 | responses=[metadata_response] 108 | ) 109 | 110 | c = client.Client(["kafka01", "kafka02"]) 111 | c.handle_group_coordinator_response = Mock() 112 | 113 | request1 = Mock(api="metadata") 114 | request2 = Mock(api="group_coordinator") 115 | 116 | results = yield c.send({1: request1, 8: request2}) 117 | 118 | self.assertEqual( 119 | results, 120 | {8: c.handle_group_coordinator_response.return_value} 121 | ) 122 | 123 | self.assert_sent(1, request1) 124 | self.assert_sent(8, request2) 125 | 126 | c.cluster.heal.assert_called_once_with() 127 | 128 | @testing.gen_test 129 | def test_send_no_handler(self): 130 | self.set_responses( 131 | broker_id=8, api="produce", 132 | responses=[Mock(api="produce")], 133 | ) 134 | 135 | c = client.Client(["kafka01", "kafka02"]) 136 | 137 | error = None 138 | try: 139 | yield c.send({8: Mock(api="produce")}) 140 | except Exception as e: 141 | error = e 142 | 143 | self.assertIsInstance(error, exc.UnhandledResponseError) 144 | self.assertEqual(error.api, "produce") 145 | 146 | @testing.gen_test 147 | def test_send_with_handlers(self): 148 | metadata_response = Mock(api="group_coordinator") 149 | fetch_response = Mock(api="fetch") 150 | self.set_responses( 151 | broker_id=1, api="fetch", 152 | responses=[fetch_response], 153 | ) 154 | self.set_responses( 155 | broker_id=8, api="group_coordinator", 156 | 
responses=[metadata_response] 157 | ) 158 | 159 | c = client.Client(["kafka01", "kafka02"]) 160 | c.handle_group_coordinator_response = Mock() 161 | c.handle_fetch_response = Mock() 162 | 163 | request1 = Mock(api="fetch") 164 | request2 = Mock(api="group_coordinator") 165 | 166 | results = yield c.send({1: request1, 8: request2}) 167 | 168 | self.assertEqual( 169 | results, 170 | { 171 | 1: c.handle_fetch_response.return_value, 172 | 8: c.handle_group_coordinator_response.return_value 173 | } 174 | ) 175 | 176 | self.assert_sent(1, request1) 177 | self.assert_sent(8, request2) 178 | 179 | c.handle_group_coordinator_response.assert_called_once_with( 180 | metadata_response 181 | ) 182 | c.handle_fetch_response.assert_called_once_with(fetch_response) 183 | 184 | self.assertFalse(c.cluster.heal.called) 185 | 186 | @testing.gen_test 187 | def test_send_with_async_handlers(self): 188 | self.set_responses( 189 | broker_id=1, api="group_coordinator", 190 | responses=[Mock(api="group_coordinator")], 191 | ) 192 | self.set_responses( 193 | broker_id=8, api="fetch", 194 | responses=[Mock(api="fetch")], 195 | ) 196 | 197 | c = client.Client(["kafka01", "kafka02"]) 198 | c.handle_group_coordinator_response = Mock() 199 | c.handle_group_coordinator_response.return_value = self.future_value( 200 | "metadata handled!" 201 | ) 202 | c.handle_fetch_response = Mock() 203 | c.handle_fetch_response.return_value = self.future_value( 204 | "fetch handled!" 205 | ) 206 | 207 | results = yield c.send( 208 | {1: Mock(api="group_coordinator"), 8: Mock(api="fetch")} 209 | ) 210 | 211 | self.assertEqual( 212 | results, 213 | { 214 | 1: "metadata handled!", 215 | 8: "fetch handled!", 216 | } 217 | ) 218 | 219 | self.assertFalse(c.cluster.heal.called) 220 | 221 | @testing.gen_test 222 | def test_send_handler_sets_heal_flag(self): 223 | self.set_responses( 224 | broker_id=1, api="fetch", 225 | responses=[Mock(api="fetch")], 226 | ) 227 | self.set_responses( 228 | broker_id=8, api="offset", 229 | responses=[Mock(api="offset")], 230 | ) 231 | 232 | c = client.Client(["kafka01", "kafka02"]) 233 | c.handle_fetch_response = Mock() 234 | c.handle_offset_response = Mock() 235 | 236 | def handle_response(response): 237 | c.heal_cluster = True 238 | return "%s handled!" 
% response.api 239 | 240 | c.handle_fetch_response.side_effect = handle_response 241 | c.handle_offset_response.side_effect = handle_response 242 | 243 | results = yield c.send({1: Mock(api="fetch"), 8: Mock(api="offset")}) 244 | 245 | self.assertEqual( 246 | results, 247 | { 248 | 1: "fetch handled!", 249 | 8: "offset handled!", 250 | } 251 | ) 252 | 253 | c.cluster.heal.assert_called_once_with() 254 | -------------------------------------------------------------------------------- /tests/compression/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wglass/kiel/12664fae522d95536c977bc3868951e7dce3daa0/tests/compression/__init__.py -------------------------------------------------------------------------------- /tests/compression/test_gzip.py: -------------------------------------------------------------------------------- 1 | import json 2 | import unittest 3 | 4 | from kiel.compression import gzip 5 | 6 | 7 | class GZipCompressionTests(unittest.TestCase): 8 | 9 | def test_compression_is_stable(self): 10 | data = json.dumps({"foo": "bar", "blee": "bloo", "dog": "cat"}) 11 | data = data.encode("utf-8") 12 | 13 | data = gzip.compress(data) 14 | data = gzip.decompress(data) 15 | data = gzip.compress(data) 16 | data = gzip.decompress(data) 17 | 18 | self.assertEqual( 19 | json.loads(data.decode("utf-8")), 20 | {"foo": "bar", "blee": "bloo", "dog": "cat"} 21 | ) 22 | -------------------------------------------------------------------------------- /tests/compression/test_snappy.py: -------------------------------------------------------------------------------- 1 | import json 2 | import struct 3 | import unittest 4 | 5 | from mock import patch 6 | 7 | from kiel.compression import snappy 8 | 9 | 10 | @unittest.skipUnless(snappy.snappy_available, "requires python-snappy") 11 | class SnappyCompressionTests(unittest.TestCase): 12 | 13 | @patch.object(snappy, "snappy_available", False) 14 | def test_compress_runtime_error_if_snappy_unavailable(self): 15 | self.assertRaises( 16 | RuntimeError, 17 | snappy.compress, "foo" 18 | ) 19 | 20 | @patch.object(snappy, "snappy_available", False) 21 | def test_decompress_runtime_error_if_snappy_unavailable(self): 22 | self.assertRaises( 23 | RuntimeError, 24 | snappy.decompress, "foo" 25 | ) 26 | 27 | def test_compression_is_stable(self): 28 | data = json.dumps({"foo": "bar", "dog": "cat"}).encode("utf-8") 29 | 30 | data = snappy.compress(data) 31 | data = snappy.decompress(data) 32 | data = snappy.compress(data) 33 | data = snappy.decompress(data) 34 | 35 | self.assertEqual( 36 | json.loads(data.decode("utf-8")), 37 | {"foo": "bar", "dog": "cat"} 38 | ) 39 | 40 | def test_compression_includes_magic_header(self): 41 | data = json.dumps( 42 | {"foo": "bar", "blee": "bloo", "dog": "cat"}).encode("utf-8") 43 | data = snappy.compress(data) 44 | 45 | header = struct.unpack_from("!bccccccbii", data) 46 | 47 | self.assertEqual( 48 | header, 49 | (-126, b'S', b'N', b'A', b'P', b'P', b'Y', 0, 1, 1) 50 | ) 51 | -------------------------------------------------------------------------------- /tests/protocol/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wglass/kiel/12664fae522d95536c977bc3868951e7dce3daa0/tests/protocol/__init__.py -------------------------------------------------------------------------------- /tests/protocol/test_messages.py: 
-------------------------------------------------------------------------------- 1 | import json 2 | import unittest 3 | 4 | from kiel.protocol import messages 5 | 6 | 7 | class MessagesTests(unittest.TestCase): 8 | 9 | def test_message_repr(self): 10 | msg = messages.Message( 11 | crc=0, 12 | magic=0, 13 | attributes=0, 14 | key="foo", 15 | value=json.dumps({"bar": "bazz"}) 16 | ) 17 | 18 | self.assertEqual(repr(msg), 'foo => {"bar": "bazz"}') 19 | 20 | def test_messageset_repr(self): 21 | msg1 = messages.Message( 22 | crc=0, 23 | magic=0, 24 | attributes=0, 25 | key="foo", 26 | value=json.dumps({"bar": "bazz"}) 27 | ) 28 | msg2 = messages.Message( 29 | crc=0, 30 | magic=0, 31 | attributes=0, 32 | key="bar", 33 | value=json.dumps({"bwee": "bwoo"}) 34 | ) 35 | msg_set = messages.MessageSet([(10, msg1), (11, msg2)]) 36 | 37 | self.assertEqual( 38 | repr(msg_set), 39 | '[foo => {"bar": "bazz"}, bar => {"bwee": "bwoo"}]' 40 | ) 41 | -------------------------------------------------------------------------------- /tests/protocol/test_primitives.py: -------------------------------------------------------------------------------- 1 | import struct 2 | import unittest 3 | import zlib 4 | 5 | import six 6 | 7 | from kiel.protocol import primitives 8 | 9 | 10 | class PrimitivesTests(unittest.TestCase): 11 | 12 | def test_string_repr(self): 13 | s = primitives.String(u"foobar") 14 | 15 | self.assertEqual(repr(s), repr(u"foobar")) 16 | 17 | def test_array_repr(self): 18 | a = primitives.Array.of(primitives.Int32)([1, 3, 6, 9]) 19 | 20 | self.assertEqual(repr(a), "[1, 3, 6, 9]") 21 | 22 | def test_string_render_parse_is_stable(self): 23 | s = primitives.String(u"foobar") 24 | 25 | fmt, values = s.render() 26 | 27 | raw = struct.pack("!" + fmt, *values) 28 | 29 | value, _ = primitives.String.parse(raw, 0) 30 | 31 | self.assertEqual(value, u"foobar") 32 | 33 | def test_string_render_parse_handles_nonstrings(self): 34 | s = primitives.String(123) 35 | 36 | fmt, values = s.render() 37 | 38 | raw = struct.pack("!" + fmt, *values) 39 | 40 | value, _ = primitives.String.parse(raw, 0) 41 | 42 | self.assertEqual(value, u"123") 43 | 44 | def test_bytes_render_parse_is_stable(self): 45 | b = primitives.Bytes(u"foobar") 46 | 47 | fmt, values = b.render() 48 | 49 | raw = struct.pack("!" + fmt, *values) 50 | 51 | value, _ = primitives.Bytes.parse(raw, 0) 52 | 53 | self.assertEqual(value, u"foobar") 54 | 55 | def test_bytes_render_parse_handles_nonstrings(self): 56 | s = primitives.Bytes(123) 57 | 58 | fmt, values = s.render() 59 | 60 | raw = struct.pack("!" + fmt, *values) 61 | 62 | value, _ = primitives.Bytes.parse(raw, 0) 63 | 64 | self.assertEqual(value, u"123") 65 | 66 | def test_bytes_render_parse_handles_compressed_data(self): 67 | data = zlib.compress(six.b("Best of times, blurst of times.")) 68 | 69 | s = primitives.Bytes(data) 70 | 71 | fmt, values = s.render() 72 | 73 | raw = struct.pack("!" 
+ fmt, *values) 74 | 75 | value, _ = primitives.Bytes.parse(raw, 0) 76 | 77 | self.assertEqual(value, data) 78 | -------------------------------------------------------------------------------- /tests/test_connection.py: -------------------------------------------------------------------------------- 1 | import struct 2 | 3 | from tests import cases 4 | 5 | from tornado import testing 6 | from mock import patch, Mock 7 | 8 | from kiel import exc 9 | from kiel.protocol import metadata 10 | from kiel.connection import Connection 11 | 12 | 13 | class ConnectionTests(cases.AsyncTestCase): 14 | 15 | @patch("tornado.iostream.IOStream") 16 | @testing.gen_test 17 | def test_connect_sets_stream(self, IOStream): 18 | IOStream.return_value.connect.return_value = self.future_value(None) 19 | 20 | conn = Connection("localhost", 1234) 21 | 22 | self.assertEqual(conn.stream, None) 23 | 24 | yield conn.connect() 25 | 26 | self.assertEqual(conn.stream, IOStream.return_value) 27 | IOStream.return_value.connect.assert_called_once_with( 28 | ("localhost", 1234) 29 | ) 30 | 31 | def test_close(self): 32 | conn = Connection("localhost", 1234) 33 | conn.stream = Mock() 34 | 35 | self.assertEqual(conn.closing, False) 36 | 37 | conn.close() 38 | 39 | self.assertEqual(conn.closing, True) 40 | conn.stream.close.assert_called_once_with() 41 | 42 | @testing.gen_test 43 | def test_send_when_closing_causes_error(self): 44 | error = None 45 | 46 | conn = Connection("localhost", 1234) 47 | conn.closing = True 48 | 49 | try: 50 | yield conn.send(Mock()) 51 | except exc.BrokerConnectionError as e: 52 | error = e 53 | 54 | self.assertEqual(error.host, "localhost") 55 | self.assertEqual(error.port, 1234) 56 | 57 | @testing.gen_test 58 | def test_future_error_writing_to_stream_aborts(self): 59 | 60 | class FakeException(Exception): 61 | pass 62 | 63 | conn = Connection("localhost", 1234) 64 | conn.stream = Mock() 65 | conn.stream.write.return_value = self.future_error( 66 | FakeException("oh no!") 67 | ) 68 | 69 | error = None 70 | 71 | try: 72 | yield conn.send(metadata.MetadataRequest()) 73 | except FakeException as e: 74 | error = e 75 | 76 | self.assertEqual(str(error), "oh no!") 77 | self.assertEqual(conn.closing, True) 78 | conn.stream.close.assert_called_once_with() 79 | 80 | @testing.gen_test 81 | def test_immediate_error_writing_to_stream_aborts(self): 82 | 83 | class FakeException(Exception): 84 | pass 85 | 86 | conn = Connection("localhost", 1234) 87 | conn.stream = Mock() 88 | conn.stream.write.side_effect = FakeException("oh no!") 89 | 90 | error = None 91 | 92 | try: 93 | yield conn.send(metadata.MetadataRequest()) 94 | except FakeException as e: 95 | error = e 96 | 97 | self.assertEqual(str(error), "oh no!") 98 | self.assertEqual(conn.closing, True) 99 | conn.stream.close.assert_called_once_with() 100 | 101 | @patch.object(Connection, "read_message") 102 | @testing.gen_test 103 | def test_correlates_responses(self, read_message): 104 | request1 = metadata.MetadataRequest() 105 | request2 = metadata.MetadataRequest(topics=["example.foo"]) 106 | 107 | response1 = metadata.MetadataResponse( 108 | brokers=[metadata.Broker(broker_id=1, host="broker01", port=333)], 109 | topics=[ 110 | metadata.TopicMetadata(error_code=0, name="example.foo"), 111 | metadata.TopicMetadata(error_code=0, name="example.bar"), 112 | ] 113 | ) 114 | response1.correlation_id = request1.correlation_id 115 | response2 = metadata.MetadataResponse( 116 | brokers=[metadata.Broker(broker_id=1, host="broker01", port=333)], 117 | topics=[ 118 | 
metadata.TopicMetadata(error_code=0, name="example.foo"), 119 | ] 120 | ) 121 | response2.correlation_id = request2.correlation_id 122 | 123 | # response2 comes over the wire before response1 124 | responses = [response2, response1] 125 | 126 | def get_next_response(*args): 127 | return self.future_value(responses.pop(0)) 128 | 129 | read_message.side_effect = get_next_response 130 | 131 | conn = Connection("localhost", 1234) 132 | conn.stream = Mock() 133 | conn.stream.write.return_value = self.future_value(None) 134 | 135 | actual_responses = [conn.send(request1), conn.send(request2)] 136 | 137 | yield conn.read_loop() 138 | 139 | # first response is the one with two topics 140 | self.assertEqual(len(actual_responses[0].result().topics), 2) 141 | self.assertEqual(len(actual_responses[1].result().topics), 1) 142 | 143 | @patch.object(Connection, "read_message") 144 | @testing.gen_test 145 | def test_abort_fails_all_pending_requests(self, read_message): 146 | request1 = metadata.MetadataRequest() 147 | request2 = metadata.MetadataRequest(topics=["example.foo"]) 148 | 149 | mock_responses = [ 150 | Mock(correlation_id=request1.correlation_id), 151 | Mock(correlation_id=request2.correlation_id), 152 | ] 153 | 154 | def get_next_response(*args): 155 | return self.future_value(mock_responses.pop(0)) 156 | 157 | read_message.side_effect = get_next_response 158 | 159 | conn = Connection("localhost", 1234) 160 | conn.stream = Mock() 161 | conn.stream.write.return_value = self.future_value(None) 162 | 163 | responses = [conn.send(request1), conn.send(request2)] 164 | 165 | conn.abort() 166 | conn.abort() # second abort is a no-op 167 | 168 | for response in responses: 169 | error = response.exception() 170 | self.assertEqual(error.host, "localhost") 171 | self.assertEqual(error.port, 1234) 172 | 173 | @testing.gen_test 174 | def test_read_message(self): 175 | response_format = "".join([ 176 | "!", "i", "i", "h%dsi" % len("broker01"), # array of brokers 177 | "i", "hh%ds" % len("example.foo"), # array of topics 178 | "i", # subarray of partitions 179 | "hii", "i", "ii", "i", "i", # partition 1 details 180 | "hii", "i", "ii", "i", "ii", # partition 2 details 181 | ]) 182 | raw_response = struct.pack( 183 | response_format, 184 | 1, # there is 1 broker 185 | 8, len("broker01"), b"broker01", 1234, # broker id,host,port 186 | 1, # there is 1 topic 187 | 0, len("example.foo"), b"example.foo", # topic name, no error 188 | 2, # there are 2 partitions 189 | 0, 1, 1, # partition ID 1, leader is broker 1 190 | 2, 2, 3, 1, 2, # two replicas: on 2 & 3, one ISR: broker 2 191 | 0, 2, 3, # partition ID 2, leader is broker 3 192 | 2, 1, 2, 2, 2, 1, # two replicas: on 1 & 2, both are in ISR set 193 | ) 194 | 195 | raw_data = [ 196 | # size of full response (incl.
correlation) 197 | struct.pack("!i", struct.calcsize(response_format) + 4), 198 | struct.pack("!i", 555), # correlation id 199 | raw_response 200 | ] 201 | 202 | def get_raw_data(*args): 203 | return self.future_value(raw_data.pop(0)) 204 | 205 | conn = Connection("localhost", 1234) 206 | conn.api_correlation = {555: "metadata"} 207 | 208 | conn.stream = Mock() 209 | conn.stream.read_bytes.side_effect = get_raw_data 210 | 211 | message = yield conn.read_message() 212 | 213 | expected = metadata.MetadataResponse( 214 | brokers=[ 215 | metadata.Broker(broker_id=8, host="broker01", port=1234) 216 | ], 217 | topics=[ 218 | metadata.TopicMetadata( 219 | error_code=0, name="example.foo", 220 | partitions=[ 221 | metadata.PartitionMetadata( 222 | error_code=0, 223 | partition_id=1, 224 | leader=1, 225 | replicas=[2, 3], 226 | isrs=[2] 227 | ), 228 | metadata.PartitionMetadata( 229 | error_code=0, 230 | partition_id=2, 231 | leader=3, 232 | replicas=[1, 2], 233 | isrs=[2, 1] 234 | ), 235 | ] 236 | ), 237 | ] 238 | ) 239 | 240 | self.assertEqual(message, expected) 241 | -------------------------------------------------------------------------------- /tests/test_constants.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from kiel import constants 4 | 5 | 6 | class ConstantsTests(unittest.TestCase): 7 | 8 | def test_all_error_codes_accounted_for(self): 9 | highest_known_error_code = 16 10 | unused_error_codes = {13} 11 | 12 | for code in range(-1, highest_known_error_code + 1): 13 | if code in unused_error_codes: 14 | continue 15 | 16 | self.assertIn(code, constants.ERROR_CODES.keys()) 17 | -------------------------------------------------------------------------------- /tests/test_docstrings.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | import re 3 | 4 | import kiel.clients.client 5 | import kiel.clients.consumer 6 | import kiel.clients.grouped 7 | import kiel.clients.producer 8 | import kiel.clients.single 9 | import kiel.cluster 10 | import kiel.compression.gzip 11 | import kiel.compression.snappy 12 | import kiel.connection 13 | import kiel.constants 14 | import kiel.events 15 | import kiel.exc 16 | import kiel.iterables 17 | import kiel.protocol.coordinator 18 | import kiel.protocol.describe_groups 19 | import kiel.protocol.fetch 20 | import kiel.protocol.join_group 21 | import kiel.protocol.leave_group 22 | import kiel.protocol.list_groups 23 | import kiel.protocol.messages 24 | import kiel.protocol.metadata 25 | import kiel.protocol.offset 26 | import kiel.protocol.offset_commit 27 | import kiel.protocol.offset_fetch 28 | import kiel.protocol.part 29 | import kiel.protocol.primitives 30 | import kiel.protocol.produce 31 | import kiel.protocol.request 32 | import kiel.protocol.response 33 | import kiel.protocol.sync_group 34 | import kiel.zookeeper.allocator 35 | import kiel.zookeeper.party 36 | import kiel.zookeeper.shared_set 37 | 38 | 39 | modules_to_test = ( 40 | kiel.clients.client, 41 | kiel.clients.consumer, 42 | kiel.clients.grouped, 43 | kiel.clients.producer, 44 | kiel.clients.single, 45 | kiel.cluster, 46 | kiel.compression.gzip, 47 | kiel.compression.snappy, 48 | kiel.connection, 49 | kiel.constants, 50 | kiel.events, 51 | kiel.exc, 52 | kiel.iterables, 53 | kiel.protocol.coordinator, 54 | kiel.protocol.describe_groups, 55 | kiel.protocol.fetch, 56 | kiel.protocol.join_group, 57 | kiel.protocol.leave_group, 58 | kiel.protocol.list_groups, 59 | 
kiel.protocol.messages, 60 | kiel.protocol.metadata, 61 | kiel.protocol.offset, 62 | kiel.protocol.offset_commit, 63 | kiel.protocol.offset_fetch, 64 | kiel.protocol.part, 65 | kiel.protocol.primitives, 66 | kiel.protocol.produce, 67 | kiel.protocol.request, 68 | kiel.protocol.response, 69 | kiel.protocol.sync_group, 70 | kiel.zookeeper.allocator, 71 | kiel.zookeeper.party, 72 | kiel.zookeeper.shared_set, 73 | ) 74 | 75 | 76 | def test_docstrings(): 77 | for module in modules_to_test: 78 | for path, thing in get_module_things(module): 79 | yield create_docstring_assert(path, thing) 80 | 81 | 82 | def get_module_things(module): 83 | module_name = module.__name__ 84 | 85 | for func_name, func in get_module_functions(module): 86 | if inspect.getmodule(func) != module: 87 | continue 88 | yield (module_name + "." + func_name, func) 89 | 90 | for class_name, klass in get_module_classes(module): 91 | if inspect.getmodule(klass) != module: 92 | continue 93 | yield (module_name + "." + class_name, klass) 94 | 95 | for method_name, method in get_class_methods(klass): 96 | if method_name not in klass.__dict__: 97 | continue 98 | yield (module_name + "." + class_name + ":" + method_name, method) 99 | 100 | 101 | def get_module_classes(module): 102 | for name, klass in inspect.getmembers(module, predicate=inspect.isclass): 103 | yield (name, klass) 104 | 105 | 106 | def get_module_functions(module): 107 | for name, func in inspect.getmembers(module, predicate=inspect.isfunction): 108 | yield (name, func) 109 | 110 | 111 | def get_class_methods(klass): 112 | for name, method in inspect.getmembers(klass, predicate=inspect.ismethod): 113 | yield (name, method) 114 | 115 | 116 | def create_docstring_assert(path, thing): 117 | 118 | def test_function(): 119 | assert_docstring_present(thing, path) 120 | # TODO(wglass): uncomment this assert and fill out the param info 121 | # for methods and functions 122 | # assert_docstring_includes_param_metadata(thing, path) 123 | 124 | test_name = "test_docstring__%s" % de_camelcase(path) 125 | test_function.__name__ = test_name 126 | test_function.description = test_name 127 | 128 | return test_function 129 | 130 | 131 | # TODO(wglass): remove __init__ from this when the param metadata assert is 132 | # re-enabled 133 | skipped_special_methods = ("__init__", "__str__", "__repr__") 134 | 135 | 136 | def assert_docstring_present(thing, path): 137 | if any([path.endswith(special) for special in skipped_special_methods]): 138 | return 139 | 140 | docstring = inspect.getdoc(thing) 141 | if not docstring or not docstring.strip(): 142 | raise AssertionError("No docstring present for %s" % path) 143 | 144 | 145 | def assert_docstring_includes_param_metadata(thing, path): 146 | if inspect.isclass(thing): 147 | return 148 | 149 | docstring = inspect.getdoc(thing) 150 | if not docstring: 151 | return 152 | 153 | for arg_name in inspect.getargspec(thing).args: 154 | if arg_name in ("self", "cls"): 155 | continue 156 | 157 | if ":param %s:" % arg_name not in docstring: 158 | raise AssertionError( 159 | "Missing :param: for arg %s of %s" % (arg_name, path) 160 | ) 161 | if ":type %s:" % arg_name not in docstring: 162 | raise AssertionError( 163 | "Missing :type: for arg %s of %s" % (arg_name, path) 164 | ) 165 | 166 | 167 | first_cap_re = re.compile('(.)([A-Z][a-z]+)') 168 | all_cap_re = re.compile('([a-z0-9])([A-Z])') 169 | 170 | 171 | def de_camelcase(name): 172 | return all_cap_re.sub( 173 | r'\1_\2', 174 | first_cap_re.sub(r'\1_\2', name) 175 | ).lower() 176 | 
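Note on the commented-out param-metadata assertion above: it expects Sphinx-style field lists in each docstring, one ":param <name>:" and one ":type <name>:" entry per argument (excluding self/cls). As a minimal sketch of a docstring shape that would satisfy assert_docstring_includes_param_metadata() once that TODO is resolved — the connect() function below is hypothetical and not part of kiel:

    def connect(host, port):
        """
        Open a connection to a broker.

        :param host: hostname of the broker
        :type host: str
        :param port: port the broker listens on
        :type port: int
        """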
-------------------------------------------------------------------------------- /tests/test_events.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | 4 | from mock import patch, Mock 5 | 6 | from kiel import events 7 | 8 | 9 | class EventsTests(unittest.TestCase): 10 | 11 | def test_wait_on_event_with_timeout(self): 12 | mock_event = Mock() 13 | 14 | events.wait_on_event(mock_event, timeout=60) 15 | 16 | mock_event.wait.assert_called_once_with(60) 17 | 18 | @patch("kiel.events.six") 19 | def test_wait_on_event_uses_no_timeout_on_py3(self, mock_six): 20 | mock_six.PY2 = False 21 | 22 | mock_event = Mock() 23 | 24 | events.wait_on_event(mock_event) 25 | 26 | mock_event.wait.assert_called_once_with() 27 | 28 | @patch("kiel.events.sys") 29 | @patch("kiel.events.six") 30 | def test_wait_on_event_uses_maxint_on_py2(self, mock_six, mock_sys): 31 | mock_six.PY2 = True 32 | 33 | mock_event = Mock() 34 | mock_event.is_set.return_value = False 35 | 36 | def set_event(*args): 37 | mock_event.is_set.return_value = True 38 | 39 | # set the event when we wait, otherwise the while loop would go forever 40 | mock_event.wait.side_effect = set_event 41 | 42 | events.wait_on_event(mock_event) 43 | 44 | mock_event.wait.assert_called_once_with(mock_sys.maxint) 45 | 46 | @patch("kiel.events.sys") 47 | @patch("kiel.events.six") 48 | def test_wait_on_event_set_event_is_noop_on_py2(self, mock_six, mock_sys): 49 | mock_six.PY2 = True 50 | 51 | mock_event = Mock() 52 | mock_event.is_set.return_value = True 53 | 54 | events.wait_on_event(mock_event) 55 | 56 | assert mock_event.wait.called is False 57 | -------------------------------------------------------------------------------- /tests/test_exc.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from kiel import exc 4 | 5 | 6 | class ExceptionTests(unittest.TestCase): 7 | 8 | def test_broker_connection_error(self): 9 | e = exc.BrokerConnectionError("kafka01", 4455) 10 | 11 | self.assertEqual(e.host, "kafka01") 12 | self.assertEqual(e.port, 4455) 13 | 14 | self.assertEqual(str(e), "Error connecting to kafka01:4455") 15 | 16 | def test_broker_connection_error_with_broker_id(self): 17 | e = exc.BrokerConnectionError("kafka01", 4455, broker_id=8) 18 | 19 | self.assertEqual(e.host, "kafka01") 20 | self.assertEqual(e.port, 4455) 21 | self.assertEqual(e.broker_id, 8) 22 | 23 | self.assertEqual(str(e), "Error connecting to kafka01:4455") 24 | 25 | def test_unhanded_response_error(self): 26 | e = exc.UnhandledResponseError("offset") 27 | 28 | self.assertEqual(e.api, "offset") 29 | 30 | self.assertEqual(str(e), "No handler method for 'offset' api") 31 | -------------------------------------------------------------------------------- /tests/test_iterables.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import unittest 3 | 4 | from kiel import iterables 5 | 6 | 7 | class IterablesTests(unittest.TestCase): 8 | 9 | def test_drain_on_list(self): 10 | data = ["foo", 1, "bar", 9] 11 | 12 | result = list(iterables.drain(data)) 13 | 14 | self.assertEqual(len(data), 0) 15 | self.assertEqual(result, [9, "bar", 1, "foo"]) 16 | 17 | def test_drain_on_deque(self): 18 | data = collections.deque(["foo", 1, "bar", 9]) 19 | 20 | result = list(iterables.drain(data)) 21 | 22 | self.assertEqual(len(data), 0) 23 | self.assertEqual(result, ["foo", 1, "bar", 9]) 24 | 25 | def test_drain_on_set(self): 26 | data = 
set(["foo", 1, "bar", 9]) 27 | 28 | result = list(iterables.drain(data)) 29 | 30 | self.assertEqual(len(data), 0) 31 | self.assertEqual(set(result), set(["foo", 1, "bar", 9])) 32 | 33 | def test_drain_on_dict(self): 34 | data = {"foo": 1, "bar": 9} 35 | 36 | result = {key: value for key, value in iterables.drain(data)} 37 | 38 | self.assertEqual(len(data), 0) 39 | self.assertEqual(result, {"foo": 1, "bar": 9}) 40 | -------------------------------------------------------------------------------- /tests/zookeeper/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wglass/kiel/12664fae522d95536c977bc3868951e7dce3daa0/tests/zookeeper/__init__.py -------------------------------------------------------------------------------- /tests/zookeeper/test_allocator.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from mock import patch 4 | 5 | from kiel.zookeeper import allocator 6 | 7 | 8 | @patch.object(allocator, "SharedSet") 9 | @patch.object(allocator, "Party") 10 | class PartitionAllocatorTests(unittest.TestCase): 11 | 12 | def setUp(self): 13 | super(PartitionAllocatorTests, self).setUp() 14 | 15 | kazoo_patcher = patch.object(allocator, "client") 16 | mock_client = kazoo_patcher.start() 17 | self.addCleanup(kazoo_patcher.stop) 18 | 19 | self.KazooClient = mock_client.KazooClient 20 | 21 | def test_defaults(self, Party, SharedSet): 22 | 23 | def alloc(members, partitions): 24 | pass 25 | 26 | a = allocator.PartitionAllocator( 27 | ["zk01", "zk02", "zk03"], "worker-group", "worker01:654321", 28 | allocator_fn=alloc 29 | ) 30 | 31 | self.assertEqual(a.zk_hosts, ["zk01", "zk02", "zk03"]) 32 | self.assertEqual(a.group_name, "worker-group") 33 | self.assertEqual(a.consumer_name, "worker01:654321") 34 | 35 | self.assertEqual(a.allocator_fn, alloc) 36 | self.assertEqual(a.on_rebalance, None) 37 | 38 | self.assertEqual(a.conn, self.KazooClient.return_value) 39 | self.assertEqual(a.party, Party.return_value) 40 | self.assertEqual(a.shared_set, SharedSet.return_value) 41 | 42 | self.assertEqual(a.members, set()) 43 | self.assertEqual(a.partitions, set()) 44 | self.assertEqual(a.mapping, {}) 45 | -------------------------------------------------------------------------------- /tests/zookeeper/test_party.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from mock import Mock 4 | from kazoo.exceptions import NoNodeError 5 | 6 | from kiel.zookeeper import party 7 | 8 | 9 | class PartyTests(unittest.TestCase): 10 | 11 | def test_start_ensures_path_and_watches_changes(self): 12 | client = Mock() 13 | 14 | def collect_callback(fn): 15 | client.change_callback = fn 16 | 17 | client.ChildrenWatch.return_value.side_effect = collect_callback 18 | 19 | on_change = Mock() 20 | 21 | p = party.Party(client, "host.local", "/my/party", on_change) 22 | 23 | p.start() 24 | 25 | client.ensure_path.assert_called_once_with("/my/party") 26 | client.ChildrenWatch.assert_called_once_with("/my/party") 27 | 28 | assert on_change.called is False 29 | 30 | client.change_callback(["foo", "bar"]) 31 | 32 | on_change.assert_called_once_with(["foo", "bar"]) 33 | 34 | def test_join_when_znode_does_not_exist(self): 35 | client = Mock() 36 | client.exists.return_value = None 37 | 38 | p = party.Party(client, "host.local", "/my/party", Mock()) 39 | 40 | p.join() 41 | 42 | client.exists.assert_called_once_with("/my/party/host.local") 43 | 
client.create.assert_called_once_with( 44 | "/my/party/host.local", ephemeral=True, makepath=True 45 | ) 46 | 47 | def test_join_when_znode_belongs_to_someone_else(self): 48 | client = Mock() 49 | client.exists.return_value = Mock(owner_session_id=1234) 50 | client.client_id = (4321, 0) 51 | 52 | p = party.Party(client, "host.local", "/my/party", Mock()) 53 | 54 | p.join() 55 | 56 | client.transaction.assert_called_once_with() 57 | transaction = client.transaction.return_value 58 | transaction.delete.assert_called_once_with("/my/party/host.local") 59 | transaction.create.assert_called_once_with( 60 | "/my/party/host.local", ephemeral=True 61 | ) 62 | transaction.commit.assert_called_once_with() 63 | 64 | def test_join_when_znode_belongs_to_us(self): 65 | client = Mock() 66 | client.exists.return_value = Mock(owner_session_id=1234) 67 | client.client_id = (1234, 0) 68 | 69 | p = party.Party(client, "host.local", "/my/party", Mock()) 70 | 71 | p.join() 72 | 73 | assert client.create.called is False 74 | assert client.transaction.called is False 75 | 76 | def test_leave(self): 77 | client = Mock() 78 | 79 | p = party.Party(client, "host.local", "/my/party", Mock()) 80 | 81 | p.leave() 82 | 83 | client.delete.assert_called_once_with("/my/party/host.local") 84 | 85 | def test_leave_znode_does_not_exist(self): 86 | client = Mock() 87 | client.delete.side_effect = NoNodeError 88 | 89 | p = party.Party(client, "host.local", "/my/party", Mock()) 90 | 91 | p.leave() 92 | 93 | client.delete.assert_called_once_with("/my/party/host.local") 94 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py27,py35,pypy 3 | skipsdist = True 4 | 5 | [testenv] 6 | usedevelop = True 7 | deps = 8 | python-snappy 9 | pytest 10 | pytest-cov 11 | pytest-flake8 12 | mock 13 | commands = pytest -q --disable-pytest-warnings --cov=kiel --cov-report= --flake8 {toxinidir} 14 | 15 | [testenv:pypy] 16 | usedevelop = True 17 | deps = 18 | python-snappy 19 | pytest 20 | mock 21 | commands = pytest -q --disable-pytest-warnings {toxinidir} 22 | --------------------------------------------------------------------------------
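For context on the Zookeeper helpers exercised by the tests above, a minimal usage sketch of the SharedSet class from kiel/zookeeper/shared_set.py. The ensemble address, znode path, and item strings below are illustrative assumptions only, and a reachable Zookeeper ensemble is assumed; the constructor, start(), add_items(), and remove_items() calls follow the signatures shown in that module.

    from kazoo.client import KazooClient

    from kiel.zookeeper.shared_set import SharedSet


    def report_change(current_items):
        # invoked via the DataWatch whenever the znode's data changes;
        # current_items may be None before any data has been written
        print("shared set is now: %s" % (current_items,))


    client = KazooClient(hosts="zk01:2181")  # hypothetical ensemble address
    client.start()

    shared = SharedSet(client, "/kiel/example/partitions", report_change)
    shared.start()

    # both methods take sets and update the znode behind the shared lock
    shared.add_items({"example.topic:0", "example.topic:1"})
    shared.remove_items({"example.topic:1"})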