├── .gitignore ├── .pre-commit-config.yaml ├── .travis.yml ├── CHANGELOG.md ├── LICENSE ├── Makefile ├── README.md ├── docker-compose.yml ├── docker ├── itest_0.8.2 │ ├── Dockerfile │ └── run_tests.sh ├── itest_0.9.0 │ ├── Dockerfile │ └── run_tests.sh ├── kafka_0.8.2 │ ├── Dockerfile │ └── config.properties ├── kafka_0.9.0 │ ├── Dockerfile │ └── config.properties └── zookeeper │ └── Dockerfile ├── docs ├── Makefile └── source │ ├── conf.py │ ├── config.rst │ ├── consumer.rst │ ├── consumer_group.rst │ ├── discovery.rst │ ├── error.rst │ ├── getting_started.rst │ ├── index.rst │ ├── monitoring.rst │ ├── offsets.rst │ ├── partitioner.rst │ ├── producer.rst │ └── utils.rst ├── setup.cfg ├── setup.py ├── tests ├── __init__.py ├── conftest.py ├── integration │ ├── conftest.py │ └── test_consumer.py ├── test_config.py ├── test_consumer.py ├── test_consumer_group.py ├── test_discovery.py ├── test_mocking.py ├── test_monitoring.py ├── test_offsets.py ├── test_partitioner.py ├── test_producer.py └── test_utils.py ├── tox.ini └── yelp_kafka ├── __init__.py ├── config.py ├── consumer.py ├── consumer_group.py ├── discovery.py ├── error.py ├── metrics.py ├── metrics_responder.py ├── monitoring.py ├── offsets.py ├── partitioner.py ├── producer.py ├── testing ├── __init__.py └── kafka_mock.py ├── utils.py └── yelp_metrics_responder.py /.gitignore: -------------------------------------------------------------------------------- 1 | test_venv/ 2 | virtualenv_run/ 3 | 4 | *.py[cod] 5 | 6 | # Packages 7 | *.egg 8 | *.egg-info 9 | dist 10 | build 11 | eggs 12 | parts 13 | bin 14 | var 15 | sdist 16 | develop-eggs 17 | .installed.cfg 18 | lib 19 | lib64 20 | __pycache__ 21 | 22 | # Installer logs 23 | pip-log.txt 24 | *.log 25 | 26 | # Unit test / coverage reports 27 | .coverage 28 | .tox 29 | 30 | # Project 31 | .ropeproject 32 | .pydevproject 33 | 34 | # Editors 35 | .*.sw? 
36 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | - repo: https://github.com/pre-commit/pre-commit-hooks.git 2 | sha: 29bf11d13689a0a9a895c41eb3591c7e942d377d 3 | hooks: 4 | - id: check-added-large-files 5 | language_version: python2.7 6 | - id: check-merge-conflict 7 | language_version: python2.7 8 | - id: trailing-whitespace 9 | language_version: python2.7 10 | - id: end-of-file-fixer 11 | language_version: python2.7 12 | - id: autopep8-wrapper 13 | language_version: python2.7 14 | args: [--ignore=E501, --in-place] 15 | - id: flake8 16 | language_version: python2.7 17 | args: [--ignore=E501] 18 | 19 | - repo: https://github.com/asottile/reorder_python_imports.git 20 | sha: f3dfe379d2ea341c6cf54d926d4585b35dea9251 21 | hooks: 22 | - id: reorder-python-imports 23 | files: .*\.py$ 24 | language_version: python2.7 25 | args: 26 | - --add-import 27 | - from __future__ import absolute_import 28 | - --add-import 29 | - from __future__ import unicode_literals 30 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 2.7 3 | sudo: required 4 | services: 5 | - docker 6 | before_install: 7 | - sudo sh -c 'echo "deb https://apt.dockerproject.org/repo ubuntu-precise main" > 8 | /etc/apt/sources.list.d/docker.list' 9 | - sudo apt-key adv --keyserver hkp://p80.pool.sks-keyservers.net:80 --recv-keys 58118E89F3A912897C070ADBF76221572C52609D 10 | - sudo apt-get update 11 | - sudo apt-key update 12 | - sudo apt-get -qqy -o Dpkg::Options::="--force-confdef" -o Dpkg::Options::="--force-confold" 13 | install docker-engine 14 | - sudo rm /usr/local/bin/docker-compose 15 | - curl -L https://github.com/docker/compose/releases/download/1.7.0/docker-compose-`uname 16 | -s`-`uname -m` > docker-compose 17 | - chmod +x docker-compose 18 | - sudo mv docker-compose /usr/local/bin 19 | - docker-compose -v 20 | - docker -v 21 | install: 22 | - pip install tox 23 | script: 24 | - tox -e py27 25 | deploy: 26 | provider: pypi 27 | user: yelplabs 28 | password: 29 | secure: sUJKXF/1S4+kExU2IAkuW0kXTYVPWIJoBRPVSgVadf9yEh49XZFAt2IeGyrg5T3PrbaeOn8LEuUG0XCIWyU4M1wiEA5nAIddQ3+WyPSSJ0W/Ueajn75/pesjEkKqS6gEMkd60f7LdEiz347YuWerE6poO8JLYsyNRViv1ck63TZDieCY1q3hHO2jvoeDX++qRZmoHccZfUfmoQPldwM6JEQ1AW2gXIc1iNFl/BVM0HOdTyhkNERQk/NmX+AjI0YI8ICwp9NUDNDt4iGs6F00mW6Vg09A07yH0t2HfuphOfxnFElvlI60i7pICqzqfjeKQG5W1818is9eR/yRzpKEAnUCyACaJ2PVRmINRDz+vZWSYns3rm3C2SBuG/YgAQQLFwMt9nLV9AJGpD7fm7qrc8WF5/wCV8OGARbxpkL6GqZuocgjpg6pNsiT+QTT6rMYDTE2vegn/OtjJ/gtr3ytt1N9oUbp/IBVMnipG+aoPTz0FiTMJqMkq0iFbDj5CALG3WWkLd9drUDMOOOq/tVzRrWjqpQ70Mp3itWbNqQEyU5XUF3ZZVPYHgyMIaYSomYJLJRV1ANJzk8ovDP8RHFp0GihZeY5eSeTfytQpHvGWbDm4qOyWAuaTSKP0uYBIz4+zfg823tzzgng/VbiapfMeyL1U1CUrZdLRJSYUHeEXDQ= 30 | on: 31 | tags: true 32 | distributions: sdist bdist_wheel 33 | repo: Yelp/yelp_kafka 34 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | This changelog only guarantees backward incompatible changes will be listed. 2 | # v5.0.0 (2016-09-09) 3 | ## backward incompatible changes 4 | - `yelp_kafka.discovery` methods using kafka discovery service instead of files 5 | - `yelp_kafka.discovery` methods have different names and take different parameters. 
An additional 6 | client_id is often needed. This affects: 7 | - get_all_clusters 8 | - get_kafka_connection (client_id is now mandatory) 9 | - get_all_kafka_connections (client_id is now mandatory) 10 | - Deprecated `yelp_kafka.discovery` methods have been deleted. These are: 11 | - get_local_cluster 12 | - get_cluster_by_name 13 | - get_local_scribe_topic 14 | - get_scribe_topics 15 | - get_all_local_scribe_topics 16 | - get_scribe_topic_in_datacenter 17 | - scribe_topic_exists_in_datacenter 18 | - search_local_scribe_topics_by_regex 19 | - search_local_topics_by_regex 20 | - local_scribe_topic_exists 21 | - get_all_consumer_config 22 | 23 | # v4.0.0 (2015-08-24) 24 | ## backward incompatible changes 25 | - `yelp_kafka.config.ClusterConfig`: 26 | - add 'type' attribute to ClusterConfig namedtuple 27 | 28 | # v3.0.0 (2015-05-28) 29 | ## backward incompatible changes 30 | - `yelp_kafka.monitoring.get_current_consumer_offsets`: 31 | - function signature changed from (client, group, topics, fail_on_error) 32 | to (client, group, topics, raise_on_error) 33 | - moved to `yelp_kafka.offsets.get_current_consumer_offsets` 34 | - `yelp_kafka.monitoring.get_topics_watermarks`: 35 | - function signature changed from (client, topics, fail_on_error) 36 | to (client, topics, raise_on_error) 37 | - moved to `yelp_kafka.offsets.get_topics_watermarks` 38 | - `yelp_kafka.monitoring.PartitionOffsets` namedtuple moved to 39 | `yelp_kafka.offsets.PartitionOffsets` 40 | ## added 41 | - `yelp_kafka.offsets` new functions added: 42 | advance_consumer_offsets, rewind_consumer_offsets, set_consumer_offsets 43 | 44 | # v2.0.0 (2015-03-26) 45 | ## backward incompatible changes 46 | - `yelp_kafka.monitoring.*` function signatures have been changed to 47 | (client, group, topics) and (client, group, topic, partitions). 48 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types.
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2015 David Arthur 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | 203 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .DELETE_ON_ERROR: 2 | 3 | all: test itest 4 | 5 | test: 6 | tox2 tests 7 | 8 | itest: 9 | tox2 -e docker_itest 10 | 11 | sdist: 12 | python setup.py sdist 13 | 14 | bdist_wheel: 15 | python setup.py bdist_wheel 16 | 17 | docs: 18 | tox2 -e docs 19 | 20 | clean: 21 | make -C docs clean 22 | rm -rf build/ dist/ yelp_kafka.egg-info/ .tox/ 23 | find . -name '*.pyc' -delete 24 | find . 
-name '__pycache__' -delete 25 | rm -rf docs/build/ 26 | 27 | .PHONY: docs 28 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | **Deprecation Warning** 2 | 3 | Please note that this repo is not maintained in the open source community. The code and examples 4 | contained in this repository are for demonstration purposes only. 5 | 6 | You can read the latest from Yelp Engineering on our [tech blog](https://engineeringblog.yelp.com/). 7 | 8 | [![Build Status](https://travis-ci.org/Yelp/yelp_kafka.svg?branch=master)](https://travis-ci.org/Yelp/yelp_kafka) 9 | 10 | 11 | # Yelp Kafka 12 | 13 | ## Producer 14 | 15 | Create a producer for my_topic. 16 | 17 | ```python 18 | 19 | from yelp_kafka import discovery 20 | from yelp_kafka.config import ClusterConfig 21 | from yelp_kafka.producer import YelpKafkaSimpleProducer 22 | from kafka import KafkaClient 23 | from kafka.common import ConsumerTimeout 24 | from kafka.common import FailedPayloadsError 25 | from kafka.common import KafkaUnavailableError 26 | from kafka.common import LeaderNotAvailableError 27 | from kafka.common import NotLeaderForPartitionError 28 | from kafka.common import UnknownTopicOrPartitionError 29 | 30 | # Cluster configuration 31 | cluster_config = ClusterConfig( 32 | type="service", 33 | name="cluster", 34 | broker_list=["cluster-elb-1:9092"], 35 | zookeeper="11.11.11.111:2181,11.11.11.112:2181,11.11.11.113:2181/kafka-1", 36 | ) 37 | # Create a kafka Client 38 | client = KafkaClient(cluster_config.broker_list, client_id='my-client-id') 39 | # Create the producer and send 2 messages 40 | producer = YelpKafkaSimpleProducer( 41 | client=client, 42 | cluster_config=cluster_config, 43 | report_metrics=True, 44 | ) 45 | try: 46 | producer.send_messages("my_topic", "message1", "message2") 47 | except ( 48 | FailedPayloadsError, 49 | KafkaUnavailableError, 50 | LeaderNotAvailableError, 51 | NotLeaderForPartitionError, 52 | UnknownTopicOrPartitionError, 53 | ): 54 | # Usually we want to retry a certain number of times when encountering these exceptions 55 | pass 56 | 57 | ``` 58 | 59 | This example makes use of the [YelpKafkaSimpleProducer](yelp_kafka/producer.py) 60 | from yelp_kafka. 61 | 62 | _client_id_ identifies the client connection in Kafka, and Kafka 0.9.0 uses it to enforce 63 | per-client quota limits. We recommend using a _client_id_ that represents the application. 64 | 65 | The example catches several exceptions that are usually safe to retry. 66 | 67 | **KafkaUnavailableError** can happen when the metadata request to Kafka fails. This 68 | request is broker-unaware, so a simple retry will pick another broker of the cluster and will likely succeed. 69 | 70 | **LeaderNotAvailableError** and **NotLeaderForPartitionError** may happen during a cluster 71 | rolling restart or upon broker failure. In this case a new leader will be elected; kafka-python 72 | refreshes the metadata by default when encountering these errors, so upon retry it will 73 | hopefully reach the new leader and succeed. However, Kafka doesn't give us any guarantee on how quickly 74 | a new leader will be elected. We measured that for small clusters elections happen in the order 75 | of hundreds of milliseconds, but for large clusters they can take up to several seconds. 76 | Usually an application should retry for a limited amount of time and then consider the request failed and react accordingly.
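A minimal sketch of such a bounded retry loop is shown below. It reuses the exception classes imported in the example above; the deadline and backoff values are illustrative, not recommendations.

```python
import time

from kafka.common import FailedPayloadsError
from kafka.common import KafkaUnavailableError
from kafka.common import LeaderNotAvailableError
from kafka.common import NotLeaderForPartitionError
from kafka.common import UnknownTopicOrPartitionError

# Exceptions that are usually safe to retry (see the discussion above).
RETRIABLE_ERRORS = (
    FailedPayloadsError,
    KafkaUnavailableError,
    LeaderNotAvailableError,
    NotLeaderForPartitionError,
    UnknownTopicOrPartitionError,
)


def send_with_retries(producer, topic, messages, deadline_seconds=5.0, backoff_seconds=0.2):
    """Retry send_messages until it succeeds or the deadline expires."""
    deadline = time.time() + deadline_seconds
    while True:
        try:
            return producer.send_messages(topic, *messages)
        except RETRIABLE_ERRORS:
            if time.time() >= deadline:
                raise  # Give up: consider the request failed and react accordingly.
            # Give the metadata refresh / leader election some time to complete.
            time.sleep(backoff_seconds)
```

For example, `send_with_retries(producer, "my_topic", ["message1", "message2"])` retries transient failures for up to five seconds before re-raising the last error.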
77 | 78 | Finally, **FailedPayloadsError** may happen in many cases, for example when a leader is missing 79 | or the connection fails in the middle of a request. Metadata is automatically refreshed for this exception as well. 80 | 81 | See Also: [kafka-python](http://kafka-python.readthedocs.org/en/v0.9.5/usage.html) and [SimpleProducer](http://kafka-python.readthedocs.org/en/v0.9.5/apidoc/kafka.producer.html) 82 | 83 | 84 | 85 | ## Consumer 86 | 87 | ```python 88 | 89 | from yelp_kafka import discovery 90 | from yelp_kafka.consumer_group import KafkaConsumerGroup 91 | from yelp_kafka.config import ClusterConfig 92 | from yelp_kafka.config import KafkaConsumerConfig 93 | from yelp_kafka.error import PartitionerError 94 | from kafka.common import ConsumerTimeout 95 | from kafka.common import FailedPayloadsError 96 | from kafka.common import KafkaUnavailableError 97 | from kafka.common import LeaderNotAvailableError 98 | from kafka.common import NotLeaderForPartitionError 99 | 100 | # Cluster configuration 101 | cluster_config = ClusterConfig( 102 | type="service", 103 | name="cluster", 104 | broker_list=["cluster-elb-1:9092"], 105 | zookeeper="11.11.11.111:2181,11.11.11.112:2181,11.11.11.113:2181/kafka-1", 106 | ) 107 | config = KafkaConsumerConfig( 108 | 'my_group_id', 109 | cluster_config, 110 | auto_offset_reset='smallest', 111 | auto_commit_interval_ms=60000, # By default 60 seconds 112 | auto_commit_interval_messages=100, # By default 100 messages 113 | consumer_timeout_ms=100, # By default 100 ms 114 | ) 115 | 116 | consumer = KafkaConsumerGroup(['my_topic'], config) 117 | 118 | def consume_messages(consumer): 119 | while True: 120 | try: 121 | message = consumer.next() 122 | print message.value 123 | consumer.task_done(message) 124 | # If auto_commit is disabled in KafkaConsumerGroup, then you must call 125 | # consumer.commit() yourself. 126 | # 127 | # auto_commit is enabled by default, so here we are implicitly 128 | # letting KafkaConsumerGroup decide when to inform Kafka of our 129 | # completed messages. 130 | 131 | except ConsumerTimeout: 132 | # Applications usually just ignore the ConsumerTimeout 133 | # exception or check a termination flag. 134 | pass 135 | except (FailedPayloadsError, KafkaUnavailableError, LeaderNotAvailableError, NotLeaderForPartitionError): 136 | pass # See the producer example above; usually these exceptions should be retried. 137 | 138 | while True: 139 | try: 140 | with consumer: 141 | consume_messages(consumer) 142 | except PartitionerError: 143 | # In this case we can't just retry, because the connection to zookeeper is lost. 144 | # We can either fail the application or re-initialize the consumer connection as 145 | # done in this example. 146 | pass 147 | ``` 148 | 149 | See the producer example above for more information about the exceptions to retry. 150 | The __group_id__ should represent the application/service the consumer belongs to. 151 | 152 | 153 | **Note**: When bootstrapping a new consumer group, it is usually recommended to set ``auto_offset_reset`` to **largest**. 154 | This ensures that a huge backlog of past messages is not consumed the first time a consumer is launched. 155 | ``auto_offset_reset`` should be set to **smallest** immediately after the first run (after the offsets are committed for the first time). 156 | When ``auto_offset_reset`` is set to **smallest**, no messages are lost when adding new partitions.
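For example, a new consumer group might be bootstrapped with one setting and then redeployed with the other. A minimal sketch, reusing ``cluster_config`` from the example above:

```python
from yelp_kafka.config import KafkaConsumerConfig

# First launch of a brand-new group: start from the head of the topics,
# so the historical backlog is not consumed.
bootstrap_config = KafkaConsumerConfig(
    'my_group_id',
    cluster_config,
    auto_offset_reset='largest',
)

# After the first offsets have been committed, redeploy with 'smallest'
# so that messages in newly added partitions are not skipped.
steady_state_config = KafkaConsumerConfig(
    'my_group_id',
    cluster_config,
    auto_offset_reset='smallest',
)
```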
157 | 158 | Create a consumer for all topics ending with mytopic: 159 | 160 | ```python 161 | 162 | from yelp_kafka import discovery 163 | from yelp_kafka.config import ClusterConfig 164 | from yelp_kafka.config import KafkaConsumerConfig 165 | from kafka import KafkaConsumer 166 | 167 | # Cluster configuration 168 | cluster_config = ClusterConfig( 169 | type="service", 170 | name="cluster", 171 | broker_list=["cluster-elb-1:9092"], 172 | zookeeper="11.11.11.111:2181,11.11.11.112:2181,11.11.11.113:2181/kafka-1", 173 | ) 174 | topics, cluster = discovery.search_topics_by_regex('.*mytopic', [cluster_config]) 175 | config = KafkaConsumerConfig(group_id='my_app', cluster=cluster, client_id='my-consumer') 176 | consumer = KafkaConsumer(topics, **config.get_kafka_consumer_config()) 177 | for message in consumer: 178 | print message 179 | ``` 180 | 181 | This example makes use of the KafkaConsumer from kafka-python. This consumer 182 | class should be considered deprecated and should not be used anymore. 183 | 184 | See Also: [KafkaConsumer](http://kafka-python.readthedocs.org/en/v0.9.5/apidoc/kafka.consumer.html#module-kafka.consumer.kafka) 185 | 186 | 187 | ## Reporting Metrics 188 | 189 | 190 | If you're using `yelp_kafka.consumer_group.KafkaConsumerGroup`, you 191 | can send metrics on request latency and error counts. This is on by default 192 | for yelp_kafka and uses an instance of 193 | `yelp_kafka.metrics_responder.MetricsResponder` for reporting metrics. 194 | 195 | Reporting metrics directly from the kafka client is an option that is only 196 | available in Yelp's fork of [kafka-python](https://github.com/Yelp/kafka-python). 197 | 198 | Producer metrics are reported by default by the YelpKafkaSimpleProducer, 199 | controlled through the `report_metrics` parameter. This defaults to True but can be turned off (see the sketch at the end of this README). 200 | 201 | 202 | If you want to plug in your own metric responder module, implement 203 | `yelp_kafka.metrics_responder.MetricsResponder` and pass it to 204 | `yelp_kafka.producer.YelpKafkaSimpleProducer`, 205 | `yelp_kafka.producer.YelpKafkaKeyedProducer`, or 206 | `yelp_kafka.consumer_group.KafkaConsumerGroup`. 207 | 208 | 209 | ## Other consumer groups 210 | 211 | 212 | yelp_kafka currently provides three *consumer group* interfaces for consuming 213 | from Kafka. 214 | 215 | - `yelp_kafka.consumer_group.KafkaConsumerGroup` is the recommended 216 | class to use if you want to start multiple instances of your consumer. You may 217 | start as many instances as you wish (partition balancing will happen 218 | automatically), and you can control when to mark messages as processed (via 219 | __task_done__ and __commit__). 220 | 221 | - `yelp_kafka.consumer_group.MultiprocessingConsumerGroup` is for 222 | consuming from high-volume topics, since it starts as many consumer processes as topic 223 | partitions. It also handles process monitoring and restart upon failure. 224 | 225 | - `yelp_kafka.consumer_group.ConsumerGroup` provides the same set of 226 | features as KafkaConsumerGroup, but with a less convenient interface. 227 | This class is considered deprecated.
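As noted in the Reporting Metrics section above, producer metrics reporting can be turned off. A minimal sketch, using only parameters already shown in the producer example (``cluster_config`` is assumed to be defined as above):

```python
from kafka import KafkaClient
from yelp_kafka.producer import YelpKafkaSimpleProducer

client = KafkaClient(cluster_config.broker_list, client_id='my-client-id')

# report_metrics defaults to True; set it to False to disable the
# built-in metrics reporting for this producer.
producer = YelpKafkaSimpleProducer(
    client=client,
    cluster_config=cluster_config,
    report_metrics=False,
)
```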
228 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | itest: 2 | build: docker/itest_${KAFKA_VERSION} 3 | links: 4 | - kafka 5 | - zookeeper 6 | volumes: 7 | - .:/work 8 | command: echo "dummy command" 9 | 10 | kafka: 11 | build: docker/kafka_${KAFKA_VERSION} 12 | expose: 13 | - "9092" 14 | links: 15 | - zookeeper 16 | 17 | zookeeper: 18 | build: docker/zookeeper 19 | expose: 20 | - "2181" 21 | -------------------------------------------------------------------------------- /docker/itest_0.8.2/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM docker-dev.yelpcorp.com/trusty_yelp 2 | MAINTAINER Team Distributed Systems 3 | 4 | RUN apt-get update 5 | 6 | # We need to install Java and Kafka in order to use Kafka CLI. The Kafka server 7 | # will never run in this container; the Kafka server will run in the "kafka" 8 | # container. 9 | RUN apt-get update && apt-get install -y \ 10 | java-8u20-oracle \ 11 | confluent-kafka=0.8.2.0-1 12 | 13 | ENV JAVA_HOME="/usr/lib/jvm/java-8-oracle-1.8.0.20/" 14 | ENV PATH="$PATH:$JAVA_HOME/bin" 15 | 16 | RUN apt-get install -y python \ 17 | python2.7 \ 18 | python3.5 \ 19 | python-pkg-resources \ 20 | python-pip \ 21 | python-setuptools \ 22 | python-virtualenv \ 23 | python-tox2 24 | 25 | COPY run_tests.sh /scripts/run_tests.sh 26 | RUN chmod 755 /scripts/run_tests.sh 27 | 28 | WORKDIR /work 29 | -------------------------------------------------------------------------------- /docker/itest_0.8.2/run_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | function do_at_exit { 6 | exit_status=$? 7 | rm -rf build/ dist/ yelp_kafka.egg-info/ 8 | rm -rf .tox/log .tox/dist 9 | find . -name '*.pyc' -delete 10 | find . -name '__pycache__' -delete 11 | exit $exit_status 12 | } 13 | 14 | # Clean up artifacts from tests 15 | trap do_at_exit EXIT INT TERM 16 | 17 | tox2 -e integration-py27 -e integration-py35 18 | -------------------------------------------------------------------------------- /docker/itest_0.9.0/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM docker-dev.yelpcorp.com/trusty_yelp 2 | MAINTAINER Team Distributed Systems 3 | 4 | # We need to install Java and Kafka in order to use Kafka CLI. The Kafka server 5 | # will never run in this container; the Kafka server will run in the "kafka" 6 | # container. 7 | RUN apt-get update && apt-get install -y \ 8 | java-8u20-oracle \ 9 | confluent-kafka=0.9.0.0-1 10 | 11 | ENV JAVA_HOME="/usr/lib/jvm/java-8-oracle-1.8.0.20/" 12 | ENV PATH="$PATH:$JAVA_HOME/bin" 13 | 14 | RUN apt-get install -y python \ 15 | python2.7 \ 16 | python3.5 \ 17 | python-pkg-resources \ 18 | python-pip \ 19 | python-setuptools \ 20 | python-virtualenv \ 21 | python-tox2 22 | 23 | COPY run_tests.sh /scripts/run_tests.sh 24 | RUN chmod 755 /scripts/run_tests.sh 25 | 26 | WORKDIR /work 27 | -------------------------------------------------------------------------------- /docker/itest_0.9.0/run_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | function do_at_exit { 6 | exit_status=$? 7 | rm -rf build/ dist/ yelp_kafka.egg-info/ 8 | rm -rf .tox/log .tox/dist 9 | find . -name '*.pyc' -delete 10 | find . 
-name '__pycache__' -delete 11 | exit $exit_status 12 | } 13 | 14 | # Clean up artifacts from tests 15 | trap do_at_exit EXIT INT TERM 16 | 17 | tox2 -e integration-py27 -e integration-py35 18 | -------------------------------------------------------------------------------- /docker/kafka_0.8.2/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM docker-dev.yelpcorp.com/trusty_yelp 2 | MAINTAINER Team Distributed Systems 3 | 4 | RUN apt-get update && apt-get -y install java-8u20-oracle confluent-kafka=0.8.2.0-1 5 | ENV JAVA_HOME="/usr/lib/jvm/java-8-oracle-1.8.0.20/" 6 | 7 | ADD config.properties /server.properties 8 | 9 | CMD echo "Kafka starting" && /usr/bin/kafka-server-start /server.properties 10 | -------------------------------------------------------------------------------- /docker/kafka_0.8.2/config.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # see kafka.server.KafkaConfig for additional details and defaults 16 | 17 | ############################# Server Basics ############################# 18 | 19 | # The id of the broker. This must be set to a unique integer for each broker. 20 | broker.id=0 21 | 22 | ############################# Socket Server Settings ############################# 23 | 24 | # The port the socket server listens on 25 | port=9092 26 | 27 | # Hostname the broker will bind to. If not set, the server will bind to all interfaces 28 | #host.name=localhost 29 | 30 | # Hostname the broker will advertise to producers and consumers. If not set, it uses the 31 | # value for "host.name" if configured. Otherwise, it will use the value returned from 32 | # java.net.InetAddress.getCanonicalHostName(). 33 | #advertised.host.name= 34 | 35 | # The port to publish to ZooKeeper for clients to use. If this is not set, 36 | # it will publish the same port that the broker binds to. 37 | #advertised.port= 38 | 39 | # The number of threads handling network requests 40 | num.network.threads=3 41 | 42 | # The number of threads doing disk I/O 43 | num.io.threads=8 44 | 45 | # The send buffer (SO_SNDBUF) used by the socket server 46 | socket.send.buffer.bytes=102400 47 | 48 | # The receive buffer (SO_RCVBUF) used by the socket server 49 | socket.receive.buffer.bytes=102400 50 | 51 | # The maximum size of a request that the socket server will accept (protection against OOM) 52 | socket.request.max.bytes=104857600 53 | 54 | 55 | ############################# Log Basics ############################# 56 | 57 | # A comma seperated list of directories under which to store log files 58 | log.dirs=/var/lib/kafka 59 | 60 | # The default number of log partitions per topic. 
More partitions allow greater 61 | # parallelism for consumption, but this will also result in more files across 62 | # the brokers. 63 | num.partitions=1 64 | 65 | # The number of threads per data directory to be used for log recovery at startup and flushing at shutdown. 66 | # This value is recommended to be increased for installations with data dirs located in RAID array. 67 | num.recovery.threads.per.data.dir=1 68 | 69 | ############################# Log Flush Policy ############################# 70 | 71 | # Messages are immediately written to the filesystem but by default we only fsync() to sync 72 | # the OS cache lazily. The following configurations control the flush of data to disk. 73 | # There are a few important trade-offs here: 74 | # 1. Durability: Unflushed data may be lost if you are not using replication. 75 | # 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. 76 | # 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. 77 | # The settings below allow one to configure the flush policy to flush data after a period of time or 78 | # every N messages (or both). This can be done globally and overridden on a per-topic basis. 79 | 80 | # The number of messages to accept before forcing a flush of data to disk 81 | #log.flush.interval.messages=10000 82 | 83 | # The maximum amount of time a message can sit in a log before we force a flush 84 | #log.flush.interval.ms=1000 85 | 86 | ############################# Log Retention Policy ############################# 87 | 88 | # The following configurations control the disposal of log segments. The policy can 89 | # be set to delete segments after a period of time, or after a given size has accumulated. 90 | # A segment will be deleted whenever *either* of these criteria are met. Deletion always happens 91 | # from the end of the log. 92 | 93 | # The minimum age of a log file to be eligible for deletion 94 | log.retention.hours=168 95 | 96 | # A size-based retention policy for logs. Segments are pruned from the log as long as the remaining 97 | # segments don't drop below log.retention.bytes. 98 | #log.retention.bytes=1073741824 99 | 100 | # The maximum size of a log segment file. When this size is reached a new log segment will be created. 101 | log.segment.bytes=1073741824 102 | 103 | # The interval at which log segments are checked to see if they can be deleted according 104 | # to the retention policies 105 | log.retention.check.interval.ms=300000 106 | 107 | # By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. 108 | # If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. 109 | log.cleaner.enable=false 110 | 111 | ############################# Zookeeper ############################# 112 | 113 | # Zookeeper connection string (see zookeeper docs for details). 114 | # This is a comma separated host:port pairs, each corresponding to a zk 115 | # server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". 116 | # You can also append an optional chroot string to the urls to specify the 117 | # root directory for all kafka znodes. 
118 | zookeeper.connect=zookeeper:2181 119 | 120 | # Timeout in ms for connecting to zookeeper 121 | zookeeper.connection.timeout.ms=6000 122 | -------------------------------------------------------------------------------- /docker/kafka_0.9.0/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM docker-dev.yelpcorp.com/trusty_yelp 2 | MAINTAINER Team Distributed Systems 3 | 4 | RUN apt-get update && apt-get -y install java-8u20-oracle confluent-kafka=0.9.0.0-1 5 | ENV JAVA_HOME="/usr/lib/jvm/java-8-oracle-1.8.0.20/" 6 | 7 | ADD config.properties /server.properties 8 | 9 | CMD echo "Kafka starting" && /usr/bin/kafka-server-start /server.properties 10 | -------------------------------------------------------------------------------- /docker/kafka_0.9.0/config.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # see kafka.server.KafkaConfig for additional details and defaults 16 | 17 | ############################# Server Basics ############################# 18 | 19 | # The id of the broker. This must be set to a unique integer for each broker. 20 | broker.id=0 21 | 22 | ############################# Socket Server Settings ############################# 23 | 24 | # The port the socket server listens on 25 | port=9092 26 | 27 | # Hostname the broker will bind to. If not set, the server will bind to all interfaces 28 | #host.name=localhost 29 | 30 | # Hostname the broker will advertise to producers and consumers. If not set, it uses the 31 | # value for "host.name" if configured. Otherwise, it will use the value returned from 32 | # java.net.InetAddress.getCanonicalHostName(). 33 | #advertised.host.name= 34 | 35 | # The port to publish to ZooKeeper for clients to use. If this is not set, 36 | # it will publish the same port that the broker binds to. 37 | #advertised.port= 38 | 39 | # The number of threads handling network requests 40 | num.network.threads=3 41 | 42 | # The number of threads doing disk I/O 43 | num.io.threads=8 44 | 45 | # The send buffer (SO_SNDBUF) used by the socket server 46 | socket.send.buffer.bytes=102400 47 | 48 | # The receive buffer (SO_RCVBUF) used by the socket server 49 | socket.receive.buffer.bytes=102400 50 | 51 | # The maximum size of a request that the socket server will accept (protection against OOM) 52 | socket.request.max.bytes=104857600 53 | 54 | 55 | ############################# Log Basics ############################# 56 | 57 | # A comma seperated list of directories under which to store log files 58 | log.dirs=/var/lib/kafka 59 | 60 | # The default number of log partitions per topic. 
More partitions allow greater 61 | # parallelism for consumption, but this will also result in more files across 62 | # the brokers. 63 | num.partitions=1 64 | 65 | # The number of threads per data directory to be used for log recovery at startup and flushing at shutdown. 66 | # This value is recommended to be increased for installations with data dirs located in RAID array. 67 | num.recovery.threads.per.data.dir=1 68 | 69 | ############################# Log Flush Policy ############################# 70 | 71 | # Messages are immediately written to the filesystem but by default we only fsync() to sync 72 | # the OS cache lazily. The following configurations control the flush of data to disk. 73 | # There are a few important trade-offs here: 74 | # 1. Durability: Unflushed data may be lost if you are not using replication. 75 | # 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. 76 | # 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. 77 | # The settings below allow one to configure the flush policy to flush data after a period of time or 78 | # every N messages (or both). This can be done globally and overridden on a per-topic basis. 79 | 80 | # The number of messages to accept before forcing a flush of data to disk 81 | #log.flush.interval.messages=10000 82 | 83 | # The maximum amount of time a message can sit in a log before we force a flush 84 | #log.flush.interval.ms=1000 85 | 86 | ############################# Log Retention Policy ############################# 87 | 88 | # The following configurations control the disposal of log segments. The policy can 89 | # be set to delete segments after a period of time, or after a given size has accumulated. 90 | # A segment will be deleted whenever *either* of these criteria are met. Deletion always happens 91 | # from the end of the log. 92 | 93 | # The minimum age of a log file to be eligible for deletion 94 | log.retention.hours=168 95 | 96 | # A size-based retention policy for logs. Segments are pruned from the log as long as the remaining 97 | # segments don't drop below log.retention.bytes. 98 | #log.retention.bytes=1073741824 99 | 100 | # The maximum size of a log segment file. When this size is reached a new log segment will be created. 101 | log.segment.bytes=1073741824 102 | 103 | # The interval at which log segments are checked to see if they can be deleted according 104 | # to the retention policies 105 | log.retention.check.interval.ms=300000 106 | 107 | # By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. 108 | # If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. 109 | log.cleaner.enable=false 110 | 111 | ############################# Zookeeper ############################# 112 | 113 | # Zookeeper connection string (see zookeeper docs for details). 114 | # This is a comma separated host:port pairs, each corresponding to a zk 115 | # server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". 116 | # You can also append an optional chroot string to the urls to specify the 117 | # root directory for all kafka znodes. 
118 | zookeeper.connect=zookeeper:2181 119 | 120 | # Timeout in ms for connecting to zookeeper 121 | zookeeper.connection.timeout.ms=6000 122 | -------------------------------------------------------------------------------- /docker/zookeeper/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM docker-dev.yelpcorp.com/trusty_yelp 2 | MAINTAINER Team Distributed Systems 3 | 4 | RUN apt-get update && apt-get -y install zookeeper 5 | 6 | CMD /usr/share/zookeeper/bin/zkServer.sh start-foreground 7 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = build 9 | 10 | # Internal variables. 11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 14 | 15 | .PHONY: help clean html dirhtml pickle json htmlhelp qthelp latex changes linkcheck doctest 16 | 17 | help: 18 | @echo "Please use \`make ' where is one of" 19 | @echo " html to make standalone HTML files" 20 | @echo " dirhtml to make HTML files named index.html in directories" 21 | @echo " pickle to make pickle files" 22 | @echo " json to make JSON files" 23 | @echo " htmlhelp to make HTML files and a HTML help project" 24 | @echo " qthelp to make HTML files and a qthelp project" 25 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 26 | @echo " changes to make an overview of all changed/added/deprecated items" 27 | @echo " linkcheck to check all external links for integrity" 28 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 29 | 30 | clean: 31 | -rm -rf $(BUILDDIR)/* 32 | 33 | html: 34 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 35 | @echo 36 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 37 | 38 | dirhtml: 39 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 40 | @echo 41 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 42 | 43 | pickle: 44 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 45 | @echo 46 | @echo "Build finished; now you can process the pickle files." 47 | 48 | json: 49 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 50 | @echo 51 | @echo "Build finished; now you can process the JSON files." 52 | 53 | htmlhelp: 54 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 55 | @echo 56 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 57 | ".hhp project file in $(BUILDDIR)/htmlhelp." 58 | 59 | qthelp: 60 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 61 | @echo 62 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 63 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 64 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/YelpKafka.qhcp" 65 | @echo "To view the help file:" 66 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/YelpKafka.qhc" 67 | 68 | latex: 69 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 70 | @echo 71 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 72 | @echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \ 73 | "run these through (pdf)latex." 
74 | 75 | changes: 76 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 77 | @echo 78 | @echo "The overview file is in $(BUILDDIR)/changes." 79 | 80 | linkcheck: 81 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 82 | @echo 83 | @echo "Link check complete; look for any errors in the above output " \ 84 | "or in $(BUILDDIR)/linkcheck/output.txt." 85 | 86 | doctest: 87 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 88 | @echo "Testing of doctests in the sources finished, look at the " \ 89 | "results in $(BUILDDIR)/doctest/output.txt." 90 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2016 Yelp Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # -*- coding: utf-8 -*- 16 | # 17 | # Yelp Kafka documentation build configuration file, created by 18 | # sphinx-quickstart on Thu Jan 22 14:07:09 2015. 19 | # 20 | # This file is execfile()d with the current directory set to its containing dir. 21 | # 22 | # Note that not all possible configuration values are present in this 23 | # autogenerated file. 24 | # 25 | # All configuration values have a default; values that are commented out 26 | # serve to show the default. 27 | from __future__ import absolute_import 28 | from __future__ import unicode_literals 29 | 30 | import sphinx_rtd_theme 31 | 32 | # If extensions (or modules to document with autodoc) are in another directory, 33 | # add these directories to sys.path here. If the directory is relative to the 34 | # documentation root, use os.path.abspath to make it absolute, like shown here. 35 | # sys.path.append(os.path.abspath('.')) 36 | 37 | # -- General configuration ----------------------------------------------------- 38 | 39 | # Add any Sphinx extension module names here, as strings. They can be extensions 40 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 41 | extensions = [ 42 | 'sphinx.ext.autodoc', 43 | 'sphinx.ext.doctest', 44 | 'sphinx.ext.intersphinx', 45 | 'sphinx.ext.todo', 46 | 'sphinx.ext.coverage', 47 | 'sphinx.ext.viewcode' 48 | ] 49 | 50 | # Add any paths that contain templates here, relative to this directory. 51 | templates_path = ['_templates'] 52 | 53 | # The suffix of source filenames. 54 | source_suffix = '.rst' 55 | 56 | # The encoding of source files. 57 | # source_encoding = 'utf-8' 58 | 59 | # The master toctree document. 60 | master_doc = 'index' 61 | 62 | # General information about the project. 63 | project = u'Yelp Kafka' 64 | copyright = u'2014-2015, Tools Infra' 65 | 66 | # The version info for the project you're documenting, acts as replacement for 67 | # |version| and |release|, also used in various other places throughout the 68 | # built documents. 69 | # 70 | # The short X.Y version. 71 | # The full version, including alpha/beta/rc tags. 
72 | from yelp_kafka import __version__ 73 | version = release = __version__ 74 | 75 | # The language for content autogenerated by Sphinx. Refer to documentation 76 | # for a list of supported languages. 77 | # language = None 78 | 79 | # There are two options for replacing |today|: either, you set today to some 80 | # non-false value, then it is used: 81 | # today = '' 82 | # Else, today_fmt is used as the format for a strftime call. 83 | # today_fmt = '%B %d, %Y' 84 | 85 | # List of documents that shouldn't be included in the build. 86 | # unused_docs = [] 87 | 88 | # List of directories, relative to source directory, that shouldn't be searched 89 | # for source files. 90 | exclude_trees = ['build'] 91 | 92 | # The reST default role (used for this markup: `text`) to use for all documents. 93 | # default_role = None 94 | 95 | # If true, '()' will be appended to :func: etc. cross-reference text. 96 | add_function_parentheses = True 97 | 98 | # If true, the current module name will be prepended to all description 99 | # unit titles (such as .. function::). 100 | # add_module_names = True 101 | 102 | # If true, sectionauthor and moduleauthor directives will be shown in the 103 | # output. They are ignored by default. 104 | # show_authors = False 105 | 106 | # The name of the Pygments (syntax highlighting) style to use. 107 | pygments_style = 'sphinx' 108 | 109 | # A list of ignored prefixes for module index sorting. 110 | # modindex_common_prefix = [] 111 | 112 | 113 | # -- Options for HTML output --------------------------------------------------- 114 | 115 | # The theme to use for HTML and HTML Help pages. Major themes that come with 116 | # Sphinx are currently 'default' and 'sphinxdoc'. 117 | html_theme = 'sphinx_rtd_theme' 118 | 119 | # Theme options are theme-specific and customize the look and feel of a theme 120 | # further. For a list of options available for each theme, see the 121 | # documentation. 122 | # html_theme_options = {} 123 | 124 | # Add any paths that contain custom themes here, relative to this directory. 125 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] 126 | 127 | # The name for this set of Sphinx documents. If None, it defaults to 128 | # " v documentation". 129 | # html_title = None 130 | 131 | # A shorter title for the navigation bar. Default is the same as html_title. 132 | # html_short_title = None 133 | 134 | # The name of an image file (relative to this directory) to place at the top 135 | # of the sidebar. 136 | # html_logo = None 137 | 138 | # The name of an image file (within the static path) to use as favicon of the 139 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 140 | # pixels large. 141 | # html_favicon = None 142 | 143 | # Add any paths that contain custom static files (such as style sheets) here, 144 | # relative to this directory. They are copied after the builtin static files, 145 | # so a file named "default.css" will overwrite the builtin "default.css". 146 | html_static_path = ['_static'] 147 | 148 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 149 | # using the given strftime format. 150 | # html_last_updated_fmt = '%b %d, %Y' 151 | 152 | # If true, SmartyPants will be used to convert quotes and dashes to 153 | # typographically correct entities. 154 | # html_use_smartypants = True 155 | 156 | # Custom sidebar templates, maps document names to template names. 
157 | html_sidebars = {'**': ['globaltoc.html', 'relations.html', 'sourcelink.html', 'searchbox.html'], } 158 | 159 | # Additional templates that should be rendered to pages, maps page names to 160 | # template names. 161 | # html_additional_pages = {} 162 | 163 | # If false, no module index is generated. 164 | # html_use_modindex = True 165 | 166 | # If false, no index is generated. 167 | # html_use_index = True 168 | 169 | # If true, the index is split into individual pages for each letter. 170 | # html_split_index = False 171 | 172 | # If true, links to the reST sources are added to the pages. 173 | # html_show_sourcelink = True 174 | 175 | # If true, an OpenSearch description file will be output, and all pages will 176 | # contain a tag referring to it. The value of this option must be the 177 | # base URL from which the finished HTML is served. 178 | # html_use_opensearch = '' 179 | 180 | # If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml"). 181 | # html_file_suffix = '' 182 | 183 | # Output file base name for HTML help builder. 184 | htmlhelp_basename = 'yelpkafkadoc' 185 | 186 | 187 | # -- Options for LaTeX output -------------------------------------------------- 188 | 189 | # The paper size ('letter' or 'a4'). 190 | # latex_paper_size = 'letter' 191 | 192 | # The font size ('10pt', '11pt' or '12pt'). 193 | # latex_font_size = '10pt' 194 | 195 | # Grouping the document tree into LaTeX files. List of tuples 196 | # (source start file, target name, title, author, documentclass [howto/manual]). 197 | latex_documents = [ 198 | ('index', 'YelpKafka.tex', u'Yelp Kafka Documentation', 199 | u'Tools Infra', 'manual'), 200 | ] 201 | 202 | # The name of an image file (relative to this directory) to place at the top of 203 | # the title page. 204 | # latex_logo = None 205 | 206 | # For "manual" documents, if this is true, then toplevel headings are parts, 207 | # not chapters. 208 | # latex_use_parts = False 209 | 210 | # Additional stuff for the LaTeX preamble. 211 | # latex_preamble = '' 212 | 213 | # Documents to append as an appendix to all manuals. 214 | # latex_appendices = [] 215 | 216 | # If false, no module index is generated. 217 | # latex_use_modindex = True 218 | 219 | 220 | # Example configuration for intersphinx: refer to the Python standard library. 221 | intersphinx_mapping = {'http://docs.python.org/': None} 222 | -------------------------------------------------------------------------------- /docs/source/config.rst: -------------------------------------------------------------------------------- 1 | .. _config: 2 | 3 | yelp_kafka.config 4 | ================= 5 | 6 | .. automodule:: yelp_kafka.config 7 | :members: 8 | :exclude-members: ClusterConfig 9 | 10 | .. autoclass:: yelp_kafka.config.ClusterConfig 11 | -------------------------------------------------------------------------------- /docs/source/consumer.rst: -------------------------------------------------------------------------------- 1 | .. _consumer: 2 | 3 | yelp_kafka.consumer 4 | =================== 5 | 6 | .. automodule:: yelp_kafka.consumer 7 | :members: 8 | :exclude-members: Message 9 | 10 | .. autoclass:: yelp_kafka.consumer.Message 11 | 12 | -------------------------------------------------------------------------------- /docs/source/consumer_group.rst: -------------------------------------------------------------------------------- 1 | .. _consumer_group: 2 | 3 | yelp_kafka.consumer_group 4 | ========================= 5 | 6 | .. 
automodule:: yelp_kafka.consumer_group
 7 |     :members:
 8 | 
 9 | 
--------------------------------------------------------------------------------
/docs/source/discovery.rst:
--------------------------------------------------------------------------------
 1 | .. _discovery:
 2 | 
 3 | yelp_kafka.discovery
 4 | ====================
 5 | Most of the discovery functions are Yelp specific. A custom cluster configuration can be created as follows:
 6 | 
 7 | .. code-block:: python
 8 | 
 9 |     from yelp_kafka.config import ClusterConfig
10 |     cluster_config = ClusterConfig(
11 |         type="service",
12 |         name="cluster",
13 |         broker_list=["cluster-elb-1:9092"],
14 |         zookeeper="11.11.11.111:2181,11.11.11.112:2181,11.11.11.113:2181/kafka-1",
15 |     )
16 | 
17 | 
18 | .. automodule:: yelp_kafka.discovery
19 |     :members:
20 | 
21 | 
--------------------------------------------------------------------------------
/docs/source/error.rst:
--------------------------------------------------------------------------------
 1 | .. _error:
 2 | 
 3 | yelp_kafka.error
 4 | ================
 5 | 
 6 | .. automodule:: yelp_kafka.error
 7 |     :members:
 8 | 
 9 | 
--------------------------------------------------------------------------------
/docs/source/getting_started.rst:
--------------------------------------------------------------------------------
 1 | Getting Started
 2 | ===============
 3 | 
 4 | 
 5 | .. _producer_example:
 6 | 
 7 | Producer
 8 | ^^^^^^^^
 9 | 
10 | Create a producer for ``my_topic``.
11 | 
12 | .. code-block:: python
13 | 
14 |     from yelp_kafka import discovery
15 |     from yelp_kafka.config import ClusterConfig
16 |     from yelp_kafka.producer import YelpKafkaSimpleProducer
17 |     from kafka import KafkaClient
18 |     from kafka.common import ConsumerTimeout
19 |     from kafka.common import FailedPayloadsError
20 |     from kafka.common import KafkaUnavailableError
21 |     from kafka.common import LeaderNotAvailableError
22 |     from kafka.common import NotLeaderForPartitionError
23 |     from kafka.common import UnknownTopicOrPartitionError
24 | 
25 |     # Cluster configuration
26 |     cluster_config = ClusterConfig(
27 |         type="service",
28 |         name="cluster",
29 |         broker_list=["cluster-elb-1:9092"],
30 |         zookeeper="11.11.11.111:2181,11.11.11.112:2181,11.11.11.113:2181/kafka-1",
31 |     )
32 |     # Create a Kafka client
33 |     client = KafkaClient(cluster_config.broker_list, client_id='my-client-id')
34 |     # Create the producer and send 2 messages
35 |     producer = YelpKafkaSimpleProducer(
36 |         client=client,
37 |         cluster_config=cluster_config,
38 |         report_metrics=True,
39 |     )
40 |     try:
41 |         producer.send_messages("my_topic", "message1", "message2")
42 |     except (
43 |         FailedPayloadsError,
44 |         KafkaUnavailableError,
45 |         LeaderNotAvailableError,
46 |         NotLeaderForPartitionError,
47 |         UnknownTopicOrPartitionError,
48 |     ):
49 |         # Usually we want to retry a certain number of times when encountering these exceptions
50 |         pass
51 | 
52 | 
53 | 
54 | This example makes use of the :py:class:`yelp_kafka.producer.YelpKafkaSimpleProducer` class from yelp_kafka.
55 | 
56 | ``client_id`` identifies the client connection in Kafka and is used by Kafka 0.9.0 to enforce
57 | per-client quota limits. We recommend using a ``client_id`` that represents the application.
58 | 
59 | The example catches several exceptions that are usually safe to retry.
60 | 
61 | ``KafkaUnavailableError`` can happen when the metadata request to Kafka fails; this
62 | request is broker unaware, so a simple retry would pick another broker of the cluster and possibly succeed.
63 | 
64 | ``LeaderNotAvailableError`` and ``NotLeaderForPartitionError`` may happen during a cluster
65 | rolling restart or upon broker failure. In this case a new leader will be elected; kafka-python
66 | by default refreshes the metadata when encountering these errors, so upon retry it would
67 | hopefully use the new leader and succeed. However, Kafka doesn't give us any guarantee on how quickly
68 | a new leader will be elected. We measured that for small clusters the election happens in the order
69 | of hundreds of milliseconds, but for large clusters it can take up to several seconds.
70 | Usually an application should retry for a limited amount of time, then consider the request failed and react accordingly.
71 | 
72 | Finally, ``FailedPayloadsError`` may happen in many cases, for example when a leader is missing
73 | or the connection fails in the middle of a request. Metadata is automatically refreshed for this exception as well.
74 | 
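A bounded retry loop around ``send_messages`` could look like the sketch below. It reuses the ``producer`` and the exception imports from the example above; the retry count and backoff are illustrative choices, not values recommended by yelp_kafka.

.. code-block:: python

    import time

    MAX_RETRIES = 3        # illustrative bound
    BACKOFF_SECONDS = 0.5  # illustrative backoff

    for attempt in range(MAX_RETRIES):
        try:
            producer.send_messages("my_topic", "message1", "message2")
            break
        except (
            FailedPayloadsError,
            KafkaUnavailableError,
            LeaderNotAvailableError,
            NotLeaderForPartitionError,
            UnknownTopicOrPartitionError,
        ):
            if attempt == MAX_RETRIES - 1:
                raise  # give up and let the application react
            time.sleep(BACKOFF_SECONDS)
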
75 | .. seealso:: kafka-python `usage examples`_
76 | 
77 | .. _usage examples: http://kafka-python.readthedocs.org/en/v0.9.5/usage.html
78 | .. _SimpleProducer: http://kafka-python.readthedocs.org/en/v0.9.5/apidoc/kafka.producer.html
79 | 
80 | .. _consumer_group_example:
81 | 
82 | Consumer
83 | ^^^^^^^^
84 | 
85 | .. code-block:: python
86 | 
87 |     from yelp_kafka import discovery
88 |     from yelp_kafka.consumer_group import KafkaConsumerGroup
89 |     from yelp_kafka.config import ClusterConfig
90 |     from yelp_kafka.config import KafkaConsumerConfig
91 |     from yelp_kafka.error import PartitionerError
92 |     from kafka.common import ConsumerTimeout
93 |     from kafka.common import FailedPayloadsError
94 |     from kafka.common import KafkaUnavailableError
95 |     from kafka.common import LeaderNotAvailableError
96 |     from kafka.common import NotLeaderForPartitionError
97 | 
98 |     # Cluster configuration
99 |     cluster_config = ClusterConfig(
100 |         type="service",
101 |         name="cluster",
102 |         broker_list=["cluster-elb-1:9092"],
103 |         zookeeper="11.11.11.111:2181,11.11.11.112:2181,11.11.11.113:2181/kafka-1",
104 |     )
105 |     config = KafkaConsumerConfig(
106 |         'my_group_id',
107 |         cluster_config,
108 |         auto_offset_reset='smallest',
109 |         auto_commit_interval_ms=60000,  # By default 60 seconds
110 |         auto_commit_interval_messages=100,  # By default 100 messages
111 |         consumer_timeout_ms=100,  # By default 100 ms
112 |     )
113 | 
114 |     consumer = KafkaConsumerGroup(['my_topic'], config)
115 | 
116 |     def consume_messages(consumer):
117 |         while True:
118 |             try:
119 |                 message = consumer.next()
120 |                 print message.value
121 |                 consumer.task_done(message)
122 |                 # If auto_commit is disabled in KafkaConsumerGroup, then you must call
123 |                 # consumer.commit() yourself.
124 |                 #
125 |                 # auto_commit is enabled by default, so here we are implicitly
126 |                 # letting KafkaConsumerGroup decide when to inform Kafka of our
127 |                 # completed messages.
128 | 
129 |             except ConsumerTimeout:
130 |                 # Applications usually just ignore the ConsumerTimeout
131 |                 # exception or check a termination flag.
132 |                 pass
133 |             except (FailedPayloadsError, KafkaUnavailableError, LeaderNotAvailableError, NotLeaderForPartitionError):
134 |                 # See producer example above; usually these exceptions should be retried.
135 |                 pass
136 |     while True:
137 |         try:
138 |             with consumer:
139 |                 consume_messages(consumer)
140 |         except PartitionerError:
141 |             # In this case we can't just retry, because the connection to zookeeper is lost.
142 |             # We can either fail the application or re-initialize the consumer connection as
143 |             # done in this example.
144 |             pass
145 | 
146 | See :ref:`producer_example` for more information about the exceptions to retry.
147 | See :ref:`consumer_group_example` for more information about using KafkaConsumerGroup.
148 | The ``group_id`` should represent the application/service the consumer belongs to.
149 | 
150 | .. seealso:: :ref:`config` for all the available configuration options.
151 | 
152 | 
153 | .. note:: When bootstrapping a new consumer group it is usually recommended to set ``auto_offset_reset`` to **largest**.
154 |    This ensures that a huge backlog of past messages is not consumed the first time a consumer is launched.
155 |    ``auto_offset_reset`` should be set to **smallest** immediately after the first run (after the offsets are committed for the first time).
156 |    When ``auto_offset_reset`` is set to **smallest**, no messages are lost when adding new partitions.
157 | 
158 | Create a consumer for all topics ending with ``mytopic``:
159 | 
160 | .. code-block:: python
161 | 
162 |     from yelp_kafka import discovery
163 |     from yelp_kafka.config import ClusterConfig
164 |     from yelp_kafka.config import KafkaConsumerConfig
165 |     from kafka import KafkaConsumer
166 | 
167 |     # Cluster configuration
168 |     cluster_config = ClusterConfig(
169 |         type="service",
170 |         name="cluster",
171 |         broker_list=["cluster-elb-1:9092"],
172 |         zookeeper="11.11.11.111:2181,11.11.11.112:2181,11.11.11.113:2181/kafka-1",
173 |     )
174 |     topics, cluster = discovery.search_topics_by_regex('.*mytopic', [cluster_config])
175 |     config = KafkaConsumerConfig(group_id='my_app', cluster=cluster, client_id='my-consumer')
176 |     consumer = KafkaConsumer(topics, **config.get_kafka_consumer_config())
177 |     for message in consumer:
178 |         print message
179 | 
180 | This example makes use of the `KafkaConsumer`_ from kafka-python. This consumer
181 | class is deprecated; prefer :py:class:`yelp_kafka.consumer_group.KafkaConsumerGroup` instead.
182 | 
183 | .. _KafkaConsumer: http://kafka-python.readthedocs.org/en/v0.9.5/apidoc/kafka.consumer.html#module-kafka.consumer.kafka
184 | 
185 | 
186 | Reporting Metrics
187 | ^^^^^^^^^^^^^^^^^
188 | 
189 | If you're using :py:class:`yelp_kafka.consumer_group.KafkaConsumerGroup`, you
190 | can send metrics on request latency and error counts. This is on by default
191 | for yelp_kafka and uses an instance of
192 | :py:class:`yelp_kafka.metrics_responder.MetricsResponder` for reporting metrics.
193 | 
194 | Reporting metrics directly from the kafka client is an option that is only
195 | available in Yelp's fork of kafka-python: https://github.com/Yelp/kafka-python
196 | 
197 | Producer metrics are reported by default by the ``YelpKafkaSimpleProducer``,
198 | controlled through the ``report_metrics`` parameter; it defaults to ``True`` but can be turned off.
199 | 
200 | 
201 | If you want to plug in your own metric responder module, please use
202 | :py:class:`yelp_kafka.metrics_responder.MetricsResponder` and pass it to
203 | :py:class:`yelp_kafka.producer.YelpKafkaSimpleProducer`,
204 | :py:class:`yelp_kafka.producer.YelpKafkaKeyedProducer`, or
205 | :py:class:`yelp_kafka.consumer_group.KafkaConsumerGroup`.
206 | 
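A minimal sketch of wiring in a custom responder is shown below. Note that the empty subclass body and the ``metrics_responder`` keyword argument are illustrative assumptions; consult :py:class:`yelp_kafka.metrics_responder.MetricsResponder` and the producer signatures for the exact hooks to override and the exact parameter name.

.. code-block:: python

    from kafka import KafkaClient

    from yelp_kafka.metrics_responder import MetricsResponder
    from yelp_kafka.producer import YelpKafkaSimpleProducer

    class MyMetricsResponder(MetricsResponder):
        # Hypothetical subclass: override the reporting hooks defined by
        # MetricsResponder to forward metrics to your own backend.
        pass

    producer = YelpKafkaSimpleProducer(
        client=KafkaClient(cluster_config.broker_list),
        cluster_config=cluster_config,
        report_metrics=True,
        metrics_responder=MyMetricsResponder(),  # assumed keyword name
    )
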
207 | 
208 | Other consumer groups
209 | ^^^^^^^^^^^^^^^^^^^^^
210 | 
211 | Yelp_Kafka currently provides three *consumer group* interfaces for consuming
212 | from Kafka.
213 | 
214 | - :py:class:`yelp_kafka.consumer_group.KafkaConsumerGroup` is the recommended
215 |   class to use if you want to start multiple instances of your consumer. You may
216 |   start as many instances as you wish (balancing partitions will happen
217 |   automatically), and you can control when to mark messages as processed (via
218 |   `task_done` and `commit`).
219 | 
220 | - :py:class:`yelp_kafka.consumer_group.MultiprocessingConsumerGroup` is intended for
221 |   consuming from high volume topics, since it starts as many consumer processes as topic
222 |   partitions (see the sketch after this list). It also handles process monitoring and restart upon failures.
223 | 
224 | - :py:class:`yelp_kafka.consumer_group.ConsumerGroup` provides the same set of
225 |   features as KafkaConsumerGroup, but with a less convenient interface.
226 |   This class is considered deprecated.
227 | 
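As referenced above, constructing a :py:class:`yelp_kafka.consumer_group.MultiprocessingConsumerGroup` might look like the sketch below. The constructor arguments (topics, config, consumer factory) match how this repo's tests exercise the class; ``MyConsumer`` is a hypothetical subclass, and how the group is started and monitored is left to the class documentation.

.. code-block:: python

    from yelp_kafka.consumer import KafkaConsumerBase
    from yelp_kafka.consumer_group import MultiprocessingConsumerGroup

    class MyConsumer(KafkaConsumerBase):
        # Hypothetical consumer: implement process() to handle each message.
        def process(self, message):
            print message.value

    # One consumer process is started per assigned topic partition.
    group = MultiprocessingConsumerGroup(
        ['my_topic'],
        config,      # a KafkaConsumerConfig, as in the examples above
        MyConsumer,  # factory invoked to build each consumer instance
    )
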
--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
 1 | Yelp_Kafka v\ |version|
 2 | ***********************
 3 | 
 4 | Yelp_Kafka is a library to interact with Kafka. Before reading about Yelp_Kafka, you should know the basics of Kafka. If topic and topic partition are obscure concepts to you, we recommend reading the introduction of the `Kafka documentation`_.
 5 | Yelp_Kafka is a wrapper around kafka-python that provides some Yelp specific functions for cluster discovery, in addition to custom consumers and producers.
 6 | Yelp_Kafka supports consumer groups and multiprocessing consumer groups, which allow multiple
 7 | consumer instances to coordinate with each other while consuming messages from different Kafka partitions (see :ref:`consumer_group`).
 8 | 
 9 | .. _Kafka documentation: http://kafka.apache.org/documentation.html#introduction
10 | 
11 | .. toctree::
12 |     :maxdepth: -1
13 | 
14 |     self
15 |     getting_started
16 |     discovery
17 |     config
18 |     producer
19 |     consumer
20 |     partitioner
21 |     consumer_group
22 |     error
23 |     utils
24 |     monitoring
25 |     offsets
26 | 
27 | 
28 | Indices and tables
29 | ==================
30 | 
31 | * :ref:`genindex`
32 | * :ref:`modindex`
33 | * :ref:`search`
34 | 
--------------------------------------------------------------------------------
/docs/source/monitoring.rst:
--------------------------------------------------------------------------------
 1 | .. _monitoring:
 2 | 
 3 | yelp_kafka.monitoring
 4 | =====================
 5 | 
 6 | .. automodule:: yelp_kafka.monitoring
 7 |     :members:
 8 | 
 9 | 
--------------------------------------------------------------------------------
/docs/source/offsets.rst:
--------------------------------------------------------------------------------
 1 | .. _offsets:
 2 | 
 3 | yelp_kafka.offsets
 4 | ==================
 5 | 
 6 | .. automodule:: yelp_kafka.offsets
 7 |     :members:
 8 | 
--------------------------------------------------------------------------------
/docs/source/partitioner.rst:
--------------------------------------------------------------------------------
 1 | .. _partitioner:
 2 | 
 3 | yelp_kafka.partitioner
 4 | ======================
 5 | 
 6 | .. automodule:: yelp_kafka.partitioner
 7 |     :members:
 8 | 
 9 | 
--------------------------------------------------------------------------------
/docs/source/producer.rst:
--------------------------------------------------------------------------------
 1 | .. _producer:
 2 | 
 3 | yelp_kafka.producer
 4 | ===================
 5 | 
 6 | .. automodule:: yelp_kafka.producer
 7 |     :members:
 8 |     :exclude-members: YelpKafkaProducerMetrics
--------------------------------------------------------------------------------
/docs/source/utils.rst:
--------------------------------------------------------------------------------
 1 | .. _utils:
 2 | 
 3 | yelp_kafka.utils
 4 | ================
 5 | 
 6 | .. automodule:: yelp_kafka.utils
 7 |     :members:
 8 | 
 9 | 
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
 1 | [wheel]
 2 | universal = True
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | from __future__ import absolute_import
16 | from __future__ import unicode_literals
17 | 
18 | import sys
19 | 
20 | from setuptools import find_packages
21 | from setuptools import setup
22 | from setuptools.command.test import test as TestCommand
23 | 
24 | import yelp_kafka
25 | 
26 | 
27 | class Tox(TestCommand):
28 | 
29 |     def finalize_options(self):
30 |         TestCommand.finalize_options(self)
31 |         self.test_args = []
32 |         self.test_suite = True
33 | 
34 |     def run_tests(self):
35 |         # import here, because outside the eggs aren't loaded
36 |         import tox
37 |         errno = tox.cmdline(self.test_args)
38 |         sys.exit(errno)
39 | 
40 | 
41 | class Coverage(Tox):
42 | 
43 |     def finalize_options(self):
44 |         TestCommand.finalize_options(self)
45 |         self.test_args = ['-e', 'coverage']
46 |         self.test_suite = True
47 | 
48 | 
49 | setup(
50 |     name='yelp_kafka',
51 |     version=yelp_kafka.__version__,
52 |     author='Tools-Infra Team',
53 |     author_email='tools-infra@yelp.com',
54 |     license='Copyright Yelp 2014, All Rights Reserved',
55 |     url="http://servicedocs.yelpcorp.com/docs/yelp_kafka/index.html",
56 |     description='A library to interact with Apache Kafka at Yelp',
57 |     packages=find_packages(exclude=["tests*"]),
58 |     install_requires=[
59 |         'bravado',
60 |         'kafka-python<1.0.0',
61 |         'kazoo>=2.0.post2',
62 |         'PyYAML>=3.10',
63 |         'py_zipkin',
64 |         'setproctitle>=1.1.8',
65 |         'simplejson',
66 |         'six',
67 |         'swagger_zipkin',
68 |         'retrying',
69 |     ],
70 |     extras_require={
71 |         'internal': ['yelp_meteorite', 'bravado_decorators>=0.10.0']
72 |     },
73 |     cmdclass={
74 |         'test': Tox,
75 |         'coverage': Coverage
76 |     },
77 |     classifiers=[
78 |         "Programming Language :: Python",
79 |         "Programming Language :: Python :: 2.7",
80 |         "Programming Language :: Python :: 3.5",
81 |         "Intended Audience :: Developers",
82 |     ],
83 | )
84 | 
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | from __future__ import absolute_import 16 | from __future__ import unicode_literals 17 | 18 | import logging 19 | 20 | logging.disable(logging.CRITICAL) 21 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2016 Yelp Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | from __future__ import absolute_import 16 | from __future__ import unicode_literals 17 | 18 | import mock 19 | import pytest 20 | 21 | from yelp_kafka.config import ClusterConfig 22 | from yelp_kafka.config import KafkaConsumerConfig 23 | 24 | 25 | MOCK_SERVICES_YAML = { 26 | 'service1.main': {'host': 'host1', 'port': 1111}, 27 | 'kafka_discovery.main': {'host': 'host2', 'port': 2222} 28 | } 29 | 30 | 31 | @pytest.yield_fixture 32 | def mock_swagger_yaml(): 33 | with mock.patch( 34 | 'yelp_kafka.config.load_yaml_config', 35 | return_value=MOCK_SERVICES_YAML, 36 | create=True, 37 | ) as m: 38 | with mock.patch('os.path.isfile', return_value=True): 39 | yield m 40 | 41 | 42 | @pytest.fixture 43 | def cluster(): 44 | return ClusterConfig( 45 | 'cluster_type', 'mycluster', ['test_broker:9292'], 'test_cluster' 46 | ) 47 | 48 | 49 | @pytest.fixture 50 | def mock_pre_rebalance_cb(): 51 | return mock.Mock() 52 | 53 | 54 | @pytest.fixture 55 | def mock_post_rebalance_cb(): 56 | return mock.Mock() 57 | 58 | 59 | @pytest.fixture 60 | def config( 61 | cluster, 62 | mock_pre_rebalance_cb, 63 | mock_post_rebalance_cb 64 | ): 65 | return KafkaConsumerConfig( 66 | cluster=cluster, 67 | group_id='test_group', 68 | client_id='test_client_id', 69 | partitioner_cooldown=0.5, 70 | pre_rebalance_callback=mock_pre_rebalance_cb, 71 | post_rebalance_callback=mock_post_rebalance_cb 72 | ) 73 | -------------------------------------------------------------------------------- /tests/integration/conftest.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2016 Yelp Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | from __future__ import absolute_import
16 | from __future__ import unicode_literals
17 | 
--------------------------------------------------------------------------------
/tests/integration/test_consumer.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | from __future__ import absolute_import
16 | from __future__ import unicode_literals
17 | 
18 | import subprocess
19 | import time
20 | import uuid
21 | from multiprocessing import Process
22 | from multiprocessing import Queue
23 | 
24 | from kafka import KafkaClient
25 | from kafka.common import ConsumerTimeout
26 | from six.moves.queue import Empty
27 | 
28 | from yelp_kafka.config import ClusterConfig
29 | from yelp_kafka.config import KafkaConsumerConfig
30 | from yelp_kafka.consumer import KafkaSimpleConsumer
31 | from yelp_kafka.consumer_group import KafkaConsumerGroup
32 | from yelp_kafka.producer import YelpKafkaSimpleProducer
33 | 
34 | 
35 | ZOOKEEPER_URL = 'zookeeper:2181'
36 | KAFKA_URL = 'kafka:9092'
37 | 
38 | 
39 | def create_topic(topic_name, replication_factor, partitions):
40 |     cmd = ['/usr/bin/kafka-topics', '--create',
41 |            '--zookeeper', ZOOKEEPER_URL,
42 |            '--replication-factor', str(replication_factor),
43 |            '--partitions', str(partitions),
44 |            '--topic', topic_name]
45 |     subprocess.check_call(cmd)
46 | 
47 |     # It may take a moment for the topic to be ready for writing.
48 |     time.sleep(5)
49 | 
50 | 
51 | def create_random_topic(replication_factor, partitions):
52 |     topic_name = str(uuid.uuid1())
53 |     create_topic(topic_name, replication_factor, partitions)
54 |     return topic_name
55 | 
56 | 
57 | def test_simple_consumer():
58 |     topic = create_random_topic(1, 1)
59 | 
60 |     messages = [str(i).encode("UTF-8") for i in range(100)]
61 | 
62 |     cluster_config = ClusterConfig(None, None, [KAFKA_URL], ZOOKEEPER_URL)
63 |     producer = YelpKafkaSimpleProducer(
64 |         cluster_config=cluster_config,
65 |         report_metrics=False,
66 |         client=KafkaClient(KAFKA_URL),
67 |     )
68 |     producer.send_messages(topic, *messages)
69 | 
70 |     config = KafkaConsumerConfig(
71 |         'test',
72 |         cluster_config,
73 |         auto_offset_reset='smallest',
74 |         auto_commit=False,
75 |         consumer_timeout_ms=1000
76 |     )
77 |     consumer = KafkaSimpleConsumer(topic, config)
78 | 
79 |     with consumer:
80 |         for expected_offset in range(100):
81 |             message = consumer.get_message()
82 |             assert message.offset == expected_offset
83 |             assert message.partition == 0
84 |             assert message.value == str(expected_offset).encode("UTF-8")
85 | 
86 | 
87 | def test_kafka_consumer_group_one_consumer_one_partition():
88 |     run_kafka_consumer_group_test(1, 1)
89 | 
90 | 
91 | def test_kafka_consumer_group_one_consumer_two_partitions():
92 |     run_kafka_consumer_group_test(1, 2)
93 | 
94 | 
95 | def test_kafka_consumer_group_two_consumers_one_partition():
96 |     run_kafka_consumer_group_test(2, 1)
97 | 
98 | 
99 | def test_kafka_consumer_group_five_consumers_five_partitions():
100 |     run_kafka_consumer_group_test(5, 5)
101 | 
102 | 
103 | def run_kafka_consumer_group_test(num_consumers, num_partitions):
104 |     topic = create_random_topic(1, num_partitions)
105 |     cluster_config = ClusterConfig(None, None, [KAFKA_URL], ZOOKEEPER_URL)
106 |     config = KafkaConsumerConfig(
107 |         'test',
108 |         cluster_config,
109 |         auto_offset_reset='smallest',
110 |         partitioner_cooldown=5,
111 |         auto_commit_interval_messages=1,
112 |     )
113 | 
114 |     queue = Queue()
115 | 
116 |     def create_consumer():
117 |         def consume():
118 |             consumer = KafkaConsumerGroup([topic], config)
119 |             with consumer:
120 |                 while True:
121 |                     try:
122 |                         message = consumer.next()
123 |                         queue.put(message)
124 |                         consumer.task_done(message)
125 |                     except ConsumerTimeout:
126 |                         return
127 | 
128 |         p = Process(target=consume)
129 |         p.daemon = True
130 |         return p
131 | 
132 |     consumer_processes = [create_consumer() for _ in range(num_consumers)]
133 | 
134 |     for consumer_process in consumer_processes:
135 |         consumer_process.start()
136 | 
137 |     producer = YelpKafkaSimpleProducer(
138 |         cluster_config=cluster_config,
139 |         report_metrics=False,
140 |         client=KafkaClient(KAFKA_URL),
141 |     )
142 |     for i in range(100):
143 |         producer.send_messages(topic, str(i).encode("UTF-8"))
144 | 
145 |     # wait until all 100 messages have been consumed
146 |     while queue.qsize() < 100:
147 |         time.sleep(0.1)
148 | 
149 |     received_messages = []
150 |     while True:
151 |         try:
152 |             message = queue.get(block=True, timeout=0.5)
153 |         except Empty:
154 |             break
155 |         received_messages.append(int(message.value))
156 | 
157 |     assert [i for i in range(100)] == sorted(received_messages)
158 | 
--------------------------------------------------------------------------------
/tests/test_consumer.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | from __future__ import absolute_import 16 | from __future__ import unicode_literals 17 | 18 | import contextlib 19 | 20 | import mock 21 | import pytest 22 | from kafka import KafkaClient 23 | from kafka.common import KafkaError 24 | from kafka.common import OffsetCommitRequest 25 | from setproctitle import getproctitle 26 | 27 | from yelp_kafka.config import KafkaConsumerConfig 28 | from yelp_kafka.consumer import KafkaConsumerBase 29 | from yelp_kafka.consumer import KafkaSimpleConsumer 30 | from yelp_kafka.consumer import Message 31 | from yelp_kafka.error import ProcessMessageError 32 | 33 | 34 | @contextlib.contextmanager 35 | def mock_kafka(): 36 | with mock.patch('yelp_kafka.consumer.KafkaClient', autospec=True) as mock_client: 37 | with mock.patch('yelp_kafka.consumer.SimpleConsumer', autospec=True) as mock_consumer: 38 | mock_consumer.return_value.auto_commit = True 39 | yield mock_client, mock_consumer 40 | 41 | 42 | class TestKafkaSimpleConsumer(object): 43 | 44 | @contextlib.contextmanager 45 | def mock_yelpkafka_consumer(self): 46 | with mock.patch.object(KafkaSimpleConsumer, "commit", autospec=True) as mock_commit: 47 | yield mock_commit 48 | 49 | def test_topic_error(self, config): 50 | with pytest.raises(TypeError): 51 | KafkaSimpleConsumer(['test_topic'], config) 52 | 53 | def test_partitions_error(self, config): 54 | with pytest.raises(TypeError): 55 | KafkaSimpleConsumer('test_topic', config, partitions='1') 56 | 57 | def test_connect(self, config): 58 | with mock_kafka() as (mock_client, mock_consumer): 59 | mock_client.return_value = mock.sentinel.client 60 | consumer = KafkaSimpleConsumer('test_topic', config) 61 | consumer.connect() 62 | mock_client.assert_called_once_with( 63 | ['test_broker:9292'], 64 | client_id='test_client_id' 65 | ) 66 | assert not mock_consumer.call_args[0] 67 | kwargs = mock_consumer.call_args[1] 68 | assert kwargs['topic'] == 'test_topic'.encode() 69 | assert kwargs['group'] == 'test_group'.encode() 70 | 71 | def test_get_message(self, config): 72 | with mock_kafka() as (_, mock_consumer): 73 | mock_obj = mock_consumer.return_value 74 | # get message should return a tuple (partition_id, (offset, 75 | # Message)). Message is a namedtuple defined in 76 | # kafka-python that at least contains key and value. 
 77 |             mock_message = mock.Mock()
 78 |             mock_message.value = 'test_content'
 79 |             mock_message.key = 'test_key'
 80 |             kafka_message = (1, (12345, mock_message))
 81 |             mock_obj.get_message.return_value = kafka_message
 82 |             # Set the current offset to the offset of the message + 1
 83 |             mock_obj.offsets = {1: 12346}
 84 |             consumer = KafkaSimpleConsumer('test_topic', config)
 85 |             consumer.connect()
 86 |             assert consumer.get_message() == Message(
 87 |                 partition=1,
 88 |                 offset=12345,
 89 |                 key='test_key',
 90 |                 value='test_content',
 91 |             )
 92 | 
 93 |     def test_close(self, config):
 94 |         with mock_kafka() as (mock_client, mock_consumer):
 95 |             with mock.patch.object(
 96 |                 KafkaSimpleConsumer,
 97 |                 'commit',
 98 |                 autospec=True,
 99 |             ) as mock_commit:
100 |                 consumer = KafkaSimpleConsumer('test_topic', config)
101 |                 consumer.connect()
102 |                 consumer.close()
103 |                 mock_commit.assert_called_once_with(consumer)
104 |                 mock_client.return_value.close.assert_called_once_with()
105 | 
106 |     def test_close_no_commit(self, cluster):
107 |         config = KafkaConsumerConfig(
108 |             cluster=cluster,
109 |             group_id='test_group',
110 |             client_id='test_client_id',
111 |             auto_commit=False
112 |         )
113 |         with mock_kafka() as (mock_client, mock_consumer):
114 |             with mock.patch.object(
115 |                 KafkaSimpleConsumer,
116 |                 'commit',
117 |                 autospec=True,
118 |             ) as mock_commit:
119 |                 mock_obj = mock_consumer.return_value
120 |                 mock_obj.auto_commit = False
121 |                 consumer = KafkaSimpleConsumer('test_topic', config)
122 |                 consumer.connect()
123 |                 consumer.close()
124 |                 assert not mock_commit.called
125 |                 mock_client.return_value.close.assert_called_once_with()
126 | 
127 |     def test_commit_all_partitions(self, config):
128 |         with mock_kafka() as (mock_client, mock_consumer):
129 |             consumer = KafkaSimpleConsumer('test_topic', config)
130 |             consumer.connect()
131 |             consumer.commit()
132 |             mock_consumer.return_value.commit.assert_called_once_with()
133 | 
134 |     def test_commit_few_partitions(self, config):
135 |         with mock_kafka() as (mock_client, mock_consumer):
136 |             consumer = KafkaSimpleConsumer('test_topic', config)
137 |             consumer.connect()
138 |             topic_partitions = ['partition1', 'partition2']
139 |             consumer.commit(topic_partitions)
140 |             mock_consumer.return_value.commit.assert_called_once_with(
141 |                 topic_partitions,
142 |             )
143 | 
144 |     def test_commit_message_default(self, config):
145 |         with mock_kafka() as (mock_client, mock_consumer):
146 |             consumer = KafkaSimpleConsumer('test_topic', config)
147 |             consumer.connect()
148 | 
149 |             actual = consumer.commit_message(
150 |                 Message(0, 100, 'mykey', 'myvalue'),
151 |             )
152 | 
153 |             assert actual is True
154 |             mock_client.return_value.send_offset_commit_request \
155 |                 .assert_called_once_with(
156 |                     'test_group'.encode(),
157 |                     [OffsetCommitRequest('test_topic'.encode(), 0, 100, None)],
158 |                 )
159 | 
160 |     def test_commit_message_zk(self, config):
161 |         if getattr(KafkaClient, 'send_offset_commit_request_kafka', None) is None:
162 |             return
163 | 
164 |         with mock_kafka() as (mock_client, mock_consumer):
165 |             config._config['offset_storage'] = 'zookeeper'
166 |             consumer = KafkaSimpleConsumer('test_topic', config)
167 |             consumer.connect()
168 | 
169 |             actual = consumer.commit_message(
170 |                 Message(0, 100, 'mykey', 'myvalue'),
171 |             )
172 | 
173 |             assert actual is True
174 |             mock_client.return_value.send_offset_commit_request \
175 |                 .assert_called_once_with(
176 |                     'test_group'.encode(),
177 |                     [OffsetCommitRequest('test_topic'.encode(), 0, 100, None)],
178 |                 )
179 | 
180 |     def test_commit_message_kafka(self,
config): 181 | if getattr(KafkaClient, 'send_offset_commit_request_kafka', None) is None: 182 | return 183 | 184 | with mock_kafka() as (mock_client, mock_consumer): 185 | config._config['offset_storage'] = 'kafka' 186 | consumer = KafkaSimpleConsumer('test_topic', config) 187 | consumer.connect() 188 | 189 | actual = consumer.commit_message( 190 | Message(0, 100, 'mykey', 'myvalue'), 191 | ) 192 | 193 | assert actual is True 194 | assert not mock_client.return_value.send_offset_commit_request.called 195 | mock_client.return_value.send_offset_commit_request_kafka \ 196 | .assert_called_once_with( 197 | 'test_group'.encode(), 198 | [OffsetCommitRequest('test_topic'.encode(), 0, 100, None)], 199 | ) 200 | 201 | def test_commit_message_dual(self, config): 202 | if getattr(KafkaClient, 'send_offset_commit_request_kafka', None) is None: 203 | return 204 | 205 | with mock_kafka() as (mock_client, mock_consumer): 206 | config._config['offset_storage'] = 'dual' 207 | consumer = KafkaSimpleConsumer('test_topic', config) 208 | consumer.connect() 209 | 210 | actual = consumer.commit_message( 211 | Message(0, 100, 'mykey', 'myvalue'), 212 | ) 213 | 214 | assert actual is True 215 | mock_client.return_value.send_offset_commit_request \ 216 | .assert_called_once_with( 217 | 'test_group'.encode(), 218 | [OffsetCommitRequest('test_topic'.encode(), 0, 100, None)], 219 | ) 220 | mock_client.return_value.send_offset_commit_request_kafka \ 221 | .assert_called_once_with( 222 | 'test_group'.encode(), 223 | [OffsetCommitRequest('test_topic'.encode(), 0, 100, None)], 224 | ) 225 | 226 | def test_commit_message_error(self, config): 227 | with mock_kafka() as (mock_client, mock_consumer): 228 | consumer = KafkaSimpleConsumer('test_topic', config) 229 | consumer.connect() 230 | mock_client.return_value.send_offset_commit_request \ 231 | .side_effect = KafkaError("Boom!") 232 | 233 | actual = consumer.commit_message( 234 | Message(0, 100, 'mykey', 'myvalue'), 235 | ) 236 | assert actual is False 237 | 238 | 239 | class TestKafkaConsumer(object): 240 | 241 | def test_run_and_terminate(self, config): 242 | message_iterator = iter([ 243 | Message(1, 12345, 'key1', 'value1'), 244 | Message(1, 12346, 'key2', 'value2'), 245 | Message(1, 12347, 'key1', 'value3'), 246 | ]) 247 | with mock_kafka() as (mock_client, mock_consumer): 248 | with mock.patch.object(KafkaSimpleConsumer, '__iter__', return_value=message_iterator): 249 | with mock.patch.object(KafkaSimpleConsumer, 'commit') as mock_commit: 250 | consumer = KafkaConsumerBase('test_topic', config) 251 | consumer.process = mock.Mock() 252 | consumer.initialize = mock.Mock() 253 | consumer.dispose = mock.Mock() 254 | consumer.terminate() 255 | consumer.run() 256 | assert consumer.initialize.call_count == 1 257 | # process should have been called 0 times 258 | # termination flag is checked before the first 259 | # message is pulled. 
260 |                     assert consumer.process.call_count == 0
261 |                     # and since process was never called, no call arguments were recorded
262 |                     assert consumer.process.call_args_list == []
263 |                     consumer.dispose.assert_called_once_with()
264 |                     mock_commit.assert_called_once_with()
265 |                     mock_client.return_value.close.assert_called_once_with()
266 | 
267 |     def test_process_error(self, config):
268 |         message_iterator = iter([
269 |             Message(1, 12345, 'key1', 'value1'),
270 |             Message(1, 12346, 'key2', 'value2'),
271 |             Message(1, 12347, 'key1', 'value3'),
272 |         ])
273 |         with mock_kafka() as (mock_client, _):
274 |             with mock.patch.object(
275 |                 KafkaSimpleConsumer,
276 |                 '__iter__',
277 |                 return_value=message_iterator,
278 |             ):
279 |                 consumer = KafkaConsumerBase('test_topic', config)
280 |                 consumer.process = mock.Mock(side_effect=Exception('Boom!'))
281 |                 consumer.initialize = mock.Mock()
282 |                 consumer.dispose = mock.Mock()
283 |                 with pytest.raises(ProcessMessageError):
284 |                     consumer.run()
285 | 
286 |     def test_set_process_name(self, config):
287 |         consumer = KafkaConsumerBase(
288 |             'my_very_extraordinarily_elongated_topic_name',
289 |             config, ['1', '2', '3', '4', '5'])
290 |         with mock.patch(
291 |             'yelp_kafka.consumer.setproctitle',
292 |         ) as mock_setproctitle:
293 |             consumer.set_process_name()
294 |             expected_name = \
295 |                 '{procname}-my_very_extraordinarily_elongated_topic_name' \
296 |                 '-{messages}'.format(
297 |                     procname=getproctitle(),
298 |                     messages=['1', '2', '3', '4', '5'],
299 |                 )
300 |             mock_setproctitle.assert_called_with(expected_name)
--------------------------------------------------------------------------------
/tests/test_consumer_group.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | from __future__ import absolute_import 16 | from __future__ import unicode_literals 17 | 18 | import os 19 | import time 20 | from multiprocessing import Process 21 | 22 | import mock 23 | import pytest 24 | from kafka.common import ConsumerTimeout 25 | from kafka.common import KafkaUnavailableError 26 | 27 | from yelp_kafka.config import KafkaConsumerConfig 28 | from yelp_kafka.consumer_group import ConsumerGroup 29 | from yelp_kafka.consumer_group import KafkaConsumerGroup 30 | from yelp_kafka.consumer_group import MultiprocessingConsumerGroup 31 | from yelp_kafka.error import ConsumerGroupError 32 | from yelp_kafka.error import PartitionerError 33 | from yelp_kafka.error import PartitionerZookeeperError 34 | from yelp_kafka.error import ProcessMessageError 35 | 36 | 37 | @mock.patch('yelp_kafka.consumer_group.Partitioner', autospec=True) 38 | class TestConsumerGroup(object): 39 | 40 | topic = 'topic1' 41 | 42 | def test__consume(self, mock_partitioner, config): 43 | group = ConsumerGroup(self.topic, config, mock.Mock()) 44 | group.consumer = mock.MagicMock() 45 | group.consumer.__iter__.return_value = [ 46 | mock.sentinel.message1, 47 | mock.sentinel.message2 48 | ] 49 | group.consume(refresh_timeout=1) 50 | assert group.process.call_args_list == [ 51 | mock.call(mock.sentinel.message1), 52 | mock.call(mock.sentinel.message2) 53 | ] 54 | mock_partitioner.return_value.refresh.assert_called_once_with() 55 | 56 | def test__consume_partitioner_errors(self, mock_partitioner, config): 57 | group = ConsumerGroup(self.topic, config, mock.Mock()) 58 | group.consumer = mock.MagicMock() 59 | group.consumer.__iter__.return_value = [ 60 | mock.sentinel.message1, 61 | mock.sentinel.message2 62 | ] 63 | mock_partitioner.return_value.refresh.side_effect = PartitionerError("Boom") 64 | with pytest.raises(PartitionerError): 65 | group.consume(refresh_timeout=1) 66 | mock_partitioner.return_value.refresh.side_effect = PartitionerZookeeperError("Boom") 67 | with pytest.raises(PartitionerZookeeperError): 68 | group.consume(refresh_timeout=1) 69 | 70 | def test__consume_error(self, mock_partitioner, config): 71 | group = ConsumerGroup(self.topic, config, mock.Mock(side_effect=Exception("Boom!"))) 72 | group.consumer = mock.MagicMock() 73 | group.consumer.__iter__.return_value = [ 74 | mock.sentinel.message1, 75 | mock.sentinel.message2 76 | ] 77 | with pytest.raises(ProcessMessageError): 78 | group.consume(refresh_timeout=1) 79 | 80 | @mock.patch('yelp_kafka.consumer_group.KafkaSimpleConsumer', autospec=True) 81 | def test__acquire(self, mock_consumer, _, config): 82 | group = ConsumerGroup(self.topic, config, mock.Mock()) 83 | partitions = {self.topic: [0, 1]} 84 | group._acquire(partitions) 85 | args, _ = mock_consumer.call_args 86 | topic, _, partitions = args 87 | assert topic == self.topic 88 | assert partitions == [0, 1] 89 | mock_consumer.return_value.connect.assert_called_once_with() 90 | 91 | @mock.patch('yelp_kafka.consumer_group.KafkaSimpleConsumer', autospec=True) 92 | def test__acquire_no_partitions_assigned(self, mock_consumer, _, config): 93 | group = ConsumerGroup(self.topic, config, mock.Mock()) 94 | partitions = {} 95 | group._acquire(partitions) 96 | assert not mock_consumer.called 97 | 98 | @mock.patch('yelp_kafka.consumer_group.KafkaSimpleConsumer', autospec=True) 99 | def test__release(self, mock_consumer, _, config): 100 | group = ConsumerGroup(self.topic, config, mock.Mock()) 101 | partitions = {self.topic: [0, 1]} 102 | group._acquire(partitions) 103 | group._release(partitions) 
104 | mock_consumer.return_value.close.assert_called_once_with() 105 | 106 | 107 | class TestKafkaConsumerGroup(object): 108 | 109 | @pytest.fixture 110 | def example_partitions(self): 111 | return {'a': 'b'} 112 | 113 | topic = 'topic1' 114 | group = 'my_group' 115 | 116 | def test___init__string_topics(self): 117 | with pytest.raises(AssertionError): 118 | KafkaConsumerGroup(self.topic, None) 119 | 120 | def test__should_keep_trying_no_timeout(self, cluster): 121 | config = KafkaConsumerConfig( 122 | self.group, 123 | cluster, 124 | consumer_timeout_ms=-1 125 | ) 126 | consumer = KafkaConsumerGroup([], config) 127 | 128 | long_time_ago = time.time() - 1000 129 | assert consumer._should_keep_trying(long_time_ago) 130 | 131 | @mock.patch('time.time') 132 | def test__should_keep_trying_not_timed_out(self, mock_time, cluster): 133 | mock_time.return_value = 0 134 | 135 | config = KafkaConsumerConfig( 136 | self.group, 137 | cluster, 138 | consumer_timeout_ms=1000 139 | ) 140 | consumer = KafkaConsumerGroup([], config) 141 | 142 | almost_a_second_ago = time.time() - 0.8 143 | assert consumer._should_keep_trying(almost_a_second_ago) 144 | 145 | @mock.patch('time.time') 146 | def test__should_keep_trying_timed_out(self, mock_time, cluster): 147 | mock_time.return_value = 0 148 | 149 | config = KafkaConsumerConfig( 150 | self.group, 151 | cluster, 152 | consumer_timeout_ms=1000 153 | ) 154 | consumer = KafkaConsumerGroup([], config) 155 | 156 | over_a_second_ago = time.time() - 1.2 157 | assert not consumer._should_keep_trying(over_a_second_ago) 158 | 159 | def test__auto_commit_enabled_is_enabled(self, cluster): 160 | config = KafkaConsumerConfig( 161 | self.group, 162 | cluster, 163 | auto_commit_enable=True 164 | ) 165 | consumer = KafkaConsumerGroup([], config) 166 | assert consumer._auto_commit_enabled() 167 | 168 | def test__auto_commit_enabled_not_enabled(self, cluster): 169 | config = KafkaConsumerConfig( 170 | self.group, 171 | cluster, 172 | auto_commit_enable=False 173 | ) 174 | consumer = KafkaConsumerGroup([], config) 175 | assert not consumer._auto_commit_enabled() 176 | 177 | @mock.patch('yelp_kafka.consumer_group.Partitioner') 178 | @mock.patch('yelp_kafka.consumer_group.KafkaConsumer') 179 | def test_next(self, mock_consumer, mock_partitioner, cluster): 180 | config = KafkaConsumerConfig( 181 | self.group, 182 | cluster, 183 | consumer_timeout_ms=500 184 | ) 185 | consumer = KafkaConsumerGroup([], config) 186 | consumer.partitioner = mock_partitioner() 187 | consumer.consumer = mock_consumer() 188 | 189 | def fake_next(): 190 | time.sleep(1) 191 | raise ConsumerTimeout() 192 | 193 | consumer.consumer.next.side_effect = fake_next 194 | 195 | # The mock KafkaConsumer.next (called fake_next above) takes longer than 196 | # consumer_timeout_ms, so we should get a ConsumerTimeout from 197 | # KafkaConsumerGroup 198 | with pytest.raises(ConsumerTimeout): 199 | consumer.next() 200 | 201 | consumer.consumer.next.assert_called_once_with() 202 | consumer.partitioner.refresh.assert_called_once_with() 203 | 204 | def test__acquire_has_consumer( 205 | self, 206 | cluster, 207 | example_partitions, 208 | mock_post_rebalance_cb 209 | ): 210 | config = KafkaConsumerConfig( 211 | self.group, 212 | cluster, 213 | post_rebalance_callback=mock_post_rebalance_cb 214 | ) 215 | consumer = KafkaConsumerGroup([], config) 216 | 217 | consumer.consumer = mock.Mock() 218 | consumer._acquire(example_partitions) 219 | 220 | consumer.consumer.set_topic_partitions.assert_called_once_with(example_partitions) 
221 | mock_post_rebalance_cb.assert_called_once_with(example_partitions) 222 | 223 | @mock.patch('yelp_kafka.consumer_group.KafkaConsumer') 224 | def test__acquire_has_no_consumer(self, mock_consumer, cluster, example_partitions): 225 | config = KafkaConsumerConfig(self.group, cluster) 226 | consumer = KafkaConsumerGroup([], config) 227 | 228 | consumer._acquire(example_partitions) 229 | mock_consumer.assert_called_once_with(example_partitions, **consumer.config) 230 | 231 | def test__release( 232 | self, 233 | cluster, 234 | example_partitions, 235 | mock_pre_rebalance_cb 236 | ): 237 | config = KafkaConsumerConfig( 238 | self.group, 239 | cluster, 240 | auto_commit_enable=True, 241 | pre_rebalance_callback=mock_pre_rebalance_cb 242 | ) 243 | consumer = KafkaConsumerGroup([], config) 244 | 245 | mock_consumer = mock.Mock() 246 | consumer.consumer = mock_consumer 247 | consumer._release(example_partitions) 248 | 249 | mock_consumer.commit.assert_called_once_with() 250 | mock_consumer.set_topic_partitions.assert_called_once_with({}) 251 | mock_pre_rebalance_cb.assert_called_once_with(example_partitions) 252 | 253 | def test__release_retry(self, cluster): 254 | config = KafkaConsumerConfig( 255 | self.group, 256 | cluster, 257 | auto_commit_enable=True 258 | ) 259 | consumer = KafkaConsumerGroup([], config) 260 | 261 | mock_consumer = mock.Mock() 262 | mock_consumer.set_topic_partitions.side_effect = KafkaUnavailableError 263 | consumer.consumer = mock_consumer 264 | 265 | with pytest.raises(KafkaUnavailableError): 266 | consumer._release({}) 267 | assert mock_consumer.set_topic_partitions.call_count == 2 268 | 269 | 270 | class TestMultiprocessingConsumerGroup(object): 271 | 272 | topics = ['topic1', 'topic2'] 273 | 274 | @pytest.fixture 275 | @mock.patch('yelp_kafka.consumer_group.Partitioner', autospec=True) 276 | def group( 277 | self, _, 278 | mock_pre_rebalance_cb, 279 | mock_post_rebalance_cb 280 | ): 281 | config = KafkaConsumerConfig( 282 | cluster={'broker_list': ['test_broker:9292'], 283 | 'zookeeper': 'zookeeper_uri1:2181,zookeeper_uri2:2181'}, 284 | group_id='test_group', 285 | client_id='test_client_id', 286 | max_termination_timeout_secs=0.1, 287 | pre_rebalance_callback=mock_pre_rebalance_cb, 288 | post_rebalance_callback=mock_post_rebalance_cb 289 | ) 290 | return MultiprocessingConsumerGroup( 291 | self.topics, 292 | config, mock.Mock() 293 | ) 294 | 295 | @mock.patch('yelp_kafka.consumer_group.Partitioner', autospec=True) 296 | def test_acquire(self, _, config, mock_post_rebalance_cb): 297 | consumer_factory = mock.Mock() 298 | mock_consumer = mock.Mock() 299 | consumer_factory.return_value = mock_consumer 300 | group = MultiprocessingConsumerGroup( 301 | self.topics, 302 | config, consumer_factory 303 | ) 304 | partitions = { 305 | 'topic1': [0, 1, 2], 306 | 'topic2': [3] 307 | } 308 | with mock.patch( 309 | 'yelp_kafka.consumer_group.Process', 310 | autospec=True 311 | ) as mock_process: 312 | group.acquire(partitions) 313 | assert all(consumer is mock_consumer 314 | for consumer in group.get_consumers()) 315 | assert consumer_factory.call_count == 4 316 | assert mock_process.call_count == 4 317 | assert mock_process.return_value.start.call_count == 4 318 | mock_post_rebalance_cb.assert_called_once_with(partitions) 319 | 320 | def test_start_consumer_fail(self, group): 321 | consumer = mock.Mock(topic='Test', partitions=[1, 2, 3]) 322 | with mock.patch( 323 | 'yelp_kafka.consumer_group.Process', 324 | autospec=True, 325 | ) as mock_process: 326 | 
mock_process.return_value.start.side_effect = Exception("Boom!")
327 |             with pytest.raises(ConsumerGroupError):
328 |                 group.start_consumer(consumer)
329 | 
330 |     def test_release(self, group, mock_pre_rebalance_cb):
331 |         consumer = mock.Mock()
332 |         args = {'is_alive.return_value': False}
333 |         group.consumers = [consumer, consumer]
334 |         group.consumer_procs = {
335 |             mock.Mock(spec=Process, **args): consumer,
336 |             mock.Mock(spec=Process, **args): consumer
337 |         }
338 |         with mock.patch.object(os, 'kill', autospec=True) as mock_kill:
339 |             # Release takes acquired_partitions but in this case it is not used
340 |             # so we pass None
341 |             group.release(None)
342 |             assert not mock_kill.called
343 |             assert consumer.terminate.call_count == 2
344 |             assert not group.get_consumers()
345 |             mock_pre_rebalance_cb.assert_called_once_with(None)
346 | 
347 |     def test_release_and_kill_unresponsive_consumer(self, group):
348 |         consumer = mock.Mock()
349 |         args = {'is_alive.return_value': True}
350 |         group.consumer_procs = {
351 |             mock.Mock(spec=Process, **args): consumer,
352 |             mock.Mock(spec=Process, **args): consumer
353 |         }
354 |         with mock.patch.object(os, 'kill', autospec=True) as mock_kill:
355 |             # Release takes acquired_partitions but in this case it is not used
356 |             # so we pass None
357 |             group.release(None)
358 |             assert mock_kill.call_count == 2
359 |             assert consumer.terminate.call_count == 2
360 | 
361 |     def test_monitor(self, group):
362 |         consumer1 = mock.Mock()
363 |         consumer2 = mock.Mock()
364 |         args1 = {'is_alive.return_value': False}
365 |         args2 = {'is_alive.return_value': True}
366 |         group.consumer_procs = {
367 |             mock.Mock(spec=Process, **args1): consumer1,
368 |             mock.Mock(spec=Process, **args2): consumer2,
369 |         }
370 |         mock_new_proc = mock.Mock()
371 |         mock_new_proc.is_alive.return_value = True
372 |         with mock.patch.object(
373 |             MultiprocessingConsumerGroup, 'start_consumer', autospec=True
374 |         ) as mock_start:
375 |             mock_start.return_value = mock_new_proc
376 |             group.monitor()
377 |             assert mock_new_proc in group.consumer_procs
378 |             mock_start.assert_called_once_with(group, consumer1)
379 | 
380 |     def test_get_consumers(self, group):
381 |         group.consumers = [mock.Mock(), mock.Mock()]
382 |         actual = group.get_consumers()
383 |         # Test that get_consumers actually returns a copy
384 |         assert actual is not group.consumers
385 |         assert actual == group.consumers
--------------------------------------------------------------------------------
/tests/test_mocking.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright 2016 Yelp Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | from __future__ import absolute_import 16 | from __future__ import unicode_literals 17 | 18 | import mock 19 | import pytest 20 | from kafka.common import OffsetAndMessage 21 | 22 | from yelp_kafka.testing.kafka_mock import mock_kafka_python 23 | 24 | 25 | class TestSmoke(object): 26 | 27 | def test_send_then_receive(self): 28 | with mock_kafka_python() as kafka_mocks: 29 | client = kafka_mocks.KafkaClient( 30 | mock.ANY, 31 | ) 32 | 33 | producer = kafka_mocks.SimpleProducer( 34 | client, 35 | ) 36 | 37 | producer.send_messages( 38 | 'test_topic', 39 | 'some message 5', 40 | 'some message 6', 41 | ) 42 | 43 | consumer = kafka_mocks.SimpleConsumer( 44 | client, 45 | group='test_group_name', 46 | topic='test_topic', 47 | ) 48 | 49 | messages = consumer.get_messages(count=2) 50 | assert len(messages) == 2 51 | assert [msg.offset for msg in messages] == [0, 1] 52 | assert [msg.message.value for msg in messages] == ['some message 5', 'some message 6'] 53 | 54 | def test_send_then_receive_with_keys(self): 55 | with mock_kafka_python() as kafka_mocks: 56 | client = kafka_mocks.KafkaClient( 57 | mock.ANY, 58 | ) 59 | 60 | producer = kafka_mocks.KeyedProducer( 61 | client, 62 | ) 63 | 64 | producer.send_messages( 65 | 'test_topic', 66 | 0, 67 | 'some message 5', 68 | 'some message 6', 69 | ) 70 | 71 | consumer = kafka_mocks.SimpleConsumer( 72 | client, 73 | group='test_group_name', 74 | topic='test_topic', 75 | ) 76 | 77 | messages = consumer.get_messages(count=2) 78 | assert len(messages) == 2 79 | assert [msg.offset for msg in messages] == [0, 1] 80 | assert [msg.message.value for msg in messages] == ['some message 5', 'some message 6'] 81 | assert [msg.message.key for msg in messages] == [0, 0] 82 | 83 | 84 | @pytest.yield_fixture 85 | def kafka_mocks_with_messages(): 86 | with mock_kafka_python() as kafka_mocks: 87 | client = kafka_mocks.KafkaClient( 88 | mock.ANY, 89 | ) 90 | 91 | producer = kafka_mocks.KeyedProducer( 92 | client, 93 | ) 94 | 95 | producer.send_messages( 96 | 'test_topic', 97 | 0, 98 | 'some message 5', 99 | 'some message 6', 100 | ) 101 | yield kafka_mocks 102 | 103 | 104 | def assert_is_offset_and_message(kafka_message): 105 | assert isinstance(kafka_message, OffsetAndMessage) 106 | 107 | 108 | def assert_is_partition_message(kafka_message): 109 | message_partition, offset_and_message = kafka_message 110 | assert isinstance(message_partition, int) 111 | assert isinstance(offset_and_message, OffsetAndMessage) 112 | 113 | 114 | @pytest.mark.usefixtures('kafka_mocks_with_messages') 115 | class TestConsumers(object): 116 | 117 | def test_simple_consumer(self, kafka_mocks_with_messages): 118 | consumer = kafka_mocks_with_messages.SimpleConsumer( 119 | client=mock.ANY, 120 | group='test_group_name', 121 | topic='test_topic', 122 | ) 123 | messages = consumer.get_messages(count=2) 124 | assert len(messages) == 2 125 | assert [msg.offset for msg in messages] == [0, 1] 126 | assert [msg.message.value for msg in messages] == ['some message 5', 'some message 6'] 127 | assert [msg.message.key for msg in messages] == [0, 0] 128 | 129 | consumer = kafka_mocks_with_messages.SimpleConsumer( 130 | client=mock.ANY, 131 | group='test_group_name2', 132 | topic='test_topic', 133 | ) 134 | msg1 = consumer.get_message() 135 | assert msg1.offset == 0 136 | assert msg1.message.key == 0 137 | assert msg1.message.value == 'some message 5' 138 | msg2 = consumer.get_message() 139 | assert msg2.offset == 1 140 | assert msg2.message.key == 0 141 | assert msg2.message.value == 'some message 6' 142 | 
143 | consumer = kafka_mocks_with_messages.SimpleConsumer( 144 | client=mock.ANY, 145 | group='test_group_name3', 146 | topic='test_topic', 147 | ) 148 | messages = list(consumer) 149 | assert len(messages) == 2 150 | assert [msg.offset for msg in messages] == [0, 1] 151 | assert [msg.message.value for msg in messages] == ['some message 5', 'some message 6'] 152 | assert [msg.message.key for msg in messages] == [0, 0] 153 | 154 | def test_simple_consumer_get_partition_info(self): 155 | topic = 'random_topic_name' 156 | with mock_kafka_python() as kmocks: 157 | client = kmocks.KafkaClient(mock.ANY) 158 | producer = kmocks.SimpleProducer(client) 159 | producer.send_messages(topic, *range(7)) 160 | consumer = kmocks.SimpleConsumer( 161 | client=mock.ANY, 162 | group='test_group_name', 163 | topic=topic, 164 | ) 165 | 166 | assert_is_offset_and_message(consumer.get_message()) 167 | assert_is_partition_message(consumer.get_message(get_partition_info=True)) 168 | assert_is_offset_and_message(consumer.get_message(get_partition_info=False)) 169 | assert_is_offset_and_message(consumer.get_message()) 170 | consumer.provide_partition_info() 171 | assert_is_partition_message(consumer.get_message()) 172 | assert_is_offset_and_message(consumer.get_message(get_partition_info=False)) 173 | assert_is_partition_message(consumer.get_message(get_partition_info=None)) 174 | assert consumer.get_message(get_partition_info=True) is None 175 | assert consumer.get_message(get_partition_info=False) is None 176 | 177 | def test_simple_consumer_auto_commit(self): 178 | topic = 'random_topic_name' 179 | with mock_kafka_python() as kmocks: 180 | client = kmocks.KafkaClient(mock.ANY) 181 | producer = kmocks.SimpleProducer(client) 182 | producer.send_messages(topic, *range(1, 5)) 183 | consumer = kmocks.SimpleConsumer( 184 | client=mock.ANY, 185 | group='test_group_name', 186 | topic=topic, 187 | ) 188 | 189 | consumer.get_messages(count=2) 190 | assert consumer.get_message().message.value == 3 191 | assert consumer.get_message().message.value == 4 192 | assert consumer._offset == 4 193 | 194 | def test_simple_consumer_non_auto_commit(self): 195 | topic = 'random_topic_name' 196 | with mock_kafka_python() as kmocks: 197 | client = kmocks.KafkaClient(mock.ANY) 198 | producer = kmocks.SimpleProducer(client) 199 | producer.send_messages(topic, *range(1, 5)) 200 | consumer = kmocks.SimpleConsumer( 201 | client=mock.ANY, 202 | group='test_group_name', 203 | topic=topic, 204 | auto_commit=False 205 | ) 206 | 207 | consumer.get_messages(count=2) 208 | assert consumer.get_message().message.value == 3 209 | assert consumer.get_message().message.value == 4 210 | assert consumer._offset == 0 211 | consumer.commit() 212 | assert consumer._offset == 4 213 | 214 | def test_yelp_consumer(self, kafka_mocks_with_messages): 215 | consumer = kafka_mocks_with_messages.KafkaSimpleConsumer( 216 | 'test_topic', 217 | config=mock.ANY, 218 | ) 219 | messages = consumer.get_messages(count=2) 220 | assert len(messages) == 2 221 | assert [msg.offset for msg in messages] == [0, 1] 222 | assert [msg.value for msg in messages] == ['some message 5', 'some message 6'] 223 | assert [msg.key for msg in messages] == [0, 0] 224 | 225 | consumer = kafka_mocks_with_messages.KafkaSimpleConsumer( 226 | 'test_topic', 227 | config=mock.ANY, 228 | ) 229 | msg1 = consumer.get_message() 230 | assert msg1.offset == 0 231 | assert msg1.key == 0 232 | assert msg1.value == 'some message 5' 233 | msg2 = consumer.get_message() 234 | assert msg2.offset == 1 235 | assert 
msg2.key == 0 236 | assert msg2.value == 'some message 6' 237 | 238 | consumer = kafka_mocks_with_messages.KafkaSimpleConsumer( 239 | 'test_topic', 240 | config=mock.ANY, 241 | ) 242 | messages = list(consumer) 243 | assert len(messages) == 2 244 | assert [msg.offset for msg in messages] == [0, 1] 245 | assert [msg.value for msg in messages] == ['some message 5', 'some message 6'] 246 | assert [msg.key for msg in messages] == [0, 0] 247 | -------------------------------------------------------------------------------- /tests/test_monitoring.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2016 Yelp Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # -*- coding: utf-8 -*- 16 | from __future__ import absolute_import 17 | from __future__ import unicode_literals 18 | 19 | import mock 20 | import pytest 21 | from kafka.common import KafkaUnavailableError 22 | from kafka.common import OffsetFetchResponse 23 | 24 | from tests.test_offsets import MyKafkaClient 25 | from tests.test_offsets import TestOffsetsBase 26 | from yelp_kafka.error import UnknownPartitions 27 | from yelp_kafka.error import UnknownTopic 28 | from yelp_kafka.monitoring import ConsumerPartitionOffsets 29 | from yelp_kafka.monitoring import get_consumer_offsets_metadata 30 | from yelp_kafka.monitoring import offset_distance 31 | from yelp_kafka.monitoring import topics_offset_distance 32 | 33 | 34 | class TestMonitoring(TestOffsetsBase): 35 | 36 | def test_offset_metadata_invalid_arguments(self, kafka_client_mock): 37 | with pytest.raises(TypeError): 38 | get_consumer_offsets_metadata( 39 | kafka_client_mock, 40 | "this won't even be consulted", 41 | "this should be a list or dict", 42 | ) 43 | 44 | def test_offset_metadata_unknown_topic(self, kafka_client_mock): 45 | with pytest.raises(UnknownTopic): 46 | get_consumer_offsets_metadata( 47 | kafka_client_mock, 48 | "this won't even be consulted", 49 | ["something that doesn't exist"], 50 | ) 51 | 52 | def test_offset_metadata_unknown_topic_no_fail(self, kafka_client_mock): 53 | actual = get_consumer_offsets_metadata( 54 | kafka_client_mock, 55 | "this won't even be consulted", 56 | ["something that doesn't exist"], 57 | raise_on_error=False 58 | ) 59 | assert not actual 60 | 61 | def test_offset_metadata_unknown_partitions(self, kafka_client_mock): 62 | with pytest.raises(UnknownPartitions): 63 | get_consumer_offsets_metadata( 64 | kafka_client_mock, 65 | self.group, 66 | {'topic1': [99]}, 67 | ) 68 | 69 | def test_offset_metadata_unknown_partitions_no_fail(self, kafka_client_mock): 70 | actual = get_consumer_offsets_metadata( 71 | kafka_client_mock, 72 | self.group, 73 | {'topic1': [99]}, 74 | raise_on_error=False 75 | ) 76 | assert not actual 77 | 78 | def test_offset_metadata_invalid_partition_subset(self, kafka_client_mock): 79 | with pytest.raises(UnknownPartitions): 80 | get_consumer_offsets_metadata( 81 | kafka_client_mock, 82 | self.group, 83 | 
{'topic1': [1, 99]}, 84 | ) 85 | 86 | def test_offset_metadata_invalid_partition_subset_no_fail( 87 | self, 88 | kafka_client_mock 89 | ): 90 | # Partition 99 does not exist, so we expect to have 91 | # offset metadata ONLY for partition 1. 92 | expected = [ 93 | ConsumerPartitionOffsets('topic1', 1, 20, 30, 5) 94 | ] 95 | 96 | actual = get_consumer_offsets_metadata( 97 | kafka_client_mock, 98 | self.group, 99 | {'topic1': [1, 99]}, 100 | raise_on_error=False 101 | ) 102 | assert 'topic1' in actual 103 | assert actual['topic1'] == expected 104 | 105 | def test_get_metadata_kafka_error(self, kafka_client_mock): 106 | with mock.patch.object( 107 | MyKafkaClient, 108 | 'load_metadata_for_topics', 109 | side_effect=KafkaUnavailableError("Boom!"), 110 | autospec=True 111 | ) as mock_func: 112 | with pytest.raises(KafkaUnavailableError): 113 | get_consumer_offsets_metadata( 114 | kafka_client_mock, 115 | self.group, 116 | {'topic1': [99]}, 117 | ) 118 | assert mock_func.call_count == 2 119 | 120 | def test_offset_distance_ok(self, kafka_client_mock): 121 | assert {0: 0, 1: 10, 2: 20} == offset_distance( 122 | kafka_client_mock, 123 | self.group, 124 | 'topic1', 125 | ) 126 | 127 | def test_offset_distance_partition_subset(self, kafka_client_mock): 128 | assert {1: 10, 2: 20} == offset_distance( 129 | kafka_client_mock, 130 | self.group, 131 | 'topic1', 132 | partitions=[1, 2], 133 | ) 134 | 135 | def test_offset_distance_all_partitions(self, kafka_client_mock): 136 | kafka_client = kafka_client_mock 137 | 138 | implicit = offset_distance( 139 | kafka_client, 140 | self.group, 141 | 'topic1', 142 | ) 143 | 144 | explicit = offset_distance( 145 | kafka_client, 146 | self.group, 147 | 'topic1', 148 | partitions=self.high_offsets['topic1'].keys(), 149 | ) 150 | 151 | assert implicit == explicit 152 | 153 | def test_offset_distance_unknown_group(self, kafka_client_mock): 154 | with mock.patch.object( 155 | MyKafkaClient, 156 | 'send_offset_fetch_request', 157 | side_effect=lambda group, payloads, fail_on_error, callback: [ 158 | callback( 159 | OffsetFetchResponse(req.topic.decode(), req.partition, -1, None, 3) 160 | ) 161 | for req in payloads 162 | ] 163 | ): 164 | assert self.high_offsets['topic1'] == offset_distance( 165 | kafka_client_mock, 166 | 'derp', 167 | 'topic1', 168 | ) 169 | 170 | def test_topics_offset_distance(self, kafka_client_mock): 171 | expected = { 172 | 'topic1': {0: 0, 1: 10, 2: 20}, 173 | 'topic2': {0: 35, 1: 50} 174 | } 175 | assert expected == topics_offset_distance( 176 | kafka_client_mock, 177 | self.group, 178 | ['topic1', 'topic2'], 179 | ) 180 | 181 | def test_topics_offset_distance_partition_subset(self, kafka_client_mock): 182 | expected = { 183 | 'topic1': {0: 0, 1: 10}, 184 | 'topic2': {1: 50} 185 | } 186 | assert expected == topics_offset_distance( 187 | kafka_client_mock, 188 | self.group, 189 | {'topic1': [0, 1], 'topic2': [1]}, 190 | ) 191 | 192 | def test_topics_offset_distance_topic_subset(self, kafka_client_mock): 193 | expected = { 194 | 'topic1': {0: 0, 1: 10}, 195 | } 196 | assert expected == topics_offset_distance( 197 | kafka_client_mock, 198 | self.group, 199 | {'topic1': [0, 1]}, 200 | ) 201 | -------------------------------------------------------------------------------- /tests/test_partitioner.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2016 Yelp Inc. 
3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | from __future__ import absolute_import 16 | from __future__ import unicode_literals 17 | 18 | import hashlib 19 | 20 | import mock 21 | import pytest 22 | from kafka.util import kafka_bytestring 23 | from kazoo.protocol.states import KazooState 24 | from kazoo.recipe.partitioner import PartitionState 25 | from kazoo.recipe.partitioner import SetPartitioner 26 | 27 | from yelp_kafka.config import KafkaConsumerConfig 28 | from yelp_kafka.error import PartitionerError 29 | from yelp_kafka.error import PartitionerZookeeperError 30 | from yelp_kafka.partitioner import Partitioner 31 | 32 | 33 | def get_partitioner_state(status): 34 | return {'state': status} 35 | 36 | 37 | class TestPartitioner(object): 38 | 39 | topics = ['topic1', 'topic2'] 40 | 41 | sha = hashlib.sha1(repr(sorted(topics)).encode()).hexdigest() 42 | 43 | @pytest.fixture 44 | @mock.patch('yelp_kafka.partitioner.KazooClient', autospec=True) 45 | @mock.patch('yelp_kafka.partitioner.KafkaClient', autospec=True) 46 | def partitioner(self, kazoo, kafka, config): 47 | return Partitioner(config, self.topics, mock.Mock(), mock.Mock()) 48 | 49 | def test_partitioner_use_sha(self, cluster): 50 | config = KafkaConsumerConfig( 51 | cluster=cluster, 52 | group_id='test_group', 53 | client_id='test_client_id', 54 | partitioner_cooldown=0.5, 55 | use_group_sha=True, 56 | pre_rebalance_callback=mock.Mock(), 57 | post_rebalance_callback=mock.Mock(), 58 | ) 59 | p = Partitioner(config, self.topics, mock.Mock(), mock.Mock()) 60 | 61 | assert p.zk_group_path == '/yelp-kafka/test_group/{sha}'.format(sha=self.sha) 62 | 63 | def test_partitioner_use_sha_false(self, cluster): 64 | config = KafkaConsumerConfig( 65 | cluster=cluster, 66 | group_id='test_group', 67 | client_id='test_client_id', 68 | partitioner_cooldown=0.5, 69 | use_group_sha=False, 70 | pre_rebalance_callback=mock.Mock(), 71 | post_rebalance_callback=mock.Mock(), 72 | ) 73 | p = Partitioner(config, self.topics, mock.Mock(), mock.Mock()) 74 | 75 | assert p.zk_group_path == '/yelp-kafka/test_group' 76 | 77 | def test_get_partitions_set(self, partitioner): 78 | with mock.patch( 79 | 'yelp_kafka.partitioner.get_kafka_topics', 80 | autospec=True 81 | ) as mock_topics: 82 | mock_topics.return_value = { 83 | kafka_bytestring('topic1'): [0, 1, 2, 3], 84 | kafka_bytestring('topic2'): [0, 1, 2], 85 | kafka_bytestring('topic3'): [0, 1, 2, 3], 86 | } 87 | actual = partitioner.get_partitions_set() 88 | assert actual == set([ 89 | 'topic1-0', 'topic1-1', 'topic1-2', 'topic1-3', 90 | 'topic2-0', 'topic2-1', 'topic2-2' 91 | ]) 92 | 93 | def test_handle_release(self, partitioner): 94 | mock_kpartitioner = mock.MagicMock( 95 | spec=SetPartitioner, **get_partitioner_state(PartitionState.RELEASE) 96 | ) 97 | expected_partitions = {'topic1': [0, 1, 3]} 98 | # Enable release 99 | partitioner.acquired_partitions = expected_partitions 100 | partitioner.released_flag = False 101 | 102 | 
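        # Driving _handle_group with the kazoo partitioner in the RELEASE
        # state should release the set and fire the user release callback.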
partitioner._handle_group(mock_kpartitioner)
103 |
104 |         mock_kpartitioner.release_set.assert_called_once_with()
105 |         partitioner.release.assert_called_once_with(expected_partitions)
106 |
107 |     def test_handle_release_twice(self, partitioner):
108 |         mock_kpartitioner = mock.MagicMock(
109 |             spec=SetPartitioner, **get_partitioner_state(PartitionState.RELEASE)
110 |         )
111 |         expected_partitions = {'topic1': [0, 1, 3]}
112 |         # Enable release
113 |         partitioner.acquired_partitions = expected_partitions
114 |         partitioner.released_flag = False
115 |
116 |         partitioner._handle_group(mock_kpartitioner)
117 |         partitioner._handle_group(mock_kpartitioner)
118 |
119 |         assert mock_kpartitioner.release_set.call_count == 2
120 |         # User release function should be called only once
121 |         partitioner.release.assert_called_once_with(expected_partitions)
122 |
123 |     def test_handle_release_failure(self, partitioner):
124 |         mock_kpartitioner = mock.MagicMock(
125 |             spec=SetPartitioner, **get_partitioner_state(PartitionState.RELEASE)
126 |         )
127 |         expected_partitions = {'topic1': [0, 1, 3]}
128 |         # Enable release
129 |         partitioner.acquired_partitions = expected_partitions
130 |         partitioner.released_flag = False
131 |         partitioner.release.side_effect = Exception("Boom!")
132 |
133 |         with pytest.raises(PartitionerError):
134 |             partitioner._handle_group(mock_kpartitioner)
135 |
136 |     def test_handle_failed_and_release(self, partitioner):
137 |         mock_kpartitioner = mock.MagicMock(
138 |             spec=SetPartitioner,
139 |             **get_partitioner_state(PartitionState.FAILURE)
140 |         )
141 |         expected_partitions = {'topic1': [0, 1, 3]}
142 |         partitioner.acquired_partitions = expected_partitions
143 |         with mock.patch.object(Partitioner, 'release_and_finish') as mock_destroy:
144 |             with pytest.raises(PartitionerZookeeperError):
145 |                 partitioner._handle_group(mock_kpartitioner)
146 |             assert mock_destroy.call_count == 1
147 |
148 |     def test_handle_failed_and_release_no_acquired_partitions(self, partitioner):
149 |         mock_kpartitioner = mock.MagicMock(
150 |             spec=SetPartitioner,
151 |             **get_partitioner_state(PartitionState.FAILURE)
152 |         )
153 |         with mock.patch.object(Partitioner, 'release_and_finish') as mock_destroy:
154 |             with pytest.raises(PartitionerZookeeperError):
155 |                 partitioner._handle_group(mock_kpartitioner)
156 |             assert mock_destroy.call_count == 1
157 |
158 |     def test_handle_acquired(self, partitioner):
159 |         mock_kpartitioner = mock.MagicMock(
160 |             spec=SetPartitioner, **get_partitioner_state(PartitionState.ACQUIRED)
161 |         )
162 |         mock_kpartitioner.__iter__.return_value = ['topic1-0', 'topic1-2', 'topic-2-1']
163 |         expected_partitions = {'topic1': [0, 2], 'topic-2': [1]}
164 |
165 |         partitioner._handle_group(mock_kpartitioner)
166 |
167 |         assert partitioner.acquired_partitions == expected_partitions
168 |         assert partitioner.released_flag is False
169 |         partitioner.acquire.assert_called_once_with(expected_partitions)
170 |
171 |     def test_handle_acquire_failure(self, partitioner):
172 |         mock_kpartitioner = mock.MagicMock(
173 |             spec=SetPartitioner, **get_partitioner_state(PartitionState.ACQUIRED)
174 |         )
175 |         mock_kpartitioner.__iter__.return_value = ['topic1-0', 'topic1-2', 'topic-2-1']
176 |         partitioner.acquire.side_effect = Exception("Boom!")
177 |
178 |         with pytest.raises(PartitionerError):
179 |             partitioner._handle_group(mock_kpartitioner)
180 |
181 |     def test_handle_allocating(self, partitioner):
182 |         mock_kpartitioner = mock.MagicMock(
183 |             spec=SetPartitioner, **get_partitioner_state(PartitionState.ALLOCATING)
184 |         )
185 |
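        # In the ALLOCATING state the partitioner should only wait for
        # the partition set to be acquired.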
partitioner._handle_group(mock_kpartitioner) 186 | mock_kpartitioner.wait_for_acquire.assert_called_once_with() 187 | 188 | def test__get_partitioner_no_partitions_change(self, partitioner): 189 | expected_partitions = set(['top-1', 'top1-2']) 190 | with mock.patch.object( 191 | Partitioner, 192 | '_create_partitioner', 193 | side_effect=[mock.sentinel.partitioner1, mock.sentinel.partitioner2] 194 | ) as mock_create: 195 | with mock.patch.object(Partitioner, 'get_partitions_set') as mock_partitions: 196 | mock_partitions.return_value = expected_partitions 197 | actual = partitioner._get_partitioner() 198 | 199 | assert actual == mock.sentinel.partitioner1 200 | assert partitioner.partitions_set == expected_partitions 201 | assert not partitioner.need_partitions_refresh() 202 | 203 | # Call the partitioner again with the same partitions set and be sure 204 | # it does not create a new one 205 | partitioner.force_partitions_refresh = True 206 | 207 | actual = partitioner._get_partitioner() 208 | 209 | assert partitioner.partitions_set is expected_partitions 210 | assert actual == mock.sentinel.partitioner1 211 | assert mock_create.call_count == 1 212 | assert not partitioner.need_partitions_refresh() 213 | 214 | def test__get_partitioner_partitions_change(self, partitioner): 215 | # We create a new partitioner, then we change the partitions 216 | # and we expect the partitioner to be destroyed. 217 | expected_partitions = set(['top-1', 'top1-2']) 218 | 219 | create_side_effect = [mock.sentinel.partitioner1, mock.sentinel.partitioner2] 220 | with mock.patch.object(Partitioner, '_create_partitioner', side_effect=create_side_effect) as mock_create: 221 | with mock.patch.object(Partitioner, 'release_and_finish') as mock_destroy: 222 | with mock.patch.object(Partitioner, 'get_partitions_set') as mock_partitions: 223 | mock_partitions.return_value = expected_partitions 224 | # force partitions refresh is True when the partitioner starts 225 | assert partitioner.need_partitions_refresh() 226 | actual = partitioner._get_partitioner() 227 | assert actual == mock.sentinel.partitioner1 228 | assert partitioner.partitions_set == expected_partitions 229 | assert not partitioner.need_partitions_refresh() 230 | 231 | # Change the partitions and test the partitioner gets destroyed for 232 | # rebalancing 233 | partitioner.force_partitions_refresh = True 234 | new_expected_partitions = set(['top-1', 'top1-2', 'top1-3']) 235 | mock_partitions.return_value = new_expected_partitions 236 | actual = partitioner._get_partitioner() 237 | assert partitioner.partitions_set is new_expected_partitions 238 | assert mock_destroy.called 239 | assert actual == mock.sentinel.partitioner2 240 | assert mock_create.call_count == 2 241 | assert not partitioner.need_partitions_refresh() 242 | 243 | @mock.patch('yelp_kafka.partitioner.KafkaClient') 244 | @mock.patch('yelp_kafka.partitioner.KazooClient') 245 | def test__close_connections(self, mock_kazoo, mock_kafka, config): 246 | partitioner = Partitioner(config, self.topics, mock.Mock(), mock.Mock()) 247 | with mock.patch.object( 248 | Partitioner, '_refresh' 249 | ) as mock_refresh: 250 | # start the partitioner and verify that we refresh the partition set 251 | partitioner.start() 252 | mock_refresh.assert_called_once_with() 253 | # destroy the partitioner and ensure we cleanup all open handles. 254 | partitioner._close_connections() 255 | # did we close all open connections with kafka and zk? 
256 | mock_kazoo.return_value.stop.assert_called_once_with() 257 | mock_kazoo.return_value.close.assert_called_once_with() 258 | mock_kafka.return_value.close.assert_called_once_with() 259 | assert partitioner.partitions_set == set() 260 | assert partitioner._partitioner is None 261 | assert partitioner.last_partitions_refresh == 0 262 | 263 | @mock.patch('yelp_kafka.partitioner.KafkaClient', autospec=True) 264 | @mock.patch('yelp_kafka.partitioner.KazooClient') 265 | def test__create_partitioner_with_kazoo_connection( 266 | self, 267 | mock_kazoo, 268 | _, 269 | config, 270 | ): 271 | # Mock a successful connection to zookeeper 272 | mock_kpartitioner = mock.MagicMock(spec=SetPartitioner) 273 | mock_kazoo.return_value.SetPartitioner.return_value = mock_kpartitioner 274 | mock_kazoo.return_value.state = KazooState.CONNECTED 275 | partitioner = Partitioner( 276 | config, 277 | self.topics, 278 | mock.Mock(), 279 | mock.Mock(), 280 | ) 281 | # Verify that we distribute the partitions 282 | # when we start the partitioner 283 | with mock.patch.object(Partitioner, '_refresh') as mock_refresh: 284 | partitioner.start() 285 | mock_refresh.assert_called_once_with() 286 | expected_partitions = set(['topic1-1', 'topic1-2']) 287 | assert mock_kpartitioner == partitioner._create_partitioner( 288 | expected_partitions 289 | ) 290 | mock_kazoo.return_value.SetPartitioner.assert_called_once_with( 291 | path='/yelp-kafka/test_group/{sha}'.format(sha=self.sha), 292 | set=expected_partitions, 293 | time_boundary=0.5 294 | ) 295 | assert not mock_kazoo.return_value.start.called 296 | 297 | @mock.patch('yelp_kafka.partitioner.KafkaClient', autospec=True) 298 | @mock.patch('yelp_kafka.partitioner.KazooClient') 299 | def test__create_partitioner_no_kazoo_connection( 300 | self, 301 | mock_kazoo, 302 | _, 303 | config, 304 | ): 305 | # Mock a failed connection to Zookeeper 306 | mock_kpartitioner = mock.MagicMock(spec=SetPartitioner) 307 | mock_kazoo.return_value.SetPartitioner.return_value = mock_kpartitioner 308 | mock_kazoo.return_value.state = KazooState.LOST 309 | partitioner = Partitioner( 310 | config, 311 | self.topics, 312 | mock.Mock(), 313 | mock.Mock(), 314 | ) 315 | # Verify that we attempt to re-establish the connection with Zookeeper 316 | # and distribute the partitions. 
317 | with mock.patch.object(Partitioner, '_refresh') as mock_refresh: 318 | partitioner.start() 319 | mock_refresh.assert_called_once_with() 320 | expected_partitions = set(['topic1-1', 'topic1-2']) 321 | assert mock_kpartitioner == partitioner._create_partitioner( 322 | expected_partitions 323 | ) 324 | mock_kazoo.return_value.SetPartitioner.assert_called_once_with( 325 | path='/yelp-kafka/test_group/{sha}'.format(sha=self.sha), 326 | set=expected_partitions, 327 | time_boundary=0.5 328 | ) 329 | assert mock_kazoo.return_value.start.call_count == 1 330 | 331 | def test_get_partitions_kafka_unavailable(self, partitioner): 332 | expected_partitions = set(['fake-topic']) 333 | with mock.patch.object(Partitioner, '_create_partitioner') as mock_create: 334 | with mock.patch.object(Partitioner, 'get_partitions_set') as mock_partitions: 335 | mock_create.return_value = mock.sentinel.partitioner 336 | mock_partitions.return_value = expected_partitions 337 | # Initialize partitioner 338 | actual = partitioner._get_partitioner() 339 | 340 | assert actual == mock.sentinel.partitioner 341 | assert mock_create.call_count == 1 342 | 343 | with mock.patch.object(Partitioner, 'get_partitions_set', side_effect=Exception("Boom!")) as mock_partitions: 344 | with mock.patch.object(Partitioner, 'release_and_finish') as mock_destroy: 345 | # Force partition refresh 346 | partitioner.force_partitions_refresh = True 347 | 348 | with pytest.raises(PartitionerError): 349 | partitioner._get_partitioner() 350 | 351 | assert mock_destroy.called 352 | 353 | def test_release_and_finish(self, partitioner): 354 | with mock.patch.object( 355 | Partitioner, 356 | '_release', 357 | ) as mock_release: 358 | # Attach a mocked partitioner and kafka client 359 | mock_kpartitioner = mock.MagicMock(spec=SetPartitioner) 360 | partitioner._partitioner = mock_kpartitioner 361 | 362 | partitioner.release_and_finish() 363 | 364 | mock_kpartitioner.finish.assert_called_once_with() 365 | assert partitioner._partitioner is None 366 | mock_release.assert_called_once_with(mock_kpartitioner) 367 | -------------------------------------------------------------------------------- /tests/test_producer.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2016 Yelp Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """ 16 | Tests for `yelp_kafka.producer` module. 
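
These tests replace kafka-python's SimpleProducer.send_messages and the
metrics responder with mocks, so no running Kafka broker is required.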
17 | """ 18 | from __future__ import absolute_import 19 | from __future__ import unicode_literals 20 | 21 | import mock 22 | import pytest 23 | from kafka import SimpleProducer 24 | 25 | from yelp_kafka import metrics 26 | from yelp_kafka.config import ClusterConfig 27 | from yelp_kafka.error import YelpKafkaError 28 | from yelp_kafka.producer import YelpKafkaProducerMetrics 29 | from yelp_kafka.producer import YelpKafkaSimpleProducer 30 | 31 | 32 | @pytest.yield_fixture 33 | def mock_metrics_responder(): 34 | def generate_mock(*args, **kwargs): 35 | return mock.MagicMock() 36 | 37 | with mock.patch.dict('sys.modules', {'yelp_meteorite': mock.MagicMock()}, autospec=True): 38 | with mock.patch('yelp_kafka.yelp_metrics_responder.MeteoriteMetricsResponder', autospec=True) as mock_meteorite: 39 | # Different mock for each timer creation 40 | mock_meteorite.get_timer_emitter.side_effect = generate_mock 41 | yield mock_meteorite 42 | 43 | 44 | @pytest.yield_fixture 45 | def mock_kafka_send_messages(): 46 | with mock.patch( 47 | 'kafka.SimpleProducer.send_messages', 48 | spec=SimpleProducer.send_messages, 49 | ) as mock_send_messages: 50 | yield mock_send_messages 51 | 52 | 53 | @pytest.fixture 54 | def mock_kafka_client(): 55 | return mock.Mock(client_id='test_id') 56 | 57 | 58 | @pytest.fixture() 59 | def mock_cluster_config(): 60 | return mock.Mock(type='test_cluster_type', name='mock_cluster', spec=ClusterConfig) 61 | 62 | 63 | @pytest.fixture 64 | def mock_kafka_producer( 65 | mock_kafka_client, 66 | mock_metrics_responder, 67 | mock_kafka_send_messages, 68 | mock_cluster_config, 69 | ): 70 | return YelpKafkaSimpleProducer( 71 | client=mock_kafka_client, 72 | cluster_config=mock_cluster_config, 73 | metrics_responder=mock_metrics_responder 74 | ) 75 | 76 | 77 | @pytest.fixture 78 | def mock_producer_metrics( 79 | mock_kafka_client, 80 | mock_metrics_responder, 81 | mock_cluster_config, 82 | ): 83 | return YelpKafkaProducerMetrics( 84 | client=mock_kafka_client, 85 | cluster_config=mock_cluster_config, 86 | metrics_responder=mock_metrics_responder 87 | ) 88 | 89 | 90 | def test_setup_metrics( 91 | mock_kafka_client, 92 | mock_metrics_responder, 93 | mock_cluster_config, 94 | ): 95 | # setup metrics called at init 96 | YelpKafkaProducerMetrics( 97 | client=mock_kafka_client, 98 | cluster_config=mock_cluster_config, 99 | metrics_responder=mock_metrics_responder 100 | ) 101 | assert mock_metrics_responder.get_timer_emitter.call_count == len(metrics.TIME_METRIC_NAMES) 102 | 103 | 104 | def test_send_kafka_metrics(mock_producer_metrics): 105 | # Test sending a time metrics 106 | metric = next(iter(metrics.TIME_METRIC_NAMES)) 107 | mock_producer_metrics._send_kafka_metrics(metric, 10) 108 | mock_producer_metrics.metrics_responder.record.assert_called_once_with( 109 | mock_producer_metrics. 
_get_timer(metric), 110 | 10000 111 | ) 112 | 113 | # Create unknown metric timer 114 | mock_producer_metrics._create_timer('unknown_metric') 115 | mock_producer_metrics._send_kafka_metrics('unknown_metric', 10) 116 | assert mock_producer_metrics._get_timer('unknown_metric').record.call_count == 0 117 | 118 | 119 | def test_send_msg_to_kafka_success( 120 | mock_kafka_producer, 121 | mock_kafka_send_messages, 122 | ): 123 | mock_msg = mock.Mock() 124 | mock_kafka_producer.send_messages('test_topic', mock_msg) 125 | mock_kafka_send_messages.assert_called_once_with('test_topic', mock_msg) 126 | 127 | 128 | def test_send_task_to_kafka_failure( 129 | mock_kafka_producer, 130 | mock_metrics_responder, 131 | mock_kafka_send_messages, 132 | ): 133 | mock_msg = mock.Mock() 134 | mock_kafka_send_messages.side_effect = [YelpKafkaError] 135 | 136 | with pytest.raises(YelpKafkaError): 137 | mock_kafka_producer.send_messages('test_topic', mock_msg) 138 | 139 | mock_kafka_send_messages.assert_called_once_with('test_topic', mock_msg) 140 | mock_kafka_producer.metrics.metrics_responder.record.assert_called_once_with( 141 | mock_kafka_producer.metrics.kafka_enqueue_exception_count, 142 | 1 143 | ) 144 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2016 Yelp Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
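# Scribe topic naming convention exercised below: topics look like
# "scribe.<datacenter>.<stream>", e.g.
#     utils.make_scribe_topic('ranger', 'uswest1-devc') -> 'scribe.uswest1-devc.ranger'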
15 | from __future__ import absolute_import 16 | from __future__ import unicode_literals 17 | 18 | import mock 19 | import pytest 20 | from kafka.common import KafkaUnavailableError 21 | 22 | from yelp_kafka import utils 23 | 24 | 25 | def test_make_scribe_topic(): 26 | expected = 'scribe.datacenter.scribe_stream' 27 | assert expected == utils.make_scribe_topic( 28 | 'scribe_stream', 'datacenter' 29 | ) 30 | 31 | 32 | def test_get_kafka_topics(): 33 | expected = { 34 | 'topic1': [0, 1, 2, 3], 35 | 'topic2': [0, 1] 36 | } 37 | mock_client = mock.Mock() 38 | mock_client.topic_partitions = expected 39 | actual = utils.get_kafka_topics(mock_client) 40 | assert expected == actual 41 | 42 | 43 | def test_get_kafka_topics_recover_from_error(): 44 | expected = { 45 | 'topic1': [0, 1, 2, 3], 46 | 'topic2': [0, 1] 47 | } 48 | mock_client = mock.Mock() 49 | mock_client.topic_partitions = expected 50 | mock_client.load_metadata_for_topics.side_effect = [KafkaUnavailableError(), None] 51 | actual = utils.get_kafka_topics(mock_client) 52 | assert expected == actual 53 | 54 | 55 | def test_get_kafka_topics_error(): 56 | expected = { 57 | 'topic1': [0, 1, 2, 3], 58 | 'topic2': [0, 1] 59 | } 60 | mock_client = mock.Mock() 61 | mock_client.topic_partitions = expected 62 | mock_client.load_metadata_for_topics.side_effect = KafkaUnavailableError('Boom!') 63 | with pytest.raises(KafkaUnavailableError): 64 | utils.get_kafka_topics(mock_client) 65 | 66 | 67 | def test_extract_datacenter(): 68 | topic = "scribe.uswest1-devc.ranger" 69 | datacenter = utils.extract_datacenter(topic) 70 | assert datacenter == "uswest1-devc" 71 | 72 | topic = "scribe.uswest1-devc.mylogfile.log" 73 | datacenter = utils.extract_datacenter(topic) 74 | assert datacenter == "uswest1-devc" 75 | 76 | 77 | def test_extract_datacenter_error(): 78 | topic = "scribeuswest1-devcranger" 79 | with pytest.raises(ValueError): 80 | utils.extract_datacenter(topic) 81 | 82 | topic = "scribe.uswest1-devcranger" 83 | with pytest.raises(ValueError): 84 | utils.extract_datacenter(topic) 85 | 86 | topic = "scribble.uswest1-devc.ranger" 87 | with pytest.raises(ValueError): 88 | utils.extract_datacenter(topic) 89 | 90 | 91 | def test_extract_stream_name(): 92 | topic = "scribe.uswest1-devc.ranger" 93 | stream = utils.extract_stream_name(topic) 94 | assert stream == "ranger" 95 | 96 | topic = "scribe.uswest1-devc.mylogfile.log" 97 | stream = utils.extract_stream_name(topic) 98 | assert stream == "mylogfile.log" 99 | 100 | 101 | def test_extract_stream_name_error(): 102 | topic = "scribeuswest1-devcranger" 103 | with pytest.raises(ValueError): 104 | utils.extract_stream_name(topic) 105 | 106 | topic = "scribe.uswest1-devcranger" 107 | with pytest.raises(ValueError): 108 | utils.extract_stream_name(topic) 109 | 110 | topic = "scribble.uswest1-devc.ranger" 111 | with pytest.raises(ValueError): 112 | utils.extract_stream_name(topic) 113 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py27, py35 3 | indexserver = 4 | default = https://pypi.python.org/simple/ 5 | [testenv] 6 | passenv = SSH_AUTH_SOCK 7 | deps = 8 | pytest==2.9.2 9 | mock 10 | flake8==2.6.2 11 | pre-commit 12 | commands = 13 | pre-commit install -f --install-hooks 14 | py.test -s --ignore tests/integration {posargs} 15 | flake8 . 
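# Example invocations, for reference: "tox -e py27" runs the unit tests
# defined above, "tox -e coverage" adds a coverage report, and
# "tox -e docs" builds the Sphinx docs.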
16 | 17 | [testenv:devenv] 18 | deps = {[testenv]deps} 19 | envdir = virtualenv_run 20 | commands = 21 | 22 | [testenv:devenv-command] 23 | deps = {[testenv:devenv]deps} 24 | envdir = {[testenv:devenv]envdir} 25 | commands = {posargs} 26 | 27 | [testenv:integration] 28 | deps = 29 | pip==8.1.1 30 | pytest==2.9.2 31 | mock 32 | 33 | [testenv:integration-py27] 34 | basepython = python2.7 35 | envdir = /toxintegration-py27 36 | deps = 37 | {[testenv:integration]deps} 38 | commands = 39 | py.test -s tests/integration 40 | 41 | [testenv:integration-py35] 42 | basepython = python3.5 43 | envdir = /toxintegration-py35 44 | deps = 45 | {[testenv:integration]deps} 46 | commands = 47 | py.test -s tests/integration 48 | 49 | [testenv:docker_itest] 50 | deps = 51 | docker-compose==1.5.2 52 | basepython = python2.7 53 | whitelist_externals = /bin/bash 54 | commands = 55 | /bin/bash -c "export KAFKA_VERSION='0.8.2'; \ 56 | docker-compose rm --force && \ 57 | docker-compose build && \ 58 | docker-compose run itest /scripts/run_tests.sh; exit_status=$?; \ 59 | docker-compose stop; exit $exit_status" 60 | /bin/bash -c "export KAFKA_VERSION='0.9.0'; \ 61 | docker-compose rm --force && \ 62 | docker-compose build && \ 63 | docker-compose run itest /scripts/run_tests.sh; exit_status=$?; \ 64 | docker-compose stop; exit $exit_status" 65 | 66 | [testenv:coverage] 67 | deps = 68 | {[testenv]deps} 69 | coverage 70 | commands = 71 | coverage run --source yelp_kafka/ -m pytest --strict --ignore tests/integration {posargs} 72 | coverage report -m 73 | basepython = python2.7 74 | 75 | [testenv:docs] 76 | deps = 77 | {[testenv]deps} 78 | sphinx 79 | sphinx_rtd_theme 80 | changedir = docs 81 | basepython = python2.7 82 | commands = sphinx-build -b html -d build/doctrees source build/html 83 | 84 | [flake8] 85 | exclude = .svn,CVS,.bzr,.hg,.git,__pycache__,.tox,docs,virtualenv_run,.ropeproject,.yelp_kafka_manual_test 86 | ignore = E501 87 | 88 | [pytest] 89 | norecursedirs = .* _darcs CVS docs virtualenv_run 90 | -------------------------------------------------------------------------------- /yelp_kafka/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2016 Yelp Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | from __future__ import absolute_import 16 | from __future__ import unicode_literals 17 | 18 | __version__ = "5.2.2" 19 | version_info = tuple(map(int, __version__.split('.'))) 20 | -------------------------------------------------------------------------------- /yelp_kafka/consumer.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2016 Yelp Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | from __future__ import absolute_import
16 | from __future__ import unicode_literals
17 |
18 | import logging
19 | from collections import namedtuple
20 | from multiprocessing import Event
21 |
22 | import six
23 | from kafka import KafkaClient
24 | from kafka import SimpleConsumer
25 | from kafka.common import KafkaError
26 | from kafka.common import OffsetCommitRequest
27 | from kafka.util import kafka_bytestring
28 | from setproctitle import getproctitle
29 | from setproctitle import setproctitle
30 |
31 | from yelp_kafka.error import ProcessMessageError
32 |
33 |
34 | Message = namedtuple("Message", ["partition", "offset", "key", "value"])
35 | """Tuple representing a kafka message.
36 |
37 | * **partition**\(``int``): The partition number of the message
38 | * **offset**\(``int``): Message offset
39 | * **key**\(``str``): Message key
40 | * **value**\(``str``): Message value
41 | """
42 |
43 |
44 | class KafkaSimpleConsumer(object):
45 |     """ Base class for consuming from kafka.
46 |     Implements the logic to connect to kafka and consume messages.
47 |     KafkaSimpleConsumer is a wrapper around kafka-python SimpleConsumer.
48 |     KafkaSimpleConsumer relies on it in order to consume messages from kafka.
49 |     KafkaSimpleConsumer does not catch exceptions raised by kafka-python.
50 |
51 |     An instance of this class can be used as an iterator
52 |     to consume messages from kafka.
53 |
54 |     .. warning:: This class is considered deprecated in favor of
55 |        :py:class:`yelp_kafka.consumer_group.KafkaConsumerGroup`.
56 |
57 |     :param topic: topic to consume from.
58 |     :type topic: string.
59 |     :param config: consumer configuration.
60 |     :type config: dict.
61 |     :param partitions: topic partitions to consume from.
62 |     :type partitions: list.
63 |     """
64 |
65 |     def __init__(self, topic, config, partitions=None):
66 |         self.log = logging.getLogger(self.__class__.__name__)
67 |         if not isinstance(topic, six.string_types):
68 |             raise TypeError("Topic must be a string")
69 |         self.topic = kafka_bytestring(topic)
70 |         if partitions and not isinstance(partitions, list):
71 |             raise TypeError("Partitions must be a list")
72 |         self.partitions = partitions
73 |         self.kafka_consumer = None
74 |         self.config = config
75 |
76 |     def connect(self):
77 |         """ Connect to kafka and create a consumer.
78 |         It uses config parameters to create a kafka-python
79 |         KafkaClient and SimpleConsumer.
80 |         """
81 |         # Instantiate a kafka client connected to kafka.
82 |         self.client = KafkaClient(
83 |             self.config.broker_list,
84 |             client_id=self.config.client_id
85 |         )
86 |
87 |         # Create a kafka SimpleConsumer.
88 |         self.kafka_consumer = SimpleConsumer(
89 |             client=self.client, topic=self.topic, partitions=self.partitions,
90 |             **self.config.get_simple_consumer_args()
91 |         )
92 |         self.log.debug(
93 |             "Connected to kafka.
Topic %s, partitions %s, %s",
94 |             self.topic,
95 |             self.partitions,
96 |             ','.join(['{0} {1}'.format(k, v) for k, v in
97 |                       six.iteritems(self.config.get_simple_consumer_args())])
98 |         )
99 |         self.kafka_consumer.provide_partition_info()
100 |
101 |     def __iter__(self):
102 |         for partition, kafka_message in self.kafka_consumer:
103 |             yield Message(
104 |                 partition=partition,
105 |                 offset=kafka_message[0],
106 |                 key=kafka_message[1].key,
107 |                 value=kafka_message[1].value,
108 |             )
109 |
110 |     def __enter__(self):
111 |         self.connect()
112 |
113 |     def __exit__(self, type, value, tb):
114 |         self.close()
115 |
116 |     def close(self):
117 |         """Disconnect from kafka.
118 |         If auto_commit is enabled, commit offsets before disconnecting.
119 |         """
120 |         if self.kafka_consumer.auto_commit is True:
121 |             try:
122 |                 self.commit()
123 |             except:
124 |                 self.log.exception("Commit error. "
125 |                                    "Offsets may not have been committed")
126 |         # Close all the connections to kafka brokers. KafkaClient opens
127 |         # connections to all the partition leaders.
128 |         self.client.close()
129 |
130 |     def get_message(self, block=True, timeout=0.1):
131 |         """Get message from kafka. It supports the same arguments as get_message
132 |         in kafka-python SimpleConsumer.
133 |
134 |         :param block: If True, the API will block till at least a message is fetched.
135 |         :type block: boolean
136 |         :param timeout: If block is True, the function will block for the specified
137 |                         time (in seconds).
138 |                         If None, it will block forever.
139 |
140 |         :returns: a Kafka message
141 |         :rtype: Message namedtuple, which consists of: partition number,
142 |                 offset, key, and message value
143 |         """
144 |         fetched_message = self.kafka_consumer.get_message(block, timeout)
145 |         if fetched_message is None:
146 |             # get_message timed out and returned None
147 |             return None
148 |         else:
149 |             partition, kafka_message = fetched_message
150 |             return Message(
151 |                 partition=partition,
152 |                 offset=kafka_message[0],
153 |                 key=kafka_message[1].key,
154 |                 value=kafka_message[1].value,
155 |             )
156 |
157 |     def commit(self, partitions=None):
158 |         """Commit offset for this consumer group.
159 |         :param partitions: list of partitions to commit, default commits to all
160 |                            partitions.
161 |         :return: True on success, False on failure.
162 |         """
163 |         if partitions:
164 |             return self.kafka_consumer.commit(partitions)
165 |         else:
166 |             return self.kafka_consumer.commit()
167 |
168 |     def commit_message(self, message):
169 |         """Commit the message offset for this consumer group. This function does not
170 |         take care of the consumer offset tracking. It should only be used if
171 |         auto_commit is disabled and the commit function is never called.
172 |
173 |         .. note:: all the messages received before the message itself will be
174 |            committed as a consequence.
175 |
176 |         :param message: message to commit.
177 |         :type message: Message namedtuple, which consists of: partition number,
178 |                        offset, key, and message value
179 |         :return: True on success, False on failure.
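        Example (a sketch, not part of the original API docs; assumes
        auto_commit was disabled in the consumer config and ``process``
        is user-supplied code)::

            message = consumer.get_message()
            if message is not None:
                process(message)
                consumer.commit_message(message)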
180 | """ 181 | reqs = [ 182 | OffsetCommitRequest( 183 | self.topic, 184 | message.partition, 185 | message.offset, 186 | None, 187 | ) 188 | ] 189 | 190 | try: 191 | if self.config.offset_storage in [None, 'zookeeper', 'dual']: 192 | self.client.send_offset_commit_request(self.config.group_id, reqs) 193 | if self.config.offset_storage in ['kafka', 'dual']: 194 | self.client.send_offset_commit_request_kafka(self.config.group_id, reqs) 195 | except KafkaError as e: 196 | self.log.error("%s saving offsets: %s", e.__class__.__name__, e) 197 | return False 198 | else: 199 | return True 200 | 201 | 202 | class KafkaConsumerBase(KafkaSimpleConsumer): 203 | """Kafka Consumer class. Inherit from 204 | :class:`yelp_kafka.consumer.KafkaSimpleConsumer`. 205 | 206 | Convenient base class to implement new kafka consumers with 207 | message processing logic. 208 | .. note: This class is thread safe. 209 | """ 210 | 211 | def __init__(self, topic, config, partitions=None): 212 | super(KafkaConsumerBase, self).__init__(topic, config, partitions) 213 | self.termination_flag = Event() 214 | 215 | def initialize(self): 216 | """Initialize the consumer. 217 | When using in multiprocessing, this function should re-configure 218 | the logger instance (self.log), since it appears to be no longer 219 | working after the fork. 220 | Called only once when the consumer starts, and before connecting to kafka. 221 | 222 | .. note: implement in subclass. 223 | """ 224 | pass 225 | 226 | def dispose(self): 227 | """Called after offsets commit and kafka connection termination. 228 | It is executed just before exiting the consumer loop. 229 | 230 | .. note: implement in subclass. 231 | """ 232 | pass 233 | 234 | def process(self, message): 235 | """Process a messages. 236 | 237 | .. note: implement in subclass. 238 | 239 | :param message: message to process 240 | :type message: Message 241 | """ 242 | pass 243 | 244 | def terminate(self): 245 | """Terminate the consumer. 246 | Set a termination variable. The consumer is terminated as soon 247 | as it receives the next message are the iter_timeout expires. 248 | """ 249 | self.termination_flag.set() 250 | 251 | def set_process_name(self): 252 | """Setup process name for consumer to include topic and 253 | partitions to improve debuggability. 254 | """ 255 | process_name = '%s-%s-%s' % (getproctitle(), self.topic.decode(), self.partitions) 256 | setproctitle(process_name) 257 | 258 | def run(self): 259 | """Fetch and process messages from kafka. 260 | Non returning function. It initialize the consumer, connect to kafka 261 | and start processing messages. 262 | 263 | :raises: MessageProcessError when the process function fails 264 | """ 265 | # Setup process name for debuggability 266 | self.set_process_name() 267 | 268 | self.initialize() 269 | try: 270 | # We explicitly catch and log the exception. 
271 | self.connect() 272 | except: 273 | self.log.exception( 274 | "Consumer topic %s, partition %s, config %s:" 275 | " failed connecting to kafka", 276 | self.topic, 277 | self.partitions, 278 | self.config 279 | ) 280 | raise 281 | while not self.termination_flag.is_set(): 282 | for message in self: 283 | try: 284 | self.process(message) 285 | except: 286 | self.log.exception("Error processing message: %s", message) 287 | raise ProcessMessageError( 288 | "Error processing message: %s", 289 | message, 290 | ) 291 | # Early termination in the event we are stuck in infinite message iteration 292 | # from kafka-python consumer: https://github.com/dpkp/kafka-python/blob/ 293 | # 99d4a3a8b1dbae514b1c6d367908010b65fc8d0c/kafka/consumer/simple.py#L348 294 | if self.termination_flag.is_set(): 295 | break 296 | self._terminate() 297 | 298 | def _terminate(self): 299 | """Commit offsets and terminate the consumer. 300 | """ 301 | self.log.info("Terminating consumer topic %s ", self.topic) 302 | self.commit() 303 | self.client.close() 304 | self.dispose() 305 | -------------------------------------------------------------------------------- /yelp_kafka/error.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2016 Yelp Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | from __future__ import absolute_import 16 | from __future__ import unicode_literals 17 | 18 | 19 | class YelpKafkaError(Exception): 20 | """Base class for yelp_kafka errors.""" 21 | pass 22 | 23 | 24 | class DiscoveryError(YelpKafkaError): 25 | """Errors while using discovery functions.""" 26 | pass 27 | 28 | 29 | class ConsumerError(YelpKafkaError): 30 | """Error in consumer.""" 31 | pass 32 | 33 | 34 | class ConfigurationError(YelpKafkaError): 35 | """Error in configuration. For example. 
A missing configuration file
36 |     or malformed configuration."""
37 |     pass
38 |
39 |
40 | class InvalidOffsetStorageError(YelpKafkaError):
41 |     """Unknown source of offsets."""
42 |     pass
43 |
44 |
45 | class ProcessMessageError(YelpKafkaError):
46 |     """Error processing a message from kafka."""
47 |     pass
48 |
49 |
50 | class ConsumerGroupError(YelpKafkaError):
51 |     """Error in the consumer group"""
52 |     pass
53 |
54 |
55 | class PartitionerError(YelpKafkaError):
56 |     """Error in the partitioner"""
57 |     pass
58 |
59 |
60 | class PartitionerZookeeperError(YelpKafkaError):
61 |     """Error in partitioner communication with Zookeeper"""
62 |     pass
63 |
64 |
65 | class UnknownTopic(YelpKafkaError):
66 |     pass
67 |
68 |
69 | class UnknownPartitions(YelpKafkaError):
70 |     pass
71 |
72 |
73 | class OffsetCommitError(YelpKafkaError):
74 |
75 |     def __init__(self, topic, partition, error):
76 |         self.topic = topic
77 |         self.partition = partition
78 |         self.error = error
79 |
80 |     def __eq__(self, other):
81 |         if all([
82 |             self.topic == other.topic,
83 |             self.partition == other.partition,
84 |             self.error == other.error,
85 |         ]):
86 |             return True
87 |         return False
88 |
89 |
90 | class InvalidClusterTypeOrRegionError(YelpKafkaError):
91 |     pass
92 |
93 |
94 | class InvalidClusterTypeOrNameError(YelpKafkaError):
95 |     pass
96 |
97 |
98 | class InvalidClusterTypeOrSuperregionError(YelpKafkaError):
99 |     pass
100 |
101 |
102 | class InvalidClusterType(YelpKafkaError):
103 |     pass
104 |
105 |
106 | class InvalidLogOrRegionError(YelpKafkaError):
107 |     pass
108 |
109 |
110 | class InvalidLogOrSuperregionError(YelpKafkaError):
111 |     pass
112 |
--------------------------------------------------------------------------------
/yelp_kafka/metrics.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Copyright 2016 Yelp Inc.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | from __future__ import absolute_import
16 | from __future__ import unicode_literals
17 |
18 | PRODUCE_EXCEPTION_COUNT = 'produce_exception_count'
19 |
20 | TIME_METRIC_NAMES = set([
21 |     'metadata_request_timer',
22 |     'produce_request_timer',
23 |     'fetch_request_timer',
24 |     'offset_request_timer',
25 |     'offset_commit_request_timer',
26 |     'offset_fetch_request_timer',
27 |     'offset_fetch_request_timer_kafka',
28 |     'consumer_metadata_request_timer',
29 |     'offset_commit_request_timer_kafka',
30 | ])
31 |
32 | FAILURE_COUNT_METRIC_NAMES = set([
33 |     'failed_payloads_count',
34 |     'out_of_range_counts',
35 |     'not_leader_for_partition_count',
36 |     'request_timed_out_count'
37 | ])
38 |
--------------------------------------------------------------------------------
/yelp_kafka/metrics_responder.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Copyright 2016 Yelp Inc.
3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | from __future__ import absolute_import 16 | from __future__ import unicode_literals 17 | 18 | import abc 19 | import logging 20 | 21 | 22 | class MetricsResponder(object): 23 | """ 24 | Used for publishing metrics with a metric responder instance 25 | """ 26 | 27 | def __init__(self): 28 | self.log = logging.getLogger(self.__class__.__name__) 29 | 30 | @abc.abstractmethod 31 | def get_counter_emitter(self, metric, default_dimensions=None): 32 | """ 33 | Creates an instance for counting/counter a specific metric 34 | 35 | :param metric: the name of the metric 36 | :param default_dimensions: the extra dimensions provided for the metric 37 | :return: an instance of responder for recording counter based metrics 38 | """ 39 | 40 | raise NotImplementedError 41 | 42 | @abc.abstractmethod 43 | def get_timer_emitter(self, metric, default_dimensions=None): 44 | """ 45 | Creates and returns an instance for recording time elapsed 46 | for a specific metric 47 | 48 | :param metric: the name of the metric 49 | :param default_dimensions: the extra dimensions provided for the metric 50 | :return: an instance of responder for recording timer based metrics 51 | """ 52 | 53 | raise NotImplementedError 54 | 55 | @abc.abstractmethod 56 | def record(self, registered_reporter, value, timestamp=None): 57 | """ 58 | Used to record metrics for the registered reporter 59 | 60 | :param registered_reporter: The instance of the reporter 61 | :param value: The value to be recorded 62 | :param timestamp: The timestamp when the metric is recorded 63 | """ 64 | 65 | raise NotImplementedError 66 | -------------------------------------------------------------------------------- /yelp_kafka/monitoring.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2016 Yelp Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
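# Example usage (a sketch; 'broker:9092' and 'my_group' are placeholder
# names, and a reachable cluster plus an existing consumer group are
# assumed):
#
#     from kafka import KafkaClient
#     from yelp_kafka.monitoring import offset_distance
#
#     client = KafkaClient('broker:9092')
#     lag_by_partition = offset_distance(client, 'my_group', 'my_topic')
#     # each value is the partition's highmark minus the group's current offset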
15 | from __future__ import absolute_import
16 | from __future__ import unicode_literals
17 |
18 | import logging
19 | from collections import namedtuple
20 |
21 | import six
22 | from kafka.common import KafkaUnavailableError
23 |
24 | from yelp_kafka.offsets import get_current_consumer_offsets
25 | from yelp_kafka.offsets import get_topics_watermarks
26 |
27 |
28 | log = logging.getLogger(__name__)
29 |
30 | ConsumerPartitionOffsets = namedtuple(
31 |     'ConsumerPartitionOffsets',
32 |     ['topic', 'partition', 'current', 'highmark', 'lowmark']
33 | )
34 | """Tuple representing the consumer offsets for a topic partition.
35 |
36 | * **topic**\(``str``): Name of the topic
37 | * **partition**\(``int``): Partition number
38 | * **current**\(``int``): current group offset
39 | * **highmark**\(``int``): high watermark
40 | * **lowmark**\(``int``): low watermark
41 | """
42 |
43 |
44 | def get_consumer_offsets_metadata(
45 |     kafka_client,
46 |     group,
47 |     topics,
48 |     raise_on_error=True,
49 |     offset_storage='zookeeper',
50 | ):
51 |     """This method:
52 |         * refreshes metadata for the kafka client
53 |         * fetches group offsets
54 |         * fetches watermarks
55 |
56 |     :param kafka_client: KafkaClient instance
57 |     :param group: group id
58 |     :param topics: list of topics
59 |     :param raise_on_error: if False the method ignores missing topics and
60 |                            missing partitions. It may still fail when sending the request.
61 |     :param offset_storage: String, one of {zookeeper, kafka}.
62 |     :returns: dict {<topic>: [ConsumerPartitionOffsets]}
63 |     """
64 |
65 |     # Refresh client metadata. We do not use the topic list, because we
66 |     # don't want to accidentally create the topic if it does not exist.
67 |     # If Kafka is unavailable, let's retry loading client metadata (YELPKAFKA-30)
68 |     try:
69 |         kafka_client.load_metadata_for_topics()
70 |     except KafkaUnavailableError:
71 |         kafka_client.load_metadata_for_topics()
72 |
73 |     group_offsets = get_current_consumer_offsets(
74 |         kafka_client, group, topics, raise_on_error, offset_storage
75 |     )
76 |
77 |     watermarks = get_topics_watermarks(
78 |         kafka_client, topics, raise_on_error
79 |     )
80 |
81 |     result = {}
82 |     for topic, partitions in six.iteritems(group_offsets):
83 |         result[topic] = [
84 |             ConsumerPartitionOffsets(
85 |                 topic=topic,
86 |                 partition=partition,
87 |                 current=group_offsets[topic][partition],
88 |                 highmark=watermarks[topic][partition].highmark,
89 |                 lowmark=watermarks[topic][partition].lowmark,
90 |             ) for partition in partitions
91 |         ]
92 |     return result
93 |
94 |
95 | def topics_offset_distance(
96 |     kafka_client,
97 |     group,
98 |     topics,
99 |     offset_storage='zookeeper',
100 | ):
101 |     """Get the distance a group_id is from the current latest offset
102 |     for topics.
103 |
104 |     If the group is unknown to kafka, it's assumed to be at offset 0. All other
105 |     errors will not be caught.
106 |
107 |     This method forces the client to use fresh metadata by calling
108 |     kafka_client.load_metadata_for_topics(topics) before getting
109 |     the group offsets.
110 |
111 |     :param kafka_client: KafkaClient instance
112 |     :param group: consumer group id
113 |     :param topics: topics list or dict {<topic>: [partitions]}
114 |     :param offset_storage: String, one of {zookeeper, kafka}.
115 |     :returns: dict {<topic>: {<partition>: <distance>}}
116 |     """
117 |
118 |     distance = {}
119 |     for topic, offsets in six.iteritems(get_consumer_offsets_metadata(
120 |         kafka_client,
121 |         group,
122 |         topics,
123 |         offset_storage=offset_storage,
124 |     )):
125 |         distance[topic] = dict([
126 |             (offset.partition, offset.highmark - offset.current)
127 |             for offset in offsets
128 |         ])
129 |     return distance
130 |
131 |
132 | def offset_distance(
133 |     kafka_client,
134 |     group,
135 |     topic,
136 |     partitions=None,
137 |     offset_storage='zookeeper',
138 | ):
139 |     """Get the distance a group_id is from the current latest in a topic.
140 |
141 |     If the group is unknown to kafka, it's assumed to be at offset 0. All other
142 |     errors will not be caught. Be prepared for KafkaUnavailableError and its
143 |     ilk.
144 |
145 |     This method forces the client to use fresh metadata by calling
146 |     kafka_client.load_metadata_for_topics(topics) before getting
147 |     the group offsets.
148 |
149 |     :param kafka_client: KafkaClient instance
150 |     :param group: consumer group id
151 |     :param topic: topic name
152 |     :param partitions: partitions list
153 |     :param offset_storage: String, one of {zookeeper, kafka}.
154 |     :returns: dict {<partition>: <distance>}
155 |     """
156 |
157 |     if partitions:
158 |         topics = {topic: partitions}
159 |     else:
160 |         topics = [topic]
161 |     consumer_offsets = get_consumer_offsets_metadata(
162 |         kafka_client,
163 |         group,
164 |         topics,
165 |         offset_storage=offset_storage,
166 |     )
167 |     return dict(
168 |         [(offset.partition, offset.highmark - offset.current)
169 |          for offset in consumer_offsets[topic]]
170 |     )
171 |
--------------------------------------------------------------------------------
/yelp_kafka/partitioner.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Copyright 2016 Yelp Inc.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | from __future__ import absolute_import
16 | from __future__ import unicode_literals
17 |
18 | import copy
19 | import hashlib
20 | import logging
21 | import time
22 | import traceback
23 | from collections import defaultdict
24 |
25 | from kafka.client import KafkaClient
26 | from kafka.util import kafka_bytestring
27 | from kazoo.client import KazooClient
28 | from kazoo.protocol.states import KazooState
29 | from kazoo.recipe.partitioner import PartitionState
30 | from kazoo.retry import KazooRetry
31 |
32 | from yelp_kafka.error import PartitionerError
33 | from yelp_kafka.error import PartitionerZookeeperError
34 | from yelp_kafka.utils import get_kafka_topics
35 |
36 | MAX_START_TIME_SECS = 300
37 | # The java kafka api updates every 600s by default. We update the
38 | # number of partitions every 120 seconds.
39 | PARTITIONS_REFRESH_TIMEOUT = 120
40 |
41 | # Define the connection retry policy for kazoo in case of flaky
42 | # zookeeper connections. This ensures we don't keep indefinitely
43 | # trying to connect and masking failures from the application.
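# As a rough sketch of the resulting schedule (assuming kazoo's standard
# exponential backoff): retry delays start at 0.1s and double on each attempt
# (0.1, 0.2, 0.4, ...), each inflated by up to 80% random jitter and capped
# at 60s, for at most 10 tries before the connection attempt fails.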
44 | KAZOO_RETRY_DEFAULTS = {
45 |     'max_tries': 10,
46 |     'delay': 0.1,
47 |     'backoff': 2,
48 |     'max_jitter': 0.8,
49 |     'max_delay': 60,
50 | }
51 |
52 |
53 | def build_zk_group_path(group_path, topics):
54 |     return "{group_path}/{sha}".format(
55 |         group_path=group_path,
56 |         sha=hashlib.sha1(repr(sorted(topics)).encode()).hexdigest(),
57 |     )
58 |
59 |
60 | class Partitioner(object):
61 |     """Partitioner is used to distribute a set of
62 |     topics/partitions among a group of consumers.
63 |
64 |     :param topics: kafka topics
65 |     :type topics: list
66 |     :param acquire: function to be called when a set of partitions
67 |         has been acquired. It should usually allocate the consumers.
68 |     :param release: function to be called when the acquired
69 |         partitions have to be released. It should usually stop the consumers.
70 |
71 |     """
72 |
73 |     def __init__(self, config, topics, acquire, release):
74 |         self.log = logging.getLogger(self.__class__.__name__)
75 |         self.config = config
76 |         # Clients
77 |         self.kazoo_client = None
78 |         self.kafka_client = None
79 |         self.topics = topics
80 |         self.acquired_partitions = defaultdict(list)
81 |         self.partitions_set = set()
82 |         # User callbacks
83 |         self.acquire = acquire
84 |         self.release = release
85 |         # We guarantee that the user defined release function call always
86 |         # follows the acquire. The release function will never be called
87 |         # twice in a row. Initialize to true because no partitions have
88 |         # been acquired at startup.
89 |         self.released_flag = True
90 |         # Kafka metadata refresh
91 |         self.force_partitions_refresh = True
92 |         self.last_partitions_refresh = 0
93 |         # Kazoo partitioner
94 |         self._partitioner = None
95 |         # Map Kazoo partitioner state to actions
96 |         self.actions = {
97 |             PartitionState.ALLOCATING: self._allocating,
98 |             PartitionState.ACQUIRED: self._acquire,
99 |             PartitionState.RELEASE: self._release,
100 |             PartitionState.FAILURE: self._fail
101 |         }
102 |
103 |         self.kazoo_retry = None
104 |         self.zk_group_path = build_zk_group_path(
105 |             self.config.group_path,
106 |             self.topics,
107 |         ) if self.config.use_group_sha else self.config.group_path
108 |
109 |     def start(self):
110 |         """Create a new group and wait until the partitions have been
111 |         acquired. This function should never be called twice.
112 |
113 |         :raises: PartitionerError upon partitioner failures
114 |
115 |         .. note: This is a blocking operation.
116 |         """
117 |         self.kazoo_retry = KazooRetry(**KAZOO_RETRY_DEFAULTS)
118 |         self.kazoo_client = KazooClient(
119 |             self.config.zookeeper,
120 |             connection_retry=self.kazoo_retry,
121 |         )
122 |         self.kafka_client = KafkaClient(self.config.broker_list)
123 |
124 |         self.log.debug("Starting a new group for topics %s", self.topics)
125 |         self.released_flag = True
126 |         self._refresh()
127 |
128 |     def __enter__(self):
129 |         self.start()
130 |
131 |     def __exit__(self, exc_type, exc_value, traceback):
132 |         self.stop()
133 |
134 |     def stop(self):
135 |         """Leave the group and release the partitions."""
136 |         self.log.debug("Stopping group for topics %s", self.topics)
137 |         self.release_and_finish()
138 |         self._close_connections()
139 |
140 |     def refresh(self):
141 |         """Rebalance upon group changes, such as when a consumer
142 |         joins/leaves the group, the partitions for a topic change, or the
143 |         partitioner itself fails (connection to zookeeper lost).
144 |         This method should be called periodically to make sure that the
145 |         group is in sync.
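        For example, a long-running consumer would typically call refresh()
        from its main loop every few seconds to pick up membership changes.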
146 |
147 |         :raises: PartitionerError upon partitioner failures
148 |         """
149 |         self.log.debug("Refresh group for topics %s", self.topics)
150 |         self._refresh()
151 |
152 |     def _refresh(self):
153 |         while True:
154 |             partitioner = self._get_partitioner()
155 |             self._handle_group(partitioner)
156 |             if self.acquired_partitions:
157 |                 break
158 |
159 |     def need_partitions_refresh(self):
160 |         return (self.force_partitions_refresh or
161 |                 self.last_partitions_refresh <
162 |                 time.time() - PARTITIONS_REFRESH_TIMEOUT)
163 |
164 |     def _get_partitioner(self):
165 |         """Get an instance of the partitioner. When the partitions set
166 |         changes we need to destroy the existing partitioner and create
167 |         a new one; if no partitioner exists yet, a new one is created.
168 |         Different consumers will eventually converge on the same
169 |         partitions set.
170 |
171 |         The partitions set is refreshed from Kafka at most every
172 |         PARTITIONS_REFRESH_TIMEOUT seconds (see need_partitions_refresh),
173 |         unless a refresh has been forced.
174 |         """
175 |         if self.need_partitions_refresh() or not self._partitioner:
176 |             try:
177 |                 partitions = self.get_partitions_set()
178 |             except Exception:
179 |                 self.log.exception(
180 |                     "Failed to get partitions set from Kafka. "
181 |                     "Releasing the group."
182 |                 )
183 |                 self.release_and_finish()
184 |                 raise PartitionerError(
185 |                     "Failed to get partitions set from Kafka",
186 |                 )
187 |             self.force_partitions_refresh = False
188 |             self.last_partitions_refresh = time.time()
189 |             if partitions != self.partitions_set:
190 |                 # If partitions changed we release the consumers, destroy the
191 |                 # partitioner and disconnect from zookeeper.
192 |                 self.log.info(
193 |                     "Partitions set changed. Added partitions: %s. "
194 |                     "Removed partitions: %s. Rebalancing...",
195 |                     [p for p in partitions if p not in self.partitions_set],
196 |                     [p for p in self.partitions_set if p not in partitions]
197 |                 )
198 |                 # We need to destroy the existing partitioner before creating
199 |                 # a new one.
200 |                 self.release_and_finish()
201 |                 self._partitioner = self._create_partitioner(partitions)
202 |                 self.partitions_set = partitions
203 |         return self._partitioner
204 |
205 |     def _create_partitioner(self, partitions):
206 |         """Connect to zookeeper and create a partitioner"""
207 |         if self.kazoo_client.state != KazooState.CONNECTED:
208 |             try:
209 |                 self.kazoo_client.start()
210 |             except Exception:
211 |                 self.log.exception("Unable to connect to zookeeper")
212 |                 self.release_and_finish()
213 |                 raise PartitionerError("Zookeeper connection failure")
214 |
215 |         self.log.debug(
216 |             "Creating partitioner for group %s, topic %s,"
217 |             " partitions set %s", self.config.group_id,
218 |             self.topics,
219 |             partitions
220 |         )
221 |         return self.kazoo_client.SetPartitioner(
222 |             path=self.zk_group_path,
223 |             set=partitions,
224 |             time_boundary=self.config.partitioner_cooldown,
225 |         )
226 |
227 |     def release_and_finish(self):
228 |         """Release consumers and terminate the partitioner"""
229 |         if self._partitioner:
230 |             self._release(self._partitioner)
231 |             self._partitioner.finish()
232 |         self._partitioner = None
233 |
234 |     def _close_connections(self):
235 |         self.kafka_client.close()
236 |         self.partitions_set = set()
237 |         self.last_partitions_refresh = 0
238 |         self.kazoo_client.stop()
239 |         self.kazoo_client.close()
240 |         self.kazoo_retry = None
241 |
242 |     def _handle_group(self, partitioner):
243 |         """Handle group status changes, for example when a new
244 |         consumer joins or leaves the group.
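        For example, a transition to PartitionState.ACQUIRED dispatches to
        self._acquire, and PartitionState.RELEASE dispatches to self._release
        (see the actions map built in __init__).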
245 |         """
246 |         if partitioner:
247 |             try:
248 |                 self.actions[partitioner.state](partitioner)
249 |             except KeyError:
250 |                 self.log.exception("Unexpected partitioner state.")
251 |                 self.release_and_finish()
252 |                 raise PartitionerError("Invalid partitioner state %s" %
253 |                                        partitioner.state)
254 |
255 |     def _allocating(self, partitioner):
256 |         """Usually we don't want to do anything but wait in the
257 |         allocating state.
258 |         """
259 |         partitioner.wait_for_acquire()
260 |
261 |     def _acquire(self, partitioner):
262 |         """Acquire kafka topics-[partitions] and start the
263 |         consumers for them.
264 |         """
265 |         acquired_partitions = self._get_acquired_partitions(partitioner)
266 |         if acquired_partitions != self.acquired_partitions:
267 |             # TODO: Decrease logging level
268 |             self.log.info(
269 |                 "Total number of acquired partitions = %s. "
270 |                 "It was %s before. Added partitions %s. Removed partitions %s",
271 |                 len(acquired_partitions),
272 |                 len(self.acquired_partitions),
273 |                 [
274 |                     p for p in acquired_partitions
275 |                     if p not in self.acquired_partitions
276 |                 ],
277 |                 [
278 |                     p for p in self.acquired_partitions
279 |                     if p not in acquired_partitions
280 |                 ],
281 |             )
282 |             self.acquired_partitions = acquired_partitions
283 |             try:
284 |                 self.acquire(copy.deepcopy(self.acquired_partitions))
285 |                 self.released_flag = False
286 |             except Exception:
287 |                 self.log.exception("Acquire action failed.")
288 |                 trace = traceback.format_exc()
289 |                 self.release_and_finish()
290 |                 raise PartitionerError(
291 |                     "Acquire action failed. "
292 |                     "Acquire error: {trace}".format(trace=trace)
293 |                 )
294 |
295 |     def _release(self, partitioner):
296 |         """Release the consumers and acquired partitions.
297 |         This function is executed either at termination time or
298 |         whenever there is a group change.
299 |         """
300 |         self.log.debug("Releasing partitions")
301 |         try:
302 |             if not self.released_flag:
303 |                 self.release(self.acquired_partitions)
304 |                 self.released_flag = True
305 |         except Exception:
306 |             trace = traceback.format_exc()
307 |             self.log.exception("Release action failed.")
308 |             raise PartitionerError(
309 |                 "Release action failed. "
310 |                 "Release error: {trace}".format(trace=trace),
311 |             )
312 |         partitioner.release_set()
313 |         self.acquired_partitions.clear()
314 |         self.force_partitions_refresh = True
315 |
316 |     def _fail(self, partitioner):
317 |         """Handle zookeeper failures.
318 |         Executed when the consumer group is not able to recover
319 |         the connection. In this case, we cowardly stop
320 |         the running consumers.
321 |         """
322 |         self.log.error("Lost or unable to acquire partitions")
323 |         self.release_and_finish()
324 |         raise PartitionerZookeeperError(
325 |             "Internal partitioner error. "
326 |             "Lost connection to zookeeper: {cluster}".format(
327 |                 cluster=self.config.zookeeper,
328 |             )
329 |         )
330 |
331 |     def _get_acquired_partitions(self, partitioner):
332 |         """Retrieve acquired partitions from a partitioner.
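        For example, acquired partitions ['topic1-0', 'topic1-1', 'topic2-0']
        are grouped as {'topic1': [0, 1], 'topic2': [0]}.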
333 |
334 |         :returns: acquired topic and partitions
335 |         :rtype: dict {<topic>: [partitions]}
336 |         """
337 |         acquired_partitions = defaultdict(list)
338 |         for partition in partitioner:
339 |             topic, partition_id = partition.rsplit('-', 1)
340 |             acquired_partitions[topic].append(int(partition_id))
341 |         return acquired_partitions
342 |
343 |     def get_partitions_set(self):
344 |         """ Load partitions metadata from kafka and create
345 |         a set containing "<topic>-<partition_id>" strings
346 |
347 |         :returns: partitions for user topics
348 |         :rtype: set
349 |         :raises PartitionerError: if no partitions have been found
350 |         """
351 |         topic_partitions = get_kafka_topics(self.kafka_client)
352 |         partitions = []
353 |         missing_topics = set()
354 |         for topic in self.topics:
355 |             kafka_topic = kafka_bytestring(topic)
356 |             if kafka_topic not in topic_partitions:
357 |                 missing_topics.add(topic)
358 |             else:
359 |                 partitions += ["{0}-{1}".format(topic, p)
360 |                                for p in topic_partitions[kafka_topic]]
361 |         if missing_topics:
362 |             self.log.info("Missing topics: %s", missing_topics)
363 |         if not partitions:
364 |             self.release_and_finish()
365 |             raise PartitionerError(
366 |                 "No partitions found for topics: {topics}".format(
367 |                     topics=self.topics
368 |                 )
369 |             )
370 |         return set(partitions)
371 |
--------------------------------------------------------------------------------
/yelp_kafka/producer.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Copyright 2016 Yelp Inc.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | from __future__ import absolute_import
16 | from __future__ import unicode_literals
17 |
18 | import logging
19 |
20 | from kafka import KeyedProducer
21 | from kafka import SimpleProducer
22 | from kafka.common import KafkaError
23 | from py_zipkin.zipkin import zipkin_span
24 |
25 | from yelp_kafka import metrics
26 | from yelp_kafka.error import YelpKafkaError
27 | from yelp_kafka.metrics_responder import MetricsResponder
28 | from yelp_kafka.utils import get_default_responder_if_available
29 | METRIC_PREFIX = 'yelp_kafka.YelpKafkaProducer.'
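# All producer metrics below are emitted under this prefix: the final metric
# name is METRIC_PREFIX + <name>, where the <name> constants (for example
# PRODUCE_EXCEPTION_COUNT and the TIME_METRIC_NAMES collection) are defined
# in yelp_kafka.metrics.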
30 |
31 |
32 | class YelpKafkaProducerMetrics(object):
33 |     """Used to setup and report producer metrics
34 |
35 |     :param cluster_config: producer cluster configuration
36 |     :type cluster_config: config.ClusterConfig
37 |     :param client: Kafka client for which metrics are to be reported
38 |     :type client: kafka.KafkaClient
39 |     :param metrics_responder: A metrics responder to report metrics
40 |     :type metrics_responder: class which implements metrics_responder.MetricsResponder
41 |     """
42 |
43 |     def __init__(
44 |         self,
45 |         cluster_config,
46 |         client,
47 |         metrics_responder=None
48 |     ):
49 |         self.log = logging.getLogger(self.__class__.__name__)
50 |         self.cluster_config = cluster_config
51 |         self.client = client
52 |         self.timers = {}
53 |         self.metrics_responder = metrics_responder
54 |         if metrics_responder:
55 |             self.setup_metrics()
56 |
57 |     def get_kafka_dimensions(self):
58 |         return {
59 |             'client_id': self.client.client_id,
60 |             'cluster_type': self.cluster_config.type,
61 |             'cluster_name': self.cluster_config.name,
62 |         }
63 |
64 |     def setup_metrics(self):
65 |         self.client.metrics_responder = self._send_kafka_metrics
66 |         kafka_dimensions = self.get_kafka_dimensions()
67 |         self.kafka_enqueue_exception_count = self.metrics_responder.get_counter_emitter(
68 |             METRIC_PREFIX + metrics.PRODUCE_EXCEPTION_COUNT,
69 |             kafka_dimensions
70 |         )
71 |         for name in metrics.TIME_METRIC_NAMES:
72 |             self._create_timer(name, kafka_dimensions)
73 |
74 |     def _send_kafka_metrics(self, key, value):
75 |         if key in metrics.TIME_METRIC_NAMES:
76 |             # kafka-python emits time in seconds, but yelp_meteorite wants
77 |             # milliseconds
78 |             time_in_ms = value * 1000
79 |             self.metrics_responder.record(self._get_timer(key), time_in_ms)
80 |         else:
81 |             self.log.warning("Unknown metric: {0}".format(key))
82 |
83 |     def _create_timer(self, name, dimensions=None):
84 |         if dimensions is None:
85 |             dimensions = {}
86 |         new_name = METRIC_PREFIX + name
87 |         self.timers[new_name] = self.metrics_responder.get_timer_emitter(
88 |             new_name,
89 |             default_dimensions=dimensions
90 |         )
91 |
92 |     def _get_timer(self, name):
93 |         return self.timers[METRIC_PREFIX + name]
94 |
95 |
96 | class YelpKafkaSimpleProducer(SimpleProducer):
97 |     """ YelpKafkaSimpleProducer is an extension of the kafka SimpleProducer that
98 |     reports metrics about the producer to yelp_meteorite. These metrics include
99 |     enqueue latency for both success and failure to send and the number of exceptions
100 |     encountered trying to send.
101 |
102 |     If metrics reporting isn't required for your producer, specify report_metrics=False.
103 |     We highly recommend reporting metrics for monitoring purposes, e.g. production latency.
104 |
105 |     Note: This producer expects usage of kafka-python==0.9.4.post2, where metrics_responder
106 |     is implemented in KafkaClient.
107 |
108 |     :param cluster_config: producer cluster configuration
109 |     :type cluster_config: config.ClusterConfig
110 |     :param report_metrics: whether or not to report kafka production metrics. Defaults to True
111 |     :type report_metrics: bool
112 |     :param metrics_responder: A metrics responder to report metrics, defaults to
113 |         :py:class:`yelp_kafka.yelp_metrics_responder.MeteoriteMetricsResponder` if
114 |         the import of yelp_meteorite is successful. Please note, this is only active if
115 |         report_metrics is True.
116 |     :type metrics_responder: class which implements metrics_responder.MetricsResponder
117 |
118 |     Additionally all kafka.SimpleProducer params are usable here. See `SimpleProducer`_.
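    A minimal usage sketch (the topic name is hypothetical, and cluster_config
    is assumed to be a config.ClusterConfig with a broker_list attribute)::

        client = KafkaClient(cluster_config.broker_list)
        producer = YelpKafkaSimpleProducer(
            cluster_config=cluster_config,
            client=client,
        )
        producer.send_messages('my_topic', b'payload')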
119 |
120 |     .. _SimpleProducer: http://kafka-python.readthedocs.org/en/v0.9.5/apidoc/kafka.producer.html
121 |     """
122 |
123 |     def __init__(
124 |         self,
125 |         cluster_config=None,
126 |         report_metrics=True,
127 |         metrics_responder=None,
128 |         *args, **kwargs
129 |     ):
130 |         super(YelpKafkaSimpleProducer, self).__init__(*args, **kwargs)
131 |
132 |         if report_metrics:
133 |             self.metrics_responder = metrics_responder or get_default_responder_if_available()
134 |             assert not metrics_responder or isinstance(metrics_responder, MetricsResponder), \
135 |                 "Metric Reporter is not of type yelp_kafka.metrics_responder.MetricsResponder"
136 |         else:
137 |             self.metrics_responder = None
138 |
139 |         self.metrics = YelpKafkaProducerMetrics(
140 |             cluster_config=cluster_config,
141 |             client=self.client,
142 |             metrics_responder=self.metrics_responder
143 |         )
144 |
145 |     @zipkin_span(service_name='yelp_kafka', span_name='send_messages_simple_producer')
146 |     def send_messages(self, topic, *msg):
147 |         try:
148 |             super(YelpKafkaSimpleProducer, self).send_messages(topic, *msg)
149 |         except (YelpKafkaError, KafkaError):
150 |             if self.metrics.metrics_responder:
151 |                 self.metrics.metrics_responder.record(self.metrics.kafka_enqueue_exception_count, 1)
152 |             raise
153 |
154 |
155 | class YelpKafkaKeyedProducer(KeyedProducer):
156 |     """ YelpKafkaKeyedProducer is an extension of the kafka KeyedProducer that
157 |     reports metrics about the producer to yelp_meteorite.
158 |
159 |     Usage is the same as YelpKafkaSimpleProducer.
160 |
161 |     :param cluster_config: producer cluster configuration
162 |     :type cluster_config: config.ClusterConfig
163 |     :param report_metrics: whether or not to report kafka production metrics. Defaults to True
164 |     :type report_metrics: bool
165 |     :param metrics_responder: A metrics responder to report metrics, defaults to
166 |         :py:class:`yelp_kafka.yelp_metrics_responder.MeteoriteMetricsResponder` if
167 |         the import of yelp_meteorite is successful. Please note, this is only active if
168 |         report_metrics is True.
169 |     :type metrics_responder: class which implements metrics_responder.MetricsResponder
170 |
171 |     Additionally all kafka.KeyedProducer params are usable here. See `KeyedProducer`_.
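    For illustration (topic, key, and payload are hypothetical; the key is
    used to pick the partition)::

        producer = YelpKafkaKeyedProducer(
            cluster_config=cluster_config,
            client=client,
        )
        producer.send_messages('my_topic', b'my_key', b'payload')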
172 |
173 |     .. _KeyedProducer: http://kafka-python.readthedocs.org/en/v0.9.5/apidoc/kafka.producer.html
174 |     """
175 |
176 |     def __init__(
177 |         self,
178 |         cluster_config=None,
179 |         report_metrics=True,
180 |         metrics_responder=None,
181 |         *args,
182 |         **kwargs
183 |     ):
184 |         super(YelpKafkaKeyedProducer, self).__init__(*args, **kwargs)
185 |
186 |         if report_metrics:
187 |             self.metrics_responder = metrics_responder or get_default_responder_if_available()
188 |             assert not metrics_responder or isinstance(metrics_responder, MetricsResponder), \
189 |                 "Metric Reporter is not of type yelp_kafka.metrics_responder.MetricsResponder"
190 |         else:
191 |             self.metrics_responder = None
192 |
193 |         self.metrics = YelpKafkaProducerMetrics(
194 |             cluster_config,
195 |             self.client,
196 |             self.metrics_responder
197 |         )
198 |
199 |     @zipkin_span(service_name='yelp_kafka', span_name='send_messages_keyed_producer')
200 |     def send_messages(self, topic, key, *msg):
201 |         try:
202 |             super(YelpKafkaKeyedProducer, self).send_messages(topic, key, *msg)
203 |         except (YelpKafkaError, KafkaError):
204 |             if self.metrics.metrics_responder:
205 |                 self.metrics.metrics_responder.record(self.metrics.kafka_enqueue_exception_count, 1)
206 |             raise
207 |
--------------------------------------------------------------------------------
/yelp_kafka/testing/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Copyright 2016 Yelp Inc.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | from __future__ import absolute_import
16 | from __future__ import unicode_literals
--------------------------------------------------------------------------------
/yelp_kafka/testing/kafka_mock.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Copyright 2016 Yelp Inc.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
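# A minimal sketch of how this module is used in tests (topic name and
# payload are hypothetical):
#
#     with mock_kafka_python() as mocks:
#         client = kafka.KafkaClient('unused:9092')
#         producer = kafka.SimpleProducer(client)
#         producer.send_messages('my_topic', 'payload')
#         consumer = kafka.SimpleConsumer(client, 'my_group', 'my_topic')
#         assert consumer.get_messages(count=1)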
15 | from __future__ import absolute_import
16 | from __future__ import unicode_literals
17 |
18 | import contextlib
19 | from collections import namedtuple
20 |
21 | import kafka
22 | import mock
23 |
24 | import yelp_kafka
25 |
26 | KafkaMocks = namedtuple(
27 |     'KafkaMocks',
28 |     [
29 |         'KafkaClient',
30 |         'SimpleProducer',
31 |         'KeyedProducer',
32 |         'SimpleConsumer',
33 |         'KafkaSimpleConsumer',
34 |     ],
35 | )
36 |
37 |
38 | class Registrar(object):
39 |
40 |     def __init__(self):
41 |         self.topic_registry = {}
42 |
43 |     def mock_producer_with_registry(self):
44 |
45 |         class MockProducer(object):
46 |             ACK_AFTER_CLUSTER_COMMIT = 2
47 |
48 |             def __init__(
49 |                 inner_self,
50 |                 client,
51 |                 async=False,
52 |                 req_acks=1,
53 |                 ack_timeout=1000,
54 |                 codec=None,
55 |                 batch_send=False,
56 |                 batch_send_every_n=20,
57 |                 batch_send_every_t=20,
58 |                 random_start=False
59 |             ):
60 |                 inner_self._client = client
61 |
62 |             def send_messages(inner_self, topic, *messages):
63 |                 # inner_self so we can address the parent object Registrar
64 |                 # with self, thus accessing global test state.
65 |                 topic_list = self.topic_registry.setdefault(topic, [])
66 |                 current_offset = len(topic_list)
67 |                 new_messages = [
68 |                     kafka.common.OffsetAndMessage(
69 |                         offset=i + current_offset,
70 |                         message=kafka.common.Message(
71 |                             magic=0,
72 |                             attributes=0,
73 |                             key=None,
74 |                             value=msg,
75 |                         )
76 |                     ) for i, msg in enumerate(messages)]
77 |                 topic_list.extend(new_messages)
78 |
79 |         return MockProducer
80 |
81 |     def mock_keyed_producer_with_registry(self):
82 |         class MockKeyedProducer(object):
83 |             ACK_AFTER_CLUSTER_COMMIT = 2
84 |
85 |             def __init__(
86 |                 inner_self,
87 |                 client,
88 |                 async=False,
89 |                 req_acks=1,
90 |                 ack_timeout=1000,
91 |                 codec=None,
92 |                 batch_send=False,
93 |                 batch_send_every_n=20,
94 |                 batch_send_every_t=20,
95 |                 random_start=False
96 |             ):
97 |                 inner_self._client = client
98 |                 self._client = client
99 |
100 |             def send_messages(inner_self, topic, key, *messages):
101 |                 # XXX: By default, don't support multiple partitions in the
102 |                 # mock. If we need testing support for this, add it later.
103 |
104 |                 # inner_self so we can address the parent object Registrar
105 |                 # with self, thus accessing global test state.
106 |                 topic_list = self.topic_registry.setdefault(topic, [])
107 |                 current_offset = len(topic_list)
108 |                 new_messages = [
109 |                     kafka.common.OffsetAndMessage(
110 |                         offset=i + current_offset,
111 |                         message=kafka.common.Message(
112 |                             magic=0,
113 |                             attributes=0,
114 |                             key=key,
115 |                             value=msg,
116 |                         )
117 |                     ) for i, msg in enumerate(messages)]
118 |                 topic_list.extend(new_messages)
119 |
120 |         return MockKeyedProducer
121 |
122 |     def mock_simple_consumer_with_registrar(self):
123 |         class MockSimpleConsumer(object):
124 |             """I personally don't need this to be super hardcore, but anyone who
125 |             wants to, feel free to add auto_commit, fetch_last_known_offset,
126 |             multiple partition support."""
127 |             def __init__(
128 |                 inner_self,
129 |                 client,
130 |                 group,
131 |                 topic,
132 |                 auto_commit=True,
133 |                 partitions=None,
134 |                 auto_commit_every_n=100,
135 |                 auto_commit_every_t=5000,
136 |                 fetch_size_bytes=4096,
137 |                 buffer_size=4096,
138 |                 max_buffer_size=32768,
139 |                 iter_timeout=None
140 |             ):
141 |                 # XXX: This just snapshots the current topic. New messages produced
142 |                 # won't make it into here. If you need this, build it :)
143 |                 inner_self._topic = list(self.topic_registry.get(topic, []))
144 |                 inner_self._offset = 0
145 |                 inner_self._partition_info = False
146 |                 # NOTE(wting|2015-02-25): Someone else implement
147 |                 # auto_commit_every_n and auto_commit_every_t if you want it.
148 |                 inner_self._count_since_commit = 0
149 |                 inner_self._auto_commit = auto_commit
150 |
151 |             def get_messages(inner_self, count=1, block=True, timeout=0.1):
152 |                 old_offset = inner_self._offset + inner_self._count_since_commit
153 |                 new_offset = min(old_offset + count, len(inner_self._topic))
154 |                 messages = inner_self._topic[old_offset:new_offset]
155 |
156 |                 inner_self._count_since_commit += len(messages)
157 |                 if inner_self._auto_commit:
158 |                     inner_self.commit()
159 |
160 |                 return messages
161 |
162 |             def get_message(inner_self, block=True, timeout=0.1, get_partition_info=None):
163 |                 """
164 |                 If no messages can be fetched, returns None.
165 |                 If get_partition_info is None, it defaults to self.partition_info
166 |                 If get_partition_info is True, returns (partition, message)
167 |                 If get_partition_info is False, returns message
168 |                 """
169 |                 messages = inner_self.get_messages(
170 |                     count=1,
171 |                     block=block,
172 |                     timeout=timeout
173 |                 )
174 |                 message = messages[0] if messages else None
175 |
176 |                 if get_partition_info or (get_partition_info is None and inner_self._partition_info):
177 |                     fake_partition_info = 0
178 |                 else:
179 |                     fake_partition_info = None
180 |
181 |                 if fake_partition_info is not None and message is not None:
182 |                     return fake_partition_info, message
183 |                 else:
184 |                     return message
185 |
186 |             def commit(inner_self, partitions=None):
187 |                 if partitions is not None:
188 |                     raise NotImplementedError
189 |
190 |                 inner_self._offset = min(
191 |                     len(inner_self._topic),
192 |                     inner_self._offset + inner_self._count_since_commit)
193 |                 inner_self._count_since_commit = 0
194 |
195 |             def fetch_last_known_offsets(inner_self, partitions=None):
196 |                 return [inner_self._offset]
197 |
198 |             def seek(inner_self, offset, whence):
199 |                 raise NotImplementedError
200 |
201 |             def provide_partition_info(inner_self):
202 |                 inner_self._partition_info = True
203 |
204 |             def __iter__(inner_self):
205 |                 for msg in inner_self._topic[inner_self._offset:]:
206 |                     yield msg
207 |
208 |         return MockSimpleConsumer
209 |
210 |     def mock_yelp_consumer_with_registrar(self):
211 |         class MockSimpleConsumer(object):
212 |
213 |             def __init__(
214 |                 inner_self,
215 |                 topic,
216 |                 config,
217 |                 partitions=None,
218 |             ):
219 |                 # XXX: This just snapshots the current topic. New messages produced
220 |                 # won't make it into here. If you need this, build it :)
221 |                 inner_self._topic = list(self.topic_registry.get(topic, []))
222 |                 inner_self._offset = 0
223 |
224 |             def connect(self):
225 |                 pass
226 |
227 |             def _translate_messages_to_yelp(inner_self, messages):
228 |                 return [yelp_kafka.consumer.Message(
229 |                     partition=0,
230 |                     offset=message.offset,
231 |                     key=message.message.key,
232 |                     value=message.message.value,
233 |                 ) for message in messages]
234 |
235 |             def get_messages(inner_self, count=1, block=True, timeout=0.1):
236 |                 # inner_self so we can address the parent object Registrar
237 |                 # with self, thus accessing global test state.
238 |                 new_offset = min(inner_self._offset + count, len(inner_self._topic))
239 |                 old_offset = inner_self._offset
240 |                 inner_self._offset = new_offset
241 |
242 |                 return inner_self._translate_messages_to_yelp(
243 |                     inner_self._topic[old_offset:new_offset]
244 |                 )
245 |
246 |             def get_message(inner_self, block=True, timeout=0.1):
247 |                 # Mirror the real consumer: return None when no message
248 |                 # is available, instead of raising IndexError.
249 |                 messages = inner_self.get_messages(
250 |                     count=1, block=block, timeout=timeout)
251 |                 return messages[0] if messages else None
252 |
253 |             def close(self):
254 |                 pass
255 |
256 |             def __iter__(inner_self):
257 |                 translated_messages = inner_self._translate_messages_to_yelp(
258 |                     inner_self._topic[inner_self._offset:],
259 |                 )
260 |                 for msg in translated_messages:
261 |                     yield msg
262 |
263 |         return MockSimpleConsumer
264 |
265 |
266 | @contextlib.contextmanager
267 | def mock_kafka_python():
268 |     registrar = Registrar()
269 |     with mock.patch.object(kafka, 'KafkaClient', spec=kafka.KafkaClient) as Client:
270 |         with mock.patch.object(
271 |             kafka,
272 |             'SimpleProducer',
273 |             registrar.mock_producer_with_registry(),
274 |         ) as Producer:
275 |             with mock.patch.object(
276 |                 kafka,
277 |                 'KeyedProducer',
278 |                 registrar.mock_keyed_producer_with_registry(),
279 |             ) as KeyedProducer:
280 |                 with mock.patch.object(
281 |                     kafka,
282 |                     'SimpleConsumer',
283 |                     registrar.mock_simple_consumer_with_registrar(),
284 |                 ) as Consumer:
285 |                     with mock.patch.object(
286 |                         yelp_kafka.consumer,
287 |                         'KafkaSimpleConsumer',
288 |                         registrar.mock_yelp_consumer_with_registrar(),
289 |                     ) as YelpConsumer:
290 |                         yield KafkaMocks(
291 |                             KafkaClient=Client,
292 |                             SimpleProducer=Producer,
293 |                             KeyedProducer=KeyedProducer,
294 |                             SimpleConsumer=Consumer,
295 |                             KafkaSimpleConsumer=YelpConsumer,
296 |                         )
--------------------------------------------------------------------------------
/yelp_kafka/utils.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Copyright 2016 Yelp Inc.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | from __future__ import absolute_import
16 | from __future__ import unicode_literals
17 |
18 | import functools
19 | import logging
20 |
21 | from kafka.common import KafkaUnavailableError
22 | from six.moves import cPickle as pickle
23 |
24 |
25 | log = logging.getLogger(__name__)
26 |
27 |
28 | def get_kafka_topics(kafkaclient):
29 |     """Connect to kafka and fetch all the topics/partitions."""
30 |     try:
31 |         kafkaclient.load_metadata_for_topics()
32 |     except KafkaUnavailableError:
33 |         # Sometimes the kafka server closes the connection for inactivity;
34 |         # in this case the second call should succeed, otherwise the kafka
35 |         # server is down and we should fail.
36 |         log.debug("First call to kafka for loading metadata failed."
37 |                   " Trying again.")
38 |         kafkaclient.load_metadata_for_topics()
39 |     return kafkaclient.topic_partitions
40 |
41 |
42 | def make_scribe_topic(stream, datacenter):
43 |     """Get a scribe topic name
44 |
45 |     :param stream: scribe stream name
46 |     :param datacenter: datacenter name
47 |     :returns: topic name
48 |     """
49 |     return "scribe.{0}.{1}".format(datacenter, stream)
50 |
51 |
52 | def _split_topic_name(topic_name):
53 |     tokens = topic_name.split(".", 2)
54 |     if len(tokens) < 3 or tokens[0] != "scribe":
55 |         raise ValueError("Encountered wrongly formatted topic %s" % topic_name)
56 |     else:
57 |         return tokens
58 |
59 |
60 | def extract_datacenter(topic_name):
61 |     """Get the datacenter from a kafka topic name
62 |
63 |     :param topic_name: Kafka topic name
64 |     :returns: datacenter
65 |     :raises: ValueError if the topic name does not conform to the expected
66 |         format: "scribe.<datacenter>.<stream>"
67 |     """
68 |     return _split_topic_name(topic_name)[1]
69 |
70 |
71 | def extract_stream_name(topic_name):
72 |     """Get the stream name from a kafka topic name
73 |
74 |     :param topic_name: Kafka topic name
75 |     :returns: stream name
76 |     :raises: ValueError if the topic name does not conform to the expected
77 |         format: "scribe.<datacenter>.<stream>"
78 |     """
79 |     return _split_topic_name(topic_name)[2]
80 |
81 |
82 | def get_default_responder_if_available():
83 |     try:
84 |         from yelp_kafka.yelp_metrics_responder import MeteoriteMetricsResponder
85 |         return MeteoriteMetricsResponder()
86 |     except ImportError:
87 |         log.error("yelp_meteorite is not present")
88 |
89 |
90 | def retry_if_kafka_unavailable_error(exception):
91 |     """Returns true if the exception is of type KafkaUnavailableError
92 |
93 |     :param exception: the exception to be checked
94 |     :returns: boolean
95 |     """
96 |     return isinstance(exception, KafkaUnavailableError)
97 |
98 |
99 | class memoized(object):
100 |     """Decorator that caches a function's return value each time it is called.
101 |     If called later with the same arguments, the cached value is returned, and
102 |     the function is not re-evaluated.
103 |
104 |     Based upon http://wiki.python.org/moin/PythonDecoratorLibrary#Memoize
105 |     Nota bene: this decorator memoizes /all/ calls to the function. For a memoization
106 |     decorator with limited cache size, consider:
107 |     http://code.activestate.com/recipes/496879-memoize-decorator-function-with-cache-size-limit/
108 |     """
109 |
110 |     def __init__(self, func):
111 |         self.func = func
112 |         self.cache = {}
113 |
114 |     def __call__(self, *args, **kwargs):
115 |         # If the function args cannot be used as a cache hash key, fail fast
116 |         key = pickle.dumps((args, kwargs))
117 |         try:
118 |             return self.cache[key]
119 |         except KeyError:
120 |             value = self.func(*args, **kwargs)
121 |             self.cache[key] = value
122 |             return value
123 |
124 |     def __repr__(self):
125 |         """Return the function's docstring."""
126 |         return self.func.__doc__
127 |
128 |     def __get__(self, obj, objtype):
129 |         """Support instance methods."""
130 |         return functools.partial(self.__call__, obj)
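# A small illustration of @memoized (the decorated function is hypothetical):
#
#     @memoized
#     def expensive_lookup(x):
#         return x ** 2
#
#     expensive_lookup(3)  # computed and cached
#     expensive_lookup(3)  # served from the cache; the function is not re-run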
--------------------------------------------------------------------------------
/yelp_kafka/yelp_metrics_responder.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Copyright 2016 Yelp Inc.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | from __future__ import absolute_import
16 | from __future__ import unicode_literals
17 |
18 | import logging
19 |
20 | import yelp_meteorite
21 |
22 | from yelp_kafka.metrics_responder import MetricsResponder
23 |
24 |
25 | class MeteoriteMetricsResponder(MetricsResponder):
26 |     """
27 |     Used for publishing metrics with a yelp_meteorite metrics reporter instance
28 |     """
29 |
30 |     def __init__(self):
31 |         super(MeteoriteMetricsResponder, self).__init__()
32 |         self.log = logging.getLogger(self.__class__.__name__)
33 |
34 |     def get_counter_emitter(self, metric, default_dimensions=None):
35 |         return yelp_meteorite.create_counter(
36 |             metric,
37 |             default_dimensions
38 |         )
39 |
40 |     def get_timer_emitter(self, metric, default_dimensions=None):
41 |         return yelp_meteorite.create_timer(
42 |             metric,
43 |             default_dimensions
44 |         )
45 |
46 |     def record(self, registered_reporter, value, timestamp=None):
47 |         if isinstance(registered_reporter, yelp_meteorite.metrics.Counter):
48 |             registered_reporter.count(value)
49 |         elif isinstance(registered_reporter, yelp_meteorite.metrics.Timer):
50 |             registered_reporter.record(value)
51 |         else:
52 |             self.log.error("Unknown reporter type: %s", type(registered_reporter))
53 |
--------------------------------------------------------------------------------