├── .bandit ├── .flake8 ├── .github └── workflows │ ├── ci.yml │ └── upload-pypi.yml ├── .gitignore ├── .isort.cfg ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── docker-compose.yml ├── examples ├── logger.py ├── parallel.py └── simple.py ├── kafkaesk ├── __init__.py ├── app.py ├── consumer.py ├── exceptions.py ├── ext │ ├── __init__.py │ └── logging │ │ ├── __init__.py │ │ ├── handler.py │ │ └── record.py ├── kafka.py ├── metrics.py ├── publish.py ├── py.typed └── utils.py ├── mypy.ini ├── poetry.lock ├── pyproject.toml ├── pytest.ini ├── stubs ├── aiokafka │ ├── __init__.py │ ├── errors.py │ └── structs.py └── kafka │ ├── __init__.py │ ├── admin │ ├── __init__.py │ └── client.py │ ├── errors.py │ └── structs.py └── tests ├── __init__.py ├── acceptance ├── __init__.py ├── ext │ ├── __init__.py │ └── logging │ │ ├── __init__.py │ │ ├── test_handler.py │ │ └── test_record.py ├── produce.py ├── test_healthcheck.py ├── test_pubsub.py ├── test_rebalance.py └── test_run.py ├── conftest.py ├── fixtures.py ├── unit ├── __init__.py ├── ext │ ├── __init__.py │ └── logging │ │ ├── __init__.py │ │ └── test_handler.py ├── test_app.py ├── test_consumer.py ├── test_exceptions.py ├── test_kafka.py ├── test_metrics.py └── test_schema.py └── utils.py /.bandit: -------------------------------------------------------------------------------- 1 | [bandit] 2 | exclude: tests 3 | skips: B101,B110,B112,B303,B311,B303 4 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | no-accept-encodings = True 3 | max-line-length = 100 4 | ignore = 5 | E203 6 | W503 7 | E231 8 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: kafkaesk 2 | 3 | on: [push] 4 | 5 | jobs: 6 | # Job to run pre-checks 7 | pre-checks: 8 | runs-on: ubuntu-latest 9 | strategy: 10 | matrix: 11 | python-version: [3.8] 12 | 13 | steps: 14 | - name: Checkout the repository 15 | uses: actions/checkout@v2 16 | 17 | - name: Setup Python 18 | uses: actions/setup-python@v1 19 | with: 20 | python-version: ${{ matrix.python-version }} 21 | 22 | - name: Install package 23 | run: | 24 | pip install poetry 25 | poetry install 26 | - name: Run pre-checks 27 | run: | 28 | poetry run flake8 kafkaesk --config=.flake8 29 | poetry run mypy kafkaesk/ 30 | poetry run isort -c -rc kafkaesk/ 31 | poetry run black --check --verbose kafkaesk 32 | # Job to run tests 33 | tests: 34 | runs-on: ubuntu-latest 35 | 36 | strategy: 37 | matrix: 38 | python-version: [3.8] 39 | 40 | steps: 41 | - name: Checkout the repository 42 | uses: actions/checkout@v2 43 | 44 | - name: Setup Python 45 | uses: actions/setup-python@v1 46 | with: 47 | python-version: ${{ matrix.python-version }} 48 | 49 | - name: Start Docker containers for Zookeeper and Kafka 50 | run: docker-compose up -d 51 | 52 | - name: Install the package 53 | run: | 54 | pip install poetry 55 | poetry install 56 | - name: Run tests 57 | run: | 58 | poetry run pytest -rfE --reruns 2 --cov=kafkaesk -s --tb=native -v --cov-report xml --cov-append tests 59 | - name: Upload coverage to Codecov 60 | uses: codecov/codecov-action@v1 61 | with: 62 | file: ./coverage.xml 63 | -------------------------------------------------------------------------------- /.github/workflows/upload-pypi.yml: 
-------------------------------------------------------------------------------- 1 | name: Upload package to pypi 2 | 3 | on: 4 | push: 5 | branches: 6 | - "master" 7 | 8 | jobs: 9 | upload: 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - name: Checkout the repository 14 | uses: actions/checkout@v2 15 | 16 | - name: Setup Python 17 | uses: actions/setup-python@v2 18 | with: 19 | python-version: "3.8" 20 | 21 | - name: Publish package 22 | run: | 23 | pip install poetry 24 | poetry config pypi-token.pypi ${{ secrets.PYPI_TOKEN }} 25 | poetry publish --build 26 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode/ 2 | .idea/ 3 | # Python Stuff 4 | **/__pycache__ 5 | *.egg-info 6 | .mypy_cache/ 7 | .python-version 8 | dist/ 9 | .venv/ 10 | venv/ -------------------------------------------------------------------------------- /.isort.cfg: -------------------------------------------------------------------------------- 1 | [settings] 2 | force_alphabetical_sort = True 3 | force_single_line = True 4 | not_skip = __init__.py 5 | line_length = 110 6 | wrap_length = 100 -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/ambv/black 3 | rev: stable 4 | hooks: 5 | - id: black 6 | language_version: python3.8 7 | - repo: https://github.com/pre-commit/mirrors-isort 8 | rev: v4.3.20 9 | hooks: 10 | - id: isort 11 | 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The BSD-2 license 2 | 3 | Copyright (c) 2016, Plone Foundation 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 11 | 12 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 13 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 |

3 | [Onna Logo] 4 | 5 | # kafkaesk 6 | 7 |

8 | 9 | 10 | ## Table Of Contents 11 | 12 | - [About the Project](#about-the-project) 13 | - [Publish](#publish) 14 | - [Subscribe](#subscribe) 15 | - [Avoiding global object](#avoiding-global-object) 16 | - [Manual commit](#manual-commit) 17 | - [kafkaesk contract](#kafkaesk-contract) 18 | - [Worker](#worker) 19 | - [Development](#development) 20 | - [Extensions](#extensions) 21 | - [Naming](#naming) 22 | 23 | 24 | ## About The Project 25 | 26 | This project is meant to make publishing and subscribing to events with Python and Kafka effortless. 27 | 28 | ### Guiding principles 29 | 30 | - HTTP 31 | - Language agnostic 32 | - Contracts built on top of [Kafka](https://kafka.apache.org/) 33 | 34 | 35 | ### Alternatives 36 | - [aiokafka](https://aiokafka.readthedocs.io/en/stable/): can be complex to scale correctly 37 | - [guillotina_kafka](https://github.com/onna/guillotina_kafka): complex, tied to [Guillotina](https://guillotina.readthedocs.io/en/latest/) 38 | - [faust](https://faust.readthedocs.io/en/latest/): requires additional data layers, not language agnostic 39 | - confluent kafka + avro: close, but ends up being like grpc; requires compilation for each language and has no asyncio support. 40 | 41 | > Consider this Python project as syntactic sugar around these ideas. 42 | 43 | ## Publish 44 | 45 | Messages are defined using [pydantic](https://pydantic-docs.helpmanual.io/), but publishing can also be done with pure JSON. 46 | 47 | ```python 48 | import kafkaesk 49 | from pydantic import BaseModel 50 | 51 | app = kafkaesk.Application() 52 | 53 | @app.schema("Content", version=1, retention=24 * 60 * 60) 54 | class ContentMessage(BaseModel): 55 | foo: str 56 | 57 | 58 | async def foobar(): 59 | # ... 60 | # doing something in an async func 61 | await app.publish("content.edited.Resource", data=ContentMessage(foo="bar")) 62 | ``` 63 | 64 | A convenience method is available on the `subscriber` dependency instance; it allows header 65 | propagation from the consumed message. 66 | 67 | ```python 68 | import kafkaesk 69 | from pydantic import BaseModel 70 | 71 | app = kafkaesk.Application() 72 | 73 | @app.schema("Content", version=1, retention=24 * 60 * 60) 74 | class ContentMessage(BaseModel): 75 | foo: str 76 | 77 | 78 | @app.subscribe("content.*", "group_id") 79 | async def get_messages(data: ContentMessage, subscriber): 80 | print(f"{data.foo}") 81 | # This will propagate `data` record headers 82 | await subscriber.publish("content.edited.Resource", data=ContentMessage(foo="bar")) 83 | 84 | ``` 85 | 86 | ## Subscribe 87 | 88 | ```python 89 | import kafkaesk 90 | from pydantic import BaseModel 91 | 92 | app = kafkaesk.Application() 93 | 94 | @app.schema("Content", version=1, retention=24 * 60 * 60) 95 | class ContentMessage(BaseModel): 96 | foo: str 97 | 98 | 99 | @app.subscribe("content.*", "group_id") 100 | async def get_messages(data: ContentMessage): 101 | print(f"{data.foo}") 102 | 103 | ``` 104 | 105 | ## Avoiding global object 106 | 107 | If you do not want to have a global application configuration, you can lazily configure 108 | the application and register schemas/subscribers separately. 
109 | 110 | ```python 111 | import kafkaesk 112 | from pydantic import BaseModel 113 | 114 | router = kafkaesk.Router() 115 | 116 | @router.schema("Content", version=1, retention=24 * 60 * 60) 117 | class ContentMessage(BaseModel): 118 | foo: str 119 | 120 | 121 | @router.subscribe("content.*", "group_id") 122 | async def get_messages(data: ContentMessage): 123 | print(f"{data.foo}") 124 | 125 | 126 | if __name__ == "__main__": 127 | app = kafkaesk.Application() 128 | app.mount(router) 129 | kafkaesk.run(app) 130 | 131 | ``` 132 | 133 | Optional consumer injected parameters: 134 | 135 | - schema: str 136 | - record: aiokafka.structs.ConsumerRecord 137 | - app: kafkaesk.app.Application 138 | - subscriber: kafkaesk.app.BatchConsumer 139 | 140 | Depending on the type annotation for the first parameter, you will get different data injected: 141 | 142 | - `async def get_messages(data: ContentMessage)`: parses pydantic schema 143 | - `async def get_messages(data: bytes)`: give raw byte data 144 | - `async def get_messages(record: aiokafka.structs.ConsumerRecord)`: give kafka record object 145 | - `async def get_messages(data)`: raw json data in message 146 | 147 | ## Manual commit 148 | 149 | To accomplish a manual commit strategy yourself: 150 | 151 | ```python 152 | app = kafkaesk.Application(auto_commit=False) 153 | 154 | @app.subscribe("content.*", "group_id") 155 | async def get_messages(data: ContentMessage, subscriber): 156 | print(f"{data.foo}") 157 | await subscriber.consumer.commit() 158 | ``` 159 | 160 | ## SSL 161 | Add these values to your `kafka_settings`: 162 | - `ssl_context` - this should be a placeholder as the SSL Context is generally created within the application 163 | - `security_protocol` - one of SSL or PLAINTEXT 164 | - `sasl_mechanism` - one of PLAIN, GSSAPI, SCRAM-SHA-256, SCRAM-SHA-512, OAUTHBEARER 165 | - `sasl_plain_username` . 166 | - `sasl_plain_password` . 167 | 168 | ## kafkaesk contract 169 | 170 | This is a library around using kafka. 171 | Kafka itself does not enforce these concepts. 172 | 173 | - Every message must provide a json schema 174 | - Messages produced will be validated against json schema 175 | - Each topic will have only one schema 176 | - A single schema can be used for multiple topics 177 | - Consumed message schema validation is up to the consumer 178 | - Messages will be consumed at least once. Considering this, your handling should be idempotent 179 | 180 | ### Message format 181 | 182 | ```json 183 | { 184 | "schema": "schema_name:1", 185 | "data": { ... } 186 | } 187 | ``` 188 | 189 | ## Worker 190 | 191 | ```bash 192 | kafkaesk mymodule:app --kafka-servers=localhost:9092 193 | ``` 194 | 195 | Options: 196 | 197 | - --kafka-servers: comma separated list of kafka servers 198 | - --kafka-settings: json encoded options to be passed to https://aiokafka.readthedocs.io/en/stable/api.html#aiokafkaconsumer-class 199 | - --topic-prefix: prefix to use for topics 200 | - --replication-factor: what replication factor topics should be created with. Defaults to min(number of servers, 3). 201 | 202 | ### Application.publish 203 | 204 | - stream_id: str: name of stream to send data to 205 | - data: class that inherits from pydantic.BaseModel 206 | - key: Optional[bytes]: key for message if it needs one 207 | 208 | ### Application.subscribe 209 | 210 | - stream_id: str: fnmatch pattern of streams to subscribe to 211 | - group: Optional[str]: consumer group id to use. 
Will use the name of the function if not provided 211 | 212 | ### Application.schema 213 | 214 | - id: str: id of the schema to store 215 | - version: Optional[int]: version of schema to store 216 | - streams: Optional[List[str]]: if streams are known ahead of time, you can pre-create them before you push data 217 | - retention: Optional[int]: retention policy in seconds 218 | 219 | ### Application.configure 220 | 221 | - kafka_servers: Optional[List[str]]: kafka servers to connect to 222 | - topic_prefix: Optional[str]: topic name prefix to subscribe to 223 | - kafka_settings: Optional[Dict[str, Any]]: additional aiokafka settings to pass in 224 | - replication_factor: Optional[int]: what replication factor topics should be created with. Defaults to min(number of servers, 3). 225 | - kafka_api_version: str: default `auto` 226 | - auto_commit: bool: default `True` 227 | - auto_commit_interval_ms: int: default `5000` (a configuration sketch combining these options with the SSL settings above is shown at the end of this README) 228 | 229 | ## Development 230 | 231 | ### Requirements 232 | 233 | - [Docker](https://www.docker.com/) 234 | - [Poetry](https://python-poetry.org/) 235 | 236 | ```bash 237 | poetry install 238 | ``` 239 | 240 | Run tests: 241 | 242 | ```bash 243 | docker-compose up 244 | KAFKA=localhost:9092 poetry run pytest tests 245 | ``` 246 | 247 | ## Extensions 248 | 249 | ### Logging 250 | This extension includes classes that extend Python's logging framework to publish structured log messages to a Kafka topic. 251 | It is made up of three main components: an extended `logging.LogRecord` and two custom `logging.Handler`s. 252 | 253 | See `logger.py` in the examples directory. 254 | 255 | #### Log Record 256 | `kafkaesk.ext.logging.record.factory` is a function that will return `kafkaesk.ext.logging.record.PydanticLogRecord` objects. 257 | The `factory()` function scans through any `args` passed to a logger and checks each item to determine whether it is an instance of `pydantic.BaseModel`. 258 | 259 | If it is a base model instance and `model._is_log_model` evaluates to `True`, the model is removed from `args` and added to `record._pydantic_data`. 260 | After that, `factory()` uses logging's existing logic to finish creating the log record. 261 | 262 | #### Handler 263 | This extension ships with two handlers capable of handling `kafkaesk.ext.logging.handler.PydanticLogModel` classes: `kafkaesk.ext.logging.handler.PydanticStreamHandler` and `kafkaesk.ext.logging.handler.PydanticKafkaeskHandler`. 264 | 265 | The stream handler is a thin wrapper around `logging.StreamHandler` with the same signature; the only difference is that it will attempt to convert any pydantic models it receives into a human-readable log message. 266 | 267 | The kafkaesk handler does a bit more work in the background. 268 | 269 | The handler has two required inputs: a `kafkaesk.app.Application` instance and a stream name. 270 | 271 | Once initialized, any logs emitted through the handler are saved into an internal queue. 272 | A worker task pulls logs from the queue and writes them to the specified topic. 273 | 274 | ## Naming 275 | 276 | Naming is hard, and "kafka" is already a fun name. 277 | Hopefully this library isn't literally "kafkaesque" for you. 
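## Configuration example

The `Application.configure` options and SSL settings described above can be combined as in the following sketch. This is only an illustration: the broker addresses, topic prefix, and certificate path are placeholders, not values used anywhere in this repository.

```python
import ssl

import kafkaesk

app = kafkaesk.Application()

# Build the SSL context inside the application, as suggested in the SSL section.
# The CA file path below is an assumption for illustration only.
ssl_context = ssl.create_default_context(cafile="/etc/ssl/certs/kafka-ca.pem")

app.configure(
    kafka_servers=["broker-1:9093", "broker-2:9093"],  # placeholder broker addresses
    topic_prefix="myservice.",  # placeholder topic prefix
    replication_factor=3,
    kafka_settings={
        # passed through to aiokafka producers/consumers
        "security_protocol": "SSL",
        "ssl_context": ssl_context,
        # read by the consumer when auto_commit is enabled
        "auto_commit_interval_ms": 5000,
    },
)
```

With this configuration, both the producer and any consumers created by the application reuse the same `kafka_settings`.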
279 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | # just for dev, fun, playing around 2 | version: "3" 3 | services: 4 | zookeeper: 5 | image: bitnami/zookeeper:latest 6 | ports: 7 | - 2181:2181 8 | expose: 9 | - 2181 10 | environment: 11 | "ALLOW_ANONYMOUS_LOGIN": "yes" 12 | networks: 13 | - kafka-network 14 | 15 | kafka: 16 | image: bitnami/kafka:latest 17 | depends_on: 18 | - zookeeper 19 | ports: 20 | - 9092:9092 21 | expose: 22 | - 9092 23 | links: 24 | - zookeeper 25 | environment: 26 | "ALLOW_PLAINTEXT_LISTENER": "yes" 27 | "KAFKA_CFG_ZOOKEEPER_CONNECT": "zookeeper:2181" 28 | "KAFKA_CFG_AUTO_CREATE_TOPICS_ENABLE": "true" 29 | "KAFKA_CFG_ADVERTISED_LISTENERS": "PLAINTEXT://localhost:9092" 30 | networks: 31 | - kafka-network 32 | 33 | networks: 34 | kafka-network: 35 | driver: bridge 36 | -------------------------------------------------------------------------------- /examples/logger.py: -------------------------------------------------------------------------------- 1 | from kafkaesk import Application 2 | from kafkaesk.ext.logging import PydanticKafkaeskHandler 3 | from kafkaesk.ext.logging import PydanticLogModel 4 | from kafkaesk.ext.logging import PydanticStreamHandler 5 | from pydantic import BaseModel 6 | from typing import Optional 7 | 8 | import asyncio 9 | import logging 10 | 11 | 12 | class UserLog(BaseModel): 13 | _is_log_model = True 14 | user: Optional[str] = None 15 | 16 | 17 | async def test_log() -> None: 18 | app = Application(kafka_servers=["localhost:9092"]) 19 | 20 | logger = logging.getLogger("kafkaesk.ext.logging.kafka") 21 | handler = PydanticKafkaeskHandler(app, "logging.test") 22 | logger.addHandler(handler) 23 | logger.setLevel(logging.DEBUG) 24 | 25 | stream_logger = logging.getLogger("kafakesk.ext.logging.stream") 26 | stream_handler = PydanticStreamHandler() 27 | stream_logger.addHandler(stream_handler) 28 | stream_logger.setLevel(logging.DEBUG) 29 | 30 | @app.subscribe("logging.test", group="example.logging.consumer") 31 | async def consume(data: PydanticLogModel) -> None: 32 | stream_logger.info(data.json()) 33 | 34 | async with app: 35 | logger.debug("Log Message", UserLog(user="kafkaesk")) 36 | await app.flush() 37 | await app.consume_for(1, seconds=5) 38 | 39 | 40 | if __name__ == "__main__": 41 | asyncio.run(test_log()) 42 | -------------------------------------------------------------------------------- /examples/parallel.py: -------------------------------------------------------------------------------- 1 | from kafkaesk import Application 2 | from kafkaesk import run_app 3 | from pydantic import BaseModel 4 | 5 | import asyncio 6 | import logging 7 | import random 8 | 9 | 10 | logging.basicConfig(level=logging.INFO) 11 | 12 | 13 | app = Application() 14 | 15 | 16 | @app.schema("Foobar", streams=["content.foo", "slow.content.foo", "failed.content.foo"]) 17 | class Foobar(BaseModel): 18 | timeout: int 19 | 20 | 21 | async def consumer_logic(data: Foobar, record, subscriber): 22 | try: 23 | print(f"{data} -- {record.headers}: waiting {data.timeout}s...") 24 | await asyncio.sleep(data.timeout) 25 | print(f"{data}: done...") 26 | except asyncio.CancelledError: 27 | # Slow topic 28 | print(f"{data} timeout message, sending to slow topic...") 29 | await subscriber.publish(f"slow.{record.topic}", record, headers=[("slow", b"true")]) 30 | except Exception: 31 | await subscriber.publish(f"failed.{record.topic}", 
record) 32 | 33 | 34 | async def generate_data(app): 35 | idx = 0 36 | while True: 37 | timeout = random.randint(0, 10) 38 | await app.publish("content.foo", Foobar(timeout=timeout)) 39 | idx += 1 40 | await asyncio.sleep(0.1) 41 | 42 | 43 | async def run(): 44 | app.configure(kafka_servers=["localhost:9092"]) 45 | task = asyncio.create_task(generate_data(app)) 46 | 47 | # Regular tasks should be consumed in less than 5s 48 | app.subscribe("content.*", group="example_content_group", concurrency=10, timeout_seconds=5)( 49 | consumer_logic 50 | ) 51 | 52 | # Timeout taks (slow) can be consumed independendly, with different configuration and logic 53 | app.subscribe( 54 | "slow.content.*", group="timeout_example_content_group", concurrency=1, timeout_seconds=None 55 | )(consumer_logic) 56 | 57 | await run_app(app) 58 | 59 | 60 | if __name__ == "__main__": 61 | asyncio.run(run()) 62 | -------------------------------------------------------------------------------- /examples/simple.py: -------------------------------------------------------------------------------- 1 | from kafkaesk import Application 2 | from kafkaesk import run_app 3 | from pydantic import BaseModel 4 | 5 | import asyncio 6 | import logging 7 | 8 | logging.basicConfig(level=logging.INFO) 9 | 10 | 11 | app = Application() 12 | 13 | 14 | @app.schema("Foobar") 15 | class Foobar(BaseModel): 16 | foo: str 17 | bar: str 18 | 19 | 20 | @app.subscribe("content.*", group="example_content_group") 21 | async def messages(data: Foobar, record): 22 | await asyncio.sleep(0.1) 23 | print(f"{data.foo}: {data.bar}: {record}") 24 | 25 | 26 | async def generate_data(app): 27 | idx = 0 28 | while True: 29 | await app.publish("content.foo", Foobar(foo=str(idx), bar="yo")) 30 | idx += 1 31 | await asyncio.sleep(0.1) 32 | 33 | 34 | async def run(): 35 | app.configure(kafka_servers=["localhost:9092"]) 36 | task = asyncio.create_task(generate_data(app)) 37 | await run_app(app) 38 | # await app.consume_forever() 39 | 40 | 41 | if __name__ == "__main__": 42 | asyncio.run(run()) 43 | -------------------------------------------------------------------------------- /kafkaesk/__init__.py: -------------------------------------------------------------------------------- 1 | from .app import Application # noqa 2 | from .app import BatchConsumer # noqa 3 | from .app import Router # noqa 4 | from .app import run # noqa 5 | from .app import run_app # noqa 6 | from .app import Subscription # noqa 7 | -------------------------------------------------------------------------------- /kafkaesk/app.py: -------------------------------------------------------------------------------- 1 | from .consumer import BatchConsumer 2 | from .consumer import Subscription 3 | from .exceptions import AppNotConfiguredException 4 | from .exceptions import ProducerUnhealthyException 5 | from .exceptions import SchemaConflictException 6 | from .exceptions import StopConsumer 7 | from .kafka import KafkaTopicManager 8 | from .metrics import NOERROR 9 | from .metrics import PRODUCER_TOPIC_OFFSET 10 | from .metrics import PUBLISHED_MESSAGES 11 | from .metrics import PUBLISHED_MESSAGES_TIME 12 | from .metrics import watch_kafka 13 | from .metrics import watch_publish 14 | from .utils import resolve_dotted_name 15 | from asyncio.futures import Future 16 | from functools import partial 17 | from opentracing.scope_managers.contextvars import ContextVarsScopeManager 18 | from pydantic import BaseModel 19 | from types import TracebackType 20 | from typing import Any 21 | from typing import 
Awaitable 22 | from typing import Callable 23 | from typing import cast 24 | from typing import Dict 25 | from typing import List 26 | from typing import Optional 27 | from typing import Tuple 28 | from typing import Type 29 | 30 | import aiokafka 31 | import aiokafka.errors 32 | import aiokafka.structs 33 | import argparse 34 | import asyncio 35 | import logging 36 | import opentracing 37 | import orjson 38 | import pydantic 39 | import signal 40 | import time 41 | 42 | logger = logging.getLogger("kafkaesk") 43 | 44 | 45 | class SchemaRegistration: 46 | def __init__( 47 | self, 48 | id: str, 49 | version: int, 50 | model: Type[pydantic.BaseModel], 51 | retention: Optional[int] = None, 52 | streams: Optional[List[str]] = None, 53 | ): 54 | self.id = id 55 | self.version = version 56 | self.model = model 57 | self.retention = retention 58 | self.streams = streams 59 | 60 | def __repr__(self) -> str: 61 | return f"" 62 | 63 | 64 | def published_callback(topic: str, start_time: float, fut: Future) -> None: 65 | # Record the metrics 66 | finish_time = time.time() 67 | exception = fut.exception() 68 | if exception: 69 | error = str(exception.__class__.__name__) 70 | PUBLISHED_MESSAGES.labels(stream_id=topic, partition=-1, error=error).inc() 71 | else: 72 | metadata = fut.result() 73 | PUBLISHED_MESSAGES.labels( 74 | stream_id=topic, partition=metadata.partition, error=NOERROR 75 | ).inc() 76 | PRODUCER_TOPIC_OFFSET.labels(stream_id=topic, partition=metadata.partition).set( 77 | metadata.offset 78 | ) 79 | PUBLISHED_MESSAGES_TIME.labels(stream_id=topic).observe(finish_time - start_time) 80 | 81 | 82 | _aiokafka_consumer_settings = ( 83 | "fetch_max_wait_ms", 84 | "fetch_max_bytes", 85 | "fetch_min_bytes", 86 | "max_partition_fetch_bytes", 87 | "request_timeout_ms", 88 | "auto_offset_reset", 89 | "metadata_max_age_ms", 90 | "max_poll_interval_ms", 91 | "rebalance_timeout_ms", 92 | "session_timeout_ms", 93 | "heartbeat_interval_ms", 94 | "consumer_timeout_ms", 95 | "max_poll_records", 96 | "connections_max_idle_ms", 97 | "ssl_context", 98 | "security_protocol", 99 | "sasl_mechanism", 100 | "sasl_plain_username", 101 | "sasl_plain_password", 102 | ) 103 | _aiokafka_producer_settings = ( 104 | "metadata_max_age_ms", 105 | "request_timeout_ms", 106 | "max_batch_size", 107 | "max_request_size", 108 | "send_backoff_ms", 109 | "retry_backoff_ms", 110 | "ssl_context", 111 | "security_protocol", 112 | "sasl_mechanism", 113 | "sasl_plain_username", 114 | "sasl_plain_password", 115 | ) 116 | 117 | 118 | class Router: 119 | """ 120 | Application routing configuration. 
121 | """ 122 | 123 | def __init__(self) -> None: 124 | self._subscriptions: List[Subscription] = [] 125 | self._schemas: Dict[str, SchemaRegistration] = {} 126 | self._event_handlers: Dict[str, List[Callable[[], Awaitable[None]]]] = {} 127 | 128 | @property 129 | def subscriptions(self) -> List[Subscription]: 130 | return self._subscriptions 131 | 132 | @property 133 | def schemas(self) -> Dict[str, SchemaRegistration]: 134 | return self._schemas 135 | 136 | @property 137 | def event_handlers(self) -> Dict[str, List[Callable[[], Awaitable[None]]]]: 138 | return self._event_handlers 139 | 140 | def on(self, name: str, handler: Callable[[], Awaitable[None]]) -> None: 141 | if name not in self._event_handlers: 142 | self._event_handlers[name] = [] 143 | 144 | self._event_handlers[name].append(handler) 145 | 146 | def _subscribe( 147 | self, 148 | group: str, 149 | *, 150 | consumer_id: str = None, 151 | pattern: str = None, 152 | topics: List[str] = None, 153 | timeout_seconds: float = None, 154 | concurrency: int = None, 155 | ) -> Callable: 156 | def inner(func: Callable) -> Callable: 157 | # If there is no consumer_id use the group instead 158 | subscription = Subscription( 159 | consumer_id or group, 160 | func, 161 | group or func.__name__, 162 | pattern=pattern, 163 | topics=topics, 164 | concurrency=concurrency, 165 | timeout_seconds=timeout_seconds, 166 | ) 167 | self._subscriptions.append(subscription) 168 | return func 169 | 170 | return inner 171 | 172 | def subscribe_to_topics( 173 | self, 174 | topics: List[str], 175 | group: str, 176 | *, 177 | timeout_seconds: float = None, 178 | concurrency: int = None, 179 | ) -> Callable: 180 | return self._subscribe( 181 | group=group, 182 | topics=topics, 183 | pattern=None, 184 | timeout_seconds=timeout_seconds, 185 | concurrency=concurrency, 186 | ) 187 | 188 | def subscribe_to_pattern( 189 | self, 190 | pattern: str, 191 | group: str, 192 | *, 193 | timeout_seconds: float = None, 194 | concurrency: int = None, 195 | ) -> Callable: 196 | return self._subscribe( 197 | group=group, 198 | topics=None, 199 | pattern=pattern, 200 | timeout_seconds=timeout_seconds, 201 | concurrency=concurrency, 202 | ) 203 | 204 | def subscribe( 205 | self, 206 | stream_id: str, 207 | group: str, 208 | *, 209 | timeout_seconds: float = None, 210 | concurrency: int = None, 211 | ) -> Callable: 212 | """Keep backwards compatibility""" 213 | return self._subscribe( 214 | group=group, 215 | topics=None, 216 | pattern=stream_id, 217 | timeout_seconds=timeout_seconds, 218 | concurrency=concurrency, 219 | ) 220 | 221 | def schema( 222 | self, 223 | _id: Optional[str] = None, 224 | *, 225 | version: Optional[int] = None, 226 | retention: Optional[int] = None, 227 | streams: Optional[List[str]] = None, 228 | ) -> Callable: 229 | version = version or 1 230 | 231 | def inner(cls: Type[BaseModel]) -> Type[BaseModel]: 232 | if _id is None: 233 | type_id = cls.__name__ 234 | else: 235 | type_id = _id 236 | key = f"{type_id}:{version}" 237 | reg = SchemaRegistration( 238 | id=type_id, version=version or 1, model=cls, retention=retention, streams=streams 239 | ) 240 | if key in self._schemas: 241 | raise SchemaConflictException(self._schemas[key], reg) 242 | cls.__key__ = key # type: ignore 243 | self._schemas[key] = reg 244 | return cls 245 | 246 | return inner 247 | 248 | 249 | class Application(Router): 250 | """ 251 | Application configuration 252 | """ 253 | 254 | _producer: Optional[aiokafka.AIOKafkaProducer] = None 255 | 256 | def __init__( 257 | self, 258 | 
kafka_servers: Optional[List[str]] = None, 259 | topic_prefix: str = "", 260 | kafka_settings: Optional[Dict[str, Any]] = None, 261 | replication_factor: Optional[int] = None, 262 | kafka_api_version: str = "auto", 263 | auto_commit: bool = True, 264 | ): 265 | super().__init__() 266 | self._kafka_servers = kafka_servers 267 | self._kafka_settings = kafka_settings 268 | self._producer = None 269 | self._initialized = False 270 | self._locks: Dict[str, asyncio.Lock] = {} 271 | 272 | self._kafka_api_version = kafka_api_version 273 | self._topic_prefix = topic_prefix 274 | self._replication_factor = replication_factor 275 | self._topic_mng: Optional[KafkaTopicManager] = None 276 | self._subscription_consumers: List[BatchConsumer] = [] 277 | self._subscription_consumers_tasks: List[asyncio.Task] = [] 278 | 279 | self.auto_commit = auto_commit 280 | 281 | @property 282 | def kafka_settings(self) -> Dict[str, Any]: 283 | return self._kafka_settings or {} 284 | 285 | def mount(self, router: Router) -> None: 286 | self._subscriptions.extend(router.subscriptions) 287 | self._schemas.update(router.schemas) 288 | self._event_handlers.update(router.event_handlers) 289 | 290 | async def health_check(self) -> None: 291 | for subscription_consumer in self._subscription_consumers: 292 | await subscription_consumer.healthy() 293 | if not self.producer_healthy(): 294 | raise ProducerUnhealthyException(self._producer) # type: ignore 295 | 296 | async def _call_event_handlers(self, name: str) -> None: 297 | handlers = self._event_handlers.get(name) 298 | 299 | if handlers is not None: 300 | for handler in handlers: 301 | await handler() 302 | 303 | @property 304 | def topic_mng(self) -> KafkaTopicManager: 305 | if self._topic_mng is None: 306 | self._topic_mng = KafkaTopicManager( 307 | cast(List[str], self._kafka_servers), 308 | self._topic_prefix, 309 | replication_factor=self._replication_factor, 310 | kafka_api_version=self._kafka_api_version, 311 | ssl_context=self.kafka_settings.get("ssl_context"), 312 | security_protocol=self.kafka_settings.get("security_protocol", "PLAINTEXT"), 313 | sasl_mechanism=self.kafka_settings.get("sasl_mechanism"), 314 | sasl_plain_username=self.kafka_settings.get("sasl_plain_username"), 315 | sasl_plain_password=self.kafka_settings.get("sasl_plain_password"), 316 | ) 317 | return self._topic_mng 318 | 319 | def get_lock(self, name: str) -> asyncio.Lock: 320 | if name not in self._locks: 321 | self._locks[name] = asyncio.Lock() 322 | return self._locks[name] 323 | 324 | def configure( 325 | self, 326 | kafka_servers: Optional[List[str]] = None, 327 | topic_prefix: Optional[str] = None, 328 | kafka_settings: Optional[Dict[str, Any]] = None, 329 | api_version: Optional[str] = None, 330 | replication_factor: Optional[int] = None, 331 | ) -> None: 332 | if kafka_servers is not None: 333 | self._kafka_servers = kafka_servers 334 | if topic_prefix is not None: 335 | self._topic_prefix = topic_prefix 336 | if kafka_settings is not None: 337 | self._kafka_settings = kafka_settings 338 | if api_version is not None: 339 | self._kafka_api_version = api_version 340 | if replication_factor is not None: 341 | self._replication_factor = replication_factor 342 | 343 | @property 344 | def is_configured(self) -> bool: 345 | return bool(self._kafka_servers) 346 | 347 | async def publish_and_wait( 348 | self, 349 | stream_id: str, 350 | data: BaseModel, 351 | key: Optional[bytes] = None, 352 | headers: Optional[List[Tuple[str, bytes]]] = None, 353 | ) -> aiokafka.structs.ConsumerRecord: 354 | 
return await (await self.publish(stream_id, data, key, headers=headers)) 355 | 356 | async def _maybe_create_topic(self, stream_id: str, data: BaseModel = None) -> None: 357 | topic_id = self.topic_mng.get_topic_id(stream_id) 358 | async with self.get_lock(stream_id): 359 | if not await self.topic_mng.topic_exists(topic_id): 360 | reg = None 361 | if data: 362 | reg = self.get_schema_reg(data) 363 | retention_ms = None 364 | if reg is not None and reg.retention is not None: 365 | retention_ms = reg.retention * 1000 366 | await self.topic_mng.create_topic( 367 | topic_id, 368 | replication_factor=self._replication_factor, 369 | retention_ms=retention_ms, 370 | ) 371 | 372 | async def publish( 373 | self, 374 | stream_id: str, 375 | data: BaseModel, 376 | key: Optional[bytes] = None, 377 | headers: Optional[List[Tuple[str, bytes]]] = None, 378 | ) -> Awaitable[aiokafka.structs.ConsumerRecord]: 379 | if not self._initialized: 380 | async with self.get_lock("_"): 381 | await self.initialize() 382 | 383 | schema_key = getattr(data, "__key__", None) 384 | if schema_key not in self._schemas: 385 | # do not require key 386 | schema_key = f"{data.__class__.__name__}:1" 387 | data_ = data.dict() 388 | 389 | await self._maybe_create_topic(stream_id, data) 390 | return await self.raw_publish( 391 | stream_id, orjson.dumps({"schema": schema_key, "data": data_}), key, headers=headers 392 | ) 393 | 394 | async def raw_publish( 395 | self, 396 | stream_id: str, 397 | data: bytes, 398 | key: Optional[bytes] = None, 399 | headers: Optional[List[Tuple[str, bytes]]] = None, 400 | ) -> Awaitable[aiokafka.structs.ConsumerRecord]: 401 | logger.debug(f"Sending kafka msg: {stream_id}") 402 | producer = await self._get_producer() 403 | tracer = opentracing.tracer 404 | 405 | if not headers: 406 | headers = [] 407 | else: 408 | # this is just to check the headers shape 409 | try: 410 | for _, _ in headers: 411 | pass 412 | except ValueError: 413 | # We want to be resilient to malformated headers 414 | logger.exception(f"Malformed headers: '{headers}'") 415 | 416 | if isinstance(tracer.scope_manager, ContextVarsScopeManager): 417 | # This only makes sense if the context manager is asyncio aware 418 | if tracer.active_span: 419 | carrier: Dict[str, str] = {} 420 | tracer.inject( 421 | span_context=tracer.active_span, 422 | format=opentracing.Format.TEXT_MAP, 423 | carrier=carrier, 424 | ) 425 | 426 | header_keys = [k for k, _ in headers] 427 | for k, v in carrier.items(): 428 | # Dont overwrite if they are already present! 
429 | if k not in header_keys: 430 | headers.append((k, v.encode())) 431 | 432 | if not self.producer_healthy(): 433 | raise ProducerUnhealthyException(self._producer) # type: ignore 434 | 435 | topic_id = self.topic_mng.get_topic_id(stream_id) 436 | start_time = time.time() 437 | with watch_publish(topic_id): 438 | fut = await producer.send( 439 | topic_id, 440 | value=data, 441 | key=key, 442 | headers=headers, 443 | ) 444 | 445 | fut.add_done_callback(partial(published_callback, topic_id, start_time)) # type: ignore 446 | return fut 447 | 448 | async def flush(self) -> None: 449 | if self._producer is not None: 450 | await self._producer.flush() 451 | 452 | def get_schema_reg(self, model_or_def: BaseModel) -> Optional[SchemaRegistration]: 453 | try: 454 | key = model_or_def.__key__ # type: ignore 455 | return self._schemas[key] 456 | except (AttributeError, KeyError): 457 | return None 458 | 459 | def producer_healthy(self) -> bool: 460 | """ 461 | It's possible for the producer to be unhealthy while we're still sending messages to it. 462 | """ 463 | if self._producer is not None and self._producer._sender.sender_task is not None: 464 | return not self._producer._sender.sender_task.done() 465 | return True 466 | 467 | def consumer_factory(self, group_id: str) -> aiokafka.AIOKafkaConsumer: 468 | return aiokafka.AIOKafkaConsumer( 469 | bootstrap_servers=cast(List[str], self._kafka_servers), 470 | loop=asyncio.get_event_loop(), 471 | group_id=group_id, 472 | auto_offset_reset="earliest", 473 | api_version=self._kafka_api_version, 474 | enable_auto_commit=False, 475 | **{k: v for k, v in self.kafka_settings.items() if k in _aiokafka_consumer_settings}, 476 | ) 477 | 478 | def producer_factory(self) -> aiokafka.AIOKafkaProducer: 479 | return aiokafka.AIOKafkaProducer( 480 | bootstrap_servers=cast(List[str], self._kafka_servers), 481 | loop=asyncio.get_event_loop(), 482 | api_version=self._kafka_api_version, 483 | **{k: v for k, v in self.kafka_settings.items() if k in _aiokafka_producer_settings}, 484 | ) 485 | 486 | async def _get_producer(self) -> aiokafka.AIOKafkaProducer: 487 | if self._producer is None: 488 | self._producer = self.producer_factory() 489 | with watch_kafka("producer_start"): 490 | await self._producer.start() 491 | return self._producer 492 | 493 | async def initialize(self) -> None: 494 | if not self.is_configured: 495 | raise AppNotConfiguredException 496 | 497 | await self._call_event_handlers("initialize") 498 | 499 | for reg in self._schemas.values(): 500 | # initialize topics for known streams 501 | for stream_id in reg.streams or []: 502 | topic_id = self.topic_mng.get_topic_id(stream_id) 503 | async with self.get_lock(stream_id): 504 | if not await self.topic_mng.topic_exists(topic_id): 505 | await self.topic_mng.create_topic( 506 | topic_id, 507 | retention_ms=reg.retention * 1000 508 | if reg.retention is not None 509 | else None, 510 | ) 511 | 512 | self._initialized = True 513 | 514 | async def finalize(self) -> None: 515 | await self._call_event_handlers("finalize") 516 | 517 | await self.stop() 518 | 519 | if self._producer is not None: 520 | with watch_kafka("producer_flush"): 521 | await self._producer.flush() 522 | with watch_kafka("producer_stop"): 523 | await self._producer.stop() 524 | 525 | if self._topic_mng is not None: 526 | await self._topic_mng.finalize() 527 | 528 | self._producer = None 529 | self._initialized = False 530 | self._topic_mng = None 531 | 532 | async def __aenter__(self) -> "Application": 533 | await self.initialize() 534 | 
return self 535 | 536 | async def __aexit__( 537 | self, 538 | exc_type: Optional[Type[BaseException]] = None, 539 | exc: Optional[BaseException] = None, 540 | traceback: Optional[TracebackType] = None, 541 | ) -> None: 542 | logger.info("Stopping application...", exc_info=exc) 543 | await self.finalize() 544 | 545 | async def consume_for(self, num_messages: int, *, seconds: Optional[int] = None) -> int: 546 | consumed = 0 547 | self._subscription_consumers = [] 548 | tasks = [] 549 | for subscription in self._subscriptions: 550 | 551 | async def on_message(record: aiokafka.structs.ConsumerRecord) -> None: 552 | nonlocal consumed 553 | consumed += 1 554 | if consumed >= num_messages: 555 | raise StopConsumer 556 | 557 | consumer = BatchConsumer( 558 | subscription=subscription, 559 | app=self, 560 | event_handlers={"message": [on_message]}, 561 | auto_commit=self.auto_commit, 562 | ) 563 | 564 | self._subscription_consumers.append(consumer) 565 | tasks.append(asyncio.create_task(consumer(), name=str(consumer))) 566 | 567 | done, pending = await asyncio.wait( 568 | tasks, timeout=seconds, return_when=asyncio.FIRST_EXCEPTION 569 | ) 570 | await self.stop() 571 | 572 | # re-raise any errors so we can validate during tests 573 | for task in done: 574 | exc = task.exception() 575 | if exc is not None: 576 | raise exc 577 | 578 | for task in pending: 579 | task.cancel() 580 | 581 | return consumed 582 | 583 | def consume_forever(self) -> Awaitable: 584 | self._subscription_consumers = [] 585 | self._subscription_consumers_tasks = [] 586 | 587 | for subscription in self._subscriptions: 588 | consumer = BatchConsumer( 589 | subscription=subscription, 590 | app=self, 591 | auto_commit=self.auto_commit, 592 | ) 593 | self._subscription_consumers.append(consumer) 594 | 595 | self._subscription_consumers_tasks = [ 596 | asyncio.create_task(c()) for c in self._subscription_consumers 597 | ] 598 | return asyncio.wait(self._subscription_consumers_tasks, return_when=asyncio.FIRST_EXCEPTION) 599 | 600 | async def stop(self) -> None: 601 | async with self.get_lock("_"): 602 | # do not allow stop calls at same time 603 | 604 | if len(self._subscription_consumers) == 0: 605 | return 606 | 607 | _, pending = await asyncio.wait( 608 | [asyncio.create_task(c.stop()) for c in self._subscription_consumers if c], 609 | timeout=5, 610 | ) 611 | for task in pending: 612 | # stop tasks that didn't finish 613 | task.cancel() 614 | 615 | for task in self._subscription_consumers_tasks: 616 | # make sure everything is done 617 | if not task.done(): 618 | task.cancel() 619 | 620 | for task in self._subscription_consumers_tasks: 621 | try: 622 | await asyncio.wait([task]) 623 | except asyncio.CancelledError: 624 | ... 
625 | 626 | 627 | cli_parser = argparse.ArgumentParser(description="Run kafkaesk worker.") 628 | cli_parser.add_argument("app", help="Application object") 629 | cli_parser.add_argument("--kafka-servers", help="Kafka servers") 630 | cli_parser.add_argument("--kafka-settings", help="Kafka settings") 631 | cli_parser.add_argument("--topic-prefix", help="Topic prefix") 632 | cli_parser.add_argument("--api-version", help="Kafka API Version") 633 | 634 | 635 | def _sig_handler(app: Application) -> None: 636 | asyncio.create_task(app.stop()) 637 | 638 | 639 | async def run_app(app: Application) -> None: 640 | async with app: 641 | loop = asyncio.get_event_loop() 642 | fut = asyncio.create_task(app.consume_forever()) 643 | for signame in {"SIGINT", "SIGTERM"}: 644 | loop.add_signal_handler(getattr(signal, signame), partial(_sig_handler, app)) 645 | done, pending = await fut 646 | logger.debug("Exiting consumer") 647 | 648 | await app.stop() 649 | # re-raise any errors so we can validate during tests 650 | for task in done: 651 | exc = task.exception() 652 | if exc is not None: 653 | raise exc 654 | 655 | 656 | def run(app: Optional[Application] = None) -> None: 657 | if app is None: 658 | opts = cli_parser.parse_args() 659 | module_str, attr = opts.app.split(":") 660 | module = resolve_dotted_name(module_str) 661 | app = getattr(module, attr) 662 | 663 | if callable(app): 664 | app = app() 665 | 666 | app = cast(Application, app) 667 | 668 | if opts.kafka_servers: 669 | app.configure(kafka_servers=opts.kafka_servers.split(",")) 670 | if opts.kafka_settings: 671 | app.configure(kafka_settings=orjson.loads(opts.kafka_settings)) 672 | if opts.topic_prefix: 673 | app.configure(topic_prefix=opts.topic_prefix) 674 | if opts.api_version: 675 | app.configure(api_version=opts.api_version) 676 | 677 | try: 678 | asyncio.run(run_app(app)) 679 | except asyncio.CancelledError: # pragma: no cover 680 | logger.debug("Closing because task was exited") 681 | -------------------------------------------------------------------------------- /kafkaesk/consumer.py: -------------------------------------------------------------------------------- 1 | from .exceptions import ConsumerUnhealthyException 2 | from .exceptions import HandlerTaskCancelled 3 | from .exceptions import StopConsumer 4 | from .exceptions import UnhandledMessage 5 | from .metrics import CONSUMED_MESSAGE_TIME 6 | from .metrics import CONSUMED_MESSAGES 7 | from .metrics import CONSUMED_MESSAGES_BATCH_SIZE 8 | from .metrics import CONSUMER_HEALTH 9 | from .metrics import CONSUMER_REBALANCED 10 | from .metrics import CONSUMER_TOPIC_OFFSET 11 | from .metrics import MESSAGE_LEAD_TIME 12 | from .metrics import NOERROR 13 | from kafka.structs import TopicPartition 14 | 15 | import aiokafka 16 | import asyncio 17 | import fnmatch 18 | import functools 19 | import inspect 20 | import logging 21 | import opentracing 22 | import orjson 23 | import pydantic 24 | import time 25 | import typing 26 | 27 | if typing.TYPE_CHECKING: # pragma: no cover 28 | from .app import Application 29 | else: 30 | Application = None 31 | 32 | 33 | logger = logging.getLogger(__name__) 34 | 35 | 36 | class Subscription: 37 | def __init__( 38 | self, 39 | consumer_id: str, 40 | func: typing.Callable, 41 | group: str, 42 | *, 43 | pattern: typing.Optional[str] = None, 44 | topics: typing.Optional[typing.List[str]] = None, 45 | timeout_seconds: float = 0.0, 46 | concurrency: int = None, 47 | ): 48 | self.consumer_id = consumer_id 49 | self.pattern = pattern 50 | self.topics = topics 51 | 
self.func = func 52 | self.group = group 53 | self.timeout = timeout_seconds 54 | self.concurrency = concurrency 55 | 56 | def __repr__(self) -> str: 57 | return f"" 58 | 59 | 60 | def _pydantic_msg_handler( 61 | model: typing.Type[pydantic.BaseModel], record: aiokafka.ConsumerRecord 62 | ) -> pydantic.BaseModel: 63 | try: 64 | data: typing.Dict[str, typing.Any] = orjson.loads(record.value) 65 | return model.parse_obj(data["data"]) 66 | except orjson.JSONDecodeError: 67 | # log the execption so we can see what fields failed 68 | logger.warning(f"Payload is not valid json: {record}", exc_info=True) 69 | raise UnhandledMessage("Error deserializing json") 70 | except pydantic.ValidationError: 71 | # log the execption so we can see what fields failed 72 | logger.warning(f"Error parsing pydantic model:{model} {record}", exc_info=True) 73 | raise UnhandledMessage(f"Error parsing data: {model}") 74 | except Exception: 75 | # Catch all 76 | logger.warning(f"Error parsing payload: {model} {record}", exc_info=True) 77 | raise UnhandledMessage("Error parsing payload") 78 | 79 | 80 | def _raw_msg_handler(record: aiokafka.structs.ConsumerRecord) -> typing.Dict[str, typing.Any]: 81 | data: typing.Dict[str, typing.Any] = orjson.loads(record.value) 82 | return data 83 | 84 | 85 | def _bytes_msg_handler(record: aiokafka.structs.ConsumerRecord) -> bytes: 86 | return record.value 87 | 88 | 89 | def _record_msg_handler(record: aiokafka.structs.ConsumerRecord) -> aiokafka.structs.ConsumerRecord: 90 | return record 91 | 92 | 93 | def build_handler( 94 | coro: typing.Callable, app: "Application", consumer: "BatchConsumer" 95 | ) -> typing.Callable: 96 | """Introspection on the coroutine signature to inject dependencies""" 97 | sig = inspect.signature(coro) 98 | param_name = [k for k in sig.parameters.keys()][0] 99 | annotation = sig.parameters[param_name].annotation 100 | handler = _raw_msg_handler 101 | if annotation and annotation != sig.empty: 102 | if annotation == bytes: 103 | handler = _bytes_msg_handler # type: ignore 104 | elif annotation == aiokafka.ConsumerRecord: 105 | handler = _record_msg_handler # type: ignore 106 | else: 107 | handler = functools.partial(_pydantic_msg_handler, annotation) # type: ignore 108 | 109 | it = iter(sig.parameters.items()) 110 | # first argument is required and its the payload 111 | next(it) 112 | kwargs: typing.Dict[str, typing.Any] = getattr(coro, "__extra_kwargs__", {}) 113 | 114 | for key, param in it: 115 | if key == "schema": 116 | kwargs["schema"] = None 117 | elif key == "record": 118 | kwargs["record"] = None 119 | elif key == "app": 120 | kwargs["app"] = app 121 | elif key == "subscriber": 122 | kwargs["subscriber"] = consumer 123 | elif issubclass(param.annotation, opentracing.Span): 124 | kwargs[key] = opentracing.Span 125 | 126 | async def inner(record: aiokafka.ConsumerRecord, span: opentracing.Span) -> None: 127 | data = handler(record) 128 | deps = kwargs.copy() 129 | 130 | for key, param in kwargs.items(): 131 | if key == "schema": 132 | msg = orjson.loads(record.value) 133 | deps["schema"] = msg["schema"] 134 | elif key == "record": 135 | deps["record"] = record 136 | elif param == opentracing.Span: 137 | deps[key] = span 138 | 139 | await coro(data, **deps) 140 | 141 | return inner 142 | 143 | 144 | class BatchConsumer(aiokafka.ConsumerRebalanceListener): 145 | _subscription: Subscription 146 | _close: typing.Optional[asyncio.Future] = None 147 | _consumer: aiokafka.AIOKafkaConsumer 148 | _offsets: typing.Dict[aiokafka.TopicPartition, int] 149 | 
_message_handler: typing.Callable 150 | _initialized: bool 151 | _running: bool = False 152 | 153 | def __init__( 154 | self, 155 | subscription: Subscription, 156 | app: "Application", 157 | event_handlers: typing.Optional[typing.Dict[str, typing.List[typing.Callable]]] = None, 158 | auto_commit: bool = True, 159 | ): 160 | self._initialized = False 161 | self.stream_id = subscription.consumer_id 162 | self.group_id = subscription.group 163 | self._coro = subscription.func 164 | self._event_handlers = event_handlers or {} 165 | self._concurrency = subscription.concurrency or 1 166 | self._timeout = subscription.timeout 167 | self._subscription = subscription 168 | self._close = None 169 | self._app = app 170 | self._last_commit = 0.0 171 | self._auto_commit = auto_commit 172 | self._tp: typing.Dict[aiokafka.TopicPartition, int] = {} 173 | 174 | # We accept either pattern or a list of topics, also we might accept a single topic 175 | # to keep compatibility with older API 176 | self.pattern = subscription.pattern 177 | self.topics = subscription.topics 178 | 179 | async def __call__(self) -> None: 180 | if not self._initialized: 181 | await self.initialize() 182 | 183 | try: 184 | while not self._close: 185 | try: 186 | if not self._consumer.assignment(): 187 | await asyncio.sleep(2) 188 | continue 189 | await self._consume() 190 | except aiokafka.errors.KafkaConnectionError: 191 | # We retry 192 | self._health_metric(False) 193 | logger.info(f"Consumer {self} kafka connection error, retrying...") 194 | await asyncio.sleep(0.5) 195 | except asyncio.CancelledError: 196 | self._health_metric(False) 197 | except StopConsumer: 198 | self._health_metric(False) 199 | logger.info(f"Consumer {self} stopped, exiting") 200 | except BaseException as exc: 201 | logger.exception(f"Consumer {self} failed. Finalizing.", exc_info=exc) 202 | self._health_metric(False) 203 | raise 204 | finally: 205 | await self.finalize() 206 | 207 | def _health_metric(self, healthy: bool) -> None: 208 | CONSUMER_HEALTH.labels( 209 | group_id=self.group_id, 210 | ).set(healthy) 211 | 212 | async def emit(self, name: str, *args: typing.Any, **kwargs: typing.Any) -> None: 213 | for func in self._event_handlers.get(name, []): 214 | try: 215 | await func(*args, **kwargs) 216 | except StopConsumer: 217 | raise 218 | except Exception: 219 | logger.warning(f"Error emitting event: {name}: {func}", exc_info=True) 220 | 221 | async def initialize(self) -> None: 222 | self._close = None 223 | self._running = True 224 | self._processing = asyncio.Lock() 225 | self._consumer = await self._consumer_factory() 226 | await self._consumer.start() 227 | self._message_handler = build_handler(self._coro, self._app, self) # type: ignore 228 | self._initialized = True 229 | 230 | async def finalize(self) -> None: 231 | try: 232 | await self._consumer.stop() 233 | except Exception: 234 | logger.info(f"[{self}] Could not commit on shutdown", exc_info=True) 235 | 236 | self._initialized = False 237 | self._running = False 238 | if self._close: 239 | self._close.set_result("done") 240 | 241 | async def _consumer_factory(self) -> aiokafka.AIOKafkaConsumer: 242 | consumer = self._app.consumer_factory(self.group_id) 243 | 244 | if self.pattern and self.topics: 245 | raise AssertionError( 246 | "Both of the params 'pattern' and 'topics' are not allowed. Select only one mode." 
247 | ) # noqa 248 | 249 | if self.pattern: 250 | # This is needed in case we have a prefix 251 | topic_id = self._app.topic_mng.get_topic_id(self.pattern) 252 | 253 | if "*" in self.pattern: 254 | pattern = fnmatch.translate(topic_id) 255 | consumer.subscribe(pattern=pattern, listener=self) # type: ignore 256 | else: 257 | consumer.subscribe(topics=[topic_id], listener=self) # type: ignore 258 | elif self.topics: 259 | topics = [self._app.topic_mng.get_topic_id(topic) for topic in self.topics] 260 | consumer.subscribe(topics=topics, listener=self) # type: ignore 261 | else: 262 | raise ValueError("Either `topics` or `pattern` should be defined") 263 | 264 | return consumer 265 | 266 | async def stop(self) -> None: 267 | if not self._running: 268 | return 269 | 270 | # Exit the loop, this will trigger finalize call 271 | loop = asyncio.get_running_loop() 272 | self._close = loop.create_future() 273 | await asyncio.wait([self._close]) 274 | 275 | def __repr__(self) -> str: 276 | return f"" 277 | 278 | def _span(self, record: aiokafka.ConsumerRecord) -> opentracing.SpanContext: 279 | tracer = opentracing.tracer 280 | headers = {x[0]: x[1].decode() for x in record.headers or []} 281 | parent = tracer.extract(opentracing.Format.TEXT_MAP, headers) 282 | context = tracer.start_active_span( 283 | record.topic, 284 | tags={ 285 | "message_bus.destination": record.topic, 286 | "message_bus.partition": record.partition, 287 | "message_bus.group_id": self.group_id, 288 | }, 289 | references=[opentracing.follows_from(parent)], 290 | ) 291 | return context.span 292 | 293 | async def _handler(self, record: aiokafka.ConsumerRecord) -> None: 294 | with self._span(record) as span: 295 | await self._message_handler(record, span) 296 | 297 | async def _consume(self) -> None: 298 | batch = await self._consumer.getmany(max_records=self._concurrency, timeout_ms=500) 299 | 300 | async with self._processing: 301 | if not batch: 302 | await self._maybe_commit() 303 | else: 304 | await self._consume_batch(batch) 305 | 306 | async def _consume_batch( 307 | self, batch: typing.Dict[TopicPartition, typing.List[aiokafka.ConsumerRecord]] 308 | ) -> None: 309 | futures: typing.Dict[asyncio.Future[typing.Any], aiokafka.ConsumerRecord] = dict() 310 | for tp, records in batch.items(): 311 | for record in records: 312 | coro = self._handler(record) 313 | fut = asyncio.create_task(coro) 314 | futures[fut] = record 315 | 316 | # TODO: this metric is kept for backwards-compatibility, but should be revisited 317 | with CONSUMED_MESSAGE_TIME.labels( 318 | stream_id=self.stream_id, 319 | partition=next(iter(batch)), 320 | group_id=self.group_id, 321 | ).time(): 322 | done, pending = await asyncio.wait( 323 | futures.keys(), 324 | timeout=self._timeout, 325 | return_when=asyncio.FIRST_EXCEPTION, 326 | ) 327 | 328 | # Look for failures 329 | for task in done: 330 | record = futures[task] 331 | tp = aiokafka.TopicPartition(record.topic, record.partition) 332 | 333 | # Get the largest offset of the batch 334 | current_max = self._tp.get(tp) 335 | if not current_max: 336 | self._tp[tp] = record.offset + 1 337 | else: 338 | self._tp[tp] = max(record.offset + 1, current_max) 339 | 340 | try: 341 | if exc := task.exception(): 342 | self._count_message(record, error=exc.__class__.__name__) 343 | await self.on_handler_failed(exc, record) 344 | else: 345 | self._count_message(record) 346 | except asyncio.InvalidStateError: 347 | # Task didnt finish yet, we shouldnt be here since we are 348 | # iterating the `done` list, so just log something 
349 | logger.warning(f"Trying to get exception from unfinished task. Record: {record}") 350 | except asyncio.CancelledError: 351 | # During task execution any exception will be returned in 352 | # the `done` list. But timeout exception should be captured 353 | # independendly, thats why we handle this condition here. 354 | self._count_message(record, error="cancelled") 355 | await self.on_handler_failed(HandlerTaskCancelled(record), record) 356 | 357 | # Process timeout tasks 358 | for task in pending: 359 | record = futures[task] 360 | 361 | try: 362 | # This will raise a `asyncio.CancelledError`, the consumer logic 363 | # is responsible to catch it. 364 | task.cancel() 365 | await task 366 | except asyncio.CancelledError: 367 | # App didnt catch this exception, so we treat it as an unmanaged one. 368 | await self.on_handler_timeout(record) 369 | 370 | self._count_message(record, error="pending") 371 | 372 | for tp, records in batch.items(): 373 | CONSUMED_MESSAGES_BATCH_SIZE.labels( 374 | stream_id=tp.topic, 375 | group_id=self.group_id, 376 | partition=tp.partition, 377 | ).observe(len(records)) 378 | 379 | for record in sorted(records, key=lambda rec: rec.offset): 380 | lead_time = time.time() - record.timestamp / 1000 # type: ignore 381 | MESSAGE_LEAD_TIME.labels( 382 | stream_id=record.topic, 383 | group_id=self.group_id, 384 | partition=record.partition, 385 | ).observe(lead_time) 386 | 387 | CONSUMER_TOPIC_OFFSET.labels( 388 | stream_id=record.topic, 389 | group_id=self.group_id, 390 | partition=record.partition, 391 | ).set(record.offset) 392 | 393 | # Commit first and then call the event subscribers 394 | await self._maybe_commit() 395 | for _, records in batch.items(): 396 | for record in records: 397 | await self.emit("message", record=record) 398 | 399 | def _count_message(self, record: aiokafka.ConsumerRecord, error: str = NOERROR) -> None: 400 | CONSUMED_MESSAGES.labels( 401 | stream_id=record.topic, 402 | error=error, 403 | partition=record.partition, 404 | group_id=self.group_id, 405 | ).inc() 406 | 407 | @property 408 | def consumer(self) -> aiokafka.AIOKafkaConsumer: 409 | return self._consumer 410 | 411 | async def _maybe_commit(self, forced: bool = False) -> None: 412 | if not self._auto_commit: 413 | return 414 | 415 | if not self._consumer.assignment() or not self._tp: 416 | logger.warning("Cannot commit because no partitions are assigned!") 417 | return 418 | 419 | interval = self._app.kafka_settings.get("auto_commit_interval_ms", 5000) / 1000 420 | now = time.time() 421 | if forced or (now > (self._last_commit + interval)): 422 | try: 423 | if self._tp: 424 | await self._consumer.commit(offsets=self._tp) 425 | except aiokafka.errors.CommitFailedError: 426 | logger.warning("Error attempting to commit", exc_info=True) 427 | self._last_commit = now 428 | 429 | async def publish( 430 | self, 431 | stream_id: str, 432 | record: aiokafka.ConsumerRecord, 433 | headers: typing.Optional[typing.List[typing.Tuple[str, bytes]]] = None, 434 | ) -> None: 435 | record_headers = (record.headers or []) + (headers or []) 436 | 437 | fut = await self._app.raw_publish( 438 | stream_id=stream_id, data=record.value, key=record.key, headers=record_headers 439 | ) 440 | await fut 441 | 442 | async def healthy(self) -> None: 443 | if not self._running: 444 | self._health_metric(False) 445 | raise ConsumerUnhealthyException(f"Consumer '{self}' is not running") 446 | 447 | if self._consumer is not None and not await self._consumer._client.ready( 448 | 
self._consumer._coordinator.coordinator_id 449 | ): 450 | self._health_metric(False) 451 | raise ConsumerUnhealthyException(f"Consumer '{self}' is not ready") 452 | 453 | self._health_metric(True) 454 | return 455 | 456 | # Event handlers 457 | async def on_partitions_revoked(self, revoked: typing.List[aiokafka.TopicPartition]) -> None: 458 | if revoked: 459 | # Wait for the current batch to be processed 460 | async with self._processing: 461 | if self._auto_commit: 462 | # And commit before releasing the partitions. 463 | await self._maybe_commit(forced=True) 464 | 465 | for tp in revoked: 466 | # Remove the partition from the dict 467 | self._tp.pop(tp, None) 468 | CONSUMER_REBALANCED.labels( 469 | partition=tp.partition, 470 | group_id=self.group_id, 471 | event="revoked", 472 | ).inc() 473 | logger.info(f"Partitions revoked to {self}: {revoked}") 474 | 475 | async def on_partitions_assigned(self, assigned: typing.List[aiokafka.TopicPartition]) -> None: 476 | if assigned: 477 | logger.info(f"Partitions assigned to {self}: {assigned}") 478 | 479 | for tp in assigned: 480 | position = await self._consumer.position(tp) 481 | self._tp[tp] = position 482 | 483 | CONSUMER_REBALANCED.labels( 484 | partition=tp.partition, 485 | group_id=self.group_id, 486 | event="assigned", 487 | ).inc() 488 | 489 | async def on_handler_timeout(self, record: aiokafka.ConsumerRecord) -> None: 490 | raise HandlerTaskCancelled(record) 491 | 492 | async def on_handler_failed( 493 | self, exception: BaseException, record: aiokafka.ConsumerRecord 494 | ) -> None: 495 | if isinstance(exception, UnhandledMessage): 496 | logger.warning("Unhandled message, ignoring...", exc_info=exception) 497 | else: 498 | raise exception 499 | -------------------------------------------------------------------------------- /kafkaesk/exceptions.py: -------------------------------------------------------------------------------- 1 | from typing import TYPE_CHECKING 2 | 3 | import aiokafka 4 | 5 | if TYPE_CHECKING: # pragma: no cover 6 | from .app import SchemaRegistration 7 | else: 8 | SchemaRegistration = SubscriptionConsumer = None 9 | 10 | 11 | class JsonSchemaRequiredException(Exception): 12 | ... 13 | 14 | 15 | class SchemaConflictException(Exception): 16 | def __init__(self, existing: SchemaRegistration, new: SchemaRegistration): 17 | self.existing = existing 18 | self.new = new 19 | 20 | def __str__(self) -> str: 21 | return f"""""" 25 | 26 | 27 | class UnhandledMessage(Exception): 28 | ... 29 | 30 | 31 | class StopConsumer(Exception): 32 | ... 33 | 34 | 35 | class HandlerTaskCancelled(Exception): 36 | def __init__(self, record: aiokafka.ConsumerRecord): 37 | self.record = record 38 | 39 | 40 | class ConsumerUnhealthyException(Exception): 41 | def __init__(self, reason: str): 42 | self.reason = reason 43 | 44 | 45 | class AutoCommitError(ConsumerUnhealthyException): 46 | ... 47 | 48 | 49 | class ProducerUnhealthyException(Exception): 50 | def __init__(self, producer: aiokafka.AIOKafkaProducer): 51 | self.producer = producer 52 | 53 | 54 | class AppNotConfiguredException(Exception): 55 | ... 
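A minimal sketch of how the health-related exceptions above surface in practice (illustrative only: it assumes `Application.health_check()` raises `ConsumerUnhealthyException` when a consumer is not running or ready, as tests/acceptance/test_healthcheck.py exercises; treating `ProducerUnhealthyException` the same way is an assumption here):

    from kafkaesk.exceptions import ConsumerUnhealthyException
    from kafkaesk.exceptions import ProducerUnhealthyException


    async def liveness_probe(app) -> bool:
        # health_check() raises on failure; mapping that to a bool is just one
        # way to adapt it to an HTTP liveness endpoint. `app` is assumed to be
        # an initialized kafkaesk.Application.
        try:
            await app.health_check()
        except (ConsumerUnhealthyException, ProducerUnhealthyException):
            return False
        return True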
56 | -------------------------------------------------------------------------------- /kafkaesk/ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onna/kafkaesk/10e88fd921fddff70b8cb973e739e280caa4cac8/kafkaesk/ext/__init__.py -------------------------------------------------------------------------------- /kafkaesk/ext/logging/__init__.py: -------------------------------------------------------------------------------- 1 | from .handler import PydanticKafkaeskHandler 2 | from .handler import PydanticLogModel 3 | from .handler import PydanticStreamHandler 4 | 5 | __all__ = ("PydanticLogModel", "PydanticKafkaeskHandler", "PydanticStreamHandler") 6 | -------------------------------------------------------------------------------- /kafkaesk/ext/logging/handler.py: -------------------------------------------------------------------------------- 1 | from .record import PydanticLogRecord 2 | from datetime import datetime 3 | from typing import Any 4 | from typing import Dict 5 | from typing import IO 6 | from typing import Optional 7 | 8 | import asyncio 9 | import kafkaesk 10 | import logging 11 | import os 12 | import pydantic 13 | import socket 14 | import sys 15 | import time 16 | 17 | NAMESPACE_FILEPATH = "/var/run/secrets/kubernetes.io/serviceaccount/namespace" 18 | _not_set = object() 19 | _K8S_NS = _not_set 20 | 21 | 22 | def get_k8s_ns() -> Optional[str]: 23 | global _K8S_NS 24 | if _K8S_NS == _not_set: 25 | if os.path.exists(NAMESPACE_FILEPATH): 26 | with open(NAMESPACE_FILEPATH) as fi: 27 | _K8S_NS = fi.read().strip() 28 | else: 29 | _K8S_NS = None 30 | return _K8S_NS # type: ignore 31 | 32 | 33 | class InvalidLogFormat(Exception): 34 | ... 35 | 36 | 37 | class PydanticLogModel(pydantic.BaseModel): 38 | class Config: 39 | extra = pydantic.Extra.allow 40 | 41 | 42 | class PydanticStreamHandler(logging.StreamHandler): 43 | def __init__(self, stream: Optional[IO[str]] = None): 44 | super().__init__(stream=stream) 45 | 46 | def format(self, record: PydanticLogRecord) -> str: # type: ignore 47 | message = super().format(record) 48 | 49 | for log in getattr(record, "pydantic_data", []): 50 | # log some attributes 51 | formatted_data = [] 52 | size = 0 53 | for field_name in log.__fields__.keys(): 54 | val = getattr(log, field_name) 55 | formatted = f"{field_name}={val}" 56 | size += len(formatted) 57 | formatted_data.append(formatted) 58 | 59 | if size > 256: 60 | break 61 | message += f": {', '.join(formatted_data)}" 62 | break 63 | 64 | return message 65 | 66 | 67 | class KafkaeskQueue: 68 | def __init__( 69 | self, 70 | app: kafkaesk.app.Application, 71 | max_queue: int = 10000, 72 | ): 73 | self._queue: Optional[asyncio.Queue] = None 74 | self._queue_size = max_queue 75 | 76 | self._app = app 77 | 78 | self._app.on("finalize", self.flush) 79 | 80 | self._task: Optional[asyncio.Task] = None 81 | 82 | def start(self) -> None: 83 | if self._queue is None: 84 | self._queue = asyncio.Queue(maxsize=self._queue_size) 85 | 86 | if self._task is None or self._task.done(): 87 | self._task = asyncio.create_task(self._run()) 88 | 89 | def close(self) -> None: 90 | if self._task is not None and not self._task._loop.is_closed(): 91 | if not self._task.done() and not self._task.cancelled(): 92 | self._task.cancel() 93 | 94 | @property 95 | def running(self) -> bool: 96 | if self._task is None: 97 | return False 98 | 99 | if self._task.done(): 100 | return False 101 | 102 | return True 103 | 104 | async def _run(self) -> 
None: 105 | if self._queue is None: 106 | raise RuntimeError("Queue must be started before workers") 107 | 108 | while True: 109 | try: 110 | stream, log_data = await asyncio.wait_for(asyncio.create_task(self._queue.get()), 1) 111 | await self._publish(stream, log_data) 112 | 113 | except asyncio.TimeoutError: 114 | continue 115 | 116 | except asyncio.CancelledError: 117 | await self.flush() 118 | return 119 | 120 | async def flush(self) -> None: 121 | if self._queue is not None: 122 | while not self._queue.empty(): 123 | stream, message = await self._queue.get() 124 | await self._publish(stream, message) 125 | 126 | async def _publish(self, stream: str, log_data: PydanticLogModel) -> None: 127 | if not self._app._initialized: 128 | await self._app.initialize() 129 | 130 | await self._app.publish(stream, log_data) 131 | # TODO: Handle other Kafka errors that may be raised 132 | 133 | def put_nowait(self, stream: str, log_data: PydanticLogModel) -> None: 134 | if self._queue is not None: 135 | self._queue.put_nowait((stream, log_data)) 136 | 137 | 138 | _formatter = logging.Formatter() 139 | 140 | 141 | class PydanticKafkaeskHandler(logging.Handler): 142 | def __init__( 143 | self, app: kafkaesk.Application, stream: str, queue: Optional[KafkaeskQueue] = None 144 | ): 145 | self.app = app 146 | self.stream = stream 147 | 148 | if queue is None: 149 | self._queue = KafkaeskQueue(self.app) 150 | else: 151 | self._queue = queue 152 | 153 | self._last_warning_sent = 0.0 154 | 155 | self._initialize_model() 156 | 157 | super().__init__() 158 | 159 | def clone(self) -> "PydanticKafkaeskHandler": 160 | return PydanticKafkaeskHandler(self.app, self.stream, queue=self._queue) 161 | 162 | def _initialize_model(self) -> None: 163 | try: 164 | self.app.schema("PydanticLogModel")(PydanticLogModel) 165 | except kafkaesk.app.SchemaConflictException: 166 | pass 167 | 168 | def _format_base_log(self, record: PydanticLogRecord) -> Dict[str, Any]: 169 | if record.exc_text is None and record.exc_info: 170 | record.exc_text = _formatter.formatException(record.exc_info) 171 | try: 172 | record.exc_type = record.exc_info[0].__name__ # type: ignore 173 | except (AttributeError, IndexError): # pragma: no cover 174 | ...
175 | 176 | if record.stack_info: 177 | record.stack_text = _formatter.formatStack(record.stack_info) 178 | 179 | service_name = "unknown" 180 | hostname = socket.gethostname() 181 | dashes = hostname.count("-") 182 | if dashes > 0: 183 | # detect kubernetes service host 184 | service_name = "-".join(hostname.split("-")[: -min(dashes, 2)]) 185 | 186 | return { 187 | "timestamp": datetime.utcnow().isoformat(), 188 | "logger": record.name, 189 | "severity": record.levelname, 190 | "level": record.levelno, 191 | "message": record.getMessage(), 192 | "exception": record.exc_type, 193 | "trace": record.stack_text, 194 | "stack": record.exc_text, 195 | "hostname": hostname, 196 | "service": service_name, 197 | "namespace": get_k8s_ns(), 198 | "cluster": os.environ.get("CLUSTER"), 199 | } 200 | 201 | def _format_extra_logs(self, record: PydanticLogRecord) -> Dict[str, Any]: 202 | extra_logs: Dict[str, Any] = {} 203 | 204 | for log in getattr(record, "pydantic_data", []): 205 | extra_logs.update( 206 | log.dict( 207 | exclude_none=True, 208 | exclude={ 209 | "_is_log_model", 210 | }, 211 | ) 212 | ) 213 | 214 | return extra_logs 215 | 216 | def emit(self, record: PydanticLogRecord) -> None: # type: ignore 217 | if not self._queue.running: 218 | try: 219 | self._queue.start() 220 | except RuntimeError: 221 | sys.stderr.write("RuntimeError starting kafka logging, ignoring") 222 | return 223 | 224 | try: 225 | raw_data = self._format_base_log(record) 226 | raw_data.update(self._format_extra_logs(record)) 227 | log_data = PydanticLogModel(**raw_data) 228 | self._queue.put_nowait(self.stream, log_data) 229 | except InvalidLogFormat: # pragma: no cover 230 | sys.stderr.write("PydanticKafkaeskHandler recieved non-pydantic model") 231 | except RuntimeError: 232 | sys.stderr.write("Queue No event loop running to send log to Kafka\n") 233 | except asyncio.QueueFull: 234 | if time.time() - self._last_warning_sent > 30: 235 | sys.stderr.write("Queue hit max log queue size, discarding message\n") 236 | self._last_warning_sent = time.time() 237 | except AttributeError: # pragma: no cover 238 | sys.stderr.write("Queue Error sending Kafkaesk log message\n") 239 | 240 | def close(self) -> None: 241 | self.acquire() 242 | try: 243 | super().close() 244 | if self._queue is not None: 245 | self._queue.close() 246 | finally: 247 | self.release() 248 | -------------------------------------------------------------------------------- /kafkaesk/ext/logging/record.py: -------------------------------------------------------------------------------- 1 | from types import TracebackType 2 | from typing import List 3 | from typing import Optional 4 | from typing import Tuple 5 | from typing import Union 6 | 7 | import logging 8 | import pydantic 9 | 10 | 11 | class PydanticLogRecord(logging.LogRecord): 12 | def __init__( 13 | self, 14 | name: str, 15 | level: int, 16 | fn: str, 17 | lno: int, 18 | msg: str, 19 | args: Tuple, 20 | exc_info: Union[ 21 | Tuple[type, BaseException, Optional[TracebackType]], Tuple[None, None, None], None 22 | ], 23 | func: Optional[str] = None, 24 | sinfo: Optional[str] = None, 25 | pydantic_data: Optional[List[pydantic.BaseModel]] = None, 26 | ): 27 | super().__init__(name, level, fn, lno, msg, args, exc_info, func, sinfo) 28 | 29 | self.pydantic_data = pydantic_data or [] 30 | self.exc_type: Optional[str] = None 31 | self.stack_text: Optional[str] = None 32 | 33 | 34 | def factory( 35 | name: str, 36 | level: int, 37 | fn: str, 38 | lno: int, 39 | msg: str, 40 | args: Tuple, 41 | exc_info: Union[ 
42 | Tuple[type, BaseException, Optional[TracebackType]], Tuple[None, None, None], None 43 | ], 44 | func: Optional[str] = None, 45 | sinfo: Optional[str] = None, 46 | ) -> PydanticLogRecord: 47 | pydantic_data: List[pydantic.BaseModel] = [] 48 | 49 | new_args = [] 50 | for arg in args: 51 | if isinstance(arg, pydantic.BaseModel): 52 | if hasattr(arg, "_is_log_model") and getattr(arg, "_is_log_model", False) is True: 53 | pydantic_data.append(arg) 54 | continue 55 | new_args.append(arg) 56 | 57 | args = tuple(new_args) 58 | 59 | record = PydanticLogRecord( 60 | name, level, fn, lno, msg, args, exc_info, func, sinfo, pydantic_data 61 | ) 62 | 63 | return record 64 | 65 | 66 | if logging.getLogRecordFactory() != factory: 67 | logging.setLogRecordFactory(factory) 68 | -------------------------------------------------------------------------------- /kafkaesk/kafka.py: -------------------------------------------------------------------------------- 1 | from .metrics import watch_kafka 2 | from aiokafka import TopicPartition 3 | from kafkaesk.utils import run_async 4 | from typing import Any 5 | from typing import Dict 6 | from typing import List 7 | from typing import Optional 8 | from typing import Tuple 9 | 10 | import kafka 11 | import kafka.admin 12 | import kafka.admin.client 13 | import kafka.errors 14 | import kafka.structs 15 | 16 | 17 | class KafkaTopicManager: 18 | _admin_client: Optional[kafka.admin.client.KafkaAdminClient] = None 19 | _client: Optional[kafka.KafkaClient] = None 20 | _kafka_api_version: Optional[Tuple[int, ...]] = None 21 | 22 | def __init__( 23 | self, 24 | bootstrap_servers: List[str], 25 | prefix: str = "", 26 | replication_factor: Optional[int] = None, 27 | kafka_api_version: str = "auto", 28 | ssl_context: Optional[Any] = None, 29 | security_protocol: Optional[str] = "PLAINTEXT", 30 | sasl_mechanism: Optional[str] = "", 31 | sasl_plain_username: Optional[str] = "", 32 | sasl_plain_password: Optional[str] = "", 33 | ): 34 | self.prefix = prefix 35 | self._bootstrap_servers = bootstrap_servers 36 | self._admin_client = self._client = None 37 | self._topic_cache: List[str] = [] 38 | self._replication_factor: int = replication_factor or min(3, len(self._bootstrap_servers)) 39 | if kafka_api_version == "auto": 40 | self._kafka_api_version = None 41 | else: 42 | self._kafka_api_version = tuple([int(v) for v in kafka_api_version.split(".")]) 43 | self.ssl_context = ssl_context 44 | self.security_protocol = security_protocol 45 | self.sasl_mechanism = sasl_mechanism 46 | self.sasl_plain_username = sasl_plain_username 47 | self.sasl_plain_password = sasl_plain_password 48 | 49 | @property 50 | def kafka_api_version(self) -> Optional[Tuple[int, ...]]: 51 | return self._kafka_api_version 52 | 53 | async def finalize(self) -> None: 54 | if self._admin_client is not None: 55 | await run_async(self._admin_client.close) 56 | self._admin_client = None 57 | if self._client is not None: 58 | await run_async(self._client.close) 59 | self._client = None 60 | 61 | def get_topic_id(self, topic: str) -> str: 62 | return f"{self.prefix}{topic}" 63 | 64 | async def get_admin_client(self) -> kafka.admin.client.KafkaAdminClient: 65 | if self._admin_client is None: 66 | with watch_kafka("sync_admin_connect"): 67 | self._admin_client = await run_async( 68 | kafka.admin.client.KafkaAdminClient, 69 | bootstrap_servers=self._bootstrap_servers, 70 | api_version=self._kafka_api_version, 71 | ssl_context=self.ssl_context, 72 | security_protocol=self.security_protocol, 73 | 
sasl_mechanism=self.sasl_mechanism, 74 | sasl_plain_username=self.sasl_plain_username, 75 | sasl_plain_password=self.sasl_plain_password, 76 | ) 77 | return self._admin_client 78 | 79 | async def list_consumer_group_offsets( 80 | self, group_id: str, partitions: Optional[List[TopicPartition]] = None 81 | ) -> Dict[kafka.structs.TopicPartition, kafka.structs.OffsetAndMetadata]: 82 | client = await self.get_admin_client() 83 | return await run_async(client.list_consumer_group_offsets, group_id, partitions=partitions) 84 | 85 | async def topic_exists(self, topic: str) -> bool: 86 | if self._client is None: 87 | with watch_kafka("sync_consumer_connect"): 88 | self._client = await run_async( 89 | kafka.KafkaConsumer, 90 | bootstrap_servers=self._bootstrap_servers, 91 | enable_auto_commit=False, 92 | api_version=self._kafka_api_version, 93 | ssl_context=self.ssl_context, 94 | security_protocol=self.security_protocol, 95 | sasl_mechanism=self.sasl_mechanism, 96 | sasl_plain_username=self.sasl_plain_username, 97 | sasl_plain_password=self.sasl_plain_password, 98 | ) 99 | if topic in self._topic_cache: 100 | return True 101 | with watch_kafka("sync_topics"): 102 | if topic in await run_async(self._client.topics): 103 | self._topic_cache.append(topic) 104 | return True 105 | return False 106 | 107 | async def create_topic( 108 | self, 109 | topic: str, 110 | *, 111 | partitions: int = 7, 112 | replication_factor: Optional[int] = None, 113 | retention_ms: Optional[int] = None, 114 | ) -> None: 115 | topic_configs: Dict[str, Any] = {} 116 | if retention_ms is not None: 117 | topic_configs["retention.ms"] = retention_ms 118 | new_topic = kafka.admin.NewTopic( 119 | topic, 120 | partitions, 121 | replication_factor or self._replication_factor, 122 | topic_configs=topic_configs, 123 | ) 124 | client = await self.get_admin_client() 125 | try: 126 | with watch_kafka("sync_create_topics"): 127 | await run_async(client.create_topics, [new_topic]) 128 | except kafka.errors.TopicAlreadyExistsError: 129 | pass 130 | self._topic_cache.append(topic) 131 | return None 132 | -------------------------------------------------------------------------------- /kafkaesk/metrics.py: -------------------------------------------------------------------------------- 1 | from prometheus_client.utils import INF 2 | from typing import Dict 3 | from typing import Optional 4 | from typing import Type 5 | 6 | import prometheus_client as client 7 | import time 8 | import traceback 9 | 10 | NOERROR = "none" 11 | ERROR_GENERAL_EXCEPTION = "exception" 12 | 13 | KAFKA_ACTION = client.Counter( 14 | "kafkaesk_kafka_action", 15 | "Perform action on kafka", 16 | ["type", "error"], 17 | ) 18 | 19 | KAFKA_ACTION_TIME = client.Histogram( 20 | "kafkaesk_kafka_action_time", 21 | "Time taken to perform kafka action", 22 | ["type"], 23 | ) 24 | 25 | PUBLISH_MESSAGES = client.Counter( 26 | "kafkaesk_publish_messages", 27 | "Number of messages attempted to be published", 28 | ["stream_id", "error"], 29 | ) 30 | 31 | PUBLISH_MESSAGES_TIME = client.Histogram( 32 | "kafkaesk_publish_messages_time", 33 | "Time taken for a message to be queued for publishing (in seconds)", 34 | ["stream_id"], 35 | ) 36 | 37 | PUBLISHED_MESSAGES = client.Counter( 38 | "kafkaesk_published_messages", 39 | "Number of published messages", 40 | ["stream_id", "partition", "error"], 41 | ) 42 | 43 | PUBLISHED_MESSAGES_TIME = client.Histogram( 44 | "kafkaesk_published_messages_time", 45 | "Time taken for a message to be published (in seconds)", 46 | ["stream_id"], 47 | ) 48 | 49 
| 50 | CONSUMED_MESSAGES = client.Counter( 51 | "kafkaesk_consumed_messages", 52 | "Number of consumed messages", 53 | ["stream_id", "partition", "error", "group_id"], 54 | ) 55 | 56 | CONSUMED_MESSAGES_BATCH_SIZE = client.Histogram( 57 | "kafkaesk_consumed_messages_batch_size", 58 | "Size of message batches consumed", 59 | ["stream_id", "group_id", "partition"], 60 | buckets=[1, 5, 10, 20, 50, 100, 200, 500, 1000], 61 | ) 62 | 63 | CONSUMED_MESSAGE_TIME = client.Histogram( 64 | "kafkaesk_consumed_message_elapsed_time", 65 | "Processing time for consumed message (in seconds)", 66 | ["stream_id", "group_id", "partition"], 67 | ) 68 | 69 | PRODUCER_TOPIC_OFFSET = client.Gauge( 70 | "kafkaesk_produced_topic_offset", 71 | "Offset for produced messages a the topic", 72 | ["stream_id", "partition"], 73 | ) 74 | 75 | CONSUMER_TOPIC_OFFSET = client.Gauge( 76 | "kafkaesk_consumed_topic_offset", 77 | "Offset for consumed messages in a topic", 78 | ["group_id", "partition", "stream_id"], 79 | ) 80 | 81 | MESSAGE_LEAD_TIME = client.Histogram( 82 | "kafkaesk_message_lead_time", 83 | "Time that the message has been waiting to be handled by a consumer (in seconds)", 84 | ["stream_id", "group_id", "partition"], 85 | buckets=(0.1, 0.5, 1, 3, 5, 10, 30, 60, 60, 120, 300, INF), 86 | ) 87 | 88 | CONSUMER_REBALANCED = client.Counter( 89 | "kafkaesk_consumer_rebalanced", 90 | "Consumer rebalances", 91 | ["group_id", "partition", "event"], 92 | ) 93 | 94 | CONSUMER_HEALTH = client.Gauge( 95 | "kafkaesk_consumer_health", "Liveness probe for the consumer", ["group_id"] 96 | ) 97 | 98 | 99 | class watch: 100 | start: float 101 | 102 | def __init__( 103 | self, 104 | *, 105 | counter: Optional[client.Counter] = None, 106 | histogram: Optional[client.Histogram] = None, 107 | labels: Optional[Dict[str, str]] = None, 108 | ): 109 | self.counter = counter 110 | self.histogram = histogram 111 | self.labels = labels or {} 112 | 113 | def __enter__(self) -> None: 114 | self.start = time.time() 115 | 116 | def __exit__( 117 | self, 118 | exc_type: Optional[Type[Exception]] = None, 119 | exc_value: Optional[Exception] = None, 120 | exc_traceback: Optional[traceback.StackSummary] = None, 121 | ) -> None: 122 | error = NOERROR 123 | if self.histogram is not None: 124 | finished = time.time() 125 | self.histogram.labels(**self.labels).observe(finished - self.start) 126 | 127 | if self.counter is not None: 128 | if exc_value is None: 129 | error = NOERROR 130 | else: 131 | error = ERROR_GENERAL_EXCEPTION 132 | self.counter.labels(error=error, **self.labels).inc() 133 | 134 | 135 | class watch_kafka(watch): 136 | def __init__(self, type: str): 137 | super().__init__(counter=KAFKA_ACTION, histogram=KAFKA_ACTION_TIME, labels={"type": type}) 138 | 139 | 140 | class watch_publish(watch): 141 | def __init__(self, stream_id: str): 142 | super().__init__( 143 | counter=PUBLISH_MESSAGES, 144 | histogram=PUBLISH_MESSAGES_TIME, 145 | labels={"stream_id": stream_id}, 146 | ) 147 | -------------------------------------------------------------------------------- /kafkaesk/publish.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onna/kafkaesk/10e88fd921fddff70b8cb973e739e280caa4cac8/kafkaesk/publish.py -------------------------------------------------------------------------------- /kafkaesk/py.typed: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/onna/kafkaesk/10e88fd921fddff70b8cb973e739e280caa4cac8/kafkaesk/py.typed -------------------------------------------------------------------------------- /kafkaesk/utils.py: -------------------------------------------------------------------------------- 1 | from concurrent.futures.thread import ThreadPoolExecutor 2 | from functools import partial 3 | from typing import Any 4 | from typing import Callable 5 | 6 | import asyncio 7 | 8 | executor = ThreadPoolExecutor(max_workers=30) 9 | 10 | 11 | async def run_async(func: Callable[..., Any], *args: Any, **kwargs: Any) -> Any: 12 | func_to_run = partial(func, *args, **kwargs) 13 | loop = asyncio.get_event_loop() 14 | return await loop.run_in_executor(executor, func_to_run) 15 | 16 | 17 | def resolve_dotted_name(name: str) -> Any: 18 | """ 19 | import the provided dotted name 20 | >>> resolve_dotted_name('foo.bar') 21 | 22 | :param name: dotted name 23 | """ 24 | names = name.split(".") 25 | used = names.pop(0) 26 | found = __import__(used) 27 | for n in names: 28 | used += "." + n 29 | try: 30 | found = getattr(found, n) 31 | except AttributeError: 32 | __import__(used) 33 | found = getattr(found, n) 34 | 35 | return found 36 | -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | namespace_packages=True 3 | mypy_path = stubs 4 | follow_imports_for_stubs = True 5 | disallow_incomplete_defs = True 6 | check_untyped_defs = True 7 | disallow_untyped_calls = True 8 | disallow_untyped_defs = True 9 | disallow_untyped_decorators = True 10 | disable_error_code=empty-body,assignment,unused-coroutine,var-annotated,arg-type 11 | 12 | [mypy-aiohttp_client] 13 | ignore_missing_imports = True 14 | [mypy-opentracing.*] 15 | ignore_missing_imports = True 16 | 17 | # test ignores 18 | [mypy-pytest] 19 | ignore_missing_imports = True 20 | [mypy-asynctest] 21 | ignore_missing_imports = True 22 | [mypy-pytest_docker_fixtures] 23 | ignore_missing_imports = True 24 | [mypy-prometheus_client.*] 25 | ignore_missing_imports = True 26 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "kafkaesk" 3 | version = "0.8.5" 4 | description = "Easy publish and subscribe to events with python and Kafka." 
5 | authors = ["vangheem ", "pfreixes "] 6 | classifiers = [ 7 | "Programming Language :: Python :: 3.7", 8 | "Programming Language :: Python :: 3.8", 9 | "Programming Language :: Python :: 3.9", 10 | "Programming Language :: Python :: 3.10", 11 | "Framework :: AsyncIO", 12 | "License :: OSI Approved :: BSD License", 13 | "Topic :: System :: Distributed Computing" 14 | ] 15 | readme = "README.md" 16 | 17 | 18 | [tool.poetry.dependencies] 19 | python = ">=3.8.1" 20 | aiokafka = ">=0.7.1" 21 | kafka-python = "^2.0.2" 22 | pydantic = ">=1.5.1" 23 | orjson = ">=3.3.1" 24 | jsonschema = ">=3.2.0" 25 | prometheus_client = ">=0.8.0" 26 | opentracing = ">=2.3.0" 27 | async-timeout = ">=3.0.1" 28 | 29 | [tool.poetry.dev-dependencies] 30 | pytest = "^7.4.0" 31 | pytest-docker-fixtures = "^1.3.17" 32 | pytest-asyncio = "^0.21.0" 33 | mypy = "1.0.0" 34 | flake8 = "^6.0.0" 35 | isort = "^5.12.0" 36 | 37 | black = "^23.3.0" 38 | pytest-rerunfailures = "^11.1.2" 39 | pytest-cov = "^4.1.0" 40 | pdbpp = "^0.10.3" 41 | python-language-server = "^0.36.2" 42 | jaeger-client = "4.7.0" 43 | 44 | [tool.poetry.scripts] 45 | kafkaesk = 'kafkaesk.app:run' 46 | 47 | 48 | [tool.poetry.urls] 49 | "GitHub" = "https://github.com/onna/kafkaesk" 50 | 51 | [tool.black] 52 | line-length = 100 53 | target-version = ['py37'] 54 | include = '\.pyi?$' 55 | exclude = ''' 56 | 57 | ( 58 | /( 59 | \.eggs # exclude a few common directories in the 60 | | \.git # root of the project 61 | | \.hg 62 | | \.mypy_cache 63 | | \.tox 64 | | \.venv 65 | | _build 66 | | buck-out 67 | | build 68 | | dist 69 | )/ 70 | | foo.py # also separately exclude a file named foo.py in 71 | # the root of the project 72 | ) 73 | ''' 74 | [build-system] 75 | requires = ["poetry>=0.12"] 76 | build-backend = "poetry.masonry.api" 77 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | addopts = -p no:warnings -------------------------------------------------------------------------------- /stubs/aiokafka/__init__.py: -------------------------------------------------------------------------------- 1 | from asyncio.events import AbstractEventLoop 2 | from collections import namedtuple 3 | from kafka.structs import ConsumerRecord 4 | from kafka.structs import TopicPartition 5 | from typing import Any 6 | from typing import AsyncIterator 7 | from typing import Awaitable 8 | from typing import Dict 9 | from typing import List 10 | from typing import Optional 11 | from typing import Set 12 | from typing import Tuple 13 | 14 | 15 | class AIOKafkaProducer: 16 | _sender: Any 17 | 18 | def __init__( 19 | self, 20 | bootstrap_servers: List[str], 21 | loop: AbstractEventLoop, 22 | enable_auto_commit: Optional[bool] = True, 23 | group_id: Optional[str] = None, 24 | api_version: str = "auto", 25 | ): 26 | ... 27 | 28 | async def send( 29 | self, 30 | topic_id: str, 31 | value: bytes, 32 | key: Optional[bytes] = None, 33 | headers: Optional[List[Tuple[str, bytes]]] = None, 34 | ) -> Awaitable[ConsumerRecord]: 35 | ... 36 | 37 | async def start(self) -> None: 38 | ... 39 | 40 | async def stop(self) -> None: 41 | ... 42 | 43 | async def flush(self) -> None: 44 | ... 45 | 46 | 47 | class AIOKafkaClient: 48 | async def ready(self, node_id: str, *, group: Optional[str] = None) -> bool: 49 | ... 50 | 51 | 52 | class GroupCoordinator: 53 | coordinator_id: str 54 | 55 | def request_rejoin(self) -> None: 56 | ... 
57 | 58 | def need_rejoin(self, subscription: "Subscription") -> bool: 59 | ... 60 | 61 | async def ensure_coordinator_known(self) -> None: 62 | ... 63 | 64 | async def ensure_active_group(self, subscription: Any, assignment: Any) -> None: 65 | ... 66 | 67 | 68 | class Subscription: 69 | subscription: Any 70 | 71 | async def wait_for_subscription(self) -> None: 72 | ... 73 | 74 | async def partitions_auto_assigned(self) -> bool: 75 | ... 76 | 77 | 78 | class AIOKafkaConsumer: 79 | _client: AIOKafkaClient 80 | _coordinator: GroupCoordinator 81 | _subscription: Subscription 82 | _group_id: Optional[str] = None 83 | 84 | def __init__( 85 | self, 86 | bootstrap_servers: List[str], 87 | loop: AbstractEventLoop, 88 | group_id: Optional[str] = None, 89 | api_version: str = "auto", 90 | **kwargs: Any, 91 | ): 92 | ... 93 | 94 | async def getone(self, *partitions: Optional[List[TopicPartition]]) -> ConsumerRecord: 95 | ... 96 | 97 | async def subscribe( 98 | self, 99 | topics: Optional[List[str]] = None, 100 | pattern: Optional[str] = None, 101 | listener: Optional["ConsumerRebalanceListener"] = None, 102 | ) -> None: 103 | ... 104 | 105 | async def start(self) -> None: 106 | ... 107 | 108 | async def stop(self) -> None: 109 | ... 110 | 111 | async def commit(self, offsets: Optional[Dict[TopicPartition, int]] = None) -> None: 112 | ... 113 | 114 | def __aiter__(self) -> AsyncIterator[ConsumerRecord]: 115 | ... 116 | 117 | async def __anext__(self) -> ConsumerRecord: 118 | ... 119 | 120 | async def position(self, tp: TopicPartition) -> int: 121 | ... 122 | 123 | async def seek(self, tp: TopicPartition, offset: int) -> None: 124 | ... 125 | 126 | async def seek_to_beginning(self, tp: TopicPartition) -> None: 127 | ... 128 | 129 | def assignment(self) -> Set[TopicPartition]: 130 | ... 131 | 132 | async def getmany( 133 | self, *partitions: TopicPartition, timeout_ms: int = 0, max_records: int = None 134 | ) -> Dict[TopicPartition, List[ConsumerRecord]]: 135 | ... 136 | 137 | 138 | class ConsumerRebalanceListener: 139 | async def on_partitions_revoked(self, revoked: List[TopicPartition]) -> None: 140 | ... 141 | 142 | async def on_partitions_assigned(self, assigned: List[TopicPartition]) -> None: 143 | ... 144 | 145 | 146 | OffsetAndMetadata = namedtuple( 147 | "OffsetAndMetadata", 148 | # TODO add leaderEpoch: OffsetAndMetadata(offset, leaderEpoch, metadata) 149 | ["offset", "metadata"], 150 | ) 151 | -------------------------------------------------------------------------------- /stubs/aiokafka/errors.py: -------------------------------------------------------------------------------- 1 | class KafkaError(Exception): 2 | ... 3 | 4 | 5 | class NodeNotReadyError(KafkaError): 6 | ... 7 | 8 | 9 | class RequestTimedOutError(KafkaError): 10 | ... 11 | 12 | 13 | class ConsumerStoppedError(KafkaError): 14 | ... 15 | 16 | 17 | class IllegalStateError(KafkaError): 18 | ... 19 | 20 | 21 | class UnrecognizedBrokerVersion(KafkaError): 22 | ... 23 | 24 | 25 | class KafkaConnectionError(KafkaError): 26 | ... 27 | 28 | 29 | class CommitFailedError(KafkaError): 30 | ... 
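As an illustration of why `CommitFailedError` is stubbed here, the sketch below mirrors the guard used in `BatchConsumer._maybe_commit` (kafkaesk/consumer.py, shown earlier in this listing); it is not a new API, and `consumer` is assumed to be an `aiokafka.AIOKafkaConsumer` with assigned partitions:

    import logging

    import aiokafka.errors

    logger = logging.getLogger(__name__)


    async def commit_quietly(consumer, offsets) -> None:
        # A commit can fail (typically after a rebalance); like _maybe_commit,
        # log it and keep the consume loop alive instead of crashing.
        try:
            await consumer.commit(offsets=offsets)
        except aiokafka.errors.CommitFailedError:
            logger.warning("Error attempting to commit", exc_info=True)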
31 | -------------------------------------------------------------------------------- /stubs/aiokafka/structs.py: -------------------------------------------------------------------------------- 1 | from kafka.structs import ConsumerRecord # noqa 2 | from kafka.structs import TopicPartition 3 | -------------------------------------------------------------------------------- /stubs/kafka/__init__.py: -------------------------------------------------------------------------------- 1 | from kafka.structs import TopicPartition 2 | from typing import Dict 3 | from typing import List 4 | from typing import Optional 5 | 6 | 7 | class KafkaConsumer: 8 | def __init__( 9 | self, 10 | bootstrap_servers: List[str], 11 | enable_auto_commit: Optional[bool] = True, 12 | group_id: Optional[str] = None, 13 | ): 14 | ... 15 | 16 | def assign(self, parts: List[TopicPartition]) -> None: 17 | ... 18 | 19 | def seek_to_beginning(self, partition: TopicPartition) -> None: 20 | ... 21 | 22 | def end_offsets(self, parts: List[TopicPartition]) -> Dict[TopicPartition, int]: 23 | ... 24 | 25 | 26 | class KafkaClient: 27 | topic_partitions: Dict[str, List[int]] 28 | 29 | def close(self) -> None: 30 | ... 31 | 32 | def topics(self) -> List[str]: 33 | ... 34 | -------------------------------------------------------------------------------- /stubs/kafka/admin/__init__.py: -------------------------------------------------------------------------------- 1 | from .client import NewTopic # noqa 2 | -------------------------------------------------------------------------------- /stubs/kafka/admin/client.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | from typing import Dict 3 | from typing import List 4 | 5 | import kafka.structs 6 | 7 | 8 | class NewTopic: 9 | def __init__( 10 | self, topic: str, partitions: int, replication_factor: int, topic_configs: Dict[str, Any] 11 | ): 12 | ... 13 | 14 | 15 | class KafkaAdminClient: 16 | def create_topics(self, topics: List[NewTopic]) -> None: 17 | ... 18 | 19 | def close(self) -> None: 20 | ... 21 | 22 | def list_consumer_group_offsets( 23 | self, group_id: str 24 | ) -> Dict[kafka.structs.TopicPartition, kafka.structs.OffsetAndMetadata]: 25 | ... 26 | -------------------------------------------------------------------------------- /stubs/kafka/errors.py: -------------------------------------------------------------------------------- 1 | class TopicAlreadyExistsError(Exception): 2 | ... 3 | 4 | 5 | class CommitFailedError(Exception): 6 | ... 7 | 8 | 9 | class IllegalStateError(Exception): 10 | ... 11 | -------------------------------------------------------------------------------- /stubs/kafka/structs.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from typing import Optional 3 | from typing import Tuple 4 | 5 | 6 | class ConsumerRecord: 7 | partition: int 8 | offset: int 9 | topic: str 10 | value: bytes 11 | key: bytes 12 | headers: Optional[List[Tuple[str, bytes]]] = None 13 | 14 | 15 | class TopicPartition: 16 | topic: str 17 | partition: int 18 | 19 | def __init__(self, topic: str, partition: int): 20 | ... 
21 | 22 | 23 | class OffsetAndMetadata: 24 | offset: int 25 | metadata: str 26 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onna/kafkaesk/10e88fd921fddff70b8cb973e739e280caa4cac8/tests/__init__.py -------------------------------------------------------------------------------- /tests/acceptance/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onna/kafkaesk/10e88fd921fddff70b8cb973e739e280caa4cac8/tests/acceptance/__init__.py -------------------------------------------------------------------------------- /tests/acceptance/ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onna/kafkaesk/10e88fd921fddff70b8cb973e739e280caa4cac8/tests/acceptance/ext/__init__.py -------------------------------------------------------------------------------- /tests/acceptance/ext/logging/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onna/kafkaesk/10e88fd921fddff70b8cb973e739e280caa4cac8/tests/acceptance/ext/logging/__init__.py -------------------------------------------------------------------------------- /tests/acceptance/ext/logging/test_handler.py: -------------------------------------------------------------------------------- 1 | from kafkaesk.ext.logging.handler import KafkaeskQueue 2 | from kafkaesk.ext.logging.handler import PydanticKafkaeskHandler 3 | from kafkaesk.ext.logging.handler import PydanticLogModel 4 | from kafkaesk.ext.logging.handler import PydanticStreamHandler 5 | from typing import Optional 6 | from unittest.mock import MagicMock 7 | from unittest.mock import Mock 8 | from unittest.mock import patch 9 | 10 | import asyncio 11 | import io 12 | import kafkaesk 13 | import logging 14 | import pydantic 15 | import pytest 16 | import pytest_asyncio 17 | import time 18 | import uuid 19 | 20 | pytestmark = pytest.mark.asyncio 21 | 22 | 23 | @pytest_asyncio.fixture(scope="function") 24 | def logger(): 25 | ll = logging.getLogger("test") 26 | ll.propagate = False 27 | ll.setLevel(logging.DEBUG) 28 | 29 | return ll 30 | 31 | 32 | @pytest_asyncio.fixture(scope="function") 33 | def stream_handler(logger): 34 | stream = io.StringIO() 35 | handler = PydanticStreamHandler(stream=stream) 36 | logger.addHandler(handler) 37 | 38 | return stream 39 | 40 | 41 | @pytest_asyncio.fixture(scope="function") 42 | def kafakesk_handler(app, logger): 43 | handler = PydanticKafkaeskHandler(app, "log.test") 44 | logger.addHandler(handler) 45 | 46 | return handler 47 | 48 | 49 | async def test_handler_initializes_applogger(kafka, logger): 50 | app = kafkaesk.Application( 51 | [f"{kafka[0]}:{kafka[1]}"], 52 | topic_prefix=uuid.uuid4().hex, 53 | kafka_settings={"metadata_max_age_ms": 500}, 54 | ) 55 | 56 | handler = PydanticKafkaeskHandler(app, "log.test") 57 | logger.addHandler(handler) 58 | 59 | logger.error("Hi!") 60 | 61 | await asyncio.sleep(0.1) 62 | assert app._initialized 63 | 64 | 65 | @pytest_asyncio.fixture(scope="function") 66 | def log_consumer(app): 67 | consumed = [] 68 | 69 | @app.subscribe("log.test", group="test_group") 70 | async def consume(data: PydanticLogModel): 71 | consumed.append(data) 72 | 73 | yield consumed 74 | 75 | 76 | class TestPydanticStreamHandler: 77 | async def 
test_stream_handler(self, stream_handler, logger): 78 | logger.info("Test Message %s", "extra") 79 | 80 | message = stream_handler.getvalue() 81 | 82 | assert "Test Message extra" in message 83 | 84 | async def test_stream_handler_with_log_model(self, stream_handler, logger): 85 | class LogModel(pydantic.BaseModel): 86 | _is_log_model = True 87 | foo: Optional[str] = None 88 | 89 | logger.info("Test Message %s", "extra", LogModel(foo="bar")) 90 | 91 | message = stream_handler.getvalue() 92 | 93 | assert "Test Message extra" in message 94 | assert "foo=bar" in message 95 | 96 | async def test_stream_handler_with_log_model_shortens_log_messae(self, stream_handler, logger): 97 | class LogModel(pydantic.BaseModel): 98 | _is_log_model = True 99 | foo: str 100 | bar: str 101 | 102 | logger.info("Test Message %s", "extra", LogModel(foo="X" * 256, bar="Y" * 256)) 103 | 104 | message = stream_handler.getvalue() 105 | 106 | assert "Test Message extra" in message 107 | assert f"foo={'X' * 256}" in message 108 | assert f"bar={'Y' * 256}" not in message 109 | 110 | 111 | class TestPydanticKafkaeskHandler: 112 | async def test_kafka_handler(self, app, kafakesk_handler, logger, log_consumer): 113 | async with app: 114 | logger.info("Test Message %s", "extra") 115 | await app.flush() 116 | await app.consume_for(1, seconds=8) 117 | 118 | assert len(log_consumer) == 1 119 | assert log_consumer[0].message == "Test Message extra" 120 | 121 | async def test_kafka_handler_with_log_model(self, app, kafakesk_handler, logger, log_consumer): 122 | class LogModel(pydantic.BaseModel): 123 | _is_log_model = True 124 | foo: Optional[str] = None 125 | 126 | async with app: 127 | logger.info("Test Message %s", "extra", LogModel(foo="bar")) 128 | await app.flush() 129 | await app.consume_for(1, seconds=8) 130 | 131 | assert len(log_consumer) == 1 132 | assert log_consumer[0].message == "Test Message extra" 133 | assert log_consumer[0].foo == "bar" 134 | 135 | def test_emit_std_output_queue_full(self): 136 | queue = MagicMock() 137 | with patch("kafkaesk.ext.logging.handler.KafkaeskQueue", return_value=queue), patch( 138 | "kafkaesk.ext.logging.handler.sys.stderr.write" 139 | ) as std_write: 140 | queue.put_nowait.side_effect = asyncio.QueueFull 141 | handler = PydanticKafkaeskHandler(MagicMock(), "foo") 142 | record = Mock() 143 | record.pydantic_data = [] 144 | handler.emit(record) 145 | std_write.assert_called_once() 146 | 147 | def test_emit_limits_std_output_queue_full(self): 148 | queue = MagicMock() 149 | with patch("kafkaesk.ext.logging.handler.KafkaeskQueue", return_value=queue), patch( 150 | "kafkaesk.ext.logging.handler.sys.stderr.write" 151 | ) as std_write: 152 | queue.put_nowait.side_effect = asyncio.QueueFull 153 | handler = PydanticKafkaeskHandler(MagicMock(), "foo") 154 | handler._last_warning_sent = time.time() + 1 155 | record = Mock() 156 | record.pydantic_data = [] 157 | handler.emit(record) 158 | std_write.assert_not_called() 159 | 160 | def test_clone(self): 161 | handler = PydanticKafkaeskHandler(MagicMock(), "foo") 162 | handler2 = handler.clone() 163 | assert handler != handler2 164 | assert handler.app == handler2.app 165 | assert handler._queue == handler2._queue 166 | 167 | def test_emit_drops_message_on_runtime_error_start(self): 168 | queue = MagicMock() 169 | with patch("kafkaesk.ext.logging.handler.KafkaeskQueue", return_value=queue), patch( 170 | "kafkaesk.ext.logging.handler.sys.stderr.write" 171 | ) as std_write: 172 | queue.running = False 173 | queue.start.side_effect = RuntimeError 
174 | handler = PydanticKafkaeskHandler(MagicMock(), "foo") 175 | record = Mock() 176 | record.pydantic_data = [] 177 | handler.emit(record) 178 | std_write.assert_called_once() 179 | 180 | 181 | class TestKafkaeskQueue: 182 | @pytest_asyncio.fixture(scope="function") 183 | async def queue(self, request, app): 184 | max_queue = 10000 185 | for marker in request.node.iter_markers("with_max_queue"): 186 | max_queue = marker.args[0] 187 | 188 | app.schema("PydanticLogModel")(PydanticLogModel) 189 | q = KafkaeskQueue(app, max_queue=max_queue) 190 | 191 | return q 192 | 193 | async def test_queue(self, app, queue): 194 | consumed = [] 195 | 196 | @app.subscribe("log.test", group="test_group") 197 | async def consume(data: PydanticLogModel): 198 | consumed.append(data) 199 | 200 | async with app: 201 | queue.start() 202 | queue.put_nowait("log.test", PydanticLogModel(foo="bar")) 203 | 204 | await app.flush() 205 | await app.consume_for(1, seconds=8) 206 | 207 | queue.close() 208 | await queue._task 209 | 210 | assert len(consumed) == 1 211 | 212 | async def test_queue_flush(self, app, queue, log_consumer): 213 | async with app: 214 | queue.start() 215 | for i in range(10): 216 | queue.put_nowait("log.test", PydanticLogModel(count=i)) 217 | 218 | await queue.flush() 219 | 220 | await app.flush() 221 | await app.consume_for(10, seconds=8) 222 | 223 | assert len(log_consumer) == 10 224 | 225 | async def test_queue_flush_on_close(self, app, queue, log_consumer): 226 | async with app: 227 | queue.start() 228 | await asyncio.sleep(0.1) 229 | queue.close() 230 | 231 | for i in range(10): 232 | queue.put_nowait("log.test", PydanticLogModel(count=i)) 233 | 234 | await app.flush() 235 | await app.consume_for(10, seconds=8) 236 | 237 | assert len(log_consumer) == 10 238 | assert queue._task.done() 239 | 240 | @pytest.mark.with_max_queue(1) 241 | async def test_queue_max_size(self, app, queue): 242 | queue.start() 243 | queue.put_nowait("log.test", PydanticLogModel()) 244 | 245 | with pytest.raises(asyncio.QueueFull): 246 | queue.put_nowait("log.test", PydanticLogModel()) 247 | -------------------------------------------------------------------------------- /tests/acceptance/ext/logging/test_record.py: -------------------------------------------------------------------------------- 1 | from kafkaesk.ext.logging.record import factory 2 | from kafkaesk.ext.logging.record import PydanticLogRecord 3 | from typing import Optional 4 | 5 | import logging 6 | import pydantic 7 | import pytest 8 | 9 | pytestmark = pytest.mark.asyncio 10 | 11 | 12 | async def test_factory_return_type() -> None: 13 | record = factory( 14 | name="logger.test", 15 | level=logging.INFO, 16 | fn="test_factory_retrun_type", 17 | lno=4, 18 | msg="Test Log", 19 | args=(), 20 | exc_info=None, 21 | func=None, 22 | sinfo=None, 23 | ) 24 | 25 | assert isinstance(record, PydanticLogRecord) 26 | assert issubclass(PydanticLogRecord, logging.LogRecord) 27 | 28 | 29 | async def test_factory_adds_pydantic_models() -> None: 30 | class LogModel(pydantic.BaseModel): 31 | _is_log_model = True 32 | foo: Optional[str] = None 33 | 34 | record = factory( 35 | name="logger.test", 36 | level=logging.INFO, 37 | fn="test_factory_retrun_type", 38 | lno=4, 39 | msg="Test Log", 40 | args=(LogModel(foo="bar"),), 41 | exc_info=None, 42 | func=None, 43 | sinfo=None, 44 | ) 45 | 46 | assert len(record.pydantic_data) == 1 47 | assert len(record.args) == 0 48 | 49 | 50 | async def test_factory_formats_msg() -> None: 51 | record = factory( 52 | name="logger.test", 53 | 
level=logging.INFO, 54 | fn="test_factory_retrun_type", 55 | lno=4, 56 | msg="Test Log %s", 57 | args=("extra",), 58 | exc_info=None, 59 | func=None, 60 | sinfo=None, 61 | ) 62 | 63 | assert record.getMessage() == "Test Log extra" 64 | 65 | 66 | async def test_factory_formats_msg_and_adds_pydantic_model() -> None: 67 | class LogModel(pydantic.BaseModel): 68 | _is_log_model = True 69 | foo: Optional[str] = None 70 | 71 | record = factory( 72 | name="logger.test", 73 | level=logging.INFO, 74 | fn="test_factory_retrun_type", 75 | lno=4, 76 | msg="Test Log %s", 77 | args=("extra", LogModel(foo="bar")), 78 | exc_info=None, 79 | func=None, 80 | sinfo=None, 81 | ) 82 | 83 | assert record.getMessage() == "Test Log extra" 84 | assert len(record.pydantic_data) == 1 85 | -------------------------------------------------------------------------------- /tests/acceptance/produce.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import pydantic 3 | 4 | 5 | class Foo(pydantic.BaseModel): 6 | foo: str 7 | 8 | 9 | async def producer(app, topic): 10 | while True: 11 | try: 12 | await app.publish(topic, Foo(foo="bar")) 13 | await asyncio.sleep(0.05) 14 | except asyncio.CancelledError: 15 | return 16 | -------------------------------------------------------------------------------- /tests/acceptance/test_healthcheck.py: -------------------------------------------------------------------------------- 1 | from aiokafka import ConsumerRecord 2 | from kafkaesk import Application 3 | from kafkaesk.exceptions import ConsumerUnhealthyException 4 | from .produce import producer 5 | from kafkaesk.exceptions import ProducerUnhealthyException 6 | from kafkaesk.kafka import KafkaTopicManager 7 | from unittest.mock import call 8 | from unittest.mock import MagicMock 9 | from unittest.mock import Mock 10 | from unittest.mock import patch 11 | 12 | import aiokafka.structs 13 | import asyncio 14 | import pydantic 15 | import pytest 16 | import uuid 17 | 18 | try: 19 | from unittest.mock import AsyncMock 20 | except: # noqa 21 | AsyncMock = None # type: ignore 22 | 23 | pytestmark = pytest.mark.asyncio 24 | 25 | TOPIC = "test-hc" 26 | 27 | 28 | async def test_health_check_should_fail_with_unhandled(app: Application): 29 | @app.subscribe(TOPIC, group=TOPIC) 30 | async def consume(data): 31 | raise Exception("failure!") 32 | 33 | async with app: 34 | produce = asyncio.create_task(producer(app, TOPIC)) 35 | fut = asyncio.create_task(app.consume_forever()) 36 | await fut 37 | 38 | with pytest.raises(ConsumerUnhealthyException): 39 | await app.health_check() 40 | 41 | produce.cancel() 42 | 43 | 44 | async def test_health_check_should_succeed(app): 45 | @app.subscribe(TOPIC, group=TOPIC) 46 | async def consume(data): 47 | ... 
48 | 49 | async with app: 50 | produce = asyncio.create_task(producer(app, TOPIC)) 51 | asyncio.create_task(app.consume_forever()) 52 | await asyncio.sleep(1) # wait for some to produce and then be consumed to cause failure 53 | await app.health_check() 54 | produce.cancel() 55 | -------------------------------------------------------------------------------- /tests/acceptance/test_pubsub.py: -------------------------------------------------------------------------------- 1 | from aiokafka import ConsumerRecord 2 | from kafkaesk import Application 3 | from kafkaesk.exceptions import ProducerUnhealthyException 4 | from kafkaesk.kafka import KafkaTopicManager 5 | from unittest.mock import call 6 | from unittest.mock import MagicMock 7 | from unittest.mock import Mock 8 | from unittest.mock import patch 9 | 10 | import aiokafka.structs 11 | import asyncio 12 | import pydantic 13 | import pytest 14 | import uuid 15 | 16 | try: 17 | from unittest.mock import AsyncMock 18 | except: # noqa 19 | AsyncMock = None # type: ignore 20 | 21 | pytestmark = pytest.mark.asyncio 22 | 23 | 24 | async def test_data_binding(app): 25 | consumed = [] 26 | 27 | @app.schema("Foo", streams=["foo.bar"]) 28 | class Foo(pydantic.BaseModel): 29 | bar: str 30 | 31 | @app.subscribe("foo.bar", group="test_group") 32 | async def consume(data: Foo, schema, record, app): 33 | consumed.append((data, schema, record, app)) 34 | 35 | async with app: 36 | await app.publish_and_wait("foo.bar", Foo(bar="1")) 37 | await app.flush() 38 | await app.consume_for(1, seconds=10) 39 | 40 | assert len(consumed) == 1 41 | assert len(consumed[0]) == 4 42 | 43 | 44 | async def test_consume_message(app): 45 | consumed = [] 46 | 47 | @app.schema("Foo", streams=["foo.bar"]) 48 | class Foo(pydantic.BaseModel): 49 | bar: str 50 | 51 | @app.subscribe("foo.bar", group="test_group") 52 | async def consume(data: Foo): 53 | consumed.append(data) 54 | 55 | async with app: 56 | await app.publish_and_wait("foo.bar", Foo(bar="1")) 57 | await app.flush() 58 | await app.consume_for(1, seconds=10) 59 | 60 | assert len(consumed) == 1 61 | 62 | 63 | async def test_consume_many_messages(app): 64 | consumed = [] 65 | 66 | @app.schema("Foo", streams=["foo.bar"]) 67 | class Foo(pydantic.BaseModel): 68 | bar: str 69 | 70 | @app.subscribe("foo.bar", group="test_group") 71 | async def consume(data: Foo): 72 | consumed.append(data) 73 | 74 | async with app: 75 | fut = asyncio.create_task(app.consume_for(10, seconds=10)) 76 | await asyncio.sleep(0.1) 77 | for idx in range(10): 78 | await app.publish("foo.bar", Foo(bar=str(idx))) 79 | await app.flush() 80 | await fut 81 | 82 | assert len(consumed) == 10 83 | 84 | 85 | async def test_slow_messages(app: Application): 86 | consumed = [] 87 | 88 | @app.schema("Slow", streams=["foo.bar"]) 89 | class Slow(pydantic.BaseModel): 90 | latency: float 91 | 92 | @app.subscribe("foo.bar", group="test_group", concurrency=10, timeout_seconds=0.045) 93 | async def consumer(data: Slow, record: aiokafka.ConsumerRecord): 94 | try: 95 | await asyncio.sleep(data.latency) 96 | consumed.append(("ok", data.latency, record.topic)) 97 | except asyncio.CancelledError: 98 | consumed.append(("cancelled", data.latency, record.topic)) 99 | 100 | async with app: 101 | for idx in range(10): 102 | await app.publish("foo.bar", Slow(latency=idx * 0.01)) 103 | await asyncio.sleep(0.01) 104 | await app.flush() 105 | 106 | fut = asyncio.create_task(app.consume_for(num_messages=8, seconds=5)) 107 | await fut 108 | 109 | assert len([x for x in consumed if x[0] == 
"ok"]) == 5 110 | assert len([x for x in consumed if x[0] == "cancelled"]) == 5 111 | 112 | 113 | async def test_not_consume_message_that_does_not_match(app): 114 | consumed = [] 115 | 116 | @app.schema("Foo", streams=["foo.bar"]) 117 | class Foo(pydantic.BaseModel): 118 | bar: str 119 | 120 | @app.subscribe("foo.bar", group="test_group") 121 | async def consume(data: Foo): 122 | consumed.append(data) 123 | 124 | async with app: 125 | await app.publish("foo.bar1", Foo(bar="1")) 126 | await app.flush() 127 | await app.consume_for(1, seconds=5) 128 | 129 | assert len(consumed) == 0 130 | 131 | 132 | async def test_subscribe_without_group(app): 133 | @app.schema("Foo") 134 | class Foo(pydantic.BaseModel): 135 | bar: str 136 | 137 | with pytest.raises(TypeError): 138 | 139 | @app.subscribe("foo.bar") 140 | async def consume(data: Foo): 141 | ... 142 | 143 | 144 | async def test_multiple_subscribers_different_models(app): 145 | consumed1 = [] 146 | consumed2 = [] 147 | 148 | @app.schema("Foo", version=1, streams=["foo.bar"]) 149 | class Foo1(pydantic.BaseModel): 150 | bar: str 151 | 152 | @app.schema("Foo", version=2) 153 | class Foo2(pydantic.BaseModel): 154 | foo: str 155 | bar: str 156 | 157 | @app.subscribe( 158 | "foo.bar", 159 | group="test_group", 160 | ) 161 | async def consume1(data: Foo1): 162 | consumed1.append(data) 163 | 164 | @app.subscribe( 165 | "foo.bar", 166 | group="test_group_2", 167 | ) 168 | async def consume2(data: Foo2): 169 | consumed2.append(data) 170 | 171 | async with app: 172 | fut = asyncio.create_task(app.consume_for(4, seconds=10)) 173 | await asyncio.sleep(0.2) 174 | 175 | await app.publish("foo.bar", Foo1(bar="1")) 176 | await app.publish("foo.bar", Foo2(foo="2", bar="3")) 177 | await app.flush() 178 | await fut 179 | 180 | assert all([isinstance(v, Foo1) for v in consumed1]) 181 | assert all([isinstance(v, Foo2) for v in consumed2]) 182 | 183 | 184 | async def test_subscribe_diff_data_types(app): 185 | consumed_records = [] 186 | consumed_bytes = [] 187 | 188 | @app.schema("Foo", version=1, streams=["foo.bar"]) 189 | class Foo(pydantic.BaseModel): 190 | bar: str 191 | 192 | @app.subscribe("foo.bar", group="test_group") 193 | async def consume_record(data: ConsumerRecord): 194 | consumed_records.append(data) 195 | 196 | @app.subscribe("foo.bar", group="test_group_2") 197 | async def consume_bytes(data: bytes): 198 | consumed_bytes.append(data) 199 | 200 | async with app: 201 | await app.publish("foo.bar", Foo(bar="1")) 202 | await app.flush() 203 | await app.consume_for(1, seconds=10) 204 | 205 | assert len(consumed_records) == 1 206 | assert len(consumed_bytes) == 1 207 | assert isinstance(consumed_records[0], ConsumerRecord) 208 | assert isinstance(consumed_bytes[0], bytes) 209 | 210 | 211 | async def test_subscribe_to_topic_that_does_not_exist(app): 212 | consumed_records = [] 213 | 214 | @app.schema("Foo", version=1) 215 | class Foo(pydantic.BaseModel): 216 | bar: str 217 | 218 | @app.subscribe("foo.bar", group="test_group") 219 | async def consume_record(data: Foo): 220 | consumed_records.append(data) 221 | 222 | async with app: 223 | for idx in range(10): 224 | await app.publish("foo.bar", Foo(bar=str(idx))) 225 | 226 | await app.flush() 227 | fut = asyncio.create_task(app.consume_for(10, seconds=10)) 228 | await fut 229 | 230 | assert len(consumed_records) == 10 231 | 232 | 233 | async def test_subscribe_to_topic_that_already_has_messages_for_group(app): 234 | consumed_records = [] 235 | 236 | @app.schema("Foo", version=1) 237 | class 
Foo(pydantic.BaseModel): 238 | bar: str 239 | 240 | @app.subscribe("foo.bar", group="test_group") 241 | async def consume_record(data: Foo): 242 | consumed_records.append(data) 243 | 244 | async with app: 245 | for idx in range(10): 246 | await app.publish("foo.bar", Foo(bar=str(idx))) 247 | await app.flush() 248 | 249 | fut = asyncio.create_task(app.consume_for(20, seconds=10)) 250 | 251 | for idx in range(10): 252 | await app.publish("foo.bar", Foo(bar=str(idx))) 253 | await app.flush() 254 | 255 | await fut 256 | 257 | assert len(consumed_records) == 20 258 | 259 | 260 | async def test_cache_topic_exists_topic_mng(kafka): 261 | mng = KafkaTopicManager( 262 | bootstrap_servers=[f"{kafka[0]}:{kafka[1]}"], 263 | prefix=uuid.uuid4().hex, 264 | ) 265 | 266 | topic_id = mng.get_topic_id("foobar") 267 | assert not await mng.topic_exists(topic_id) 268 | assert topic_id not in mng._topic_cache 269 | 270 | await mng.create_topic(topic_id) 271 | assert await mng.topic_exists(topic_id) 272 | 273 | 274 | async def test_subscription_failure(app): 275 | probe = Mock() 276 | stream_id = "foo-bar-subfailure" 277 | group_id = "test_sub_group_failure" 278 | topic_id = app.topic_mng.get_topic_id(stream_id) 279 | 280 | @app.schema(streams=[stream_id]) 281 | class Foo(pydantic.BaseModel): 282 | bar: str 283 | 284 | @app.subscribe(stream_id, group=group_id) 285 | async def noop_ng(data: Foo): 286 | probe("error", data) 287 | raise Exception("Unhandled Exception") 288 | 289 | async with app: 290 | await app.publish(stream_id, Foo(bar="1")) 291 | await app.publish(stream_id, Foo(bar="1")) 292 | await app.flush() 293 | 294 | # it fails 295 | with pytest.raises(Exception): 296 | await app.consume_for(2, seconds=20) 297 | 298 | # verify we didn't commit 299 | offsets = [ 300 | v 301 | for k, v in (await app.topic_mng.list_consumer_group_offsets(group_id)).items() 302 | if k.topic == topic_id 303 | ] 304 | assert offsets == [] 305 | 306 | # remove wrong consumer 307 | app._subscriptions = [] 308 | 309 | @app.subscribe(stream_id, group=group_id) 310 | async def noop_ok(data: Foo): 311 | probe("ok", data) 312 | 313 | async with app: 314 | await app.publish(stream_id, Foo(bar="2")) 315 | await app.flush() 316 | 317 | await app.consume_for(3, seconds=10) 318 | 319 | await app._subscription_consumers[0]._maybe_commit(forced=True) 320 | 321 | # make sure we that now committed all messages 322 | assert ( 323 | sum( 324 | [ 325 | om.offset 326 | for tp, om in ( 327 | await app.topic_mng.list_consumer_group_offsets(group_id) 328 | ).items() 329 | if tp.topic == topic_id 330 | ] 331 | ) 332 | == 3 333 | ) 334 | 335 | probe.assert_has_calls( 336 | [call("error", Foo(bar="1")), call("ok", Foo(bar="1")), call("ok", Foo(bar="2"))], 337 | any_order=True, 338 | ) 339 | 340 | 341 | async def test_publish_unregistered_schema(app): 342 | probe = Mock() 343 | stream_id = "foo-bar-unregistered" 344 | group_id = "test-sub-unregistered" 345 | 346 | class Foo(pydantic.BaseModel): 347 | bar: str 348 | 349 | @app.subscribe(stream_id, group=group_id) 350 | async def noop(data: Foo): 351 | probe(data) 352 | 353 | async with app: 354 | await app.publish(stream_id, Foo(bar="1")) 355 | await app.publish(stream_id, Foo(bar="2")) 356 | await app.flush() 357 | 358 | await app.consume_for(2, seconds=5) 359 | 360 | probe.assert_has_calls( 361 | [call(Foo(bar="1")), call(Foo(bar="2"))], 362 | any_order=True, 363 | ) 364 | 365 | # 1 failed + 3 ok 366 | assert len(probe.mock_calls) == 2 367 | 368 | 369 | async def test_raw_publish_data(app): 370 | probe 
= Mock() 371 | stream_id = "foo-bar-raw" 372 | group_id = "test-sub-raw" 373 | 374 | @app.subscribe(stream_id, group=group_id) 375 | async def noop(record: aiokafka.structs.ConsumerRecord): 376 | probe(record.value) 377 | 378 | async with app: 379 | await app.raw_publish(stream_id, b"1") 380 | await app.raw_publish(stream_id, b"2") 381 | await app.flush() 382 | 383 | await app.consume_for(2, seconds=5) 384 | 385 | probe.assert_has_calls( 386 | [call(b"1"), call(b"2")], 387 | any_order=True, 388 | ) 389 | 390 | # both raw messages were consumed 391 | assert len(probe.mock_calls) == 2 392 | 393 | 394 | async def test_publish_unhealthy(app): 395 | 396 | async with app: 397 | app._producer = AsyncMock() 398 | app._producer._sender = MagicMock() 399 | app._producer._sender.sender_task.done.return_value = True 400 | with pytest.raises(ProducerUnhealthyException): 401 | await app.raw_publish("foobar", b"foobar") 402 | 403 | 404 | async def test_invalid_event_schema_is_sending_error_metric(app): 405 | side_effect = None 406 | 407 | @app.schema("Foo", streams=["foo.bar"]) 408 | class Foo(pydantic.BaseModel): 409 | bar: str 410 | 411 | class Baz(pydantic.BaseModel): 412 | qux: str 413 | 414 | @app.subscribe("foo.bar", group="test_group") 415 | async def consume(data: Foo): 416 | side_effect = True 417 | 418 | with patch("kafkaesk.consumer.CONSUMED_MESSAGES") as consumed_messages_metric: 419 | async with app: 420 | await app.publish("foo.bar", Baz(qux="1")) 421 | await app.flush() 422 | await app.consume_for(1, seconds=5) 423 | 424 | consumed_messages_metric.labels.assert_called_once() 425 | metric_kwargs = consumed_messages_metric.labels.call_args.kwargs 426 | assert metric_kwargs["error"] == "UnhandledMessage" 427 | consumed_messages_metric.labels(**metric_kwargs).inc.assert_called_once() 428 | 429 | assert side_effect is None 430 | 431 | 432 | async def test_malformed_event_schema_is_sending_error_metric(app): 433 | side_effect = None 434 | 435 | @app.schema("Foo", streams=["foo.bar"]) 436 | class Foo(pydantic.BaseModel): 437 | bar: str 438 | 439 | @app.subscribe("foo.bar", group="test_group") 440 | async def consume(data: Foo): 441 | side_effect = True 442 | 443 | with patch("kafkaesk.consumer.CONSUMED_MESSAGES") as consumed_messages_metric: 444 | async with app: 445 | await app.raw_publish("foo.bar", b"bad string") 446 | await app.flush() 447 | await app.consume_for(1, seconds=5) 448 | 449 | consumed_messages_metric.labels.assert_called_once() 450 | metric_kwargs = consumed_messages_metric.labels.call_args.kwargs 451 | assert metric_kwargs["error"] == "UnhandledMessage" 452 | consumed_messages_metric.labels(**metric_kwargs).inc.assert_called_once() 453 | 454 | assert side_effect is None 455 | -------------------------------------------------------------------------------- /tests/acceptance/test_rebalance.py: -------------------------------------------------------------------------------- 1 | from .produce import Foo 2 | from .produce import producer 3 | from kafkaesk.consumer import BatchConsumer, Subscription 4 | 5 | import asyncio 6 | import kafkaesk 7 | import pytest 8 | 9 | pytestmark = pytest.mark.asyncio 10 | 11 | GROUP = TOPIC = "test-rebalance" 12 | 13 | 14 | async def test_cancel_getone(app): 15 | 16 | app.schema(streams=[TOPIC])(Foo) 17 | 18 | async def handler(*args, **kwargs): 19 | pass 20 | 21 | async with app: 22 | subscription = Subscription( 23 | "test_consumer", 24 | handler, 25 | GROUP, 26 | topics=[TOPIC], 27 | timeout_seconds=1, 28 | ) 29 | consumer = BatchConsumer( 30 |
subscription=subscription, 31 | app=app, 32 | ) 33 | await consumer.initialize() 34 | raw_consumer = consumer._consumer 35 | with raw_consumer._subscription.fetch_context(): 36 | try: 37 | await asyncio.wait_for(raw_consumer._fetcher.next_record([]), timeout=0.1) 38 | except asyncio.TimeoutError: 39 | assert len(raw_consumer._fetcher._fetch_waiters) == 0 40 | await raw_consumer.stop() 41 | 42 | 43 | async def test_many_consumers_rebalancing(kafka, topic_prefix): 44 | apps = [] 45 | for idx in range(5): 46 | app = kafkaesk.Application( 47 | [f"{kafka[0]}:{kafka[1]}"], 48 | topic_prefix=topic_prefix, 49 | ) 50 | app.schema(streams=[TOPIC])(Foo) 51 | app.id = idx 52 | 53 | @app.subscribe(TOPIC, group=GROUP) 54 | async def consumer(ob: Foo, record, app): 55 | ... 56 | 57 | await app.initialize() 58 | apps.append(app) 59 | 60 | produce = asyncio.create_task(producer(apps[0], TOPIC)) 61 | 62 | consumer_tasks = [] 63 | for app in apps: 64 | consumer_tasks.append(asyncio.create_task(app.consume_forever())) 65 | 66 | await asyncio.sleep(5) 67 | 68 | # cycle through each, destroying... 69 | for idx in range(5): 70 | await apps[idx].stop() 71 | await asyncio.sleep(1) 72 | assert consumer_tasks[idx].done() 73 | 74 | # start again 75 | consumer_tasks[idx] = asyncio.create_task(apps[idx].consume_forever()) 76 | 77 | produce.cancel() 78 | 79 | for idx in range(5): 80 | await apps[idx].stop() 81 | 82 | 83 | async def test_consume_every_message_once_during_rebalance(kafka, topic_prefix): 84 | """ 85 | No matter what, even without reassignment, some messages 86 | seem to be replayed. You can sometimes see it even with a single consumer 87 | and no rebalance. 88 | """ 89 | consumed = {} 90 | 91 | def record_msg(record): 92 | key = f"{record.partition}-{record.offset}" 93 | if key not in consumed: 94 | consumed[key] = 0 95 | consumed[key] += 1 96 | 97 | apps = [] 98 | for idx in range(5): 99 | app = kafkaesk.Application( 100 | [f"{kafka[0]}:{kafka[1]}"], 101 | topic_prefix=topic_prefix, 102 | kafka_settings={"auto_commit_interval_ms": 10} 103 | ) 104 | app.schema(streams=[TOPIC])(Foo) 105 | app.id = idx 106 | 107 | @app.subscribe(TOPIC, group=GROUP) 108 | async def consumer(ob: Foo, record, app): 109 | record_msg(record) 110 | 111 | await app.initialize() 112 | apps.append(app) 113 | 114 | consumer_tasks = [] 115 | for app in apps: 116 | consumer_tasks.append(asyncio.create_task(app.consume_forever())) 117 | 118 | await asyncio.sleep(1) 119 | produce = asyncio.create_task(producer(apps[0], TOPIC)) 120 | await asyncio.sleep(5) 121 | 122 | # cycle through each, destroying...
123 | for idx in range(5): 124 | await apps[idx].stop() 125 | await asyncio.sleep(1) 126 | assert consumer_tasks[idx].done() 127 | # start again 128 | consumer_tasks[idx] = asyncio.create_task(apps[idx].consume_forever()) 129 | 130 | produce.cancel() 131 | 132 | for idx in range(5): 133 | await apps[idx].stop() 134 | 135 | assert len(consumed) > 100 136 | 137 | # now check that we always consumed a message only once 138 | for v in consumed.values(): 139 | assert v == 1 140 | -------------------------------------------------------------------------------- /tests/acceptance/test_run.py: -------------------------------------------------------------------------------- 1 | from .produce import Foo 2 | from .produce import producer 3 | from kafkaesk import Application 4 | 5 | import asyncio 6 | import pytest 7 | import signal 8 | 9 | TOPIC = "test-run" 10 | GROUP = "test-run2" 11 | 12 | pytestmark = pytest.mark.asyncio 13 | 14 | test_app = Application() 15 | 16 | test_app.schema(streams=[TOPIC])(Foo) 17 | 18 | 19 | @test_app.subscribe(TOPIC, group=GROUP) 20 | async def _consumer(ob: Foo, record, app): 21 | ... 22 | 23 | 24 | async def test_run_exits_cleanly_while_consuming(kafka, topic_prefix): 25 | kserver = f"{kafka[0]}:{kafka[1]}" 26 | app = Application([kserver], topic_prefix=topic_prefix) 27 | async with app: 28 | pro = asyncio.create_task(producer(app, TOPIC)) 29 | 30 | proc = await asyncio.create_subprocess_exec( 31 | "kafkaesk", 32 | "tests.acceptance.test_run:test_app", 33 | "--kafka-servers", 34 | kserver, 35 | "--topic-prefix", 36 | topic_prefix, 37 | # cwd=_test_dir, 38 | ) 39 | 40 | await asyncio.sleep(5) 41 | pro.cancel() 42 | 43 | proc.send_signal(signal.SIGINT) 44 | await proc.wait() 45 | 46 | assert proc.returncode == 0 47 | 48 | results = await app.topic_mng.list_consumer_group_offsets(GROUP) 49 | topic_id = app.topic_mng.get_topic_id(TOPIC) 50 | count = 0 51 | for tp, pos in results.items(): 52 | if tp.topic != topic_id: 53 | continue 54 | count += pos.offset 55 | assert count > 0 56 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | pytest_plugins = ["tests.fixtures"] 2 | -------------------------------------------------------------------------------- /tests/fixtures.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import DEFAULT 2 | from unittest.mock import patch 3 | 4 | import kafkaesk 5 | import os 6 | import pytest_asyncio 7 | import uuid 8 | 9 | 10 | @pytest_asyncio.fixture() 11 | async def kafka(): 12 | yield os.environ.get("KAFKA", "localhost:9092").split(":") 13 | 14 | 15 | @pytest_asyncio.fixture() 16 | def topic_prefix(): 17 | return uuid.uuid4().hex 18 | 19 | 20 | @pytest_asyncio.fixture() 21 | async def app(kafka, topic_prefix): 22 | yield kafkaesk.Application( 23 | [f"{kafka[0]}:{kafka[1]}"], 24 | topic_prefix=topic_prefix, 25 | kafka_settings={ 26 | "metadata_max_age_ms": 500, 27 | }, 28 | ) 29 | 30 | 31 | @pytest_asyncio.fixture() 32 | def metrics(): 33 | with patch.multiple( 34 | "kafkaesk.app", 35 | PUBLISHED_MESSAGES=DEFAULT, 36 | PRODUCER_TOPIC_OFFSET=DEFAULT, 37 | PUBLISHED_MESSAGES_TIME=DEFAULT, 38 | ) as mock: 39 | yield mock 40 | -------------------------------------------------------------------------------- /tests/unit/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/onna/kafkaesk/10e88fd921fddff70b8cb973e739e280caa4cac8/tests/unit/__init__.py -------------------------------------------------------------------------------- /tests/unit/ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onna/kafkaesk/10e88fd921fddff70b8cb973e739e280caa4cac8/tests/unit/ext/__init__.py -------------------------------------------------------------------------------- /tests/unit/ext/logging/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onna/kafkaesk/10e88fd921fddff70b8cb973e739e280caa4cac8/tests/unit/ext/logging/__init__.py -------------------------------------------------------------------------------- /tests/unit/ext/logging/test_handler.py: -------------------------------------------------------------------------------- 1 | from kafkaesk.app import Application 2 | from kafkaesk.ext.logging import handler 3 | from unittest.mock import MagicMock 4 | from unittest.mock import patch 5 | 6 | import pytest 7 | 8 | pytestmark = pytest.mark.asyncio 9 | 10 | 11 | def test_close_log_handler(): 12 | mock = MagicMock() 13 | h = handler.PydanticKafkaeskHandler(MagicMock(), "stream", mock) 14 | h.close() 15 | mock.close.assert_called_once() 16 | 17 | 18 | def test_format_log_exc(): 19 | h = handler.PydanticKafkaeskHandler(MagicMock(), "stream", MagicMock()) 20 | record = MagicMock() 21 | record.exc_text = None 22 | record.exc_info = (Exception(), None, None) 23 | 24 | data = h._format_base_log(record) 25 | assert data["stack"] 26 | 27 | 28 | def test_swallows_schema_conflict(): 29 | app = Application() 30 | handler.PydanticKafkaeskHandler(app, "stream", MagicMock()) 31 | handler.PydanticKafkaeskHandler(app, "stream", MagicMock()) 32 | 33 | 34 | def test_get_k8s_ns(): 35 | 36 | with patch("kafkaesk.ext.logging.handler._K8S_NS", handler._not_set), patch( 37 | "kafkaesk.ext.logging.handler.os.path.exists", return_value=True 38 | ), patch("kafkaesk.ext.logging.handler.open") as open_file: 39 | fi = MagicMock() 40 | cm = MagicMock() 41 | cm.__enter__.return_value = fi 42 | open_file.return_value = cm 43 | fi.read.return_value = "foobar\n" 44 | assert handler.get_k8s_ns() == "foobar" 45 | 46 | 47 | class TestQueue: 48 | def test_not_running(self): 49 | qq = handler.KafkaeskQueue(MagicMock()) 50 | assert not qq.running 51 | 52 | def test_not_running_task_done(self): 53 | qq = handler.KafkaeskQueue(MagicMock()) 54 | qq._task = MagicMock() 55 | qq._task.done.return_value = True 56 | assert not qq.running 57 | 58 | def test_running(self): 59 | qq = handler.KafkaeskQueue(MagicMock()) 60 | qq._task = MagicMock() 61 | qq._task.done.return_value = False 62 | assert qq.running 63 | 64 | async def test_runtime_error_not_running(self): 65 | qq = handler.KafkaeskQueue(MagicMock()) 66 | with pytest.raises(RuntimeError): 67 | assert await qq._run() 68 | -------------------------------------------------------------------------------- /tests/unit/test_app.py: -------------------------------------------------------------------------------- 1 | from asyncio.futures import Future 2 | from kafkaesk.app import Application 3 | from kafkaesk.app import published_callback 4 | from kafkaesk.app import run 5 | from kafkaesk.app import run_app 6 | from kafkaesk.app import SchemaRegistration 7 | from jaeger_client import Config, Tracer 8 | from opentracing.scope_managers.contextvars import ContextVarsScopeManager 9 | from 
tests.utils import record_factory 10 | from unittest.mock import ANY 11 | from unittest.mock import AsyncMock 12 | from unittest.mock import MagicMock 13 | from unittest.mock import Mock 14 | from unittest.mock import patch 15 | 16 | import asyncio 17 | import json 18 | import kafkaesk 19 | import kafkaesk.exceptions 20 | import opentracing 21 | import pydantic 22 | import pytest 23 | import time 24 | 25 | pytestmark = pytest.mark.asyncio 26 | 27 | 28 | class TestApplication: 29 | async def test_app_events(self): 30 | app = Application() 31 | 32 | async def on_finalize(): 33 | pass 34 | 35 | app.on("finalize", on_finalize) 36 | assert len(app._event_handlers["finalize"]) == 1 37 | 38 | async def test_app_finalize_event(self): 39 | app = Application() 40 | 41 | class CallTracker: 42 | def __init__(self): 43 | self.called = False 44 | 45 | async def on_finalize(self): 46 | self.called = True 47 | 48 | tracker = CallTracker() 49 | app.on("finalize", tracker.on_finalize) 50 | await app.finalize() 51 | 52 | assert tracker.called is True 53 | 54 | def test_publish_callback(self, metrics): 55 | fut = Future() 56 | fut.set_result(record_factory()) 57 | published_callback("topic", time.time() - 1, fut) 58 | 59 | metrics["PUBLISHED_MESSAGES"].labels.assert_called_with( 60 | stream_id="topic", partition=0, error="none" 61 | ) 62 | metrics["PUBLISHED_MESSAGES"].labels().inc() 63 | 64 | metrics["PRODUCER_TOPIC_OFFSET"].labels.assert_called_with(stream_id="topic", partition=0) 65 | metrics["PRODUCER_TOPIC_OFFSET"].labels().set.assert_called_with(0) 66 | 67 | metrics["PUBLISHED_MESSAGES_TIME"].labels.assert_called_with(stream_id="topic") 68 | assert metrics["PUBLISHED_MESSAGES_TIME"].labels().observe.mock_calls[0].args[ 69 | 0 70 | ] == pytest.approx(1, 0.1) 71 | 72 | def test_publish_callback_exc(self, metrics): 73 | fut = Future() 74 | fut.set_exception(Exception()) 75 | published_callback("topic", time.time(), fut) 76 | 77 | metrics["PUBLISHED_MESSAGES"].labels.assert_called_with( 78 | stream_id="topic", partition=-1, error="Exception" 79 | ) 80 | metrics["PUBLISHED_MESSAGES"].labels().inc() 81 | 82 | def test_mount_router(self): 83 | app = Application() 84 | 85 | router = kafkaesk.Router() 86 | 87 | @router.schema("Foo", streams=["foo.bar"]) 88 | class Foo(pydantic.BaseModel): 89 | bar: str 90 | 91 | @router.subscribe("foo.bar", group="test_group") 92 | async def consume(data: Foo, schema, record): 93 | ... 
94 | 95 | app.mount(router) 96 | 97 | assert app.subscriptions == router.subscriptions 98 | assert app.schemas == router.schemas 99 | assert app.event_handlers == router.event_handlers 100 | 101 | async def test_consumer_health_check(self): 102 | app = kafkaesk.Application() 103 | subscription_consumer = AsyncMock() 104 | app._subscription_consumers.append(subscription_consumer) 105 | subscription_consumer.consumer._client.ready.return_value = True 106 | await app.health_check() 107 | 108 | async def test_consumer_health_check_raises_exception(self): 109 | app = kafkaesk.Application() 110 | subscription = kafkaesk.Subscription( 111 | "test_consumer", lambda record: 1, "group", topics=["foo"] 112 | ) 113 | 114 | subscription_consumer = kafkaesk.BatchConsumer( 115 | subscription=subscription, 116 | app=app, 117 | ) 118 | app._subscription_consumers.append(subscription_consumer) 119 | subscription_consumer._consumer = AsyncMock() 120 | subscription_consumer._consumer._client.ready.return_value = False 121 | with pytest.raises(kafkaesk.exceptions.ConsumerUnhealthyException): 122 | await app.health_check() 123 | 124 | async def test_consumer_health_check_producer_healthy(self): 125 | app = kafkaesk.Application() 126 | app._producer = MagicMock() 127 | app._producer._sender.sender_task.done.return_value = False 128 | await app.health_check() 129 | 130 | async def test_consumer_health_check_producer_unhealthy(self): 131 | app = kafkaesk.Application() 132 | app._producer = MagicMock() 133 | app._producer._sender.sender_task.done.return_value = True 134 | with pytest.raises(kafkaesk.exceptions.ProducerUnhealthyException): 135 | await app.health_check() 136 | 137 | async def test_configure_kafka_producer(self): 138 | app = kafkaesk.Application( 139 | kafka_settings={ 140 | "metadata_max_age_ms": 100, 141 | "max_batch_size": 100, 142 | # invalid for producer so should not be applied here 143 | "max_partition_fetch_bytes": 100, 144 | } 145 | ) 146 | # verify it is created correctly 147 | app.producer_factory() 148 | 149 | # now, validate the wiring 150 | with patch("kafkaesk.app.aiokafka.AIOKafkaProducer") as mock: 151 | app.producer_factory() 152 | mock.assert_called_with( 153 | bootstrap_servers=None, 154 | loop=ANY, 155 | api_version="auto", 156 | metadata_max_age_ms=100, 157 | max_batch_size=100, 158 | ) 159 | 160 | async def test_configure_kafka_consumer(self): 161 | app = kafkaesk.Application( 162 | kafka_settings={ 163 | "max_partition_fetch_bytes": 100, 164 | "fetch_max_wait_ms": 100, 165 | "metadata_max_age_ms": 100, 166 | # invalid for consumer so should not be applied here 167 | "max_batch_size": 100, 168 | } 169 | ) 170 | # verify it is created correctly 171 | app.consumer_factory(group_id="foobar") 172 | 173 | # now, validate the wiring 174 | with patch("kafkaesk.app.aiokafka.AIOKafkaConsumer") as mock: 175 | app.consumer_factory(group_id="foobar") 176 | mock.assert_called_with( 177 | bootstrap_servers=None, 178 | loop=ANY, 179 | group_id="foobar", 180 | api_version="auto", 181 | auto_offset_reset="earliest", 182 | enable_auto_commit=False, 183 | max_partition_fetch_bytes=100, 184 | fetch_max_wait_ms=100, 185 | metadata_max_age_ms=100, 186 | ) 187 | 188 | def test_configure(self): 189 | app = kafkaesk.Application() 190 | app.configure( 191 | kafka_servers=["kafka_servers"], 192 | topic_prefix="topic_prefix", 193 | kafka_settings={"kafka_settings": "kafka_settings"}, 194 | api_version="api_version", 195 | replication_factor="replication_factor", 196 | ) 197 | assert app._kafka_servers == 
["kafka_servers"] 198 | assert app._topic_prefix == "topic_prefix" 199 | assert app._kafka_settings == {"kafka_settings": "kafka_settings"} 200 | assert app._kafka_api_version == "api_version" 201 | assert app._replication_factor == "replication_factor" 202 | 203 | # now make sure none values do not overwrite 204 | app.configure( 205 | kafka_servers=None, 206 | topic_prefix=None, 207 | kafka_settings=None, 208 | api_version=None, 209 | replication_factor=None, 210 | ) 211 | assert app._kafka_servers == ["kafka_servers"] 212 | assert app._topic_prefix == "topic_prefix" 213 | assert app._kafka_settings == {"kafka_settings": "kafka_settings"} 214 | assert app._kafka_api_version == "api_version" 215 | assert app._replication_factor == "replication_factor" 216 | 217 | async def test_initialize_with_unconfigured_app_raises_exception(self): 218 | app = kafkaesk.Application() 219 | with pytest.raises(kafkaesk.exceptions.AppNotConfiguredException): 220 | await app.initialize() 221 | 222 | async def test_publish_propagates_headers(self): 223 | app = kafkaesk.Application(kafka_servers=["foo"]) 224 | 225 | class Foo(pydantic.BaseModel): 226 | bar: str 227 | 228 | producer = AsyncMock() 229 | producer.send.return_value = fut = asyncio.Future() 230 | fut.set_result("ok") 231 | app._get_producer = AsyncMock(return_value=producer) 232 | app._topic_mng = MagicMock() 233 | app._topic_mng.get_topic_id.return_value = "foobar" 234 | app._topic_mng.topic_exists = AsyncMock(return_value=True) 235 | 236 | future = await app.publish("foobar", Foo(bar="foo"), headers=[("foo", b"bar")]) 237 | _ = await future 238 | 239 | producer.send.assert_called_with( 240 | "foobar", 241 | value=b'{"schema":"Foo:1","data":{"bar":"foo"}}', 242 | key=None, 243 | headers=[("foo", b"bar")], 244 | ) 245 | 246 | async def test_publish_configured_retention_policy(self): 247 | app = kafkaesk.Application(kafka_servers=["foo"]) 248 | 249 | @app.schema(retention=100) 250 | class Foo(pydantic.BaseModel): 251 | bar: str 252 | 253 | producer = AsyncMock() 254 | producer.send.return_value = fut = asyncio.Future() 255 | fut.set_result("ok") 256 | app._get_producer = AsyncMock(return_value=producer) 257 | app._topic_mng = MagicMock() 258 | app._topic_mng.get_topic_id.return_value = "foobar" 259 | app._topic_mng.topic_exists = AsyncMock(return_value=False) 260 | app._topic_mng.create_topic = AsyncMock() 261 | 262 | future = await app.publish("foobar", Foo(bar="foo"), headers=[("foo", b"bar")]) 263 | await future 264 | app._topic_mng.create_topic.assert_called_with( 265 | "foobar", replication_factor=None, retention_ms=100 * 1000 266 | ) 267 | 268 | async def test_publish_injects_tracing(self): 269 | app = kafkaesk.Application(kafka_servers=["foo"]) 270 | producer = AsyncMock() 271 | producer.send.return_value = fut = asyncio.Future() 272 | fut.set_result("ok") 273 | app._get_producer = AsyncMock(return_value=producer) 274 | config = Config( 275 | config={"sampler": {"type": "const", "param": 1}, "logging": True, "propagation": "b3"}, 276 | service_name="test_service", 277 | scope_manager=ContextVarsScopeManager(), 278 | ) 279 | # this call also sets opentracing.tracer 280 | tracer = config.initialize_tracer() 281 | 282 | span = tracer.start_span(operation_name="dummy") 283 | tracer.scope_manager.activate(span, True) 284 | 285 | future = await app.raw_publish("foobar", b"foobar") 286 | await future 287 | 288 | headers = producer.mock_calls[0].kwargs["headers"] 289 | assert str(span).startswith(headers[0][1].decode()) 290 | 291 | 292 | class 
TestSchemaRegistration: 293 | def test_schema_registration_repr(self): 294 | reg = SchemaRegistration(id="id", version=1, model=None) 295 | assert repr(reg) == "" 296 | 297 | 298 | test_app = Application() 299 | 300 | 301 | def app_callable(): 302 | return test_app 303 | 304 | 305 | class TestRun: 306 | def test_run(self): 307 | rapp = AsyncMock() 308 | with patch("kafkaesk.app.run_app", rapp), patch("kafkaesk.app.cli_parser") as cli_parser: 309 | args = Mock() 310 | args.app = "tests.unit.test_app:test_app" 311 | args.kafka_servers = "foo,bar" 312 | args.kafka_settings = json.dumps({"foo": "bar"}) 313 | args.topic_prefix = "prefix" 314 | args.api_version = "api_version" 315 | cli_parser.parse_args.return_value = args 316 | 317 | run() 318 | 319 | rapp.assert_called_once() 320 | assert test_app._kafka_servers == ["foo", "bar"] 321 | assert test_app._kafka_settings == {"foo": "bar"} 322 | assert test_app._topic_prefix == "prefix" 323 | assert test_app._kafka_api_version == "api_version" 324 | 325 | def test_run_callable(self): 326 | rapp = AsyncMock() 327 | with patch("kafkaesk.app.run_app", rapp), patch("kafkaesk.app.cli_parser") as cli_parser: 328 | args = Mock() 329 | args.app = "tests.unit.test_app:app_callable" 330 | args.kafka_settings = None 331 | cli_parser.parse_args.return_value = args 332 | 333 | run() 334 | 335 | rapp.assert_called_once() 336 | 337 | async def test_run_app(self): 338 | app_mock = AsyncMock() 339 | app_mock.consume_forever.return_value = (set(), set()) 340 | loop = MagicMock() 341 | with patch("kafkaesk.app.asyncio.get_event_loop", return_value=loop): 342 | await run_app(app_mock) 343 | app_mock.consume_forever.assert_called_once() 344 | assert len(loop.add_signal_handler.mock_calls) == 2 345 | -------------------------------------------------------------------------------- /tests/unit/test_consumer.py: -------------------------------------------------------------------------------- 1 | from kafkaesk import Application 2 | from kafkaesk import Subscription 3 | from kafkaesk.consumer import build_handler 4 | from kafkaesk.consumer import BatchConsumer, Subscription 5 | from kafkaesk.exceptions import ConsumerUnhealthyException 6 | from kafkaesk.exceptions import StopConsumer 7 | from kafkaesk.exceptions import UnhandledMessage 8 | from tests.utils import record_factory 9 | from unittest.mock import AsyncMock 10 | from unittest.mock import MagicMock 11 | from unittest.mock import Mock 12 | from unittest.mock import patch 13 | 14 | import aiokafka.errors 15 | import asyncio 16 | import opentracing 17 | import pydantic 18 | import pytest 19 | import pytest_asyncio 20 | import time 21 | import json 22 | 23 | pytestmark = pytest.mark.asyncio 24 | 25 | 26 | @pytest_asyncio.fixture() 27 | def subscription_conf(): 28 | subscription = Subscription( 29 | "foo", 30 | lambda record: 1, 31 | "group", 32 | topics=["foo"], 33 | timeout_seconds=1, 34 | ) 35 | yield subscription 36 | 37 | 38 | @pytest_asyncio.fixture() 39 | def subscription(subscription_conf): 40 | yield BatchConsumer( 41 | subscription=subscription_conf, 42 | app=Application(kafka_servers=["foobar"]), 43 | ) 44 | 45 | 46 | def test_subscription_repr(): 47 | sub = Subscription("stream_id", lambda x: None, "group") 48 | assert repr(sub) == "" 49 | 50 | 51 | class TestMessageHandler: 52 | def factory(self, func): 53 | return build_handler(func, app=MagicMock(), consumer=None) 54 | 55 | async def test_message_handler(self): 56 | side_effect = None 57 | 58 | async def raw_func(data): 59 | nonlocal side_effect 60 | 
assert isinstance(data, dict) 61 | side_effect = True 62 | 63 | handler = self.factory(raw_func) 64 | await handler(record_factory(), None) 65 | assert side_effect is True 66 | 67 | async def test_message_handler_map_types(self): 68 | class Foo(pydantic.BaseModel): 69 | foo: str 70 | 71 | async def handle_func(ob: Foo, schema, record, app, span: opentracing.Span): 72 | assert ob.foo == "bar" 73 | assert schema == "Foo:1" 74 | assert record is not None 75 | assert app is not None 76 | assert span is not None 77 | 78 | handler = self.factory(handle_func) 79 | await handler(record_factory(), MagicMock()) 80 | 81 | async def test_malformed_message(self): 82 | class Foo(pydantic.BaseModel): 83 | foo: str 84 | 85 | side_effect = None 86 | 87 | async def func(ob: Foo): 88 | nonlocal side_effect 89 | side_effect = True 90 | 91 | record = aiokafka.structs.ConsumerRecord( 92 | topic="topic", 93 | partition=0, 94 | offset=0, 95 | timestamp=time.time() * 1000, 96 | timestamp_type=1, 97 | key="key", 98 | value=json.dumps({"schema": "Foo:1", "data": "bad format"}).encode(), 99 | checksum="1", 100 | serialized_key_size=10, 101 | serialized_value_size=10, 102 | headers=[], 103 | ) 104 | 105 | handler = self.factory(func) 106 | with pytest.raises(UnhandledMessage): 107 | await handler(record, None) 108 | 109 | assert side_effect is None 110 | 111 | 112 | class TestSubscriptionConsumer: 113 | async def test_healthy(self, subscription): 114 | subscription._consumer = MagicMock() 115 | subscription._running = True 116 | subscription._consumer._coordinator.coordinator_id = "coordinator_id" 117 | subscription._consumer._client.ready = AsyncMock(return_value=True) 118 | assert await subscription.healthy() is None 119 | subscription._consumer._client.ready.assert_called_with("coordinator_id") 120 | 121 | async def test_unhealthy(self, subscription): 122 | subscription._consumer = MagicMock() 123 | subscription._running = True 124 | subscription._consumer._client.ready = AsyncMock(return_value=False) 125 | with pytest.raises(ConsumerUnhealthyException): 126 | assert await subscription.healthy() 127 | 128 | subscription._consumer = MagicMock() 129 | subscription._running = False 130 | with pytest.raises(ConsumerUnhealthyException): 131 | assert await subscription.healthy() 132 | 133 | async def test_emit(self, subscription_conf): 134 | probe = AsyncMock() 135 | 136 | sub = BatchConsumer( 137 | subscription=subscription_conf, 138 | app=Application(kafka_servers=["foobar"]), 139 | event_handlers={"event": [probe]}, 140 | ) 141 | await sub.emit("event", "foo", "bar") 142 | probe.assert_called_with("foo", "bar") 143 | 144 | async def test_emit_raises_stop(self, subscription_conf): 145 | sub = BatchConsumer( 146 | subscription=subscription_conf, 147 | app=Application(kafka_servers=["foobar"]), 148 | event_handlers={"event": [AsyncMock(side_effect=StopConsumer)]}, 149 | ) 150 | 151 | with pytest.raises(StopConsumer): 152 | await sub.emit("event", "foo", "bar") 153 | 154 | async def test_emit_swallow_ex(self, subscription_conf): 155 | sub = BatchConsumer( 156 | subscription=subscription_conf, 157 | app=Application(kafka_servers=["foobar"]), 158 | event_handlers={"event": [AsyncMock(side_effect=Exception)]}, 159 | ) 160 | 161 | await sub.emit("event", "foo", "bar") 162 | 163 | async def test_retries_on_connection_failure(self, subscription): 164 | run_mock = AsyncMock() 165 | sleep = AsyncMock() 166 | run_mock.side_effect = [aiokafka.errors.KafkaConnectionError, StopConsumer] 167 | subscription._consumer = MagicMock() 168 
| with patch.object(subscription, "initialize", AsyncMock()), patch.object( 169 | subscription, "finalize", AsyncMock() 170 | ), patch.object(subscription, "_consume", run_mock), patch( 171 | "kafkaesk.consumer.asyncio.sleep", sleep 172 | ): 173 | await subscription() 174 | sleep.assert_called_once() 175 | assert len(run_mock.mock_calls) == 2 176 | 177 | async def test_finalize_handles_exceptions(self, subscription): 178 | consumer = AsyncMock() 179 | consumer.stop.side_effect = Exception 180 | consumer.commit.side_effect = Exception 181 | 182 | subscription._consumer = consumer 183 | await subscription.finalize() 184 | 185 | consumer.stop.assert_called_once() 186 | 187 | async def test_run_exits_when_fut_closed_fut(self, subscription): 188 | sub = subscription 189 | consumer = AsyncMock() 190 | consumer.getmany.return_value = {"": [record_factory() for _ in range(10)]} 191 | sub._consumer = consumer 192 | sub._running = True 193 | 194 | async def _handle_message(record): 195 | await asyncio.sleep(0.03) 196 | 197 | with patch.object(sub, "_handler", _handle_message): 198 | task = asyncio.create_task(sub._consume()) 199 | await asyncio.sleep(0.01) 200 | stop_task = asyncio.create_task(sub.stop()) 201 | await asyncio.sleep(0.01) 202 | sub._close.set_result(None) 203 | 204 | await asyncio.wait([stop_task, task]) 205 | 206 | async def test_auto_commit_can_be_disabled(self, subscription_conf): 207 | sub = BatchConsumer( 208 | subscription=subscription_conf, 209 | app=Application(kafka_servers=["foobar"]), 210 | auto_commit=False, 211 | ) 212 | await sub._maybe_commit() 213 | assert sub._last_commit == 0 214 | -------------------------------------------------------------------------------- /tests/unit/test_exceptions.py: -------------------------------------------------------------------------------- 1 | from kafkaesk.app import SchemaRegistration 2 | from kafkaesk.exceptions import SchemaConflictException 3 | 4 | 5 | def test_repr_conflict(): 6 | ex = SchemaConflictException( 7 | SchemaRegistration("id", 1, None), SchemaRegistration("id", 1, None) 8 | ) 9 | assert "Schema Conflict" in str(ex) 10 | -------------------------------------------------------------------------------- /tests/unit/test_kafka.py: -------------------------------------------------------------------------------- 1 | from kafkaesk.kafka import KafkaTopicManager 2 | from unittest.mock import patch 3 | 4 | import kafka.errors 5 | import pytest 6 | 7 | pytestmark = pytest.mark.asyncio 8 | 9 | 10 | async def test_create_topic_uses_replication_factor_from_servers(): 11 | mng = KafkaTopicManager(["foo", "bar"]) 12 | with patch("kafka.admin.client.KafkaAdminClient"): 13 | await mng.create_topic("Foobar") 14 | client = await mng.get_admin_client() 15 | assert client.create_topics.called 16 | assert client.create_topics.call_args[0][0][0].replication_factor == 2 17 | 18 | 19 | async def test_create_topic_uses_replication_factor_from_servers_min_3(): 20 | mng = KafkaTopicManager(["foo", "bar", "foo2", "foo3", "foo4"]) 21 | with patch("kafka.admin.client.KafkaAdminClient"): 22 | await mng.create_topic("Foobar") 23 | client = await mng.get_admin_client() 24 | assert client.create_topics.called 25 | assert client.create_topics.call_args[0][0][0].replication_factor == 3 26 | 27 | 28 | async def test_create_topic_uses_replication_factor(): 29 | mng = KafkaTopicManager(["foo", "bar"], replication_factor=1) 30 | with patch("kafka.admin.client.KafkaAdminClient"): 31 | await mng.create_topic("Foobar", retention_ms=100) 32 | client = await 
mng.get_admin_client() 33 | assert client.create_topics.called 34 | assert client.create_topics.call_args[0][0][0].replication_factor == 1 35 | assert client.create_topics.call_args[0][0][0].topic_configs["retention.ms"] == 100 36 | 37 | 38 | async def test_create_topic_already_exists(): 39 | mng = KafkaTopicManager(["foo", "bar"], replication_factor=1) 40 | with patch("kafka.admin.client.KafkaAdminClient"): 41 | client = await mng.get_admin_client() 42 | client.create_topics.side_effect = kafka.errors.TopicAlreadyExistsError 43 | await mng.create_topic("Foobar") 44 | client.create_topics.assert_called_once() 45 | 46 | 47 | def test_constructor_translates_api_version(): 48 | mng = KafkaTopicManager(["foobar"], kafka_api_version="auto") 49 | assert mng.kafka_api_version is None 50 | 51 | mng = KafkaTopicManager(["foobar"], kafka_api_version="2.4.0") 52 | assert mng.kafka_api_version == (2, 4, 0) 53 | -------------------------------------------------------------------------------- /tests/unit/test_metrics.py: -------------------------------------------------------------------------------- 1 | from aiokafka.structs import OffsetAndMetadata 2 | from aiokafka.structs import TopicPartition 3 | from kafkaesk.app import Application 4 | from kafkaesk.consumer import BatchConsumer, Subscription 5 | from tests.utils import record_factory 6 | from unittest.mock import AsyncMock 7 | from unittest.mock import MagicMock 8 | from unittest.mock import patch 9 | 10 | import asyncio 11 | import pytest 12 | 13 | pytestmark = pytest.mark.asyncio 14 | 15 | 16 | async def test_record_metric_on_rebalance(): 17 | async def coro(*arg, **kwargs): 18 | pass 19 | 20 | with patch("kafkaesk.consumer.CONSUMER_REBALANCED") as rebalance_metric: 21 | app_mock = AsyncMock() 22 | app_mock.topic_mng.list_consumer_group_offsets.return_value = { 23 | TopicPartition(topic="foobar", partition=0): OffsetAndMetadata(offset=0, metadata={}) 24 | } 25 | 26 | subscription = Subscription( 27 | "test_consumer", 28 | coro, 29 | "group", 30 | topics=["stream.foo"], 31 | ) 32 | 33 | rebalance_listener = BatchConsumer( 34 | subscription=subscription, 35 | app=app_mock, 36 | ) 37 | rebalance_listener._consumer = AsyncMock() 38 | 39 | await rebalance_listener.on_partitions_assigned( 40 | [TopicPartition(topic="foobar", partition=0)] 41 | ) 42 | rebalance_metric.labels.assert_called_with( 43 | partition=0, 44 | group_id="group", 45 | event="assigned", 46 | ) 47 | rebalance_metric.labels().inc.assert_called_once() 48 | 49 | 50 | async def test_record_metric_on_publish(): 51 | """ 52 | this test is acting funny on github action... 
53 | """ 54 | with patch("kafkaesk.app.PUBLISHED_MESSAGES") as published_metric, patch( 55 | "kafkaesk.app.PUBLISHED_MESSAGES_TIME" 56 | ) as published_metric_time, patch("kafkaesk.metrics.PUBLISH_MESSAGES") as publish_metric, patch( 57 | "kafkaesk.metrics.PUBLISH_MESSAGES_TIME" 58 | ) as publish_metric_time: 59 | app = Application() 60 | 61 | async def _fake_publish(*args, **kwargs): 62 | async def _publish(): 63 | return record_factory() 64 | 65 | return asyncio.create_task(_publish()) 66 | 67 | producer = AsyncMock() 68 | producer.send.side_effect = _fake_publish 69 | app._get_producer = AsyncMock(return_value=producer) 70 | app._topic_mng = MagicMock() 71 | app._topic_mng.get_topic_id.return_value = "foobar" 72 | 73 | await (await app.raw_publish("foo", b"data")) 74 | 75 | published_metric.labels.assert_called_with(stream_id="foobar", partition=0, error="none") 76 | published_metric.labels( 77 | stream_id="foobar", partition=0, error="none" 78 | ).inc.assert_called_once() 79 | published_metric_time.labels.assert_called_with(stream_id="foobar") 80 | published_metric_time.labels(stream_id="foobar").observe.assert_called_once() 81 | 82 | publish_metric.labels.assert_called_with(stream_id="foobar", error="none") 83 | publish_metric.labels(stream_id="foobar", error="none").inc.assert_called_once() 84 | publish_metric_time.labels.assert_called_with(stream_id="foobar") 85 | publish_metric_time.labels(stream_id="foobar").observe.assert_called_once() 86 | 87 | 88 | async def test_record_metric_error(): 89 | """ 90 | this test is acting funny on github action... 91 | """ 92 | with patch("kafkaesk.metrics.PUBLISH_MESSAGES") as publish_metric, patch( 93 | "kafkaesk.metrics.PUBLISH_MESSAGES_TIME" 94 | ) as publish_metric_time: 95 | app = Application() 96 | 97 | producer = AsyncMock() 98 | producer.send.side_effect = Exception 99 | app._get_producer = AsyncMock(return_value=producer) 100 | app._topic_mng = MagicMock() 101 | app._topic_mng.get_topic_id.return_value = "foobar" 102 | 103 | with pytest.raises(Exception): 104 | await app.raw_publish("foo", b"data") 105 | 106 | publish_metric.labels.assert_called_with(stream_id="foobar", error="exception") 107 | publish_metric.labels(stream_id="foobar", error="none").inc.assert_called_once() 108 | publish_metric_time.labels.assert_called_with(stream_id="foobar") 109 | publish_metric_time.labels(stream_id="foobar").observe.assert_called_once() 110 | -------------------------------------------------------------------------------- /tests/unit/test_schema.py: -------------------------------------------------------------------------------- 1 | from kafkaesk import Application 2 | from kafkaesk.exceptions import SchemaConflictException 3 | 4 | import pydantic 5 | import pytest 6 | 7 | pytestmark = pytest.mark.asyncio 8 | 9 | 10 | async def test_not_allowed_to_register_same_schema_twice(): 11 | app = Application() 12 | 13 | @app.schema("Foo", version=1) 14 | class Foo1(pydantic.BaseModel): 15 | bar: str 16 | 17 | with pytest.raises(SchemaConflictException): 18 | 19 | @app.schema("Foo", version=1) 20 | class Foo2(pydantic.BaseModel): 21 | foo: str 22 | 23 | 24 | async def test_do_not_require_schema_name(): 25 | app = Application() 26 | 27 | @app.schema() 28 | class Foo(pydantic.BaseModel): 29 | bar: str 30 | 31 | assert "Foo:1" in app._schemas 32 | 33 | 34 | async def test_get_registered_schema(): 35 | app = Application() 36 | 37 | @app.schema() 38 | class Foo(pydantic.BaseModel): 39 | bar: str 40 | 41 | assert app.get_schema_reg(Foo) is not None 42 | 43 | 44 | 
async def test_get_registered_schema_missing(): 45 | app = Application() 46 | 47 | class Foo(pydantic.BaseModel): 48 | bar: str 49 | 50 | assert app.get_schema_reg(Foo) is None 51 | -------------------------------------------------------------------------------- /tests/utils.py: -------------------------------------------------------------------------------- 1 | import aiokafka.structs 2 | import json 3 | import time 4 | 5 | 6 | def record_factory(): 7 | return aiokafka.structs.ConsumerRecord( 8 | topic="topic", 9 | partition=0, 10 | offset=0, 11 | timestamp=time.time() * 1000, 12 | timestamp_type=1, 13 | key="key", 14 | value=json.dumps({"schema": "Foo:1", "data": {"foo": "bar"}}).encode(), 15 | checksum="1", 16 | serialized_key_size=10, 17 | serialized_value_size=10, 18 | headers=[], 19 | ) 20 | --------------------------------------------------------------------------------
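A quick, illustrative note on how the helpers above fit together: record_factory() returns a ConsumerRecord whose payload matches the "Foo:1" schema, so it pairs with kafkaesk.consumer.build_handler for handler tests that never touch a broker, following the same pattern as TestMessageHandler in tests/unit/test_consumer.py and avoiding the docker-compose Kafka stack the acceptance tests need. The sketch below is an assumption-laden example and not part of the repository; the test name and the local Foo model are made up for illustration.

# Illustrative sketch only -- not part of the repository.
# Unit-tests a typed message handler against a synthetic record, mirroring
# the TestMessageHandler pattern in tests/unit/test_consumer.py.
from kafkaesk.consumer import build_handler
from tests.utils import record_factory
from unittest.mock import MagicMock

import pydantic
import pytest


class Foo(pydantic.BaseModel):
    foo: str


@pytest.mark.asyncio
async def test_typed_handler_parses_record():
    seen = []

    async def handle(ob: Foo):
        # record_factory() carries {"schema": "Foo:1", "data": {"foo": "bar"}}
        seen.append(ob)

    # build_handler maps the raw ConsumerRecord onto the handler's signature
    handler = build_handler(handle, app=MagicMock(), consumer=None)
    await handler(record_factory(), None)

    assert seen == [Foo(foo="bar")]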