├── .bandit
├── .flake8
├── .github
│   └── workflows
│       ├── ci.yml
│       └── upload-pypi.yml
├── .gitignore
├── .isort.cfg
├── .pre-commit-config.yaml
├── LICENSE
├── README.md
├── docker-compose.yml
├── examples
│   ├── logger.py
│   ├── parallel.py
│   └── simple.py
├── kafkaesk
│   ├── __init__.py
│   ├── app.py
│   ├── consumer.py
│   ├── exceptions.py
│   ├── ext
│   │   ├── __init__.py
│   │   └── logging
│   │       ├── __init__.py
│   │       ├── handler.py
│   │       └── record.py
│   ├── kafka.py
│   ├── metrics.py
│   ├── publish.py
│   ├── py.typed
│   └── utils.py
├── mypy.ini
├── poetry.lock
├── pyproject.toml
├── pytest.ini
├── stubs
│   ├── aiokafka
│   │   ├── __init__.py
│   │   ├── errors.py
│   │   └── structs.py
│   └── kafka
│       ├── __init__.py
│       ├── admin
│       │   ├── __init__.py
│       │   └── client.py
│       ├── errors.py
│       └── structs.py
└── tests
    ├── __init__.py
    ├── acceptance
    │   ├── __init__.py
    │   ├── ext
    │   │   ├── __init__.py
    │   │   └── logging
    │   │       ├── __init__.py
    │   │       ├── test_handler.py
    │   │       └── test_record.py
    │   ├── produce.py
    │   ├── test_healthcheck.py
    │   ├── test_pubsub.py
    │   ├── test_rebalance.py
    │   └── test_run.py
    ├── conftest.py
    ├── fixtures.py
    ├── unit
    │   ├── __init__.py
    │   ├── ext
    │   │   ├── __init__.py
    │   │   └── logging
    │   │       ├── __init__.py
    │   │       └── test_handler.py
    │   ├── test_app.py
    │   ├── test_consumer.py
    │   ├── test_exceptions.py
    │   ├── test_kafka.py
    │   ├── test_metrics.py
    │   └── test_schema.py
    └── utils.py
/.bandit:
--------------------------------------------------------------------------------
1 | [bandit]
2 | exclude: tests
3 | skips: B101,B110,B112,B303,B311
4 |
--------------------------------------------------------------------------------
/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | no-accept-encodings = True
3 | max-line-length = 100
4 | ignore =
5 | E203
6 | W503
7 | E231
8 |
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: kafkaesk
2 |
3 | on: [push]
4 |
5 | jobs:
6 | # Job to run pre-checks
7 | pre-checks:
8 | runs-on: ubuntu-latest
9 | strategy:
10 | matrix:
11 | python-version: [3.8]
12 |
13 | steps:
14 | - name: Checkout the repository
15 | uses: actions/checkout@v2
16 |
17 | - name: Setup Python
18 | uses: actions/setup-python@v1
19 | with:
20 | python-version: ${{ matrix.python-version }}
21 |
22 | - name: Install package
23 | run: |
24 | pip install poetry
25 | poetry install
26 | - name: Run pre-checks
27 | run: |
28 | poetry run flake8 kafkaesk --config=.flake8
29 | poetry run mypy kafkaesk/
30 | poetry run isort -c -rc kafkaesk/
31 | poetry run black --check --verbose kafkaesk
32 | # Job to run tests
33 | tests:
34 | runs-on: ubuntu-latest
35 |
36 | strategy:
37 | matrix:
38 | python-version: [3.8]
39 |
40 | steps:
41 | - name: Checkout the repository
42 | uses: actions/checkout@v2
43 |
44 | - name: Setup Python
45 | uses: actions/setup-python@v1
46 | with:
47 | python-version: ${{ matrix.python-version }}
48 |
49 | - name: Start Docker containers for Zookeeper and Kafka
50 | run: docker-compose up -d
51 |
52 | - name: Install the package
53 | run: |
54 | pip install poetry
55 | poetry install
56 | - name: Run tests
57 | run: |
58 | poetry run pytest -rfE --reruns 2 --cov=kafkaesk -s --tb=native -v --cov-report xml --cov-append tests
59 | - name: Upload coverage to Codecov
60 | uses: codecov/codecov-action@v1
61 | with:
62 | file: ./coverage.xml
63 |
--------------------------------------------------------------------------------
/.github/workflows/upload-pypi.yml:
--------------------------------------------------------------------------------
1 | name: Upload package to pypi
2 |
3 | on:
4 | push:
5 | branches:
6 | - "master"
7 |
8 | jobs:
9 | upload:
10 | runs-on: ubuntu-latest
11 |
12 | steps:
13 | - name: Checkout the repository
14 | uses: actions/checkout@v2
15 |
16 | - name: Setup Python
17 | uses: actions/setup-python@v2
18 | with:
19 | python-version: "3.8"
20 |
21 | - name: Publish package
22 | run: |
23 | pip install poetry
24 | poetry config pypi-token.pypi ${{ secrets.PYPI_TOKEN }}
25 | poetry publish --build
26 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .vscode/
2 | .idea/
3 | # Python Stuff
4 | **/__pycache__
5 | *.egg-info
6 | .mypy_cache/
7 | .python-version
8 | dist/
9 | .venv/
10 | venv/
--------------------------------------------------------------------------------
/.isort.cfg:
--------------------------------------------------------------------------------
1 | [settings]
2 | force_alphabetical_sort = True
3 | force_single_line = True
4 | not_skip = __init__.py
5 | line_length = 110
6 | wrap_length = 100
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: https://github.com/ambv/black
3 | rev: stable
4 | hooks:
5 | - id: black
6 | language_version: python3.8
7 | - repo: https://github.com/pre-commit/mirrors-isort
8 | rev: v4.3.20
9 | hooks:
10 | - id: isort
11 |
12 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The BSD-2 license
2 |
3 | Copyright (c) 2016, Plone Foundation
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
7 |
8 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
9 |
10 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
11 |
12 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
13 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 | # kafkaesk
8 |
9 |
10 | ## Table Of Contents
11 |
12 | - [About the Project](#about-the-project)
13 | - [Publish](#publish)
14 | - [Subscribe](#subscribe)
15 | - [Avoiding global object](#avoiding-global-object)
16 | - [Manual commit](#manual-commit)
17 | - [kafkaesk contract](#kafkaesk-contract)
18 | - [Worker](#worker)
19 | - [Development](#development)
20 | - [Extensions](#extensions)
21 | - [Naming](#naming)
22 |
23 |
24 | ## About The Project
25 |
26 | This project is meant to help facilitate effortless publishing and subscribing to events with Python and Kafka.
27 |
28 | ### Guiding principles
29 |
30 | - HTTP
31 | - Language agnostic
32 | - Contracts built on top of [Kafka](https://kafka.apache.org/)
33 |
34 |
35 | ### Alternatives
36 | - [aiokafka](https://aiokafka.readthedocs.io/en/stable/): can be complex to scale correctly
37 | - [guillotina_kafka](https://github.com/onna/guillotina_kafka): complex, tied to [Guillotina](https://guillotina.readthedocs.io/en/latest/)
38 | - [faust](https://faust.readthedocs.io/en/latest/): requires additional data layers, not language agnostic
39 | - Confluent Kafka + Avro: close, but ends up resembling gRPC with per-language schema compilation; no asyncio support
40 |
41 | > Consider this Python project as syntactic sugar around these ideas.
42 |
43 | ## Publish
44 |
45 | The examples use [pydantic](https://pydantic-docs.helpmanual.io/) models, but publishing can also be done with plain JSON (a raw-JSON sketch follows the example below).
46 |
47 | ```python
48 | import kafkaesk
49 | from pydantic import BaseModel
50 |
51 | app = kafkaesk.Application()
52 |
53 | @app.schema("Content", version=1, retention=24 * 60 * 60)
54 | class ContentMessage(BaseModel):
55 | foo: str
56 |
57 |
58 | async def foobar():
59 | # ...
60 | # doing something in an async func
61 | await app.publish("content.edited.Resource", data=ContentMessage(foo="bar"))
62 | ```
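
Publishing without a pydantic model goes through the same mechanics. A minimal sketch using `raw_publish`, reusing `app` and the `Content` schema registered above (the payload follows the message format described in the contract section below):

```python
import json


async def publish_raw():
    # Envelope format: {"schema": "<schema_id>:<version>", "data": {...}}
    payload = json.dumps({"schema": "Content:1", "data": {"foo": "bar"}}).encode("utf-8")
    fut = await app.raw_publish("content.edited.Resource", payload)
    await fut  # wait for delivery confirmation
```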
63 |
64 | A convenience `publish` method is also available on the `subscriber` dependency instance; it propagates the
65 | headers of the consumed record to the newly published message.
66 |
67 | ```python
68 | import kafkaesk
69 | from pydantic import BaseModel
70 |
71 | app = kafkaesk.Application()
72 |
73 | @app.schema("Content", version=1, retention=24 * 60 * 60)
74 | class ContentMessage(BaseModel):
75 | foo: str
76 |
77 |
78 | @app.subscribe("content.*", "group_id")
79 | async def get_messages(data: ContentMessage, subscriber):
80 | print(f"{data.foo}")
81 | # This will propagate `data` record headers
82 | await subscriber.publish("content.edited.Resource", data=ContentMessage(foo="bar"))
83 |
84 | ```
85 |
86 | ## Subscribe
87 |
88 | ```python
89 | import kafkaesk
90 | from pydantic import BaseModel
91 |
92 | app = kafkaesk.Application()
93 |
94 | @app.schema("Content", version=1, retention=24 * 60 * 60)
95 | class ContentMessage(BaseModel):
96 | foo: str
97 |
98 |
99 | @app.subscribe("content.*", "group_id")
100 | async def get_messages(data: ContentMessage):
101 | print(f"{data.foo}")
102 |
103 | ```
104 |
105 | ## Avoiding global object
106 |
107 | If you do not want to have global application configuration, you can lazily configure
108 | the application and register schemas/subscribers separately.
109 |
110 | ```python
111 | import kafkaesk
112 | from pydantic import BaseModel
113 |
114 | router = kafkaesk.Router()
115 |
116 | @router.schema("Content", version=1, retention=24 * 60 * 60)
117 | class ContentMessage(BaseModel):
118 | foo: str
119 |
120 |
121 | @router.subscribe("content.*", "group_id")
122 | async def get_messages(data: ContentMessage):
123 | print(f"{data.foo}")
124 |
125 |
126 | if __name__ == "__main__":
127 | app = kafkaesk.Application()
128 | app.mount(router)
129 | kafkaesk.run(app)
130 |
131 | ```
132 |
133 | Optional consumer injected parameters:
134 |
135 | - schema: str
136 | - record: aiokafka.structs.ConsumerRecord
137 | - app: kafkaesk.app.Application
138 | - subscriber: kafkaesk.app.BatchConsumer
139 |
140 | Depending on the type annotation of the first parameter, different data is injected (a combined sketch follows the list):
141 |
142 | - `async def get_messages(data: ContentMessage)`: parses the payload into the pydantic model
143 | - `async def get_messages(data: bytes)`: gives the raw message bytes
144 | - `async def get_messages(record: aiokafka.structs.ConsumerRecord)`: gives the Kafka record object
145 | - `async def get_messages(data)`: gives the parsed JSON data of the message
146 |
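Reusing the `ContentMessage` schema from above, a sketch of a handler that combines a typed payload with the optional injected parameters (injection is done by parameter name):

```python
import aiokafka.structs

@app.subscribe("content.*", "group_id")
async def get_messages(
    data: ContentMessage,                     # parsed pydantic payload
    schema: str,                              # schema id of the message, e.g. "Content:1"
    record: aiokafka.structs.ConsumerRecord,  # underlying Kafka record
    app,                                      # the kafkaesk Application
    subscriber,                               # the consumer instance
):
    print(schema, record.offset, data.foo)
```
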
147 | ## Manual commit
148 |
149 | To accomplish a manual commit strategy yourself:
150 |
151 | ```python
152 | app = kafkaesk.Application(auto_commit=False)
153 |
154 | @app.subscribe("content.*", "group_id")
155 | async def get_messages(data: ContentMessage, subscriber):
156 | print(f"{data.foo}")
157 | await subscriber.consumer.commit()
158 | ```
159 |
160 | ## SSL
161 | Add these values to your `kafka_settings` (a configuration sketch follows the list):
162 | - `ssl_context` - the `ssl.SSLContext` to use; because it is an object, it is generally created within the application rather than in static configuration
163 | - `security_protocol` - one of SSL or PLAINTEXT
164 | - `sasl_mechanism` - one of PLAIN, GSSAPI, SCRAM-SHA-256, SCRAM-SHA-512, OAUTHBEARER
165 | - `sasl_plain_username` - username for SASL PLAIN/SCRAM authentication
166 | - `sasl_plain_password` - password for SASL PLAIN/SCRAM authentication
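
A minimal configuration sketch (the broker address is a placeholder; the SSL context is created in application code):

```python
import ssl

import kafkaesk

ssl_context = ssl.create_default_context()

app = kafkaesk.Application(
    kafka_servers=["broker.example.com:9093"],
    kafka_settings={
        "ssl_context": ssl_context,
        "security_protocol": "SSL",
        # When SASL authentication is required, add the sasl_* settings
        # listed above here as well.
    },
)
```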
167 |
168 | ## kafkaesk contract
169 |
170 | This is a library around using kafka.
171 | Kafka itself does not enforce these concepts.
172 |
173 | - Every message must provide a json schema
174 | - Messages produced will be validated against json schema
175 | - Each topic will have only one schema
176 | - A single schema can be used for multiple topics
177 | - Consumed message schema validation is up to the consumer
178 | - Messages will be consumed at least once. Considering this, your handling should be idempotent
179 |
180 | ### Message format
181 |
182 | ```json
183 | {
184 | "schema": "schema_name:1",
185 | "data": { ... }
186 | }
187 | ```
188 |
189 | ## Worker
190 |
191 | ```bash
192 | kafkaesk mymodule:app --kafka-servers=localhost:9092
193 | ```
194 |
195 | Options:
196 |
197 | - --kafka-servers: comma separated list of kafka servers
198 | - --kafka-settings: json encoded options to be passed to https://aiokafka.readthedocs.io/en/stable/api.html#aiokafkaconsumer-class
199 | - --topic-prefix: prefix to use for topics
200 | - --replication-factor: what replication factor topics should be created with. Defaults to min(number of servers, 3).
201 |
202 | ### Application.publish
203 |
204 | - stream_id: str: name of stream to send data to
205 | - data: class that inherits from pydantic.BaseModel
206 | - key: Optional[bytes]: key for message if it needs one
207 |
208 | ### Application.subscribe
209 |
210 | - stream_id: str: fnmatch pattern of streams to subscribe to
211 | - group: Optional[str]: consumer group id to use. Will use name of function if not provided
212 |
213 | ### Application.schema
214 |
215 | - id: str: id of the schema to store
216 | - version: Optional[int]: version of schema to store
217 | - streams: Optional[List[str]]: if streams are known ahead of time, you can pre-create them before you push data
218 | - retention: Optional[int]: retention policy in seconds
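
For example, a sketch registering a schema with pre-created streams and a retention policy, reusing the pydantic imports from the earlier examples (the stream names are illustrative):

```python
@app.schema("Content", version=1, streams=["content.foo", "content.bar"], retention=24 * 60 * 60)
class ContentMessage(BaseModel):
    foo: str
```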
219 |
220 | ### Application.configure
221 |
222 | - kafka_servers: Optional[List[str]]: kafka servers to connect to
223 | - topic_prefix: Optional[str]: topic name prefix to subscribe to
224 | - kafka_settings: Optional[Dict[str, Any]]: additional aiokafka settings to pass in
225 | - replication_factor: Optional[int]: what replication factor topics should be created with. Defaults to min(number of servers, 3).
226 | - kafka_api_version: str: default `auto`
227 | - auto_commit: bool: default `True`
228 | - auto_commit_interval_ms: int: default `5000`
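
A sketch of lazy configuration at startup (server list and prefix are placeholders):

```python
app = kafkaesk.Application()
app.configure(
    kafka_servers=["localhost:9092"],
    topic_prefix="myservice-",
    # auto_commit_interval_ms is read by the consumer to decide how often to commit
    kafka_settings={"auto_commit_interval_ms": 2000},
    replication_factor=1,
)
```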
229 |
230 | ## Development
231 |
232 | ### Requirements
233 |
234 | - [Docker](https://www.docker.com/)
235 | - [Poetry](https://python-poetry.org/)
236 |
237 | ```bash
238 | poetry install
239 | ```
240 |
241 | Run tests:
242 |
243 | ```bash
244 | docker-compose up
245 | KAFKA=localhost:9092 poetry run pytest tests
246 | ```
247 |
248 | ## Extensions
249 |
250 | ### Logging
251 | This extension includes classes that extend Python's logging framework to publish structured log messages to a Kafka topic.
252 | It is made up of three main components: an extended `logging.LogRecord` factory and two custom `logging.Handler` implementations.
253 |
254 | See `logger.py` in examples directory.
255 |
256 | #### Log Record
257 | `kafkaesk.ext.logging.record.factory` is a function that will return `kafkaesk.ext.logging.record.PydanticLogRecord` objects.
258 | The `factory()` function scans through any `args` passed to a logger and checks each item to determine whether it is an instance of a `pydantic.BaseModel` subclass.
259 |
260 | If it is such an instance and `model._is_log_model` evaluates to `True`, the model is removed from `args` and added to `record._pydantic_data`.
261 | After that, `factory()` uses logging's existing logic to finish creating the log record.
262 |
263 | #### Handler
264 | This extension ships with two handlers capable of handling `kafkaesk.ext.logging.handler.PydanticLogModel` classes: `kafkaesk.ext.logging.handler.PydanticStreamHandler` and `kafkaesk.ext.logging.handler.PydanticKafkaeskHandler`.
265 |
266 | The stream handler is a thin wrapper around `logging.StreamHandler` with the same signature; the only difference is that it attempts to convert any pydantic models it receives into a human-readable log message.
267 |
268 | The kafkaesk handler has a few more bits going on in the background.
269 |
270 | The handler has two required inputs, a `kafkaesk.app.Application` instance and a stream name.
271 |
272 | Once initialized, any logs handled by it are saved into an internal queue.
273 | A worker task pulls logs from the queue and writes them to the specified stream.
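
A minimal setup sketch, mirroring `examples/logger.py` (the stream name `logging.test` is illustrative):

```python
import logging

from kafkaesk import Application
from kafkaesk.ext.logging import PydanticKafkaeskHandler
from kafkaesk.ext.logging import PydanticStreamHandler

app = Application(kafka_servers=["localhost:9092"])

logger = logging.getLogger("my.app")
# Ship structured log records to the "logging.test" stream...
logger.addHandler(PydanticKafkaeskHandler(app, "logging.test"))
# ...and also render pydantic log models as human-readable messages on stdout.
logger.addHandler(PydanticStreamHandler())
logger.setLevel(logging.INFO)
# Note: the Application still needs to be running (e.g. `async with app:`)
# for queued log messages to be delivered to Kafka.
```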
274 |
275 | ## Naming
276 |
277 | It's hard and "kafka" is already a fun name.
278 | Hopefully this library isn't literally "kafkaesque" for you.
279 |
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
1 | # just for dev, fun, playing around
2 | version: "3"
3 | services:
4 | zookeeper:
5 | image: bitnami/zookeeper:latest
6 | ports:
7 | - 2181:2181
8 | expose:
9 | - 2181
10 | environment:
11 | "ALLOW_ANONYMOUS_LOGIN": "yes"
12 | networks:
13 | - kafka-network
14 |
15 | kafka:
16 | image: bitnami/kafka:latest
17 | depends_on:
18 | - zookeeper
19 | ports:
20 | - 9092:9092
21 | expose:
22 | - 9092
23 | links:
24 | - zookeeper
25 | environment:
26 | "ALLOW_PLAINTEXT_LISTENER": "yes"
27 | "KAFKA_CFG_ZOOKEEPER_CONNECT": "zookeeper:2181"
28 | "KAFKA_CFG_AUTO_CREATE_TOPICS_ENABLE": "true"
29 | "KAFKA_CFG_ADVERTISED_LISTENERS": "PLAINTEXT://localhost:9092"
30 | networks:
31 | - kafka-network
32 |
33 | networks:
34 | kafka-network:
35 | driver: bridge
36 |
--------------------------------------------------------------------------------
/examples/logger.py:
--------------------------------------------------------------------------------
1 | from kafkaesk import Application
2 | from kafkaesk.ext.logging import PydanticKafkaeskHandler
3 | from kafkaesk.ext.logging import PydanticLogModel
4 | from kafkaesk.ext.logging import PydanticStreamHandler
5 | from pydantic import BaseModel
6 | from typing import Optional
7 |
8 | import asyncio
9 | import logging
10 |
11 |
12 | class UserLog(BaseModel):
13 | _is_log_model = True
14 | user: Optional[str] = None
15 |
16 |
17 | async def test_log() -> None:
18 | app = Application(kafka_servers=["localhost:9092"])
19 |
20 | logger = logging.getLogger("kafkaesk.ext.logging.kafka")
21 | handler = PydanticKafkaeskHandler(app, "logging.test")
22 | logger.addHandler(handler)
23 | logger.setLevel(logging.DEBUG)
24 |
25 |     stream_logger = logging.getLogger("kafkaesk.ext.logging.stream")
26 | stream_handler = PydanticStreamHandler()
27 | stream_logger.addHandler(stream_handler)
28 | stream_logger.setLevel(logging.DEBUG)
29 |
30 | @app.subscribe("logging.test", group="example.logging.consumer")
31 | async def consume(data: PydanticLogModel) -> None:
32 | stream_logger.info(data.json())
33 |
34 | async with app:
35 | logger.debug("Log Message", UserLog(user="kafkaesk"))
36 | await app.flush()
37 | await app.consume_for(1, seconds=5)
38 |
39 |
40 | if __name__ == "__main__":
41 | asyncio.run(test_log())
42 |
--------------------------------------------------------------------------------
/examples/parallel.py:
--------------------------------------------------------------------------------
1 | from kafkaesk import Application
2 | from kafkaesk import run_app
3 | from pydantic import BaseModel
4 |
5 | import asyncio
6 | import logging
7 | import random
8 |
9 |
10 | logging.basicConfig(level=logging.INFO)
11 |
12 |
13 | app = Application()
14 |
15 |
16 | @app.schema("Foobar", streams=["content.foo", "slow.content.foo", "failed.content.foo"])
17 | class Foobar(BaseModel):
18 | timeout: int
19 |
20 |
21 | async def consumer_logic(data: Foobar, record, subscriber):
22 | try:
23 | print(f"{data} -- {record.headers}: waiting {data.timeout}s...")
24 | await asyncio.sleep(data.timeout)
25 | print(f"{data}: done...")
26 | except asyncio.CancelledError:
27 | # Slow topic
28 | print(f"{data} timeout message, sending to slow topic...")
29 | await subscriber.publish(f"slow.{record.topic}", record, headers=[("slow", b"true")])
30 | except Exception:
31 | await subscriber.publish(f"failed.{record.topic}", record)
32 |
33 |
34 | async def generate_data(app):
35 | idx = 0
36 | while True:
37 | timeout = random.randint(0, 10)
38 | await app.publish("content.foo", Foobar(timeout=timeout))
39 | idx += 1
40 | await asyncio.sleep(0.1)
41 |
42 |
43 | async def run():
44 | app.configure(kafka_servers=["localhost:9092"])
45 | task = asyncio.create_task(generate_data(app))
46 |
47 | # Regular tasks should be consumed in less than 5s
48 | app.subscribe("content.*", group="example_content_group", concurrency=10, timeout_seconds=5)(
49 | consumer_logic
50 | )
51 |
52 |     # Timeout tasks (slow) can be consumed independently, with different configuration and logic
53 | app.subscribe(
54 | "slow.content.*", group="timeout_example_content_group", concurrency=1, timeout_seconds=None
55 | )(consumer_logic)
56 |
57 | await run_app(app)
58 |
59 |
60 | if __name__ == "__main__":
61 | asyncio.run(run())
62 |
--------------------------------------------------------------------------------
/examples/simple.py:
--------------------------------------------------------------------------------
1 | from kafkaesk import Application
2 | from kafkaesk import run_app
3 | from pydantic import BaseModel
4 |
5 | import asyncio
6 | import logging
7 |
8 | logging.basicConfig(level=logging.INFO)
9 |
10 |
11 | app = Application()
12 |
13 |
14 | @app.schema("Foobar")
15 | class Foobar(BaseModel):
16 | foo: str
17 | bar: str
18 |
19 |
20 | @app.subscribe("content.*", group="example_content_group")
21 | async def messages(data: Foobar, record):
22 | await asyncio.sleep(0.1)
23 | print(f"{data.foo}: {data.bar}: {record}")
24 |
25 |
26 | async def generate_data(app):
27 | idx = 0
28 | while True:
29 | await app.publish("content.foo", Foobar(foo=str(idx), bar="yo"))
30 | idx += 1
31 | await asyncio.sleep(0.1)
32 |
33 |
34 | async def run():
35 | app.configure(kafka_servers=["localhost:9092"])
36 | task = asyncio.create_task(generate_data(app))
37 | await run_app(app)
38 | # await app.consume_forever()
39 |
40 |
41 | if __name__ == "__main__":
42 | asyncio.run(run())
43 |
--------------------------------------------------------------------------------
/kafkaesk/__init__.py:
--------------------------------------------------------------------------------
1 | from .app import Application # noqa
2 | from .app import BatchConsumer # noqa
3 | from .app import Router # noqa
4 | from .app import run # noqa
5 | from .app import run_app # noqa
6 | from .app import Subscription # noqa
7 |
--------------------------------------------------------------------------------
/kafkaesk/app.py:
--------------------------------------------------------------------------------
1 | from .consumer import BatchConsumer
2 | from .consumer import Subscription
3 | from .exceptions import AppNotConfiguredException
4 | from .exceptions import ProducerUnhealthyException
5 | from .exceptions import SchemaConflictException
6 | from .exceptions import StopConsumer
7 | from .kafka import KafkaTopicManager
8 | from .metrics import NOERROR
9 | from .metrics import PRODUCER_TOPIC_OFFSET
10 | from .metrics import PUBLISHED_MESSAGES
11 | from .metrics import PUBLISHED_MESSAGES_TIME
12 | from .metrics import watch_kafka
13 | from .metrics import watch_publish
14 | from .utils import resolve_dotted_name
15 | from asyncio.futures import Future
16 | from functools import partial
17 | from opentracing.scope_managers.contextvars import ContextVarsScopeManager
18 | from pydantic import BaseModel
19 | from types import TracebackType
20 | from typing import Any
21 | from typing import Awaitable
22 | from typing import Callable
23 | from typing import cast
24 | from typing import Dict
25 | from typing import List
26 | from typing import Optional
27 | from typing import Tuple
28 | from typing import Type
29 |
30 | import aiokafka
31 | import aiokafka.errors
32 | import aiokafka.structs
33 | import argparse
34 | import asyncio
35 | import logging
36 | import opentracing
37 | import orjson
38 | import pydantic
39 | import signal
40 | import time
41 |
42 | logger = logging.getLogger("kafkaesk")
43 |
44 |
45 | class SchemaRegistration:
46 | def __init__(
47 | self,
48 | id: str,
49 | version: int,
50 | model: Type[pydantic.BaseModel],
51 | retention: Optional[int] = None,
52 | streams: Optional[List[str]] = None,
53 | ):
54 | self.id = id
55 | self.version = version
56 | self.model = model
57 | self.retention = retention
58 | self.streams = streams
59 |
60 | def __repr__(self) -> str:
61 | return f""
62 |
63 |
64 | def published_callback(topic: str, start_time: float, fut: Future) -> None:
65 | # Record the metrics
66 | finish_time = time.time()
67 | exception = fut.exception()
68 | if exception:
69 | error = str(exception.__class__.__name__)
70 | PUBLISHED_MESSAGES.labels(stream_id=topic, partition=-1, error=error).inc()
71 | else:
72 | metadata = fut.result()
73 | PUBLISHED_MESSAGES.labels(
74 | stream_id=topic, partition=metadata.partition, error=NOERROR
75 | ).inc()
76 | PRODUCER_TOPIC_OFFSET.labels(stream_id=topic, partition=metadata.partition).set(
77 | metadata.offset
78 | )
79 | PUBLISHED_MESSAGES_TIME.labels(stream_id=topic).observe(finish_time - start_time)
80 |
81 |
82 | _aiokafka_consumer_settings = (
83 | "fetch_max_wait_ms",
84 | "fetch_max_bytes",
85 | "fetch_min_bytes",
86 | "max_partition_fetch_bytes",
87 | "request_timeout_ms",
88 | "auto_offset_reset",
89 | "metadata_max_age_ms",
90 | "max_poll_interval_ms",
91 | "rebalance_timeout_ms",
92 | "session_timeout_ms",
93 | "heartbeat_interval_ms",
94 | "consumer_timeout_ms",
95 | "max_poll_records",
96 | "connections_max_idle_ms",
97 | "ssl_context",
98 | "security_protocol",
99 | "sasl_mechanism",
100 | "sasl_plain_username",
101 | "sasl_plain_password",
102 | )
103 | _aiokafka_producer_settings = (
104 | "metadata_max_age_ms",
105 | "request_timeout_ms",
106 | "max_batch_size",
107 | "max_request_size",
108 | "send_backoff_ms",
109 | "retry_backoff_ms",
110 | "ssl_context",
111 | "security_protocol",
112 | "sasl_mechanism",
113 | "sasl_plain_username",
114 | "sasl_plain_password",
115 | )
116 |
117 |
118 | class Router:
119 | """
120 | Application routing configuration.
121 | """
122 |
123 | def __init__(self) -> None:
124 | self._subscriptions: List[Subscription] = []
125 | self._schemas: Dict[str, SchemaRegistration] = {}
126 | self._event_handlers: Dict[str, List[Callable[[], Awaitable[None]]]] = {}
127 |
128 | @property
129 | def subscriptions(self) -> List[Subscription]:
130 | return self._subscriptions
131 |
132 | @property
133 | def schemas(self) -> Dict[str, SchemaRegistration]:
134 | return self._schemas
135 |
136 | @property
137 | def event_handlers(self) -> Dict[str, List[Callable[[], Awaitable[None]]]]:
138 | return self._event_handlers
139 |
140 | def on(self, name: str, handler: Callable[[], Awaitable[None]]) -> None:
141 | if name not in self._event_handlers:
142 | self._event_handlers[name] = []
143 |
144 | self._event_handlers[name].append(handler)
145 |
146 | def _subscribe(
147 | self,
148 | group: str,
149 | *,
150 | consumer_id: str = None,
151 | pattern: str = None,
152 | topics: List[str] = None,
153 | timeout_seconds: float = None,
154 | concurrency: int = None,
155 | ) -> Callable:
156 | def inner(func: Callable) -> Callable:
157 | # If there is no consumer_id use the group instead
158 | subscription = Subscription(
159 | consumer_id or group,
160 | func,
161 | group or func.__name__,
162 | pattern=pattern,
163 | topics=topics,
164 | concurrency=concurrency,
165 | timeout_seconds=timeout_seconds,
166 | )
167 | self._subscriptions.append(subscription)
168 | return func
169 |
170 | return inner
171 |
172 | def subscribe_to_topics(
173 | self,
174 | topics: List[str],
175 | group: str,
176 | *,
177 | timeout_seconds: float = None,
178 | concurrency: int = None,
179 | ) -> Callable:
180 | return self._subscribe(
181 | group=group,
182 | topics=topics,
183 | pattern=None,
184 | timeout_seconds=timeout_seconds,
185 | concurrency=concurrency,
186 | )
187 |
188 | def subscribe_to_pattern(
189 | self,
190 | pattern: str,
191 | group: str,
192 | *,
193 | timeout_seconds: float = None,
194 | concurrency: int = None,
195 | ) -> Callable:
196 | return self._subscribe(
197 | group=group,
198 | topics=None,
199 | pattern=pattern,
200 | timeout_seconds=timeout_seconds,
201 | concurrency=concurrency,
202 | )
203 |
204 | def subscribe(
205 | self,
206 | stream_id: str,
207 | group: str,
208 | *,
209 | timeout_seconds: float = None,
210 | concurrency: int = None,
211 | ) -> Callable:
212 | """Keep backwards compatibility"""
213 | return self._subscribe(
214 | group=group,
215 | topics=None,
216 | pattern=stream_id,
217 | timeout_seconds=timeout_seconds,
218 | concurrency=concurrency,
219 | )
220 |
221 | def schema(
222 | self,
223 | _id: Optional[str] = None,
224 | *,
225 | version: Optional[int] = None,
226 | retention: Optional[int] = None,
227 | streams: Optional[List[str]] = None,
228 | ) -> Callable:
229 | version = version or 1
230 |
231 | def inner(cls: Type[BaseModel]) -> Type[BaseModel]:
232 | if _id is None:
233 | type_id = cls.__name__
234 | else:
235 | type_id = _id
236 | key = f"{type_id}:{version}"
237 | reg = SchemaRegistration(
238 | id=type_id, version=version or 1, model=cls, retention=retention, streams=streams
239 | )
240 | if key in self._schemas:
241 | raise SchemaConflictException(self._schemas[key], reg)
242 | cls.__key__ = key # type: ignore
243 | self._schemas[key] = reg
244 | return cls
245 |
246 | return inner
247 |
248 |
249 | class Application(Router):
250 | """
251 | Application configuration
252 | """
253 |
254 | _producer: Optional[aiokafka.AIOKafkaProducer] = None
255 |
256 | def __init__(
257 | self,
258 | kafka_servers: Optional[List[str]] = None,
259 | topic_prefix: str = "",
260 | kafka_settings: Optional[Dict[str, Any]] = None,
261 | replication_factor: Optional[int] = None,
262 | kafka_api_version: str = "auto",
263 | auto_commit: bool = True,
264 | ):
265 | super().__init__()
266 | self._kafka_servers = kafka_servers
267 | self._kafka_settings = kafka_settings
268 | self._producer = None
269 | self._initialized = False
270 | self._locks: Dict[str, asyncio.Lock] = {}
271 |
272 | self._kafka_api_version = kafka_api_version
273 | self._topic_prefix = topic_prefix
274 | self._replication_factor = replication_factor
275 | self._topic_mng: Optional[KafkaTopicManager] = None
276 | self._subscription_consumers: List[BatchConsumer] = []
277 | self._subscription_consumers_tasks: List[asyncio.Task] = []
278 |
279 | self.auto_commit = auto_commit
280 |
281 | @property
282 | def kafka_settings(self) -> Dict[str, Any]:
283 | return self._kafka_settings or {}
284 |
285 | def mount(self, router: Router) -> None:
286 | self._subscriptions.extend(router.subscriptions)
287 | self._schemas.update(router.schemas)
288 | self._event_handlers.update(router.event_handlers)
289 |
290 | async def health_check(self) -> None:
291 | for subscription_consumer in self._subscription_consumers:
292 | await subscription_consumer.healthy()
293 | if not self.producer_healthy():
294 | raise ProducerUnhealthyException(self._producer) # type: ignore
295 |
296 | async def _call_event_handlers(self, name: str) -> None:
297 | handlers = self._event_handlers.get(name)
298 |
299 | if handlers is not None:
300 | for handler in handlers:
301 | await handler()
302 |
303 | @property
304 | def topic_mng(self) -> KafkaTopicManager:
305 | if self._topic_mng is None:
306 | self._topic_mng = KafkaTopicManager(
307 | cast(List[str], self._kafka_servers),
308 | self._topic_prefix,
309 | replication_factor=self._replication_factor,
310 | kafka_api_version=self._kafka_api_version,
311 | ssl_context=self.kafka_settings.get("ssl_context"),
312 | security_protocol=self.kafka_settings.get("security_protocol", "PLAINTEXT"),
313 | sasl_mechanism=self.kafka_settings.get("sasl_mechanism"),
314 | sasl_plain_username=self.kafka_settings.get("sasl_plain_username"),
315 | sasl_plain_password=self.kafka_settings.get("sasl_plain_password"),
316 | )
317 | return self._topic_mng
318 |
319 | def get_lock(self, name: str) -> asyncio.Lock:
320 | if name not in self._locks:
321 | self._locks[name] = asyncio.Lock()
322 | return self._locks[name]
323 |
324 | def configure(
325 | self,
326 | kafka_servers: Optional[List[str]] = None,
327 | topic_prefix: Optional[str] = None,
328 | kafka_settings: Optional[Dict[str, Any]] = None,
329 | api_version: Optional[str] = None,
330 | replication_factor: Optional[int] = None,
331 | ) -> None:
332 | if kafka_servers is not None:
333 | self._kafka_servers = kafka_servers
334 | if topic_prefix is not None:
335 | self._topic_prefix = topic_prefix
336 | if kafka_settings is not None:
337 | self._kafka_settings = kafka_settings
338 | if api_version is not None:
339 | self._kafka_api_version = api_version
340 | if replication_factor is not None:
341 | self._replication_factor = replication_factor
342 |
343 | @property
344 | def is_configured(self) -> bool:
345 | return bool(self._kafka_servers)
346 |
347 | async def publish_and_wait(
348 | self,
349 | stream_id: str,
350 | data: BaseModel,
351 | key: Optional[bytes] = None,
352 | headers: Optional[List[Tuple[str, bytes]]] = None,
353 | ) -> aiokafka.structs.ConsumerRecord:
354 | return await (await self.publish(stream_id, data, key, headers=headers))
355 |
356 | async def _maybe_create_topic(self, stream_id: str, data: BaseModel = None) -> None:
357 | topic_id = self.topic_mng.get_topic_id(stream_id)
358 | async with self.get_lock(stream_id):
359 | if not await self.topic_mng.topic_exists(topic_id):
360 | reg = None
361 | if data:
362 | reg = self.get_schema_reg(data)
363 | retention_ms = None
364 | if reg is not None and reg.retention is not None:
365 | retention_ms = reg.retention * 1000
366 | await self.topic_mng.create_topic(
367 | topic_id,
368 | replication_factor=self._replication_factor,
369 | retention_ms=retention_ms,
370 | )
371 |
372 | async def publish(
373 | self,
374 | stream_id: str,
375 | data: BaseModel,
376 | key: Optional[bytes] = None,
377 | headers: Optional[List[Tuple[str, bytes]]] = None,
378 | ) -> Awaitable[aiokafka.structs.ConsumerRecord]:
379 | if not self._initialized:
380 | async with self.get_lock("_"):
381 | await self.initialize()
382 |
383 | schema_key = getattr(data, "__key__", None)
384 | if schema_key not in self._schemas:
385 | # do not require key
386 | schema_key = f"{data.__class__.__name__}:1"
387 | data_ = data.dict()
388 |
389 | await self._maybe_create_topic(stream_id, data)
390 | return await self.raw_publish(
391 | stream_id, orjson.dumps({"schema": schema_key, "data": data_}), key, headers=headers
392 | )
393 |
394 | async def raw_publish(
395 | self,
396 | stream_id: str,
397 | data: bytes,
398 | key: Optional[bytes] = None,
399 | headers: Optional[List[Tuple[str, bytes]]] = None,
400 | ) -> Awaitable[aiokafka.structs.ConsumerRecord]:
401 | logger.debug(f"Sending kafka msg: {stream_id}")
402 | producer = await self._get_producer()
403 | tracer = opentracing.tracer
404 |
405 | if not headers:
406 | headers = []
407 | else:
408 | # this is just to check the headers shape
409 | try:
410 | for _, _ in headers:
411 | pass
412 | except ValueError:
413 |                 # We want to be resilient to malformed headers
414 | logger.exception(f"Malformed headers: '{headers}'")
415 |
416 | if isinstance(tracer.scope_manager, ContextVarsScopeManager):
417 | # This only makes sense if the context manager is asyncio aware
418 | if tracer.active_span:
419 | carrier: Dict[str, str] = {}
420 | tracer.inject(
421 | span_context=tracer.active_span,
422 | format=opentracing.Format.TEXT_MAP,
423 | carrier=carrier,
424 | )
425 |
426 | header_keys = [k for k, _ in headers]
427 | for k, v in carrier.items():
428 |                     # Don't overwrite if they are already present!
429 | if k not in header_keys:
430 | headers.append((k, v.encode()))
431 |
432 | if not self.producer_healthy():
433 | raise ProducerUnhealthyException(self._producer) # type: ignore
434 |
435 | topic_id = self.topic_mng.get_topic_id(stream_id)
436 | start_time = time.time()
437 | with watch_publish(topic_id):
438 | fut = await producer.send(
439 | topic_id,
440 | value=data,
441 | key=key,
442 | headers=headers,
443 | )
444 |
445 | fut.add_done_callback(partial(published_callback, topic_id, start_time)) # type: ignore
446 | return fut
447 |
448 | async def flush(self) -> None:
449 | if self._producer is not None:
450 | await self._producer.flush()
451 |
452 | def get_schema_reg(self, model_or_def: BaseModel) -> Optional[SchemaRegistration]:
453 | try:
454 | key = model_or_def.__key__ # type: ignore
455 | return self._schemas[key]
456 | except (AttributeError, KeyError):
457 | return None
458 |
459 | def producer_healthy(self) -> bool:
460 | """
461 | It's possible for the producer to be unhealthy while we're still sending messages to it.
462 | """
463 | if self._producer is not None and self._producer._sender.sender_task is not None:
464 | return not self._producer._sender.sender_task.done()
465 | return True
466 |
467 | def consumer_factory(self, group_id: str) -> aiokafka.AIOKafkaConsumer:
468 | return aiokafka.AIOKafkaConsumer(
469 | bootstrap_servers=cast(List[str], self._kafka_servers),
470 | loop=asyncio.get_event_loop(),
471 | group_id=group_id,
472 | auto_offset_reset="earliest",
473 | api_version=self._kafka_api_version,
474 | enable_auto_commit=False,
475 | **{k: v for k, v in self.kafka_settings.items() if k in _aiokafka_consumer_settings},
476 | )
477 |
478 | def producer_factory(self) -> aiokafka.AIOKafkaProducer:
479 | return aiokafka.AIOKafkaProducer(
480 | bootstrap_servers=cast(List[str], self._kafka_servers),
481 | loop=asyncio.get_event_loop(),
482 | api_version=self._kafka_api_version,
483 | **{k: v for k, v in self.kafka_settings.items() if k in _aiokafka_producer_settings},
484 | )
485 |
486 | async def _get_producer(self) -> aiokafka.AIOKafkaProducer:
487 | if self._producer is None:
488 | self._producer = self.producer_factory()
489 | with watch_kafka("producer_start"):
490 | await self._producer.start()
491 | return self._producer
492 |
493 | async def initialize(self) -> None:
494 | if not self.is_configured:
495 | raise AppNotConfiguredException
496 |
497 | await self._call_event_handlers("initialize")
498 |
499 | for reg in self._schemas.values():
500 | # initialize topics for known streams
501 | for stream_id in reg.streams or []:
502 | topic_id = self.topic_mng.get_topic_id(stream_id)
503 | async with self.get_lock(stream_id):
504 | if not await self.topic_mng.topic_exists(topic_id):
505 | await self.topic_mng.create_topic(
506 | topic_id,
507 | retention_ms=reg.retention * 1000
508 | if reg.retention is not None
509 | else None,
510 | )
511 |
512 | self._initialized = True
513 |
514 | async def finalize(self) -> None:
515 | await self._call_event_handlers("finalize")
516 |
517 | await self.stop()
518 |
519 | if self._producer is not None:
520 | with watch_kafka("producer_flush"):
521 | await self._producer.flush()
522 | with watch_kafka("producer_stop"):
523 | await self._producer.stop()
524 |
525 | if self._topic_mng is not None:
526 | await self._topic_mng.finalize()
527 |
528 | self._producer = None
529 | self._initialized = False
530 | self._topic_mng = None
531 |
532 | async def __aenter__(self) -> "Application":
533 | await self.initialize()
534 | return self
535 |
536 | async def __aexit__(
537 | self,
538 | exc_type: Optional[Type[BaseException]] = None,
539 | exc: Optional[BaseException] = None,
540 | traceback: Optional[TracebackType] = None,
541 | ) -> None:
542 | logger.info("Stopping application...", exc_info=exc)
543 | await self.finalize()
544 |
545 | async def consume_for(self, num_messages: int, *, seconds: Optional[int] = None) -> int:
546 | consumed = 0
547 | self._subscription_consumers = []
548 | tasks = []
549 | for subscription in self._subscriptions:
550 |
551 | async def on_message(record: aiokafka.structs.ConsumerRecord) -> None:
552 | nonlocal consumed
553 | consumed += 1
554 | if consumed >= num_messages:
555 | raise StopConsumer
556 |
557 | consumer = BatchConsumer(
558 | subscription=subscription,
559 | app=self,
560 | event_handlers={"message": [on_message]},
561 | auto_commit=self.auto_commit,
562 | )
563 |
564 | self._subscription_consumers.append(consumer)
565 | tasks.append(asyncio.create_task(consumer(), name=str(consumer)))
566 |
567 | done, pending = await asyncio.wait(
568 | tasks, timeout=seconds, return_when=asyncio.FIRST_EXCEPTION
569 | )
570 | await self.stop()
571 |
572 | # re-raise any errors so we can validate during tests
573 | for task in done:
574 | exc = task.exception()
575 | if exc is not None:
576 | raise exc
577 |
578 | for task in pending:
579 | task.cancel()
580 |
581 | return consumed
582 |
583 | def consume_forever(self) -> Awaitable:
584 | self._subscription_consumers = []
585 | self._subscription_consumers_tasks = []
586 |
587 | for subscription in self._subscriptions:
588 | consumer = BatchConsumer(
589 | subscription=subscription,
590 | app=self,
591 | auto_commit=self.auto_commit,
592 | )
593 | self._subscription_consumers.append(consumer)
594 |
595 | self._subscription_consumers_tasks = [
596 | asyncio.create_task(c()) for c in self._subscription_consumers
597 | ]
598 | return asyncio.wait(self._subscription_consumers_tasks, return_when=asyncio.FIRST_EXCEPTION)
599 |
600 | async def stop(self) -> None:
601 | async with self.get_lock("_"):
602 |             # do not allow stop calls at the same time
603 |
604 | if len(self._subscription_consumers) == 0:
605 | return
606 |
607 | _, pending = await asyncio.wait(
608 | [asyncio.create_task(c.stop()) for c in self._subscription_consumers if c],
609 | timeout=5,
610 | )
611 | for task in pending:
612 | # stop tasks that didn't finish
613 | task.cancel()
614 |
615 | for task in self._subscription_consumers_tasks:
616 | # make sure everything is done
617 | if not task.done():
618 | task.cancel()
619 |
620 | for task in self._subscription_consumers_tasks:
621 | try:
622 | await asyncio.wait([task])
623 | except asyncio.CancelledError:
624 | ...
625 |
626 |
627 | cli_parser = argparse.ArgumentParser(description="Run kafkaesk worker.")
628 | cli_parser.add_argument("app", help="Application object")
629 | cli_parser.add_argument("--kafka-servers", help="Kafka servers")
630 | cli_parser.add_argument("--kafka-settings", help="Kafka settings")
631 | cli_parser.add_argument("--topic-prefix", help="Topic prefix")
632 | cli_parser.add_argument("--api-version", help="Kafka API Version")
633 |
634 |
635 | def _sig_handler(app: Application) -> None:
636 | asyncio.create_task(app.stop())
637 |
638 |
639 | async def run_app(app: Application) -> None:
640 | async with app:
641 | loop = asyncio.get_event_loop()
642 | fut = asyncio.create_task(app.consume_forever())
643 | for signame in {"SIGINT", "SIGTERM"}:
644 | loop.add_signal_handler(getattr(signal, signame), partial(_sig_handler, app))
645 | done, pending = await fut
646 | logger.debug("Exiting consumer")
647 |
648 | await app.stop()
649 | # re-raise any errors so we can validate during tests
650 | for task in done:
651 | exc = task.exception()
652 | if exc is not None:
653 | raise exc
654 |
655 |
656 | def run(app: Optional[Application] = None) -> None:
657 | if app is None:
658 | opts = cli_parser.parse_args()
659 | module_str, attr = opts.app.split(":")
660 | module = resolve_dotted_name(module_str)
661 | app = getattr(module, attr)
662 |
663 | if callable(app):
664 | app = app()
665 |
666 | app = cast(Application, app)
667 |
668 | if opts.kafka_servers:
669 | app.configure(kafka_servers=opts.kafka_servers.split(","))
670 | if opts.kafka_settings:
671 | app.configure(kafka_settings=orjson.loads(opts.kafka_settings))
672 | if opts.topic_prefix:
673 | app.configure(topic_prefix=opts.topic_prefix)
674 | if opts.api_version:
675 | app.configure(api_version=opts.api_version)
676 |
677 | try:
678 | asyncio.run(run_app(app))
679 | except asyncio.CancelledError: # pragma: no cover
680 | logger.debug("Closing because task was exited")
681 |
--------------------------------------------------------------------------------
/kafkaesk/consumer.py:
--------------------------------------------------------------------------------
1 | from .exceptions import ConsumerUnhealthyException
2 | from .exceptions import HandlerTaskCancelled
3 | from .exceptions import StopConsumer
4 | from .exceptions import UnhandledMessage
5 | from .metrics import CONSUMED_MESSAGE_TIME
6 | from .metrics import CONSUMED_MESSAGES
7 | from .metrics import CONSUMED_MESSAGES_BATCH_SIZE
8 | from .metrics import CONSUMER_HEALTH
9 | from .metrics import CONSUMER_REBALANCED
10 | from .metrics import CONSUMER_TOPIC_OFFSET
11 | from .metrics import MESSAGE_LEAD_TIME
12 | from .metrics import NOERROR
13 | from kafka.structs import TopicPartition
14 |
15 | import aiokafka
16 | import asyncio
17 | import fnmatch
18 | import functools
19 | import inspect
20 | import logging
21 | import opentracing
22 | import orjson
23 | import pydantic
24 | import time
25 | import typing
26 |
27 | if typing.TYPE_CHECKING: # pragma: no cover
28 | from .app import Application
29 | else:
30 | Application = None
31 |
32 |
33 | logger = logging.getLogger(__name__)
34 |
35 |
36 | class Subscription:
37 | def __init__(
38 | self,
39 | consumer_id: str,
40 | func: typing.Callable,
41 | group: str,
42 | *,
43 | pattern: typing.Optional[str] = None,
44 | topics: typing.Optional[typing.List[str]] = None,
45 | timeout_seconds: float = 0.0,
46 | concurrency: int = None,
47 | ):
48 | self.consumer_id = consumer_id
49 | self.pattern = pattern
50 | self.topics = topics
51 | self.func = func
52 | self.group = group
53 | self.timeout = timeout_seconds
54 | self.concurrency = concurrency
55 |
56 | def __repr__(self) -> str:
57 | return f""
58 |
59 |
60 | def _pydantic_msg_handler(
61 | model: typing.Type[pydantic.BaseModel], record: aiokafka.ConsumerRecord
62 | ) -> pydantic.BaseModel:
63 | try:
64 | data: typing.Dict[str, typing.Any] = orjson.loads(record.value)
65 | return model.parse_obj(data["data"])
66 | except orjson.JSONDecodeError:
67 |         # log the exception so we can see the invalid payload
68 | logger.warning(f"Payload is not valid json: {record}", exc_info=True)
69 | raise UnhandledMessage("Error deserializing json")
70 | except pydantic.ValidationError:
71 |         # log the exception so we can see what fields failed
72 | logger.warning(f"Error parsing pydantic model:{model} {record}", exc_info=True)
73 | raise UnhandledMessage(f"Error parsing data: {model}")
74 | except Exception:
75 | # Catch all
76 | logger.warning(f"Error parsing payload: {model} {record}", exc_info=True)
77 | raise UnhandledMessage("Error parsing payload")
78 |
79 |
80 | def _raw_msg_handler(record: aiokafka.structs.ConsumerRecord) -> typing.Dict[str, typing.Any]:
81 | data: typing.Dict[str, typing.Any] = orjson.loads(record.value)
82 | return data
83 |
84 |
85 | def _bytes_msg_handler(record: aiokafka.structs.ConsumerRecord) -> bytes:
86 | return record.value
87 |
88 |
89 | def _record_msg_handler(record: aiokafka.structs.ConsumerRecord) -> aiokafka.structs.ConsumerRecord:
90 | return record
91 |
92 |
93 | def build_handler(
94 | coro: typing.Callable, app: "Application", consumer: "BatchConsumer"
95 | ) -> typing.Callable:
96 | """Introspection on the coroutine signature to inject dependencies"""
97 | sig = inspect.signature(coro)
98 | param_name = [k for k in sig.parameters.keys()][0]
99 | annotation = sig.parameters[param_name].annotation
100 | handler = _raw_msg_handler
101 | if annotation and annotation != sig.empty:
102 | if annotation == bytes:
103 | handler = _bytes_msg_handler # type: ignore
104 | elif annotation == aiokafka.ConsumerRecord:
105 | handler = _record_msg_handler # type: ignore
106 | else:
107 | handler = functools.partial(_pydantic_msg_handler, annotation) # type: ignore
108 |
109 | it = iter(sig.parameters.items())
110 |     # the first argument is required and it's the payload
111 | next(it)
112 | kwargs: typing.Dict[str, typing.Any] = getattr(coro, "__extra_kwargs__", {})
113 |
114 | for key, param in it:
115 | if key == "schema":
116 | kwargs["schema"] = None
117 | elif key == "record":
118 | kwargs["record"] = None
119 | elif key == "app":
120 | kwargs["app"] = app
121 | elif key == "subscriber":
122 | kwargs["subscriber"] = consumer
123 | elif issubclass(param.annotation, opentracing.Span):
124 | kwargs[key] = opentracing.Span
125 |
126 | async def inner(record: aiokafka.ConsumerRecord, span: opentracing.Span) -> None:
127 | data = handler(record)
128 | deps = kwargs.copy()
129 |
130 | for key, param in kwargs.items():
131 | if key == "schema":
132 | msg = orjson.loads(record.value)
133 | deps["schema"] = msg["schema"]
134 | elif key == "record":
135 | deps["record"] = record
136 | elif param == opentracing.Span:
137 | deps[key] = span
138 |
139 | await coro(data, **deps)
140 |
141 | return inner
142 |
143 |
144 | class BatchConsumer(aiokafka.ConsumerRebalanceListener):
145 | _subscription: Subscription
146 | _close: typing.Optional[asyncio.Future] = None
147 | _consumer: aiokafka.AIOKafkaConsumer
148 | _offsets: typing.Dict[aiokafka.TopicPartition, int]
149 | _message_handler: typing.Callable
150 | _initialized: bool
151 | _running: bool = False
152 |
153 | def __init__(
154 | self,
155 | subscription: Subscription,
156 | app: "Application",
157 | event_handlers: typing.Optional[typing.Dict[str, typing.List[typing.Callable]]] = None,
158 | auto_commit: bool = True,
159 | ):
160 | self._initialized = False
161 | self.stream_id = subscription.consumer_id
162 | self.group_id = subscription.group
163 | self._coro = subscription.func
164 | self._event_handlers = event_handlers or {}
165 | self._concurrency = subscription.concurrency or 1
166 | self._timeout = subscription.timeout
167 | self._subscription = subscription
168 | self._close = None
169 | self._app = app
170 | self._last_commit = 0.0
171 | self._auto_commit = auto_commit
172 | self._tp: typing.Dict[aiokafka.TopicPartition, int] = {}
173 |
174 |         # We accept either a pattern or a list of topics; a single topic is also accepted
175 |         # to keep compatibility with the older API
176 | self.pattern = subscription.pattern
177 | self.topics = subscription.topics
178 |
179 | async def __call__(self) -> None:
180 | if not self._initialized:
181 | await self.initialize()
182 |
183 | try:
184 | while not self._close:
185 | try:
186 | if not self._consumer.assignment():
187 | await asyncio.sleep(2)
188 | continue
189 | await self._consume()
190 | except aiokafka.errors.KafkaConnectionError:
191 | # We retry
192 | self._health_metric(False)
193 | logger.info(f"Consumer {self} kafka connection error, retrying...")
194 | await asyncio.sleep(0.5)
195 | except asyncio.CancelledError:
196 | self._health_metric(False)
197 | except StopConsumer:
198 | self._health_metric(False)
199 | logger.info(f"Consumer {self} stopped, exiting")
200 | except BaseException as exc:
201 | logger.exception(f"Consumer {self} failed. Finalizing.", exc_info=exc)
202 | self._health_metric(False)
203 | raise
204 | finally:
205 | await self.finalize()
206 |
207 | def _health_metric(self, healthy: bool) -> None:
208 | CONSUMER_HEALTH.labels(
209 | group_id=self.group_id,
210 | ).set(healthy)
211 |
212 | async def emit(self, name: str, *args: typing.Any, **kwargs: typing.Any) -> None:
213 | for func in self._event_handlers.get(name, []):
214 | try:
215 | await func(*args, **kwargs)
216 | except StopConsumer:
217 | raise
218 | except Exception:
219 | logger.warning(f"Error emitting event: {name}: {func}", exc_info=True)
220 |
221 | async def initialize(self) -> None:
222 | self._close = None
223 | self._running = True
224 | self._processing = asyncio.Lock()
225 | self._consumer = await self._consumer_factory()
226 | await self._consumer.start()
227 | self._message_handler = build_handler(self._coro, self._app, self) # type: ignore
228 | self._initialized = True
229 |
230 | async def finalize(self) -> None:
231 | try:
232 | await self._consumer.stop()
233 | except Exception:
234 | logger.info(f"[{self}] Could not commit on shutdown", exc_info=True)
235 |
236 | self._initialized = False
237 | self._running = False
238 | if self._close:
239 | self._close.set_result("done")
240 |
241 | async def _consumer_factory(self) -> aiokafka.AIOKafkaConsumer:
242 | consumer = self._app.consumer_factory(self.group_id)
243 |
244 | if self.pattern and self.topics:
245 | raise AssertionError(
246 | "Both of the params 'pattern' and 'topics' are not allowed. Select only one mode."
247 | ) # noqa
248 |
249 | if self.pattern:
250 | # This is needed in case we have a prefix
251 | topic_id = self._app.topic_mng.get_topic_id(self.pattern)
252 |
253 | if "*" in self.pattern:
254 | pattern = fnmatch.translate(topic_id)
255 | consumer.subscribe(pattern=pattern, listener=self) # type: ignore
256 | else:
257 | consumer.subscribe(topics=[topic_id], listener=self) # type: ignore
258 | elif self.topics:
259 | topics = [self._app.topic_mng.get_topic_id(topic) for topic in self.topics]
260 | consumer.subscribe(topics=topics, listener=self) # type: ignore
261 | else:
262 | raise ValueError("Either `topics` or `pattern` should be defined")
263 |
264 | return consumer
265 |
266 | async def stop(self) -> None:
267 | if not self._running:
268 | return
269 |
270 |         # Exit the loop; this will trigger the finalize call
271 | loop = asyncio.get_running_loop()
272 | self._close = loop.create_future()
273 | await asyncio.wait([self._close])
274 |
275 | def __repr__(self) -> str:
276 | return f""
277 |
278 | def _span(self, record: aiokafka.ConsumerRecord) -> opentracing.SpanContext:
279 | tracer = opentracing.tracer
280 | headers = {x[0]: x[1].decode() for x in record.headers or []}
281 | parent = tracer.extract(opentracing.Format.TEXT_MAP, headers)
282 | context = tracer.start_active_span(
283 | record.topic,
284 | tags={
285 | "message_bus.destination": record.topic,
286 | "message_bus.partition": record.partition,
287 | "message_bus.group_id": self.group_id,
288 | },
289 | references=[opentracing.follows_from(parent)],
290 | )
291 | return context.span
292 |
293 | async def _handler(self, record: aiokafka.ConsumerRecord) -> None:
294 | with self._span(record) as span:
295 | await self._message_handler(record, span)
296 |
297 | async def _consume(self) -> None:
298 | batch = await self._consumer.getmany(max_records=self._concurrency, timeout_ms=500)
299 |
300 | async with self._processing:
301 | if not batch:
302 | await self._maybe_commit()
303 | else:
304 | await self._consume_batch(batch)
305 |
306 | async def _consume_batch(
307 | self, batch: typing.Dict[TopicPartition, typing.List[aiokafka.ConsumerRecord]]
308 | ) -> None:
309 | futures: typing.Dict[asyncio.Future[typing.Any], aiokafka.ConsumerRecord] = dict()
310 | for tp, records in batch.items():
311 | for record in records:
312 | coro = self._handler(record)
313 | fut = asyncio.create_task(coro)
314 | futures[fut] = record
315 |
316 | # TODO: this metric is kept for backwards-compatibility, but should be revisited
317 | with CONSUMED_MESSAGE_TIME.labels(
318 | stream_id=self.stream_id,
319 | partition=next(iter(batch)),
320 | group_id=self.group_id,
321 | ).time():
322 | done, pending = await asyncio.wait(
323 | futures.keys(),
324 | timeout=self._timeout,
325 | return_when=asyncio.FIRST_EXCEPTION,
326 | )
327 |
328 | # Look for failures
329 | for task in done:
330 | record = futures[task]
331 | tp = aiokafka.TopicPartition(record.topic, record.partition)
332 |
333 | # Get the largest offset of the batch
334 | current_max = self._tp.get(tp)
335 | if not current_max:
336 | self._tp[tp] = record.offset + 1
337 | else:
338 | self._tp[tp] = max(record.offset + 1, current_max)
339 |
340 | try:
341 | if exc := task.exception():
342 | self._count_message(record, error=exc.__class__.__name__)
343 | await self.on_handler_failed(exc, record)
344 | else:
345 | self._count_message(record)
346 | except asyncio.InvalidStateError:
347 |                 # Task didn't finish yet; we shouldn't be here since we are
348 |                 # iterating the `done` list, so just log something
349 | logger.warning(f"Trying to get exception from unfinished task. Record: {record}")
350 | except asyncio.CancelledError:
351 | # During task execution any exception will be returned in
352 | # the `done` list. But timeout exception should be captured
353 |                 # independently, that's why we handle this condition here.
354 | self._count_message(record, error="cancelled")
355 | await self.on_handler_failed(HandlerTaskCancelled(record), record)
356 |
357 | # Process timeout tasks
358 | for task in pending:
359 | record = futures[task]
360 |
361 | try:
362 | # This will raise an `asyncio.CancelledError`; the consumer logic
363 | # is responsible for catching it.
364 | task.cancel()
365 | await task
366 | except asyncio.CancelledError:
367 | # The app didn't catch this exception, so we treat it as an unmanaged one.
368 | await self.on_handler_timeout(record)
369 |
370 | self._count_message(record, error="pending")
371 |
372 | for tp, records in batch.items():
373 | CONSUMED_MESSAGES_BATCH_SIZE.labels(
374 | stream_id=tp.topic,
375 | group_id=self.group_id,
376 | partition=tp.partition,
377 | ).observe(len(records))
378 |
379 | for record in sorted(records, key=lambda rec: rec.offset):
380 | lead_time = time.time() - record.timestamp / 1000 # type: ignore
381 | MESSAGE_LEAD_TIME.labels(
382 | stream_id=record.topic,
383 | group_id=self.group_id,
384 | partition=record.partition,
385 | ).observe(lead_time)
386 |
387 | CONSUMER_TOPIC_OFFSET.labels(
388 | stream_id=record.topic,
389 | group_id=self.group_id,
390 | partition=record.partition,
391 | ).set(record.offset)
392 |
393 | # Commit first and then call the event subscribers
394 | await self._maybe_commit()
395 | for _, records in batch.items():
396 | for record in records:
397 | await self.emit("message", record=record)
398 |
399 | def _count_message(self, record: aiokafka.ConsumerRecord, error: str = NOERROR) -> None:
400 | CONSUMED_MESSAGES.labels(
401 | stream_id=record.topic,
402 | error=error,
403 | partition=record.partition,
404 | group_id=self.group_id,
405 | ).inc()
406 |
407 | @property
408 | def consumer(self) -> aiokafka.AIOKafkaConsumer:
409 | return self._consumer
410 |
411 | async def _maybe_commit(self, forced: bool = False) -> None:
412 | if not self._auto_commit:
413 | return
414 |
415 | if not self._consumer.assignment() or not self._tp:
416 | logger.warning("Cannot commit because no partitions are assigned!")
417 | return
418 |
419 | interval = self._app.kafka_settings.get("auto_commit_interval_ms", 5000) / 1000
420 | now = time.time()
421 | if forced or (now > (self._last_commit + interval)):
422 | try:
423 | if self._tp:
424 | await self._consumer.commit(offsets=self._tp)
425 | except aiokafka.errors.CommitFailedError:
426 | logger.warning("Error attempting to commit", exc_info=True)
427 | self._last_commit = now
428 |
429 | async def publish(
430 | self,
431 | stream_id: str,
432 | record: aiokafka.ConsumerRecord,
433 | headers: typing.Optional[typing.List[typing.Tuple[str, bytes]]] = None,
434 | ) -> None:
435 | record_headers = (record.headers or []) + (headers or [])
436 |
437 | fut = await self._app.raw_publish(
438 | stream_id=stream_id, data=record.value, key=record.key, headers=record_headers
439 | )
440 | await fut
441 |
442 | async def healthy(self) -> None:
443 | if not self._running:
444 | self._health_metric(False)
445 | raise ConsumerUnhealthyException(f"Consumer '{self}' is not running")
446 |
447 | if self._consumer is not None and not await self._consumer._client.ready(
448 | self._consumer._coordinator.coordinator_id
449 | ):
450 | self._health_metric(False)
451 | raise ConsumerUnhealthyException(f"Consumer '{self}' is not ready")
452 |
453 | self._health_metric(True)
454 | return
455 |
456 | # Event handlers
457 | async def on_partitions_revoked(self, revoked: typing.List[aiokafka.TopicPartition]) -> None:
458 | if revoked:
459 | # Wait for the current batch to be processed
460 | async with self._processing:
461 | if self._auto_commit:
462 | # And commit before releasing the partitions.
463 | await self._maybe_commit(forced=True)
464 |
465 | for tp in revoked:
466 | # Remove the partition from the dict
467 | self._tp.pop(tp, None)
468 | CONSUMER_REBALANCED.labels(
469 | partition=tp.partition,
470 | group_id=self.group_id,
471 | event="revoked",
472 | ).inc()
473 | logger.info(f"Partitions revoked to {self}: {revoked}")
474 |
475 | async def on_partitions_assigned(self, assigned: typing.List[aiokafka.TopicPartition]) -> None:
476 | if assigned:
477 | logger.info(f"Partitions assigned to {self}: {assigned}")
478 |
479 | for tp in assigned:
480 | position = await self._consumer.position(tp)
481 | self._tp[tp] = position
482 |
483 | CONSUMER_REBALANCED.labels(
484 | partition=tp.partition,
485 | group_id=self.group_id,
486 | event="assigned",
487 | ).inc()
488 |
489 | async def on_handler_timeout(self, record: aiokafka.ConsumerRecord) -> None:
490 | raise HandlerTaskCancelled(record)
491 |
492 | async def on_handler_failed(
493 | self, exception: BaseException, record: aiokafka.ConsumerRecord
494 | ) -> None:
495 | if isinstance(exception, UnhandledMessage):
496 | logger.warning("Unhandled message, ignoring...", exc_info=exception)
497 | else:
498 | raise exception
499 |
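
The batch loop above tracks, per partition, the next offset to commit (last processed offset + 1), and `_maybe_commit` later hands that dict to `consumer.commit()`. A minimal standalone sketch of that bookkeeping, assuming only the aiokafka structs (the helper name is illustrative, not part of the file):

# Sketch of the per-partition offset tracking done in _consume_batch.
import typing

from aiokafka import ConsumerRecord, TopicPartition


def track_offsets(
    records: typing.Iterable[ConsumerRecord],
    offsets: typing.Dict[TopicPartition, int],
) -> typing.Dict[TopicPartition, int]:
    for record in records:
        tp = TopicPartition(record.topic, record.partition)
        current = offsets.get(tp)
        # Kafka commits point at the *next* offset to consume, hence the +1.
        offsets[tp] = record.offset + 1 if current is None else max(record.offset + 1, current)
    return offsets
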
--------------------------------------------------------------------------------
/kafkaesk/exceptions.py:
--------------------------------------------------------------------------------
1 | from typing import TYPE_CHECKING
2 |
3 | import aiokafka
4 |
5 | if TYPE_CHECKING: # pragma: no cover
6 | from .app import SchemaRegistration
7 | else:
8 | SchemaRegistration = SubscriptionConsumer = None
9 |
10 |
11 | class JsonSchemaRequiredException(Exception):
12 | ...
13 |
14 |
15 | class SchemaConflictException(Exception):
16 | def __init__(self, existing: SchemaRegistration, new: SchemaRegistration):
17 | self.existing = existing
18 | self.new = new
19 |
20 | def __str__(self) -> str:
21 | return f""""""
25 |
26 |
27 | class UnhandledMessage(Exception):
28 | ...
29 |
30 |
31 | class StopConsumer(Exception):
32 | ...
33 |
34 |
35 | class HandlerTaskCancelled(Exception):
36 | def __init__(self, record: aiokafka.ConsumerRecord):
37 | self.record = record
38 |
39 |
40 | class ConsumerUnhealthyException(Exception):
41 | def __init__(self, reason: str):
42 | self.reason = reason
43 |
44 |
45 | class AutoCommitError(ConsumerUnhealthyException):
46 | ...
47 |
48 |
49 | class ProducerUnhealthyException(Exception):
50 | def __init__(self, producer: aiokafka.AIOKafkaProducer):
51 | self.producer = producer
52 |
53 |
54 | class AppNotConfiguredException(Exception):
55 | ...
56 |
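
`UnhandledMessage` is the one exception the consumer's `on_handler_failed` tolerates: it logs a warning and keeps consuming, so a handler can raise it to skip a bad record. A minimal, hypothetical handler body illustrating the intent (the function and field names are assumptions for the example):

# Hypothetical subscriber body; only the use of UnhandledMessage is the point.
from kafkaesk.exceptions import UnhandledMessage


async def handle_event(data: dict) -> None:
    if "id" not in data:
        # Skip malformed payloads instead of crashing the consumer loop.
        raise UnhandledMessage(f"Record missing 'id': {data!r}")
    ...  # normal processing
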
--------------------------------------------------------------------------------
/kafkaesk/ext/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/onna/kafkaesk/10e88fd921fddff70b8cb973e739e280caa4cac8/kafkaesk/ext/__init__.py
--------------------------------------------------------------------------------
/kafkaesk/ext/logging/__init__.py:
--------------------------------------------------------------------------------
1 | from .handler import PydanticKafkaeskHandler
2 | from .handler import PydanticLogModel
3 | from .handler import PydanticStreamHandler
4 |
5 | __all__ = ("PydanticLogModel", "PydanticKafkaeskHandler", "PydanticStreamHandler")
6 |
--------------------------------------------------------------------------------
/kafkaesk/ext/logging/handler.py:
--------------------------------------------------------------------------------
1 | from .record import PydanticLogRecord
2 | from datetime import datetime
3 | from typing import Any
4 | from typing import Dict
5 | from typing import IO
6 | from typing import Optional
7 |
8 | import asyncio
9 | import kafkaesk
10 | import logging
11 | import os
12 | import pydantic
13 | import socket
14 | import sys
15 | import time
16 |
17 | NAMESPACE_FILEPATH = "/var/run/secrets/kubernetes.io/serviceaccount/namespace"
18 | _not_set = object()
19 | _K8S_NS = _not_set
20 |
21 |
22 | def get_k8s_ns() -> Optional[str]:
23 | global _K8S_NS
24 | if _K8S_NS is _not_set:
25 | if os.path.exists(NAMESPACE_FILEPATH):
26 | with open(NAMESPACE_FILEPATH) as fi:
27 | _K8S_NS = fi.read().strip()
28 | else:
29 | _K8S_NS = None
30 | return _K8S_NS # type: ignore
31 |
32 |
33 | class InvalidLogFormat(Exception):
34 | ...
35 |
36 |
37 | class PydanticLogModel(pydantic.BaseModel):
38 | class Config:
39 | extra = pydantic.Extra.allow
40 |
41 |
42 | class PydanticStreamHandler(logging.StreamHandler):
43 | def __init__(self, stream: Optional[IO[str]] = None):
44 | super().__init__(stream=stream)
45 |
46 | def format(self, record: PydanticLogRecord) -> str: # type: ignore
47 | message = super().format(record)
48 |
49 | for log in getattr(record, "pydantic_data", []):
50 | # log some attributes
51 | formatted_data = []
52 | size = 0
53 | for field_name in log.__fields__.keys():
54 | val = getattr(log, field_name)
55 | formatted = f"{field_name}={val}"
56 | size += len(formatted)
57 | formatted_data.append(formatted)
58 |
59 | if size > 256:
60 | break
61 | message += f": {', '.join(formatted_data)}"
62 | break
63 |
64 | return message
65 |
66 |
67 | class KafkaeskQueue:
68 | def __init__(
69 | self,
70 | app: kafkaesk.app.Application,
71 | max_queue: int = 10000,
72 | ):
73 | self._queue: Optional[asyncio.Queue] = None
74 | self._queue_size = max_queue
75 |
76 | self._app = app
77 |
78 | self._app.on("finalize", self.flush)
79 |
80 | self._task: Optional[asyncio.Task] = None
81 |
82 | def start(self) -> None:
83 | if self._queue is None:
84 | self._queue = asyncio.Queue(maxsize=self._queue_size)
85 |
86 | if self._task is None or self._task.done():
87 | self._task = asyncio.create_task(self._run())
88 |
89 | def close(self) -> None:
90 | if self._task is not None and not self._task._loop.is_closed():
91 | if not self._task.done() and not self._task.cancelled():
92 | self._task.cancel()
93 |
94 | @property
95 | def running(self) -> bool:
96 | if self._task is None:
97 | return False
98 |
99 | if self._task.done():
100 | return False
101 |
102 | return True
103 |
104 | async def _run(self) -> None:
105 | if self._queue is None:
106 | raise RuntimeError("Queue must be started before workers")
107 |
108 | while True:
109 | try:
110 | stream, log_data = await asyncio.wait_for(asyncio.create_task(self._queue.get()), 1)
111 | await self._publish(stream, log_data)
112 |
113 | except asyncio.TimeoutError:
114 | continue
115 |
116 | except asyncio.CancelledError:
117 | await self.flush()
118 | return
119 |
120 | async def flush(self) -> None:
121 | if self._queue is not None:
122 | while not self._queue.empty():
123 | stream, message = await self._queue.get()
124 | await self._publish(stream, message)
125 |
126 | async def _publish(self, stream: str, log_data: PydanticLogModel) -> None:
127 | if not self._app._initialized:
128 | await self._app.initialize()
129 |
130 | await self._app.publish(stream, log_data)
131 | # TODO: Handle other Kafka errors that may be raised
132 |
133 | def put_nowait(self, stream: str, log_data: PydanticLogModel) -> None:
134 | if self._queue is not None:
135 | self._queue.put_nowait((stream, log_data))
136 |
137 |
138 | _formatter = logging.Formatter()
139 |
140 |
141 | class PydanticKafkaeskHandler(logging.Handler):
142 | def __init__(
143 | self, app: kafkaesk.Application, stream: str, queue: Optional[KafkaeskQueue] = None
144 | ):
145 | self.app = app
146 | self.stream = stream
147 |
148 | if queue is None:
149 | self._queue = KafkaeskQueue(self.app)
150 | else:
151 | self._queue = queue
152 |
153 | self._last_warning_sent = 0.0
154 |
155 | self._initialize_model()
156 |
157 | super().__init__()
158 |
159 | def clone(self) -> "PydanticKafkaeskHandler":
160 | return PydanticKafkaeskHandler(self.app, self.stream, queue=self._queue)
161 |
162 | def _initialize_model(self) -> None:
163 | try:
164 | self.app.schema("PydanticLogModel")(PydanticLogModel)
165 | except kafkaesk.app.SchemaConflictException:
166 | pass
167 |
168 | def _format_base_log(self, record: PydanticLogRecord) -> Dict[str, Any]:
169 | if record.exc_text is None and record.exc_info:
170 | record.exc_text = _formatter.formatException(record.exc_info)
171 | try:
172 | record.exc_type = record.exc_info[0].__name__ # type: ignore
173 | except (AttributeError, IndexError): # pragma: no cover
174 | ...
175 |
176 | if record.stack_info:
177 | record.stack_text = _formatter.formatStack(record.stack_info)
178 |
179 | service_name = "unknown"
180 | hostname = socket.gethostname()
181 | dashes = hostname.count("-")
182 | if dashes > 0:
183 | # detect kubernetes service host
184 | service_name = "-".join(hostname.split("-")[: -min(dashes, 2)])
185 |
186 | return {
187 | "timestamp": datetime.utcnow().isoformat(),
188 | "logger": record.name,
189 | "severity": record.levelname,
190 | "level": record.levelno,
191 | "message": record.getMessage(),
192 | "exception": record.exc_type,
193 | "trace": record.stack_text,
194 | "stack": record.exc_text,
195 | "hostname": hostname,
196 | "service": service_name,
197 | "namespace": get_k8s_ns(),
198 | "cluster": os.environ.get("CLUSTER"),
199 | }
200 |
201 | def _format_extra_logs(self, record: PydanticLogRecord) -> Dict[str, Any]:
202 | extra_logs: Dict[str, Any] = {}
203 |
204 | for log in getattr(record, "pydantic_data", []):
205 | extra_logs.update(
206 | log.dict(
207 | exclude_none=True,
208 | exclude={
209 | "_is_log_model",
210 | },
211 | )
212 | )
213 |
214 | return extra_logs
215 |
216 | def emit(self, record: PydanticLogRecord) -> None: # type: ignore
217 | if not self._queue.running:
218 | try:
219 | self._queue.start()
220 | except RuntimeError:
221 | sys.stderr.write("RuntimeError starting kafka logging, ignoring")
222 | return
223 |
224 | try:
225 | raw_data = self._format_base_log(record)
226 | raw_data.update(self._format_extra_logs(record))
227 | log_data = PydanticLogModel(**raw_data)
228 | self._queue.put_nowait(self.stream, log_data)
229 | except InvalidLogFormat: # pragma: no cover
230 | sys.stderr.write("PydanticKafkaeskHandler recieved non-pydantic model")
231 | except RuntimeError:
232 | sys.stderr.write("Queue No event loop running to send log to Kafka\n")
233 | except asyncio.QueueFull:
234 | if time.time() - self._last_warning_sent > 30:
235 | sys.stderr.write("Queue hit max log queue size, discarding message\n")
236 | self._last_warning_sent = time.time()
237 | except AttributeError: # pragma: no cover
238 | sys.stderr.write("Queue Error sending Kafkaesk log message\n")
239 |
240 | def close(self) -> None:
241 | self.acquire()
242 | try:
243 | super().close()
244 | if self._queue is not None:
245 | self._queue.close()
246 | finally:
247 | self.release()
248 |
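
A minimal sketch of attaching these handlers to a standard logger. The broker address, stream name, and logger name are assumptions for the example, and it presumes `kafkaesk.Application` accepts a `kafka_servers` list and exposes `initialize()`/`finalize()` as the queue above implies:

# Hypothetical wiring of the Kafka and stream log handlers.
import asyncio
import logging

import kafkaesk
from kafkaesk.ext.logging.handler import PydanticKafkaeskHandler, PydanticStreamHandler


async def main() -> None:
    app = kafkaesk.Application(kafka_servers=["localhost:9092"])  # assumed broker address

    logger = logging.getLogger("my.service")
    logger.setLevel(logging.INFO)
    logger.addHandler(PydanticKafkaeskHandler(app, "logs.my-service"))  # ship records to Kafka
    logger.addHandler(PydanticStreamHandler())                          # echo to stderr

    await app.initialize()
    try:
        logger.info("service started")
        await asyncio.sleep(1)  # give the background queue time to publish
    finally:
        await app.finalize()


if __name__ == "__main__":
    asyncio.run(main())
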
--------------------------------------------------------------------------------
/kafkaesk/ext/logging/record.py:
--------------------------------------------------------------------------------
1 | from types import TracebackType
2 | from typing import List
3 | from typing import Optional
4 | from typing import Tuple
5 | from typing import Union
6 |
7 | import logging
8 | import pydantic
9 |
10 |
11 | class PydanticLogRecord(logging.LogRecord):
12 | def __init__(
13 | self,
14 | name: str,
15 | level: int,
16 | fn: str,
17 | lno: int,
18 | msg: str,
19 | args: Tuple,
20 | exc_info: Union[
21 | Tuple[type, BaseException, Optional[TracebackType]], Tuple[None, None, None], None
22 | ],
23 | func: Optional[str] = None,
24 | sinfo: Optional[str] = None,
25 | pydantic_data: Optional[List[pydantic.BaseModel]] = None,
26 | ):
27 | super().__init__(name, level, fn, lno, msg, args, exc_info, func, sinfo)
28 |
29 | self.pydantic_data = pydantic_data or []
30 | self.exc_type: Optional[str] = None
31 | self.stack_text: Optional[str] = None
32 |
33 |
34 | def factory(
35 | name: str,
36 | level: int,
37 | fn: str,
38 | lno: int,
39 | msg: str,
40 | args: Tuple,
41 | exc_info: Union[
42 | Tuple[type, BaseException, Optional[TracebackType]], Tuple[None, None, None], None
43 | ],
44 | func: Optional[str] = None,
45 | sinfo: Optional[str] = None,
46 | ) -> PydanticLogRecord:
47 | pydantic_data: List[pydantic.BaseModel] = []
48 |
49 | new_args = []
50 | for arg in args:
51 | if isinstance(arg, pydantic.BaseModel):
52 | if hasattr(arg, "_is_log_model") and getattr(arg, "_is_log_model", False) is True:
53 | pydantic_data.append(arg)
54 | continue
55 | new_args.append(arg)
56 |
57 | args = tuple(new_args)
58 |
59 | record = PydanticLogRecord(
60 | name, level, fn, lno, msg, args, exc_info, func, sinfo, pydantic_data
61 | )
62 |
63 | return record
64 |
65 |
66 | if logging.getLogRecordFactory() != factory:
67 | logging.setLogRecordFactory(factory)
68 |
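
The factory above is installed as the global log record factory and moves any pydantic model flagged with `_is_log_model` out of the positional args and into `record.pydantic_data`, where the handlers pick it up. A hypothetical model showing the pattern (the model and logger names are assumptions):

# Hypothetical log model; _is_log_model is the marker factory() looks for.
import logging

import pydantic


class UserLogin(pydantic.BaseModel):
    _is_log_model = True  # tells factory() to capture this arg as structured data

    user: str


logger = logging.getLogger("my.service")
# The model is stripped from the args, so the message string needs no placeholder.
logger.info("user logged in", UserLogin(user="alice"))
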
--------------------------------------------------------------------------------
/kafkaesk/kafka.py:
--------------------------------------------------------------------------------
1 | from .metrics import watch_kafka
2 | from aiokafka import TopicPartition
3 | from kafkaesk.utils import run_async
4 | from typing import Any
5 | from typing import Dict
6 | from typing import List
7 | from typing import Optional
8 | from typing import Tuple
9 |
10 | import kafka
11 | import kafka.admin
12 | import kafka.admin.client
13 | import kafka.errors
14 | import kafka.structs
15 |
16 |
17 | class KafkaTopicManager:
18 | _admin_client: Optional[kafka.admin.client.KafkaAdminClient] = None
19 | _client: Optional[kafka.KafkaClient] = None
20 | _kafka_api_version: Optional[Tuple[int, ...]] = None
21 |
22 | def __init__(
23 | self,
24 | bootstrap_servers: List[str],
25 | prefix: str = "",
26 | replication_factor: Optional[int] = None,
27 | kafka_api_version: str = "auto",
28 | ssl_context: Optional[Any] = None,
29 | security_protocol: Optional[str] = "PLAINTEXT",
30 | sasl_mechanism: Optional[str] = "",
31 | sasl_plain_username: Optional[str] = "",
32 | sasl_plain_password: Optional[str] = "",
33 | ):
34 | self.prefix = prefix
35 | self._bootstrap_servers = bootstrap_servers
36 | self._admin_client = self._client = None
37 | self._topic_cache: List[str] = []
38 | self._replication_factor: int = replication_factor or min(3, len(self._bootstrap_servers))
39 | if kafka_api_version == "auto":
40 | self._kafka_api_version = None
41 | else:
42 | self._kafka_api_version = tuple([int(v) for v in kafka_api_version.split(".")])
43 | self.ssl_context = ssl_context
44 | self.security_protocol = security_protocol
45 | self.sasl_mechanism = sasl_mechanism
46 | self.sasl_plain_username = sasl_plain_username
47 | self.sasl_plain_password = sasl_plain_password
48 |
49 | @property
50 | def kafka_api_version(self) -> Optional[Tuple[int, ...]]:
51 | return self._kafka_api_version
52 |
53 | async def finalize(self) -> None:
54 | if self._admin_client is not None:
55 | await run_async(self._admin_client.close)
56 | self._admin_client = None
57 | if self._client is not None:
58 | await run_async(self._client.close)
59 | self._client = None
60 |
61 | def get_topic_id(self, topic: str) -> str:
62 | return f"{self.prefix}{topic}"
63 |
64 | async def get_admin_client(self) -> kafka.admin.client.KafkaAdminClient:
65 | if self._admin_client is None:
66 | with watch_kafka("sync_admin_connect"):
67 | self._admin_client = await run_async(
68 | kafka.admin.client.KafkaAdminClient,
69 | bootstrap_servers=self._bootstrap_servers,
70 | api_version=self._kafka_api_version,
71 | ssl_context=self.ssl_context,
72 | security_protocol=self.security_protocol,
73 | sasl_mechanism=self.sasl_mechanism,
74 | sasl_plain_username=self.sasl_plain_username,
75 | sasl_plain_password=self.sasl_plain_password,
76 | )
77 | return self._admin_client
78 |
79 | async def list_consumer_group_offsets(
80 | self, group_id: str, partitions: Optional[List[TopicPartition]] = None
81 | ) -> Dict[kafka.structs.TopicPartition, kafka.structs.OffsetAndMetadata]:
82 | client = await self.get_admin_client()
83 | return await run_async(client.list_consumer_group_offsets, group_id, partitions=partitions)
84 |
85 | async def topic_exists(self, topic: str) -> bool:
86 | if self._client is None:
87 | with watch_kafka("sync_consumer_connect"):
88 | self._client = await run_async(
89 | kafka.KafkaConsumer,
90 | bootstrap_servers=self._bootstrap_servers,
91 | enable_auto_commit=False,
92 | api_version=self._kafka_api_version,
93 | ssl_context=self.ssl_context,
94 | security_protocol=self.security_protocol,
95 | sasl_mechanism=self.sasl_mechanism,
96 | sasl_plain_username=self.sasl_plain_username,
97 | sasl_plain_password=self.sasl_plain_password,
98 | )
99 | if topic in self._topic_cache:
100 | return True
101 | with watch_kafka("sync_topics"):
102 | if topic in await run_async(self._client.topics):
103 | self._topic_cache.append(topic)
104 | return True
105 | return False
106 |
107 | async def create_topic(
108 | self,
109 | topic: str,
110 | *,
111 | partitions: int = 7,
112 | replication_factor: Optional[int] = None,
113 | retention_ms: Optional[int] = None,
114 | ) -> None:
115 | topic_configs: Dict[str, Any] = {}
116 | if retention_ms is not None:
117 | topic_configs["retention.ms"] = retention_ms
118 | new_topic = kafka.admin.NewTopic(
119 | topic,
120 | partitions,
121 | replication_factor or self._replication_factor,
122 | topic_configs=topic_configs,
123 | )
124 | client = await self.get_admin_client()
125 | try:
126 | with watch_kafka("sync_create_topics"):
127 | await run_async(client.create_topics, [new_topic])
128 | except kafka.errors.TopicAlreadyExistsError:
129 | pass
130 | self._topic_cache.append(topic)
131 | return None
132 |
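
A minimal sketch of driving `KafkaTopicManager` directly (normally the application does this for you). The broker address, prefix, and topic name are assumptions for the example:

# Hypothetical direct use of KafkaTopicManager against a local broker.
import asyncio

from kafkaesk.kafka import KafkaTopicManager


async def main() -> None:
    manager = KafkaTopicManager(["localhost:9092"], prefix="example.")
    topic = manager.get_topic_id("events")  # -> "example.events"

    if not await manager.topic_exists(topic):
        # 24h retention; replication factor defaults to min(3, number of brokers).
        await manager.create_topic(topic, partitions=3, retention_ms=86_400_000)

    await manager.finalize()


if __name__ == "__main__":
    asyncio.run(main())
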
--------------------------------------------------------------------------------
/kafkaesk/metrics.py:
--------------------------------------------------------------------------------
1 | from prometheus_client.utils import INF
2 | from typing import Dict
3 | from typing import Optional
4 | from typing import Type
5 |
6 | import prometheus_client as client
7 | import time
8 | import traceback
9 |
10 | NOERROR = "none"
11 | ERROR_GENERAL_EXCEPTION = "exception"
12 |
13 | KAFKA_ACTION = client.Counter(
14 | "kafkaesk_kafka_action",
15 | "Perform action on kafka",
16 | ["type", "error"],
17 | )
18 |
19 | KAFKA_ACTION_TIME = client.Histogram(
20 | "kafkaesk_kafka_action_time",
21 | "Time taken to perform kafka action",
22 | ["type"],
23 | )
24 |
25 | PUBLISH_MESSAGES = client.Counter(
26 | "kafkaesk_publish_messages",
27 | "Number of messages attempted to be published",
28 | ["stream_id", "error"],
29 | )
30 |
31 | PUBLISH_MESSAGES_TIME = client.Histogram(
32 | "kafkaesk_publish_messages_time",
33 | "Time taken for a message to be queued for publishing (in seconds)",
34 | ["stream_id"],
35 | )
36 |
37 | PUBLISHED_MESSAGES = client.Counter(
38 | "kafkaesk_published_messages",
39 | "Number of published messages",
40 | ["stream_id", "partition", "error"],
41 | )
42 |
43 | PUBLISHED_MESSAGES_TIME = client.Histogram(
44 | "kafkaesk_published_messages_time",
45 | "Time taken for a message to be published (in seconds)",
46 | ["stream_id"],
47 | )
48 |
49 |
50 | CONSUMED_MESSAGES = client.Counter(
51 | "kafkaesk_consumed_messages",
52 | "Number of consumed messages",
53 | ["stream_id", "partition", "error", "group_id"],
54 | )
55 |
56 | CONSUMED_MESSAGES_BATCH_SIZE = client.Histogram(
57 | "kafkaesk_consumed_messages_batch_size",
58 | "Size of message batches consumed",
59 | ["stream_id", "group_id", "partition"],
60 | buckets=[1, 5, 10, 20, 50, 100, 200, 500, 1000],
61 | )
62 |
63 | CONSUMED_MESSAGE_TIME = client.Histogram(
64 | "kafkaesk_consumed_message_elapsed_time",
65 | "Processing time for consumed message (in seconds)",
66 | ["stream_id", "group_id", "partition"],
67 | )
68 |
69 | PRODUCER_TOPIC_OFFSET = client.Gauge(
70 | "kafkaesk_produced_topic_offset",
71 | "Offset for produced messages a the topic",
72 | ["stream_id", "partition"],
73 | )
74 |
75 | CONSUMER_TOPIC_OFFSET = client.Gauge(
76 | "kafkaesk_consumed_topic_offset",
77 | "Offset for consumed messages in a topic",
78 | ["group_id", "partition", "stream_id"],
79 | )
80 |
81 | MESSAGE_LEAD_TIME = client.Histogram(
82 | "kafkaesk_message_lead_time",
83 | "Time that the message has been waiting to be handled by a consumer (in seconds)",
84 | ["stream_id", "group_id", "partition"],
85 | buckets=(0.1, 0.5, 1, 3, 5, 10, 30, 60, 120, 300, INF),
86 | )
87 |
88 | CONSUMER_REBALANCED = client.Counter(
89 | "kafkaesk_consumer_rebalanced",
90 | "Consumer rebalances",
91 | ["group_id", "partition", "event"],
92 | )
93 |
94 | CONSUMER_HEALTH = client.Gauge(
95 | "kafkaesk_consumer_health", "Liveness probe for the consumer", ["group_id"]
96 | )
97 |
98 |
99 | class watch:
100 | start: float
101 |
102 | def __init__(
103 | self,
104 | *,
105 | counter: Optional[client.Counter] = None,
106 | histogram: Optional[client.Histogram] = None,
107 | labels: Optional[Dict[str, str]] = None,
108 | ):
109 | self.counter = counter
110 | self.histogram = histogram
111 | self.labels = labels or {}
112 |
113 | def __enter__(self) -> None:
114 | self.start = time.time()
115 |
116 | def __exit__(
117 | self,
118 | exc_type: Optional[Type[Exception]] = None,
119 | exc_value: Optional[Exception] = None,
120 | exc_traceback: Optional[traceback.StackSummary] = None,
121 | ) -> None:
122 | error = NOERROR
123 | if self.histogram is not None:
124 | finished = time.time()
125 | self.histogram.labels(**self.labels).observe(finished - self.start)
126 |
127 | if self.counter is not None:
128 | if exc_value is None:
129 | error = NOERROR
130 | else:
131 | error = ERROR_GENERAL_EXCEPTION
132 | self.counter.labels(error=error, **self.labels).inc()
133 |
134 |
135 | class watch_kafka(watch):
136 | def __init__(self, type: str):
137 | super().__init__(counter=KAFKA_ACTION, histogram=KAFKA_ACTION_TIME, labels={"type": type})
138 |
139 |
140 | class watch_publish(watch):
141 | def __init__(self, stream_id: str):
142 | super().__init__(
143 | counter=PUBLISH_MESSAGES,
144 | histogram=PUBLISH_MESSAGES_TIME,
145 | labels={"stream_id": stream_id},
146 | )
147 |
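
`watch` is a plain context manager: on exit it observes the elapsed time in the histogram and increments the counter with error="none" or error="exception" depending on whether the block raised; `watch_kafka` and `watch_publish` only pre-bind the metrics and labels. A small sketch with throwaway metrics (the metric names are assumptions, not part of this module):

# Hypothetical use of the watch context manager with ad-hoc metrics.
import prometheus_client as client

from kafkaesk.metrics import watch

DEMO_ACTION = client.Counter("demo_action", "Demo actions", ["type", "error"])
DEMO_ACTION_TIME = client.Histogram("demo_action_time", "Demo action time", ["type"])

with watch(counter=DEMO_ACTION, histogram=DEMO_ACTION_TIME, labels={"type": "demo"}):
    sum(range(100_000))  # placeholder for the operation being timed
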
--------------------------------------------------------------------------------
/kafkaesk/publish.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/onna/kafkaesk/10e88fd921fddff70b8cb973e739e280caa4cac8/kafkaesk/publish.py
--------------------------------------------------------------------------------
/kafkaesk/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/onna/kafkaesk/10e88fd921fddff70b8cb973e739e280caa4cac8/kafkaesk/py.typed
--------------------------------------------------------------------------------
/kafkaesk/utils.py:
--------------------------------------------------------------------------------
1 | from concurrent.futures.thread import ThreadPoolExecutor
2 | from functools import partial
3 | from typing import Any
4 | from typing import Callable
5 |
6 | import asyncio
7 |
8 | executor = ThreadPoolExecutor(max_workers=30)
9 |
10 |
11 | async def run_async(func: Callable[..., Any], *args: Any, **kwargs: Any) -> Any:
12 | func_to_run = partial(func, *args, **kwargs)
13 | loop = asyncio.get_event_loop()
14 | return await loop.run_in_executor(executor, func_to_run)
15 |
16 |
17 | def resolve_dotted_name(name: str) -> Any:
18 | """
19 | import the provided dotted name
20 | >>> resolve_dotted_name('foo.bar')
21 |