├── .bandit
├── .flake8
├── .github
    └── workflows
    │   ├── ci.yml
    │   └── upload-pypi.yml
├── .gitignore
├── .isort.cfg
├── .pre-commit-config.yaml
├── LICENSE
├── README.md
├── docker-compose.yml
├── examples
    ├── logger.py
    ├── parallel.py
    └── simple.py
├── kafkaesk
    ├── __init__.py
    ├── app.py
    ├── consumer.py
    ├── exceptions.py
    ├── ext
    │   ├── __init__.py
    │   └── logging
    │   │   ├── __init__.py
    │   │   ├── handler.py
    │   │   └── record.py
    ├── kafka.py
    ├── metrics.py
    ├── publish.py
    ├── py.typed
    └── utils.py
├── mypy.ini
├── poetry.lock
├── pyproject.toml
├── pytest.ini
├── stubs
    ├── aiokafka
    │   ├── __init__.py
    │   ├── errors.py
    │   └── structs.py
    └── kafka
    │   ├── __init__.py
    │   ├── admin
    │       ├── __init__.py
    │       └── client.py
    │   ├── errors.py
    │   └── structs.py
└── tests
    ├── __init__.py
    ├── acceptance
        ├── __init__.py
        ├── ext
        │   ├── __init__.py
        │   └── logging
        │   │   ├── __init__.py
        │   │   ├── test_handler.py
        │   │   └── test_record.py
        ├── produce.py
        ├── test_healthcheck.py
        ├── test_pubsub.py
        ├── test_rebalance.py
        └── test_run.py
    ├── conftest.py
    ├── fixtures.py
    ├── unit
        ├── __init__.py
        ├── ext
        │   ├── __init__.py
        │   └── logging
        │   │   ├── __init__.py
        │   │   └── test_handler.py
        ├── test_app.py
        ├── test_consumer.py
        ├── test_exceptions.py
        ├── test_kafka.py
        ├── test_metrics.py
        └── test_schema.py
    └── utils.py


/.bandit:
--------------------------------------------------------------------------------
1 | [bandit]
2 | exclude: tests
3 | skips: B101,B110,B112,B303,B311,B303
4 | 


--------------------------------------------------------------------------------
/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | no-accept-encodings = True
3 | max-line-length = 100
4 | ignore =
5 |   E203
6 |   W503
7 |   E231
8 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | name: kafkaesk
 2 | 
 3 | on: [push]
 4 | 
 5 | jobs:
 6 |   # Job to run pre-checks
 7 |   pre-checks:
 8 |     runs-on: ubuntu-latest
 9 |     strategy:
10 |       matrix:
11 |         python-version: [3.8]
12 | 
13 |     steps:
14 |       - name: Checkout the repository
15 |         uses: actions/checkout@v2
16 | 
17 |       - name: Setup Python
18 |         uses: actions/setup-python@v1
19 |         with:
20 |           python-version: ${{ matrix.python-version }}
21 | 
22 |       - name: Install package
23 |         run: |
24 |           pip install poetry
25 |           poetry install
26 |       - name: Run pre-checks
27 |         run: |
28 |           poetry run flake8 kafkaesk --config=.flake8
29 |           poetry run mypy kafkaesk/
30 |           poetry run isort -c -rc kafkaesk/
31 |           poetry run black --check --verbose kafkaesk
32 |   # Job to run tests
33 |   tests:
34 |     runs-on: ubuntu-latest
35 | 
36 |     strategy:
37 |       matrix:
38 |         python-version: [3.8]
39 | 
40 |     steps:
41 |       - name: Checkout the repository
42 |         uses: actions/checkout@v2
43 | 
44 |       - name: Setup Python
45 |         uses: actions/setup-python@v1
46 |         with:
47 |           python-version: ${{ matrix.python-version }}
48 | 
49 |       - name: Start Docker containers for Zookeeper and Kafka
50 |         run: docker-compose up -d
51 | 
52 |       - name: Install the package
53 |         run: |
54 |           pip install poetry
55 |           poetry install
56 |       - name: Run tests
57 |         run: |
58 |           poetry run pytest -rfE --reruns 2 --cov=kafkaesk -s --tb=native -v --cov-report xml --cov-append tests
59 |       - name: Upload coverage to Codecov
60 |         uses: codecov/codecov-action@v1
61 |         with:
62 |           file: ./coverage.xml
63 | 


--------------------------------------------------------------------------------
/.github/workflows/upload-pypi.yml:
--------------------------------------------------------------------------------
 1 | name: Upload package to pypi
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - "master"
 7 | 
 8 | jobs:
 9 |   upload:
10 |     runs-on: ubuntu-latest
11 | 
12 |     steps:
13 |       - name: Checkout the repository
14 |         uses: actions/checkout@v2
15 | 
16 |       - name: Setup Python
17 |         uses: actions/setup-python@v2
18 |         with:
19 |           python-version: "3.8"
20 | 
21 |       - name: Publish package
22 |         run: |
23 |           pip install poetry
24 |           poetry config pypi-token.pypi ${{ secrets.PYPI_TOKEN }}
25 |           poetry publish --build
26 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .vscode/
 2 | .idea/
 3 | # Python Stuff
 4 | **/__pycache__
 5 | *.egg-info
 6 | .mypy_cache/
 7 | .python-version
 8 | dist/
 9 | .venv/
10 | venv/


--------------------------------------------------------------------------------
/.isort.cfg:
--------------------------------------------------------------------------------
1 | [settings]
2 | force_alphabetical_sort = True
3 | force_single_line = True
4 | not_skip = __init__.py
5 | line_length = 110
6 | wrap_length = 100


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | repos:
 2 |   - repo: https://github.com/ambv/black
 3 |     rev: stable
 4 |     hooks:
 5 |     - id: black
 6 |       language_version: python3.8
 7 |   - repo: https://github.com/pre-commit/mirrors-isort
 8 |     rev: v4.3.20
 9 |     hooks:
10 |     - id: isort
11 | 
12 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The BSD-2 license
 2 | 
 3 | Copyright (c) 2016, Plone Foundation
 4 | All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
 7 | 
 8 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
 9 | 
10 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
11 | 
12 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
13 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | <!-- PROJECT LOGO -->
  2 | <h1 align="center">
  3 |   <br>
  4 |   <img src="https://onna.com/wp-content/uploads/2020/03/h-onna-solid.png" alt="Onna Logo"></a>
  5 | </h1>
  6 | 
  7 | <h2 align="center">kafkaesk</h2>
  8 | 
  9 | <!-- TABLE OF CONTENTS -->
 10 | ## Table Of Contents
 11 | 
 12 | - [About the Project](#about-the-project)
 13 | - [Publish](#publish)
 14 | - [Subscribe](#subscribe)
 15 | - [Avoiding global object](#avoiding-global-object)
 16 | - [Manual commit](#manual-commit)
 17 | - [kafkaesk contract](#kafkaesk-contract)
 18 | - [Worker](#worker)
 19 | - [Development](#development)
 20 | - [Extensions](#extensions)
 21 | - [Naming](#naming)
 22 | 
 23 | 
 24 | ## About The Project
 25 | 
 26 | This project is meant to help facilitate effortless publishing and subscribing to events with Python and Kafka.
 27 | 
 28 | ### Guiding principal
 29 | 
 30 | - HTTP
 31 | - Language agnostic
 32 | - Contracts built on top of [Kafka](https://kafka.apache.org/)
 33 | 
 34 | 
 35 | ### Alternatives
 36 |  - [aiokafka](https://aiokafka.readthedocs.io/en/stable/): can be complex to scale correctly
 37 |  - [guillotina_kafka](https://github.com/onna/guillotina_kafka): complex, tied to [Guillotina](https://guillotina.readthedocs.io/en/latest/)
 38 |  - [faust](https://faust.readthedocs.io/en/latest/): requires additional data layers, not language agnostic
 39 |  - confluent kafka + avro: close but ends up being like grpc. compilation for languages. No asyncio.
 40 | 
 41 | > Consider this Python project as syntactic sugar around these ideas.
 42 | 
 43 | ## Publish
 44 | 
 45 | Using [pydantic](https://pydantic-docs.helpmanual.io/) but can be done with pure JSON.
 46 | 
 47 | ```python
 48 | import kafkaesk
 49 | from pydantic import BaseModel
 50 | 
 51 | app = kafkaesk.Application()
 52 | 
 53 | @app.schema("Content", version=1, retention=24 * 60 * 60)
 54 | class ContentMessage(BaseModel):
 55 |     foo: str
 56 | 
 57 | 
 58 | async def foobar():
 59 |     # ...
 60 |     # doing something in an async func
 61 |     await app.publish("content.edited.Resource", data=ContentMessage(foo="bar"))
 62 | ```
 63 | 
 64 | A convenience method is available in the `subscriber` dependency instance, this allow to header
 65 | propagation from the consumed message.
 66 | 
 67 | ```python
 68 | import kafkaesk
 69 | from pydantic import BaseModel
 70 | 
 71 | app = kafkaesk.Application()
 72 | 
 73 | @app.schema("Content", version=1, retention=24 * 60 * 60)
 74 | class ContentMessage(BaseModel):
 75 |     foo: str
 76 | 
 77 | 
 78 | @app.subscribe("content.*", "group_id")
 79 | async def get_messages(data: ContentMessage, subscriber):
 80 |     print(f"{data.foo}")
 81 |     # This will propagate `data` record headers
 82 |     await subscriber.publish("content.edited.Resource", data=ContentMessage(foo="bar"))
 83 | 
 84 | ```
 85 | 
 86 | ## Subscribe
 87 | 
 88 | ```python
 89 | import kafkaesk
 90 | from pydantic import BaseModel
 91 | 
 92 | app = kafkaesk.Application()
 93 | 
 94 | @app.schema("Content", version=1, retention=24 * 60 * 60)
 95 | class ContentMessage(BaseModel):
 96 |     foo: str
 97 | 
 98 | 
 99 | @app.subscribe("content.*", "group_id")
100 | async def get_messages(data: ContentMessage):
101 |     print(f"{data.foo}")
102 | 
103 | ```
104 | 
105 | ## Avoiding global object
106 | 
107 | If you do not want to have global application configuration, you can lazily configure
108 | the application and register schemas/subscribers separately.
109 | 
110 | ```python
111 | import kafkaesk
112 | from pydantic import BaseModel
113 | 
114 | router = kafkaesk.Router()
115 | 
116 | @router.schema("Content", version=1, retention=24 * 60 * 60)
117 | class ContentMessage(BaseModel):
118 |     foo: str
119 | 
120 | 
121 | @router.subscribe("content.*", "group_id")
122 | async def get_messages(data: ContentMessage):
123 |     print(f"{data.foo}")
124 | 
125 | 
126 | if __name__ == "__main__":
127 |     app = kafkaesk.Application()
128 |     app.mount(router)
129 |     kafkaesk.run(app)
130 | 
131 | ```
132 | 
133 | Optional consumer injected parameters:
134 | 
135 | - schema: str
136 | - record: aiokafka.structs.ConsumerRecord
137 | - app: kafkaesk.app.Application
138 | - subscriber: kafkaesk.app.BatchConsumer
139 | 
140 | Depending on the type annotation for the first parameter, you will get different data injected:
141 | 
142 | - `async def get_messages(data: ContentMessage)`: parses pydantic schema
143 | - `async def get_messages(data: bytes)`: give raw byte data
144 | - `async def get_messages(record: aiokafka.structs.ConsumerRecord)`: give kafka record object
145 | - `async def get_messages(data)`: raw json data in message
146 | 
147 | ## Manual commit
148 | 
149 | To accomplish a manual commit strategy yourself:
150 | 
151 | ```python
152 | app = kafkaesk.Application(auto_commit=False)
153 | 
154 | @app.subscribe("content.*", "group_id")
155 | async def get_messages(data: ContentMessage, subscriber):
156 |     print(f"{data.foo}")
157 |     await subscriber.consumer.commit()
158 | ```
159 | 
160 | ## SSL
161 | Add these values to your `kafka_settings`:  
162 | - `ssl_context` - this should be a placeholder as the SSL Context is generally created within the application  
163 | - `security_protocol` - one of SSL or PLAINTEXT  
164 | - `sasl_mechanism` - one of PLAIN, GSSAPI, SCRAM-SHA-256, SCRAM-SHA-512, OAUTHBEARER  
165 | - `sasl_plain_username` . 
166 | - `sasl_plain_password` . 
167 | 
168 | ## kafkaesk contract
169 | 
170 | This is a library around using kafka.
171 | Kafka itself does not enforce these concepts.
172 | 
173 | - Every message must provide a json schema
174 | - Messages produced will be validated against json schema
175 | - Each topic will have only one schema
176 | - A single schema can be used for multiple topics
177 | - Consumed message schema validation is up to the consumer
178 | - Messages will be consumed at least once. Considering this, your handling should be idempotent
179 | 
180 | ### Message format
181 | 
182 | ```json
183 | {
184 |     "schema": "schema_name:1",
185 |     "data": { ... }
186 | }
187 | ```
188 | 
189 | ## Worker
190 | 
191 | ```bash
192 | kafkaesk mymodule:app --kafka-servers=localhost:9092
193 | ```
194 | 
195 | Options:
196 | 
197 |  - --kafka-servers: comma separated list of kafka servers
198 |  - --kafka-settings: json encoded options to be passed to https://aiokafka.readthedocs.io/en/stable/api.html#aiokafkaconsumer-class
199 |  - --topic-prefix: prefix to use for topics
200 |  - --replication-factor: what replication factor topics should be created with. Defaults to min(number of servers, 3).
201 | 
202 | ### Application.publish
203 | 
204 | - stream_id: str: name of stream to send data to
205 | - data: class that inherits from pydantic.BaseModel
206 | - key: Optional[bytes]: key for message if it needs one
207 | 
208 | ### Application.subscribe
209 | 
210 | - stream_id: str: fnmatch pattern of streams to subscribe to
211 | - group: Optional[str]: consumer group id to use. Will use name of function if not provided
212 | 
213 | ### Application.schema
214 | 
215 | - id: str: id of the schema to store
216 | - version: Optional[int]: version of schema to store
217 | - streams: Optional[List[str]]: if streams are known ahead of time, you can pre-create them before you push data
218 | - retention: Optional[int]: retention policy in seconds
219 | 
220 | ### Application.configure
221 | 
222 | - kafka_servers: Optional[List[str]]: kafka servers to connect to
223 | - topic_prefix: Optional[str]: topic name prefix to subscribe to
224 | - kafka_settings: Optional[Dict[str, Any]]: additional aiokafka settings to pass in
225 | - replication_factor: Optional[int]: what replication factor topics should be created with. Defaults to min(number of servers, 3).
226 | - kafka_api_version: str: default `auto`
227 | - auto_commit: bool: default `True`
228 | - auto_commit_interval_ms: int: default `5000`
229 | 
230 | ## Development
231 | 
232 | ### Requirements
233 | 
234 | - [Docker](https://www.docker.com/)
235 | - [Poetry](https://python-poetry.org/)
236 | 
237 | ```bash
238 | poetry install
239 | ```
240 | 
241 | Run tests:
242 | 
243 | ```bash
244 | docker-compose up
245 | KAFKA=localhost:9092 poetry run pytest tests
246 | ```
247 | 
248 | ## Extensions
249 | 
250 | ### Logging
251 | This extension includes classes to extend Python's logging framework to publish structured log messages to a Kafka topic.
252 | This extension is made up of three main components: an extended `logging.LogRecord` and some custom `logging.Handler`s.
253 | 
254 | See `logger.py` in examples directory.
255 | 
256 | #### Log Record
257 | `kafkaesk.ext.logging.record.factory` is a function that will return `kafkaesk.ext.logging.record.PydanticLogRecord` objects.
258 | The `factory()` function scans through any `args` passed to a logger and checks each item to determine if it is a subclass of `pydantid.BaseModel`.
259 | 
260 | If it is a base model instance and `model._is_log_model` evaluates to `True` the model will be removed from `args` and added to `record._pydantic_data`.
261 | After that `factory()` will use logging's existing logic to finish creating the log record.
262 | 
263 | ### Handler
264 | This extensions ships with two handlers capable of handling `kafkaesk.ext.logging.handler.PydanticLogModel` classes: `kafakesk.ext.logging.handler.PydanticStreamHandler` and `kafkaesk.ext.logging.handler.PydanticKafkaeskHandler`.
265 | 
266 | The stream handler is a very small wrapper around `logging.StreamHandler`, the signature is the same, the only difference is that the handler will attempt to convert any pydantic models it receives to a human readable log message.
267 | 
268 | The kafkaesk handler has a few more bits going on in the background.
269 | 
270 | The handler has two required inputs, a `kafkaesk.app.Application` instance and a stream name.
271 | 
272 | Once initialized any logs emitted by the handler will be saved into an internal queue.
273 | There is a worker task that handles pulling logs from the queue and writing those logs to the specified topic.
274 | 
275 | # Naming
276 | 
277 | It's hard and "kafka" is already a fun name.
278 | Hopefully this library isn't literally "kafkaesque" for you.
279 | 


--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
 1 | # just for dev, fun, playing around
 2 | version: "3"
 3 | services:
 4 |   zookeeper:
 5 |     image: bitnami/zookeeper:latest
 6 |     ports:
 7 |       - 2181:2181
 8 |     expose:
 9 |       - 2181
10 |     environment:
11 |       "ALLOW_ANONYMOUS_LOGIN": "yes"
12 |     networks:
13 |       - kafka-network
14 | 
15 |   kafka:
16 |     image: bitnami/kafka:latest
17 |     depends_on:
18 |       - zookeeper
19 |     ports:
20 |       - 9092:9092
21 |     expose:
22 |       - 9092
23 |     links:
24 |       - zookeeper
25 |     environment:
26 |       "ALLOW_PLAINTEXT_LISTENER": "yes"
27 |       "KAFKA_CFG_ZOOKEEPER_CONNECT": "zookeeper:2181"
28 |       "KAFKA_CFG_AUTO_CREATE_TOPICS_ENABLE": "true"
29 |       "KAFKA_CFG_ADVERTISED_LISTENERS": "PLAINTEXT://localhost:9092"
30 |     networks:
31 |       - kafka-network
32 | 
33 | networks:
34 |   kafka-network:
35 |     driver: bridge
36 | 


--------------------------------------------------------------------------------
/examples/logger.py:
--------------------------------------------------------------------------------
 1 | from kafkaesk import Application
 2 | from kafkaesk.ext.logging import PydanticKafkaeskHandler
 3 | from kafkaesk.ext.logging import PydanticLogModel
 4 | from kafkaesk.ext.logging import PydanticStreamHandler
 5 | from pydantic import BaseModel
 6 | from typing import Optional
 7 | 
 8 | import asyncio
 9 | import logging
10 | 
11 | 
12 | class UserLog(BaseModel):
13 |     _is_log_model = True
14 |     user: Optional[str] = None
15 | 
16 | 
17 | async def test_log() -> None:
18 |     app = Application(kafka_servers=["localhost:9092"])
19 | 
20 |     logger = logging.getLogger("kafkaesk.ext.logging.kafka")
21 |     handler = PydanticKafkaeskHandler(app, "logging.test")
22 |     logger.addHandler(handler)
23 |     logger.setLevel(logging.DEBUG)
24 | 
25 |     stream_logger = logging.getLogger("kafakesk.ext.logging.stream")
26 |     stream_handler = PydanticStreamHandler()
27 |     stream_logger.addHandler(stream_handler)
28 |     stream_logger.setLevel(logging.DEBUG)
29 | 
30 |     @app.subscribe("logging.test", group="example.logging.consumer")
31 |     async def consume(data: PydanticLogModel) -> None:
32 |         stream_logger.info(data.json())
33 | 
34 |     async with app:
35 |         logger.debug("Log Message", UserLog(user="kafkaesk"))
36 |         await app.flush()
37 |         await app.consume_for(1, seconds=5)
38 | 
39 | 
40 | if __name__ == "__main__":
41 |     asyncio.run(test_log())
42 | 


--------------------------------------------------------------------------------
/examples/parallel.py:
--------------------------------------------------------------------------------
 1 | from kafkaesk import Application
 2 | from kafkaesk import run_app
 3 | from pydantic import BaseModel
 4 | 
 5 | import asyncio
 6 | import logging
 7 | import random
 8 | 
 9 | 
10 | logging.basicConfig(level=logging.INFO)
11 | 
12 | 
13 | app = Application()
14 | 
15 | 
16 | @app.schema("Foobar", streams=["content.foo", "slow.content.foo", "failed.content.foo"])
17 | class Foobar(BaseModel):
18 |     timeout: int
19 | 
20 | 
21 | async def consumer_logic(data: Foobar, record, subscriber):
22 |     try:
23 |         print(f"{data} -- {record.headers}: waiting {data.timeout}s...")
24 |         await asyncio.sleep(data.timeout)
25 |         print(f"{data}: done...")
26 |     except asyncio.CancelledError:
27 |         # Slow topic
28 |         print(f"{data} timeout message, sending to slow topic...")
29 |         await subscriber.publish(f"slow.{record.topic}", record, headers=[("slow", b"true")])
30 |     except Exception:
31 |         await subscriber.publish(f"failed.{record.topic}", record)
32 | 
33 | 
34 | async def generate_data(app):
35 |     idx = 0
36 |     while True:
37 |         timeout = random.randint(0, 10)
38 |         await app.publish("content.foo", Foobar(timeout=timeout))
39 |         idx += 1
40 |         await asyncio.sleep(0.1)
41 | 
42 | 
43 | async def run():
44 |     app.configure(kafka_servers=["localhost:9092"])
45 |     task = asyncio.create_task(generate_data(app))
46 | 
47 |     # Regular tasks should be consumed in less than 5s
48 |     app.subscribe("content.*", group="example_content_group", concurrency=10, timeout_seconds=5)(
49 |         consumer_logic
50 |     )
51 | 
52 |     # Timeout taks (slow) can be consumed independendly, with different configuration and logic
53 |     app.subscribe(
54 |         "slow.content.*", group="timeout_example_content_group", concurrency=1, timeout_seconds=None
55 |     )(consumer_logic)
56 | 
57 |     await run_app(app)
58 | 
59 | 
60 | if __name__ == "__main__":
61 |     asyncio.run(run())
62 | 


--------------------------------------------------------------------------------
/examples/simple.py:
--------------------------------------------------------------------------------
 1 | from kafkaesk import Application
 2 | from kafkaesk import run_app
 3 | from pydantic import BaseModel
 4 | 
 5 | import asyncio
 6 | import logging
 7 | 
 8 | logging.basicConfig(level=logging.INFO)
 9 | 
10 | 
11 | app = Application()
12 | 
13 | 
14 | @app.schema("Foobar")
15 | class Foobar(BaseModel):
16 |     foo: str
17 |     bar: str
18 | 
19 | 
20 | @app.subscribe("content.*", group="example_content_group")
21 | async def messages(data: Foobar, record):
22 |     await asyncio.sleep(0.1)
23 |     print(f"{data.foo}: {data.bar}: {record}")
24 | 
25 | 
26 | async def generate_data(app):
27 |     idx = 0
28 |     while True:
29 |         await app.publish("content.foo", Foobar(foo=str(idx), bar="yo"))
30 |         idx += 1
31 |         await asyncio.sleep(0.1)
32 | 
33 | 
34 | async def run():
35 |     app.configure(kafka_servers=["localhost:9092"])
36 |     task = asyncio.create_task(generate_data(app))
37 |     await run_app(app)
38 |     # await app.consume_forever()
39 | 
40 | 
41 | if __name__ == "__main__":
42 |     asyncio.run(run())
43 | 


--------------------------------------------------------------------------------
/kafkaesk/__init__.py:
--------------------------------------------------------------------------------
1 | from .app import Application  # noqa
2 | from .app import BatchConsumer  # noqa
3 | from .app import Router  # noqa
4 | from .app import run  # noqa
5 | from .app import run_app  # noqa
6 | from .app import Subscription  # noqa
7 | 


--------------------------------------------------------------------------------
/kafkaesk/app.py:
--------------------------------------------------------------------------------
  1 | from .consumer import BatchConsumer
  2 | from .consumer import Subscription
  3 | from .exceptions import AppNotConfiguredException
  4 | from .exceptions import ProducerUnhealthyException
  5 | from .exceptions import SchemaConflictException
  6 | from .exceptions import StopConsumer
  7 | from .kafka import KafkaTopicManager
  8 | from .metrics import NOERROR
  9 | from .metrics import PRODUCER_TOPIC_OFFSET
 10 | from .metrics import PUBLISHED_MESSAGES
 11 | from .metrics import PUBLISHED_MESSAGES_TIME
 12 | from .metrics import watch_kafka
 13 | from .metrics import watch_publish
 14 | from .utils import resolve_dotted_name
 15 | from asyncio.futures import Future
 16 | from functools import partial
 17 | from opentracing.scope_managers.contextvars import ContextVarsScopeManager
 18 | from pydantic import BaseModel
 19 | from types import TracebackType
 20 | from typing import Any
 21 | from typing import Awaitable
 22 | from typing import Callable
 23 | from typing import cast
 24 | from typing import Dict
 25 | from typing import List
 26 | from typing import Optional
 27 | from typing import Tuple
 28 | from typing import Type
 29 | 
 30 | import aiokafka
 31 | import aiokafka.errors
 32 | import aiokafka.structs
 33 | import argparse
 34 | import asyncio
 35 | import logging
 36 | import opentracing
 37 | import orjson
 38 | import pydantic
 39 | import signal
 40 | import time
 41 | 
 42 | logger = logging.getLogger("kafkaesk")
 43 | 
 44 | 
 45 | class SchemaRegistration:
 46 |     def __init__(
 47 |         self,
 48 |         id: str,
 49 |         version: int,
 50 |         model: Type[pydantic.BaseModel],
 51 |         retention: Optional[int] = None,
 52 |         streams: Optional[List[str]] = None,
 53 |     ):
 54 |         self.id = id
 55 |         self.version = version
 56 |         self.model = model
 57 |         self.retention = retention
 58 |         self.streams = streams
 59 | 
 60 |     def __repr__(self) -> str:
 61 |         return f"<SchemaRegistration {self.id}, version: {self.version} >"
 62 | 
 63 | 
 64 | def published_callback(topic: str, start_time: float, fut: Future) -> None:
 65 |     # Record the metrics
 66 |     finish_time = time.time()
 67 |     exception = fut.exception()
 68 |     if exception:
 69 |         error = str(exception.__class__.__name__)
 70 |         PUBLISHED_MESSAGES.labels(stream_id=topic, partition=-1, error=error).inc()
 71 |     else:
 72 |         metadata = fut.result()
 73 |         PUBLISHED_MESSAGES.labels(
 74 |             stream_id=topic, partition=metadata.partition, error=NOERROR
 75 |         ).inc()
 76 |         PRODUCER_TOPIC_OFFSET.labels(stream_id=topic, partition=metadata.partition).set(
 77 |             metadata.offset
 78 |         )
 79 |         PUBLISHED_MESSAGES_TIME.labels(stream_id=topic).observe(finish_time - start_time)
 80 | 
 81 | 
 82 | _aiokafka_consumer_settings = (
 83 |     "fetch_max_wait_ms",
 84 |     "fetch_max_bytes",
 85 |     "fetch_min_bytes",
 86 |     "max_partition_fetch_bytes",
 87 |     "request_timeout_ms",
 88 |     "auto_offset_reset",
 89 |     "metadata_max_age_ms",
 90 |     "max_poll_interval_ms",
 91 |     "rebalance_timeout_ms",
 92 |     "session_timeout_ms",
 93 |     "heartbeat_interval_ms",
 94 |     "consumer_timeout_ms",
 95 |     "max_poll_records",
 96 |     "connections_max_idle_ms",
 97 |     "ssl_context",
 98 |     "security_protocol",
 99 |     "sasl_mechanism",
100 |     "sasl_plain_username",
101 |     "sasl_plain_password",
102 | )
103 | _aiokafka_producer_settings = (
104 |     "metadata_max_age_ms",
105 |     "request_timeout_ms",
106 |     "max_batch_size",
107 |     "max_request_size",
108 |     "send_backoff_ms",
109 |     "retry_backoff_ms",
110 |     "ssl_context",
111 |     "security_protocol",
112 |     "sasl_mechanism",
113 |     "sasl_plain_username",
114 |     "sasl_plain_password",
115 | )
116 | 
117 | 
118 | class Router:
119 |     """
120 |     Application routing configuration.
121 |     """
122 | 
123 |     def __init__(self) -> None:
124 |         self._subscriptions: List[Subscription] = []
125 |         self._schemas: Dict[str, SchemaRegistration] = {}
126 |         self._event_handlers: Dict[str, List[Callable[[], Awaitable[None]]]] = {}
127 | 
128 |     @property
129 |     def subscriptions(self) -> List[Subscription]:
130 |         return self._subscriptions
131 | 
132 |     @property
133 |     def schemas(self) -> Dict[str, SchemaRegistration]:
134 |         return self._schemas
135 | 
136 |     @property
137 |     def event_handlers(self) -> Dict[str, List[Callable[[], Awaitable[None]]]]:
138 |         return self._event_handlers
139 | 
140 |     def on(self, name: str, handler: Callable[[], Awaitable[None]]) -> None:
141 |         if name not in self._event_handlers:
142 |             self._event_handlers[name] = []
143 | 
144 |         self._event_handlers[name].append(handler)
145 | 
146 |     def _subscribe(
147 |         self,
148 |         group: str,
149 |         *,
150 |         consumer_id: str = None,
151 |         pattern: str = None,
152 |         topics: List[str] = None,
153 |         timeout_seconds: float = None,
154 |         concurrency: int = None,
155 |     ) -> Callable:
156 |         def inner(func: Callable) -> Callable:
157 |             # If there is no consumer_id use the group instead
158 |             subscription = Subscription(
159 |                 consumer_id or group,
160 |                 func,
161 |                 group or func.__name__,
162 |                 pattern=pattern,
163 |                 topics=topics,
164 |                 concurrency=concurrency,
165 |                 timeout_seconds=timeout_seconds,
166 |             )
167 |             self._subscriptions.append(subscription)
168 |             return func
169 | 
170 |         return inner
171 | 
172 |     def subscribe_to_topics(
173 |         self,
174 |         topics: List[str],
175 |         group: str,
176 |         *,
177 |         timeout_seconds: float = None,
178 |         concurrency: int = None,
179 |     ) -> Callable:
180 |         return self._subscribe(
181 |             group=group,
182 |             topics=topics,
183 |             pattern=None,
184 |             timeout_seconds=timeout_seconds,
185 |             concurrency=concurrency,
186 |         )
187 | 
188 |     def subscribe_to_pattern(
189 |         self,
190 |         pattern: str,
191 |         group: str,
192 |         *,
193 |         timeout_seconds: float = None,
194 |         concurrency: int = None,
195 |     ) -> Callable:
196 |         return self._subscribe(
197 |             group=group,
198 |             topics=None,
199 |             pattern=pattern,
200 |             timeout_seconds=timeout_seconds,
201 |             concurrency=concurrency,
202 |         )
203 | 
204 |     def subscribe(
205 |         self,
206 |         stream_id: str,
207 |         group: str,
208 |         *,
209 |         timeout_seconds: float = None,
210 |         concurrency: int = None,
211 |     ) -> Callable:
212 |         """Keep backwards compatibility"""
213 |         return self._subscribe(
214 |             group=group,
215 |             topics=None,
216 |             pattern=stream_id,
217 |             timeout_seconds=timeout_seconds,
218 |             concurrency=concurrency,
219 |         )
220 | 
221 |     def schema(
222 |         self,
223 |         _id: Optional[str] = None,
224 |         *,
225 |         version: Optional[int] = None,
226 |         retention: Optional[int] = None,
227 |         streams: Optional[List[str]] = None,
228 |     ) -> Callable:
229 |         version = version or 1
230 | 
231 |         def inner(cls: Type[BaseModel]) -> Type[BaseModel]:
232 |             if _id is None:
233 |                 type_id = cls.__name__
234 |             else:
235 |                 type_id = _id
236 |             key = f"{type_id}:{version}"
237 |             reg = SchemaRegistration(
238 |                 id=type_id, version=version or 1, model=cls, retention=retention, streams=streams
239 |             )
240 |             if key in self._schemas:
241 |                 raise SchemaConflictException(self._schemas[key], reg)
242 |             cls.__key__ = key  # type: ignore
243 |             self._schemas[key] = reg
244 |             return cls
245 | 
246 |         return inner
247 | 
248 | 
249 | class Application(Router):
250 |     """
251 |     Application configuration
252 |     """
253 | 
254 |     _producer: Optional[aiokafka.AIOKafkaProducer] = None
255 | 
256 |     def __init__(
257 |         self,
258 |         kafka_servers: Optional[List[str]] = None,
259 |         topic_prefix: str = "",
260 |         kafka_settings: Optional[Dict[str, Any]] = None,
261 |         replication_factor: Optional[int] = None,
262 |         kafka_api_version: str = "auto",
263 |         auto_commit: bool = True,
264 |     ):
265 |         super().__init__()
266 |         self._kafka_servers = kafka_servers
267 |         self._kafka_settings = kafka_settings
268 |         self._producer = None
269 |         self._initialized = False
270 |         self._locks: Dict[str, asyncio.Lock] = {}
271 | 
272 |         self._kafka_api_version = kafka_api_version
273 |         self._topic_prefix = topic_prefix
274 |         self._replication_factor = replication_factor
275 |         self._topic_mng: Optional[KafkaTopicManager] = None
276 |         self._subscription_consumers: List[BatchConsumer] = []
277 |         self._subscription_consumers_tasks: List[asyncio.Task] = []
278 | 
279 |         self.auto_commit = auto_commit
280 | 
281 |     @property
282 |     def kafka_settings(self) -> Dict[str, Any]:
283 |         return self._kafka_settings or {}
284 | 
285 |     def mount(self, router: Router) -> None:
286 |         self._subscriptions.extend(router.subscriptions)
287 |         self._schemas.update(router.schemas)
288 |         self._event_handlers.update(router.event_handlers)
289 | 
290 |     async def health_check(self) -> None:
291 |         for subscription_consumer in self._subscription_consumers:
292 |             await subscription_consumer.healthy()
293 |         if not self.producer_healthy():
294 |             raise ProducerUnhealthyException(self._producer)  # type: ignore
295 | 
296 |     async def _call_event_handlers(self, name: str) -> None:
297 |         handlers = self._event_handlers.get(name)
298 | 
299 |         if handlers is not None:
300 |             for handler in handlers:
301 |                 await handler()
302 | 
303 |     @property
304 |     def topic_mng(self) -> KafkaTopicManager:
305 |         if self._topic_mng is None:
306 |             self._topic_mng = KafkaTopicManager(
307 |                 cast(List[str], self._kafka_servers),
308 |                 self._topic_prefix,
309 |                 replication_factor=self._replication_factor,
310 |                 kafka_api_version=self._kafka_api_version,
311 |                 ssl_context=self.kafka_settings.get("ssl_context"),
312 |                 security_protocol=self.kafka_settings.get("security_protocol", "PLAINTEXT"),
313 |                 sasl_mechanism=self.kafka_settings.get("sasl_mechanism"),
314 |                 sasl_plain_username=self.kafka_settings.get("sasl_plain_username"),
315 |                 sasl_plain_password=self.kafka_settings.get("sasl_plain_password"),
316 |             )
317 |         return self._topic_mng
318 | 
319 |     def get_lock(self, name: str) -> asyncio.Lock:
320 |         if name not in self._locks:
321 |             self._locks[name] = asyncio.Lock()
322 |         return self._locks[name]
323 | 
324 |     def configure(
325 |         self,
326 |         kafka_servers: Optional[List[str]] = None,
327 |         topic_prefix: Optional[str] = None,
328 |         kafka_settings: Optional[Dict[str, Any]] = None,
329 |         api_version: Optional[str] = None,
330 |         replication_factor: Optional[int] = None,
331 |     ) -> None:
332 |         if kafka_servers is not None:
333 |             self._kafka_servers = kafka_servers
334 |         if topic_prefix is not None:
335 |             self._topic_prefix = topic_prefix
336 |         if kafka_settings is not None:
337 |             self._kafka_settings = kafka_settings
338 |         if api_version is not None:
339 |             self._kafka_api_version = api_version
340 |         if replication_factor is not None:
341 |             self._replication_factor = replication_factor
342 | 
343 |     @property
344 |     def is_configured(self) -> bool:
345 |         return bool(self._kafka_servers)
346 | 
347 |     async def publish_and_wait(
348 |         self,
349 |         stream_id: str,
350 |         data: BaseModel,
351 |         key: Optional[bytes] = None,
352 |         headers: Optional[List[Tuple[str, bytes]]] = None,
353 |     ) -> aiokafka.structs.ConsumerRecord:
354 |         return await (await self.publish(stream_id, data, key, headers=headers))
355 | 
356 |     async def _maybe_create_topic(self, stream_id: str, data: BaseModel = None) -> None:
357 |         topic_id = self.topic_mng.get_topic_id(stream_id)
358 |         async with self.get_lock(stream_id):
359 |             if not await self.topic_mng.topic_exists(topic_id):
360 |                 reg = None
361 |                 if data:
362 |                     reg = self.get_schema_reg(data)
363 |                 retention_ms = None
364 |                 if reg is not None and reg.retention is not None:
365 |                     retention_ms = reg.retention * 1000
366 |                 await self.topic_mng.create_topic(
367 |                     topic_id,
368 |                     replication_factor=self._replication_factor,
369 |                     retention_ms=retention_ms,
370 |                 )
371 | 
372 |     async def publish(
373 |         self,
374 |         stream_id: str,
375 |         data: BaseModel,
376 |         key: Optional[bytes] = None,
377 |         headers: Optional[List[Tuple[str, bytes]]] = None,
378 |     ) -> Awaitable[aiokafka.structs.ConsumerRecord]:
379 |         if not self._initialized:
380 |             async with self.get_lock("_"):
381 |                 await self.initialize()
382 | 
383 |         schema_key = getattr(data, "__key__", None)
384 |         if schema_key not in self._schemas:
385 |             # do not require key
386 |             schema_key = f"{data.__class__.__name__}:1"
387 |         data_ = data.dict()
388 | 
389 |         await self._maybe_create_topic(stream_id, data)
390 |         return await self.raw_publish(
391 |             stream_id, orjson.dumps({"schema": schema_key, "data": data_}), key, headers=headers
392 |         )
393 | 
394 |     async def raw_publish(
395 |         self,
396 |         stream_id: str,
397 |         data: bytes,
398 |         key: Optional[bytes] = None,
399 |         headers: Optional[List[Tuple[str, bytes]]] = None,
400 |     ) -> Awaitable[aiokafka.structs.ConsumerRecord]:
401 |         logger.debug(f"Sending kafka msg: {stream_id}")
402 |         producer = await self._get_producer()
403 |         tracer = opentracing.tracer
404 | 
405 |         if not headers:
406 |             headers = []
407 |         else:
408 |             # this is just to check the headers shape
409 |             try:
410 |                 for _, _ in headers:
411 |                     pass
412 |             except ValueError:
413 |                 # We want to be resilient to malformated headers
414 |                 logger.exception(f"Malformed headers: '{headers}'")
415 | 
416 |         if isinstance(tracer.scope_manager, ContextVarsScopeManager):
417 |             # This only makes sense if the context manager is asyncio aware
418 |             if tracer.active_span:
419 |                 carrier: Dict[str, str] = {}
420 |                 tracer.inject(
421 |                     span_context=tracer.active_span,
422 |                     format=opentracing.Format.TEXT_MAP,
423 |                     carrier=carrier,
424 |                 )
425 | 
426 |                 header_keys = [k for k, _ in headers]
427 |                 for k, v in carrier.items():
428 |                     # Dont overwrite if they are already present!
429 |                     if k not in header_keys:
430 |                         headers.append((k, v.encode()))
431 | 
432 |         if not self.producer_healthy():
433 |             raise ProducerUnhealthyException(self._producer)  # type: ignore
434 | 
435 |         topic_id = self.topic_mng.get_topic_id(stream_id)
436 |         start_time = time.time()
437 |         with watch_publish(topic_id):
438 |             fut = await producer.send(
439 |                 topic_id,
440 |                 value=data,
441 |                 key=key,
442 |                 headers=headers,
443 |             )
444 | 
445 |         fut.add_done_callback(partial(published_callback, topic_id, start_time))  # type: ignore
446 |         return fut
447 | 
448 |     async def flush(self) -> None:
449 |         if self._producer is not None:
450 |             await self._producer.flush()
451 | 
452 |     def get_schema_reg(self, model_or_def: BaseModel) -> Optional[SchemaRegistration]:
453 |         try:
454 |             key = model_or_def.__key__  # type: ignore
455 |             return self._schemas[key]
456 |         except (AttributeError, KeyError):
457 |             return None
458 | 
459 |     def producer_healthy(self) -> bool:
460 |         """
461 |         It's possible for the producer to be unhealthy while we're still sending messages to it.
462 |         """
463 |         if self._producer is not None and self._producer._sender.sender_task is not None:
464 |             return not self._producer._sender.sender_task.done()
465 |         return True
466 | 
467 |     def consumer_factory(self, group_id: str) -> aiokafka.AIOKafkaConsumer:
468 |         return aiokafka.AIOKafkaConsumer(
469 |             bootstrap_servers=cast(List[str], self._kafka_servers),
470 |             loop=asyncio.get_event_loop(),
471 |             group_id=group_id,
472 |             auto_offset_reset="earliest",
473 |             api_version=self._kafka_api_version,
474 |             enable_auto_commit=False,
475 |             **{k: v for k, v in self.kafka_settings.items() if k in _aiokafka_consumer_settings},
476 |         )
477 | 
478 |     def producer_factory(self) -> aiokafka.AIOKafkaProducer:
479 |         return aiokafka.AIOKafkaProducer(
480 |             bootstrap_servers=cast(List[str], self._kafka_servers),
481 |             loop=asyncio.get_event_loop(),
482 |             api_version=self._kafka_api_version,
483 |             **{k: v for k, v in self.kafka_settings.items() if k in _aiokafka_producer_settings},
484 |         )
485 | 
486 |     async def _get_producer(self) -> aiokafka.AIOKafkaProducer:
487 |         if self._producer is None:
488 |             self._producer = self.producer_factory()
489 |             with watch_kafka("producer_start"):
490 |                 await self._producer.start()
491 |         return self._producer
492 | 
493 |     async def initialize(self) -> None:
494 |         if not self.is_configured:
495 |             raise AppNotConfiguredException
496 | 
497 |         await self._call_event_handlers("initialize")
498 | 
499 |         for reg in self._schemas.values():
500 |             # initialize topics for known streams
501 |             for stream_id in reg.streams or []:
502 |                 topic_id = self.topic_mng.get_topic_id(stream_id)
503 |                 async with self.get_lock(stream_id):
504 |                     if not await self.topic_mng.topic_exists(topic_id):
505 |                         await self.topic_mng.create_topic(
506 |                             topic_id,
507 |                             retention_ms=reg.retention * 1000
508 |                             if reg.retention is not None
509 |                             else None,
510 |                         )
511 | 
512 |         self._initialized = True
513 | 
514 |     async def finalize(self) -> None:
515 |         await self._call_event_handlers("finalize")
516 | 
517 |         await self.stop()
518 | 
519 |         if self._producer is not None:
520 |             with watch_kafka("producer_flush"):
521 |                 await self._producer.flush()
522 |             with watch_kafka("producer_stop"):
523 |                 await self._producer.stop()
524 | 
525 |         if self._topic_mng is not None:
526 |             await self._topic_mng.finalize()
527 | 
528 |         self._producer = None
529 |         self._initialized = False
530 |         self._topic_mng = None
531 | 
532 |     async def __aenter__(self) -> "Application":
533 |         await self.initialize()
534 |         return self
535 | 
536 |     async def __aexit__(
537 |         self,
538 |         exc_type: Optional[Type[BaseException]] = None,
539 |         exc: Optional[BaseException] = None,
540 |         traceback: Optional[TracebackType] = None,
541 |     ) -> None:
542 |         logger.info("Stopping application...", exc_info=exc)
543 |         await self.finalize()
544 | 
545 |     async def consume_for(self, num_messages: int, *, seconds: Optional[int] = None) -> int:
546 |         consumed = 0
547 |         self._subscription_consumers = []
548 |         tasks = []
549 |         for subscription in self._subscriptions:
550 | 
551 |             async def on_message(record: aiokafka.structs.ConsumerRecord) -> None:
552 |                 nonlocal consumed
553 |                 consumed += 1
554 |                 if consumed >= num_messages:
555 |                     raise StopConsumer
556 | 
557 |             consumer = BatchConsumer(
558 |                 subscription=subscription,
559 |                 app=self,
560 |                 event_handlers={"message": [on_message]},
561 |                 auto_commit=self.auto_commit,
562 |             )
563 | 
564 |             self._subscription_consumers.append(consumer)
565 |             tasks.append(asyncio.create_task(consumer(), name=str(consumer)))
566 | 
567 |         done, pending = await asyncio.wait(
568 |             tasks, timeout=seconds, return_when=asyncio.FIRST_EXCEPTION
569 |         )
570 |         await self.stop()
571 | 
572 |         # re-raise any errors so we can validate during tests
573 |         for task in done:
574 |             exc = task.exception()
575 |             if exc is not None:
576 |                 raise exc
577 | 
578 |         for task in pending:
579 |             task.cancel()
580 | 
581 |         return consumed
582 | 
583 |     def consume_forever(self) -> Awaitable:
584 |         self._subscription_consumers = []
585 |         self._subscription_consumers_tasks = []
586 | 
587 |         for subscription in self._subscriptions:
588 |             consumer = BatchConsumer(
589 |                 subscription=subscription,
590 |                 app=self,
591 |                 auto_commit=self.auto_commit,
592 |             )
593 |             self._subscription_consumers.append(consumer)
594 | 
595 |         self._subscription_consumers_tasks = [
596 |             asyncio.create_task(c()) for c in self._subscription_consumers
597 |         ]
598 |         return asyncio.wait(self._subscription_consumers_tasks, return_when=asyncio.FIRST_EXCEPTION)
599 | 
600 |     async def stop(self) -> None:
601 |         async with self.get_lock("_"):
602 |             # do not allow stop calls at same time
603 | 
604 |             if len(self._subscription_consumers) == 0:
605 |                 return
606 | 
607 |             _, pending = await asyncio.wait(
608 |                 [asyncio.create_task(c.stop()) for c in self._subscription_consumers if c],
609 |                 timeout=5,
610 |             )
611 |             for task in pending:
612 |                 # stop tasks that didn't finish
613 |                 task.cancel()
614 | 
615 |             for task in self._subscription_consumers_tasks:
616 |                 # make sure everything is done
617 |                 if not task.done():
618 |                     task.cancel()
619 | 
620 |             for task in self._subscription_consumers_tasks:
621 |                 try:
622 |                     await asyncio.wait([task])
623 |                 except asyncio.CancelledError:
624 |                     ...
625 | 
626 | 
627 | cli_parser = argparse.ArgumentParser(description="Run kafkaesk worker.")
628 | cli_parser.add_argument("app", help="Application object")
629 | cli_parser.add_argument("--kafka-servers", help="Kafka servers")
630 | cli_parser.add_argument("--kafka-settings", help="Kafka settings")
631 | cli_parser.add_argument("--topic-prefix", help="Topic prefix")
632 | cli_parser.add_argument("--api-version", help="Kafka API Version")
633 | 
634 | 
635 | def _sig_handler(app: Application) -> None:
636 |     asyncio.create_task(app.stop())
637 | 
638 | 
639 | async def run_app(app: Application) -> None:
640 |     async with app:
641 |         loop = asyncio.get_event_loop()
642 |         fut = asyncio.create_task(app.consume_forever())
643 |         for signame in {"SIGINT", "SIGTERM"}:
644 |             loop.add_signal_handler(getattr(signal, signame), partial(_sig_handler, app))
645 |         done, pending = await fut
646 |         logger.debug("Exiting consumer")
647 | 
648 |         await app.stop()
649 |         # re-raise any errors so we can validate during tests
650 |         for task in done:
651 |             exc = task.exception()
652 |             if exc is not None:
653 |                 raise exc
654 | 
655 | 
656 | def run(app: Optional[Application] = None) -> None:
657 |     if app is None:
658 |         opts = cli_parser.parse_args()
659 |         module_str, attr = opts.app.split(":")
660 |         module = resolve_dotted_name(module_str)
661 |         app = getattr(module, attr)
662 | 
663 |         if callable(app):
664 |             app = app()
665 | 
666 |         app = cast(Application, app)
667 | 
668 |         if opts.kafka_servers:
669 |             app.configure(kafka_servers=opts.kafka_servers.split(","))
670 |         if opts.kafka_settings:
671 |             app.configure(kafka_settings=orjson.loads(opts.kafka_settings))
672 |         if opts.topic_prefix:
673 |             app.configure(topic_prefix=opts.topic_prefix)
674 |         if opts.api_version:
675 |             app.configure(api_version=opts.api_version)
676 | 
677 |     try:
678 |         asyncio.run(run_app(app))
679 |     except asyncio.CancelledError:  # pragma: no cover
680 |         logger.debug("Closing because task was exited")
681 | 


--------------------------------------------------------------------------------
/kafkaesk/consumer.py:
--------------------------------------------------------------------------------
  1 | from .exceptions import ConsumerUnhealthyException
  2 | from .exceptions import HandlerTaskCancelled
  3 | from .exceptions import StopConsumer
  4 | from .exceptions import UnhandledMessage
  5 | from .metrics import CONSUMED_MESSAGE_TIME
  6 | from .metrics import CONSUMED_MESSAGES
  7 | from .metrics import CONSUMED_MESSAGES_BATCH_SIZE
  8 | from .metrics import CONSUMER_HEALTH
  9 | from .metrics import CONSUMER_REBALANCED
 10 | from .metrics import CONSUMER_TOPIC_OFFSET
 11 | from .metrics import MESSAGE_LEAD_TIME
 12 | from .metrics import NOERROR
 13 | from kafka.structs import TopicPartition
 14 | 
 15 | import aiokafka
 16 | import asyncio
 17 | import fnmatch
 18 | import functools
 19 | import inspect
 20 | import logging
 21 | import opentracing
 22 | import orjson
 23 | import pydantic
 24 | import time
 25 | import typing
 26 | 
 27 | if typing.TYPE_CHECKING:  # pragma: no cover
 28 |     from .app import Application
 29 | else:
 30 |     Application = None
 31 | 
 32 | 
 33 | logger = logging.getLogger(__name__)
 34 | 
 35 | 
 36 | class Subscription:
 37 |     def __init__(
 38 |         self,
 39 |         consumer_id: str,
 40 |         func: typing.Callable,
 41 |         group: str,
 42 |         *,
 43 |         pattern: typing.Optional[str] = None,
 44 |         topics: typing.Optional[typing.List[str]] = None,
 45 |         timeout_seconds: float = 0.0,
 46 |         concurrency: int = None,
 47 |     ):
 48 |         self.consumer_id = consumer_id
 49 |         self.pattern = pattern
 50 |         self.topics = topics
 51 |         self.func = func
 52 |         self.group = group
 53 |         self.timeout = timeout_seconds
 54 |         self.concurrency = concurrency
 55 | 
 56 |     def __repr__(self) -> str:
 57 |         return f"<Subscription stream: {self.consumer_id} >"
 58 | 
 59 | 
 60 | def _pydantic_msg_handler(
 61 |     model: typing.Type[pydantic.BaseModel], record: aiokafka.ConsumerRecord
 62 | ) -> pydantic.BaseModel:
 63 |     try:
 64 |         data: typing.Dict[str, typing.Any] = orjson.loads(record.value)
 65 |         return model.parse_obj(data["data"])
 66 |     except orjson.JSONDecodeError:
 67 |         # log the execption so we can see what fields failed
 68 |         logger.warning(f"Payload is not valid json: {record}", exc_info=True)
 69 |         raise UnhandledMessage("Error deserializing json")
 70 |     except pydantic.ValidationError:
 71 |         # log the execption so we can see what fields failed
 72 |         logger.warning(f"Error parsing pydantic model:{model} {record}", exc_info=True)
 73 |         raise UnhandledMessage(f"Error parsing data: {model}")
 74 |     except Exception:
 75 |         # Catch all
 76 |         logger.warning(f"Error parsing payload: {model} {record}", exc_info=True)
 77 |         raise UnhandledMessage("Error parsing payload")
 78 | 
 79 | 
 80 | def _raw_msg_handler(record: aiokafka.structs.ConsumerRecord) -> typing.Dict[str, typing.Any]:
 81 |     data: typing.Dict[str, typing.Any] = orjson.loads(record.value)
 82 |     return data
 83 | 
 84 | 
 85 | def _bytes_msg_handler(record: aiokafka.structs.ConsumerRecord) -> bytes:
 86 |     return record.value
 87 | 
 88 | 
 89 | def _record_msg_handler(record: aiokafka.structs.ConsumerRecord) -> aiokafka.structs.ConsumerRecord:
 90 |     return record
 91 | 
 92 | 
 93 | def build_handler(
 94 |     coro: typing.Callable, app: "Application", consumer: "BatchConsumer"
 95 | ) -> typing.Callable:
 96 |     """Introspection on the coroutine signature to inject dependencies"""
 97 |     sig = inspect.signature(coro)
 98 |     param_name = [k for k in sig.parameters.keys()][0]
 99 |     annotation = sig.parameters[param_name].annotation
100 |     handler = _raw_msg_handler
101 |     if annotation and annotation != sig.empty:
102 |         if annotation == bytes:
103 |             handler = _bytes_msg_handler  # type: ignore
104 |         elif annotation == aiokafka.ConsumerRecord:
105 |             handler = _record_msg_handler  # type: ignore
106 |         else:
107 |             handler = functools.partial(_pydantic_msg_handler, annotation)  # type: ignore
108 | 
109 |     it = iter(sig.parameters.items())
110 |     # first argument is required and its the payload
111 |     next(it)
112 |     kwargs: typing.Dict[str, typing.Any] = getattr(coro, "__extra_kwargs__", {})
113 | 
114 |     for key, param in it:
115 |         if key == "schema":
116 |             kwargs["schema"] = None
117 |         elif key == "record":
118 |             kwargs["record"] = None
119 |         elif key == "app":
120 |             kwargs["app"] = app
121 |         elif key == "subscriber":
122 |             kwargs["subscriber"] = consumer
123 |         elif issubclass(param.annotation, opentracing.Span):
124 |             kwargs[key] = opentracing.Span
125 | 
126 |     async def inner(record: aiokafka.ConsumerRecord, span: opentracing.Span) -> None:
127 |         data = handler(record)
128 |         deps = kwargs.copy()
129 | 
130 |         for key, param in kwargs.items():
131 |             if key == "schema":
132 |                 msg = orjson.loads(record.value)
133 |                 deps["schema"] = msg["schema"]
134 |             elif key == "record":
135 |                 deps["record"] = record
136 |             elif param == opentracing.Span:
137 |                 deps[key] = span
138 | 
139 |         await coro(data, **deps)
140 | 
141 |     return inner
142 | 
143 | 
144 | class BatchConsumer(aiokafka.ConsumerRebalanceListener):
145 |     _subscription: Subscription
146 |     _close: typing.Optional[asyncio.Future] = None
147 |     _consumer: aiokafka.AIOKafkaConsumer
148 |     _offsets: typing.Dict[aiokafka.TopicPartition, int]
149 |     _message_handler: typing.Callable
150 |     _initialized: bool
151 |     _running: bool = False
152 | 
153 |     def __init__(
154 |         self,
155 |         subscription: Subscription,
156 |         app: "Application",
157 |         event_handlers: typing.Optional[typing.Dict[str, typing.List[typing.Callable]]] = None,
158 |         auto_commit: bool = True,
159 |     ):
160 |         self._initialized = False
161 |         self.stream_id = subscription.consumer_id
162 |         self.group_id = subscription.group
163 |         self._coro = subscription.func
164 |         self._event_handlers = event_handlers or {}
165 |         self._concurrency = subscription.concurrency or 1
166 |         self._timeout = subscription.timeout
167 |         self._subscription = subscription
168 |         self._close = None
169 |         self._app = app
170 |         self._last_commit = 0.0
171 |         self._auto_commit = auto_commit
172 |         self._tp: typing.Dict[aiokafka.TopicPartition, int] = {}
173 | 
174 |         # We accept either pattern or a list of topics, also we might accept a single topic
175 |         # to keep compatibility with older API
176 |         self.pattern = subscription.pattern
177 |         self.topics = subscription.topics
178 | 
179 |     async def __call__(self) -> None:
180 |         if not self._initialized:
181 |             await self.initialize()
182 | 
183 |         try:
184 |             while not self._close:
185 |                 try:
186 |                     if not self._consumer.assignment():
187 |                         await asyncio.sleep(2)
188 |                         continue
189 |                     await self._consume()
190 |                 except aiokafka.errors.KafkaConnectionError:
191 |                     # We retry
192 |                     self._health_metric(False)
193 |                     logger.info(f"Consumer {self} kafka connection error, retrying...")
194 |                     await asyncio.sleep(0.5)
195 |         except asyncio.CancelledError:
196 |             self._health_metric(False)
197 |         except StopConsumer:
198 |             self._health_metric(False)
199 |             logger.info(f"Consumer {self} stopped, exiting")
200 |         except BaseException as exc:
201 |             logger.exception(f"Consumer {self} failed. Finalizing.", exc_info=exc)
202 |             self._health_metric(False)
203 |             raise
204 |         finally:
205 |             await self.finalize()
206 | 
207 |     def _health_metric(self, healthy: bool) -> None:
208 |         CONSUMER_HEALTH.labels(
209 |             group_id=self.group_id,
210 |         ).set(healthy)
211 | 
212 |     async def emit(self, name: str, *args: typing.Any, **kwargs: typing.Any) -> None:
213 |         for func in self._event_handlers.get(name, []):
214 |             try:
215 |                 await func(*args, **kwargs)
216 |             except StopConsumer:
217 |                 raise
218 |             except Exception:
219 |                 logger.warning(f"Error emitting event: {name}: {func}", exc_info=True)
220 | 
221 |     async def initialize(self) -> None:
222 |         self._close = None
223 |         self._running = True
224 |         self._processing = asyncio.Lock()
225 |         self._consumer = await self._consumer_factory()
226 |         await self._consumer.start()
227 |         self._message_handler = build_handler(self._coro, self._app, self)  # type: ignore
228 |         self._initialized = True
229 | 
230 |     async def finalize(self) -> None:
231 |         try:
232 |             await self._consumer.stop()
233 |         except Exception:
234 |             logger.info(f"[{self}] Could not commit on shutdown", exc_info=True)
235 | 
236 |         self._initialized = False
237 |         self._running = False
238 |         if self._close:
239 |             self._close.set_result("done")
240 | 
241 |     async def _consumer_factory(self) -> aiokafka.AIOKafkaConsumer:
242 |         consumer = self._app.consumer_factory(self.group_id)
243 | 
244 |         if self.pattern and self.topics:
245 |             raise AssertionError(
246 |                 "Both of the params 'pattern' and 'topics' are not allowed. Select only one mode."
247 |             )  # noqa
248 | 
249 |         if self.pattern:
250 |             # This is needed in case we have a prefix
251 |             topic_id = self._app.topic_mng.get_topic_id(self.pattern)
252 | 
253 |             if "*" in self.pattern:
254 |                 pattern = fnmatch.translate(topic_id)
255 |                 consumer.subscribe(pattern=pattern, listener=self)  # type: ignore
256 |             else:
257 |                 consumer.subscribe(topics=[topic_id], listener=self)  # type: ignore
258 |         elif self.topics:
259 |             topics = [self._app.topic_mng.get_topic_id(topic) for topic in self.topics]
260 |             consumer.subscribe(topics=topics, listener=self)  # type: ignore
261 |         else:
262 |             raise ValueError("Either `topics` or `pattern` should be defined")
263 | 
264 |         return consumer
265 | 
266 |     async def stop(self) -> None:
267 |         if not self._running:
268 |             return
269 | 
270 |         # Exit the loop, this will trigger finalize call
271 |         loop = asyncio.get_running_loop()
272 |         self._close = loop.create_future()
273 |         await asyncio.wait([self._close])
274 | 
275 |     def __repr__(self) -> str:
276 |         return f"<Consumer: {self.stream_id}, Group: {self.group_id}>"
277 | 
278 |     def _span(self, record: aiokafka.ConsumerRecord) -> opentracing.SpanContext:
279 |         tracer = opentracing.tracer
280 |         headers = {x[0]: x[1].decode() for x in record.headers or []}
281 |         parent = tracer.extract(opentracing.Format.TEXT_MAP, headers)
282 |         context = tracer.start_active_span(
283 |             record.topic,
284 |             tags={
285 |                 "message_bus.destination": record.topic,
286 |                 "message_bus.partition": record.partition,
287 |                 "message_bus.group_id": self.group_id,
288 |             },
289 |             references=[opentracing.follows_from(parent)],
290 |         )
291 |         return context.span
292 | 
293 |     async def _handler(self, record: aiokafka.ConsumerRecord) -> None:
294 |         with self._span(record) as span:
295 |             await self._message_handler(record, span)
296 | 
297 |     async def _consume(self) -> None:
298 |         batch = await self._consumer.getmany(max_records=self._concurrency, timeout_ms=500)
299 | 
300 |         async with self._processing:
301 |             if not batch:
302 |                 await self._maybe_commit()
303 |             else:
304 |                 await self._consume_batch(batch)
305 | 
306 |     async def _consume_batch(
307 |         self, batch: typing.Dict[TopicPartition, typing.List[aiokafka.ConsumerRecord]]
308 |     ) -> None:
309 |         futures: typing.Dict[asyncio.Future[typing.Any], aiokafka.ConsumerRecord] = dict()
310 |         for tp, records in batch.items():
311 |             for record in records:
312 |                 coro = self._handler(record)
313 |                 fut = asyncio.create_task(coro)
314 |                 futures[fut] = record
315 | 
316 |         # TODO: this metric is kept for backwards-compatibility, but should be revisited
317 |         with CONSUMED_MESSAGE_TIME.labels(
318 |             stream_id=self.stream_id,
319 |             partition=next(iter(batch)),
320 |             group_id=self.group_id,
321 |         ).time():
322 |             done, pending = await asyncio.wait(
323 |                 futures.keys(),
324 |                 timeout=self._timeout,
325 |                 return_when=asyncio.FIRST_EXCEPTION,
326 |             )
327 | 
328 |         # Look for failures
329 |         for task in done:
330 |             record = futures[task]
331 |             tp = aiokafka.TopicPartition(record.topic, record.partition)
332 | 
333 |             # Get the largest offset of the batch
334 |             current_max = self._tp.get(tp)
335 |             if not current_max:
336 |                 self._tp[tp] = record.offset + 1
337 |             else:
338 |                 self._tp[tp] = max(record.offset + 1, current_max)
339 | 
340 |             try:
341 |                 if exc := task.exception():
342 |                     self._count_message(record, error=exc.__class__.__name__)
343 |                     await self.on_handler_failed(exc, record)
344 |                 else:
345 |                     self._count_message(record)
346 |             except asyncio.InvalidStateError:
347 |                 # Task didnt finish yet, we shouldnt be here since we are
348 |                 # iterating the `done` list, so just log something
349 |                 logger.warning(f"Trying to get exception from unfinished task. Record: {record}")
350 |             except asyncio.CancelledError:
351 |                 # During task execution any exception will be returned in
352 |                 # the `done` list. But timeout exception should be captured
353 |                 # independendly, thats why we handle this condition here.
354 |                 self._count_message(record, error="cancelled")
355 |                 await self.on_handler_failed(HandlerTaskCancelled(record), record)
356 | 
357 |         # Process timeout tasks
358 |         for task in pending:
359 |             record = futures[task]
360 | 
361 |             try:
362 |                 # This will raise a `asyncio.CancelledError`, the consumer logic
363 |                 # is responsible to catch it.
364 |                 task.cancel()
365 |                 await task
366 |             except asyncio.CancelledError:
367 |                 # App didnt catch this exception, so we treat it as an unmanaged one.
368 |                 await self.on_handler_timeout(record)
369 | 
370 |             self._count_message(record, error="pending")
371 | 
372 |         for tp, records in batch.items():
373 |             CONSUMED_MESSAGES_BATCH_SIZE.labels(
374 |                 stream_id=tp.topic,
375 |                 group_id=self.group_id,
376 |                 partition=tp.partition,
377 |             ).observe(len(records))
378 | 
379 |             for record in sorted(records, key=lambda rec: rec.offset):
380 |                 lead_time = time.time() - record.timestamp / 1000  # type: ignore
381 |                 MESSAGE_LEAD_TIME.labels(
382 |                     stream_id=record.topic,
383 |                     group_id=self.group_id,
384 |                     partition=record.partition,
385 |                 ).observe(lead_time)
386 | 
387 |                 CONSUMER_TOPIC_OFFSET.labels(
388 |                     stream_id=record.topic,
389 |                     group_id=self.group_id,
390 |                     partition=record.partition,
391 |                 ).set(record.offset)
392 | 
393 |         # Commit first and then call the event subscribers
394 |         await self._maybe_commit()
395 |         for _, records in batch.items():
396 |             for record in records:
397 |                 await self.emit("message", record=record)
398 | 
399 |     def _count_message(self, record: aiokafka.ConsumerRecord, error: str = NOERROR) -> None:
400 |         CONSUMED_MESSAGES.labels(
401 |             stream_id=record.topic,
402 |             error=error,
403 |             partition=record.partition,
404 |             group_id=self.group_id,
405 |         ).inc()
406 | 
407 |     @property
408 |     def consumer(self) -> aiokafka.AIOKafkaConsumer:
409 |         return self._consumer
410 | 
411 |     async def _maybe_commit(self, forced: bool = False) -> None:
412 |         if not self._auto_commit:
413 |             return
414 | 
415 |         if not self._consumer.assignment() or not self._tp:
416 |             logger.warning("Cannot commit because no partitions are assigned!")
417 |             return
418 | 
419 |         interval = self._app.kafka_settings.get("auto_commit_interval_ms", 5000) / 1000
420 |         now = time.time()
421 |         if forced or (now > (self._last_commit + interval)):
422 |             try:
423 |                 if self._tp:
424 |                     await self._consumer.commit(offsets=self._tp)
425 |             except aiokafka.errors.CommitFailedError:
426 |                 logger.warning("Error attempting to commit", exc_info=True)
427 |             self._last_commit = now
428 | 
429 |     async def publish(
430 |         self,
431 |         stream_id: str,
432 |         record: aiokafka.ConsumerRecord,
433 |         headers: typing.Optional[typing.List[typing.Tuple[str, bytes]]] = None,
434 |     ) -> None:
435 |         record_headers = (record.headers or []) + (headers or [])
436 | 
437 |         fut = await self._app.raw_publish(
438 |             stream_id=stream_id, data=record.value, key=record.key, headers=record_headers
439 |         )
440 |         await fut
441 | 
442 |     async def healthy(self) -> None:
443 |         if not self._running:
444 |             self._health_metric(False)
445 |             raise ConsumerUnhealthyException(f"Consumer '{self}' is not running")
446 | 
447 |         if self._consumer is not None and not await self._consumer._client.ready(
448 |             self._consumer._coordinator.coordinator_id
449 |         ):
450 |             self._health_metric(False)
451 |             raise ConsumerUnhealthyException(f"Consumer '{self}' is not ready")
452 | 
453 |         self._health_metric(True)
454 |         return
455 | 
456 |     # Event handlers
457 |     async def on_partitions_revoked(self, revoked: typing.List[aiokafka.TopicPartition]) -> None:
458 |         if revoked:
459 |             # Wait for the current batch to be processed
460 |             async with self._processing:
461 |                 if self._auto_commit:
462 |                     # And commit before releasing the partitions.
463 |                     await self._maybe_commit(forced=True)
464 | 
465 |                 for tp in revoked:
466 |                     # Remove the partition from the dict
467 |                     self._tp.pop(tp, None)
468 |                     CONSUMER_REBALANCED.labels(
469 |                         partition=tp.partition,
470 |                         group_id=self.group_id,
471 |                         event="revoked",
472 |                     ).inc()
473 |             logger.info(f"Partitions revoked to {self}: {revoked}")
474 | 
475 |     async def on_partitions_assigned(self, assigned: typing.List[aiokafka.TopicPartition]) -> None:
476 |         if assigned:
477 |             logger.info(f"Partitions assigned to {self}: {assigned}")
478 | 
479 |         for tp in assigned:
480 |             position = await self._consumer.position(tp)
481 |             self._tp[tp] = position
482 | 
483 |             CONSUMER_REBALANCED.labels(
484 |                 partition=tp.partition,
485 |                 group_id=self.group_id,
486 |                 event="assigned",
487 |             ).inc()
488 | 
489 |     async def on_handler_timeout(self, record: aiokafka.ConsumerRecord) -> None:
490 |         raise HandlerTaskCancelled(record)
491 | 
492 |     async def on_handler_failed(
493 |         self, exception: BaseException, record: aiokafka.ConsumerRecord
494 |     ) -> None:
495 |         if isinstance(exception, UnhandledMessage):
496 |             logger.warning("Unhandled message, ignoring...", exc_info=exception)
497 |         else:
498 |             raise exception
499 | 


--------------------------------------------------------------------------------
/kafkaesk/exceptions.py:
--------------------------------------------------------------------------------
 1 | from typing import TYPE_CHECKING
 2 | 
 3 | import aiokafka
 4 | 
 5 | if TYPE_CHECKING:  # pragma: no cover
 6 |     from .app import SchemaRegistration
 7 | else:
 8 |     SchemaRegistration = SubscriptionConsumer = None
 9 | 
10 | 
11 | class JsonSchemaRequiredException(Exception):
12 |     ...
13 | 
14 | 
15 | class SchemaConflictException(Exception):
16 |     def __init__(self, existing: SchemaRegistration, new: SchemaRegistration):
17 |         self.existing = existing
18 |         self.new = new
19 | 
20 |     def __str__(self) -> str:
21 |         return f"""<Schema Conflict:
22 | Existing: {self.existing}
23 | New: {self.new}
24 | />"""
25 | 
26 | 
27 | class UnhandledMessage(Exception):
28 |     ...
29 | 
30 | 
31 | class StopConsumer(Exception):
32 |     ...
33 | 
34 | 
35 | class HandlerTaskCancelled(Exception):
36 |     def __init__(self, record: aiokafka.ConsumerRecord):
37 |         self.record = record
38 | 
39 | 
40 | class ConsumerUnhealthyException(Exception):
41 |     def __init__(self, reason: str):
42 |         self.reason = reason
43 | 
44 | 
45 | class AutoCommitError(ConsumerUnhealthyException):
46 |     ...
47 | 
48 | 
49 | class ProducerUnhealthyException(Exception):
50 |     def __init__(self, producer: aiokafka.AIOKafkaProducer):
51 |         self.producer = producer
52 | 
53 | 
54 | class AppNotConfiguredException(Exception):
55 |     ...
56 | 


--------------------------------------------------------------------------------
/kafkaesk/ext/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/onna/kafkaesk/10e88fd921fddff70b8cb973e739e280caa4cac8/kafkaesk/ext/__init__.py


--------------------------------------------------------------------------------
/kafkaesk/ext/logging/__init__.py:
--------------------------------------------------------------------------------
1 | from .handler import PydanticKafkaeskHandler
2 | from .handler import PydanticLogModel
3 | from .handler import PydanticStreamHandler
4 | 
5 | __all__ = ("PydanticLogModel", "PydanticKafkaeskHandler", "PydanticStreamHandler")
6 | 


--------------------------------------------------------------------------------
/kafkaesk/ext/logging/handler.py:
--------------------------------------------------------------------------------
  1 | from .record import PydanticLogRecord
  2 | from datetime import datetime
  3 | from typing import Any
  4 | from typing import Dict
  5 | from typing import IO
  6 | from typing import Optional
  7 | 
  8 | import asyncio
  9 | import kafkaesk
 10 | import logging
 11 | import os
 12 | import pydantic
 13 | import socket
 14 | import sys
 15 | import time
 16 | 
 17 | NAMESPACE_FILEPATH = "/var/run/secrets/kubernetes.io/serviceaccount/namespace"
 18 | _not_set = object()
 19 | _K8S_NS = _not_set
 20 | 
 21 | 
 22 | def get_k8s_ns() -> Optional[str]:
 23 |     global _K8S_NS
 24 |     if _K8S_NS == _not_set:
 25 |         if os.path.exists(NAMESPACE_FILEPATH):
 26 |             with open(NAMESPACE_FILEPATH) as fi:
 27 |                 _K8S_NS = fi.read().strip()
 28 |         else:
 29 |             _K8S_NS = None
 30 |     return _K8S_NS  # type: ignore
 31 | 
 32 | 
 33 | class InvalidLogFormat(Exception):
 34 |     ...
 35 | 
 36 | 
 37 | class PydanticLogModel(pydantic.BaseModel):
 38 |     class Config:
 39 |         extra = pydantic.Extra.allow
 40 | 
 41 | 
 42 | class PydanticStreamHandler(logging.StreamHandler):
 43 |     def __init__(self, stream: Optional[IO[str]] = None):
 44 |         super().__init__(stream=stream)
 45 | 
 46 |     def format(self, record: PydanticLogRecord) -> str:  # type: ignore
 47 |         message = super().format(record)
 48 | 
 49 |         for log in getattr(record, "pydantic_data", []):
 50 |             # log some attributes
 51 |             formatted_data = []
 52 |             size = 0
 53 |             for field_name in log.__fields__.keys():
 54 |                 val = getattr(log, field_name)
 55 |                 formatted = f"{field_name}={val}"
 56 |                 size += len(formatted)
 57 |                 formatted_data.append(formatted)
 58 | 
 59 |                 if size > 256:
 60 |                     break
 61 |             message += f": {', '.join(formatted_data)}"
 62 |             break
 63 | 
 64 |         return message
 65 | 
 66 | 
 67 | class KafkaeskQueue:
 68 |     def __init__(
 69 |         self,
 70 |         app: kafkaesk.app.Application,
 71 |         max_queue: int = 10000,
 72 |     ):
 73 |         self._queue: Optional[asyncio.Queue] = None
 74 |         self._queue_size = max_queue
 75 | 
 76 |         self._app = app
 77 | 
 78 |         self._app.on("finalize", self.flush)
 79 | 
 80 |         self._task: Optional[asyncio.Task] = None
 81 | 
 82 |     def start(self) -> None:
 83 |         if self._queue is None:
 84 |             self._queue = asyncio.Queue(maxsize=self._queue_size)
 85 | 
 86 |         if self._task is None or self._task.done():
 87 |             self._task = asyncio.create_task(self._run())
 88 | 
 89 |     def close(self) -> None:
 90 |         if self._task is not None and not self._task._loop.is_closed():
 91 |             if not self._task.done() and not self._task.cancelled():
 92 |                 self._task.cancel()
 93 | 
 94 |     @property
 95 |     def running(self) -> bool:
 96 |         if self._task is None:
 97 |             return False
 98 | 
 99 |         if self._task.done():
100 |             return False
101 | 
102 |         return True
103 | 
104 |     async def _run(self) -> None:
105 |         if self._queue is None:
106 |             raise RuntimeError("Queue must be started before workers")
107 | 
108 |         while True:
109 |             try:
110 |                 stream, log_data = await asyncio.wait_for(asyncio.create_task(self._queue.get()), 1)
111 |                 await self._publish(stream, log_data)
112 | 
113 |             except asyncio.TimeoutError:
114 |                 continue
115 | 
116 |             except asyncio.CancelledError:
117 |                 await self.flush()
118 |                 return
119 | 
120 |     async def flush(self) -> None:
121 |         if self._queue is not None:
122 |             while not self._queue.empty():
123 |                 stream, message = await self._queue.get()
124 |                 await self._publish(stream, message)
125 | 
126 |     async def _publish(self, stream: str, log_data: PydanticLogModel) -> None:
127 |         if not self._app._initialized:
128 |             await self._app.initialize()
129 | 
130 |         await self._app.publish(stream, log_data)
131 |         # TODO: Handle other Kafak errors that may be raised
132 | 
133 |     def put_nowait(self, stream: str, log_data: PydanticLogModel) -> None:
134 |         if self._queue is not None:
135 |             self._queue.put_nowait((stream, log_data))
136 | 
137 | 
138 | _formatter = logging.Formatter()
139 | 
140 | 
141 | class PydanticKafkaeskHandler(logging.Handler):
142 |     def __init__(
143 |         self, app: kafkaesk.Application, stream: str, queue: Optional[KafkaeskQueue] = None
144 |     ):
145 |         self.app = app
146 |         self.stream = stream
147 | 
148 |         if queue is None:
149 |             self._queue = KafkaeskQueue(self.app)
150 |         else:
151 |             self._queue = queue
152 | 
153 |         self._last_warning_sent = 0.0
154 | 
155 |         self._initialize_model()
156 | 
157 |         super().__init__()
158 | 
159 |     def clone(self) -> "PydanticKafkaeskHandler":
160 |         return PydanticKafkaeskHandler(self.app, self.stream, queue=self._queue)
161 | 
162 |     def _initialize_model(self) -> None:
163 |         try:
164 |             self.app.schema("PydanticLogModel")(PydanticLogModel)
165 |         except kafkaesk.app.SchemaConflictException:
166 |             pass
167 | 
168 |     def _format_base_log(self, record: PydanticLogRecord) -> Dict[str, Any]:
169 |         if record.exc_text is None and record.exc_info:
170 |             record.exc_text = _formatter.formatException(record.exc_info)
171 |             try:
172 |                 record.exc_type = record.exc_info[0].__name__  # type: ignore
173 |             except (AttributeError, IndexError):  # pragma: no cover
174 |                 ...
175 | 
176 |         if record.stack_info:
177 |             record.stack_text = _formatter.formatStack(record.stack_info)
178 | 
179 |         service_name = "unknown"
180 |         hostname = socket.gethostname()
181 |         dashes = hostname.count("-")
182 |         if dashes > 0:
183 |             # detect kubernetes service host
184 |             service_name = "-".join(hostname.split("-")[: -min(dashes, 2)])
185 | 
186 |         return {
187 |             "timestamp": datetime.utcnow().isoformat(),
188 |             "logger": record.name,
189 |             "severity": record.levelname,
190 |             "level": record.levelno,
191 |             "message": record.getMessage(),
192 |             "exception": record.exc_type,
193 |             "trace": record.stack_text,
194 |             "stack": record.exc_text,
195 |             "hostname": hostname,
196 |             "service": service_name,
197 |             "namespace": get_k8s_ns(),
198 |             "cluster": os.environ.get("CLUSTER"),
199 |         }
200 | 
201 |     def _format_extra_logs(self, record: PydanticLogRecord) -> Dict[str, Any]:
202 |         extra_logs: Dict[str, Any] = {}
203 | 
204 |         for log in getattr(record, "pydantic_data", []):
205 |             extra_logs.update(
206 |                 log.dict(
207 |                     exclude_none=True,
208 |                     exclude={
209 |                         "_is_log_model",
210 |                     },
211 |                 )
212 |             )
213 | 
214 |         return extra_logs
215 | 
216 |     def emit(self, record: PydanticLogRecord) -> None:  # type: ignore
217 |         if not self._queue.running:
218 |             try:
219 |                 self._queue.start()
220 |             except RuntimeError:
221 |                 sys.stderr.write("RuntimeError starting kafka logging, ignoring")
222 |                 return
223 | 
224 |         try:
225 |             raw_data = self._format_base_log(record)
226 |             raw_data.update(self._format_extra_logs(record))
227 |             log_data = PydanticLogModel(**raw_data)
228 |             self._queue.put_nowait(self.stream, log_data)
229 |         except InvalidLogFormat:  # pragma: no cover
230 |             sys.stderr.write("PydanticKafkaeskHandler recieved non-pydantic model")
231 |         except RuntimeError:
232 |             sys.stderr.write("Queue No event loop running to send log to Kafka\n")
233 |         except asyncio.QueueFull:
234 |             if time.time() - self._last_warning_sent > 30:
235 |                 sys.stderr.write("Queue hit max log queue size, discarding message\n")
236 |                 self._last_warning_sent = time.time()
237 |         except AttributeError:  # pragma: no cover
238 |             sys.stderr.write("Queue Error sending Kafkaesk log message\n")
239 | 
240 |     def close(self) -> None:
241 |         self.acquire()
242 |         try:
243 |             super().close()
244 |             if self._queue is not None:
245 |                 self._queue.close()
246 |         finally:
247 |             self.release()
248 | 


--------------------------------------------------------------------------------
/kafkaesk/ext/logging/record.py:
--------------------------------------------------------------------------------
 1 | from types import TracebackType
 2 | from typing import List
 3 | from typing import Optional
 4 | from typing import Tuple
 5 | from typing import Union
 6 | 
 7 | import logging
 8 | import pydantic
 9 | 
10 | 
11 | class PydanticLogRecord(logging.LogRecord):
12 |     def __init__(
13 |         self,
14 |         name: str,
15 |         level: int,
16 |         fn: str,
17 |         lno: int,
18 |         msg: str,
19 |         args: Tuple,
20 |         exc_info: Union[
21 |             Tuple[type, BaseException, Optional[TracebackType]], Tuple[None, None, None], None
22 |         ],
23 |         func: Optional[str] = None,
24 |         sinfo: Optional[str] = None,
25 |         pydantic_data: Optional[List[pydantic.BaseModel]] = None,
26 |     ):
27 |         super().__init__(name, level, fn, lno, msg, args, exc_info, func, sinfo)
28 | 
29 |         self.pydantic_data = pydantic_data or []
30 |         self.exc_type: Optional[str] = None
31 |         self.stack_text: Optional[str] = None
32 | 
33 | 
34 | def factory(
35 |     name: str,
36 |     level: int,
37 |     fn: str,
38 |     lno: int,
39 |     msg: str,
40 |     args: Tuple,
41 |     exc_info: Union[
42 |         Tuple[type, BaseException, Optional[TracebackType]], Tuple[None, None, None], None
43 |     ],
44 |     func: Optional[str] = None,
45 |     sinfo: Optional[str] = None,
46 | ) -> PydanticLogRecord:
47 |     pydantic_data: List[pydantic.BaseModel] = []
48 | 
49 |     new_args = []
50 |     for arg in args:
51 |         if isinstance(arg, pydantic.BaseModel):
52 |             if hasattr(arg, "_is_log_model") and getattr(arg, "_is_log_model", False) is True:
53 |                 pydantic_data.append(arg)
54 |                 continue
55 |         new_args.append(arg)
56 | 
57 |     args = tuple(new_args)
58 | 
59 |     record = PydanticLogRecord(
60 |         name, level, fn, lno, msg, args, exc_info, func, sinfo, pydantic_data
61 |     )
62 | 
63 |     return record
64 | 
65 | 
66 | if logging.getLogRecordFactory() != factory:
67 |     logging.setLogRecordFactory(factory)
68 | 


--------------------------------------------------------------------------------
/kafkaesk/kafka.py:
--------------------------------------------------------------------------------
  1 | from .metrics import watch_kafka
  2 | from aiokafka import TopicPartition
  3 | from kafkaesk.utils import run_async
  4 | from typing import Any
  5 | from typing import Dict
  6 | from typing import List
  7 | from typing import Optional
  8 | from typing import Tuple
  9 | 
 10 | import kafka
 11 | import kafka.admin
 12 | import kafka.admin.client
 13 | import kafka.errors
 14 | import kafka.structs
 15 | 
 16 | 
 17 | class KafkaTopicManager:
 18 |     _admin_client: Optional[kafka.admin.client.KafkaAdminClient] = None
 19 |     _client: Optional[kafka.KafkaClient] = None
 20 |     _kafka_api_version: Optional[Tuple[int, ...]] = None
 21 | 
 22 |     def __init__(
 23 |         self,
 24 |         bootstrap_servers: List[str],
 25 |         prefix: str = "",
 26 |         replication_factor: Optional[int] = None,
 27 |         kafka_api_version: str = "auto",
 28 |         ssl_context: Optional[Any] = None,
 29 |         security_protocol: Optional[str] = "PLAINTEXT",
 30 |         sasl_mechanism: Optional[str] = "",
 31 |         sasl_plain_username: Optional[str] = "",
 32 |         sasl_plain_password: Optional[str] = "",
 33 |     ):
 34 |         self.prefix = prefix
 35 |         self._bootstrap_servers = bootstrap_servers
 36 |         self._admin_client = self._client = None
 37 |         self._topic_cache: List[str] = []
 38 |         self._replication_factor: int = replication_factor or min(3, len(self._bootstrap_servers))
 39 |         if kafka_api_version == "auto":
 40 |             self._kafka_api_version = None
 41 |         else:
 42 |             self._kafka_api_version = tuple([int(v) for v in kafka_api_version.split(".")])
 43 |         self.ssl_context = ssl_context
 44 |         self.security_protocol = security_protocol
 45 |         self.sasl_mechanism = sasl_mechanism
 46 |         self.sasl_plain_username = sasl_plain_username
 47 |         self.sasl_plain_password = sasl_plain_password
 48 | 
 49 |     @property
 50 |     def kafka_api_version(self) -> Optional[Tuple[int, ...]]:
 51 |         return self._kafka_api_version
 52 | 
 53 |     async def finalize(self) -> None:
 54 |         if self._admin_client is not None:
 55 |             await run_async(self._admin_client.close)
 56 |             self._admin_client = None
 57 |         if self._client is not None:
 58 |             await run_async(self._client.close)
 59 |             self._client = None
 60 | 
 61 |     def get_topic_id(self, topic: str) -> str:
 62 |         return f"{self.prefix}{topic}"
 63 | 
 64 |     async def get_admin_client(self) -> kafka.admin.client.KafkaAdminClient:
 65 |         if self._admin_client is None:
 66 |             with watch_kafka("sync_admin_connect"):
 67 |                 self._admin_client = await run_async(
 68 |                     kafka.admin.client.KafkaAdminClient,
 69 |                     bootstrap_servers=self._bootstrap_servers,
 70 |                     api_version=self._kafka_api_version,
 71 |                     ssl_context=self.ssl_context,
 72 |                     security_protocol=self.security_protocol,
 73 |                     sasl_mechanism=self.sasl_mechanism,
 74 |                     sasl_plain_username=self.sasl_plain_username,
 75 |                     sasl_plain_password=self.sasl_plain_password,
 76 |                 )
 77 |         return self._admin_client
 78 | 
 79 |     async def list_consumer_group_offsets(
 80 |         self, group_id: str, partitions: Optional[List[TopicPartition]] = None
 81 |     ) -> Dict[kafka.structs.TopicPartition, kafka.structs.OffsetAndMetadata]:
 82 |         client = await self.get_admin_client()
 83 |         return await run_async(client.list_consumer_group_offsets, group_id, partitions=partitions)
 84 | 
 85 |     async def topic_exists(self, topic: str) -> bool:
 86 |         if self._client is None:
 87 |             with watch_kafka("sync_consumer_connect"):
 88 |                 self._client = await run_async(
 89 |                     kafka.KafkaConsumer,
 90 |                     bootstrap_servers=self._bootstrap_servers,
 91 |                     enable_auto_commit=False,
 92 |                     api_version=self._kafka_api_version,
 93 |                     ssl_context=self.ssl_context,
 94 |                     security_protocol=self.security_protocol,
 95 |                     sasl_mechanism=self.sasl_mechanism,
 96 |                     sasl_plain_username=self.sasl_plain_username,
 97 |                     sasl_plain_password=self.sasl_plain_password,
 98 |                 )
 99 |         if topic in self._topic_cache:
100 |             return True
101 |         with watch_kafka("sync_topics"):
102 |             if topic in await run_async(self._client.topics):
103 |                 self._topic_cache.append(topic)
104 |                 return True
105 |         return False
106 | 
107 |     async def create_topic(
108 |         self,
109 |         topic: str,
110 |         *,
111 |         partitions: int = 7,
112 |         replication_factor: Optional[int] = None,
113 |         retention_ms: Optional[int] = None,
114 |     ) -> None:
115 |         topic_configs: Dict[str, Any] = {}
116 |         if retention_ms is not None:
117 |             topic_configs["retention.ms"] = retention_ms
118 |         new_topic = kafka.admin.NewTopic(
119 |             topic,
120 |             partitions,
121 |             replication_factor or self._replication_factor,
122 |             topic_configs=topic_configs,
123 |         )
124 |         client = await self.get_admin_client()
125 |         try:
126 |             with watch_kafka("sync_create_topics"):
127 |                 await run_async(client.create_topics, [new_topic])
128 |         except kafka.errors.TopicAlreadyExistsError:
129 |             pass
130 |         self._topic_cache.append(topic)
131 |         return None
132 | 


--------------------------------------------------------------------------------
/kafkaesk/metrics.py:
--------------------------------------------------------------------------------
  1 | from prometheus_client.utils import INF
  2 | from typing import Dict
  3 | from typing import Optional
  4 | from typing import Type
  5 | 
  6 | import prometheus_client as client
  7 | import time
  8 | import traceback
  9 | 
 10 | NOERROR = "none"
 11 | ERROR_GENERAL_EXCEPTION = "exception"
 12 | 
 13 | KAFKA_ACTION = client.Counter(
 14 |     "kafkaesk_kafka_action",
 15 |     "Perform action on kafka",
 16 |     ["type", "error"],
 17 | )
 18 | 
 19 | KAFKA_ACTION_TIME = client.Histogram(
 20 |     "kafkaesk_kafka_action_time",
 21 |     "Time taken to perform kafka action",
 22 |     ["type"],
 23 | )
 24 | 
 25 | PUBLISH_MESSAGES = client.Counter(
 26 |     "kafkaesk_publish_messages",
 27 |     "Number of messages attempted to be published",
 28 |     ["stream_id", "error"],
 29 | )
 30 | 
 31 | PUBLISH_MESSAGES_TIME = client.Histogram(
 32 |     "kafkaesk_publish_messages_time",
 33 |     "Time taken for a message to be queued for publishing (in seconds)",
 34 |     ["stream_id"],
 35 | )
 36 | 
 37 | PUBLISHED_MESSAGES = client.Counter(
 38 |     "kafkaesk_published_messages",
 39 |     "Number of published messages",
 40 |     ["stream_id", "partition", "error"],
 41 | )
 42 | 
 43 | PUBLISHED_MESSAGES_TIME = client.Histogram(
 44 |     "kafkaesk_published_messages_time",
 45 |     "Time taken for a message to be published (in seconds)",
 46 |     ["stream_id"],
 47 | )
 48 | 
 49 | 
 50 | CONSUMED_MESSAGES = client.Counter(
 51 |     "kafkaesk_consumed_messages",
 52 |     "Number of consumed messages",
 53 |     ["stream_id", "partition", "error", "group_id"],
 54 | )
 55 | 
 56 | CONSUMED_MESSAGES_BATCH_SIZE = client.Histogram(
 57 |     "kafkaesk_consumed_messages_batch_size",
 58 |     "Size of message batches consumed",
 59 |     ["stream_id", "group_id", "partition"],
 60 |     buckets=[1, 5, 10, 20, 50, 100, 200, 500, 1000],
 61 | )
 62 | 
 63 | CONSUMED_MESSAGE_TIME = client.Histogram(
 64 |     "kafkaesk_consumed_message_elapsed_time",
 65 |     "Processing time for consumed message (in seconds)",
 66 |     ["stream_id", "group_id", "partition"],
 67 | )
 68 | 
 69 | PRODUCER_TOPIC_OFFSET = client.Gauge(
 70 |     "kafkaesk_produced_topic_offset",
 71 |     "Offset for produced messages a the topic",
 72 |     ["stream_id", "partition"],
 73 | )
 74 | 
 75 | CONSUMER_TOPIC_OFFSET = client.Gauge(
 76 |     "kafkaesk_consumed_topic_offset",
 77 |     "Offset for consumed messages in a topic",
 78 |     ["group_id", "partition", "stream_id"],
 79 | )
 80 | 
 81 | MESSAGE_LEAD_TIME = client.Histogram(
 82 |     "kafkaesk_message_lead_time",
 83 |     "Time that the message has been waiting to be handled by a consumer (in seconds)",
 84 |     ["stream_id", "group_id", "partition"],
 85 |     buckets=(0.1, 0.5, 1, 3, 5, 10, 30, 60, 60, 120, 300, INF),
 86 | )
 87 | 
 88 | CONSUMER_REBALANCED = client.Counter(
 89 |     "kafkaesk_consumer_rebalanced",
 90 |     "Consumer rebalances",
 91 |     ["group_id", "partition", "event"],
 92 | )
 93 | 
 94 | CONSUMER_HEALTH = client.Gauge(
 95 |     "kafkaesk_consumer_health", "Liveness probe for the consumer", ["group_id"]
 96 | )
 97 | 
 98 | 
 99 | class watch:
100 |     start: float
101 | 
102 |     def __init__(
103 |         self,
104 |         *,
105 |         counter: Optional[client.Counter] = None,
106 |         histogram: Optional[client.Histogram] = None,
107 |         labels: Optional[Dict[str, str]] = None,
108 |     ):
109 |         self.counter = counter
110 |         self.histogram = histogram
111 |         self.labels = labels or {}
112 | 
113 |     def __enter__(self) -> None:
114 |         self.start = time.time()
115 | 
116 |     def __exit__(
117 |         self,
118 |         exc_type: Optional[Type[Exception]] = None,
119 |         exc_value: Optional[Exception] = None,
120 |         exc_traceback: Optional[traceback.StackSummary] = None,
121 |     ) -> None:
122 |         error = NOERROR
123 |         if self.histogram is not None:
124 |             finished = time.time()
125 |             self.histogram.labels(**self.labels).observe(finished - self.start)
126 | 
127 |         if self.counter is not None:
128 |             if exc_value is None:
129 |                 error = NOERROR
130 |             else:
131 |                 error = ERROR_GENERAL_EXCEPTION
132 |             self.counter.labels(error=error, **self.labels).inc()
133 | 
134 | 
135 | class watch_kafka(watch):
136 |     def __init__(self, type: str):
137 |         super().__init__(counter=KAFKA_ACTION, histogram=KAFKA_ACTION_TIME, labels={"type": type})
138 | 
139 | 
140 | class watch_publish(watch):
141 |     def __init__(self, stream_id: str):
142 |         super().__init__(
143 |             counter=PUBLISH_MESSAGES,
144 |             histogram=PUBLISH_MESSAGES_TIME,
145 |             labels={"stream_id": stream_id},
146 |         )
147 | 


--------------------------------------------------------------------------------
/kafkaesk/publish.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/onna/kafkaesk/10e88fd921fddff70b8cb973e739e280caa4cac8/kafkaesk/publish.py


--------------------------------------------------------------------------------
/kafkaesk/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/onna/kafkaesk/10e88fd921fddff70b8cb973e739e280caa4cac8/kafkaesk/py.typed


--------------------------------------------------------------------------------
/kafkaesk/utils.py:
--------------------------------------------------------------------------------
 1 | from concurrent.futures.thread import ThreadPoolExecutor
 2 | from functools import partial
 3 | from typing import Any
 4 | from typing import Callable
 5 | 
 6 | import asyncio
 7 | 
 8 | executor = ThreadPoolExecutor(max_workers=30)
 9 | 
10 | 
11 | async def run_async(func: Callable[..., Any], *args: Any, **kwargs: Any) -> Any:
12 |     func_to_run = partial(func, *args, **kwargs)
13 |     loop = asyncio.get_event_loop()
14 |     return await loop.run_in_executor(executor, func_to_run)
15 | 
16 | 
17 | def resolve_dotted_name(name: str) -> Any:
18 |     """
19 |     import the provided dotted name
20 |     >>> resolve_dotted_name('foo.bar')
21 |     <object bar>
22 |     :param name: dotted name
23 |     """
24 |     names = name.split(".")
25 |     used = names.pop(0)
26 |     found = __import__(used)
27 |     for n in names:
28 |         used += "." + n
29 |         try:
30 |             found = getattr(found, n)
31 |         except AttributeError:
32 |             __import__(used)
33 |             found = getattr(found, n)
34 | 
35 |     return found
36 | 


--------------------------------------------------------------------------------
/mypy.ini:
--------------------------------------------------------------------------------
 1 | [mypy]
 2 | namespace_packages=True
 3 | mypy_path = stubs
 4 | follow_imports_for_stubs = True
 5 | disallow_incomplete_defs = True
 6 | check_untyped_defs = True
 7 | disallow_untyped_calls = True
 8 | disallow_untyped_defs = True
 9 | disallow_untyped_decorators = True
10 | disable_error_code=empty-body,assignment,unused-coroutine,var-annotated,arg-type
11 | 
12 | [mypy-aiohttp_client]
13 | ignore_missing_imports = True
14 | [mypy-opentracing.*]
15 | ignore_missing_imports = True
16 | 
17 | # test ignores
18 | [mypy-pytest]
19 | ignore_missing_imports = True
20 | [mypy-asynctest]
21 | ignore_missing_imports = True
22 | [mypy-pytest_docker_fixtures]
23 | ignore_missing_imports = True
24 | [mypy-prometheus_client.*]
25 | ignore_missing_imports = True
26 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [tool.poetry]
 2 | name = "kafkaesk"
 3 | version = "0.8.5"
 4 | description = "Easy publish and subscribe to events with python and Kafka."
 5 | authors = ["vangheem <vangheem@gmail.com>", "pfreixes <pfreixes@gmail.com>"]
 6 | classifiers = [
 7 |     "Programming Language :: Python :: 3.7",
 8 |     "Programming Language :: Python :: 3.8",
 9 |     "Programming Language :: Python :: 3.9",
10 |     "Programming Language :: Python :: 3.10",
11 |     "Framework :: AsyncIO",
12 |     "License :: OSI Approved :: BSD License",
13 |     "Topic :: System :: Distributed Computing"
14 | ]
15 | readme = "README.md"
16 | 
17 | 
18 | [tool.poetry.dependencies]
19 | python = ">=3.8.1"
20 | aiokafka = ">=0.7.1"
21 | kafka-python = "^2.0.2"
22 | pydantic = ">=1.5.1"
23 | orjson = ">=3.3.1"
24 | jsonschema = ">=3.2.0"
25 | prometheus_client = ">=0.8.0"
26 | opentracing = ">=2.3.0"
27 | async-timeout = ">=3.0.1"
28 | 
29 | [tool.poetry.dev-dependencies]
30 | pytest = "^7.4.0"
31 | pytest-docker-fixtures = "^1.3.17"
32 | pytest-asyncio = "^0.21.0"
33 | mypy = "1.0.0"
34 | flake8 = "^6.0.0"
35 | isort = "^5.12.0"
36 | 
37 | black = "^23.3.0"
38 | pytest-rerunfailures = "^11.1.2"
39 | pytest-cov = "^4.1.0"
40 | pdbpp = "^0.10.3"
41 | python-language-server = "^0.36.2"
42 | jaeger-client = "4.7.0"
43 | 
44 | [tool.poetry.scripts]
45 | kafkaesk = 'kafkaesk.app:run'
46 | 
47 | 
48 | [tool.poetry.urls]
49 | "GitHub" = "https://github.com/onna/kafkaesk"
50 | 
51 | [tool.black]
52 | line-length = 100
53 | target-version = ['py37']
54 | include = '\.pyi?$'
55 | exclude = '''
56 | 
57 | (
58 |   /(
59 |       \.eggs         # exclude a few common directories in the
60 |     | \.git          # root of the project
61 |     | \.hg
62 |     | \.mypy_cache
63 |     | \.tox
64 |     | \.venv
65 |     | _build
66 |     | buck-out
67 |     | build
68 |     | dist
69 |   )/
70 |   | foo.py           # also separately exclude a file named foo.py in
71 |                      # the root of the project
72 | )
73 | '''
74 | [build-system]
75 | requires = ["poetry>=0.12"]
76 | build-backend = "poetry.masonry.api"
77 | 


--------------------------------------------------------------------------------
/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | addopts = -p no:warnings


--------------------------------------------------------------------------------
/stubs/aiokafka/__init__.py:
--------------------------------------------------------------------------------
  1 | from asyncio.events import AbstractEventLoop
  2 | from collections import namedtuple
  3 | from kafka.structs import ConsumerRecord
  4 | from kafka.structs import TopicPartition
  5 | from typing import Any
  6 | from typing import AsyncIterator
  7 | from typing import Awaitable
  8 | from typing import Dict
  9 | from typing import List
 10 | from typing import Optional
 11 | from typing import Set
 12 | from typing import Tuple
 13 | 
 14 | 
 15 | class AIOKafkaProducer:
 16 |     _sender: Any
 17 | 
 18 |     def __init__(
 19 |         self,
 20 |         bootstrap_servers: List[str],
 21 |         loop: AbstractEventLoop,
 22 |         enable_auto_commit: Optional[bool] = True,
 23 |         group_id: Optional[str] = None,
 24 |         api_version: str = "auto",
 25 |     ):
 26 |         ...
 27 | 
 28 |     async def send(
 29 |         self,
 30 |         topic_id: str,
 31 |         value: bytes,
 32 |         key: Optional[bytes] = None,
 33 |         headers: Optional[List[Tuple[str, bytes]]] = None,
 34 |     ) -> Awaitable[ConsumerRecord]:
 35 |         ...
 36 | 
 37 |     async def start(self) -> None:
 38 |         ...
 39 | 
 40 |     async def stop(self) -> None:
 41 |         ...
 42 | 
 43 |     async def flush(self) -> None:
 44 |         ...
 45 | 
 46 | 
 47 | class AIOKafkaClient:
 48 |     async def ready(self, node_id: str, *, group: Optional[str] = None) -> bool:
 49 |         ...
 50 | 
 51 | 
 52 | class GroupCoordinator:
 53 |     coordinator_id: str
 54 | 
 55 |     def request_rejoin(self) -> None:
 56 |         ...
 57 | 
 58 |     def need_rejoin(self, subscription: "Subscription") -> bool:
 59 |         ...
 60 | 
 61 |     async def ensure_coordinator_known(self) -> None:
 62 |         ...
 63 | 
 64 |     async def ensure_active_group(self, subscription: Any, assignment: Any) -> None:
 65 |         ...
 66 | 
 67 | 
 68 | class Subscription:
 69 |     subscription: Any
 70 | 
 71 |     async def wait_for_subscription(self) -> None:
 72 |         ...
 73 | 
 74 |     async def partitions_auto_assigned(self) -> bool:
 75 |         ...
 76 | 
 77 | 
 78 | class AIOKafkaConsumer:
 79 |     _client: AIOKafkaClient
 80 |     _coordinator: GroupCoordinator
 81 |     _subscription: Subscription
 82 |     _group_id: Optional[str] = None
 83 | 
 84 |     def __init__(
 85 |         self,
 86 |         bootstrap_servers: List[str],
 87 |         loop: AbstractEventLoop,
 88 |         group_id: Optional[str] = None,
 89 |         api_version: str = "auto",
 90 |         **kwargs: Any,
 91 |     ):
 92 |         ...
 93 | 
 94 |     async def getone(self, *partitions: Optional[List[TopicPartition]]) -> ConsumerRecord:
 95 |         ...
 96 | 
 97 |     async def subscribe(
 98 |         self,
 99 |         topics: Optional[List[str]] = None,
100 |         pattern: Optional[str] = None,
101 |         listener: Optional["ConsumerRebalanceListener"] = None,
102 |     ) -> None:
103 |         ...
104 | 
105 |     async def start(self) -> None:
106 |         ...
107 | 
108 |     async def stop(self) -> None:
109 |         ...
110 | 
111 |     async def commit(self, offsets: Optional[Dict[TopicPartition, int]] = None) -> None:
112 |         ...
113 | 
114 |     def __aiter__(self) -> AsyncIterator[ConsumerRecord]:
115 |         ...
116 | 
117 |     async def __anext__(self) -> ConsumerRecord:
118 |         ...
119 | 
120 |     async def position(self, tp: TopicPartition) -> int:
121 |         ...
122 | 
123 |     async def seek(self, tp: TopicPartition, offset: int) -> None:
124 |         ...
125 | 
126 |     async def seek_to_beginning(self, tp: TopicPartition) -> None:
127 |         ...
128 | 
129 |     def assignment(self) -> Set[TopicPartition]:
130 |         ...
131 | 
132 |     async def getmany(
133 |         self, *partitions: TopicPartition, timeout_ms: int = 0, max_records: int = None
134 |     ) -> Dict[TopicPartition, List[ConsumerRecord]]:
135 |         ...
136 | 
137 | 
138 | class ConsumerRebalanceListener:
139 |     async def on_partitions_revoked(self, revoked: List[TopicPartition]) -> None:
140 |         ...
141 | 
142 |     async def on_partitions_assigned(self, assigned: List[TopicPartition]) -> None:
143 |         ...
144 | 
145 | 
146 | OffsetAndMetadata = namedtuple(
147 |     "OffsetAndMetadata",
148 |     # TODO add leaderEpoch: OffsetAndMetadata(offset, leaderEpoch, metadata)
149 |     ["offset", "metadata"],
150 | )
151 | 


--------------------------------------------------------------------------------
/stubs/aiokafka/errors.py:
--------------------------------------------------------------------------------
 1 | class KafkaError(Exception):
 2 |     ...
 3 | 
 4 | 
 5 | class NodeNotReadyError(KafkaError):
 6 |     ...
 7 | 
 8 | 
 9 | class RequestTimedOutError(KafkaError):
10 |     ...
11 | 
12 | 
13 | class ConsumerStoppedError(KafkaError):
14 |     ...
15 | 
16 | 
17 | class IllegalStateError(KafkaError):
18 |     ...
19 | 
20 | 
21 | class UnrecognizedBrokerVersion(KafkaError):
22 |     ...
23 | 
24 | 
25 | class KafkaConnectionError(KafkaError):
26 |     ...
27 | 
28 | 
29 | class CommitFailedError(KafkaError):
30 |     ...
31 | 


--------------------------------------------------------------------------------
/stubs/aiokafka/structs.py:
--------------------------------------------------------------------------------
1 | from kafka.structs import ConsumerRecord  # noqa
2 | from kafka.structs import TopicPartition
3 | 


--------------------------------------------------------------------------------
/stubs/kafka/__init__.py:
--------------------------------------------------------------------------------
 1 | from kafka.structs import TopicPartition
 2 | from typing import Dict
 3 | from typing import List
 4 | from typing import Optional
 5 | 
 6 | 
 7 | class KafkaConsumer:
 8 |     def __init__(
 9 |         self,
10 |         bootstrap_servers: List[str],
11 |         enable_auto_commit: Optional[bool] = True,
12 |         group_id: Optional[str] = None,
13 |     ):
14 |         ...
15 | 
16 |     def assign(self, parts: List[TopicPartition]) -> None:
17 |         ...
18 | 
19 |     def seek_to_beginning(self, partition: TopicPartition) -> None:
20 |         ...
21 | 
22 |     def end_offsets(self, parts: List[TopicPartition]) -> Dict[TopicPartition, int]:
23 |         ...
24 | 
25 | 
26 | class KafkaClient:
27 |     topic_partitions: Dict[str, List[int]]
28 | 
29 |     def close(self) -> None:
30 |         ...
31 | 
32 |     def topics(self) -> List[str]:
33 |         ...
34 | 


--------------------------------------------------------------------------------
/stubs/kafka/admin/__init__.py:
--------------------------------------------------------------------------------
1 | from .client import NewTopic  # noqa
2 | 


--------------------------------------------------------------------------------
/stubs/kafka/admin/client.py:
--------------------------------------------------------------------------------
 1 | from typing import Any
 2 | from typing import Dict
 3 | from typing import List
 4 | 
 5 | import kafka.structs
 6 | 
 7 | 
 8 | class NewTopic:
 9 |     def __init__(
10 |         self, topic: str, partitions: int, replication_factor: int, topic_configs: Dict[str, Any]
11 |     ):
12 |         ...
13 | 
14 | 
15 | class KafkaAdminClient:
16 |     def create_topics(self, topics: List[NewTopic]) -> None:
17 |         ...
18 | 
19 |     def close(self) -> None:
20 |         ...
21 | 
22 |     def list_consumer_group_offsets(
23 |         self, group_id: str
24 |     ) -> Dict[kafka.structs.TopicPartition, kafka.structs.OffsetAndMetadata]:
25 |         ...
26 | 


--------------------------------------------------------------------------------
/stubs/kafka/errors.py:
--------------------------------------------------------------------------------
 1 | class TopicAlreadyExistsError(Exception):
 2 |     ...
 3 | 
 4 | 
 5 | class CommitFailedError(Exception):
 6 |     ...
 7 | 
 8 | 
 9 | class IllegalStateError(Exception):
10 |     ...
11 | 


--------------------------------------------------------------------------------
/stubs/kafka/structs.py:
--------------------------------------------------------------------------------
 1 | from typing import List
 2 | from typing import Optional
 3 | from typing import Tuple
 4 | 
 5 | 
 6 | class ConsumerRecord:
 7 |     partition: int
 8 |     offset: int
 9 |     topic: str
10 |     value: bytes
11 |     key: bytes
12 |     headers: Optional[List[Tuple[str, bytes]]] = None
13 | 
14 | 
15 | class TopicPartition:
16 |     topic: str
17 |     partition: int
18 | 
19 |     def __init__(self, topic: str, partition: int):
20 |         ...
21 | 
22 | 
23 | class OffsetAndMetadata:
24 |     offset: int
25 |     metadata: str
26 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/onna/kafkaesk/10e88fd921fddff70b8cb973e739e280caa4cac8/tests/__init__.py


--------------------------------------------------------------------------------
/tests/acceptance/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/onna/kafkaesk/10e88fd921fddff70b8cb973e739e280caa4cac8/tests/acceptance/__init__.py


--------------------------------------------------------------------------------
/tests/acceptance/ext/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/onna/kafkaesk/10e88fd921fddff70b8cb973e739e280caa4cac8/tests/acceptance/ext/__init__.py


--------------------------------------------------------------------------------
/tests/acceptance/ext/logging/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/onna/kafkaesk/10e88fd921fddff70b8cb973e739e280caa4cac8/tests/acceptance/ext/logging/__init__.py


--------------------------------------------------------------------------------
/tests/acceptance/ext/logging/test_handler.py:
--------------------------------------------------------------------------------
  1 | from kafkaesk.ext.logging.handler import KafkaeskQueue
  2 | from kafkaesk.ext.logging.handler import PydanticKafkaeskHandler
  3 | from kafkaesk.ext.logging.handler import PydanticLogModel
  4 | from kafkaesk.ext.logging.handler import PydanticStreamHandler
  5 | from typing import Optional
  6 | from unittest.mock import MagicMock
  7 | from unittest.mock import Mock
  8 | from unittest.mock import patch
  9 | 
 10 | import asyncio
 11 | import io
 12 | import kafkaesk
 13 | import logging
 14 | import pydantic
 15 | import pytest
 16 | import pytest_asyncio
 17 | import time
 18 | import uuid
 19 | 
 20 | pytestmark = pytest.mark.asyncio
 21 | 
 22 | 
 23 | @pytest_asyncio.fixture(scope="function")
 24 | def logger():
 25 |     ll = logging.getLogger("test")
 26 |     ll.propagate = False
 27 |     ll.setLevel(logging.DEBUG)
 28 | 
 29 |     return ll
 30 | 
 31 | 
 32 | @pytest_asyncio.fixture(scope="function")
 33 | def stream_handler(logger):
 34 |     stream = io.StringIO()
 35 |     handler = PydanticStreamHandler(stream=stream)
 36 |     logger.addHandler(handler)
 37 | 
 38 |     return stream
 39 | 
 40 | 
 41 | @pytest_asyncio.fixture(scope="function")
 42 | def kafakesk_handler(app, logger):
 43 |     handler = PydanticKafkaeskHandler(app, "log.test")
 44 |     logger.addHandler(handler)
 45 | 
 46 |     return handler
 47 | 
 48 | 
 49 | async def test_handler_initializes_applogger(kafka, logger):
 50 |     app = kafkaesk.Application(
 51 |         [f"{kafka[0]}:{kafka[1]}"],
 52 |         topic_prefix=uuid.uuid4().hex,
 53 |         kafka_settings={"metadata_max_age_ms": 500},
 54 |     )
 55 | 
 56 |     handler = PydanticKafkaeskHandler(app, "log.test")
 57 |     logger.addHandler(handler)
 58 | 
 59 |     logger.error("Hi!")
 60 | 
 61 |     await asyncio.sleep(0.1)
 62 |     assert app._initialized
 63 | 
 64 | 
 65 | @pytest_asyncio.fixture(scope="function")
 66 | def log_consumer(app):
 67 |     consumed = []
 68 | 
 69 |     @app.subscribe("log.test", group="test_group")
 70 |     async def consume(data: PydanticLogModel):
 71 |         consumed.append(data)
 72 | 
 73 |     yield consumed
 74 | 
 75 | 
 76 | class TestPydanticStreamHandler:
 77 |     async def test_stream_handler(self, stream_handler, logger):
 78 |         logger.info("Test Message %s", "extra")
 79 | 
 80 |         message = stream_handler.getvalue()
 81 | 
 82 |         assert "Test Message extra" in message
 83 | 
 84 |     async def test_stream_handler_with_log_model(self, stream_handler, logger):
 85 |         class LogModel(pydantic.BaseModel):
 86 |             _is_log_model = True
 87 |             foo: Optional[str] = None
 88 | 
 89 |         logger.info("Test Message %s", "extra", LogModel(foo="bar"))
 90 | 
 91 |         message = stream_handler.getvalue()
 92 | 
 93 |         assert "Test Message extra" in message
 94 |         assert "foo=bar" in message
 95 | 
 96 |     async def test_stream_handler_with_log_model_shortens_log_messae(self, stream_handler, logger):
 97 |         class LogModel(pydantic.BaseModel):
 98 |             _is_log_model = True
 99 |             foo: str
100 |             bar: str
101 | 
102 |         logger.info("Test Message %s", "extra", LogModel(foo="X" * 256, bar="Y" * 256))
103 | 
104 |         message = stream_handler.getvalue()
105 | 
106 |         assert "Test Message extra" in message
107 |         assert f"foo={'X' * 256}" in message
108 |         assert f"bar={'Y' * 256}" not in message
109 | 
110 | 
111 | class TestPydanticKafkaeskHandler:
112 |     async def test_kafka_handler(self, app, kafakesk_handler, logger, log_consumer):
113 |         async with app:
114 |             logger.info("Test Message %s", "extra")
115 |             await app.flush()
116 |             await app.consume_for(1, seconds=8)
117 | 
118 |         assert len(log_consumer) == 1
119 |         assert log_consumer[0].message == "Test Message extra"
120 | 
121 |     async def test_kafka_handler_with_log_model(self, app, kafakesk_handler, logger, log_consumer):
122 |         class LogModel(pydantic.BaseModel):
123 |             _is_log_model = True
124 |             foo: Optional[str] = None
125 | 
126 |         async with app:
127 |             logger.info("Test Message %s", "extra", LogModel(foo="bar"))
128 |             await app.flush()
129 |             await app.consume_for(1, seconds=8)
130 | 
131 |         assert len(log_consumer) == 1
132 |         assert log_consumer[0].message == "Test Message extra"
133 |         assert log_consumer[0].foo == "bar"
134 | 
135 |     def test_emit_std_output_queue_full(self):
136 |         queue = MagicMock()
137 |         with patch("kafkaesk.ext.logging.handler.KafkaeskQueue", return_value=queue), patch(
138 |             "kafkaesk.ext.logging.handler.sys.stderr.write"
139 |         ) as std_write:
140 |             queue.put_nowait.side_effect = asyncio.QueueFull
141 |             handler = PydanticKafkaeskHandler(MagicMock(), "foo")
142 |             record = Mock()
143 |             record.pydantic_data = []
144 |             handler.emit(record)
145 |             std_write.assert_called_once()
146 | 
147 |     def test_emit_limits_std_output_queue_full(self):
148 |         queue = MagicMock()
149 |         with patch("kafkaesk.ext.logging.handler.KafkaeskQueue", return_value=queue), patch(
150 |             "kafkaesk.ext.logging.handler.sys.stderr.write"
151 |         ) as std_write:
152 |             queue.put_nowait.side_effect = asyncio.QueueFull
153 |             handler = PydanticKafkaeskHandler(MagicMock(), "foo")
154 |             handler._last_warning_sent = time.time() + 1
155 |             record = Mock()
156 |             record.pydantic_data = []
157 |             handler.emit(record)
158 |             std_write.assert_not_called()
159 | 
160 |     def test_clone(self):
161 |         handler = PydanticKafkaeskHandler(MagicMock(), "foo")
162 |         handler2 = handler.clone()
163 |         assert handler != handler2
164 |         assert handler.app == handler2.app
165 |         assert handler._queue == handler2._queue
166 | 
167 |     def test_emit_drops_message_on_runtime_error_start(self):
168 |         queue = MagicMock()
169 |         with patch("kafkaesk.ext.logging.handler.KafkaeskQueue", return_value=queue), patch(
170 |             "kafkaesk.ext.logging.handler.sys.stderr.write"
171 |         ) as std_write:
172 |             queue.running = False
173 |             queue.start.side_effect = RuntimeError
174 |             handler = PydanticKafkaeskHandler(MagicMock(), "foo")
175 |             record = Mock()
176 |             record.pydantic_data = []
177 |             handler.emit(record)
178 |             std_write.assert_called_once()
179 | 
180 | 
181 | class TestKafkaeskQueue:
182 |     @pytest_asyncio.fixture(scope="function")
183 |     async def queue(self, request, app):
184 |         max_queue = 10000
185 |         for marker in request.node.iter_markers("with_max_queue"):
186 |             max_queue = marker.args[0]
187 | 
188 |         app.schema("PydanticLogModel")(PydanticLogModel)
189 |         q = KafkaeskQueue(app, max_queue=max_queue)
190 | 
191 |         return q
192 | 
193 |     async def test_queue(self, app, queue):
194 |         consumed = []
195 | 
196 |         @app.subscribe("log.test", group="test_group")
197 |         async def consume(data: PydanticLogModel):
198 |             consumed.append(data)
199 | 
200 |         async with app:
201 |             queue.start()
202 |             queue.put_nowait("log.test", PydanticLogModel(foo="bar"))
203 | 
204 |             await app.flush()
205 |             await app.consume_for(1, seconds=8)
206 | 
207 |         queue.close()
208 |         await queue._task
209 | 
210 |         assert len(consumed) == 1
211 | 
212 |     async def test_queue_flush(self, app, queue, log_consumer):
213 |         async with app:
214 |             queue.start()
215 |             for i in range(10):
216 |                 queue.put_nowait("log.test", PydanticLogModel(count=i))
217 | 
218 |             await queue.flush()
219 | 
220 |             await app.flush()
221 |             await app.consume_for(10, seconds=8)
222 | 
223 |         assert len(log_consumer) == 10
224 | 
225 |     async def test_queue_flush_on_close(self, app, queue, log_consumer):
226 |         async with app:
227 |             queue.start()
228 |             await asyncio.sleep(0.1)
229 |             queue.close()
230 | 
231 |             for i in range(10):
232 |                 queue.put_nowait("log.test", PydanticLogModel(count=i))
233 | 
234 |             await app.flush()
235 |             await app.consume_for(10, seconds=8)
236 | 
237 |         assert len(log_consumer) == 10
238 |         assert queue._task.done()
239 | 
240 |     @pytest.mark.with_max_queue(1)
241 |     async def test_queue_max_size(self, app, queue):
242 |         queue.start()
243 |         queue.put_nowait("log.test", PydanticLogModel())
244 | 
245 |         with pytest.raises(asyncio.QueueFull):
246 |             queue.put_nowait("log.test", PydanticLogModel())
247 | 


--------------------------------------------------------------------------------
/tests/acceptance/ext/logging/test_record.py:
--------------------------------------------------------------------------------
 1 | from kafkaesk.ext.logging.record import factory
 2 | from kafkaesk.ext.logging.record import PydanticLogRecord
 3 | from typing import Optional
 4 | 
 5 | import logging
 6 | import pydantic
 7 | import pytest
 8 | 
 9 | pytestmark = pytest.mark.asyncio
10 | 
11 | 
12 | async def test_factory_return_type() -> None:
13 |     record = factory(
14 |         name="logger.test",
15 |         level=logging.INFO,
16 |         fn="test_factory_retrun_type",
17 |         lno=4,
18 |         msg="Test Log",
19 |         args=(),
20 |         exc_info=None,
21 |         func=None,
22 |         sinfo=None,
23 |     )
24 | 
25 |     assert isinstance(record, PydanticLogRecord)
26 |     assert issubclass(PydanticLogRecord, logging.LogRecord)
27 | 
28 | 
29 | async def test_factory_adds_pydantic_models() -> None:
30 |     class LogModel(pydantic.BaseModel):
31 |         _is_log_model = True
32 |         foo: Optional[str] = None
33 | 
34 |     record = factory(
35 |         name="logger.test",
36 |         level=logging.INFO,
37 |         fn="test_factory_retrun_type",
38 |         lno=4,
39 |         msg="Test Log",
40 |         args=(LogModel(foo="bar"),),
41 |         exc_info=None,
42 |         func=None,
43 |         sinfo=None,
44 |     )
45 | 
46 |     assert len(record.pydantic_data) == 1
47 |     assert len(record.args) == 0
48 | 
49 | 
50 | async def test_factory_formats_msg() -> None:
51 |     record = factory(
52 |         name="logger.test",
53 |         level=logging.INFO,
54 |         fn="test_factory_retrun_type",
55 |         lno=4,
56 |         msg="Test Log %s",
57 |         args=("extra",),
58 |         exc_info=None,
59 |         func=None,
60 |         sinfo=None,
61 |     )
62 | 
63 |     assert record.getMessage() == "Test Log extra"
64 | 
65 | 
66 | async def test_factory_formats_msg_and_adds_pydantic_model() -> None:
67 |     class LogModel(pydantic.BaseModel):
68 |         _is_log_model = True
69 |         foo: Optional[str] = None
70 | 
71 |     record = factory(
72 |         name="logger.test",
73 |         level=logging.INFO,
74 |         fn="test_factory_retrun_type",
75 |         lno=4,
76 |         msg="Test Log %s",
77 |         args=("extra", LogModel(foo="bar")),
78 |         exc_info=None,
79 |         func=None,
80 |         sinfo=None,
81 |     )
82 | 
83 |     assert record.getMessage() == "Test Log extra"
84 |     assert len(record.pydantic_data) == 1
85 | 


--------------------------------------------------------------------------------
/tests/acceptance/produce.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | import pydantic
 3 | 
 4 | 
 5 | class Foo(pydantic.BaseModel):
 6 |     foo: str
 7 | 
 8 | 
 9 | async def producer(app, topic):
10 |     while True:
11 |         try:
12 |             await app.publish(topic, Foo(foo="bar"))
13 |             await asyncio.sleep(0.05)
14 |         except asyncio.CancelledError:
15 |             return
16 | 


--------------------------------------------------------------------------------
/tests/acceptance/test_healthcheck.py:
--------------------------------------------------------------------------------
 1 | from aiokafka import ConsumerRecord
 2 | from kafkaesk import Application
 3 | from kafkaesk.exceptions import ConsumerUnhealthyException
 4 | from .produce import producer
 5 | from kafkaesk.exceptions import ProducerUnhealthyException
 6 | from kafkaesk.kafka import KafkaTopicManager
 7 | from unittest.mock import call
 8 | from unittest.mock import MagicMock
 9 | from unittest.mock import Mock
10 | from unittest.mock import patch
11 | 
12 | import aiokafka.structs
13 | import asyncio
14 | import pydantic
15 | import pytest
16 | import uuid
17 | 
18 | try:
19 |     from unittest.mock import AsyncMock
20 | except:  # noqa
21 |     AsyncMock = None  # type: ignore
22 | 
23 | pytestmark = pytest.mark.asyncio
24 | 
25 | TOPIC = "test-hc"
26 | 
27 | 
28 | async def test_health_check_should_fail_with_unhandled(app: Application):
29 |     @app.subscribe(TOPIC, group=TOPIC)
30 |     async def consume(data):
31 |         raise Exception("failure!")
32 | 
33 |     async with app:
34 |         produce = asyncio.create_task(producer(app, TOPIC))
35 |         fut = asyncio.create_task(app.consume_forever())
36 |         await fut
37 | 
38 |         with pytest.raises(ConsumerUnhealthyException):
39 |             await app.health_check()
40 | 
41 |         produce.cancel()
42 | 
43 | 
44 | async def test_health_check_should_succeed(app):
45 |     @app.subscribe(TOPIC, group=TOPIC)
46 |     async def consume(data):
47 |         ...
48 | 
49 |     async with app:
50 |         produce = asyncio.create_task(producer(app, TOPIC))
51 |         asyncio.create_task(app.consume_forever())
52 |         await asyncio.sleep(1)  # wait for some to produce and then be consumed to cause failure
53 |         await app.health_check()
54 |         produce.cancel()
55 | 


--------------------------------------------------------------------------------
/tests/acceptance/test_pubsub.py:
--------------------------------------------------------------------------------
  1 | from aiokafka import ConsumerRecord
  2 | from kafkaesk import Application
  3 | from kafkaesk.exceptions import ProducerUnhealthyException
  4 | from kafkaesk.kafka import KafkaTopicManager
  5 | from unittest.mock import call
  6 | from unittest.mock import MagicMock
  7 | from unittest.mock import Mock
  8 | from unittest.mock import patch
  9 | 
 10 | import aiokafka.structs
 11 | import asyncio
 12 | import pydantic
 13 | import pytest
 14 | import uuid
 15 | 
 16 | try:
 17 |     from unittest.mock import AsyncMock
 18 | except:  # noqa
 19 |     AsyncMock = None  # type: ignore
 20 | 
 21 | pytestmark = pytest.mark.asyncio
 22 | 
 23 | 
 24 | async def test_data_binding(app):
 25 |     consumed = []
 26 | 
 27 |     @app.schema("Foo", streams=["foo.bar"])
 28 |     class Foo(pydantic.BaseModel):
 29 |         bar: str
 30 | 
 31 |     @app.subscribe("foo.bar", group="test_group")
 32 |     async def consume(data: Foo, schema, record, app):
 33 |         consumed.append((data, schema, record, app))
 34 | 
 35 |     async with app:
 36 |         await app.publish_and_wait("foo.bar", Foo(bar="1"))
 37 |         await app.flush()
 38 |         await app.consume_for(1, seconds=10)
 39 | 
 40 |     assert len(consumed) == 1
 41 |     assert len(consumed[0]) == 4
 42 | 
 43 | 
 44 | async def test_consume_message(app):
 45 |     consumed = []
 46 | 
 47 |     @app.schema("Foo", streams=["foo.bar"])
 48 |     class Foo(pydantic.BaseModel):
 49 |         bar: str
 50 | 
 51 |     @app.subscribe("foo.bar", group="test_group")
 52 |     async def consume(data: Foo):
 53 |         consumed.append(data)
 54 | 
 55 |     async with app:
 56 |         await app.publish_and_wait("foo.bar", Foo(bar="1"))
 57 |         await app.flush()
 58 |         await app.consume_for(1, seconds=10)
 59 | 
 60 |     assert len(consumed) == 1
 61 | 
 62 | 
 63 | async def test_consume_many_messages(app):
 64 |     consumed = []
 65 | 
 66 |     @app.schema("Foo", streams=["foo.bar"])
 67 |     class Foo(pydantic.BaseModel):
 68 |         bar: str
 69 | 
 70 |     @app.subscribe("foo.bar", group="test_group")
 71 |     async def consume(data: Foo):
 72 |         consumed.append(data)
 73 | 
 74 |     async with app:
 75 |         fut = asyncio.create_task(app.consume_for(10, seconds=10))
 76 |         await asyncio.sleep(0.1)
 77 |         for idx in range(10):
 78 |             await app.publish("foo.bar", Foo(bar=str(idx)))
 79 |         await app.flush()
 80 |         await fut
 81 | 
 82 |     assert len(consumed) == 10
 83 | 
 84 | 
 85 | async def test_slow_messages(app: Application):
 86 |     consumed = []
 87 | 
 88 |     @app.schema("Slow", streams=["foo.bar"])
 89 |     class Slow(pydantic.BaseModel):
 90 |         latency: float
 91 | 
 92 |     @app.subscribe("foo.bar", group="test_group", concurrency=10, timeout_seconds=0.045)
 93 |     async def consumer(data: Slow, record: aiokafka.ConsumerRecord):
 94 |         try:
 95 |             await asyncio.sleep(data.latency)
 96 |             consumed.append(("ok", data.latency, record.topic))
 97 |         except asyncio.CancelledError:
 98 |             consumed.append(("cancelled", data.latency, record.topic))
 99 | 
100 |     async with app:
101 |         for idx in range(10):
102 |             await app.publish("foo.bar", Slow(latency=idx * 0.01))
103 |             await asyncio.sleep(0.01)
104 |         await app.flush()
105 | 
106 |         fut = asyncio.create_task(app.consume_for(num_messages=8, seconds=5))
107 |         await fut
108 | 
109 |         assert len([x for x in consumed if x[0] == "ok"]) == 5
110 |         assert len([x for x in consumed if x[0] == "cancelled"]) == 5
111 | 
112 | 
113 | async def test_not_consume_message_that_does_not_match(app):
114 |     consumed = []
115 | 
116 |     @app.schema("Foo", streams=["foo.bar"])
117 |     class Foo(pydantic.BaseModel):
118 |         bar: str
119 | 
120 |     @app.subscribe("foo.bar", group="test_group")
121 |     async def consume(data: Foo):
122 |         consumed.append(data)
123 | 
124 |     async with app:
125 |         await app.publish("foo.bar1", Foo(bar="1"))
126 |         await app.flush()
127 |         await app.consume_for(1, seconds=5)
128 | 
129 |     assert len(consumed) == 0
130 | 
131 | 
132 | async def test_subscribe_without_group(app):
133 |     @app.schema("Foo")
134 |     class Foo(pydantic.BaseModel):
135 |         bar: str
136 | 
137 |     with pytest.raises(TypeError):
138 | 
139 |         @app.subscribe("foo.bar")
140 |         async def consume(data: Foo):
141 |             ...
142 | 
143 | 
144 | async def test_multiple_subscribers_different_models(app):
145 |     consumed1 = []
146 |     consumed2 = []
147 | 
148 |     @app.schema("Foo", version=1, streams=["foo.bar"])
149 |     class Foo1(pydantic.BaseModel):
150 |         bar: str
151 | 
152 |     @app.schema("Foo", version=2)
153 |     class Foo2(pydantic.BaseModel):
154 |         foo: str
155 |         bar: str
156 | 
157 |     @app.subscribe(
158 |         "foo.bar",
159 |         group="test_group",
160 |     )
161 |     async def consume1(data: Foo1):
162 |         consumed1.append(data)
163 | 
164 |     @app.subscribe(
165 |         "foo.bar",
166 |         group="test_group_2",
167 |     )
168 |     async def consume2(data: Foo2):
169 |         consumed2.append(data)
170 | 
171 |     async with app:
172 |         fut = asyncio.create_task(app.consume_for(4, seconds=10))
173 |         await asyncio.sleep(0.2)
174 | 
175 |         await app.publish("foo.bar", Foo1(bar="1"))
176 |         await app.publish("foo.bar", Foo2(foo="2", bar="3"))
177 |         await app.flush()
178 |         await fut
179 | 
180 |     assert all([isinstance(v, Foo1) for v in consumed1])
181 |     assert all([isinstance(v, Foo2) for v in consumed2])
182 | 
183 | 
184 | async def test_subscribe_diff_data_types(app):
185 |     consumed_records = []
186 |     consumed_bytes = []
187 | 
188 |     @app.schema("Foo", version=1, streams=["foo.bar"])
189 |     class Foo(pydantic.BaseModel):
190 |         bar: str
191 | 
192 |     @app.subscribe("foo.bar", group="test_group")
193 |     async def consume_record(data: ConsumerRecord):
194 |         consumed_records.append(data)
195 | 
196 |     @app.subscribe("foo.bar", group="test_group_2")
197 |     async def consume_bytes(data: bytes):
198 |         consumed_bytes.append(data)
199 | 
200 |     async with app:
201 |         await app.publish("foo.bar", Foo(bar="1"))
202 |         await app.flush()
203 |         await app.consume_for(1, seconds=10)
204 | 
205 |     assert len(consumed_records) == 1
206 |     assert len(consumed_bytes) == 1
207 |     assert isinstance(consumed_records[0], ConsumerRecord)
208 |     assert isinstance(consumed_bytes[0], bytes)
209 | 
210 | 
211 | async def test_subscribe_to_topic_that_does_not_exist(app):
212 |     consumed_records = []
213 | 
214 |     @app.schema("Foo", version=1)
215 |     class Foo(pydantic.BaseModel):
216 |         bar: str
217 | 
218 |     @app.subscribe("foo.bar", group="test_group")
219 |     async def consume_record(data: Foo):
220 |         consumed_records.append(data)
221 | 
222 |     async with app:
223 |         for idx in range(10):
224 |             await app.publish("foo.bar", Foo(bar=str(idx)))
225 | 
226 |         await app.flush()
227 |         fut = asyncio.create_task(app.consume_for(10, seconds=10))
228 |         await fut
229 | 
230 |     assert len(consumed_records) == 10
231 | 
232 | 
233 | async def test_subscribe_to_topic_that_already_has_messages_for_group(app):
234 |     consumed_records = []
235 | 
236 |     @app.schema("Foo", version=1)
237 |     class Foo(pydantic.BaseModel):
238 |         bar: str
239 | 
240 |     @app.subscribe("foo.bar", group="test_group")
241 |     async def consume_record(data: Foo):
242 |         consumed_records.append(data)
243 | 
244 |     async with app:
245 |         for idx in range(10):
246 |             await app.publish("foo.bar", Foo(bar=str(idx)))
247 |         await app.flush()
248 | 
249 |         fut = asyncio.create_task(app.consume_for(20, seconds=10))
250 | 
251 |         for idx in range(10):
252 |             await app.publish("foo.bar", Foo(bar=str(idx)))
253 |         await app.flush()
254 | 
255 |         await fut
256 | 
257 |     assert len(consumed_records) == 20
258 | 
259 | 
260 | async def test_cache_topic_exists_topic_mng(kafka):
261 |     mng = KafkaTopicManager(
262 |         bootstrap_servers=[f"{kafka[0]}:{kafka[1]}"],
263 |         prefix=uuid.uuid4().hex,
264 |     )
265 | 
266 |     topic_id = mng.get_topic_id("foobar")
267 |     assert not await mng.topic_exists(topic_id)
268 |     assert topic_id not in mng._topic_cache
269 | 
270 |     await mng.create_topic(topic_id)
271 |     assert await mng.topic_exists(topic_id)
272 | 
273 | 
274 | async def test_subscription_failure(app):
275 |     probe = Mock()
276 |     stream_id = "foo-bar-subfailure"
277 |     group_id = "test_sub_group_failure"
278 |     topic_id = app.topic_mng.get_topic_id(stream_id)
279 | 
280 |     @app.schema(streams=[stream_id])
281 |     class Foo(pydantic.BaseModel):
282 |         bar: str
283 | 
284 |     @app.subscribe(stream_id, group=group_id)
285 |     async def noop_ng(data: Foo):
286 |         probe("error", data)
287 |         raise Exception("Unhandled Exception")
288 | 
289 |     async with app:
290 |         await app.publish(stream_id, Foo(bar="1"))
291 |         await app.publish(stream_id, Foo(bar="1"))
292 |         await app.flush()
293 | 
294 |         # it fails
295 |         with pytest.raises(Exception):
296 |             await app.consume_for(2, seconds=20)
297 | 
298 |         # verify we didn't commit
299 |         offsets = [
300 |             v
301 |             for k, v in (await app.topic_mng.list_consumer_group_offsets(group_id)).items()
302 |             if k.topic == topic_id
303 |         ]
304 |         assert offsets == []
305 | 
306 |     # remove wrong consumer
307 |     app._subscriptions = []
308 | 
309 |     @app.subscribe(stream_id, group=group_id)
310 |     async def noop_ok(data: Foo):
311 |         probe("ok", data)
312 | 
313 |     async with app:
314 |         await app.publish(stream_id, Foo(bar="2"))
315 |         await app.flush()
316 | 
317 |         await app.consume_for(3, seconds=10)
318 | 
319 |         await app._subscription_consumers[0]._maybe_commit(forced=True)
320 | 
321 |         # make sure we that now committed all messages
322 |         assert (
323 |             sum(
324 |                 [
325 |                     om.offset
326 |                     for tp, om in (
327 |                         await app.topic_mng.list_consumer_group_offsets(group_id)
328 |                     ).items()
329 |                     if tp.topic == topic_id
330 |                 ]
331 |             )
332 |             == 3
333 |         )
334 | 
335 |     probe.assert_has_calls(
336 |         [call("error", Foo(bar="1")), call("ok", Foo(bar="1")), call("ok", Foo(bar="2"))],
337 |         any_order=True,
338 |     )
339 | 
340 | 
341 | async def test_publish_unregistered_schema(app):
342 |     probe = Mock()
343 |     stream_id = "foo-bar-unregistered"
344 |     group_id = "test-sub-unregistered"
345 | 
346 |     class Foo(pydantic.BaseModel):
347 |         bar: str
348 | 
349 |     @app.subscribe(stream_id, group=group_id)
350 |     async def noop(data: Foo):
351 |         probe(data)
352 | 
353 |     async with app:
354 |         await app.publish(stream_id, Foo(bar="1"))
355 |         await app.publish(stream_id, Foo(bar="2"))
356 |         await app.flush()
357 | 
358 |         await app.consume_for(2, seconds=5)
359 | 
360 |     probe.assert_has_calls(
361 |         [call(Foo(bar="1")), call(Foo(bar="2"))],
362 |         any_order=True,
363 |     )
364 | 
365 |     # 1 failed + 3 ok
366 |     assert len(probe.mock_calls) == 2
367 | 
368 | 
369 | async def test_raw_publish_data(app):
370 |     probe = Mock()
371 |     stream_id = "foo-bar-raw"
372 |     group_id = "test-sub-raw"
373 | 
374 |     @app.subscribe(stream_id, group=group_id)
375 |     async def noop(record: aiokafka.structs.ConsumerRecord):
376 |         probe(record.value)
377 | 
378 |     async with app:
379 |         await app.raw_publish(stream_id, b"1")
380 |         await app.raw_publish(stream_id, b"2")
381 |         await app.flush()
382 | 
383 |         await app.consume_for(2, seconds=5)
384 | 
385 |     probe.assert_has_calls(
386 |         [call(b"1"), call(b"2")],
387 |         any_order=True,
388 |     )
389 | 
390 |     # 1 failed + 3 ok
391 |     assert len(probe.mock_calls) == 2
392 | 
393 | 
394 | async def test_publish_unhealthy(app):
395 | 
396 |     async with app:
397 |         app._producer = AsyncMock()
398 |         app._producer._sender = MagicMock()
399 |         app._producer._sender.sender_task.done.return_value = True
400 |         with pytest.raises(ProducerUnhealthyException):
401 |             await app.raw_publish("foobar", b"foobar")
402 | 
403 | 
404 | async def test_invalid_event_schema_is_sending_error_metric(app):
405 |     side_effect = None
406 | 
407 |     @app.schema("Foo", streams=["foo.bar"])
408 |     class Foo(pydantic.BaseModel):
409 |         bar: str
410 | 
411 |     class Baz(pydantic.BaseModel):
412 |         qux: str
413 | 
414 |     @app.subscribe("foo.bar", group="test_group")
415 |     async def consume(data: Foo):
416 |         side_effect = True
417 | 
418 |     with patch("kafkaesk.consumer.CONSUMED_MESSAGES") as consumed_messages_metric:
419 |         async with app:
420 |             await app.publish("foo.bar", Baz(qux="1"))
421 |             await app.flush()
422 |             await app.consume_for(1, seconds=5)
423 | 
424 |         consumed_messages_metric.labels.assert_called_once()
425 |         metric_kwargs = consumed_messages_metric.labels.call_args.kwargs
426 |         assert metric_kwargs["error"] == "UnhandledMessage"
427 |         consumed_messages_metric.labels(**metric_kwargs).inc.assert_called_once()
428 | 
429 |     assert side_effect is None
430 | 
431 | 
432 | async def test_malformed_event_schema_is_sending_error_metric(app):
433 |     side_effect = None
434 | 
435 |     @app.schema("Foo", streams=["foo.bar"])
436 |     class Foo(pydantic.BaseModel):
437 |         bar: str
438 | 
439 |     @app.subscribe("foo.bar", group="test_group")
440 |     async def consume(data: Foo):
441 |         side_effect = True
442 | 
443 |     with patch("kafkaesk.consumer.CONSUMED_MESSAGES") as consumed_messages_metric:
444 |         async with app:
445 |             await app.raw_publish("foo.bar", b"bad string")
446 |             await app.flush()
447 |             await app.consume_for(1, seconds=5)
448 | 
449 |         consumed_messages_metric.labels.assert_called_once()
450 |         metric_kwargs = consumed_messages_metric.labels.call_args.kwargs
451 |         assert metric_kwargs["error"] == "UnhandledMessage"
452 |         consumed_messages_metric.labels(**metric_kwargs).inc.assert_called_once()
453 | 
454 |     assert side_effect is None
455 | 


--------------------------------------------------------------------------------
/tests/acceptance/test_rebalance.py:
--------------------------------------------------------------------------------
  1 | from .produce import Foo
  2 | from .produce import producer
  3 | from kafkaesk.consumer import BatchConsumer, Subscription
  4 | 
  5 | import asyncio
  6 | import kafkaesk
  7 | import pytest
  8 | 
  9 | pytestmark = pytest.mark.asyncio
 10 | 
 11 | GROUP = TOPIC = "test-rebalance"
 12 | 
 13 | 
 14 | async def test_cancel_getone(app):
 15 | 
 16 |     app.schema(streams=[TOPIC])(Foo)
 17 | 
 18 |     async def handler(*args, **kwargs):
 19 |         pass
 20 | 
 21 |     async with app:
 22 |         subscription = Subscription(
 23 |             "test_consumer",
 24 |             handler,
 25 |             GROUP,
 26 |             topics=[TOPIC],
 27 |             timeout_seconds=1,
 28 |         )
 29 |         consumer = BatchConsumer(
 30 |             subscription=subscription,
 31 |             app=app,
 32 |         )
 33 |         await consumer.initialize()
 34 |         raw_consumer = consumer._consumer
 35 |         with raw_consumer._subscription.fetch_context():
 36 |             try:
 37 |                 await asyncio.wait_for(raw_consumer._fetcher.next_record([]), timeout=0.1)
 38 |             except asyncio.TimeoutError:
 39 |                 assert len(raw_consumer._fetcher._fetch_waiters) == 0
 40 |         await raw_consumer.stop()
 41 | 
 42 | 
 43 | async def test_many_consumers_rebalancing(kafka, topic_prefix):
 44 |     apps = []
 45 |     for idx in range(5):
 46 |         app = kafkaesk.Application(
 47 |             [f"{kafka[0]}:{kafka[1]}"],
 48 |             topic_prefix=topic_prefix,
 49 |         )
 50 |         app.schema(streams=[TOPIC])(Foo)
 51 |         app.id = idx
 52 | 
 53 |         @app.subscribe(TOPIC, group=GROUP)
 54 |         async def consumer(ob: Foo, record, app):
 55 |             ...
 56 | 
 57 |         await app.initialize()
 58 |         apps.append(app)
 59 | 
 60 |     produce = asyncio.create_task(producer(apps[0], TOPIC))
 61 | 
 62 |     consumer_tasks = []
 63 |     for app in apps:
 64 |         consumer_tasks.append(asyncio.create_task(app.consume_forever()))
 65 | 
 66 |     await asyncio.sleep(5)
 67 | 
 68 |     # cycle through each, destroying...
 69 |     for idx in range(5):
 70 |         await apps[idx].stop()
 71 |         await asyncio.sleep(1)
 72 |         assert consumer_tasks[idx].done()
 73 | 
 74 |         # start again
 75 |         consumer_tasks[idx] = asyncio.create_task(apps[idx].consume_forever())
 76 | 
 77 |     produce.cancel()
 78 | 
 79 |     for idx in range(5):
 80 |         await apps[idx].stop()
 81 | 
 82 | 
 83 | async def test_consume_every_message_once_during_rebalance(kafka, topic_prefix):
 84 |     """
 85 |     No matter what, even without reassignment, some messages
 86 |     seem to be relayed. You can see if when a single consumer and no rebalance
 87 |     sometimes.
 88 |     """
 89 |     consumed = {}
 90 | 
 91 |     def record_msg(record):
 92 |         key = f"{record.partition}-{record.offset}"
 93 |         if key not in consumed:
 94 |             consumed[key] = 0
 95 |         consumed[key] += 1
 96 | 
 97 |     apps = []
 98 |     for idx in range(5):
 99 |         app = kafkaesk.Application(
100 |             [f"{kafka[0]}:{kafka[1]}"],
101 |             topic_prefix=topic_prefix,
102 |             kafka_settings={"auto_commit_interval_ms": 10}
103 |         )
104 |         app.schema(streams=[TOPIC])(Foo)
105 |         app.id = idx
106 | 
107 |         @app.subscribe(TOPIC, group=GROUP)
108 |         async def consumer(ob: Foo, record, app):
109 |             record_msg(record)
110 | 
111 |         await app.initialize()
112 |         apps.append(app)
113 | 
114 |     consumer_tasks = []
115 |     for app in apps:
116 |         consumer_tasks.append(asyncio.create_task(app.consume_forever()))
117 | 
118 |     await asyncio.sleep(1)
119 |     produce = asyncio.create_task(producer(apps[0], TOPIC))
120 |     await asyncio.sleep(5)
121 | 
122 |     # cycle through each, destroying...
123 |     for idx in range(5):
124 |         await apps[idx].stop()
125 |         await asyncio.sleep(1)
126 |         assert consumer_tasks[idx].done()
127 |         # start again
128 |         consumer_tasks[idx] = asyncio.create_task(apps[idx].consume_forever())
129 | 
130 |     produce.cancel()
131 | 
132 |     for idx in range(5):
133 |         await apps[idx].stop()
134 | 
135 |     assert len(consumed) > 100
136 | 
137 |     # now check that we always consumed a message only once
138 |     for v in consumed.values():
139 |         assert v == 1
140 | 


--------------------------------------------------------------------------------
/tests/acceptance/test_run.py:
--------------------------------------------------------------------------------
 1 | from .produce import Foo
 2 | from .produce import producer
 3 | from kafkaesk import Application
 4 | 
 5 | import asyncio
 6 | import pytest
 7 | import signal
 8 | 
 9 | TOPIC = "test-run"
10 | GROUP = "test-run2"
11 | 
12 | pytestmark = pytest.mark.asyncio
13 | 
14 | test_app = Application()
15 | 
16 | test_app.schema(streams=[TOPIC])(Foo)
17 | 
18 | 
19 | @test_app.subscribe(TOPIC, group=GROUP)
20 | async def _consumer(ob: Foo, record, app):
21 |     ...
22 | 
23 | 
24 | async def test_run_exits_cleanly_while_consuming(kafka, topic_prefix):
25 |     kserver = f"{kafka[0]}:{kafka[1]}"
26 |     app = Application([kserver], topic_prefix=topic_prefix)
27 |     async with app:
28 |         pro = asyncio.create_task(producer(app, TOPIC))
29 | 
30 |         proc = await asyncio.create_subprocess_exec(
31 |             "kafkaesk",
32 |             "tests.acceptance.test_run:test_app",
33 |             "--kafka-servers",
34 |             kserver,
35 |             "--topic-prefix",
36 |             topic_prefix,
37 |             # cwd=_test_dir,
38 |         )
39 | 
40 |         await asyncio.sleep(5)
41 |         pro.cancel()
42 | 
43 |         proc.send_signal(signal.SIGINT)
44 |         await proc.wait()
45 | 
46 |         assert proc.returncode == 0
47 | 
48 |         results = await app.topic_mng.list_consumer_group_offsets(GROUP)
49 |         topic_id = app.topic_mng.get_topic_id(TOPIC)
50 |         count = 0
51 |         for tp, pos in results.items():
52 |             if tp.topic != topic_id:
53 |                 continue
54 |             count += pos.offset
55 |         assert count > 0
56 | 


--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
1 | pytest_plugins = ["tests.fixtures"]
2 | 


--------------------------------------------------------------------------------
/tests/fixtures.py:
--------------------------------------------------------------------------------
 1 | from unittest.mock import DEFAULT
 2 | from unittest.mock import patch
 3 | 
 4 | import kafkaesk
 5 | import os
 6 | import pytest_asyncio
 7 | import uuid
 8 | 
 9 | 
10 | @pytest_asyncio.fixture()
11 | async def kafka():
12 |     yield os.environ.get("KAFKA", "localhost:9092").split(":")
13 | 
14 | 
15 | @pytest_asyncio.fixture()
16 | def topic_prefix():
17 |     return uuid.uuid4().hex
18 | 
19 | 
20 | @pytest_asyncio.fixture()
21 | async def app(kafka, topic_prefix):
22 |     yield kafkaesk.Application(
23 |         [f"{kafka[0]}:{kafka[1]}"],
24 |         topic_prefix=topic_prefix,
25 |         kafka_settings={
26 |             "metadata_max_age_ms": 500,
27 |         },
28 |     )
29 | 
30 | 
31 | @pytest_asyncio.fixture()
32 | def metrics():
33 |     with patch.multiple(
34 |         "kafkaesk.app",
35 |         PUBLISHED_MESSAGES=DEFAULT,
36 |         PRODUCER_TOPIC_OFFSET=DEFAULT,
37 |         PUBLISHED_MESSAGES_TIME=DEFAULT,
38 |     ) as mock:
39 |         yield mock
40 | 


--------------------------------------------------------------------------------
/tests/unit/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/onna/kafkaesk/10e88fd921fddff70b8cb973e739e280caa4cac8/tests/unit/__init__.py


--------------------------------------------------------------------------------
/tests/unit/ext/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/onna/kafkaesk/10e88fd921fddff70b8cb973e739e280caa4cac8/tests/unit/ext/__init__.py


--------------------------------------------------------------------------------
/tests/unit/ext/logging/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/onna/kafkaesk/10e88fd921fddff70b8cb973e739e280caa4cac8/tests/unit/ext/logging/__init__.py


--------------------------------------------------------------------------------
/tests/unit/ext/logging/test_handler.py:
--------------------------------------------------------------------------------
 1 | from kafkaesk.app import Application
 2 | from kafkaesk.ext.logging import handler
 3 | from unittest.mock import MagicMock
 4 | from unittest.mock import patch
 5 | 
 6 | import pytest
 7 | 
 8 | pytestmark = pytest.mark.asyncio
 9 | 
10 | 
11 | def test_close_log_handler():
12 |     mock = MagicMock()
13 |     h = handler.PydanticKafkaeskHandler(MagicMock(), "stream", mock)
14 |     h.close()
15 |     mock.close.assert_called_once()
16 | 
17 | 
18 | def test_format_log_exc():
19 |     h = handler.PydanticKafkaeskHandler(MagicMock(), "stream", MagicMock())
20 |     record = MagicMock()
21 |     record.exc_text = None
22 |     record.exc_info = (Exception(), None, None)
23 | 
24 |     data = h._format_base_log(record)
25 |     assert data["stack"]
26 | 
27 | 
28 | def test_swallows_schema_conflict():
29 |     app = Application()
30 |     handler.PydanticKafkaeskHandler(app, "stream", MagicMock())
31 |     handler.PydanticKafkaeskHandler(app, "stream", MagicMock())
32 | 
33 | 
34 | def test_get_k8s_ns():
35 | 
36 |     with patch("kafkaesk.ext.logging.handler._K8S_NS", handler._not_set), patch(
37 |         "kafkaesk.ext.logging.handler.os.path.exists", return_value=True
38 |     ), patch("kafkaesk.ext.logging.handler.open") as open_file:
39 |         fi = MagicMock()
40 |         cm = MagicMock()
41 |         cm.__enter__.return_value = fi
42 |         open_file.return_value = cm
43 |         fi.read.return_value = "foobar\n"
44 |         assert handler.get_k8s_ns() == "foobar"
45 | 
46 | 
47 | class TestQueue:
48 |     def test_not_running(self):
49 |         qq = handler.KafkaeskQueue(MagicMock())
50 |         assert not qq.running
51 | 
52 |     def test_not_running_task_done(self):
53 |         qq = handler.KafkaeskQueue(MagicMock())
54 |         qq._task = MagicMock()
55 |         qq._task.done.return_value = True
56 |         assert not qq.running
57 | 
58 |     def test_running(self):
59 |         qq = handler.KafkaeskQueue(MagicMock())
60 |         qq._task = MagicMock()
61 |         qq._task.done.return_value = False
62 |         assert qq.running
63 | 
64 |     async def test_runtime_error_not_running(self):
65 |         qq = handler.KafkaeskQueue(MagicMock())
66 |         with pytest.raises(RuntimeError):
67 |             assert await qq._run()
68 | 


--------------------------------------------------------------------------------
/tests/unit/test_app.py:
--------------------------------------------------------------------------------
  1 | from asyncio.futures import Future
  2 | from kafkaesk.app import Application
  3 | from kafkaesk.app import published_callback
  4 | from kafkaesk.app import run
  5 | from kafkaesk.app import run_app
  6 | from kafkaesk.app import SchemaRegistration
  7 | from jaeger_client import Config, Tracer
  8 | from opentracing.scope_managers.contextvars import ContextVarsScopeManager
  9 | from tests.utils import record_factory
 10 | from unittest.mock import ANY
 11 | from unittest.mock import AsyncMock
 12 | from unittest.mock import MagicMock
 13 | from unittest.mock import Mock
 14 | from unittest.mock import patch
 15 | 
 16 | import asyncio
 17 | import json
 18 | import kafkaesk
 19 | import kafkaesk.exceptions
 20 | import opentracing
 21 | import pydantic
 22 | import pytest
 23 | import time
 24 | 
 25 | pytestmark = pytest.mark.asyncio
 26 | 
 27 | 
 28 | class TestApplication:
 29 |     async def test_app_events(self):
 30 |         app = Application()
 31 | 
 32 |         async def on_finalize():
 33 |             pass
 34 | 
 35 |         app.on("finalize", on_finalize)
 36 |         assert len(app._event_handlers["finalize"]) == 1
 37 | 
 38 |     async def test_app_finalize_event(self):
 39 |         app = Application()
 40 | 
 41 |         class CallTracker:
 42 |             def __init__(self):
 43 |                 self.called = False
 44 | 
 45 |             async def on_finalize(self):
 46 |                 self.called = True
 47 | 
 48 |         tracker = CallTracker()
 49 |         app.on("finalize", tracker.on_finalize)
 50 |         await app.finalize()
 51 | 
 52 |         assert tracker.called is True
 53 | 
 54 |     def test_publish_callback(self, metrics):
 55 |         fut = Future()
 56 |         fut.set_result(record_factory())
 57 |         published_callback("topic", time.time() - 1, fut)
 58 | 
 59 |         metrics["PUBLISHED_MESSAGES"].labels.assert_called_with(
 60 |             stream_id="topic", partition=0, error="none"
 61 |         )
 62 |         metrics["PUBLISHED_MESSAGES"].labels().inc()
 63 | 
 64 |         metrics["PRODUCER_TOPIC_OFFSET"].labels.assert_called_with(stream_id="topic", partition=0)
 65 |         metrics["PRODUCER_TOPIC_OFFSET"].labels().set.assert_called_with(0)
 66 | 
 67 |         metrics["PUBLISHED_MESSAGES_TIME"].labels.assert_called_with(stream_id="topic")
 68 |         assert metrics["PUBLISHED_MESSAGES_TIME"].labels().observe.mock_calls[0].args[
 69 |             0
 70 |         ] == pytest.approx(1, 0.1)
 71 | 
 72 |     def test_publish_callback_exc(self, metrics):
 73 |         fut = Future()
 74 |         fut.set_exception(Exception())
 75 |         published_callback("topic", time.time(), fut)
 76 | 
 77 |         metrics["PUBLISHED_MESSAGES"].labels.assert_called_with(
 78 |             stream_id="topic", partition=-1, error="Exception"
 79 |         )
 80 |         metrics["PUBLISHED_MESSAGES"].labels().inc()
 81 | 
 82 |     def test_mount_router(self):
 83 |         app = Application()
 84 | 
 85 |         router = kafkaesk.Router()
 86 | 
 87 |         @router.schema("Foo", streams=["foo.bar"])
 88 |         class Foo(pydantic.BaseModel):
 89 |             bar: str
 90 | 
 91 |         @router.subscribe("foo.bar", group="test_group")
 92 |         async def consume(data: Foo, schema, record):
 93 |             ...
 94 | 
 95 |         app.mount(router)
 96 | 
 97 |         assert app.subscriptions == router.subscriptions
 98 |         assert app.schemas == router.schemas
 99 |         assert app.event_handlers == router.event_handlers
100 | 
101 |     async def test_consumer_health_check(self):
102 |         app = kafkaesk.Application()
103 |         subscription_consumer = AsyncMock()
104 |         app._subscription_consumers.append(subscription_consumer)
105 |         subscription_consumer.consumer._client.ready.return_value = True
106 |         await app.health_check()
107 | 
108 |     async def test_consumer_health_check_raises_exception(self):
109 |         app = kafkaesk.Application()
110 |         subscription = kafkaesk.Subscription(
111 |             "test_consumer", lambda record: 1, "group", topics=["foo"]
112 |         )
113 | 
114 |         subscription_consumer = kafkaesk.BatchConsumer(
115 |             subscription=subscription,
116 |             app=app,
117 |         )
118 |         app._subscription_consumers.append(subscription_consumer)
119 |         subscription_consumer._consumer = AsyncMock()
120 |         subscription_consumer._consumer._client.ready.return_value = False
121 |         with pytest.raises(kafkaesk.exceptions.ConsumerUnhealthyException):
122 |             await app.health_check()
123 | 
124 |     async def test_consumer_health_check_producer_healthy(self):
125 |         app = kafkaesk.Application()
126 |         app._producer = MagicMock()
127 |         app._producer._sender.sender_task.done.return_value = False
128 |         await app.health_check()
129 | 
130 |     async def test_consumer_health_check_producer_unhealthy(self):
131 |         app = kafkaesk.Application()
132 |         app._producer = MagicMock()
133 |         app._producer._sender.sender_task.done.return_value = True
134 |         with pytest.raises(kafkaesk.exceptions.ProducerUnhealthyException):
135 |             await app.health_check()
136 | 
137 |     async def test_configure_kafka_producer(self):
138 |         app = kafkaesk.Application(
139 |             kafka_settings={
140 |                 "metadata_max_age_ms": 100,
141 |                 "max_batch_size": 100,
142 |                 # invalid for producer so should not be applied here
143 |                 "max_partition_fetch_bytes": 100,
144 |             }
145 |         )
146 |         # verify it is created correctly
147 |         app.producer_factory()
148 | 
149 |         # now, validate the wiring
150 |         with patch("kafkaesk.app.aiokafka.AIOKafkaProducer") as mock:
151 |             app.producer_factory()
152 |             mock.assert_called_with(
153 |                 bootstrap_servers=None,
154 |                 loop=ANY,
155 |                 api_version="auto",
156 |                 metadata_max_age_ms=100,
157 |                 max_batch_size=100,
158 |             )
159 | 
160 |     async def test_configure_kafka_consumer(self):
161 |         app = kafkaesk.Application(
162 |             kafka_settings={
163 |                 "max_partition_fetch_bytes": 100,
164 |                 "fetch_max_wait_ms": 100,
165 |                 "metadata_max_age_ms": 100,
166 |                 # invalid for consumer so should not be applied here
167 |                 "max_batch_size": 100,
168 |             }
169 |         )
170 |         # verify it is created correctly
171 |         app.consumer_factory(group_id="foobar")
172 | 
173 |         # now, validate the wiring
174 |         with patch("kafkaesk.app.aiokafka.AIOKafkaConsumer") as mock:
175 |             app.consumer_factory(group_id="foobar")
176 |             mock.assert_called_with(
177 |                 bootstrap_servers=None,
178 |                 loop=ANY,
179 |                 group_id="foobar",
180 |                 api_version="auto",
181 |                 auto_offset_reset="earliest",
182 |                 enable_auto_commit=False,
183 |                 max_partition_fetch_bytes=100,
184 |                 fetch_max_wait_ms=100,
185 |                 metadata_max_age_ms=100,
186 |             )
187 | 
188 |     def test_configure(self):
189 |         app = kafkaesk.Application()
190 |         app.configure(
191 |             kafka_servers=["kafka_servers"],
192 |             topic_prefix="topic_prefix",
193 |             kafka_settings={"kafka_settings": "kafka_settings"},
194 |             api_version="api_version",
195 |             replication_factor="replication_factor",
196 |         )
197 |         assert app._kafka_servers == ["kafka_servers"]
198 |         assert app._topic_prefix == "topic_prefix"
199 |         assert app._kafka_settings == {"kafka_settings": "kafka_settings"}
200 |         assert app._kafka_api_version == "api_version"
201 |         assert app._replication_factor == "replication_factor"
202 | 
203 |         # now make sure none values do not overwrite
204 |         app.configure(
205 |             kafka_servers=None,
206 |             topic_prefix=None,
207 |             kafka_settings=None,
208 |             api_version=None,
209 |             replication_factor=None,
210 |         )
211 |         assert app._kafka_servers == ["kafka_servers"]
212 |         assert app._topic_prefix == "topic_prefix"
213 |         assert app._kafka_settings == {"kafka_settings": "kafka_settings"}
214 |         assert app._kafka_api_version == "api_version"
215 |         assert app._replication_factor == "replication_factor"
216 | 
217 |     async def test_initialize_with_unconfigured_app_raises_exception(self):
218 |         app = kafkaesk.Application()
219 |         with pytest.raises(kafkaesk.exceptions.AppNotConfiguredException):
220 |             await app.initialize()
221 | 
222 |     async def test_publish_propagates_headers(self):
223 |         app = kafkaesk.Application(kafka_servers=["foo"])
224 | 
225 |         class Foo(pydantic.BaseModel):
226 |             bar: str
227 | 
228 |         producer = AsyncMock()
229 |         producer.send.return_value = fut = asyncio.Future()
230 |         fut.set_result("ok")
231 |         app._get_producer = AsyncMock(return_value=producer)
232 |         app._topic_mng = MagicMock()
233 |         app._topic_mng.get_topic_id.return_value = "foobar"
234 |         app._topic_mng.topic_exists = AsyncMock(return_value=True)
235 | 
236 |         future = await app.publish("foobar", Foo(bar="foo"), headers=[("foo", b"bar")])
237 |         _ = await future
238 | 
239 |         producer.send.assert_called_with(
240 |             "foobar",
241 |             value=b'{"schema":"Foo:1","data":{"bar":"foo"}}',
242 |             key=None,
243 |             headers=[("foo", b"bar")],
244 |         )
245 | 
246 |     async def test_publish_configured_retention_policy(self):
247 |         app = kafkaesk.Application(kafka_servers=["foo"])
248 | 
249 |         @app.schema(retention=100)
250 |         class Foo(pydantic.BaseModel):
251 |             bar: str
252 | 
253 |         producer = AsyncMock()
254 |         producer.send.return_value = fut = asyncio.Future()
255 |         fut.set_result("ok")
256 |         app._get_producer = AsyncMock(return_value=producer)
257 |         app._topic_mng = MagicMock()
258 |         app._topic_mng.get_topic_id.return_value = "foobar"
259 |         app._topic_mng.topic_exists = AsyncMock(return_value=False)
260 |         app._topic_mng.create_topic = AsyncMock()
261 | 
262 |         future = await app.publish("foobar", Foo(bar="foo"), headers=[("foo", b"bar")])
263 |         await future
264 |         app._topic_mng.create_topic.assert_called_with(
265 |             "foobar", replication_factor=None, retention_ms=100 * 1000
266 |         )
267 | 
268 |     async def test_publish_injects_tracing(self):
269 |         app = kafkaesk.Application(kafka_servers=["foo"])
270 |         producer = AsyncMock()
271 |         producer.send.return_value = fut = asyncio.Future()
272 |         fut.set_result("ok")
273 |         app._get_producer = AsyncMock(return_value=producer)
274 |         config = Config(
275 |             config={"sampler": {"type": "const", "param": 1}, "logging": True, "propagation": "b3"},
276 |             service_name="test_service",
277 |             scope_manager=ContextVarsScopeManager(),
278 |         )
279 |         # this call also sets opentracing.tracer
280 |         tracer = config.initialize_tracer()
281 | 
282 |         span = tracer.start_span(operation_name="dummy")
283 |         tracer.scope_manager.activate(span, True)
284 | 
285 |         future = await app.raw_publish("foobar", b"foobar")
286 |         await future
287 | 
288 |         headers = producer.mock_calls[0].kwargs["headers"]
289 |         assert str(span).startswith(headers[0][1].decode())
290 | 
291 | 
292 | class TestSchemaRegistration:
293 |     def test_schema_registration_repr(self):
294 |         reg = SchemaRegistration(id="id", version=1, model=None)
295 |         assert repr(reg) == "<SchemaRegistration id, version: 1 >"
296 | 
297 | 
298 | test_app = Application()
299 | 
300 | 
301 | def app_callable():
302 |     return test_app
303 | 
304 | 
305 | class TestRun:
306 |     def test_run(self):
307 |         rapp = AsyncMock()
308 |         with patch("kafkaesk.app.run_app", rapp), patch("kafkaesk.app.cli_parser") as cli_parser:
309 |             args = Mock()
310 |             args.app = "tests.unit.test_app:test_app"
311 |             args.kafka_servers = "foo,bar"
312 |             args.kafka_settings = json.dumps({"foo": "bar"})
313 |             args.topic_prefix = "prefix"
314 |             args.api_version = "api_version"
315 |             cli_parser.parse_args.return_value = args
316 | 
317 |             run()
318 | 
319 |             rapp.assert_called_once()
320 |             assert test_app._kafka_servers == ["foo", "bar"]
321 |             assert test_app._kafka_settings == {"foo": "bar"}
322 |             assert test_app._topic_prefix == "prefix"
323 |             assert test_app._kafka_api_version == "api_version"
324 | 
325 |     def test_run_callable(self):
326 |         rapp = AsyncMock()
327 |         with patch("kafkaesk.app.run_app", rapp), patch("kafkaesk.app.cli_parser") as cli_parser:
328 |             args = Mock()
329 |             args.app = "tests.unit.test_app:app_callable"
330 |             args.kafka_settings = None
331 |             cli_parser.parse_args.return_value = args
332 | 
333 |             run()
334 | 
335 |             rapp.assert_called_once()
336 | 
337 |     async def test_run_app(self):
338 |         app_mock = AsyncMock()
339 |         app_mock.consume_forever.return_value = (set(), set())
340 |         loop = MagicMock()
341 |         with patch("kafkaesk.app.asyncio.get_event_loop", return_value=loop):
342 |             await run_app(app_mock)
343 |         app_mock.consume_forever.assert_called_once()
344 |         assert len(loop.add_signal_handler.mock_calls) == 2
345 | 


--------------------------------------------------------------------------------
/tests/unit/test_consumer.py:
--------------------------------------------------------------------------------
  1 | from kafkaesk import Application
  2 | from kafkaesk import Subscription
  3 | from kafkaesk.consumer import build_handler
  4 | from kafkaesk.consumer import BatchConsumer, Subscription
  5 | from kafkaesk.exceptions import ConsumerUnhealthyException
  6 | from kafkaesk.exceptions import StopConsumer
  7 | from kafkaesk.exceptions import UnhandledMessage
  8 | from tests.utils import record_factory
  9 | from unittest.mock import AsyncMock
 10 | from unittest.mock import MagicMock
 11 | from unittest.mock import Mock
 12 | from unittest.mock import patch
 13 | 
 14 | import aiokafka.errors
 15 | import asyncio
 16 | import opentracing
 17 | import pydantic
 18 | import pytest
 19 | import pytest_asyncio
 20 | import time
 21 | import json
 22 | 
 23 | pytestmark = pytest.mark.asyncio
 24 | 
 25 | 
 26 | @pytest_asyncio.fixture()
 27 | def subscription_conf():
 28 |     subscription = Subscription(
 29 |         "foo",
 30 |         lambda record: 1,
 31 |         "group",
 32 |         topics=["foo"],
 33 |         timeout_seconds=1,
 34 |     )
 35 |     yield subscription
 36 | 
 37 | 
 38 | @pytest_asyncio.fixture()
 39 | def subscription(subscription_conf):
 40 |     yield BatchConsumer(
 41 |         subscription=subscription_conf,
 42 |         app=Application(kafka_servers=["foobar"]),
 43 |     )
 44 | 
 45 | 
 46 | def test_subscription_repr():
 47 |     sub = Subscription("stream_id", lambda x: None, "group")
 48 |     assert repr(sub) == "<Subscription stream: stream_id >"
 49 | 
 50 | 
 51 | class TestMessageHandler:
 52 |     def factory(self, func):
 53 |         return build_handler(func, app=MagicMock(), consumer=None)
 54 | 
 55 |     async def test_message_handler(self):
 56 |         side_effect = None
 57 | 
 58 |         async def raw_func(data):
 59 |             nonlocal side_effect
 60 |             assert isinstance(data, dict)
 61 |             side_effect = True
 62 | 
 63 |         handler = self.factory(raw_func)
 64 |         await handler(record_factory(), None)
 65 |         assert side_effect is True
 66 | 
 67 |     async def test_message_handler_map_types(self):
 68 |         class Foo(pydantic.BaseModel):
 69 |             foo: str
 70 | 
 71 |         async def handle_func(ob: Foo, schema, record, app, span: opentracing.Span):
 72 |             assert ob.foo == "bar"
 73 |             assert schema == "Foo:1"
 74 |             assert record is not None
 75 |             assert app is not None
 76 |             assert span is not None
 77 | 
 78 |         handler = self.factory(handle_func)
 79 |         await handler(record_factory(), MagicMock())
 80 | 
 81 |     async def test_malformed_message(self):
 82 |         class Foo(pydantic.BaseModel):
 83 |             foo: str
 84 | 
 85 |         side_effect = None
 86 | 
 87 |         async def func(ob: Foo):
 88 |             nonlocal side_effect
 89 |             side_effect = True
 90 | 
 91 |         record = aiokafka.structs.ConsumerRecord(
 92 |             topic="topic",
 93 |             partition=0,
 94 |             offset=0,
 95 |             timestamp=time.time() * 1000,
 96 |             timestamp_type=1,
 97 |             key="key",
 98 |             value=json.dumps({"schema": "Foo:1", "data": "bad format"}).encode(),
 99 |             checksum="1",
100 |             serialized_key_size=10,
101 |             serialized_value_size=10,
102 |             headers=[],
103 |         )
104 | 
105 |         handler = self.factory(func)
106 |         with pytest.raises(UnhandledMessage):
107 |             await handler(record, None)
108 | 
109 |         assert side_effect is None
110 | 
111 | 
112 | class TestSubscriptionConsumer:
113 |     async def test_healthy(self, subscription):
114 |         subscription._consumer = MagicMock()
115 |         subscription._running = True
116 |         subscription._consumer._coordinator.coordinator_id = "coordinator_id"
117 |         subscription._consumer._client.ready = AsyncMock(return_value=True)
118 |         assert await subscription.healthy() is None
119 |         subscription._consumer._client.ready.assert_called_with("coordinator_id")
120 | 
121 |     async def test_unhealthy(self, subscription):
122 |         subscription._consumer = MagicMock()
123 |         subscription._running = True
124 |         subscription._consumer._client.ready = AsyncMock(return_value=False)
125 |         with pytest.raises(ConsumerUnhealthyException):
126 |             assert await subscription.healthy()
127 | 
128 |         subscription._consumer = MagicMock()
129 |         subscription._running = False
130 |         with pytest.raises(ConsumerUnhealthyException):
131 |             assert await subscription.healthy()
132 | 
133 |     async def test_emit(self, subscription_conf):
134 |         probe = AsyncMock()
135 | 
136 |         sub = BatchConsumer(
137 |             subscription=subscription_conf,
138 |             app=Application(kafka_servers=["foobar"]),
139 |             event_handlers={"event": [probe]},
140 |         )
141 |         await sub.emit("event", "foo", "bar")
142 |         probe.assert_called_with("foo", "bar")
143 | 
144 |     async def test_emit_raises_stop(self, subscription_conf):
145 |         sub = BatchConsumer(
146 |             subscription=subscription_conf,
147 |             app=Application(kafka_servers=["foobar"]),
148 |             event_handlers={"event": [AsyncMock(side_effect=StopConsumer)]},
149 |         )
150 | 
151 |         with pytest.raises(StopConsumer):
152 |             await sub.emit("event", "foo", "bar")
153 | 
154 |     async def test_emit_swallow_ex(self, subscription_conf):
155 |         sub = BatchConsumer(
156 |             subscription=subscription_conf,
157 |             app=Application(kafka_servers=["foobar"]),
158 |             event_handlers={"event": [AsyncMock(side_effect=Exception)]},
159 |         )
160 | 
161 |         await sub.emit("event", "foo", "bar")
162 | 
163 |     async def test_retries_on_connection_failure(self, subscription):
164 |         run_mock = AsyncMock()
165 |         sleep = AsyncMock()
166 |         run_mock.side_effect = [aiokafka.errors.KafkaConnectionError, StopConsumer]
167 |         subscription._consumer = MagicMock()
168 |         with patch.object(subscription, "initialize", AsyncMock()), patch.object(
169 |             subscription, "finalize", AsyncMock()
170 |         ), patch.object(subscription, "_consume", run_mock), patch(
171 |             "kafkaesk.consumer.asyncio.sleep", sleep
172 |         ):
173 |             await subscription()
174 |             sleep.assert_called_once()
175 |             assert len(run_mock.mock_calls) == 2
176 | 
177 |     async def test_finalize_handles_exceptions(self, subscription):
178 |         consumer = AsyncMock()
179 |         consumer.stop.side_effect = Exception
180 |         consumer.commit.side_effect = Exception
181 | 
182 |         subscription._consumer = consumer
183 |         await subscription.finalize()
184 | 
185 |         consumer.stop.assert_called_once()
186 | 
187 |     async def test_run_exits_when_fut_closed_fut(self, subscription):
188 |         sub = subscription
189 |         consumer = AsyncMock()
190 |         consumer.getmany.return_value = {"": [record_factory() for _ in range(10)]}
191 |         sub._consumer = consumer
192 |         sub._running = True
193 | 
194 |         async def _handle_message(record):
195 |             await asyncio.sleep(0.03)
196 | 
197 |         with patch.object(sub, "_handler", _handle_message):
198 |             task = asyncio.create_task(sub._consume())
199 |             await asyncio.sleep(0.01)
200 |             stop_task = asyncio.create_task(sub.stop())
201 |             await asyncio.sleep(0.01)
202 |             sub._close.set_result(None)
203 | 
204 |             await asyncio.wait([stop_task, task])
205 | 
206 |     async def test_auto_commit_can_be_disabled(self, subscription_conf):
207 |         sub = BatchConsumer(
208 |             subscription=subscription_conf,
209 |             app=Application(kafka_servers=["foobar"]),
210 |             auto_commit=False,
211 |         )
212 |         await sub._maybe_commit()
213 |         assert sub._last_commit == 0
214 | 


--------------------------------------------------------------------------------
/tests/unit/test_exceptions.py:
--------------------------------------------------------------------------------
 1 | from kafkaesk.app import SchemaRegistration
 2 | from kafkaesk.exceptions import SchemaConflictException
 3 | 
 4 | 
 5 | def test_repr_conflict():
 6 |     ex = SchemaConflictException(
 7 |         SchemaRegistration("id", 1, None), SchemaRegistration("id", 1, None)
 8 |     )
 9 |     assert "Schema Conflict" in str(ex)
10 | 


--------------------------------------------------------------------------------
/tests/unit/test_kafka.py:
--------------------------------------------------------------------------------
 1 | from kafkaesk.kafka import KafkaTopicManager
 2 | from unittest.mock import patch
 3 | 
 4 | import kafka.errors
 5 | import pytest
 6 | 
 7 | pytestmark = pytest.mark.asyncio
 8 | 
 9 | 
10 | async def test_create_topic_uses_replication_factor_from_servers():
11 |     mng = KafkaTopicManager(["foo", "bar"])
12 |     with patch("kafka.admin.client.KafkaAdminClient"):
13 |         await mng.create_topic("Foobar")
14 |         client = await mng.get_admin_client()
15 |         assert client.create_topics.called
16 |         assert client.create_topics.call_args[0][0][0].replication_factor == 2
17 | 
18 | 
19 | async def test_create_topic_uses_replication_factor_from_servers_min_3():
20 |     mng = KafkaTopicManager(["foo", "bar", "foo2", "foo3", "foo4"])
21 |     with patch("kafka.admin.client.KafkaAdminClient"):
22 |         await mng.create_topic("Foobar")
23 |         client = await mng.get_admin_client()
24 |         assert client.create_topics.called
25 |         assert client.create_topics.call_args[0][0][0].replication_factor == 3
26 | 
27 | 
28 | async def test_create_topic_uses_replication_factor():
29 |     mng = KafkaTopicManager(["foo", "bar"], replication_factor=1)
30 |     with patch("kafka.admin.client.KafkaAdminClient"):
31 |         await mng.create_topic("Foobar", retention_ms=100)
32 |         client = await mng.get_admin_client()
33 |         assert client.create_topics.called
34 |         assert client.create_topics.call_args[0][0][0].replication_factor == 1
35 |         assert client.create_topics.call_args[0][0][0].topic_configs["retention.ms"] == 100
36 | 
37 | 
38 | async def test_create_topic_already_exists():
39 |     mng = KafkaTopicManager(["foo", "bar"], replication_factor=1)
40 |     with patch("kafka.admin.client.KafkaAdminClient"):
41 |         client = await mng.get_admin_client()
42 |         client.create_topics.side_effect = kafka.errors.TopicAlreadyExistsError
43 |         await mng.create_topic("Foobar")
44 |         client.create_topics.assert_called_once()
45 | 
46 | 
47 | def test_constructor_translates_api_version():
48 |     mng = KafkaTopicManager(["foobar"], kafka_api_version="auto")
49 |     assert mng.kafka_api_version is None
50 | 
51 |     mng = KafkaTopicManager(["foobar"], kafka_api_version="2.4.0")
52 |     assert mng.kafka_api_version == (2, 4, 0)
53 | 


--------------------------------------------------------------------------------
/tests/unit/test_metrics.py:
--------------------------------------------------------------------------------
  1 | from aiokafka.structs import OffsetAndMetadata
  2 | from aiokafka.structs import TopicPartition
  3 | from kafkaesk.app import Application
  4 | from kafkaesk.consumer import BatchConsumer, Subscription
  5 | from tests.utils import record_factory
  6 | from unittest.mock import AsyncMock
  7 | from unittest.mock import MagicMock
  8 | from unittest.mock import patch
  9 | 
 10 | import asyncio
 11 | import pytest
 12 | 
 13 | pytestmark = pytest.mark.asyncio
 14 | 
 15 | 
 16 | async def test_record_metric_on_rebalance():
 17 |     async def coro(*arg, **kwargs):
 18 |         pass
 19 | 
 20 |     with patch("kafkaesk.consumer.CONSUMER_REBALANCED") as rebalance_metric:
 21 |         app_mock = AsyncMock()
 22 |         app_mock.topic_mng.list_consumer_group_offsets.return_value = {
 23 |             TopicPartition(topic="foobar", partition=0): OffsetAndMetadata(offset=0, metadata={})
 24 |         }
 25 | 
 26 |         subscription = Subscription(
 27 |             "test_consumer",
 28 |             coro,
 29 |             "group",
 30 |             topics=["stream.foo"],
 31 |         )
 32 | 
 33 |         rebalance_listener = BatchConsumer(
 34 |             subscription=subscription,
 35 |             app=app_mock,
 36 |         )
 37 |         rebalance_listener._consumer = AsyncMock()
 38 | 
 39 |         await rebalance_listener.on_partitions_assigned(
 40 |             [TopicPartition(topic="foobar", partition=0)]
 41 |         )
 42 |         rebalance_metric.labels.assert_called_with(
 43 |             partition=0,
 44 |             group_id="group",
 45 |             event="assigned",
 46 |         )
 47 |         rebalance_metric.labels().inc.assert_called_once()
 48 | 
 49 | 
 50 | async def test_record_metric_on_publish():
 51 |     """
 52 |     this test is acting funny on github action...
 53 |     """
 54 |     with patch("kafkaesk.app.PUBLISHED_MESSAGES") as published_metric, patch(
 55 |         "kafkaesk.app.PUBLISHED_MESSAGES_TIME"
 56 |     ) as published_metric_time, patch("kafkaesk.metrics.PUBLISH_MESSAGES") as publish_metric, patch(
 57 |         "kafkaesk.metrics.PUBLISH_MESSAGES_TIME"
 58 |     ) as publish_metric_time:
 59 |         app = Application()
 60 | 
 61 |         async def _fake_publish(*args, **kwargs):
 62 |             async def _publish():
 63 |                 return record_factory()
 64 | 
 65 |             return asyncio.create_task(_publish())
 66 | 
 67 |         producer = AsyncMock()
 68 |         producer.send.side_effect = _fake_publish
 69 |         app._get_producer = AsyncMock(return_value=producer)
 70 |         app._topic_mng = MagicMock()
 71 |         app._topic_mng.get_topic_id.return_value = "foobar"
 72 | 
 73 |         await (await app.raw_publish("foo", b"data"))
 74 | 
 75 |         published_metric.labels.assert_called_with(stream_id="foobar", partition=0, error="none")
 76 |         published_metric.labels(
 77 |             stream_id="foobar", partition=0, error="none"
 78 |         ).inc.assert_called_once()
 79 |         published_metric_time.labels.assert_called_with(stream_id="foobar")
 80 |         published_metric_time.labels(stream_id="foobar").observe.assert_called_once()
 81 | 
 82 |         publish_metric.labels.assert_called_with(stream_id="foobar", error="none")
 83 |         publish_metric.labels(stream_id="foobar", error="none").inc.assert_called_once()
 84 |         publish_metric_time.labels.assert_called_with(stream_id="foobar")
 85 |         publish_metric_time.labels(stream_id="foobar").observe.assert_called_once()
 86 | 
 87 | 
 88 | async def test_record_metric_error():
 89 |     """
 90 |     this test is acting funny on github action...
 91 |     """
 92 |     with patch("kafkaesk.metrics.PUBLISH_MESSAGES") as publish_metric, patch(
 93 |         "kafkaesk.metrics.PUBLISH_MESSAGES_TIME"
 94 |     ) as publish_metric_time:
 95 |         app = Application()
 96 | 
 97 |         producer = AsyncMock()
 98 |         producer.send.side_effect = Exception
 99 |         app._get_producer = AsyncMock(return_value=producer)
100 |         app._topic_mng = MagicMock()
101 |         app._topic_mng.get_topic_id.return_value = "foobar"
102 | 
103 |         with pytest.raises(Exception):
104 |             await app.raw_publish("foo", b"data")
105 | 
106 |         publish_metric.labels.assert_called_with(stream_id="foobar", error="exception")
107 |         publish_metric.labels(stream_id="foobar", error="none").inc.assert_called_once()
108 |         publish_metric_time.labels.assert_called_with(stream_id="foobar")
109 |         publish_metric_time.labels(stream_id="foobar").observe.assert_called_once()
110 | 


--------------------------------------------------------------------------------
/tests/unit/test_schema.py:
--------------------------------------------------------------------------------
 1 | from kafkaesk import Application
 2 | from kafkaesk.exceptions import SchemaConflictException
 3 | 
 4 | import pydantic
 5 | import pytest
 6 | 
 7 | pytestmark = pytest.mark.asyncio
 8 | 
 9 | 
10 | async def test_not_allowed_to_register_same_schema_twice():
11 |     app = Application()
12 | 
13 |     @app.schema("Foo", version=1)
14 |     class Foo1(pydantic.BaseModel):
15 |         bar: str
16 | 
17 |     with pytest.raises(SchemaConflictException):
18 | 
19 |         @app.schema("Foo", version=1)
20 |         class Foo2(pydantic.BaseModel):
21 |             foo: str
22 | 
23 | 
24 | async def test_do_not_require_schema_name():
25 |     app = Application()
26 | 
27 |     @app.schema()
28 |     class Foo(pydantic.BaseModel):
29 |         bar: str
30 | 
31 |     assert "Foo:1" in app._schemas
32 | 
33 | 
34 | async def test_get_registered_schema():
35 |     app = Application()
36 | 
37 |     @app.schema()
38 |     class Foo(pydantic.BaseModel):
39 |         bar: str
40 | 
41 |     assert app.get_schema_reg(Foo) is not None
42 | 
43 | 
44 | async def test_get_registered_schema_missing():
45 |     app = Application()
46 | 
47 |     class Foo(pydantic.BaseModel):
48 |         bar: str
49 | 
50 |     assert app.get_schema_reg(Foo) is None
51 | 


--------------------------------------------------------------------------------
/tests/utils.py:
--------------------------------------------------------------------------------
 1 | import aiokafka.structs
 2 | import json
 3 | import time
 4 | 
 5 | 
 6 | def record_factory():
 7 |     return aiokafka.structs.ConsumerRecord(
 8 |         topic="topic",
 9 |         partition=0,
10 |         offset=0,
11 |         timestamp=time.time() * 1000,
12 |         timestamp_type=1,
13 |         key="key",
14 |         value=json.dumps({"schema": "Foo:1", "data": {"foo": "bar"}}).encode(),
15 |         checksum="1",
16 |         serialized_key_size=10,
17 |         serialized_value_size=10,
18 |         headers=[],
19 |     )
20 | 


--------------------------------------------------------------------------------