├── logpipe ├── py.typed ├── formats │ ├── __init__.py │ ├── json.py │ ├── pickle.py │ └── msgpack.py ├── tests │ ├── __init__.py │ ├── unit │ │ ├── __init__.py │ │ ├── kafka │ │ │ ├── __init__.py │ │ │ ├── test_producer.py │ │ │ └── test_consumer.py │ │ ├── kinesis │ │ │ ├── __init__.py │ │ │ ├── test_producer.py │ │ │ └── test_consumer.py │ │ ├── test_settings.py │ │ ├── test_format.py │ │ ├── test_consumer.py │ │ └── test_producer.py │ ├── integration │ │ ├── __init__.py │ │ └── test_roundtrip.py │ └── common.py ├── management │ ├── __init__.py │ └── commands │ │ ├── __init__.py │ │ └── run_kafka_consumer.py ├── migrations │ ├── __init__.py │ ├── 0002_auto_20170427_1451.py │ ├── 0004_auto_20170502_1403.py │ ├── 0001_initial.py │ ├── 0005_auto_20180917_1348.py │ ├── 0003_auto_20170427_1703.py │ └── 0006_alter_kafkaoffset_options_and_more.py ├── constants.py ├── locale │ └── es │ │ └── LC_MESSAGES │ │ ├── django.mo │ │ └── django.po ├── apps.py ├── registry.py ├── admin.py ├── exceptions.py ├── backend │ ├── __init__.py │ ├── dummy.py │ ├── kafka.py │ └── kinesis.py ├── docgen_setup.py ├── settings.py ├── __init__.py ├── format.py ├── abc.py ├── producer.py ├── models.py └── consumer.py ├── sandbox ├── __init__.py ├── lptester │ ├── __init__.py │ ├── migrations │ │ ├── __init__.py │ │ ├── 0002_person_uuid.py │ │ └── 0001_initial.py │ ├── constants.py │ ├── signals.py │ ├── apps.py │ ├── consumers.py │ ├── admin.py │ ├── producers.py │ ├── models.py │ └── serializers.py ├── urls.py └── settings.py ├── renovate.json ├── docs ├── custom-styles.css ├── api.md ├── index.md ├── releases.md ├── installation.md └── usage.md ├── manage.py ├── Dockerfile ├── tox.ini ├── bin └── publish.sh ├── docker-compose.yml ├── Makefile ├── LICENSE ├── mkdocs.yml ├── README.md ├── .pre-commit-config.yaml ├── .gitlab-ci.yml ├── .gitignore ├── pyproject.toml └── CHANGELOG.md /logpipe/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sandbox/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /logpipe/formats/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /logpipe/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sandbox/lptester/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /logpipe/management/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /logpipe/migrations/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /logpipe/tests/unit/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /logpipe/tests/integration/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /logpipe/tests/unit/kafka/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /logpipe/tests/unit/kinesis/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /logpipe/management/commands/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sandbox/lptester/migrations/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sandbox/lptester/constants.py: -------------------------------------------------------------------------------- 1 | TOPIC_PEOPLE = "people" 2 | -------------------------------------------------------------------------------- /logpipe/constants.py: -------------------------------------------------------------------------------- 1 | FORMAT_JSON = "json" 2 | FORMAT_MSGPACK = "msgpack" 3 | FORMAT_PICKLE = "pickle" 4 | -------------------------------------------------------------------------------- /sandbox/lptester/signals.py: -------------------------------------------------------------------------------- 1 | import django.dispatch 2 | 3 | person_altered = django.dispatch.Signal() 4 | -------------------------------------------------------------------------------- /logpipe/locale/es/LC_MESSAGES/django.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thelabnyc/django-logpipe/HEAD/logpipe/locale/es/LC_MESSAGES/django.mo -------------------------------------------------------------------------------- /sandbox/urls.py: -------------------------------------------------------------------------------- 1 | from django.contrib import admin 2 | from django.urls import path 3 | 4 | urlpatterns = (path("admin/", admin.site.urls),) 5 | -------------------------------------------------------------------------------- /renovate.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://docs.renovatebot.com/renovate-schema.json", 3 | "extends": ["gitlab>thelabnyc/renovate-config:library"] 4 | } 5 | -------------------------------------------------------------------------------- /sandbox/lptester/apps.py: -------------------------------------------------------------------------------- 1 | from django.apps import AppConfig 2 | 3 | 4 | class LPTesterConfig(AppConfig): 5 | name = "sandbox.lptester" 6 | label = "lptester" 7 | default = True 8 | 9 | def ready(self) -> None: 10 | from . 
import consumers, producers # NOQA 11 | -------------------------------------------------------------------------------- /logpipe/apps.py: -------------------------------------------------------------------------------- 1 | from django.apps import AppConfig 2 | from django.utils.translation import gettext_lazy as _ 3 | 4 | 5 | class LogpipeConfig(AppConfig): 6 | name = "logpipe" 7 | label = "logpipe" 8 | # Translators: Backend Library Name 9 | verbose_name = _("LogPipe") 10 | -------------------------------------------------------------------------------- /docs/custom-styles.css: -------------------------------------------------------------------------------- 1 | /* Styles for https://github.com/tomchristie/mkautodoc */ 2 | div.autodoc-docstring { 3 | padding-left: 20px; 4 | margin-bottom: 30px; 5 | border-left: 5px solid rgba(230, 230, 230); 6 | } 7 | 8 | div.autodoc-members { 9 | padding-left: 20px; 10 | margin-bottom: 15px; 11 | } 12 | -------------------------------------------------------------------------------- /sandbox/lptester/consumers.py: -------------------------------------------------------------------------------- 1 | from logpipe import Consumer, register_consumer 2 | 3 | from . import constants, serializers 4 | 5 | 6 | @register_consumer 7 | def build_person_consumer() -> Consumer: 8 | consumer = Consumer(constants.TOPIC_PEOPLE) 9 | consumer.register(serializers.PersonSerializer) 10 | return consumer 11 | -------------------------------------------------------------------------------- /sandbox/lptester/admin.py: -------------------------------------------------------------------------------- 1 | from django.contrib import admin 2 | 3 | from . import models 4 | 5 | 6 | @admin.register(models.Person) 7 | class PersonAdmin(admin.ModelAdmin[models.Person]): 8 | fields = ["uuid", "first_name", "last_name"] 9 | readonly_fields = ["uuid"] 10 | list_display = ["uuid", "first_name", "last_name"] 11 | -------------------------------------------------------------------------------- /logpipe/migrations/0002_auto_20170427_1451.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 1.11 on 2017-04-27 14:51 2 | 3 | from django.db import migrations 4 | 5 | 6 | class Migration(migrations.Migration): 7 | dependencies = [ 8 | ("logpipe", "0001_initial"), 9 | ] 10 | 11 | operations = [migrations.RenameModel("Offset", "KafkaOffset")] 12 | -------------------------------------------------------------------------------- /manage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | import sys 4 | 5 | 6 | def main(): 7 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "sandbox.settings") 8 | 9 | from django.core.management import execute_from_command_line 10 | 11 | execute_from_command_line(sys.argv) 12 | 13 | 14 | if __name__ == "__main__": 15 | main() 16 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM registry.gitlab.com/thelabnyc/python:3.14@sha256:0e41570605a9add60854b464b5d6af7f367406efc2ee75e6a222da7d3f03d390 2 | 3 | RUN mkdir /code 4 | WORKDIR /code 5 | 6 | RUN apt-get update && \ 7 | apt-get install -y gettext && \ 8 | rm -rf /var/lib/apt/lists/* 9 | 10 | ADD . 
/code/ 11 | RUN uv sync 12 | 13 | RUN mkdir /tox 14 | ENV TOX_WORK_DIR='/tox' 15 | -------------------------------------------------------------------------------- /sandbox/lptester/migrations/0002_person_uuid.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 1.9.6 on 2016-07-04 15:27 2 | 3 | import uuid 4 | 5 | from django.db import migrations, models 6 | 7 | 8 | class Migration(migrations.Migration): 9 | dependencies = [ 10 | ("lptester", "0001_initial"), 11 | ] 12 | 13 | operations = [ 14 | migrations.AddField( 15 | model_name="person", 16 | name="uuid", 17 | field=models.UUIDField(default=uuid.uuid4, unique=True), 18 | ), 19 | ] 20 | -------------------------------------------------------------------------------- /docs/api.md: -------------------------------------------------------------------------------- 1 | # API Reference 2 | 3 | ## DRFProducer 4 | 5 | ::: logpipe.producer.DRFProducer 6 | :docstring: 7 | :members: 8 | 9 | ## PydanticProducer 10 | 11 | ::: logpipe.producer.PydanticProducer 12 | :docstring: 13 | :members: 14 | 15 | ## Consumer 16 | 17 | ::: logpipe.consumer.Consumer 18 | :docstring: 19 | :members: 20 | 21 | ## MultiConsumer 22 | 23 | ::: logpipe.consumer.MultiConsumer 24 | :docstring: 25 | :members: 26 | 27 | ## register_consumer 28 | 29 | ::: logpipe.registry.register_consumer 30 | :docstring: 31 | :members: 32 | -------------------------------------------------------------------------------- /sandbox/lptester/producers.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | from django.dispatch import receiver 4 | 5 | from logpipe import Producer 6 | 7 | from . import constants, models, serializers, signals 8 | 9 | 10 | @receiver( 11 | signals.person_altered, 12 | sender=models.Person, 13 | dispatch_uid="send_person_altered_message", 14 | ) 15 | def send_person_altered_message( 16 | sender: type[models.Person], 17 | person: models.Person, 18 | **kwargs: Any, 19 | ) -> None: 20 | producer = Producer(constants.TOPIC_PEOPLE, serializers.PersonSerializer) 21 | producer.send(person) 22 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | isolated_build = True 3 | toxworkdir={env:TOX_WORK_DIR:.tox} 4 | envlist = py{311,312,313,314}-django{420,510,520}-drf{316} 5 | 6 | [testenv] 7 | runner = uv-venv-runner 8 | deps = 9 | django420: django>=4.2,<4.3 10 | django510: django>=5.1,<5.2 11 | django520: django>=5.2,<5.3 12 | drf316: djangorestframework>=3.16,<3.17 13 | setenv = 14 | PYTHONWARNINGS = d 15 | commands = 16 | mypy logpipe sandbox 17 | coverage run \ 18 | manage.py test \ 19 | logpipe.tests.unit \ 20 | -v 2 \ 21 | --buffer 22 | coverage report 23 | -------------------------------------------------------------------------------- /logpipe/management/commands/run_kafka_consumer.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | from django.core.management.base import BaseCommand 4 | 5 | from logpipe.consumer import MultiConsumer 6 | from logpipe.registry import list_registered_consumers 7 | 8 | 9 | class Command(BaseCommand): 10 | help = "Fetch and apply Kafka messages" 11 | 12 | def handle(self, *args: Any, **options: Any) -> None: 13 | consumers = list_registered_consumers() 14 | for c in consumers: 15 | print("Found consumer: %s" % c) 16 | 
print("Running indefinite consumer...") 17 | multi = MultiConsumer(*consumers) 18 | multi.run() 19 | -------------------------------------------------------------------------------- /sandbox/lptester/models.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | import uuid 3 | 4 | from django.db import models 5 | 6 | from .signals import person_altered 7 | 8 | 9 | class Person(models.Model): 10 | uuid = models.UUIDField(default=uuid.uuid4, unique=True) 11 | first_name = models.CharField(max_length=200) 12 | last_name = models.CharField(max_length=200) 13 | 14 | _disable_kafka_signals = False 15 | 16 | def save(self, *args: Any, **kwargs: Any) -> None: 17 | ret = super().save(*args, **kwargs) 18 | if not self._disable_kafka_signals: 19 | person_altered.send(sender=self.__class__, person=self) 20 | return ret 21 | -------------------------------------------------------------------------------- /logpipe/registry.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Callable 2 | import functools 3 | 4 | from .consumer import Consumer 5 | 6 | ConsumerFactory = Callable[[], Consumer] 7 | 8 | _registered_consumers: list[ConsumerFactory] = [] 9 | 10 | 11 | def register_consumer(fn: ConsumerFactory) -> ConsumerFactory: 12 | _registered_consumers.append(fn) 13 | 14 | @functools.wraps(fn) 15 | def wrap() -> Consumer: 16 | return fn() 17 | 18 | return wrap 19 | 20 | 21 | def list_registered_consumers() -> list[Consumer]: 22 | return [build() for build in _registered_consumers] 23 | 24 | 25 | __all__ = ["register_consumer", "list_registered_consumers"] 26 | -------------------------------------------------------------------------------- /bin/publish.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -euxo pipefail 4 | 5 | # Check git status 6 | git fetch --all 7 | CURRENT_BRANCH=$(git branch --show-current) 8 | if [ "$CURRENT_BRANCH" != "master" ]; then 9 | echo "This script must be run only when the master branch is checked out, but the current branch is ${CURRENT_BRANCH}. Abort!" 10 | exit 1 11 | fi 12 | 13 | NUM_BEHIND=$(git log ..origin/master | wc -l | awk '{print $1}') 14 | if [ "$NUM_BEHIND" == "0" ]; then 15 | echo "" 16 | else 17 | echo "Your branch is NOT up to date with origin/master. Abort! Please fetch and rebase first." 18 | exit 1 19 | fi 20 | 21 | # Update version and publish via commitizen 22 | cz bump "$@" 23 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '2' 2 | services: 3 | spotify__kafka: 4 | image: spotify/kafka@sha256:cf8f8f760b48a07fb99df24fab8201ec8b647634751e842b67103a25a388981b 5 | environment: 6 | ADVERTISED_HOST: 'spotify__kafka' 7 | ADVERTISED_PORT: '9092' 8 | AUTO_CREATE_TOPICS: 'true' 9 | 10 | postgres: 11 | image: postgres:latest@sha256:38d5c9d522037d8bf0864c9068e4df2f8a60127c6489ab06f98fdeda535560f9 12 | environment: 13 | POSTGRES_HOST_AUTH_METHOD: 'trust' 14 | 15 | test: 16 | build: . 
17 | command: python manage.py runserver 0.0.0.0:8000 18 | ports: 19 | - "8000:8000" 20 | depends_on: 21 | - spotify__kafka 22 | - postgres 23 | volumes: 24 | - .:/code 25 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Create the .po and .mo files used for i18n 2 | .PHONY: translations 3 | translations: 4 | cd logpipe && \ 5 | django-admin makemessages -a && \ 6 | django-admin compilemessages 7 | 8 | .PHONY: install_precommit 9 | install_precommit: 10 | pre-commit install 11 | 12 | .PHONY: test_precommit 13 | test_precommit: install_precommit 14 | pre-commit run --all-files 15 | 16 | .PHONY: docs_serve 17 | docs_serve: 18 | DJANGO_SETTINGS_MODULE=logpipe.docgen_setup uv run mkdocs serve --strict 19 | 20 | .PHONY: docs_build 21 | docs_build: 22 | DJANGO_SETTINGS_MODULE=logpipe.docgen_setup uv run mkdocs build --strict 23 | 24 | docs: docs_build 25 | rm -rf public/ && \ 26 | mkdir -p public/ && \ 27 | cp -r build/mkdocs/* public/ 28 | -------------------------------------------------------------------------------- /logpipe/admin.py: -------------------------------------------------------------------------------- 1 | from django.contrib import admin 2 | 3 | from . import models 4 | 5 | 6 | @admin.register(models.KafkaOffset) 7 | class KafkaOffsetAdmin(admin.ModelAdmin): # type: ignore[type-arg] 8 | fields = ["topic", "partition", "offset"] 9 | list_display = ["topic", "partition", "offset"] 10 | list_filter = ["topic", "partition"] 11 | readonly_fields = ["topic", "partition"] 12 | 13 | 14 | @admin.register(models.KinesisOffset) 15 | class KinesisOffsetAdmin(admin.ModelAdmin): # type: ignore[type-arg] 16 | fields = ["region", "stream", "shard", "sequence_number"] 17 | list_display = ["stream", "region", "shard", "sequence_number"] 18 | list_filter = ["stream", "region", "shard"] 19 | readonly_fields = ["region", "stream", "shard"] 20 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | ISC License 2 | 3 | Copyright (c) 2016 - 2025 thelab 4 | 5 | Permission to use, copy, modify, and/or distribute this software for any 6 | purpose with or without fee is hereby granted, provided that the above 7 | copyright notice and this permission notice appear in all copies. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH 10 | REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY 11 | AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, 12 | INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM 13 | LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR 14 | OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 15 | PERFORMANCE OF THIS SOFTWARE. 
16 | -------------------------------------------------------------------------------- /logpipe/formats/json.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Mapping 2 | from typing import IO, Any 3 | 4 | from rest_framework.parsers import JSONParser as _JSONParser 5 | from rest_framework.renderers import JSONRenderer as _JSONRenderer 6 | 7 | from ..abc import Parser, Renderer 8 | 9 | 10 | class JSONRenderer(_JSONRenderer, Renderer): 11 | pass 12 | 13 | 14 | class JSONParser(_JSONParser, Parser): 15 | def parse( 16 | self, 17 | stream: IO[Any], 18 | media_type: str | None = None, 19 | parser_context: Mapping[str, Any] | None = None, 20 | ) -> dict[str, Any]: 21 | return super().parse( 22 | stream, 23 | media_type=media_type, 24 | parser_context=parser_context, 25 | ) 26 | 27 | 28 | __all__ = ["JSONRenderer", "JSONParser"] 29 | -------------------------------------------------------------------------------- /logpipe/migrations/0004_auto_20170502_1403.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 1.11 on 2017-05-02 14:03 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | dependencies = [ 8 | ("logpipe", "0003_auto_20170427_1703"), 9 | ] 10 | 11 | operations = [ 12 | migrations.AlterField( 13 | model_name="kinesisoffset", 14 | name="sequence_number", 15 | field=models.CharField( 16 | help_text="The current sequence number in the Kinesis shard", 17 | max_length=200, 18 | ), 19 | ), 20 | migrations.AlterField( 21 | model_name="kinesisoffset", 22 | name="shard", 23 | field=models.CharField(help_text="The Kinesis shard ID", max_length=200), 24 | ), 25 | ] 26 | -------------------------------------------------------------------------------- /sandbox/lptester/migrations/0001_initial.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 1.9.6 on 2016-07-04 14:29 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | initial = True 8 | 9 | dependencies = [] 10 | 11 | operations = [ 12 | migrations.CreateModel( 13 | name="Person", 14 | fields=[ 15 | ( 16 | "id", 17 | models.AutoField( 18 | auto_created=True, 19 | primary_key=True, 20 | serialize=False, 21 | verbose_name="ID", 22 | ), 23 | ), 24 | ("first_name", models.CharField(max_length=200)), 25 | ("last_name", models.CharField(max_length=200)), 26 | ], 27 | ), 28 | ] 29 | -------------------------------------------------------------------------------- /sandbox/lptester/serializers.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | from rest_framework import serializers 4 | 5 | from logpipe.abc import DRFSerializer 6 | 7 | from . 
import models 8 | 9 | 10 | class PersonSerializer( 11 | serializers.ModelSerializer[models.Person], 12 | DRFSerializer[models.Person], 13 | ): 14 | VERSION = 1 15 | KEY_FIELD = "uuid" 16 | 17 | class Meta: 18 | model = models.Person 19 | fields = ["uuid", "first_name", "last_name"] 20 | 21 | @classmethod 22 | def lookup_instance(cls, **kwargs: Any) -> models.Person | None: 23 | uuid = kwargs.get("uuid") 24 | if uuid is None: 25 | return None 26 | try: 27 | person = models.Person.objects.get(uuid=uuid) 28 | person._disable_kafka_signals = True 29 | return person 30 | except models.Person.DoesNotExist: 31 | pass 32 | return None 33 | -------------------------------------------------------------------------------- /logpipe/exceptions.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | from rest_framework import serializers 4 | 5 | from .abc import Record 6 | 7 | 8 | class LogPipeError(Exception): 9 | pass 10 | 11 | 12 | class LogPipeMessageError(LogPipeError): 13 | message: Record 14 | 15 | def __init__(self, descr: Any, message: Record): 16 | super().__init__(descr) 17 | self.message = message 18 | 19 | 20 | class UnknownFormatError(LogPipeError): 21 | pass 22 | 23 | 24 | class IgnoredMessageTypeError(LogPipeMessageError): 25 | pass 26 | 27 | 28 | class UnknownMessageTypeError(LogPipeMessageError): 29 | pass 30 | 31 | 32 | class UnknownMessageVersionError(LogPipeMessageError): 33 | pass 34 | 35 | 36 | class InvalidMessageError(LogPipeMessageError): 37 | pass 38 | 39 | 40 | class ValidationError(LogPipeMessageError, serializers.ValidationError): 41 | pass 42 | 43 | 44 | class MissingTopicError(LogPipeError): 45 | pass 46 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # Welcome 2 | 3 | [![](https://gitlab.com/thelabnyc/django-logpipe/badges/master/build.svg)](https://gitlab.com/thelabnyc/django-logpipe/commits/master) 4 | [![](https://img.shields.io/pypi/l/django-logpipe.svg)](https://pypi.python.org/pypi/) 5 | [![](https://badge.fury.io/py/django-logpipe.svg)](https://pypi.python.org/pypi/django-logpipe) 6 | [![](https://img.shields.io/pypi/format/django-logpipe.svg)](https://pypi.python.org/pypi/django-logpipe) 7 | 8 | Django-logpipe is a library that serves as a universal pipe for moving data around between Django applications and services. It supports serialization by means of [Django REST Framework][drf] and/or [Pydantic][pydantic], and supports using either [Apache Kafka][kafka] or [Amazon Kinesis][kinesis] as the underlying data stream. 9 | 10 | [drf]: http://www.django-rest-framework.org/ 11 | [pydantic]: https://docs.pydantic.dev/ 12 | [kafka]: https://kafka.apache.org/ 13 | [kinesis]: https://aws.amazon.com/kinesis/ 14 | -------------------------------------------------------------------------------- /logpipe/backend/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import Any 4 | 5 | from django.utils.module_loading import import_string 6 | 7 | from .. 
import settings 8 | from ..abc import ConsumerBackend, OffsetStoreBackend, ProducerBackend 9 | 10 | 11 | def get_offset_backend() -> OffsetStoreBackend: 12 | default = "logpipe.backend.kafka.ModelOffsetStore" 13 | backend_path = settings.get("OFFSET_BACKEND", default) 14 | return import_string(backend_path)() 15 | 16 | 17 | def get_consumer_backend(topic_name: str, **kwargs: Any) -> ConsumerBackend: 18 | default = "logpipe.backend.kafka.Consumer" 19 | backend_path = settings.get("CONSUMER_BACKEND", default) 20 | return import_string(backend_path)(topic_name, **kwargs) 21 | 22 | 23 | def get_producer_backend() -> ProducerBackend: 24 | default = "logpipe.backend.kafka.Producer" 25 | backend_path = settings.get("PRODUCER_BACKEND", default) 26 | return import_string(backend_path)() 27 | -------------------------------------------------------------------------------- /logpipe/docgen_setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | docs_dir, _ = os.path.split(__file__) 5 | sys.path.append(os.path.dirname(docs_dir)) 6 | 7 | SECRET_KEY = os.environ.get("SECRET_KEY", "django-insecure") 8 | INSTALLED_APPS = [ 9 | "django.contrib.admin", 10 | "django.contrib.auth", 11 | "django.contrib.contenttypes", 12 | "django.contrib.sessions", 13 | "django.contrib.sites", 14 | "django.contrib.messages", 15 | "django.contrib.staticfiles", 16 | "django.contrib.flatpages", 17 | "logpipe", 18 | ] 19 | 20 | STATIC_URL = "docgen-static/" 21 | 22 | LOGPIPE = { 23 | "OFFSET_BACKEND": "logpipe.backend.dummy.ModelOffsetStore", 24 | "PRODUCER_BACKEND": "logpipe.backend.dummy.Producer", 25 | "CONSUMER_BACKEND": "logpipe.backend.dummy.Consumer", 26 | } 27 | 28 | setup = None 29 | from django.apps import apps # noqa 30 | from django.conf import settings # noqa 31 | import django # noqa 32 | 33 | if not apps.ready and not settings.configured: 34 | django.setup() 35 | -------------------------------------------------------------------------------- /logpipe/formats/pickle.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Mapping 2 | from typing import IO, Any 3 | import pickle 4 | 5 | from rest_framework import parsers, renderers 6 | 7 | from ..abc import Parser, Renderer 8 | 9 | 10 | class PickleRenderer(renderers.BaseRenderer, Renderer): 11 | media_type = "application/python-pickle" 12 | format = "pickle" 13 | charset = None 14 | render_style = "binary" 15 | 16 | def render( 17 | self, 18 | data: dict[str, Any], 19 | media_type: str | None = None, 20 | renderer_context: Mapping[str, Any] | None = None, 21 | ) -> bytes: 22 | return pickle.dumps(data) 23 | 24 | 25 | class PickleParser(parsers.BaseParser, Parser): 26 | media_type = "application/python-pickle" 27 | 28 | def parse( 29 | self, 30 | stream: IO[Any], 31 | media_type: str | None = None, 32 | parser_context: Mapping[str, Any] | None = None, 33 | ) -> dict[str, Any]: 34 | return pickle.load(stream) 35 | 36 | 37 | __all__ = ["PickleRenderer", "PickleParser"] 38 | -------------------------------------------------------------------------------- /logpipe/tests/unit/test_settings.py: -------------------------------------------------------------------------------- 1 | from django.core.exceptions import ImproperlyConfigured 2 | from django.test import TestCase, override_settings 3 | 4 | from logpipe import settings 5 | 6 | 7 | class SettingsTest(TestCase): 8 | @override_settings(LOGPIPE={"KAFKA_BOOTSTRAP_SERVERS": ["kafka:9092"]}) 9 | 
def test_normal_required_key(self): 10 | self.assertEqual(settings.get("KAFKA_BOOTSTRAP_SERVERS"), ["kafka:9092"]) 11 | 12 | @override_settings(LOGPIPE={"KAFKA_BOOTSTRAP_SERVERS": ["kafka:9092"], "KAFKA_MAX_SEND_RETRIES": 3}) 13 | def test_normal_optional_key(self): 14 | self.assertEqual(settings.get("KAFKA_MAX_SEND_RETRIES", 5), 3) 15 | 16 | @override_settings(LOGPIPE={}) 17 | def test_missing_required_key(self): 18 | with self.assertRaises(ImproperlyConfigured): 19 | settings.get("KAFKA_BOOTSTRAP_SERVERS") 20 | 21 | @override_settings(LOGPIPE={"KAFKA_BOOTSTRAP_SERVERS": ["kafka:9092"]}) 22 | def test_missing_optional_key(self): 23 | self.assertEqual(settings.get("KAFKA_MAX_SEND_RETRIES", 5), 5) 24 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | --- 2 | site_name: django-logpipe 3 | 4 | theme: 5 | name: material 6 | icon: 7 | repo: fontawesome/brands/git-alt 8 | features: 9 | - search.suggest 10 | - search.highlight 11 | - navigation.footer 12 | 13 | repo_name: django-logpipe 14 | repo_url: https://gitlab.com/thelabnyc/django-logpipe 15 | edit_uri: -/edit/master/docs/ 16 | 17 | docs_dir: docs/ 18 | site_dir: build/mkdocs/ 19 | 20 | extra_css: 21 | - custom-styles.css 22 | 23 | markdown_extensions: 24 | - toc: 25 | permalink: True 26 | # - extra 27 | - abbr 28 | # - attr_list 29 | - def_list 30 | # - fenced_code 31 | - footnotes 32 | - md_in_html 33 | # - tables 34 | - codehilite 35 | - smarty 36 | - admonition 37 | - pymdownx.superfences: 38 | custom_fences: 39 | - name: mermaid 40 | class: mermaid 41 | format: !!python/name:pymdownx.superfences.fence_div_format 42 | - pymdownx.arithmatex: 43 | generic: true 44 | - pymdownx.tasklist: 45 | custom_checkbox: true 46 | clickable_checkbox: true 47 | - mkautodoc 48 | 49 | use_directory_urls: false 50 | -------------------------------------------------------------------------------- /logpipe/tests/integration/test_roundtrip.py: -------------------------------------------------------------------------------- 1 | from logpipe import Consumer, Producer 2 | 3 | from ..common import TOPIC_STATES, BaseTest, StateSerializer_DRF 4 | 5 | 6 | class RoundTripTest(BaseTest): 7 | def test_roundtrip_state(self): 8 | def save(ser): 9 | self.assertEqual(ser.validated_data["code"], "NY") 10 | self.assertEqual(ser.validated_data["name"], "New York") 11 | 12 | FakeStateSerializer = self.mock_state_serializer_drf(save) 13 | 14 | producer = Producer(TOPIC_STATES, StateSerializer_DRF) 15 | record = producer.send({"code": "NY", "name": "New York"}) 16 | self.assertEqual(record.topic, "us-states") 17 | self.assertEqual(record.partition, 0) 18 | self.assertTrue(record.offset >= 0) 19 | 20 | # producer.client.flush() 21 | 22 | consumer = Consumer(TOPIC_STATES, consumer_timeout_ms=1000) 23 | consumer.register(FakeStateSerializer) 24 | consumer.run(iter_limit=1) 25 | 26 | self.assertEqual(FakeStateSerializer.call_count, 1) 27 | self.assertEqual(self.serializers["state"].save.call_count, 1) 28 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # django-logpipe 2 | 3 | [![](https://gitlab.com/thelabnyc/django-logpipe/badges/master/build.svg)](https://gitlab.com/thelabnyc/django-logpipe/commits/master) 4 | [![](https://img.shields.io/pypi/l/django-logpipe.svg)](https://pypi.python.org/pypi/) 5 | 
[![](https://badge.fury.io/py/django-logpipe.svg)](https://pypi.python.org/pypi/django-logpipe) 6 | [![](https://img.shields.io/pypi/format/django-logpipe.svg)](https://pypi.python.org/pypi/django-logpipe) 7 | 8 | Django-logpipe is a library that serves as a universal pipe for moving data around between Django applications and services. It supports serialization by means of [Django REST Framework][drf] and/or [Pydantic][pydantic], and supports using either [Apache Kafka][kafka] or [Amazon Kinesis][kinesis] as the underlying data stream. 9 | 10 | [drf]: http://www.django-rest-framework.org/ 11 | [pydantic]: https://docs.pydantic.dev/ 12 | [kafka]: https://kafka.apache.org/ 13 | [kinesis]: https://aws.amazon.com/kinesis/ 14 | 15 | ## Documentation 16 | 17 | See [https://thelabnyc.gitlab.io/django-logpipe/](https://thelabnyc.gitlab.io/django-logpipe/) 18 | 19 | ## Change log 20 | 21 | See [Release Notes](./docs/releases.md) 22 | -------------------------------------------------------------------------------- /logpipe/migrations/0001_initial.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 1.9.6 on 2016-07-04 14:15 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | initial = True 8 | 9 | dependencies = [] 10 | 11 | operations = [ 12 | migrations.CreateModel( 13 | name="Offset", 14 | fields=[ 15 | ( 16 | "id", 17 | models.AutoField( 18 | auto_created=True, 19 | primary_key=True, 20 | serialize=False, 21 | verbose_name="ID", 22 | ), 23 | ), 24 | ("topic", models.CharField(max_length=200)), 25 | ("partition", models.PositiveIntegerField()), 26 | ("offset", models.PositiveIntegerField(default=0)), 27 | ], 28 | options={ 29 | "ordering": ("topic", "partition", "offset"), 30 | }, 31 | ), 32 | migrations.AlterUniqueTogether( 33 | name="offset", 34 | unique_together={("topic", "partition")}, 35 | ), 36 | ] 37 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://gitlab.com/thelabnyc/thelab-pre-commit-hooks 3 | rev: v0.0.3 4 | hooks: 5 | - id: update-copyright-year 6 | 7 | - repo: https://github.com/pre-commit/pre-commit-hooks 8 | rev: v6.0.0 9 | hooks: 10 | - id: check-json 11 | - id: check-merge-conflict 12 | - id: check-symlinks 13 | - id: check-toml 14 | - id: check-yaml 15 | args: [--unsafe] 16 | - id: end-of-file-fixer 17 | - id: trailing-whitespace 18 | 19 | - repo: https://github.com/asottile/pyupgrade 20 | rev: v3.21.2 21 | hooks: 22 | - id: pyupgrade 23 | args: [--py311-plus] 24 | 25 | - repo: https://github.com/adamchainz/django-upgrade 26 | rev: "1.29.1" 27 | hooks: 28 | - id: django-upgrade 29 | 30 | - repo: https://github.com/pycqa/isort 31 | rev: "7.0.0" 32 | hooks: 33 | - id: isort 34 | 35 | - repo: https://github.com/astral-sh/ruff-pre-commit 36 | rev: v0.14.10 37 | hooks: 38 | - id: ruff 39 | - id: ruff-format 40 | 41 | - repo: https://github.com/commitizen-tools/commitizen 42 | rev: v4.10.1 43 | hooks: 44 | - id: commitizen 45 | -------------------------------------------------------------------------------- /logpipe/settings.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | import os 3 | 4 | from django.conf import settings 5 | from django.core.exceptions import ImproperlyConfigured 6 | 7 | 8 | def get(key: str, default: Any = None) -> Any: 
9 | if default is None and key not in settings.LOGPIPE: 10 | raise ImproperlyConfigured('Please ensure LOGPIPE["%s"] is defined in your settings.py file.' % key) 11 | return settings.LOGPIPE.get(key, default) 12 | 13 | 14 | def get_aws_region(_default: str = "us-east-1") -> str: 15 | # Try to use the explicit KINESIS_REGION setting 16 | region = get("KINESIS_REGION", "") 17 | if region: 18 | return region 19 | # Try to import boto3 to get the region name 20 | try: 21 | import boto3 22 | except ImportError: 23 | # Can't import boto3, so fall back to the AWS_DEFAULT_REGION environment variable, then finally, us-east-1 24 | return os.environ.get("AWS_DEFAULT_REGION", _default) 25 | # Use the region for boto3's default session 26 | if boto3.DEFAULT_SESSION is not None: 27 | region = boto3.DEFAULT_SESSION.region_name 28 | if region: 29 | return region 30 | # Finally, make a new session and use its region 31 | region = boto3.session.Session().region_name 32 | if region: 33 | return region 34 | # Finally, return the default 35 | return _default 36 | -------------------------------------------------------------------------------- /logpipe/formats/msgpack.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Mapping 2 | from typing import IO, Any 3 | 4 | from rest_framework import parsers, renderers 5 | 6 | from ..abc import Parser, Renderer 7 | 8 | _import_error: ImportError 9 | try: 10 | import msgpack 11 | except ImportError as e: 12 | msgpack = None # type: ignore[assignment] 13 | _import_error = e 14 | 15 | 16 | class MsgPackRenderer(renderers.BaseRenderer, Renderer): 17 | media_type = "application/msgpack" 18 | format = "msgpack" 19 | charset = None 20 | render_style = "binary" 21 | 22 | def render( 23 | self, 24 | data: dict[str, Any], 25 | media_type: str | None = None, 26 | renderer_context: Mapping[str, Any] | None = None, 27 | ) -> bytes: 28 | if not msgpack: 29 | raise _import_error 30 | return msgpack.packb(data, use_bin_type=True) 31 | 32 | 33 | class MsgPackParser(parsers.BaseParser, Parser): 34 | media_type = "application/msgpack" 35 | 36 | def parse( 37 | self, 38 | stream: IO[Any], 39 | media_type: str | None = None, 40 | parser_context: Mapping[str, Any] | None = None, 41 | ) -> dict[str, Any]: 42 | if not msgpack: 43 | raise _import_error 44 | return msgpack.unpack(stream, use_list=False) 45 | 46 | 47 | __all__ = ["MsgPackRenderer", "MsgPackParser"] 48 | -------------------------------------------------------------------------------- /logpipe/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from django.core.exceptions import ImproperlyConfigured 4 | 5 | from . 
import format, settings 6 | from .constants import FORMAT_JSON, FORMAT_MSGPACK, FORMAT_PICKLE 7 | from .consumer import Consumer, MultiConsumer 8 | from .formats.json import JSONParser, JSONRenderer 9 | from .formats.msgpack import MsgPackParser, MsgPackRenderer 10 | from .formats.pickle import PickleParser, PickleRenderer 11 | from .producer import DRFProducer, Producer, PydanticProducer 12 | from .registry import register_consumer 13 | 14 | logger = logging.getLogger(__name__) 15 | 16 | 17 | _default_format = settings.get("DEFAULT_FORMAT", FORMAT_JSON) 18 | _allow_incoming_pickle = settings.get("ALLOW_INCOMING_PICKLE", False) 19 | if _default_format == FORMAT_PICKLE: 20 | if not _allow_incoming_pickle: 21 | raise ImproperlyConfigured("Cannot set DEFAULT_FORMAT to Pickle unless the ALLOW_INCOMING_PICKLE setting is enabled.") 22 | logger.warning("DEFAULT_FORMAT is set to Pickle. This is insecure and probably isn't a good idea.") 23 | 24 | format.register(FORMAT_JSON, JSONRenderer(), JSONParser()) 25 | format.register(FORMAT_MSGPACK, MsgPackRenderer(), MsgPackParser()) 26 | 27 | if _allow_incoming_pickle: 28 | format.register(FORMAT_PICKLE, PickleRenderer(), PickleParser()) 29 | 30 | 31 | __all__ = [ 32 | "FORMAT_JSON", 33 | "FORMAT_MSGPACK", 34 | "FORMAT_PICKLE", 35 | "DRFProducer", 36 | "PydanticProducer", 37 | "Producer", 38 | "Consumer", 39 | "MultiConsumer", 40 | "register_consumer", 41 | ] 42 | -------------------------------------------------------------------------------- /logpipe/format.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | from typing import Any, TypedDict 3 | 4 | from .abc import Parser, Renderer 5 | from .exceptions import UnknownFormatError 6 | 7 | 8 | class FormatRegistryEntry(TypedDict): 9 | renderer: Renderer 10 | parser: Parser 11 | 12 | 13 | FormatRegistry = dict[bytes, FormatRegistryEntry] 14 | 15 | _delim = b":" 16 | _formats: FormatRegistry = {} 17 | 18 | 19 | def _bytes(seq: str | bytes) -> bytes: 20 | return seq.encode() if hasattr(seq, "encode") else seq 21 | 22 | 23 | def register(codestr: str, renderer: Renderer, parser: Parser) -> None: 24 | code = _bytes(codestr) 25 | _formats[code] = { 26 | "renderer": renderer, 27 | "parser": parser, 28 | } 29 | 30 | 31 | def unregister(codestr: str) -> None: 32 | code = _bytes(codestr) 33 | try: 34 | del _formats[code] 35 | except KeyError: 36 | pass 37 | 38 | 39 | def render(codestr: str, data: dict[str, Any]) -> bytes: 40 | code = _bytes(codestr) 41 | if code not in _formats: 42 | raise UnknownFormatError(f"Could not find renderer for format {codestr}") 43 | body = _formats[code]["renderer"].render(data) 44 | return code + _delim + body 45 | 46 | 47 | def parse(_data: str | bytes) -> dict[str, Any]: 48 | data = _bytes(_data) 49 | code, body = data.split(_delim, 1) 50 | if code not in _formats: 51 | raise UnknownFormatError("Could not find parser for format %s" % code.decode()) 52 | return _formats[code]["parser"].parse(BytesIO(body)) 53 | 54 | 55 | __all__ = ["register", "unregister", "render", "parse"] 56 | -------------------------------------------------------------------------------- /logpipe/migrations/0005_auto_20180917_1348.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 1.11 on 2018-09-17 13:48 2 | 3 | from django.db import migrations, models 4 | 5 | import logpipe.settings 6 | 7 | 8 | class Migration(migrations.Migration): 9 | dependencies = [ 10 | ("logpipe", 
"0004_auto_20170502_1403"), 11 | ] 12 | 13 | operations = [ 14 | migrations.AddField( 15 | model_name="kinesisoffset", 16 | name="region", 17 | field=models.CharField( 18 | choices=[ 19 | ("us-east-1", "US East (N. Virginia)"), 20 | ("us-east-2", "US East (Ohio)"), 21 | ("us-west-1", "US West (N. California)"), 22 | ("us-west-2", "US West (Oregon)"), 23 | ("ap-south-1", "Asia Pacific (Mumbai)"), 24 | ("ap-northeast-2", "Asia Pacific (Seoul)"), 25 | ("ap-southeast-1", "Asia Pacific (Singapore)"), 26 | ("ap-southeast-2", "Asia Pacific (Sydney)"), 27 | ("ap-northeast-1", "Asia Pacific (Tokyo)"), 28 | ("ca-central-1", "Canada (Central)"), 29 | ("eu-central-1", "EU (Frankfurt)"), 30 | ("eu-west-1", "EU (Ireland)"), 31 | ("eu-west-2", "EU (London)"), 32 | ("eu-west-3", "EU (Paris)"), 33 | ("sa-east-1", "South America (São Paulo)"), 34 | ("cn-north-1", "China (Beijing)"), 35 | ("us-gov-west-1", "AWS GovCloud (US)"), 36 | ], 37 | default=logpipe.settings.get_aws_region, 38 | help_text="The Kinesis stream region name", 39 | max_length=20, 40 | ), 41 | ), 42 | migrations.AlterUniqueTogether( 43 | name="kinesisoffset", 44 | unique_together={("region", "stream", "shard")}, 45 | ), 46 | ] 47 | -------------------------------------------------------------------------------- /docs/releases.md: -------------------------------------------------------------------------------- 1 | # Release Notes 2 | 3 | ## 1.4.0 4 | 5 | - Drop support for Python 3.10. 6 | - Add support for Python 3.12. 7 | - Add Python type annotations. 8 | - Add support for using [Pydantic](https://docs.pydantic.dev/) models as an alternative to DRF serializers. 9 | 10 | ## 1.3.0 11 | 12 | - Add PRODUCER_ID setting to aid in debugging which systems sent which messages, especially when interrogating logged messages. 13 | 14 | ## 1.2.0 15 | 16 | - Add Python 3.10 and 3.11 to test suite. 17 | - Add Django 4.0 and 4.1 to test suite. 18 | - Drop Python 3.8 from test suite. 19 | - Drop Django 2.2, 3.0, and 3.1 from test suite. 20 | - Added missing DB migrations (though no actual DB changes exist). 21 | 22 | ## 1.1.0 23 | 24 | - Add Python 3.9 to test suite 25 | - Add Django 3.2 to test suite 26 | 27 | ## 1.0.0 28 | 29 | - No changes. 30 | 31 | ## 0.3.2 32 | 33 | - Fix compatibility issue with Django 3.0 34 | 35 | ## 0.3.1 36 | 37 | - Internationalization 38 | 39 | ## 0.3.0 40 | 41 | - In KinesisOffset model, track the AWS region for a stream. This allows a single database to subscribe to multiple streams in different regions, even if they have the same name. 42 | - Improved logic for detecting the current AWS region. 43 | - Add Django 2.1 to tox test suite. 44 | - Add support for Python 3.7. 45 | - Add support for python-kafka 1.4.4. 46 | 47 | ## 0.2.1 48 | 49 | - More robustly handle exceptions thrown by a consumer serializer's `save()` method. 50 | - Improve log messages and levels for invalid or unknown messages. 51 | - Add new method: `logpipe.Consumer.add_ignored_message_type`, which allows the consumer to explicitly ignore specific message types silently. This helps to filter log noise (messages that a consumer really doesn't care about) from actual errors (messages a consumer is skipping, but should be processing). 52 | 53 | ## 0.2.0 54 | 55 | - Added concept of message types. 56 | - Added support for AWS Kinesis. 57 | 58 | ## 0.1.0 59 | 60 | - Initial release. 
61 | -------------------------------------------------------------------------------- /logpipe/tests/common.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import ClassVar 3 | from unittest.mock import MagicMock 4 | 5 | from django.test import TestCase 6 | from pydantic import Field 7 | from rest_framework import serializers 8 | 9 | from ..abc import PydanticModel 10 | 11 | TOPIC_STATES = "us-states" 12 | 13 | 14 | class StateSerializer_DRF(serializers.Serializer): 15 | """Keyed Serializer for sending data about US States""" 16 | 17 | MESSAGE_TYPE = "us-state" 18 | VERSION = 1 19 | KEY_FIELD = "code" 20 | code = serializers.CharField(min_length=2, max_length=2) 21 | name = serializers.CharField() 22 | 23 | 24 | class State_Pydantic(PydanticModel): 25 | MESSAGE_TYPE: ClassVar[str] = "us-state" 26 | VERSION: ClassVar[int] = 1 27 | KEY_FIELD: ClassVar[str] = "code" 28 | 29 | code: str = Field( 30 | ..., 31 | max_length=2, 32 | min_length=2, 33 | ) 34 | name: str = "" 35 | 36 | 37 | @dataclass 38 | class StateModel: 39 | id: int | None = None 40 | code: str = "" 41 | name: str = "" 42 | 43 | 44 | class BaseTest(TestCase): 45 | def __init__(self, *args, **kwargs): 46 | super().__init__(*args, **kwargs) 47 | self.serializers = {} 48 | 49 | def mock_state_serializer_drf(self, save=None): 50 | def make(*args, **kwargs): 51 | ser = StateSerializer_DRF(*args, **kwargs) 52 | ser.save = MagicMock() 53 | if save: 54 | ser.save.side_effect = lambda *args, **kwargs: save(ser, *args, **kwargs) 55 | self.serializers["state"] = ser 56 | return ser 57 | 58 | FakeStateSerializer = MagicMock() 59 | FakeStateSerializer.MESSAGE_TYPE = StateSerializer_DRF.MESSAGE_TYPE 60 | FakeStateSerializer.VERSION = StateSerializer_DRF.VERSION 61 | FakeStateSerializer.side_effect = make 62 | 63 | return FakeStateSerializer 64 | 65 | def mock_state_serializer_pydantic(self, save=None): 66 | class MockState_Pydantic(State_Pydantic): 67 | def save(self): 68 | if save: 69 | save(self) 70 | 71 | return MockState_Pydantic 72 | -------------------------------------------------------------------------------- /logpipe/backend/dummy.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import Any 4 | import collections 5 | import time 6 | 7 | from ..abc import ( 8 | ConsumerBackend, 9 | OffsetStoreBackend, 10 | ProducerBackend, 11 | Record, 12 | RecordMetadata, 13 | TopicName, 14 | ) 15 | 16 | _topics: dict[TopicName, collections.deque[Record]] = {} 17 | _offsets: collections.Counter[str] = collections.Counter() 18 | 19 | 20 | def reset_topics() -> None: 21 | global _topics, _offsets 22 | _topics = {} 23 | _offsets = collections.Counter() 24 | return None 25 | 26 | 27 | class Consumer(ConsumerBackend): 28 | def __init__(self, topic_name: str, **kwargs: Any): 29 | self.topic_name = topic_name 30 | 31 | def seek_to_sequence_number(self, shard: str, sequence_number: str | None = None) -> None: 32 | pass 33 | 34 | def __iter__(self) -> Consumer: 35 | return self 36 | 37 | def __next__(self) -> Record: 38 | _records = _topics.get(self.topic_name) 39 | if _records: 40 | try: 41 | return _records.popleft() 42 | except IndexError: 43 | pass 44 | raise StopIteration() 45 | 46 | 47 | class Producer(ProducerBackend): 48 | def send(self, topic_name: TopicName, key: str, value: bytes) -> RecordMetadata | None: 49 | _offsets[topic_name] += 1 50 | record = Record( 51 | 
topic=topic_name, 52 | partition="0", 53 | offset=_offsets[topic_name], 54 | timestamp=(time.time() * 1000), 55 | key=key, 56 | value=value, 57 | ) 58 | if topic_name not in _topics: 59 | _topics[topic_name] = collections.deque() 60 | _topics[topic_name].append(record) 61 | return RecordMetadata( 62 | topic=topic_name, 63 | partition=record.partition, 64 | offset=str(record.offset), 65 | ) 66 | 67 | 68 | class ModelOffsetStore(OffsetStoreBackend): 69 | def commit(self, consumer: ConsumerBackend, message: Record) -> None: 70 | pass 71 | 72 | def seek(self, consumer: ConsumerBackend, topic: TopicName, partition: str) -> None: 73 | pass 74 | -------------------------------------------------------------------------------- /logpipe/migrations/0003_auto_20170427_1703.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 1.11 on 2017-04-27 17:03 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | dependencies = [ 8 | ("logpipe", "0002_auto_20170427_1451"), 9 | ] 10 | 11 | operations = [ 12 | migrations.CreateModel( 13 | name="KinesisOffset", 14 | fields=[ 15 | ( 16 | "id", 17 | models.AutoField( 18 | auto_created=True, 19 | primary_key=True, 20 | serialize=False, 21 | verbose_name="ID", 22 | ), 23 | ), 24 | ( 25 | "stream", 26 | models.CharField(help_text="The Kinesis stream name", max_length=200), 27 | ), 28 | ( 29 | "shard", 30 | models.CharField(help_text="The Kinesis shard ID", max_length=20), 31 | ), 32 | ( 33 | "sequence_number", 34 | models.CharField( 35 | help_text="The current sequence number in the Kinesis shard", 36 | max_length=20, 37 | ), 38 | ), 39 | ], 40 | options={ 41 | "ordering": ("stream", "shard", "sequence_number"), 42 | }, 43 | ), 44 | migrations.AlterField( 45 | model_name="kafkaoffset", 46 | name="offset", 47 | field=models.PositiveIntegerField(default=0, help_text="The current offset in the Kafka partition"), 48 | ), 49 | migrations.AlterField( 50 | model_name="kafkaoffset", 51 | name="partition", 52 | field=models.PositiveIntegerField(help_text="The Kafka partition identifier"), 53 | ), 54 | migrations.AlterField( 55 | model_name="kafkaoffset", 56 | name="topic", 57 | field=models.CharField(help_text="The Kafka topic name", max_length=200), 58 | ), 59 | migrations.AlterUniqueTogether( 60 | name="kinesisoffset", 61 | unique_together={("stream", "shard")}, 62 | ), 63 | ] 64 | -------------------------------------------------------------------------------- /docs/installation.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | Install `django-logpipe` from pip. 4 | 5 | ```sh 6 | pip install django-logpipe 7 | ``` 8 | 9 | Add `logpipe` to your installed apps. 10 | 11 | ```py 12 | INSTALLED_APPS = [ 13 | # … 14 | 'logpipe', 15 | # … 16 | ] 17 | ``` 18 | 19 | Add connection settings to your `settings.py` file. 
If you're using Kafka, this will look like this: 20 | 21 | ```py 22 | LOGPIPE = { 23 | # Required Settings 24 | 'OFFSET_BACKEND': 'logpipe.backend.kafka.ModelOffsetStore', 25 | 'CONSUMER_BACKEND': 'logpipe.backend.kafka.Consumer', 26 | 'PRODUCER_BACKEND': 'logpipe.backend.kafka.Producer', 27 | 'KAFKA_BOOTSTRAP_SERVERS': [ 28 | 'kafka:9092' 29 | ], 30 | 'KAFKA_CONSUMER_KWARGS': { 31 | 'group_id': 'django-logpipe', 32 | }, 33 | 34 | # Optional Settings 35 | 'KAFKA_SEND_TIMEOUT': 10, 36 | 'KAFKA_MAX_SEND_RETRIES': 0, 37 | 'KAFKA_KWARGS': { 38 | # Example for Confluent Cloud 39 | 'security_protocol': 'SASL_SSL', 40 | 'sasl_mechanism': 'PLAIN', 41 | 'sasl_plain_username': '', 42 | 'sasl_plain_password': '', 43 | # …or for OVHCloud 44 | 'security_protocol': 'SSL', 45 | 'ssl_cafile': '', 46 | 'ssl_certfile': '', 47 | 'ssl_keyfile': '', 48 | }, 49 | 'MIN_MESSAGE_LAG_MS': 0, 50 | 'DEFAULT_FORMAT': 'json', 51 | 'PRODUCER_ID': 'my-application-name', 52 | } 53 | ``` 54 | 55 | If you're using AWS Kinesis instead of Kafka, it will look like this: 56 | 57 | ```py 58 | LOGPIPE = { 59 | # Required Settings 60 | 'OFFSET_BACKEND': 'logpipe.backend.kinesis.ModelOffsetStore', 61 | 'CONSUMER_BACKEND': 'logpipe.backend.kinesis.Consumer', 62 | 'PRODUCER_BACKEND': 'logpipe.backend.kinesis.Producer', 63 | 64 | # Optional Settings 65 | # 'KINESIS_REGION': 'us-east-1', 66 | # 'KINESIS_FETCH_LIMIT': 25, 67 | # 'KINESIS_SEQ_NUM_CACHE_SIZE': 1000, 68 | # 'MIN_MESSAGE_LAG_MS': 0, 69 | # 'DEFAULT_FORMAT': 'json', 70 | # 'PRODUCER_ID': 'my-application-name', 71 | # 'KINESIS_SHARD_ITERATOR_TYPE': "LATEST" | "TRIM_HORIZON" (default) 72 | } 73 | ``` 74 | 75 | Run migrations. This will create the model used to store Kafka log position offsets. 76 | 77 | ```sh 78 | python manage.py migrate logpipe 79 | ``` 80 | -------------------------------------------------------------------------------- /.gitlab-ci.yml: -------------------------------------------------------------------------------- 1 | variables: 2 | ADVERTISED_HOST: "spotify__kafka" 3 | ADVERTISED_PORT: "9092" 4 | AUTO_CREATE_TOPICS: "true" 5 | POSTGRES_HOST_AUTH_METHOD: "trust" 6 | 7 | stages: 8 | - test 9 | - release 10 | 11 | services: 12 | - spotify/kafka@sha256:cf8f8f760b48a07fb99df24fab8201ec8b647634751e842b67103a25a388981b 13 | - postgres:latest@sha256:38d5c9d522037d8bf0864c9068e4df2f8a60127c6489ab06f98fdeda535560f9 14 | 15 | cache: 16 | key: "$CI_PROJECT_NAME" 17 | paths: 18 | - $HOME/.cache/pip 19 | 20 | include: 21 | - component: gitlab.com/thelabnyc/thelab-ci-components/review@0.6.2 22 | - component: gitlab.com/thelabnyc/thelab-ci-components/precommit@0.6.2 23 | rules: 24 | - if: $CI_PIPELINE_SOURCE == "schedule" 25 | when: never 26 | - if: $CI_COMMIT_BRANCH && $CI_COMMIT_REF_PROTECTED == "true" 27 | - if: $CI_COMMIT_TAG && $CI_COMMIT_REF_PROTECTED == "true" 28 | - if: '$CI_PIPELINE_SOURCE == "merge_request_event"' 29 | - component: gitlab.com/thelabnyc/thelab-ci-components/publish-gitlab-release@0.6.2 30 | - component: gitlab.com/thelabnyc/thelab-ci-components/publish-to-pypi@0.6.2 31 | 32 | test:lib: 33 | stage: test 34 | image: "registry.gitlab.com/thelabnyc/python:${IMAGE}" 35 | rules: 36 | - if: $CI_COMMIT_BRANCH && $CI_COMMIT_REF_PROTECTED == "true" 37 | - if: $CI_COMMIT_TAG && $CI_COMMIT_REF_PROTECTED == "true" 38 | - if: '$CI_PIPELINE_SOURCE == "merge_request_event"' 39 | script: 40 | - uv sync 41 | - uv run tox 42 | coverage: '/^TOTAL.+?(\d+\%)$/' 43 | parallel: 44 | matrix: 45 | - IMAGE: "3.11" 46 | TOX_SKIP_ENV: "^(?!py311-)" 47 | - IMAGE: "3.12" 48 
| TOX_SKIP_ENV: "^(?!py312-)" 49 | - IMAGE: "3.13" 50 | TOX_SKIP_ENV: "^(?!py313-)" 51 | - IMAGE: "3.14" 52 | TOX_SKIP_ENV: "^(?!py314-)" 53 | 54 | test:docs: 55 | stage: test 56 | image: "registry.gitlab.com/thelabnyc/python:3.14@sha256:0e41570605a9add60854b464b5d6af7f367406efc2ee75e6a222da7d3f03d390" 57 | rules: 58 | - if: '$CI_PIPELINE_SOURCE == "merge_request_event"' 59 | script: 60 | - uv sync 61 | - make docs 62 | 63 | pages: 64 | stage: release 65 | image: "registry.gitlab.com/thelabnyc/python:3.14@sha256:0e41570605a9add60854b464b5d6af7f367406efc2ee75e6a222da7d3f03d390" 66 | rules: 67 | - if: $CI_PIPELINE_SOURCE == "schedule" 68 | when: never 69 | - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH && $CI_COMMIT_REF_PROTECTED == "true" 70 | needs: 71 | - test:lib 72 | script: 73 | - uv sync 74 | - make docs 75 | artifacts: 76 | paths: 77 | - public 78 | -------------------------------------------------------------------------------- /logpipe/tests/unit/test_format.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | from django.test import TestCase, override_settings 4 | 5 | from logpipe.constants import FORMAT_PICKLE 6 | from logpipe.exceptions import UnknownFormatError 7 | from logpipe.formats.pickle import PickleParser, PickleRenderer 8 | import logpipe.format 9 | 10 | 11 | class JSONFormatTest(TestCase): 12 | def test_render(self): 13 | msg = logpipe.format.render( 14 | "json", 15 | { 16 | "foo": "bar", 17 | }, 18 | ) 19 | self.assertEqual(msg, b'json:{"foo":"bar"}') 20 | 21 | def test_parse(self): 22 | data = logpipe.format.parse(b'json:{"foo":"bar"}') 23 | self.assertEqual( 24 | data, 25 | { 26 | "foo": "bar", 27 | }, 28 | ) 29 | 30 | 31 | class MsgPackFormatTest(TestCase): 32 | def test_render(self): 33 | msg = logpipe.format.render("msgpack", {"foo": "bar"}) 34 | self.assertEqual(msg, b"msgpack:\x81\xa3foo\xa3bar") 35 | 36 | def test_parse(self): 37 | data = logpipe.format.parse(b"msgpack:\x81\xa3foo\xa3bar") 38 | self.assertEqual( 39 | data, 40 | { 41 | "foo": "bar", 42 | }, 43 | ) 44 | 45 | 46 | class PickleFormatTest(TestCase): 47 | @override_settings(LOGPIPE={"BOOTSTRAP_SERVERS": ["kafka:9092"]}) 48 | def test_default(self): 49 | with self.assertRaises(UnknownFormatError): 50 | logpipe.format.render("pickle", {}) 51 | 52 | def test_render(self): 53 | logpipe.format.register(FORMAT_PICKLE, PickleRenderer(), PickleParser()) 54 | msg = logpipe.format.render("pickle", {"foo": "bar"}) 55 | self.assertTrue(msg.startswith(b"pickle:")) 56 | self.assertEqual(pickle.loads(msg.replace(b"pickle:", b"")), {"foo": "bar"}) 57 | logpipe.format.unregister(FORMAT_PICKLE) 58 | 59 | def test_parse(self): 60 | logpipe.format.register(FORMAT_PICKLE, PickleRenderer(), PickleParser()) 61 | data = logpipe.format.parse(b"pickle:\x80\x03}q\x00X\x03\x00\x00\x00fooq\x01X\x03\x00\x00\x00barq\x02s.") 62 | self.assertEqual( 63 | data, 64 | { 65 | "foo": "bar", 66 | }, 67 | ) 68 | logpipe.format.unregister(FORMAT_PICKLE) 69 | 70 | 71 | class UnknownFormatTest(TestCase): 72 | def test_render(self): 73 | with self.assertRaises(UnknownFormatError): 74 | logpipe.format.render("xml", {}) 75 | 76 | def test_parse(self): 77 | with self.assertRaises(UnknownFormatError): 78 | logpipe.format.parse(b"xml:bar") 79 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | version.txt 2 | public 3 | 4 | # Created by 
https://www.gitignore.io/api/osx,sublimetext,python,linux,django 5 | 6 | ### OSX ### 7 | *.DS_Store 8 | .AppleDouble 9 | .LSOverride 10 | 11 | # Icon must end with two \r 12 | Icon 13 | 14 | 15 | # Thumbnails 16 | ._* 17 | 18 | # Files that might appear in the root of a volume 19 | .DocumentRevisions-V100 20 | .fseventsd 21 | .Spotlight-V100 22 | .TemporaryItems 23 | .Trashes 24 | .VolumeIcon.icns 25 | .com.apple.timemachine.donotpresent 26 | 27 | # Directories potentially created on remote AFP share 28 | .AppleDB 29 | .AppleDesktop 30 | Network Trash Folder 31 | Temporary Items 32 | .apdisk 33 | 34 | 35 | ### SublimeText ### 36 | # cache files for sublime text 37 | *.tmlanguage.cache 38 | *.tmPreferences.cache 39 | *.stTheme.cache 40 | 41 | # workspace files are user-specific 42 | *.sublime-workspace 43 | 44 | # project files should be checked into the repository, unless a significant 45 | # proportion of contributors will probably not be using SublimeText 46 | # *.sublime-project 47 | 48 | # sftp configuration file 49 | sftp-config.json 50 | 51 | # Package control specific files 52 | Package Control.last-run 53 | Package Control.ca-list 54 | Package Control.ca-bundle 55 | Package Control.system-ca-bundle 56 | Package Control.cache/ 57 | Package Control.ca-certs/ 58 | bh_unicode_properties.cache 59 | 60 | # Sublime-github package stores a github token in this file 61 | # https://packagecontrol.io/packages/sublime-github 62 | GitHub.sublime-settings 63 | 64 | 65 | ### Python ### 66 | # Byte-compiled / optimized / DLL files 67 | __pycache__/ 68 | *.py[cod] 69 | *$py.class 70 | 71 | # C extensions 72 | *.so 73 | 74 | # Distribution / packaging 75 | .Python 76 | env/ 77 | build/ 78 | develop-eggs/ 79 | dist/ 80 | downloads/ 81 | eggs/ 82 | .eggs/ 83 | lib/ 84 | lib64/ 85 | parts/ 86 | sdist/ 87 | var/ 88 | *.egg-info/ 89 | .installed.cfg 90 | *.egg 91 | 92 | # PyInstaller 93 | # Usually these files are written by a python script from a template 94 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
95 | *.manifest 96 | *.spec 97 | 98 | # Installer logs 99 | pip-log.txt 100 | pip-delete-this-directory.txt 101 | 102 | # Unit test / coverage reports 103 | htmlcov/ 104 | .tox/ 105 | .coverage 106 | .coverage.* 107 | .cache 108 | nosetests.xml 109 | coverage.xml 110 | *,cover 111 | .hypothesis/ 112 | 113 | # Translations 114 | *.pot 115 | 116 | # Django stuff: 117 | *.log 118 | local_settings.py 119 | 120 | # Flask stuff: 121 | instance/ 122 | .webassets-cache 123 | 124 | # Scrapy stuff: 125 | .scrapy 126 | 127 | # Sphinx documentation 128 | docs/_build/ 129 | 130 | # PyBuilder 131 | target/ 132 | 133 | # IPython Notebook 134 | .ipynb_checkpoints 135 | 136 | # pyenv 137 | .python-version 138 | 139 | # celery beat schedule file 140 | celerybeat-schedule 141 | 142 | # dotenv 143 | .env 144 | 145 | # virtualenv 146 | venv/ 147 | ENV/ 148 | 149 | # Spyder project settings 150 | .spyderproject 151 | 152 | # Rope project settings 153 | .ropeproject 154 | 155 | 156 | ### Linux ### 157 | *~ 158 | 159 | # temporary files which can be created if a process still has a handle open of a deleted file 160 | .fuse_hidden* 161 | 162 | # KDE directory preferences 163 | .directory 164 | 165 | # Linux trash folder which might appear on any partition or disk 166 | .Trash-* 167 | 168 | 169 | ### Django ### 170 | *.log 171 | *.pot 172 | *.pyc 173 | __pycache__/ 174 | local_settings.py 175 | db.sqlite3 176 | media 177 | -------------------------------------------------------------------------------- /logpipe/abc.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from abc import abstractmethod 4 | from collections.abc import Iterable, Mapping 5 | from enum import Enum, auto 6 | from typing import ( 7 | IO, 8 | Any, 9 | ClassVar, 10 | Literal, 11 | NamedTuple, 12 | Protocol, 13 | TypeGuard, 14 | TypeVar, 15 | ) 16 | 17 | from django.db import models 18 | from pydantic import BaseModel 19 | from rest_framework import serializers 20 | 21 | MessageType = str 22 | MessageVersion = int 23 | TopicName = str 24 | 25 | _IN = TypeVar("_IN", bound=models.Model) # Instance Type 26 | 27 | 28 | class Record(NamedTuple): 29 | topic: str 30 | partition: str 31 | offset: str | int 32 | timestamp: int | float 33 | key: str 34 | value: str | bytes 35 | 36 | 37 | class RecordMetadata(NamedTuple): 38 | topic: str 39 | partition: str 40 | offset: str 41 | 42 | 43 | class ConsumerBackend(Iterable[Record]): 44 | topic_name: TopicName 45 | 46 | def __init__(self, topic_name: TopicName, **kwargs: Any): 47 | pass 48 | 49 | def seek_to_sequence_number(self, shard: str, sequence_number: str | None = None) -> None: 50 | raise NotImplementedError() 51 | 52 | @abstractmethod 53 | def __iter__(self) -> ConsumerBackend: 54 | pass 55 | 56 | @abstractmethod 57 | def __next__(self) -> Record: 58 | pass 59 | 60 | 61 | class ProducerBackend(Protocol): 62 | def send(self, topic_name: TopicName, key: str, value: bytes) -> RecordMetadata | None: 63 | pass 64 | 65 | 66 | class OffsetStoreBackend(Protocol): 67 | def commit(self, consumer: ConsumerBackend, message: Record) -> None: 68 | pass 69 | 70 | def seek(self, consumer: ConsumerBackend, topic: TopicName, partition: str) -> None: 71 | pass 72 | 73 | 74 | class Renderer(Protocol): 75 | media_type: str 76 | format: str 77 | charset: str | None 78 | render_style: str 79 | 80 | def render( 81 | self, 82 | data: dict[str, Any], 83 | media_type: str | None = None, 84 | renderer_context: Mapping[str, Any] | None = None, 85 | ) -> 
bytes: 86 | pass 87 | 88 | 89 | class Parser(Protocol): 90 | media_type: str 91 | 92 | def parse( 93 | self, 94 | stream: IO[Any], 95 | media_type: str | None = None, 96 | parser_context: Mapping[str, Any] | None = None, 97 | ) -> dict[str, Any]: 98 | pass 99 | 100 | 101 | class SerializerType(Enum): 102 | DRF = auto() 103 | PYDANTIC = auto() 104 | 105 | 106 | class DRFSerializer(serializers.Serializer[_IN]): 107 | _tag: ClassVar[Literal[SerializerType.DRF]] = SerializerType.DRF 108 | MESSAGE_TYPE: ClassVar[str] 109 | VERSION: ClassVar[int] 110 | KEY_FIELD: ClassVar[str] 111 | 112 | @classmethod 113 | def lookup_instance(cls, **kwargs: Any) -> _IN | None: 114 | raise NotImplementedError() 115 | 116 | 117 | class PydanticModel(BaseModel): 118 | _tag: ClassVar[Literal[SerializerType.PYDANTIC]] = SerializerType.PYDANTIC 119 | MESSAGE_TYPE: ClassVar[str] 120 | VERSION: ClassVar[int] 121 | KEY_FIELD: ClassVar[str] 122 | 123 | def save(self) -> Any: 124 | raise NotImplementedError() 125 | 126 | 127 | SerializerClass = type[DRFSerializer[Any]] | type[PydanticModel] 128 | Serializer = DRFSerializer[Any] | PydanticModel 129 | 130 | 131 | def is_pydantic_serializer_class( 132 | cls: SerializerClass, 133 | ) -> TypeGuard[type[PydanticModel]]: 134 | return hasattr(cls, "_tag") and cls._tag == SerializerType.PYDANTIC 135 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "django-logpipe" 7 | version = "1.5.1" 8 | description = "Move data around between Python services using Kafka and/or AWS Kinesis and Django Rest Framework serializers." 
9 | readme = "README.md" 10 | dependencies = [ 11 | "Django (>=5.2)", 12 | "djangorestframework (>=3.16.1)", 13 | "lru-dict (>=1.4.1)", 14 | "pydantic (>=2.12.5,<3)", 15 | "kafka-python (>=2.3.0,<3)", 16 | "boto3 (>=1.42.8,<2)", 17 | "msgpack (>=1.1.2,<2)", 18 | ] 19 | requires-python = ">=3.11" 20 | 21 | [project.license] 22 | text = "ISC" 23 | 24 | [[project.authors]] 25 | name = "thelab" 26 | email = "thelabdev@thelab.co" 27 | 28 | [project.urls] 29 | Homepage = "https://gitlab.com/thelabnyc/django-logpipe" 30 | Repository = "https://gitlab.com/thelabnyc/django-logpipe" 31 | 32 | [dependency-groups] 33 | dev = [ 34 | "coverage==7.13.0", 35 | "ruff (>=0.14.10)", 36 | "moto (==5.1.18)", 37 | "boto3 (>=1.42.8,<2)", 38 | "boto3-stubs[kinesis] (>=1.42.8,<2)", 39 | "botocore-stubs (>=1.42.8,<2)", 40 | "psycopg2-binary (==2.9.11)", 41 | "tox (>=4.32.0)", 42 | "pytz (==2025.2)", 43 | "kafka-python (==2.3.0)", 44 | "mypy (==1.19.1)", 45 | "django-stubs (==5.2.8)", 46 | "djangorestframework-stubs (==3.16.6)", 47 | "msgpack-types (==0.5.0)", 48 | "tox-uv (>=1.29.0)", 49 | "mkdocs (>=1.6.1,<2)", 50 | "pymdown-extensions (>=10.19.1,<11)", 51 | "mkdocs-material (>=9.7.1,<10)", 52 | "mkautodoc (>=0.2.0,<0.3)", 53 | ] 54 | 55 | [tool.hatch.build.targets.wheel] 56 | packages = ["logpipe"] 57 | 58 | [[tool.uv.index]] 59 | name = "thelabnyc" 60 | url = "https://gitlab.com/api/v4/groups/269576/-/packages/pypi/simple" 61 | explicit = true 62 | 63 | [tool.mypy] 64 | python_version = "3.11" 65 | plugins = ["mypy_django_plugin.main", "mypy_drf_plugin.main"] 66 | 67 | # Strict mode, see mypy --help 68 | warn_unused_configs = true 69 | disallow_subclassing_any = true 70 | disallow_any_generics = true 71 | disallow_untyped_calls = true 72 | disallow_untyped_defs = true 73 | disallow_incomplete_defs = true 74 | check_untyped_defs = true 75 | disallow_untyped_decorators = true 76 | no_implicit_optional = true 77 | warn_redundant_casts = true 78 | warn_unused_ignores = true 79 | # warn_return_any = true 80 | no_implicit_reexport = true 81 | show_error_codes = true 82 | # Not turned on by strict 83 | strict_equality = true 84 | 85 | [[tool.mypy.overrides]] 86 | module = "kafka.*" 87 | ignore_missing_imports = true 88 | 89 | [[tool.mypy.overrides]] 90 | module = "logpipe.tests.*" 91 | ignore_errors = true 92 | 93 | 94 | [tool.django-stubs] 95 | django_settings_module = "sandbox.settings" 96 | 97 | [tool.isort] 98 | profile = "black" 99 | from_first = true 100 | 101 | [tool.ruff] 102 | line-length = 160 103 | 104 | [tool.ruff.lint.isort] 105 | from-first = true 106 | 107 | [tool.coverage.run] 108 | branch = true 109 | source_pkgs = ["logpipe"] 110 | omit = ["*/migrations/*", "*/snapshots/*", "*/tests/*"] 111 | 112 | [tool.coverage.report] 113 | show_missing = true 114 | ignore_errors = true 115 | 116 | 117 | [tool.commitizen] 118 | name = "cz_conventional_commits" 119 | annotated_tag = true 120 | gpg_sign = true 121 | tag_format = "v$version" 122 | update_changelog_on_bump = true 123 | changelog_merge_prerelease = true 124 | version_provider = "pep621" 125 | version_scheme = "pep440" 126 | version_files = ["pyproject.toml:version"] 127 | pre_bump_hooks = ["pre-commit run --all-files || true"] 128 | post_bump_hooks = ["git push origin master $CZ_POST_CURRENT_TAG_VERSION"] 129 | -------------------------------------------------------------------------------- /logpipe/tests/unit/kafka/test_producer.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import MagicMock, 
patch 2 | import binascii 3 | 4 | from django.test import TestCase, override_settings 5 | from kafka.consumer.fetcher import ConsumerRecord 6 | 7 | from logpipe import Producer 8 | from logpipe.tests.common import TOPIC_STATES, StateModel, StateSerializer_DRF 9 | 10 | LOGPIPE = { 11 | "KAFKA_BOOTSTRAP_SERVERS": ["kafka:9092"], 12 | "KAFKA_SEND_TIMEOUT": 5, 13 | "KAFKA_MAX_SEND_RETRIES": 5, 14 | } 15 | 16 | 17 | class DRFProducerTest(TestCase): 18 | @override_settings(LOGPIPE=LOGPIPE) 19 | @patch("kafka.KafkaProducer") 20 | def test_normal_send(self, KafkaProducer): 21 | future = MagicMock() 22 | future.get.return_value = self._get_record_metadata() 23 | 24 | def test_send_call(topic, key, value): 25 | self.assertEqual(topic, "us-states") 26 | self.assertEqual(key, b"NY") 27 | self.assertIn(b"json:", value) 28 | self.assertIn(b'"message":{"', value) 29 | self.assertIn(b'"code":"NY"', value) 30 | self.assertIn(b'"name":"New York"', value) 31 | self.assertIn(b'"version":1', value) 32 | return future 33 | 34 | client = MagicMock() 35 | client.send.side_effect = test_send_call 36 | KafkaProducer.return_value = client 37 | 38 | producer = Producer(TOPIC_STATES, StateSerializer_DRF) 39 | ret = producer.send({"code": "NY", "name": "New York"}) 40 | self.assertEqual(ret.topic, TOPIC_STATES) 41 | self.assertEqual(ret.partition, 0) 42 | self.assertEqual(ret.offset, 42) 43 | self.assertEqual(KafkaProducer.call_count, 1) 44 | self.assertEqual(client.send.call_count, 1) 45 | self.assertEqual(future.get.call_count, 1) 46 | KafkaProducer.assert_called_with(bootstrap_servers=["kafka:9092"], retries=5) 47 | future.get.assert_called_with(timeout=5) 48 | 49 | @override_settings(LOGPIPE=LOGPIPE) 50 | @patch("kafka.KafkaProducer") 51 | def test_object_send(self, KafkaProducer): 52 | future = MagicMock() 53 | future.get.return_value = self._get_record_metadata() 54 | 55 | def test_send_call(topic, key, value): 56 | self.assertEqual(topic, "us-states") 57 | self.assertEqual(key, b"NY") 58 | self.assertIn(b"json:", value) 59 | self.assertIn(b'"message":{"', value) 60 | self.assertIn(b'"code":"NY"', value) 61 | self.assertIn(b'"name":"New York"', value) 62 | self.assertIn(b'"version":1', value) 63 | return future 64 | 65 | client = MagicMock() 66 | client.send.side_effect = test_send_call 67 | KafkaProducer.return_value = client 68 | 69 | producer = Producer(TOPIC_STATES, StateSerializer_DRF) 70 | obj = StateModel( 71 | code="NY", 72 | name="New York", 73 | ) 74 | ret = producer.send(obj) 75 | self.assertEqual(ret.topic, TOPIC_STATES) 76 | self.assertEqual(ret.partition, 0) 77 | self.assertEqual(ret.offset, 42) 78 | self.assertEqual(KafkaProducer.call_count, 1) 79 | self.assertEqual(client.send.call_count, 1) 80 | self.assertEqual(future.get.call_count, 1) 81 | KafkaProducer.assert_called_with(bootstrap_servers=["kafka:9092"], retries=5) 82 | future.get.assert_called_with(timeout=5) 83 | 84 | def _get_record_metadata(self): 85 | return ConsumerRecord( 86 | topic=TOPIC_STATES, 87 | partition=0, 88 | leader_epoch=-1, 89 | offset=42, 90 | timestamp=1467649216540, 91 | timestamp_type=0, 92 | key=b"NY", 93 | value=b"foo", 94 | headers=None, 95 | checksum=binascii.crc32(b"foo"), 96 | serialized_key_size=b"NY", 97 | serialized_value_size=b"foo", 98 | serialized_header_size=0, 99 | ) 100 | -------------------------------------------------------------------------------- /sandbox/settings.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from django.utils.translation 
import gettext_lazy as _ 4 | import django_stubs_ext 5 | 6 | django_stubs_ext.monkeypatch() 7 | 8 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 9 | 10 | DEBUG = True 11 | SECRET_KEY = "li0$-gnv)76g$yf7p@(cg-^_q7j6df5cx$o-gsef5hd68phj!4" 12 | SITE_ID = 1 13 | ROOT_URLCONF = "sandbox.urls" 14 | ALLOWED_HOSTS = ["*"] 15 | 16 | USE_I18N = True 17 | LANGUAGE_CODE = "en-us" 18 | LANGUAGES = ( 19 | ("en-us", _("English")), 20 | ("es", _("Spanish")), 21 | ) 22 | 23 | INSTALLED_APPS = [ 24 | "django.contrib.admin", 25 | "django.contrib.auth", 26 | "django.contrib.contenttypes", 27 | "django.contrib.sessions", 28 | "django.contrib.sites", 29 | "django.contrib.messages", 30 | "django.contrib.staticfiles", 31 | "django.contrib.flatpages", 32 | "logpipe", 33 | "sandbox.lptester", 34 | ] 35 | 36 | MIDDLEWARE = ( 37 | "django.contrib.sessions.middleware.SessionMiddleware", 38 | "django.middleware.locale.LocaleMiddleware", 39 | "django.middleware.common.CommonMiddleware", 40 | "django.middleware.csrf.CsrfViewMiddleware", 41 | "django.contrib.auth.middleware.AuthenticationMiddleware", 42 | "django.contrib.messages.middleware.MessageMiddleware", 43 | "django.middleware.clickjacking.XFrameOptionsMiddleware", 44 | "django.middleware.security.SecurityMiddleware", 45 | "django.contrib.flatpages.middleware.FlatpageFallbackMiddleware", 46 | ) 47 | 48 | AUTHENTICATION_BACKENDS = ("django.contrib.auth.backends.ModelBackend",) 49 | 50 | TEMPLATES = [ 51 | { 52 | "BACKEND": "django.template.backends.django.DjangoTemplates", 53 | "DIRS": [], 54 | "APP_DIRS": True, 55 | "OPTIONS": { 56 | "context_processors": [ 57 | "django.template.context_processors.debug", 58 | "django.template.context_processors.request", 59 | "django.contrib.auth.context_processors.auth", 60 | "django.contrib.messages.context_processors.messages", 61 | "django.template.context_processors.i18n", 62 | ], 63 | }, 64 | }, 65 | ] 66 | 67 | DEFAULT_AUTO_FIELD = "django.db.models.AutoField" 68 | DATABASES = { 69 | "default": { 70 | "ENGINE": "django.db.backends.postgresql", 71 | "NAME": "postgres", 72 | "USER": "postgres", 73 | "PASSWORD": "", 74 | "HOST": "postgres", 75 | "PORT": 5432, 76 | } 77 | } 78 | 79 | 80 | STATIC_URL = "/static/" 81 | 82 | 83 | LOGPIPE = { 84 | "KAFKA_BOOTSTRAP_SERVERS": ["spotify__kafka:9092"], 85 | "KAFKA_CONSUMER_KWARGS": { 86 | "group_id": "django-logpipe", 87 | }, 88 | # OFFSET_BACKEND: Defaults to logpipe.backend.kafka.ModelOffsetStore. 89 | # CONSUMER_BACKEND: Defaults to logpipe.backend.kafka.Consumer. 90 | # PRODUCER_BACKEND: Defaults to logpipe.backend.kafka.Producer. 91 | # KAFKA_BOOTSTRAP_SERVERS: List of Kafka hostname:port pairs. Required when using Kafka. 92 | # KAFKA_SEND_TIMEOUT: Defaults to 10 seconds. 93 | # KAFKA_MAX_SEND_RETRIES: Defaults to 0 retry attempts. 94 | # KINESIS_REGION: Defaults to 'us-east-1'. 95 | # KINESIS_FETCH_LIMIT: Defaults to 25 records. 96 | # KINESIS_SEQ_NUM_CACHE_SIZE: Defaults to 1000.
97 | # MIN_MESSAGE_LAG_MS: Defaults to 0ms 98 | # DEFAULT_FORMAT: Defaults to 'json' 99 | } 100 | 101 | 102 | LOGGING = { 103 | "version": 1, 104 | "disable_existing_loggers": False, 105 | "formatters": { 106 | "verbose": { 107 | "format": "%(asctime)s django %(name)s: %(levelname)s %(process)d %(thread)d %(message)s", 108 | "datefmt": "%Y-%m-%dT%H:%M:%S", 109 | }, 110 | }, 111 | "handlers": {"console": {"class": "logging.StreamHandler", "formatter": "verbose"}}, 112 | "loggers": { 113 | "logpipe": { 114 | "level": "CRITICAL", 115 | } 116 | }, 117 | "root": { 118 | "handlers": ["console"], 119 | "level": "CRITICAL", 120 | }, 121 | } 122 | -------------------------------------------------------------------------------- /logpipe/producer.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Generic, TypeVar 2 | import logging 3 | 4 | from django.db import models 5 | from pydantic import RootModel 6 | 7 | from . import settings 8 | from .abc import DRFSerializer, ProducerBackend, PydanticModel, RecordMetadata 9 | from .backend import get_producer_backend 10 | from .constants import FORMAT_JSON 11 | from .format import render 12 | 13 | logger = logging.getLogger(__name__) 14 | 15 | 16 | _DRFSerType = TypeVar("_DRFSerType", bound=type[DRFSerializer[Any]]) 17 | 18 | 19 | class BaseProducer: 20 | client: ProducerBackend 21 | topic_name: str 22 | producer_id: str 23 | 24 | def __init__( 25 | self, 26 | topic_name: str, 27 | producer_id: str | None = None, 28 | ): 29 | self.client = get_producer_backend() 30 | self.topic_name = topic_name 31 | self.producer_id = producer_id if producer_id else settings.get("PRODUCER_ID", "") 32 | 33 | def _inner_send( 34 | self, 35 | message_type: str, 36 | version: int, 37 | key: str, 38 | data: Any, 39 | ) -> RecordMetadata | None: 40 | # Render everything into a string 41 | renderer = settings.get("DEFAULT_FORMAT", FORMAT_JSON) 42 | body = { 43 | "type": message_type, 44 | "version": version, 45 | "message": data, 46 | } 47 | if self.producer_id: 48 | body["producer"] = self.producer_id 49 | serialized_data = render(renderer, body) 50 | 51 | # Send the message data into the backend 52 | record_metadata = self.client.send( 53 | self.topic_name, 54 | key=key, 55 | value=serialized_data, 56 | ) 57 | logger.debug(f'Sent message with type "{message_type}", key "{key}" to topic "{self.topic_name}"') 58 | return record_metadata 59 | 60 | 61 | class DRFProducer(BaseProducer, Generic[_DRFSerType]): 62 | """ 63 | Producer class for sending messages that are serialized using a Django Rest 64 | Framework serializer. 65 | """ 66 | 67 | serializer_class: _DRFSerType 68 | 69 | def __init__( 70 | self, 71 | topic_name: str, 72 | serializer_class: _DRFSerType, 73 | producer_id: str | None = None, 74 | ): 75 | super().__init__(topic_name, producer_id) 76 | self.serializer_class = serializer_class 77 | 78 | def send(self, instance: dict[str, Any] | models.Model) -> RecordMetadata | None: 79 | """ 80 | Serialize the given object using the previously specified serializer, then 81 | write it to the log backend (Kafka or Kinesis). 
82 | """ 83 | # Get the message type and version 84 | message_type = self.serializer_class.MESSAGE_TYPE 85 | version = self.serializer_class.VERSION 86 | 87 | # Init the serializer 88 | ser = self.serializer_class(instance=instance) 89 | 90 | # Get the message's partition key 91 | key_field = getattr(self.serializer_class, "KEY_FIELD", None) 92 | key = "" 93 | if key_field: 94 | key = str(ser.data[key_field]) 95 | 96 | # Send 97 | return self._inner_send( 98 | message_type=message_type, 99 | version=version, 100 | key=key, 101 | data=ser.data, 102 | ) 103 | 104 | 105 | # For backwards compatibility 106 | Producer = DRFProducer 107 | 108 | 109 | class PydanticProducer(BaseProducer): 110 | def send(self, instance: PydanticModel) -> RecordMetadata | None: 111 | # Get the message's partition key 112 | key_field = getattr(instance, "KEY_FIELD", None) 113 | key = "" 114 | if key_field: 115 | keyobj = getattr(instance, key_field) 116 | if isinstance(keyobj, RootModel): 117 | keyobj = keyobj.model_dump(mode="json") 118 | key = str(keyobj) 119 | 120 | # Send 121 | return self._inner_send( 122 | message_type=instance.MESSAGE_TYPE, 123 | version=instance.VERSION, 124 | key=key, 125 | data=instance.model_dump(mode="json"), 126 | ) 127 | -------------------------------------------------------------------------------- /logpipe/migrations/0006_alter_kafkaoffset_options_and_more.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 4.1.7 on 2023-03-15 11:45 2 | 3 | from django.db import migrations, models 4 | 5 | import logpipe.settings 6 | 7 | 8 | class Migration(migrations.Migration): 9 | dependencies = [ 10 | ("logpipe", "0005_auto_20180917_1348"), 11 | ] 12 | 13 | operations = [ 14 | migrations.AlterModelOptions( 15 | name="kafkaoffset", 16 | options={ 17 | "ordering": ("topic", "partition", "offset"), 18 | "verbose_name": "Kafka Offset", 19 | "verbose_name_plural": "Kafka Offsets", 20 | }, 21 | ), 22 | migrations.AlterModelOptions( 23 | name="kinesisoffset", 24 | options={ 25 | "ordering": ("stream", "shard", "sequence_number"), 26 | "verbose_name": "AWS Kinesis Offset", 27 | "verbose_name_plural": "AWS Kinesis Offsets", 28 | }, 29 | ), 30 | migrations.AlterField( 31 | model_name="kafkaoffset", 32 | name="offset", 33 | field=models.PositiveIntegerField( 34 | default=0, 35 | help_text="The current offset in the Kafka partition", 36 | verbose_name="Kafka Offset", 37 | ), 38 | ), 39 | migrations.AlterField( 40 | model_name="kafkaoffset", 41 | name="partition", 42 | field=models.PositiveIntegerField( 43 | help_text="The Kafka partition identifier", 44 | verbose_name="Kafka Partition ID", 45 | ), 46 | ), 47 | migrations.AlterField( 48 | model_name="kafkaoffset", 49 | name="topic", 50 | field=models.CharField( 51 | help_text="The Kafka topic name", 52 | max_length=200, 53 | verbose_name="Kafka Topic Name", 54 | ), 55 | ), 56 | migrations.AlterField( 57 | model_name="kinesisoffset", 58 | name="region", 59 | field=models.CharField( 60 | choices=[ 61 | ("us-east-1", "US East (N. Virginia)"), 62 | ("us-east-2", "US East (Ohio)"), 63 | ("us-west-1", "US West (N. 
California)"), 64 | ("us-west-2", "US West (Oregon)"), 65 | ("ap-south-1", "Asia Pacific (Mumbai)"), 66 | ("ap-northeast-2", "Asia Pacific (Seoul)"), 67 | ("ap-southeast-1", "Asia Pacific (Singapore)"), 68 | ("ap-southeast-2", "Asia Pacific (Sydney)"), 69 | ("ap-northeast-1", "Asia Pacific (Tokyo)"), 70 | ("ca-central-1", "Canada (Central)"), 71 | ("eu-central-1", "EU (Frankfurt)"), 72 | ("eu-west-1", "EU (Ireland)"), 73 | ("eu-west-2", "EU (London)"), 74 | ("eu-west-3", "EU (Paris)"), 75 | ("sa-east-1", "South America (São Paulo)"), 76 | ("cn-north-1", "China (Beijing)"), 77 | ("us-gov-west-1", "AWS GovCloud (US)"), 78 | ], 79 | default=logpipe.settings.get_aws_region, 80 | help_text="The Kinesis stream region name", 81 | max_length=20, 82 | verbose_name="AWS Region", 83 | ), 84 | ), 85 | migrations.AlterField( 86 | model_name="kinesisoffset", 87 | name="sequence_number", 88 | field=models.CharField( 89 | help_text="The current sequence number in the Kinesis shard", 90 | max_length=200, 91 | verbose_name="Kinesis Sequence Number", 92 | ), 93 | ), 94 | migrations.AlterField( 95 | model_name="kinesisoffset", 96 | name="shard", 97 | field=models.CharField( 98 | help_text="The Kinesis shard ID", 99 | max_length=200, 100 | verbose_name="Kinesis Shard ID", 101 | ), 102 | ), 103 | migrations.AlterField( 104 | model_name="kinesisoffset", 105 | name="stream", 106 | field=models.CharField( 107 | help_text="The Kinesis stream name", 108 | max_length=200, 109 | verbose_name="Kinesis Stream Name", 110 | ), 111 | ), 112 | ] 113 | -------------------------------------------------------------------------------- /logpipe/tests/unit/test_consumer.py: -------------------------------------------------------------------------------- 1 | import collections 2 | 3 | from django.test import override_settings 4 | 5 | from logpipe import Consumer, DRFProducer, MultiConsumer, PydanticProducer 6 | from logpipe.backend.dummy import reset_topics 7 | from logpipe.tests.common import ( 8 | TOPIC_STATES, 9 | BaseTest, 10 | State_Pydantic, 11 | StateModel, 12 | StateSerializer_DRF, 13 | ) 14 | 15 | LOGPIPE = { 16 | "OFFSET_BACKEND": "logpipe.backend.dummy.ModelOffsetStore", 17 | "PRODUCER_BACKEND": "logpipe.backend.dummy.Producer", 18 | "CONSUMER_BACKEND": "logpipe.backend.dummy.Consumer", 19 | } 20 | 21 | 22 | class DRFConsumerTest(BaseTest): 23 | def setUp(self): 24 | super().setUp() 25 | reset_topics() 26 | 27 | @override_settings(LOGPIPE=LOGPIPE) 28 | def test_normal_consume(self): 29 | # Send a message to the dummy producer 30 | producer = DRFProducer(TOPIC_STATES, StateSerializer_DRF) 31 | ny = StateModel( 32 | id=5, 33 | code="NY", 34 | name="New York", 35 | ) 36 | producer.send(ny) 37 | 38 | # Setup the consumer serializer 39 | test = collections.Counter() 40 | 41 | def save(ser): 42 | self.assertEqual(ser.validated_data["code"], "NY") 43 | self.assertEqual(ser.validated_data["name"], "New York") 44 | test["i"] += 1 45 | 46 | FakeStateSerializer = self.mock_state_serializer_drf(save) 47 | 48 | # Retrieve the message from the dummy consumer. 
49 | consumer = Consumer(TOPIC_STATES) 50 | consumer.register(FakeStateSerializer) 51 | 52 | consumer.run(iter_limit=10) 53 | self.assertEqual(self.serializers["state"].save.call_count, 1) 54 | 55 | # Not called again 56 | consumer.run(iter_limit=10) 57 | self.assertEqual(self.serializers["state"].save.call_count, 1) 58 | self.assertEqual(test["i"], 1) 59 | 60 | 61 | class PydanticConsumerTest(BaseTest): 62 | def setUp(self): 63 | super().setUp() 64 | reset_topics() 65 | 66 | @override_settings(LOGPIPE=LOGPIPE) 67 | def test_normal_consume(self): 68 | # Send a message to the dummy producer 69 | producer = PydanticProducer(TOPIC_STATES) 70 | ny = State_Pydantic( 71 | id=5, 72 | code="NY", 73 | name="New York", 74 | ) 75 | producer.send(ny) 76 | 77 | # Setup the consumer serializer 78 | test = collections.Counter() 79 | 80 | def save(_self): 81 | self.assertEqual(_self._instance, None) 82 | self.assertEqual(_self.code, "NY") 83 | self.assertEqual(_self.name, "New York") 84 | test["i"] += 1 85 | 86 | FakeStateSerializer = self.mock_state_serializer_pydantic(save) 87 | 88 | # Retrieve the message from the dummy consumer. 89 | consumer = Consumer(TOPIC_STATES) 90 | consumer.register(FakeStateSerializer) 91 | 92 | # Save called once. 93 | consumer.run(iter_limit=10) 94 | self.assertEqual(test["i"], 1) 95 | 96 | # Not called again 97 | consumer.run(iter_limit=10) 98 | self.assertEqual(test["i"], 1) 99 | 100 | 101 | class MultiConsumerTest(BaseTest): 102 | def setUp(self): 103 | super().setUp() 104 | reset_topics() 105 | 106 | @override_settings(LOGPIPE=LOGPIPE) 107 | def test_normal_consume(self): 108 | # Send a message to the dummy producer 109 | producer = PydanticProducer(TOPIC_STATES) 110 | ny = State_Pydantic( 111 | id=5, 112 | code="NY", 113 | name="New York", 114 | ) 115 | for i in range(5): 116 | producer.send(ny) 117 | 118 | # Setup the consumer serializer 119 | test = collections.Counter() 120 | 121 | def save(_self): 122 | self.assertEqual(_self._instance, None) 123 | self.assertEqual(_self.code, "NY") 124 | self.assertEqual(_self.name, "New York") 125 | test["i"] += 1 126 | 127 | FakeStateSerializer = self.mock_state_serializer_pydantic(save) 128 | 129 | # Retrieve the message from the dummy consumer. 130 | inner_consumer = Consumer(TOPIC_STATES) 131 | inner_consumer.register(FakeStateSerializer) 132 | consumer = MultiConsumer(inner_consumer, inner_consumer) 133 | 134 | # Save called once. 135 | consumer.run(iter_limit=10) 136 | self.assertEqual(test["i"], 5) 137 | -------------------------------------------------------------------------------- /logpipe/models.py: -------------------------------------------------------------------------------- 1 | from django.db import models 2 | from django.utils.translation import gettext_lazy as _ 3 | 4 | from . 
import settings 5 | 6 | 7 | class KafkaOffset(models.Model): 8 | # Translators: Internal Model Field Name 9 | topic = models.CharField( 10 | _("Kafka Topic Name"), 11 | # Translators: Interal Model Field Help Text 12 | help_text=_("The Kafka topic name"), 13 | max_length=200, 14 | ) 15 | 16 | # Translators: Internal Model Field Name 17 | partition = models.PositiveIntegerField( 18 | _("Kafka Partition ID"), 19 | # Translators: Interal Model Field Help Text 20 | help_text=_("The Kafka partition identifier"), 21 | ) 22 | 23 | # Translators: Internal Model Field Name 24 | offset = models.PositiveIntegerField( 25 | _("Kafka Offset"), 26 | # Translators: Interal Model Field Help Text 27 | help_text=_("The current offset in the Kafka partition"), 28 | default=0, 29 | ) 30 | 31 | class Meta: 32 | # Translators: Internal Model Name (singular) 33 | verbose_name = _("Kafka Offset") 34 | # Translators: Internal Model Name (plural) 35 | verbose_name_plural = _("Kafka Offsets") 36 | unique_together = ("topic", "partition") 37 | ordering = ("topic", "partition", "offset") 38 | 39 | def __str__(self) -> str: 40 | return f'topic="{self.topic}", partition="{self.partition}", offset="{self.offset}"' 41 | 42 | 43 | class KinesisOffset(models.Model): 44 | _region_choices = ( 45 | # Translators: AWS Region Name 46 | ("us-east-1", _("US East (N. Virginia)")), 47 | # Translators: AWS Region Name 48 | ("us-east-2", _("US East (Ohio)")), 49 | # Translators: AWS Region Name 50 | ("us-west-1", _("US West (N. California)")), 51 | # Translators: AWS Region Name 52 | ("us-west-2", _("US West (Oregon)")), 53 | # Translators: AWS Region Name 54 | ("ap-south-1", _("Asia Pacific (Mumbai)")), 55 | # Translators: AWS Region Name 56 | ("ap-northeast-2", _("Asia Pacific (Seoul)")), 57 | # Translators: AWS Region Name 58 | ("ap-southeast-1", _("Asia Pacific (Singapore)")), 59 | # Translators: AWS Region Name 60 | ("ap-southeast-2", _("Asia Pacific (Sydney)")), 61 | # Translators: AWS Region Name 62 | ("ap-northeast-1", _("Asia Pacific (Tokyo)")), 63 | # Translators: AWS Region Name 64 | ("ca-central-1", _("Canada (Central)")), 65 | # Translators: AWS Region Name 66 | ("eu-central-1", _("EU (Frankfurt)")), 67 | # Translators: AWS Region Name 68 | ("eu-west-1", _("EU (Ireland)")), 69 | # Translators: AWS Region Name 70 | ("eu-west-2", _("EU (London)")), 71 | # Translators: AWS Region Name 72 | ("eu-west-3", _("EU (Paris)")), 73 | # Translators: AWS Region Name 74 | ("sa-east-1", _("South America (São Paulo)")), 75 | # Translators: AWS Region Name 76 | ("cn-north-1", _("China (Beijing)")), 77 | # Translators: AWS Region Name 78 | ("us-gov-west-1", _("AWS GovCloud (US)")), 79 | ) 80 | 81 | # Translators: Internal Model Field Name 82 | region = models.CharField( 83 | _("AWS Region"), 84 | # Translators: Interal Model Field Help Text 85 | help_text=_("The Kinesis stream region name"), 86 | max_length=20, 87 | default=settings.get_aws_region, 88 | choices=_region_choices, 89 | ) 90 | 91 | # Translators: Internal Model Field Name 92 | stream = models.CharField( 93 | _("Kinesis Stream Name"), 94 | # Translators: Interal Model Field Help Text 95 | help_text=_("The Kinesis stream name"), 96 | max_length=200, 97 | ) 98 | 99 | # Translators: Internal Model Field Name 100 | shard = models.CharField( 101 | _("Kinesis Shard ID"), 102 | # Translators: Interal Model Field Help Text 103 | help_text=_("The Kinesis shard ID"), 104 | max_length=200, 105 | ) 106 | 107 | # Translators: Internal Model Field Name 108 | sequence_number = 
models.CharField( 109 | _("Kinesis Sequence Number"), 110 | # Translators: Interal Model Field Help Text 111 | help_text=_("The current sequence number in the Kinesis shard"), 112 | max_length=200, 113 | ) 114 | 115 | class Meta: 116 | # Translators: Internal Model Name (singular) 117 | verbose_name = _("AWS Kinesis Offset") 118 | # Translators: Internal Model Name (plural) 119 | verbose_name_plural = _("AWS Kinesis Offsets") 120 | unique_together = ("region", "stream", "shard") 121 | ordering = ("stream", "shard", "sequence_number") 122 | 123 | def __str__(self) -> str: 124 | return f'region="{self.region}", stream="{self.stream}", shard="{self.shard}", sequence_number="{self.sequence_number}"' 125 | -------------------------------------------------------------------------------- /logpipe/tests/unit/kinesis/test_producer.py: -------------------------------------------------------------------------------- 1 | from django.test import TestCase, override_settings 2 | from moto import mock_aws 3 | import boto3 4 | 5 | from logpipe import Producer 6 | from logpipe.tests.common import TOPIC_STATES, StateModel, StateSerializer_DRF 7 | 8 | LOGPIPE = { 9 | "OFFSET_BACKEND": "logpipe.backend.kinesis.ModelOffsetStore", 10 | "PRODUCER_BACKEND": "logpipe.backend.kinesis.Producer", 11 | "CONSUMER_BACKEND": "logpipe.backend.kinesis.Consumer", 12 | } 13 | 14 | 15 | class DRFProducerTest(TestCase): 16 | @override_settings(LOGPIPE=LOGPIPE) 17 | @mock_aws 18 | def test_normal_send(self): 19 | client = boto3.client("kinesis", region_name="us-east-1") 20 | client.create_stream(StreamName=TOPIC_STATES, ShardCount=1) 21 | 22 | producer = Producer(TOPIC_STATES, StateSerializer_DRF) 23 | 24 | ret = producer.send({"code": "NY", "name": "New York"}) 25 | self.assertEqual(ret.topic, TOPIC_STATES) 26 | self.assertEqual(ret.partition, "shardId-000000000000") 27 | self.assertEqual(ret.offset, "1") 28 | 29 | ret = producer.send({"code": "PA", "name": "Pennsylvania"}) 30 | self.assertEqual(ret.topic, TOPIC_STATES) 31 | self.assertEqual(ret.partition, "shardId-000000000000") 32 | self.assertEqual(ret.offset, "2") 33 | 34 | shard_iter = client.get_shard_iterator( 35 | StreamName=TOPIC_STATES, 36 | ShardId="shardId-000000000000", 37 | ShardIteratorType="TRIM_HORIZON", 38 | )["ShardIterator"] 39 | response = client.get_records(ShardIterator=shard_iter, Limit=100) 40 | 41 | self.assertEqual(response["Records"][0]["SequenceNumber"], "1") 42 | self.assertJSONEqual( 43 | response["Records"][0]["Data"].decode().replace("json:", ""), 44 | { 45 | "type": "us-state", 46 | "version": 1, 47 | "message": { 48 | "code": "NY", 49 | "name": "New York", 50 | }, 51 | }, 52 | ) 53 | self.assertEqual(response["Records"][0]["PartitionKey"], "NY") 54 | 55 | self.assertEqual(response["Records"][1]["SequenceNumber"], "2") 56 | self.assertJSONEqual( 57 | response["Records"][1]["Data"].decode().replace("json:", ""), 58 | { 59 | "type": "us-state", 60 | "version": 1, 61 | "message": { 62 | "code": "PA", 63 | "name": "Pennsylvania", 64 | }, 65 | }, 66 | ) 67 | self.assertEqual(response["Records"][1]["PartitionKey"], "PA") 68 | 69 | @override_settings(LOGPIPE=LOGPIPE) 70 | @mock_aws 71 | def test_object_send(self): 72 | client = boto3.client("kinesis", region_name="us-east-1") 73 | client.create_stream(StreamName=TOPIC_STATES, ShardCount=1) 74 | 75 | producer = Producer(TOPIC_STATES, StateSerializer_DRF) 76 | 77 | obj = StateModel( 78 | code="NY", 79 | name="New York", 80 | ) 81 | ret = producer.send(obj) 82 | self.assertEqual(ret.topic, TOPIC_STATES) 
83 | self.assertEqual(ret.partition, "shardId-000000000000") 84 | self.assertEqual(ret.offset, "1") 85 | 86 | obj = StateModel( 87 | code="PA", 88 | name="Pennsylvania", 89 | ) 90 | ret = producer.send(obj) 91 | self.assertEqual(ret.topic, TOPIC_STATES) 92 | self.assertEqual(ret.partition, "shardId-000000000000") 93 | self.assertEqual(ret.offset, "2") 94 | 95 | shard_iter = client.get_shard_iterator( 96 | StreamName=TOPIC_STATES, 97 | ShardId="shardId-000000000000", 98 | ShardIteratorType="TRIM_HORIZON", 99 | )["ShardIterator"] 100 | response = client.get_records(ShardIterator=shard_iter, Limit=100) 101 | 102 | self.assertEqual(response["Records"][0]["SequenceNumber"], "1") 103 | self.assertJSONEqual( 104 | response["Records"][0]["Data"].decode().replace("json:", ""), 105 | { 106 | "type": "us-state", 107 | "version": 1, 108 | "message": { 109 | "code": "NY", 110 | "name": "New York", 111 | }, 112 | }, 113 | ) 114 | self.assertEqual(response["Records"][0]["PartitionKey"], "NY") 115 | 116 | self.assertEqual(response["Records"][1]["SequenceNumber"], "2") 117 | self.assertJSONEqual( 118 | response["Records"][1]["Data"].decode().replace("json:", ""), 119 | { 120 | "type": "us-state", 121 | "version": 1, 122 | "message": { 123 | "code": "PA", 124 | "name": "Pennsylvania", 125 | }, 126 | }, 127 | ) 128 | self.assertEqual(response["Records"][1]["PartitionKey"], "PA") 129 | -------------------------------------------------------------------------------- /logpipe/locale/es/LC_MESSAGES/django.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER 3 | # This file is distributed under the same license as the PACKAGE package. 4 | # FIRST AUTHOR , YEAR. 5 | # 6 | msgid "" 7 | msgstr "" 8 | "Project-Id-Version: \n" 9 | "Report-Msgid-Bugs-To: \n" 10 | "POT-Creation-Date: 2024-02-13 22:05+0000\n" 11 | "PO-Revision-Date: 2019-06-24 17:55-0400\n" 12 | "Last-Translator: Craig Weber \n" 13 | "Language-Team: \n" 14 | "Language: es\n" 15 | "MIME-Version: 1.0\n" 16 | "Content-Type: text/plain; charset=UTF-8\n" 17 | "Content-Transfer-Encoding: 8bit\n" 18 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 19 | "X-Generator: Poedit 2.2.3\n" 20 | 21 | #. Translators: Backend Library Name 22 | #: logpipe/apps.py:9 23 | msgid "LogPipe" 24 | msgstr "LogPipe" 25 | 26 | #: logpipe/models.py:10 27 | msgid "Kafka Topic Name" 28 | msgstr "Nombre del tema de Kafka" 29 | 30 | #. Translators: Interal Model Field Help Text 31 | #: logpipe/models.py:12 32 | msgid "The Kafka topic name" 33 | msgstr "El nombre del tema de Kafka" 34 | 35 | #: logpipe/models.py:18 36 | msgid "Kafka Partition ID" 37 | msgstr "ID de partición Kafka" 38 | 39 | #. Translators: Interal Model Field Help Text 40 | #: logpipe/models.py:20 41 | msgid "The Kafka partition identifier" 42 | msgstr "El identificador de partición Kafka" 43 | 44 | #. Translators: Internal Model Name (singular) 45 | #: logpipe/models.py:25 logpipe/models.py:33 46 | msgid "Kafka Offset" 47 | msgstr "Kafka Offset" 48 | 49 | #. Translators: Interal Model Field Help Text 50 | #: logpipe/models.py:27 51 | msgid "The current offset in the Kafka partition" 52 | msgstr "El desplazamiento actual en la partición Kafka" 53 | 54 | #. Translators: Internal Model Name (plural) 55 | #: logpipe/models.py:35 56 | msgid "Kafka Offsets" 57 | msgstr "Kafka Offsets" 58 | 59 | #. Translators: AWS Region Name 60 | #: logpipe/models.py:46 61 | msgid "US East (N. Virginia)" 62 | msgstr "EE.UU. 
Este (Norte de Virginia)" 63 | 64 | #. Translators: AWS Region Name 65 | #: logpipe/models.py:48 66 | msgid "US East (Ohio)" 67 | msgstr "EE.UU. Este (Ohio)" 68 | 69 | #. Translators: AWS Region Name 70 | #: logpipe/models.py:50 71 | msgid "US West (N. California)" 72 | msgstr "EE.UU. Oeste (Norte de California)" 73 | 74 | #. Translators: AWS Region Name 75 | #: logpipe/models.py:52 76 | msgid "US West (Oregon)" 77 | msgstr "EE.UU. Oeste (Oregón)" 78 | 79 | #. Translators: AWS Region Name 80 | #: logpipe/models.py:54 81 | msgid "Asia Pacific (Mumbai)" 82 | msgstr "Asia Pacífico (Mumbai)" 83 | 84 | #. Translators: AWS Region Name 85 | #: logpipe/models.py:56 86 | msgid "Asia Pacific (Seoul)" 87 | msgstr "Asia Pacífico (Seúl)" 88 | 89 | #. Translators: AWS Region Name 90 | #: logpipe/models.py:58 91 | msgid "Asia Pacific (Singapore)" 92 | msgstr "Asia Pacífico (Singapur)" 93 | 94 | #. Translators: AWS Region Name 95 | #: logpipe/models.py:60 96 | msgid "Asia Pacific (Sydney)" 97 | msgstr "Asia Pacífico (Sídney)" 98 | 99 | #. Translators: AWS Region Name 100 | #: logpipe/models.py:62 101 | msgid "Asia Pacific (Tokyo)" 102 | msgstr "Asia Pacífico (Tokio)" 103 | 104 | #. Translators: AWS Region Name 105 | #: logpipe/models.py:64 106 | msgid "Canada (Central)" 107 | msgstr "Canadá (Central)" 108 | 109 | #. Translators: AWS Region Name 110 | #: logpipe/models.py:66 111 | msgid "EU (Frankfurt)" 112 | msgstr "UE (Fráncfort)" 113 | 114 | #. Translators: AWS Region Name 115 | #: logpipe/models.py:68 116 | msgid "EU (Ireland)" 117 | msgstr "UE (Irlanda)" 118 | 119 | #. Translators: AWS Region Name 120 | #: logpipe/models.py:70 121 | msgid "EU (London)" 122 | msgstr "UE (Londres)" 123 | 124 | #. Translators: AWS Region Name 125 | #: logpipe/models.py:72 126 | msgid "EU (Paris)" 127 | msgstr "UE (París)" 128 | 129 | #. Translators: AWS Region Name 130 | #: logpipe/models.py:74 131 | msgid "South America (São Paulo)" 132 | msgstr "América del Sur (São Paulo)" 133 | 134 | #. Translators: AWS Region Name 135 | #: logpipe/models.py:76 136 | msgid "China (Beijing)" 137 | msgstr "China (Beijing)" 138 | 139 | #. Translators: AWS Region Name 140 | #: logpipe/models.py:78 141 | msgid "AWS GovCloud (US)" 142 | msgstr "AWS GovCloud (US-East)" 143 | 144 | #: logpipe/models.py:83 145 | msgid "AWS Region" 146 | msgstr "Regiones AWS" 147 | 148 | #. Translators: Interal Model Field Help Text 149 | #: logpipe/models.py:85 150 | msgid "The Kinesis stream region name" 151 | msgstr "El nombre de la región del arroyo Kinesis" 152 | 153 | #: logpipe/models.py:93 154 | msgid "Kinesis Stream Name" 155 | msgstr "Nombre de Kinesis Stream" 156 | 157 | #. Translators: Interal Model Field Help Text 158 | #: logpipe/models.py:95 159 | msgid "The Kinesis stream name" 160 | msgstr "El nombre de la secuencia de Kinesis" 161 | 162 | #: logpipe/models.py:101 163 | msgid "Kinesis Shard ID" 164 | msgstr "ID de fragmento de kinesis" 165 | 166 | #. Translators: Interal Model Field Help Text 167 | #: logpipe/models.py:103 168 | msgid "The Kinesis shard ID" 169 | msgstr "La identificación del fragmento de Kinesis" 170 | 171 | #: logpipe/models.py:109 172 | msgid "Kinesis Sequence Number" 173 | msgstr "Número de secuencia de kinesis" 174 | 175 | #. Translators: Interal Model Field Help Text 176 | #: logpipe/models.py:111 177 | msgid "The current sequence number in the Kinesis shard" 178 | msgstr "El número de secuencia actual en el fragmento de Kinesis" 179 | 180 | #. 
Translators: Internal Model Name (singular) 181 | #: logpipe/models.py:117 182 | msgid "AWS Kinesis Offset" 183 | msgstr "AWS Kinesis Offset" 184 | 185 | #. Translators: Internal Model Name (plural) 186 | #: logpipe/models.py:119 187 | msgid "AWS Kinesis Offsets" 188 | msgstr "AWS Kinesis Offsets" 189 | -------------------------------------------------------------------------------- /logpipe/backend/kafka.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import Any, NotRequired, TypedDict 4 | import logging 5 | 6 | from django.apps import apps 7 | import kafka 8 | 9 | from .. import settings 10 | from ..abc import ( 11 | ConsumerBackend, 12 | OffsetStoreBackend, 13 | ProducerBackend, 14 | Record, 15 | RecordMetadata, 16 | ) 17 | from ..exceptions import MissingTopicError 18 | from . import get_offset_backend 19 | 20 | logger = logging.getLogger(__name__) 21 | 22 | 23 | class KafkaClientConfig(TypedDict): 24 | bootstrap_servers: list[str] 25 | retries: NotRequired[int] 26 | auto_offset_reset: NotRequired[str] 27 | enable_auto_commit: NotRequired[bool] 28 | consumer_timeout_ms: NotRequired[int] 29 | 30 | 31 | class ModelOffsetStore(OffsetStoreBackend): 32 | def commit(self, consumer: ConsumerBackend, message: Record) -> None: 33 | if not isinstance(consumer, Consumer): 34 | raise TypeError("Consumer type mismatch") 35 | KafkaOffset = apps.get_model(app_label="logpipe", model_name="KafkaOffset") 36 | logger.debug( 37 | 'Commit offset "%s" for topic "%s", partition "%s" to %s' 38 | % ( 39 | message.offset, 40 | message.topic, 41 | message.partition, 42 | self.__class__.__name__, 43 | ) 44 | ) 45 | obj, created = KafkaOffset.objects.get_or_create(topic=message.topic, partition=message.partition) 46 | obj.offset = int(message.offset) + 1 47 | obj.save() 48 | 49 | def seek(self, consumer: ConsumerBackend, topic: str, partition: str) -> None: 50 | if not isinstance(consumer, Consumer): 51 | raise TypeError("Consumer type mismatch") 52 | KafkaOffset = apps.get_model(app_label="logpipe", model_name="KafkaOffset") 53 | tp = kafka.TopicPartition(topic=topic, partition=partition) 54 | try: 55 | obj = KafkaOffset.objects.get(topic=topic, partition=partition) 56 | logger.debug(f'Seeking to offset "{obj.offset}" on topic "{topic}", partition "{partition}"') 57 | consumer.client.seek(tp, obj.offset) 58 | except KafkaOffset.DoesNotExist: 59 | logger.debug(f'Seeking to beginning of topic "{topic}", partition "{partition}"') 60 | consumer.client.seek_to_beginning(tp) 61 | 62 | 63 | class KafkaOffsetStore(OffsetStoreBackend): 64 | def commit(self, consumer: ConsumerBackend, message: Record) -> None: 65 | if not isinstance(consumer, Consumer): 66 | raise TypeError("Consumer type mismatch") 67 | logger.debug( 68 | 'Commit offset "%s" for topic "%s", partition "%s" to %s' 69 | % ( 70 | message.offset, 71 | message.topic, 72 | message.partition, 73 | self.__class__.__name__, 74 | ) 75 | ) 76 | consumer.client.commit() 77 | 78 | def seek(self, consumer: ConsumerBackend, topic: str, partition: str) -> None: 79 | pass 80 | 81 | 82 | class Consumer(ConsumerBackend): 83 | _client = None 84 | 85 | def __init__(self, topic_name: str, **kwargs: Any): 86 | self.topic_name = topic_name 87 | self.client_kwargs = kwargs 88 | 89 | @property 90 | def client(self) -> kafka.KafkaConsumer: 91 | if not self._client: 92 | kwargs = self._get_client_config() 93 | self._client = kafka.KafkaConsumer(**kwargs) 94 | tps = 
self._get_topic_partitions() 95 | self._client.assign(tps) 96 | backend = get_offset_backend() 97 | for tp in tps: 98 | backend.seek(self, tp.topic, tp.partition) 99 | self._client.committed(tp) 100 | return self._client 101 | 102 | def __iter__(self) -> Consumer: 103 | return self 104 | 105 | def __next__(self) -> Record: 106 | r = next(self.client) 107 | record = Record( 108 | topic=r.topic, 109 | partition=r.partition, 110 | offset=r.offset, 111 | timestamp=r.timestamp, 112 | key=r.key, 113 | value=r.value, 114 | ) 115 | return record 116 | 117 | def _get_topic_partitions(self) -> list[kafka.TopicPartition]: 118 | p = [] 119 | partitions = self.client.partitions_for_topic(self.topic_name) 120 | if not partitions: 121 | raise MissingTopicError("Could not find topic %s. Does it exist?" % self.topic_name) 122 | for partition in partitions: 123 | tp = kafka.TopicPartition(self.topic_name, partition=partition) 124 | p.append(tp) 125 | return p 126 | 127 | def _get_client_config(self) -> KafkaClientConfig: 128 | kwargs = KafkaClientConfig( 129 | bootstrap_servers=settings.get("KAFKA_BOOTSTRAP_SERVERS"), 130 | auto_offset_reset="earliest", 131 | enable_auto_commit=False, 132 | consumer_timeout_ms=1000, 133 | ) 134 | kwargs.update(settings.get("KAFKA_KWARGS", {})) 135 | kwargs.update(settings.get("KAFKA_CONSUMER_KWARGS", {})) 136 | kwargs.update(self.client_kwargs) # type: ignore[typeddict-item] 137 | return kwargs 138 | 139 | 140 | class Producer(ProducerBackend): 141 | _client = None 142 | 143 | @property 144 | def client(self) -> kafka.KafkaProducer: 145 | if not self._client: 146 | kwargs = self._get_client_config() 147 | self._client = kafka.KafkaProducer(**kwargs) 148 | return self._client 149 | 150 | def send(self, topic_name: str, key: str, value: bytes) -> RecordMetadata: 151 | keybytes = key.encode() 152 | timeout = settings.get("KAFKA_SEND_TIMEOUT", 10) 153 | future = self.client.send(topic_name, key=keybytes, value=value) 154 | metadata = future.get(timeout=timeout) 155 | return RecordMetadata( 156 | topic=topic_name, 157 | partition=metadata.partition, 158 | offset=metadata.offset, 159 | ) 160 | 161 | def _get_client_config(self) -> KafkaClientConfig: 162 | servers = settings.get("KAFKA_BOOTSTRAP_SERVERS") 163 | retries = settings.get("KAFKA_MAX_SEND_RETRIES", 0) 164 | kwargs = KafkaClientConfig( 165 | bootstrap_servers=servers, 166 | retries=retries, 167 | ) 168 | kwargs.update(settings.get("KAFKA_KWARGS", {})) 169 | return kwargs 170 | -------------------------------------------------------------------------------- /logpipe/tests/unit/test_producer.py: -------------------------------------------------------------------------------- 1 | from unittest import mock 2 | 3 | from django.test import TestCase 4 | from pydantic import computed_field 5 | from rest_framework import serializers 6 | 7 | from logpipe import DRFProducer, PydanticProducer 8 | from logpipe.tests.common import ( 9 | TOPIC_STATES, 10 | State_Pydantic, 11 | StateModel, 12 | StateSerializer_DRF, 13 | ) 14 | 15 | 16 | class CustomStateSerializer_DRF(StateSerializer_DRF): 17 | my_ser_method_field = serializers.SerializerMethodField() 18 | 19 | def get_my_ser_method_field(self, obj): 20 | return f"value-{obj.code}" 21 | 22 | 23 | class CustomState_Pydantic(State_Pydantic): 24 | @computed_field 25 | def my_ser_method_field(self) -> str: 26 | return f"value-{self.code}" 27 | 28 | 29 | class DRFProducerTest(TestCase): 30 | def test_send_serializer_method_field(self): 31 | fake_client = mock.MagicMock() 32 | 
fake_client.send = mock.MagicMock() 33 | 34 | def check_args(topic, key, value): 35 | self.assertEqual(topic, TOPIC_STATES) 36 | self.assertEqual(key, "NY") 37 | self.assertJSONEqual( 38 | value.decode().replace("json:", ""), 39 | { 40 | "type": "us-state", 41 | "version": 1, 42 | "message": { 43 | "code": "NY", 44 | "name": "New York", 45 | "my_ser_method_field": "value-NY", 46 | }, 47 | }, 48 | ) 49 | 50 | fake_client.send.side_effect = check_args 51 | 52 | get_producer_backend = mock.MagicMock() 53 | get_producer_backend.return_value = fake_client 54 | 55 | with mock.patch("logpipe.producer.get_producer_backend", get_producer_backend): 56 | producer = DRFProducer(TOPIC_STATES, CustomStateSerializer_DRF) 57 | 58 | ny = StateModel( 59 | id=5, 60 | code="NY", 61 | name="New York", 62 | ) 63 | producer.send(ny) 64 | 65 | self.assertEqual(fake_client.send.call_count, 1) 66 | 67 | def test_send_with_producer_id(self): 68 | fake_client = mock.MagicMock() 69 | fake_client.send = mock.MagicMock() 70 | 71 | def check_args(topic, key, value): 72 | self.assertEqual(topic, TOPIC_STATES) 73 | self.assertEqual(key, "NY") 74 | self.assertJSONEqual( 75 | value.decode().replace("json:", ""), 76 | { 77 | "type": "us-state", 78 | "version": 1, 79 | "producer": "my-producer-app", 80 | "message": { 81 | "code": "NY", 82 | "name": "New York", 83 | "my_ser_method_field": "value-NY", 84 | }, 85 | }, 86 | ) 87 | 88 | fake_client.send.side_effect = check_args 89 | 90 | get_producer_backend = mock.MagicMock() 91 | get_producer_backend.return_value = fake_client 92 | 93 | with mock.patch("logpipe.producer.get_producer_backend", get_producer_backend): 94 | producer = DRFProducer( 95 | TOPIC_STATES, 96 | CustomStateSerializer_DRF, 97 | producer_id="my-producer-app", 98 | ) 99 | 100 | ny = StateModel( 101 | id=5, 102 | code="NY", 103 | name="New York", 104 | ) 105 | producer.send(ny) 106 | 107 | self.assertEqual(fake_client.send.call_count, 1) 108 | 109 | 110 | class PydanticProducerTest(TestCase): 111 | def test_send_serializer_method_field(self): 112 | fake_client = mock.MagicMock() 113 | fake_client.send = mock.MagicMock() 114 | 115 | def check_args(topic, key, value): 116 | self.assertEqual(topic, TOPIC_STATES) 117 | self.assertEqual(key, "NY") 118 | self.assertJSONEqual( 119 | value.decode().replace("json:", ""), 120 | { 121 | "type": "us-state", 122 | "version": 1, 123 | "message": { 124 | "code": "NY", 125 | "name": "New York", 126 | "my_ser_method_field": "value-NY", 127 | }, 128 | }, 129 | ) 130 | 131 | fake_client.send.side_effect = check_args 132 | 133 | get_producer_backend = mock.MagicMock() 134 | get_producer_backend.return_value = fake_client 135 | 136 | with mock.patch("logpipe.producer.get_producer_backend", get_producer_backend): 137 | producer = PydanticProducer(TOPIC_STATES) 138 | 139 | ny = CustomState_Pydantic( 140 | id=5, 141 | code="NY", 142 | name="New York", 143 | ) 144 | producer.send(ny) 145 | 146 | self.assertEqual(fake_client.send.call_count, 1) 147 | 148 | def test_send_with_producer_id(self): 149 | fake_client = mock.MagicMock() 150 | fake_client.send = mock.MagicMock() 151 | 152 | def check_args(topic, key, value): 153 | self.assertEqual(topic, TOPIC_STATES) 154 | self.assertEqual(key, "NY") 155 | self.assertJSONEqual( 156 | value.decode().replace("json:", ""), 157 | { 158 | "type": "us-state", 159 | "version": 1, 160 | "producer": "my-producer-app", 161 | "message": { 162 | "code": "NY", 163 | "name": "New York", 164 | "my_ser_method_field": "value-NY", 165 | }, 166 | }, 167 | ) 168 | 
169 | fake_client.send.side_effect = check_args 170 | 171 | get_producer_backend = mock.MagicMock() 172 | get_producer_backend.return_value = fake_client 173 | 174 | with mock.patch("logpipe.producer.get_producer_backend", get_producer_backend): 175 | producer = PydanticProducer( 176 | TOPIC_STATES, 177 | producer_id="my-producer-app", 178 | ) 179 | 180 | ny = CustomState_Pydantic( 181 | id=5, 182 | code="NY", 183 | name="New York", 184 | ) 185 | producer.send(ny) 186 | 187 | self.assertEqual(fake_client.send.call_count, 1) 188 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## v1.5.1 (2025-09-03) 2 | 3 | ### Fix 4 | 5 | - update docker image tag format 6 | - **deps**: update boto to ^1.37.36 7 | - **deps**: update boto 8 | - **deps**: update dependency pydantic to ^2.11.3 9 | - **deps**: update dependency kafka-python to ^2.1.5 10 | - **deps**: update dependency pydantic to ^2.11.2 11 | - **deps**: update boto to ^1.37.27 12 | 13 | ### Refactor 14 | 15 | - migrate black/flake8 -> ruff 16 | - migrate from poetry -> uv 17 | 18 | ## v1.5.0 (2025-04-03) 19 | 20 | ### Feat 21 | 22 | - support Django 5.2. Drop Django 5.0 23 | 24 | ### Fix 25 | 26 | - **deps**: update dependency pydantic to ^2.11.1 27 | - **deps**: update dependency djangorestframework to >=3.16.0 28 | - **deps**: update dependency pydantic to ^2.11.0 29 | - **deps**: update dependency kafka-python to ^2.1.4 30 | - **deps**: update boto to ^1.37.22 31 | - **deps**: update dependency kafka-python to ^2.1.3 32 | - **deps**: update boto to ^1.37.17 33 | - **deps**: update dependency kafka-python to ^2.1.2 34 | - **deps**: update dependency kafka-python to ^2.1.1 35 | - **deps**: update boto to ^1.37.12 36 | 37 | ### Refactor 38 | 39 | - add pyupgrade / django-upgrade precommit hooks 40 | 41 | ## v1.4.12 (2025-03-10) 42 | 43 | ### Fix 44 | 45 | - fix typo in LogPipeMessageError which swallows messages 46 | - **deps**: update boto to ^1.37.8 47 | - **deps**: update dependency kafka-python to ^2.0.6 48 | - **deps**: update boto 49 | - **deps**: update dependency kafka-python to ^2.0.5 50 | - **deps**: update dependency kafka-python to ^2.0.4 51 | - **deps**: update boto to ^1.36.26 52 | 53 | ## v1.4.11 (2025-02-17) 54 | 55 | ### Fix 56 | 57 | - pydantic Producer PartitionKey when KEY_FIELD is set to an instance of RootModel 58 | - **deps**: update boto to ^1.36.20 59 | - update kafka-python dependency to 2.0.3 60 | 61 | ## v1.4.10 (2025-02-06) 62 | 63 | ### Fix 64 | 65 | - improper handling of Pydantic validation errors 66 | - **deps**: update boto to ^1.36.10 67 | - **deps**: update dependency pydantic to ^2.10.6 68 | - **deps**: update boto to ^1.36.5 69 | 70 | ## v1.4.9 (2025-01-23) 71 | 72 | ### Fix 73 | 74 | - allow the customization of ShardIteratorType through the settings (!224) 75 | 76 | ## v1.4.8 (2025-01-23) 77 | 78 | ### Fix 79 | 80 | - add type checking to sandbox app 81 | - add django 5.1 and Python 3.13 to tests 82 | - incorrect type annotations in DRFSerializer.lookup_instance 83 | - **deps**: update boto to ^1.36.1 84 | 85 | ## v1.4.7 (2025-01-14) 86 | 87 | ### Fix 88 | 89 | - **deps**: update dependency pydantic to ^2.10.5 90 | - **deps**: update boto to ^1.35.96 91 | - **deps**: update boto to ^1.35.91 92 | - **deps**: update boto to ^1.35.88 93 | - **deps**: update dependency pydantic to ^2.10.4 94 | - **deps**: update boto to ^1.35.85 95 | - **deps**: update boto to ^1.35.80 96 | - 
**deps**: update boto to ^1.35.76 97 | - **deps**: update dependency pydantic to ^2.10.3 98 | - **deps**: update boto to ^1.35.71 99 | - **deps**: update dependency pydantic to ^2.10.2 100 | - **deps**: update dependency pydantic to ^2.10.1 101 | - **deps**: update boto to ^1.35.67 102 | - **deps**: update boto to ^1.35.63 103 | - **deps**: update boto 104 | - **deps**: update boto to ^1.35.53 105 | - **deps**: update boto 106 | - lint 107 | - **deps**: update boto to ^1.35.43 108 | - **deps**: update boto to ^1.35.33 109 | - **deps**: update boto to ^1.35.23 110 | - **deps**: update dependency pydantic to ^2.9.2 111 | - **deps**: update boto to ^1.35.18 112 | - **deps**: update dependency msgpack to ^1.1.0 113 | - **deps**: update dependency pydantic to ^2.9.1 114 | - **deps**: update dependency botocore-stubs to ^1.35.14 115 | - **deps**: update dependency pydantic to ^2.9.0 116 | - **deps**: update dependency boto3-stubs to ^1.35.14 117 | - **deps**: update dependency boto3 to ^1.35.14 118 | - **deps**: update dependency boto3-stubs to ^1.35.13 119 | - **deps**: update dependency boto3 to ^1.35.13 120 | - **deps**: update dependency boto3 to ^1.35.11 121 | - **deps**: update dependency botocore-stubs to ^1.35.10 122 | - **deps**: update dependency boto3-stubs to ^1.35.10 123 | - **deps**: update dependency boto3 to ^1.35.10 124 | - **deps**: update dependency boto3 to ^1.35.9 125 | 126 | ## v1.4.6 (2024-08-31) 127 | 128 | ### Fix 129 | 130 | - **deps**: update dependency boto3 to ^1.35.7 131 | - **deps**: update dependency botocore-stubs to ^1.35.6 132 | - **deps**: update dependency boto3-stubs to ^1.35.6 133 | - **deps**: update dependency boto3 to ^1.35.6 134 | - **deps**: update dependency botocore-stubs to ^1.35.5 135 | - **deps**: update dependency boto3-stubs to ^1.35.5 136 | - **deps**: update dependency boto3 to ^1.35.5 137 | - **deps**: update dependency boto3 to ^1.35.4 138 | - **deps**: update dependency boto3 to ^1.35.2 139 | - **deps**: update dependency boto3-stubs to ^1.35.1 140 | - **deps**: update dependency boto3 to ^1.35.1 141 | - **deps**: update dependency botocore-stubs to ^1.35.0 142 | 143 | ## v1.4.5 (2024-08-20) 144 | 145 | ### Fix 146 | 147 | - AttributeError: type object Serializer has no attribute _tag 148 | - **deps**: update dependency boto3-stubs to ^1.35.0 149 | - **deps**: update dependency boto3 to ^1.35.0 150 | - **deps**: update dependency boto3-stubs to ^1.34.162 151 | - **deps**: update dependency boto3 to ^1.34.162 152 | - **deps**: update dependency boto3-stubs to ^1.34.160 153 | - **deps**: update dependency botocore-stubs to ^1.34.159 154 | - **deps**: update dependency boto3-stubs to ^1.34.159 155 | 156 | ## v1.4.4 (2024-08-14) 157 | 158 | ### Fix 159 | 160 | - kafka-python dep declaration 161 | 162 | ## v1.4.3 (2024-08-14) 163 | 164 | ### Fix 165 | 166 | - tox extras 167 | 168 | ## v1.4.2 (2024-08-14) 169 | 170 | ### Fix 171 | 172 | - fix errant pkg extra definitions 173 | 174 | ## v1.4.1 (2024-08-14) 175 | 176 | ### Fix 177 | 178 | - fix missing dependency on boto3-stubs when using Kinesis 179 | 180 | ## v1.4.1b0 (2024-08-08) 181 | 182 | ### Fix 183 | 184 | - **deps**: update dependency pydantic to ^2.8.2 185 | - **deps**: update dependency pydantic to ^2.8.0 186 | - **deps**: update dependency django to >=5.0.6 187 | - **deps**: update dependency djangorestframework to >=3.15.2 188 | - **deps**: update dependency django to >=4.2.13 189 | - **deps**: update dependency pydantic to ^2.7.4 190 | - **deps**: update dependency lru-dict to >=1.3.0 
191 | - **deps**: update dependency djangorestframework to v3.15.2 192 | - **deps**: update dependency pydantic to v2.7.4 193 | - **deps**: update dependency pydantic to v2.7.3 194 | - **deps**: update dependency pydantic to v2.7.2 195 | - **deps**: update dependency djangorestframework to v3.15.1 196 | - **deps**: update dependency django to v5.0.6 197 | 198 | ## v1.4.0 (2024-02-13) 199 | 200 | ## v1.3.0 (2023-06-07) 201 | 202 | ## v1.2.0 (2023-03-15) 203 | 204 | ## v1.1.0 (2021-05-27) 205 | 206 | ## v1.0.0 (2020-02-19) 207 | 208 | ## v0.3.2 (2019-12-12) 209 | 210 | ## v0.3.1 (2019-07-10) 211 | 212 | ## v0.3.0 (2018-11-28) 213 | 214 | ## v0.2.1 (2018-01-09) 215 | 216 | ## v0.2.0 (2017-10-04) 217 | 218 | ## v0.1.0 (2024-08-08) 219 | -------------------------------------------------------------------------------- /logpipe/tests/unit/kafka/test_consumer.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import MagicMock, patch 2 | import binascii 3 | 4 | from django.test import override_settings 5 | from kafka.consumer.fetcher import ConsumerRecord 6 | from kafka.structs import TopicPartition 7 | from rest_framework.exceptions import ValidationError 8 | 9 | from logpipe import Consumer 10 | from logpipe.exceptions import InvalidMessageError, UnknownMessageVersionError 11 | from logpipe.tests.common import TOPIC_STATES, BaseTest 12 | 13 | LOGPIPE = { 14 | "KAFKA_BOOTSTRAP_SERVERS": ["kafka:9092"], 15 | } 16 | 17 | 18 | class ConsumerTest(BaseTest): 19 | @override_settings(LOGPIPE=LOGPIPE) 20 | @patch("kafka.KafkaConsumer") 21 | def test_normal_consume(self, KafkaConsumer): 22 | # Make a fake consumer to generate a message 23 | fake_kafka_consumer = self.mock_consumer( 24 | KafkaConsumer, 25 | value=b'json:{"message":{"code":"NY","name":"New York"},"version":1,"type":"us-state"}', 26 | max_calls=100, 27 | ) 28 | 29 | # Test the values sent to our serializer match the message 30 | def save(ser): 31 | self.assertEqual(ser.validated_data["code"], "NY") 32 | self.assertEqual(ser.validated_data["name"], "New York") 33 | 34 | FakeStateSerializer = self.mock_state_serializer_drf(save) 35 | 36 | # Consume a message 37 | consumer = Consumer(TOPIC_STATES, consumer_timeout_ms=500) 38 | consumer.register(FakeStateSerializer) 39 | consumer.run(iter_limit=1) 40 | 41 | # Test the expected mocks where called 42 | KafkaConsumer.assert_called_once_with( 43 | auto_offset_reset="earliest", 44 | bootstrap_servers=["kafka:9092"], 45 | consumer_timeout_ms=500, 46 | enable_auto_commit=False, 47 | ) 48 | fake_kafka_consumer.partitions_for_topic.assert_called_once_with(TOPIC_STATES) 49 | fake_kafka_consumer.assign.assert_called_once_with( 50 | [ 51 | TopicPartition(partition=0, topic=TOPIC_STATES), 52 | TopicPartition(partition=1, topic=TOPIC_STATES), 53 | ] 54 | ) 55 | 56 | self.assertEqual(KafkaConsumer.call_count, 1) 57 | self.assertEqual(FakeStateSerializer.call_count, 1) 58 | self.assertEqual(fake_kafka_consumer.__next__.call_count, 1) 59 | self.assertEqual(self.serializers["state"].save.call_count, 1) 60 | 61 | consumer.run(iter_limit=1) 62 | 63 | self.assertEqual(KafkaConsumer.call_count, 1) 64 | self.assertEqual(FakeStateSerializer.call_count, 2) 65 | self.assertEqual(fake_kafka_consumer.__next__.call_count, 2) 66 | self.assertEqual(self.serializers["state"].save.call_count, 1) 67 | 68 | @patch("kafka.KafkaConsumer") 69 | def test_missing_version_throws(self, KafkaConsumer): 70 | self.mock_consumer(KafkaConsumer, 
value=b'json:{"message":{"code":"NY","name":"New York"}}') 71 | FakeStateSerializer = self.mock_state_serializer_drf() 72 | consumer = Consumer(TOPIC_STATES, consumer_timeout_ms=500, throw_errors=True) 73 | with self.assertRaises(InvalidMessageError): 74 | consumer.run(iter_limit=1) 75 | self.assertEqual(FakeStateSerializer.call_count, 0) 76 | 77 | @patch("kafka.KafkaConsumer") 78 | def test_missing_version_ignored(self, KafkaConsumer): 79 | self.mock_consumer(KafkaConsumer, value=b'json:{"message":{"code":"NY","name":"New York"}}') 80 | FakeStateSerializer = self.mock_state_serializer_drf() 81 | consumer = Consumer(TOPIC_STATES, consumer_timeout_ms=500) 82 | consumer.run(iter_limit=1) 83 | self.assertEqual(FakeStateSerializer.call_count, 0) 84 | 85 | @patch("kafka.KafkaConsumer") 86 | def test_missing_message_throws(self, KafkaConsumer): 87 | self.mock_consumer(KafkaConsumer, value=b'json:{"version":1}') 88 | FakeStateSerializer = self.mock_state_serializer_drf() 89 | consumer = Consumer(TOPIC_STATES, consumer_timeout_ms=500, throw_errors=True) 90 | with self.assertRaises(InvalidMessageError): 91 | consumer.run(iter_limit=1) 92 | self.assertEqual(FakeStateSerializer.call_count, 0) 93 | 94 | @patch("kafka.KafkaConsumer") 95 | def test_missing_message_ignored(self, KafkaConsumer): 96 | self.mock_consumer(KafkaConsumer, value=b'json:{"version":1}') 97 | FakeStateSerializer = self.mock_state_serializer_drf() 98 | consumer = Consumer(TOPIC_STATES, consumer_timeout_ms=500) 99 | consumer.run(iter_limit=1) 100 | self.assertEqual(FakeStateSerializer.call_count, 0) 101 | 102 | @patch("kafka.KafkaConsumer") 103 | def test_unknown_version_throws(self, KafkaConsumer): 104 | self.mock_consumer( 105 | KafkaConsumer, 106 | value=b'json:{"message":{"code":"NY","name":"New York"},"version":2,"type":"us-state"}', 107 | ) 108 | FakeStateSerializer = self.mock_state_serializer_drf() 109 | 110 | consumer = Consumer(TOPIC_STATES, consumer_timeout_ms=500, throw_errors=True) 111 | consumer.register(FakeStateSerializer) 112 | with self.assertRaises(UnknownMessageVersionError): 113 | consumer.run(iter_limit=1) 114 | self.assertEqual(FakeStateSerializer.call_count, 0) 115 | 116 | @patch("kafka.KafkaConsumer") 117 | def test_unknown_version_ignored(self, KafkaConsumer): 118 | self.mock_consumer( 119 | KafkaConsumer, 120 | value=b'json:{"message":{"code":"NY","name":"New York"},"version":2,"type":"us-state"}', 121 | ) 122 | FakeStateSerializer = self.mock_state_serializer_drf() 123 | 124 | consumer = Consumer(TOPIC_STATES, consumer_timeout_ms=500) 125 | consumer.register(FakeStateSerializer) 126 | consumer.run(iter_limit=1) 127 | self.assertEqual(FakeStateSerializer.call_count, 0) 128 | 129 | @patch("kafka.KafkaConsumer") 130 | def test_invalid_message_throws(self, KafkaConsumer): 131 | self.mock_consumer( 132 | KafkaConsumer, 133 | value=b'json:{"message":{"code":"NYC","name":"New York"},"version":1,"type":"us-state"}', 134 | ) 135 | FakeStateSerializer = self.mock_state_serializer_drf() 136 | 137 | consumer = Consumer(TOPIC_STATES, consumer_timeout_ms=500, throw_errors=True) 138 | consumer.register(FakeStateSerializer) 139 | with self.assertRaises(ValidationError): 140 | consumer.run(iter_limit=1) 141 | self.assertEqual(FakeStateSerializer.call_count, 1) 142 | self.assertEqual(self.serializers["state"].save.call_count, 0) 143 | 144 | @patch("kafka.KafkaConsumer") 145 | def test_invalid_message_ignored(self, KafkaConsumer): 146 | self.mock_consumer( 147 | KafkaConsumer, 148 | 
value=b'json:{"message":{"code":"NYC","name":"New York"},"version":1,"type":"us-state"}', 149 | ) 150 | FakeStateSerializer = self.mock_state_serializer_drf() 151 | consumer = Consumer(TOPIC_STATES, consumer_timeout_ms=500) 152 | consumer.register(FakeStateSerializer) 153 | consumer.run(iter_limit=1) 154 | self.assertEqual(FakeStateSerializer.call_count, 1) 155 | self.assertEqual(self.serializers["state"].save.call_count, 0) 156 | 157 | @patch("kafka.KafkaConsumer") 158 | def test_ignored_message_type_is_ignored(self, KafkaConsumer): 159 | self.mock_consumer( 160 | KafkaConsumer, 161 | value=b'json:{"message":{"code":"NY","name":"New York"},"version":1,"type":"us-state"}', 162 | ) 163 | FakeStateSerializer = self.mock_state_serializer_drf() 164 | consumer = Consumer(TOPIC_STATES, consumer_timeout_ms=500) 165 | consumer.add_ignored_message_type("us-state") 166 | consumer.register(FakeStateSerializer) 167 | consumer.run(iter_limit=1) 168 | # Even though message is valid, the serializer should never get called since message type is explicitly ignored. 169 | self.assertEqual(FakeStateSerializer.call_count, 0) 170 | self.assertTrue("state" not in self.serializers) 171 | 172 | def mock_consumer(self, KafkaConsumer, value, max_calls=1): 173 | # Mock a consumer object 174 | fake_kafka_consumer = MagicMock() 175 | 176 | # Should return a record when used as an iterator. Set up the mock to 177 | # return the record up to the limit of max_calls. Then raises StopIteration 178 | record = ConsumerRecord( 179 | topic=TOPIC_STATES, 180 | partition=0, 181 | leader_epoch=-1, 182 | offset=42, 183 | timestamp=1467649216540, 184 | timestamp_type=0, 185 | key=b"NY", 186 | value=value, 187 | headers=None, 188 | checksum=binascii.crc32(value), 189 | serialized_key_size=b"NY", 190 | serialized_value_size=value, 191 | serialized_header_size=0, 192 | ) 193 | 194 | meta = {"i": 0} 195 | 196 | def _iter(*args, **kwargs): 197 | if meta["i"] >= max_calls: 198 | raise StopIteration() 199 | meta["i"] += 1 200 | return record 201 | 202 | fake_kafka_consumer.__next__.side_effect = _iter 203 | 204 | # Return some partitions 205 | fake_kafka_consumer.partitions_for_topic.return_value = {0, 1} 206 | 207 | # Make class instantiation return our mock 208 | KafkaConsumer.return_value = fake_kafka_consumer 209 | 210 | return fake_kafka_consumer 211 | -------------------------------------------------------------------------------- /logpipe/tests/unit/kinesis/test_consumer.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import MagicMock 2 | 3 | from django.test import override_settings 4 | from moto import mock_aws 5 | from rest_framework.exceptions import ValidationError 6 | import boto3 7 | 8 | from logpipe import Consumer 9 | from logpipe.exceptions import InvalidMessageError, UnknownMessageVersionError 10 | from logpipe.tests.common import TOPIC_STATES, BaseTest 11 | 12 | LOGPIPE = { 13 | "OFFSET_BACKEND": "logpipe.backend.kinesis.ModelOffsetStore", 14 | "PRODUCER_BACKEND": "logpipe.backend.kinesis.Producer", 15 | "CONSUMER_BACKEND": "logpipe.backend.kinesis.Consumer", 16 | } 17 | 18 | 19 | class ConsumerTest(BaseTest): 20 | @override_settings(LOGPIPE=LOGPIPE) 21 | @mock_aws 22 | def test_normal_consume(self): 23 | self.make_stream_with_record( 24 | "NY", 25 | b'json:{"message":{"code":"NY","name":"New York"},"version":1,"type":"us-state"}', 26 | ) 27 | 28 | # Test the values sent to our serializer match the message 29 | def save(ser): 30 | 
self.assertEqual(ser.validated_data["code"], "NY") 31 | self.assertEqual(ser.validated_data["name"], "New York") 32 | 33 | FakeStateSerializer = self.mock_state_serializer_drf(save) 34 | 35 | # Consume a message 36 | consumer = Consumer(TOPIC_STATES) 37 | consumer.register(FakeStateSerializer) 38 | 39 | consumer.run(iter_limit=10) 40 | self.assertEqual(self.serializers["state"].save.call_count, 1) 41 | consumer.run(iter_limit=10) 42 | self.assertEqual(self.serializers["state"].save.call_count, 1) 43 | 44 | @override_settings(LOGPIPE=LOGPIPE) 45 | @mock_aws 46 | def test_multi_shard_consume(self): 47 | # Send a bunch of messages to a bunch of shards 48 | key = 1 49 | value = b'json:{"message":{"code":"NY","name":"New York"},"version":1,"type":"us-state"}' 50 | client = self.make_stream_with_record(str(key), value, shard_count=20) 51 | for i in range(100): 52 | key += 1 53 | client.put_record(StreamName=TOPIC_STATES, Data=value, PartitionKey=str(key)) 54 | 55 | # Test the values sent to our serializer match the message 56 | test = {"i": 0} 57 | 58 | def save(ser): 59 | self.assertEqual(ser.validated_data["code"], "NY") 60 | self.assertEqual(ser.validated_data["name"], "New York") 61 | test["i"] += 1 62 | 63 | FakeStateSerializer = self.mock_state_serializer_drf(save) 64 | 65 | # Consume messages. Log should have 101 messages in it now. 66 | consumer = Consumer(TOPIC_STATES) 67 | consumer.register(FakeStateSerializer) 68 | consumer.run(iter_limit=2000) 69 | self.assertEqual(FakeStateSerializer.call_count, 101) 70 | self.assertEqual(test["i"], 101) 71 | 72 | @override_settings(LOGPIPE=LOGPIPE) 73 | @mock_aws 74 | def test_missing_version_throws(self): 75 | self.make_stream_with_record("NY", b'json:{"message":{"code":"NY","name":"New York"}}') 76 | FakeStateSerializer = self.mock_state_serializer_drf() 77 | consumer = Consumer(TOPIC_STATES, consumer_timeout_ms=500, throw_errors=True) 78 | with self.assertRaises(InvalidMessageError): 79 | consumer.run(iter_limit=1) 80 | self.assertEqual(FakeStateSerializer.call_count, 0) 81 | 82 | @override_settings(LOGPIPE=LOGPIPE) 83 | @mock_aws 84 | def test_missing_version_ignored(self): 85 | self.make_stream_with_record("NY", b'json:{"message":{"code":"NY","name":"New York"}}') 86 | FakeStateSerializer = self.mock_state_serializer_drf() 87 | consumer = Consumer(TOPIC_STATES, consumer_timeout_ms=500) 88 | consumer.run(iter_limit=1) 89 | self.assertEqual(FakeStateSerializer.call_count, 0) 90 | 91 | @override_settings(LOGPIPE=LOGPIPE) 92 | @mock_aws 93 | def test_missing_message_throws(self): 94 | self.make_stream_with_record("NY", b'json:{"version":1}') 95 | FakeStateSerializer = self.mock_state_serializer_drf() 96 | consumer = Consumer(TOPIC_STATES, consumer_timeout_ms=500, throw_errors=True) 97 | with self.assertRaises(InvalidMessageError): 98 | consumer.run(iter_limit=1) 99 | self.assertEqual(FakeStateSerializer.call_count, 0) 100 | 101 | @override_settings(LOGPIPE=LOGPIPE) 102 | @mock_aws 103 | def test_missing_message_ignored(self): 104 | self.make_stream_with_record("NY", b'json:{"version":1}') 105 | FakeStateSerializer = self.mock_state_serializer_drf() 106 | consumer = Consumer(TOPIC_STATES, consumer_timeout_ms=500) 107 | consumer.run(iter_limit=1) 108 | self.assertEqual(FakeStateSerializer.call_count, 0) 109 | 110 | @override_settings(LOGPIPE=LOGPIPE) 111 | @mock_aws 112 | def test_unknown_version_throws(self): 113 | self.make_stream_with_record( 114 | "NY", 115 | b'json:{"message":{"code":"NY","name":"New York"},"version":2,"type":"us-state"}', 116 
| ) 117 | FakeStateSerializer = self.mock_state_serializer_drf() 118 | consumer = Consumer(TOPIC_STATES, consumer_timeout_ms=500, throw_errors=True) 119 | consumer.register(FakeStateSerializer) 120 | with self.assertRaises(UnknownMessageVersionError): 121 | consumer.run(iter_limit=1) 122 | self.assertEqual(FakeStateSerializer.call_count, 0) 123 | 124 | @override_settings(LOGPIPE=LOGPIPE) 125 | @mock_aws 126 | def test_unknown_version_ignored(self): 127 | self.make_stream_with_record( 128 | "NY", 129 | b'json:{"message":{"code":"NY","name":"New York"},"version":2,"type":"us-state"}', 130 | ) 131 | FakeStateSerializer = self.mock_state_serializer_drf() 132 | consumer = Consumer(TOPIC_STATES, consumer_timeout_ms=500) 133 | consumer.register(FakeStateSerializer) 134 | consumer.run(iter_limit=1) 135 | self.assertEqual(FakeStateSerializer.call_count, 0) 136 | 137 | @override_settings(LOGPIPE=LOGPIPE) 138 | @mock_aws 139 | def test_invalid_message_throws(self): 140 | self.make_stream_with_record( 141 | "NY", 142 | b'json:{"message":{"code":"NYC","name":"New York"},"version":1,"type":"us-state"}', 143 | ) 144 | FakeStateSerializer = self.mock_state_serializer_drf() 145 | consumer = Consumer(TOPIC_STATES, consumer_timeout_ms=500, throw_errors=True) 146 | consumer.register(FakeStateSerializer) 147 | with self.assertRaises(ValidationError): 148 | consumer.run(iter_limit=1) 149 | self.assertEqual(FakeStateSerializer.call_count, 1) 150 | self.assertEqual(self.serializers["state"].save.call_count, 0) 151 | 152 | @override_settings(LOGPIPE=LOGPIPE) 153 | @mock_aws 154 | def test_invalid_message_throws_pydantic(self): 155 | self.make_stream_with_record( 156 | "NY", 157 | b'json:{"message":{"code":"NYC","name":"New York"},"version":1,"type":"us-state"}', 158 | ) 159 | save = MagicMock() 160 | FakeStateSerializer = self.mock_state_serializer_pydantic(save) 161 | consumer = Consumer(TOPIC_STATES, consumer_timeout_ms=500, throw_errors=True) 162 | consumer.register(FakeStateSerializer) 163 | with self.assertRaises(ValidationError): 164 | consumer.run(iter_limit=1) 165 | self.assertEqual(save.call_count, 0) 166 | 167 | @override_settings(LOGPIPE=LOGPIPE) 168 | @mock_aws 169 | def test_invalid_message_ignored(self): 170 | self.make_stream_with_record( 171 | "NY", 172 | b'json:{"message":{"code":"NYC","name":"New York"},"version":1,"type":"us-state"}', 173 | ) 174 | FakeStateSerializer = self.mock_state_serializer_drf() 175 | consumer = Consumer(TOPIC_STATES, consumer_timeout_ms=500) 176 | consumer.register(FakeStateSerializer) 177 | consumer.run(iter_limit=1) 178 | self.assertEqual(FakeStateSerializer.call_count, 1) 179 | self.assertEqual(self.serializers["state"].save.call_count, 0) 180 | 181 | @override_settings(LOGPIPE=LOGPIPE) 182 | @mock_aws 183 | def test_invalid_message_ignored_pydantic(self): 184 | self.make_stream_with_record( 185 | "NY", 186 | b'json:{"message":{"code":"NYC","name":"New York"},"version":1,"type":"us-state"}', 187 | ) 188 | save = MagicMock() 189 | FakeStateSerializer = self.mock_state_serializer_pydantic(save) 190 | consumer = Consumer(TOPIC_STATES, consumer_timeout_ms=500) 191 | consumer.register(FakeStateSerializer) 192 | consumer.run(iter_limit=1) 193 | self.assertEqual(save.call_count, 0) 194 | 195 | @override_settings(LOGPIPE=LOGPIPE) 196 | @mock_aws 197 | def test_ignored_message_type_is_ignored(self): 198 | self.make_stream_with_record( 199 | "NY", 200 | b'json:{"message":{"code":"NY","name":"New York"},"version":1,"type":"us-state"}', 201 | ) 202 | FakeStateSerializer = 
self.mock_state_serializer_drf() 203 | consumer = Consumer(TOPIC_STATES, consumer_timeout_ms=500) 204 | consumer.add_ignored_message_type("us-state") 205 | consumer.register(FakeStateSerializer) 206 | consumer.run(iter_limit=1) 207 | # Even though message is valid, the serializer should never get called since message type is explicitly ignored. 208 | self.assertEqual(FakeStateSerializer.call_count, 0) 209 | self.assertTrue("state" not in self.serializers) 210 | 211 | def make_stream_with_record(self, key, value, shard_count=1): 212 | client = boto3.client("kinesis", region_name="us-east-1") 213 | client.create_stream(StreamName=TOPIC_STATES, ShardCount=shard_count) 214 | client.put_record(StreamName=TOPIC_STATES, Data=value, PartitionKey=key) 215 | return client 216 | -------------------------------------------------------------------------------- /logpipe/consumer.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Generator, Iterator 2 | from typing import Any, TypeVar, cast 3 | import itertools 4 | import logging 5 | import time 6 | 7 | from django.db import models, transaction 8 | from rest_framework import serializers 9 | import pydantic_core 10 | 11 | from . import settings 12 | from .abc import ( 13 | ConsumerBackend, 14 | DRFSerializer, 15 | MessageType, 16 | MessageVersion, 17 | PydanticModel, 18 | Record, 19 | Serializer, 20 | SerializerClass, 21 | is_pydantic_serializer_class, 22 | ) 23 | from .backend import get_consumer_backend, get_offset_backend 24 | from .exceptions import ( 25 | IgnoredMessageTypeError, 26 | InvalidMessageError, 27 | UnknownMessageTypeError, 28 | UnknownMessageVersionError, 29 | ValidationError, 30 | ) 31 | from .format import parse 32 | 33 | logger = logging.getLogger(__name__) 34 | 35 | _Ser = TypeVar("_Ser", bound=Serializer) 36 | _DRFSer = TypeVar("_DRFSer", bound=DRFSerializer[Any]) 37 | 38 | 39 | class Consumer(Iterator[tuple[Record, Serializer]]): 40 | consumer: ConsumerBackend 41 | throw_errors: bool 42 | serializer_classes: dict[MessageType, dict[MessageVersion, SerializerClass]] 43 | ignored_message_types: set[MessageType] 44 | 45 | def __init__(self, topic_name: str, throw_errors: bool = False, **kwargs: Any): 46 | self.consumer = get_consumer_backend(topic_name, **kwargs) 47 | self.throw_errors = throw_errors 48 | self.serializer_classes = {} 49 | self.ignored_message_types = set() 50 | 51 | def __iter__(self) -> Iterator[tuple[Record, Serializer]]: 52 | if self.throw_errors: 53 | return self 54 | return self._error_handler() 55 | 56 | def __next__(self) -> tuple[Record, Serializer]: 57 | return self._get_next_message() 58 | 59 | def add_ignored_message_type(self, message_type: MessageType) -> None: 60 | self.ignored_message_types.add(message_type) 61 | 62 | def commit(self, message: Record) -> None: 63 | get_offset_backend().commit(self.consumer, message) 64 | 65 | def register(self, serializer_class: SerializerClass) -> None: 66 | message_type = serializer_class.MESSAGE_TYPE 67 | version = serializer_class.VERSION 68 | if message_type not in self.serializer_classes: 69 | self.serializer_classes[message_type] = {} 70 | self.serializer_classes[message_type][version] = serializer_class 71 | 72 | def run(self, iter_limit: int = 0) -> None: 73 | i = 0 74 | for message, serializer in self: 75 | with transaction.atomic(): 76 | try: 77 | serializer.save() 78 | self.commit(message) 79 | except Exception as e: 80 | info = ( 81 | message.key, 82 | message.topic, 83 | message.partition, 84 
| message.offset, 85 | ) 86 | logger.exception('Failed to process message with key "%s" from topic "%s", partition "%s", offset "%s"' % info) 87 | raise e 88 | i += 1 89 | if iter_limit > 0 and i >= iter_limit: 90 | break 91 | 92 | def _error_handler(self) -> Generator[tuple[Record, Serializer], None, None]: 93 | while True: 94 | # Try to get the next message 95 | try: 96 | yield next(self) 97 | 98 | # Obey the laws of StopIteration 99 | except StopIteration: 100 | return 101 | 102 | # Message format was invalid in some way: log error and move on. 103 | except InvalidMessageError as e: 104 | logger.error(f"Failed to deserialize message in topic {self.consumer.topic_name}. Details: {e}") 105 | self.commit(e.message) 106 | 107 | # Message type has been explicitly ignored: skip it silently and move on. 108 | except IgnoredMessageTypeError as e: 109 | logger.debug(f"Skipping ignored message type in topic {self.consumer.topic_name}. Details: {e}") 110 | self.commit(e.message) 111 | 112 | # Message type is unknown: log error and move on. 113 | except UnknownMessageTypeError as e: 114 | logger.error(f"Skipping unknown message type in topic {self.consumer.topic_name}. Details: {e}") 115 | self.commit(e.message) 116 | 117 | # Message version is unknown: log error and move on. 118 | except UnknownMessageVersionError as e: 119 | logger.error(f"Skipping unknown message version in topic {self.consumer.topic_name}. Details: {e}") 120 | self.commit(e.message) 121 | 122 | # Serializer for message type flagged message as invalid: log warning and move on. 123 | except ValidationError as e: 124 | logger.warning(f"Skipping invalid message in topic {self.consumer.topic_name}. Details: {e}") 125 | self.commit(e.message) 126 | 127 | pass 128 | 129 | def _get_next_message(self) -> tuple[Record, Serializer]: 130 | message = next(self.consumer) 131 | 132 | info = (message.key, message.topic, message.partition, message.offset) 133 | logger.debug('Received message with key "%s" from topic "%s", partition "%s", offset "%s"' % info) 134 | 135 | # Wait? 
136 | timestamp = getattr(message, "timestamp", None) or (time.time() * 1000) 137 | lag_ms = (time.time() * 1000) - timestamp 138 | logger.debug("Message lag is %sms" % lag_ms) 139 | wait_ms = settings.get("MIN_MESSAGE_LAG_MS", 0) - lag_ms 140 | if wait_ms > 0: 141 | logger.debug("Respecting MIN_MESSAGE_LAG_MS by waiting %sms" % wait_ms) 142 | time.sleep(wait_ms / 1000) 143 | logger.debug("Finished waiting") 144 | 145 | try: 146 | serializer = self._unserialize(message) 147 | except Exception as e: 148 | raise e 149 | 150 | return message, serializer 151 | 152 | def _unserialize(self, message: Record) -> Serializer: 153 | data = parse(message.value) 154 | if "type" not in data: 155 | raise InvalidMessageError('Received message missing a top-level "type" key.', message) 156 | if "version" not in data: 157 | raise InvalidMessageError('Received message missing a top-level "version" key.', message) 158 | if "message" not in data: 159 | raise InvalidMessageError('Received message missing a top-level "message" key.', message) 160 | 161 | message_type = data["type"] 162 | if message_type in self.ignored_message_types: 163 | raise IgnoredMessageTypeError( 164 | f'Received message with ignored type "{message_type}" in topic {message.topic}', 165 | message, 166 | ) 167 | if message_type not in self.serializer_classes: 168 | raise UnknownMessageTypeError( 169 | f'Received message with unknown type "{message_type}" in topic {message.topic}', 170 | message, 171 | ) 172 | 173 | version = data["version"] 174 | if version not in self.serializer_classes[message_type]: 175 | raise UnknownMessageVersionError( 176 | f'Received message of type "{message_type}" with unknown version "{version}" in topic {message.topic}', 177 | message, 178 | ) 179 | 180 | serializer_class = self.serializer_classes[message_type][version] 181 | 182 | instance = None 183 | if hasattr(serializer_class, "lookup_instance"): 184 | instance = serializer_class.lookup_instance(**data["message"]) 185 | serializer = self._construct_serializer_instance( 186 | serializer_class=serializer_class, 187 | message=message, 188 | instance=instance, 189 | data=data["message"], 190 | ) 191 | return serializer 192 | 193 | def _construct_serializer_instance( 194 | self, 195 | serializer_class: SerializerClass, 196 | message: Record, 197 | instance: models.Model | None, 198 | data: Any, 199 | ) -> Serializer: 200 | if is_pydantic_serializer_class(serializer_class): 201 | return self._construct_pydantic_serializer_instance( 202 | serializer_class=serializer_class, 203 | message=message, 204 | instance=instance, 205 | data=data, 206 | ) 207 | # TODO: this cast can go away once we can use TypeIs instead of 208 | # TypeGuard (added in Python 3.13).
209 | serializer_class = cast(type[DRFSerializer[Any]], serializer_class) 210 | return self._construct_drf_serializer_instance( 211 | serializer_class=serializer_class, 212 | message=message, 213 | instance=instance, 214 | data=data, 215 | ) 216 | 217 | def _construct_drf_serializer_instance( 218 | self, 219 | serializer_class: type[_DRFSer], 220 | message: Record, 221 | instance: models.Model | None, 222 | data: Any, 223 | ) -> _DRFSer: 224 | serializer = serializer_class(instance=instance, data=data) 225 | try: 226 | serializer.is_valid(raise_exception=True) 227 | except serializers.ValidationError as e: 228 | raise ValidationError(e, message) 229 | return serializer 230 | 231 | def _construct_pydantic_serializer_instance( 232 | self, 233 | serializer_class: type[PydanticModel], 234 | message: Record, 235 | instance: models.Model | None, 236 | data: Any, 237 | ) -> PydanticModel: 238 | try: 239 | serializer = serializer_class.model_validate(data) 240 | except pydantic_core.ValidationError as e: 241 | raise ValidationError(e, message) 242 | serializer._instance = instance # type: ignore[attr-defined] 243 | return serializer 244 | 245 | 246 | class MultiConsumer: 247 | consumers: list[Consumer] 248 | 249 | def __init__(self, *consumers: Consumer): 250 | self.consumers = list(consumers) 251 | 252 | def run(self, iter_limit: int = 0) -> None: 253 | i = 0 254 | for consumer in itertools.cycle(self.consumers): 255 | consumer.run(iter_limit=1) 256 | i += 1 257 | if iter_limit > 0 and i >= iter_limit: 258 | break 259 | -------------------------------------------------------------------------------- /logpipe/backend/kinesis.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING, Any, NotRequired, TypedDict 4 | import collections 5 | import logging 6 | import time 7 | 8 | from botocore.exceptions import ClientError 9 | from django.apps import apps 10 | from lru import LRU 11 | import boto3 12 | 13 | from .. import settings 14 | from ..abc import ( 15 | ConsumerBackend, 16 | OffsetStoreBackend, 17 | ProducerBackend, 18 | Record, 19 | RecordMetadata, 20 | ) 21 | from . 
import get_offset_backend 22 | 23 | if TYPE_CHECKING: 24 | from mypy_boto3_kinesis import KinesisClient 25 | from mypy_boto3_kinesis.type_defs import ( 26 | GetRecordsOutputTypeDef, 27 | PutRecordOutputTypeDef, 28 | ) 29 | 30 | logger = logging.getLogger(__name__) 31 | 32 | ShardID = str 33 | ShardIterator = str 34 | 35 | 36 | class KinesisClientConfig(TypedDict): 37 | region_name: str 38 | 39 | 40 | class PutRecordKwargs(TypedDict): 41 | StreamName: str 42 | Data: bytes 43 | PartitionKey: str 44 | SequenceNumberForOrdering: NotRequired[str] 45 | 46 | 47 | class KinesisBase: 48 | _client: KinesisClient | None = None 49 | 50 | @property 51 | def client(self) -> KinesisClient: 52 | if not self._client: 53 | kwargs = self._get_client_config() 54 | self._client = boto3.client("kinesis", **kwargs) 55 | return self._client 56 | 57 | def _get_client_config(self) -> KinesisClientConfig: 58 | return KinesisClientConfig( 59 | region_name=settings.get_aws_region(), 60 | ) 61 | 62 | 63 | class ModelOffsetStore(OffsetStoreBackend): 64 | def commit(self, consumer: ConsumerBackend, message: Record) -> None: 65 | KinesisOffset = apps.get_model(app_label="logpipe", model_name="KinesisOffset") 66 | region = settings.get_aws_region() 67 | logger.debug( 68 | 'Commit offset "%s" for region "%s", stream "%s", shard "%s" to %s' 69 | % ( 70 | message.offset, 71 | region, 72 | message.topic, 73 | message.partition, 74 | self.__class__.__name__, 75 | ) 76 | ) 77 | obj, created = KinesisOffset.objects.get_or_create(region=region, stream=message.topic, shard=message.partition) 78 | obj.sequence_number = message.offset 79 | obj.save() 80 | 81 | def seek(self, consumer: ConsumerBackend, stream: str, shard: str) -> None: 82 | KinesisOffset = apps.get_model(app_label="logpipe", model_name="KinesisOffset") 83 | region = settings.get_aws_region() 84 | try: 85 | obj = KinesisOffset.objects.get(region=settings.get_aws_region(), stream=stream, shard=shard) 86 | logger.debug(f'Seeking to offset "{obj.sequence_number}" on region "{region}", stream "{stream}", partition "{shard}"') 87 | consumer.seek_to_sequence_number(shard, obj.sequence_number) 88 | except KinesisOffset.DoesNotExist: 89 | logger.debug(f'Seeking to beginning of region "{region}", stream "{stream}", partition "{shard}"') 90 | consumer.seek_to_sequence_number(shard, None) 91 | 92 | 93 | class Consumer(KinesisBase, ConsumerBackend): 94 | def __init__(self, topic_name: str, **kwargs: Any): 95 | self.topic_name = topic_name 96 | self.client_kwargs = kwargs 97 | 98 | self.shards: collections.deque[ShardID] = collections.deque() 99 | self.records: collections.deque[Record] = collections.deque() 100 | self.shard_iters: dict[ShardID, ShardIterator] = {} 101 | 102 | shards = self._list_shard_ids() 103 | logger.debug("Found %d kinesis shards.", len(shards)) 104 | backend = get_offset_backend() 105 | for shard in shards: 106 | self.shards.append(shard) 107 | backend.seek(self, self.topic_name, shard) 108 | 109 | def seek_to_sequence_number(self, shard: str, sequence_number: str | None = None) -> None: 110 | if sequence_number is None: 111 | resp = self.client.get_shard_iterator( 112 | StreamName=self.topic_name, 113 | ShardId=shard, 114 | ShardIteratorType=settings.get("KINESIS_SHARD_ITERATOR_TYPE", default="TRIM_HORIZON"), 115 | ) 116 | else: 117 | resp = self.client.get_shard_iterator( 118 | StreamName=self.topic_name, 119 | ShardId=shard, 120 | ShardIteratorType="AFTER_SEQUENCE_NUMBER", 121 | StartingSequenceNumber=sequence_number, 122 | ) 123 | self.shard_iters[shard] 
= resp["ShardIterator"] 124 | 125 | def __iter__(self) -> Consumer: 126 | return self 127 | 128 | def __next__(self) -> Record: 129 | # Try and load records. Keep trying until either (1) we have some records or (2) current_lag drops to 0 130 | while len(self.records) <= 0: 131 | # Load a page from each shard and sum the shard lags 132 | current_lag = 0 133 | for i in range(len(self.shards)): 134 | current_lag += self._load_next_page() 135 | 136 | # If all shards report 0 lag, then give up trying to load records 137 | if current_lag <= 0: 138 | break 139 | 140 | # If we've tried all the shards and still don't have any records, stop iteration 141 | if len(self.records) == 0: 142 | raise StopIteration() 143 | 144 | # Return the left most record in the queue 145 | return self.records.popleft() 146 | 147 | def _load_next_page(self) -> int: 148 | # Load a page from the left-most shard in the queue 149 | try: 150 | shard = self.shards.popleft() 151 | except IndexError: 152 | return 0 153 | 154 | # Get the next shard iterator for the shard 155 | shard_iter = self.shard_iters.pop(shard, None) 156 | if not shard_iter: 157 | return 0 158 | 159 | # Fetch the records from Kinesis 160 | logger.debug("Loading page of records from %s.%s", self.topic_name, shard) 161 | fetch_limit = settings.get("KINESIS_FETCH_LIMIT", 25) 162 | response = self._get_records(shard_iter, fetch_limit) 163 | if response is None: 164 | return 0 165 | 166 | # This default value is mostly just for testing with Moto. Real Kinesis should always return a value for MillisBehindLatest. 167 | num_records = len(response["Records"]) 168 | if "MillisBehindLatest" in response: 169 | current_stream_lag = response["MillisBehindLatest"] 170 | else: 171 | current_stream_lag = 0 if num_records == 0 else 1 172 | logger.debug(f"Loaded {num_records} records from {self.topic_name}.{shard}. Currently {current_stream_lag}ms behind stream head.") 173 | 174 | # Add the records page into the queue 175 | timestamp = (time.time() * 1000) - current_stream_lag 176 | for r in response["Records"]: 177 | record = Record( 178 | topic=self.topic_name, 179 | partition=shard, 180 | offset=r["SequenceNumber"], 181 | timestamp=timestamp, 182 | key=r["PartitionKey"], 183 | value=r["Data"], 184 | ) 185 | self.records.append(record) 186 | 187 | # Add the shard back to the right of the queue and save the shard iterator for next time we need 188 | # to get records from this shard. If NextShardIterator is None, the shard has been closed and 189 | # we should remove it from the pool. 190 | if response.get("NextShardIterator", None): 191 | self.shard_iters[shard] = response["NextShardIterator"] 192 | self.shards.append(shard) 193 | else: 194 | logger.info(f"Shard {self.topic_name}.{shard} has been closed. Removing it from the fetch pool.") 195 | 196 | return current_stream_lag 197 | 198 | def _get_records( 199 | self, 200 | shard_iter: ShardIterator, 201 | fetch_limit: int, 202 | retries: int = 1, 203 | ) -> GetRecordsOutputTypeDef | None: 204 | i = 0 205 | while i <= retries: 206 | try: 207 | response = self.client.get_records(ShardIterator=shard_iter, Limit=fetch_limit) 208 | return response 209 | except ClientError as e: 210 | if e.response["Error"]["Code"] == "ProvisionedThroughputExceededException": 211 | logger.warning("Caught ProvisionedThroughputExceededException. 
Sleeping for 5 seconds.") 212 | time.sleep(5) 213 | else: 214 | logger.warning("Received {} from AWS API: {}".format(e.response["Error"]["Code"], e.response["Error"]["Message"])) 215 | i += 1 216 | logger.warning(f"After {i} attempts, couldn't get records from Kinesis. Giving up.") 217 | return None 218 | 219 | def _list_shard_ids(self) -> list[ShardID]: 220 | resp = self.client.describe_stream(StreamName=self.topic_name) 221 | return [shard["ShardId"] for shard in resp["StreamDescription"]["Shards"]] 222 | 223 | 224 | class Producer(KinesisBase, ProducerBackend): 225 | _last_sequence_numbers: LRU[str, dict[str, str]] = LRU(settings.get("KINESIS_SEQ_NUM_CACHE_SIZE", 1000)) 226 | 227 | def send(self, topic_name: str, key: str, value: bytes) -> RecordMetadata | None: 228 | kwargs = PutRecordKwargs( 229 | StreamName=topic_name, 230 | Data=value, 231 | PartitionKey=key, 232 | ) 233 | 234 | if topic_name not in self._last_sequence_numbers: 235 | self._last_sequence_numbers[topic_name] = {} 236 | last_seq_num = self._last_sequence_numbers[topic_name].get(key) 237 | if last_seq_num: 238 | kwargs["SequenceNumberForOrdering"] = last_seq_num 239 | 240 | metadata = self._send_and_retry(kwargs) 241 | if metadata is None: 242 | return None 243 | 244 | shard_id = metadata["ShardId"] 245 | seq_num = str(metadata["SequenceNumber"]) 246 | self._last_sequence_numbers[topic_name][key] = seq_num 247 | 248 | return RecordMetadata(topic=topic_name, partition=shard_id, offset=seq_num) 249 | 250 | def _send_and_retry(self, data: PutRecordKwargs, retries: int = 1) -> PutRecordOutputTypeDef | None: 251 | i = 0 252 | while i <= retries: 253 | try: 254 | metadata = self.client.put_record(**data) 255 | return metadata 256 | except ClientError as e: 257 | if e.response["Error"]["Code"] == "ProvisionedThroughputExceededException": 258 | logger.warning("Caught ProvisionedThroughputExceededException. Sleeping for 5 seconds.") 259 | time.sleep(5) 260 | else: 261 | logger.warning( 262 | "Received %s from AWS API: %s", 263 | e.response["Error"]["Code"], 264 | e.response["Error"]["Message"], 265 | ) 266 | i += 1 267 | logger.warning(f"After {i} attempts, couldn't send message to Kinesis. Giving up.") 268 | return None 269 | -------------------------------------------------------------------------------- /docs/usage.md: -------------------------------------------------------------------------------- 1 | # Usage Guide 2 | 3 | Usage of django-logpipe differs slightly based on if you choose to use [django-rest-framework (DRF)](https://www.django-rest-framework.org/) serializers or [Pydandic](https://docs.pydantic.dev) serializers. Continue reading to see how to handle each case. 4 | 5 | ## Serializers 6 | 7 | ### DRF Serializers 8 | 9 | The first step in either sending or receiving messages with `logpipe` is to define a serializer. Serializers for `logpipe` have a few rules: 10 | 11 | 1. Must be either a subclass of `rest_framework.serializers.Serializer` or a class implementing an interface that mimics `rest_framework.serializers.Serializer`. 12 | 1. Must have a `MESSAGE_TYPE` attribute defined on the class. The value should be a string that defines uniquely defines the data-type within it's Topic / Stream. 13 | 2. Must have a `VERSION` attribute defined on the class. The value should be a monotonic integer representing the schema version number. 14 | 3. Must have a `KEY_FIELD` attribute defined on the class, representing the name of the field to use as the message key. 
The message key is used by Kafka when performing log compaction and by Kinesis as the shard partition key. The property can be omitted for topics which do not require a key. 15 | 5. If the serializer will be used for incoming messages, it should implement the class method `lookup_instance(cls, **kwargs)`. This class method will be called with message data as keyword arguments directly before instantiating the serializer. It should look up and return the related object (if one exists) so that it can be passed to the serializer's `instance` argument during initialization. If no object exists yet (the message is representing a new object), it should return `None`. 16 | 17 | Below is a sample Django model and its accompanying serializer. 18 | 19 | ```py title="myapp/models.py" 20 | from django.db import models 21 | from rest_framework import serializers 22 | import uuid 23 | 24 | 25 | class Person(models.Model): 26 | uuid = models.UUIDField(default=uuid.uuid4, unique=True) 27 | first_name = models.CharField(max_length=200) 28 | last_name = models.CharField(max_length=200) 29 | 30 | 31 | class PersonSerializer(serializers.ModelSerializer): 32 | MESSAGE_TYPE = 'person' 33 | VERSION = 1 34 | KEY_FIELD = 'uuid' 35 | 36 | class Meta: 37 | model = Person 38 | fields = ['uuid', 'first_name', 'last_name'] 39 | 40 | @classmethod 41 | def lookup_instance(cls, uuid, **kwargs): 42 | try: 43 | return Person.objects.get(uuid=uuid) 44 | except Person.DoesNotExist: 45 | pass 46 | ``` 47 | 48 | ### Pydantic Serializers 49 | 50 | As an alternative to using DRF serializers (as described above), you may also use Pydantic models. The same `MESSAGE_TYPE`, `VERSION`, and `KEY_FIELD` attributes must be defined as `ClassVar`s on the model class. 51 | 52 | ```py title="myapp/models.py" 53 | from typing import ClassVar 54 | from django.db import models 55 | from logpipe.abc import PydanticModel 56 | import uuid 57 | 58 | 59 | class Person(models.Model): 60 | uuid = models.UUIDField(default=uuid.uuid4, unique=True) 61 | first_name = models.CharField(max_length=200) 62 | last_name = models.CharField(max_length=200) 63 | 64 | 65 | class PersonSchema(PydanticModel): 66 | MESSAGE_TYPE: ClassVar[str] = 'person' 67 | VERSION: ClassVar[int] = 1 68 | KEY_FIELD: ClassVar[str] = 'uuid' 69 | 70 | uuid: uuid.UUID 71 | first_name: str 72 | last_name: str 73 | 74 | def save(self) -> Person: 75 | """ 76 | The save method is called when a `person` message is consumed from the 77 | data stream. 78 | """ 79 | try: 80 | person = Person.objects.get(uuid=self.uuid) 81 | except Person.DoesNotExist: 82 | person = Person() 83 | person.first_name = self.first_name 84 | person.last_name = self.last_name 85 | person.save() 86 | return person 87 | ``` 88 | 89 | ## Sending Messages 90 | 91 | ### DRF Producer 92 | 93 | Once a serializer exists, you can send a message to Kafka by creating a `DRFProducer` object and calling the `send` method. 94 | 95 | ```py 96 | from logpipe import DRFProducer 97 | from .models import Person, PersonSerializer 98 | 99 | joe = Person.objects.create( 100 | first_name='Joe', 101 | last_name='Schmoe', 102 | ) 103 | producer = DRFProducer('people', PersonSerializer) 104 | producer.send(joe) 105 | ``` 106 | 107 | The above sample code would result in the following message being sent to the Kafka topic named `people`.
108 | 109 | ```txt 110 | json:{"type":"person","version":1,"producer":"my-application-name","message":{"first_name":"Joe","last_name":"Schmoe","uuid":"xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx"}} 111 | ``` 112 | 113 | ### Pydantic Producer 114 | 115 | If using a Pydantic model instead of a DRF serializer, use the `PydanticProducer` class instead of `DRFProducer`. 116 | 117 | ```py 118 | from logpipe import PydanticProducer 119 | from .models import PersonSchema 120 | import uuid 121 | 122 | joe = PersonSchema( 123 | uuid=uuid.uuid4(), 124 | first_name='Joe', 125 | last_name='Schmoe', 126 | ) 127 | producer = PydanticProducer('people') 128 | producer.send(joe) 129 | ``` 130 | 131 | The above sample code would result in the following message being sent to the Kafka topic named `people`. 132 | 133 | ```txt 134 | json:{"type":"person","version":1,"producer":"my-application-name","message":{"first_name":"Joe","last_name":"Schmoe","uuid":"xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx"}} 135 | ``` 136 | 137 | ## Receiving Messages 138 | 139 | To process incoming messages, we can reuse the same model and serializer. We just need to instantiate a Consumer object. Unlike Producers, there are no separate Consumer classes for DRF vs. Pydantic serializers. Either type of serializer can be passed into the `Consumer.register` method. 140 | 141 | ```py 142 | from logpipe import Consumer 143 | from .models import PersonSerializer, PersonSchema 144 | 145 | # Watch for messages, but timeout after 1000ms of no messages 146 | consumer = Consumer('people', consumer_timeout_ms=1000) 147 | consumer.register(PersonSerializer) 148 | consumer.run() 149 | 150 | # Watch for messages and block forever 151 | consumer = Consumer('people') 152 | consumer.register(PersonSerializer) 153 | consumer.run() 154 | 155 | # Pydantic serializers work here too. 156 | consumer = Consumer('people') 157 | consumer.register(PersonSchema) 158 | consumer.run() 159 | ``` 160 | 161 | The consumer object uses Django REST Framework's built-in `save`, `create`, and `update` methods to apply the message. If your messages aren't tied directly to a Django model, skip defining the `lookup_instance` class method and override the `save` method to house your custom import logic. A sketch of this appears in the appendix at the end of this guide. 162 | 163 | ### Consuming Multiple Data-Types Per Topic 164 | 165 | If you have multiple data-types in a single topic or stream, you can consume them all by registering multiple serializers with the consumer. 166 | 167 | ```py 168 | from logpipe import Consumer 169 | from .models import PersonSerializer, PlaceSerializer, ThingSerializer 170 | 171 | consumer = Consumer('nouns') 172 | consumer.register(PersonSerializer) 173 | consumer.register(PlaceSerializer) 174 | consumer.register(ThingSerializer) 175 | consumer.run() 176 | ``` 177 | 178 | You can also support multiple incompatible versions of a message type by defining a serializer for each version and registering them all with the consumer.
179 | 180 | ```py 181 | from logpipe import Consumer 182 | from .models import ( 183 | PersonSerializerVersion1, 184 | PersonSerializerVersion2, 185 | PlaceSerializer, 186 | ThingSerializer, 187 | ) 188 | 189 | consumer = Consumer('nouns') 190 | consumer.register(PersonSerializerVersion1) 191 | consumer.register(PersonSerializerVersion2) 192 | consumer.register(PlaceSerializer) 193 | consumer.register(ThingSerializer) 194 | consumer.run() 195 | ``` 196 | 197 | ### Consuming from Multiple Topics 198 | 199 | If you have multiple streams or topics to watch, make a consumer for each, and watch them all simultaneously in the same process by using a MultiConsumer. 200 | 201 | ```py 202 | from logpipe import MultiConsumer, Consumer 203 | from .models import ( 204 | PersonSerializer, 205 | PlaceSerializer, 206 | ) 207 | 208 | people_consumer = Consumer('people') 209 | people_consumer.register(PersonSerializer) 210 | 211 | places_consumer = Consumer('places') 212 | places_consumer.register(PlaceSerializer) 213 | 214 | multi = MultiConsumer(people_consumer, places_consumer) 215 | 216 | # Watch for 'people' and 'places' topics indefinitely 217 | multi.run() 218 | ``` 219 | 220 | ### Management Commands 221 | 222 | Finally, consumers can be registered and run automatically by the built-in `run_kafka_consumer` management command. 223 | 224 | ```py 225 | # myapp/apps.py 226 | from django.apps import AppConfig 227 | from logpipe import Consumer, register_consumer 228 | 229 | class MyAppConfig(AppConfig): 230 | name = 'myapp' 231 | 232 | # Register consumers with logpipe 233 | @register_consumer 234 | def build_person_consumer(): 235 | consumer = Consumer('people') 236 | consumer.register(PersonSerializer) 237 | return consumer 238 | ``` 239 | 240 | Use the `register_consumer` decorator to register as many consumers and topics as you need to work with. Then, run the `run_kafka_consumer` command to process messages for all consumers automatically in a round-robin fashion. 241 | 242 | ```sh 243 | python manage.py run_kafka_consumer 244 | ``` 245 | 246 | 247 | ## Dealing with Schema Changes 248 | 249 | Schema changes are handled using the `VERSION` attribute required on every serializer class. When sending, a producer includes the schema version number in the message data. Then, when a consumer receives a message, it looks for a registered serializer with a matching version number. If no serializer is found with a matching version number, a `logpipe.exceptions.UnknownMessageVersionError` exception is raised. 250 | 251 | To perform a backwards-incompatible schema change, the following steps should be performed. 252 | 253 | 1. Update consumer code to have knowledge of the new schema version. 254 | 2. Update producer code to begin sending the new schema version. 255 | 3. After some amount of time (when you are sure no old-version messages still exist in Kafka), remove the code related to the old schema version. 256 | 257 | For example, if we wanted to require an email field on the `Person` model we defined above, the first step would be to update consumers to know about the new field.
258 | 259 | ```py 260 | class Person(models.Model): 261 | uuid = models.UUIDField(default=uuid.uuid4, unique=True) 262 | first_name = models.CharField(max_length=200) 263 | last_name = models.CharField(max_length=200) 264 | email = models.EmailField(max_length=200, null=True) 265 | 266 | 267 | class PersonSerializerV1(serializers.ModelSerializer): 268 | MESSAGE_TYPE = 'person' 269 | VERSION = 1 270 | KEY_FIELD = 'uuid' 271 | 272 | class Meta: 273 | model = Person 274 | fields = ['uuid', 'first_name', 'last_name'] 275 | 276 | 277 | class PersonSerializerV2(PersonSerializerV1): 278 | MESSAGE_TYPE = 'person' 279 | VERSION = 2 280 | 281 | class Meta(PersonSerializerV1.Meta): 282 | fields = ['uuid', 'first_name', 'last_name', 'email'] 283 | 284 | 285 | consumer = Consumer('people', consumer_timeout_ms=1000) 286 | consumer.register(PersonSerializerV1) 287 | consumer.register(PersonSerializerV2) 288 | ``` 289 | 290 | The consumers will now use the appropriate serializer for the message version. Second, we need to update producer code to begin using schema version 2. 291 | 292 | ```py 293 | producer = DRFProducer('people', PersonSerializerV2) 294 | ``` 295 | 296 | Finally, after all the old version 1 messages have been dropped (by log compaction), the `PersonSerializerV1` class can be removed from the code base.
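297 | 298 | ## Appendix: Messages Without a Django Model 299 | 300 | The following is a minimal, hypothetical sketch of the pattern described under "Receiving Messages" above; the topic, serializer, and field names are invented for illustration and are not part of the library. Because the serializer is not backed by a Django model, it omits `lookup_instance` and overrides `save` to run its own import logic whenever a message is consumed. 301 | 302 | ```py 303 | import logging 304 | 305 | from rest_framework import serializers 306 | 307 | from logpipe import Consumer 308 | 309 | logger = logging.getLogger(__name__) 310 | 311 | 312 | class PageViewSerializer(serializers.Serializer): 313 | MESSAGE_TYPE = 'page-view' 314 | VERSION = 1 315 | KEY_FIELD = 'session_id' 316 | 317 | session_id = serializers.CharField() 318 | url = serializers.URLField() 319 | 320 | def save(self, **kwargs): 321 | # There is no model instance to look up or update. Put whatever 322 | # custom import logic you need here; this sketch just logs the 323 | # validated message data. 324 | logger.info( 325 | "Session %s viewed %s", 326 | self.validated_data['session_id'], 327 | self.validated_data['url'], 328 | ) 329 | 330 | 331 | consumer = Consumer('page-views', consumer_timeout_ms=1000) 332 | consumer.register(PageViewSerializer) 333 | consumer.run() 334 | ``` 335 | --------------------------------------------------------------------------------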