├── .gitignore ├── .gitlab-ci.yml ├── .pre-commit-config.yaml ├── CHANGELOG.md ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── bin └── publish.sh ├── docker-compose.yml ├── docs ├── api.md ├── custom-styles.css ├── index.md ├── installation.md ├── releases.md └── usage.md ├── manage.py ├── mkdocs.yml ├── poetry.lock ├── pyproject.toml ├── renovate.json ├── sandbox ├── __init__.py ├── lptester │ ├── __init__.py │ ├── admin.py │ ├── apps.py │ ├── constants.py │ ├── consumers.py │ ├── migrations │ │ ├── 0001_initial.py │ │ ├── 0002_person_uuid.py │ │ └── __init__.py │ ├── models.py │ ├── producers.py │ ├── serializers.py │ └── signals.py ├── settings.py └── urls.py ├── setup.cfg ├── src └── logpipe │ ├── __init__.py │ ├── abc.py │ ├── admin.py │ ├── apps.py │ ├── backend │ ├── __init__.py │ ├── dummy.py │ ├── kafka.py │ └── kinesis.py │ ├── constants.py │ ├── consumer.py │ ├── docgen_setup.py │ ├── exceptions.py │ ├── format.py │ ├── formats │ ├── __init__.py │ ├── json.py │ ├── msgpack.py │ └── pickle.py │ ├── locale │ └── es │ │ └── LC_MESSAGES │ │ ├── django.mo │ │ └── django.po │ ├── management │ ├── __init__.py │ └── commands │ │ ├── __init__.py │ │ └── run_kafka_consumer.py │ ├── migrations │ ├── 0001_initial.py │ ├── 0002_auto_20170427_1451.py │ ├── 0003_auto_20170427_1703.py │ ├── 0004_auto_20170502_1403.py │ ├── 0005_auto_20180917_1348.py │ ├── 0006_alter_kafkaoffset_options_and_more.py │ └── __init__.py │ ├── models.py │ ├── producer.py │ ├── py.typed │ ├── registry.py │ ├── settings.py │ └── tests │ ├── __init__.py │ ├── common.py │ ├── integration │ ├── __init__.py │ └── test_roundtrip.py │ └── unit │ ├── __init__.py │ ├── kafka │ ├── __init__.py │ ├── test_consumer.py │ └── test_producer.py │ ├── kinesis │ ├── __init__.py │ ├── test_consumer.py │ └── test_producer.py │ ├── test_consumer.py │ ├── test_format.py │ ├── test_producer.py │ └── test_settings.py └── tox.ini /.gitignore: -------------------------------------------------------------------------------- 1 | version.txt 2 | public 3 | 4 | # Created by https://www.gitignore.io/api/osx,sublimetext,python,linux,django 5 | 6 | ### OSX ### 7 | *.DS_Store 8 | .AppleDouble 9 | .LSOverride 10 | 11 | # Icon must end with two \r 12 | Icon 13 | 14 | 15 | # Thumbnails 16 | ._* 17 | 18 | # Files that might appear in the root of a volume 19 | .DocumentRevisions-V100 20 | .fseventsd 21 | .Spotlight-V100 22 | .TemporaryItems 23 | .Trashes 24 | .VolumeIcon.icns 25 | .com.apple.timemachine.donotpresent 26 | 27 | # Directories potentially created on remote AFP share 28 | .AppleDB 29 | .AppleDesktop 30 | Network Trash Folder 31 | Temporary Items 32 | .apdisk 33 | 34 | 35 | ### SublimeText ### 36 | # cache files for sublime text 37 | *.tmlanguage.cache 38 | *.tmPreferences.cache 39 | *.stTheme.cache 40 | 41 | # workspace files are user-specific 42 | *.sublime-workspace 43 | 44 | # project files should be checked into the repository, unless a significant 45 | # proportion of contributors will probably not be using SublimeText 46 | # *.sublime-project 47 | 48 | # sftp configuration file 49 | sftp-config.json 50 | 51 | # Package control specific files 52 | Package Control.last-run 53 | Package Control.ca-list 54 | Package Control.ca-bundle 55 | Package Control.system-ca-bundle 56 | Package Control.cache/ 57 | Package Control.ca-certs/ 58 | bh_unicode_properties.cache 59 | 60 | # Sublime-github package stores a github token in this file 61 | # https://packagecontrol.io/packages/sublime-github 62 | GitHub.sublime-settings 63 | 64 | 65 | ### 
Python ### 66 | # Byte-compiled / optimized / DLL files 67 | __pycache__/ 68 | *.py[cod] 69 | *$py.class 70 | 71 | # C extensions 72 | *.so 73 | 74 | # Distribution / packaging 75 | .Python 76 | env/ 77 | build/ 78 | develop-eggs/ 79 | dist/ 80 | downloads/ 81 | eggs/ 82 | .eggs/ 83 | lib/ 84 | lib64/ 85 | parts/ 86 | sdist/ 87 | var/ 88 | *.egg-info/ 89 | .installed.cfg 90 | *.egg 91 | 92 | # PyInstaller 93 | # Usually these files are written by a python script from a template 94 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 95 | *.manifest 96 | *.spec 97 | 98 | # Installer logs 99 | pip-log.txt 100 | pip-delete-this-directory.txt 101 | 102 | # Unit test / coverage reports 103 | htmlcov/ 104 | .tox/ 105 | .coverage 106 | .coverage.* 107 | .cache 108 | nosetests.xml 109 | coverage.xml 110 | *,cover 111 | .hypothesis/ 112 | 113 | # Translations 114 | *.pot 115 | 116 | # Django stuff: 117 | *.log 118 | local_settings.py 119 | 120 | # Flask stuff: 121 | instance/ 122 | .webassets-cache 123 | 124 | # Scrapy stuff: 125 | .scrapy 126 | 127 | # Sphinx documentation 128 | docs/_build/ 129 | 130 | # PyBuilder 131 | target/ 132 | 133 | # IPython Notebook 134 | .ipynb_checkpoints 135 | 136 | # pyenv 137 | .python-version 138 | 139 | # celery beat schedule file 140 | celerybeat-schedule 141 | 142 | # dotenv 143 | .env 144 | 145 | # virtualenv 146 | venv/ 147 | ENV/ 148 | 149 | # Spyder project settings 150 | .spyderproject 151 | 152 | # Rope project settings 153 | .ropeproject 154 | 155 | 156 | ### Linux ### 157 | *~ 158 | 159 | # temporary files which can be created if a process still has a handle open of a deleted file 160 | .fuse_hidden* 161 | 162 | # KDE directory preferences 163 | .directory 164 | 165 | # Linux trash folder which might appear on any partition or disk 166 | .Trash-* 167 | 168 | 169 | ### Django ### 170 | *.log 171 | *.pot 172 | *.pyc 173 | __pycache__/ 174 | local_settings.py 175 | db.sqlite3 176 | media 177 | -------------------------------------------------------------------------------- /.gitlab-ci.yml: -------------------------------------------------------------------------------- 1 | variables: 2 | ADVERTISED_HOST: 'spotify__kafka' 3 | ADVERTISED_PORT: '9092' 4 | AUTO_CREATE_TOPICS: 'true' 5 | POSTGRES_HOST_AUTH_METHOD: 'trust' 6 | 7 | stages: 8 | - test 9 | - release 10 | 11 | services: 12 | - spotify/kafka@sha256:cf8f8f760b48a07fb99df24fab8201ec8b647634751e842b67103a25a388981b 13 | - postgres:latest@sha256:30a72339ce74f2621f0f82cd983a11ade307ec2e634a7998318e8813a6f6f25c 14 | 15 | cache: 16 | key: "$CI_PROJECT_NAME" 17 | paths: 18 | - $HOME/.cache/pip 19 | 20 | include: 21 | - component: gitlab.com/thelabnyc/thelab-ci-components/precommit@0.4.0 22 | rules: 23 | - if: $CI_PIPELINE_SOURCE == "schedule" 24 | when: never 25 | - if: $CI_COMMIT_BRANCH && $CI_COMMIT_REF_PROTECTED == "true" 26 | - if: $CI_COMMIT_TAG && $CI_COMMIT_REF_PROTECTED == "true" 27 | - if: '$CI_PIPELINE_SOURCE == "merge_request_event"' 28 | - component: gitlab.com/thelabnyc/thelab-ci-components/publish-gitlab-release@0.4.0 29 | - component: gitlab.com/thelabnyc/thelab-ci-components/publish-to-pypi@0.4.0 30 | 31 | test:lib: 32 | stage: test 33 | image: "registry.gitlab.com/thelabnyc/python:${IMAGE}" 34 | rules: 35 | - if: $CI_COMMIT_BRANCH && $CI_COMMIT_REF_PROTECTED == "true" 36 | - if: $CI_COMMIT_TAG && $CI_COMMIT_REF_PROTECTED == "true" 37 | - if: '$CI_PIPELINE_SOURCE == "merge_request_event"' 38 | script: 39 | - pip install tox 40 | - tox 41 | coverage: 
'/^TOTAL.+?(\d+\%)$/' 42 | parallel: 43 | matrix: 44 | - IMAGE: py311 45 | TOX_SKIP_ENV: "^(?!py311-)" 46 | - IMAGE: py312 47 | TOX_SKIP_ENV: "^(?!py312-)" 48 | - IMAGE: py313 49 | TOX_SKIP_ENV: "^(?!py313-)" 50 | 51 | test:docs: 52 | stage: test 53 | image: "registry.gitlab.com/thelabnyc/python:3.13.737@sha256:407e710f73e88a66ab3ccb868211496f7c85e034fc752464c02d2dc50ba6316d" 54 | rules: 55 | - if: '$CI_PIPELINE_SOURCE == "merge_request_event"' 56 | script: 57 | - poetry install 58 | - make docs 59 | 60 | pages: 61 | stage: release 62 | image: "registry.gitlab.com/thelabnyc/python:3.13.737@sha256:407e710f73e88a66ab3ccb868211496f7c85e034fc752464c02d2dc50ba6316d" 63 | rules: 64 | - if: $CI_PIPELINE_SOURCE == "schedule" 65 | when: never 66 | - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH && $CI_COMMIT_REF_PROTECTED == "true" 67 | needs: 68 | - test:lib 69 | script: 70 | - poetry install 71 | - make docs 72 | artifacts: 73 | paths: 74 | - public 75 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://gitlab.com/thelabnyc/thelab-pre-commit-hooks 3 | rev: v0.0.2 4 | hooks: 5 | - id: update-copyright-year 6 | 7 | - repo: https://github.com/pre-commit/pre-commit-hooks 8 | rev: v5.0.0 9 | hooks: 10 | - id: check-json 11 | - id: check-merge-conflict 12 | - id: check-symlinks 13 | - id: check-toml 14 | - id: check-yaml 15 | args: [--unsafe] 16 | - id: end-of-file-fixer 17 | - id: trailing-whitespace 18 | 19 | - repo: https://github.com/asottile/pyupgrade 20 | rev: v3.20.0 21 | hooks: 22 | - id: pyupgrade 23 | args: [--py311-plus] 24 | 25 | - repo: https://github.com/adamchainz/django-upgrade 26 | rev: "1.25.0" 27 | hooks: 28 | - id: django-upgrade 29 | 30 | - repo: https://github.com/psf/black 31 | rev: "25.1.0" 32 | hooks: 33 | - id: black 34 | 35 | - repo: https://github.com/pycqa/isort 36 | rev: "6.0.1" 37 | hooks: 38 | - id: isort 39 | 40 | - repo: https://github.com/commitizen-tools/commitizen 41 | rev: v4.8.2 42 | hooks: 43 | - id: commitizen 44 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## v1.5.0 (2025-04-03) 2 | 3 | ### Feat 4 | 5 | - support Django 5.2. 
Drop Django 5.0 6 | 7 | ### Fix 8 | 9 | - **deps**: update dependency pydantic to ^2.11.1 10 | - **deps**: update dependency djangorestframework to >=3.16.0 11 | - **deps**: update dependency pydantic to ^2.11.0 12 | - **deps**: update dependency kafka-python to ^2.1.4 13 | - **deps**: update boto to ^1.37.22 14 | - **deps**: update dependency kafka-python to ^2.1.3 15 | - **deps**: update boto to ^1.37.17 16 | - **deps**: update dependency kafka-python to ^2.1.2 17 | - **deps**: update dependency kafka-python to ^2.1.1 18 | - **deps**: update boto to ^1.37.12 19 | 20 | ### Refactor 21 | 22 | - add pyupgrade / django-upgrade precommit hooks 23 | 24 | ## v1.4.12 (2025-03-10) 25 | 26 | ### Fix 27 | 28 | - fix typo in LogPipeMessageError which swallows messages 29 | - **deps**: update boto to ^1.37.8 30 | - **deps**: update dependency kafka-python to ^2.0.6 31 | - **deps**: update boto 32 | - **deps**: update dependency kafka-python to ^2.0.5 33 | - **deps**: update dependency kafka-python to ^2.0.4 34 | - **deps**: update boto to ^1.36.26 35 | 36 | ## v1.4.11 (2025-02-17) 37 | 38 | ### Fix 39 | 40 | - pydantic Producer PartitionKey when KEY_FIELD is set to an instance of RootModel 41 | - **deps**: update boto to ^1.36.20 42 | - update kafka-python dependency to 2.0.3 43 | 44 | ## v1.4.10 (2025-02-06) 45 | 46 | ### Fix 47 | 48 | - improper handling of Pydantic validation errors 49 | - **deps**: update boto to ^1.36.10 50 | - **deps**: update dependency pydantic to ^2.10.6 51 | - **deps**: update boto to ^1.36.5 52 | 53 | ## v1.4.9 (2025-01-23) 54 | 55 | ### Fix 56 | 57 | - allow the customization of ShardIteratorType through the settings (!224) 58 | 59 | ## v1.4.8 (2025-01-23) 60 | 61 | ### Fix 62 | 63 | - add type checking to sandbox app 64 | - add django 5.1 and Python 3.13 to tests 65 | - incorrect type annotations in DRFSerializer.lookup_instance 66 | - **deps**: update boto to ^1.36.1 67 | 68 | ## v1.4.7 (2025-01-14) 69 | 70 | ### Fix 71 | 72 | - **deps**: update dependency pydantic to ^2.10.5 73 | - **deps**: update boto to ^1.35.96 74 | - **deps**: update boto to ^1.35.91 75 | - **deps**: update boto to ^1.35.88 76 | - **deps**: update dependency pydantic to ^2.10.4 77 | - **deps**: update boto to ^1.35.85 78 | - **deps**: update boto to ^1.35.80 79 | - **deps**: update boto to ^1.35.76 80 | - **deps**: update dependency pydantic to ^2.10.3 81 | - **deps**: update boto to ^1.35.71 82 | - **deps**: update dependency pydantic to ^2.10.2 83 | - **deps**: update dependency pydantic to ^2.10.1 84 | - **deps**: update boto to ^1.35.67 85 | - **deps**: update boto to ^1.35.63 86 | - **deps**: update boto 87 | - **deps**: update boto to ^1.35.53 88 | - **deps**: update boto 89 | - lint 90 | - **deps**: update boto to ^1.35.43 91 | - **deps**: update boto to ^1.35.33 92 | - **deps**: update boto to ^1.35.23 93 | - **deps**: update dependency pydantic to ^2.9.2 94 | - **deps**: update boto to ^1.35.18 95 | - **deps**: update dependency msgpack to ^1.1.0 96 | - **deps**: update dependency pydantic to ^2.9.1 97 | - **deps**: update dependency botocore-stubs to ^1.35.14 98 | - **deps**: update dependency pydantic to ^2.9.0 99 | - **deps**: update dependency boto3-stubs to ^1.35.14 100 | - **deps**: update dependency boto3 to ^1.35.14 101 | - **deps**: update dependency boto3-stubs to ^1.35.13 102 | - **deps**: update dependency boto3 to ^1.35.13 103 | - **deps**: update dependency boto3 to ^1.35.11 104 | - **deps**: update dependency botocore-stubs to ^1.35.10 105 | - **deps**: update dependency 
boto3-stubs to ^1.35.10 106 | - **deps**: update dependency boto3 to ^1.35.10 107 | - **deps**: update dependency boto3 to ^1.35.9 108 | 109 | ## v1.4.6 (2024-08-31) 110 | 111 | ### Fix 112 | 113 | - **deps**: update dependency boto3 to ^1.35.7 114 | - **deps**: update dependency botocore-stubs to ^1.35.6 115 | - **deps**: update dependency boto3-stubs to ^1.35.6 116 | - **deps**: update dependency boto3 to ^1.35.6 117 | - **deps**: update dependency botocore-stubs to ^1.35.5 118 | - **deps**: update dependency boto3-stubs to ^1.35.5 119 | - **deps**: update dependency boto3 to ^1.35.5 120 | - **deps**: update dependency boto3 to ^1.35.4 121 | - **deps**: update dependency boto3 to ^1.35.2 122 | - **deps**: update dependency boto3-stubs to ^1.35.1 123 | - **deps**: update dependency boto3 to ^1.35.1 124 | - **deps**: update dependency botocore-stubs to ^1.35.0 125 | 126 | ## v1.4.5 (2024-08-20) 127 | 128 | ### Fix 129 | 130 | - AttributeError: type object Serializer has no attribute _tag 131 | - **deps**: update dependency boto3-stubs to ^1.35.0 132 | - **deps**: update dependency boto3 to ^1.35.0 133 | - **deps**: update dependency boto3-stubs to ^1.34.162 134 | - **deps**: update dependency boto3 to ^1.34.162 135 | - **deps**: update dependency boto3-stubs to ^1.34.160 136 | - **deps**: update dependency botocore-stubs to ^1.34.159 137 | - **deps**: update dependency boto3-stubs to ^1.34.159 138 | 139 | ## v1.4.4 (2024-08-14) 140 | 141 | ### Fix 142 | 143 | - kafka-python dep declaration 144 | 145 | ## v1.4.3 (2024-08-14) 146 | 147 | ### Fix 148 | 149 | - tox extras 150 | 151 | ## v1.4.2 (2024-08-14) 152 | 153 | ### Fix 154 | 155 | - fix errant pkg extra definitions 156 | 157 | ## v1.4.1 (2024-08-14) 158 | 159 | ### Fix 160 | 161 | - fix missing dependency on boto3-stubs when using Kinesis 162 | 163 | ## v1.4.1b0 (2024-08-08) 164 | 165 | ### Fix 166 | 167 | - **deps**: update dependency pydantic to ^2.8.2 168 | - **deps**: update dependency pydantic to ^2.8.0 169 | - **deps**: update dependency django to >=5.0.6 170 | - **deps**: update dependency djangorestframework to >=3.15.2 171 | - **deps**: update dependency django to >=4.2.13 172 | - **deps**: update dependency pydantic to ^2.7.4 173 | - **deps**: update dependency lru-dict to >=1.3.0 174 | - **deps**: update dependency djangorestframework to v3.15.2 175 | - **deps**: update dependency pydantic to v2.7.4 176 | - **deps**: update dependency pydantic to v2.7.3 177 | - **deps**: update dependency pydantic to v2.7.2 178 | - **deps**: update dependency djangorestframework to v3.15.1 179 | - **deps**: update dependency django to v5.0.6 180 | 181 | ## v1.4.0 (2024-02-13) 182 | 183 | ## v1.3.0 (2023-06-07) 184 | 185 | ## v1.2.0 (2023-03-15) 186 | 187 | ## v1.1.0 (2021-05-27) 188 | 189 | ## v1.0.0 (2020-02-19) 190 | 191 | ## v0.3.2 (2019-12-12) 192 | 193 | ## v0.3.1 (2019-07-10) 194 | 195 | ## v0.3.0 (2018-11-28) 196 | 197 | ## v0.2.1 (2018-01-09) 198 | 199 | ## v0.2.0 (2017-10-04) 200 | 201 | ## v0.1.0 (2024-08-08) 202 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM registry.gitlab.com/thelabnyc/python:3.13.737@sha256:407e710f73e88a66ab3ccb868211496f7c85e034fc752464c02d2dc50ba6316d 2 | 3 | RUN mkdir /code 4 | WORKDIR /code 5 | 6 | RUN apt-get update && \ 7 | apt-get install -y gettext && \ 8 | rm -rf /var/lib/apt/lists/* 9 | 10 | ADD . 
/code/ 11 | RUN poetry install 12 | 13 | RUN mkdir /tox 14 | ENV TOX_WORK_DIR='/tox' 15 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | ISC License 2 | 3 | Copyright (c) 2016 - 2025 thelab 4 | 5 | Permission to use, copy, modify, and/or distribute this software for any 6 | purpose with or without fee is hereby granted, provided that the above 7 | copyright notice and this permission notice appear in all copies. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH 10 | REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY 11 | AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, 12 | INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM 13 | LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR 14 | OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 15 | PERFORMANCE OF THIS SOFTWARE. 16 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Create the .po and .mo files used for i18n 2 | .PHONY: translations 3 | translations: 4 | cd src/logpipe && \ 5 | django-admin makemessages -a && \ 6 | django-admin compilemessages 7 | 8 | .PHONY: install_precommit 9 | install_precommit: 10 | pre-commit install 11 | 12 | .PHONY: test_precommit 13 | test_precommit: install_precommit 14 | pre-commit run --all-files 15 | 16 | .PHONY: docs_serve 17 | docs_serve: 18 | DJANGO_SETTINGS_MODULE=logpipe.docgen_setup poetry run mkdocs serve --strict 19 | 20 | .PHONY: docs_build 21 | docs_build: 22 | DJANGO_SETTINGS_MODULE=logpipe.docgen_setup poetry run mkdocs build --strict 23 | 24 | docs: docs_build 25 | rm -rf public/ && \ 26 | mkdir -p public/ && \ 27 | cp -r build/mkdocs/* public/ 28 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # django-logpipe 2 | 3 | [![](https://gitlab.com/thelabnyc/django-logpipe/badges/master/build.svg)](https://gitlab.com/thelabnyc/django-logpipe/commits/master) 4 | [![](https://img.shields.io/pypi/l/django-logpipe.svg)](https://pypi.python.org/pypi/) 5 | [![](https://badge.fury.io/py/django-logpipe.svg)](https://pypi.python.org/pypi/django-logpipe) 6 | [![](https://img.shields.io/pypi/format/django-logpipe.svg)](https://pypi.python.org/pypi/django-logpipe) 7 | 8 | Django-logpipe is a library that serves as a universal pipe for moving data around between Django applications and services. It supports serialization by means of [Django REST Framework][drf] and/or [Pydantic][pydantic], and supports using either [Apache Kafka][kafka]] or [Amazon Kinesis][kinesis] as the underlying data stream. 
9 | 10 | [drf]: http://www.django-rest-framework.org/ 11 | [pydantic]: https://docs.pydantic.dev/ 12 | [kafka]: https://kafka.apache.org/ 13 | [kinesis]: https://aws.amazon.com/kinesis/ 14 | 15 | ## Documentation 16 | 17 | See [https://thelabnyc.gitlab.io/django-logpipe/](https://thelabnyc.gitlab.io/django-logpipe/) 18 | 19 | ## Change log 20 | 21 | See [Release Notes](./docs/releases.md) 22 | -------------------------------------------------------------------------------- /bin/publish.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -euxo pipefail 4 | 5 | # Check git status 6 | git fetch --all 7 | CURRENT_BRANCH=$(git branch --show-current) 8 | if [ "$CURRENT_BRANCH" != "master" ]; then 9 | echo "This script must be run only when the master branch is checked out, but the current branch is ${CURRENT_BRANCH}. Abort!" 10 | exit 1 11 | fi 12 | 13 | NUM_BEHIND=$(git log ..origin/master | wc -l | awk '{print $1}') 14 | if [ "$NUM_BEHIND" == "0" ]; then 15 | echo "" 16 | else 17 | echo "Your branch is NOT up to date with origin/master. Abort! Please fetch and rebase first." 18 | exit 1 19 | fi 20 | 21 | # Update version and publish via commitizen 22 | cz bump "$@" 23 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '2' 2 | services: 3 | spotify__kafka: 4 | image: spotify/kafka@sha256:cf8f8f760b48a07fb99df24fab8201ec8b647634751e842b67103a25a388981b 5 | environment: 6 | ADVERTISED_HOST: 'spotify__kafka' 7 | ADVERTISED_PORT: '9092' 8 | AUTO_CREATE_TOPICS: 'true' 9 | 10 | postgres: 11 | image: postgres:latest@sha256:30a72339ce74f2621f0f82cd983a11ade307ec2e634a7998318e8813a6f6f25c 12 | environment: 13 | POSTGRES_HOST_AUTH_METHOD: 'trust' 14 | 15 | test: 16 | build: . 
17 | command: python manage.py runserver 0.0.0.0:8000 18 | ports: 19 | - "8000:8000" 20 | depends_on: 21 | - spotify__kafka 22 | - postgres 23 | volumes: 24 | - .:/code 25 | -------------------------------------------------------------------------------- /docs/api.md: -------------------------------------------------------------------------------- 1 | # API Reference 2 | 3 | ## DRFProducer 4 | 5 | ::: logpipe.producer.DRFProducer 6 | :docstring: 7 | :members: 8 | 9 | ## PydanticProducer 10 | 11 | ::: logpipe.producer.PydanticProducer 12 | :docstring: 13 | :members: 14 | 15 | ## Consumer 16 | 17 | ::: logpipe.consumer.Consumer 18 | :docstring: 19 | :members: 20 | 21 | ## MultiConsumer 22 | 23 | ::: logpipe.consumer.MultiConsumer 24 | :docstring: 25 | :members: 26 | 27 | ## register_consumer 28 | 29 | ::: logpipe.registry.register_consumer 30 | :docstring: 31 | :members: 32 | -------------------------------------------------------------------------------- /docs/custom-styles.css: -------------------------------------------------------------------------------- 1 | /* Styles for https://github.com/tomchristie/mkautodoc */ 2 | div.autodoc-docstring { 3 | padding-left: 20px; 4 | margin-bottom: 30px; 5 | border-left: 5px solid rgba(230, 230, 230); 6 | } 7 | 8 | div.autodoc-members { 9 | padding-left: 20px; 10 | margin-bottom: 15px; 11 | } 12 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # Welcome 2 | 3 | [![](https://gitlab.com/thelabnyc/django-logpipe/badges/master/build.svg)](https://gitlab.com/thelabnyc/django-logpipe/commits/master) 4 | [![](https://img.shields.io/pypi/l/django-logpipe.svg)](https://pypi.python.org/pypi/) 5 | [![](https://badge.fury.io/py/django-logpipe.svg)](https://pypi.python.org/pypi/django-logpipe) 6 | [![](https://img.shields.io/pypi/format/django-logpipe.svg)](https://pypi.python.org/pypi/django-logpipe) 7 | 8 | Django-logpipe is a library that serves as a universal pipe for moving data around between Django applications and services. It supports serialization by means of [Django REST Framework][drf] and/or [Pydantic][pydantic], and supports using either [Apache Kafka][kafka]] or [Amazon Kinesis][kinesis] as the underlying data stream. 9 | 10 | [drf]: http://www.django-rest-framework.org/ 11 | [pydantic]: https://docs.pydantic.dev/ 12 | [kafka]: https://kafka.apache.org/ 13 | [kinesis]: https://aws.amazon.com/kinesis/ 14 | -------------------------------------------------------------------------------- /docs/installation.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | Install `django-logpipe` from pip. 4 | 5 | ```sh 6 | pip install django-logpipe 7 | ``` 8 | 9 | Add `logpipe` to your installed apps. 10 | 11 | ```py 12 | INSTALLED_APPS = [ 13 | # … 14 | 'logpipe', 15 | # … 16 | ] 17 | ``` 18 | 19 | Add connection settings to your `settings.py` file. 
If you're using Kafka, this will look like this: 20 | 21 | ```py 22 | LOGPIPE = { 23 | # Required Settings 24 | 'OFFSET_BACKEND': 'logpipe.backend.kafka.ModelOffsetStore', 25 | 'CONSUMER_BACKEND': 'logpipe.backend.kafka.Consumer', 26 | 'PRODUCER_BACKEND': 'logpipe.backend.kafka.Producer', 27 | 'KAFKA_BOOTSTRAP_SERVERS': [ 28 | 'kafka:9092' 29 | ], 30 | 'KAFKA_CONSUMER_KWARGS': { 31 | 'group_id': 'django-logpipe', 32 | }, 33 | 34 | # Optional Settings 35 | 'KAFKA_SEND_TIMEOUT': 10, 36 | 'KAFKA_MAX_SEND_RETRIES': 0, 37 | 'KAFKA_KWARGS': { 38 | # Example for Confluent Cloud 39 | 'security_protocol': 'SASL_SSL', 40 | 'sasl_mechanism': 'PLAIN', 41 | 'sasl_plain_username': '', 42 | 'sasl_plain_password': '', 43 | # …or for OVHCloud 44 | 'security_protocol': 'SSL', 45 | 'ssl_cafile': '', 46 | 'ssl_certfile': '', 47 | 'ssl_keyfile': '', 48 | }, 49 | 'MIN_MESSAGE_LAG_MS': 0, 50 | 'DEFAULT_FORMAT': 'json', 51 | 'PRODUCER_ID': 'my-application-name', 52 | } 53 | ``` 54 | 55 | If you're using AWS Kinesis instead of Kafka, it will look like this: 56 | 57 | ```py 58 | LOGPIPE = { 59 | # Required Settings 60 | 'OFFSET_BACKEND': 'logpipe.backend.kinesis.ModelOffsetStore', 61 | 'CONSUMER_BACKEND': 'logpipe.backend.kinesis.Consumer', 62 | 'PRODUCER_BACKEND': 'logpipe.backend.kinesis.Producer', 63 | 64 | # Optional Settings 65 | # 'KINESIS_REGION': 'us-east-1', 66 | # 'KINESIS_FETCH_LIMIT': 25, 67 | # 'KINESIS_SEQ_NUM_CACHE_SIZE': 1000, 68 | # 'MIN_MESSAGE_LAG_MS': 0, 69 | # 'DEFAULT_FORMAT': 'json', 70 | # 'PRODUCER_ID': 'my-application-name', 71 | # 'KINESIS_SHARD_ITERATOR_TYPE': "LATEST" | "TRIM_HORIZON" (default) 72 | } 73 | ``` 74 | 75 | Run migrations. This will create the model used to store Kafka log position offsets. 76 | 77 | ```sh 78 | python manage.py migrate logpipe 79 | ``` 80 | -------------------------------------------------------------------------------- /docs/releases.md: -------------------------------------------------------------------------------- 1 | # Release Notes 2 | 3 | ## 1.4.0 4 | 5 | - Drop support for Python 3.10. 6 | - Add support for Python 3.12. 7 | - Add Python type annotations. 8 | - Add support for using [Pydantic](https://docs.pydantic.dev/) models as an alternative to DRF serializers. 9 | 10 | ## 1.3.0 11 | 12 | - Add PRODUCER_ID setting to aid in debugging which systems sent which messages, especially when interrogating logged messages. 13 | 14 | ## 1.2.0 15 | 16 | - Add Python 3.10 and 3.11 to test suite. 17 | - Add Django 4.0 and 4.1 to test suite. 18 | - Drop Python 3.8 from test suite. 19 | - Drop Django 2.2, 3.0, and 3.1 from test suite. 20 | - Added missing DB migrations (though no actual DB changes exist). 21 | 22 | ## 1.1.0 23 | 24 | - Add Python 3.9 to test suite 25 | - Add Django 3.2 to test suite 26 | 27 | ## 1.0.0 28 | 29 | - No changes. 30 | 31 | ## 0.3.2 32 | 33 | - Fix compatibility issue with Django 3.0 34 | 35 | ## 0.3.1 36 | 37 | - Internationalization 38 | 39 | ## 0.3.0 40 | 41 | - In KinesisOffset model, track the AWS region for a stream. This allows a single database to subscribe to multiple streams in different regions, even it they have the same name. 42 | - Improved logic for detecting the current AWS region. 43 | - Add Django 2.1 to tox test suite. 44 | - Add support for Python 3.7. 45 | - Add support for python-kafka 1.4.4. 46 | 47 | ## 0.2.1 48 | 49 | - More robustly handle exceptions thrown by a consumer serializer's `save()` method. 50 | - Improve log messages and levels for invalid or unknown messages. 
51 | - Add new method: `logpipe.Consumer.add_ignored_message_type`, which allows the consumer to explicitly ignore specific message types silently. This helps to filter log noise (messages that a consumer really doesn't care about) from actual errors (messages a consumer is skipping, but should be processing). 52 | 53 | ## 0.2.0 54 | 55 | - Added concept of message types. 56 | - Added support for AWS Kinesis. 57 | 58 | ## 0.1.0 59 | 60 | - Initial release. 61 | -------------------------------------------------------------------------------- /docs/usage.md: -------------------------------------------------------------------------------- 1 | # Usage Guide 2 | 3 | Usage of django-logpipe differs slightly based on whether you choose to use [django-rest-framework (DRF)](https://www.django-rest-framework.org/) serializers or [Pydantic](https://docs.pydantic.dev) serializers. Continue reading to see how to handle each case. 4 | 5 | ## Serializers 6 | 7 | ### DRF Serializers 8 | 9 | The first step in either sending or receiving messages with `logpipe` is to define a serializer. Serializers for `logpipe` have a few rules: 10 | 11 | 1. Must be either a subclass of `rest_framework.serializers.Serializer` or a class implementing an interface that mimics `rest_framework.serializers.Serializer`. 12 | 2. Must have a `MESSAGE_TYPE` attribute defined on the class. The value should be a string that uniquely defines the data-type within its Topic / Stream. 13 | 3. Must have a `VERSION` attribute defined on the class. The value should be a monotonic integer representing the schema version number. 14 | 4. Must have a `KEY_FIELD` attribute defined on the class, representing the name of the field to use as the message key. The message key is used by Kafka when performing log compaction and by Kinesis as the shard partition key. The property can be omitted for topics which do not require a key. 15 | 5. If the serializer will be used for incoming messages, it should implement the class method `lookup_instance(cls, **kwargs)`. This class method will be called with message data as keyword arguments directly before instantiating the serializer. It should look up and return the related object (if one exists) so that it can be passed to the serializer's `instance` argument during initialization. If no object exists yet (the message represents a new object), it should return `None`. 16 | 17 | Below is a sample Django model and its accompanying serializer. 18 | 19 | ```py title="myapp/models.py" 20 | from django.db import models 21 | from rest_framework import serializers 22 | import uuid 23 | 24 | 25 | class Person(models.Model): 26 | uuid = models.UUIDField(default=uuid.uuid4, unique=True) 27 | first_name = models.CharField(max_length=200) 28 | last_name = models.CharField(max_length=200) 29 | 30 | 31 | class PersonSerializer(serializers.ModelSerializer): 32 | MESSAGE_TYPE = 'person' 33 | VERSION = 1 34 | KEY_FIELD = 'uuid' 35 | 36 | class Meta: 37 | model = Person 38 | fields = ['uuid', 'first_name', 'last_name'] 39 | 40 | @classmethod 41 | def lookup_instance(cls, uuid, **kwargs): 42 | try: 43 | return Person.objects.get(uuid=uuid) 44 | except Person.DoesNotExist: 45 | pass 46 | ``` 47 | 48 | ### Pydantic Serializers 49 | 50 | As an alternative to using DRF serializers (as described above), you may also use Pydantic models. The same `MESSAGE_TYPE`, `VERSION`, `KEY_FIELD` must be defined as `ClassVar`s on the model class.
51 | 52 | ```py title="myapp/models.py" 53 | from typing import ClassVar 54 | from django.db import models 55 | from logpipe.abc import PydanticModel 56 | import uuid 57 | 58 | 59 | class Person(models.Model): 60 | uuid = models.UUIDField(default=uuid.uuid4, unique=True) 61 | first_name = models.CharField(max_length=200) 62 | last_name = models.CharField(max_length=200) 63 | 64 | 65 | class PersonSchema(PydanticModel): 66 | MESSAGE_TYPE: ClassVar[str] = 'person' 67 | VERSION: ClassVar[int] = 1 68 | KEY_FIELD: ClassVar[str] = 'uuid' 69 | 70 | uuid: uuid.UUID 71 | first_name: str 72 | last_name: str 73 | 74 | def save(self) -> Person: 75 | """ 76 | The save method is called when a `person` message is consumed from the 77 | data stream. 78 | """ 79 | try: 80 | person = Person.objects.get(uuid=self.uuid) 81 | except Person.DoesNotExist: 82 | person = Person() 83 | person.first_name = self.first_name 84 | person.last_name = self.last_name 85 | person.save() 86 | return person 87 | ``` 88 | 89 | ## Sending Messages 90 | 91 | ### DRF Producer 92 | 93 | Once a serializer exists, you can send a message to Kafka by creating a Producer object and calling the `send` method. 94 | 95 | ```py 96 | from logpipe import DRFProducer 97 | from .models import Person, PersonSerializer 98 | 99 | joe = Person.objects.create( 100 | first_name='Joe', 101 | last_name='Schmoe', 102 | ) 103 | producer = DRFProducer('people', PersonSerializer) 104 | producer.send(joe) 105 | ``` 106 | 107 | The above sample code would result in the following message being sent to the Kafka topic named `people`. 108 | 109 | ```txt 110 | json:{"type":"person","version":1,"producer":"my-application-name","message":{"first_name":"Joe","last_name":"Schmoe","uuid":"xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx"}} 111 | ``` 112 | 113 | ### Pydantic Producer 114 | 115 | If using a Pydantic model instead of a DRF serializer, use the `PydanticProducer` class instead of `DRFProducer`. 116 | 117 | ```py 118 | from logpipe import PydanticProducer 119 | from .models import PersonSchema 120 | import uuid 121 | 122 | joe = PersonSchema( 123 | uuid=uuid.uuid4(), 124 | first_name='Joe', 125 | last_name='Schmoe', 126 | ) 127 | producer = PydanticProducer('people') 128 | producer.send(joe) 129 | ``` 130 | 131 | The above sample code would result in the following message being sent to the Kafka topic named `people`. 132 | 133 | ```txt 134 | json:{"type":"person","version":1,"producer":"my-application-name","message":{"first_name":"Joe","last_name":"Schmoe","uuid":"xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx"}} 135 | ``` 136 | 137 | ## Receiving Messages 138 | 139 | To process incoming messages, we can reuse the same model and serializer. We just need to instantiate a Consumer object. Unlike Producers, there are no separate Consumer classes for DRF vs. Pydantic serializers. Either type of serializer can be passed into the `Consumer.register` method. 140 | 141 | ```py 142 | from logpipe import Consumer 143 | from .models import PersonSerializer, PersonSchema 144 | 145 | # Watch for messages, but timeout after 1000ms of no messages 146 | consumer = Consumer('people', consumer_timeout_ms=1000) 147 | consumer.register(PersonSerializer) 148 | consumer.run() 149 | 150 | # Watch for messages and block forever 151 | consumer = Consumer('people') 152 | consumer.register(PersonSerializer) 153 | consumer.run() 154 | 155 | # Pydantic serializers work here too.
156 | consumer = Consumer('people') 157 | consumer.register(PersonSchema) 158 | consumer.run() 159 | ``` 160 | 161 | The consumer object uses Django REST Framework's built-in `save`, `create`, and `update` methods to apply the message. If your messages aren't tied directly to a Django model, skip defining the `lookup_instance` class method and override the `save` method to house your custom import logic. 162 | 163 | ### Consuming Multiple Data-Types Per Topic 164 | 165 | If you have multiple data-types in a single topic or stream, you can consume them all by registering multiple serializers with the consumer. 166 | 167 | ```py 168 | from logpipe import Consumer 169 | from .models import PersonSerializer, PlaceSerializer, ThingSerializer 170 | 171 | consumer = Consumer('nouns') 172 | consumer.register(PersonSerializer) 173 | consumer.register(PlaceSerializer) 174 | consumer.register(ThingSerializer) 175 | consumer.run() 176 | ``` 177 | 178 | You can also support multiple incompatible versions of message types by defining a serializer for each message type version and registering them all with the consumer. 179 | 180 | ```py 181 | from logpipe import Consumer 182 | from .models import ( 183 | PersonSerializerVersion1, 184 | PersonSerializerVersion2, 185 | PlaceSerializer, 186 | ThingSerializer, 187 | ) 188 | 189 | consumer = Consumer('nouns') 190 | consumer.register(PersonSerializerVersion1) 191 | consumer.register(PersonSerializerVersion2) 192 | consumer.register(PlaceSerializer) 193 | consumer.register(ThingSerializer) 194 | consumer.run() 195 | ``` 196 | 197 | ### Consuming from Multiple Topics 198 | 199 | If you have multiple streams or topics to watch, make a consumer for each, and watch them all simultaneously in the same process by using a MultiConsumer. 200 | 201 | ```py 202 | from logpipe import MultiConsumer, Consumer 203 | from .models import ( 204 | PersonSerializer, 205 | PlaceSerializer, 206 | ) 207 | 208 | people_consumer = Consumer('people') 209 | people_consumer.register(PersonSerializer) 210 | 211 | places_consumer = Consumer('places') 212 | places_consumer.register(PlaceSerializer) 213 | 214 | multi = MultiConsumer(people_consumer, places_consumer) 215 | 216 | # Watch for 'people' and 'places' topics indefinitely 217 | multi.run() 218 | ``` 219 | 220 | ### Management Commands 221 | 222 | Finally, consumers can be registered and run automatically by the built-in `run_kafka_consumer` management command. 223 | 224 | ```py 225 | # myapp/apps.py 226 | from django.apps import AppConfig 227 | from logpipe import Consumer, register_consumer 228 | 229 | class MyAppConfig(AppConfig): 230 | name = 'myapp' 231 | 232 | # Register consumers with logpipe 233 | @register_consumer 234 | def build_person_consumer(): 235 | consumer = Consumer('people') 236 | consumer.register(PersonSerializer) 237 | return consumer 238 | ``` 239 | 240 | Use the `register_consumer` decorator to register as many consumers and topics as you need to work with. Then, run the `run_kafka_consumer` command to process messages for all consumers automatically in a round-robin fashion. 241 | 242 | ```sh 243 | python manage.py run_kafka_consumer 244 | ``` 245 | 246 | 247 | ## Dealing with Schema Changes 248 | 249 | Schema changes are handled using the `VERSION` attribute required on every serializer class. When sending, a producer includes the schema version number in the message data. Then, when a consumer receives a message, it looks for a registered serializer with a matching version number.
If no serializer is found with a matching version number, a `logpipe.exceptions.UnknownMessageVersionError` exception is raised. 250 | 251 | To perform a backwards-incompatible schema change, the following steps should be performed. 252 | 253 | 1. Update consumer code to have knowledge of the new schema version. 254 | 2. Update producer code to begin sending the new schema version. 255 | 3. After some amount of time (when you are sure no old-version messages still exist in Kafka), remove the code related to the old schema version. 256 | 257 | For example, if we wanted to require an email field on the `Person` model we defined above, the first step would be to update consumers to know about the new field. 258 | 259 | ```py 260 | class Person(models.Model): 261 | uuid = models.UUIDField(default=uuid.uuid4, unique=True) 262 | first_name = models.CharField(max_length=200) 263 | last_name = models.CharField(max_length=200) 264 | email = models.EmailField(max_length=200, null=True) 265 | 266 | 267 | class PersonSerializerV1(serializers.ModelSerializer): 268 | MESSAGE_TYPE = 'person' 269 | VERSION = 1 270 | KEY_FIELD = 'uuid' 271 | 272 | class Meta: 273 | model = Person 274 | fields = ['uuid', 'first_name', 'last_name'] 275 | 276 | 277 | class PersonSerializerV2(PersonSerializerV1): 278 | MESSAGE_TYPE = 'person' 279 | VERSION = 2 280 | 281 | class Meta(PersonSerializerV1.Meta): 282 | fields = ['uuid', 'first_name', 'last_name', 'email'] 283 | 284 | 285 | consumer = Consumer('people', consumer_timeout_ms=1000) 286 | consumer.register(PersonSerializerV1) 287 | consumer.register(PersonSerializerV2) 288 | ``` 289 | 290 | The consumers will now use the appropriate serializer for the message version. Second, we need to update producer code to begin using schema version 2. 291 | 292 | ```py 293 | producer = Producer('people', PersonSerializerV2) 294 | ``` 295 | 296 | Finally, after all the old version 1 messages have been dropped (by log compaction), the `PersonSerializerV1` class can be removed from the code base.
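For reference, once the producer has switched to `PersonSerializerV2`, messages written to the `people` topic follow the same wire format as the version 1 examples shown earlier, with the version bumped to 2 and the new `email` field included. This is a sketch based on those earlier examples; the exact field ordering and the field values shown here are illustrative.

```txt
json:{"type":"person","version":2,"producer":"my-application-name","message":{"first_name":"Joe","last_name":"Schmoe","email":"joe@example.com","uuid":"xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx"}}
```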
297 | -------------------------------------------------------------------------------- /manage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | import sys 4 | 5 | 6 | def main(): 7 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "sandbox.settings") 8 | 9 | from django.core.management import execute_from_command_line 10 | 11 | execute_from_command_line(sys.argv) 12 | 13 | 14 | if __name__ == "__main__": 15 | main() 16 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | --- 2 | site_name: django-logpipe 3 | 4 | theme: 5 | name: material 6 | icon: 7 | repo: fontawesome/brands/git-alt 8 | features: 9 | - search.suggest 10 | - search.highlight 11 | - navigation.footer 12 | 13 | repo_name: django-logpipe 14 | repo_url: https://gitlab.com/thelabnyc/django-logpipe 15 | edit_uri: -/edit/master/docs/ 16 | 17 | docs_dir: docs/ 18 | site_dir: build/mkdocs/ 19 | 20 | extra_css: 21 | - custom-styles.css 22 | 23 | markdown_extensions: 24 | - toc: 25 | permalink: True 26 | # - extra 27 | - abbr 28 | # - attr_list 29 | - def_list 30 | # - fenced_code 31 | - footnotes 32 | - md_in_html 33 | # - tables 34 | - codehilite 35 | - smarty 36 | - admonition 37 | - pymdownx.superfences: 38 | custom_fences: 39 | - name: mermaid 40 | class: mermaid 41 | format: !!python/name:pymdownx.superfences.fence_div_format 42 | - pymdownx.arithmatex: 43 | generic: true 44 | - pymdownx.tasklist: 45 | custom_checkbox: true 46 | clickable_checkbox: true 47 | - mkautodoc 48 | 49 | use_directory_urls: false 50 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ "poetry-core>=2.1.2",] 3 | build-backend = "poetry.core.masonry.api" 4 | 5 | [tool.poetry] 6 | name = "django-logpipe" 7 | version = "1.5.0" 8 | description = "Move data around between Python services using Kafka and/or AWS Kinesis and Django Rest Framework serializers." 
9 | authors = ["thelab "] 10 | readme = "README.md" 11 | homepage = "https://gitlab.com/thelabnyc/django-logpipe" 12 | repository = "https://gitlab.com/thelabnyc/django-logpipe" 13 | license = "ISC" 14 | 15 | [[tool.poetry.packages]] 16 | include = "logpipe" 17 | from = "src" 18 | 19 | [tool.poetry.dependencies] 20 | python = "^3.11" 21 | Django = ">=4.2" 22 | djangorestframework = ">=3.16.0" 23 | lru-dict = ">=1.3.0" 24 | pydantic = "^2.11.3" 25 | kafka-python = {version = "^2.1.5", optional = true} 26 | boto3 = {version = "^1.37.36", optional = true} 27 | msgpack = {version = "^1.1.0", optional = true} 28 | 29 | [tool.poetry.extras] 30 | kafka = ["kafka-python"] 31 | kinesis = ["boto3", "boto3-stubs", "botocore-stubs"] 32 | msgpack = ["msgpack"] 33 | 34 | [tool.poetry.group.dev.dependencies] 35 | coverage = "7.8.2" 36 | flake8 = "7.2.0" 37 | moto = "5.1.5" 38 | boto3 = "^1.37.36" 39 | boto3-stubs = {version = "^1.37.36", extras = ["kinesis"]} 40 | botocore-stubs = {version = "^1.37.36"} 41 | psycopg2-binary = "2.9.10" 42 | tox = "4.26.0" 43 | pytz = "==2025.2" 44 | kafka-python = {version = "2.2.11", optional = true} 45 | mypy = "1.16.0" 46 | django-stubs = {version = "5.2.0"} 47 | djangorestframework-stubs = "3.16.0" 48 | msgpack-types = "0.5.0" 49 | 50 | [tool.poetry.group.docs.dependencies] 51 | mkdocs = "^1.6.1" 52 | pymdown-extensions = "^10.14.3" 53 | mkdocs-material = "^9.6.12" 54 | mkautodoc = "^0.2.0" 55 | 56 | [tool.mypy] 57 | python_version = "3.11" 58 | plugins = ["mypy_django_plugin.main", "mypy_drf_plugin.main"] 59 | 60 | # Strict mode, see mypy --help 61 | warn_unused_configs = true 62 | disallow_subclassing_any = true 63 | disallow_any_generics = true 64 | disallow_untyped_calls = true 65 | disallow_untyped_defs = true 66 | disallow_incomplete_defs = true 67 | check_untyped_defs = true 68 | disallow_untyped_decorators = true 69 | no_implicit_optional = true 70 | warn_redundant_casts = true 71 | warn_unused_ignores = true 72 | # warn_return_any = true 73 | no_implicit_reexport = true 74 | show_error_codes = true 75 | # Not turned on by strict 76 | strict_equality = true 77 | 78 | [[tool.mypy.overrides]] 79 | module = "kafka.*" 80 | ignore_missing_imports = true 81 | 82 | [[tool.mypy.overrides]] 83 | module = "logpipe.tests.*" 84 | ignore_errors = true 85 | 86 | [tool.django-stubs] 87 | django_settings_module = "sandbox.settings" 88 | 89 | [tool.isort] 90 | profile = "black" 91 | from_first = true 92 | 93 | [tool.coverage.run] 94 | branch = true 95 | source_pkgs = ["logpipe"] 96 | omit = [ 97 | "*/migrations/*", 98 | "*/snapshots/*", 99 | "*/tests/*", 100 | ] 101 | 102 | [tool.coverage.report] 103 | show_missing = true 104 | ignore_errors = true 105 | 106 | [tool.commitizen] 107 | name = "cz_conventional_commits" 108 | annotated_tag = true 109 | gpg_sign = true 110 | tag_format = "v$version" 111 | update_changelog_on_bump = true 112 | changelog_merge_prerelease = true 113 | version_provider = "poetry" 114 | version_scheme = "pep440" 115 | version_files = [ 116 | "pyproject.toml:version", 117 | ] 118 | pre_bump_hooks = [ 119 | "pre-commit run --all-files || true", 120 | ] 121 | post_bump_hooks = [ 122 | "git push origin master $CZ_POST_CURRENT_TAG_VERSION" 123 | ] 124 | -------------------------------------------------------------------------------- /renovate.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://docs.renovatebot.com/renovate-schema.json", 3 | "extends": 
["gitlab>thelabnyc/renovate-config:library"] 4 | } 5 | -------------------------------------------------------------------------------- /sandbox/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thelabnyc/django-logpipe/4f4f017e400d9ccbd2148d3e735e0269df692748/sandbox/__init__.py -------------------------------------------------------------------------------- /sandbox/lptester/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thelabnyc/django-logpipe/4f4f017e400d9ccbd2148d3e735e0269df692748/sandbox/lptester/__init__.py -------------------------------------------------------------------------------- /sandbox/lptester/admin.py: -------------------------------------------------------------------------------- 1 | from django.contrib import admin 2 | 3 | from . import models 4 | 5 | 6 | @admin.register(models.Person) 7 | class PersonAdmin(admin.ModelAdmin[models.Person]): 8 | fields = ["uuid", "first_name", "last_name"] 9 | readonly_fields = ["uuid"] 10 | list_display = ["uuid", "first_name", "last_name"] 11 | -------------------------------------------------------------------------------- /sandbox/lptester/apps.py: -------------------------------------------------------------------------------- 1 | from django.apps import AppConfig 2 | 3 | 4 | class LPTesterConfig(AppConfig): 5 | name = "sandbox.lptester" 6 | label = "lptester" 7 | default = True 8 | 9 | def ready(self) -> None: 10 | from . import consumers, producers # NOQA 11 | -------------------------------------------------------------------------------- /sandbox/lptester/constants.py: -------------------------------------------------------------------------------- 1 | TOPIC_PEOPLE = "people" 2 | -------------------------------------------------------------------------------- /sandbox/lptester/consumers.py: -------------------------------------------------------------------------------- 1 | from logpipe import Consumer, register_consumer 2 | 3 | from . 
import constants, serializers 4 | 5 | 6 | @register_consumer 7 | def build_person_consumer() -> Consumer: 8 | consumer = Consumer(constants.TOPIC_PEOPLE) 9 | consumer.register(serializers.PersonSerializer) 10 | return consumer 11 | -------------------------------------------------------------------------------- /sandbox/lptester/migrations/0001_initial.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 1.9.6 on 2016-07-04 14:29 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | initial = True 8 | 9 | dependencies = [] 10 | 11 | operations = [ 12 | migrations.CreateModel( 13 | name="Person", 14 | fields=[ 15 | ( 16 | "id", 17 | models.AutoField( 18 | auto_created=True, 19 | primary_key=True, 20 | serialize=False, 21 | verbose_name="ID", 22 | ), 23 | ), 24 | ("first_name", models.CharField(max_length=200)), 25 | ("last_name", models.CharField(max_length=200)), 26 | ], 27 | ), 28 | ] 29 | -------------------------------------------------------------------------------- /sandbox/lptester/migrations/0002_person_uuid.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 1.9.6 on 2016-07-04 15:27 2 | 3 | import uuid 4 | 5 | from django.db import migrations, models 6 | 7 | 8 | class Migration(migrations.Migration): 9 | dependencies = [ 10 | ("lptester", "0001_initial"), 11 | ] 12 | 13 | operations = [ 14 | migrations.AddField( 15 | model_name="person", 16 | name="uuid", 17 | field=models.UUIDField(default=uuid.uuid4, unique=True), 18 | ), 19 | ] 20 | -------------------------------------------------------------------------------- /sandbox/lptester/migrations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thelabnyc/django-logpipe/4f4f017e400d9ccbd2148d3e735e0269df692748/sandbox/lptester/migrations/__init__.py -------------------------------------------------------------------------------- /sandbox/lptester/models.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | import uuid 3 | 4 | from django.db import models 5 | 6 | from .signals import person_altered 7 | 8 | 9 | class Person(models.Model): 10 | uuid = models.UUIDField(default=uuid.uuid4, unique=True) 11 | first_name = models.CharField(max_length=200) 12 | last_name = models.CharField(max_length=200) 13 | 14 | _disable_kafka_signals = False 15 | 16 | def save(self, *args: Any, **kwargs: Any) -> None: 17 | ret = super().save(*args, **kwargs) 18 | if not self._disable_kafka_signals: 19 | person_altered.send(sender=self.__class__, person=self) 20 | return ret 21 | -------------------------------------------------------------------------------- /sandbox/lptester/producers.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | from django.dispatch import receiver 4 | 5 | from logpipe import Producer 6 | 7 | from . 
import constants, models, serializers, signals 8 | 9 | 10 | @receiver( 11 | signals.person_altered, 12 | sender=models.Person, 13 | dispatch_uid="send_person_altered_message", 14 | ) 15 | def send_person_altered_message( 16 | sender: type[models.Person], 17 | person: models.Person, 18 | **kwargs: Any, 19 | ) -> None: 20 | producer = Producer(constants.TOPIC_PEOPLE, serializers.PersonSerializer) 21 | producer.send(person) 22 | -------------------------------------------------------------------------------- /sandbox/lptester/serializers.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | from rest_framework import serializers 4 | 5 | from logpipe.abc import DRFSerializer 6 | 7 | from . import models 8 | 9 | 10 | class PersonSerializer( 11 | serializers.ModelSerializer[models.Person], 12 | DRFSerializer[models.Person], 13 | ): 14 | VERSION = 1 15 | KEY_FIELD = "uuid" 16 | 17 | class Meta: 18 | model = models.Person 19 | fields = ["uuid", "first_name", "last_name"] 20 | 21 | @classmethod 22 | def lookup_instance(cls, **kwargs: Any) -> models.Person | None: 23 | uuid = kwargs.get("uuid") 24 | if uuid is None: 25 | return None 26 | try: 27 | person = models.Person.objects.get(uuid=uuid) 28 | person._disable_kafka_signals = True 29 | return person 30 | except models.Person.DoesNotExist: 31 | pass 32 | return None 33 | -------------------------------------------------------------------------------- /sandbox/lptester/signals.py: -------------------------------------------------------------------------------- 1 | import django.dispatch 2 | 3 | person_altered = django.dispatch.Signal() 4 | -------------------------------------------------------------------------------- /sandbox/settings.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from django.utils.translation import gettext_lazy as _ 4 | import django_stubs_ext 5 | 6 | django_stubs_ext.monkeypatch() 7 | 8 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 9 | 10 | DEBUG = True 11 | SECRET_KEY = "li0$-gnv)76g$yf7p@(cg-^_q7j6df5cx$o-gsef5hd68phj!4" 12 | SITE_ID = 1 13 | ROOT_URLCONF = "sandbox.urls" 14 | ALLOWED_HOSTS = ["*"] 15 | 16 | USE_I18N = True 17 | LANGUAGE_CODE = "en-us" 18 | LANGUAGES = ( 19 | ("en-us", _("English")), 20 | ("es", _("Spanish")), 21 | ) 22 | 23 | INSTALLED_APPS = [ 24 | "django.contrib.admin", 25 | "django.contrib.auth", 26 | "django.contrib.contenttypes", 27 | "django.contrib.sessions", 28 | "django.contrib.sites", 29 | "django.contrib.messages", 30 | "django.contrib.staticfiles", 31 | "django.contrib.flatpages", 32 | "logpipe", 33 | "sandbox.lptester", 34 | ] 35 | 36 | MIDDLEWARE = ( 37 | "django.contrib.sessions.middleware.SessionMiddleware", 38 | "django.middleware.locale.LocaleMiddleware", 39 | "django.middleware.common.CommonMiddleware", 40 | "django.middleware.csrf.CsrfViewMiddleware", 41 | "django.contrib.auth.middleware.AuthenticationMiddleware", 42 | "django.contrib.messages.middleware.MessageMiddleware", 43 | "django.middleware.clickjacking.XFrameOptionsMiddleware", 44 | "django.middleware.security.SecurityMiddleware", 45 | "django.contrib.flatpages.middleware.FlatpageFallbackMiddleware", 46 | ) 47 | 48 | AUTHENTICATION_BACKENDS = ("django.contrib.auth.backends.ModelBackend",) 49 | 50 | TEMPLATES = [ 51 | { 52 | "BACKEND": "django.template.backends.django.DjangoTemplates", 53 | "DIRS": [], 54 | "APP_DIRS": True, 55 | "OPTIONS": { 56 | "context_processors": [ 57 | 
"django.template.context_processors.debug", 58 | "django.template.context_processors.request", 59 | "django.contrib.auth.context_processors.auth", 60 | "django.contrib.messages.context_processors.messages", 61 | "django.template.context_processors.i18n", 62 | ], 63 | }, 64 | }, 65 | ] 66 | 67 | DEFAULT_AUTO_FIELD = "django.db.models.AutoField" 68 | DATABASES = { 69 | "default": { 70 | "ENGINE": "django.db.backends.postgresql", 71 | "NAME": "postgres", 72 | "USER": "postgres", 73 | "PASSWORD": "", 74 | "HOST": "postgres", 75 | "PORT": 5432, 76 | } 77 | } 78 | 79 | 80 | STATIC_URL = "/static/" 81 | 82 | 83 | LOGPIPE = { 84 | "KAFKA_BOOTSTRAP_SERVERS": ["spotify__kafka:9092"], 85 | "KAFKA_CONSUMER_KWARGS": { 86 | "group_id": "django-logpipe", 87 | }, 88 | # OFFSET_BACKEND: Defaults to logpipe.backend.kafka.ModelOffsetStore. 89 | # CONSUMER_BACKEND: Defaults to logpipe.backend.kafka.Consumer. 90 | # PRODUCER_BACKEND: Defaults to logpipe.backend.kafka.Producer. 91 | # KAFKA_BOOTSTRAP_SERVERS: List of Kafka hostname:post pairs. Required when using Kafka. 92 | # KAFKA_SEND_TIMEOUT: Defaults to 10 seconds. 93 | # KAFKA_MAX_SEND_RETRIES: Defaults to 0 retry attempts. 94 | # KINESIS_REGION: Defaults to 'us-east-1'. 95 | # KINESIS_FETCH_LIMIT: Defaults to 25 records. 96 | # KINESIS_SEQ_NUM_CACHE_SIZE: Defaults to 1000. 97 | # MIN_MESSAGE_LAG_MS: Defaults to 0ms 98 | # DEFAULT_FORMAT: Defaults to 'json' 99 | } 100 | 101 | 102 | LOGGING = { 103 | "version": 1, 104 | "disable_existing_loggers": False, 105 | "formatters": { 106 | "verbose": { 107 | "format": "%(asctime)s django %(name)s: %(levelname)s %(process)d %(thread)d %(message)s", 108 | "datefmt": "%Y-%m-%dT%H:%M:%S", 109 | }, 110 | }, 111 | "handlers": {"console": {"class": "logging.StreamHandler", "formatter": "verbose"}}, 112 | "loggers": { 113 | "logpipe": { 114 | "level": "CRITICAL", 115 | } 116 | }, 117 | "root": { 118 | "handlers": ["console"], 119 | "level": "CRITICAL", 120 | }, 121 | } 122 | -------------------------------------------------------------------------------- /sandbox/urls.py: -------------------------------------------------------------------------------- 1 | from django.contrib import admin 2 | from django.urls import path 3 | 4 | urlpatterns = (path("admin/", admin.site.urls),) 5 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore = W503 3 | extend-ignore = E203 4 | max-line-length = 160 5 | exclude = migrations 6 | -------------------------------------------------------------------------------- /src/logpipe/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from django.core.exceptions import ImproperlyConfigured 4 | 5 | from . 
import format, settings 6 | from .constants import FORMAT_JSON, FORMAT_MSGPACK, FORMAT_PICKLE 7 | from .consumer import Consumer, MultiConsumer 8 | from .formats.json import JSONParser, JSONRenderer 9 | from .formats.msgpack import MsgPackParser, MsgPackRenderer 10 | from .formats.pickle import PickleParser, PickleRenderer 11 | from .producer import DRFProducer, Producer, PydanticProducer 12 | from .registry import register_consumer 13 | 14 | logger = logging.getLogger(__name__) 15 | 16 | 17 | _default_format = settings.get("DEFAULT_FORMAT", FORMAT_JSON) 18 | _allow_incoming_pickle = settings.get("ALLOW_INCOMING_PICKLE", False) 19 | if _default_format == FORMAT_PICKLE: 20 | if not _allow_incoming_pickle: 21 | raise ImproperlyConfigured( 22 | "Cannot set DEFAULT_FORMAT to Pickle unless the ALLOW_INCOMING_PICKLE setting is enabled." 23 | ) 24 | logger.warning( 25 | "DEFAULT_FORMAT is set to Pickle. This is insecure and probably isn't a good idea." 26 | ) 27 | 28 | format.register(FORMAT_JSON, JSONRenderer(), JSONParser()) 29 | format.register(FORMAT_MSGPACK, MsgPackRenderer(), MsgPackParser()) 30 | 31 | if _allow_incoming_pickle: 32 | format.register(FORMAT_PICKLE, PickleRenderer(), PickleParser()) 33 | 34 | 35 | __all__ = [ 36 | "FORMAT_JSON", 37 | "FORMAT_MSGPACK", 38 | "FORMAT_PICKLE", 39 | "DRFProducer", 40 | "PydanticProducer", 41 | "Producer", 42 | "Consumer", 43 | "MultiConsumer", 44 | "register_consumer", 45 | ] 46 | -------------------------------------------------------------------------------- /src/logpipe/abc.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from abc import abstractmethod 4 | from collections.abc import Iterable, Mapping 5 | from enum import Enum, auto 6 | from typing import ( 7 | IO, 8 | Any, 9 | ClassVar, 10 | Literal, 11 | NamedTuple, 12 | Protocol, 13 | TypeGuard, 14 | TypeVar, 15 | ) 16 | 17 | from django.db import models 18 | from pydantic import BaseModel 19 | from rest_framework import serializers 20 | 21 | MessageType = str 22 | MessageVersion = int 23 | TopicName = str 24 | 25 | _IN = TypeVar("_IN", bound=models.Model) # Instance Type 26 | 27 | 28 | class Record(NamedTuple): 29 | topic: str 30 | partition: str 31 | offset: str | int 32 | timestamp: int | float 33 | key: str 34 | value: str | bytes 35 | 36 | 37 | class RecordMetadata(NamedTuple): 38 | topic: str 39 | partition: str 40 | offset: str 41 | 42 | 43 | class ConsumerBackend(Iterable[Record]): 44 | topic_name: TopicName 45 | 46 | def __init__(self, topic_name: TopicName, **kwargs: Any): 47 | pass 48 | 49 | def seek_to_sequence_number( 50 | self, shard: str, sequence_number: str | None = None 51 | ) -> None: 52 | raise NotImplementedError() 53 | 54 | @abstractmethod 55 | def __iter__(self) -> ConsumerBackend: 56 | pass 57 | 58 | @abstractmethod 59 | def __next__(self) -> Record: 60 | pass 61 | 62 | 63 | class ProducerBackend(Protocol): 64 | def send( 65 | self, topic_name: TopicName, key: str, value: bytes 66 | ) -> RecordMetadata | None: 67 | pass 68 | 69 | 70 | class OffsetStoreBackend(Protocol): 71 | def commit(self, consumer: ConsumerBackend, message: Record) -> None: 72 | pass 73 | 74 | def seek(self, consumer: ConsumerBackend, topic: TopicName, partition: str) -> None: 75 | pass 76 | 77 | 78 | class Renderer(Protocol): 79 | media_type: str 80 | format: str 81 | charset: str | None 82 | render_style: str 83 | 84 | def render( 85 | self, 86 | data: dict[str, Any], 87 | media_type: str | None = None, 88 | 
renderer_context: Mapping[str, Any] | None = None, 89 | ) -> bytes: 90 | pass 91 | 92 | 93 | class Parser(Protocol): 94 | media_type: str 95 | 96 | def parse( 97 | self, 98 | stream: IO[Any], 99 | media_type: str | None = None, 100 | parser_context: Mapping[str, Any] | None = None, 101 | ) -> dict[str, Any]: 102 | pass 103 | 104 | 105 | class SerializerType(Enum): 106 | DRF = auto() 107 | PYDANTIC = auto() 108 | 109 | 110 | class DRFSerializer(serializers.Serializer[_IN]): 111 | _tag: ClassVar[Literal[SerializerType.DRF]] = SerializerType.DRF 112 | MESSAGE_TYPE: ClassVar[str] 113 | VERSION: ClassVar[int] 114 | KEY_FIELD: ClassVar[str] 115 | 116 | @classmethod 117 | def lookup_instance(cls, **kwargs: Any) -> _IN | None: 118 | raise NotImplementedError() 119 | 120 | 121 | class PydanticModel(BaseModel): 122 | _tag: ClassVar[Literal[SerializerType.PYDANTIC]] = SerializerType.PYDANTIC 123 | MESSAGE_TYPE: ClassVar[str] 124 | VERSION: ClassVar[int] 125 | KEY_FIELD: ClassVar[str] 126 | 127 | def save(self) -> Any: 128 | raise NotImplementedError() 129 | 130 | 131 | SerializerClass = type[DRFSerializer[Any]] | type[PydanticModel] 132 | Serializer = DRFSerializer[Any] | PydanticModel 133 | 134 | 135 | def is_pydantic_serializer_class( 136 | cls: SerializerClass, 137 | ) -> TypeGuard[type[PydanticModel]]: 138 | return hasattr(cls, "_tag") and cls._tag == SerializerType.PYDANTIC 139 | -------------------------------------------------------------------------------- /src/logpipe/admin.py: -------------------------------------------------------------------------------- 1 | from django.contrib import admin 2 | 3 | from . import models 4 | 5 | 6 | @admin.register(models.KafkaOffset) 7 | class KafkaOffsetAdmin(admin.ModelAdmin): # type: ignore[type-arg] 8 | fields = ["topic", "partition", "offset"] 9 | list_display = ["topic", "partition", "offset"] 10 | list_filter = ["topic", "partition"] 11 | readonly_fields = ["topic", "partition"] 12 | 13 | 14 | @admin.register(models.KinesisOffset) 15 | class KinesisOffsetAdmin(admin.ModelAdmin): # type: ignore[type-arg] 16 | fields = ["region", "stream", "shard", "sequence_number"] 17 | list_display = ["stream", "region", "shard", "sequence_number"] 18 | list_filter = ["stream", "region", "shard"] 19 | readonly_fields = ["region", "stream", "shard"] 20 | -------------------------------------------------------------------------------- /src/logpipe/apps.py: -------------------------------------------------------------------------------- 1 | from django.apps import AppConfig 2 | from django.utils.translation import gettext_lazy as _ 3 | 4 | 5 | class LogpipeConfig(AppConfig): 6 | name = "logpipe" 7 | label = "logpipe" 8 | # Translators: Backend Library Name 9 | verbose_name = _("LogPipe") 10 | -------------------------------------------------------------------------------- /src/logpipe/backend/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import Any 4 | 5 | from django.utils.module_loading import import_string 6 | 7 | from .. 
import settings 8 | from ..abc import ConsumerBackend, OffsetStoreBackend, ProducerBackend 9 | 10 | 11 | def get_offset_backend() -> OffsetStoreBackend: 12 | default = "logpipe.backend.kafka.ModelOffsetStore" 13 | backend_path = settings.get("OFFSET_BACKEND", default) 14 | return import_string(backend_path)() 15 | 16 | 17 | def get_consumer_backend(topic_name: str, **kwargs: Any) -> ConsumerBackend: 18 | default = "logpipe.backend.kafka.Consumer" 19 | backend_path = settings.get("CONSUMER_BACKEND", default) 20 | return import_string(backend_path)(topic_name, **kwargs) 21 | 22 | 23 | def get_producer_backend() -> ProducerBackend: 24 | default = "logpipe.backend.kafka.Producer" 25 | backend_path = settings.get("PRODUCER_BACKEND", default) 26 | return import_string(backend_path)() 27 | -------------------------------------------------------------------------------- /src/logpipe/backend/dummy.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import Any 4 | import collections 5 | import time 6 | 7 | from ..abc import ( 8 | ConsumerBackend, 9 | OffsetStoreBackend, 10 | ProducerBackend, 11 | Record, 12 | RecordMetadata, 13 | TopicName, 14 | ) 15 | 16 | _topics: dict[TopicName, collections.deque[Record]] = {} 17 | _offsets: collections.Counter[str] = collections.Counter() 18 | 19 | 20 | def reset_topics() -> None: 21 | global _topics, _offsets 22 | _topics = {} 23 | _offsets = collections.Counter() 24 | return None 25 | 26 | 27 | class Consumer(ConsumerBackend): 28 | def __init__(self, topic_name: str, **kwargs: Any): 29 | self.topic_name = topic_name 30 | 31 | def seek_to_sequence_number( 32 | self, shard: str, sequence_number: str | None = None 33 | ) -> None: 34 | pass 35 | 36 | def __iter__(self) -> Consumer: 37 | return self 38 | 39 | def __next__(self) -> Record: 40 | _records = _topics.get(self.topic_name) 41 | if _records: 42 | try: 43 | return _records.popleft() 44 | except IndexError: 45 | pass 46 | raise StopIteration() 47 | 48 | 49 | class Producer(ProducerBackend): 50 | def send( 51 | self, topic_name: TopicName, key: str, value: bytes 52 | ) -> RecordMetadata | None: 53 | _offsets[topic_name] += 1 54 | record = Record( 55 | topic=topic_name, 56 | partition="0", 57 | offset=_offsets[topic_name], 58 | timestamp=(time.time() * 1000), 59 | key=key, 60 | value=value, 61 | ) 62 | if topic_name not in _topics: 63 | _topics[topic_name] = collections.deque() 64 | _topics[topic_name].append(record) 65 | return RecordMetadata( 66 | topic=topic_name, 67 | partition=record.partition, 68 | offset=str(record.offset), 69 | ) 70 | 71 | 72 | class ModelOffsetStore(OffsetStoreBackend): 73 | def commit(self, consumer: ConsumerBackend, message: Record) -> None: 74 | pass 75 | 76 | def seek(self, consumer: ConsumerBackend, topic: TopicName, partition: str) -> None: 77 | pass 78 | -------------------------------------------------------------------------------- /src/logpipe/backend/kafka.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import Any, NotRequired, TypedDict 4 | import logging 5 | 6 | from django.apps import apps 7 | import kafka 8 | 9 | from .. import settings 10 | from ..abc import ( 11 | ConsumerBackend, 12 | OffsetStoreBackend, 13 | ProducerBackend, 14 | Record, 15 | RecordMetadata, 16 | ) 17 | from ..exceptions import MissingTopicError 18 | from . 
import get_offset_backend 19 | 20 | logger = logging.getLogger(__name__) 21 | 22 | 23 | class KafkaClientConfig(TypedDict): 24 | bootstrap_servers: list[str] 25 | retries: NotRequired[int] 26 | auto_offset_reset: NotRequired[str] 27 | enable_auto_commit: NotRequired[bool] 28 | consumer_timeout_ms: NotRequired[int] 29 | 30 | 31 | class ModelOffsetStore(OffsetStoreBackend): 32 | def commit(self, consumer: ConsumerBackend, message: Record) -> None: 33 | if not isinstance(consumer, Consumer): 34 | raise TypeError("Consumer type mismatch") 35 | KafkaOffset = apps.get_model(app_label="logpipe", model_name="KafkaOffset") 36 | logger.debug( 37 | 'Commit offset "%s" for topic "%s", partition "%s" to %s' 38 | % ( 39 | message.offset, 40 | message.topic, 41 | message.partition, 42 | self.__class__.__name__, 43 | ) 44 | ) 45 | obj, created = KafkaOffset.objects.get_or_create( 46 | topic=message.topic, partition=message.partition 47 | ) 48 | obj.offset = int(message.offset) + 1 49 | obj.save() 50 | 51 | def seek(self, consumer: ConsumerBackend, topic: str, partition: str) -> None: 52 | if not isinstance(consumer, Consumer): 53 | raise TypeError("Consumer type mismatch") 54 | KafkaOffset = apps.get_model(app_label="logpipe", model_name="KafkaOffset") 55 | tp = kafka.TopicPartition(topic=topic, partition=partition) 56 | try: 57 | obj = KafkaOffset.objects.get(topic=topic, partition=partition) 58 | logger.debug( 59 | 'Seeking to offset "%s" on topic "%s", partition "%s"' 60 | % (obj.offset, topic, partition) 61 | ) 62 | consumer.client.seek(tp, obj.offset) 63 | except KafkaOffset.DoesNotExist: 64 | logger.debug( 65 | 'Seeking to beginning of topic "%s", partition "%s"' 66 | % (topic, partition) 67 | ) 68 | consumer.client.seek_to_beginning(tp) 69 | 70 | 71 | class KafkaOffsetStore(OffsetStoreBackend): 72 | def commit(self, consumer: ConsumerBackend, message: Record) -> None: 73 | if not isinstance(consumer, Consumer): 74 | raise TypeError("Consumer type mismatch") 75 | logger.debug( 76 | 'Commit offset "%s" for topic "%s", partition "%s" to %s' 77 | % ( 78 | message.offset, 79 | message.topic, 80 | message.partition, 81 | self.__class__.__name__, 82 | ) 83 | ) 84 | consumer.client.commit() 85 | 86 | def seek(self, consumer: ConsumerBackend, topic: str, partition: str) -> None: 87 | pass 88 | 89 | 90 | class Consumer(ConsumerBackend): 91 | _client = None 92 | 93 | def __init__(self, topic_name: str, **kwargs: Any): 94 | self.topic_name = topic_name 95 | self.client_kwargs = kwargs 96 | 97 | @property 98 | def client(self) -> kafka.KafkaConsumer: 99 | if not self._client: 100 | kwargs = self._get_client_config() 101 | self._client = kafka.KafkaConsumer(**kwargs) 102 | tps = self._get_topic_partitions() 103 | self._client.assign(tps) 104 | backend = get_offset_backend() 105 | for tp in tps: 106 | backend.seek(self, tp.topic, tp.partition) 107 | self._client.committed(tp) 108 | return self._client 109 | 110 | def __iter__(self) -> Consumer: 111 | return self 112 | 113 | def __next__(self) -> Record: 114 | r = next(self.client) 115 | record = Record( 116 | topic=r.topic, 117 | partition=r.partition, 118 | offset=r.offset, 119 | timestamp=r.timestamp, 120 | key=r.key, 121 | value=r.value, 122 | ) 123 | return record 124 | 125 | def _get_topic_partitions(self) -> list[kafka.TopicPartition]: 126 | p = [] 127 | partitions = self.client.partitions_for_topic(self.topic_name) 128 | if not partitions: 129 | raise MissingTopicError( 130 | "Could not find topic %s. Does it exist?" 
% self.topic_name 131 | ) 132 | for partition in partitions: 133 | tp = kafka.TopicPartition(self.topic_name, partition=partition) 134 | p.append(tp) 135 | return p 136 | 137 | def _get_client_config(self) -> KafkaClientConfig: 138 | kwargs = KafkaClientConfig( 139 | bootstrap_servers=settings.get("KAFKA_BOOTSTRAP_SERVERS"), 140 | auto_offset_reset="earliest", 141 | enable_auto_commit=False, 142 | consumer_timeout_ms=1000, 143 | ) 144 | kwargs.update(settings.get("KAFKA_KWARGS", {})) 145 | kwargs.update(settings.get("KAFKA_CONSUMER_KWARGS", {})) 146 | kwargs.update(self.client_kwargs) # type: ignore[typeddict-item] 147 | return kwargs 148 | 149 | 150 | class Producer(ProducerBackend): 151 | _client = None 152 | 153 | @property 154 | def client(self) -> kafka.KafkaProducer: 155 | if not self._client: 156 | kwargs = self._get_client_config() 157 | self._client = kafka.KafkaProducer(**kwargs) 158 | return self._client 159 | 160 | def send(self, topic_name: str, key: str, value: bytes) -> RecordMetadata: 161 | keybytes = key.encode() 162 | timeout = settings.get("KAFKA_SEND_TIMEOUT", 10) 163 | future = self.client.send(topic_name, key=keybytes, value=value) 164 | metadata = future.get(timeout=timeout) 165 | return RecordMetadata( 166 | topic=topic_name, 167 | partition=metadata.partition, 168 | offset=metadata.offset, 169 | ) 170 | 171 | def _get_client_config(self) -> KafkaClientConfig: 172 | servers = settings.get("KAFKA_BOOTSTRAP_SERVERS") 173 | retries = settings.get("KAFKA_MAX_SEND_RETRIES", 0) 174 | kwargs = KafkaClientConfig( 175 | bootstrap_servers=servers, 176 | retries=retries, 177 | ) 178 | kwargs.update(settings.get("KAFKA_KWARGS", {})) 179 | return kwargs 180 | -------------------------------------------------------------------------------- /src/logpipe/backend/kinesis.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING, Any, NotRequired, TypedDict 4 | import collections 5 | import logging 6 | import time 7 | 8 | from botocore.exceptions import ClientError 9 | from django.apps import apps 10 | from lru import LRU 11 | import boto3 12 | 13 | from .. import settings 14 | from ..abc import ( 15 | ConsumerBackend, 16 | OffsetStoreBackend, 17 | ProducerBackend, 18 | Record, 19 | RecordMetadata, 20 | ) 21 | from . 
import get_offset_backend 22 | 23 | if TYPE_CHECKING: 24 | from mypy_boto3_kinesis import KinesisClient 25 | from mypy_boto3_kinesis.type_defs import ( 26 | GetRecordsOutputTypeDef, 27 | PutRecordOutputTypeDef, 28 | ) 29 | 30 | logger = logging.getLogger(__name__) 31 | 32 | ShardID = str 33 | ShardIterator = str 34 | 35 | 36 | class KinesisClientConfig(TypedDict): 37 | region_name: str 38 | 39 | 40 | class PutRecordKwargs(TypedDict): 41 | StreamName: str 42 | Data: bytes 43 | PartitionKey: str 44 | SequenceNumberForOrdering: NotRequired[str] 45 | 46 | 47 | class KinesisBase: 48 | _client: KinesisClient | None = None 49 | 50 | @property 51 | def client(self) -> KinesisClient: 52 | if not self._client: 53 | kwargs = self._get_client_config() 54 | self._client = boto3.client("kinesis", **kwargs) 55 | return self._client 56 | 57 | def _get_client_config(self) -> KinesisClientConfig: 58 | return KinesisClientConfig( 59 | region_name=settings.get_aws_region(), 60 | ) 61 | 62 | 63 | class ModelOffsetStore(OffsetStoreBackend): 64 | def commit(self, consumer: ConsumerBackend, message: Record) -> None: 65 | KinesisOffset = apps.get_model(app_label="logpipe", model_name="KinesisOffset") 66 | region = settings.get_aws_region() 67 | logger.debug( 68 | 'Commit offset "%s" for region "%s", stream "%s", shard "%s" to %s' 69 | % ( 70 | message.offset, 71 | region, 72 | message.topic, 73 | message.partition, 74 | self.__class__.__name__, 75 | ) 76 | ) 77 | obj, created = KinesisOffset.objects.get_or_create( 78 | region=region, stream=message.topic, shard=message.partition 79 | ) 80 | obj.sequence_number = message.offset 81 | obj.save() 82 | 83 | def seek(self, consumer: ConsumerBackend, stream: str, shard: str) -> None: 84 | KinesisOffset = apps.get_model(app_label="logpipe", model_name="KinesisOffset") 85 | region = settings.get_aws_region() 86 | try: 87 | obj = KinesisOffset.objects.get( 88 | region=settings.get_aws_region(), stream=stream, shard=shard 89 | ) 90 | logger.debug( 91 | 'Seeking to offset "%s" on region "%s", stream "%s", partition "%s"' 92 | % (obj.sequence_number, region, stream, shard) 93 | ) 94 | consumer.seek_to_sequence_number(shard, obj.sequence_number) 95 | except KinesisOffset.DoesNotExist: 96 | logger.debug( 97 | 'Seeking to beginning of region "%s", stream "%s", partition "%s"' 98 | % (region, stream, shard) 99 | ) 100 | consumer.seek_to_sequence_number(shard, None) 101 | 102 | 103 | class Consumer(KinesisBase, ConsumerBackend): 104 | def __init__(self, topic_name: str, **kwargs: Any): 105 | self.topic_name = topic_name 106 | self.client_kwargs = kwargs 107 | 108 | self.shards: collections.deque[ShardID] = collections.deque() 109 | self.records: collections.deque[Record] = collections.deque() 110 | self.shard_iters: dict[ShardID, ShardIterator] = {} 111 | 112 | shards = self._list_shard_ids() 113 | logger.debug("Found %d kinesis shards.", len(shards)) 114 | backend = get_offset_backend() 115 | for shard in shards: 116 | self.shards.append(shard) 117 | backend.seek(self, self.topic_name, shard) 118 | 119 | def seek_to_sequence_number( 120 | self, shard: str, sequence_number: str | None = None 121 | ) -> None: 122 | if sequence_number is None: 123 | resp = self.client.get_shard_iterator( 124 | StreamName=self.topic_name, 125 | ShardId=shard, 126 | ShardIteratorType=settings.get( 127 | "KINESIS_SHARD_ITERATOR_TYPE", default="TRIM_HORIZON" 128 | ), 129 | ) 130 | else: 131 | resp = self.client.get_shard_iterator( 132 | StreamName=self.topic_name, 133 | ShardId=shard, 134 | 
ShardIteratorType="AFTER_SEQUENCE_NUMBER", 135 | StartingSequenceNumber=sequence_number, 136 | ) 137 | self.shard_iters[shard] = resp["ShardIterator"] 138 | 139 | def __iter__(self) -> Consumer: 140 | return self 141 | 142 | def __next__(self) -> Record: 143 | # Try and load records. Keep trying until either (1) we have some records or (2) current_lag drops to 0 144 | while len(self.records) <= 0: 145 | # Load a page from each shard and sum the shard lags 146 | current_lag = 0 147 | for i in range(len(self.shards)): 148 | current_lag += self._load_next_page() 149 | 150 | # If all shards report 0 lag, then give up trying to load records 151 | if current_lag <= 0: 152 | break 153 | 154 | # If we've tried all the shards and still don't have any records, stop iteration 155 | if len(self.records) == 0: 156 | raise StopIteration() 157 | 158 | # Return the left most record in the queue 159 | return self.records.popleft() 160 | 161 | def _load_next_page(self) -> int: 162 | # Load a page from the left-most shard in the queue 163 | try: 164 | shard = self.shards.popleft() 165 | except IndexError: 166 | return 0 167 | 168 | # Get the next shard iterator for the shard 169 | shard_iter = self.shard_iters.pop(shard, None) 170 | if not shard_iter: 171 | return 0 172 | 173 | # Fetch the records from Kinesis 174 | logger.debug("Loading page of records from %s.%s", self.topic_name, shard) 175 | fetch_limit = settings.get("KINESIS_FETCH_LIMIT", 25) 176 | response = self._get_records(shard_iter, fetch_limit) 177 | if response is None: 178 | return 0 179 | 180 | # This default value is mostly just for testing with Moto. Real Kinesis should always return a value for MillisBehindLatest. 181 | num_records = len(response["Records"]) 182 | if "MillisBehindLatest" in response: 183 | current_stream_lag = response["MillisBehindLatest"] 184 | else: 185 | current_stream_lag = 0 if num_records == 0 else 1 186 | logger.debug( 187 | "Loaded {} records from {}.{}. Currently {}ms behind stream head.".format( 188 | num_records, self.topic_name, shard, current_stream_lag 189 | ) 190 | ) 191 | 192 | # Add the records page into the queue 193 | timestamp = (time.time() * 1000) - current_stream_lag 194 | for r in response["Records"]: 195 | record = Record( 196 | topic=self.topic_name, 197 | partition=shard, 198 | offset=r["SequenceNumber"], 199 | timestamp=timestamp, 200 | key=r["PartitionKey"], 201 | value=r["Data"], 202 | ) 203 | self.records.append(record) 204 | 205 | # Add the shard back to the right of the queue and save the shard iterator for next time we need 206 | # to get records from this shard. If NextShardIterator is None, the shard has been closed and 207 | # we should remove it from the pool. 208 | if response.get("NextShardIterator", None): 209 | self.shard_iters[shard] = response["NextShardIterator"] 210 | self.shards.append(shard) 211 | else: 212 | logger.info( 213 | "Shard {}.{} has been closed. 
Removing it from the fetch pool.".format( 214 | self.topic_name, shard 215 | ) 216 | ) 217 | 218 | return current_stream_lag 219 | 220 | def _get_records( 221 | self, 222 | shard_iter: ShardIterator, 223 | fetch_limit: int, 224 | retries: int = 1, 225 | ) -> GetRecordsOutputTypeDef | None: 226 | i = 0 227 | while i <= retries: 228 | try: 229 | response = self.client.get_records( 230 | ShardIterator=shard_iter, Limit=fetch_limit 231 | ) 232 | return response 233 | except ClientError as e: 234 | if ( 235 | e.response["Error"]["Code"] 236 | == "ProvisionedThroughputExceededException" 237 | ): 238 | logger.warning( 239 | "Caught ProvisionedThroughputExceededException. Sleeping for 5 seconds." 240 | ) 241 | time.sleep(5) 242 | else: 243 | logger.warning( 244 | "Received {} from AWS API: {}".format( 245 | e.response["Error"]["Code"], e.response["Error"]["Message"] 246 | ) 247 | ) 248 | i += 1 249 | logger.warning( 250 | f"After {i} attempts, couldn't get records from Kinesis. Giving up." 251 | ) 252 | return None 253 | 254 | def _list_shard_ids(self) -> list[ShardID]: 255 | resp = self.client.describe_stream(StreamName=self.topic_name) 256 | return [shard["ShardId"] for shard in resp["StreamDescription"]["Shards"]] 257 | 258 | 259 | class Producer(KinesisBase, ProducerBackend): 260 | _last_sequence_numbers: LRU[str, dict[str, str]] = LRU( 261 | settings.get("KINESIS_SEQ_NUM_CACHE_SIZE", 1000) 262 | ) 263 | 264 | def send(self, topic_name: str, key: str, value: bytes) -> RecordMetadata | None: 265 | kwargs = PutRecordKwargs( 266 | StreamName=topic_name, 267 | Data=value, 268 | PartitionKey=key, 269 | ) 270 | 271 | if topic_name not in self._last_sequence_numbers: 272 | self._last_sequence_numbers[topic_name] = {} 273 | last_seq_num = self._last_sequence_numbers[topic_name].get(key) 274 | if last_seq_num: 275 | kwargs["SequenceNumberForOrdering"] = last_seq_num 276 | 277 | metadata = self._send_and_retry(kwargs) 278 | if metadata is None: 279 | return None 280 | 281 | shard_id = metadata["ShardId"] 282 | seq_num = str(metadata["SequenceNumber"]) 283 | self._last_sequence_numbers[topic_name][key] = seq_num 284 | 285 | return RecordMetadata(topic=topic_name, partition=shard_id, offset=seq_num) 286 | 287 | def _send_and_retry( 288 | self, data: PutRecordKwargs, retries: int = 1 289 | ) -> PutRecordOutputTypeDef | None: 290 | i = 0 291 | while i <= retries: 292 | try: 293 | metadata = self.client.put_record(**data) 294 | return metadata 295 | except ClientError as e: 296 | if ( 297 | e.response["Error"]["Code"] 298 | == "ProvisionedThroughputExceededException" 299 | ): 300 | logger.warning( 301 | "Caught ProvisionedThroughputExceededException. Sleeping for 5 seconds." 302 | ) 303 | time.sleep(5) 304 | else: 305 | logger.warning( 306 | "Received %s from AWS API: %s", 307 | e.response["Error"]["Code"], 308 | e.response["Error"]["Message"], 309 | ) 310 | i += 1 311 | logger.warning( 312 | f"After {i} attempts, couldn't send message to Kinesis. Giving up." 
313 | ) 314 | return None 315 | -------------------------------------------------------------------------------- /src/logpipe/constants.py: -------------------------------------------------------------------------------- 1 | FORMAT_JSON = "json" 2 | FORMAT_MSGPACK = "msgpack" 3 | FORMAT_PICKLE = "pickle" 4 | -------------------------------------------------------------------------------- /src/logpipe/consumer.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Generator, Iterator 2 | from typing import Any, TypeVar, cast 3 | import itertools 4 | import logging 5 | import time 6 | 7 | from django.db import models, transaction 8 | from rest_framework import serializers 9 | import pydantic_core 10 | 11 | from . import settings 12 | from .abc import ( 13 | ConsumerBackend, 14 | DRFSerializer, 15 | MessageType, 16 | MessageVersion, 17 | PydanticModel, 18 | Record, 19 | Serializer, 20 | SerializerClass, 21 | is_pydantic_serializer_class, 22 | ) 23 | from .backend import get_consumer_backend, get_offset_backend 24 | from .exceptions import ( 25 | IgnoredMessageTypeError, 26 | InvalidMessageError, 27 | UnknownMessageTypeError, 28 | UnknownMessageVersionError, 29 | ValidationError, 30 | ) 31 | from .format import parse 32 | 33 | logger = logging.getLogger(__name__) 34 | 35 | _Ser = TypeVar("_Ser", bound=Serializer) 36 | _DRFSer = TypeVar("_DRFSer", bound=DRFSerializer[Any]) 37 | 38 | 39 | class Consumer(Iterator[tuple[Record, Serializer]]): 40 | consumer: ConsumerBackend 41 | throw_errors: bool 42 | serializer_classes: dict[MessageType, dict[MessageVersion, SerializerClass]] 43 | ignored_message_types: set[MessageType] 44 | 45 | def __init__(self, topic_name: str, throw_errors: bool = False, **kwargs: Any): 46 | self.consumer = get_consumer_backend(topic_name, **kwargs) 47 | self.throw_errors = throw_errors 48 | self.serializer_classes = {} 49 | self.ignored_message_types = set() 50 | 51 | def __iter__(self) -> Iterator[tuple[Record, Serializer]]: 52 | if self.throw_errors: 53 | return self 54 | return self._error_handler() 55 | 56 | def __next__(self) -> tuple[Record, Serializer]: 57 | return self._get_next_message() 58 | 59 | def add_ignored_message_type(self, message_type: MessageType) -> None: 60 | self.ignored_message_types.add(message_type) 61 | 62 | def commit(self, message: Record) -> None: 63 | get_offset_backend().commit(self.consumer, message) 64 | 65 | def register(self, serializer_class: SerializerClass) -> None: 66 | message_type = serializer_class.MESSAGE_TYPE 67 | version = serializer_class.VERSION 68 | if message_type not in self.serializer_classes: 69 | self.serializer_classes[message_type] = {} 70 | self.serializer_classes[message_type][version] = serializer_class 71 | 72 | def run(self, iter_limit: int = 0) -> None: 73 | i = 0 74 | for message, serializer in self: 75 | with transaction.atomic(): 76 | try: 77 | serializer.save() 78 | self.commit(message) 79 | except Exception as e: 80 | info = ( 81 | message.key, 82 | message.topic, 83 | message.partition, 84 | message.offset, 85 | ) 86 | logger.exception( 87 | 'Failed to process message with key "%s" from topic "%s", partition "%s", offset "%s"' 88 | % info 89 | ) 90 | raise e 91 | i += 1 92 | if iter_limit > 0 and i >= iter_limit: 93 | break 94 | 95 | def _error_handler(self) -> Generator[tuple[Record, Serializer], None, None]: 96 | while True: 97 | # Try to get the next message 98 | try: 99 | yield next(self) 100 | 101 | # Obey the laws of StopIteration 102 | 
except StopIteration: 103 | return 104 | 105 | # Message format was invalid in some way: log error and move on. 106 | except InvalidMessageError as e: 107 | logger.error( 108 | "Failed to deserialize message in topic {}. Details: {}".format( 109 | self.consumer.topic_name, e 110 | ) 111 | ) 112 | self.commit(e.message) 113 | 114 | # Message type has been explicitly ignored: skip it silently and move on. 115 | except IgnoredMessageTypeError as e: 116 | logger.debug( 117 | "Skipping ignored message type in topic {}. Details: {}".format( 118 | self.consumer.topic_name, e 119 | ) 120 | ) 121 | self.commit(e.message) 122 | 123 | # Message type is unknown: log error and move on. 124 | except UnknownMessageTypeError as e: 125 | logger.error( 126 | "Skipping unknown message type in topic {}. Details: {}".format( 127 | self.consumer.topic_name, e 128 | ) 129 | ) 130 | self.commit(e.message) 131 | 132 | # Message version is unknown: log error and move on. 133 | except UnknownMessageVersionError as e: 134 | logger.error( 135 | "Skipping unknown message version in topic {}. Details: {}".format( 136 | self.consumer.topic_name, e 137 | ) 138 | ) 139 | self.commit(e.message) 140 | 141 | # Serializer for message type flagged message as invalid: log warning and move on. 142 | except ValidationError as e: 143 | logger.warning( 144 | "Skipping invalid message in topic {}. Details: {}".format( 145 | self.consumer.topic_name, e 146 | ) 147 | ) 148 | self.commit(e.message) 149 | 150 | pass 151 | 152 | def _get_next_message(self) -> tuple[Record, Serializer]: 153 | message = next(self.consumer) 154 | 155 | info = (message.key, message.topic, message.partition, message.offset) 156 | logger.debug( 157 | 'Received message with key "%s" from topic "%s", partition "%s", offset "%s"' 158 | % info 159 | ) 160 | 161 | # Wait? 
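# The block below implements this wait: it computes how old the record is
# (now minus the record's timestamp, in milliseconds) and, when the record is
# newer than the configured MIN_MESSAGE_LAG_MS, sleeps off the difference
# before handing the message to the serializer. For example, with
# MIN_MESSAGE_LAG_MS = 500 and a record produced 200ms ago, the consumer
# sleeps roughly 300ms.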
162 | timestamp = getattr(message, "timestamp", None) or (time.time() * 1000) 163 | lag_ms = (time.time() * 1000) - timestamp 164 | logger.debug("Message lag is %sms" % lag_ms) 165 | wait_ms = settings.get("MIN_MESSAGE_LAG_MS", 0) - lag_ms 166 | if wait_ms > 0: 167 | logger.debug("Respecting MIN_MESSAGE_LAG_MS by waiting %sms" % wait_ms) 168 | time.sleep(wait_ms / 1000) 169 | logger.debug("Finished waiting") 170 | 171 | try: 172 | serializer = self._unserialize(message) 173 | except Exception as e: 174 | raise e 175 | 176 | return message, serializer 177 | 178 | def _unserialize(self, message: Record) -> Serializer: 179 | data = parse(message.value) 180 | if "type" not in data: 181 | raise InvalidMessageError( 182 | 'Received message missing a top-level "type" key.', message 183 | ) 184 | if "version" not in data: 185 | raise InvalidMessageError( 186 | 'Received message missing a top-level "version" key.', message 187 | ) 188 | if "message" not in data: 189 | raise InvalidMessageError( 190 | 'Received message missing a top-level "message" key.', message 191 | ) 192 | 193 | message_type = data["type"] 194 | if message_type in self.ignored_message_types: 195 | raise IgnoredMessageTypeError( 196 | 'Received message with ignored type "%s" in topic %s' 197 | % (message_type, message.topic), 198 | message, 199 | ) 200 | if message_type not in self.serializer_classes: 201 | raise UnknownMessageTypeError( 202 | 'Received message with unknown type "%s" in topic %s' 203 | % (message_type, message.topic), 204 | message, 205 | ) 206 | 207 | version = data["version"] 208 | if version not in self.serializer_classes[message_type]: 209 | raise UnknownMessageVersionError( 210 | 'Received message of type "%s" with unknown version "%s" in topic %s' 211 | % (message_type, version, message.topic), 212 | message, 213 | ) 214 | 215 | serializer_class = self.serializer_classes[message_type][version] 216 | 217 | instance = None 218 | if hasattr(serializer_class, "lookup_instance"): 219 | instance = serializer_class.lookup_instance(**data["message"]) 220 | serializer = self._construct_serializer_instance( 221 | serializer_class=serializer_class, 222 | message=message, 223 | instance=instance, 224 | data=data["message"], 225 | ) 226 | return serializer 227 | 228 | def _construct_serializer_instance( 229 | self, 230 | serializer_class: SerializerClass, 231 | message: Record, 232 | instance: models.Model | None, 233 | data: Any, 234 | ) -> Serializer: 235 | if is_pydantic_serializer_class(serializer_class): 236 | return self._construct_pydantic_serializer_instance( 237 | serializer_class=serializer_class, 238 | message=message, 239 | instance=instance, 240 | data=data, 241 | ) 242 | # TODO: this cast can go away once we can use TypeIs instead of 243 | # TypeGuard (added in Python 3.13). 
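# Note that cast() only narrows the static type for the type checker; it does
# not convert or validate serializer_class at runtime.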
244 | serializer_class = cast(type[DRFSerializer[Any]], serializer_class) 245 | return self._construct_drf_serializer_instance( 246 | serializer_class=serializer_class, 247 | message=message, 248 | instance=instance, 249 | data=data, 250 | ) 251 | 252 | def _construct_drf_serializer_instance( 253 | self, 254 | serializer_class: type[_DRFSer], 255 | message: Record, 256 | instance: models.Model | None, 257 | data: Any, 258 | ) -> _DRFSer: 259 | serializer = serializer_class(instance=instance, data=data) 260 | try: 261 | serializer.is_valid(raise_exception=True) 262 | except serializers.ValidationError as e: 263 | raise ValidationError(e, message) 264 | return serializer 265 | 266 | def _construct_pydantic_serializer_instance( 267 | self, 268 | serializer_class: type[PydanticModel], 269 | message: Record, 270 | instance: models.Model | None, 271 | data: Any, 272 | ) -> PydanticModel: 273 | try: 274 | serializer = serializer_class.model_validate(data) 275 | except pydantic_core.ValidationError as e: 276 | raise ValidationError(e, message) 277 | serializer._instance = instance # type: ignore[attr-defined] 278 | return serializer 279 | 280 | 281 | class MultiConsumer: 282 | consumers: list[Consumer] 283 | 284 | def __init__(self, *consumers: Consumer): 285 | self.consumers = list(consumers) 286 | 287 | def run(self, iter_limit: int = 0) -> None: 288 | i = 0 289 | for consumer in itertools.cycle(self.consumers): 290 | consumer.run(iter_limit=1) 291 | i += 1 292 | if iter_limit > 0 and i >= iter_limit: 293 | break 294 | -------------------------------------------------------------------------------- /src/logpipe/docgen_setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | docs_dir, _ = os.path.split(__file__) 5 | sys.path.append(os.path.dirname(docs_dir)) 6 | 7 | SECRET_KEY = os.environ.get("SECRET_KEY", "django-insecure") 8 | INSTALLED_APPS = [ 9 | "django.contrib.admin", 10 | "django.contrib.auth", 11 | "django.contrib.contenttypes", 12 | "django.contrib.sessions", 13 | "django.contrib.sites", 14 | "django.contrib.messages", 15 | "django.contrib.staticfiles", 16 | "django.contrib.flatpages", 17 | "logpipe", 18 | ] 19 | 20 | STATIC_URL = "docgen-static/" 21 | 22 | LOGPIPE = { 23 | "OFFSET_BACKEND": "logpipe.backend.dummy.ModelOffsetStore", 24 | "PRODUCER_BACKEND": "logpipe.backend.dummy.Producer", 25 | "CONSUMER_BACKEND": "logpipe.backend.dummy.Consumer", 26 | } 27 | 28 | setup = None 29 | from django.apps import apps # noqa 30 | from django.conf import settings # noqa 31 | import django # noqa 32 | 33 | if not apps.ready and not settings.configured: 34 | django.setup() 35 | -------------------------------------------------------------------------------- /src/logpipe/exceptions.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | from rest_framework import serializers 4 | 5 | from .abc import Record 6 | 7 | 8 | class LogPipeError(Exception): 9 | pass 10 | 11 | 12 | class LogPipeMessageError(LogPipeError): 13 | message: Record 14 | 15 | def __init__(self, descr: Any, message: Record): 16 | super().__init__(descr) 17 | self.message = message 18 | 19 | 20 | class UnknownFormatError(LogPipeError): 21 | pass 22 | 23 | 24 | class IgnoredMessageTypeError(LogPipeMessageError): 25 | pass 26 | 27 | 28 | class UnknownMessageTypeError(LogPipeMessageError): 29 | pass 30 | 31 | 32 | class UnknownMessageVersionError(LogPipeMessageError): 33 | pass 34 | 35 | 36 | class 
InvalidMessageError(LogPipeMessageError): 37 | pass 38 | 39 | 40 | class ValidationError(LogPipeMessageError, serializers.ValidationError): 41 | pass 42 | 43 | 44 | class MissingTopicError(LogPipeError): 45 | pass 46 | -------------------------------------------------------------------------------- /src/logpipe/format.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | from typing import Any, TypedDict 3 | 4 | from .abc import Parser, Renderer 5 | from .exceptions import UnknownFormatError 6 | 7 | 8 | class FormatRegistryEntry(TypedDict): 9 | renderer: Renderer 10 | parser: Parser 11 | 12 | 13 | FormatRegistry = dict[bytes, FormatRegistryEntry] 14 | 15 | _delim = b":" 16 | _formats: FormatRegistry = {} 17 | 18 | 19 | def _bytes(seq: str | bytes) -> bytes: 20 | return seq.encode() if hasattr(seq, "encode") else seq 21 | 22 | 23 | def register(codestr: str, renderer: Renderer, parser: Parser) -> None: 24 | code = _bytes(codestr) 25 | _formats[code] = { 26 | "renderer": renderer, 27 | "parser": parser, 28 | } 29 | 30 | 31 | def unregister(codestr: str) -> None: 32 | code = _bytes(codestr) 33 | try: 34 | del _formats[code] 35 | except KeyError: 36 | pass 37 | 38 | 39 | def render(codestr: str, data: dict[str, Any]) -> bytes: 40 | code = _bytes(codestr) 41 | if code not in _formats: 42 | raise UnknownFormatError(f"Could not find renderer for format {codestr}") 43 | body = _formats[code]["renderer"].render(data) 44 | return code + _delim + body 45 | 46 | 47 | def parse(_data: str | bytes) -> dict[str, Any]: 48 | data = _bytes(_data) 49 | code, body = data.split(_delim, 1) 50 | if code not in _formats: 51 | raise UnknownFormatError("Could not find parser for format %s" % code.decode()) 52 | return _formats[code]["parser"].parse(BytesIO(body)) 53 | 54 | 55 | __all__ = ["register", "unregister", "render", "parse"] 56 | -------------------------------------------------------------------------------- /src/logpipe/formats/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thelabnyc/django-logpipe/4f4f017e400d9ccbd2148d3e735e0269df692748/src/logpipe/formats/__init__.py -------------------------------------------------------------------------------- /src/logpipe/formats/json.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Mapping 2 | from typing import IO, Any 3 | 4 | from rest_framework.parsers import JSONParser as _JSONParser 5 | from rest_framework.renderers import JSONRenderer as _JSONRenderer 6 | 7 | from ..abc import Parser, Renderer 8 | 9 | 10 | class JSONRenderer(_JSONRenderer, Renderer): 11 | pass 12 | 13 | 14 | class JSONParser(_JSONParser, Parser): 15 | def parse( 16 | self, 17 | stream: IO[Any], 18 | media_type: str | None = None, 19 | parser_context: Mapping[str, Any] | None = None, 20 | ) -> dict[str, Any]: 21 | return super().parse( 22 | stream, 23 | media_type=media_type, 24 | parser_context=parser_context, 25 | ) 26 | 27 | 28 | __all__ = ["JSONRenderer", "JSONParser"] 29 | -------------------------------------------------------------------------------- /src/logpipe/formats/msgpack.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Mapping 2 | from typing import IO, Any 3 | 4 | from rest_framework import parsers, renderers 5 | 6 | from ..abc import Parser, Renderer 7 | 8 | _import_error: ImportError 9 | try: 10 | 
import msgpack 11 | except ImportError as e: 12 | msgpack = None # type: ignore[assignment] 13 | _import_error = e 14 | 15 | 16 | class MsgPackRenderer(renderers.BaseRenderer, Renderer): 17 | media_type = "application/msgpack" 18 | format = "msgpack" 19 | charset = None 20 | render_style = "binary" 21 | 22 | def render( 23 | self, 24 | data: dict[str, Any], 25 | media_type: str | None = None, 26 | renderer_context: Mapping[str, Any] | None = None, 27 | ) -> bytes: 28 | if not msgpack: 29 | raise _import_error 30 | return msgpack.packb(data, use_bin_type=True) 31 | 32 | 33 | class MsgPackParser(parsers.BaseParser, Parser): 34 | media_type = "application/msgpack" 35 | 36 | def parse( 37 | self, 38 | stream: IO[Any], 39 | media_type: str | None = None, 40 | parser_context: Mapping[str, Any] | None = None, 41 | ) -> dict[str, Any]: 42 | if not msgpack: 43 | raise _import_error 44 | return msgpack.unpack(stream, use_list=False) 45 | 46 | 47 | __all__ = ["MsgPackRenderer", "MsgPackParser"] 48 | -------------------------------------------------------------------------------- /src/logpipe/formats/pickle.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Mapping 2 | from typing import IO, Any 3 | import pickle 4 | 5 | from rest_framework import parsers, renderers 6 | 7 | from ..abc import Parser, Renderer 8 | 9 | 10 | class PickleRenderer(renderers.BaseRenderer, Renderer): 11 | media_type = "application/python-pickle" 12 | format = "pickle" 13 | charset = None 14 | render_style = "binary" 15 | 16 | def render( 17 | self, 18 | data: dict[str, Any], 19 | media_type: str | None = None, 20 | renderer_context: Mapping[str, Any] | None = None, 21 | ) -> bytes: 22 | return pickle.dumps(data) 23 | 24 | 25 | class PickleParser(parsers.BaseParser, Parser): 26 | media_type = "application/python-pickle" 27 | 28 | def parse( 29 | self, 30 | stream: IO[Any], 31 | media_type: str | None = None, 32 | parser_context: Mapping[str, Any] | None = None, 33 | ) -> dict[str, Any]: 34 | return pickle.load(stream) 35 | 36 | 37 | __all__ = ["PickleRenderer", "PickleParser"] 38 | -------------------------------------------------------------------------------- /src/logpipe/locale/es/LC_MESSAGES/django.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thelabnyc/django-logpipe/4f4f017e400d9ccbd2148d3e735e0269df692748/src/logpipe/locale/es/LC_MESSAGES/django.mo -------------------------------------------------------------------------------- /src/logpipe/locale/es/LC_MESSAGES/django.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER 3 | # This file is distributed under the same license as the PACKAGE package. 4 | # FIRST AUTHOR , YEAR. 5 | # 6 | msgid "" 7 | msgstr "" 8 | "Project-Id-Version: \n" 9 | "Report-Msgid-Bugs-To: \n" 10 | "POT-Creation-Date: 2024-02-13 22:05+0000\n" 11 | "PO-Revision-Date: 2019-06-24 17:55-0400\n" 12 | "Last-Translator: Craig Weber \n" 13 | "Language-Team: \n" 14 | "Language: es\n" 15 | "MIME-Version: 1.0\n" 16 | "Content-Type: text/plain; charset=UTF-8\n" 17 | "Content-Transfer-Encoding: 8bit\n" 18 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 19 | "X-Generator: Poedit 2.2.3\n" 20 | 21 | #. 
Translators: Backend Library Name 22 | #: logpipe/apps.py:9 23 | msgid "LogPipe" 24 | msgstr "LogPipe" 25 | 26 | #: logpipe/models.py:10 27 | msgid "Kafka Topic Name" 28 | msgstr "Nombre del tema de Kafka" 29 | 30 | #. Translators: Interal Model Field Help Text 31 | #: logpipe/models.py:12 32 | msgid "The Kafka topic name" 33 | msgstr "El nombre del tema de Kafka" 34 | 35 | #: logpipe/models.py:18 36 | msgid "Kafka Partition ID" 37 | msgstr "ID de partición Kafka" 38 | 39 | #. Translators: Interal Model Field Help Text 40 | #: logpipe/models.py:20 41 | msgid "The Kafka partition identifier" 42 | msgstr "El identificador de partición Kafka" 43 | 44 | #. Translators: Internal Model Name (singular) 45 | #: logpipe/models.py:25 logpipe/models.py:33 46 | msgid "Kafka Offset" 47 | msgstr "Kafka Offset" 48 | 49 | #. Translators: Interal Model Field Help Text 50 | #: logpipe/models.py:27 51 | msgid "The current offset in the Kafka partition" 52 | msgstr "El desplazamiento actual en la partición Kafka" 53 | 54 | #. Translators: Internal Model Name (plural) 55 | #: logpipe/models.py:35 56 | msgid "Kafka Offsets" 57 | msgstr "Kafka Offsets" 58 | 59 | #. Translators: AWS Region Name 60 | #: logpipe/models.py:46 61 | msgid "US East (N. Virginia)" 62 | msgstr "EE.UU. Este (Norte de Virginia)" 63 | 64 | #. Translators: AWS Region Name 65 | #: logpipe/models.py:48 66 | msgid "US East (Ohio)" 67 | msgstr "EE.UU. Este (Ohio)" 68 | 69 | #. Translators: AWS Region Name 70 | #: logpipe/models.py:50 71 | msgid "US West (N. California)" 72 | msgstr "EE.UU. Oeste (Norte de California)" 73 | 74 | #. Translators: AWS Region Name 75 | #: logpipe/models.py:52 76 | msgid "US West (Oregon)" 77 | msgstr "EE.UU. Oeste (Oregón)" 78 | 79 | #. Translators: AWS Region Name 80 | #: logpipe/models.py:54 81 | msgid "Asia Pacific (Mumbai)" 82 | msgstr "Asia Pacífico (Mumbai)" 83 | 84 | #. Translators: AWS Region Name 85 | #: logpipe/models.py:56 86 | msgid "Asia Pacific (Seoul)" 87 | msgstr "Asia Pacífico (Seúl)" 88 | 89 | #. Translators: AWS Region Name 90 | #: logpipe/models.py:58 91 | msgid "Asia Pacific (Singapore)" 92 | msgstr "Asia Pacífico (Singapur)" 93 | 94 | #. Translators: AWS Region Name 95 | #: logpipe/models.py:60 96 | msgid "Asia Pacific (Sydney)" 97 | msgstr "Asia Pacífico (Sídney)" 98 | 99 | #. Translators: AWS Region Name 100 | #: logpipe/models.py:62 101 | msgid "Asia Pacific (Tokyo)" 102 | msgstr "Asia Pacífico (Tokio)" 103 | 104 | #. Translators: AWS Region Name 105 | #: logpipe/models.py:64 106 | msgid "Canada (Central)" 107 | msgstr "Canadá (Central)" 108 | 109 | #. Translators: AWS Region Name 110 | #: logpipe/models.py:66 111 | msgid "EU (Frankfurt)" 112 | msgstr "UE (Fráncfort)" 113 | 114 | #. Translators: AWS Region Name 115 | #: logpipe/models.py:68 116 | msgid "EU (Ireland)" 117 | msgstr "UE (Irlanda)" 118 | 119 | #. Translators: AWS Region Name 120 | #: logpipe/models.py:70 121 | msgid "EU (London)" 122 | msgstr "UE (Londres)" 123 | 124 | #. Translators: AWS Region Name 125 | #: logpipe/models.py:72 126 | msgid "EU (Paris)" 127 | msgstr "UE (París)" 128 | 129 | #. Translators: AWS Region Name 130 | #: logpipe/models.py:74 131 | msgid "South America (São Paulo)" 132 | msgstr "América del Sur (São Paulo)" 133 | 134 | #. Translators: AWS Region Name 135 | #: logpipe/models.py:76 136 | msgid "China (Beijing)" 137 | msgstr "China (Beijing)" 138 | 139 | #. 
Translators: AWS Region Name 140 | #: logpipe/models.py:78 141 | msgid "AWS GovCloud (US)" 142 | msgstr "AWS GovCloud (US-East)" 143 | 144 | #: logpipe/models.py:83 145 | msgid "AWS Region" 146 | msgstr "Regiones AWS" 147 | 148 | #. Translators: Interal Model Field Help Text 149 | #: logpipe/models.py:85 150 | msgid "The Kinesis stream region name" 151 | msgstr "El nombre de la región del arroyo Kinesis" 152 | 153 | #: logpipe/models.py:93 154 | msgid "Kinesis Stream Name" 155 | msgstr "Nombre de Kinesis Stream" 156 | 157 | #. Translators: Interal Model Field Help Text 158 | #: logpipe/models.py:95 159 | msgid "The Kinesis stream name" 160 | msgstr "El nombre de la secuencia de Kinesis" 161 | 162 | #: logpipe/models.py:101 163 | msgid "Kinesis Shard ID" 164 | msgstr "ID de fragmento de kinesis" 165 | 166 | #. Translators: Interal Model Field Help Text 167 | #: logpipe/models.py:103 168 | msgid "The Kinesis shard ID" 169 | msgstr "La identificación del fragmento de Kinesis" 170 | 171 | #: logpipe/models.py:109 172 | msgid "Kinesis Sequence Number" 173 | msgstr "Número de secuencia de kinesis" 174 | 175 | #. Translators: Interal Model Field Help Text 176 | #: logpipe/models.py:111 177 | msgid "The current sequence number in the Kinesis shard" 178 | msgstr "El número de secuencia actual en el fragmento de Kinesis" 179 | 180 | #. Translators: Internal Model Name (singular) 181 | #: logpipe/models.py:117 182 | msgid "AWS Kinesis Offset" 183 | msgstr "AWS Kinesis Offset" 184 | 185 | #. Translators: Internal Model Name (plural) 186 | #: logpipe/models.py:119 187 | msgid "AWS Kinesis Offsets" 188 | msgstr "AWS Kinesis Offsets" 189 | -------------------------------------------------------------------------------- /src/logpipe/management/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thelabnyc/django-logpipe/4f4f017e400d9ccbd2148d3e735e0269df692748/src/logpipe/management/__init__.py -------------------------------------------------------------------------------- /src/logpipe/management/commands/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thelabnyc/django-logpipe/4f4f017e400d9ccbd2148d3e735e0269df692748/src/logpipe/management/commands/__init__.py -------------------------------------------------------------------------------- /src/logpipe/management/commands/run_kafka_consumer.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | from django.core.management.base import BaseCommand 4 | 5 | from logpipe.consumer import MultiConsumer 6 | from logpipe.registry import list_registered_consumers 7 | 8 | 9 | class Command(BaseCommand): 10 | help = "Fetch and apply Kafka messages" 11 | 12 | def handle(self, *args: Any, **options: Any) -> None: 13 | consumers = list_registered_consumers() 14 | for c in consumers: 15 | print("Found consumer: %s" % c) 16 | print("Running indefinite consumer...") 17 | multi = MultiConsumer(*consumers) 18 | multi.run() 19 | -------------------------------------------------------------------------------- /src/logpipe/migrations/0001_initial.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 1.9.6 on 2016-07-04 14:15 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | initial = True 8 | 9 | dependencies = [] 10 | 11 | operations = [ 12 | 
migrations.CreateModel( 13 | name="Offset", 14 | fields=[ 15 | ( 16 | "id", 17 | models.AutoField( 18 | auto_created=True, 19 | primary_key=True, 20 | serialize=False, 21 | verbose_name="ID", 22 | ), 23 | ), 24 | ("topic", models.CharField(max_length=200)), 25 | ("partition", models.PositiveIntegerField()), 26 | ("offset", models.PositiveIntegerField(default=0)), 27 | ], 28 | options={ 29 | "ordering": ("topic", "partition", "offset"), 30 | }, 31 | ), 32 | migrations.AlterUniqueTogether( 33 | name="offset", 34 | unique_together={("topic", "partition")}, 35 | ), 36 | ] 37 | -------------------------------------------------------------------------------- /src/logpipe/migrations/0002_auto_20170427_1451.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 1.11 on 2017-04-27 14:51 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | dependencies = [ 8 | ("logpipe", "0001_initial"), 9 | ] 10 | 11 | operations = [migrations.RenameModel("Offset", "KafkaOffset")] 12 | -------------------------------------------------------------------------------- /src/logpipe/migrations/0003_auto_20170427_1703.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 1.11 on 2017-04-27 17:03 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | dependencies = [ 8 | ("logpipe", "0002_auto_20170427_1451"), 9 | ] 10 | 11 | operations = [ 12 | migrations.CreateModel( 13 | name="KinesisOffset", 14 | fields=[ 15 | ( 16 | "id", 17 | models.AutoField( 18 | auto_created=True, 19 | primary_key=True, 20 | serialize=False, 21 | verbose_name="ID", 22 | ), 23 | ), 24 | ( 25 | "stream", 26 | models.CharField( 27 | help_text="The Kinesis stream name", max_length=200 28 | ), 29 | ), 30 | ( 31 | "shard", 32 | models.CharField(help_text="The Kinesis shard ID", max_length=20), 33 | ), 34 | ( 35 | "sequence_number", 36 | models.CharField( 37 | help_text="The current sequence number in the Kinesis shard", 38 | max_length=20, 39 | ), 40 | ), 41 | ], 42 | options={ 43 | "ordering": ("stream", "shard", "sequence_number"), 44 | }, 45 | ), 46 | migrations.AlterField( 47 | model_name="kafkaoffset", 48 | name="offset", 49 | field=models.PositiveIntegerField( 50 | default=0, help_text="The current offset in the Kafka partition" 51 | ), 52 | ), 53 | migrations.AlterField( 54 | model_name="kafkaoffset", 55 | name="partition", 56 | field=models.PositiveIntegerField( 57 | help_text="The Kafka partition identifier" 58 | ), 59 | ), 60 | migrations.AlterField( 61 | model_name="kafkaoffset", 62 | name="topic", 63 | field=models.CharField(help_text="The Kafka topic name", max_length=200), 64 | ), 65 | migrations.AlterUniqueTogether( 66 | name="kinesisoffset", 67 | unique_together={("stream", "shard")}, 68 | ), 69 | ] 70 | -------------------------------------------------------------------------------- /src/logpipe/migrations/0004_auto_20170502_1403.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 1.11 on 2017-05-02 14:03 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | dependencies = [ 8 | ("logpipe", "0003_auto_20170427_1703"), 9 | ] 10 | 11 | operations = [ 12 | migrations.AlterField( 13 | model_name="kinesisoffset", 14 | name="sequence_number", 15 | field=models.CharField( 16 | help_text="The current sequence number in the 
Kinesis shard", 17 | max_length=200, 18 | ), 19 | ), 20 | migrations.AlterField( 21 | model_name="kinesisoffset", 22 | name="shard", 23 | field=models.CharField(help_text="The Kinesis shard ID", max_length=200), 24 | ), 25 | ] 26 | -------------------------------------------------------------------------------- /src/logpipe/migrations/0005_auto_20180917_1348.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 1.11 on 2018-09-17 13:48 2 | 3 | from django.db import migrations, models 4 | 5 | import logpipe.settings 6 | 7 | 8 | class Migration(migrations.Migration): 9 | dependencies = [ 10 | ("logpipe", "0004_auto_20170502_1403"), 11 | ] 12 | 13 | operations = [ 14 | migrations.AddField( 15 | model_name="kinesisoffset", 16 | name="region", 17 | field=models.CharField( 18 | choices=[ 19 | ("us-east-1", "US East (N. Virginia)"), 20 | ("us-east-2", "US East (Ohio)"), 21 | ("us-west-1", "US West (N. California)"), 22 | ("us-west-2", "US West (Oregon)"), 23 | ("ap-south-1", "Asia Pacific (Mumbai)"), 24 | ("ap-northeast-2", "Asia Pacific (Seoul)"), 25 | ("ap-southeast-1", "Asia Pacific (Singapore)"), 26 | ("ap-southeast-2", "Asia Pacific (Sydney)"), 27 | ("ap-northeast-1", "Asia Pacific (Tokyo)"), 28 | ("ca-central-1", "Canada (Central)"), 29 | ("eu-central-1", "EU (Frankfurt)"), 30 | ("eu-west-1", "EU (Ireland)"), 31 | ("eu-west-2", "EU (London)"), 32 | ("eu-west-3", "EU (Paris)"), 33 | ("sa-east-1", "South America (São Paulo)"), 34 | ("cn-north-1", "China (Beijing)"), 35 | ("us-gov-west-1", "AWS GovCloud (US)"), 36 | ], 37 | default=logpipe.settings.get_aws_region, 38 | help_text="The Kinesis stream region name", 39 | max_length=20, 40 | ), 41 | ), 42 | migrations.AlterUniqueTogether( 43 | name="kinesisoffset", 44 | unique_together={("region", "stream", "shard")}, 45 | ), 46 | ] 47 | -------------------------------------------------------------------------------- /src/logpipe/migrations/0006_alter_kafkaoffset_options_and_more.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 4.1.7 on 2023-03-15 11:45 2 | 3 | from django.db import migrations, models 4 | 5 | import logpipe.settings 6 | 7 | 8 | class Migration(migrations.Migration): 9 | dependencies = [ 10 | ("logpipe", "0005_auto_20180917_1348"), 11 | ] 12 | 13 | operations = [ 14 | migrations.AlterModelOptions( 15 | name="kafkaoffset", 16 | options={ 17 | "ordering": ("topic", "partition", "offset"), 18 | "verbose_name": "Kafka Offset", 19 | "verbose_name_plural": "Kafka Offsets", 20 | }, 21 | ), 22 | migrations.AlterModelOptions( 23 | name="kinesisoffset", 24 | options={ 25 | "ordering": ("stream", "shard", "sequence_number"), 26 | "verbose_name": "AWS Kinesis Offset", 27 | "verbose_name_plural": "AWS Kinesis Offsets", 28 | }, 29 | ), 30 | migrations.AlterField( 31 | model_name="kafkaoffset", 32 | name="offset", 33 | field=models.PositiveIntegerField( 34 | default=0, 35 | help_text="The current offset in the Kafka partition", 36 | verbose_name="Kafka Offset", 37 | ), 38 | ), 39 | migrations.AlterField( 40 | model_name="kafkaoffset", 41 | name="partition", 42 | field=models.PositiveIntegerField( 43 | help_text="The Kafka partition identifier", 44 | verbose_name="Kafka Partition ID", 45 | ), 46 | ), 47 | migrations.AlterField( 48 | model_name="kafkaoffset", 49 | name="topic", 50 | field=models.CharField( 51 | help_text="The Kafka topic name", 52 | max_length=200, 53 | verbose_name="Kafka Topic Name", 54 | ), 55 | ), 56 | 
migrations.AlterField( 57 | model_name="kinesisoffset", 58 | name="region", 59 | field=models.CharField( 60 | choices=[ 61 | ("us-east-1", "US East (N. Virginia)"), 62 | ("us-east-2", "US East (Ohio)"), 63 | ("us-west-1", "US West (N. California)"), 64 | ("us-west-2", "US West (Oregon)"), 65 | ("ap-south-1", "Asia Pacific (Mumbai)"), 66 | ("ap-northeast-2", "Asia Pacific (Seoul)"), 67 | ("ap-southeast-1", "Asia Pacific (Singapore)"), 68 | ("ap-southeast-2", "Asia Pacific (Sydney)"), 69 | ("ap-northeast-1", "Asia Pacific (Tokyo)"), 70 | ("ca-central-1", "Canada (Central)"), 71 | ("eu-central-1", "EU (Frankfurt)"), 72 | ("eu-west-1", "EU (Ireland)"), 73 | ("eu-west-2", "EU (London)"), 74 | ("eu-west-3", "EU (Paris)"), 75 | ("sa-east-1", "South America (São Paulo)"), 76 | ("cn-north-1", "China (Beijing)"), 77 | ("us-gov-west-1", "AWS GovCloud (US)"), 78 | ], 79 | default=logpipe.settings.get_aws_region, 80 | help_text="The Kinesis stream region name", 81 | max_length=20, 82 | verbose_name="AWS Region", 83 | ), 84 | ), 85 | migrations.AlterField( 86 | model_name="kinesisoffset", 87 | name="sequence_number", 88 | field=models.CharField( 89 | help_text="The current sequence number in the Kinesis shard", 90 | max_length=200, 91 | verbose_name="Kinesis Sequence Number", 92 | ), 93 | ), 94 | migrations.AlterField( 95 | model_name="kinesisoffset", 96 | name="shard", 97 | field=models.CharField( 98 | help_text="The Kinesis shard ID", 99 | max_length=200, 100 | verbose_name="Kinesis Shard ID", 101 | ), 102 | ), 103 | migrations.AlterField( 104 | model_name="kinesisoffset", 105 | name="stream", 106 | field=models.CharField( 107 | help_text="The Kinesis stream name", 108 | max_length=200, 109 | verbose_name="Kinesis Stream Name", 110 | ), 111 | ), 112 | ] 113 | -------------------------------------------------------------------------------- /src/logpipe/migrations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thelabnyc/django-logpipe/4f4f017e400d9ccbd2148d3e735e0269df692748/src/logpipe/migrations/__init__.py -------------------------------------------------------------------------------- /src/logpipe/models.py: -------------------------------------------------------------------------------- 1 | from django.db import models 2 | from django.utils.translation import gettext_lazy as _ 3 | 4 | from . 
import settings 5 | 6 | 7 | class KafkaOffset(models.Model): 8 | # Translators: Internal Model Field Name 9 | topic = models.CharField( 10 | _("Kafka Topic Name"), 11 | # Translators: Interal Model Field Help Text 12 | help_text=_("The Kafka topic name"), 13 | max_length=200, 14 | ) 15 | 16 | # Translators: Internal Model Field Name 17 | partition = models.PositiveIntegerField( 18 | _("Kafka Partition ID"), 19 | # Translators: Interal Model Field Help Text 20 | help_text=_("The Kafka partition identifier"), 21 | ) 22 | 23 | # Translators: Internal Model Field Name 24 | offset = models.PositiveIntegerField( 25 | _("Kafka Offset"), 26 | # Translators: Interal Model Field Help Text 27 | help_text=_("The current offset in the Kafka partition"), 28 | default=0, 29 | ) 30 | 31 | class Meta: 32 | # Translators: Internal Model Name (singular) 33 | verbose_name = _("Kafka Offset") 34 | # Translators: Internal Model Name (plural) 35 | verbose_name_plural = _("Kafka Offsets") 36 | unique_together = ("topic", "partition") 37 | ordering = ("topic", "partition", "offset") 38 | 39 | def __str__(self) -> str: 40 | return f'topic="{self.topic}", partition="{self.partition}", offset="{self.offset}"' 41 | 42 | 43 | class KinesisOffset(models.Model): 44 | _region_choices = ( 45 | # Translators: AWS Region Name 46 | ("us-east-1", _("US East (N. Virginia)")), 47 | # Translators: AWS Region Name 48 | ("us-east-2", _("US East (Ohio)")), 49 | # Translators: AWS Region Name 50 | ("us-west-1", _("US West (N. California)")), 51 | # Translators: AWS Region Name 52 | ("us-west-2", _("US West (Oregon)")), 53 | # Translators: AWS Region Name 54 | ("ap-south-1", _("Asia Pacific (Mumbai)")), 55 | # Translators: AWS Region Name 56 | ("ap-northeast-2", _("Asia Pacific (Seoul)")), 57 | # Translators: AWS Region Name 58 | ("ap-southeast-1", _("Asia Pacific (Singapore)")), 59 | # Translators: AWS Region Name 60 | ("ap-southeast-2", _("Asia Pacific (Sydney)")), 61 | # Translators: AWS Region Name 62 | ("ap-northeast-1", _("Asia Pacific (Tokyo)")), 63 | # Translators: AWS Region Name 64 | ("ca-central-1", _("Canada (Central)")), 65 | # Translators: AWS Region Name 66 | ("eu-central-1", _("EU (Frankfurt)")), 67 | # Translators: AWS Region Name 68 | ("eu-west-1", _("EU (Ireland)")), 69 | # Translators: AWS Region Name 70 | ("eu-west-2", _("EU (London)")), 71 | # Translators: AWS Region Name 72 | ("eu-west-3", _("EU (Paris)")), 73 | # Translators: AWS Region Name 74 | ("sa-east-1", _("South America (São Paulo)")), 75 | # Translators: AWS Region Name 76 | ("cn-north-1", _("China (Beijing)")), 77 | # Translators: AWS Region Name 78 | ("us-gov-west-1", _("AWS GovCloud (US)")), 79 | ) 80 | 81 | # Translators: Internal Model Field Name 82 | region = models.CharField( 83 | _("AWS Region"), 84 | # Translators: Interal Model Field Help Text 85 | help_text=_("The Kinesis stream region name"), 86 | max_length=20, 87 | default=settings.get_aws_region, 88 | choices=_region_choices, 89 | ) 90 | 91 | # Translators: Internal Model Field Name 92 | stream = models.CharField( 93 | _("Kinesis Stream Name"), 94 | # Translators: Interal Model Field Help Text 95 | help_text=_("The Kinesis stream name"), 96 | max_length=200, 97 | ) 98 | 99 | # Translators: Internal Model Field Name 100 | shard = models.CharField( 101 | _("Kinesis Shard ID"), 102 | # Translators: Interal Model Field Help Text 103 | help_text=_("The Kinesis shard ID"), 104 | max_length=200, 105 | ) 106 | 107 | # Translators: Internal Model Field Name 108 | sequence_number = 
models.CharField( 109 | _("Kinesis Sequence Number"), 110 | # Translators: Interal Model Field Help Text 111 | help_text=_("The current sequence number in the Kinesis shard"), 112 | max_length=200, 113 | ) 114 | 115 | class Meta: 116 | # Translators: Internal Model Name (singular) 117 | verbose_name = _("AWS Kinesis Offset") 118 | # Translators: Internal Model Name (plural) 119 | verbose_name_plural = _("AWS Kinesis Offsets") 120 | unique_together = ("region", "stream", "shard") 121 | ordering = ("stream", "shard", "sequence_number") 122 | 123 | def __str__(self) -> str: 124 | return f'region="{self.region}", stream="{self.stream}", shard="{self.shard}", sequence_number="{self.sequence_number}"' 125 | -------------------------------------------------------------------------------- /src/logpipe/producer.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Generic, TypeVar 2 | import logging 3 | 4 | from django.db import models 5 | from pydantic import RootModel 6 | 7 | from . import settings 8 | from .abc import DRFSerializer, ProducerBackend, PydanticModel, RecordMetadata 9 | from .backend import get_producer_backend 10 | from .constants import FORMAT_JSON 11 | from .format import render 12 | 13 | logger = logging.getLogger(__name__) 14 | 15 | 16 | _DRFSerType = TypeVar("_DRFSerType", bound=type[DRFSerializer[Any]]) 17 | 18 | 19 | class BaseProducer: 20 | client: ProducerBackend 21 | topic_name: str 22 | producer_id: str 23 | 24 | def __init__( 25 | self, 26 | topic_name: str, 27 | producer_id: str | None = None, 28 | ): 29 | self.client = get_producer_backend() 30 | self.topic_name = topic_name 31 | self.producer_id = ( 32 | producer_id if producer_id else settings.get("PRODUCER_ID", "") 33 | ) 34 | 35 | def _inner_send( 36 | self, 37 | message_type: str, 38 | version: int, 39 | key: str, 40 | data: Any, 41 | ) -> RecordMetadata | None: 42 | # Render everything into a string 43 | renderer = settings.get("DEFAULT_FORMAT", FORMAT_JSON) 44 | body = { 45 | "type": message_type, 46 | "version": version, 47 | "message": data, 48 | } 49 | if self.producer_id: 50 | body["producer"] = self.producer_id 51 | serialized_data = render(renderer, body) 52 | 53 | # Send the message data into the backend 54 | record_metadata = self.client.send( 55 | self.topic_name, 56 | key=key, 57 | value=serialized_data, 58 | ) 59 | logger.debug( 60 | 'Sent message with type "%s", key "%s" to topic "%s"' 61 | % (message_type, key, self.topic_name) 62 | ) 63 | return record_metadata 64 | 65 | 66 | class DRFProducer(BaseProducer, Generic[_DRFSerType]): 67 | """ 68 | Producer class for sending messages that are serialized using a Django Rest 69 | Framework serializer. 70 | """ 71 | 72 | serializer_class: _DRFSerType 73 | 74 | def __init__( 75 | self, 76 | topic_name: str, 77 | serializer_class: _DRFSerType, 78 | producer_id: str | None = None, 79 | ): 80 | super().__init__(topic_name, producer_id) 81 | self.serializer_class = serializer_class 82 | 83 | def send(self, instance: dict[str, Any] | models.Model) -> RecordMetadata | None: 84 | """ 85 | Serialize the given object using the previously specified serializer, then 86 | write it to the log backend (Kafka or Kinesis). 
87 | """ 88 | # Get the message type and version 89 | message_type = self.serializer_class.MESSAGE_TYPE 90 | version = self.serializer_class.VERSION 91 | 92 | # Init the serializer 93 | ser = self.serializer_class(instance=instance) 94 | 95 | # Get the message's partition key 96 | key_field = getattr(self.serializer_class, "KEY_FIELD", None) 97 | key = "" 98 | if key_field: 99 | key = str(ser.data[key_field]) 100 | 101 | # Send 102 | return self._inner_send( 103 | message_type=message_type, 104 | version=version, 105 | key=key, 106 | data=ser.data, 107 | ) 108 | 109 | 110 | # For backwards compatibility 111 | Producer = DRFProducer 112 | 113 | 114 | class PydanticProducer(BaseProducer): 115 | def send(self, instance: PydanticModel) -> RecordMetadata | None: 116 | # Get the message's partition key 117 | key_field = getattr(instance, "KEY_FIELD", None) 118 | key = "" 119 | if key_field: 120 | keyobj = getattr(instance, key_field) 121 | if isinstance(keyobj, RootModel): 122 | keyobj = keyobj.model_dump(mode="json") 123 | key = str(keyobj) 124 | 125 | # Send 126 | return self._inner_send( 127 | message_type=instance.MESSAGE_TYPE, 128 | version=instance.VERSION, 129 | key=key, 130 | data=instance.model_dump(mode="json"), 131 | ) 132 | -------------------------------------------------------------------------------- /src/logpipe/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thelabnyc/django-logpipe/4f4f017e400d9ccbd2148d3e735e0269df692748/src/logpipe/py.typed -------------------------------------------------------------------------------- /src/logpipe/registry.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Callable 2 | import functools 3 | 4 | from .consumer import Consumer 5 | 6 | ConsumerFactory = Callable[[], Consumer] 7 | 8 | _registered_consumers: list[ConsumerFactory] = [] 9 | 10 | 11 | def register_consumer(fn: ConsumerFactory) -> ConsumerFactory: 12 | _registered_consumers.append(fn) 13 | 14 | @functools.wraps(fn) 15 | def wrap() -> Consumer: 16 | return fn() 17 | 18 | return wrap 19 | 20 | 21 | def list_registered_consumers() -> list[Consumer]: 22 | return [build() for build in _registered_consumers] 23 | 24 | 25 | __all__ = ["register_consumer", "list_registered_consumers"] 26 | -------------------------------------------------------------------------------- /src/logpipe/settings.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | import os 3 | 4 | from django.conf import settings 5 | from django.core.exceptions import ImproperlyConfigured 6 | 7 | 8 | def get(key: str, default: Any = None) -> Any: 9 | if default is None and key not in settings.LOGPIPE: 10 | raise ImproperlyConfigured( 11 | 'Please ensure LOGPIPE["%s"] is defined in your settings.py file.' 
% key 12 | ) 13 | return settings.LOGPIPE.get(key, default) 14 | 15 | 16 | def get_aws_region(_default: str = "us-east-1") -> str: 17 | # Try to use the explicit KINESIS_REGION setting 18 | region = get("KINESIS_REGION", "") 19 | if region: 20 | return region 21 | # Try to import boto3 to get the region name 22 | try: 23 | import boto3 24 | except ImportError: 25 | # Can't import boto3, so fallback to the AWS_DEFAULT_REGION environment variable, then finally, us-east-1 26 | return os.environ.get("AWS_DEFAULT_REGION", _default) 27 | # Use the region for boto3's default session 28 | if boto3.DEFAULT_SESSION is not None: 29 | region = boto3.DEFAULT_SESSION.region_name 30 | if region: 31 | return region 32 | # Finally, make a new session and use it's region 33 | region = boto3.session.Session().region_name 34 | if region: 35 | return region 36 | # Finally, return the default 37 | return _default 38 | -------------------------------------------------------------------------------- /src/logpipe/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thelabnyc/django-logpipe/4f4f017e400d9ccbd2148d3e735e0269df692748/src/logpipe/tests/__init__.py -------------------------------------------------------------------------------- /src/logpipe/tests/common.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import ClassVar 3 | from unittest.mock import MagicMock 4 | 5 | from django.test import TestCase 6 | from pydantic import Field 7 | from rest_framework import serializers 8 | 9 | from ..abc import PydanticModel 10 | 11 | TOPIC_STATES = "us-states" 12 | 13 | 14 | class StateSerializer_DRF(serializers.Serializer): 15 | """Keyed Serializer for sending data about US States""" 16 | 17 | MESSAGE_TYPE = "us-state" 18 | VERSION = 1 19 | KEY_FIELD = "code" 20 | code = serializers.CharField(min_length=2, max_length=2) 21 | name = serializers.CharField() 22 | 23 | 24 | class State_Pydantic(PydanticModel): 25 | MESSAGE_TYPE: ClassVar[str] = "us-state" 26 | VERSION: ClassVar[int] = 1 27 | KEY_FIELD: ClassVar[str] = "code" 28 | 29 | code: str = Field( 30 | ..., 31 | max_length=2, 32 | min_length=2, 33 | ) 34 | name: str = "" 35 | 36 | 37 | @dataclass 38 | class StateModel: 39 | id: int | None = None 40 | code: str = "" 41 | name: str = "" 42 | 43 | 44 | class BaseTest(TestCase): 45 | def __init__(self, *args, **kwargs): 46 | super().__init__(*args, **kwargs) 47 | self.serializers = {} 48 | 49 | def mock_state_serializer_drf(self, save=None): 50 | def make(*args, **kwargs): 51 | ser = StateSerializer_DRF(*args, **kwargs) 52 | ser.save = MagicMock() 53 | if save: 54 | ser.save.side_effect = lambda *args, **kwargs: save( 55 | ser, *args, **kwargs 56 | ) 57 | self.serializers["state"] = ser 58 | return ser 59 | 60 | FakeStateSerializer = MagicMock() 61 | FakeStateSerializer.MESSAGE_TYPE = StateSerializer_DRF.MESSAGE_TYPE 62 | FakeStateSerializer.VERSION = StateSerializer_DRF.VERSION 63 | FakeStateSerializer.side_effect = make 64 | 65 | return FakeStateSerializer 66 | 67 | def mock_state_serializer_pydantic(self, save=None): 68 | class MockState_Pydantic(State_Pydantic): 69 | def save(self): 70 | if save: 71 | save(self) 72 | 73 | return MockState_Pydantic 74 | -------------------------------------------------------------------------------- /src/logpipe/tests/integration/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/thelabnyc/django-logpipe/4f4f017e400d9ccbd2148d3e735e0269df692748/src/logpipe/tests/integration/__init__.py -------------------------------------------------------------------------------- /src/logpipe/tests/integration/test_roundtrip.py: -------------------------------------------------------------------------------- 1 | from logpipe import Consumer, Producer 2 | 3 | from ..common import TOPIC_STATES, BaseTest, StateSerializer_DRF 4 | 5 | 6 | class RoundTripTest(BaseTest): 7 | def test_roundtrip_state(self): 8 | def save(ser): 9 | self.assertEqual(ser.validated_data["code"], "NY") 10 | self.assertEqual(ser.validated_data["name"], "New York") 11 | 12 | FakeStateSerializer = self.mock_state_serializer_drf(save) 13 | 14 | producer = Producer(TOPIC_STATES, StateSerializer_DRF) 15 | record = producer.send({"code": "NY", "name": "New York"}) 16 | self.assertEqual(record.topic, "us-states") 17 | self.assertEqual(record.partition, 0) 18 | self.assertTrue(record.offset >= 0) 19 | 20 | # producer.client.flush() 21 | 22 | consumer = Consumer(TOPIC_STATES, consumer_timeout_ms=1000) 23 | consumer.register(FakeStateSerializer) 24 | consumer.run(iter_limit=1) 25 | 26 | self.assertEqual(FakeStateSerializer.call_count, 1) 27 | self.assertEqual(self.serializers["state"].save.call_count, 1) 28 | -------------------------------------------------------------------------------- /src/logpipe/tests/unit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thelabnyc/django-logpipe/4f4f017e400d9ccbd2148d3e735e0269df692748/src/logpipe/tests/unit/__init__.py -------------------------------------------------------------------------------- /src/logpipe/tests/unit/kafka/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thelabnyc/django-logpipe/4f4f017e400d9ccbd2148d3e735e0269df692748/src/logpipe/tests/unit/kafka/__init__.py -------------------------------------------------------------------------------- /src/logpipe/tests/unit/kafka/test_consumer.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import MagicMock, patch 2 | import binascii 3 | 4 | from django.test import override_settings 5 | from kafka.consumer.fetcher import ConsumerRecord 6 | from kafka.structs import TopicPartition 7 | from rest_framework.exceptions import ValidationError 8 | 9 | from logpipe import Consumer 10 | from logpipe.exceptions import InvalidMessageError, UnknownMessageVersionError 11 | from logpipe.tests.common import TOPIC_STATES, BaseTest 12 | 13 | LOGPIPE = { 14 | "KAFKA_BOOTSTRAP_SERVERS": ["kafka:9092"], 15 | } 16 | 17 | 18 | class ConsumerTest(BaseTest): 19 | @override_settings(LOGPIPE=LOGPIPE) 20 | @patch("kafka.KafkaConsumer") 21 | def test_normal_consume(self, KafkaConsumer): 22 | # Make a fake consumer to generate a message 23 | fake_kafka_consumer = self.mock_consumer( 24 | KafkaConsumer, 25 | value=b'json:{"message":{"code":"NY","name":"New York"},"version":1,"type":"us-state"}', 26 | max_calls=100, 27 | ) 28 | 29 | # Test the values sent to our serializer match the message 30 | def save(ser): 31 | self.assertEqual(ser.validated_data["code"], "NY") 32 | self.assertEqual(ser.validated_data["name"], "New York") 33 | 34 | FakeStateSerializer = self.mock_state_serializer_drf(save) 35 | 36 | # Consume a message 
37 | consumer = Consumer(TOPIC_STATES, consumer_timeout_ms=500) 38 | consumer.register(FakeStateSerializer) 39 | consumer.run(iter_limit=1) 40 | 41 | # Test the expected mocks where called 42 | KafkaConsumer.assert_called_once_with( 43 | auto_offset_reset="earliest", 44 | bootstrap_servers=["kafka:9092"], 45 | consumer_timeout_ms=500, 46 | enable_auto_commit=False, 47 | ) 48 | fake_kafka_consumer.partitions_for_topic.assert_called_once_with(TOPIC_STATES) 49 | fake_kafka_consumer.assign.assert_called_once_with( 50 | [ 51 | TopicPartition(partition=0, topic=TOPIC_STATES), 52 | TopicPartition(partition=1, topic=TOPIC_STATES), 53 | ] 54 | ) 55 | 56 | self.assertEqual(KafkaConsumer.call_count, 1) 57 | self.assertEqual(FakeStateSerializer.call_count, 1) 58 | self.assertEqual(fake_kafka_consumer.__next__.call_count, 1) 59 | self.assertEqual(self.serializers["state"].save.call_count, 1) 60 | 61 | consumer.run(iter_limit=1) 62 | 63 | self.assertEqual(KafkaConsumer.call_count, 1) 64 | self.assertEqual(FakeStateSerializer.call_count, 2) 65 | self.assertEqual(fake_kafka_consumer.__next__.call_count, 2) 66 | self.assertEqual(self.serializers["state"].save.call_count, 1) 67 | 68 | @patch("kafka.KafkaConsumer") 69 | def test_missing_version_throws(self, KafkaConsumer): 70 | self.mock_consumer( 71 | KafkaConsumer, value=b'json:{"message":{"code":"NY","name":"New York"}}' 72 | ) 73 | FakeStateSerializer = self.mock_state_serializer_drf() 74 | consumer = Consumer(TOPIC_STATES, consumer_timeout_ms=500, throw_errors=True) 75 | with self.assertRaises(InvalidMessageError): 76 | consumer.run(iter_limit=1) 77 | self.assertEqual(FakeStateSerializer.call_count, 0) 78 | 79 | @patch("kafka.KafkaConsumer") 80 | def test_missing_version_ignored(self, KafkaConsumer): 81 | self.mock_consumer( 82 | KafkaConsumer, value=b'json:{"message":{"code":"NY","name":"New York"}}' 83 | ) 84 | FakeStateSerializer = self.mock_state_serializer_drf() 85 | consumer = Consumer(TOPIC_STATES, consumer_timeout_ms=500) 86 | consumer.run(iter_limit=1) 87 | self.assertEqual(FakeStateSerializer.call_count, 0) 88 | 89 | @patch("kafka.KafkaConsumer") 90 | def test_missing_message_throws(self, KafkaConsumer): 91 | self.mock_consumer(KafkaConsumer, value=b'json:{"version":1}') 92 | FakeStateSerializer = self.mock_state_serializer_drf() 93 | consumer = Consumer(TOPIC_STATES, consumer_timeout_ms=500, throw_errors=True) 94 | with self.assertRaises(InvalidMessageError): 95 | consumer.run(iter_limit=1) 96 | self.assertEqual(FakeStateSerializer.call_count, 0) 97 | 98 | @patch("kafka.KafkaConsumer") 99 | def test_missing_message_ignored(self, KafkaConsumer): 100 | self.mock_consumer(KafkaConsumer, value=b'json:{"version":1}') 101 | FakeStateSerializer = self.mock_state_serializer_drf() 102 | consumer = Consumer(TOPIC_STATES, consumer_timeout_ms=500) 103 | consumer.run(iter_limit=1) 104 | self.assertEqual(FakeStateSerializer.call_count, 0) 105 | 106 | @patch("kafka.KafkaConsumer") 107 | def test_unknown_version_throws(self, KafkaConsumer): 108 | self.mock_consumer( 109 | KafkaConsumer, 110 | value=b'json:{"message":{"code":"NY","name":"New York"},"version":2,"type":"us-state"}', 111 | ) 112 | FakeStateSerializer = self.mock_state_serializer_drf() 113 | 114 | consumer = Consumer(TOPIC_STATES, consumer_timeout_ms=500, throw_errors=True) 115 | consumer.register(FakeStateSerializer) 116 | with self.assertRaises(UnknownMessageVersionError): 117 | consumer.run(iter_limit=1) 118 | self.assertEqual(FakeStateSerializer.call_count, 0) 119 | 120 | 
@patch("kafka.KafkaConsumer") 121 | def test_unknown_version_ignored(self, KafkaConsumer): 122 | self.mock_consumer( 123 | KafkaConsumer, 124 | value=b'json:{"message":{"code":"NY","name":"New York"},"version":2,"type":"us-state"}', 125 | ) 126 | FakeStateSerializer = self.mock_state_serializer_drf() 127 | 128 | consumer = Consumer(TOPIC_STATES, consumer_timeout_ms=500) 129 | consumer.register(FakeStateSerializer) 130 | consumer.run(iter_limit=1) 131 | self.assertEqual(FakeStateSerializer.call_count, 0) 132 | 133 | @patch("kafka.KafkaConsumer") 134 | def test_invalid_message_throws(self, KafkaConsumer): 135 | self.mock_consumer( 136 | KafkaConsumer, 137 | value=b'json:{"message":{"code":"NYC","name":"New York"},"version":1,"type":"us-state"}', 138 | ) 139 | FakeStateSerializer = self.mock_state_serializer_drf() 140 | 141 | consumer = Consumer(TOPIC_STATES, consumer_timeout_ms=500, throw_errors=True) 142 | consumer.register(FakeStateSerializer) 143 | with self.assertRaises(ValidationError): 144 | consumer.run(iter_limit=1) 145 | self.assertEqual(FakeStateSerializer.call_count, 1) 146 | self.assertEqual(self.serializers["state"].save.call_count, 0) 147 | 148 | @patch("kafka.KafkaConsumer") 149 | def test_invalid_message_ignored(self, KafkaConsumer): 150 | self.mock_consumer( 151 | KafkaConsumer, 152 | value=b'json:{"message":{"code":"NYC","name":"New York"},"version":1,"type":"us-state"}', 153 | ) 154 | FakeStateSerializer = self.mock_state_serializer_drf() 155 | consumer = Consumer(TOPIC_STATES, consumer_timeout_ms=500) 156 | consumer.register(FakeStateSerializer) 157 | consumer.run(iter_limit=1) 158 | self.assertEqual(FakeStateSerializer.call_count, 1) 159 | self.assertEqual(self.serializers["state"].save.call_count, 0) 160 | 161 | @patch("kafka.KafkaConsumer") 162 | def test_ignored_message_type_is_ignored(self, KafkaConsumer): 163 | self.mock_consumer( 164 | KafkaConsumer, 165 | value=b'json:{"message":{"code":"NY","name":"New York"},"version":1,"type":"us-state"}', 166 | ) 167 | FakeStateSerializer = self.mock_state_serializer_drf() 168 | consumer = Consumer(TOPIC_STATES, consumer_timeout_ms=500) 169 | consumer.add_ignored_message_type("us-state") 170 | consumer.register(FakeStateSerializer) 171 | consumer.run(iter_limit=1) 172 | # Even though message is valid, the serializer should never get called since message type is explicitly ignored. 173 | self.assertEqual(FakeStateSerializer.call_count, 0) 174 | self.assertTrue("state" not in self.serializers) 175 | 176 | def mock_consumer(self, KafkaConsumer, value, max_calls=1): 177 | # Mock a consumer object 178 | fake_kafka_consumer = MagicMock() 179 | 180 | # Should return a record when used as an iterator. Set up the mock to 181 | # return the record up to the limit of max_calls. 
Then raises StopIteration 182 | record = ConsumerRecord( 183 | topic=TOPIC_STATES, 184 | partition=0, 185 | leader_epoch=-1, 186 | offset=42, 187 | timestamp=1467649216540, 188 | timestamp_type=0, 189 | key=b"NY", 190 | value=value, 191 | headers=None, 192 | checksum=binascii.crc32(value), 193 | serialized_key_size=b"NY", 194 | serialized_value_size=value, 195 | serialized_header_size=0, 196 | ) 197 | 198 | meta = {"i": 0} 199 | 200 | def _iter(*args, **kwargs): 201 | if meta["i"] >= max_calls: 202 | raise StopIteration() 203 | meta["i"] += 1 204 | return record 205 | 206 | fake_kafka_consumer.__next__.side_effect = _iter 207 | 208 | # Return some partitions 209 | fake_kafka_consumer.partitions_for_topic.return_value = {0, 1} 210 | 211 | # Make class instantiation return our mock 212 | KafkaConsumer.return_value = fake_kafka_consumer 213 | 214 | return fake_kafka_consumer 215 | -------------------------------------------------------------------------------- /src/logpipe/tests/unit/kafka/test_producer.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import MagicMock, patch 2 | import binascii 3 | 4 | from django.test import TestCase, override_settings 5 | from kafka.consumer.fetcher import ConsumerRecord 6 | 7 | from logpipe import Producer 8 | from logpipe.tests.common import TOPIC_STATES, StateModel, StateSerializer_DRF 9 | 10 | LOGPIPE = { 11 | "KAFKA_BOOTSTRAP_SERVERS": ["kafka:9092"], 12 | "KAFKA_SEND_TIMEOUT": 5, 13 | "KAFKA_MAX_SEND_RETRIES": 5, 14 | } 15 | 16 | 17 | class DRFProducerTest(TestCase): 18 | @override_settings(LOGPIPE=LOGPIPE) 19 | @patch("kafka.KafkaProducer") 20 | def test_normal_send(self, KafkaProducer): 21 | future = MagicMock() 22 | future.get.return_value = self._get_record_metadata() 23 | 24 | def test_send_call(topic, key, value): 25 | self.assertEqual(topic, "us-states") 26 | self.assertEqual(key, b"NY") 27 | self.assertIn(b"json:", value) 28 | self.assertIn(b'"message":{"', value) 29 | self.assertIn(b'"code":"NY"', value) 30 | self.assertIn(b'"name":"New York"', value) 31 | self.assertIn(b'"version":1', value) 32 | return future 33 | 34 | client = MagicMock() 35 | client.send.side_effect = test_send_call 36 | KafkaProducer.return_value = client 37 | 38 | producer = Producer(TOPIC_STATES, StateSerializer_DRF) 39 | ret = producer.send({"code": "NY", "name": "New York"}) 40 | self.assertEqual(ret.topic, TOPIC_STATES) 41 | self.assertEqual(ret.partition, 0) 42 | self.assertEqual(ret.offset, 42) 43 | self.assertEqual(KafkaProducer.call_count, 1) 44 | self.assertEqual(client.send.call_count, 1) 45 | self.assertEqual(future.get.call_count, 1) 46 | KafkaProducer.assert_called_with(bootstrap_servers=["kafka:9092"], retries=5) 47 | future.get.assert_called_with(timeout=5) 48 | 49 | @override_settings(LOGPIPE=LOGPIPE) 50 | @patch("kafka.KafkaProducer") 51 | def test_object_send(self, KafkaProducer): 52 | future = MagicMock() 53 | future.get.return_value = self._get_record_metadata() 54 | 55 | def test_send_call(topic, key, value): 56 | self.assertEqual(topic, "us-states") 57 | self.assertEqual(key, b"NY") 58 | self.assertIn(b"json:", value) 59 | self.assertIn(b'"message":{"', value) 60 | self.assertIn(b'"code":"NY"', value) 61 | self.assertIn(b'"name":"New York"', value) 62 | self.assertIn(b'"version":1', value) 63 | return future 64 | 65 | client = MagicMock() 66 | client.send.side_effect = test_send_call 67 | KafkaProducer.return_value = client 68 | 69 | producer = Producer(TOPIC_STATES, StateSerializer_DRF) 70 
| obj = StateModel( 71 | code="NY", 72 | name="New York", 73 | ) 74 | ret = producer.send(obj) 75 | self.assertEqual(ret.topic, TOPIC_STATES) 76 | self.assertEqual(ret.partition, 0) 77 | self.assertEqual(ret.offset, 42) 78 | self.assertEqual(KafkaProducer.call_count, 1) 79 | self.assertEqual(client.send.call_count, 1) 80 | self.assertEqual(future.get.call_count, 1) 81 | KafkaProducer.assert_called_with(bootstrap_servers=["kafka:9092"], retries=5) 82 | future.get.assert_called_with(timeout=5) 83 | 84 | def _get_record_metadata(self): 85 | return ConsumerRecord( 86 | topic=TOPIC_STATES, 87 | partition=0, 88 | leader_epoch=-1, 89 | offset=42, 90 | timestamp=1467649216540, 91 | timestamp_type=0, 92 | key=b"NY", 93 | value=b"foo", 94 | headers=None, 95 | checksum=binascii.crc32(b"foo"), 96 | serialized_key_size=b"NY", 97 | serialized_value_size=b"foo", 98 | serialized_header_size=0, 99 | ) 100 | -------------------------------------------------------------------------------- /src/logpipe/tests/unit/kinesis/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thelabnyc/django-logpipe/4f4f017e400d9ccbd2148d3e735e0269df692748/src/logpipe/tests/unit/kinesis/__init__.py -------------------------------------------------------------------------------- /src/logpipe/tests/unit/kinesis/test_consumer.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import MagicMock 2 | 3 | from django.test import override_settings 4 | from moto import mock_aws 5 | from rest_framework.exceptions import ValidationError 6 | import boto3 7 | 8 | from logpipe import Consumer 9 | from logpipe.exceptions import InvalidMessageError, UnknownMessageVersionError 10 | from logpipe.tests.common import TOPIC_STATES, BaseTest 11 | 12 | LOGPIPE = { 13 | "OFFSET_BACKEND": "logpipe.backend.kinesis.ModelOffsetStore", 14 | "PRODUCER_BACKEND": "logpipe.backend.kinesis.Producer", 15 | "CONSUMER_BACKEND": "logpipe.backend.kinesis.Consumer", 16 | } 17 | 18 | 19 | class ConsumerTest(BaseTest): 20 | @override_settings(LOGPIPE=LOGPIPE) 21 | @mock_aws 22 | def test_normal_consume(self): 23 | self.make_stream_with_record( 24 | "NY", 25 | b'json:{"message":{"code":"NY","name":"New York"},"version":1,"type":"us-state"}', 26 | ) 27 | 28 | # Test the values sent to our serializer match the message 29 | def save(ser): 30 | self.assertEqual(ser.validated_data["code"], "NY") 31 | self.assertEqual(ser.validated_data["name"], "New York") 32 | 33 | FakeStateSerializer = self.mock_state_serializer_drf(save) 34 | 35 | # Consume a message 36 | consumer = Consumer(TOPIC_STATES) 37 | consumer.register(FakeStateSerializer) 38 | 39 | consumer.run(iter_limit=10) 40 | self.assertEqual(self.serializers["state"].save.call_count, 1) 41 | consumer.run(iter_limit=10) 42 | self.assertEqual(self.serializers["state"].save.call_count, 1) 43 | 44 | @override_settings(LOGPIPE=LOGPIPE) 45 | @mock_aws 46 | def test_multi_shard_consume(self): 47 | # Send a bunch of messages to a bunch of shards 48 | key = 1 49 | value = b'json:{"message":{"code":"NY","name":"New York"},"version":1,"type":"us-state"}' 50 | client = self.make_stream_with_record(str(key), value, shard_count=20) 51 | for i in range(100): 52 | key += 1 53 | client.put_record( 54 | StreamName=TOPIC_STATES, Data=value, PartitionKey=str(key) 55 | ) 56 | 57 | # Test the values sent to our serializer match the message 58 | test = {"i": 0} 59 | 60 | def save(ser): 61 | 
self.assertEqual(ser.validated_data["code"], "NY") 62 | self.assertEqual(ser.validated_data["name"], "New York") 63 | test["i"] += 1 64 | 65 | FakeStateSerializer = self.mock_state_serializer_drf(save) 66 | 67 | # Consume messages. Log should have 101 messages in it now. 68 | consumer = Consumer(TOPIC_STATES) 69 | consumer.register(FakeStateSerializer) 70 | consumer.run(iter_limit=2000) 71 | self.assertEqual(FakeStateSerializer.call_count, 101) 72 | self.assertEqual(test["i"], 101) 73 | 74 | @override_settings(LOGPIPE=LOGPIPE) 75 | @mock_aws 76 | def test_missing_version_throws(self): 77 | self.make_stream_with_record( 78 | "NY", b'json:{"message":{"code":"NY","name":"New York"}}' 79 | ) 80 | FakeStateSerializer = self.mock_state_serializer_drf() 81 | consumer = Consumer(TOPIC_STATES, consumer_timeout_ms=500, throw_errors=True) 82 | with self.assertRaises(InvalidMessageError): 83 | consumer.run(iter_limit=1) 84 | self.assertEqual(FakeStateSerializer.call_count, 0) 85 | 86 | @override_settings(LOGPIPE=LOGPIPE) 87 | @mock_aws 88 | def test_missing_version_ignored(self): 89 | self.make_stream_with_record( 90 | "NY", b'json:{"message":{"code":"NY","name":"New York"}}' 91 | ) 92 | FakeStateSerializer = self.mock_state_serializer_drf() 93 | consumer = Consumer(TOPIC_STATES, consumer_timeout_ms=500) 94 | consumer.run(iter_limit=1) 95 | self.assertEqual(FakeStateSerializer.call_count, 0) 96 | 97 | @override_settings(LOGPIPE=LOGPIPE) 98 | @mock_aws 99 | def test_missing_message_throws(self): 100 | self.make_stream_with_record("NY", b'json:{"version":1}') 101 | FakeStateSerializer = self.mock_state_serializer_drf() 102 | consumer = Consumer(TOPIC_STATES, consumer_timeout_ms=500, throw_errors=True) 103 | with self.assertRaises(InvalidMessageError): 104 | consumer.run(iter_limit=1) 105 | self.assertEqual(FakeStateSerializer.call_count, 0) 106 | 107 | @override_settings(LOGPIPE=LOGPIPE) 108 | @mock_aws 109 | def test_missing_message_ignored(self): 110 | self.make_stream_with_record("NY", b'json:{"version":1}') 111 | FakeStateSerializer = self.mock_state_serializer_drf() 112 | consumer = Consumer(TOPIC_STATES, consumer_timeout_ms=500) 113 | consumer.run(iter_limit=1) 114 | self.assertEqual(FakeStateSerializer.call_count, 0) 115 | 116 | @override_settings(LOGPIPE=LOGPIPE) 117 | @mock_aws 118 | def test_unknown_version_throws(self): 119 | self.make_stream_with_record( 120 | "NY", 121 | b'json:{"message":{"code":"NY","name":"New York"},"version":2,"type":"us-state"}', 122 | ) 123 | FakeStateSerializer = self.mock_state_serializer_drf() 124 | consumer = Consumer(TOPIC_STATES, consumer_timeout_ms=500, throw_errors=True) 125 | consumer.register(FakeStateSerializer) 126 | with self.assertRaises(UnknownMessageVersionError): 127 | consumer.run(iter_limit=1) 128 | self.assertEqual(FakeStateSerializer.call_count, 0) 129 | 130 | @override_settings(LOGPIPE=LOGPIPE) 131 | @mock_aws 132 | def test_unknown_version_ignored(self): 133 | self.make_stream_with_record( 134 | "NY", 135 | b'json:{"message":{"code":"NY","name":"New York"},"version":2,"type":"us-state"}', 136 | ) 137 | FakeStateSerializer = self.mock_state_serializer_drf() 138 | consumer = Consumer(TOPIC_STATES, consumer_timeout_ms=500) 139 | consumer.register(FakeStateSerializer) 140 | consumer.run(iter_limit=1) 141 | self.assertEqual(FakeStateSerializer.call_count, 0) 142 | 143 | @override_settings(LOGPIPE=LOGPIPE) 144 | @mock_aws 145 | def test_invalid_message_throws(self): 146 | self.make_stream_with_record( 147 | "NY", 148 | 
b'json:{"message":{"code":"NYC","name":"New York"},"version":1,"type":"us-state"}', 149 | ) 150 | FakeStateSerializer = self.mock_state_serializer_drf() 151 | consumer = Consumer(TOPIC_STATES, consumer_timeout_ms=500, throw_errors=True) 152 | consumer.register(FakeStateSerializer) 153 | with self.assertRaises(ValidationError): 154 | consumer.run(iter_limit=1) 155 | self.assertEqual(FakeStateSerializer.call_count, 1) 156 | self.assertEqual(self.serializers["state"].save.call_count, 0) 157 | 158 | @override_settings(LOGPIPE=LOGPIPE) 159 | @mock_aws 160 | def test_invalid_message_throws_pydantic(self): 161 | self.make_stream_with_record( 162 | "NY", 163 | b'json:{"message":{"code":"NYC","name":"New York"},"version":1,"type":"us-state"}', 164 | ) 165 | save = MagicMock() 166 | FakeStateSerializer = self.mock_state_serializer_pydantic(save) 167 | consumer = Consumer(TOPIC_STATES, consumer_timeout_ms=500, throw_errors=True) 168 | consumer.register(FakeStateSerializer) 169 | with self.assertRaises(ValidationError): 170 | consumer.run(iter_limit=1) 171 | self.assertEqual(save.call_count, 0) 172 | 173 | @override_settings(LOGPIPE=LOGPIPE) 174 | @mock_aws 175 | def test_invalid_message_ignored(self): 176 | self.make_stream_with_record( 177 | "NY", 178 | b'json:{"message":{"code":"NYC","name":"New York"},"version":1,"type":"us-state"}', 179 | ) 180 | FakeStateSerializer = self.mock_state_serializer_drf() 181 | consumer = Consumer(TOPIC_STATES, consumer_timeout_ms=500) 182 | consumer.register(FakeStateSerializer) 183 | consumer.run(iter_limit=1) 184 | self.assertEqual(FakeStateSerializer.call_count, 1) 185 | self.assertEqual(self.serializers["state"].save.call_count, 0) 186 | 187 | @override_settings(LOGPIPE=LOGPIPE) 188 | @mock_aws 189 | def test_invalid_message_ignored_pydantic(self): 190 | self.make_stream_with_record( 191 | "NY", 192 | b'json:{"message":{"code":"NYC","name":"New York"},"version":1,"type":"us-state"}', 193 | ) 194 | save = MagicMock() 195 | FakeStateSerializer = self.mock_state_serializer_pydantic(save) 196 | consumer = Consumer(TOPIC_STATES, consumer_timeout_ms=500) 197 | consumer.register(FakeStateSerializer) 198 | consumer.run(iter_limit=1) 199 | self.assertEqual(save.call_count, 0) 200 | 201 | @override_settings(LOGPIPE=LOGPIPE) 202 | @mock_aws 203 | def test_ignored_message_type_is_ignored(self): 204 | self.make_stream_with_record( 205 | "NY", 206 | b'json:{"message":{"code":"NY","name":"New York"},"version":1,"type":"us-state"}', 207 | ) 208 | FakeStateSerializer = self.mock_state_serializer_drf() 209 | consumer = Consumer(TOPIC_STATES, consumer_timeout_ms=500) 210 | consumer.add_ignored_message_type("us-state") 211 | consumer.register(FakeStateSerializer) 212 | consumer.run(iter_limit=1) 213 | # Even though message is valid, the serializer should never get called since message type is explicitly ignored. 
214 | self.assertEqual(FakeStateSerializer.call_count, 0) 215 | self.assertTrue("state" not in self.serializers) 216 | 217 | def make_stream_with_record(self, key, value, shard_count=1): 218 | client = boto3.client("kinesis", region_name="us-east-1") 219 | client.create_stream(StreamName=TOPIC_STATES, ShardCount=shard_count) 220 | client.put_record(StreamName=TOPIC_STATES, Data=value, PartitionKey=key) 221 | return client 222 | -------------------------------------------------------------------------------- /src/logpipe/tests/unit/kinesis/test_producer.py: -------------------------------------------------------------------------------- 1 | from django.test import TestCase, override_settings 2 | from moto import mock_aws 3 | import boto3 4 | 5 | from logpipe import Producer 6 | from logpipe.tests.common import TOPIC_STATES, StateModel, StateSerializer_DRF 7 | 8 | LOGPIPE = { 9 | "OFFSET_BACKEND": "logpipe.backend.kinesis.ModelOffsetStore", 10 | "PRODUCER_BACKEND": "logpipe.backend.kinesis.Producer", 11 | "CONSUMER_BACKEND": "logpipe.backend.kinesis.Consumer", 12 | } 13 | 14 | 15 | class DRFProducerTest(TestCase): 16 | @override_settings(LOGPIPE=LOGPIPE) 17 | @mock_aws 18 | def test_normal_send(self): 19 | client = boto3.client("kinesis", region_name="us-east-1") 20 | client.create_stream(StreamName=TOPIC_STATES, ShardCount=1) 21 | 22 | producer = Producer(TOPIC_STATES, StateSerializer_DRF) 23 | 24 | ret = producer.send({"code": "NY", "name": "New York"}) 25 | self.assertEqual(ret.topic, TOPIC_STATES) 26 | self.assertEqual(ret.partition, "shardId-000000000000") 27 | self.assertEqual(ret.offset, "1") 28 | 29 | ret = producer.send({"code": "PA", "name": "Pennsylvania"}) 30 | self.assertEqual(ret.topic, TOPIC_STATES) 31 | self.assertEqual(ret.partition, "shardId-000000000000") 32 | self.assertEqual(ret.offset, "2") 33 | 34 | shard_iter = client.get_shard_iterator( 35 | StreamName=TOPIC_STATES, 36 | ShardId="shardId-000000000000", 37 | ShardIteratorType="TRIM_HORIZON", 38 | )["ShardIterator"] 39 | response = client.get_records(ShardIterator=shard_iter, Limit=100) 40 | 41 | self.assertEqual(response["Records"][0]["SequenceNumber"], "1") 42 | self.assertJSONEqual( 43 | response["Records"][0]["Data"].decode().replace("json:", ""), 44 | { 45 | "type": "us-state", 46 | "version": 1, 47 | "message": { 48 | "code": "NY", 49 | "name": "New York", 50 | }, 51 | }, 52 | ) 53 | self.assertEqual(response["Records"][0]["PartitionKey"], "NY") 54 | 55 | self.assertEqual(response["Records"][1]["SequenceNumber"], "2") 56 | self.assertJSONEqual( 57 | response["Records"][1]["Data"].decode().replace("json:", ""), 58 | { 59 | "type": "us-state", 60 | "version": 1, 61 | "message": { 62 | "code": "PA", 63 | "name": "Pennsylvania", 64 | }, 65 | }, 66 | ) 67 | self.assertEqual(response["Records"][1]["PartitionKey"], "PA") 68 | 69 | @override_settings(LOGPIPE=LOGPIPE) 70 | @mock_aws 71 | def test_object_send(self): 72 | client = boto3.client("kinesis", region_name="us-east-1") 73 | client.create_stream(StreamName=TOPIC_STATES, ShardCount=1) 74 | 75 | producer = Producer(TOPIC_STATES, StateSerializer_DRF) 76 | 77 | obj = StateModel( 78 | code="NY", 79 | name="New York", 80 | ) 81 | ret = producer.send(obj) 82 | self.assertEqual(ret.topic, TOPIC_STATES) 83 | self.assertEqual(ret.partition, "shardId-000000000000") 84 | self.assertEqual(ret.offset, "1") 85 | 86 | obj = StateModel( 87 | code="PA", 88 | name="Pennsylvania", 89 | ) 90 | ret = producer.send(obj) 91 | self.assertEqual(ret.topic, TOPIC_STATES) 92 | 
self.assertEqual(ret.partition, "shardId-000000000000") 93 | self.assertEqual(ret.offset, "2") 94 | 95 | shard_iter = client.get_shard_iterator( 96 | StreamName=TOPIC_STATES, 97 | ShardId="shardId-000000000000", 98 | ShardIteratorType="TRIM_HORIZON", 99 | )["ShardIterator"] 100 | response = client.get_records(ShardIterator=shard_iter, Limit=100) 101 | 102 | self.assertEqual(response["Records"][0]["SequenceNumber"], "1") 103 | self.assertJSONEqual( 104 | response["Records"][0]["Data"].decode().replace("json:", ""), 105 | { 106 | "type": "us-state", 107 | "version": 1, 108 | "message": { 109 | "code": "NY", 110 | "name": "New York", 111 | }, 112 | }, 113 | ) 114 | self.assertEqual(response["Records"][0]["PartitionKey"], "NY") 115 | 116 | self.assertEqual(response["Records"][1]["SequenceNumber"], "2") 117 | self.assertJSONEqual( 118 | response["Records"][1]["Data"].decode().replace("json:", ""), 119 | { 120 | "type": "us-state", 121 | "version": 1, 122 | "message": { 123 | "code": "PA", 124 | "name": "Pennsylvania", 125 | }, 126 | }, 127 | ) 128 | self.assertEqual(response["Records"][1]["PartitionKey"], "PA") 129 | -------------------------------------------------------------------------------- /src/logpipe/tests/unit/test_consumer.py: -------------------------------------------------------------------------------- 1 | import collections 2 | 3 | from django.test import override_settings 4 | 5 | from logpipe import Consumer, DRFProducer, MultiConsumer, PydanticProducer 6 | from logpipe.backend.dummy import reset_topics 7 | from logpipe.tests.common import ( 8 | TOPIC_STATES, 9 | BaseTest, 10 | State_Pydantic, 11 | StateModel, 12 | StateSerializer_DRF, 13 | ) 14 | 15 | LOGPIPE = { 16 | "OFFSET_BACKEND": "logpipe.backend.dummy.ModelOffsetStore", 17 | "PRODUCER_BACKEND": "logpipe.backend.dummy.Producer", 18 | "CONSUMER_BACKEND": "logpipe.backend.dummy.Consumer", 19 | } 20 | 21 | 22 | class DRFConsumerTest(BaseTest): 23 | def setUp(self): 24 | super().setUp() 25 | reset_topics() 26 | 27 | @override_settings(LOGPIPE=LOGPIPE) 28 | def test_normal_consume(self): 29 | # Send a message to the dummy producer 30 | producer = DRFProducer(TOPIC_STATES, StateSerializer_DRF) 31 | ny = StateModel( 32 | id=5, 33 | code="NY", 34 | name="New York", 35 | ) 36 | producer.send(ny) 37 | 38 | # Setup the consumer serializer 39 | test = collections.Counter() 40 | 41 | def save(ser): 42 | self.assertEqual(ser.validated_data["code"], "NY") 43 | self.assertEqual(ser.validated_data["name"], "New York") 44 | test["i"] += 1 45 | 46 | FakeStateSerializer = self.mock_state_serializer_drf(save) 47 | 48 | # Retrieve the message from the dummy consumer. 
49 | consumer = Consumer(TOPIC_STATES) 50 | consumer.register(FakeStateSerializer) 51 | 52 | consumer.run(iter_limit=10) 53 | self.assertEqual(self.serializers["state"].save.call_count, 1) 54 | 55 | # Not called again 56 | consumer.run(iter_limit=10) 57 | self.assertEqual(self.serializers["state"].save.call_count, 1) 58 | self.assertEqual(test["i"], 1) 59 | 60 | 61 | class PydanticConsumerTest(BaseTest): 62 | def setUp(self): 63 | super().setUp() 64 | reset_topics() 65 | 66 | @override_settings(LOGPIPE=LOGPIPE) 67 | def test_normal_consume(self): 68 | # Send a message to the dummy producer 69 | producer = PydanticProducer(TOPIC_STATES) 70 | ny = State_Pydantic( 71 | id=5, 72 | code="NY", 73 | name="New York", 74 | ) 75 | producer.send(ny) 76 | 77 | # Setup the consumer serializer 78 | test = collections.Counter() 79 | 80 | def save(_self): 81 | self.assertEqual(_self._instance, None) 82 | self.assertEqual(_self.code, "NY") 83 | self.assertEqual(_self.name, "New York") 84 | test["i"] += 1 85 | 86 | FakeStateSerializer = self.mock_state_serializer_pydantic(save) 87 | 88 | # Retrieve the message from the dummy consumer. 89 | consumer = Consumer(TOPIC_STATES) 90 | consumer.register(FakeStateSerializer) 91 | 92 | # Save called once. 93 | consumer.run(iter_limit=10) 94 | self.assertEqual(test["i"], 1) 95 | 96 | # Not called again 97 | consumer.run(iter_limit=10) 98 | self.assertEqual(test["i"], 1) 99 | 100 | 101 | class MultiConsumerTest(BaseTest): 102 | def setUp(self): 103 | super().setUp() 104 | reset_topics() 105 | 106 | @override_settings(LOGPIPE=LOGPIPE) 107 | def test_normal_consume(self): 108 | # Send a message to the dummy producer 109 | producer = PydanticProducer(TOPIC_STATES) 110 | ny = State_Pydantic( 111 | id=5, 112 | code="NY", 113 | name="New York", 114 | ) 115 | for i in range(5): 116 | producer.send(ny) 117 | 118 | # Setup the consumer serializer 119 | test = collections.Counter() 120 | 121 | def save(_self): 122 | self.assertEqual(_self._instance, None) 123 | self.assertEqual(_self.code, "NY") 124 | self.assertEqual(_self.name, "New York") 125 | test["i"] += 1 126 | 127 | FakeStateSerializer = self.mock_state_serializer_pydantic(save) 128 | 129 | # Retrieve the message from the dummy consumer. 130 | inner_consumer = Consumer(TOPIC_STATES) 131 | inner_consumer.register(FakeStateSerializer) 132 | consumer = MultiConsumer(inner_consumer, inner_consumer) 133 | 134 | # Save called once. 
135 | consumer.run(iter_limit=10) 136 | self.assertEqual(test["i"], 5) 137 | -------------------------------------------------------------------------------- /src/logpipe/tests/unit/test_format.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | from django.test import TestCase, override_settings 4 | 5 | from logpipe.constants import FORMAT_PICKLE 6 | from logpipe.exceptions import UnknownFormatError 7 | from logpipe.formats.pickle import PickleParser, PickleRenderer 8 | import logpipe.format 9 | 10 | 11 | class JSONFormatTest(TestCase): 12 | def test_render(self): 13 | msg = logpipe.format.render( 14 | "json", 15 | { 16 | "foo": "bar", 17 | }, 18 | ) 19 | self.assertEqual(msg, b'json:{"foo":"bar"}') 20 | 21 | def test_parse(self): 22 | data = logpipe.format.parse(b'json:{"foo":"bar"}') 23 | self.assertEqual( 24 | data, 25 | { 26 | "foo": "bar", 27 | }, 28 | ) 29 | 30 | 31 | class MsgPackFormatTest(TestCase): 32 | def test_render(self): 33 | msg = logpipe.format.render("msgpack", {"foo": "bar"}) 34 | self.assertEqual(msg, b"msgpack:\x81\xa3foo\xa3bar") 35 | 36 | def test_parse(self): 37 | data = logpipe.format.parse(b"msgpack:\x81\xa3foo\xa3bar") 38 | self.assertEqual( 39 | data, 40 | { 41 | "foo": "bar", 42 | }, 43 | ) 44 | 45 | 46 | class PickleFormatTest(TestCase): 47 | @override_settings(LOGPIPE={"BOOTSTRAP_SERVERS": ["kafka:9092"]}) 48 | def test_default(self): 49 | with self.assertRaises(UnknownFormatError): 50 | logpipe.format.render("pickle", {}) 51 | 52 | def test_render(self): 53 | logpipe.format.register(FORMAT_PICKLE, PickleRenderer(), PickleParser()) 54 | msg = logpipe.format.render("pickle", {"foo": "bar"}) 55 | self.assertTrue(msg.startswith(b"pickle:")) 56 | self.assertEqual(pickle.loads(msg.replace(b"pickle:", b"")), {"foo": "bar"}) 57 | logpipe.format.unregister(FORMAT_PICKLE) 58 | 59 | def test_parse(self): 60 | logpipe.format.register(FORMAT_PICKLE, PickleRenderer(), PickleParser()) 61 | data = logpipe.format.parse( 62 | b"pickle:\x80\x03}q\x00X\x03\x00\x00\x00fooq\x01X\x03\x00\x00\x00barq\x02s." 
63 | ) 64 | self.assertEqual( 65 | data, 66 | { 67 | "foo": "bar", 68 | }, 69 | ) 70 | logpipe.format.unregister(FORMAT_PICKLE) 71 | 72 | 73 | class UnknownFormatTest(TestCase): 74 | def test_render(self): 75 | with self.assertRaises(UnknownFormatError): 76 | logpipe.format.render("xml", {}) 77 | 78 | def test_parse(self): 79 | with self.assertRaises(UnknownFormatError): 80 | logpipe.format.parse(b"xml:bar") 81 | -------------------------------------------------------------------------------- /src/logpipe/tests/unit/test_producer.py: -------------------------------------------------------------------------------- 1 | from unittest import mock 2 | 3 | from django.test import TestCase 4 | from pydantic import computed_field 5 | from rest_framework import serializers 6 | 7 | from logpipe import DRFProducer, PydanticProducer 8 | from logpipe.tests.common import ( 9 | TOPIC_STATES, 10 | State_Pydantic, 11 | StateModel, 12 | StateSerializer_DRF, 13 | ) 14 | 15 | 16 | class CustomStateSerializer_DRF(StateSerializer_DRF): 17 | my_ser_method_field = serializers.SerializerMethodField() 18 | 19 | def get_my_ser_method_field(self, obj): 20 | return f"value-{obj.code}" 21 | 22 | 23 | class CustomState_Pydantic(State_Pydantic): 24 | @computed_field 25 | def my_ser_method_field(self) -> str: 26 | return f"value-{self.code}" 27 | 28 | 29 | class DRFProducerTest(TestCase): 30 | def test_send_serializer_method_field(self): 31 | fake_client = mock.MagicMock() 32 | fake_client.send = mock.MagicMock() 33 | 34 | def check_args(topic, key, value): 35 | self.assertEqual(topic, TOPIC_STATES) 36 | self.assertEqual(key, "NY") 37 | self.assertJSONEqual( 38 | value.decode().replace("json:", ""), 39 | { 40 | "type": "us-state", 41 | "version": 1, 42 | "message": { 43 | "code": "NY", 44 | "name": "New York", 45 | "my_ser_method_field": "value-NY", 46 | }, 47 | }, 48 | ) 49 | 50 | fake_client.send.side_effect = check_args 51 | 52 | get_producer_backend = mock.MagicMock() 53 | get_producer_backend.return_value = fake_client 54 | 55 | with mock.patch("logpipe.producer.get_producer_backend", get_producer_backend): 56 | producer = DRFProducer(TOPIC_STATES, CustomStateSerializer_DRF) 57 | 58 | ny = StateModel( 59 | id=5, 60 | code="NY", 61 | name="New York", 62 | ) 63 | producer.send(ny) 64 | 65 | self.assertEqual(fake_client.send.call_count, 1) 66 | 67 | def test_send_with_producer_id(self): 68 | fake_client = mock.MagicMock() 69 | fake_client.send = mock.MagicMock() 70 | 71 | def check_args(topic, key, value): 72 | self.assertEqual(topic, TOPIC_STATES) 73 | self.assertEqual(key, "NY") 74 | self.assertJSONEqual( 75 | value.decode().replace("json:", ""), 76 | { 77 | "type": "us-state", 78 | "version": 1, 79 | "producer": "my-producer-app", 80 | "message": { 81 | "code": "NY", 82 | "name": "New York", 83 | "my_ser_method_field": "value-NY", 84 | }, 85 | }, 86 | ) 87 | 88 | fake_client.send.side_effect = check_args 89 | 90 | get_producer_backend = mock.MagicMock() 91 | get_producer_backend.return_value = fake_client 92 | 93 | with mock.patch("logpipe.producer.get_producer_backend", get_producer_backend): 94 | producer = DRFProducer( 95 | TOPIC_STATES, 96 | CustomStateSerializer_DRF, 97 | producer_id="my-producer-app", 98 | ) 99 | 100 | ny = StateModel( 101 | id=5, 102 | code="NY", 103 | name="New York", 104 | ) 105 | producer.send(ny) 106 | 107 | self.assertEqual(fake_client.send.call_count, 1) 108 | 109 | 110 | class PydanticProducerTest(TestCase): 111 | def test_send_serializer_method_field(self): 112 | fake_client = 
mock.MagicMock() 113 | fake_client.send = mock.MagicMock() 114 | 115 | def check_args(topic, key, value): 116 | self.assertEqual(topic, TOPIC_STATES) 117 | self.assertEqual(key, "NY") 118 | self.assertJSONEqual( 119 | value.decode().replace("json:", ""), 120 | { 121 | "type": "us-state", 122 | "version": 1, 123 | "message": { 124 | "code": "NY", 125 | "name": "New York", 126 | "my_ser_method_field": "value-NY", 127 | }, 128 | }, 129 | ) 130 | 131 | fake_client.send.side_effect = check_args 132 | 133 | get_producer_backend = mock.MagicMock() 134 | get_producer_backend.return_value = fake_client 135 | 136 | with mock.patch("logpipe.producer.get_producer_backend", get_producer_backend): 137 | producer = PydanticProducer(TOPIC_STATES) 138 | 139 | ny = CustomState_Pydantic( 140 | id=5, 141 | code="NY", 142 | name="New York", 143 | ) 144 | producer.send(ny) 145 | 146 | self.assertEqual(fake_client.send.call_count, 1) 147 | 148 | def test_send_with_producer_id(self): 149 | fake_client = mock.MagicMock() 150 | fake_client.send = mock.MagicMock() 151 | 152 | def check_args(topic, key, value): 153 | self.assertEqual(topic, TOPIC_STATES) 154 | self.assertEqual(key, "NY") 155 | self.assertJSONEqual( 156 | value.decode().replace("json:", ""), 157 | { 158 | "type": "us-state", 159 | "version": 1, 160 | "producer": "my-producer-app", 161 | "message": { 162 | "code": "NY", 163 | "name": "New York", 164 | "my_ser_method_field": "value-NY", 165 | }, 166 | }, 167 | ) 168 | 169 | fake_client.send.side_effect = check_args 170 | 171 | get_producer_backend = mock.MagicMock() 172 | get_producer_backend.return_value = fake_client 173 | 174 | with mock.patch("logpipe.producer.get_producer_backend", get_producer_backend): 175 | producer = PydanticProducer( 176 | TOPIC_STATES, 177 | producer_id="my-producer-app", 178 | ) 179 | 180 | ny = CustomState_Pydantic( 181 | id=5, 182 | code="NY", 183 | name="New York", 184 | ) 185 | producer.send(ny) 186 | 187 | self.assertEqual(fake_client.send.call_count, 1) 188 | -------------------------------------------------------------------------------- /src/logpipe/tests/unit/test_settings.py: -------------------------------------------------------------------------------- 1 | from django.core.exceptions import ImproperlyConfigured 2 | from django.test import TestCase, override_settings 3 | 4 | from logpipe import settings 5 | 6 | 7 | class SettingsTest(TestCase): 8 | @override_settings(LOGPIPE={"KAFKA_BOOTSTRAP_SERVERS": ["kafka:9092"]}) 9 | def test_normal_required_key(self): 10 | self.assertEqual(settings.get("KAFKA_BOOTSTRAP_SERVERS"), ["kafka:9092"]) 11 | 12 | @override_settings( 13 | LOGPIPE={"KAFKA_BOOTSTRAP_SERVERS": ["kafka:9092"], "KAFKA_MAX_SEND_RETRIES": 3} 14 | ) 15 | def test_normal_optional_key(self): 16 | self.assertEqual(settings.get("KAFKA_MAX_SEND_RETRIES", 5), 3) 17 | 18 | @override_settings(LOGPIPE={}) 19 | def test_missing_required_key(self): 20 | with self.assertRaises(ImproperlyConfigured): 21 | settings.get("KAFKA_BOOTSTRAP_SERVERS") 22 | 23 | @override_settings(LOGPIPE={"KAFKA_BOOTSTRAP_SERVERS": ["kafka:9092"]}) 24 | def test_missing_optional_key(self): 25 | self.assertEqual(settings.get("KAFKA_MAX_SEND_RETRIES", 5), 5) 26 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | isolated_build = True 3 | toxworkdir={env:TOX_WORK_DIR:.tox} 4 | envlist = py{311,312,313}-django{420,510,520}-drf{316} 5 | 6 | [testenv] 7 | 
allowlist_externals = 8 | bash 9 | deps = 10 | django420: django>=4.2,<4.3 11 | django510: django>=5.1,<5.2 12 | django520: django>=5.2,<5.3 13 | drf316: djangorestframework>=3.16,<3.17 14 | setenv = 15 | PYTHONWARNINGS = d 16 | # Install the dependencies managed by Poetry, except for Django (which was 17 | # already installed by tox). This prevents Poetry from overwriting the version 18 | # of Django we're trying to test with the version in the lock file. 19 | # Adapted from here: https://github.com/python-poetry/poetry/discussions/4307 20 | commands_pre = 21 | bash -c 'poetry export --all-extras --with dev --without-hashes -f requirements.txt | \ 22 | grep -v "^[dD]jango==" | \ 23 | grep -v "^djangorestframework==" | \ 24 | pip install --no-deps -r /dev/stdin' 25 | commands = 26 | flake8 {toxinidir}/src/logpipe {toxinidir}/sandbox 27 | mypy {toxinidir}/src/logpipe {toxinidir}/sandbox 28 | {envpython} -m coverage run \ 29 | {toxinidir}/manage.py test \ 30 | logpipe.tests.unit \ 31 | -v 2 \ 32 | --buffer 33 | coverage report 34 | --------------------------------------------------------------------------------
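
Appendix: the producer, registry, and test modules above (notably src/logpipe/producer.py, src/logpipe/registry.py, src/logpipe/tests/common.py, and src/logpipe/tests/integration/test_roundtrip.py) show how the pieces are meant to be wired together. The sketch below restates that wiring as one minimal, self-contained example. It mirrors the "us-state" fixtures from the test suite; the topic name, serializer fields, and the idea that the run_kafka_consumer management command drives consumers registered via logpipe.registry are illustrative assumptions drawn from those files, not a prescribed configuration.

# Minimal usage sketch (illustrative only; mirrors the test fixtures above).
# Assumed Django settings fragment -- values are placeholders:
#
#   LOGPIPE = {
#       "KAFKA_BOOTSTRAP_SERVERS": ["kafka:9092"],
#   }

from rest_framework import serializers

from logpipe import Consumer, Producer
from logpipe.registry import register_consumer


class StateSerializer(serializers.Serializer):
    """Keyed serializer: MESSAGE_TYPE/VERSION identify the message, KEY_FIELD picks the partition key."""

    MESSAGE_TYPE = "us-state"
    VERSION = 1
    KEY_FIELD = "code"
    code = serializers.CharField(min_length=2, max_length=2)
    name = serializers.CharField()


# Producing: Producer (an alias of DRFProducer) renders a {type, version, message}
# body with the configured format and sends it to the configured backend.
producer = Producer("us-states", StateSerializer)
producer.send({"code": "NY", "name": "New York"})


# Consuming: register the serializer for its message type/version and poll the log.
# Decorating the factory with @register_consumer records it in logpipe.registry, which
# the run_kafka_consumer management command presumably uses to discover consumers.
# A real consumer-side serializer would also implement whatever lookup/save behavior
# the application needs; the tests above mock that part out.
@register_consumer
def build_state_consumer() -> Consumer:
    consumer = Consumer("us-states", consumer_timeout_ms=1000)
    consumer.register(StateSerializer)
    return consumer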