├── MANIFEST.in ├── docs ├── changelog.rst ├── manifest.yaml ├── release.rst ├── api.rst ├── installation.rst ├── contributing.rst ├── development.rst ├── Makefile ├── index.rst ├── configuration.rst ├── conf.py └── userguide.rst ├── tests ├── __init__.py ├── data │ └── config │ │ ├── aggregator_config.yaml │ │ └── aggregator_config_min_sample_size.yaml ├── kafkaaggregator │ ├── test_operations.py │ ├── test_fields.py │ ├── cassettes │ │ ├── test_register.yaml │ │ └── test_get_fields.yaml │ ├── test_topics.py │ ├── test_aggregator.py │ └── test_compute.py ├── example │ └── test_example.py └── conftest.py ├── setup.py ├── .github ├── dependabot.yml └── workflows │ └── ci.yaml ├── renovate.json ├── requirements ├── main.in ├── dev.in └── main.txt ├── scripts ├── docker-tag.sh ├── install-base-packages.sh └── install-dependency-packages.sh ├── src └── kafkaaggregator │ ├── __init__.py │ ├── aggregator_config.yaml │ ├── app.py │ ├── models.py │ ├── operations.py │ ├── cli.py │ ├── fields.py │ ├── generator.py │ ├── templates │ └── agent.j2 │ ├── topics.py │ ├── config.py │ ├── aggregator_config.py │ ├── example.py │ └── aggregator.py ├── .pre-commit-config.yaml ├── Makefile ├── CHANGELOG.rst ├── README.rst ├── setup.cfg ├── Dockerfile ├── examples └── join_streams.py ├── .gitignore ├── pyproject.toml └── docker-compose.yaml /MANIFEST.in: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/changelog.rst: -------------------------------------------------------------------------------- 1 | ../CHANGELOG.rst -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Tests for the kafka-aggregator package.""" 2 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """Package setup.""" 2 | 3 | from setuptools import setup 4 | 5 | setup(use_scm_version=True) 6 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" 7 | 8 | - package-ecosystem: "docker" 9 | directory: "/" 10 | schedule: 11 | interval: "weekly" 12 | -------------------------------------------------------------------------------- /renovate.json: -------------------------------------------------------------------------------- 1 | { 2 | "enabledManagers": [ 3 | "docker-compose", 4 | "kustomize" 5 | ], 6 | "extends": [ 7 | "config:base" 8 | ], 9 | "packageRules": [ 10 | { 11 | "groupName": "test dependencies", 12 | "paths": [ 13 | "docker-compose.yaml" 14 | ] 15 | } 16 | ] 17 | } 18 | -------------------------------------------------------------------------------- /requirements/main.in: -------------------------------------------------------------------------------- 1 | # Editable runtime dependencies (equivalent to install_requires) 2 | # Add direct runtime dependencies here, as well as implicit dependencies 3 | # with constrained versions. 
4 | # 5 | # After editing, update requirements/main.txt by running: 6 | # make update-deps 7 | 8 | aiofiles 9 | faust-avro 10 | jinja2 11 | pydantic 12 | -------------------------------------------------------------------------------- /tests/data/config/aggregator_config.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | aggregated_topics: 3 | - name: aggregated_example0 4 | window_aggregation: 5 | window_size_seconds: 1 6 | operations: 7 | - mean 8 | filter: 9 | source_topics: 10 | - name: example0 11 | fields: 12 | - spam 13 | - ham 14 | - eggs 15 | map: 16 | spam: foo 17 | ham: bar -------------------------------------------------------------------------------- /tests/kafkaaggregator/test_operations.py: -------------------------------------------------------------------------------- 1 | """Tests for the fields module.""" 2 | import pytest 3 | 4 | from kafkaaggregator.fields import Field 5 | 6 | 7 | def test_invalid_operation() -> None: 8 | """Test for invalid operation.""" 9 | with pytest.raises(RuntimeError): 10 | Field( 11 | "field", int, source_field_name="source_field", operation="maximum" 12 | ) 13 | -------------------------------------------------------------------------------- /scripts/docker-tag.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Determine the tag for Docker images based on GitHub Actions environment 4 | # variables. 5 | 6 | set -eo pipefail 7 | 8 | if [ -n "$GITHUB_HEAD_REF" ]; then 9 | # For pull requests 10 | echo ${GITHUB_HEAD_REF} | sed -E 's,/,-,g' 11 | else 12 | # For push events 13 | echo ${GITHUB_REF} | sed -E 's,refs/(heads|tags)/,,' | sed -E 's,/,-,g' 14 | fi 15 | -------------------------------------------------------------------------------- /tests/data/config/aggregator_config_min_sample_size.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | aggregated_topics: 3 | - name: aggregated_example0 4 | window_aggregation: 5 | window_size_seconds: 1 6 | min_sample_size: 10 7 | operations: 8 | - mean 9 | filter: 10 | source_topics: 11 | - name: example0 12 | fields: 13 | - spam 14 | - ham 15 | - eggs 16 | map: 17 | spam: foo 18 | ham: bar -------------------------------------------------------------------------------- /tests/kafkaaggregator/test_fields.py: -------------------------------------------------------------------------------- 1 | """Tests for the fields module.""" 2 | 3 | from kafkaaggregator.fields import Field 4 | 5 | 6 | def test_hash() -> None: 7 | """Test if Field is hashable. 8 | 9 | A Field must be hashable to be used with Faust. 10 | """ 11 | assert hash( 12 | Field( 13 | "field", 14 | int, 15 | source_field_name="source_field", 16 | operation="mean", 17 | ) 18 | ) 19 | -------------------------------------------------------------------------------- /docs/manifest.yaml: -------------------------------------------------------------------------------- 1 | # Documentation manifest. 2 | 3 | # List of names of Python modules in this package. 4 | # For each module there is a corresponding module doc subdirectory. 5 | modules: 6 | - "kafkaaggregator" 7 | 8 | # Name of the static content directories (subdirectories of `_static`). 9 | # Static content directories are usually named after the package. 10 | # Most packages do not need a static content directory (leave commented out). 
11 | # statics: 12 | # - "_static/kafkaaggregator" 13 | -------------------------------------------------------------------------------- /docs/release.rst: -------------------------------------------------------------------------------- 1 | 2 | ================= 3 | Release procedure 4 | ================= 5 | 6 | A reminder for the maintainers on how to make a new release. Releases are made 7 | by creating a Git tag with a semantic version and pushing to GitHub. 8 | 9 | Make sure all your changes are committed (including an entry in CHANGELOG.rst). 10 | Then run: 11 | 12 | .. code-block:: bash 13 | 14 | $ git tag -s X.Y.Z -m "X.Y.Z" 15 | $ git push 16 | $ git push --tags 17 | 18 | A GitHub action will build the new version and upload to Docker Hub. 19 | -------------------------------------------------------------------------------- /src/kafkaaggregator/__init__.py: -------------------------------------------------------------------------------- 1 | """The kafkaaggregator service.""" 2 | 3 | __all__ = ["__version__", "metadata"] 4 | 5 | import sys 6 | 7 | if sys.version_info < (3, 8): 8 | from importlib_metadata import PackageNotFoundError, version 9 | else: 10 | from importlib.metadata import PackageNotFoundError, version 11 | 12 | 13 | __version__: str 14 | """The application version string of (PEP 440 / SemVer compatible).""" 15 | 16 | try: 17 | __version__ = version(__name__) 18 | except PackageNotFoundError: 19 | # package is not installed 20 | __version__ = "0.0.0" 21 | -------------------------------------------------------------------------------- /requirements/dev.in: -------------------------------------------------------------------------------- 1 | # Editable development dependencies 2 | # Add direct development, test, and documentation dependencies here, as well 3 | # as implicit dev dependencies with constrained versions. 4 | # 5 | # After editing, update requirements/dev.txt by running: 6 | # make update-deps 7 | 8 | -c main.txt 9 | 10 | coverage[toml] 11 | documenteer 12 | flake8 13 | flake8-docstrings 14 | graphviz 15 | lsst_sphinx_bootstrap_theme 16 | mypy 17 | pre-commit 18 | pytest 19 | pytest-asyncio 20 | pytest-vcr 21 | sphinx-automodapi 22 | sphinx-click 23 | sphinx-prompt 24 | types-aiofiles 25 | types-PyYAML 26 | -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | ############# 2 | API Reference 3 | ############# 4 | 5 | .. automodapi:: kafkaaggregator.app 6 | :no-inheritance-diagram: 7 | 8 | .. automodapi:: kafkaaggregator.fields 9 | 10 | .. automodapi:: kafkaaggregator.topics 11 | 12 | .. automodapi:: kafkaaggregator.models 13 | :no-inheritance-diagram: 14 | 15 | .. automodapi:: kafkaaggregator.aggregator 16 | :no-inheritance-diagram: 17 | 18 | .. automodapi:: kafkaaggregator.generator 19 | :no-inheritance-diagram: 20 | 21 | .. automodapi:: kafkaaggregator.cli 22 | :no-inheritance-diagram: 23 | 24 | .. 
automodapi:: kafkaaggregator.example 25 | -------------------------------------------------------------------------------- /src/kafkaaggregator/aggregator_config.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | aggregated_topics: 3 | - name: aggregated_example0 4 | window_aggregation: 5 | window_size_seconds: 1 6 | operations: 7 | - mean 8 | filter: 9 | source_topics: 10 | - name: example0 11 | fields: 12 | - spam 13 | - ham 14 | - eggs 15 | map: 16 | spam: foo 17 | ham: bar 18 | - name: aggregated_example1 19 | window_aggregation: 20 | window_size_seconds: 1 21 | operations: 22 | - mean 23 | filter: 24 | source_topics: 25 | - name: example0 26 | fields: 27 | - spam 28 | - ham 29 | - eggs 30 | map: 31 | spam: foo 32 | ham: bar -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v4.4.0 4 | hooks: 5 | - id: trailing-whitespace 6 | - id: check-yaml 7 | - id: check-toml 8 | 9 | - repo: https://github.com/pycqa/isort 10 | rev: 5.10.1 11 | hooks: 12 | - id: isort 13 | additional_dependencies: [toml] 14 | 15 | - repo: https://github.com/psf/black 16 | rev: 22.10.0 17 | hooks: 18 | - id: black 19 | 20 | - repo: https://github.com/PyCQA/flake8 21 | rev: 6.0.0 22 | hooks: 23 | - id: flake8 24 | additional_dependencies: 25 | - flake8-docstrings 26 | args: [--docstring-convention=numpy] 27 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: update-deps 2 | update-deps: 3 | pip install --upgrade pip-tools pip setuptools 4 | pip-compile --upgrade --build-isolation --generate-hashes --output-file requirements/main.txt requirements/main.in 5 | pip-compile --upgrade --build-isolation --generate-hashes --output-file requirements/dev.txt requirements/dev.in requirements/main.in 6 | pip-sync requirements/main.txt requirements/dev.txt 7 | 8 | .PHONY: init 9 | init: 10 | pip install --editable . 
11 | pip install --upgrade -r requirements/main.txt -r requirements/dev.txt 12 | rm -rf .tox 13 | pip install --upgrade tox 14 | pre-commit install 15 | 16 | .PHONY: update 17 | update: update-deps init 18 | 19 | .PHONY: run 20 | run: 21 | kafkaaggregator 22 | -------------------------------------------------------------------------------- /tests/example/test_example.py: -------------------------------------------------------------------------------- 1 | """Test example package.""" 2 | 3 | import pytest 4 | from faust.windows import TumblingWindow 5 | 6 | 7 | @pytest.mark.asyncio 8 | async def test_tumbling_window_ranges() -> None: 9 | """Test Faust tumbling window ranges.""" 10 | size = 1.0 11 | window = TumblingWindow(size) 12 | # Test if a timestamp at the extremes of the window falls in the expected 13 | # window 14 | start = 0 15 | end = 0.9 16 | assert window.ranges(start) == [(start, end)] 17 | assert window.ranges(end) == [(start, end)] 18 | assert window.ranges(start + size) == [(start + size, end + size)] 19 | # Test if a timestamp between (start + end) and (start + size) falls 20 | # in the expected window 21 | assert window.ranges(start + end + 0.05) == [(start, end)] 22 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | """Configure a kafka-aggregator test application.""" 2 | 3 | from pathlib import Path 4 | 5 | import faust_avro 6 | import pytest 7 | from yarl import URL 8 | 9 | from kafkaaggregator.app import create_app 10 | 11 | 12 | @pytest.fixture() 13 | def test_app() -> faust_avro.App: 14 | """Create a test app. 15 | 16 | Returns 17 | ------- 18 | app : `faust_avro.App` 19 | Faust Avro application. 20 | """ 21 | app = create_app() 22 | # Ensure memory store is used for tests 23 | app.finalize() 24 | app.conf.store = URL("memory://") 25 | app.flow_control.resume() 26 | return app 27 | 28 | 29 | @pytest.fixture 30 | def config_dir() -> Path: 31 | """Directory containing test configuration data.""" 32 | return Path(__file__).parent.joinpath("data/config") 33 | -------------------------------------------------------------------------------- /docs/installation.rst: -------------------------------------------------------------------------------- 1 | .. _installation: 2 | 3 | ################## 4 | Installation guide 5 | ################## 6 | 7 | kafka-aggregator is meant to run on Kubernetes and assumes that Kafka is running in the same Kubernetes cluster. This section shows how to use a `Helm chart`_ to install kafka-aggregator. The main configuration settings you need to know to get it running are covered in the :ref:`configuration` section. 8 | 9 | 10 | .. _`helm-chart`: 11 | 12 | Helm chart 13 | ========== 14 | 15 | There is a Helm chart for kafka-aggregator available from the `Rubin Observatory charts repository`_. To use the Helm chart, set the appropriate configuration values in the `values.yaml`_ file; a minimal install example is shown below. 16 | 17 | .. _Rubin Observatory charts repository: https://lsst-sqre.github.io/charts 18 | .. _values.yaml: https://github.com/lsst-sqre/charts/blob/master/charts/kafka-aggregator/values.yaml 19 | 20 | 21 | Argo CD 22 | ======= 23 | 24 | kafka-aggregator is deployed using Argo CD. An example of an Argo CD app using the Helm chart can be found `here `_.
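For a quick test outside of Argo CD, the chart can also be installed directly with Helm. The commands below are a minimal sketch, assuming the chart is published as ``kafka-aggregator`` in the repository above; the release name and the local ``values.yaml`` path are placeholders.

.. code-block:: bash

   $ helm repo add lsst-sqre https://lsst-sqre.github.io/charts
   $ helm repo update
   $ helm install kafka-aggregator lsst-sqre/kafka-aggregator --values values.yaml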
25 | -------------------------------------------------------------------------------- /src/kafkaaggregator/app.py: -------------------------------------------------------------------------------- 1 | """Create a kafka-aggregator application.""" 2 | 3 | __all__ = ["create_app"] 4 | 5 | import faust_avro 6 | 7 | from kafkaaggregator.config import Configuration 8 | 9 | 10 | def create_app(config: Configuration = None) -> faust_avro.App: 11 | """Create and configure a Faust based kafka-aggregator application. 12 | 13 | Parameters 14 | ---------- 15 | config : `Configuration`, optional 16 | The configuration to use. If not provided, the default 17 | :ref:`Configuration` will be used. 18 | """ 19 | if not config: 20 | config = Configuration() 21 | 22 | app = faust_avro.App( 23 | id="kafkaaggregator", 24 | broker=config.broker, 25 | registry_url=config.internal_registry_url, 26 | store=config.store, 27 | autodiscover=True, 28 | origin=config.agents_output_dir, 29 | topic_partitions=config.topic_partitions, 30 | ) 31 | 32 | return app 33 | 34 | 35 | # The default configuration can also be imported from this module 36 | config = Configuration() 37 | 38 | app: faust_avro.App = create_app(config) 39 | -------------------------------------------------------------------------------- /scripts/install-base-packages.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script updates packages in the base Docker image that's used by both the 4 | # build and runtime images, and gives us a place to install additional 5 | # system-level packages with apt-get. 6 | # 7 | # Based on the blog post: 8 | # https://pythonspeed.com/articles/system-packages-docker/ 9 | 10 | # Bash "strict mode", to help catch problems and bugs in the shell 11 | # script. Every bash script you write should include this. See 12 | # http://redsymbol.net/articles/unofficial-bash-strict-mode/ for 13 | # details. 14 | set -euo pipefail 15 | 16 | # Display each command as it's run. 17 | set -x 18 | 19 | # Tell apt-get we're never going to be able to give manual 20 | # feedback: 21 | export DEBIAN_FRONTEND=noninteractive 22 | 23 | # Update the package listing, so we know what packages exist: 24 | apt-get update 25 | 26 | # Install security updates: 27 | apt-get -y upgrade 28 | 29 | # Example of installing a new package, without unnecessary packages: 30 | apt-get -y install --no-install-recommends git 31 | 32 | # Delete cached files we don't need anymore: 33 | apt-get clean 34 | rm -rf /var/lib/apt/lists/* 35 | -------------------------------------------------------------------------------- /docs/contributing.rst: -------------------------------------------------------------------------------- 1 | 2 | ============ 3 | Contributing 4 | ============ 5 | 6 | Contributions are welcome. You can contribute in many ways: 7 | 8 | Reporting an issue 9 | ~~~~~~~~~~~~~~~~~~ 10 | 11 | Report an issue on `GitHub `_. 12 | 13 | If you are reporting an issue, please include: 14 | 15 | * Your operating system name and version. 16 | * Any details about your local setup that might be helpful in troubleshooting. 17 | * Detailed steps to reproduce the issue. 18 | 19 | Fix bugs or implement features 20 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 21 | 22 | Look through the GitHub issues. Anything tagged with "help 23 | wanted" is open to whoever wants to implement it. 24 | 25 | 26 | Write Documentation 27 | ~~~~~~~~~~~~~~~~~~~ 28 | 29 | Whether as part of `docs `_ or docstrings. 
30 | 31 | Send Feedback 32 | ~~~~~~~~~~~~~ 33 | 34 | The best way to send feedback is to file an `issue `_. 35 | 36 | If you are proposing a feature: 37 | 38 | * Explain in detail how it would work. 39 | * Keep the scope as narrow as possible, to make it easier to implement. 40 | * Remember that this is an open-source project, and that contributions 41 | are welcome :) 42 | -------------------------------------------------------------------------------- /tests/kafkaaggregator/cassettes/test_register.yaml: -------------------------------------------------------------------------------- 1 | interactions: 2 | - request: 3 | body: null 4 | headers: 5 | Content-Type: 6 | - application/vnd.schemaregistry.v1+json 7 | method: POST 8 | uri: http://localhost:8081/subjects/test-avro-schema-value 9 | response: 10 | body: 11 | string: '{"error_code":40401,"message":"Subject not found."}' 12 | headers: 13 | Content-Length: '51' 14 | Content-Type: application/vnd.schemaregistry.v1+json 15 | Date: Thu, 21 May 2020 22:54:29 GMT 16 | Server: Jetty(9.4.18.v20190429) 17 | status: 18 | code: 404 19 | message: Not Found 20 | url: http://localhost:8081/subjects/test-avro-schema-value 21 | - request: 22 | body: null 23 | headers: 24 | Content-Type: 25 | - application/vnd.schemaregistry.v1+json 26 | method: POST 27 | uri: http://localhost:8081/subjects/test-avro-schema-value/versions 28 | response: 29 | body: 30 | string: '{"id":1}' 31 | headers: 32 | Content-Length: '8' 33 | Content-Type: application/vnd.schemaregistry.v1+json 34 | Date: Thu, 21 May 2020 22:54:29 GMT 35 | Server: Jetty(9.4.18.v20190429) 36 | Vary: Accept-Encoding, User-Agent 37 | status: 38 | code: 200 39 | message: OK 40 | url: http://localhost:8081/subjects/test-avro-schema-value/versions 41 | version: 1 42 | -------------------------------------------------------------------------------- /scripts/install-dependency-packages.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script installs additional packages used by the dependency image but 4 | # not needed by the runtime image, such as additional packages required to 5 | # build Python dependencies. 6 | # 7 | # Since the base image wipes all the apt caches to clean up the image that 8 | # will be reused by the runtime image, we unfortunately have to do another 9 | # apt-get update here, which wastes some time and network. 10 | 11 | # Bash "strict mode", to help catch problems and bugs in the shell 12 | # script. Every bash script you write should include this. See 13 | # http://redsymbol.net/articles/unofficial-bash-strict-mode/ for 14 | # details. 15 | set -euo pipefail 16 | 17 | # Display each command as it's run. 18 | set -x 19 | 20 | # Tell apt-get we're never going to be able to give manual 21 | # feedback: 22 | export DEBIAN_FRONTEND=noninteractive 23 | 24 | # Update the package listing, so we know what packages exist: 25 | apt-get update 26 | 27 | # Install build-essential because sometimes Python dependencies need to build 28 | # C modules, particularly when upgrading to newer Python versions. libffi-dev 29 | # is sometimes needed to build cffi (a cryptography dependency). 
30 | apt-get -y install --no-install-recommends build-essential libffi-dev 31 | 32 | # Delete cached files we don't need anymore: 33 | apt-get clean 34 | rm -rf /var/lib/apt/lists/* 35 | -------------------------------------------------------------------------------- /src/kafkaaggregator/models.py: -------------------------------------------------------------------------------- 1 | """Dynamic creation of Faust-avro Records.""" 2 | 3 | __all__ = ["create_record"] 4 | 5 | from typing import Any, List, Mapping 6 | 7 | from faust_avro import Record 8 | 9 | from kafkaaggregator.fields import Field 10 | 11 | 12 | def create_record( 13 | cls_name: str, fields: List[Field], doc: str = None 14 | ) -> Record: 15 | """Create a Faust-avro Record class during runtime. 16 | 17 | Parameters 18 | ---------- 19 | cls_name: `str` 20 | Name of the new class to create. 21 | fields: `list` [`Field`] 22 | List of `Field` objects mapping field names and types for the Faust-avro Record. 23 | doc: `str` 24 | Docstring for the new class. 25 | 26 | Returns 27 | ------- 28 | cls: `Record` 29 | A faust_avro.Record class. 30 | 31 | Examples 32 | -------- 33 | >>> from kafkaaggregator.fields import Field 34 | >>> from kafkaaggregator.models import create_record 35 | >>> Foo = create_record('Foo', [Field('bar', int)]) 36 | >>> f = Foo(bar=0) 37 | >>> f.bar 38 | 0 39 | >>> f.dumps() 40 | {'bar': 0, '__faust': {'ns': '__main__.Foo'}} 41 | 42 | """ 43 | _fields: Mapping[str, Any] = dict([f.astuple() for f in fields]) 44 | 45 | cls_attrs = dict( 46 | __annotations__=_fields, 47 | __doc__=doc, 48 | ) 49 | 50 | return type(cls_name, (Record,), cls_attrs) 51 | -------------------------------------------------------------------------------- /src/kafkaaggregator/operations.py: -------------------------------------------------------------------------------- 1 | """Possible statistical operations on fields.""" 2 | 3 | __all__ = ["Operation", "q1", "q3"] 4 | 5 | from enum import Enum 6 | from statistics import mean, median, quantiles, stdev # noqa: F401 7 | from typing import List 8 | 9 | 10 | class Operation(Enum): 11 | """Possible statistical operations on fields.""" 12 | 13 | MIN = "min" 14 | Q1 = "q1" 15 | MEAN = "mean" 16 | MEDIAN = "median" 17 | Q3 = "q3" 18 | STDEV = "stdev" 19 | MAX = "max" 20 | 21 | @staticmethod 22 | def values() -> List[str]: 23 | """Return list of possible operations.""" 24 | return list(map(lambda op: op.value, Operation)) 25 | 26 | 27 | def q1(data: List[float]) -> float: 28 | """Compute the data first quartile. 29 | 30 | Parameters 31 | ---------- 32 | data: `list` 33 | List of values to compute the statistics from. 34 | 35 | Returns 36 | ------- 37 | value: `float` 38 | The data first quartile. 39 | """ 40 | quartiles = quantiles(data, n=4) 41 | return quartiles[0] 42 | 43 | 44 | def q3(data: List[float]) -> float: 45 | """Compute the data third quartile. 46 | 47 | Parameters 48 | ---------- 49 | data: `list` 50 | List of values to compute the statistics from. 51 | 52 | Returns 53 | ------- 54 | value: `float` 55 | The data third quartile. 56 | """ 57 | quartiles = quantiles(data, n=4) 58 | return quartiles[2] 59 | -------------------------------------------------------------------------------- /CHANGELOG.rst: -------------------------------------------------------------------------------- 1 | ########## 2 | Change log 3 | ########## 4 | 5 | 0.2.0 (2020-08-14) 6 | ================== 7 | 8 | * Add first and third quartiles (``q1`` and ``q3``) to the list of summary statistics computed by the aggregator.
9 | * Ability to configure the list of summary statistics to be computed. 10 | * Pinned top-level requirements. 11 | * Add Kafka Connect to the docker-compose setup. 12 | * Use only one Schema Registry by default to simplify local execution. 13 | * First release to PyPI. 14 | 15 | 16 | 0.1.0 (2020-07-13) 17 | ================== 18 | 19 | Initial release of kafka-aggregator with the following features: 20 | 21 | * Use the Faust windowing feature to aggregate a stream of messages. 22 | * Use Faust-avro to add Avro serialization and Schema Registry support to Faust. 23 | * Support for an internal Schema Registry to store schemas for the aggregated topics (optional). 24 | * Create aggregation topic schemas from the source topic schemas and from the list of summary statistics to be computed. 25 | * Ability to create Faust records dynamically from aggregation topic schemas. 26 | * Ability to auto-generate code for the Faust agents (stream processors). 27 | * Compute summary statistics for numeric fields: ``min()``, ``mean()``, ``median()``, ``stdev()``, ``max()``. 28 | * Add example module to initialize a number of source topics in Kafka, control the number of fields in each topic, and produce messages for those topics at a given frequency. 29 | * Use Kafdrop to inspect messages from source and aggregated topics. 30 | * Add kafka-aggregator documentation site. 31 | -------------------------------------------------------------------------------- /docs/development.rst: -------------------------------------------------------------------------------- 1 | ################# 2 | Development guide 3 | ################# 4 | 5 | Here's how to set up `kafka-aggregator` for local development. 6 | 7 | 1. Clone the `kafka-aggregator `_ repo from GitHub: 8 | 9 | .. code-block:: bash 10 | 11 | $ git clone https://github.com/lsst-sqre/kafka-aggregator.git 12 | 13 | 2. Install your local copy into a virtualenv: 14 | 15 | .. code-block:: bash 16 | 17 | $ cd kafka-aggregator 18 | $ virtualenv -p python3 venv 19 | $ source venv/bin/activate 20 | $ make update 21 | 22 | 3. Create a branch for local development: 23 | 24 | .. code-block:: bash 25 | 26 | $ git checkout -b name-of-your-bugfix-or-feature 27 | 28 | Now you can make your changes locally. 29 | 30 | 4. When you're done making changes, check that your changes pass the 31 | lint checks, typing checks, and tests: 32 | 33 | .. code-block:: bash 34 | 35 | $ tox -e lint,typing,py37 36 | 37 | 5. Commit your changes and push your branch to GitHub: 38 | 39 | .. code-block:: bash 40 | 41 | $ git add . 42 | $ git commit -m "Your detailed description of your changes." 43 | $ git push origin name-of-your-bugfix-or-feature 44 | 45 | 6. Submit a pull request through the GitHub website. 46 | 47 | Pull Request Guidelines 48 | ----------------------- 49 | 50 | Before you submit a pull request, check that it meets these guidelines: 51 | 52 | 1. The pull request should include tests. 53 | 2. If the pull request adds functionality, the docs should be updated. 54 | 3. The pull request should work for Python 3. 55 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | ################ 2 | kafka-aggregator 3 | ################ 4 | 5 | |Build| |Docker| 6 | 7 | A Kafka aggregator based on the `Faust `_ Python Stream Processing library. 8 | 9 | kafka-aggregator development is based on the `Safir `__ application template.
10 | 11 | 12 | Overview 13 | ======== 14 | 15 | kafka-aggregator uses `Faust's windowing feature `_ to aggregate a stream of messages from Kafka. 16 | 17 | kafka-aggregator implements a Faust agent, a "stream processor", that adds messages from a source topic into a Faust table. The table is configured as a tumbling window with a size, representing the window duration (time interval) and an expiration time, which specifies the duration for which the data allocated to each window will be stored. Every time a window expires, a callback function is called to aggregate the messages allocated to that window. The size of the window controls the frequency of the aggregated stream. 18 | 19 | kafka-aggregator uses `faust-avro `_ to add Avro serialization and Schema Registry support to Faust. faust-avro can parse Faust models into Avro Schemas. 20 | 21 | See `the docs `_ for more information. 22 | 23 | .. |Build| image:: https://github.com/lsst-sqre/kafka-aggregator/workflows/CI/badge.svg 24 | :alt: GitHub Actions 25 | :scale: 100% 26 | :target: https://github.com/lsst-sqre/kafka-aggregator/actions 27 | 28 | .. |Docker| image:: https://img.shields.io/docker/v/lsstsqre/kafkaaggregator?sort=date 29 | :alt: Docker Hub repository 30 | :scale: 100% 31 | :target: https://hub.docker.com/repository/docker/lsstsqre/kafkaaggregator 32 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = kafka-aggregator 3 | description = A Kafka aggregator based on the Faust stream processing library. 4 | author = Association of Universities for Research in Astronomy, Inc. (AURA) 5 | author_email = sqre-admin@lists.lsst.org 6 | long_description = file: README.rst, CHANGELOG.rst, LICENSE 7 | long_description_content_type = text/x-rst 8 | url = https://github.com/lsst-sqre/kafka-aggregator 9 | project_urls = 10 | Change log = https://github.com/lsst-sqre/kafka-aggregator/master/blob/CHANGELOG.rst 11 | Source code = https://github.com/lsst-sqre/kafka-aggregator 12 | Issue tracker = https://github.com/lsst-sqre/kafka-aggregator/issues 13 | classifiers = 14 | Development Status :: 4 - Beta 15 | License :: OSI Approved :: MIT License 16 | Programming Language :: Python 17 | Programming Language :: Python :: 3 18 | Programming Language :: Python :: 3.9 19 | Natural Language :: English 20 | Operating System :: POSIX 21 | keywords = 22 | lsst 23 | 24 | [options] 25 | zip_safe = False 26 | include_package_data = True 27 | package_dir = 28 | = src 29 | packages=find: 30 | python_requires = >=3.9 31 | setup_requires = 32 | setuptools_scm 33 | # Use requirements/main.in for runtime dependencies instead of install_requires 34 | 35 | [options.packages.find] 36 | where = src 37 | 38 | [options.entry_points] 39 | console_scripts = 40 | kafkaaggregator = kafkaaggregator.cli:main 41 | 42 | [flake8] 43 | max-line-length = 79 44 | # E203: whitespace before :, flake8 disagrees with PEP-8 45 | # W503: line break after binary operator, flake8 disagrees with PEP-8 46 | ignore = E203, W503 47 | docstring-convention = numpy 48 | 49 | [mypy] 50 | disallow_untyped_defs = True 51 | disallow_incomplete_defs = True 52 | ignore_missing_imports = True 53 | show_error_codes = True 54 | strict_equality = True 55 | warn_redundant_casts = True 56 | warn_unreachable = True 57 | warn_unused_ignores = True 58 | -------------------------------------------------------------------------------- /src/kafkaaggregator/cli.py: 
-------------------------------------------------------------------------------- 1 | """Command-line interface for kafkaaggregator.""" 2 | 3 | __all__ = ["main", "produce", "init_example", "generate_agents"] 4 | 5 | import logging 6 | from pathlib import Path 7 | 8 | from faust.cli import AppCommand, option 9 | 10 | from kafkaaggregator.app import app 11 | from kafkaaggregator.config import ExampleConfiguration 12 | from kafkaaggregator.example import ( 13 | AggregationExample, 14 | UnexpectedNumberOfTopicsError, 15 | ) 16 | from kafkaaggregator.generator import AgentGenerator 17 | 18 | logger = logging.getLogger("kafkaaggregator") 19 | 20 | config = ExampleConfiguration() 21 | 22 | 23 | def main() -> None: 24 | """Entrypoint for Faust CLI.""" 25 | app.main() 26 | 27 | 28 | @app.command( 29 | option( 30 | "--frequency", 31 | type=float, 32 | default=config.frequency, 33 | help="The frequency in Hz at which messages are produced.", 34 | show_default=True, 35 | ), 36 | option( 37 | "--max-messages", 38 | type=int, 39 | default=config.max_messages, 40 | help="The maximum number of messages to produce.", 41 | show_default=True, 42 | ), 43 | ) 44 | async def produce( 45 | self: AppCommand, frequency: float, max_messages: int 46 | ) -> None: 47 | """Produce messages for the aggregation example.""" 48 | example = AggregationExample() 49 | 50 | try: 51 | await example.produce( 52 | app=app, frequency=frequency, max_messages=max_messages 53 | ) 54 | except UnexpectedNumberOfTopicsError as e: 55 | logger.error(e) 56 | 57 | 58 | @app.command() 59 | async def init_example(self: AppCommand) -> None: 60 | """Initialize the source topic used in the aggregation example.""" 61 | example = AggregationExample() 62 | await example.initialize(app=app) 63 | 64 | 65 | @app.command() 66 | async def generate_agents(self: AppCommand) -> None: 67 | """Generate Faust agents' code.""" 68 | agent_generator = AgentGenerator( 69 | Path("aggregator_config.yaml"), "aggregated_example0" 70 | ) 71 | await agent_generator.run() 72 | -------------------------------------------------------------------------------- /tests/kafkaaggregator/test_topics.py: -------------------------------------------------------------------------------- 1 | """Tests for the topics module.""" 2 | 3 | import json 4 | 5 | import faust_avro 6 | import pytest 7 | 8 | from kafkaaggregator.fields import Field 9 | from kafkaaggregator.topics import Topic 10 | 11 | 12 | @pytest.fixture 13 | def avro_schema() -> str: 14 | """Mock avro schema to test primitive data types.""" 15 | schema = json.dumps( 16 | dict( 17 | type="record", 18 | name="test", 19 | doc="Test Avro primitive data types", 20 | fields=[ 21 | dict(name="int_field", type="int"), 22 | dict(name="long_field", type="long"), 23 | dict(name="float_field", type="float"), 24 | dict(name="double_field", type="double"), 25 | dict(name="bytes_field", type="bytes"), 26 | dict(name="string_field", type="string"), 27 | ], 28 | ) 29 | ) 30 | return schema 31 | 32 | 33 | @pytest.mark.asyncio 34 | @pytest.mark.vcr 35 | async def test_register(avro_schema: str) -> None: 36 | """Test topic schema registration.""" 37 | topic = Topic( 38 | name="test-avro-schema", registry_url="http://localhost:8081" 39 | ) 40 | schema_id = await topic.register(schema=avro_schema) 41 | assert schema_id == 1 42 | 43 | 44 | # https://github.com/masterysystems/faust-avro/blob/master/faust_avro/types.py 45 | @pytest.mark.asyncio 46 | @pytest.mark.vcr 47 | async def test_get_fields(avro_schema: str) -> None: 48 | """Test `topic.get_fields()` method
returning faust-avro types.""" 49 | topic = Topic( 50 | name="test-avro-schema", registry_url="http://localhost:8081" 51 | ) 52 | await topic.register(schema=avro_schema) 53 | fields = await topic.get_fields() 54 | 55 | assert Field("int_field", faust_avro.types.int32) in fields 56 | assert Field("long_field", int) in fields 57 | assert Field("float_field", faust_avro.types.float32) in fields 58 | assert Field("double_field", float) in fields 59 | assert Field("bytes_field", bytes) in fields 60 | assert Field("string_field", str) in fields 61 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # Internal variables. 11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 14 | # the i18n builder cannot share the environment and doctrees with the others 15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 16 | 17 | .PHONY: help 18 | help: 19 | @echo "Please use \`make ' where is one of" 20 | @echo " html to make standalone HTML files" 21 | @echo " linkcheck to check all external links for integrity" 22 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 23 | @echo " coverage to run coverage check of the documentation (if enabled)" 24 | @echo " dummy to check syntax errors of document sources" 25 | 26 | .PHONY: clean 27 | clean: 28 | rm -rf $(BUILDDIR) 29 | rm -rf api 30 | 31 | .PHONY: html 32 | html: 33 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 34 | @echo 35 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 36 | 37 | .PHONY: linkcheck 38 | linkcheck: 39 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 40 | @echo 41 | @echo "Link check complete; look for any errors in the above output " \ 42 | "or in $(BUILDDIR)/linkcheck/output.txt." 43 | 44 | .PHONY: doctest 45 | doctest: 46 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 47 | @echo "Testing of doctests in the sources finished, look at the " \ 48 | "results in $(BUILDDIR)/doctest/output.txt." 49 | 50 | .PHONY: coverage 51 | coverage: 52 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage 53 | @echo "Testing of coverage in the sources finished, look at the " \ 54 | "results in $(BUILDDIR)/coverage/python.txt." 55 | 56 | .PHONY: dummy 57 | dummy: 58 | $(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy 59 | @echo 60 | @echo "Build finished. Dummy builder generates no files." 61 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # This Dockerfile has four stages: 2 | # 3 | # base-image 4 | # Updates the base Python image with security patches and common system 5 | # packages. This image becomes the base of all other images. 6 | # dependencies-image 7 | # Installs third-party dependencies (requirements/main.txt) into a virtual 8 | # environment. This virtual environment is ideal for copying across build 9 | # stages. 10 | # install-image 11 | # Installs the app into the virtual environment. 
12 | # runtime-image 13 | # - Copies the virtual environment into place. 14 | # - Runs a non-root user. 15 | # - Sets up the entrypoint and port. 16 | 17 | FROM python:3.10.7-bullseye AS base-image 18 | 19 | # Update system packages 20 | COPY scripts/install-base-packages.sh . 21 | RUN ./install-base-packages.sh && rm ./install-base-packages.sh 22 | 23 | FROM base-image AS dependencies-image 24 | 25 | # Install system packages only needed for building dependencies. 26 | COPY scripts/install-dependency-packages.sh . 27 | RUN ./install-dependency-packages.sh 28 | 29 | # Create a Python virtual environment 30 | ENV VIRTUAL_ENV=/opt/venv 31 | RUN python -m venv $VIRTUAL_ENV 32 | # Make sure we use the virtualenv 33 | ENV PATH="$VIRTUAL_ENV/bin:$PATH" 34 | # Put the latest pip and setuptools in the virtualenv 35 | RUN pip install --upgrade --no-cache-dir pip setuptools wheel 36 | 37 | # Install the app's Python runtime dependencies 38 | COPY requirements/main.txt ./requirements.txt 39 | RUN pip install --quiet --no-cache-dir -r requirements.txt 40 | 41 | FROM base-image AS install-image 42 | 43 | # Use the virtualenv 44 | COPY --from=dependencies-image /opt/venv /opt/venv 45 | ENV PATH="/opt/venv/bin:$PATH" 46 | 47 | COPY . /app 48 | WORKDIR /app 49 | RUN pip install --no-cache-dir . 50 | 51 | FROM base-image AS runtime-image 52 | 53 | # Create a non-root user 54 | RUN useradd --create-home appuser 55 | WORKDIR /home/appuser 56 | 57 | # Make sure we use the virtualenv 58 | ENV PATH="/opt/venv/bin:$PATH" 59 | 60 | COPY --from=install-image /opt/venv /opt/venv 61 | 62 | # Switch to non-root user 63 | USER appuser 64 | 65 | EXPOSE 6066 66 | 67 | ENTRYPOINT ["kafkaaggregator", "worker", "-l", "info"] 68 | -------------------------------------------------------------------------------- /examples/join_streams.py: -------------------------------------------------------------------------------- 1 | """Faust agent to demonstrate joining two streams.""" 2 | 3 | __all__ = [ 4 | "process_window", 5 | "process_stream", 6 | ] 7 | 8 | import logging 9 | from typing import Any, AsyncGenerator, List, Tuple 10 | 11 | from faust.types import StreamT 12 | 13 | from kafkaaggregator.app import app, config 14 | 15 | logger = logging.getLogger("kafkaaggregator") 16 | 17 | # Consume from two source topics 18 | source_topics = app.topic("example-000", "example-001") 19 | 20 | 21 | def process_window(key: Tuple, value: List[Any]) -> None: 22 | """Process the tumbling window. 23 | 24 | Parameters 25 | ---------- 26 | key: `Tuple` 27 | key for the current window in the WindowSet associated to 28 | ``Table[k]``. The key contains the window range. 29 | Example: ``key = (k, (start, end))`` 30 | 31 | value: `list` 32 | List of messages in the current window. 
33 | """ 34 | # The resulting stream joins fields from the two source topics 35 | time = key[0] 36 | print(time, value) 37 | 38 | 39 | # Persist the joined stream in a tumbling window table 40 | # .relative_to_stream() means that the window range is relative to the 41 | # timestamp added by kafka 42 | table = ( 43 | app.Table( 44 | "tumbling-window", 45 | default=list, 46 | on_window_close=process_window, 47 | help=f"Persit messages in windows of " f"{config.window_size}s.", 48 | ) 49 | .tumbling(config.window_size, expires=config.window_expires) 50 | .relative_to_stream() 51 | ) 52 | 53 | 54 | @app.agent(source_topics) 55 | async def process_stream(stream: StreamT) -> AsyncGenerator: 56 | """Process incoming events from the source topics.""" 57 | async for event in stream.events(): 58 | # The timestamp (key) comes from the "time" field in the stream 59 | timestamp = event.value["time"] 60 | # Get the current value for this key 61 | record = table[timestamp].value() 62 | # Append the new value 63 | record.append({f"{event.message.topic}.value": event.value["value0"]}) 64 | # upsert table 65 | table[timestamp] = record 66 | 67 | yield event 68 | -------------------------------------------------------------------------------- /tests/kafkaaggregator/cassettes/test_get_fields.yaml: -------------------------------------------------------------------------------- 1 | interactions: 2 | - request: 3 | body: null 4 | headers: 5 | Content-Type: 6 | - application/vnd.schemaregistry.v1+json 7 | method: POST 8 | uri: http://localhost:8081/subjects/test-avro-schema-value 9 | response: 10 | body: 11 | string: '{"subject":"test-avro-schema-value","version":1,"id":4,"schema":"{\"type\":\"record\",\"name\":\"test\",\"doc\":\"Test 12 | Avro primitive data types\",\"fields\":[{\"name\":\"int_field\",\"type\":\"int\"},{\"name\":\"long_field\",\"type\":\"long\"},{\"name\":\"float_field\",\"type\":\"float\"},{\"name\":\"double_field\",\"type\":\"double\"},{\"name\":\"bytes_field\",\"type\":\"bytes\"},{\"name\":\"string_field\",\"type\":\"string\"}]}"}' 13 | headers: 14 | Content-Encoding: gzip 15 | Content-Length: '204' 16 | Content-Type: application/vnd.schemaregistry.v1+json 17 | Date: Thu, 21 May 2020 22:54:29 GMT 18 | Server: Jetty(9.4.18.v20190429) 19 | Vary: Accept-Encoding, User-Agent 20 | status: 21 | code: 200 22 | message: OK 23 | url: http://localhost:8081/subjects/test-avro-schema-value 24 | - request: 25 | body: null 26 | headers: {} 27 | method: GET 28 | uri: http://localhost:8081/subjects/test-avro-schema-value/versions/latest 29 | response: 30 | body: 31 | string: '{"subject":"test-avro-schema-value","version":1,"id":4,"schema":"{\"type\":\"record\",\"name\":\"test\",\"doc\":\"Test 32 | Avro primitive data types\",\"fields\":[{\"name\":\"int_field\",\"type\":\"int\"},{\"name\":\"long_field\",\"type\":\"long\"},{\"name\":\"float_field\",\"type\":\"float\"},{\"name\":\"double_field\",\"type\":\"double\"},{\"name\":\"bytes_field\",\"type\":\"bytes\"},{\"name\":\"string_field\",\"type\":\"string\"}]}"}' 33 | headers: 34 | Content-Encoding: gzip 35 | Content-Length: '204' 36 | Content-Type: application/vnd.schemaregistry.v1+json 37 | Date: Thu, 21 May 2020 22:54:29 GMT 38 | Server: Jetty(9.4.18.v20190429) 39 | Vary: Accept-Encoding, User-Agent 40 | status: 41 | code: 200 42 | message: OK 43 | url: http://localhost:8081/subjects/test-avro-schema-value/versions/latest 44 | version: 1 45 | -------------------------------------------------------------------------------- /src/kafkaaggregator/fields.py: 
-------------------------------------------------------------------------------- 1 | """Aggregated field class. 2 | 3 | The Field has a numerical type by construction. It holds the name of 4 | the source field being aggregated and the operation performed. 5 | """ 6 | 7 | __all__ = ["Field"] 8 | 9 | from typing import Mapping, Optional, Tuple, Type, Union 10 | 11 | from kafkaaggregator.operations import Operation 12 | 13 | BasicType = Union[Type[int], Type[float], Type[bytes], Type[str]] 14 | 15 | 16 | class Field: 17 | """Represents an aggregated field of numeric type. 18 | 19 | Parameters 20 | ---------- 21 | name : `str` 22 | Field name. 23 | type : `int` or `float` or `bytes` or `str` 24 | Field data type. 25 | source_field_name : `str`, optional 26 | Source field name. 27 | operation : `str`, optional Operation to perform on the source field; must be one of `Operation.values()`. 28 | """ 29 | 30 | def __init__( 31 | self, 32 | name: str, 33 | type: BasicType, 34 | source_field_name: Optional[str] = None, 35 | operation: Optional[str] = None, 36 | ) -> None: 37 | self.name = name 38 | self.type = type 39 | self.source_field_name = source_field_name 40 | self.operation = operation 41 | if operation: 42 | if operation not in Operation.values(): 43 | raise RuntimeError( 44 | f"Invalid operation '{operation}'. " 45 | f"Allowed values are: {', '.join(Operation.values())}." 46 | ) 47 | 48 | def __repr__(self) -> str: 49 | """Field representation.""" 50 | return "Field(" f"name={self.name!r}," f"type={self.type!r})" 51 | 52 | def __eq__(self, other: object) -> bool: 53 | """Field equality operator.""" 54 | if not isinstance(other, Field): 55 | return NotImplemented 56 | return self.astuple() == other.astuple() 57 | 58 | def __hash__(self) -> int: 59 | """Field needs to be hashable to work with Faust.""" 60 | return object.__hash__(self) 61 | 62 | def astuple(self) -> Tuple[str, BasicType]: 63 | """Convert field to tuple.""" 64 | _field = (self.name, self.type) 65 | return _field 66 | 67 | def asdict(self) -> Mapping[str, BasicType]: 68 | """Convert field to dict.""" 69 | _field = {self.name: self.type} 70 | return _field 71 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Project 2 | agents/ 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | pip-wheel-metadata/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | db.sqlite3-journal 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | docs/api/ 77 | 78 | # PyBuilder 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # pyenv 89 | # For a library or package, you might want to ignore these files since the code is 90 | # intended to run in multiple environments; otherwise, check them in: 91 | # .python-version 92 | 93 | # pipenv 94 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 95 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 96 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 97 | # install all needed dependencies. 98 | #Pipfile.lock 99 | 100 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 101 | __pypackages__/ 102 | 103 | # Celery stuff 104 | celerybeat-schedule 105 | celerybeat.pid 106 | 107 | # SageMath parsed files 108 | *.sage.py 109 | 110 | # Environments 111 | .env 112 | .venv 113 | env/ 114 | venv/ 115 | ENV/ 116 | env.bak/ 117 | venv.bak/ 118 | 119 | # Spyder project settings 120 | .spyderproject 121 | .spyproject 122 | 123 | # Rope project settings 124 | .ropeproject 125 | 126 | # mkdocs documentation 127 | /site 128 | 129 | # mypy 130 | .mypy_cache/ 131 | .dmypy.json 132 | dmypy.json 133 | 134 | # Pyre type checker 135 | .pyre/ 136 | 137 | # pytype static type analyzer 138 | .pytype/ 139 | -------------------------------------------------------------------------------- /tests/kafkaaggregator/test_aggregator.py: -------------------------------------------------------------------------------- 1 | """Tests for the aggregator module.""" 2 | 3 | from typing import List 4 | 5 | import pytest 6 | 7 | from kafkaaggregator.aggregator import Aggregator 8 | from kafkaaggregator.fields import Field 9 | from kafkaaggregator.models import create_record 10 | 11 | 12 | @pytest.fixture 13 | def source_topic_fields() -> List[Field]: 14 | """Mock source topic fields.""" 15 | fields = [ 16 | Field("time", int), 17 | Field("value", float), 18 | Field("excluded", int), 19 | Field("nonnumeric", bool), 20 | Field("inttype", int), 21 | ] 22 | return fields 23 | 24 | 25 | @pytest.fixture 26 | def excluded_field_names() -> List[str]: 27 | """Mock excluded field names.""" 28 | return ["time", "excluded"] 29 | 30 | 31 | @pytest.fixture 32 | def operations() -> List[str]: 33 | """Mock list of opertions.""" 34 | return ["min", "mean", "median", "stdev", "max"] 35 | 36 | 37 | def test_aggregation_fields( 38 | source_topic_fields: List[Field], 39 | excluded_field_names: List[str], 40 | operations: List[str], 41 | ) -> None: 42 | """Test aggregation fields creation.""" 43 | aggregation_fields = Aggregator._create_aggregated_fields( 44 | source_topic_fields, operations 45 | ) 46 | # `time`, `count` and `window_size` are added by the aggregator 47 | assert Field("time", float) in 
aggregation_fields 48 | assert Field("count", int) in aggregation_fields 49 | assert Field("window_size", float) in aggregation_fields 50 | # if there's a `time` field in the source topic it is replaced 51 | assert Field("time", int) not in aggregation_fields 52 | # summary statistic fields added based on the `value` field 53 | assert Field("min_value", float) in aggregation_fields 54 | assert Field("mean_value", float) in aggregation_fields 55 | assert Field("median_value", float) in aggregation_fields 56 | assert Field("max_value", float) in aggregation_fields 57 | # field names added to the excluded_field_names list are not aggregated 58 | assert Field("excluded", float) not in aggregation_fields 59 | # non-numeric fields are excluded 60 | assert Field("nonnumeric", bool) not in aggregation_fields 61 | assert Field("min_nonnumeric", float) not in aggregation_fields 62 | # int type is aggregated as float 63 | assert Field("min_inttype", float) in aggregation_fields 64 | 65 | 66 | def test_record_class() -> None: 67 | """Test Faust Record creation.""" 68 | # make a simple Faust Record 69 | Foo = create_record( 70 | cls_name="Foo", 71 | fields=[Field("bar", int)], 72 | doc="Test record", 73 | ) 74 | f = Foo(bar=0) 75 | assert f.is_valid() 76 | assert f.asdict() == {"bar": 0} 77 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools>=42", 4 | "wheel", 5 | "setuptools_scm[toml]>=3.4" 6 | ] 7 | build-backend = 'setuptools.build_meta' 8 | 9 | [tool.setuptools_scm] 10 | 11 | [tool.tox] 12 | legacy_tox_ini = """ 13 | [tox] 14 | envlist = py,coverage-report,typing,lint 15 | isolated_build = True 16 | 17 | [testenv] 18 | description = Run pytest against {envname}. 19 | deps = 20 | -r{toxinidir}/requirements/main.txt 21 | -r{toxinidir}/requirements/dev.txt 22 | commands = 23 | coverage run -m pytest {posargs} 24 | 25 | [testenv:coverage-report] 26 | description = Compile coverage from each test run. 27 | skip_install = true 28 | deps = coverage[toml]>=5.0.2 29 | depends = 30 | py 31 | commands = 32 | coverage combine 33 | coverage report 34 | 35 | [testenv:typing] 36 | description = Run mypy. 37 | commands = 38 | mypy --show-traceback src/kafkaaggregator tests setup.py 39 | 40 | [testenv:lint] 41 | description = Lint codebase by running pre-commit (Black, isort, Flake8). 42 | skip_install = true 43 | deps = 44 | pre-commit 45 | commands = pre-commit run --all-files 46 | 47 | [testenv:docs] 48 | description = Build documentation (HTML) with Sphinx. 49 | commands = 50 | sphinx-build -n -T -b html -d {envtmpdir}/doctrees docs docs/_build/html 51 | 52 | 53 | [testenv:run] 54 | description = Run the development server with auto-reload for code changes.
55 | usedevelop = true 56 | commands = adev runserver --app-factory create_app src/kafkaaggregator/app.py 57 | """ 58 | 59 | [tool.coverage.run] 60 | parallel = true 61 | branch = true 62 | source = ["kafkaaggregator"] 63 | 64 | [tool.coverage.paths] 65 | source = ["src", ".tox/*/site-packages"] 66 | 67 | [tool.coverage.report] 68 | show_missing = true 69 | exclude_lines = [ 70 | "pragma: no cover", 71 | "def __repr__", 72 | "if self.debug:", 73 | "if settings.DEBUG", 74 | "raise AssertionError", 75 | "raise NotImplementedError", 76 | "if 0:", 77 | "if __name__ == .__main__.:", 78 | "if TYPE_CHECKING:" 79 | ] 80 | 81 | [tool.black] 82 | line-length = 79 83 | target-version = ['py37'] 84 | exclude = ''' 85 | /( 86 | \.eggs 87 | | \.git 88 | | \.mypy_cache 89 | | \.tox 90 | | \.venv 91 | | _build 92 | | build 93 | | dist 94 | )/ 95 | ''' 96 | # Use single-quoted strings so TOML treats the string like a Python r-string 97 | # Multi-line strings are implicitly treated by black as regular expressions 98 | 99 | [tool.isort] 100 | include_trailing_comma = true 101 | multi_line_output = 3 102 | known_first_party = ["kafkaaggregator", "tests"] 103 | skip = ["docs/conf.py"] 104 | 105 | [tool.pytest.ini_options] 106 | asyncio_mode = "strict" 107 | filterwarnings = [ 108 | # Bug in venusian (via faust) 109 | "ignore:.*imp module is deprecated.*:DeprecationWarning" 110 | ] 111 | -------------------------------------------------------------------------------- /src/kafkaaggregator/generator.py: -------------------------------------------------------------------------------- 1 | """Generates Faust agents based on the agent.j2 template.""" 2 | 3 | __all__ = ["AgentGenerator"] 4 | 5 | import logging 6 | import os 7 | from pathlib import Path 8 | from typing import Any, Mapping 9 | 10 | import aiofiles 11 | from jinja2 import Environment, PackageLoader, Template, TemplateError 12 | 13 | from kafkaaggregator.aggregator_config import AggregatorConfig 14 | from kafkaaggregator.app import config 15 | 16 | logger = logging.getLogger("kafkaaggregator") 17 | 18 | 19 | class AgentGenerator: 20 | """Generate Faust agents from a list of source topics. 21 | 22 | Creates the context and renders the agents code template. 23 | 24 | Parameters 25 | ---------- 26 | source_topic_names : `list` 27 | List of source topic names. 28 | """ 29 | 30 | logger = logger 31 | 32 | def __init__(self, configfile: Path, aggregated_topic: str) -> None: 33 | 34 | self._aggregated_topic_name = aggregated_topic 35 | 36 | config = AggregatorConfig(configfile).get(aggregated_topic) 37 | 38 | # Supports the 1 source topic -> 1 aggregated topic case for the moment 39 | self._source_topic_name = config.source_topics[0] 40 | self._template: Template = self._load_template() 41 | 42 | @property 43 | def template(self) -> Template: 44 | """Get the agent template.""" 45 | return self._template 46 | 47 | @staticmethod 48 | def _create_filepath(source_topic_name: str) -> str: 49 | """Return the file path for the agent. 50 | 51 | The directory name comes from the agents_output_dir configuration 52 | parameter and the file name is based on source topic name. 53 | 54 | Parameters 55 | ---------- 56 | source_topic_name : `str` 57 | Name of the source topic to aggregate. 58 | """ 59 | agents_output_dir = config.agents_output_dir 60 | 61 | filepath = os.path.join(agents_output_dir, f"{source_topic_name}.py") 62 | 63 | return filepath 64 | 65 | def _create_context(self) -> Mapping[str, Any]: 66 | """Create the template context. 
67 | 68 | The template context stores the values passed to the template. 69 | 70 | Parameters 71 | ---------- 72 | source_topic_name : `str` 73 | Name of the source topic to aggregate 74 | 75 | Returns 76 | ------- 77 | context : `dict` 78 | A dictionary with values passed to the template. 79 | """ 80 | cls_name = self._source_topic_name.title().replace("-", "") 81 | 82 | context = dict( 83 | cls_name=cls_name, 84 | source_topic_name=self._source_topic_name, 85 | aggregation_topic_name=self._aggregated_topic_name, 86 | ) 87 | 88 | return context 89 | 90 | @staticmethod 91 | def _load_template() -> Template: 92 | """Load the agent template file.""" 93 | agent_template_file = config.agent_template_file 94 | 95 | env = Environment( 96 | loader=PackageLoader("kafkaaggregator"), keep_trailing_newline=True 97 | ) 98 | try: 99 | template = env.get_template(agent_template_file) 100 | except TemplateError as e: 101 | logger.error("Error loading the agent template file.") 102 | raise e 103 | 104 | return template 105 | 106 | async def run(self) -> None: 107 | """Run agents code generation.""" 108 | logger.info(f"Generating agent code for {self._source_topic_name}.") 109 | filepath = self._create_filepath(self._source_topic_name) 110 | context = self._create_context() 111 | 112 | async with aiofiles.open(filepath, "w") as file: 113 | await file.write(self._template.render(**context)) 114 | -------------------------------------------------------------------------------- /src/kafkaaggregator/templates/agent.j2: -------------------------------------------------------------------------------- 1 | """Faust agent for the {{ source_topic_name }} topic. 2 | 3 | The Faust agent persists the messages for the source topic in tumbling windows. 4 | See https://faust.readthedocs.io/en/latest/userguide/tables.html#windowing 5 | 6 | If the source topic is produced at a constant rate, the number of messages 7 | aggregated in each window is n = window_size * frequency. The window range is 8 | relative to the timestamp in the Kafka stream. 9 | 10 | The window_expires parameter controls when the callback function that 11 | processes the expired window(s) is called. When a window is processed, a new 12 | message is produced with the aggregated results. 13 | """ 14 | 15 | __all__ = [ 16 | "process_window", 17 | "process_source_topic", 18 | ] 19 | 20 | import logging 21 | from typing import Any, AsyncGenerator, List, Tuple 22 | 23 | from faust import web 24 | from faust.types import StreamT 25 | 26 | from kafkaaggregator.aggregator import Aggregator 27 | from kafkaaggregator.app import app, config 28 | 29 | logger = logging.getLogger("kafkaaggregator") 30 | 31 | 32 | # Topic names for the example are obtained from the configuration 33 | aggregator = Aggregator( 34 | source_topic_name="{{ source_topic_name }}", 35 | aggregation_topic_name="{{ aggregation_topic_name }}", 36 | excluded_field_names=config.excluded_field_names, 37 | operations=config.operations, 38 | ) 39 | 40 | # The Faust Record for the aggregation topic is created at runtime 41 | {{ cls_name }} = aggregator.async_create_record() 42 | 43 | source_topic = app.topic("{{ source_topic_name }}") 44 | 45 | aggregation_topic = app.topic( 46 | "{{ aggregation_topic_name }}", 47 | value_type={{ cls_name }}, 48 | internal=True 49 | ) 50 | 51 | 52 | def process_window(key: Tuple, value: List[Any]) -> None: 53 | """Process a window and send an aggregated message. 
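
    The summary statistics are computed by ``aggregator.compute`` and the
    resulting aggregated message is forwarded to the aggregation topic with
    ``send_soon`` (see the function body below).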
54 | 55 | Parameters 56 | ---------- 57 | key: `Tuple` 58 | key for the current window in the WindowSet associated with 59 | ``Table[k]``. The key contains the window range. 60 | Example: ``key = (k, (start, end))`` 61 | 62 | value: `list` 63 | List of messages in the current window. 64 | """ 65 | start, end = key[1] 66 | 67 | # Faust defines the window range as (start, start + size - 0.1) 68 | # https://github.com/robinhood/faust/blob/master/faust/types/windows.py#L16 69 | # To compute the midpoint of the window we have to correct the window range 70 | # by 0.1. Note that, despite this definition, messages with timestamps 71 | # between (start + size - 0.1) and (start + size) are correctly added to 72 | # the current window. 73 | time = (start + end + 0.1) / 2 74 | 75 | aggregated_message = aggregator.compute( 76 | time=time, 77 | window_size=config.window_size, 78 | min_sample_size=config.min_sample_size, 79 | messages=value 80 | ) 81 | 82 | aggregation_topic.send_soon(value=aggregated_message) 83 | 84 | logger.info( 85 | f"{aggregated_message.count:5d} messages aggregated on " 86 | f"{{ source_topic_name }}-tumbling-window ({start:.2f}, " 87 | f"{(end + 0.1):.2f})." 88 | ) 89 | 90 | 91 | # Tumbling window to persist source topic messages; the process_window 92 | # callback is called when the window expires. The window range is relative to 93 | # the timestamp added by Kafka. 94 | table = ( 95 | app.Table( 96 | "{{ source_topic_name }}-tumbling-window", 97 | default=list, 98 | on_window_close=process_window, 99 | help=f"Persist messages in windows of " f"{config.window_size}s.", 100 | ) 101 | .tumbling(config.window_size, expires=config.window_expires) 102 | .relative_to_stream() 103 | ) 104 | 105 | 106 | @app.agent(source_topic) 107 | async def process_source_topic(stream: StreamT) -> AsyncGenerator: 108 | """Process incoming messages for the {{ source_topic_name }} topic.""" 109 | async for message in stream: 110 | messages = table["{{ source_topic_name }}"].value() 111 | messages.append(message) 112 | table["{{ source_topic_name }}"] = messages 113 | 114 | yield message 115 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | ################ 2 | Kafka-aggregator 3 | ################ 4 | 5 | A Kafka aggregator based on the `Faust `_ Python Stream Processing library. 6 | 7 | This site provides documentation for the kafka-aggregator installation, configuration, user and development guides, and API reference. 8 | 9 | Before installing kafka-aggregator, you might want to use the docker-compose setup as a way to run it locally; in that case, jump straight to the `Configuration`_ and `User guide`_ sections. 10 | 11 | Overview 12 | ======== 13 | 14 | kafka-aggregator uses `Faust's windowing feature `_ to aggregate Kafka streams. 15 | 16 | kafka-aggregator implements a Faust agent (stream processor) that adds messages from a source topic into a Faust table. The table is configured as a tumbling window with a size and an expiration time. Every time a window expires, a callback function is called to aggregate the messages allocated to that window. The size of the window controls the frequency of the aggregated stream. 17 | 18 | kafka-aggregator uses `faust-avro `_ to add Avro serialization and Schema Registry support to Faust. 19 | 20 | 21 | .. figure:: /_static/kafka-aggregator.svg 22 | :name: Kafka-aggregator architecture diagram 23 | 24 | Figure 1.
Kafka-aggregator architecture diagram showing Kafka and Faust components. 25 | 26 | Summary statistics 27 | ------------------ 28 | kafka-aggregator uses the `Python statistics`_ module to compute summary statistics for each numerical field in the source topic. 29 | 30 | .. table:: *Summary statistics computed by kafka-aggregator*. 31 | 32 | +----------+--------------------------------------+ 33 | | mean() | Arithmetic mean ("average") of data. | 34 | +----------+--------------------------------------+ 35 | | median() | Median (middle value) of data. | 36 | +----------+--------------------------------------+ 37 | | min() | Minimum value of data. | 38 | +----------+--------------------------------------+ 39 | | max() | Maximum value of data. | 40 | +----------+--------------------------------------+ 41 | | stdev() | Sample standard deviation of data. | 42 | +----------+--------------------------------------+ 43 | | q1() | First quartile of the data. | 44 | +----------+--------------------------------------+ 45 | | q3() | Third quartile of the data. | 46 | +----------+--------------------------------------+ 47 | 48 | .. _Python statistics: https://docs.python.org/3/library/statistics.html 49 | 50 | 51 | Scalability 52 | ----------- 53 | 54 | As a kafka application, it is easy to scale kafka-aggregator horizontally by increasing the number of partitions for the source topics and by running more workers. 55 | 56 | To help to define the number of workers in a given environment, kafka-aggregator comes with an example module. Using the kafka-aggregator example module, you can initialize a number of source topics in kafka, control the number of fields in each topic, and produce messages for those topics at a given frequency. It is a good way to start running kafka-aggregator and to understand how it scales in a particular environment. 57 | 58 | 59 | Installation 60 | ============ 61 | 62 | .. toctree:: 63 | :maxdepth: 2 64 | 65 | installation 66 | 67 | Configuration 68 | ============= 69 | 70 | .. toctree:: 71 | :maxdepth: 2 72 | 73 | configuration 74 | 75 | User guide 76 | ========== 77 | 78 | .. toctree:: 79 | :maxdepth: 2 80 | 81 | userguide 82 | 83 | Development guide 84 | ================= 85 | 86 | .. toctree:: 87 | :maxdepth: 2 88 | 89 | development 90 | release 91 | 92 | API 93 | === 94 | 95 | .. toctree:: 96 | :maxdepth: 2 97 | 98 | api 99 | 100 | 101 | Project information 102 | =================== 103 | 104 | The GitHub repository for `kafka-aggregator` is https://github.com/lsst-sqre/kafka-aggregator 105 | 106 | .. toctree:: 107 | :maxdepth: 2 108 | 109 | contributing 110 | changelog 111 | 112 | See the LICENSE_ file for licensing information. 113 | 114 | .. _LICENSE: https://github.com/lsst-sqre/kafka-aggregator/blob/master/LICENSE 115 | -------------------------------------------------------------------------------- /src/kafkaaggregator/topics.py: -------------------------------------------------------------------------------- 1 | """Implements Topic, SourceTopic and AggregationTopic classes. 2 | 3 | The Topic class has methods to retrieve the topic schema from the Schema 4 | Registry and a parsed list of fields from the Avro schema with Python types. 5 | 6 | The child classes SourceTopic and AggregationTopic set the right Schema 7 | Registry URL to be used with each topic type. 
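
A minimal usage sketch (the topic name comes from the example module, and a
Schema Registry must be reachable at the configured ``registry_url``)::

    import asyncio

    from kafkaaggregator.topics import SourceTopic

    topic = SourceTopic(name="example-000")
    fields = asyncio.run(topic.get_fields())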
8 | """ 9 | __all__ = ["SchemaException", "Topic", "SourceTopic", "AggregatedTopic"] 10 | 11 | import json 12 | import logging 13 | from typing import List, Union 14 | 15 | from faust_avro.asyncio import ConfluentSchemaRegistryClient 16 | 17 | from kafkaaggregator.app import config 18 | from kafkaaggregator.fields import Field 19 | 20 | logger = logging.getLogger("kafkaaggregator") 21 | 22 | AvroSchemaT = str 23 | 24 | 25 | class SchemaException(Exception): 26 | """A generic schema registry client exception.""" 27 | 28 | 29 | class Topic: 30 | """ 31 | Topic schema and interaction with the Schema Registry. 32 | 33 | Parameters 34 | ---------- 35 | name : `str` 36 | Name of a kafka topic. 37 | registry_url : `str` 38 | Schema Registry URL. 39 | """ 40 | 41 | logger = logger 42 | 43 | def __init__( 44 | self, name: str, registry_url: str = config.registry_url 45 | ) -> None: 46 | 47 | self.name = name 48 | self._subject = f"{self.name}-value" 49 | self._client = ConfluentSchemaRegistryClient(url=registry_url) 50 | self._registry = self._client.registry 51 | self._parse = self._client.registry.parse 52 | 53 | async def get_schema(self) -> AvroSchemaT: 54 | """Retrieve topic schema from the Schema Registry. 55 | 56 | Returns 57 | ------- 58 | schema : `str` 59 | Avro schema. 60 | """ 61 | schema = None 62 | try: 63 | schema = await self._client.schema_by_topic(self._subject) 64 | except Exception: 65 | msg = f"Could not retrieve schema for subject {self._subject}." 66 | raise SchemaException(msg) 67 | 68 | return schema 69 | 70 | async def get_fields(self) -> List[Field]: 71 | """Get topic fields. 72 | 73 | Parses the topic Avro schema and returns a list of fields with 74 | Python types. 75 | 76 | Returns 77 | ------- 78 | fields : `list` [`Field`] 79 | List of topic fields. 80 | """ 81 | schema = await self.get_schema() 82 | fields = [] 83 | if schema: 84 | # The faust-avro parser expects a json-parsed avro schema 85 | # https://github.com/masterysystems/faust-avro/blob/master/faust_avro/parsers/avro.py#L20 86 | parsed_schema = self._parse(json.loads(schema)) 87 | for field in parsed_schema.fields: 88 | fields.append(Field(field.name, field.type.python_type)) 89 | 90 | return fields 91 | 92 | async def register(self, schema: AvroSchemaT) -> Union[int, None]: 93 | """Register an Avro schema with the Schema Registry. 94 | 95 | If the schema is already register for this subject it does nothing. 96 | 97 | Parameters 98 | ---------- 99 | subject : `str` 100 | Name of the topic subject. 101 | schema : `str` 102 | Avro schema. 103 | 104 | Returns 105 | ------- 106 | schema_id : `int` or `None` 107 | Schema ID from the Schema Registry or `None` if it is already 108 | registered. 109 | """ 110 | logger.info(f"Register schema for subject {self._subject}.") 111 | 112 | is_registered = False 113 | try: 114 | is_registered = await self._client.is_registered( 115 | self._subject, schema 116 | ) 117 | except Exception: 118 | msg = "Could not connect to Schema Registry." 119 | raise SchemaException(msg) 120 | 121 | schema_id = None 122 | if not is_registered: 123 | try: 124 | schema_id = await self._client.register(self._subject, schema) 125 | except Exception: 126 | msg = f"Could not register schema for subject {self._subject}." 127 | raise SchemaException(msg) 128 | return schema_id 129 | 130 | 131 | class SourceTopic(Topic): 132 | """Represents source topics. 133 | 134 | Sets the right Schema Registry URL for source topics. 
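The topic's Avro schema is looked up in that registry under the
``<name>-value`` subject (see `Topic.get_schema`).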
135 | 136 | Parameters 137 | ---------- 138 | name: `str` 139 | Name of the source topic in Kafka. 140 | """ 141 | 142 | def __init__(self, name: str) -> None: 143 | super().__init__(name=name, registry_url=config.registry_url) 144 | 145 | 146 | class AggregatedTopic(Topic): 147 | """Represents aggregated topics. 148 | 149 | Sets the right Schema Registry URL for aggregated topics. 150 | 151 | Parameters 152 | ---------- 153 | name: `str` 154 | Name of the aggregated topic in Kafka. 155 | """ 156 | 157 | def __init__(self, name: str) -> None: 158 | super().__init__(name=name, registry_url=config.internal_registry_url) 159 | -------------------------------------------------------------------------------- /docker-compose.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | version: '3' 3 | services: 4 | 5 | zookeeper: 6 | image: confluentinc/cp-zookeeper:5.5.3 7 | hostname: zookeeper 8 | container_name: zookeeper 9 | ports: 10 | - "2181:2181" 11 | environment: 12 | ZOOKEEPER_CLIENT_PORT: 2181 13 | ZOOKEEPER_TICK_TIME: 2000 14 | 15 | broker: 16 | image: confluentinc/cp-enterprise-kafka:5.5.3 17 | hostname: broker 18 | container_name: broker 19 | depends_on: 20 | - zookeeper 21 | ports: 22 | - "9092:9092" 23 | environment: 24 | KAFKA_BROKER_ID: 1 25 | KAFKA_ZOOKEEPER_CONNECT: 'zookeeper:2181' 26 | KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT 27 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://broker:29092,PLAINTEXT_HOST://localhost:9092 28 | KAFKA_METRIC_REPORTERS: io.confluent.metrics.reporter.ConfluentMetricsReporter 29 | KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 30 | KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: 0 31 | CONFLUENT_METRICS_REPORTER_BOOTSTRAP_SERVERS: broker:29092 32 | CONFLUENT_METRICS_REPORTER_ZOOKEEPER_CONNECT: zookeeper:2181 33 | CONFLUENT_METRICS_REPORTER_TOPIC_REPLICAS: 1 34 | CONFLUENT_METRICS_ENABLE: 'true' 35 | CONFLUENT_SUPPORT_CUSTOMER_ID: 'anonymous' 36 | 37 | kafdrop: 38 | image: obsidiandynamics/kafdrop 39 | hostname: kafdrop 40 | container_name: kafdrop 41 | depends_on: 42 | - zookeeper 43 | - broker 44 | - schema-registry 45 | ports: 46 | - "9000:9000" 47 | environment: 48 | KAFKA_BROKERCONNECT: "broker:29092" 49 | JVM_OPTS: "-Xms32M -Xmx64M" 50 | SERVER_SERVLET_CONTEXTPATH: "/" 51 | CMD_ARGS: "--message.format=AVRO --schemaregistry.connect=http://schema-registry:8081" 52 | 53 | control-center: 54 | image: confluentinc/cp-enterprise-control-center:5.5.3 55 | hostname: control-center 56 | container_name: control-center 57 | depends_on: 58 | - zookeeper 59 | - broker 60 | - schema-registry 61 | ports: 62 | - "9021:9021" 63 | environment: 64 | CONTROL_CENTER_BOOTSTRAP_SERVERS: 'broker:29092' 65 | CONTROL_CENTER_ZOOKEEPER_CONNECT: 'zookeeper:2181' 66 | CONTROL_CENTER_SCHEMA_REGISTRY_URL: "http://schema-registry:8081" 67 | CONTROL_CENTER_REPLICATION_FACTOR: 1 68 | CONTROL_CENTER_INTERNAL_TOPICS_PARTITIONS: 1 69 | CONTROL_CENTER_MONITORING_INTERCEPTOR_TOPIC_PARTITIONS: 1 70 | CONFLUENT_METRICS_TOPIC_REPLICATION: 1 71 | PORT: 9021 72 | 73 | schema-registry: 74 | image: confluentinc/cp-schema-registry:5.5.3 75 | hostname: schema-registry 76 | container_name: schema-registry 77 | depends_on: 78 | - zookeeper 79 | - broker 80 | ports: 81 | - "8081:8081" 82 | environment: 83 | SCHEMA_REGISTRY_HOST_NAME: schema-registry 84 | SCHEMA_REGISTRY_MASTER_ELIGIBILITY: "true" 85 | SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: 'broker:29092' 86 | 87 | internal-schema-registry: 88 | image: 
confluentinc/cp-schema-registry:5.5.3 89 | hostname: internal-schema-registry 90 | container_name: internal-schema-registry 91 | depends_on: 92 | - zookeeper 93 | - broker 94 | ports: 95 | - "28081:28081" 96 | environment: 97 | SCHEMA_REGISTRY_LISTENERS: http://0.0.0.0:28081 98 | SCHEMA_REGISTRY_HOST_NAME: internal-schema-registry 99 | SCHEMA_REGISTRY_KAFKASTORE_TOPIC: _internal_schemas 100 | SCHEMA_REGISTRY_SCHEMA_REGISTRY_GROUP_ID: internal-schema-registry 101 | SCHEMA_REGISTRY_MASTER_ELIGIBILITY: "true" 102 | SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: 'broker:29092' 103 | 104 | connect: 105 | image: lsstsqre/cp-kafka-connect:5.5.2-0.8.0 106 | hostname: connect 107 | container_name: connect 108 | depends_on: 109 | - zookeeper 110 | - broker 111 | - schema-registry 112 | ports: 113 | - "8083:8083" 114 | environment: 115 | CONNECT_BOOTSTRAP_SERVERS: 'broker:29092' 116 | CONNECT_REST_ADVERTISED_HOST_NAME: connect 117 | CONNECT_REST_PORT: 8083 118 | CONNECT_GROUP_ID: compose-connect-group 119 | CONNECT_CONFIG_STORAGE_TOPIC: docker-connect-configs 120 | CONNECT_CONFIG_STORAGE_REPLICATION_FACTOR: 1 121 | CONNECT_OFFSET_FLUSH_INTERVAL_MS: 10000 122 | CONNECT_OFFSET_STORAGE_TOPIC: docker-connect-offsets 123 | CONNECT_OFFSET_STORAGE_REPLICATION_FACTOR: 1 124 | CONNECT_STATUS_STORAGE_TOPIC: docker-connect-status 125 | CONNECT_STATUS_STORAGE_REPLICATION_FACTOR: 1 126 | CONNECT_KEY_CONVERTER: org.apache.kafka.connect.storage.StringConverter 127 | CONNECT_VALUE_CONVERTER: io.confluent.connect.avro.AvroConverter 128 | CONNECT_VALUE_CONVERTER_SCHEMA_REGISTRY_URL: http://schema-registry:8081 129 | CONNECT_INTERNAL_KEY_CONVERTER: "org.apache.kafka.connect.json.JsonConverter" 130 | CONNECT_INTERNAL_VALUE_CONVERTER: "org.apache.kafka.connect.json.JsonConverter" 131 | CONNECT_ZOOKEEPER_CONNECT: 'zookeeper:2181' 132 | CONNECT_PLUGIN_PATH: "/usr/share/java,/etc/landoop/jars/lib" 133 | CONNECT_LOG4J_LOGGERS: org.apache.zookeeper=ERROR,org.I0Itec.zkclient=ERROR,org.reflections=ERROR 134 | -------------------------------------------------------------------------------- /tests/kafkaaggregator/test_compute.py: -------------------------------------------------------------------------------- 1 | """Test the Kafka-aggregator method for computing summary statistics.""" 2 | 3 | from pathlib import Path 4 | from typing import Any, List, Mapping 5 | 6 | import pytest 7 | 8 | from kafkaaggregator.aggregator import Aggregator 9 | from kafkaaggregator.fields import Field 10 | from kafkaaggregator.models import create_record 11 | 12 | 13 | @pytest.fixture 14 | def incoming_messages() -> List[Any]: 15 | """Mock incoming messages.""" 16 | messages = [ 17 | {"time": 0, "value": 1.0}, 18 | {"time": 1, "value": 2.0}, 19 | {"time": 2, "value": 3.0}, 20 | ] 21 | return messages 22 | 23 | 24 | @pytest.fixture 25 | def aggregated_fields() -> List[Field]: 26 | """Mock aggregation fields.""" 27 | fields = [ 28 | Field("time", int), 29 | Field("count", int), 30 | Field("window_size", float), 31 | Field("min_value", float, "value", "min"), 32 | Field("mean_value", float, "value", "mean"), 33 | Field("median_value", float, "value", "median"), 34 | Field("stdev_value", float, "value", "stdev"), 35 | Field("max_value", float, "value", "max"), 36 | ] 37 | return fields 38 | 39 | 40 | @pytest.fixture 41 | def expected_result() -> Mapping[str, Any]: 42 | """Return test expected result.""" 43 | result = { 44 | "count": 3, 45 | "min_value": 1.0, 46 | "time": 1.0, 47 | "window_size": 1.0, 48 | "max_value": 3.0, 49 | "mean_value": 2.0, 50 | 
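# For the values [1.0, 2.0, 3.0] above, the median is the middle value
# and the sample standard deviation is exactly 1.0.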
"median_value": 2.0, 51 | "stdev_value": 1.0, 52 | } 53 | return result 54 | 55 | 56 | @pytest.fixture 57 | def first_message_value(incoming_messages: List[Any]) -> Mapping[str, Any]: 58 | """Return the value of the first message instead of computing statistics. 59 | 60 | That's the expected result if the number of messages in the aggregation 61 | window is smaller than the min_sample_size. 62 | """ 63 | result = { 64 | "count": 3, 65 | "min_value": incoming_messages[0]["value"], 66 | "time": 1.0, # timestamp of the aggregated message 67 | "window_size": 1.0, 68 | "max_value": incoming_messages[0]["value"], 69 | "mean_value": incoming_messages[0]["value"], 70 | "median_value": incoming_messages[0]["value"], 71 | "stdev_value": incoming_messages[0]["value"], 72 | } 73 | return result 74 | 75 | 76 | def test_compute( 77 | config_dir: Path, 78 | incoming_messages: List[Any], 79 | aggregated_fields: List[Field], 80 | expected_result: Mapping[str, Any], 81 | ) -> None: 82 | """Test the Aggregator compute method. 83 | 84 | Parameters 85 | ---------- 86 | incoming_messages: `list` 87 | Mock list of incoming messages 88 | aggregation_fields: `list` [`Field`] 89 | List of fields to aggregate. 90 | expected_result: `dict` 91 | Dictionary with the expected result for the aggregated_message 92 | """ 93 | Agg = Aggregator( 94 | configfile=config_dir.joinpath("aggregator_config.yaml"), 95 | aggregated_topic="aggregated_example0", 96 | ) 97 | 98 | # Mock the creation of the aggregated fields 99 | Agg._aggregated_fields = aggregated_fields 100 | 101 | # Mock the creation of the Faust Record for the aggregation topic 102 | Agg._record = create_record( 103 | cls_name="AggregationRecord", 104 | fields=aggregated_fields, 105 | doc="Faust record for topic test-source-topic.", 106 | ) 107 | aggregated_message = Agg.compute( 108 | time=1.0, 109 | messages=incoming_messages, 110 | ) 111 | assert aggregated_message.is_valid() 112 | assert aggregated_message.asdict() == expected_result 113 | 114 | 115 | def test_compute_min_sample_size( 116 | config_dir: Path, 117 | incoming_messages: List[Any], 118 | aggregated_fields: List[Field], 119 | first_message_value: Mapping[str, Any], 120 | ) -> None: 121 | """Test the Aggregator compute method. 122 | 123 | Test the case where the min_sample_size is larger than the number 124 | of messages in the aggregation window. 125 | 126 | Parameters 127 | ---------- 128 | incoming_messages: `list` 129 | Mock list of incoming messages 130 | aggregation_fields: `list` [`Field`] 131 | List of fields to aggregate. 
132 | expected_result: `dict` 133 | Dictionary with the expected result for the aggregated_message 134 | """ 135 | Agg = Aggregator( 136 | configfile=config_dir.joinpath( 137 | "aggregator_config_min_sample_size.yaml" 138 | ), 139 | aggregated_topic="aggregated_example0", 140 | ) 141 | 142 | # Mock the creation of the aggregated fields 143 | Agg._aggregated_fields = aggregated_fields 144 | 145 | # Mock the creation of the Faust Record for the aggregation topic 146 | Agg._record = create_record( 147 | cls_name="AggregationRecord", 148 | fields=aggregated_fields, 149 | doc="Faust record for topic test-source-topic.", 150 | ) 151 | 152 | aggregated_message = Agg.compute( 153 | time=1.0, 154 | messages=incoming_messages, 155 | ) 156 | assert aggregated_message.asdict() == first_message_value 157 | -------------------------------------------------------------------------------- /src/kafkaaggregator/config.py: -------------------------------------------------------------------------------- 1 | """Configuration definition.""" 2 | 3 | __all__ = ["Configuration", "ExampleConfiguration"] 4 | 5 | import os 6 | import sys 7 | from dataclasses import dataclass 8 | from os.path import abspath, dirname, isdir 9 | from typing import List 10 | 11 | from kafkaaggregator.operations import Operation 12 | 13 | 14 | @dataclass 15 | class Configuration: 16 | """Configuration for kafkaaggregator.""" 17 | 18 | broker: str = os.getenv("KAFKA_BROKER_URL", "kafka://localhost:9092") 19 | """The Kafka broker URL. 20 | 21 | Currently, the only supported production transport is kafka://. 22 | This uses the aiokafka client under the hood, for consuming and producing 23 | messages. 24 | """ 25 | 26 | registry_url: str = os.getenv( 27 | "SCHEMA_REGISTRY_URL", "http://localhost:8081" 28 | ) 29 | """The Confluent Schema Registry URL. 30 | 31 | Schema Registry used to read source topic schemas. 32 | """ 33 | 34 | internal_registry_url: str = os.getenv( 35 | "INTERNAL_SCHEMA_REGISTRY_URL", "http://localhost:8081" 36 | ) 37 | """Internal Confluent Schema Registry URL. 38 | 39 | Used in conjunction with faust-avro to register aggregated topic schemas. 40 | Depending on your Kafka setup you can use this internal Schema Registry to 41 | separate the aggregated topic schemas from other schemas and avoid 42 | Schema ID conflicts. 43 | """ 44 | 45 | store: str = os.getenv("STORE", "memory://") 46 | """The backend used for table storage. 47 | 48 | Tables are stored in-memory by default. In production, a persistent table 49 | store, such as rocksdb:// is preferred. 50 | """ 51 | 52 | min_sample_size: int = int(os.getenv("MIN_SAMPLE_SIZE", "2")) 53 | """Minimum sample size to compute statistics. 54 | 55 | Given the size of the tumbling window and the frequency of incoming 56 | messages, this parameter sets the minimum sample size to compute 57 | statistics. The Faust tumbling window will always contain at least one 58 | message. If the number of messages in the tumbling window is smaller than 59 | min_sample_size the values of the first message are used instead. 60 | 61 | The default value ``min_sample_size=2`` make sure we can compute stdev. 62 | """ 63 | 64 | topic_partitions: int = int(os.getenv("TOPIC_PARTITIONS", "4")) 65 | """Default number of partitions for new topics. 66 | 67 | This defines the maximum number of workers we could use to distribute the 68 | workload of the application. 
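
    For example, with ``topic_partitions=4`` at most four workers can consume
    a given source topic in parallel.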
69 | """ 70 | 71 | topic_regex: str = str(os.getenv("TOPIC_REGEX", ".*")) 72 | """Regex used to filter topic names.""" 73 | 74 | aggregator_config_file: str = os.getenv( 75 | "AGGREGATOR_CONFIG_FILE", "aggregator.yaml" 76 | ) 77 | """Aggregator configuration file. 78 | 79 | Specify the mapping between source and aggregated topics, the 80 | fields within those topics to use and window aggregation configuration. 81 | """ 82 | 83 | agents_output_dir: str = os.getenv("AGENTS_OUTPUT_DIR", "agents") 84 | """Name of output directory for the agents' code.""" 85 | 86 | agent_template_file: str = os.getenv("AGENT_TEMPLATE_FILE", "agent.j2") 87 | """Name of the agent Jinja2 template file.""" 88 | 89 | def __post_init__(self) -> None: 90 | """Post config initialization steps.""" 91 | # Validate operations 92 | self.operations = self._strtolist( 93 | os.getenv("OPERATIONS", "min, q1, mean, median, stdev, q3, max") 94 | ) 95 | 96 | for operation in self.operations: 97 | if operation not in Operation.values(): 98 | raise ValueError( 99 | f"Invalid operation '{operation}' in config.operations. " 100 | f"Allowed values are: {', '.join(Operation.values())}." 101 | ) 102 | 103 | # Make sure agents_output_dir exists and update syspath to enable 104 | # agents autodiscover 105 | if not isdir(self.agents_output_dir): 106 | os.makedirs(self.agents_output_dir) 107 | 108 | sys.path.append(abspath(dirname(self.agents_output_dir))) 109 | 110 | def _strtolist(self, s: str) -> List[str]: 111 | """Convert comma separated values to a list of strings. 112 | 113 | Parameters 114 | ---------- 115 | s : `str` 116 | Comma separated values 117 | 118 | Returns 119 | ------- 120 | slist : `list` 121 | """ 122 | slist = s.replace(" ", "").split(",") 123 | return slist 124 | 125 | 126 | @dataclass 127 | class ExampleConfiguration: 128 | """Configuration for the Kafkaaggregator example.""" 129 | 130 | ntopics: int = int(os.getenv("NTOPICS", "10")) 131 | """Number of source topics used in the aggregation example.""" 132 | 133 | nfields: int = int(os.getenv("NFIELDS", "10")) 134 | """Number of fields for source topics used in the aggregation example.""" 135 | 136 | frequency: float = float(os.getenv("FREQUENCY", "10")) 137 | """The frequency in Hz in which messages are produced for the 138 | example topics. 139 | """ 140 | 141 | max_messages: int = int(os.getenv("MAX_MESSAGES", "10")) 142 | """The maximum number of messages to produce. Set max_messages to a number 143 | smaller than 1 to produce an indefinite number of messages. 144 | """ 145 | 146 | source_topic_name_prefix: str = os.getenv( 147 | "SOURCE_TOPIC_NAME_PRFIX", "example" 148 | ) 149 | """The prefix for source topic names to use with the aggregator example. 
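
    With the default prefix and ``NTOPICS=10``, the example module creates
    topics named ``example-000`` through ``example-009``.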
150 | """ 151 | -------------------------------------------------------------------------------- /src/kafkaaggregator/aggregator_config.py: -------------------------------------------------------------------------------- 1 | """Pydantic models for the aggregator configuration.""" 2 | 3 | from pathlib import Path 4 | from typing import Any, List, Mapping, Optional 5 | 6 | import yaml 7 | from pydantic import BaseModel, validator 8 | 9 | from kafkaaggregator.operations import Operation 10 | 11 | __all__ = [ 12 | "SourceTopic", 13 | "Filter", 14 | "WindowAggregation", 15 | "AggregatedTopic", 16 | "AggregatorConfig", 17 | ] 18 | 19 | 20 | class SourceTopic(BaseModel): 21 | """Describe a source topic.""" 22 | 23 | name: str 24 | """Source topic name.""" 25 | 26 | fields: List[str] 27 | """List of fields to keep from the source topic.""" 28 | 29 | # TODO: add value transformation 30 | 31 | map: Optional[Mapping[str, str]] = None 32 | """Map transformation on fields. 33 | 34 | For example, to rename a field use ``field: new_field``. 35 | """ 36 | 37 | # TODO: add validation for Source topic name and fields 38 | 39 | @validator("map") 40 | def validate_map( 41 | cls, field_value: str, values: Mapping[str, Any], **kwargs: Any 42 | ) -> str: 43 | """Validate the mapping parameter.""" 44 | if field_value: 45 | for k in field_value: 46 | if k not in values["fields"]: 47 | raise ValueError( 48 | f"Invalid field name '{k}' in mapping specification." 49 | ) 50 | return field_value 51 | 52 | 53 | class Filter(BaseModel): 54 | """Filter transformation on source topics.""" 55 | 56 | source_topics: List[SourceTopic] 57 | """List of source topics to keep in the source stream.""" 58 | 59 | 60 | class WindowAggregation(BaseModel): 61 | """Specify window aggregation configuration.""" 62 | 63 | window_size_seconds: int = 1 64 | """Size of the tumbling window in seconds used to aggregate messages.""" 65 | 66 | window_expiration_seconds: int = 0 67 | """Default Window expiration time in seconds. 68 | 69 | This parameter controls when the callback function to process the expired 70 | window(s) is called. 71 | """ 72 | 73 | min_sample_size: int = 2 74 | """Minimum sample size to compute statistics. 75 | 76 | Given the size of the tumbling window and the frequency of incoming 77 | messages, this parameter sets the minimum sample size to compute 78 | statistics. The Faust tumbling window will always contain at least one 79 | message. If the number of messages in the tumbling window is smaller than 80 | min_sample_size, no statistics are computed and the values for the first 81 | message are used instead. 82 | 83 | The default value ``min_sample_size=2`` make sure we can compute stdev. 84 | """ 85 | 86 | operations: List[str] = ["mean"] 87 | """Window aggregation operations to perform.""" 88 | 89 | @validator("operations") 90 | def validate_operations(cls, operations: List) -> List: 91 | """Validate the operations parameter.""" 92 | for operation in operations: 93 | if operation not in Operation.values(): 94 | raise ValueError( 95 | f"Invalid operation '{operation}'. " 96 | f"Allowed values are: {', '.join(Operation.values())}." 
97 | ) 98 | return operations 99 | 100 | 101 | class AggregatedTopic(BaseModel): 102 | """Describe an aggregated topic.""" 103 | 104 | name: str 105 | """Aggregated topic name.""" 106 | 107 | filter: Filter 108 | """Data filtering.""" 109 | 110 | window_aggregation: WindowAggregation 111 | """Window aggregation.""" 112 | 113 | @property 114 | def source_topics(self) -> List: 115 | """List source topic names.""" 116 | source_topics: List = [] 117 | for topic in self.filter.source_topics: 118 | source_topics.append(topic.name) 119 | 120 | return source_topics 121 | 122 | def get(self, source_topic: str) -> SourceTopic: 123 | """Get source topic object by its name.""" 124 | for topic in self.filter.source_topics: 125 | if topic.name == source_topic: 126 | break 127 | return topic 128 | 129 | 130 | class AggregatedTopics(BaseModel): 131 | """Describe the configuration for all the aggregated topics.""" 132 | 133 | aggregated_topics: List[AggregatedTopic] 134 | """List of aggregated topics.""" 135 | 136 | 137 | class AggregatorConfig: 138 | """A representation of the aggregator configuration.""" 139 | 140 | def __init__(self, configfile: Path) -> None: 141 | self._configfile = configfile 142 | self._config = self._parse() 143 | 144 | def _parse(self) -> AggregatedTopics: 145 | """Parse aggregator configuration file.""" 146 | f = open(self._configfile) 147 | yaml_data = yaml.safe_load(f) 148 | config = AggregatedTopics.parse_obj(yaml_data) 149 | return config 150 | 151 | @property 152 | def config(self) -> AggregatedTopics: 153 | """Return the configuration object.""" 154 | return self._config 155 | 156 | @property 157 | def aggregated_topics(self) -> List: 158 | """List aggregated topic names.""" 159 | aggregated_topic: List = [] 160 | for topic in self._config.aggregated_topics: 161 | aggregated_topic.append(topic.name) 162 | return aggregated_topic 163 | 164 | def get(self, aggregated_topic: str) -> AggregatedTopic: 165 | """Get aggregated topic object by its name.""" 166 | for topic in self._config.aggregated_topics: 167 | if topic.name == aggregated_topic: 168 | break 169 | return topic 170 | -------------------------------------------------------------------------------- /.github/workflows/ci.yaml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | "on": 4 | push: 5 | branches-ignore: 6 | # These should always correspond to pull requests, so ignore them for 7 | # the push trigger and let them be triggered by the pull_request 8 | # trigger, avoiding running the workflow twice. This is a minor 9 | # optimization so there's no need to ensure this is comprehensive. 10 | - "dependabot/**" 11 | - "renovate/**" 12 | - "tickets/**" 13 | - "u/**" 14 | tags: 15 | - "*" 16 | pull_request: {} 17 | 18 | jobs: 19 | test: 20 | runs-on: ubuntu-latest 21 | 22 | strategy: 23 | matrix: 24 | python: 25 | - "3.9" 26 | 27 | steps: 28 | - uses: actions/checkout@v3 29 | 30 | - name: Set up Python 31 | uses: actions/setup-python@v4 32 | with: 33 | python-version: ${{ matrix.python }} 34 | 35 | - name: Run pre-commit 36 | uses: pre-commit/action@v3.0.0 37 | 38 | - name: Install tox 39 | run: pip install tox 40 | 41 | - name: Cache tox environments 42 | id: cache-tox 43 | uses: actions/cache@v3 44 | with: 45 | path: .tox 46 | # requirements/*.txt and pyproject.toml have versioning info 47 | # that would impact the tox environment. 
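          # A prefix match on restore-keys reuses a cache built from the same
          # requirements files even when pyproject.toml has changed.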
48 | key: tox-${{ matrix.python }}-${{ hashFiles('requirements/*.txt') }}-${{ hashFiles('pyproject.toml') }} 49 | restore-keys: | 50 | tox-${{ matrix.python }}-${{ hashFiles('requirements/*.txt') }}- 51 | 52 | - name: Run tox 53 | run: tox -e py,coverage-report,typing 54 | 55 | build: 56 | runs-on: ubuntu-latest 57 | needs: [test] 58 | 59 | # Only do Docker builds of tagged releases and pull requests from ticket 60 | # branches. This will still trigger on pull requests from untrusted 61 | # repositories whose branch names match our tickets/* branch convention, 62 | # but in this case the build will fail with an error since the secret 63 | # won't be set. 64 | if: > 65 | startsWith(github.ref, 'refs/tags/') 66 | || startsWith(github.head_ref, 'tickets/') 67 | 68 | steps: 69 | - uses: actions/checkout@v3 70 | with: 71 | fetch-depth: 0 72 | 73 | - name: Define the Docker tag 74 | id: vars 75 | run: echo ::set-output name=tag::$(scripts/docker-tag.sh) 76 | 77 | - name: Print the tag 78 | id: print 79 | run: echo ${{ steps.vars.outputs.tag }} 80 | 81 | - name: Set up Docker Buildx 82 | uses: docker/setup-buildx-action@v2 83 | 84 | - name: Log in to Docker Hub 85 | uses: docker/login-action@v2 86 | with: 87 | username: ${{ secrets.DOCKER_USERNAME }} 88 | password: ${{ secrets.DOCKER_TOKEN }} 89 | 90 | - name: Log in to GitHub Container Registry 91 | uses: docker/login-action@v2 92 | with: 93 | registry: ghcr.io 94 | username: ${{ github.repository_owner }} 95 | password: ${{ secrets.GITHUB_TOKEN }} 96 | 97 | - name: Build and push 98 | uses: docker/build-push-action@v3 99 | with: 100 | context: . 101 | push: true 102 | tags: | 103 | lsstsqre/kafkaaggregator:${{ steps.vars.outputs.tag }} 104 | ghcr.io/lsst-sqre/kafkaaggregator:${{ steps.vars.outputs.tag }} 105 | cache-from: type=gha 106 | cache-to: type=gha,mode=max 107 | 108 | docs: 109 | runs-on: ubuntu-latest 110 | steps: 111 | - uses: actions/checkout@v3 112 | 113 | - name: Set up Python 114 | uses: actions/setup-python@v4 115 | with: 116 | python-version: "3.9" 117 | 118 | - name: Install tox and LTD Conveyor 119 | run: pip install tox ltd-conveyor 120 | 121 | - name: Install graphviz 122 | run: sudo apt-get install graphviz 123 | 124 | - name: Run tox 125 | run: tox -e docs 126 | 127 | # Only attempt documentation uploads for long-lived branches, tagged 128 | # releases, and pull requests from ticket branches. This avoids version 129 | # clutter in the docs and failures when a PR doesn't have access to 130 | # secrets. This will still trigger on pull requests from untrusted 131 | # repositories whose branch names match our tickets/* branch convention, 132 | # but in this case the build will fail with an error since the secret 133 | # won't be set. 
134 | - name: Upload to LSST the Docs 135 | env: 136 | LTD_USERNAME: ${{ secrets.LTD_USERNAME }} 137 | LTD_PASSWORD: ${{ secrets.LTD_PASSWORD }} 138 | run: ltd upload --product kafka-aggregator --gh --dir docs/_build/html 139 | if: > 140 | github.event_name != 'pull_request' 141 | || startsWith(github.head_ref, 'tickets/') 142 | 143 | pypi: 144 | runs-on: ubuntu-latest 145 | needs: [test, build, docs] 146 | if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') 147 | 148 | steps: 149 | - uses: actions/checkout@v3 150 | with: 151 | fetch-depth: 0 # full history for setuptools_scm 152 | - name: Set up Python 153 | uses: actions/setup-python@v4 154 | with: 155 | python-version: "3.9" 156 | - name: Python install 157 | run: | 158 | python -m pip install --upgrade pip 159 | pip install . 160 | pip install --upgrade setuptools wheel 161 | - name: Build a binary wheel and a source tarball 162 | run: python setup.py sdist bdist_wheel 163 | - name: Publish package 164 | uses: pypa/gh-action-pypi-publish@v1.6.4 165 | with: 166 | user: __token__ 167 | password: ${{ secrets.PYPI_SQRE_ADMIN }} 168 | -------------------------------------------------------------------------------- /src/kafkaaggregator/example.py: -------------------------------------------------------------------------------- 1 | """Aggregation example.""" 2 | 3 | 4 | __all__ = ["AggregationExample", "UnexpectedNumberOfTopicsError"] 5 | 6 | import asyncio 7 | import json 8 | import logging 9 | import random 10 | from time import time 11 | from typing import List 12 | 13 | import faust_avro 14 | from faust_avro import Record 15 | 16 | from kafkaaggregator.config import ExampleConfiguration 17 | from kafkaaggregator.fields import Field 18 | from kafkaaggregator.models import create_record 19 | from kafkaaggregator.topics import SourceTopic 20 | 21 | AvroSchemaT = str 22 | 23 | logger = logging.getLogger("kafkaaggregator") 24 | 25 | config = ExampleConfiguration() 26 | 27 | 28 | class UnexpectedNumberOfTopicsError(RuntimeError): 29 | """Raised when the number of source topics is unnexpected. 30 | 31 | The number of source topics in Kafka must match the number of topics 32 | initialized by the example. 33 | """ 34 | 35 | 36 | class AggregationExample: 37 | """Initialize topics and produce messages for the aggregation example. 38 | 39 | The aggregation example can be used to evaluate the performance of the 40 | kafka-aggregator application. 41 | """ 42 | 43 | MAX_NTOPICS = 999 44 | MAX_NFIELDS = 999 45 | 46 | def __init__(self) -> None: 47 | self._ntopics = min(config.ntopics, AggregationExample.MAX_NTOPICS) 48 | self._nfields = min(config.nfields, AggregationExample.MAX_NFIELDS) 49 | self._source_topic_names: List = [] 50 | self._make_record = create_record 51 | 52 | def make_fields(self) -> List[Field]: 53 | """Make fields for the example topics. 54 | 55 | Returns 56 | ------- 57 | fields : `list` 58 | A list of fields mapping field name and type. 59 | """ 60 | # A source topic needs a timestamp field 61 | time = Field(name="time", type=float) 62 | fields = [time] 63 | for n in range(self._nfields): 64 | fields.append(Field(name=f"value{n}", type=float)) 65 | return fields 66 | 67 | def create_record(self, name: str) -> Record: 68 | """Create a Faust-avro Record class for the source topic. 69 | 70 | With a Faust-avro Record for the source topic it is possible 71 | to produce messages in Avro format for the aggregation example, 72 | instead of using ``value_type=bytes``. 
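
        For a topic named ``example-000`` the resulting Record class is named
        ``Example000`` (the topic name is title-cased and dashes are removed).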
73 | 74 | Returns 75 | ------- 76 | record : `Record` 77 | Faust-avro Record class for the source topic. 78 | """ 79 | logger.info(f"Make Faust record for topic {name}.") 80 | cls_name = name.title().replace("-", "") 81 | fields = self.make_fields() 82 | self._record = self._make_record( 83 | cls_name=cls_name, 84 | fields=fields, 85 | doc=f"Faust record for topic {name}.", 86 | ) 87 | return self._record 88 | 89 | async def initialize(self, app: faust_avro.App) -> None: 90 | """Initialize source topics for the aggregation example. 91 | 92 | To initialize the topic, its schema needs to be registered in 93 | the Schema Registry and the topic itself needs to be created in Kafka. 94 | 95 | Parameters 96 | ---------- 97 | app : `faust_avro.App` 98 | Faust application 99 | """ 100 | for n in range(self._ntopics): 101 | source_topic_name = f"{config.source_topic_name_prefix}-{n:03d}" 102 | source_topic = SourceTopic(name=source_topic_name) 103 | record = self.create_record(name=source_topic_name) 104 | schema = record.to_avro(registry=source_topic._registry) 105 | await source_topic.register(schema=json.dumps(schema)) 106 | # Declare the source topic as an internal topic in Faust. 107 | internal_topic = app.topic( 108 | source_topic_name, value_type=record, internal=True 109 | ) 110 | await internal_topic.declare() 111 | self._source_topic_names.append(source_topic_name) 112 | 113 | async def produce( 114 | self, app: faust_avro.App, frequency: float, max_messages: int 115 | ) -> None: 116 | """Produce messages for the source topics in the aggregation example. 117 | 118 | In the aggregation example we can specify the frequency in which the 119 | messages are produced, the maximum number of messages for each source 120 | topic and the number of fields in every message. 121 | 122 | Parameters 123 | ---------- 124 | app : `faust_avro.App` 125 | Faust application 126 | frequency : `float` 127 | The frequency in Hz in wich messages are produced. 128 | max_messages : `int` 129 | The maximum number of messages to produce. 130 | """ 131 | logger.info( 132 | f"Producing message(s) at {frequency} Hz for each source topic." 133 | ) 134 | count = 0 135 | send_count = 0 136 | 137 | while True: 138 | message = {"time": time()} 139 | for n in range(self._nfields): 140 | value = random.random() 141 | message.update({f"value{n}": value}) 142 | # The same message is sent for all source topics 143 | for source_topic_name in self._source_topic_names: 144 | source_topic = app.topic(source_topic_name) 145 | await source_topic.send(value=message) 146 | send_count += 1 147 | 148 | await asyncio.sleep(1 / frequency) 149 | # Allow for an indefinite loop if max_messages is a number 150 | # smaller than 1 151 | count += 1 152 | if count == max_messages: 153 | logger.info(f"{send_count} messages sent.") 154 | break 155 | -------------------------------------------------------------------------------- /docs/configuration.rst: -------------------------------------------------------------------------------- 1 | .. _configuration: 2 | 3 | ###################### 4 | Configuration settings 5 | ###################### 6 | 7 | In this section we discuss the main configuration settings to get kafka-aggregator running. The `Configuration class`_ is also documented here and can be used as reference for the configuration settings exposed in the `values.yaml`_ when using the :ref:`helm-chart`. 8 | 9 | .. 
_values.yaml: https://github.com/lsst-sqre/charts/blob/master/charts/kafka-aggregator/values.yaml 10 | 11 | 12 | 13 | Kafka settings 14 | ============== 15 | 16 | To configure kafka-aggregator with Kafka, you need to provide the Kafka `broker` and the `schema_registry_url` URLs. 17 | 18 | Separating source and aggregated schemas 19 | ---------------------------------------- 20 | 21 | The `Confluent Schema Registry`_ is used to manage Avro schemas for the source and aggregation topics. 22 | 23 | In a Kafka multi-site setup, there is usually `continuous migration`_ of the Avro schemas from the source Schema Registry to the destination Schema Registry. 24 | 25 | kafka-aggregator normally runs on the destination cluster and thus registers the aggregation topic schemas with the destination Schema Registry. Depending on how replication is configured, you may not have schema migration back to the source cluster, or you might not want to replicate the schemas for the aggregation topics. 26 | 27 | Either way, to avoid collisions between schema IDs for schemas created at the source Schema Registry and destination Schema Registry, we recommend deploying a separate Schema Registry to store the schemas for the aggregation topics. 28 | 29 | In this case, set the `internal_registry_url` configuration accordingly. The `docker-compose`_ configuration in the kafka-aggregator repository shows how to configure an internal schema registry for kafka-aggregator. 30 | 31 | 32 | .. _Confluent Schema Registry: https://docs.confluent.io/current/schema-registry/index.html 33 | .. _continuous migration: https://docs.confluent.io/current/schema-registry/installation/migrate.html#continuous-migration 34 | .. _docker-compose: https://github.com/lsst-sqre/kafka-aggregator/blob/master/docker-compose.yaml 35 | 36 | 37 | Kafka-aggregator settings 38 | ========================= 39 | 40 | The following configuration settings are specific to the kafka-aggregator application. 41 | 42 | Selecting topics to aggregate 43 | ----------------------------- 44 | 45 | kafka-aggregator selects source topics from Kafka using the `topic_regex` regular expression and excludes source topics listed in the `excluded_topics` list. 46 | 47 | Summary statistics 48 | ------------------ 49 | The `operations` configuration setting specifies the summary statistics to be computed for each 50 | numerical field in the source topic. 51 | 52 | 53 | Aggregation window settings 54 | --------------------------- 55 | 56 | Configure the size of the aggregation window using the `window_size` configuration setting. `window_expires` specifies the duration to store the data allocated to each window. 57 | 58 | .. note:: 59 | 60 | Faust allocates at least one message to each aggregation window. If `window_size` is smaller than the time interval between two consecutive messages, Faust will skip that window and no aggregation is computed. 61 | 62 | 63 | When deciding the size of the aggregation window, an important consideration is the `data reduction factor` given by ``R=window_size*f_in/N``, where ``f_in`` is the frequency of the input data stream in Hz and ``N`` is the number of summary statistics computed by kafka-aggregator. For example, to get a reduction factor of 100 times in storage for an input data stream of 50Hz, the size of the aggregation window must be 10s for N=5. 64 | 65 | Also, `window_size` should be large enough to minimize the standard error associated with the number of messages (sample size) allocated to a window.
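
The sketch below shows how that standard error can be estimated from two fields stored in every aggregated message (illustrative code, not part of the application):

.. code-block:: python

   import math

   def standard_error_of_mean(stdev: float, count: int) -> float:
       """Standard error of the mean from the aggregated stdev and count."""
       return stdev / math.sqrt(count)
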
The smaller the standard error, the more precise the computed estimate is. kafka-aggregator stores the sample size in the ``count`` field of each aggregated message. That can be used, for example, to compute the standard error of the mean, given by ``SE=stdev/sqrt(count)``, where ``stdev`` is the sample standard deviation (the square root of the sample variance) computed and stored for each field in the aggregated message. 66 | 67 | kafka-aggregator allows you to control the minimum sample size used to compute statistics by setting the `min_sample_size` parameter, which defaults to `min_sample_size=2`. 68 | 69 | .. note:: 70 | 71 | If ``count`` is less than `min_sample_size`, there are not enough values in the aggregation window to compute statistics and kafka-aggregator uses the first value in the window instead. 72 | 73 | Special field names 74 | ------------------- 75 | 76 | By default, kafka-aggregator excludes the field names ``time``, ``window_size``, and ``count`` from aggregation. These fields are instead added to each aggregated message: ``time`` is the midpoint of the aggregation window, ``window_size`` is the size of the aggregation window (the `window_size` configuration setting used at a given time), and ``count`` is the sample size as discussed above. 77 | 78 | Use the `excluded_field_names` list to exclude other fields from being aggregated. 79 | 80 | Aggregation topic name 81 | ---------------------- 82 | 83 | By default, aggregation topic names are formed by adding the ``aggregated`` suffix to the source topic name: ``{source_topic_name}-aggregated``. That can be changed by setting the `topic_rename_format` parameter in the configuration. 84 | 85 | 86 | Example module configuration 87 | ============================ 88 | 89 | The kafka-aggregator example module can be used to initialize "example source topics" in Kafka and produce messages for those topics. 90 | 91 | `source_topic_name_prefix` sets the name prefix for the example source topics. The number of topics to create is set by `ntopics` and the number of fields in each topic is set by `nfields`. The number of partitions for the example source topics is set by `topic_partitions`. 92 | 93 | The example module also produces messages for the example source topics. The frequency in Hz in which messages are produced is set by `frequency` and the maximum number of messages produced for each topic is set by `max_messages`. If `max_messages` is a number smaller than 1, an indefinite number of messages is produced. 94 | 95 | 96 | Configuration class 97 | =================== 98 | 99 | .. automodapi:: kafkaaggregator.config 100 | :no-inheritance-diagram: 101 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | """Sphinx configuration.""" 2 | 3 | import os 4 | import sys 5 | 6 | import lsst_sphinx_bootstrap_theme 7 | 8 | import kafkaaggregator 9 | 10 | # Work around Sphinx bug related to large and highly-nested source files 11 | sys.setrecursionlimit(2000) 12 | 13 | # -- General configuration ------------------------------------------------ 14 | 15 | # If your documentation needs a minimal Sphinx version, state it here. 16 | # needs_sphinx = '1.0' 17 | 18 | # Add any Sphinx extension module names here, as strings. They can be 19 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 20 | # ones.
21 | extensions = [ 22 | "sphinx.ext.autodoc", 23 | "sphinx.ext.doctest", 24 | "sphinx.ext.napoleon", 25 | "sphinx.ext.intersphinx", 26 | "sphinx.ext.todo", 27 | "sphinx.ext.ifconfig", 28 | "sphinx_automodapi.automodapi", 29 | "sphinx_automodapi.smart_resolver", 30 | "documenteer.sphinxext", 31 | ] 32 | 33 | # The suffix(es) of source filenames. 34 | # You can specify multiple suffix as a list of string: 35 | # source_suffix = ['.rst', '.md'] 36 | source_suffix = ".rst" 37 | 38 | # The master toctree document. 39 | master_doc = "index" 40 | 41 | # General information about the project. 42 | project = "kafka-aggregator" 43 | copyright = "2020" "Association of Universities for Research in Astronomy" 44 | author = "LSST SQuaRE" 45 | 46 | version = kafkaaggregator.__version__ 47 | release = version 48 | 49 | # The language for content autogenerated by Sphinx. Refer to documentation 50 | # for a list of supported languages. 51 | # 52 | # This is also used if you do content translation via gettext catalogs. 53 | # Usually you set "language" from the command line for these cases. 54 | language = None 55 | 56 | # There are two options for replacing |today|: either, you set today to some 57 | # non-false value, then it is used: 58 | # today = '' 59 | # Else, today_fmt is used as the format for a strftime call. 60 | # today_fmt = '%B %d, %Y' 61 | 62 | # List of patterns, relative to source directory, that match files and 63 | # directories to ignore when looking for source files. 64 | exclude_patterns = ["_build", "README.rst"] 65 | 66 | # The name of the Pygments (syntax highlighting) style to use. 67 | pygments_style = "sphinx" 68 | 69 | # The reST default role cross-links Python (used for this markup: `text`) 70 | default_role = "py:obj" 71 | 72 | # Intersphinx 73 | 74 | intersphinx_mapping = { 75 | # 'python': ('https://docs.python.org/3/', None), 76 | } 77 | 78 | # -- Options for linkcheck builder ---------------------------------------- 79 | 80 | linkcheck_retries = 2 81 | 82 | # -- Options for HTML output ---------------------------------------------- 83 | 84 | templates_path = [ 85 | "_templates", 86 | lsst_sphinx_bootstrap_theme.get_html_templates_path(), 87 | ] 88 | 89 | html_theme = "lsst_sphinx_bootstrap_theme" 90 | html_theme_path = [lsst_sphinx_bootstrap_theme.get_html_theme_path()] 91 | 92 | 93 | html_context = { 94 | # Enable "Edit in GitHub" link 95 | "display_github": True, 96 | # https://{{ github_host|default("github.com") }}/{{ github_user }}/ 97 | # {{ github_repo }}/blob/ 98 | # {{ github_version }}{{ conf_py_path }}{{ pagename }}{{ suffix }} 99 | "github_user": "lsst-sqre", 100 | "github_repo": "kafka-aggregator", 101 | "conf_py_path": "docs/", 102 | # GITHUB_REF is available in GitHub Actions, but master is a safe default 103 | "github_version": os.getenv("GITHUB_REF", default="master") + "/", 104 | } 105 | 106 | # Theme options are theme-specific and customize the look and feel of a theme 107 | # further. For a list of options available for each theme, see the 108 | # documentation. 109 | html_theme_options = {"logotext": project} 110 | 111 | # The name for this set of Sphinx documents. If None, it defaults to 112 | # " v documentation". 113 | # html_title = None 114 | 115 | # A shorter title for the navigation bar. Default is the same as html_title. 116 | html_short_title = f"{project}" 117 | 118 | # Add any paths that contain custom static files (such as style sheets) here, 119 | # relative to this directory. 
They are copied after the builtin static files, 120 | # so a file named "default.css" will overwrite the builtin "default.css". 121 | html_static_path = [] 122 | 123 | # If true, links to the reST sources are added to the pages. 124 | html_show_sourcelink = False 125 | 126 | # Do not copy reST source for each page into the build 127 | html_copy_source = False 128 | 129 | # If false, no module index is generated. 130 | html_domain_indices = True 131 | 132 | # If false, no index is generated. 133 | html_use_index = True 134 | 135 | # API Reference ============================================================== 136 | 137 | napoleon_google_docstring = False 138 | napoleon_numpy_docstring = True 139 | napoleon_include_init_with_doc = False 140 | napoleon_include_private_with_doc = False 141 | napoleon_include_special_with_doc = True 142 | napoleon_use_admonition_for_examples = False 143 | napoleon_use_admonition_for_notes = False 144 | napoleon_use_admonition_for_references = False 145 | napoleon_use_ivar = False 146 | napoleon_use_keyword = True # TODO 147 | napoleon_use_param = True 148 | napoleon_use_rtype = True 149 | 150 | autosummary_generate = True 151 | 152 | automodapi_inheritance_diagram = True 153 | automodapi_toctreedirnm = "api" 154 | automodsumm_inherited_members = True 155 | 156 | # Docstrings for classes and methods are inherited from parents. 157 | autodoc_inherit_docstrings = True 158 | 159 | # Class documentation should only contain the class docstring and 160 | # ignore the __init__ docstring, account to LSST coding standards. 161 | autoclass_content = "class" 162 | 163 | # Default flags for automodapi directives. Special members are dunder 164 | # methods. 165 | autodoc_default_options = { 166 | "show-inheritance": True, 167 | "special-members": True, 168 | } 169 | 170 | # Render inheritance diagrams in SVG 171 | graphviz_output_format = "svg" 172 | 173 | graphviz_dot_args = [ 174 | "-Nfontsize=10", 175 | "-Nfontname=Helvetica Neue, Helvetica, Arial, sans-serif", 176 | "-Efontsize=10", 177 | "-Efontname=Helvetica Neue, Helvetica, Arial, sans-serif", 178 | "-Gfontsize=10", 179 | "-Gfontname=Helvetica Neue, Helvetica, Arial, sans-serif", 180 | ] 181 | 182 | # TODO extension ============================================================= 183 | 184 | todo_include_todos = False 185 | -------------------------------------------------------------------------------- /docs/userguide.rst: -------------------------------------------------------------------------------- 1 | ########################### 2 | How to run kafka-aggregator 3 | ########################### 4 | 5 | 6 | Running locally with docker-compose 7 | =================================== 8 | 9 | In this guide, we use ``docker-compose`` to illustrate how to run kafka-aggregator. To run kafka-aggregator on a Kubernetes environment see the :ref:`installation` section instead. 10 | 11 | kafka-aggregator `docker-compose configuration`_ includes services to run Confluent Kafka (zookeeper, broker, schema-registry and control-center) and was based on `this example`_. 12 | 13 | .. _docker-compose configuration: https://github.com/lsst-sqre/kafka-aggregator/blob/master/docker-compose.yaml 14 | .. _this example: https://github.com/confluentinc/examples/blob/5.3.2-post/cp-all-in-one/docker-compose.yml 15 | 16 | Clone the kafka-aggregator repository: 17 | 18 | .. code-block:: bash 19 | 20 | $ git clone https://github.com/lsst-sqre/kafka-aggregator.git 21 | 22 | Start the `zookeeper`, `broker`, and `schema-registry` services: 23 | 24 | .. 
code-block:: bash 25 | 26 | docker-compose up -d zookeeper broker schema-registry 27 | 28 | On another terminal session, create a new Python virtual environment and install kafka-aggregator locally: 29 | 30 | .. code-block:: bash 31 | 32 | $ cd kafka-aggregator 33 | $ virtualenv -p Python3 venv 34 | $ source venv/bin/activate 35 | $ make update 36 | 37 | 38 | Initializing source topics 39 | ========================== 40 | 41 | .. note:: 42 | In a production environment we expect that the source topics already exist in Kafka and that their Avro schemas are available from the Schema Registry. 43 | 44 | 45 | Using the kafka-aggregator example module, you can initialize source topics in Kafka, control the number of fields in each topic, and produce messages for those topics at a given frequency. 46 | 47 | With the default :ref:`configuration`, this command will initialize 10 source topics with 10 fields each and register their Avro schemas with the Schema Registry. 48 | 49 | .. code-block:: bash 50 | 51 | kafkaaggregator -l info init-example 52 | 53 | You can check that the source topics were created in Kafka: 54 | 55 | .. code-block:: bash 56 | 57 | docker-compose exec broker kafka-topics --bootstrap-server broker:29092 --list 58 | 59 | 60 | The Avro schemas were registered with the Schema Registry: 61 | 62 | .. code-block:: bash 63 | 64 | curl http://localhost:8081/subjects 65 | 66 | 67 | Generating Faust agents 68 | ======================= 69 | 70 | Use this command to generate the Faust agents to process the source topics. 71 | 72 | .. code-block:: bash 73 | 74 | kafkaaggregator -l info generate-agents 75 | 76 | .. note:: 77 | 78 | By default agents are generated under the ``./agents`` folder where kafka-aggregator runs. 79 | 80 | For the source topics initialized with the kafka-aggregator example module you should have this output: 81 | 82 | .. code-block:: bash 83 | 84 | kafkaaggregator -l info agents 85 | [2020-07-06 18:30:58,115] [54727] [INFO] [^Worker]: Starting... 86 | ┌Agents─────────────────────────────┬─────────────┬──────────────────────────────────────────────────────┐ 87 | │ name │ topic │ help │ 88 | ├───────────────────────────────────┼─────────────┼──────────────────────────────────────────────────────┤ 89 | │ @example-000.process_source_topic │ example-000 │ Process incoming messages for the example-000 topic. │ 90 | │ @example-001.process_source_topic │ example-001 │ Process incoming messages for the example-001 topic. │ 91 | │ @example-002.process_source_topic │ example-002 │ Process incoming messages for the example-002 topic. │ 92 | │ @example-003.process_source_topic │ example-003 │ Process incoming messages for the example-003 topic. │ 93 | │ @example-004.process_source_topic │ example-004 │ Process incoming messages for the example-004 topic. │ 94 | │ @example-005.process_source_topic │ example-005 │ Process incoming messages for the example-005 topic. │ 95 | │ @example-006.process_source_topic │ example-006 │ Process incoming messages for the example-006 topic. │ 96 | │ @example-007.process_source_topic │ example-007 │ Process incoming messages for the example-007 topic. │ 97 | │ @example-008.process_source_topic │ example-008 │ Process incoming messages for the example-008 topic. │ 98 | │ @example-009.process_source_topic │ example-009 │ Process incoming messages for the example-009 topic. 
│ 99 | └───────────────────────────────────┴─────────────┴──────────────────────────────────────────────────────┘ 100 | [2020-07-06 18:30:58,153] [54727] [INFO] [^Worker]: Stopping... 101 | [2020-07-06 18:30:58,153] [54727] [INFO] [^Worker]: Gathering service tasks... 102 | [2020-07-06 18:30:58,153] [54727] [INFO] [^Worker]: Gathering all futures... 103 | [2020-07-06 18:30:59,156] [54727] [INFO] [^Worker]: Closing event loop 104 | 105 | 106 | Starting a worker 107 | ================= 108 | 109 | Use this command to start a kafka-aggregator worker: 110 | 111 | .. code-block:: bash 112 | 113 | kafkaaggregator -l info worker 114 | 115 | 116 | Producing messages 117 | ================== 118 | 119 | On another terminal use this command to produce messages for the source topics. This command produces 6000 messages at 10Hz. 120 | 121 | .. code-block:: bash 122 | 123 | kafkaaggregator -l info produce --frequency 10 --max-messages 6000 124 | 125 | You can use `Confluent Control Center `_ to inspect the messages for the source and aggregation topics or use the following from the command line: 126 | 127 | .. code-block:: bash 128 | 129 | docker-compose exec broker /bin/bash 130 | root@broker:/# kafka-console-consumer --bootstrap-server broker:9092 --topic example-000 131 | ... 132 | root@broker:/# kafka-console-consumer --bootstrap-server broker:9092 --topic example-000-aggregated 133 | 134 | 135 | Inspecting the consumer lag 136 | =========================== 137 | 138 | An important aspect to look at is the consumer lag for the ``kafkaaggregator`` consumers. An advantage of Faust is that you can easily add more workers to distribute the workload of the application. If the source topics are created with multiple partitions, individual partitions are assigned to different workers. 139 | 140 | 141 | Internal vs. external managed topics 142 | ==================================== 143 | 144 | Faust manages topics declared as `internal` by the agents, like the aggregation topic, which is created by Faust and whose schema is also controlled by a Faust Record. 145 | 146 | The kafka-aggregator example also demonstrates that we can aggregate source topics that are declared as `external`, i.e. not managed by Faust. The agents assume that external topics exist and the messages can be deserialized using the Avro schemas, without specifying a model for the external topic in Faust. 147 | -------------------------------------------------------------------------------- /src/kafkaaggregator/aggregator.py: -------------------------------------------------------------------------------- 1 | """Create the aggregation model and compute summary statistics. 2 | 3 | Given a source topic and a list of field names to exclude from aggregation 4 | create the aggregation model and compute summary statistics. 5 | 6 | kafka-aggregator adds the aggregation fields `time`, `window_size`, and 7 | `count` and computes `min`, `mean`, `stdev`, `median`, and `max` statistics 8 | for every numeric field in the source topic. 
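
A minimal usage sketch, assuming a reachable Schema Registry that already
holds the source topic schema and a configuration file like
``tests/data/config/aggregator_config.yaml`` (the message field names and
values below are illustrative)::

    from pathlib import Path

    from kafkaaggregator.aggregator import Aggregator

    aggregator = Aggregator(
        configfile=Path("tests/data/config/aggregator_config.yaml"),
        aggregated_topic="aggregated_example0",
    )
    # Create the Faust-avro Record and register its Avro schema.
    aggregator.async_create_record()
    # Summarize the messages that fall within one aggregation window.
    aggregated = aggregator.compute(
        time=0.5,
        messages=[{"spam": 1.0, "ham": 2.0, "eggs": 3.0}],
    )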
9 | """
10 |
11 |
12 | __all__ = ["Aggregator"]
13 |
14 | import asyncio
15 | import json
16 | import logging
17 | from pathlib import Path
18 | from statistics import StatisticsError
19 | from typing import Any, List
20 |
21 | from faust_avro import Record
22 |
23 | from kafkaaggregator.aggregator_config import AggregatorConfig
24 | from kafkaaggregator.fields import Field
25 | from kafkaaggregator.models import create_record
26 | from kafkaaggregator.operations import (  # noqa: F401
27 |     mean,
28 |     median,
29 |     q1,
30 |     q3,
31 |     stdev,
32 | )
33 | from kafkaaggregator.topics import AggregatedTopic, SourceTopic
34 |
35 | logger = logging.getLogger("kafkaaggregator")
36 |
37 |
38 | class Aggregator:
39 |     """Create the aggregation model and compute summary statistics.
40 |
41 |     Given a source topic and a list of field names to exclude from aggregation
42 |     create the aggregation model and compute summary statistics.
43 |
44 |     kafka-aggregator adds the aggregation fields `time`, `window_size`, and
45 |     `count` and computes `min`, `mean`, `stdev`, `median`, and `max` statistics
46 |     for every numeric field in the source topic.
47 |
48 |     Parameters
49 |     ----------
50 |     configfile : `pathlib.Path`
51 |         Path to the aggregator configuration file.
52 |     aggregated_topic : `str`
53 |         Name of the aggregated topic.
54 |     """
55 |
56 |     logger = logger
57 |
58 |     def __init__(self, configfile: Path, aggregated_topic: str) -> None:
59 |
60 |         self._aggregated_topic = AggregatedTopic(name=aggregated_topic)
61 |
62 |         config = AggregatorConfig(configfile).get(aggregated_topic)
63 |         self._operations = config.window_aggregation.operations
64 |         self._window_size_seconds = (
65 |             config.window_aggregation.window_size_seconds
66 |         )
67 |         self._min_sample_size = config.window_aggregation.min_sample_size
68 |
69 |         # Supports the 1 source topic -> 1 aggregated topic case for the moment
70 |         source_topic = config.source_topics[0]
71 |
72 |         self._source_topic = SourceTopic(name=source_topic)
73 |         self._fields = config.get(source_topic).fields
74 |         self._create_record = create_record
75 |
76 |         self._aggregated_fields: List[Field] = []
77 |         self._record: Record = None
78 |
79 |     @staticmethod
80 |     def _create_aggregated_fields(
81 |         fields: List[Field],
82 |         operations: List[str],
83 |     ) -> List[Field]:
84 |         """Create aggregated topic fields based on the source topic fields.
85 |
86 |         Add the fields `time`, `window_size`, and `count`.
87 |         For each numeric field in the source topic add new fields for each
88 |         configured operation.
89 |
90 |         Parameters
91 |         ----------
92 |         fields : `list` [`Field`]
93 |             List of fields to aggregate.
94 |         operations : `list`
95 |             List of operations to perform.
96 |
97 |         Returns
98 |         -------
99 |         aggregated_fields : `list` [`Field`]
100 |             List of aggregated fields.
101 |         """
102 |         time = Field(name="time", type=float)
103 |         window_size = Field(name="window_size", type=float)
104 |         count = Field(name="count", type=int)
105 |
106 |         aggregated_fields = [time, window_size, count]
107 |
108 |         for field in fields:
109 |             # Only numeric fields are aggregated
110 |             if field.type in (int, float):
111 |                 for operation in operations:
112 |                     f = Field(
113 |                         name=f"{operation}_{field.name}",
114 |                         type=float,
115 |                         source_field_name=field.name,
116 |                         operation=operation,
117 |                     )
118 |                     aggregated_fields.append(f)
119 |
120 |         return aggregated_fields
121 |
122 |     async def create_record(self) -> Record:
123 |         """Create a Faust-avro Record class for the aggregation topic.
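
        The record's Avro schema is also registered with the Schema
        Registry (see ``_register``).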
124 |
125 |         Returns
126 |         -------
127 |         record : `Record`
128 |             Faust-avro Record class for the aggregated topic.
129 |         """
130 |         aggregated_topic_name = self._aggregated_topic.name
131 |         logger.info(f"Create Faust record for topic {aggregated_topic_name}.")
132 |
133 |         cls_name = aggregated_topic_name.title().replace("-", "")
134 |
135 |         # TODO: add ability to filter fields
136 |         source_fields = await self._source_topic.get_fields()
137 |
138 |         self._aggregated_fields = self._create_aggregated_fields(
139 |             source_fields, self._operations
140 |         )
141 |
142 |         self._record = self._create_record(
143 |             cls_name=cls_name,
144 |             fields=self._aggregated_fields,
145 |             doc=f"Faust record for topic {aggregated_topic_name}.",
146 |         )
147 |
148 |         await self._register(self._record)
149 |
150 |         return self._record
151 |
152 |     def async_create_record(self) -> Record:
153 |         """Sync call to ``async create_record()``.
154 |
155 |         Get the current event loop and call the async ``create_record()``
156 |         method.
157 |
158 |         Returns
159 |         -------
160 |         record : `Record`
161 |             Faust-avro Record class for the aggregation topic.
162 |         """
163 |         loop = asyncio.get_event_loop()
164 |         record = loop.run_until_complete(self.create_record())
165 |         return record
166 |
167 |     async def _register(self, record: Record) -> None:
168 |         """Register the Avro schema for the aggregation topic.
169 |
170 |         Parameters
171 |         ----------
172 |         record : `Record`
173 |             Faust-avro Record for the aggregation model.
174 |         """
175 |         topic_name = self._aggregated_topic.name
176 |         logger.info(f"Register Avro schema for topic {topic_name}.")
177 |         schema = record.to_avro(registry=self._aggregated_topic._registry)
178 |
179 |         await self._aggregated_topic.register(schema=json.dumps(schema))
180 |
181 |     def compute(
182 |         self,
183 |         time: float,
184 |         messages: List[Any],
185 |     ) -> Record:
186 |         """Compute summary statistics for a list of messages.
187 |
188 |         Parameters
189 |         ----------
190 |         time : `float`
191 |             The timestamp of the aggregated message, typically the midpoint
192 |             of the aggregation window.
193 |         messages : `list`
194 |             List of messages from which to compute the summary statistics.
195 |
196 |         Returns
197 |         -------
198 |         aggregated_message : `Record`
199 |             Aggregated message.
200 |         """
201 |         if not self._record:
202 |             msg = (
203 |                 "Use Aggregator.create_record() to create the Faust record "
204 |                 "for the aggregated topic first."
205 |             )
206 |             raise RuntimeError(msg)
207 |
208 |         count = len(messages)
209 |
210 |         aggregated_values = {
211 |             "count": count,
212 |             "time": time,
213 |             "window_size": self._window_size_seconds,
214 |         }
215 |
216 |         for aggregated_field in self._aggregated_fields:
217 |
218 |             if aggregated_field.operation:
219 |
220 |                 source_field_name = aggregated_field.source_field_name
221 |                 values = [message[source_field_name] for message in messages]
222 |
223 |                 try:
224 |                     operation = aggregated_field.operation
225 |                     # Make sure there are enough values to compute statistics
226 |                     if len(values) >= self._min_sample_size:
227 |                         aggregated_value = eval(operation)(values)
228 |                     else:
229 |                         # use the first value instead
230 |                         aggregated_value = values[0]
231 |                 except Exception:
232 |                     msg = f"Error computing {operation} of {values}."
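                    # Re-raise as StatisticsError so the caller can see which
                    # operation and input values failed.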
233 | raise StatisticsError(msg) 234 | 235 | aggregated_values.update( 236 | {aggregated_field.name: aggregated_value} 237 | ) 238 | 239 | aggregated_message = self._record(**aggregated_values) 240 | 241 | return aggregated_message 242 | -------------------------------------------------------------------------------- /requirements/main.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with python 3.9 3 | # To update, run: 4 | # 5 | # pip-compile --generate-hashes --output-file=requirements/main.txt requirements/main.in 6 | # 7 | aiofiles==0.8.0 \ 8 | --hash=sha256:7a973fc22b29e9962d0897805ace5856e6a566ab1f0c8e5c91ff6c866519c937 \ 9 | --hash=sha256:8334f23235248a3b2e83b2c3a78a22674f39969b96397126cc93664d9a901e59 10 | # via -r requirements/main.in 11 | aiohttp==3.8.1 \ 12 | --hash=sha256:01d7bdb774a9acc838e6b8f1d114f45303841b89b95984cbb7d80ea41172a9e3 \ 13 | --hash=sha256:03a6d5349c9ee8f79ab3ff3694d6ce1cfc3ced1c9d36200cb8f08ba06bd3b782 \ 14 | --hash=sha256:04d48b8ce6ab3cf2097b1855e1505181bdd05586ca275f2505514a6e274e8e75 \ 15 | --hash=sha256:0770e2806a30e744b4e21c9d73b7bee18a1cfa3c47991ee2e5a65b887c49d5cf \ 16 | --hash=sha256:07b05cd3305e8a73112103c834e91cd27ce5b4bd07850c4b4dbd1877d3f45be7 \ 17 | --hash=sha256:086f92daf51a032d062ec5f58af5ca6a44d082c35299c96376a41cbb33034675 \ 18 | --hash=sha256:099ebd2c37ac74cce10a3527d2b49af80243e2a4fa39e7bce41617fbc35fa3c1 \ 19 | --hash=sha256:0c7ebbbde809ff4e970824b2b6cb7e4222be6b95a296e46c03cf050878fc1785 \ 20 | --hash=sha256:102e487eeb82afac440581e5d7f8f44560b36cf0bdd11abc51a46c1cd88914d4 \ 21 | --hash=sha256:11691cf4dc5b94236ccc609b70fec991234e7ef8d4c02dd0c9668d1e486f5abf \ 22 | --hash=sha256:11a67c0d562e07067c4e86bffc1553f2cf5b664d6111c894671b2b8712f3aba5 \ 23 | --hash=sha256:12de6add4038df8f72fac606dff775791a60f113a725c960f2bab01d8b8e6b15 \ 24 | --hash=sha256:13487abd2f761d4be7c8ff9080de2671e53fff69711d46de703c310c4c9317ca \ 25 | --hash=sha256:15b09b06dae900777833fe7fc4b4aa426556ce95847a3e8d7548e2d19e34edb8 \ 26 | --hash=sha256:1c182cb873bc91b411e184dab7a2b664d4fea2743df0e4d57402f7f3fa644bac \ 27 | --hash=sha256:1ed0b6477896559f17b9eaeb6d38e07f7f9ffe40b9f0f9627ae8b9926ae260a8 \ 28 | --hash=sha256:28d490af82bc6b7ce53ff31337a18a10498303fe66f701ab65ef27e143c3b0ef \ 29 | --hash=sha256:2e5d962cf7e1d426aa0e528a7e198658cdc8aa4fe87f781d039ad75dcd52c516 \ 30 | --hash=sha256:2ed076098b171573161eb146afcb9129b5ff63308960aeca4b676d9d3c35e700 \ 31 | --hash=sha256:2f2f69dca064926e79997f45b2f34e202b320fd3782f17a91941f7eb85502ee2 \ 32 | --hash=sha256:31560d268ff62143e92423ef183680b9829b1b482c011713ae941997921eebc8 \ 33 | --hash=sha256:31d1e1c0dbf19ebccbfd62eff461518dcb1e307b195e93bba60c965a4dcf1ba0 \ 34 | --hash=sha256:37951ad2f4a6df6506750a23f7cbabad24c73c65f23f72e95897bb2cecbae676 \ 35 | --hash=sha256:3af642b43ce56c24d063325dd2cf20ee012d2b9ba4c3c008755a301aaea720ad \ 36 | --hash=sha256:44db35a9e15d6fe5c40d74952e803b1d96e964f683b5a78c3cc64eb177878155 \ 37 | --hash=sha256:473d93d4450880fe278696549f2e7aed8cd23708c3c1997981464475f32137db \ 38 | --hash=sha256:477c3ea0ba410b2b56b7efb072c36fa91b1e6fc331761798fa3f28bb224830dd \ 39 | --hash=sha256:4a4a4e30bf1edcad13fb0804300557aedd07a92cabc74382fdd0ba6ca2661091 \ 40 | --hash=sha256:4aed991a28ea3ce320dc8ce655875e1e00a11bdd29fe9444dd4f88c30d558602 \ 41 | --hash=sha256:51467000f3647d519272392f484126aa716f747859794ac9924a7aafa86cd411 \ 42 | --hash=sha256:55c3d1072704d27401c92339144d199d9de7b52627f724a949fc7d5fc56d8b93 \ 43 | 
--hash=sha256:589c72667a5febd36f1315aa6e5f56dd4aa4862df295cb51c769d16142ddd7cd \ 44 | --hash=sha256:5bfde62d1d2641a1f5173b8c8c2d96ceb4854f54a44c23102e2ccc7e02f003ec \ 45 | --hash=sha256:5c23b1ad869653bc818e972b7a3a79852d0e494e9ab7e1a701a3decc49c20d51 \ 46 | --hash=sha256:61bfc23df345d8c9716d03717c2ed5e27374e0fe6f659ea64edcd27b4b044cf7 \ 47 | --hash=sha256:6ae828d3a003f03ae31915c31fa684b9890ea44c9c989056fea96e3d12a9fa17 \ 48 | --hash=sha256:6c7cefb4b0640703eb1069835c02486669312bf2f12b48a748e0a7756d0de33d \ 49 | --hash=sha256:6d69f36d445c45cda7b3b26afef2fc34ef5ac0cdc75584a87ef307ee3c8c6d00 \ 50 | --hash=sha256:6f0d5f33feb5f69ddd57a4a4bd3d56c719a141080b445cbf18f238973c5c9923 \ 51 | --hash=sha256:6f8b01295e26c68b3a1b90efb7a89029110d3a4139270b24fda961893216c440 \ 52 | --hash=sha256:713ac174a629d39b7c6a3aa757b337599798da4c1157114a314e4e391cd28e32 \ 53 | --hash=sha256:718626a174e7e467f0558954f94af117b7d4695d48eb980146016afa4b580b2e \ 54 | --hash=sha256:7187a76598bdb895af0adbd2fb7474d7f6025d170bc0a1130242da817ce9e7d1 \ 55 | --hash=sha256:71927042ed6365a09a98a6377501af5c9f0a4d38083652bcd2281a06a5976724 \ 56 | --hash=sha256:7d08744e9bae2ca9c382581f7dce1273fe3c9bae94ff572c3626e8da5b193c6a \ 57 | --hash=sha256:7dadf3c307b31e0e61689cbf9e06be7a867c563d5a63ce9dca578f956609abf8 \ 58 | --hash=sha256:81e3d8c34c623ca4e36c46524a3530e99c0bc95ed068fd6e9b55cb721d408fb2 \ 59 | --hash=sha256:844a9b460871ee0a0b0b68a64890dae9c415e513db0f4a7e3cab41a0f2fedf33 \ 60 | --hash=sha256:8b7ef7cbd4fec9a1e811a5de813311ed4f7ac7d93e0fda233c9b3e1428f7dd7b \ 61 | --hash=sha256:97ef77eb6b044134c0b3a96e16abcb05ecce892965a2124c566af0fd60f717e2 \ 62 | --hash=sha256:99b5eeae8e019e7aad8af8bb314fb908dd2e028b3cdaad87ec05095394cce632 \ 63 | --hash=sha256:a25fa703a527158aaf10dafd956f7d42ac6d30ec80e9a70846253dd13e2f067b \ 64 | --hash=sha256:a2f635ce61a89c5732537a7896b6319a8fcfa23ba09bec36e1b1ac0ab31270d2 \ 65 | --hash=sha256:a79004bb58748f31ae1cbe9fa891054baaa46fb106c2dc7af9f8e3304dc30316 \ 66 | --hash=sha256:a996d01ca39b8dfe77440f3cd600825d05841088fd6bc0144cc6c2ec14cc5f74 \ 67 | --hash=sha256:b0e20cddbd676ab8a64c774fefa0ad787cc506afd844de95da56060348021e96 \ 68 | --hash=sha256:b6613280ccedf24354406caf785db748bebbddcf31408b20c0b48cb86af76866 \ 69 | --hash=sha256:b9d00268fcb9f66fbcc7cd9fe423741d90c75ee029a1d15c09b22d23253c0a44 \ 70 | --hash=sha256:bb01ba6b0d3f6c68b89fce7305080145d4877ad3acaed424bae4d4ee75faa950 \ 71 | --hash=sha256:c2aef4703f1f2ddc6df17519885dbfa3514929149d3ff900b73f45998f2532fa \ 72 | --hash=sha256:c34dc4958b232ef6188c4318cb7b2c2d80521c9a56c52449f8f93ab7bc2a8a1c \ 73 | --hash=sha256:c3630c3ef435c0a7c549ba170a0633a56e92629aeed0e707fec832dee313fb7a \ 74 | --hash=sha256:c3d6a4d0619e09dcd61021debf7059955c2004fa29f48788a3dfaf9c9901a7cd \ 75 | --hash=sha256:d15367ce87c8e9e09b0f989bfd72dc641bcd04ba091c68cd305312d00962addd \ 76 | --hash=sha256:d2f9b69293c33aaa53d923032fe227feac867f81682f002ce33ffae978f0a9a9 \ 77 | --hash=sha256:e999f2d0e12eea01caeecb17b653f3713d758f6dcc770417cf29ef08d3931421 \ 78 | --hash=sha256:ea302f34477fda3f85560a06d9ebdc7fa41e82420e892fc50b577e35fc6a50b2 \ 79 | --hash=sha256:eaba923151d9deea315be1f3e2b31cc39a6d1d2f682f942905951f4e40200922 \ 80 | --hash=sha256:ef9612483cb35171d51d9173647eed5d0069eaa2ee812793a75373447d487aa4 \ 81 | --hash=sha256:f5315a2eb0239185af1bddb1abf472d877fede3cc8d143c6cddad37678293237 \ 82 | --hash=sha256:fa0ffcace9b3aa34d205d8130f7873fcfefcb6a4dd3dd705b0dab69af6712642 \ 83 | --hash=sha256:fc5471e1a54de15ef71c1bc6ebe80d4dc681ea600e68bfd1cbce40427f0b7578 84 | # via 85 | # aiohttp-cors 86 | 
# faust 87 | # faust-avro 88 | aiohttp-cors==0.7.0 \ 89 | --hash=sha256:0451ba59fdf6909d0e2cd21e4c0a43752bc0703d33fc78ae94d9d9321710193e \ 90 | --hash=sha256:4d39c6d7100fd9764ed1caf8cebf0eb01bf5e3f24e2e073fda6234bc48b19f5d 91 | # via faust 92 | aiosignal==1.2.0 \ 93 | --hash=sha256:26e62109036cd181df6e6ad646f91f0dcfd05fe16d0cb924138ff2ab75d64e3a \ 94 | --hash=sha256:78ed67db6c7b7ced4f98e495e572106d5c432a93e1ddd1bf475e1dc05f5b7df2 95 | # via aiohttp 96 | async-timeout==4.0.2 \ 97 | --hash=sha256:2163e1640ddb52b7a8c80d0a67a08587e5d245cc9c553a74a847056bc2976b15 \ 98 | --hash=sha256:8ca1e4fcf50d07413d66d1a5e416e42cfdf5851c981d679a09851a6853383b3c 99 | # via aiohttp 100 | attrs==21.4.0 \ 101 | --hash=sha256:2d27e3784d7a565d36ab851fe94887c5eccd6a463168875832a1be79c82828b4 \ 102 | --hash=sha256:626ba8234211db98e869df76230a137c4c40a12d72445c45d5f5b716f076e2fd 103 | # via aiohttp 104 | charset-normalizer==2.1.0 \ 105 | --hash=sha256:5189b6f22b01957427f35b6a08d9a0bc45b46d3788ef5a92e978433c7a35f8a5 \ 106 | --hash=sha256:575e708016ff3a5e3681541cb9d79312c416835686d054a23accb873b254f413 107 | # via aiohttp 108 | click==7.1.2 \ 109 | --hash=sha256:d2b5255c7c6349bc1bd1e59e08cd12acbbd63ce649f2588755783aa94dfb6b1a \ 110 | --hash=sha256:dacca89f4bfadd5de3d7489b7c8a566eee0d3676333fbb50030263894c38c0dc 111 | # via faust 112 | colorclass==2.2.2 \ 113 | --hash=sha256:6d4fe287766166a98ca7bc6f6312daf04a0481b1eda43e7173484051c0ab4366 \ 114 | --hash=sha256:6f10c273a0ef7a1150b1120b6095cbdd68e5cf36dfd5d0fc957a2500bbf99a55 115 | # via faust 116 | colorlog==6.6.0 \ 117 | --hash=sha256:344f73204009e4c83c5b6beb00b3c45dc70fcdae3c80db919e0a4171d006fde8 \ 118 | --hash=sha256:351c51e866c86c3217f08e4b067a7974a678be78f07f85fc2d55b8babde6d94e 119 | # via mode 120 | croniter==1.3.5 \ 121 | --hash=sha256:4f72faca42c00beb6e30907f1315145f43dfbe5ec0ad4ada24b4c0d57b86a33a \ 122 | --hash=sha256:7592fc0e8a00d82af98dfa2768b75983b6fb4c2adc8f6d0d7c931a715b7cefee 123 | # via faust 124 | fastavro==0.22.13 \ 125 | --hash=sha256:0456f0dd1b0887bff3b78e1be98cafdda2dc60840f28588d22e3f1464960c174 \ 126 | --hash=sha256:1244735758e8e7dac85c9f11fcef3044eb7977a28a225cad7da465d4189488be \ 127 | --hash=sha256:31519ff01c2bcd0554f7963f4d29c5fed3d0dc0b9ca4f812dced4134ae7483ce \ 128 | --hash=sha256:343af15333093329b1066bf3cfd04e8a911b852c3da6805d30acefdecd8649cb \ 129 | --hash=sha256:36d482398bc5bd4a70cbd18e003e6b434c55fb0e874b2e67785bf914c80c4e70 \ 130 | --hash=sha256:3dbbd053aafaf513ea00b0b058a5ad9056d519fe803396149498df7b57797169 \ 131 | --hash=sha256:4338b0f822065b5e363a623b004a01fb66131aca6ddca032e90e391a7eed8606 \ 132 | --hash=sha256:60f2ec7a9cfa90fc5ddcd6d916f0163c5e1a32d17f008e8ec50dbbb95b1ecbc0 \ 133 | --hash=sha256:741862281083cbdb071b7667ed1d5bbd0d7dce265aae91ed6f43d6fa9d72befc \ 134 | --hash=sha256:806f0baf18beb541215b3d79c5288f9a8b615422c7c43b0556906c3774ed322f \ 135 | --hash=sha256:8377b0739a97e3c3fc6975b8929818dc84fdbac7902692341a473b00f11095f2 \ 136 | --hash=sha256:b2483674f56e8a023191574ead3fc6f92fb0b46a6e61b91a6db3adba00c1ad21 \ 137 | --hash=sha256:c1bb045adf28ffaf572042b03ade8c32ab33eb4dac780d8b266b8fa3614c7ed8 \ 138 | --hash=sha256:c304da104f7df86229d0f9ebff261dc8909110e297772911f5607c36d7b2427a \ 139 | --hash=sha256:c9a17a59e4b05c7d75d749abd91e61923efce97c89712ceb5e74c57c132ec77d \ 140 | --hash=sha256:d35bd760a201a384803c5d50c61144a180442251a03f583857a460fc19004e63 \ 141 | --hash=sha256:d90ab3b49876c281383e65380e962fec38337ed58496a53196f48dfd50ab36f3 \ 142 | --hash=sha256:dc91349316c284f4f760ddf5cfb65fa03ef4ca449988f9aa426e8c840f2b81d1 
\ 143 | --hash=sha256:e4f2803f0731871c606c08d93ed6d777ccc70b6dd09fd66a9271c11b7f698737 \ 144 | --hash=sha256:e5fc98a72c9a2e5dfd04d2a4790bdf389db8cf2c1bc2eeac59801ff0972b4d70 \ 145 | --hash=sha256:f41e959af7f354544b056e32226fc40db560d97dfbc1db5660d6faad6ea37696 \ 146 | --hash=sha256:ff08d511b9b2a07373b290c0025dc40d93820d20f5c78841ba1447d596b2bbcb 147 | # via faust-avro 148 | faust==1.10.4 \ 149 | --hash=sha256:128dc0b9483aa4009edcc8b23f5c132757f2329c5da1fcc144d4c1d1dd63f156 \ 150 | --hash=sha256:4ae94762a16c3ef70aa1e77772ff94395c2068ecbb99e1acce252d0f1156bd2c \ 151 | --hash=sha256:abaade164bde21cd5f41dff24a203ff91f2c935c849c8e6a807f854bf84ea77d \ 152 | --hash=sha256:cfd47e756825eb8c6d197c1a2f25199aef2895cd31b522c74ec2dbb95dfa6fcc \ 153 | --hash=sha256:ffcd350ea29d528f6814fc9a42b5e50e130310da054a93e9d8216ef89a254611 154 | # via faust-avro 155 | faust-avro==0.4.1 \ 156 | --hash=sha256:4a45e59c365eb335268f5724ce9c3476ee0d27047af948453ab79ad73480ace1 \ 157 | --hash=sha256:9380a31178b8bac3d543826d44b02a82c0a8a05be8d1dbbf8f26e6700e1de0f1 158 | # via -r requirements/main.in 159 | frozenlist==1.3.0 \ 160 | --hash=sha256:006d3595e7d4108a12025ddf415ae0f6c9e736e726a5db0183326fd191b14c5e \ 161 | --hash=sha256:01a73627448b1f2145bddb6e6c2259988bb8aee0fb361776ff8604b99616cd08 \ 162 | --hash=sha256:03a7dd1bfce30216a3f51a84e6dd0e4a573d23ca50f0346634916ff105ba6e6b \ 163 | --hash=sha256:0437fe763fb5d4adad1756050cbf855bbb2bf0d9385c7bb13d7a10b0dd550486 \ 164 | --hash=sha256:04cb491c4b1c051734d41ea2552fde292f5f3a9c911363f74f39c23659c4af78 \ 165 | --hash=sha256:0c36e78b9509e97042ef869c0e1e6ef6429e55817c12d78245eb915e1cca7468 \ 166 | --hash=sha256:25af28b560e0c76fa41f550eacb389905633e7ac02d6eb3c09017fa1c8cdfde1 \ 167 | --hash=sha256:2fdc3cd845e5a1f71a0c3518528bfdbfe2efaf9886d6f49eacc5ee4fd9a10953 \ 168 | --hash=sha256:30530930410855c451bea83f7b272fb1c495ed9d5cc72895ac29e91279401db3 \ 169 | --hash=sha256:31977f84828b5bb856ca1eb07bf7e3a34f33a5cddce981d880240ba06639b94d \ 170 | --hash=sha256:3c62964192a1c0c30b49f403495911298810bada64e4f03249ca35a33ca0417a \ 171 | --hash=sha256:3f7c935c7b58b0d78c0beea0c7358e165f95f1fd8a7e98baa40d22a05b4a8141 \ 172 | --hash=sha256:40dff8962b8eba91fd3848d857203f0bd704b5f1fa2b3fc9af64901a190bba08 \ 173 | --hash=sha256:40ec383bc194accba825fbb7d0ef3dda5736ceab2375462f1d8672d9f6b68d07 \ 174 | --hash=sha256:436496321dad302b8b27ca955364a439ed1f0999311c393dccb243e451ff66aa \ 175 | --hash=sha256:4406cfabef8f07b3b3af0f50f70938ec06d9f0fc26cbdeaab431cbc3ca3caeaa \ 176 | --hash=sha256:45334234ec30fc4ea677f43171b18a27505bfb2dba9aca4398a62692c0ea8868 \ 177 | --hash=sha256:47be22dc27ed933d55ee55845d34a3e4e9f6fee93039e7f8ebadb0c2f60d403f \ 178 | --hash=sha256:4a44ebbf601d7bac77976d429e9bdb5a4614f9f4027777f9e54fd765196e9d3b \ 179 | --hash=sha256:4eda49bea3602812518765810af732229b4291d2695ed24a0a20e098c45a707b \ 180 | --hash=sha256:57f4d3f03a18facacb2a6bcd21bccd011e3b75d463dc49f838fd699d074fabd1 \ 181 | --hash=sha256:603b9091bd70fae7be28bdb8aa5c9990f4241aa33abb673390a7f7329296695f \ 182 | --hash=sha256:65bc6e2fece04e2145ab6e3c47428d1bbc05aede61ae365b2c1bddd94906e478 \ 183 | --hash=sha256:691ddf6dc50480ce49f68441f1d16a4c3325887453837036e0fb94736eae1e58 \ 184 | --hash=sha256:6983a31698490825171be44ffbafeaa930ddf590d3f051e397143a5045513b01 \ 185 | --hash=sha256:6a202458d1298ced3768f5a7d44301e7c86defac162ace0ab7434c2e961166e8 \ 186 | --hash=sha256:6eb275c6385dd72594758cbe96c07cdb9bd6becf84235f4a594bdf21e3596c9d \ 187 | --hash=sha256:754728d65f1acc61e0f4df784456106e35afb7bf39cfe37227ab00436fb38676 \ 188 | 
--hash=sha256:768efd082074bb203c934e83a61654ed4931ef02412c2fbdecea0cff7ecd0274 \ 189 | --hash=sha256:772965f773757a6026dea111a15e6e2678fbd6216180f82a48a40b27de1ee2ab \ 190 | --hash=sha256:871d42623ae15eb0b0e9df65baeee6976b2e161d0ba93155411d58ff27483ad8 \ 191 | --hash=sha256:88aafd445a233dbbf8a65a62bc3249a0acd0d81ab18f6feb461cc5a938610d24 \ 192 | --hash=sha256:8c905a5186d77111f02144fab5b849ab524f1e876a1e75205cd1386a9be4b00a \ 193 | --hash=sha256:8cf829bd2e2956066dd4de43fd8ec881d87842a06708c035b37ef632930505a2 \ 194 | --hash=sha256:92e650bd09b5dda929523b9f8e7f99b24deac61240ecc1a32aeba487afcd970f \ 195 | --hash=sha256:93641a51f89473837333b2f8100f3f89795295b858cd4c7d4a1f18e299dc0a4f \ 196 | --hash=sha256:94c7a8a9fc9383b52c410a2ec952521906d355d18fccc927fca52ab575ee8b93 \ 197 | --hash=sha256:9f892d6a94ec5c7b785e548e42722e6f3a52f5f32a8461e82ac3e67a3bd073f1 \ 198 | --hash=sha256:acb267b09a509c1df5a4ca04140da96016f40d2ed183cdc356d237286c971b51 \ 199 | --hash=sha256:adac9700675cf99e3615eb6a0eb5e9f5a4143c7d42c05cea2e7f71c27a3d0846 \ 200 | --hash=sha256:aff388be97ef2677ae185e72dc500d19ecaf31b698986800d3fc4f399a5e30a5 \ 201 | --hash=sha256:b5009062d78a8c6890d50b4e53b0ddda31841b3935c1937e2ed8c1bda1c7fb9d \ 202 | --hash=sha256:b684c68077b84522b5c7eafc1dc735bfa5b341fb011d5552ebe0968e22ed641c \ 203 | --hash=sha256:b9e3e9e365991f8cc5f5edc1fd65b58b41d0514a6a7ad95ef5c7f34eb49b3d3e \ 204 | --hash=sha256:bd89acd1b8bb4f31b47072615d72e7f53a948d302b7c1d1455e42622de180eae \ 205 | --hash=sha256:bde99812f237f79eaf3f04ebffd74f6718bbd216101b35ac7955c2d47c17da02 \ 206 | --hash=sha256:c6c321dd013e8fc20735b92cb4892c115f5cdb82c817b1e5b07f6b95d952b2f0 \ 207 | --hash=sha256:ce6f2ba0edb7b0c1d8976565298ad2deba6f8064d2bebb6ffce2ca896eb35b0b \ 208 | --hash=sha256:d2257aaba9660f78c7b1d8fea963b68f3feffb1a9d5d05a18401ca9eb3e8d0a3 \ 209 | --hash=sha256:d26b650b71fdc88065b7a21f8ace70175bcf3b5bdba5ea22df4bfd893e795a3b \ 210 | --hash=sha256:d6d32ff213aef0fd0bcf803bffe15cfa2d4fde237d1d4838e62aec242a8362fa \ 211 | --hash=sha256:e1e26ac0a253a2907d654a37e390904426d5ae5483150ce3adedb35c8c06614a \ 212 | --hash=sha256:e30b2f9683812eb30cf3f0a8e9f79f8d590a7999f731cf39f9105a7c4a39489d \ 213 | --hash=sha256:e84cb61b0ac40a0c3e0e8b79c575161c5300d1d89e13c0e02f76193982f066ed \ 214 | --hash=sha256:e982878792c971cbd60ee510c4ee5bf089a8246226dea1f2138aa0bb67aff148 \ 215 | --hash=sha256:f20baa05eaa2bcd5404c445ec51aed1c268d62600362dc6cfe04fae34a424bd9 \ 216 | --hash=sha256:f7353ba3367473d1d616ee727945f439e027f0bb16ac1a750219a8344d1d5d3c \ 217 | --hash=sha256:f96293d6f982c58ebebb428c50163d010c2f05de0cde99fd681bfdc18d4b2dc2 \ 218 | --hash=sha256:ff9310f05b9d9c5c4dd472983dc956901ee6cb2c3ec1ab116ecdde25f3ce4951 219 | # via 220 | # aiohttp 221 | # aiosignal 222 | funcy==1.17 \ 223 | --hash=sha256:40b9b9a88141ae6a174df1a95861f2b82f2fdc17669080788b73a3ed9370e968 \ 224 | --hash=sha256:ba7af5e58bfc69321aaf860a1547f18d35e145706b95d1b3c966abc4f0b60309 225 | # via faust-avro 226 | idna==3.3 \ 227 | --hash=sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff \ 228 | --hash=sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d 229 | # via yarl 230 | jinja2==3.1.2 \ 231 | --hash=sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852 \ 232 | --hash=sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61 233 | # via -r requirements/main.in 234 | kafka-python==1.4.7 \ 235 | --hash=sha256:2f29baad4b3efe05a2bb81ac268855aa01cbc68397f15bac77b494ffd7e2cada \ 236 | 
--hash=sha256:4fbebebfcb6fc94903fb720fe883d7bbec7298f4f1acb857c21dd3b4b114ba4b 237 | # via robinhood-aiokafka 238 | markupsafe==2.1.1 \ 239 | --hash=sha256:0212a68688482dc52b2d45013df70d169f542b7394fc744c02a57374a4207003 \ 240 | --hash=sha256:089cf3dbf0cd6c100f02945abeb18484bd1ee57a079aefd52cffd17fba910b88 \ 241 | --hash=sha256:10c1bfff05d95783da83491be968e8fe789263689c02724e0c691933c52994f5 \ 242 | --hash=sha256:33b74d289bd2f5e527beadcaa3f401e0df0a89927c1559c8566c066fa4248ab7 \ 243 | --hash=sha256:3799351e2336dc91ea70b034983ee71cf2f9533cdff7c14c90ea126bfd95d65a \ 244 | --hash=sha256:3ce11ee3f23f79dbd06fb3d63e2f6af7b12db1d46932fe7bd8afa259a5996603 \ 245 | --hash=sha256:421be9fbf0ffe9ffd7a378aafebbf6f4602d564d34be190fc19a193232fd12b1 \ 246 | --hash=sha256:43093fb83d8343aac0b1baa75516da6092f58f41200907ef92448ecab8825135 \ 247 | --hash=sha256:46d00d6cfecdde84d40e572d63735ef81423ad31184100411e6e3388d405e247 \ 248 | --hash=sha256:4a33dea2b688b3190ee12bd7cfa29d39c9ed176bda40bfa11099a3ce5d3a7ac6 \ 249 | --hash=sha256:4b9fe39a2ccc108a4accc2676e77da025ce383c108593d65cc909add5c3bd601 \ 250 | --hash=sha256:56442863ed2b06d19c37f94d999035e15ee982988920e12a5b4ba29b62ad1f77 \ 251 | --hash=sha256:671cd1187ed5e62818414afe79ed29da836dde67166a9fac6d435873c44fdd02 \ 252 | --hash=sha256:694deca8d702d5db21ec83983ce0bb4b26a578e71fbdbd4fdcd387daa90e4d5e \ 253 | --hash=sha256:6a074d34ee7a5ce3effbc526b7083ec9731bb3cbf921bbe1d3005d4d2bdb3a63 \ 254 | --hash=sha256:6d0072fea50feec76a4c418096652f2c3238eaa014b2f94aeb1d56a66b41403f \ 255 | --hash=sha256:6fbf47b5d3728c6aea2abb0589b5d30459e369baa772e0f37a0320185e87c980 \ 256 | --hash=sha256:7f91197cc9e48f989d12e4e6fbc46495c446636dfc81b9ccf50bb0ec74b91d4b \ 257 | --hash=sha256:86b1f75c4e7c2ac2ccdaec2b9022845dbb81880ca318bb7a0a01fbf7813e3812 \ 258 | --hash=sha256:8dc1c72a69aa7e082593c4a203dcf94ddb74bb5c8a731e4e1eb68d031e8498ff \ 259 | --hash=sha256:8e3dcf21f367459434c18e71b2a9532d96547aef8a871872a5bd69a715c15f96 \ 260 | --hash=sha256:8e576a51ad59e4bfaac456023a78f6b5e6e7651dcd383bcc3e18d06f9b55d6d1 \ 261 | --hash=sha256:96e37a3dc86e80bf81758c152fe66dbf60ed5eca3d26305edf01892257049925 \ 262 | --hash=sha256:97a68e6ada378df82bc9f16b800ab77cbf4b2fada0081794318520138c088e4a \ 263 | --hash=sha256:99a2a507ed3ac881b975a2976d59f38c19386d128e7a9a18b7df6fff1fd4c1d6 \ 264 | --hash=sha256:a49907dd8420c5685cfa064a1335b6754b74541bbb3706c259c02ed65b644b3e \ 265 | --hash=sha256:b09bf97215625a311f669476f44b8b318b075847b49316d3e28c08e41a7a573f \ 266 | --hash=sha256:b7bd98b796e2b6553da7225aeb61f447f80a1ca64f41d83612e6139ca5213aa4 \ 267 | --hash=sha256:b87db4360013327109564f0e591bd2a3b318547bcef31b468a92ee504d07ae4f \ 268 | --hash=sha256:bcb3ed405ed3222f9904899563d6fc492ff75cce56cba05e32eff40e6acbeaa3 \ 269 | --hash=sha256:d4306c36ca495956b6d568d276ac11fdd9c30a36f1b6eb928070dc5360b22e1c \ 270 | --hash=sha256:d5ee4f386140395a2c818d149221149c54849dfcfcb9f1debfe07a8b8bd63f9a \ 271 | --hash=sha256:dda30ba7e87fbbb7eab1ec9f58678558fd9a6b8b853530e176eabd064da81417 \ 272 | --hash=sha256:e04e26803c9c3851c931eac40c695602c6295b8d432cbe78609649ad9bd2da8a \ 273 | --hash=sha256:e1c0b87e09fa55a220f058d1d49d3fb8df88fbfab58558f1198e08c1e1de842a \ 274 | --hash=sha256:e72591e9ecd94d7feb70c1cbd7be7b3ebea3f548870aa91e2732960fa4d57a37 \ 275 | --hash=sha256:e8c843bbcda3a2f1e3c2ab25913c80a3c5376cd00c6e8c4a86a89a28c8dc5452 \ 276 | --hash=sha256:efc1913fd2ca4f334418481c7e595c00aad186563bbc1ec76067848c7ca0a933 \ 277 | --hash=sha256:f121a1420d4e173a5d96e47e9a0c0dcff965afdf1626d28de1460815f7c4ee7a \ 278 | 
--hash=sha256:fc7b548b17d238737688817ab67deebb30e8073c95749d55538ed473130ec0c7 279 | # via jinja2 280 | mode==4.3.2 \ 281 | --hash=sha256:2df1a558dcaa42a7f9a983acb33438371a668076171be19b4c94bf0cdfd6e2eb \ 282 | --hash=sha256:e54deee3b1988b1553425ab8b99fad957d56367eccc0d912fe009998ab651a82 283 | # via faust 284 | multidict==6.0.2 \ 285 | --hash=sha256:0327292e745a880459ef71be14e709aaea2f783f3537588fb4ed09b6c01bca60 \ 286 | --hash=sha256:041b81a5f6b38244b34dc18c7b6aba91f9cdaf854d9a39e5ff0b58e2b5773b9c \ 287 | --hash=sha256:0556a1d4ea2d949efe5fd76a09b4a82e3a4a30700553a6725535098d8d9fb672 \ 288 | --hash=sha256:05f6949d6169878a03e607a21e3b862eaf8e356590e8bdae4227eedadacf6e51 \ 289 | --hash=sha256:07a017cfa00c9890011628eab2503bee5872f27144936a52eaab449be5eaf032 \ 290 | --hash=sha256:0b9e95a740109c6047602f4db4da9949e6c5945cefbad34a1299775ddc9a62e2 \ 291 | --hash=sha256:19adcfc2a7197cdc3987044e3f415168fc5dc1f720c932eb1ef4f71a2067e08b \ 292 | --hash=sha256:19d9bad105dfb34eb539c97b132057a4e709919ec4dd883ece5838bcbf262b80 \ 293 | --hash=sha256:225383a6603c086e6cef0f2f05564acb4f4d5f019a4e3e983f572b8530f70c88 \ 294 | --hash=sha256:23b616fdc3c74c9fe01d76ce0d1ce872d2d396d8fa8e4899398ad64fb5aa214a \ 295 | --hash=sha256:2957489cba47c2539a8eb7ab32ff49101439ccf78eab724c828c1a54ff3ff98d \ 296 | --hash=sha256:2d36e929d7f6a16d4eb11b250719c39560dd70545356365b494249e2186bc389 \ 297 | --hash=sha256:2e4a0785b84fb59e43c18a015ffc575ba93f7d1dbd272b4cdad9f5134b8a006c \ 298 | --hash=sha256:3368bf2398b0e0fcbf46d85795adc4c259299fec50c1416d0f77c0a843a3eed9 \ 299 | --hash=sha256:373ba9d1d061c76462d74e7de1c0c8e267e9791ee8cfefcf6b0b2495762c370c \ 300 | --hash=sha256:4070613ea2227da2bfb2c35a6041e4371b0af6b0be57f424fe2318b42a748516 \ 301 | --hash=sha256:45183c96ddf61bf96d2684d9fbaf6f3564d86b34cb125761f9a0ef9e36c1d55b \ 302 | --hash=sha256:4571f1beddff25f3e925eea34268422622963cd8dc395bb8778eb28418248e43 \ 303 | --hash=sha256:47e6a7e923e9cada7c139531feac59448f1f47727a79076c0b1ee80274cd8eee \ 304 | --hash=sha256:47fbeedbf94bed6547d3aa632075d804867a352d86688c04e606971595460227 \ 305 | --hash=sha256:497988d6b6ec6ed6f87030ec03280b696ca47dbf0648045e4e1d28b80346560d \ 306 | --hash=sha256:4bae31803d708f6f15fd98be6a6ac0b6958fcf68fda3c77a048a4f9073704aae \ 307 | --hash=sha256:50bd442726e288e884f7be9071016c15a8742eb689a593a0cac49ea093eef0a7 \ 308 | --hash=sha256:514fe2b8d750d6cdb4712346a2c5084a80220821a3e91f3f71eec11cf8d28fd4 \ 309 | --hash=sha256:5774d9218d77befa7b70d836004a768fb9aa4fdb53c97498f4d8d3f67bb9cfa9 \ 310 | --hash=sha256:5fdda29a3c7e76a064f2477c9aab1ba96fd94e02e386f1e665bca1807fc5386f \ 311 | --hash=sha256:5ff3bd75f38e4c43f1f470f2df7a4d430b821c4ce22be384e1459cb57d6bb013 \ 312 | --hash=sha256:626fe10ac87851f4cffecee161fc6f8f9853f0f6f1035b59337a51d29ff3b4f9 \ 313 | --hash=sha256:6701bf8a5d03a43375909ac91b6980aea74b0f5402fbe9428fc3f6edf5d9677e \ 314 | --hash=sha256:684133b1e1fe91eda8fa7447f137c9490a064c6b7f392aa857bba83a28cfb693 \ 315 | --hash=sha256:6f3cdef8a247d1eafa649085812f8a310e728bdf3900ff6c434eafb2d443b23a \ 316 | --hash=sha256:75bdf08716edde767b09e76829db8c1e5ca9d8bb0a8d4bd94ae1eafe3dac5e15 \ 317 | --hash=sha256:7c40b7bbece294ae3a87c1bc2abff0ff9beef41d14188cda94ada7bcea99b0fb \ 318 | --hash=sha256:8004dca28e15b86d1b1372515f32eb6f814bdf6f00952699bdeb541691091f96 \ 319 | --hash=sha256:8064b7c6f0af936a741ea1efd18690bacfbae4078c0c385d7c3f611d11f0cf87 \ 320 | --hash=sha256:89171b2c769e03a953d5969b2f272efa931426355b6c0cb508022976a17fd376 \ 321 | --hash=sha256:8cbf0132f3de7cc6c6ce00147cc78e6439ea736cee6bca4f068bcf892b0fd658 \ 
322 | --hash=sha256:9cc57c68cb9139c7cd6fc39f211b02198e69fb90ce4bc4a094cf5fe0d20fd8b0 \ 323 | --hash=sha256:a007b1638e148c3cfb6bf0bdc4f82776cef0ac487191d093cdc316905e504071 \ 324 | --hash=sha256:a2c34a93e1d2aa35fbf1485e5010337c72c6791407d03aa5f4eed920343dd360 \ 325 | --hash=sha256:a45e1135cb07086833ce969555df39149680e5471c04dfd6a915abd2fc3f6dbc \ 326 | --hash=sha256:ac0e27844758d7177989ce406acc6a83c16ed4524ebc363c1f748cba184d89d3 \ 327 | --hash=sha256:aef9cc3d9c7d63d924adac329c33835e0243b5052a6dfcbf7732a921c6e918ba \ 328 | --hash=sha256:b9d153e7f1f9ba0b23ad1568b3b9e17301e23b042c23870f9ee0522dc5cc79e8 \ 329 | --hash=sha256:bfba7c6d5d7c9099ba21f84662b037a0ffd4a5e6b26ac07d19e423e6fdf965a9 \ 330 | --hash=sha256:c207fff63adcdf5a485969131dc70e4b194327666b7e8a87a97fbc4fd80a53b2 \ 331 | --hash=sha256:d0509e469d48940147e1235d994cd849a8f8195e0bca65f8f5439c56e17872a3 \ 332 | --hash=sha256:d16cce709ebfadc91278a1c005e3c17dd5f71f5098bfae1035149785ea6e9c68 \ 333 | --hash=sha256:d48b8ee1d4068561ce8033d2c344cf5232cb29ee1a0206a7b828c79cbc5982b8 \ 334 | --hash=sha256:de989b195c3d636ba000ee4281cd03bb1234635b124bf4cd89eeee9ca8fcb09d \ 335 | --hash=sha256:e07c8e79d6e6fd37b42f3250dba122053fddb319e84b55dd3a8d6446e1a7ee49 \ 336 | --hash=sha256:e2c2e459f7050aeb7c1b1276763364884595d47000c1cddb51764c0d8976e608 \ 337 | --hash=sha256:e5b20e9599ba74391ca0cfbd7b328fcc20976823ba19bc573983a25b32e92b57 \ 338 | --hash=sha256:e875b6086e325bab7e680e4316d667fc0e5e174bb5611eb16b3ea121c8951b86 \ 339 | --hash=sha256:f4f052ee022928d34fe1f4d2bc743f32609fb79ed9c49a1710a5ad6b2198db20 \ 340 | --hash=sha256:fcb91630817aa8b9bc4a74023e4198480587269c272c58b3279875ed7235c293 \ 341 | --hash=sha256:fd9fc9c4849a07f3635ccffa895d57abce554b467d611a5009ba4f39b78a8849 \ 342 | --hash=sha256:feba80698173761cddd814fa22e88b0661e98cb810f9f986c54aa34d281e4937 \ 343 | --hash=sha256:feea820722e69451743a3d56ad74948b68bf456984d63c1a92e8347b7b88452d 344 | # via 345 | # aiohttp 346 | # yarl 347 | mypy-extensions==0.4.3 \ 348 | --hash=sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d \ 349 | --hash=sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8 350 | # via 351 | # faust 352 | # mode 353 | # typing-inspect 354 | opentracing==1.3.0 \ 355 | --hash=sha256:9b3f7c7a20c34170b9253c97121256264daf6b5f090035c732c6e2548cc5c0a7 356 | # via faust 357 | pydantic==1.9.1 \ 358 | --hash=sha256:02eefd7087268b711a3ff4db528e9916ac9aa18616da7bca69c1871d0b7a091f \ 359 | --hash=sha256:059b6c1795170809103a1538255883e1983e5b831faea6558ef873d4955b4a74 \ 360 | --hash=sha256:0bf07cab5b279859c253d26a9194a8906e6f4a210063b84b433cf90a569de0c1 \ 361 | --hash=sha256:1542636a39c4892c4f4fa6270696902acb186a9aaeac6f6cf92ce6ae2e88564b \ 362 | --hash=sha256:177071dfc0df6248fd22b43036f936cfe2508077a72af0933d0c1fa269b18537 \ 363 | --hash=sha256:18f3e912f9ad1bdec27fb06b8198a2ccc32f201e24174cec1b3424dda605a310 \ 364 | --hash=sha256:1dd8fecbad028cd89d04a46688d2fcc14423e8a196d5b0a5c65105664901f810 \ 365 | --hash=sha256:1ed987c3ff29fff7fd8c3ea3a3ea877ad310aae2ef9889a119e22d3f2db0691a \ 366 | --hash=sha256:447d5521575f18e18240906beadc58551e97ec98142266e521c34968c76c8761 \ 367 | --hash=sha256:494f7c8537f0c02b740c229af4cb47c0d39840b829ecdcfc93d91dcbb0779892 \ 368 | --hash=sha256:4988c0f13c42bfa9ddd2fe2f569c9d54646ce84adc5de84228cfe83396f3bd58 \ 369 | --hash=sha256:4ce9ae9e91f46c344bec3b03d6ee9612802682c1551aaf627ad24045ce090761 \ 370 | --hash=sha256:5d93d4e95eacd313d2c765ebe40d49ca9dd2ed90e5b37d0d421c597af830c195 \ 371 | 
--hash=sha256:61b6760b08b7c395975d893e0b814a11cf011ebb24f7d869e7118f5a339a82e1 \ 372 | --hash=sha256:72ccb318bf0c9ab97fc04c10c37683d9eea952ed526707fabf9ac5ae59b701fd \ 373 | --hash=sha256:79b485767c13788ee314669008d01f9ef3bc05db9ea3298f6a50d3ef596a154b \ 374 | --hash=sha256:7eb57ba90929bac0b6cc2af2373893d80ac559adda6933e562dcfb375029acee \ 375 | --hash=sha256:8bc541a405423ce0e51c19f637050acdbdf8feca34150e0d17f675e72d119580 \ 376 | --hash=sha256:969dd06110cb780da01336b281f53e2e7eb3a482831df441fb65dd30403f4608 \ 377 | --hash=sha256:985ceb5d0a86fcaa61e45781e567a59baa0da292d5ed2e490d612d0de5796918 \ 378 | --hash=sha256:9bcf8b6e011be08fb729d110f3e22e654a50f8a826b0575c7196616780683380 \ 379 | --hash=sha256:9ce157d979f742a915b75f792dbd6aa63b8eccaf46a1005ba03aa8a986bde34a \ 380 | --hash=sha256:9f659a5ee95c8baa2436d392267988fd0f43eb774e5eb8739252e5a7e9cf07e0 \ 381 | --hash=sha256:a4a88dcd6ff8fd47c18b3a3709a89adb39a6373f4482e04c1b765045c7e282fd \ 382 | --hash=sha256:a955260d47f03df08acf45689bd163ed9df82c0e0124beb4251b1290fa7ae728 \ 383 | --hash=sha256:a9af62e9b5b9bc67b2a195ebc2c2662fdf498a822d62f902bf27cccb52dbbf49 \ 384 | --hash=sha256:ae72f8098acb368d877b210ebe02ba12585e77bd0db78ac04a1ee9b9f5dd2166 \ 385 | --hash=sha256:b83ba3825bc91dfa989d4eed76865e71aea3a6ca1388b59fc801ee04c4d8d0d6 \ 386 | --hash=sha256:c11951b404e08b01b151222a1cb1a9f0a860a8153ce8334149ab9199cd198131 \ 387 | --hash=sha256:c320c64dd876e45254bdd350f0179da737463eea41c43bacbee9d8c9d1021f11 \ 388 | --hash=sha256:c8098a724c2784bf03e8070993f6d46aa2eeca031f8d8a048dff277703e6e193 \ 389 | --hash=sha256:d12f96b5b64bec3f43c8e82b4aab7599d0157f11c798c9f9c528a72b9e0b339a \ 390 | --hash=sha256:e565a785233c2d03724c4dc55464559639b1ba9ecf091288dd47ad9c629433bd \ 391 | --hash=sha256:f0f047e11febe5c3198ed346b507e1d010330d56ad615a7e0a89fae604065a0e \ 392 | --hash=sha256:fe4670cb32ea98ffbf5a1262f14c3e102cccd92b1869df3bb09538158ba90fe6 393 | # via -r requirements/main.in 394 | python-dateutil==2.8.2 \ 395 | --hash=sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86 \ 396 | --hash=sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9 397 | # via croniter 398 | pytz==2022.1 \ 399 | --hash=sha256:1e760e2fe6a8163bc0b3d9a19c4f84342afa0a2affebfaa84b01b978a02ecaa7 \ 400 | --hash=sha256:e68985985296d9a66a881eb3193b0906246245294a881e7c8afe623866ac6a5c 401 | # via fastavro 402 | robinhood-aiokafka==1.1.6 \ 403 | --hash=sha256:12ab446ec50634b53f5a7d16502a5b048ae68f8075a568be54c0197cef549afd \ 404 | --hash=sha256:a1c468d0780ad903fce41fc0ff98b37ddf390470554303c335a86b24768f677a \ 405 | --hash=sha256:b5ac0591a3c5a61fcfb316acfe7b4bfa336f20f048642be769bd0a321b94acda \ 406 | --hash=sha256:d8d03d73d3af3e9e74d01085294eb2848d06b4bd79ec90c0fd0be2142fabe530 \ 407 | --hash=sha256:efe6bdc0d6bb07945135799f34f436d2ec912774b096765daa189a66e2f89faa 408 | # via faust 409 | six==1.16.0 \ 410 | --hash=sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926 \ 411 | --hash=sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254 412 | # via python-dateutil 413 | terminaltables==3.1.10 \ 414 | --hash=sha256:ba6eca5cb5ba02bba4c9f4f985af80c54ec3dccf94cfcd190154386255e47543 \ 415 | --hash=sha256:e4fdc4179c9e4aab5f674d80f09d76fa436b96fdc698a8505e0a36bf0804a874 416 | # via faust 417 | typing-extensions==4.3.0 \ 418 | --hash=sha256:25642c956049920a5aa49edcdd6ab1e06d7e5d467fc00e0506c44ac86fbfca02 \ 419 | --hash=sha256:e6d2677a32f47fc7eb2795db1dd15c1f34eff616bcaf2cfb5e997f854fa1c4a6 420 | # via 421 | # pydantic 422 | # 
typing-inspect 423 | typing-inspect==0.5.0 \ 424 | --hash=sha256:75c97b7854426a129f3184c68588db29091ff58e6908ed520add1d52fc44df6e \ 425 | --hash=sha256:811b44f92e780b90cfe7bac94249a4fae87cfaa9b40312765489255045231d9c \ 426 | --hash=sha256:c6ed1cd34860857c53c146a6704a96da12e1661087828ce350f34addc6e5eee3 427 | # via faust-avro 428 | venusian==1.2.0 \ 429 | --hash=sha256:2f2d077a1eedc3fda40425f65687c8c494da7e83d7c23bc2c4d1a40eb3ca5b6d \ 430 | --hash=sha256:64ec8285b80b110d0ae5db4280e90e31848a59db98db1aba4d7d46f48ce91e3e 431 | # via faust 432 | yarl==1.7.2 \ 433 | --hash=sha256:044daf3012e43d4b3538562da94a88fb12a6490652dbc29fb19adfa02cf72eac \ 434 | --hash=sha256:0cba38120db72123db7c58322fa69e3c0efa933040ffb586c3a87c063ec7cae8 \ 435 | --hash=sha256:167ab7f64e409e9bdd99333fe8c67b5574a1f0495dcfd905bc7454e766729b9e \ 436 | --hash=sha256:1be4bbb3d27a4e9aa5f3df2ab61e3701ce8fcbd3e9846dbce7c033a7e8136746 \ 437 | --hash=sha256:1ca56f002eaf7998b5fcf73b2421790da9d2586331805f38acd9997743114e98 \ 438 | --hash=sha256:1d3d5ad8ea96bd6d643d80c7b8d5977b4e2fb1bab6c9da7322616fd26203d125 \ 439 | --hash=sha256:1eb6480ef366d75b54c68164094a6a560c247370a68c02dddb11f20c4c6d3c9d \ 440 | --hash=sha256:1edc172dcca3f11b38a9d5c7505c83c1913c0addc99cd28e993efeaafdfaa18d \ 441 | --hash=sha256:211fcd65c58bf250fb994b53bc45a442ddc9f441f6fec53e65de8cba48ded986 \ 442 | --hash=sha256:29e0656d5497733dcddc21797da5a2ab990c0cb9719f1f969e58a4abac66234d \ 443 | --hash=sha256:368bcf400247318382cc150aaa632582d0780b28ee6053cd80268c7e72796dec \ 444 | --hash=sha256:39d5493c5ecd75c8093fa7700a2fb5c94fe28c839c8e40144b7ab7ccba6938c8 \ 445 | --hash=sha256:3abddf0b8e41445426d29f955b24aeecc83fa1072be1be4e0d194134a7d9baee \ 446 | --hash=sha256:3bf8cfe8856708ede6a73907bf0501f2dc4e104085e070a41f5d88e7faf237f3 \ 447 | --hash=sha256:3ec1d9a0d7780416e657f1e405ba35ec1ba453a4f1511eb8b9fbab81cb8b3ce1 \ 448 | --hash=sha256:45399b46d60c253327a460e99856752009fcee5f5d3c80b2f7c0cae1c38d56dd \ 449 | --hash=sha256:52690eb521d690ab041c3919666bea13ab9fbff80d615ec16fa81a297131276b \ 450 | --hash=sha256:534b047277a9a19d858cde163aba93f3e1677d5acd92f7d10ace419d478540de \ 451 | --hash=sha256:580c1f15500e137a8c37053e4cbf6058944d4c114701fa59944607505c2fe3a0 \ 452 | --hash=sha256:59218fef177296451b23214c91ea3aba7858b4ae3306dde120224cfe0f7a6ee8 \ 453 | --hash=sha256:5ba63585a89c9885f18331a55d25fe81dc2d82b71311ff8bd378fc8004202ff6 \ 454 | --hash=sha256:5bb7d54b8f61ba6eee541fba4b83d22b8a046b4ef4d8eb7f15a7e35db2e1e245 \ 455 | --hash=sha256:6152224d0a1eb254f97df3997d79dadd8bb2c1a02ef283dbb34b97d4f8492d23 \ 456 | --hash=sha256:67e94028817defe5e705079b10a8438b8cb56e7115fa01640e9c0bb3edf67332 \ 457 | --hash=sha256:695ba021a9e04418507fa930d5f0704edbce47076bdcfeeaba1c83683e5649d1 \ 458 | --hash=sha256:6a1a9fe17621af43e9b9fcea8bd088ba682c8192d744b386ee3c47b56eaabb2c \ 459 | --hash=sha256:6ab0c3274d0a846840bf6c27d2c60ba771a12e4d7586bf550eefc2df0b56b3b4 \ 460 | --hash=sha256:6feca8b6bfb9eef6ee057628e71e1734caf520a907b6ec0d62839e8293e945c0 \ 461 | --hash=sha256:737e401cd0c493f7e3dd4db72aca11cfe069531c9761b8ea474926936b3c57c8 \ 462 | --hash=sha256:788713c2896f426a4e166b11f4ec538b5736294ebf7d5f654ae445fd44270832 \ 463 | --hash=sha256:797c2c412b04403d2da075fb93c123df35239cd7b4cc4e0cd9e5839b73f52c58 \ 464 | --hash=sha256:8300401dc88cad23f5b4e4c1226f44a5aa696436a4026e456fe0e5d2f7f486e6 \ 465 | --hash=sha256:87f6e082bce21464857ba58b569370e7b547d239ca22248be68ea5d6b51464a1 \ 466 | --hash=sha256:89ccbf58e6a0ab89d487c92a490cb5660d06c3a47ca08872859672f9c511fc52 \ 467 | 
--hash=sha256:8b0915ee85150963a9504c10de4e4729ae700af11df0dc5550e6587ed7891e92 \ 468 | --hash=sha256:8cce6f9fa3df25f55521fbb5c7e4a736683148bcc0c75b21863789e5185f9185 \ 469 | --hash=sha256:95a1873b6c0dd1c437fb3bb4a4aaa699a48c218ac7ca1e74b0bee0ab16c7d60d \ 470 | --hash=sha256:9b4c77d92d56a4c5027572752aa35082e40c561eec776048330d2907aead891d \ 471 | --hash=sha256:9bfcd43c65fbb339dc7086b5315750efa42a34eefad0256ba114cd8ad3896f4b \ 472 | --hash=sha256:9c1f083e7e71b2dd01f7cd7434a5f88c15213194df38bc29b388ccdf1492b739 \ 473 | --hash=sha256:a1d0894f238763717bdcfea74558c94e3bc34aeacd3351d769460c1a586a8b05 \ 474 | --hash=sha256:a467a431a0817a292121c13cbe637348b546e6ef47ca14a790aa2fa8cc93df63 \ 475 | --hash=sha256:aa32aaa97d8b2ed4e54dc65d241a0da1c627454950f7d7b1f95b13985afd6c5d \ 476 | --hash=sha256:ac10bbac36cd89eac19f4e51c032ba6b412b3892b685076f4acd2de18ca990aa \ 477 | --hash=sha256:ac35ccde589ab6a1870a484ed136d49a26bcd06b6a1c6397b1967ca13ceb3913 \ 478 | --hash=sha256:bab827163113177aee910adb1f48ff7af31ee0289f434f7e22d10baf624a6dfe \ 479 | --hash=sha256:baf81561f2972fb895e7844882898bda1eef4b07b5b385bcd308d2098f1a767b \ 480 | --hash=sha256:bf19725fec28452474d9887a128e98dd67eee7b7d52e932e6949c532d820dc3b \ 481 | --hash=sha256:c01a89a44bb672c38f42b49cdb0ad667b116d731b3f4c896f72302ff77d71656 \ 482 | --hash=sha256:c0910c6b6c31359d2f6184828888c983d54d09d581a4a23547a35f1d0b9484b1 \ 483 | --hash=sha256:c10ea1e80a697cf7d80d1ed414b5cb8f1eec07d618f54637067ae3c0334133c4 \ 484 | --hash=sha256:c1164a2eac148d85bbdd23e07dfcc930f2e633220f3eb3c3e2a25f6148c2819e \ 485 | --hash=sha256:c145ab54702334c42237a6c6c4cc08703b6aa9b94e2f227ceb3d477d20c36c63 \ 486 | --hash=sha256:c17965ff3706beedafd458c452bf15bac693ecd146a60a06a214614dc097a271 \ 487 | --hash=sha256:c19324a1c5399b602f3b6e7db9478e5b1adf5cf58901996fc973fe4fccd73eed \ 488 | --hash=sha256:c2a1ac41a6aa980db03d098a5531f13985edcb451bcd9d00670b03129922cd0d \ 489 | --hash=sha256:c6ddcd80d79c96eb19c354d9dca95291589c5954099836b7c8d29278a7ec0bda \ 490 | --hash=sha256:c9c6d927e098c2d360695f2e9d38870b2e92e0919be07dbe339aefa32a090265 \ 491 | --hash=sha256:cc8b7a7254c0fc3187d43d6cb54b5032d2365efd1df0cd1749c0c4df5f0ad45f \ 492 | --hash=sha256:cff3ba513db55cc6a35076f32c4cdc27032bd075c9faef31fec749e64b45d26c \ 493 | --hash=sha256:d260d4dc495c05d6600264a197d9d6f7fc9347f21d2594926202fd08cf89a8ba \ 494 | --hash=sha256:d6f3d62e16c10e88d2168ba2d065aa374e3c538998ed04996cd373ff2036d64c \ 495 | --hash=sha256:da6df107b9ccfe52d3a48165e48d72db0eca3e3029b5b8cb4fe6ee3cb870ba8b \ 496 | --hash=sha256:dfe4b95b7e00c6635a72e2d00b478e8a28bfb122dc76349a06e20792eb53a523 \ 497 | --hash=sha256:e39378894ee6ae9f555ae2de332d513a5763276a9265f8e7cbaeb1b1ee74623a \ 498 | --hash=sha256:ede3b46cdb719c794427dcce9d8beb4abe8b9aa1e97526cc20de9bd6583ad1ef \ 499 | --hash=sha256:f2a8508f7350512434e41065684076f640ecce176d262a7d54f0da41d99c5a95 \ 500 | --hash=sha256:f44477ae29025d8ea87ec308539f95963ffdc31a82f42ca9deecf2d505242e72 \ 501 | --hash=sha256:f64394bd7ceef1237cc604b5a89bf748c95982a84bcd3c4bbeb40f685c810794 \ 502 | --hash=sha256:fc4dd8b01a8112809e6b636b00f487846956402834a7fd59d46d4f4267181c41 \ 503 | --hash=sha256:fce78593346c014d0d986b7ebc80d782b7f5e19843ca798ed62f8e3ba8728576 \ 504 | --hash=sha256:fd547ec596d90c8676e369dd8a581a21227fe9b4ad37d0dc7feb4ccf544c2d59 505 | # via 506 | # aiohttp 507 | # faust 508 | --------------------------------------------------------------------------------