├── tests ├── __init__.py └── test_with_kafka.py ├── kafka_schema_registry ├── version.py └── __init__.py ├── requirements.txt ├── .gitignore ├── requirements-dev.txt ├── setup.cfg ├── setup.py ├── LICENSE ├── .github └── workflows │ └── python-package.yaml ├── Makefile ├── CHANGELOG.md └── README.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /kafka_schema_registry/version.py: -------------------------------------------------------------------------------- 1 | __VERSION__ = '0.2.2' 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | fastavro==1.12.1 2 | kafka-python==2.3.0 3 | requests==2.32.5 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | *.py[cod] 3 | 4 | build/ 5 | dist/ 6 | *.egg-info/ 7 | 8 | htmlcov/ 9 | .coverage 10 | 11 | venv/ 12 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | pytest==8.4.1 2 | pytest-cov==6.2.1 3 | flake8==7.3.0 4 | responses==0.25.7 5 | twine==6.1.0 6 | wheel==0.45.1 7 | 8 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [tool:pytest] 2 | addopts = 3 | --cov=kafka_schema_registry 4 | --cov=tests 5 | --cov-report=html 6 | --cov-report=term-missing:skip-covered 7 | 8 | [flake8] 9 | exclude = venv,build 10 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | meta = {} 4 | exec(open('./kafka_schema_registry/version.py').read(), meta) 5 | meta['long_description'] = open('./README.md').read() 6 | 7 | 8 | setup( 9 | name='kafka-schema-registry', 10 | version=meta['__VERSION__'], 11 | description='Kafka and schema registry integration', 12 | long_description=meta['long_description'], 13 | long_description_content_type='text/markdown', 14 | keywords='kafka schema-registry', 15 | author='FlixTech', 16 | author_email="open-source@flixbus.com", 17 | url='https://github.com/flix-tech/kafka-schema-registry', 18 | project_urls={ 19 | "Changelog": "https://github.com/flix-tech/kafka-schema-registry/blob/master/CHANGELOG.md", # noqa 20 | "Source": 'https://github.com/flix-tech/kafka-schema-registry', 21 | }, 22 | python_requires='>=3.9', 23 | install_requires=[ 24 | 'fastavro', 25 | 'kafka-python', 26 | 'requests', 27 | ], 28 | packages=['kafka_schema_registry'], 29 | license='MIT', 30 | ) 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 FlixMobility Tech GmbH 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, 
sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.github/workflows/python-package.yaml: -------------------------------------------------------------------------------- 1 | name: CI/CD Pipeline 2 | 3 | on: 4 | - push 5 | - pull_request 6 | 7 | jobs: 8 | test: 9 | name: Test Python ${{ matrix.python-version }} 10 | runs-on: ubuntu-latest 11 | 12 | strategy: 13 | fail-fast: false 14 | matrix: 15 | python-version: 16 | - "3.9" 17 | - "3.10" 18 | - "3.11" 19 | - "3.12" 20 | 21 | 22 | steps: 23 | - uses: actions/checkout@v2 24 | 25 | - name: Set up Python ${{ matrix.python-version }} 26 | uses: actions/setup-python@v2 27 | with: 28 | python-version: ${{ matrix.python-version }} 29 | 30 | - name: Run tests 31 | run: make test 32 | 33 | - name: Run linter 34 | run: make lint 35 | 36 | publish: 37 | name: Publish package to PyPI 38 | if: startsWith(github.ref, 'refs/tags') 39 | runs-on: ubuntu-latest 40 | needs: test 41 | steps: 42 | - uses: actions/checkout@v2 43 | 44 | - uses: actions/setup-python@v2 45 | 46 | - name: Publish package to PyPI 47 | run: make release 48 | env: 49 | TWINE_USERNAME: __token__ 50 | TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} 51 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # system python interpreter. used only to create virtual environment 2 | PY = python3 3 | VENV = venv 4 | BIN=$(VENV)/bin 5 | 6 | 7 | ifeq ($(OS), Windows_NT) 8 | BIN=$(VENV)/Scripts 9 | PY=python 10 | endif 11 | 12 | all: lint test 13 | 14 | $(VENV): requirements.txt requirements-dev.txt setup.py 15 | $(PY) -m venv $(VENV) 16 | # required since Python 3.12 17 | $(BIN)/pip install setuptools 18 | $(BIN)/pip install --upgrade -r requirements.txt 19 | $(BIN)/pip install --upgrade -r requirements-dev.txt 20 | $(BIN)/pip install -e . 21 | touch $(VENV) 22 | 23 | 24 | .PHONY: start-redpanda 25 | start-redpanda: 26 | docker run --name=redpanda-1 --rm \ 27 | -p 9092:9092 \ 28 | vectorized/redpanda:latest \ 29 | start \ 30 | --overprovisioned \ 31 | --smp 1 \ 32 | --memory 128M \ 33 | --reserve-memory 0M \ 34 | --node-id 0 \ 35 | --check=false 36 | 37 | .PHONY: test 38 | test: $(VENV) 39 | $(BIN)/pytest 40 | 41 | .PHONY: lint 42 | lint: $(VENV) 43 | $(BIN)/flake8 44 | 45 | .PHONY: release 46 | release: $(VENV) 47 | rm -rf dist 48 | $(BIN)/python setup.py sdist bdist_wheel 49 | $(BIN)/twine upload dist/* 50 | 51 | .PHONY: clean 52 | clean: 53 | rm -rf build dist *.egg-info 54 | rm -rf $(VENV) 55 | find . -type f -name *.pyc -delete 56 | find . 
-type d -name __pycache__ -delete 57 | # coverage 58 | rm -rf htmlcov .coverage 59 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## [Unreleased] -- YYYY-MM-DD 4 | 5 | ## [0.2.2] -- 2025-07-25 6 | 7 | * Upgrade kafka-python to 2.2.15 8 | 9 | ## [0.2.1] -- 2025-03-04 10 | 11 | * Move back to dpkp/kafka-python, it's maintained again 12 | 13 | ## [0.2.0] -- 2024-12-09 14 | 15 | * Update dependencies 16 | * Show the raw response in case of JSON errors from the registry 17 | 18 | 19 | ## [0.1.2] -- 2022-07-13 20 | 21 | * Pass per-topic config (used when creating a topic) as a dedicated variable, not as part of the Client configs 22 | 23 | ## [0.1.1] -- 2022-07-12 24 | 25 | * Fixed API config params 26 | prepare_producer() uses two APIs: 27 | 1. KafkaAdminClient -> creates topics 28 | 2. KafkaProducer -> sends events to the Kafka topic 29 | The two APIs do not accept the same config parameters, so it was not possible to set API-specific parameters without raising an "Unrecognized configs" error. This change makes sure the correct configs are passed to the respective APIs. 30 | 31 | ## [0.1.0] -- 2022-07-12 32 | 33 | * Added Python 3.10 to test suite 34 | 35 | ## [0.0.4] -- 2022-01-28 36 | 37 | * Propagate extra arguments to the Kafka library (e.g. for authentication) 38 | 39 | ## [0.0.3] -- 2021-07-06 40 | 41 | * Fixed package name 42 | 43 | ## [0.0.2] -- 2021-07-06 44 | 45 | * Dummy release to test gh-actions to pypi 46 | 47 | ## [0.0.1] -- 2021-07-05 48 | 49 | * Initial Release -- you probably should not use this at this point. 50 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # kafka-schema-registry 2 | 3 | This library allows you to create topics on Kafka, associate them with a 4 | Confluent Schema Registry, and publish messages on them. 5 | 6 | It takes care of: 7 | * creating the topic 8 | * publishing the associated schema (or updating an existing one) 9 | * serializing and publishing messages to Kafka 10 | 11 | It works with [kafka-python][], and extra arguments are forwarded to it. 12 | 13 | [kafka-python]: https://github.com/dpkp/kafka-python 14 | 15 | 16 | ## Installing 17 | 18 | ```sh 19 | pip install kafka-schema-registry 20 | ``` 21 | 22 | ## Usage 23 | 24 | ```python 25 | from kafka_schema_registry import prepare_producer 26 | 27 | SAMPLE_SCHEMA = { 28 | "type": "record", 29 | "name": "TestType", 30 | "fields": [ 31 | {"name": "age", "type": "int"}, 32 | {"name": "name", "type": ["null", "string"]} 33 | ] 34 | } 35 | 36 | topic_name = 'my-topic' 37 | producer = prepare_producer( 38 | ['localhost:9092'], 39 | 'http://schemaregistry', 40 | topic_name, 41 | 1,  # number of partitions 42 | 1,  # replication factor 43 | value_schema=SAMPLE_SCHEMA, 44 | ) 45 | 46 | producer.send(topic_name, {'age': 34}) 47 | producer.send(topic_name, {'age': 9000, 'name': 'john'}) 48 | ``` 49 | 50 | ## Running the tests 51 | 52 | The tests require Docker in order to start a local Redpanda instance. 
53 | 54 | * `make start-redpanda` to start the server 55 | * `make test` to configure a virtualenv and run the tests 56 | -------------------------------------------------------------------------------- /tests/test_with_kafka.py: -------------------------------------------------------------------------------- 1 | import json 2 | import uuid 3 | import socket 4 | 5 | import pytest 6 | import responses 7 | from kafka.errors import UnknownTopicOrPartitionError 8 | 9 | from kafka_schema_registry import publish_schemas 10 | from kafka_schema_registry import prepare_producer 11 | from kafka_schema_registry import create_topic, delete_topic 12 | 13 | 14 | def has_kafka(): 15 | sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 16 | result = sock.connect_ex(('localhost', 9092)) 17 | sock.close() 18 | return True if result == 0 else False 19 | 20 | 21 | SAMPLE_SCHEMA = { 22 | "type": "record", 23 | "name": "TestType", 24 | "fields": [ 25 | {"name": "age", "type": "int"}, 26 | {"name": "name", "type": ["null", "string"]} 27 | ] 28 | } 29 | 30 | 31 | def test_check_schema_presence(): 32 | with pytest.raises(ValueError) as exc: 33 | publish_schemas( 34 | 'not-really-used', 35 | 'http://schemaregistry', 36 | ) 37 | assert str(exc.value) == 'No key nor value schema was given' 38 | 39 | 40 | @responses.activate 41 | def test_publish_value_schema(): 42 | topic_name = f'test-topic-{uuid.uuid4()}' 43 | schema = dict(bla=42) 44 | responses.add( 45 | responses.POST, 46 | f'http://schemaregistry/subjects/{topic_name}-value/versions', 47 | json=dict(id=2), 48 | status=200) 49 | 50 | (k_id, v_id) = publish_schemas( 51 | topic_name, 52 | 'http://schemaregistry', 53 | value_schema=schema, 54 | ) 55 | assert json.loads(responses.calls[0].request.body) == dict(schema=schema) 56 | assert (k_id, v_id) == (None, 2) 57 | 58 | 59 | @responses.activate 60 | def test_publish_key_schema(): 61 | topic_name = f'test-topic-{uuid.uuid4()}' 62 | schema = dict(bla=42) 63 | responses.add( 64 | responses.POST, 65 | f'http://schemaregistry/subjects/{topic_name}-key/versions', 66 | json=dict(id=2), 67 | status=200) 68 | 69 | (k_id, v_id) = publish_schemas( 70 | topic_name, 71 | 'http://schemaregistry', 72 | key_schema=schema, 73 | ) 74 | assert json.loads(responses.calls[0].request.body) == dict(schema=schema) 75 | assert (k_id, v_id) == (2, None) 76 | 77 | 78 | @pytest.mark.skipif(not has_kafka(), reason="No Kafka Cluster running") 79 | @responses.activate 80 | def test_publish_messages(): 81 | topic_name = f'test-topic-{uuid.uuid4()}' 82 | responses.add( 83 | responses.POST, 84 | f'http://schemaregistry/subjects/{topic_name}-value/versions', 85 | json=dict(id=2), 86 | status=200) 87 | producer = prepare_producer( 88 | ['localhost:9092'], 89 | 'http://schemaregistry', 90 | topic_name, 91 | 1, 92 | 1, 93 | value_schema=SAMPLE_SCHEMA, 94 | ) 95 | # the message does not match 96 | with pytest.raises(ValueError): 97 | producer.send(topic_name, {'e': 34}) 98 | 99 | producer.send(topic_name, {'age': 34}) 100 | producer.send(topic_name, {'age': 9000, 'name': 'john'}) 101 | 102 | 103 | @pytest.mark.skipif(not has_kafka(), reason="No Kafka Cluster running") 104 | def test_topic_creation_deletion(): 105 | topic_name = f'test-topic-{uuid.uuid4()}' 106 | with pytest.raises(UnknownTopicOrPartitionError): 107 | delete_topic(topic_name, bootstrap_servers=['localhost:9092']) 108 | create_topic(['localhost:9092'], topic_name, 1, 1) 109 | delete_topic(topic_name, bootstrap_servers=['localhost:9092']) 110 | with 
pytest.raises(UnknownTopicOrPartitionError): 111 | delete_topic(topic_name, bootstrap_servers=['localhost:9092']) 112 | 113 | 114 | @pytest.mark.skipif(not has_kafka(), reason="No Kafka Cluster running") 115 | @responses.activate 116 | def test_correct_config_params(): 117 | """ prepare_producer() uses two API's: 118 | 1) KafkaAdminClient -> Creates topics 119 | 2) KafkaProducer -> sends events to kafka topic 120 | Both the above API's config params are not equivalent, this 121 | test makes sure correct configs are passed to the respective API's 122 | without raising any errors. 123 | """ 124 | 125 | request_timeout_ms = 30000 # Common config param 126 | batch_size = 16384 # Producer specific config 127 | topic_config = {'cleanup.policy': 'compact'} # Topic specific config 128 | topic_name = f'test-topic-{uuid.uuid4()}' 129 | responses.add( 130 | responses.POST, 131 | f'http://schemaregistry/subjects/{topic_name}-value/versions', 132 | json=dict(id=2), 133 | status=200) 134 | producer = prepare_producer( 135 | ['localhost:9092'], 136 | 'http://schemaregistry', 137 | topic_name, 138 | 1, 139 | 1, 140 | value_schema=SAMPLE_SCHEMA, 141 | request_timeout_ms=request_timeout_ms, 142 | batch_size=batch_size, 143 | topic_config=topic_config, 144 | ) 145 | 146 | producer.send(topic_name, {'age': 34}) 147 | producer.send(topic_name, {'age': 9000, 'name': 'john'}) 148 | 149 | 150 | @pytest.mark.skipif(not has_kafka(), reason="No Kafka Cluster running") 151 | @responses.activate 152 | def test_incorrect_config_params(): 153 | """ If invalid config parameters are passed then AssertionError is raised. 154 | Currently there is no way to check the valid topic configurations, 155 | hence skipped and depends on the user to provide valid configs. 156 | """ 157 | invalid_param = 'dummy' 158 | topic_name = f'test-topic-{uuid.uuid4()}' 159 | responses.add( 160 | responses.POST, 161 | f'http://schemaregistry/subjects/{topic_name}-value/versions', 162 | json=dict(id=2), 163 | status=200) 164 | with pytest.raises(AssertionError): 165 | prepare_producer( 166 | ['localhost:9092'], 167 | 'http://schemaregistry', 168 | topic_name, 169 | 1, 170 | 1, 171 | value_schema=SAMPLE_SCHEMA, 172 | invalid_param=invalid_param 173 | ) 174 | -------------------------------------------------------------------------------- /kafka_schema_registry/__init__.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | import copy 3 | import json 4 | import logging 5 | import struct 6 | from typing import List 7 | 8 | from fastavro import parse_schema, schemaless_writer 9 | from kafka import KafkaProducer, KafkaAdminClient 10 | from kafka.admin import NewTopic 11 | from kafka.errors import TopicAlreadyExistsError, NoBrokersAvailable 12 | from requests import request 13 | from requests.exceptions import JSONDecodeError 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | # the log from python-kafka is absurdly verbose, reduce it 18 | # it logs every single produced event 19 | logging.getLogger('kafka.producer.record_accumulator').setLevel(logging.INFO) 20 | logging.getLogger('kafka.producer.sender').setLevel(logging.INFO) 21 | logging.getLogger('kafka.protocol.parser').setLevel(logging.INFO) 22 | logging.getLogger('kafka.conn').setLevel(logging.INFO) 23 | logging.getLogger('kafka.producer.kafka').setLevel(logging.INFO) 24 | 25 | 26 | def delete_topic(topic_name: str, **kwargs): 27 | """Delete a topic from Kafka. 
28 | 29 | The topic is deleted synchronously, the function returns when done. 30 | Notice that Lenses and other tools can take a few minutes to show 31 | the change. 32 | 33 | Parameters 34 | ---------- 35 | topic_name : str 36 | The name of the topic to delete 37 | """ 38 | admin_config = copy.copy(KafkaAdminClient.DEFAULT_CONFIG) 39 | for key in admin_config: 40 | admin_config[key] = kwargs.get(key, admin_config[key]) 41 | 42 | admin_client = KafkaAdminClient(**admin_config) 43 | admin_client.delete_topics([topic_name]) 44 | 45 | 46 | def publish_schemas( 47 | topic_name: str, 48 | avro_schema_registry: str, 49 | value_schema: dict = None, 50 | key_schema: dict = None, 51 | ): 52 | """Publish the schema for a given topic. 53 | 54 | If the schema is already there and identical, the id is simply returned, 55 | so subsequent calls are idempotent. 56 | 57 | At least one of the schemas must be specified. 58 | 59 | Parameters 60 | ---------- 61 | topic_name : str 62 | The name of the topic 63 | avro_schema_registry : str 64 | The URL of the schema registry 65 | value_schema : str 66 | The value Avro schema as a JSON-encoded string, or None 67 | key_schema : str 68 | The key Avro schema as a JSON-encoded string, or None 69 | 70 | Return 71 | ------ 72 | tuple of int 73 | The ids of the published schemas as a (key_id, value_id) tuple 74 | """ 75 | if value_schema is None and key_schema is None: 76 | raise ValueError('No key nor value schema was given') 77 | value_schema_id = None 78 | # API: 79 | # https://docs.confluent.io/current/schema-registry/develop/api.html 80 | if value_schema is not None: 81 | url_value = f'{avro_schema_registry}/subjects/{topic_name}-value/versions' # NOQA 82 | value_resp = request( 83 | 'POST', 84 | url_value, 85 | data=json.dumps({"schema": value_schema}), 86 | headers={ 87 | 'Content-Type': 'application/json' 88 | } 89 | ) 90 | try: 91 | obj = value_resp.json() 92 | except JSONDecodeError: 93 | logger.error(f'Error decoding response: {value_resp.text}') 94 | raise 95 | if 'id' not in obj: 96 | logger.error(f'No id in response: {value_resp.json()}') 97 | value_schema_id = obj['id'] 98 | 99 | key_schema_id = None 100 | if key_schema is not None: 101 | url_key = f'{avro_schema_registry}/subjects/{topic_name}-key/versions' # NOQA 102 | key_resp = request( 103 | 'POST', 104 | url_key, 105 | data=json.dumps({"schema": key_schema}), 106 | headers={ 107 | 'Content-Type': 'application/json' 108 | } 109 | ) 110 | key_schema_id = key_resp.json()['id'] 111 | 112 | return (key_schema_id, value_schema_id) 113 | 114 | 115 | def create_topic( 116 | bootstrap_servers: List[str], 117 | topic_name: str, 118 | num_partitions: int, 119 | replication_factor: int, 120 | topic_config: dict = None, 121 | **kwargs, 122 | ): 123 | """Create a topic with the given number of partitions. 124 | 125 | If the topic already exists, nothing happens. 
126 | 127 | Parameters 128 | ---------- 129 | bootstrap_servers : list of str 130 | The list of Kafka servers 131 | topic_name : str 132 | The name of the topic 133 | num_partitions : int 134 | The number of partitions 135 | replication_factor : int 136 | The replication factor for this topic 137 | """ 138 | admin_config = copy.copy(KafkaAdminClient.DEFAULT_CONFIG) 139 | admin_config['bootstrap_servers'] = bootstrap_servers 140 | # Resets configurations passed by user 141 | for key in admin_config: 142 | admin_config[key] = kwargs.get(key, admin_config[key]) 143 | 144 | try: 145 | # WORKAROUND: see https://github.com/dpkp/kafka-python/pull/2048 146 | # when done remove this try catch 147 | admin_client = KafkaAdminClient(**admin_config) 148 | except NoBrokersAvailable: 149 | logger.warning('Error instantiating the client, should be solved by ' 150 | 'https://github.com/dpkp/kafka-python/pull/2048') 151 | return 152 | try: 153 | admin_client.create_topics([ 154 | NewTopic( 155 | name=topic_name, 156 | num_partitions=num_partitions, 157 | replication_factor=replication_factor, 158 | topic_configs=topic_config, 159 | ) 160 | ]) 161 | logger.info(f'Topic created: {topic_name}') 162 | except TopicAlreadyExistsError: 163 | logger.info(f'Not recreating existing topic {topic_name}') 164 | 165 | 166 | def prepare_producer( 167 | bootstrap_servers: List[str], 168 | avro_schema_registry: str, 169 | topic_name: str, 170 | num_partitions: int, 171 | replication_factor: int, 172 | value_schema: dict = None, 173 | key_schema: dict = None, 174 | topic_config: dict = None, 175 | **kwargs, 176 | ): 177 | """Ensure the topic and the schema exist and return a producer for it. 178 | 179 | The function is idempotent by design, so it can be called multiple times 180 | and it will use the schema and topic if present or create them 181 | the first time. 182 | 183 | Parameters 184 | ---------- 185 | bootstrap_servers : list of str 186 | The list of Kafka servers 187 | avro_schema_registry : str 188 | The URL of the schema registry 189 | topic_name : str 190 | The name of the topic to write to 191 | num_partitions : int 192 | The number of partitions 193 | replication_factor : int 194 | The replication factor for this topic 195 | value_schema : dict, optional 196 | The value schema, or None 197 | key_schema : dict, optional 198 | The key schema, or None 199 | Returns 200 | ------- 201 | KafkaProducer 202 | A producer ready to be used e.g. 
by calling send() 203 | """ 204 | if value_schema is None and key_schema is None: 205 | raise ValueError('No key nor value schema was given') 206 | 207 | # Check for valid key, value pairs 208 | invalid_key = set(kwargs).difference(set(KafkaProducer.DEFAULT_CONFIG)) \ 209 | .difference(set(KafkaAdminClient.DEFAULT_CONFIG)) 210 | assert not invalid_key, f'Unrecognized configs: {invalid_key}' 211 | 212 | create_topic( 213 | bootstrap_servers, 214 | topic_name, 215 | num_partitions, 216 | replication_factor, 217 | topic_config, 218 | **kwargs, 219 | ) 220 | 221 | parsed_value_schema = None 222 | default_values = {} 223 | if value_schema is not None: 224 | parsed_value_schema = parse_schema(value_schema) 225 | # store the default values to remove 226 | # the values from the messages when identical 227 | default_values = { 228 | field['name']: field['default'] 229 | for field in parsed_value_schema['fields'] 230 | if 'default' in field 231 | } 232 | 233 | parsed_key_schema = None 234 | default_keys = {} 235 | if key_schema is not None: 236 | parsed_key_schema = parse_schema(key_schema) 237 | if key_schema != "string": 238 | # store the default values to remove 239 | # the values from the messages when identical 240 | default_keys = { 241 | field['name']: field['default'] 242 | for field in parsed_key_schema['fields'] 243 | if 'default' in field 244 | } 245 | 246 | key_schema_id, value_schema_id = publish_schemas( 247 | topic_name, 248 | avro_schema_registry, 249 | value_schema=( 250 | json.dumps(value_schema) 251 | if value_schema is not None else None), 252 | key_schema=( 253 | json.dumps(key_schema) 254 | if key_schema is not None else None), 255 | ) 256 | 257 | def avro_record_value_writer( 258 | record, 259 | schema=parsed_value_schema, 260 | value_schema_id=value_schema_id, 261 | default_values=default_values, 262 | ): 263 | buf = BytesIO() 264 | buf.write(struct.pack('>bI', 0, value_schema_id)) 265 | for k, v in default_values.items(): 266 | if record.get(k) == v and v is not None: 267 | del record[k] 268 | schemaless_writer(buf, schema, record) 269 | return buf.getvalue() 270 | 271 | def avro_record_key_writer( 272 | record, 273 | schema=parsed_key_schema, 274 | key_schema_id=key_schema_id, 275 | default_keys=default_keys, 276 | ): 277 | buf = BytesIO() 278 | buf.write(struct.pack('>bI', 0, key_schema_id)) 279 | for k, v in default_keys.items(): 280 | if record.get(k) == v and v is not None: 281 | del record[k] 282 | schemaless_writer(buf, schema, record) 283 | return buf.getvalue() 284 | 285 | producer_config = copy.copy(KafkaProducer.DEFAULT_CONFIG) 286 | # Default configurations 287 | # bootstrap servers 288 | producer_config['bootstrap_servers'] = bootstrap_servers 289 | # notice that the serializer are called even with None, hence the check 290 | producer_config['value_serializer'] = ( 291 | avro_record_value_writer if value_schema else None) 292 | producer_config['key_serializer'] = ( 293 | avro_record_key_writer if key_schema else None) 294 | # compression, note that is done on a whole batch 295 | producer_config['compression_type'] = 'gzip' 296 | # time to get an initial answer from the brokers when initializing 297 | # the default is 2 seconds and in case of slow network breaks the app 298 | producer_config['api_version_auto_timeout_ms'] = 10 * 1000 299 | # accumulate messages for these ms before sending them 300 | producer_config['linger_ms'] = 1000 301 | 302 | # Resets configurations passed by user 303 | for key in producer_config: 304 | producer_config[key] = kwargs.get(key, 
producer_config[key]) 305 | 306 | return KafkaProducer(**producer_config) 307 | --------------------------------------------------------------------------------
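Note on consuming: the producer above writes values in the Confluent wire format implemented by `avro_record_value_writer` — one zero magic byte, a big-endian 4-byte schema id, then the schemaless Avro payload. The library itself does not ship a consumer, so the snippet below is a minimal, hypothetical reader sketch built on kafka-python and fastavro. It assumes the writer schema is already known (a complete implementation would instead fetch it from the schema registry using the embedded schema id), and the topic name is a placeholder.

```python
from io import BytesIO
import struct

from fastavro import parse_schema, schemaless_reader
from kafka import KafkaConsumer

# Assumed writer schema, identical to SAMPLE_SCHEMA used in the README and tests.
SAMPLE_SCHEMA = {
    "type": "record",
    "name": "TestType",
    "fields": [
        {"name": "age", "type": "int"},
        {"name": "name", "type": ["null", "string"]}
    ]
}
PARSED_SCHEMA = parse_schema(SAMPLE_SCHEMA)


def decode_value(raw_bytes):
    """Decode one value produced by prepare_producer()."""
    buf = BytesIO(raw_bytes)
    # Read the 5-byte header written by struct.pack('>bI', 0, value_schema_id);
    # the schema id could be used to look up the writer schema in the registry.
    _magic, _schema_id = struct.unpack('>bI', buf.read(5))
    return schemaless_reader(buf, PARSED_SCHEMA)


consumer = KafkaConsumer(
    'my-topic',  # placeholder topic name
    bootstrap_servers=['localhost:9092'],
    value_deserializer=decode_value,
    auto_offset_reset='earliest',
)

for message in consumer:
    print(message.value)
```

Each `message.value` then arrives as the Python dict decoded from the Avro payload.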