├── tests ├── __init__.py └── test_with_kafka.py ├── kafka_schema_registry ├── version.py └── __init__.py ├── requirements.txt ├── .gitignore ├── requirements-dev.txt ├── setup.cfg ├── setup.py ├── LICENSE ├── .github └── workflows │ └── python-package.yaml ├── Makefile ├── CHANGELOG.md └── README.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /kafka_schema_registry/version.py: -------------------------------------------------------------------------------- 1 | __VERSION__ = '0.2.2' 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | fastavro==1.12.1 2 | kafka-python==2.3.0 3 | requests==2.32.5 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | *.py[cod] 3 | 4 | build/ 5 | dist/ 6 | *.egg-info/ 7 | 8 | htmlcov/ 9 | .coverage 10 | 11 | venv/ 12 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | pytest==8.4.1 2 | pytest-cov==6.2.1 3 | flake8==7.3.0 4 | responses==0.25.7 5 | twine==6.1.0 6 | wheel==0.45.1 7 | 8 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [tool:pytest] 2 | addopts = 3 | --cov=kafka_schema_registry 4 | --cov=tests 5 | --cov-report=html 6 | --cov-report=term-missing:skip-covered 7 | 8 | [flake8] 9 | exclude = venv,build 10 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | meta = {} 4 | exec(open('./kafka_schema_registry/version.py').read(), meta) 5 | meta['long_description'] = open('./README.md').read() 6 | 7 | 8 | setup( 9 | name='kafka-schema-registry', 10 | version=meta['__VERSION__'], 11 | description='Kafka and schema registry integration', 12 | long_description=meta['long_description'], 13 | long_description_content_type='text/markdown', 14 | keywords='kafka schema-registry', 15 | author='FlixTech', 16 | author_email="open-source@flixbus.com", 17 | url='https://github.com/flix-tech/kafka-schema-registry', 18 | project_urls={ 19 | "Changelog": "https://github.com/flix-tech/kafka-schema-registry/blob/master/CHANGELOG.md", # noqa 20 | "Source": 'https://github.com/flix-tech/kafka-schema-registry', 21 | }, 22 | python_requires='>=3.9', 23 | install_requires=[ 24 | 'fastavro', 25 | 'kafka-python', 26 | 'requests', 27 | ], 28 | packages=['kafka_schema_registry'], 29 | license='MIT', 30 | ) 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 FlixMobility Tech GmbH 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, 
sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.github/workflows/python-package.yaml: -------------------------------------------------------------------------------- 1 | name: CI/CD Pipeline 2 | 3 | on: 4 | - push 5 | - pull_request 6 | 7 | jobs: 8 | test: 9 | name: Test Python ${{ matrix.python-version }} 10 | runs-on: ubuntu-latest 11 | 12 | strategy: 13 | fail-fast: false 14 | matrix: 15 | python-version: 16 | - "3.9" 17 | - "3.10" 18 | - "3.11" 19 | - "3.12" 20 | 21 | 22 | steps: 23 | - uses: actions/checkout@v2 24 | 25 | - name: Set up Python ${{ matrix.python-version }} 26 | uses: actions/setup-python@v2 27 | with: 28 | python-version: ${{ matrix.python-version }} 29 | 30 | - name: Run tests 31 | run: make test 32 | 33 | - name: Run linter 34 | run: make lint 35 | 36 | publish: 37 | name: Publish package to PyPI 38 | if: startsWith(github.ref, 'refs/tags') 39 | runs-on: ubuntu-latest 40 | needs: test 41 | steps: 42 | - uses: actions/checkout@v2 43 | 44 | - uses: actions/setup-python@v2 45 | 46 | - name: Publish package to PyPI 47 | run: make release 48 | env: 49 | TWINE_USERNAME: __token__ 50 | TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} 51 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # system python interpreter. used only to create virtual environment 2 | PY = python3 3 | VENV = venv 4 | BIN=$(VENV)/bin 5 | 6 | 7 | ifeq ($(OS), Windows_NT) 8 | BIN=$(VENV)/Scripts 9 | PY=python 10 | endif 11 | 12 | all: lint test 13 | 14 | $(VENV): requirements.txt requirements-dev.txt setup.py 15 | $(PY) -m venv $(VENV) 16 | # required since Python 3.12 17 | $(BIN)/pip install setuptools 18 | $(BIN)/pip install --upgrade -r requirements.txt 19 | $(BIN)/pip install --upgrade -r requirements-dev.txt 20 | $(BIN)/pip install -e . 21 | touch $(VENV) 22 | 23 | 24 | .PHONY: start-redpanda 25 | start-redpanda: 26 | docker run --name=redpanda-1 --rm \ 27 | -p 9092:9092 \ 28 | vectorized/redpanda:latest \ 29 | start \ 30 | --overprovisioned \ 31 | --smp 1 \ 32 | --memory 128M \ 33 | --reserve-memory 0M \ 34 | --node-id 0 \ 35 | --check=false 36 | 37 | .PHONY: test 38 | test: $(VENV) 39 | $(BIN)/pytest 40 | 41 | .PHONY: lint 42 | lint: $(VENV) 43 | $(BIN)/flake8 44 | 45 | .PHONY: release 46 | release: $(VENV) 47 | rm -rf dist 48 | $(BIN)/python setup.py sdist bdist_wheel 49 | $(BIN)/twine upload dist/* 50 | 51 | .PHONY: clean 52 | clean: 53 | rm -rf build dist *.egg-info 54 | rm -rf $(VENV) 55 | find . -type f -name *.pyc -delete 56 | find . 
-type d -name __pycache__ -delete 57 | # coverage 58 | rm -rf htmlcov .coverage 59 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## [Unreleased] -- YYYY-MM-DD 4 | 5 | ## [0.2.2] -- 2025-07-25 6 | 7 | * Upgrade kafka-python to 2.2.15 8 | 9 | ## [0.2.1] -- 2025-03-04 10 | 11 | * Move back to dpkp/kafka-python, it's maintained again 12 | 13 | ## [0.2.0] -- 2024-12-09 14 | 15 | * Update dependencies 16 | * Show the raw response in case of JSON errors from the registry 17 | 18 | 19 | ## [0.1.2] -- 2022-07-13 20 | 21 | * Pass per-topic config (used when creating a topic) as a dedicated variable, not as part of the Client configs 22 | 23 | ## [0.1.1] -- 2022-07-12 24 | 25 | * Fixed API config params 26 | prepare_producer() uses two APIs: 27 | 1. KafkaAdminClient -> creates topics 28 | 2. KafkaProducer -> sends events to the Kafka topic 29 | The two APIs do not accept the same config parameters, so it was not possible to set API-specific parameters without raising an "Unrecognized configs" error. This change makes sure the correct configs are passed to the respective APIs. 30 | 31 | ## [0.1.0] -- 2022-07-12 32 | 33 | * Added Python 3.10 to test suite 34 | 35 | ## [0.0.4] -- 2022-01-28 36 | 37 | * Propagate extra arguments to the Kafka library (e.g. for authentication) 38 | 39 | ## [0.0.3] -- 2021-07-06 40 | 41 | * Fixed package name 42 | 43 | ## [0.0.2] -- 2021-07-06 44 | 45 | * Dummy release to test gh-actions to pypi 46 | 47 | ## [0.0.1] -- 2021-07-05 48 | 49 | * Initial Release -- you probably should not use this at this point. 50 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # kafka-schema-registry 2 | 3 | This library allows you to create topics on Kafka, associate them with a 4 | Confluent Schema Registry, and publish messages on them. 5 | 6 | It takes care of: 7 | * creating the topic 8 | * publishing the associated schema (or updating an existing one) 9 | * serializing and publishing messages to Kafka 10 | 11 | It works with [kafka-python][], and extra arguments are forwarded to it. 12 | 13 | [kafka-python]: https://github.com/dpkp/kafka-python 14 | 15 | 16 | ## Installing 17 | 18 | ```sh 19 | pip install kafka-schema-registry 20 | ``` 21 | 22 | ## Usage 23 | 24 | ```python 25 | from kafka_schema_registry import prepare_producer 26 | 27 | SAMPLE_SCHEMA = { 28 | "type": "record", 29 | "name": "TestType", 30 | "fields": [ 31 | {"name": "age", "type": "int"}, 32 | {"name": "name", "type": ["null", "string"]} 33 | ] 34 | } 35 | 36 | topic_name = 'my-topic' 37 | producer = prepare_producer( 38 | ['localhost:9092'], 39 | 'http://schemaregistry', 40 | topic_name, 41 | 1,  # number of partitions 42 | 1,  # replication factor 43 | value_schema=SAMPLE_SCHEMA, 44 | ) 45 | 46 | producer.send(topic_name, {'age': 34}) 47 | producer.send(topic_name, {'age': 9000, 'name': 'john'}) 48 | ``` 49 | 50 | ## Running the tests 51 | 52 | The tests require Docker in order to start a local Redpanda instance. 
53 | 54 | * `make start-redpanda` to start the server 55 | * `make test` to configure a virtualenv and run the tests 56 | -------------------------------------------------------------------------------- /tests/test_with_kafka.py: -------------------------------------------------------------------------------- 1 | import json 2 | import uuid 3 | import socket 4 | 5 | import pytest 6 | import responses 7 | from kafka.errors import UnknownTopicOrPartitionError 8 | 9 | from kafka_schema_registry import publish_schemas 10 | from kafka_schema_registry import prepare_producer 11 | from kafka_schema_registry import create_topic, delete_topic 12 | 13 | 14 | def has_kafka(): 15 | sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 16 | result = sock.connect_ex(('localhost', 9092)) 17 | sock.close() 18 | return True if result == 0 else False 19 | 20 | 21 | SAMPLE_SCHEMA = { 22 | "type": "record", 23 | "name": "TestType", 24 | "fields": [ 25 | {"name": "age", "type": "int"}, 26 | {"name": "name", "type": ["null", "string"]} 27 | ] 28 | } 29 | 30 | 31 | def test_check_schema_presence(): 32 | with pytest.raises(ValueError) as exc: 33 | publish_schemas( 34 | 'not-really-used', 35 | 'http://schemaregistry', 36 | ) 37 | assert str(exc.value) == 'No key nor value schema was given' 38 | 39 | 40 | @responses.activate 41 | def test_publish_value_schema(): 42 | topic_name = f'test-topic-{uuid.uuid4()}' 43 | schema = dict(bla=42) 44 | responses.add( 45 | responses.POST, 46 | f'http://schemaregistry/subjects/{topic_name}-value/versions', 47 | json=dict(id=2), 48 | status=200) 49 | 50 | (k_id, v_id) = publish_schemas( 51 | topic_name, 52 | 'http://schemaregistry', 53 | value_schema=schema, 54 | ) 55 | assert json.loads(responses.calls[0].request.body) == dict(schema=schema) 56 | assert (k_id, v_id) == (None, 2) 57 | 58 | 59 | @responses.activate 60 | def test_publish_key_schema(): 61 | topic_name = f'test-topic-{uuid.uuid4()}' 62 | schema = dict(bla=42) 63 | responses.add( 64 | responses.POST, 65 | f'http://schemaregistry/subjects/{topic_name}-key/versions', 66 | json=dict(id=2), 67 | status=200) 68 | 69 | (k_id, v_id) = publish_schemas( 70 | topic_name, 71 | 'http://schemaregistry', 72 | key_schema=schema, 73 | ) 74 | assert json.loads(responses.calls[0].request.body) == dict(schema=schema) 75 | assert (k_id, v_id) == (2, None) 76 | 77 | 78 | @pytest.mark.skipif(not has_kafka(), reason="No Kafka Cluster running") 79 | @responses.activate 80 | def test_publish_messages(): 81 | topic_name = f'test-topic-{uuid.uuid4()}' 82 | responses.add( 83 | responses.POST, 84 | f'http://schemaregistry/subjects/{topic_name}-value/versions', 85 | json=dict(id=2), 86 | status=200) 87 | producer = prepare_producer( 88 | ['localhost:9092'], 89 | 'http://schemaregistry', 90 | topic_name, 91 | 1, 92 | 1, 93 | value_schema=SAMPLE_SCHEMA, 94 | ) 95 | # the message does not match 96 | with pytest.raises(ValueError): 97 | producer.send(topic_name, {'e': 34}) 98 | 99 | producer.send(topic_name, {'age': 34}) 100 | producer.send(topic_name, {'age': 9000, 'name': 'john'}) 101 | 102 | 103 | @pytest.mark.skipif(not has_kafka(), reason="No Kafka Cluster running") 104 | def test_topic_creation_deletion(): 105 | topic_name = f'test-topic-{uuid.uuid4()}' 106 | with pytest.raises(UnknownTopicOrPartitionError): 107 | delete_topic(topic_name, bootstrap_servers=['localhost:9092']) 108 | create_topic(['localhost:9092'], topic_name, 1, 1) 109 | delete_topic(topic_name, bootstrap_servers=['localhost:9092']) 110 | with 
pytest.raises(UnknownTopicOrPartitionError): 111 | delete_topic(topic_name, bootstrap_servers=['localhost:9092']) 112 | 113 | 114 | @pytest.mark.skipif(not has_kafka(), reason="No Kafka Cluster running") 115 | @responses.activate 116 | def test_correct_config_params(): 117 | """ prepare_producer() uses two API's: 118 | 1) KafkaAdminClient -> Creates topics 119 | 2) KafkaProducer -> sends events to kafka topic 120 | Both the above API's config params are not equivalent, this 121 | test makes sure correct configs are passed to the respective API's 122 | without raising any errors. 123 | """ 124 | 125 | request_timeout_ms = 30000 # Common config param 126 | batch_size = 16384 # Producer specific config 127 | topic_config = {'cleanup.policy': 'compact'} # Topic specific config 128 | topic_name = f'test-topic-{uuid.uuid4()}' 129 | responses.add( 130 | responses.POST, 131 | f'http://schemaregistry/subjects/{topic_name}-value/versions', 132 | json=dict(id=2), 133 | status=200) 134 | producer = prepare_producer( 135 | ['localhost:9092'], 136 | 'http://schemaregistry', 137 | topic_name, 138 | 1, 139 | 1, 140 | value_schema=SAMPLE_SCHEMA, 141 | request_timeout_ms=request_timeout_ms, 142 | batch_size=batch_size, 143 | topic_config=topic_config, 144 | ) 145 | 146 | producer.send(topic_name, {'age': 34}) 147 | producer.send(topic_name, {'age': 9000, 'name': 'john'}) 148 | 149 | 150 | @pytest.mark.skipif(not has_kafka(), reason="No Kafka Cluster running") 151 | @responses.activate 152 | def test_incorrect_config_params(): 153 | """ If invalid config parameters are passed then AssertionError is raised. 154 | Currently there is no way to check the valid topic configurations, 155 | hence skipped and depends on the user to provide valid configs. 156 | """ 157 | invalid_param = 'dummy' 158 | topic_name = f'test-topic-{uuid.uuid4()}' 159 | responses.add( 160 | responses.POST, 161 | f'http://schemaregistry/subjects/{topic_name}-value/versions', 162 | json=dict(id=2), 163 | status=200) 164 | with pytest.raises(AssertionError): 165 | prepare_producer( 166 | ['localhost:9092'], 167 | 'http://schemaregistry', 168 | topic_name, 169 | 1, 170 | 1, 171 | value_schema=SAMPLE_SCHEMA, 172 | invalid_param=invalid_param 173 | ) 174 | -------------------------------------------------------------------------------- /kafka_schema_registry/__init__.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | import copy 3 | import json 4 | import logging 5 | import struct 6 | from typing import List 7 | 8 | from fastavro import parse_schema, schemaless_writer 9 | from kafka import KafkaProducer, KafkaAdminClient 10 | from kafka.admin import NewTopic 11 | from kafka.errors import TopicAlreadyExistsError, NoBrokersAvailable 12 | from requests import request 13 | from requests.exceptions import JSONDecodeError 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | # the log from python-kafka is absurdly verbose, reduce it 18 | # it logs every single produced event 19 | logging.getLogger('kafka.producer.record_accumulator').setLevel(logging.INFO) 20 | logging.getLogger('kafka.producer.sender').setLevel(logging.INFO) 21 | logging.getLogger('kafka.protocol.parser').setLevel(logging.INFO) 22 | logging.getLogger('kafka.conn').setLevel(logging.INFO) 23 | logging.getLogger('kafka.producer.kafka').setLevel(logging.INFO) 24 | 25 | 26 | def delete_topic(topic_name: str, **kwargs): 27 | """Delete a topic from Kafka. 
28 | 29 | The topic is deleted synchronously, the function returns when done. 30 | Notice that Lenses and other tools can take a few minutes to show 31 | the change. 32 | 33 | Parameters 34 | ---------- 35 | topic_name : str 36 | The name of the topic to delete 37 | """ 38 | admin_config = copy.copy(KafkaAdminClient.DEFAULT_CONFIG) 39 | for key in admin_config: 40 | admin_config[key] = kwargs.get(key, admin_config[key]) 41 | 42 | admin_client = KafkaAdminClient(**admin_config) 43 | admin_client.delete_topics([topic_name]) 44 | 45 | 46 | def publish_schemas( 47 | topic_name: str, 48 | avro_schema_registry: str, 49 | value_schema: dict = None, 50 | key_schema: dict = None, 51 | ): 52 | """Publish the schema for a given topic. 53 | 54 | If the schema is already there and identical, the id is simply returned, 55 | so subsequent calls are idempotent. 56 | 57 | At least one of the schemas must be specified. 58 | 59 | Parameters 60 | ---------- 61 | topic_name : str 62 | The name of the topic 63 | avro_schema_registry : str 64 | The URL of the schema registry 65 | value_schema : str 66 | The value Avro schema as a JSON-encoded string, or None 67 | key_schema : str 68 | The key Avro schema as a JSON-encoded string, or None 69 | 70 | Return 71 | ------ 72 | tuple of int 73 | The ids of the published schemas as a (key_id, value_id) tuple 74 | """ 75 | if value_schema is None and key_schema is None: 76 | raise ValueError('No key nor value schema was given') 77 | value_schema_id = None 78 | # API: 79 | # https://docs.confluent.io/current/schema-registry/develop/api.html 80 | if value_schema is not None: 81 | url_value = f'{avro_schema_registry}/subjects/{topic_name}-value/versions' # NOQA 82 | value_resp = request( 83 | 'POST', 84 | url_value, 85 | data=json.dumps({"schema": value_schema}), 86 | headers={ 87 | 'Content-Type': 'application/json' 88 | } 89 | ) 90 | try: 91 | obj = value_resp.json() 92 | except JSONDecodeError: 93 | logger.error(f'Error decoding response: {value_resp.text}') 94 | raise 95 | if 'id' not in obj: 96 | logger.error(f'No id in response: {value_resp.json()}') 97 | value_schema_id = obj['id'] 98 | 99 | key_schema_id = None 100 | if key_schema is not None: 101 | url_key = f'{avro_schema_registry}/subjects/{topic_name}-key/versions' # NOQA 102 | key_resp = request( 103 | 'POST', 104 | url_key, 105 | data=json.dumps({"schema": key_schema}), 106 | headers={ 107 | 'Content-Type': 'application/json' 108 | } 109 | ) 110 | key_schema_id = key_resp.json()['id'] 111 | 112 | return (key_schema_id, value_schema_id) 113 | 114 | 115 | def create_topic( 116 | bootstrap_servers: List[str], 117 | topic_name: str, 118 | num_partitions: int, 119 | replication_factor: int, 120 | topic_config: dict = None, 121 | **kwargs, 122 | ): 123 | """Create a topic with the given number of partitions. 124 | 125 | If the topic already exists, nothing happens. 
126 | 127 | Parameters 128 | ---------- 129 | bootstrap_servers : list of str 130 | The list of Kafka servers 131 | topic_name : str 132 | The name of the topic 133 | num_partitions : int 134 | The number of partitions 135 | replication_factor : int 136 | The replication factor for this topic 137 | """ 138 | admin_config = copy.copy(KafkaAdminClient.DEFAULT_CONFIG) 139 | admin_config['bootstrap_servers'] = bootstrap_servers 140 | # Resets configurations passed by user 141 | for key in admin_config: 142 | admin_config[key] = kwargs.get(key, admin_config[key]) 143 | 144 | try: 145 | # WORKAROUND: see https://github.com/dpkp/kafka-python/pull/2048 146 | # when done remove this try catch 147 | admin_client = KafkaAdminClient(**admin_config) 148 | except NoBrokersAvailable: 149 | logger.warning('Error instantiating the client, should be solved by ' 150 | 'https://github.com/dpkp/kafka-python/pull/2048') 151 | return 152 | try: 153 | admin_client.create_topics([ 154 | NewTopic( 155 | name=topic_name, 156 | num_partitions=num_partitions, 157 | replication_factor=replication_factor, 158 | topic_configs=topic_config, 159 | ) 160 | ]) 161 | logger.info(f'Topic created: {topic_name}') 162 | except TopicAlreadyExistsError: 163 | logger.info(f'Not recreating existing topic {topic_name}') 164 | 165 | 166 | def prepare_producer( 167 | bootstrap_servers: List[str], 168 | avro_schema_registry: str, 169 | topic_name: str, 170 | num_partitions: int, 171 | replication_factor: int, 172 | value_schema: dict = None, 173 | key_schema: dict = None, 174 | topic_config: dict = None, 175 | **kwargs, 176 | ): 177 | """Ensure the topic and the schema exist and return a producer for it. 178 | 179 | The function is idempotent by design, so it can be called multiple times 180 | and it will use the schema and topic if present or create them 181 | the first time. 182 | 183 | Parameters 184 | ---------- 185 | bootstrap_servers : list of str 186 | The list of Kafka servers 187 | avro_schema_registry : str 188 | The URL of the schema registry 189 | topic_name : str 190 | The name of the topic to write to 191 | num_partitions : int 192 | The number of partitions 193 | replication_factor : int 194 | The replication factor for this topic 195 | value_schema : dict, optional 196 | The value schema, or None 197 | key_schema : dict, optional 198 | The key schema, or None 199 | Returns 200 | ------- 201 | KafkaProducer 202 | A producer ready to be used e.g. 
by calling send() 203 | """ 204 | if value_schema is None and key_schema is None: 205 | raise ValueError('No key nor value schema was given') 206 | 207 | # Check for valid key, value pairs 208 | invalid_key = set(kwargs).difference(set(KafkaProducer.DEFAULT_CONFIG)) \ 209 | .difference(set(KafkaAdminClient.DEFAULT_CONFIG)) 210 | assert not invalid_key, f'Unrecognized configs: {invalid_key}' 211 | 212 | create_topic( 213 | bootstrap_servers, 214 | topic_name, 215 | num_partitions, 216 | replication_factor, 217 | topic_config, 218 | **kwargs, 219 | ) 220 | 221 | parsed_value_schema = None 222 | default_values = {} 223 | if value_schema is not None: 224 | parsed_value_schema = parse_schema(value_schema) 225 | # store the default values to remove 226 | # the values from the messages when identical 227 | default_values = { 228 | field['name']: field['default'] 229 | for field in parsed_value_schema['fields'] 230 | if 'default' in field 231 | } 232 | 233 | parsed_key_schema = None 234 | default_keys = {} 235 | if key_schema is not None: 236 | parsed_key_schema = parse_schema(key_schema) 237 | if key_schema != "string": 238 | # store the default values to remove 239 | # the values from the messages when identical 240 | default_keys = { 241 | field['name']: field['default'] 242 | for field in parsed_key_schema['fields'] 243 | if 'default' in field 244 | } 245 | 246 | key_schema_id, value_schema_id = publish_schemas( 247 | topic_name, 248 | avro_schema_registry, 249 | value_schema=( 250 | json.dumps(value_schema) 251 | if value_schema is not None else None), 252 | key_schema=( 253 | json.dumps(key_schema) 254 | if key_schema is not None else None), 255 | ) 256 | 257 | def avro_record_value_writer( 258 | record, 259 | schema=parsed_value_schema, 260 | value_schema_id=value_schema_id, 261 | default_values=default_values, 262 | ): 263 | buf = BytesIO() 264 | buf.write(struct.pack('>bI', 0, value_schema_id)) 265 | for k, v in default_values.items(): 266 | if record.get(k) == v and v is not None: 267 | del record[k] 268 | schemaless_writer(buf, schema, record) 269 | return buf.getvalue() 270 | 271 | def avro_record_key_writer( 272 | record, 273 | schema=parsed_key_schema, 274 | key_schema_id=key_schema_id, 275 | default_keys=default_keys, 276 | ): 277 | buf = BytesIO() 278 | buf.write(struct.pack('>bI', 0, key_schema_id)) 279 | for k, v in default_keys.items(): 280 | if record.get(k) == v and v is not None: 281 | del record[k] 282 | schemaless_writer(buf, schema, record) 283 | return buf.getvalue() 284 | 285 | producer_config = copy.copy(KafkaProducer.DEFAULT_CONFIG) 286 | # Default configurations 287 | # bootstrap servers 288 | producer_config['bootstrap_servers'] = bootstrap_servers 289 | # notice that the serializer are called even with None, hence the check 290 | producer_config['value_serializer'] = ( 291 | avro_record_value_writer if value_schema else None) 292 | producer_config['key_serializer'] = ( 293 | avro_record_key_writer if key_schema else None) 294 | # compression, note that is done on a whole batch 295 | producer_config['compression_type'] = 'gzip' 296 | # time to get an initial answer from the brokers when initializing 297 | # the default is 2 seconds and in case of slow network breaks the app 298 | producer_config['api_version_auto_timeout_ms'] = 10 * 1000 299 | # accumulate messages for these ms before sending them 300 | producer_config['linger_ms'] = 1000 301 | 302 | # Resets configurations passed by user 303 | for key in producer_config: 304 | producer_config[key] = kwargs.get(key, 
producer_config[key]) 305 | 306 | return KafkaProducer(**producer_config) 307 | --------------------------------------------------------------------------------
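Note on consuming: the producer above writes values in the Confluent wire format implemented by `avro_record_value_writer` — one zero magic byte, a big-endian 4-byte schema id, then the schemaless Avro payload. The library itself does not ship a consumer, so the snippet below is a minimal, hypothetical reader sketch built on kafka-python and fastavro. It assumes the writer schema is already known (a complete implementation would instead fetch it from the schema registry using the embedded schema id), and the topic name is a placeholder.

```python
from io import BytesIO
import struct

from fastavro import parse_schema, schemaless_reader
from kafka import KafkaConsumer

# Assumed writer schema, identical to SAMPLE_SCHEMA used in the README and tests.
SAMPLE_SCHEMA = {
    "type": "record",
    "name": "TestType",
    "fields": [
        {"name": "age", "type": "int"},
        {"name": "name", "type": ["null", "string"]}
    ]
}
PARSED_SCHEMA = parse_schema(SAMPLE_SCHEMA)


def decode_value(raw_bytes):
    """Decode one value produced by prepare_producer()."""
    buf = BytesIO(raw_bytes)
    # Read the 5-byte header written by struct.pack('>bI', 0, value_schema_id);
    # the schema id could be used to look up the writer schema in the registry.
    _magic, _schema_id = struct.unpack('>bI', buf.read(5))
    return schemaless_reader(buf, PARSED_SCHEMA)


consumer = KafkaConsumer(
    'my-topic',  # placeholder topic name
    bootstrap_servers=['localhost:9092'],
    value_deserializer=decode_value,
    auto_offset_reset='earliest',
)

for message in consumer:
    print(message.value)
```

Each `message.value` then arrives as the Python dict decoded from the Avro payload.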