├── .editorconfig ├── .github └── workflows │ └── main.yml ├── .gitignore ├── .python-version ├── LICENSE ├── README.md ├── setup.cfg ├── setup.py ├── src └── aws_schema_registry │ ├── __init__.py │ ├── adapter │ ├── __init__.py │ └── kafka.py │ ├── avro.py │ ├── client.py │ ├── codec.py │ ├── exception.py │ ├── jsonschema.py │ ├── naming.py │ ├── schema.py │ └── serde.py ├── tests ├── integration │ ├── conftest.py │ ├── java │ │ ├── README.md │ │ ├── pom.xml │ │ ├── src │ │ │ └── main │ │ │ │ └── java │ │ │ │ └── com │ │ │ │ └── disasteraware │ │ │ │ └── aws │ │ │ │ └── schemaregistry │ │ │ │ └── App.java │ │ ├── test_java_integration.py │ │ ├── user.avsc │ │ └── user.json │ └── kafka_test │ │ ├── README.md │ │ ├── docker-compose.yml │ │ ├── test_kafka_integration.py │ │ ├── user.json │ │ ├── user.v1.avsc │ │ └── user.v2.avsc ├── test_avro.py ├── test_client.py ├── test_codec.py └── test_jsonschema.py └── tox.ini /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | charset = utf-8 5 | insert_final_newline = true 6 | indent_style = space 7 | end_of_line = lf 8 | trim_trailing_whitespace = true 9 | 10 | [*.py] 11 | indent_size = 4 12 | 13 | [*.{yml}] 14 | indent_size = 2 15 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: main 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | release: 9 | branches: [ main ] 10 | types: [ created ] 11 | 12 | jobs: 13 | lint: 14 | runs-on: ubuntu-latest 15 | 16 | steps: 17 | - uses: actions/checkout@v2 18 | 19 | - uses: actions/setup-python@v2 20 | 21 | - name: Install dependencies 22 | run: | 23 | python -m pip install --upgrade pip 24 | pip install tox 25 | 26 | - run: tox -e flake8 27 | 28 | typecheck: 29 | needs: lint 30 | runs-on: ubuntu-latest 31 | 32 | steps: 33 
| - uses: actions/checkout@v2 34 | 35 | - uses: actions/setup-python@v2 36 | 37 | - name: Install dependencies 38 | run: | 39 | python -m pip install --upgrade pip 40 | pip install tox 41 | 42 | - run: tox -e mypy 43 | 44 | test: 45 | needs: lint 46 | runs-on: ${{ matrix.operating-system }} 47 | timeout-minutes: 15 48 | 49 | strategy: 50 | fail-fast: false 51 | matrix: 52 | operating-system: [ubuntu-latest, macos-latest] 53 | python-version: ['3.8', '3.9', '3.10', '3.11'] 54 | 55 | steps: 56 | - uses: actions/checkout@v2 57 | 58 | - name: Set up Python ${{ matrix.python-version }} 59 | uses: actions/setup-python@v2 60 | with: 61 | python-version: ${{ matrix.python-version }} 62 | 63 | - name: Install dependencies 64 | run: | 65 | python -m pip install --upgrade pip 66 | pip install tox tox-gh-actions 67 | 68 | - name: Test 69 | run: tox 70 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # python 2 | venv* 3 | .venv* 4 | env 5 | .env 6 | .tox 7 | .toxenv 8 | __pycache__ 9 | *.egg-info 10 | build 11 | dist 12 | 13 | # IDEs 14 | .vscode 15 | .idea 16 | *.iml 17 | 18 | tests/integration/java/target 19 | -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.11.4 2 | 3.10.12 3 | 3.9.7 4 | 3.8.12 5 | 3.7.12 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AWS Glue Schema Registry for Python 2 | 3 | [![PyPI](https://img.shields.io/pypi/v/aws-glue-schema-registry.svg)](https://pypi.org/project/aws-glue-schema-registry) 4 | [![PyPI](https://img.shields.io/pypi/pyversions/aws-glue-schema-registry)](https://pypi.org/project/aws-glue-schema-registry) 5 | [![main](https://github.com/DisasterAWARE/aws-glue-schema-registry-python/actions/workflows/main.yml/badge.svg)](https://github.com/DisasterAWARE/aws-glue-schema-registry-python/actions/workflows/main.yml) 6 | 7 | Use the AWS Glue Schema Registry in Python projects. 8 | 9 | This library is a partial port of [aws-glue-schema-registry](https://github.com/awslabs/aws-glue-schema-registry) which implements a subset of its features with full compatibility. 
10 | 11 | ## Feature Support 12 | 13 | Feature | Java Library | Python Library | Notes 14 | :------ | :----------- | :------------- | :---- 15 | Serialization and deserialization using schema registry | ✔️ | ✔️ 16 | Avro message format | ✔️ | ✔️ 17 | JSON Schema message format | ✔️ | ✔️ 18 | Kafka Streams support | ✔️ | | N/A for Python, Kafka Streams is Java-only 19 | Compression | ✔️ | ✔️ | 20 | Local schema cache | ✔️ | ✔️ 21 | Schema auto-registration | ✔️ | ✔️ 22 | Evolution checks | ✔️ | ✔️ 23 | Migration from a third party Schema Registry | ✔️ | ✔️ 24 | Flink support | ✔️ | ❌ 25 | Kafka Connect support | ✔️ | | N/A for Python, Kafka Connect is Java-only 26 | 27 | ## Installation - PyPI (Recommended) 28 | 29 | `pip install aws-glue-schema-registry` 30 | 31 | ## Installation - local 32 | 33 | Clone this repository and run: 34 | 35 | ``` 36 | python setup.py install -e . 37 | ``` 38 | 39 | This library includes opt-in extra dependencies that enable support for certain features. For example, to use the schema registry with [kafka-python](https://pypi.org/project/kafka-python/), you should install the `kafka-python` extra: 40 | 41 | ``` 42 | python setup.py install -e .[kafka-python] 43 | ``` 44 | 45 | Extra name | Purpose 46 | :--------- | :------ 47 | kafka-python | Provides adapter classes to plug into `kafka-python` 48 | 49 | ## Usage 50 | 51 | First use `boto3` to create a low-level AWS Glue client: 52 | 53 | ```python 54 | import boto3 55 | 56 | # Pass your AWS credentials or profile information here 57 | session = boto3.Session(access_key_id=xxx, secret_access_key=xxx, region_name='us-west-2') 58 | 59 | glue_client = session.client('glue') 60 | ``` 61 | 62 | See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/quickstart.html#configuration for more information on configuring boto3. 
63 | 64 | Send Kafka messages with `SchemaRegistrySerializer`: 65 | 66 | ```python 67 | from aws_schema_registry import DataAndSchema, SchemaRegistryClient 68 | from aws_schema_registry.avro import AvroSchema 69 | 70 | # In this example we will use kafka-python as our Kafka client, 71 | # so we need to have the `kafka-python` extras installed and use 72 | # the kafka adapter. 73 | from aws_schema_registry.adapter.kafka import KafkaSerializer 74 | from kafka import KafkaProducer 75 | 76 | # Create the schema registry client, which is a façade around the boto3 glue client 77 | client = SchemaRegistryClient(glue_client, 78 | registry_name='my-registry') 79 | 80 | # Create the serializer 81 | serializer = KafkaSerializer(client) 82 | 83 | # Create the producer 84 | producer = KafkaProducer(value_serializer=serializer) 85 | 86 | # Our producer needs a schema to send along with the data. 87 | # In this example we're using Avro, so we'll load an .avsc file. 88 | with open('user.avsc', 'r') as schema_file: 89 | schema = AvroSchema(schema_file.read()) 90 | 91 | # Send message data along with schema 92 | data = { 93 | 'name': 'John Doe', 94 | 'favorite_number': 6 95 | } 96 | producer.send('my-topic', value=(data, schema)) 97 | # the value MUST be a tuple when we're using the KafkaSerializer 98 | ``` 99 | 100 | Read Kafka messages with `SchemaRegistryDeserializer`: 101 | 102 | ```python 103 | from aws_schema_registry import SchemaRegistryClient 104 | 105 | # In this example we will use kafka-python as our Kafka client, 106 | # so we need to have the `kafka-python` extras installed and use 107 | # the kafka adapter. 
108 | from aws_schema_registry.adapter.kafka import KafkaDeserializer 109 | from kafka import KafkaConsumer 110 | 111 | # Create the schema registry client, which is a façade around the boto3 glue client 112 | client = SchemaRegistryClient(glue_client, 113 | registry_name='my-registry') 114 | 115 | # Create the deserializer 116 | deserializer = KafkaDeserializer(client) 117 | 118 | # Create the consumer 119 | consumer = KafkaConsumer('my-topic', value_deserializer=deserializer) 120 | 121 | # Now use the consumer normally 122 | for message in consumer: 123 | # The deserializer produces DataAndSchema instances 124 | value: DataAndSchema = message.value 125 | # which are NamedTuples with a `data` and `schema` property 126 | value.data == value[0] 127 | value.schema == value[1] 128 | # and can be deconstructed 129 | data, schema = value 130 | ``` 131 | 132 | ## Contributing 133 | 134 | Clone this repository and install development dependencies: 135 | 136 | ``` 137 | pip install -e .[dev] 138 | ``` 139 | 140 | Run the linter and tests with tox before committing. After committing, check Github Actions to see the result of the automated checks. 141 | 142 | ### Linting 143 | 144 | Lint the code with: 145 | 146 | ``` 147 | flake8 148 | ``` 149 | 150 | Run the type checker with: 151 | 152 | ``` 153 | mypy 154 | ``` 155 | 156 | ### Tests 157 | 158 | Tests go under the `tests/` directory. All tests outside of `tests/integration` are unit tests with no external dependencies. 159 | 160 | Tests under `tests/integration` are integration test that interact with external resources and/or real AWS schema registries. They generally run slower and require some additional configuration. 
161 | 162 | Run just the unit tests with: 163 | 164 | ``` 165 | pytest --ignore tests/integration 166 | ``` 167 | 168 | All integration tests use the following environment variables: 169 | 170 | - `AWS_ACCESS_KEY_ID` 171 | - `AWS_SECRET_ACCESS_KEY` 172 | - `AWS_SESSION_TOKEN` 173 | - `AWS_REGION` 174 | - `AWS_PROFILE` 175 | - `CLEANUP_REGISTRY`: Set to any value to prevent the test from destroying the registry created during the test, allowing you to inspect its contents. 176 | 177 | If no `AWS_` environment variables are set, `boto3` will try to load credentials from your default AWS profile. 178 | 179 | See individual integration test directories for additional requirements and setup instructions. 180 | 181 | ### Tox 182 | 183 | This project uses [Tox](https://tox.wiki/en/latest/) to run tests across multiple Python versions. 184 | 185 | Install Tox with: 186 | 187 | ``` 188 | pip install tox 189 | ``` 190 | 191 | and run it with: 192 | 193 | ``` 194 | tox 195 | ``` 196 | 197 | Note that Tox requires the tested python versions to be installed. One convenient way to manage this is using [pyenv](https://github.com/pyenv/pyenv#installation). See the `.python-versions` file for the Python versions that need to be installed. 198 | 199 | 200 | ### Releases 201 | 202 | Assuming pypi permissions: 203 | 204 | ``` 205 | python -m build 206 | twine upload -r testpypi dist/* 207 | twine upload dist/* 208 | ``` -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = aws-glue-schema-registry 3 | version = 1.1.3 4 | description = Use the AWS Glue Schema Registry. 
5 | long_description = file: README.md 6 | long_description_content_type = text/markdown 7 | author = Corentin Debost 8 | author_email = develop@disasteraware.com 9 | license = Apache Software License 10 | license_files = LICENSE 11 | classifiers = 12 | Development Status :: 4 - Beta 13 | Intended Audience :: Developers 14 | License :: OSI Approved :: Apache Software License 15 | Programming Language :: Python :: 3 16 | Programming Language :: Python :: 3 :: Only 17 | Programming Language :: Python :: 3.8 18 | Programming Language :: Python :: 3.9 19 | Programming Language :: Python :: 3.10 20 | Programming Language :: Python :: 3.11 21 | Topic :: Software Development :: Libraries 22 | Typing :: Typed 23 | keywords = aws, glue, schema, registry, avro 24 | project_urls = 25 | Source=https://github.com/DisasterAWARE/aws-glue-schema-registry-python 26 | 27 | [options] 28 | packages = 29 | aws_schema_registry 30 | aws_schema_registry.adapter 31 | package_dir = 32 | =src 33 | python_requires = >=3.8 34 | install_requires = 35 | boto3>=1.17.102 36 | typing-extensions>=3.7.4.3;python_version<"3.8" 37 | fastavro>=1.4.5 38 | orjson~=3.6.0;python_version<"3.11" 39 | orjson>=3.7.7;python_version>="3.11" 40 | fastjsonschema~=2.15 41 | setup_requires = 42 | setuptools 43 | 44 | [options.extras_require] 45 | dev = 46 | pytest>=6 47 | flake8>=3 48 | kafka-python = 49 | kafka-python>=2 50 | 51 | [mypy] 52 | files = src,tests 53 | 54 | [mypy-kafka.*] 55 | ignore_missing_imports = True 56 | 57 | [mypy-boto3.*] 58 | ignore_missing_imports = True 59 | 60 | [mypy-fastjsonschema.*] 61 | ignore_missing_imports = True 62 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | if __name__ == '__main__': 4 | setuptools.setup() 5 | -------------------------------------------------------------------------------- 
/src/aws_schema_registry/__init__.py: -------------------------------------------------------------------------------- 1 | from .client import SchemaRegistryClient 2 | from .exception import SchemaRegistryException 3 | from .schema import ( 4 | CompatibilityMode, DataFormat, Schema, SchemaVersion, ValidationError 5 | ) 6 | from .serde import DataAndSchema, KafkaDeserializer, KafkaSerializer 7 | 8 | __version__ = '1.0.0' 9 | 10 | __all__ = [ 11 | 'CompatibilityMode', 12 | 'DataAndSchema', 13 | 'DataFormat', 14 | 'KafkaDeserializer', 15 | 'KafkaSerializer', 16 | 'Schema', 17 | 'SchemaRegistryClient', 18 | 'SchemaRegistryException', 19 | 'SchemaVersion', 20 | 'ValidationError' 21 | ] 22 | -------------------------------------------------------------------------------- /src/aws_schema_registry/adapter/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DisasterAWARE/aws-glue-schema-registry-python/e514f86151abc0b9247342ecc9b4a6bea67c3fe4/src/aws_schema_registry/adapter/__init__.py -------------------------------------------------------------------------------- /src/aws_schema_registry/adapter/kafka.py: -------------------------------------------------------------------------------- 1 | """Adapter for kafka-python. 
2 | 3 | https://pypi.org/project/kafka-python/ 4 | """ 5 | 6 | from kafka.serializer import Serializer, Deserializer 7 | 8 | from aws_schema_registry import ( 9 | KafkaSerializer as _KafkaSerializer, 10 | KafkaDeserializer as _KafkaDeserializer 11 | ) 12 | 13 | 14 | class KafkaSerializer(Serializer): 15 | def __init__(self, *args, **kwargs): 16 | self._serializer = _KafkaSerializer(*args, **kwargs) 17 | 18 | def serialize(self, topic, value): 19 | return self._serializer.serialize(topic, value) 20 | 21 | 22 | class KafkaDeserializer(Deserializer): 23 | def __init__(self, *args, **kwargs): 24 | self._deserializer = _KafkaDeserializer(*args, **kwargs) 25 | 26 | def deserialize(self, topic, bytes_): 27 | return self._deserializer.deserialize(topic, bytes_) 28 | -------------------------------------------------------------------------------- /src/aws_schema_registry/avro.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from io import BytesIO 4 | import json 5 | from typing import Union 6 | 7 | import fastavro 8 | 9 | from aws_schema_registry.schema import DataFormat, Schema, ValidationError 10 | 11 | 12 | class AvroSchema(Schema): 13 | """Implementation of the `Schema` protocol for Avro schemas. 
14 | 15 | Arguments: 16 | definition: the schema, either as a parsed dict or a string 17 | return_record_name: if true, when reading a union of records, 18 | the result will be a tuple where the first value is the 19 | name of the record and the second value is the record 20 | itself 21 | """ 22 | 23 | def __init__(self, definition: Union[str, dict], 24 | return_record_name: bool = False): 25 | if isinstance(definition, str): 26 | self._dict = json.loads(definition) 27 | else: 28 | self._dict = definition 29 | self._parsed = fastavro.parse_schema(self._dict) 30 | self.return_record_name = return_record_name 31 | 32 | def __hash__(self): 33 | return hash(str(self)) 34 | 35 | def __eq__(self, other): 36 | return isinstance(other, AvroSchema) and \ 37 | self._parsed == other._parsed and \ 38 | self.return_record_name == other.return_record_name 39 | 40 | def __str__(self): 41 | return json.dumps(self._dict) 42 | 43 | def __repr__(self): 44 | return '' % self._dict 45 | 46 | @property 47 | def data_format(self) -> DataFormat: 48 | return 'AVRO' 49 | 50 | @property 51 | def fqn(self) -> str: 52 | # https://github.com/fastavro/fastavro/issues/415 53 | return self._parsed.get('name', self._parsed['type']) 54 | 55 | def read(self, bytes_: bytes): 56 | b = BytesIO(bytes_) 57 | value = fastavro.schemaless_reader( 58 | b, 59 | self._parsed, 60 | return_record_name=self.return_record_name 61 | ) 62 | b.close() 63 | return value 64 | 65 | def write(self, data) -> bytes: 66 | b = BytesIO() 67 | fastavro.schemaless_writer(b, self._parsed, data) 68 | value = b.getvalue() 69 | b.close() 70 | return value 71 | 72 | def validate(self, data): 73 | try: 74 | fastavro.validate(data, self._parsed) 75 | except Exception as e: 76 | # the message will contain space characters, json.loads + str is a 77 | # (relatively inefficient) way to remove them 78 | detail: list[str] = json.loads(str(e)) 79 | raise ValidationError(str(detail)) from e 80 | 
-------------------------------------------------------------------------------- /src/aws_schema_registry/client.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | import logging 3 | import time 4 | import random 5 | import string 6 | from typing import ContextManager, Mapping, Optional 7 | from uuid import UUID 8 | 9 | from aws_schema_registry.schema import ( 10 | CompatibilityMode, DataFormat, SchemaVersion 11 | ) 12 | from aws_schema_registry.exception import SchemaRegistryException 13 | 14 | LOG = logging.getLogger(__name__) 15 | 16 | SCHEMA_VERSION_NOT_FOUND_MSG = 'Schema version is not found.' 17 | SCHEMA_NOT_FOUND_MSG = 'Schema is not found.' 18 | 19 | DEFAULT_COMPATIBILITY_MODE: CompatibilityMode = 'BACKWARD' 20 | 21 | 22 | def schema_name_from_arn(arn: str) -> str: 23 | return arn.split('/')[-1] 24 | 25 | 26 | class SchemaRegistryClient: 27 | """Façade that makes the registry API easier to use. 28 | 29 | Simplifies the large boto glue client to just operations on a 30 | single registry at a time and hides HTTP communication details. 31 | 32 | Arguments: 33 | glue_client: glue client created by `botocore`/`boto3`. 34 | registry_name: the name of the registry this client will work 35 | against. If not specified, defaults to the default registry 36 | which is named 'default-registry'. 37 | max_wait_attempts: maximum number of times to check whether a 38 | newly created schema has become available before reporting 39 | an error. 40 | wait_interval_seconds: delay in seconds between checking 41 | whether a newly created schema has become available. 
42 | """ 43 | 44 | def __init__( 45 | self, 46 | glue_client, 47 | registry_name: str = 'default-registry', 48 | max_wait_attempts: int = 10, 49 | wait_interval_seconds: float = 3 50 | ): 51 | self.glue_client = glue_client 52 | self.registry_name = registry_name 53 | self.max_wait_attempts = max_wait_attempts 54 | self.wait_interval_seconds = wait_interval_seconds 55 | 56 | def get_schema_version(self, version_id: UUID) -> SchemaVersion: 57 | """Get a schema version from the registry by id. 58 | 59 | Arguments: 60 | version_id: the schema version's unique id. 61 | 62 | Returns: 63 | SchemaVersion 64 | """ 65 | try: 66 | res = self.glue_client.get_schema_version( 67 | SchemaVersionId=str(version_id) 68 | ) 69 | except Exception as e: 70 | raise SchemaRegistryException( 71 | f'Failed to get schema version by id {version_id}' 72 | ) from e 73 | if ( 74 | res['SchemaVersionId'] is None or 75 | res['Status'] != 'AVAILABLE' 76 | ): 77 | raise SchemaRegistryException( 78 | f"Schema Found but status is {res['Status']}" 79 | ) 80 | return SchemaVersion( 81 | schema_name=schema_name_from_arn(res['SchemaArn']), 82 | version_id=UUID(res['SchemaVersionId']), 83 | definition=res['SchemaDefinition'], 84 | data_format=res['DataFormat'], 85 | status=res['Status'], 86 | version_number=res['VersionNumber'] 87 | ) 88 | 89 | def get_schema_by_definition( 90 | self, 91 | definition: str, 92 | schema_name: str 93 | ) -> SchemaVersion: 94 | """Get a schema version from the registry by schema definition. 95 | 96 | Arguments: 97 | definition: the stringified schema definition. 98 | schema_name: the name of the schema. 
99 | 100 | Returns: 101 | SchemaVersion 102 | """ 103 | try: 104 | LOG.debug( 105 | 'Getting schema version id for: name = %s, definition = %s', 106 | schema_name, definition 107 | ) 108 | res = self.glue_client.get_schema_by_definition( 109 | SchemaId={ 110 | 'SchemaName': schema_name, 111 | 'RegistryName': self.registry_name 112 | }, 113 | SchemaDefinition=definition 114 | ) 115 | if ( 116 | res['SchemaVersionId'] is None or 117 | res['Status'] != 'AVAILABLE' 118 | ): 119 | raise SchemaRegistryException( 120 | f"Schema Found but status is {res['Status']}" 121 | ) 122 | return SchemaVersion( 123 | schema_name=schema_name_from_arn(res['SchemaArn']), 124 | version_id=UUID(res['SchemaVersionId']), 125 | definition=definition, 126 | data_format=res['DataFormat'], 127 | status=res['Status'] 128 | ) 129 | except Exception as e: 130 | raise SchemaRegistryException( 131 | 'Failed to get schemaVersionId by schema definition for schema' 132 | f' name = {schema_name}' 133 | ) from e 134 | 135 | def get_or_register_schema_version( 136 | self, 137 | definition: str, 138 | schema_name: str, 139 | data_format: DataFormat, 140 | compatibility_mode: CompatibilityMode = DEFAULT_COMPATIBILITY_MODE, 141 | metadata: Optional[Mapping[str, str]] = None 142 | ) -> SchemaVersion: 143 | """Get Schema Version ID by following below steps: 144 | 145 | 1) If schema version id exists in registry then get it from registry 146 | 2) If schema version id does not exist in registry 147 | then if schema exists but version doesn't exist 148 | then 149 | 2.1) Register schema version 150 | else if schema does not exist 151 | then 152 | 2.2) create schema and register schema version 153 | 154 | Arguments: 155 | definition: the stringified schema definition. 156 | schema_name: the name of the schema in the registry. 157 | data_format: which format to use if creating the schema. 158 | Has no effect if the schema by name already exists. 
    def get_or_register_schema_version(
        self,
        definition: str,
        schema_name: str,
        data_format: DataFormat,
        compatibility_mode: CompatibilityMode = DEFAULT_COMPATIBILITY_MODE,
        metadata: Optional[Mapping[str, str]] = None
    ) -> SchemaVersion:
        """Get Schema Version ID by following below steps:

        1) If schema version id exists in registry then get it from registry
        2) If schema version id does not exist in registry
           then if schema exists but version doesn't exist
           then
           2.1) Register schema version
           else if schema does not exist
           then
           2.2) create schema and register schema version

        Arguments:
            definition: the stringified schema definition.
            schema_name: the name of the schema in the registry.
            data_format: which format to use if creating the schema.
                Has no effect if the schema by name already exists.
            compatibility_mode: which compatibility mode to use if
                creating the schema. Has no effect if the schema by
                name already exists.
            metadata: optional metadata to add to the schema version
                if registering a new version. Has no effect if a
                schema version matching the specified definition already
                exists.

        Returns:
            SchemaVersion: the AVAILABLE version for the definition.

        Raises:
            SchemaRegistryException: if lookup and registration both fail.
        """
        try:
            version = self.get_schema_by_definition(
                definition, schema_name
            )
        except SchemaRegistryException as e:
            # The boto error is chained as __cause__; its message text
            # distinguishes "whole schema missing" from "only this
            # version missing".
            # NOTE(review): this depends on the exact AWS error strings
            # (SCHEMA_NOT_FOUND_MSG / SCHEMA_VERSION_NOT_FOUND_MSG)
            # staying stable across Glue API revisions — confirm when
            # upgrading boto.
            cause_msg = str(e.__cause__)
            if SCHEMA_VERSION_NOT_FOUND_MSG in cause_msg:
                LOG.debug(cause_msg)
                # Schema exists; this definition becomes a new version.
                version_id = self.register_schema_version(
                    definition, schema_name, metadata
                )
            elif SCHEMA_NOT_FOUND_MSG in cause_msg:
                LOG.debug(cause_msg)
                # Schema itself is missing; create it with this
                # definition as its first version.
                version_id = self.create_schema(
                    schema_name, data_format, definition, compatibility_mode,
                    metadata
                )
            else:
                raise SchemaRegistryException(
                    'Exception occurred while fetching or registering schema'
                    f' definition = {definition}, schema name = {schema_name}'
                ) from e
            # Re-fetch so the returned SchemaVersion carries the full
            # details (definition, version number, status).
            version = self.get_schema_version(version_id)
        return version
206 | 207 | Returns: 208 | UUID: the id of the new schema version 209 | """ 210 | try: 211 | res = self.glue_client.register_schema_version( 212 | SchemaId={ 213 | 'SchemaName': schema_name, 214 | 'RegistryName': self.registry_name 215 | }, 216 | SchemaDefinition=definition 217 | ) 218 | version_id = UUID(res['SchemaVersionId']) 219 | LOG.info('Registered the schema version with schema version ' 220 | 'id = %s and with version number = %s and status %s', 221 | version_id, res['VersionNumber'], res['Status']) 222 | if res['Status'] != 'AVAILABLE': 223 | self._wait_for_schema_evolution_check_to_complete(version_id) 224 | except Exception as e: 225 | raise SchemaRegistryException( 226 | 'Register schema :: Call failed when registering the schema' 227 | f' with the schema registry for schema name = {schema_name}', 228 | ) from e 229 | if metadata: 230 | self.put_schema_version_metadata(version_id, metadata) 231 | return version_id 232 | 233 | def _wait_for_schema_evolution_check_to_complete( 234 | self, 235 | schema_version_id: UUID 236 | ): 237 | time.sleep(self.wait_interval_seconds) 238 | for _ in range(self.max_wait_attempts): 239 | res = self.glue_client.get_schema_version( 240 | SchemaVersionId=str(schema_version_id) 241 | ) 242 | status = res['Status'] 243 | if status == 'AVAILABLE': 244 | break 245 | elif status != 'PENDING': 246 | raise SchemaRegistryException( 247 | 'Schema evolution check failed.' 248 | f' schemaVersionId {schema_version_id} is in' 249 | f' {status} status.' 
250 | ) 251 | else: 252 | raise SchemaRegistryException( 253 | 'Retries exhausted for schema evolution check for ' 254 | f'schemaVersionId = {schema_version_id}' 255 | ) 256 | 257 | def put_schema_version_metadata( 258 | self, 259 | version_id: UUID, 260 | metadata: Mapping[str, str] 261 | ): 262 | for k, v in metadata.items(): 263 | try: 264 | self.glue_client.put_schema_version_metadata( 265 | SchemaVersionId=str(version_id), 266 | MetadataKeyValue={ 267 | 'MetadataKey': k, 268 | 'MetadataValue': v 269 | } 270 | ) 271 | except Exception as e: 272 | raise SchemaRegistryException( 273 | 'Put schema version metadata :: Call failed when put' 274 | f' metadata key = {k} value = {v} to schema for schema' 275 | f' versionid = {version_id}' 276 | ) from e 277 | 278 | def create_schema( 279 | self, 280 | name: str, 281 | data_format: DataFormat, 282 | definition: str, 283 | compatibility_mode: CompatibilityMode = DEFAULT_COMPATIBILITY_MODE, 284 | metadata: Optional[Mapping[str, str]] = None 285 | ) -> UUID: 286 | """Create a new schema and return the version id.""" 287 | try: 288 | LOG.info('Creating schema with name: %s and definition: %s', 289 | name, definition) 290 | res = self.glue_client.create_schema( 291 | SchemaName=name, 292 | RegistryId={ 293 | 'RegistryName': self.registry_name 294 | }, 295 | DataFormat=data_format, 296 | Compatibility=compatibility_mode, 297 | Description='', 298 | Tags={}, 299 | SchemaDefinition=definition 300 | ) 301 | version_id = UUID(res['SchemaVersionId']) 302 | if metadata: 303 | self.put_schema_version_metadata(version_id, metadata) 304 | except Exception as e: 305 | if type(e).__name__ == 'AlreadyExistsException': 306 | LOG.warn('Schema is already created, this could be caused by ' 307 | 'multiple producers racing to auto-create schema.') 308 | version_id = self.register_schema_version( 309 | definition, name, metadata 310 | ) 311 | else: 312 | raise SchemaRegistryException( 313 | f'Create schema {name} failed' 314 | ) from e 315 | 
class TemporaryRegistry(ContextManager):
    """A real schema registry for use in tests and experiments.

    This class implements the ContextManager protocol, creating the registry
    on enter and destroying it on exit.

    Usage:

    ```python
    with TemporaryRegistry(glue_client, 'MyRegistry') as r:
        # registry is created on enter
        print(r.name)  # the "real" (suffixed) registry name
        # registry is destroyed on exit
    ```

    Arguments:
        glue_client: glue client created by `botocore`/`boto3`.
        name: human-readable name for the created registry. The name will be
            suffixed by a random identifier to reduce the frequency of
            collisions.
        description: description for the created registry.
        autoremove: whether to destroy the created registry. Defaults to True.
    """

    DEFAULT_DESCRIPTION = 'Temporary registry created with the aws-glue-schema-registry Python library.'  # NOQA

    def __init__(self, glue_client,
                 name: str = 'temporary-registry',
                 description: str = DEFAULT_DESCRIPTION,
                 autoremove: bool = True):
        self.glue_client = glue_client
        # Suffix with a timestamp plus 16 random alphanumerics so
        # concurrent test runs are unlikely to collide.
        date = datetime.utcnow().strftime('%y-%m-%d-%H-%M')
        r = ''.join(random.choices(string.digits + string.ascii_letters,
                                   k=16))
        self.name = f'{name}-{date}-{r}'
        self.description = description
        self.autoremove = autoremove

    def __enter__(self):
        # Lazy %-style logger args (fix: was eager '%' interpolation,
        # which formats even when INFO is disabled).
        LOG.info('creating registry %s...', self.name)
        self.glue_client.create_registry(
            RegistryName=self.name,
            Description=self.description
        )
        return self

    def __exit__(self, *args):
        if self.autoremove:
            LOG.info('deleting registry %s...', self.name)
            self.glue_client.delete_registry(
                RegistryId={'RegistryName': self.name}
            )
VERSION_BYTE = b'\x03'
"""Expected value of the magic version byte.

If the leading byte of the encoded data has a different value,
that signifies one of the following:

1. The data was encoded by a different version of the encoder
2. The data was encoded by a different library (e.g. the Java library)
   that is no longer compatible with this library
3. The data was encoded for another schema registry
   (e.g. Confluent Schema Registry)
4. The data was written by a schema-less producer
"""

COMPRESSION_ENABLED_BYTE = b'\x05'
"""Compression byte when using ZLIB compression."""

COMPRESSION_DISABLED_BYTE = b'\x00'
"""Compression byte when compression is disabled."""

SCHEMA_VERSION_ID_SIZE = 16
"""Number of bytes reserved for the schema version uuid."""


class CodecException(Exception):
    """Raised when encoding or decoding fails."""


class UnknownEncodingException(CodecException):
    """Raised when decoding data with an unknown encoding."""


def encode(data: bytes,
           schema_version_id: UUID,
           compression=None) -> bytes:
    """Encode data and schema information into bytes.

    Arguments:
        data (bytes): the payload itself.
        schema_version_id (UUID): version id of the schema used to
            serialize the data.
        compression (Any): whether to compress the payload data.
            Any truthy value can be passed to enable compression.

            Currently only ZLIB compression is supported. In future
            versions this parameter may take specific values to
            differentiate between different compression algorithms.

    Returns:
        bytes
    """
    if compression:
        compression_byte = COMPRESSION_ENABLED_BYTE
        data = zlib.compress(data)
    else:
        compression_byte = COMPRESSION_DISABLED_BYTE
    # Layout: magic byte | compression byte | 16-byte version id | payload.
    return VERSION_BYTE + compression_byte + schema_version_id.bytes + data


def decode(bytes_: bytes) -> tuple[bytes, UUID]:
    """Decode bytes into data and schema information.

    Arguments:
        bytes_ (bytes): encoded bytes.

    Returns:
        tuple[bytes, UUID]: a two-item tuple consisting of the decoded
            and decompressed data, and the schema version id

    Raises:
        UnknownEncodingException: if the leading byte of the encoded
            data is not recognized, implying the data was encoded with
            an incompatible client or for a different schema registry
        CodecException: if any other error occurs while decoding
    """
    b = BytesIO(bytes_)
    version = b.read(1)
    if version != VERSION_BYTE:
        # Fix: this message was a raw string (r"..."), so {version!r}
        # was emitted literally instead of being interpolated.
        raise UnknownEncodingException(
            f"leading byte {version!r} not recognized"
        )
    compression = b.read(1)
    raw_id = b.read(SCHEMA_VERSION_ID_SIZE)
    if len(raw_id) != SCHEMA_VERSION_ID_SIZE:
        # Fix: previously UUID() raised a bare ValueError on truncated
        # input, violating the documented CodecException contract.
        raise CodecException(
            f'expected {SCHEMA_VERSION_ID_SIZE}-byte schema version id,'
            f' got {len(raw_id)} bytes'
        )
    schema_version = UUID(bytes=raw_id)
    data = b.read()
    if compression == COMPRESSION_ENABLED_BYTE:
        data = zlib.decompress(data)
    elif compression != COMPRESSION_DISABLED_BYTE:
        raise CodecException(
            f'compression byte {compression!r} not recognized'
        )
    return data, schema_version
class JsonSchema(Schema):
    """Implementation of the `Schema` protocol for JSON schemas.

    Arguments:
        definition: the schema, either as a parsed dict or a string
    """

    def __init__(self, definition: Union[str, dict]):
        # Accept either form; normalize to a dict internally.
        if isinstance(definition, str):
            self._dict = orjson.loads(definition)
        else:
            self._dict = definition
        # Compile once at construction so per-record validation is cheap.
        self._compiled_validation_method = fastjsonschema.compile(self._dict)

    def __hash__(self):
        # Hash the canonical serialized form so logically-equal schemas
        # collide; consistent with __eq__ on the underlying dict.
        return hash(str(self))

    def __eq__(self, other):
        return isinstance(other, JsonSchema) and \
            self._dict == other._dict

    def __str__(self):
        # orjson.dumps returns bytes; decode to str.
        return orjson.dumps(self._dict).decode()

    def __repr__(self):
        # NOTE(review): '' % dict always yields '' — this format string
        # looks garbled (likely meant something like '<JsonSchema %s>');
        # confirm against upstream before changing.
        return '' % self._dict

    @property
    def data_format(self) -> DataFormat:
        """Registry data format identifier for JSON schemas."""
        return 'JSON'

    @property
    def fqn(self) -> str:
        # JSON schemas carry no record name here, so the fully-qualified
        # name is empty (record-name-based naming strategies will not
        # produce useful names for JSON schemas).
        return ""

    def read(self, bytes_: bytes):
        """Parse and validate a JSON payload, returning the parsed data."""
        data = orjson.loads(bytes_)
        self.validate(data)
        return data

    def write(self, data) -> bytes:
        """Validate and serialize data to JSON bytes."""
        self.validate(data)
        return orjson.dumps(data)

    def validate(self, data):
        """Raise ValidationError if data does not match this schema."""
        try:
            self._compiled_validation_method(data)
        except fastjsonschema.exceptions.JsonSchemaValueException as e:
            raise ValidationError(str(e)) from e
class SchemaNamingStrategy(Protocol):
    """Callable that picks the registry schema name for a value.

    Arguments:
        topic: the name of the topic the value is being written into.
        is_key: whether the value is a Kafka key or value.
        schema: the schema of the (unserialized) value being written.

    Returns:
        str: the schema name to use
    """
    def __call__(self, topic: str, is_key: bool, schema: Schema) -> str: ...


def topic_name_strategy(topic: str, is_key: bool, schema: Schema) -> str:
    """The default naming strategy: derive the name from the topic.

    Keys map to `-key` and values to `-value`, each prefixed by the
    topic name. Works well when every record on a topic shares one
    schema; does not support mixing schemas on a topic.
    """
    suffix = 'key' if is_key else 'value'
    return f'{topic}-{suffix}'


def record_name_strategy(topic: str, is_key: bool, schema: Schema) -> str:
    """Name the schema after its fully-qualified record name.

    Lets one topic carry records with multiple incompatible schemas,
    provided record names uniquely and consistently identify a schema
    across the whole registry.
    """
    return schema.fqn


def topic_record_name_strategy(topic: str, is_key: bool,
                               schema: Schema) -> str:
    """Name the schema `<topic>-<record fqn>`.

    Like record_name_strategy, but scoped per topic so different topics
    may reuse a record name for incompatible schemas.
    """
    return f'{topic}-{schema.fqn}'
74 | """ 75 | return f'{topic}-{schema.fqn}' 76 | -------------------------------------------------------------------------------- /src/aws_schema_registry/schema.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from abc import ABC, abstractmethod 4 | from dataclasses import dataclass 5 | import sys 6 | from typing import Any, Optional, Hashable 7 | from uuid import UUID 8 | 9 | if sys.version_info[1] < 8: # for py37 10 | from typing_extensions import Literal 11 | else: 12 | from typing import Literal 13 | 14 | DataFormat = Literal['AVRO', 'JSON'] 15 | 16 | CompatibilityMode = Literal['NONE', 'DISABLED', 'BACKWARD', 'BACKWARD_ALL', 17 | 'FORWARD', 'FORWARD_ALL', 'FULL', 'FULL_ALL'] 18 | """Controls the checks performed on new schema versions. 19 | 20 | Values: 21 | NONE: no compatibility checks performed 22 | DISABLED: no new versions can be added to the schema 23 | BACKWARD: consumer can read both current and previous version 24 | BACKWARD_ALL: consumer can read current and all previous 25 | versions 26 | FORWARD: consumer can read both current and subsequent version 27 | FORWARD_ALL: consumer can read both current and all subsequent 28 | versions 29 | FULL: combination of 'BACKWARD' and 'FORWARD' 30 | FULL_ALL: combination of 'BACKWARD_ALL' and 'FORWARD_ALL' 31 | """ 32 | 33 | SchemaStatus = Literal['AVAILABLE', 'PENDING', 'DELETING'] 34 | SchemaVersionStatus = Literal['AVAILABLE', 'PENDING', 'FAILURE', 'DELETING'] 35 | 36 | 37 | class Schema(ABC, Hashable): 38 | """Abstract base class for a schema implementation.""" 39 | 40 | @property 41 | @abstractmethod 42 | def data_format(self) -> DataFormat: 43 | """The data format of this schema.""" 44 | 45 | @property 46 | @abstractmethod 47 | def fqn(self) -> str: 48 | """The fully-qualified name of this schema.""" 49 | 50 | @abstractmethod 51 | def read(self, bytes_: bytes) -> Any: 52 | """Read bytes into a record.""" 53 | 54 | 
@abstractmethod 55 | def write(self, data) -> bytes: 56 | """Write a record into bytes.""" 57 | 58 | def validate(self, data) -> None: 59 | """Raise a ValidationException if the data is invalid.""" 60 | 61 | 62 | class ValidationError(Exception): 63 | """Raised when a schema's `validate` is called on invalid data. 64 | 65 | The error need not contain *every* validation error, just the first that 66 | classifies the data as invalid. 67 | """ 68 | pass 69 | 70 | 71 | @dataclass 72 | class SchemaVersion: 73 | schema_name: str 74 | version_id: UUID 75 | definition: str 76 | data_format: DataFormat 77 | status: SchemaVersionStatus 78 | version_number: Optional[int] = None 79 | 80 | def __hash__(self): 81 | return hash(self.definition) 82 | -------------------------------------------------------------------------------- /src/aws_schema_registry/serde.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import functools 4 | import logging 5 | from typing import Any, NamedTuple 6 | from uuid import UUID 7 | 8 | from aws_schema_registry.avro import AvroSchema 9 | from aws_schema_registry.jsonschema import JsonSchema 10 | from aws_schema_registry.client import SchemaRegistryClient 11 | from aws_schema_registry.codec import decode, encode, UnknownEncodingException 12 | from aws_schema_registry.exception import SchemaRegistryException 13 | from aws_schema_registry.naming import ( 14 | SchemaNamingStrategy, topic_name_strategy 15 | ) 16 | from aws_schema_registry.schema import CompatibilityMode, Schema, SchemaVersion 17 | 18 | LOG = logging.getLogger(__name__) 19 | 20 | 21 | class DataAndSchema(NamedTuple): 22 | """Data and its schema. 23 | 24 | Can be used to wrap the data and schema together before calling the 25 | producer's producing methods. 26 | """ 27 | data: Any 28 | schema: Schema 29 | 30 | 31 | class KafkaSerializer: 32 | """Kafka serializer that uses the AWS Schema Registry. 
class KafkaSerializer:
    """Kafka serializer that uses the AWS Schema Registry.

    Arguments:
        client: instance of SchemaRegistryClient
        is_key (optional): whether the serializer is serializing keys as
            opposed to values. Defaults to false. Setting this to the
            appropriate value is important to avoid mixing key and value
            schemas if using the default schema name strategy.
        compatibility_mode (optional): the compatibility mode to use if
            creating a new schema in the registry. Defaults to the
            registry's default compatibility setting if not specified.
        schema_naming_strategy (optional): how to choose the schema name
            when creating new schemas. Defaults to the topic name
            strategy. See the `naming` module for more information and
            alternate strategies.
    """

    def __init__(
        self,
        client: SchemaRegistryClient,
        is_key: bool = False,
        compatibility_mode: CompatibilityMode = 'BACKWARD',
        schema_naming_strategy: SchemaNamingStrategy = topic_name_strategy
    ):
        self.client = client
        self.is_key = is_key
        self.compatibility_mode: CompatibilityMode = compatibility_mode
        self.schema_naming_strategy = schema_naming_strategy
        # Per-instance cache instead of functools.lru_cache on the bound
        # method: lru_cache on a method keys on `self` and keeps every
        # serializer alive for the process lifetime (flake8 B019).
        self._schema_version_cache: dict = {}

    def serialize(self, topic: str, data_and_schema: DataAndSchema):
        """Serialize (data, schema) into registry-encoded bytes.

        Returns None for a None input (tombstones pass through).

        Raises:
            TypeError: if data_and_schema is not a (data, schema) tuple.
        """
        if data_and_schema is None:
            return None
        if not isinstance(data_and_schema, tuple):
            # Fix: a stray comma previously passed TWO arguments to
            # TypeError, producing a tuple instead of one message.
            raise TypeError('KafkaSerializer can only serialize'
                            f' {tuple}, got {type(data_and_schema)}')
        data, schema = data_and_schema
        schema_version = self._get_schema_version(topic, schema)
        serialized = schema.write(data)
        return encode(serialized, schema_version.version_id)

    def _get_schema_version(self, topic: str, schema: Schema) -> SchemaVersion:
        # Memoize per (topic, schema); Schema implementations are
        # required to be Hashable, so the pair is a valid dict key.
        cache_key = (topic, schema)
        try:
            return self._schema_version_cache[cache_key]
        except KeyError:
            pass
        schema_name = self.schema_naming_strategy(topic, self.is_key, schema)
        LOG.info('Fetching schema %s...', schema_name)
        version = self.client.get_or_register_schema_version(
            definition=str(schema),
            schema_name=schema_name,
            data_format=schema.data_format,
            compatibility_mode=self.compatibility_mode
        )
        self._schema_version_cache[cache_key] = version
        return version
class KafkaDeserializer:
    """Kafka deserializer that uses the AWS Schema Registry.

    Arguments:
        client: instance of SchemaRegistryClient.
        return_record_name: if true, when reading a union of records,
            the result will be a tuple where the first value is the
            name of the record and the second value is the record
            itself
        secondary_deserializer: optional deserializer to pass through
            to when processing values with an unrecognized encoding.
            This is primarily used to migrate from other schema
            registries or handle schema-less data. The secondary
            deserializer should either be a callable taking the same
            arguments as deserialize or an object with a matching
            deserialize method.
    """

    def __init__(
        self,
        client: SchemaRegistryClient,
        return_record_name: bool = False,
        secondary_deserializer=None
    ):
        self.client = client
        # NOTE(review): return_record_name is stored but never read in
        # this class — presumably consumed by a schema implementation;
        # confirm before relying on it.
        self.return_record_name = return_record_name
        self.secondary_deserializer = secondary_deserializer
        # Per-instance caches instead of functools.lru_cache on bound
        # methods, which keys on `self` and leaks instances (B019).
        self._version_cache: dict = {}
        self._schema_cache: dict = {}

    def deserialize(self, topic: str, bytes_: bytes):
        """Deserialize registry-encoded bytes into DataAndSchema.

        Returns None for a None payload. Payloads without the registry's
        encoding header are routed to the secondary deserializer when
        one is configured.

        Raises:
            SchemaRegistryException: if the encoding is unrecognized and
                no secondary deserializer is configured.
        """
        if bytes_ is None:
            return None
        try:
            data_bytes, schema_version_id = decode(bytes_)
        except UnknownEncodingException as e:
            if self.secondary_deserializer:
                if callable(self.secondary_deserializer):
                    return self.secondary_deserializer(topic, bytes_)
                return self.secondary_deserializer.deserialize(topic, bytes_)
            raise SchemaRegistryException(
                'no secondary deserializer provided to handle'
                ' unrecognized data encoding'
            ) from e
        writer_schema_version = self._get_schema_version(schema_version_id)
        writer_schema = self._schema_for_version(writer_schema_version)
        return DataAndSchema(writer_schema.read(data_bytes), writer_schema)

    def _get_schema_version(self, version_id: UUID):
        # Look up (and memoize) the writer's schema version by id.
        try:
            return self._version_cache[version_id]
        except KeyError:
            pass
        LOG.info('Fetching schema version %s...', version_id)
        version = self.client.get_schema_version(version_id)
        self._version_cache[version_id] = version
        return version

    def _schema_for_version(self, version: SchemaVersion) -> Schema:
        # Build (and memoize) a Schema implementation for a version.
        # SchemaVersion hashes on its definition, so it is a valid key.
        try:
            return self._schema_cache[version]
        except KeyError:
            pass
        if version.data_format == 'AVRO':
            schema: Schema = AvroSchema(version.definition)
        elif version.data_format == 'JSON':
            schema = JsonSchema(version.definition)
        else:
            # Fix: previously fell through and returned None, which only
            # surfaced later as AttributeError on `.read`; fail loudly.
            raise SchemaRegistryException(
                f'unsupported data format {version.data_format}'
            )
        self._schema_cache[version] = schema
        return schema
This registry does not hold any valuable data and is safe to' 44 | ' delete as long as it is not currently in use by a test', 45 | autoremove=CLEANUP_REGISTRY 46 | ) as registry: 47 | yield registry.name 48 | -------------------------------------------------------------------------------- /tests/integration/java/README.md: -------------------------------------------------------------------------------- 1 | # Java integration tests 2 | 3 | Tests that this libary is compatible with the [Java version](https://github.com/awslabs/aws-glue-schema-registry). 4 | 5 | Requires Java v11+ and Maven to build the Java test project. 6 | -------------------------------------------------------------------------------- /tests/integration/java/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 4.0.0 6 | 7 | com.disasteraware.aws.schemaregistry 8 | java-integration-test 9 | develop-SNAPSHOT 10 | jar 11 | 12 | java-integration-test 13 | 14 | 15 | UTF-8 16 | 11 17 | 11 18 | 19 | 20 | 21 | 22 | software.amazon.glue 23 | schema-registry-serde 24 | 1.1.4 25 | 26 | 27 | org.apache.avro 28 | avro 29 | 1.10.2 30 | 31 | 32 | 33 | 34 | 35 | 36 | org.apache.maven.plugins 37 | maven-shade-plugin 38 | 2.3 39 | 40 | ${project.artifactId} 41 | false 42 | 43 | 44 | 45 | com.disasteraware.aws.schemaregistry.App 46 | 47 | 48 | 49 | 50 | 51 | 52 | package 53 | 54 | shade 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /tests/integration/java/src/main/java/com/disasteraware/aws/schemaregistry/App.java: -------------------------------------------------------------------------------- 1 | package com.disasteraware.aws.schemaregistry; 2 | 3 | import java.io.IOException; 4 | import java.util.HashMap; 5 | import java.util.Map; 6 | import java.util.Objects; 7 | 8 | import com.amazonaws.services.schemaregistry.deserializers.GlueSchemaRegistryKafkaDeserializer; 9 | import 
/**
 * Round-trip harness for the Python/Java interop test: reads
 * registry-encoded bytes from stdin, deserializes them with the Java
 * Glue Schema Registry client, re-serializes the record, and writes the
 * resulting bytes to stdout. The DATA_FORMAT environment variable
 * selects the AVRO or JSON code path.
 */
public class App {
    // Credentials resolved from the profile named by AWS_PROFILE.
    static AwsCredentialsProvider credentialsProvider = ProfileCredentialsProvider.builder()
            .profileName(System.getenv("AWS_PROFILE"))
            .build();

    // Shared serializer/deserializer configuration, populated in main().
    // NOTE(review): type parameters appear stripped in this copy; the
    // upstream declaration is presumably Map<String, Object> — confirm.
    static Map configs = new HashMap<>();

    public static void main(String[] args) {
        // All required settings come from the environment; fail fast if
        // any is missing.
        String dataFormat = Objects.requireNonNull(System.getenv("DATA_FORMAT"));
        configs.put(AWSSchemaRegistryConstants.AWS_REGION, Objects.requireNonNull(System.getenv("AWS_REGION")));
        configs.put(AWSSchemaRegistryConstants.REGISTRY_NAME, Objects.requireNonNull(System.getenv("REGISTRY_NAME")));
        configs.put(AWSSchemaRegistryConstants.SCHEMA_NAME, Objects.requireNonNull(System.getenv("SCHEMA_NAME")));
        configs.put(AWSSchemaRegistryConstants.SCHEMA_AUTO_REGISTRATION_SETTING, true);

        if (dataFormat.equals("AVRO")) {
            configs.put(AWSSchemaRegistryConstants.DATA_FORMAT, DataFormat.AVRO.name());
            configs.put(AWSSchemaRegistryConstants.AVRO_RECORD_TYPE, AvroRecordType.GENERIC_RECORD.getName());
            try {
                byte[] bytes;
                GenericRecord record;
                Schema schema;

                bytes = System.in.readAllBytes();

                // Decode the Python-produced payload...
                GlueSchemaRegistryKafkaDeserializer deserializer = new GlueSchemaRegistryKafkaDeserializer(configs);
                record = (GenericRecord) deserializer.deserialize("test", bytes);
                schema = record.getSchema();

                // ...and re-encode it with the Java client so the Python
                // side can verify it can read Java-produced bytes.
                GlueSchemaRegistryKafkaSerializer serializer = new GlueSchemaRegistryKafkaSerializer(configs);
                bytes = serializer.serialize("test", record);

                System.out.write(bytes, 0, bytes.length);
            } catch (IOException e) {
                e.printStackTrace();
                System.exit(1);
            }
        } else if (dataFormat.equals("JSON")) {
            configs.put(AWSSchemaRegistryConstants.DATA_FORMAT, DataFormat.JSON.name());
            try {
                byte[] bytes;
                Object record;
                Schema schema; // NOTE(review): declared but unused in this branch

                bytes = System.in.readAllBytes();

                GlueSchemaRegistryKafkaDeserializer deserializer = new GlueSchemaRegistryKafkaDeserializer(configs);
                record = deserializer.deserialize("test", bytes);

                GlueSchemaRegistryKafkaSerializer serializer = new GlueSchemaRegistryKafkaSerializer(configs);
                bytes = serializer.serialize("test", record);

                System.out.write(bytes, 0, bytes.length);
            } catch (IOException e) {
                e.printStackTrace();
                System.exit(1);
            }
        } else {
            System.out.println("Only JSON or AVRO are acceptable data formats");
            System.exit(1);
        }
    }
}
SCHEMA = AvroSchema(f.read()) 25 | 26 | with open(os.path.join(os.path.dirname(__file__), 'user.json'), 'r') as f: 27 | JSON_SCHEMA = JsonSchema(f.read()) 28 | 29 | 30 | def _topic_name_schema_type_name_strategy(topic, is_key, schema): 31 | return f"{topic}-{'key' if is_key else 'value'}-{schema.data_format}" 32 | 33 | 34 | @pytest.mark.parametrize("schema", [SCHEMA, JSON_SCHEMA]) 35 | def test_interop_with_java_library(glue_client, registry, 36 | boto_session, schema): 37 | client = SchemaRegistryClient(glue_client, registry_name=registry) 38 | serializer = KafkaSerializer( 39 | client, 40 | schema_naming_strategy=_topic_name_schema_type_name_strategy) 41 | deserializer = KafkaDeserializer(client) 42 | 43 | data = { 44 | 'name': 'John Doe', 45 | 'favorite_number': 6, 46 | 'favorite_color': 'red' 47 | } 48 | serialized: bytes = serializer.serialize( 49 | 'test', DataAndSchema(data, schema) 50 | ) 51 | 52 | if not os.path.exists(JAR_LOCATION): 53 | LOG.info('Test java jar not found at %s, trying to compile...', 54 | JAR_LOCATION) 55 | compile_java() 56 | credentials = boto_session.get_credentials() 57 | proc = subprocess.run( 58 | ['java', '-jar', JAR_LOCATION], 59 | input=serialized, 60 | capture_output=True, 61 | env={ 62 | 'DATA_FORMAT': schema.data_format, 63 | 'AWS_ACCESS_KEY_ID': credentials.access_key, 64 | 'AWS_SECRET_ACCESS_KEY': credentials.secret_key, 65 | 'AWS_SESSION_TOKEN': credentials.token, 66 | 'AWS_REGION': boto_session.region_name, 67 | 'REGISTRY_NAME': registry, 68 | 'SCHEMA_NAME': _topic_name_schema_type_name_strategy( 69 | "test", False, schema) 70 | } 71 | ) 72 | print(proc.stderr) 73 | proc.check_returncode() 74 | deserialized = deserializer.deserialize('test', proc.stdout) 75 | assert deserialized 76 | assert deserialized.data == data 77 | assert deserialized.schema == schema 78 | 79 | 80 | def compile_java(): 81 | LOG.info('Finding mvn...') 82 | find_mvn_proc = subprocess.run(['which', 'mvn'], capture_output=True) 83 | if 
find_mvn_proc.returncode != 0: 84 | raise Exception('Cannot find an installation of maven to compile the' 85 | ' java test code. Compile manually or install mvn.') 86 | mvn = find_mvn_proc.stdout.decode('utf-8').strip() 87 | LOG.info('mvn found at %s', mvn) 88 | LOG.info('compiling...') 89 | compile_proc = subprocess.run( 90 | [mvn, 'clean', 'package'], 91 | cwd=JAVA_CODE_LOCATION 92 | ) 93 | compile_proc.check_returncode() 94 | -------------------------------------------------------------------------------- /tests/integration/java/user.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "namespace": "aws_schema_registry.integrationtests", 3 | "type": "record", 4 | "name": "User", 5 | "fields": [ 6 | {"name": "name", "type": "string" }, 7 | {"name": "favorite_number", "type": ["int", "null"] }, 8 | {"name": "favorite_color", "type": ["string", "null"] } 9 | ] 10 | } 11 | -------------------------------------------------------------------------------- /tests/integration/java/user.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-04/schema#", 3 | "type": "object", 4 | "properties": { 5 | "name": { 6 | "type": "string" 7 | }, 8 | "favorite_number": { 9 | "type": "integer" 10 | }, 11 | "favorite_color": { 12 | "type": "string" 13 | } 14 | }, 15 | "required": [ 16 | "name", 17 | "favorite_number", 18 | "favorite_color" 19 | ] 20 | } 21 | -------------------------------------------------------------------------------- /tests/integration/kafka_test/README.md: -------------------------------------------------------------------------------- 1 | # Kafka integration tests 2 | 3 | Tests that the serializer and deserializer works correctly with a real Kafka cluster. 4 | 5 | Requires [Docker](https://www.docker.com/). Tested with Docker v20. 
6 | 7 | Run `docker compose -f tests/integration/kafka_test/docker-compose.yml up -d` before running these tests. Destroy the docker stack with `docker compose -f tests/integration/kafka_test/docker-compose.yml down`. 8 | -------------------------------------------------------------------------------- /tests/integration/kafka_test/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '2' 2 | 3 | services: 4 | zookeeper: 5 | image: 'public.ecr.aws/bitnami/zookeeper:latest' 6 | ports: 7 | - '2181:2182' 8 | environment: 9 | - ALLOW_ANONYMOUS_LOGIN=yes 10 | 11 | kafka: 12 | image: 'public.ecr.aws/bitnami/kafka:latest' 13 | ports: 14 | - '9092:9092' 15 | links: 16 | - zookeeper 17 | container_name: local_kafka 18 | environment: 19 | - KAFKA_BROKER_ID=1 20 | - KAFKA_CFG_ZOOKEEPER_CONNECT=zookeeper:2181 21 | - KAFKA_LISTENERS=PLAINTEXT://0.0.0.0:9092 22 | - KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://localhost:9092 23 | - ALLOW_PLAINTEXT_LISTENER=yes -------------------------------------------------------------------------------- /tests/integration/kafka_test/test_kafka_integration.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | import os 3 | 4 | from kafka import KafkaAdminClient, KafkaProducer, KafkaConsumer 5 | from kafka.admin import NewTopic 6 | import pytest 7 | 8 | from aws_schema_registry import DataAndSchema, SchemaRegistryClient 9 | from aws_schema_registry.avro import AvroSchema 10 | from aws_schema_registry.jsonschema import JsonSchema 11 | from aws_schema_registry.adapter.kafka import ( 12 | KafkaDeserializer, KafkaSerializer 13 | ) 14 | from aws_schema_registry.naming import (record_name_strategy, 15 | topic_name_strategy) 16 | 17 | BOOTSTRAP_STRING = '127.0.0.1:9092' 18 | 19 | TOPIC_PREFIX = 'SchemaRegistryTests' 20 | NUMBER_OF_PARTITIONS = 1 21 | REPLICATION_FACTOR = 1 22 | 23 | DATE = 
datetime.utcnow().strftime('%y-%m-%d-%H-%M') 24 | 25 | with open(os.path.join(os.path.dirname(__file__), 'user.v1.avsc'), 'r') as f: 26 | SCHEMA_V1 = AvroSchema(f.read()) 27 | with open(os.path.join(os.path.dirname(__file__), 'user.v2.avsc'), 'r') as f: 28 | SCHEMA_V2 = AvroSchema(f.read()) 29 | with open(os.path.join(os.path.dirname(__file__), 'user.json'), 'r') as f: 30 | SCHEMA_JSON = JsonSchema(f.read()) 31 | 32 | PRODUCER_PROPERTIES = { 33 | 'bootstrap_servers': BOOTSTRAP_STRING, 34 | 'acks': 'all', 35 | 'retries': 0, 36 | 'batch_size': 16384, 37 | 'linger_ms': 1, 38 | 'buffer_memory': 33554432, 39 | 'request_timeout_ms': 1000 40 | } 41 | 42 | CONSUMER_PROPERTIES = { 43 | 'bootstrap_servers': BOOTSTRAP_STRING, 44 | 'auto_offset_reset': 'earliest', 45 | 'enable_auto_commit': False 46 | } 47 | 48 | 49 | @pytest.fixture(scope='session') 50 | def topic(): 51 | """The topic to use for testing. Name is partially random.""" 52 | name = f'{TOPIC_PREFIX}-{DATE}' 53 | admin_client = KafkaAdminClient(bootstrap_servers=BOOTSTRAP_STRING) 54 | print('creating topic %s...' % name) 55 | admin_client.create_topics([ 56 | NewTopic(name, NUMBER_OF_PARTITIONS, REPLICATION_FACTOR) 57 | ]) 58 | yield name 59 | print('deleting topic %s...' 
% name) 60 | admin_client.delete_topics([name]) 61 | 62 | 63 | def test_produce_consume_with_ser_de_schema_registry( 64 | glue_client, topic, registry 65 | ): 66 | client = SchemaRegistryClient( 67 | glue_client, registry_name=registry 68 | ) 69 | serializer = KafkaSerializer( 70 | client, schema_naming_strategy=record_name_strategy 71 | ) 72 | 73 | # jsonschema has no fqn, so we use topic_name_strategy for it 74 | # (which also requires a separate producer) 75 | json_serializer = KafkaSerializer( 76 | client, schema_naming_strategy=topic_name_strategy 77 | ) 78 | 79 | deserializer = KafkaDeserializer(client) 80 | 81 | producer = KafkaProducer( 82 | value_serializer=serializer, 83 | **PRODUCER_PROPERTIES 84 | ) 85 | 86 | json_producer = KafkaProducer( 87 | value_serializer=json_serializer, 88 | **PRODUCER_PROPERTIES 89 | ) 90 | 91 | data1 = { 92 | 'name': 'John Doe', 93 | 'favorite_number': 6, 94 | 'favorite_color': 'red' 95 | } 96 | producer.send(topic, DataAndSchema(data1, SCHEMA_V1)) 97 | 98 | data2 = { 99 | 'name': 'John Doe', 100 | 'favorite_number': 6, 101 | 'favorite_colors': ['red', 'blue'] 102 | } 103 | producer.send(topic, DataAndSchema(data2, SCHEMA_V2)) 104 | 105 | data3 = { 106 | 'name': 'John Doe', 107 | 'favorite_number': 6, 108 | 'favorite_colors': ['red', 'blue', "yello"] 109 | } 110 | json_producer.send(topic, DataAndSchema(data3, SCHEMA_JSON)) 111 | 112 | consumer = KafkaConsumer( 113 | topic, 114 | value_deserializer=deserializer, 115 | **CONSUMER_PROPERTIES 116 | ) 117 | batch = consumer.poll(timeout_ms=1000) 118 | assert len(batch) == 1 119 | messages = batch[list(batch.keys())[0]] 120 | assert len(messages) == 3 121 | assert messages[0].value.data == data1 122 | assert messages[0].value.schema == SCHEMA_V1 123 | assert messages[1].value.data == data2 124 | assert messages[1].value.schema == SCHEMA_V2 125 | assert messages[2].value.data == data3 126 | assert messages[2].value.schema == SCHEMA_JSON 127 | 
-------------------------------------------------------------------------------- /tests/integration/kafka_test/user.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-04/schema#", 3 | "type": "object", 4 | "properties": { 5 | "name": { 6 | "type": "string" 7 | }, 8 | "favorite_number": { 9 | "type": "integer" 10 | }, 11 | "favorite_colors": { 12 | "type": "array", 13 | "items": { 14 | "type": "string" 15 | } 16 | } 17 | }, 18 | "required": [ 19 | "name", 20 | "favorite_number", 21 | "favorite_colors" 22 | ] 23 | } 24 | -------------------------------------------------------------------------------- /tests/integration/kafka_test/user.v1.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "namespace": "aws_schema_registry.integrationtests", 3 | "type": "record", 4 | "name": "User", 5 | "fields": [ 6 | {"name": "name", "type": "string" }, 7 | {"name": "favorite_number", "type": ["int", "null"] }, 8 | {"name": "favorite_color", "type": ["string", "null"] } 9 | ] 10 | } 11 | -------------------------------------------------------------------------------- /tests/integration/kafka_test/user.v2.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "namespace": "aws_schema_registry.integrationtests", 3 | "type": "record", 4 | "name": "User", 5 | "fields": [ 6 | {"name": "name", "type": "string" }, 7 | {"name": "favorite_number", "type": ["int", "null"] }, 8 | {"name": "favorite_colors", "type": { 9 | "type": "array", "items": { 10 | "namespace": "com.amazonaws.services.schemaregistry.serializers.avro", 11 | "name": "color", 12 | "type": "string" 13 | } 14 | }, "default": []} 15 | ] 16 | } 17 | -------------------------------------------------------------------------------- /tests/test_avro.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | 
from aws_schema_registry import ValidationError 4 | from aws_schema_registry.avro import AvroSchema 5 | 6 | 7 | def test_fully_qualified_name(): 8 | s = AvroSchema('{"type": "record", "namespace": "foo", "name": "Bar"}') 9 | assert s.fqn == "foo.Bar" 10 | 11 | 12 | def test_primitive_name(): 13 | # fastavro does not fulfill this part of the Avro spec 14 | s = AvroSchema('{"type": "string"}') 15 | assert s.fqn == 'string' 16 | 17 | 18 | def test_readwrite(): 19 | s = AvroSchema(''' 20 | { 21 | "type": "record", 22 | "name": "JediMaster", 23 | "fields": [ 24 | {"name": "name", "type": "string" }, 25 | {"name": "age", "type": "int" } 26 | ] 27 | }''') 28 | d = { 29 | 'name': 'Yoda', 30 | 'age': 900 31 | } 32 | assert s.read(s.write(d)) == d 33 | 34 | 35 | def test_validation(): 36 | s = AvroSchema(''' 37 | { 38 | "type": "record", 39 | "name": "JediMaster", 40 | "fields": [ 41 | {"name": "name", "type": "string" }, 42 | {"name": "age", "type": "int" } 43 | ] 44 | }''') 45 | with pytest.raises(ValidationError) as e: 46 | s.validate({'name': 'Obi-Wan'}) 47 | assert 'name' not in str(e) 48 | assert 'age' in str(e) 49 | with pytest.raises(ValidationError) as e: 50 | s.validate({'name': 1, 'age': 2}) 51 | assert 'name' in str(e) 52 | assert 'age' not in str(e) 53 | s.validate({'name': 'Jar Jar', 'age': 42, 'sith': True}) 54 | -------------------------------------------------------------------------------- /tests/test_client.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import Mock 2 | from uuid import UUID, uuid4 3 | 4 | import pytest 5 | 6 | from aws_schema_registry.client import ( 7 | SchemaRegistryClient, SCHEMA_NOT_FOUND_MSG, SCHEMA_VERSION_NOT_FOUND_MSG 8 | ) 9 | from aws_schema_registry.exception import SchemaRegistryException 10 | 11 | REGISTRY_NAME = 'user-topic' 12 | SCHEMA_NAME = 'User-Topic' 13 | JSON_SCHEMA_NAME = 'User-Topic-json' 14 | SCHEMA_ARN = 
f'arn:aws:glue:us-west-2:123:schema/{REGISTRY_NAME}/{SCHEMA_NAME}' 15 | SCHEMA_VERSION_ID = UUID('b7b4a7f0-9c96-4e4a-a687-fb5de9ef0c63') 16 | JSON_SCHEMA_VERSION_ID = UUID('98718bb6-ca2a-4ac6-b841-748cab68b1b1') 17 | SCHEMA_DEF = '{"name": "Test", "type": "record", "fields": []}' 18 | JSON_SCHEMA_DEF = """{ 19 | "$schema": "http://json-schema.org/draft-04/schema#", 20 | "type": "object", 21 | "properties": { 22 | "name": { 23 | "type": "string" 24 | }, 25 | "age": { 26 | "type": "integer" 27 | } 28 | }, 29 | "required": [ 30 | "name", 31 | "age" 32 | ] 33 | }""" 34 | 35 | METADATA = { 36 | 'event-source-1': 'topic1', 37 | 'event-source-2': 'topic2', 38 | 'event-source-3': 'topic3', 39 | 'event-source-4': 'topic4', 40 | 'event-source-5': 'topic5' 41 | } 42 | 43 | 44 | @pytest.fixture 45 | def glue_client(): 46 | return Mock() 47 | 48 | 49 | @pytest.fixture 50 | def client(glue_client): 51 | return SchemaRegistryClient( 52 | glue_client, 53 | registry_name=REGISTRY_NAME, 54 | wait_interval_seconds=0 55 | ) 56 | 57 | 58 | def test_get_schema_version(client, glue_client): 59 | glue_client.get_schema_version = Mock(return_value={ 60 | 'SchemaVersionId': str(SCHEMA_VERSION_ID), 61 | 'SchemaDefinition': SCHEMA_DEF, 62 | 'SchemaArn': SCHEMA_ARN, 63 | 'DataFormat': 'AVRO', 64 | 'VersionNumber': 123, 65 | 'Status': 'AVAILABLE' 66 | }) 67 | 68 | version = client.get_schema_version(SCHEMA_VERSION_ID) 69 | 70 | assert version.version_id == SCHEMA_VERSION_ID 71 | 72 | 73 | def test_get_schema_by_definition(client, glue_client): 74 | glue_client.get_schema_by_definition = Mock(return_value={ 75 | 'SchemaVersionId': str(SCHEMA_VERSION_ID), 76 | 'SchemaArn': SCHEMA_ARN, 77 | 'DataFormat': 'AVRO', 78 | 'Status': 'AVAILABLE' 79 | }) 80 | 81 | version = client.get_schema_by_definition(SCHEMA_DEF, SCHEMA_NAME) 82 | 83 | assert version.version_id == SCHEMA_VERSION_ID 84 | 85 | 86 | def test_get_or_register_schema_version_creates_schema(client, glue_client): 87 | 
glue_client.get_schema_by_definition = Mock( 88 | side_effect=SchemaRegistryException( 89 | Exception(SCHEMA_NOT_FOUND_MSG) 90 | )) 91 | glue_client.create_schema = Mock(return_value={ 92 | 'RegistryName': REGISTRY_NAME, 93 | 'SchemaName': SCHEMA_NAME, 94 | 'Description': '', 95 | 'DataFormat': 'AVRO', 96 | 'Compatibility': 'BACKWARD', 97 | 'SchemaStatus': 'AVAILABLE', 98 | 'SchemaVersionId': str(SCHEMA_VERSION_ID), 99 | 'SchemaVersionStatus': 'AVAILABLE' 100 | }) 101 | glue_client.get_schema_version = Mock(return_value={ 102 | 'SchemaVersionId': str(SCHEMA_VERSION_ID), 103 | 'SchemaDefinition': SCHEMA_DEF, 104 | 'DataFormat': 'AVRO', 105 | 'SchemaArn': SCHEMA_ARN, 106 | 'VersionNumber': 123, 107 | 'Status': 'AVAILABLE' 108 | }) 109 | 110 | version = client.get_or_register_schema_version( 111 | definition=SCHEMA_DEF, 112 | schema_name=SCHEMA_NAME, 113 | data_format='AVRO' 114 | ) 115 | 116 | assert version.version_id == SCHEMA_VERSION_ID 117 | 118 | glue_client.get_schema_version = Mock(return_value={ 119 | 'SchemaVersionId': str(SCHEMA_VERSION_ID), 120 | 'SchemaDefinition': SCHEMA_DEF, 121 | 'DataFormat': 'JSON', 122 | 'SchemaArn': SCHEMA_ARN, 123 | 'VersionNumber': 123, 124 | 'Status': 'AVAILABLE' 125 | }) 126 | 127 | version = client.get_or_register_schema_version( 128 | definition=SCHEMA_DEF, 129 | schema_name=SCHEMA_NAME, 130 | data_format='JSON' 131 | ) 132 | 133 | assert version.version_id == SCHEMA_VERSION_ID 134 | 135 | 136 | def test_get_or_register_schema_version_registers_version( 137 | client, glue_client 138 | ): 139 | glue_client.get_schema_by_definition = Mock( 140 | side_effect=SchemaRegistryException( 141 | Exception(SCHEMA_VERSION_NOT_FOUND_MSG) 142 | )) 143 | glue_client.register_schema_version = Mock(return_value={ 144 | 'SchemaVersionId': str(SCHEMA_VERSION_ID), 145 | 'VersionNumber': 123, 146 | 'Status': 'AVAILABLE' 147 | }) 148 | glue_client.get_schema_version = Mock(return_value={ 149 | 'SchemaVersionId': str(SCHEMA_VERSION_ID), 150 | 
'SchemaDefinition': SCHEMA_DEF, 151 | 'DataFormat': 'AVRO', 152 | 'SchemaArn': SCHEMA_ARN, 153 | 'VersionNumber': 123, 154 | 'Status': 'AVAILABLE' 155 | }) 156 | 157 | version = client.get_or_register_schema_version( 158 | definition=SCHEMA_DEF, 159 | schema_name=SCHEMA_NAME, 160 | data_format='AVRO' 161 | ) 162 | 163 | assert version.version_id == SCHEMA_VERSION_ID 164 | 165 | 166 | @pytest.mark.parametrize( 167 | "schema_def,schema_name,schema_ver_id", 168 | [(SCHEMA_DEF, SCHEMA_NAME, SCHEMA_VERSION_ID), 169 | (JSON_SCHEMA_DEF, JSON_SCHEMA_NAME, JSON_SCHEMA_VERSION_ID)]) 170 | def test_register_schema_version(client, glue_client, 171 | schema_name, schema_def, schema_ver_id): 172 | print(schema_name, schema_def, schema_ver_id) 173 | glue_client.register_schema_version = Mock(return_value={ 174 | 'SchemaVersionId': str(schema_ver_id), 175 | 'VersionNumber': 1, 176 | 'Status': 'AVAILABLE' 177 | }) 178 | 179 | version_id = client.register_schema_version(schema_def, schema_name) 180 | 181 | assert version_id == schema_ver_id 182 | 183 | 184 | def test_wait_for_schema_evolution_check_to_complete(client, glue_client): 185 | responses = [ 186 | { 187 | 'SchemaVersionId': str(SCHEMA_VERSION_ID), 188 | 'Status': 'PENDING' 189 | }, { 190 | 'SchemaVersionId': str(SCHEMA_VERSION_ID), 191 | 'Status': 'AVAILABLE' 192 | } 193 | ] 194 | glue_client.get_schema_version = Mock(side_effect=responses) 195 | 196 | client._wait_for_schema_evolution_check_to_complete(SCHEMA_VERSION_ID) 197 | 198 | 199 | def test_schema_evolution_timeout(client, glue_client): 200 | glue_client.get_schema_version = Mock(return_value={ 201 | 'SchemaVersionId': str(SCHEMA_VERSION_ID), 202 | 'Status': 'PENDING' 203 | }) 204 | 205 | with pytest.raises(SchemaRegistryException): 206 | client._wait_for_schema_evolution_check_to_complete(SCHEMA_VERSION_ID) 207 | 208 | assert glue_client.get_schema_version.call_count == 10 209 | 210 | 211 | def test_put_schema_version_metadata_succeeds(client, glue_client): 212 | 
glue_client.put_schema_version_metadata = Mock( 213 | side_effect=_make_put_schema_version_metadata_response 214 | ) 215 | 216 | client.put_schema_version_metadata(SCHEMA_VERSION_ID, METADATA) 217 | 218 | assert ( 219 | glue_client.put_schema_version_metadata.call_count 220 | == 221 | len(METADATA) 222 | ) 223 | for k, v in METADATA.items(): 224 | glue_client.put_schema_version_metadata.assert_any_call( 225 | SchemaVersionId=str(SCHEMA_VERSION_ID), 226 | MetadataKeyValue={ 227 | 'MetadataKey': k, 228 | 'MetadataValue': v 229 | } 230 | ) 231 | 232 | 233 | def _make_put_schema_version_metadata_response( 234 | SchemaVersionId: str, 235 | MetadataKeyValue: dict 236 | ): 237 | return { 238 | 'SchemaVersionId': SchemaVersionId, 239 | 'MetadataKey': MetadataKeyValue['MetadataKey'], 240 | 'MetadataValue': MetadataKeyValue['MetadataValue'] 241 | } 242 | 243 | 244 | @pytest.mark.parametrize("data_format", ["AVRO", "JSON"]) 245 | def test_create_schema(client, glue_client, data_format): 246 | schema_version_id = uuid4() 247 | glue_client.create_schema = Mock(return_value={ 248 | 'SchemaName': SCHEMA_NAME, 249 | 'DataFormat': data_format, 250 | 'SchemaVersionId': str(schema_version_id) 251 | }) 252 | 253 | version_id = client.create_schema( 254 | SCHEMA_NAME, data_format, SCHEMA_DEF 255 | ) 256 | 257 | assert version_id == schema_version_id 258 | -------------------------------------------------------------------------------- /tests/test_codec.py: -------------------------------------------------------------------------------- 1 | from uuid import uuid4 2 | 3 | import pytest 4 | 5 | from aws_schema_registry.codec import ( 6 | CodecException, encode, decode, 7 | UnknownEncodingException 8 | ) 9 | 10 | 11 | @pytest.mark.parametrize('compression', [None, 'zlib']) 12 | def test_codec(compression): 13 | data = (1024).to_bytes(2, 'big') 14 | schema_version_id = uuid4() 15 | encoded = encode(data, schema_version_id, compression=compression) 16 | decoded = decode(encoded) 17 | assert 
decoded[0] == data 18 | assert decoded[1] == schema_version_id 19 | 20 | 21 | def test_unknown_leading_byte(): 22 | # leading byte '0' is what the Confluent Schema Registry client uses 23 | bytes_ = b'\x00\x05\x00\x00' 24 | with pytest.raises(UnknownEncodingException): 25 | decode(bytes_) 26 | 27 | 28 | def test_unknown_compression_byte(): 29 | bytes_ = b'\x00\x01\x00\x00' 30 | with pytest.raises(CodecException): 31 | decode(bytes_) 32 | -------------------------------------------------------------------------------- /tests/test_jsonschema.py: -------------------------------------------------------------------------------- 1 | import re 2 | import pytest 3 | 4 | from aws_schema_registry import ValidationError 5 | from aws_schema_registry.jsonschema import JsonSchema 6 | 7 | 8 | def test_readwrite(): 9 | s = JsonSchema("""{ 10 | "$schema": "http://json-schema.org/draft-04/schema#", 11 | "type": "object", 12 | "properties": { 13 | "name": { 14 | "type": "string" 15 | }, 16 | "age": { 17 | "type": "integer" 18 | } 19 | }, 20 | "required": [ 21 | "name", 22 | "age" 23 | ] 24 | }""") 25 | 26 | d = { 27 | 'name': 'Yoda', 28 | 'age': 900 29 | } 30 | 31 | assert s.read(s.write(d)) == d 32 | 33 | 34 | def test_validation_during_read_write(): 35 | s = JsonSchema("""{ 36 | "$schema": "http://json-schema.org/draft-04/schema#", 37 | "type": "object", 38 | "properties": { 39 | "name": { 40 | "type": "string" 41 | }, 42 | "age": { 43 | "type": "integer" 44 | } 45 | }, 46 | "required": [ 47 | "name", 48 | "age" 49 | ] 50 | }""") 51 | 52 | with pytest.raises(ValidationError, match=re.escape( 53 | "data.name must be string" 54 | )): 55 | s.read(b'{"name": 1, "age": 2}') 56 | 57 | with pytest.raises(ValidationError, match=re.escape( 58 | "data.name must be string" 59 | )): 60 | s.write({"name": 1, "age": 2}) 61 | 62 | 63 | def test_validation(): 64 | s = JsonSchema("""{ 65 | "$schema": "http://json-schema.org/draft-04/schema#", 66 | "type": "object", 67 | "properties": { 68 | "name": 
{ 69 | "type": "string" 70 | }, 71 | "age": { 72 | "type": "integer" 73 | } 74 | }, 75 | "required": [ 76 | "name", 77 | "age" 78 | ] 79 | }""") 80 | 81 | with pytest.raises( 82 | ValidationError, 83 | # fastjsonschema>=2.18.0 reports only missing properties, so it will 84 | # exclude 'name' 85 | match=r"data must contain \[('name', )?'age'\] properties" 86 | ): 87 | s.validate({'name': 'Obi-Wan'}) 88 | with pytest.raises(ValidationError, match=re.escape( 89 | "data.name must be string" 90 | )): 91 | s.validate({'name': 1, 'age': 2}) 92 | 93 | s.validate({'name': 'Jar Jar', 'age': 42, 'sith': True}) 94 | 95 | s = JsonSchema("""{ 96 | "$schema": "http://json-schema.org/draft-04/schema#", 97 | "type": "object", 98 | "properties": { 99 | "name": { 100 | "type": "string" 101 | }, 102 | "age": { 103 | "type": "integer" 104 | } 105 | }, 106 | "required": [ 107 | "name", 108 | "age" 109 | ], 110 | "additionalProperties": false 111 | }""") 112 | 113 | with pytest.raises(ValidationError, match=re.escape( 114 | "data must not contain {'sith'} properties" 115 | )): 116 | s.validate({'name': 'Jar Jar', 'age': 42, 'sith': True}) 117 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = flake8,mypy,py38,py39,py310,py311 3 | 4 | [gh-actions] 5 | python = 6 | 3.8: py38 7 | 3.9: py39 8 | 3.10: py310 9 | 3.11: py311 10 | 11 | [testenv] 12 | deps = pytest 13 | commands = 14 | pip install . .[dev] .[kafka-python] 15 | python -m pytest --ignore tests/integration 16 | 17 | [testenv:flake8] 18 | deps = flake8 19 | commands = flake8 20 | 21 | [testenv:mypy] 22 | deps = mypy 23 | commands = 24 | pip install . 
.[dev] .[kafka-python] 25 | python -m mypy 26 | 27 | [pytest] 28 | python_files = test_* 29 | testpaths = tests 30 | 31 | [flake8] 32 | exclude = venv*,.venv*,env,.env,.tox,.toxenv,.git,__pycache__ 33 | --------------------------------------------------------------------------------