├── .gitignore ├── LICENSE ├── README.md ├── confluent ├── __init__.py └── schemaregistry │ ├── __init__.py │ ├── client │ ├── CachedSchemaRegistryClient.py │ ├── MockSchemaRegistryClient.py │ └── __init__.py │ └── serializers │ ├── MessageSerializer.py │ ├── Util.py │ └── __init__.py ├── setup.py └── test ├── __init__.py ├── adv_schema.avsc ├── basic_schema.avsc ├── data_gen.py ├── mock_registry.py ├── setup_test_path.py ├── test_cached_client.py ├── test_message_serializer.py ├── test_mock_client.py └── test_util.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | .venv* 38 | *.ignore.py 39 | *.avro -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Python Schema Registry Client 2 | 3 | A Python client used to interact with [Confluent](http://confluent.io/)'s 4 | [schema registry](https://github.com/confluentinc/schema-registry). Supports Python 2.6 and 2.7. This also works within a virtual env. 5 | 6 | The API is heavily based off of the existing Java API of [Confluent schema registry](https://github.com/confluentinc/schema-registry). 7 | 8 | # Installation 9 | 10 | Run `python setup.py install` from the source root. 11 | 12 | This library will be available via `pip` in the future. 
13 | 14 | # Example Usage 15 | 16 | 17 | ```python 18 | from confluent.schemaregistry.client import CachedSchemaRegistryClient 19 | from confluent.schemaregistry.serializers import MessageSerializer, Util 20 | 21 | # Note that some methods may throw exceptions if 22 | # the registry cannot be reached, decoding/encoding fails, 23 | # or IO fails 24 | 25 | # some helper methods in util to get a schema 26 | avro_schema = Util.parse_schema_from_file('/path/to/schema.avsc') 27 | avro_schema = Util.parse_schema_from_string(open('/path/to/schema.avsc').read()) 28 | 29 | # Initialize the client 30 | client = CachedSchemaRegistryClient(url='http://registry.host') 31 | 32 | # Schema operations 33 | 34 | # register a schema for a subject 35 | schema_id = client.register('my_subject', avro_schema) 36 | 37 | # fetch a schema by ID 38 | avro_schema = client.get_by_id(schema_id) 39 | 40 | # get the latest schema info for a subject 41 | schema_id,avro_schema,schema_version = client.get_latest_schema('my_subject') 42 | 43 | # get the version of a schema 44 | schema_version = client.get_version('my_subject', avro_schema) 45 | 46 | # Compatibility tests 47 | is_compatible = client.test_compatibility('my_subject', another_schema) 48 | 49 | # One of NONE, FULL, FORWARD, BACKWARD 50 | new_level = client.update_compatibility('NONE','my_subject') 51 | current_level = client.get_compatibility('my_subject') 52 | 53 | # Message operations 54 | 55 | # encode a record to put onto kafka 56 | serializer = MessageSerializer(client) 57 | record = get_obj_to_put_into_kafka() 58 | 59 | # use the schema id directly 60 | encoded = serializer.encode_record_with_schema_id(schema_id, record) 61 | # use an existing schema and topic 62 | # this will register the schema to the right subject based 63 | # on the topic name and then serialize 64 | encoded = serializer.encode_record_with_schema('my_topic', avro_schema, record) 65 | 66 | # encode a record with the latest schema for the topic 67 | # this is not 
efficient as it queries for the latest 68 | # schema each time 69 | encoded = serializer.encode_record_for_topic('my_kafka_topic', record) 70 | 71 | 72 | # decode a message from kafka 73 | message = get_message_from_kafka() 74 | decoded_object = serializer.decode_message(message) 75 | 76 | 77 | ``` 78 | 79 | # Running Tests 80 | 81 | ``` 82 | pip install unittest2 83 | unit2 discover -s test 84 | ``` 85 | 86 | Tests use unittest2 due to unittest being different between 2.6 and 2.7. 87 | 88 | # License 89 | 90 | The project is licensed under the Apache 2 license. 91 | -------------------------------------------------------------------------------- /confluent/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /confluent/schemaregistry/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Entry point for schema registry module 3 | """ 4 | __version__ = (0,1,1) 5 | -------------------------------------------------------------------------------- /confluent/schemaregistry/client/CachedSchemaRegistryClient.py: -------------------------------------------------------------------------------- 1 | import urllib2 2 | import json 3 | import sys 4 | 5 | from . import ClientError, VALID_LEVELS 6 | from ..serializers import Util 7 | 8 | # Common accept header sent 9 | ACCEPT_HDR="application/vnd.schemaregistry.v1+json, application/vnd.schemaregistry+json, application/json" 10 | 11 | class CachedSchemaRegistryClient(object): 12 | """ 13 | A client that talks to a Schema Registry over HTTP 14 | 15 | See http://confluent.io/docs/current/schema-registry/docs/intro.html 16 | 17 | Errors communicating to the server will result in a ClientError being raised. 
18 | """ 19 | def __init__(self, url, max_schemas_per_subject=1000): 20 | """Construct a client by passing in the base URL of the schema registry server""" 21 | 22 | self.url = url.rstrip('/') 23 | 24 | self.max_schemas_per_subject = max_schemas_per_subject 25 | # subj => { schema => id } 26 | self.subject_to_schema_ids = { } 27 | # id => avro_schema 28 | self.id_to_schema = {} 29 | # subj => { schema => version } 30 | self.subject_to_schema_versions = {} 31 | 32 | def _send_request(self, url, method='GET', body=None, headers=None): 33 | if body: 34 | body = json.dumps(body) 35 | 36 | new_req = urllib2.Request(url, data=body) 37 | # must be callable 38 | new_req.get_method = lambda: method 39 | # set the accept header 40 | new_req.add_header("Accept",ACCEPT_HDR) 41 | if body: 42 | new_req.add_header("Content-Length",str(len(body))) 43 | new_req.add_header("Content-Type","application/json") 44 | # add additional headers if present 45 | if headers: 46 | for header_name in headers: 47 | new_req.add_header(header_name, headers[header_name]) 48 | try: 49 | response = urllib2.urlopen(new_req) 50 | # read response 51 | result = json.loads(response.read()) 52 | # build meta with headers as a dict 53 | meta = response.info().dict 54 | # http code 55 | code = response.getcode() 56 | # return result + meta tuple 57 | return (result, meta, code) 58 | except urllib2.HTTPError as e: 59 | code = e.code 60 | result = json.loads(e.read()) 61 | message = "HTTP Error (%d) from schema registry: %s %d" % (code, 62 | result.get('message'), 63 | result.get('error_code')) 64 | raise ClientError(message, code) 65 | except ClientError as e: 66 | raise e 67 | except: 68 | msg = "An unexpected error occurred: %s" % (str(sys.exc_info()[1])) 69 | raise ClientError(msg) 70 | 71 | def _add_to_cache(self, cache, subject, schema, value): 72 | if subject not in cache: 73 | cache[subject] = { } 74 | sub_cache = cache[subject] 75 | sub_cache[schema] = value 76 | 77 | def _cache_schema(self, schema, 
schema_id, subject=None, version=None): 78 | # don't overwrite anything 79 | if schema_id in self.id_to_schema: 80 | schema = self.id_to_schema[schema_id] 81 | else: 82 | self.id_to_schema[schema_id] = schema 83 | 84 | if subject: 85 | self._add_to_cache(self.subject_to_schema_ids, 86 | subject, schema, schema_id) 87 | if version: 88 | self._add_to_cache(self.subject_to_schema_versions, 89 | subject, schema, version) 90 | 91 | 92 | def register(self, subject, avro_schema): 93 | """ 94 | Register a schema with the registry under the given subject 95 | and receive a schema id. 96 | 97 | avro_schema must be a parsed schema from the python avro library 98 | 99 | Multiple instances of the same schema will result in cache misses. 100 | """ 101 | schemas_to_id = self.subject_to_schema_ids.get(subject, { }) 102 | schema_id = schemas_to_id.get(avro_schema, -1) 103 | if schema_id != -1: 104 | return schema_id 105 | 106 | # send it up 107 | url = '/'.join([self.url,'subjects',subject,'versions']) 108 | # body is { schema : json_string } 109 | body = { 'schema' : json.dumps(avro_schema.to_json()) } 110 | result,meta,code = self._send_request(url, method='POST', body=body) 111 | # result is a dict 112 | schema_id = result['id'] 113 | # cache it 114 | self._cache_schema(avro_schema, schema_id, subject) 115 | return schema_id 116 | 117 | def get_by_id(self, schema_id): 118 | """Retrieve a parsed avro schema by id or None if not found""" 119 | if schema_id in self.id_to_schema: 120 | return self.id_to_schema[schema_id] 121 | # fetch from the registry 122 | url = '/'.join([self.url,'schemas','ids',str(schema_id)]) 123 | try: 124 | result,meta,code = self._send_request(url) 125 | except ClientError as e: 126 | if e.http_code == 404: 127 | return None 128 | else: 129 | raise e 130 | else: 131 | # need to parse the schema 132 | schema_str = result.get("schema") 133 | try: 134 | result = Util.parse_schema_from_string(schema_str) 135 | # cache it 136 | self._cache_schema(result, 
schema_id) 137 | return result 138 | except: 139 | # bad schema - should not happen 140 | raise ClientError("Received bad schema from registry.") 141 | 142 | def get_latest_schema(self, subject): 143 | """ 144 | Return the latest 3-tuple of: 145 | (the schema id, the parsed avro schema, the schema version) 146 | for a particular subject. 147 | 148 | This call always contacts the registry. 149 | 150 | If the subject is not found, (None,None,None) is returned. 151 | """ 152 | url = '/'.join([self.url, 'subjects',subject,'versions','latest']) 153 | try: 154 | result,meta,code = self._send_request(url) 155 | except ClientError as e: 156 | if e.http_code == 404: 157 | return (None, None, None) 158 | raise e 159 | schema_id = result['id'] 160 | version = result['version'] 161 | if schema_id in self.id_to_schema: 162 | schema = self.id_to_schema[schema_id] 163 | else: 164 | try: 165 | schema = Util.parse_schema_from_string(result['schema']) 166 | except: 167 | # bad schema - should not happen 168 | raise ClientError("Received bad schema from registry.") 169 | 170 | self._cache_schema(schema, schema_id, subject, version) 171 | return (schema_id, schema, version) 172 | 173 | 174 | def get_version(self, subject, avro_schema): 175 | """ 176 | Get the version of a schema for a given subject. 177 | 178 | Returns -1 if not found. 
179 | """ 180 | schemas_to_version = self.subject_to_schema_versions.get(subject,{}) 181 | version = schemas_to_version.get(avro_schema, -1) 182 | if version != -1: 183 | return version 184 | 185 | url = '/'.join([self.url, 'subjects', subject]) 186 | body = { 'schema' : json.dumps(avro_schema.to_json()) } 187 | try: 188 | result,meta,code = self._send_request(url, method='POST', body=body) 189 | schema_id = result['id'] 190 | version = result['version'] 191 | self._cache_schema(avro_schema, schema_id, subject, version) 192 | return version 193 | except ClientError as e: 194 | if e.http_code == 404: 195 | return -1 196 | else: 197 | raise e 198 | 199 | def test_compatibility(self, subject, avro_schema, version='latest'): 200 | """ 201 | Test the compatibility of a candidate parsed schema for a given subject. 202 | 203 | By default the latest version is checked against. 204 | """ 205 | url = '/'.join([self.url,'compatibility','subjects',subject, 206 | 'versions',str(version)]) 207 | body = { 'schema' : json.dumps(avro_schema.to_json()) } 208 | try: 209 | result,meta,code = self._send_request(url, method='POST', body=body) 210 | return result.get('is_compatible') 211 | except: 212 | return False 213 | 214 | 215 | def update_compatibility(self, level, subject=None): 216 | """ 217 | Update the compatibility level for a subject. Level must be one of: 218 | 219 | 'NONE','FULL','FORWARD', or 'BACKWARD' 220 | """ 221 | if level not in VALID_LEVELS: 222 | raise ClientError("Invalid level specified: %s" % (str(level))) 223 | 224 | url = '/'.join([self.url,'config']) 225 | if subject: 226 | url += '/' + subject 227 | 228 | body = { "compatibility" : level } 229 | result,meta,code = self._send_request(url, method='PUT', body=body) 230 | return result['compatibility'] 231 | 232 | def get_compatibility(self, subject=None): 233 | """ 234 | Get the current compatibility level for a subject. 
Result will be one of: 235 | 236 | 'NONE','FULL','FORWARD', or 'BACKWARD' 237 | """ 238 | url = '/'.join([self.url,'config']) 239 | if subject: 240 | url += '/' + subject 241 | 242 | result,meta,code = self._send_request(url) 243 | compatibility = result.get('compatibility', None) 244 | if not compatibility: 245 | compatibility = result.get('compatibilityLevel') 246 | 247 | return compatibility 248 | -------------------------------------------------------------------------------- /confluent/schemaregistry/client/MockSchemaRegistryClient.py: -------------------------------------------------------------------------------- 1 | import json 2 | import sys 3 | 4 | from . import ClientError, VALID_LEVELS 5 | from ..serializers import Util 6 | 7 | class MockSchemaRegistryClient(object): 8 | """ 9 | A client that acts as a schema registry locally. 10 | 11 | Compatibility related methods are not implemented at this time. 12 | """ 13 | def __init__(self, max_schemas_per_subject=1000): 14 | self.max_schemas_per_subject = max_schemas_per_subject 15 | # subj => { schema => id } 16 | self.subject_to_schema_ids = { } 17 | # id => avro_schema 18 | self.id_to_schema = {} 19 | # subj => { schema => version } 20 | self.subject_to_schema_versions = {} 21 | 22 | self.subject_to_latest_schema = { } 23 | 24 | # counters 25 | self.next_id = 1 26 | self.schema_to_id = { } 27 | 28 | def _get_next_id(self, schema): 29 | if schema in self.schema_to_id: 30 | return self.schema_to_id[schema] 31 | result = self.next_id 32 | self.next_id += 1 33 | self.schema_to_id[schema] = result 34 | return result 35 | 36 | def _get_next_version(self, subject): 37 | if subject not in self.subject_to_schema_versions: 38 | self.subject_to_schema_versions[subject] = { } 39 | return len(self.subject_to_schema_versions[subject]) 40 | 41 | def _get_all_versions(self, subject): 42 | versions = self.subject_to_schema_versions.get(subject,{}) 43 | return sorted(versions) 44 | 45 | def _add_to_cache(self, cache, subject,
schema, value): 46 | if subject not in cache: 47 | cache[subject] = { } 48 | sub_cache = cache[subject] 49 | sub_cache[schema] = value 50 | 51 | def _cache_schema(self, schema, schema_id, subject, version): 52 | # don't overwrite anything 53 | if schema_id in self.id_to_schema: 54 | schema = self.id_to_schema[schema_id] 55 | else: 56 | self.id_to_schema[schema_id] = schema 57 | 58 | self._add_to_cache(self.subject_to_schema_ids, 59 | subject, schema, schema_id) 60 | 61 | self._add_to_cache(self.subject_to_schema_versions, 62 | subject, schema, version) 63 | 64 | if subject in self.subject_to_latest_schema: 65 | si,s,v = self.subject_to_latest_schema[subject] 66 | if v > version: 67 | return 68 | self.subject_to_latest_schema[subject] = (schema_id, schema, version) 69 | 70 | def register(self, subject, avro_schema): 71 | """ 72 | Register a schema with the registry under the given subject 73 | and receive a schema id. 74 | 75 | avro_schema must be a parsed schema from the python avro library 76 | 77 | Multiple instances of the same schema will result in inconsistencies. 78 | """ 79 | schemas_to_id = self.subject_to_schema_ids.get(subject, { }) 80 | schema_id = schemas_to_id.get(avro_schema, -1) 81 | if schema_id != -1: 82 | return schema_id 83 | 84 | # add it 85 | version = self._get_next_version(subject) 86 | schema_id = self._get_next_id(avro_schema) 87 | 88 | # cache it 89 | self._cache_schema(avro_schema, schema_id, subject, version) 90 | return schema_id 91 | 92 | def get_by_id(self, schema_id): 93 | """Retrieve a parsed avro schema by id or None if not found""" 94 | return self.id_to_schema.get(schema_id, None) 95 | 96 | def get_latest_schema(self, subject): 97 | """ 98 | Return the latest 3-tuple of: 99 | (the schema id, the parsed avro schema, the schema version) 100 | for a particular subject. 101 | 102 | If the subject is not found, (None,None,None) is returned. 
103 | """ 104 | return self.subject_to_latest_schema.get(subject, (None, None, None)) 105 | 106 | def get_version(self, subject, avro_schema): 107 | """ 108 | Get the version of a schema for a given subject. 109 | 110 | Returns -1 if not found. 111 | """ 112 | schemas_to_version = self.subject_to_schema_versions.get(subject, {}) 113 | return schemas_to_version.get(avro_schema, -1) 114 | 115 | def get_id_for_schema(self, subject, avro_schema): 116 | """ 117 | Get the ID of a parsed schema 118 | """ 119 | schemas_to_id = self.subject_to_schema_ids.get(subject, {}) 120 | return schemas_to_id.get(avro_schema, -1) 121 | 122 | def test_compatibility(self, subject, avro_schema, version='latest'): 123 | raise ClientError("not implemented") 124 | 125 | def update_compatibility(self, level, subject=None): 126 | raise ClientError("not implemented") 127 | 128 | def get_compatibility(self, subject=None): 129 | raise ClientError("not implemented") 130 | -------------------------------------------------------------------------------- /confluent/schemaregistry/client/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | VALID_LEVELS=['NONE','FULL','FORWARD','BACKWARD'] 3 | 4 | class ClientError(Exception, object): 5 | """Error thrown by Schema Registry clients""" 6 | def __init__(self, message, http_code=-1): 7 | self.message = message 8 | self.http_code = http_code 9 | super(ClientError, self).__init__(self.__str__()) 10 | 11 | def __repr__(self): 12 | return "ClientError(error={error})".format(error=self.message) 13 | 14 | def __str__(self): 15 | return self.message 16 | 17 | from MockSchemaRegistryClient import * 18 | from CachedSchemaRegistryClient import * 19 | -------------------------------------------------------------------------------- /confluent/schemaregistry/serializers/MessageSerializer.py: -------------------------------------------------------------------------------- 1 | from avro import io 2 | import StringIO 3 | 
import json 4 | import struct 5 | import sys 6 | 7 | from . import SerializerError 8 | 9 | MAGIC_BYTE = 0 10 | 11 | HAS_FAST = False 12 | try: 13 | from fastavro.reader import read_data 14 | HAS_FAST = True 15 | except: 16 | pass 17 | 18 | 19 | class ContextStringIO(StringIO.StringIO): 20 | """ 21 | Wrapper to allow use of StringIO via 'with' constructs. 22 | """ 23 | def __enter__(self): 24 | return self 25 | def __exit__(self, *args): 26 | self.close() 27 | return False 28 | 29 | class MessageSerializer(object): 30 | """ 31 | A helper class that can serialize and deserialize messages 32 | that need to be encoded or decoded using the schema registry. 33 | 34 | All encode_* methods return a buffer that can be sent to kafka. 35 | All decode_* methods expect a buffer received from kafka. 36 | """ 37 | def __init__(self, registry_client): 38 | self.registry_client = registry_client 39 | self.id_to_decoder_func = { } 40 | self.id_to_writers = { } 41 | 42 | def encode_record_with_schema(self, topic, schema, record, is_key=False): 43 | """ 44 | Given a parsed avro schema, encode a record for the given topic. The 45 | record is expected to be a dictionary. 
46 | 47 | The schema is registered with the subject of 'topic-value' 48 | """ 49 | if not isinstance(record, dict): 50 | raise SerializerError("record must be a dictionary") 51 | subject_suffix = ('-key' if is_key else '-value') 52 | # get the latest schema for the subject 53 | subject = topic + subject_suffix 54 | # register it 55 | try: 56 | schema_id = self.registry_client.register(subject, schema) 57 | except: 58 | schema_id = None 59 | 60 | if not schema_id: 61 | message = "Unable to retrieve schema id for subject %s" % (subject) 62 | raise SerializerError(message) 63 | 64 | # cache writer 65 | self.id_to_writers[schema_id] = io.DatumWriter(schema) 66 | return self.encode_record_with_schema_id(schema_id, record) 67 | 68 | # subject = topic + suffix 69 | def encode_record_for_topic(self, topic, record, is_key=False): 70 | """ 71 | Encode a record for a given topic. 72 | 73 | This is expensive as it fetches the latest schema for a given topic. 74 | """ 75 | if not isinstance(record, dict): 76 | raise SerializerError("record must be a dictionary") 77 | subject_suffix = ('-key' if is_key else '-value') 78 | # get the latest schema for the subject 79 | subject = topic + subject_suffix 80 | try: 81 | schema_id,schema,version = self.registry_client.get_latest_schema(subject) 82 | except ClientError as e: 83 | message = "Unable to retrieve schema id for subject %s" % (subject) 84 | raise SerializerError(message) 85 | else: 86 | # cache writer 87 | self.id_to_writers[schema_id] = io.DatumWriter(schema) 88 | return self.encode_record_with_schema_id(schema_id, record) 89 | 90 | def encode_record_with_schema_id(self, schema_id, record): 91 | """ 92 | Encode a record with a given schema id. The record must 93 | be a python dictionary. 
94 | """ 95 | if not isinstance(record, dict): 96 | raise SerializerError("record must be a dictionary") 97 | # use slow avro 98 | if schema_id not in self.id_to_writers: 99 | # get the writer + schema 100 | try: 101 | schema = self.registry_client.get_by_id(schema_id) 102 | if not schema: 103 | raise SerializerError("Schema does not exist") 104 | self.id_to_writers[schema_id] = io.DatumWriter(schema) 105 | except ClientError as e: 106 | raise SerializerError("Error fetching schema from registry") 107 | 108 | # get the writer 109 | writer = self.id_to_writers[schema_id] 110 | with ContextStringIO() as outf: 111 | # write the header 112 | # magic byte 113 | outf.write(struct.pack('b',MAGIC_BYTE)) 114 | # write the schema ID in network byte order (big end) 115 | outf.write(struct.pack('>I',schema_id)) 116 | # write the record to the rest of it 117 | # Create an encoder that we'll write to 118 | encoder = io.BinaryEncoder(outf) 119 | # write the magic byte 120 | # write the object in 'obj' as Avro to the fake file... 
121 | writer.write(record, encoder) 122 | 123 | return outf.getvalue() 124 | 125 | 126 | # Decoder support 127 | def _get_decoder_func(self, schema_id, payload): 128 | if schema_id in self.id_to_decoder_func: 129 | return self.id_to_decoder_func[schema_id] 130 | 131 | # fetch from schema reg 132 | try: 133 | schema = self.registry_client.get_by_id(schema_id) 134 | except: 135 | schema = None 136 | 137 | if not schema: 138 | err = "unable to fetch schema with id %d" % (schema_id) 139 | raise SerializerError(err) 140 | 141 | curr_pos = payload.tell() 142 | if HAS_FAST: 143 | # try to use fast avro 144 | try: 145 | schema_dict = schema.to_json() 146 | obj = read_data(payload, schema_dict) 147 | # here means we passed so this is something fastavro can do 148 | # seek back since it will be called again for the 149 | # same payload - one time hit 150 | 151 | payload.seek(curr_pos) 152 | decoder_func = lambda p: read_data(p, schema_dict) 153 | self.id_to_decoder_func[schema_id] = decoder_func 154 | return self.id_to_decoder_func[schema_id] 155 | except: 156 | pass 157 | 158 | # here means we should just delegate to slow avro 159 | # rewind 160 | payload.seek(curr_pos) 161 | avro_reader = io.DatumReader(schema) 162 | def decoder(p): 163 | bin_decoder = io.BinaryDecoder(p) 164 | return avro_reader.read(bin_decoder) 165 | 166 | self.id_to_decoder_func[schema_id] = decoder 167 | return self.id_to_decoder_func[schema_id] 168 | 169 | def decode_message(self, message): 170 | """ 171 | Decode a message from kafka that has been encoded for use with 172 | the schema registry. 
173 | """ 174 | if len(message) <= 5: 175 | raise SerializerError("message is too small to decode") 176 | 177 | with ContextStringIO(message) as payload: 178 | magic,schema_id = struct.unpack('>bI',payload.read(5)) 179 | if magic != MAGIC_BYTE: 180 | raise SerializerError("message does not start with magic byte") 181 | decoder_func = self._get_decoder_func(schema_id, payload) 182 | return decoder_func(payload) 183 | -------------------------------------------------------------------------------- /confluent/schemaregistry/serializers/Util.py: -------------------------------------------------------------------------------- 1 | """ 2 | Basic utilities for handling avro schemas 3 | """ 4 | from avro import schema 5 | 6 | def parse_schema_from_string(schema_str): 7 | """Parse a schema given a schema string""" 8 | return schema.parse(schema_str) 9 | 10 | def parse_schema_from_file(schema_path): 11 | """Parse a schema from a file path""" 12 | with open(schema_path) as f: 13 | return parse_schema_from_string(f.read()) 14 | -------------------------------------------------------------------------------- /confluent/schemaregistry/serializers/__init__.py: -------------------------------------------------------------------------------- 1 | class SerializerError(Exception): 2 | """Generic error from serializer package""" 3 | def __init__(self, message): 4 | self.message = message 5 | 6 | def __repr__(self): 7 | return 'SerializerError(error={error})'.format(error=self.message) 8 | 9 | def __str__(self): 10 | return self.message 11 | 12 | from MessageSerializer import * 13 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Setup 2 | try: 3 | from setuptools import setup 4 | except ImportError: 5 | from distutils.core import setup 6 | from sys import version_info 7 | 8 | import confluent.schemaregistry 9 | 10 | install_requires = [] 11 | 12 | version = 
'.'.join([ str(confluent.schemaregistry.__version__[i]) for i in range(3) ]) 13 | 14 | setup( 15 | name = 'confluent-schemaregistry', 16 | version = version, 17 | packages = ['confluent', 18 | 'confluent.schemaregistry', 19 | 'confluent.schemaregistry.serializers', 20 | 'confluent.schemaregistry.client'], 21 | 22 | 23 | # Project uses simplejson, so ensure that it gets installed or upgraded 24 | # on the target machine 25 | install_requires = ['avro'], 26 | 27 | # metadata for upload to PyPI 28 | author = 'Verisign', 29 | author_email = 'vsrtc-dev@verisign.com', 30 | description = 'Confluent Schema Registry lib', 31 | keywords = 'confluent schema registry schemaregistry', 32 | extras_require = { 33 | 'fastavro': ['fastavro'], 34 | }, 35 | test_requires = ['unittest2'] 36 | ) 37 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/verisign/python-confluent-schemaregistry/43ab3ee506c7150faad247d44817d04c8c886483/test/__init__.py -------------------------------------------------------------------------------- /test/adv_schema.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "name": "advanced", 3 | "type": "record", 4 | "doc": "advanced schema for tests", 5 | "namespace": "python.test.advanced", 6 | "fields": [ 7 | { 8 | "name": "number", 9 | "doc": "age", 10 | "type": [ 11 | "long", 12 | "null" 13 | ] 14 | }, 15 | { 16 | "name": "name", 17 | "doc": "a name", 18 | "type": [ 19 | "string" 20 | ] 21 | }, 22 | { 23 | "name": "friends", 24 | "doc": "friends", 25 | "type" : { 26 | "type": "map", 27 | "values" : { 28 | "name": "basicPerson", 29 | "type": "record", 30 | "namespace": "python.test.advanced", 31 | "fields": [ 32 | { 33 | "name": "number", 34 | "doc": "friend age", 35 | "type": [ 36 | "long", 37 | "null" 38 | ] 39 | }, 40 | { 41 | "name": "name", 42 | 
"doc": "friend name", 43 | "type": [ 44 | "string" 45 | ] 46 | } 47 | ] 48 | } 49 | } 50 | }, 51 | { 52 | "name" : "family", 53 | "doc" : "family", 54 | "type" : { 55 | "namespace" : "python.test.advanced", 56 | "type" : "map", 57 | "values" : "basicPerson" 58 | } 59 | } 60 | ] 61 | } 62 | -------------------------------------------------------------------------------- /test/basic_schema.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "name": "basic", 3 | "type": "record", 4 | "doc": "basic schema for tests", 5 | "namespace": "python.test.basic", 6 | "fields": [ 7 | { 8 | "name": "number", 9 | "doc": "age", 10 | "type": [ 11 | "long", 12 | "null" 13 | ] 14 | }, 15 | { 16 | "name": "name", 17 | "doc": "a name", 18 | "type": [ 19 | "string" 20 | ] 21 | } 22 | ] 23 | } -------------------------------------------------------------------------------- /test/data_gen.py: -------------------------------------------------------------------------------- 1 | import random 2 | import os 3 | import os.path 4 | 5 | NAMES = ['stefan','melanie','nick','darrel','kent','simon'] 6 | AGES = list(range(1,10)) + [None] 7 | 8 | def get_schema_path(fname): 9 | dname = os.path.dirname(os.path.realpath(__file__)) 10 | return os.path.join(dname, fname) 11 | 12 | def load_schema_file(fname): 13 | fname = get_schema_path(fname) 14 | with open(fname) as f: 15 | return f.read() 16 | 17 | BASIC_SCHEMA = load_schema_file('basic_schema.avsc') 18 | 19 | def create_basic_item(i): 20 | return { 21 | 'name' : random.choice(NAMES) + '-' + str(i), 22 | 'number' : random.choice(AGES) 23 | } 24 | 25 | BASIC_ITEMS = map(create_basic_item, range(1,20)) 26 | 27 | ADVANCED_SCHEMA = load_schema_file('adv_schema.avsc') 28 | 29 | def create_adv_item(i): 30 | friends = map(create_basic_item, range(1,3)) 31 | family = map(create_basic_item, range(1,3)) 32 | basic = create_basic_item(i) 33 | basic['family'] = dict(map(lambda bi: (bi['name'],bi), family)) 34 | 
basic['friends'] = dict(map(lambda bi: (bi['name'],bi), friends)) 35 | return basic 36 | 37 | ADVANCED_ITEMS = map(create_adv_item, range(1, 20)) 38 | 39 | from avro import schema 40 | from avro.datafile import DataFileReader, DataFileWriter 41 | from avro.io import DatumReader, DatumWriter 42 | import json 43 | 44 | def _write_items(base_name, schema_str, items): 45 | avro_schema = schema.parse(schema_str) 46 | avro_file = base_name + '.avro' 47 | with DataFileWriter(open(avro_file, "w"), DatumWriter(), avro_schema) as writer: 48 | for i in items: 49 | writer.append(i) 50 | writer.close 51 | return (avro_file) 52 | 53 | def write_basic_items(base_name): 54 | return _write_items(base_name, BASIC_SCHEMA, BASIC_ITEMS) 55 | 56 | def write_advanced_items(base_name): 57 | return _write_items(base_name, ADVANCED_SCHEMA, ADVANCED_ITEMS) 58 | 59 | def cleanup(files): 60 | for f in files: 61 | try: 62 | os.remove(f) 63 | except OSError: 64 | pass 65 | 66 | if __name__ == "__main__": 67 | write_advanced_items("advanced") 68 | -------------------------------------------------------------------------------- /test/mock_registry.py: -------------------------------------------------------------------------------- 1 | import BaseHTTPServer 2 | import re 3 | from threading import Thread 4 | import setup_test_path 5 | 6 | from confluent.schemaregistry.client import MockSchemaRegistryClient 7 | from confluent.schemaregistry.serializers import Util 8 | import json 9 | 10 | class ReqHandler(BaseHTTPServer.BaseHTTPRequestHandler): 11 | protocol_version = "HTTP/1.0" 12 | def do_GET(self): 13 | self.server._run_routes(self) 14 | 15 | def do_POST(self): 16 | self.server._run_routes(self) 17 | 18 | def log_message(self, format, *args): 19 | pass 20 | 21 | 22 | class MockServer(BaseHTTPServer.HTTPServer, object): 23 | def __init__(self, *args, **kwargs): 24 | super(MockServer, self).__init__(*args, **kwargs) 25 | self.counts = { } 26 | self.registry = MockSchemaRegistryClient() 27 | 
self.schema_cache = { } 28 | self.all_routes = { 29 | 'GET' : [ 30 | (r"/schemas/ids/(\d+)", 'get_schema_by_id'), 31 | (r"/subjects/(\w+)/versions/latest", 'get_latest') 32 | ], 33 | 'POST' : [ 34 | (r"/subjects/(\w+)/versions", 'register'), 35 | (r"/subjects/(\w+)", 'get_version') 36 | ] 37 | } 38 | 39 | def _send_response(self, resp, status, body): 40 | resp.send_response(status) 41 | resp.send_header("Content-Type","application/json") 42 | resp.end_headers() 43 | resp.wfile.write(json.dumps(body)) 44 | resp.finish() 45 | 46 | def _create_error(self, msg, status=400, err_code=1): 47 | return (status, { 48 | "error_code" : err_code, 49 | "message" : msg 50 | }) 51 | 52 | def _run_routes(self, req): 53 | self.add_count((req.command, req.path)) 54 | routes = self.all_routes.get(req.command, []) 55 | for r in routes: 56 | m = re.match(r[0], req.path) 57 | if m: 58 | func = getattr(self, r[1]) 59 | status,body = func(req, m.groups()) 60 | return self._send_response(req, status, body) 61 | 62 | # here means we got a bad req 63 | status,body = self._create_error("bad path specified") 64 | self._send_response(req, status, body) 65 | 66 | def get_schema_by_id(self, req, groups): 67 | schema_id = int(groups[0]) 68 | schema = self.registry.get_by_id(schema_id) 69 | if not schema: 70 | return self._create_error("schema not found", 404) 71 | result = { 72 | "schema" : json.dumps(schema.to_json()) 73 | } 74 | return (200,result) 75 | 76 | def _get_identity_schema(self, avro_schema): 77 | # normalized 78 | schema_str = json.dumps(avro_schema.to_json()) 79 | if schema_str in self.schema_cache: 80 | return self.schema_cache[schema_str] 81 | self.schema_cache[schema_str] = avro_schema 82 | return avro_schema 83 | 84 | def _get_schema_from_body(self, req): 85 | length = int(req.headers.getheader('content-length')) 86 | data = req.rfile.read(length) 87 | data = json.loads(data) 88 | schema = data.get("schema",None) 89 | if not schema: 90 | return None 91 | try: 92 | avro_schema = 
Util.parse_schema_from_string(schema) 93 | return self._get_identity_schema(avro_schema) 94 | except: 95 | return None 96 | 97 | def register(self, req, groups): 98 | avro_schema = self._get_schema_from_body(req) 99 | if not avro_schema: 100 | return self._create_error("Invalid avro schema", 422, 42201) 101 | subject = groups[0] 102 | schema_id = self.registry.register(subject, avro_schema) 103 | return (200, {'id' : schema_id }) 104 | 105 | def get_version(self, req, groups): 106 | avro_schema = self._get_schema_from_body(req) 107 | if not avro_schema: 108 | return self._create_error("Invalid avro schema", 422, 42201) 109 | subject = groups[0] 110 | version = self.registry.get_version(subject, avro_schema) 111 | if version == -1: 112 | return self._create_error("Not found", 404) 113 | schema_id = self.registry.get_id_for_schema(subject, avro_schema) 114 | 115 | result = { 116 | "schema" : json.dumps(avro_schema.to_json()), 117 | "subject" : subject, 118 | "id" : schema_id, 119 | "version" : version 120 | } 121 | return (200, result) 122 | 123 | def get_latest(self, req, groups): 124 | subject = groups[0] 125 | schema_id,avro_schema,version = self.registry.get_latest_schema(subject) 126 | if schema_id == None: 127 | return self._create_error("Not found", 404) 128 | result = { 129 | "schema" : json.dumps(avro_schema.to_json()), 130 | "subject" : subject, 131 | "id" : schema_id, 132 | "version" : version 133 | } 134 | return (200, result) 135 | 136 | 137 | def add_count(self, path): 138 | if path not in self.counts: 139 | self.counts[path] = 0 140 | self.counts[path] += 1 141 | 142 | class ServerThread(Thread): 143 | def __init__(self, port): 144 | Thread.__init__(self) 145 | self.server = None 146 | self.port = port 147 | 148 | def run(self): 149 | self.server = MockServer(('127.0.0.1',self.port), ReqHandler) 150 | self.server.serve_forever() 151 | 152 | def shutdown(self): 153 | if self.server: 154 | self.server.shutdown() 155 | self.server.socket.close() 156 | 157 
| 158 | if __name__ == '__main__': 159 | s = ServerThread(9001) 160 | s.start() 161 | -------------------------------------------------------------------------------- /test/setup_test_path.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import os.path 4 | 5 | ''' 6 | Hacky way to ensure that we are loading the code that we 7 | intend to test. 8 | ''' 9 | 10 | test_dir = os.path.dirname(os.path.realpath(__file__)) 11 | parent = os.path.join(test_dir, '..') 12 | sys.path.insert(0, parent) 13 | -------------------------------------------------------------------------------- /test/test_cached_client.py: -------------------------------------------------------------------------------- 1 | import unittest2 as unittest 2 | import setup_test_path 3 | import data_gen 4 | import mock_registry 5 | import time 6 | 7 | from confluent.schemaregistry.client import CachedSchemaRegistryClient 8 | from confluent.schemaregistry.serializers import Util 9 | 10 | class TestCacheSchemaRegistryClient(unittest.TestCase): 11 | 12 | def setUp(self): 13 | self.server = mock_registry.ServerThread(9001) 14 | self.server.start() 15 | self.client = CachedSchemaRegistryClient('http://127.0.0.1:9001') 16 | time.sleep(1) 17 | 18 | def tearDown(self): 19 | self.server.shutdown() 20 | self.server.join() 21 | 22 | def test_register(self): 23 | parsed = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA) 24 | client = self.client 25 | schema_id = client.register('test', parsed) 26 | self.assertTrue(schema_id > 0) 27 | self.assertEqual(len(client.id_to_schema), 1) 28 | 29 | def test_multi_subject_register(self): 30 | parsed = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA) 31 | client = self.client 32 | schema_id = client.register('test', parsed) 33 | self.assertTrue(schema_id > 0) 34 | 35 | # register again under different subject 36 | dupe_id = client.register('other', parsed) 37 | self.assertEqual(schema_id, dupe_id) 38 | 
self.assertEqual(len(client.id_to_schema), 1) 39 | 40 | def test_dupe_register(self): 41 | parsed = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA) 42 | subject = 'test' 43 | client = self.client 44 | schema_id = client.register(subject, parsed) 45 | self.assertTrue(schema_id > 0) 46 | latest = client.get_latest_schema(subject) 47 | 48 | # register again under same subject 49 | dupe_id = client.register(subject, parsed) 50 | self.assertEqual(schema_id, dupe_id) 51 | dupe_latest = client.get_latest_schema(subject) 52 | self.assertEqual(latest, dupe_latest) 53 | 54 | def assertLatest(self, meta_tuple, sid, schema, version): 55 | self.assertNotEqual(sid, -1) 56 | self.assertNotEqual(version, -1) 57 | self.assertEqual(meta_tuple[0], sid) 58 | self.assertEqual(meta_tuple[1], schema) 59 | self.assertEqual(meta_tuple[2], version) 60 | 61 | 62 | def test_getters(self): 63 | parsed = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA) 64 | client = self.client 65 | subject = 'test' 66 | version = client.get_version(subject, parsed) 67 | self.assertEqual(version, -1) 68 | schema = client.get_by_id(1) 69 | self.assertEqual(schema, None) 70 | latest = client.get_latest_schema(subject) 71 | self.assertEqual(latest, (None,None,None)) 72 | 73 | # register 74 | schema_id = client.register(subject, parsed) 75 | latest = client.get_latest_schema(subject) 76 | version = client.get_version(subject, parsed) 77 | self.assertLatest(latest, schema_id, parsed, version) 78 | 79 | fetched = client.get_by_id(schema_id) 80 | self.assertEqual(fetched, parsed) 81 | 82 | def test_multi_register(self): 83 | basic = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA) 84 | adv = Util.parse_schema_from_string(data_gen.ADVANCED_SCHEMA) 85 | subject = 'test' 86 | client = self.client 87 | 88 | id1 = client.register(subject, basic) 89 | latest1 = client.get_latest_schema(subject) 90 | v1 = client.get_version(subject, basic) 91 | self.assertLatest(latest1, id1, basic, v1) 92 | 93 | id2 = 
client.register(subject, adv) 94 | latest2 = client.get_latest_schema(subject) 95 | v2 = client.get_version(subject, adv) 96 | self.assertLatest(latest2, id2, adv, v2) 97 | 98 | self.assertNotEqual(id1, id2) 99 | self.assertNotEqual(latest1, latest2) 100 | # ensure version is higher 101 | self.assertTrue(latest1[2] < latest2[2]) 102 | 103 | client.register(subject, basic) 104 | latest3 = client.get_latest_schema(subject) 105 | # latest should not change with a re-reg 106 | self.assertEqual(latest2, latest3) 107 | 108 | def suite(): 109 | return unittest.TestLoader().loadTestsFromTestCase(BaseTest) 110 | -------------------------------------------------------------------------------- /test/test_message_serializer.py: -------------------------------------------------------------------------------- 1 | import unittest2 as unittest 2 | import data_gen 3 | import setup_test_path 4 | 5 | import struct 6 | 7 | from avro import schema 8 | from confluent.schemaregistry.serializers import MessageSerializer, Util 9 | from confluent.schemaregistry.client import MockSchemaRegistryClient 10 | 11 | class TestMessageSerializer(unittest.TestCase): 12 | 13 | def setUp(self): 14 | # need to set up the serializer 15 | self.client = MockSchemaRegistryClient() 16 | self.ms = MessageSerializer(self.client) 17 | 18 | def assertMessageIsSame(self, message, expected, schema_id): 19 | self.assertTrue(message) 20 | self.assertTrue(len(message) > 5) 21 | magic,sid = struct.unpack('>bI',message[0:5]) 22 | self.assertEqual(magic, 0) 23 | self.assertEqual(sid, schema_id) 24 | decoded = self.ms.decode_message(message) 25 | self.assertTrue(decoded) 26 | self.assertEqual(decoded, expected) 27 | 28 | def test_encode_with_schema_id(self): 29 | adv = Util.parse_schema_from_string(data_gen.ADVANCED_SCHEMA) 30 | basic = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA) 31 | subject = 'test' 32 | schema_id = self.client.register(subject, basic) 33 | 34 | records = data_gen.BASIC_ITEMS 35 | for record 
in records: 36 | message = self.ms.encode_record_with_schema_id(schema_id, record) 37 | self.assertMessageIsSame(message, record, schema_id) 38 | 39 | subject = 'test_adv' 40 | adv_schema_id = self.client.register(subject, adv) 41 | self.assertNotEqual(adv_schema_id, schema_id) 42 | records = data_gen.ADVANCED_ITEMS 43 | for record in records: 44 | message = self.ms.encode_record_with_schema_id(adv_schema_id, record) 45 | self.assertMessageIsSame(message, record, adv_schema_id) 46 | 47 | 48 | def test_encode_record_for_topic(self): 49 | topic = 'test' 50 | basic = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA) 51 | subject = 'test-value' 52 | schema_id = self.client.register(subject, basic) 53 | 54 | records = data_gen.BASIC_ITEMS 55 | for record in records: 56 | message = self.ms.encode_record_for_topic(topic, record) 57 | self.assertMessageIsSame(message, record ,schema_id) 58 | 59 | def test_encode_record_with_schema(self): 60 | topic = 'test' 61 | basic = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA) 62 | subject = 'test-value' 63 | schema_id = self.client.register(subject, basic) 64 | records = data_gen.BASIC_ITEMS 65 | for record in records: 66 | message = self.ms.encode_record_with_schema(topic, basic, record) 67 | self.assertMessageIsSame(message, record ,schema_id) 68 | 69 | def suite(): 70 | return unittest.TestLoader().loadTestsFromTestCase(TestMessageSerializer) 71 | -------------------------------------------------------------------------------- /test/test_mock_client.py: -------------------------------------------------------------------------------- 1 | import unittest2 as unittest 2 | import setup_test_path 3 | import data_gen 4 | 5 | from confluent.schemaregistry.client import MockSchemaRegistryClient 6 | from confluent.schemaregistry.serializers import Util 7 | 8 | class TestMockSchemaRegistryClient(unittest.TestCase): 9 | 10 | def setUp(self): 11 | self.client = MockSchemaRegistryClient() 12 | 13 | def test_register(self): 14 | 
parsed = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA) 15 | client = self.client 16 | schema_id = client.register('test', parsed) 17 | self.assertTrue(schema_id > 0) 18 | self.assertEqual(len(client.id_to_schema), 1) 19 | 20 | def test_multi_subject_register(self): 21 | parsed = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA) 22 | client = self.client 23 | schema_id = client.register('test', parsed) 24 | self.assertTrue(schema_id > 0) 25 | 26 | # register again under different subject 27 | dupe_id = client.register('other', parsed) 28 | self.assertEqual(schema_id, dupe_id) 29 | self.assertEqual(len(client.id_to_schema), 1) 30 | 31 | def test_dupe_register(self): 32 | parsed = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA) 33 | subject = 'test' 34 | client = self.client 35 | schema_id = client.register(subject, parsed) 36 | self.assertTrue(schema_id > 0) 37 | latest = client.get_latest_schema(subject) 38 | 39 | # register again under same subject 40 | dupe_id = client.register(subject, parsed) 41 | self.assertEqual(schema_id, dupe_id) 42 | dupe_latest = client.get_latest_schema(subject) 43 | self.assertEqual(latest, dupe_latest) 44 | 45 | def assertLatest(self, meta_tuple, sid, schema, version): 46 | self.assertNotEqual(sid, -1) 47 | self.assertNotEqual(version, -1) 48 | self.assertEqual(meta_tuple[0], sid) 49 | self.assertEqual(meta_tuple[1], schema) 50 | self.assertEqual(meta_tuple[2], version) 51 | 52 | 53 | def test_getters(self): 54 | parsed = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA) 55 | client = self.client 56 | subject = 'test' 57 | version = client.get_version(subject, parsed) 58 | self.assertEqual(version, -1) 59 | schema = client.get_by_id(1) 60 | self.assertEqual(schema, None) 61 | latest = client.get_latest_schema(subject) 62 | self.assertEqual(latest, (None,None,None)) 63 | 64 | # register 65 | schema_id = client.register(subject, parsed) 66 | latest = client.get_latest_schema(subject) 67 | version = 
client.get_version(subject, parsed) 68 | self.assertLatest(latest, schema_id, parsed, version) 69 | 70 | fetched = client.get_by_id(schema_id) 71 | self.assertEqual(fetched, parsed) 72 | 73 | def test_multi_register(self): 74 | basic = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA) 75 | adv = Util.parse_schema_from_string(data_gen.ADVANCED_SCHEMA) 76 | subject = 'test' 77 | client = self.client 78 | 79 | id1 = client.register(subject, basic) 80 | latest1 = client.get_latest_schema(subject) 81 | v1 = client.get_version(subject, basic) 82 | self.assertLatest(latest1, id1, basic, v1) 83 | 84 | id2 = client.register(subject, adv) 85 | latest2 = client.get_latest_schema(subject) 86 | v2 = client.get_version(subject, adv) 87 | self.assertLatest(latest2, id2, adv, v2) 88 | 89 | self.assertNotEqual(id1, id2) 90 | self.assertNotEqual(latest1, latest2) 91 | # ensure version is higher 92 | self.assertTrue(latest1[2] < latest2[2]) 93 | 94 | client.register(subject, basic) 95 | latest3 = client.get_latest_schema(subject) 96 | # latest should not change with a re-reg 97 | self.assertEqual(latest2, latest3) 98 | 99 | def suite(): 100 | return unittest.TestLoader().loadTestsFromTestCase(BaseTest) 101 | -------------------------------------------------------------------------------- /test/test_util.py: -------------------------------------------------------------------------------- 1 | import unittest2 as unittest 2 | import data_gen 3 | 4 | from avro import schema 5 | from confluent.schemaregistry.serializers import Util 6 | 7 | class TestUtil(unittest.TestCase): 8 | def test_schema_from_string(self): 9 | parsed = Util.parse_schema_from_string(data_gen.BASIC_SCHEMA) 10 | self.assertTrue(isinstance(parsed, schema.Schema)) 11 | 12 | def test_schema_from_file(self): 13 | parsed = Util.parse_schema_from_file(data_gen.get_schema_path('adv_schema.avsc')) 14 | self.assertTrue(isinstance(parsed, schema.Schema)) 15 | 16 | def suite(): 17 | return 
unittest.TestLoader().loadTestsFromTestCase(TestUtil) 18 | --------------------------------------------------------------------------------