├── .github └── workflows │ └── gradle.yml ├── .gitignore ├── .java-version ├── .travis.yml ├── LICENSE ├── README.md ├── build.gradle ├── gradle └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── gradlew ├── gradlew.bat ├── settings.gradle └── src ├── main └── java │ └── me │ └── frmr │ └── kafka │ └── connect │ └── RegistrylessAvroConverter.java └── test ├── java └── me │ └── frmr │ └── kafka │ └── connect │ └── RegistrylessAvroConverterTest.java └── resources ├── data ├── binary │ └── beamer.avro └── json │ └── beamer.json └── schema ├── dog.avsc └── invalid.avsc /.github/workflows/gradle.yml: -------------------------------------------------------------------------------- 1 | name: RAC CI 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | 9 | steps: 10 | - uses: actions/checkout@v1 11 | - name: Set up JDK 1.8 12 | uses: actions/setup-java@v1 13 | with: 14 | java-version: 1.8 15 | - name: Build with Gradle 16 | run: ./gradlew build 17 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | .gradle 3 | -------------------------------------------------------------------------------- /.java-version: -------------------------------------------------------------------------------- 1 | 1.8 2 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: java 2 | 3 | jdk: 4 | - openjdk8 5 | - openjdk11 6 | 7 | branches: 8 | only: 9 | - master 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND 
DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. 
For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Registryless Avro Converter 2 | 3 | This is an Avro converter for Kafka Connect that does not depend on Confluent Schema Registry. It 4 | shares much of the same underlying code as Confluent's `AvroConverter`, and should work the same in 5 | practice less any features that deal with the Schema Registry itself. 6 | 7 | We developed this converter at MailChimp to facilitate R&D with Connect and use cases where pushing 8 | the Schema Registry Avro Format through Kafka was not desirable or we couldn't justify the overhead 9 | of a Schema Registry. 10 | 11 | ## Using the Converter 12 | 13 | ### Setup 14 | 15 | 1. You must have **Java 8** as your runtime environment. 16 | 2. **Confluent Platform**: as this plugin relies on various Confluent libraries that are 17 | distributed with CP (e.g. their avro converter, etc). See the chart below for the version matrix. 18 | 3. 
Configure a `plugin.path` in your connect setup and drop a RegistrylessAvroConverter JAR in that 19 | path so that it's picked up when Kafka Connect starts. 20 | 21 | Once you've confirmed that the binary is in place, then in a properties file or JSON connector 22 | configuration you can specify this converter for keys and/or values. 23 | 24 | ### Version Matrix 25 | 26 | | RAC Version | Kafka Version | Confluent Version | 27 | |---------------|---------------|-------------------| 28 | | 1.10.0 | 2.5.0 | 5.5.1 | 29 | | 1.9.0 | 2.4.1 | 5.4.2 | 30 | | 1.8.0 | 2.3.0 | 5.3.0 | 31 | | 1.7.0 | 2.2.0 | 5.2.0 | 32 | | 1.6.0 | 2.1.1 | 5.1.2 | 33 | | 1.5.0 | 2.0.1 | 5.0.3 | 34 | | 1.4.0 | 1.1.1 | 4.1.3 | 35 | | 1.3.0 | 1.0.0 | 4.0.0 | 36 | 37 | ### Configuration 38 | 39 | To use the RegistrylessAvroConverter, simply provide it in the `key.converter` or `value.converter` 40 | setting for your connector. RAC can run with or without an explicit reader or writer schema. If an 41 | explicit schema is not provided, the schema used will be determined at runtime. 42 | 43 | **N.B.** Schemas determined at runtime could vary depending on how your connector is implemented 44 | and how it generates Connect Data Schemas. We recommend understanding the semantics of your 45 | Connectors before using the schemaless configuration for sources. 
46 | 47 | Here's an example of how we might define RAC for use with keys and values without an explicit schema 48 | in standalone mode: 49 | 50 | ``` 51 | key.converter=me.frmr.kafka.connect.RegistrylessAvroConverter 52 | value.converter=me.frmr.kafka.connect.RegistrylessAvroConverter 53 | ``` 54 | 55 | And this is how you would define a RAC _with_ an explicit schema: 56 | 57 | ``` 58 | key.converter=me.frmr.kafka.connect.RegistrylessAvroConverter 59 | key.converter.schema.path=/path/to/schema/file.avsc 60 | value.converter=me.frmr.kafka.connect.RegistrylessAvroConverter 61 | value.converter.schema.path=/path/to/schema/file.avsc 62 | ``` 63 | 64 | You can also tune the number of cached schemas we maintain in memory. By default, we store 50 but 65 | you may need to increase that limit if your data structures have a lot of nesting or you're dealing 66 | with a lot of different data structures. You can tune it using the `schema.cache.size` setting: 67 | 68 | ``` 69 | key.converter.schema.cache.size = 100 70 | value.converter.schema.cache.size = 100 71 | ``` 72 | 73 | Unfortunately, the best way to _know_ you need to tune this value right now might be to hook up 74 | YourKit or something similar. 75 | 76 | ## Building the Converter 77 | 78 | This converter uses Gradle. Building the project is as simple as: 79 | 80 | ``` 81 | ./gradlew build 82 | ``` 83 | 84 | ## General Notes 85 | 86 | * This project is a bit weird because it's designed to be run in a Kafka Connect runtime. So 87 | all of the dependencies are `compileOnly` because they're available on the classpath at runtime. 88 | * If you're testing this locally, it's a bit weird in much the same way. You'll need to copy 89 | the JAR into an appropriate plugin path folder (as configured on your Connect worker) so the class 90 | is visible to Kafka Connect for local testing. 91 | 92 | ## Contributing 93 | 94 | Pull requests and issues are welcome! 
If you think you've spotted a problem or you just have a 95 | question do not hesitate to [open an issue](https://github.com/farmdawgnation/registryless-avro-converter/issues/new). 96 | -------------------------------------------------------------------------------- /build.gradle: -------------------------------------------------------------------------------- 1 | plugins { 2 | id 'java' 3 | } 4 | 5 | version = '1.11.0-SNAPSHOT' 6 | group = 'me.frmr.kafka.connect' 7 | 8 | repositories { 9 | mavenLocal() 10 | mavenCentral() 11 | maven { 12 | url "https://packages.confluent.io/maven/" 13 | } 14 | } 15 | 16 | ext { 17 | // You have to keep these synced - so confluent 4.1.0 builds against kafka connect 1.1.0. 18 | // You'll need to read the release notes for each confluent platform release to figure out 19 | // what version of Kafka it's built against 20 | kafkaVersion = '2.5.0' 21 | cpVersion = '5.5.1' 22 | 23 | junitVersion = '5.4.1' 24 | slf4jApiVersion = '1.7.25' 25 | } 26 | 27 | dependencies { 28 | // These libraries are marked compileOnly. Though they are required at runtime, the reality of 29 | // deploying software inside a Kafka Connect ecosystem is that it could be challenging to avoid 30 | // issues where we end up with multiple versions of the libraries on the classpath once all the 31 | // plugins are loaded. Our code should behave well so long as the version available at runtime 32 | // is binary compatible with the version we built against. 
33 | compileOnly "org.apache.kafka:connect-api:$kafkaVersion" 34 | compileOnly "io.confluent:kafka-connect-avro-converter:$cpVersion" 35 | compileOnly "org.slf4j:slf4j-api:$slf4jApiVersion" 36 | 37 | testImplementation "org.apache.kafka:connect-api:$kafkaVersion" 38 | testImplementation "io.confluent:kafka-connect-avro-converter:$cpVersion" 39 | testImplementation 'org.apache.commons:commons-io:1.3.2' 40 | testImplementation "org.slf4j:slf4j-api:$slf4jApiVersion" 41 | testImplementation "org.junit.jupiter:junit-jupiter:$junitVersion" 42 | } 43 | 44 | test { 45 | useJUnitPlatform() 46 | testLogging { 47 | events "passed", "skipped", "failed" 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/farmdawgnation/registryless-avro-converter/0a1cd28719f51eee2323fcd355d78ef7b77eb73a/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionBase=GRADLE_USER_HOME 2 | distributionPath=wrapper/dists 3 | distributionUrl=https\://services.gradle.org/distributions/gradle-6.1-bin.zip 4 | zipStoreBase=GRADLE_USER_HOME 5 | zipStorePath=wrapper/dists 6 | -------------------------------------------------------------------------------- /gradlew: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | # 4 | # Copyright 2015 the original author or authors. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 
8 | # You may obtain a copy of the License at 9 | # 10 | # https://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | 19 | ############################################################################## 20 | ## 21 | ## Gradle start up script for UN*X 22 | ## 23 | ############################################################################## 24 | 25 | # Attempt to set APP_HOME 26 | # Resolve links: $0 may be a link 27 | PRG="$0" 28 | # Need this for relative symlinks. 29 | while [ -h "$PRG" ] ; do 30 | ls=`ls -ld "$PRG"` 31 | link=`expr "$ls" : '.*-> \(.*\)$'` 32 | if expr "$link" : '/.*' > /dev/null; then 33 | PRG="$link" 34 | else 35 | PRG=`dirname "$PRG"`"/$link" 36 | fi 37 | done 38 | SAVED="`pwd`" 39 | cd "`dirname \"$PRG\"`/" >/dev/null 40 | APP_HOME="`pwd -P`" 41 | cd "$SAVED" >/dev/null 42 | 43 | APP_NAME="Gradle" 44 | APP_BASE_NAME=`basename "$0"` 45 | 46 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 47 | DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' 48 | 49 | # Use the maximum available, or set MAX_FD != -1 to use that value. 50 | MAX_FD="maximum" 51 | 52 | warn () { 53 | echo "$*" 54 | } 55 | 56 | die () { 57 | echo 58 | echo "$*" 59 | echo 60 | exit 1 61 | } 62 | 63 | # OS specific support (must be 'true' or 'false'). 
64 | cygwin=false 65 | msys=false 66 | darwin=false 67 | nonstop=false 68 | case "`uname`" in 69 | CYGWIN* ) 70 | cygwin=true 71 | ;; 72 | Darwin* ) 73 | darwin=true 74 | ;; 75 | MINGW* ) 76 | msys=true 77 | ;; 78 | NONSTOP* ) 79 | nonstop=true 80 | ;; 81 | esac 82 | 83 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar 84 | 85 | # Determine the Java command to use to start the JVM. 86 | if [ -n "$JAVA_HOME" ] ; then 87 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 88 | # IBM's JDK on AIX uses strange locations for the executables 89 | JAVACMD="$JAVA_HOME/jre/sh/java" 90 | else 91 | JAVACMD="$JAVA_HOME/bin/java" 92 | fi 93 | if [ ! -x "$JAVACMD" ] ; then 94 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME 95 | 96 | Please set the JAVA_HOME variable in your environment to match the 97 | location of your Java installation." 98 | fi 99 | else 100 | JAVACMD="java" 101 | which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 102 | 103 | Please set the JAVA_HOME variable in your environment to match the 104 | location of your Java installation." 105 | fi 106 | 107 | # Increase the maximum file descriptors if we can. 108 | if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then 109 | MAX_FD_LIMIT=`ulimit -H -n` 110 | if [ $? -eq 0 ] ; then 111 | if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then 112 | MAX_FD="$MAX_FD_LIMIT" 113 | fi 114 | ulimit -n $MAX_FD 115 | if [ $? 
-ne 0 ] ; then 116 | warn "Could not set maximum file descriptor limit: $MAX_FD" 117 | fi 118 | else 119 | warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" 120 | fi 121 | fi 122 | 123 | # For Darwin, add options to specify how the application appears in the dock 124 | if $darwin; then 125 | GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" 126 | fi 127 | 128 | # For Cygwin or MSYS, switch paths to Windows format before running java 129 | if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then 130 | APP_HOME=`cygpath --path --mixed "$APP_HOME"` 131 | CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` 132 | JAVACMD=`cygpath --unix "$JAVACMD"` 133 | 134 | # We build the pattern for arguments to be converted via cygpath 135 | ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` 136 | SEP="" 137 | for dir in $ROOTDIRSRAW ; do 138 | ROOTDIRS="$ROOTDIRS$SEP$dir" 139 | SEP="|" 140 | done 141 | OURCYGPATTERN="(^($ROOTDIRS))" 142 | # Add a user-defined pattern to the cygpath arguments 143 | if [ "$GRADLE_CYGPATTERN" != "" ] ; then 144 | OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" 145 | fi 146 | # Now convert the arguments - kludge to limit ourselves to /bin/sh 147 | i=0 148 | for arg in "$@" ; do 149 | CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` 150 | CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option 151 | 152 | if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition 153 | eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` 154 | else 155 | eval `echo args$i`="\"$arg\"" 156 | fi 157 | i=$((i+1)) 158 | done 159 | case $i in 160 | (0) set -- ;; 161 | (1) set -- "$args0" ;; 162 | (2) set -- "$args0" "$args1" ;; 163 | (3) set -- "$args0" "$args1" "$args2" ;; 164 | (4) set -- "$args0" "$args1" "$args2" "$args3" ;; 165 | (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; 166 | (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; 167 | (7) 
set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; 168 | (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; 169 | (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; 170 | esac 171 | fi 172 | 173 | # Escape application args 174 | save () { 175 | for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done 176 | echo " " 177 | } 178 | APP_ARGS=$(save "$@") 179 | 180 | # Collect all arguments for the java command, following the shell quoting and substitution rules 181 | eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" 182 | 183 | # by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong 184 | if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then 185 | cd "$(dirname "$0")" 186 | fi 187 | 188 | exec "$JAVACMD" "$@" 189 | -------------------------------------------------------------------------------- /gradlew.bat: -------------------------------------------------------------------------------- 1 | @rem 2 | @rem Copyright 2015 the original author or authors. 3 | @rem 4 | @rem Licensed under the Apache License, Version 2.0 (the "License"); 5 | @rem you may not use this file except in compliance with the License. 6 | @rem You may obtain a copy of the License at 7 | @rem 8 | @rem https://www.apache.org/licenses/LICENSE-2.0 9 | @rem 10 | @rem Unless required by applicable law or agreed to in writing, software 11 | @rem distributed under the License is distributed on an "AS IS" BASIS, 12 | @rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | @rem See the License for the specific language governing permissions and 14 | @rem limitations under the License. 
15 | @rem 16 | 17 | @if "%DEBUG%" == "" @echo off 18 | @rem ########################################################################## 19 | @rem 20 | @rem Gradle startup script for Windows 21 | @rem 22 | @rem ########################################################################## 23 | 24 | @rem Set local scope for the variables with windows NT shell 25 | if "%OS%"=="Windows_NT" setlocal 26 | 27 | set DIRNAME=%~dp0 28 | if "%DIRNAME%" == "" set DIRNAME=. 29 | set APP_BASE_NAME=%~n0 30 | set APP_HOME=%DIRNAME% 31 | 32 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 33 | set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" 34 | 35 | @rem Find java.exe 36 | if defined JAVA_HOME goto findJavaFromJavaHome 37 | 38 | set JAVA_EXE=java.exe 39 | %JAVA_EXE% -version >NUL 2>&1 40 | if "%ERRORLEVEL%" == "0" goto init 41 | 42 | echo. 43 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 44 | echo. 45 | echo Please set the JAVA_HOME variable in your environment to match the 46 | echo location of your Java installation. 47 | 48 | goto fail 49 | 50 | :findJavaFromJavaHome 51 | set JAVA_HOME=%JAVA_HOME:"=% 52 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 53 | 54 | if exist "%JAVA_EXE%" goto init 55 | 56 | echo. 57 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 58 | echo. 59 | echo Please set the JAVA_HOME variable in your environment to match the 60 | echo location of your Java installation. 61 | 62 | goto fail 63 | 64 | :init 65 | @rem Get command-line arguments, handling Windows variants 66 | 67 | if not "%OS%" == "Windows_NT" goto win9xME_args 68 | 69 | :win9xME_args 70 | @rem Slurp the command line arguments. 
71 | set CMD_LINE_ARGS= 72 | set _SKIP=2 73 | 74 | :win9xME_args_slurp 75 | if "x%~1" == "x" goto execute 76 | 77 | set CMD_LINE_ARGS=%* 78 | 79 | :execute 80 | @rem Setup the command line 81 | 82 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 83 | 84 | @rem Execute Gradle 85 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% 86 | 87 | :end 88 | @rem End local scope for the variables with windows NT shell 89 | if "%ERRORLEVEL%"=="0" goto mainEnd 90 | 91 | :fail 92 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 93 | rem the _cmd.exe /c_ return code! 94 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 95 | exit /b 1 96 | 97 | :mainEnd 98 | if "%OS%"=="Windows_NT" endlocal 99 | 100 | :omega 101 | -------------------------------------------------------------------------------- /settings.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * This file was generated by the Gradle 'init' task. 3 | * 4 | * The settings file is used to specify which projects to include in your build. 5 | * 6 | * Detailed information about configuring a multi-project build in Gradle can be found 7 | * in the user guide at https://docs.gradle.org/4.5/userguide/multi_project_builds.html 8 | */ 9 | 10 | rootProject.name = 'registryless-avro-converter' 11 | -------------------------------------------------------------------------------- /src/main/java/me/frmr/kafka/connect/RegistrylessAvroConverter.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2018 Matt Farmer (github.com/farmdawgnation) 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package me.frmr.kafka.connect; 17 | 18 | import io.confluent.connect.avro.AvroData; 19 | import java.io.File; 20 | import java.io.IOException; 21 | import java.io.ByteArrayOutputStream; 22 | import java.util.Map; 23 | import org.apache.avro.SchemaParseException; 24 | import org.apache.avro.io.DatumReader; 25 | import org.apache.avro.io.DatumWriter; 26 | import org.apache.avro.generic.GenericDatumReader; 27 | import org.apache.avro.generic.GenericDatumWriter; 28 | import org.apache.avro.generic.GenericRecord; 29 | import org.apache.avro.file.CodecFactory; 30 | import org.apache.avro.file.DataFileReader; 31 | import org.apache.avro.file.DataFileWriter; 32 | import org.apache.avro.file.SeekableByteArrayInput; 33 | import org.apache.kafka.connect.data.Schema; 34 | import org.apache.kafka.connect.data.SchemaAndValue; 35 | import org.apache.kafka.connect.errors.DataException; 36 | import org.apache.kafka.connect.storage.Converter; 37 | import org.slf4j.Logger; 38 | import org.slf4j.LoggerFactory; 39 | 40 | /** 41 | * Implementation of Converter that uses Avro schemas and objects without 42 | * using an external schema registry. Requires that a `schema.path` configuration 43 | * option is provided that tells the converter where to find its Avro schema. 44 | */ 45 | public class RegistrylessAvroConverter implements Converter { 46 | private static Logger logger = LoggerFactory.getLogger(RegistrylessAvroConverter.class); 47 | 48 | /** 49 | * The default schema cache size. 
We pick 50 so that there's room in the cache for some recurring 50 | * nested types in a complex schema. 51 | */ 52 | private Integer schemaCacheSize = 50; 53 | 54 | private org.apache.avro.Schema avroSchema = null; 55 | private Schema connectSchema = null; 56 | private AvroData avroDataHelper = null; 57 | 58 | @Override 59 | public void configure(Map configs, boolean isKey) { 60 | if (configs.get("schema.cache.size") instanceof Integer) { 61 | schemaCacheSize = (Integer) configs.get("schema.cache.size"); 62 | } 63 | 64 | avroDataHelper = new AvroData(schemaCacheSize); 65 | 66 | if (configs.get("schema.path") instanceof String) { 67 | String avroSchemaPath = (String) configs.get("schema.path"); 68 | org.apache.avro.Schema.Parser parser = new org.apache.avro.Schema.Parser(); 69 | 70 | File avroSchemaFile = null; 71 | try { 72 | avroSchemaFile = new File(avroSchemaPath); 73 | avroSchema = parser.parse(avroSchemaFile); 74 | connectSchema = avroDataHelper.toConnectSchema(avroSchema); 75 | } catch (SchemaParseException spe) { 76 | throw new IllegalStateException("Unable to parse Avro schema when starting RegistrylessAvroConverter", spe); 77 | } catch (IOException ioe) { 78 | throw new IllegalStateException("Unable to parse Avro schema when starting RegistrylessAvroConverter", ioe); 79 | } 80 | } 81 | } 82 | 83 | @Override 84 | public byte[] fromConnectData(String topic, Schema schema, Object value) { 85 | DatumWriter datumWriter; 86 | if (avroSchema != null) { 87 | datumWriter = new GenericDatumWriter(avroSchema); 88 | } else { 89 | datumWriter = new GenericDatumWriter(); 90 | } 91 | GenericRecord avroInstance = (GenericRecord)avroDataHelper.fromConnectData(schema, value); 92 | 93 | try ( 94 | ByteArrayOutputStream baos = new ByteArrayOutputStream(); 95 | DataFileWriter dataFileWriter = new DataFileWriter(datumWriter); 96 | ) { 97 | dataFileWriter.setCodec(CodecFactory.nullCodec()); 98 | 99 | if (avroSchema != null) { 100 | dataFileWriter.create(avroSchema, baos); 101 | 
} else { 102 | dataFileWriter.create(avroInstance.getSchema(), baos); 103 | } 104 | 105 | dataFileWriter.append(avroInstance); 106 | dataFileWriter.flush(); 107 | 108 | return baos.toByteArray(); 109 | } catch (IOException ioe) { 110 | throw new DataException("Error serializing Avro", ioe); 111 | } 112 | } 113 | 114 | @Override 115 | public SchemaAndValue toConnectData(String topic, byte[] value) { 116 | DatumReader datumReader; 117 | if (avroSchema != null) { 118 | datumReader = new GenericDatumReader<>(avroSchema); 119 | } else { 120 | datumReader = new GenericDatumReader<>(); 121 | } 122 | GenericRecord instance = null; 123 | 124 | try ( 125 | SeekableByteArrayInput sbai = new SeekableByteArrayInput(value); 126 | DataFileReader dataFileReader = new DataFileReader<>(sbai, datumReader); 127 | ) { 128 | instance = dataFileReader.next(instance); 129 | if (instance == null) { 130 | logger.warn("Instance was null"); 131 | } 132 | 133 | if (avroSchema != null) { 134 | return avroDataHelper.toConnectData(avroSchema, instance); 135 | } else { 136 | return avroDataHelper.toConnectData(instance.getSchema(), instance); 137 | } 138 | } catch (IOException ioe) { 139 | throw new DataException("Failed to deserialize Avro data from topic %s :".format(topic), ioe); 140 | } 141 | } 142 | } 143 | -------------------------------------------------------------------------------- /src/test/java/me/frmr/kafka/connect/RegistrylessAvroConverterTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2018 Matt Farmer (github.com/farmdawgnation) 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package me.frmr.kafka.connect; 17 | 18 | import static org.junit.jupiter.api.Assertions.assertEquals; 19 | import static org.junit.jupiter.api.Assertions.assertThrows; 20 | 21 | import org.apache.avro.io.DatumReader; 22 | import org.apache.avro.generic.GenericDatumReader; 23 | import org.apache.avro.generic.GenericRecord; 24 | import org.apache.avro.file.DataFileReader; 25 | import org.apache.avro.file.SeekableByteArrayInput; 26 | import org.apache.commons.io.IOUtils; 27 | import org.apache.commons.io.FileUtils; 28 | import org.apache.kafka.connect.data.*; 29 | import org.junit.jupiter.api.Test; 30 | import java.io.File; 31 | import java.io.InputStream; 32 | import java.io.IOException; 33 | import java.util.Arrays; 34 | import java.util.HashMap; 35 | import java.util.Map; 36 | 37 | class RegistrylessAvroConverterTest { 38 | @Test 39 | void configureWorksOnParsableSchema() { 40 | // This only has to work in the project directory because this is a test. I'm not particularly 41 | // concerned if it works when the tests are packaged in JAR form right now. If we start doing 42 | // that then we'll do something clever-er. 
43 | String validSchemaPath = new File("src/test/resources/schema/dog.avsc").getAbsolutePath(); 44 | 45 | RegistrylessAvroConverter sut = new RegistrylessAvroConverter(); 46 | Map settings = new HashMap(); 47 | settings.put("schema.path", validSchemaPath); 48 | 49 | sut.configure(settings, false); 50 | } 51 | 52 | @Test 53 | void configureThrowsOnInvalidSchema() { 54 | // This only has to work in the project directory because this is a test. I'm not particularly 55 | // concerned if it works when the tests are packaged in JAR form right now. If we start doing 56 | // that then we'll do something clever-er. 57 | String invalidSchemaPath = new File("src/test/resources/schema/invalid.avsc").getAbsolutePath(); 58 | 59 | RegistrylessAvroConverter sut = new RegistrylessAvroConverter(); 60 | Map settings = new HashMap(); 61 | settings.put("schema.path", invalidSchemaPath); 62 | 63 | Throwable resultingException = assertThrows(IllegalStateException.class, () -> sut.configure(settings, false)); 64 | assertEquals("Unable to parse Avro schema when starting RegistrylessAvroConverter", resultingException.getMessage()); 65 | } 66 | 67 | 68 | @Test 69 | void fromConnectDataWorksWithWriterSchema() throws Exception { 70 | // This only has to work in the project directory because this is a test. I'm not particularly 71 | // concerned if it works when the tests are packaged in JAR form right now. If we start doing 72 | // that then we'll do something clever-er. 
73 | String validSchemaPath = new File("src/test/resources/schema/dog.avsc").getAbsolutePath(); 74 | 75 | RegistrylessAvroConverter sut = new RegistrylessAvroConverter(); 76 | Map settings = new HashMap(); 77 | settings.put("schema.path", validSchemaPath); 78 | sut.configure(settings, false); 79 | 80 | Schema dogSchema = SchemaBuilder.struct() 81 | .name("dog") 82 | .field("name", Schema.STRING_SCHEMA) 83 | .field("breed", Schema.STRING_SCHEMA) 84 | .build(); 85 | 86 | Struct dogStruct = new Struct(dogSchema) 87 | .put("name", "Beamer") 88 | .put("breed", "Boarder Collie"); 89 | 90 | byte[] result = sut.fromConnectData("test_topic", dogSchema, dogStruct); 91 | 92 | // This is a bit annoying but because of the way avro works - the resulting byte array isn't 93 | // deterministic - so we need to read it back using the avro tools. 94 | DatumReader datumReader = new GenericDatumReader<>(); 95 | GenericRecord instance = null; 96 | try ( 97 | SeekableByteArrayInput sbai = new SeekableByteArrayInput(result); 98 | DataFileReader dataFileReader = new DataFileReader<>(sbai, datumReader); 99 | ) { 100 | instance = dataFileReader.next(); 101 | 102 | assertEquals("Beamer", instance.get("name").toString()); 103 | assertEquals("Boarder Collie", instance.get("breed").toString()); 104 | } catch (IOException ioe) { 105 | throw new Exception("Failed to deserialize Avro data", ioe); 106 | } 107 | } 108 | 109 | @Test 110 | void fromConnectDataWorksWithoutWriterSchema() throws Exception { 111 | RegistrylessAvroConverter sut = new RegistrylessAvroConverter(); 112 | Map settings = new HashMap(); 113 | sut.configure(settings, false); 114 | 115 | Schema dogSchema = SchemaBuilder.struct() 116 | .name("dog") 117 | .field("name", Schema.STRING_SCHEMA) 118 | .field("breed", Schema.STRING_SCHEMA) 119 | .build(); 120 | 121 | Struct dogStruct = new Struct(dogSchema) 122 | .put("name", "Beamer") 123 | .put("breed", "Boarder Collie"); 124 | 125 | byte[] result = sut.fromConnectData("test_topic", 
dogSchema, dogStruct); 126 | 127 | // This is a bit annoying but because of the way avro works - the resulting byte array isn't 128 | // deterministic - so we need to read it back using the avro tools. 129 | DatumReader datumReader = new GenericDatumReader<>(); 130 | GenericRecord instance = null; 131 | try ( 132 | SeekableByteArrayInput sbai = new SeekableByteArrayInput(result); 133 | DataFileReader dataFileReader = new DataFileReader<>(sbai, datumReader); 134 | ) { 135 | instance = dataFileReader.next(); 136 | 137 | assertEquals("Beamer", instance.get("name").toString()); 138 | assertEquals("Boarder Collie", instance.get("breed").toString()); 139 | } catch (IOException ioe) { 140 | throw new Exception("Failed to deserialize Avro data", ioe); 141 | } 142 | } 143 | 144 | @Test 145 | void toConnectDataWorksWithReaderSchema() throws IOException { 146 | InputStream dogDataStream = this.getClass().getClassLoader().getResourceAsStream("data/binary/beamer.avro"); 147 | byte[] dogData = IOUtils.toByteArray(dogDataStream); 148 | 149 | // This only has to work in the project directory because this is a test. I'm not particularly 150 | // concerned if it works when the tests are packaged in JAR form right now. If we start doing 151 | // that then we'll do something clever-er. 
152 | String validSchemaPath = new File("src/test/resources/schema/dog.avsc").getAbsolutePath(); 153 | 154 | RegistrylessAvroConverter sut = new RegistrylessAvroConverter(); 155 | Map settings = new HashMap(); 156 | settings.put("schema.path", validSchemaPath); 157 | sut.configure(settings, false); 158 | 159 | SchemaAndValue sav = sut.toConnectData("test_topic", dogData); 160 | 161 | Schema dogSchema = sav.schema(); 162 | assertEquals(Schema.Type.STRUCT, dogSchema.type()); 163 | assertEquals(Schema.Type.STRING, dogSchema.field("name").schema().type()); 164 | assertEquals(Schema.Type.STRING, dogSchema.field("breed").schema().type()); 165 | 166 | Struct dogStruct = (Struct)sav.value(); 167 | assertEquals("Beamer", dogStruct.getString("name")); 168 | assertEquals("Border Collie", dogStruct.getString("breed")); 169 | } 170 | 171 | @Test 172 | void toConnectDataWorksWithoutReaderSchema() throws IOException { 173 | InputStream dogDataStream = this.getClass().getClassLoader().getResourceAsStream("data/binary/beamer.avro"); 174 | byte[] dogData = IOUtils.toByteArray(dogDataStream); 175 | 176 | RegistrylessAvroConverter sut = new RegistrylessAvroConverter(); 177 | Map settings = new HashMap(); 178 | sut.configure(settings, false); 179 | 180 | SchemaAndValue sav = sut.toConnectData("test_topic", dogData); 181 | 182 | Schema dogSchema = sav.schema(); 183 | assertEquals(Schema.Type.STRUCT, dogSchema.type()); 184 | assertEquals(Schema.Type.STRING, dogSchema.field("name").schema().type()); 185 | assertEquals(Schema.Type.STRING, dogSchema.field("breed").schema().type()); 186 | 187 | Struct dogStruct = (Struct)sav.value(); 188 | assertEquals("Beamer", dogStruct.getString("name")); 189 | assertEquals("Border Collie", dogStruct.getString("breed")); 190 | } 191 | } 192 | -------------------------------------------------------------------------------- /src/test/resources/data/binary/beamer.avro: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/farmdawgnation/registryless-avro-converter/0a1cd28719f51eee2323fcd355d78ef7b77eb73a/src/test/resources/data/binary/beamer.avro -------------------------------------------------------------------------------- /src/test/resources/data/json/beamer.json: -------------------------------------------------------------------------------- 1 | {"name": "Beamer", "breed": "Border Collie"} 2 | -------------------------------------------------------------------------------- /src/test/resources/schema/dog.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "type": "record", 3 | "name": "dog", 4 | "fields": [ 5 | {"name": "name", "type": "string"}, 6 | {"name": "breed", "type": "string"} 7 | ] 8 | } 9 | -------------------------------------------------------------------------------- /src/test/resources/schema/invalid.avsc: -------------------------------------------------------------------------------- 1 | yo what's up i'm an invalid schema 2 | --------------------------------------------------------------------------------