├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── build.gradle ├── gradle.properties ├── gradle └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── gradlew ├── gradlew.bat ├── settings.gradle └── src ├── main └── scala │ └── com │ └── landoop │ └── avro │ └── sql │ ├── AvroFieldValueGetter.scala │ ├── AvroSchemaSql.scala │ ├── AvroSql.scala │ └── AvroUnpacker.scala └── test └── scala └── com └── landoop └── sql └── avro ├── AvroSqlTest.scala ├── AvroSqlWithRetainStructureTest.scala ├── Person.scala └── Pizza.scala /.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | .gradle/ 3 | /.gradle/ 4 | /.idea/ 5 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: scala 2 | scala: 3 | - 2.11.8 4 | 5 | jdk: 6 | - oraclejdk8 7 | 8 | # sudo: true 9 | 10 | # Enable if you want to use gradlew 11 | before_install: 12 | - chmod +x gradlew 13 | 14 | # If you omit install, travis will always run gradle assemble 15 | install: echo "skip 'gradle assembly'" 16 | 17 | script: 18 | - ./gradlew clean build 19 | 20 | cache: 21 | directories: 22 | - $HOME/.gradle/caches/ 23 | - $HOME/.gradle/wrapper/ 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://travis-ci.org/Landoop/avro-sql.svg?branch=master)](https://travis-ci.org/Landoop/avro-sql) 2 | [![GitHub license](https://img.shields.io/github/license/Landoop/avro-sql.svg)]() 3 | # Avro-Sql 4 | 5 | This is a library allowing to transform the shape of an Avro record using SQL. 6 | It relies on **Apache Calcite** for the SQL parsing. 7 | 8 | ```scala 9 | import AvroSql._ 10 | val record: GenericRecord = {...} 11 | record.scql("SELECT name, address.street.name as streetName") 12 | ``` 13 | As simple as that! 
14 | 15 | Let's say we have the following Avro Schema: 16 | 17 | ```json 18 | { 19 | "type": "record", 20 | "name": "Pizza", 21 | "namespace": "com.landoop.sql.avro", 22 | "fields": [ 23 | { 24 | "name": "ingredients", 25 | "type": { 26 | "type": "array", 27 | "items": { 28 | "type": "record", 29 | "name": "Ingredient", 30 | "fields": [ 31 | { 32 | "name": "name", 33 | "type": "string" 34 | }, 35 | { 36 | "name": "sugar", 37 | "type": "double" 38 | }, 39 | { 40 | "name": "fat", 41 | "type": "double" 42 | } 43 | ] 44 | } 45 | } 46 | }, 47 | { 48 | "name": "vegetarian", 49 | "type": "boolean" 50 | }, 51 | { 52 | "name": "vegan", 53 | "type": "boolean" 54 | }, 55 | { 56 | "name": "calories", 57 | "type": "int" 58 | }, 59 | { 60 | "name": "fieldName", 61 | "type": "string" 62 | } 63 | ] 64 | } 65 | ``` 66 | 67 | using the library one can apply to types of queries: 68 | * to flatten it 69 | * to retain the structure while cherry-picking and/or rename fields 70 | The difference between the two is marked by the **_withstructure_*** keyword. 71 | If this is missing you will end up flattening the structure. 72 | 73 | 74 | Let's take a look at the flatten first. There are cases when you are receiving a nested 75 | avro structure and you want to flatten the structure while being able to cherry pick the fields and rename them. 
76 | Imagine we have the following Avro schema: 77 | ``` 78 | { 79 | "type": "record", 80 | "name": "Person", 81 | "namespace": "com.landoop.sql.avro", 82 | "fields": [ 83 | { 84 | "name": "name", 85 | "type": "string" 86 | }, 87 | { 88 | "name": "address", 89 | "type": { 90 | "type": "record", 91 | "name": "Address", 92 | "fields": [ 93 | { 94 | "name": "street", 95 | "type": { 96 | "type": "record", 97 | "name": "Street", 98 | "fields": [ 99 | { 100 | "name": "name", 101 | "type": "string" 102 | } 103 | ] 104 | } 105 | }, 106 | { 107 | "name": "street2", 108 | "type": [ 109 | "null", 110 | "Street" 111 | ] 112 | }, 113 | { 114 | "name": "city", 115 | "type": "string" 116 | }, 117 | { 118 | "name": "state", 119 | "type": "string" 120 | }, 121 | { 122 | "name": "zip", 123 | "type": "string" 124 | }, 125 | { 126 | "name": "country", 127 | "type": "string" 128 | } 129 | ] 130 | } 131 | } 132 | ] 133 | } 134 | ``` 135 | Applying this SQL like syntax 136 | ``` 137 | SELECT 138 | name, 139 | address.street.*, 140 | address.street2.name as streetName2 141 | FROM topic 142 | ``` 143 | the projected new schema is: 144 | ``` 145 | { 146 | "type": "record", 147 | "name": "Person", 148 | "namespace": "com.landoop.sql.avro", 149 | "fields": [ 150 | { 151 | "name": "name", 152 | "type": "string" 153 | }, 154 | { 155 | "name": "name_1", 156 | "type": "string" 157 | }, 158 | { 159 | "name": "streetName2", 160 | "type": "string" 161 | } 162 | ] 163 | } 164 | ``` 165 | 166 | There are scenarios where you might want to rename fields and maybe reorder them. 
167 | By applying this SQL like syntax on the Pizza schema 168 | 169 | ``` 170 | SELECT 171 | name, 172 | ingredients.name as fieldName, 173 | ingredients.sugar as fieldSugar, 174 | ingredients.*, 175 | calories as cals 176 | withstructure 177 | ``` 178 | we end up projecting the first structure into this one: 179 | 180 | ```json 181 | { 182 | "type": "record", 183 | "name": "Pizza", 184 | "namespace": "com.landoop.sql.avro", 185 | "fields": [ 186 | { 187 | "name": "name", 188 | "type": "string" 189 | }, 190 | { 191 | "name": "ingredients", 192 | "type": { 193 | "type": "array", 194 | "items": { 195 | "type": "record", 196 | "name": "Ingredient", 197 | "fields": [ 198 | { 199 | "name": "fieldName", 200 | "type": "string" 201 | }, 202 | { 203 | "name": "fieldSugar", 204 | "type": "double" 205 | }, 206 | { 207 | "name": "fat", 208 | "type": "double" 209 | } 210 | ] 211 | } 212 | } 213 | }, 214 | { 215 | "name": "cals", 216 | "type": "int" 217 | } 218 | ] 219 | } 220 | ``` 221 | 222 | ## Flatten rules 223 | * you can't flatten a schema containing array fields 224 | * when flattening and the column name has already been used it will get a index appended. For example if field *name* appears twice and you don't specifically 225 | rename the second instance (*name as renamedName*) the new schema will end up containing: *name* and *name_1* 226 | 227 | ## How to use it 228 | 229 | ```scala 230 | import AvroSql._ 231 | val record: GenericRecord = {...} 232 | record.scql("SELECT name, address.street.name as streetName") 233 | ``` 234 | As simple as that! 
235 | 236 | ## Query Examples 237 | You can find more examples in the unit tests, however here are a few used: 238 | * flattening 239 | ``` 240 | //rename and only pick fields on first level 241 | SELECT calories as C ,vegan as V ,name as fieldName FROM topic 242 | 243 | //Cherry pick fields on different levels in the structure 244 | SELECT name, address.street.name as streetName FROM topic 245 | 246 | //Select and rename fields on nested level 247 | SELECT name, address.street.*, address.street2.name as streetName2 FROM topic 248 | ``` 249 | * retaining the structure 250 | ``` 251 | //you can select itself - obviousely no real gain on this 252 | SELECT * FROM topic withstructure 253 | 254 | //rename a field 255 | SELECT *, name as fieldName FROM topic withstructure 256 | 257 | //rename a complex field 258 | SELECT *, ingredients as stuff FROM topic withstructure 259 | 260 | //select a single field 261 | SELECT vegan FROM topic withstructure 262 | 263 | //rename and only select nested fields 264 | SELECT ingredients.name as fieldName, ingredients.sugar as fieldSugar, ingredients.* FROM topic withstructure 265 | 266 | 267 | ``` 268 | 269 | ## Release Notes 270 | 271 | 272 | **0.1 (2017-05-03)** 273 | 274 | * first release 275 | 276 | ### Building 277 | 278 | ***Requires gradle 3.4.1 to build.*** 279 | 280 | To build 281 | 282 | ```bash 283 | gradle compile 284 | ``` 285 | 286 | To test 287 | 288 | ```bash 289 | gradle test 290 | ``` 291 | 292 | 293 | You can also use the gradle wrapper 294 | 295 | ``` 296 | ./gradlew build 297 | ``` 298 | 299 | To view dependency trees 300 | 301 | ``` 302 | gradle dependencies #  303 | ``` -------------------------------------------------------------------------------- /build.gradle: -------------------------------------------------------------------------------- 1 | buildscript { 2 | repositories { 3 | jcenter() 4 | maven { 5 | url 'https://plugins.gradle.org/m2/' 6 | } 7 | } 8 | dependencies { 9 | classpath 
'com.github.maiflai:gradle-scalatest:0.18' 10 | classpath 'io.codearte.gradle.nexus:gradle-nexus-staging-plugin:0.11.0' 11 | classpath 'net.researchgate:gradle-release:2.6.0' 12 | } 13 | } 14 | 15 | apply plugin: 'signing' 16 | apply plugin: 'io.codearte.nexus-staging' 17 | apply plugin: 'net.researchgate.release' 18 | 19 | allprojects { 20 | group = 'com.landoop' 21 | version = version 22 | description = "Allows to run SQL over an Avro record to morph its structure" 23 | 24 | apply plugin: 'scala' 25 | apply plugin: 'maven' 26 | apply plugin: 'com.github.maiflai.scalatest' 27 | sourceCompatibility = 1.8 28 | targetCompatibility = 1.8 29 | 30 | ext { 31 | scalaMajorVersion = '2.11' 32 | scala = '2.11.11' 33 | scalaCheck = '1.11.1' 34 | scalaTest = '2.2.6' 35 | junitVersion = '4.12' 36 | calciteVersion = "1.12.0" 37 | kafkaVersion = '1.0.0' 38 | confluentVersion = '4.0.0' 39 | scalaLoggingVersion = '3.5.0' 40 | mockitoVersion = '2.7.13' 41 | avroVersion = '1.8.1' 42 | avro4sVersion = "1.6.2" 43 | slf4jVersion = "1.7.7" 44 | sqlCore="1.0" 45 | } 46 | 47 | repositories { 48 | mavenLocal() 49 | mavenCentral() 50 | maven { url "http://packages.confluent.io/maven/" } 51 | maven { url "http://repo.typesafe.com/typesafe/releases/" } 52 | } 53 | 54 | configurations { 55 | provided 56 | compile.extendsFrom provided 57 | } 58 | 59 | dependencies { 60 | compile "org.scala-lang:scala-library:$scala" 61 | compile "com.typesafe.scala-logging:scala-logging_$scalaMajorVersion:$scalaLoggingVersion" 62 | 63 | compile ("com.landoop:sql-core_$scalaMajorVersion:$sqlCore") 64 | provided "org.apache.avro:avro:$avroVersion" 65 | compile "io.confluent:kafka-avro-serializer:${confluentVersion}" 66 | //provided "org.apache.kafka:kafka_$scalaMajorVersion:${kafkaVersion}" 67 | provided "org.apache.kafka:connect-api:${kafkaVersion}" 68 | compile "com.sksamuel.avro4s:avro4s-core_$scalaMajorVersion:$avro4sVersion" 69 | compile("org.apache.calcite:calcite-core:$calciteVersion") 70 | 71 | 
testCompile "org.mockito:mockito-core:$mockitoVersion" 72 | testCompile "org.scalacheck:scalacheck_$scalaMajorVersion:$scalaCheck" 73 | testCompile "org.scalatest:scalatest_$scalaMajorVersion:$scalaTest" 74 | testCompile "junit:junit:$junitVersion" 75 | testCompile "org.apache.curator:curator-test:3.1.0" 76 | testCompile 'org.powermock:powermock-module-junit4:1.6.5' 77 | testCompile 'org.pegdown:pegdown:1.1.0' 78 | } 79 | 80 | test { 81 | maxParallelForks = 1 82 | minHeapSize '256m' 83 | maxHeapSize '2048m' 84 | systemProperty 'keystore', projectDir.canonicalPath + "/src/test/resources/stc_keystore.jks" 85 | systemProperty 'truststore', projectDir.canonicalPath + "/src/test/resources/stc_truststore.jks" 86 | } 87 | 88 | task testJar(type: Jar, dependsOn: testClasses) { 89 | baseName = "test-${project.archivesBaseName}" 90 | from sourceSets.test.output 91 | } 92 | 93 | configurations { 94 | tests 95 | } 96 | 97 | task sourcesJar(type: Jar) { 98 | classifier = 'sources' 99 | from sourceSets.main.allSource 100 | } 101 | 102 | task javadocJar(type: Jar) { 103 | classifier = 'javadoc' 104 | from javadoc 105 | } 106 | 107 | task scaladocJar(type: Jar) { 108 | classifier = 'scaladoc' 109 | from '../LICENSE' 110 | from scaladoc 111 | } 112 | 113 | tasks.withType(Tar) { 114 | compression Compression.GZIP 115 | extension = 'tgz' 116 | } 117 | 118 | artifacts { 119 | archives javadocJar, scaladocJar, sourcesJar 120 | } 121 | 122 | task compile(dependsOn: 'compileScala') 123 | javadoc.dependsOn scaladoc 124 | 125 | signing { 126 | required { gradle.taskGraph.hasTask("uploadArchives") } 127 | sign configurations.archives 128 | } 129 | 130 | // OSSRH publication 131 | if (project.hasProperty('release')) { 132 | uploadArchives { 133 | repositories { 134 | mavenDeployer { 135 | // POM signature 136 | beforeDeployment { MavenDeployment deployment -> signing.signPom(deployment) } 137 | // Target repository 138 | repository(url: 
"https://oss.sonatype.org/service/local/staging/deploy/maven2/") { 139 | authentication(userName: ossrhUsername, password: ossrhPassword) 140 | } 141 | pom.project { 142 | name project.name 143 | description project.description 144 | packaging 'jar' 145 | url 'https://github.com/landoop/avro-sql' 146 | 147 | scm { 148 | connection 'scm:git:https://github.com/landoop/avro-sql.git' 149 | developerConnection 'scm:git:git@github.com:landoop/avro-sql.git' 150 | url 'https://github.com/landoop/avro-sql.git' 151 | } 152 | 153 | licenses { 154 | license { 155 | name 'Apache License 2.0' 156 | url 'http://www.apache.org/licenses/LICENSE-2.0.html' 157 | distribution 'repo' 158 | } 159 | } 160 | 161 | developers { 162 | developer { 163 | id = 'andrewstevenson' 164 | name = 'Andrew Stevenson' 165 | email = 'andrew@landoop.com' 166 | } 167 | developer { 168 | id = 'stheppi' 169 | name = 'Stefan Bocutiu' 170 | email = 'stefan@landoop.com' 171 | } 172 | developer { 173 | id = 'Antwnis' 174 | name = 'Antonios Chalkiopoulos' 175 | email = 'antonios@landoop.com' 176 | } 177 | } 178 | } 179 | } 180 | } 181 | } 182 | 183 | nexusStaging { 184 | packageGroup = project.getGroup() 185 | username = ossrhUsername 186 | password = ossrhPassword 187 | } 188 | } 189 | } 190 | 191 | -------------------------------------------------------------------------------- /gradle.properties: -------------------------------------------------------------------------------- 1 | version=1.0.1 2 | ossrhUsername=you 3 | ossrhPassword=me 4 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lensesio/avro-sql/0bf518f406f63fb59ee499fcd8c0073c7e05cecf/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: 
-------------------------------------------------------------------------------- 1 | #Thu Apr 20 14:26:11 EEST 2017 2 | distributionBase=GRADLE_USER_HOME 3 | distributionPath=wrapper/dists 4 | zipStoreBase=GRADLE_USER_HOME 5 | zipStorePath=wrapper/dists 6 | distributionUrl=https\://services.gradle.org/distributions/gradle-3.4-bin.zip 7 | -------------------------------------------------------------------------------- /gradlew: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | ############################################################################## 4 | ## 5 | ## Gradle start up script for UN*X 6 | ## 7 | ############################################################################## 8 | 9 | # Attempt to set APP_HOME 10 | # Resolve links: $0 may be a link 11 | PRG="$0" 12 | # Need this for relative symlinks. 13 | while [ -h "$PRG" ] ; do 14 | ls=`ls -ld "$PRG"` 15 | link=`expr "$ls" : '.*-> \(.*\)$'` 16 | if expr "$link" : '/.*' > /dev/null; then 17 | PRG="$link" 18 | else 19 | PRG=`dirname "$PRG"`"/$link" 20 | fi 21 | done 22 | SAVED="`pwd`" 23 | cd "`dirname \"$PRG\"`/" >/dev/null 24 | APP_HOME="`pwd -P`" 25 | cd "$SAVED" >/dev/null 26 | 27 | APP_NAME="Gradle" 28 | APP_BASE_NAME=`basename "$0"` 29 | 30 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 31 | DEFAULT_JVM_OPTS="" 32 | 33 | # Use the maximum available, or set MAX_FD != -1 to use that value. 34 | MAX_FD="maximum" 35 | 36 | warn ( ) { 37 | echo "$*" 38 | } 39 | 40 | die ( ) { 41 | echo 42 | echo "$*" 43 | echo 44 | exit 1 45 | } 46 | 47 | # OS specific support (must be 'true' or 'false'). 
48 | cygwin=false 49 | msys=false 50 | darwin=false 51 | nonstop=false 52 | case "`uname`" in 53 | CYGWIN* ) 54 | cygwin=true 55 | ;; 56 | Darwin* ) 57 | darwin=true 58 | ;; 59 | MINGW* ) 60 | msys=true 61 | ;; 62 | NONSTOP* ) 63 | nonstop=true 64 | ;; 65 | esac 66 | 67 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar 68 | 69 | # Determine the Java command to use to start the JVM. 70 | if [ -n "$JAVA_HOME" ] ; then 71 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 72 | # IBM's JDK on AIX uses strange locations for the executables 73 | JAVACMD="$JAVA_HOME/jre/sh/java" 74 | else 75 | JAVACMD="$JAVA_HOME/bin/java" 76 | fi 77 | if [ ! -x "$JAVACMD" ] ; then 78 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME 79 | 80 | Please set the JAVA_HOME variable in your environment to match the 81 | location of your Java installation." 82 | fi 83 | else 84 | JAVACMD="java" 85 | which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 86 | 87 | Please set the JAVA_HOME variable in your environment to match the 88 | location of your Java installation." 89 | fi 90 | 91 | # Increase the maximum file descriptors if we can. 92 | if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then 93 | MAX_FD_LIMIT=`ulimit -H -n` 94 | if [ $? -eq 0 ] ; then 95 | if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then 96 | MAX_FD="$MAX_FD_LIMIT" 97 | fi 98 | ulimit -n $MAX_FD 99 | if [ $? 
-ne 0 ] ; then 100 | warn "Could not set maximum file descriptor limit: $MAX_FD" 101 | fi 102 | else 103 | warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" 104 | fi 105 | fi 106 | 107 | # For Darwin, add options to specify how the application appears in the dock 108 | if $darwin; then 109 | GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" 110 | fi 111 | 112 | # For Cygwin, switch paths to Windows format before running java 113 | if $cygwin ; then 114 | APP_HOME=`cygpath --path --mixed "$APP_HOME"` 115 | CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` 116 | JAVACMD=`cygpath --unix "$JAVACMD"` 117 | 118 | # We build the pattern for arguments to be converted via cygpath 119 | ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` 120 | SEP="" 121 | for dir in $ROOTDIRSRAW ; do 122 | ROOTDIRS="$ROOTDIRS$SEP$dir" 123 | SEP="|" 124 | done 125 | OURCYGPATTERN="(^($ROOTDIRS))" 126 | # Add a user-defined pattern to the cygpath arguments 127 | if [ "$GRADLE_CYGPATTERN" != "" ] ; then 128 | OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" 129 | fi 130 | # Now convert the arguments - kludge to limit ourselves to /bin/sh 131 | i=0 132 | for arg in "$@" ; do 133 | CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` 134 | CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option 135 | 136 | if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition 137 | eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` 138 | else 139 | eval `echo args$i`="\"$arg\"" 140 | fi 141 | i=$((i+1)) 142 | done 143 | case $i in 144 | (0) set -- ;; 145 | (1) set -- "$args0" ;; 146 | (2) set -- "$args0" "$args1" ;; 147 | (3) set -- "$args0" "$args1" "$args2" ;; 148 | (4) set -- "$args0" "$args1" "$args2" "$args3" ;; 149 | (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; 150 | (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; 151 | (7) set -- "$args0" "$args1" "$args2" "$args3" 
"$args4" "$args5" "$args6" ;; 152 | (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; 153 | (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; 154 | esac 155 | fi 156 | 157 | # Escape application args 158 | save ( ) { 159 | for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done 160 | echo " " 161 | } 162 | APP_ARGS=$(save "$@") 163 | 164 | # Collect all arguments for the java command, following the shell quoting and substitution rules 165 | eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" 166 | 167 | # by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong 168 | if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then 169 | cd "$(dirname "$0")" 170 | fi 171 | 172 | exec "$JAVACMD" "$@" 173 | -------------------------------------------------------------------------------- /gradlew.bat: -------------------------------------------------------------------------------- 1 | @if "%DEBUG%" == "" @echo off 2 | @rem ########################################################################## 3 | @rem 4 | @rem Gradle startup script for Windows 5 | @rem 6 | @rem ########################################################################## 7 | 8 | @rem Set local scope for the variables with windows NT shell 9 | if "%OS%"=="Windows_NT" setlocal 10 | 11 | set DIRNAME=%~dp0 12 | if "%DIRNAME%" == "" set DIRNAME=. 13 | set APP_BASE_NAME=%~n0 14 | set APP_HOME=%DIRNAME% 15 | 16 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 
17 | set DEFAULT_JVM_OPTS= 18 | 19 | @rem Find java.exe 20 | if defined JAVA_HOME goto findJavaFromJavaHome 21 | 22 | set JAVA_EXE=java.exe 23 | %JAVA_EXE% -version >NUL 2>&1 24 | if "%ERRORLEVEL%" == "0" goto init 25 | 26 | echo. 27 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 28 | echo. 29 | echo Please set the JAVA_HOME variable in your environment to match the 30 | echo location of your Java installation. 31 | 32 | goto fail 33 | 34 | :findJavaFromJavaHome 35 | set JAVA_HOME=%JAVA_HOME:"=% 36 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 37 | 38 | if exist "%JAVA_EXE%" goto init 39 | 40 | echo. 41 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 42 | echo. 43 | echo Please set the JAVA_HOME variable in your environment to match the 44 | echo location of your Java installation. 45 | 46 | goto fail 47 | 48 | :init 49 | @rem Get command-line arguments, handling Windows variants 50 | 51 | if not "%OS%" == "Windows_NT" goto win9xME_args 52 | 53 | :win9xME_args 54 | @rem Slurp the command line arguments. 55 | set CMD_LINE_ARGS= 56 | set _SKIP=2 57 | 58 | :win9xME_args_slurp 59 | if "x%~1" == "x" goto execute 60 | 61 | set CMD_LINE_ARGS=%* 62 | 63 | :execute 64 | @rem Setup the command line 65 | 66 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 67 | 68 | @rem Execute Gradle 69 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% 70 | 71 | :end 72 | @rem End local scope for the variables with windows NT shell 73 | if "%ERRORLEVEL%"=="0" goto mainEnd 74 | 75 | :fail 76 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 77 | rem the _cmd.exe /c_ return code! 
78 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 79 | exit /b 1 80 | 81 | :mainEnd 82 | if "%OS%"=="Windows_NT" endlocal 83 | 84 | :omega 85 | -------------------------------------------------------------------------------- /settings.gradle: -------------------------------------------------------------------------------- 1 | rootProject.name = 'avro-sql_2.11' 2 | -------------------------------------------------------------------------------- /src/main/scala/com/landoop/avro/sql/AvroFieldValueGetter.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2017 Landoop. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.landoop.avro.sql 17 | 18 | import AvroSchemaSql._ 19 | import AvroSql._ 20 | import org.apache.avro.Schema 21 | import org.apache.avro.generic.IndexedRecord 22 | import org.apache.avro.util.Utf8 23 | 24 | trait AvroFieldValueGetter { 25 | 26 | def get(value: Any, schema: Schema, path: Seq[String]): Option[Any] = { 27 | path.headOption.map { parent => 28 | schema.getType match { 29 | case Schema.Type.RECORD => if (Option(value).isEmpty) None else fromRecord(value, schema, path) 30 | case Schema.Type.MAP => if (Option(value).isEmpty) None else fromMap(value, schema, path) 31 | case Schema.Type.UNION => get(value, schema.fromUnion(), path) 32 | case _ => throw new IllegalArgumentException(s"Can't select $parent field from schema:$schema") 33 | } 34 | }.getOrElse { 35 | schema.getType match { 36 | case Schema.Type.BOOLEAN | Schema.Type.NULL | 37 | Schema.Type.DOUBLE | Schema.Type.FLOAT | 38 | Schema.Type.LONG | Schema.Type.INT | 39 | Schema.Type.ENUM | Schema.Type.BYTES | 40 | Schema.Type.FIXED => Option(value) 41 | 42 | case Schema.Type.STRING => Option(new Utf8(value.toString).asInstanceOf[Any]) //yes UTF8 43 | 44 | case Schema.Type.UNION => get(value, schema.fromUnion(), path) 45 | 46 | case Schema.Type.ARRAY | Schema.Type.MAP | Schema.Type.RECORD => 47 | throw new IllegalArgumentException(s"Can't select an element from an array(schema:$schema)") 48 | 49 | case other => throw new IllegalArgumentException(s"Invalid Avro schema type:$other") 50 | } 51 | } 52 | } 53 | 54 | 55 | private def fromRecord(value: Any, schema: Schema, path: Seq[String]) = { 56 | val field = Option(schema.getField(path.head)) 57 | .getOrElse(throw new IllegalArgumentException(s"Can't find field:${path.head} in schema:$schema")) 58 | val v = value.asInstanceOf[IndexedRecord].get(path.head) 59 | get(v, field.schema(), path.tail) 60 | } 61 | 62 | 63 | private def fromMap(value: Any, schema: Schema, path: Seq[String]) = { 64 | val field = Option(schema.getField(path.head)) 
65 | .getOrElse(throw new IllegalArgumentException(s"Can't find field:${path.head} in schema:$schema")) 66 | val v = value.asInstanceOf[IndexedRecord].get(path.head) 67 | get(v, field.schema(), path.tail) 68 | } 69 | 70 | } 71 | -------------------------------------------------------------------------------- /src/main/scala/com/landoop/avro/sql/AvroSchemaSql.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2017 Landoop. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.landoop.avro.sql 18 | 19 | import com.landoop.sql.{Field, SqlContext} 20 | import org.apache.avro.Schema 21 | import org.apache.avro.Schema.{Field => AvroField} 22 | import org.apache.calcite.sql.SqlSelect 23 | 24 | import scala.annotation.tailrec 25 | import scala.collection.JavaConversions._ 26 | import scala.collection.mutable.ArrayBuffer 27 | 28 | object AvroSchemaSql { 29 | 30 | implicit class AvroSchemaSqlExtension(val schema: Schema) extends AnyVal { 31 | 32 | def isNullable(): Boolean = { 33 | schema.getType == Schema.Type.UNION && 34 | schema.getTypes.exists(_.getType == Schema.Type.NULL) 35 | } 36 | 37 | /** 38 | * This assumes a null, type union. 
probably better to look at the value and work out the schema 39 | */ 40 | def fromUnion(): Schema = { 41 | schema.getTypes.toList match { 42 | case actualSchema +: Nil => actualSchema 43 | case List(n, actualSchema) if n.getType == Schema.Type.NULL => actualSchema 44 | case List(actualSchema, n) if n.getType == Schema.Type.NULL => actualSchema 45 | case _ => throw new IllegalArgumentException("Unions has one specific type and null") 46 | } 47 | } 48 | 49 | def getFields(path: Seq[String]): Seq[AvroField] = { 50 | def navigate(current: Schema, parents: Seq[String]): Seq[AvroField] = { 51 | if (Option(parents).isEmpty || parents.isEmpty) { 52 | current.getType match { 53 | case Schema.Type.RECORD => current.getFields 54 | case Schema.Type.UNION => navigate(current.fromUnion(), parents) 55 | case Schema.Type.MAP => throw new IllegalArgumentException(s"Can't select fields ${path.mkString(".")} since it resolved to a Map($current)") 56 | case _ => throw new IllegalArgumentException(s"Can't select fields ${path.mkString(".")} from schema:$current ") 57 | } 58 | } else { 59 | current.getType match { 60 | case Schema.Type.RECORD => 61 | val field = Option(current.getField(parents.head)) 62 | .getOrElse(throw new IllegalArgumentException(s"Can't find field ${parents.head} in schema:$current")) 63 | navigate(field.schema(), parents.tail) 64 | case Schema.Type.UNION => navigate(schema.fromUnion(), parents) 65 | case _ => throw new IllegalArgumentException(s"Can't select fields ${path.mkString(".")} from schema:$current ") 66 | } 67 | } 68 | } 69 | 70 | navigate(schema, path) 71 | } 72 | 73 | def fromPath(path: Seq[String]): Seq[AvroField] = { 74 | AvroSchemaExtension.fromPath(schema, path) 75 | } 76 | 77 | def copy(sql: SqlSelect, flatten: Boolean): Schema = { 78 | if (!flatten) { 79 | implicit val sqlContext = new SqlContext(Field.from(sql)) 80 | copy 81 | } 82 | else { 83 | this.flatten(Field.from(sql)) 84 | } 85 | } 86 | 87 | def copy()(implicit sqlContext: SqlContext): 
Schema = { 88 | AvroSchemaExtension.copy(schema, Vector.empty) 89 | } 90 | 91 | def copyAsNullable(): Schema = { 92 | schema.getType match { 93 | case Schema.Type.UNION => 94 | if (schema.getTypes.get(0).getType == Schema.Type.NULL) { 95 | schema 96 | } 97 | else { 98 | val newSchema = Schema.createUnion(Schema.create(Schema.Type.NULL) +: schema.getTypes) 99 | newSchema.copyProperties(schema) 100 | } 101 | case _ => Schema.createUnion(Schema.create(Schema.Type.NULL), schema) 102 | } 103 | } 104 | 105 | def flatten(fields: Seq[Field]): Schema = { 106 | def allowOnlyStarSelection() = { 107 | fields match { 108 | case Seq(f) if f.name == "*" => schema 109 | case _ => throw new IllegalArgumentException(s"You can't select fields from schema:$schema") 110 | } 111 | } 112 | 113 | schema.getType match { 114 | case Schema.Type.ARRAY | Schema.Type.MAP => throw new IllegalArgumentException(s"Can't flattent schema type:${schema.getType}") 115 | case Schema.Type.BOOLEAN | Schema.Type.BYTES | 116 | Schema.Type.DOUBLE | Schema.Type.ENUM | 117 | Schema.Type.FIXED | Schema.Type.FLOAT | 118 | Schema.Type.INT | Schema.Type.LONG | 119 | Schema.Type.NULL | Schema.Type.STRING => allowOnlyStarSelection() 120 | 121 | //case Schema.Type.MAP => allowOnlyStarSelection() 122 | case Schema.Type.UNION => schema.fromUnion().flatten(fields) 123 | case Schema.Type.RECORD => 124 | fields match { 125 | case Seq(f) if f.name == "*" => schema 126 | case _ => createRecordSchemaForFlatten(fields) 127 | } 128 | } 129 | } 130 | 131 | private[sql] def copyProperties(from: Schema): Schema = { 132 | from.getType match { 133 | case Schema.Type.RECORD | Schema.Type.FIXED | Schema.Type.ENUM => 134 | from.getAliases.foreach(schema.addAlias) 135 | case _ => 136 | } 137 | from.getObjectProps.foreach { case (prop: String, value: Any) => 138 | schema.addProp(prop, value) 139 | } 140 | schema 141 | } 142 | 143 | private def createRecordSchemaForFlatten(fields: Seq[Field]): Schema = { 144 | val newSchema = 
Schema.createRecord(schema.getName, schema.getDoc, schema.getNamespace, false) 145 | val fieldParentsMap = fields.foldLeft(Map.empty[String, ArrayBuffer[String]]) { case (map, f) => 146 | val key = Option(f.parents).map(_.mkString(".")).getOrElse("") 147 | val buffer = map.getOrElse(key, ArrayBuffer.empty[String]) 148 | if (buffer.contains(f.name)) { 149 | throw new IllegalArgumentException(s"You have defined the field ${ 150 | if (f.hasParents) { 151 | f.parents.mkString(".") + "." + f.name 152 | } else { 153 | f.name 154 | } 155 | } more than once!") 156 | } 157 | buffer += f.name 158 | map + (key -> buffer) 159 | } 160 | 161 | val colsMap = collection.mutable.Map.empty[String, Int] 162 | 163 | def getNextFieldName(fieldName: String): String = { 164 | colsMap.get(fieldName).map { v => 165 | colsMap.put(fieldName, v + 1) 166 | s"${fieldName}_${v + 1}" 167 | }.getOrElse { 168 | colsMap.put(fieldName, 0) 169 | fieldName 170 | } 171 | } 172 | 173 | val newFields = fields.flatMap { 174 | 175 | case field if field.name == "*" => 176 | val siblings = fieldParentsMap.get(Option(field.parents).map(_.mkString(".")).getOrElse("")) 177 | Option(field.parents) 178 | .map { p => 179 | val s = schema.fromPath(p) 180 | .headOption 181 | .getOrElse(throw new IllegalArgumentException(s"Can't find field ${p.mkString(".")} in schema:$schema")) 182 | .schema() 183 | 184 | s.getType match { 185 | case Schema.Type.UNION => 186 | val underlyingSchema = s.fromUnion() 187 | underlyingSchema.getType match { 188 | case Schema.Type.RECORD => 189 | if (!underlyingSchema.isNullable()) underlyingSchema.getFields.toSeq 190 | else underlyingSchema.getFields.map { f => 191 | new AvroField(f.name(), f.schema().copyAsNullable, f.doc(), f.defaultVal()) 192 | } 193 | case other => throw new IllegalArgumentException(s"Field selection ${p.mkString(".")} resolves to schema type:$other. 
Only RECORD type is allowed") 194 | } 195 | case Schema.Type.RECORD => 196 | if (!s.isNullable()) s.getFields.toSeq 197 | else s.getFields.map { f => 198 | new AvroField(f.name(), f.schema().copyAsNullable, f.doc(), f.defaultVal()) 199 | } 200 | case other => 201 | throw new IllegalArgumentException(s"Field selection ${p.mkString(".")} resolves to schema type:$other. Only RECORD type is allowed") 202 | } 203 | } 204 | .getOrElse { 205 | if (!schema.isNullable) schema.getFields.toSeq 206 | else schema.getFields.map { f => 207 | new AvroField(f.name(), f.schema().copyAsNullable, f.doc(), f.defaultVal()) 208 | } 209 | } 210 | .withFilter { f => 211 | siblings.collect { case s if s.contains(f.name()) => false }.getOrElse(true) 212 | } 213 | .map { f => 214 | AvroSchemaExtension.checkAllowedSchemas(f.schema(), field) 215 | new AvroField(getNextFieldName(f.name()), f.schema(), f.doc(), f.defaultVal()) 216 | } 217 | 218 | case field if field.hasParents => 219 | schema.fromPath(field.parents :+ field.name) 220 | .map { extracted => 221 | require(extracted != null, s"Invalid field:${(field.parents :+ field.name).mkString(".")}") 222 | AvroSchemaExtension.checkAllowedSchemas(extracted.schema(), field) 223 | if (field.alias == "*") { 224 | new AvroField(getNextFieldName(extracted.name()), extracted.schema(), extracted.doc(), extracted.defaultVal()) 225 | } else { 226 | new AvroField(getNextFieldName(field.alias), extracted.schema(), extracted.doc(), extracted.defaultVal()) 227 | } 228 | } 229 | 230 | case field => 231 | val originalField = Option(schema.getField(field.name)) 232 | .getOrElse(throw new IllegalArgumentException(s"Can't find field:${field.name} in schema:$schema")) 233 | AvroSchemaExtension.checkAllowedSchemas(originalField.schema(), field) 234 | Seq(new AvroField(getNextFieldName(field.alias), originalField.schema(), originalField.doc(), originalField.defaultVal())) 235 | } 236 | 237 | 238 | newSchema.setFields(newFields) 239 | newSchema.copyProperties(schema) 
240 | } 241 | } 242 | 243 | private object AvroSchemaExtension { 244 | def copy(from: Schema, parents: Vector[String])(implicit sqlContext: SqlContext): Schema = { 245 | from.getType match { 246 | case Schema.Type.RECORD => createRecordSchema(from, parents) 247 | case Schema.Type.ARRAY => 248 | val newSchema = Schema.createArray(copy(from.getElementType, parents)) 249 | newSchema.copyProperties(from) 250 | case Schema.Type.MAP => 251 | val elementSchema = copy(from.getValueType, parents) 252 | val newSchema = Schema.createMap(elementSchema) 253 | newSchema.copyProperties(from) 254 | 255 | case Schema.Type.UNION => 256 | val newSchema = Schema.createUnion(from.getTypes.map(copy(_, parents))) 257 | newSchema.copyProperties(from) 258 | 259 | case _ => from 260 | } 261 | } 262 | 263 | 264 | private def createRecordSchema(from: Schema, parents: Vector[String])(implicit sqlContext: SqlContext): Schema = { 265 | val newSchema = Schema.createRecord(from.getName, from.getDoc, from.getNamespace, false) 266 | 267 | val fields = sqlContext.getFieldsForPath(parents) 268 | val newFields: Seq[Schema.Field] = fields match { 269 | case Seq() => 270 | from.getFields 271 | .map { schemaField => 272 | val newSchema = copy(schemaField.schema(), parents :+ schemaField.name) 273 | val newField = new org.apache.avro.Schema.Field(schemaField.name, newSchema, schemaField.doc(), schemaField.defaultVal()) 274 | schemaField.aliases().foreach(newField.addAlias) 275 | newField 276 | } 277 | 278 | case Seq(Left(f)) if f.name == "*" => 279 | from.getFields.map { schemaField => 280 | val newSchema = copy(schemaField.schema(), parents :+ schemaField.name) 281 | val newField = new org.apache.avro.Schema.Field(schemaField.name, newSchema, schemaField.doc(), schemaField.defaultVal()) 282 | schemaField.aliases().foreach(newField.addAlias) 283 | newField 284 | } 285 | case other => 286 | fields.flatMap { 287 | case Left(field) if field.name == "*" => 288 | from.getFields 289 | .withFilter(f => 
!fields.exists(e => e.isLeft && e.left.get.name == f.name)) 290 | .map { f => 291 | val newSchema = copy(f.schema(), parents :+ f.name) 292 | newSchema.copyProperties(f.schema()) 293 | val newField = new org.apache.avro.Schema.Field(f.name(), newSchema, f.doc(), f.defaultVal()) 294 | newField 295 | }.toList 296 | 297 | case Left(field) => 298 | val originalField = Option(from.getField(field.name)).getOrElse( 299 | throw new IllegalArgumentException(s"Invalid selecting ${parents.mkString("", ".", ".")}${field.name}. Schema doesn't contain it.")) 300 | val newSchema = copy(originalField.schema(), parents :+ field.name) 301 | newSchema.copyProperties(originalField.schema()) 302 | val newField = new org.apache.avro.Schema.Field(field.alias, newSchema, originalField.doc(), originalField.defaultVal()) 303 | Seq(newField) 304 | 305 | case Right(field) => 306 | val originalField = Option(from.getField(field)) 307 | .getOrElse(throw new IllegalArgumentException(s"Invalid selecting ${parents.mkString("", ".", ".")}$field. 
Schema doesn't contain it.")) 308 | val newSchema = copy(originalField.schema(), parents :+ field) 309 | newSchema.copyProperties(originalField.schema()) 310 | val newField = new org.apache.avro.Schema.Field(field, newSchema, originalField.doc(), originalField.defaultVal()) 311 | Seq(newField) 312 | } 313 | } 314 | 315 | newSchema.setFields(newFields) 316 | newSchema.copyProperties(from) 317 | } 318 | 319 | def fromPath(from: Schema, path: Seq[String]): Seq[AvroField] = { 320 | fromPathInternal(from: Schema, path, from.isNullable()) 321 | } 322 | 323 | @tailrec 324 | private def fromPathInternal(from: Schema, path: Seq[String], isOptional: Boolean): Seq[AvroField] = { 325 | path match { 326 | case Seq(field) if field == "*" => 327 | from.getType match { 328 | case Schema.Type.RECORD => 329 | if (!isOptional) from.getFields.toSeq 330 | else from.getFields.map(asNullable) 331 | 332 | case Schema.Type.UNION => 333 | val underlyingSchema = from.fromUnion() 334 | underlyingSchema.getType match { 335 | case Schema.Type.RECORD => 336 | if (!isOptional) underlyingSchema.getFields.toSeq 337 | else underlyingSchema.getFields.map(asNullable) 338 | 339 | case other => throw new IllegalArgumentException(s"Can't select field:$field from ${other.toString}") 340 | } 341 | case other => throw new IllegalArgumentException(s"Can't select field:$field from ${other.toString}") 342 | } 343 | case Seq(field) => 344 | from.getType match { 345 | case Schema.Type.RECORD => 346 | if (!isOptional) Seq(from.getField(field)) 347 | else Seq(asNullable(from.getField(field))) 348 | 349 | case Schema.Type.UNION => 350 | val underlyingSchema = from.fromUnion() 351 | underlyingSchema.getType match { 352 | case Schema.Type.RECORD => 353 | if (!isOptional) underlyingSchema.getFields.toSeq 354 | else underlyingSchema.getFields.map(asNullable) 355 | 356 | case other => throw new IllegalArgumentException(s"Can't select field:$field from ${other.toString}") 357 | } 358 | 359 | case other => throw new 
IllegalArgumentException(s"Can't select field:$field from ${other.toString}") 360 | } 361 | case head +: tail => 362 | val next = Option(from.getField(head)) 363 | .getOrElse(throw new IllegalArgumentException(s"Can't find the field '$head'")) 364 | fromPathInternal(next.schema(), tail, isOptional || next.schema().isNullable()) 365 | } 366 | } 367 | 368 | private def asNullable(f: AvroField): AvroField = { 369 | new AvroField(f.name(), f.schema().copyAsNullable(), f.doc(), f.defaultVal()) 370 | } 371 | 372 | @tailrec 373 | def checkAllowedSchemas(schema: Schema, field: Field): Unit = { 374 | schema.getType match { 375 | case Schema.Type.ARRAY | Schema.Type.MAP => throw new IllegalArgumentException(s"Can't flatten from schema:$schema by selecting '${field.name}'") 376 | case Schema.Type.UNION => checkAllowedSchemas(schema.fromUnion(), field) 377 | case _ => 378 | } 379 | } 380 | } 381 | 382 | } 383 | -------------------------------------------------------------------------------- /src/main/scala/com/landoop/avro/sql/AvroSql.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2017 Landoop. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.landoop.avro.sql 17 | 18 | import java.util 19 | 20 | import com.landoop.avro.sql.AvroSchemaSql._ 21 | import com.landoop.sql.{Field, SqlContext} 22 | import io.confluent.kafka.serializers.NonRecordContainer 23 | import org.apache.avro.Schema 24 | import org.apache.avro.generic.{GenericContainer, GenericData, IndexedRecord} 25 | import org.apache.avro.util.Utf8 26 | import org.apache.calcite.sql.SqlSelect 27 | 28 | import scala.collection.JavaConversions._ 29 | import scala.collection.mutable.ArrayBuffer 30 | import scala.util.{Failure, Success, Try} 31 | 32 | object AvroSql extends AvroFieldValueGetter { 33 | private val StringSchema = Schema.create(Schema.Type.STRING) 34 | 35 | implicit class IndexedRecordExtension(val record: IndexedRecord) extends AnyVal { 36 | def get(fieldName: String): Any = { 37 | Option(record.getSchema.getField(fieldName)) 38 | .map(f => record.get(f.pos)) 39 | .orNull 40 | } 41 | } 42 | 43 | implicit class GenericContainerKcqlConverter(val from: GenericContainer) extends AnyVal { 44 | def sql(query: String): GenericContainer = { 45 | import org.apache.calcite.config.Lex 46 | import org.apache.calcite.sql.parser.SqlParser 47 | val config = SqlParser.configBuilder 48 | .setLex(Lex.MYSQL) 49 | .setCaseSensitive(false) 50 | .setIdentifierMaxLength(250) 51 | .build 52 | 53 | val withStructure: Boolean = query.trim.toLowerCase().endsWith("withstructure") 54 | val sql = if (withStructure) { 55 | query.trim.dropRight("withstructure".length) 56 | } else query 57 | 58 | val parser = SqlParser.create(sql, config) 59 | val select = Try(parser.parseQuery()) match { 60 | case Failure(e) => throw new IllegalArgumentException(s"Query is not valid.${e.getMessage}") 61 | case Success(sqlSelect: SqlSelect) => sqlSelect 62 | case Success(sqlNode) => throw new IllegalArgumentException("Only `select` statements are allowed") 63 | } 64 | this.sql(select, !withStructure) 65 | } 66 | 67 | def sql(query: SqlSelect, flatten: Boolean): 
GenericContainer = { 68 | Option(from).map { _ => 69 | from match { 70 | case _: NonRecordContainer => 71 | case _: IndexedRecord => 72 | case other => throw new IllegalArgumentException(s"Avro type ${other.getClass.getName} is not supported") 73 | } 74 | if (!flatten) { 75 | implicit val sqlContext = new SqlContext(Field.from(query)) 76 | val schema = from.getSchema.copy() 77 | sql(schema) 78 | } else { 79 | implicit val fields = Field.from(query) 80 | val schema = from.getSchema.flatten(fields) 81 | this.flatten(schema) 82 | } 83 | } 84 | }.orNull 85 | 86 | def sql(fields: Seq[Field], flatten: Boolean): GenericContainer = { 87 | Option(from).map { _ => 88 | from match { 89 | case _: NonRecordContainer => 90 | case _: IndexedRecord => 91 | case other => throw new IllegalArgumentException(s"Avro type ${other.getClass.getName} is not supported") 92 | } 93 | if (!flatten) { 94 | implicit val sqlContext = new SqlContext(fields) 95 | val schema = from.getSchema.copy() 96 | sql(schema) 97 | } else { 98 | implicit val f = fields 99 | val schema = from.getSchema.flatten(fields) 100 | this.flatten(schema) 101 | } 102 | }.orNull 103 | } 104 | 105 | 106 | private[sql] def sql(newSchema: Schema)(implicit sqlContext: SqlContext): GenericContainer = { 107 | from match { 108 | case container: NonRecordContainer => 109 | sqlContext.fields match { 110 | case Seq(f) if f.name == "*" => container 111 | case _ => throw new IllegalArgumentException(s"Can't select specific fields from primitive avro record:${from.getClass.getName}") 112 | } 113 | case record: IndexedRecord => fromRecord(record, record.getSchema, newSchema, Vector.empty[String]).asInstanceOf[IndexedRecord] 114 | case other => throw new IllegalArgumentException(s"${other.getClass.getName} is not handled") 115 | } 116 | } 117 | 118 | private[sql] def flatten(newSchema: Schema)(implicit fields: Seq[Field]): GenericContainer = { 119 | from match { 120 | case container: NonRecordContainer => flattenPrimitive(container) 121 | 
case record: IndexedRecord => flattenIndexedRecord(record, newSchema) 122 | case other => throw new IllegalArgumentException(s"${other.getClass.getName} is not handled") 123 | } 124 | } 125 | 126 | private def flattenPrimitive(value: NonRecordContainer)(implicit fields: Seq[Field]): GenericContainer = { 127 | fields match { 128 | case Seq(f) if f.name == "*" => value 129 | case _ => throw new IllegalArgumentException(s"Can't select multiple fields from ${value.getSchema}") 130 | } 131 | } 132 | 133 | private def flattenIndexedRecord(record: IndexedRecord, newSchema: Schema)(implicit fields: Seq[Field]): GenericContainer = { 134 | val fieldsParentMap = fields.foldLeft(Map.empty[String, ArrayBuffer[String]]) { case (map, f) => 135 | val key = Option(f.parents).map(_.mkString(".")).getOrElse("") 136 | val buffer = map.getOrElse(key, ArrayBuffer.empty[String]) 137 | buffer += f.name 138 | map + (key -> buffer) 139 | } 140 | 141 | val newRecord = new GenericData.Record(newSchema) 142 | fields.foldLeft(0) { case (index, field) => 143 | if (field.name == "*") { 144 | val sourceFields = record.getSchema.getFields(Option(field.parents).getOrElse(Seq.empty)) 145 | val key = Option(field.parents).map(_.mkString(".")).getOrElse("") 146 | sourceFields 147 | .filter { f => 148 | fieldsParentMap.get(key).forall(!_.contains(f.name)) 149 | }.foldLeft(index) { case (i, f) => 150 | val extractedValue = get(record, record.getSchema, Option(field.parents).getOrElse(Seq.empty[String]) :+ f.name) 151 | newRecord.put(i, extractedValue.orNull) 152 | i + 1 153 | } 154 | } 155 | else { 156 | val extractedValue = get(record, record.getSchema, Option(field.parents).getOrElse(Seq.empty[String]) :+ field.name) 157 | newRecord.put(index, extractedValue.orNull) 158 | index + 1 159 | } 160 | } 161 | newRecord 162 | } 163 | 164 | private[sql] def fromUnion(value: Any, 165 | fromSchema: Schema, 166 | targetSchema: Schema, 167 | parents: Seq[String])(implicit sqlContext: SqlContext): Any = { 168 | 
from(value, fromSchema.fromUnion(), targetSchema.fromUnion(), parents) 169 | } 170 | 171 | 172 | private[sql] def fromArray(value: Any, 173 | schema: Schema, 174 | targetSchema: 175 | Schema, 176 | parents: Seq[String])(implicit sqlContext: SqlContext): Any = { 177 | value match { 178 | case c: java.util.Collection[_] => 179 | c.foldLeft(new java.util.ArrayList[Any](c.size())) { (acc, e) => 180 | acc.add(from(e, schema.getElementType, targetSchema.getElementType, parents)) 181 | acc 182 | } 183 | case other => throw new IllegalArgumentException(s"${other.getClass.getName} is not handled") 184 | } 185 | } 186 | 187 | private[sql] def fromRecord(value: Any, 188 | schema: Schema, 189 | targetSchema: Schema, 190 | parents: Seq[String])(implicit sqlContext: SqlContext): Any = { 191 | val record = value.asInstanceOf[IndexedRecord] 192 | val fields = sqlContext.getFieldsForPath(parents) 193 | //.get(parents.head) 194 | val fieldsTuple = fields.headOption.map { _ => 195 | fields.flatMap { 196 | case Left(field) if field.name == "*" => 197 | val filteredFields = fields.collect { case Left(f) if f.name != "*" => f.name }.toSet 198 | 199 | schema.getFields 200 | .withFilter(f => !filteredFields.contains(f.name())) 201 | .map { f => 202 | val sourceField = Option(schema.getField(f.name)) 203 | .getOrElse(throw new IllegalArgumentException(s"${f.name} was not found in $schema")) 204 | sourceField -> f 205 | } 206 | 207 | case Left(field) => 208 | val sourceField = Option(schema.getField(field.name)) 209 | .getOrElse(throw new IllegalArgumentException(s"${field.name} can't be found in $schema")) 210 | 211 | val targetField = Option(targetSchema.getField(field.alias)) 212 | .getOrElse(throw new IllegalArgumentException(s"${field.alias} can't be found in $targetSchema")) 213 | 214 | List(sourceField -> targetField) 215 | 216 | case Right(field) => 217 | val sourceField = Option(schema.getField(field)) 218 | .getOrElse(throw new IllegalArgumentException(s"$field can't be found in 
$schema")) 219 | 220 | val targetField = Option(targetSchema.getField(field)) 221 | .getOrElse(throw new IllegalArgumentException(s"$field can't be found in $targetSchema")) 222 | 223 | List(sourceField -> targetField) 224 | 225 | } 226 | }.getOrElse { 227 | targetSchema.getFields 228 | .map { f => 229 | val sourceField = Option(schema.getField(f.name)) 230 | .getOrElse(throw new IllegalArgumentException(s"Can't find the field ${f.name} in ${schema.getFields.map(_.name()).mkString(",")}")) 231 | sourceField -> f 232 | } 233 | } 234 | 235 | val newRecord = new GenericData.Record(targetSchema) 236 | fieldsTuple.foreach { case (sourceField, targetField) => 237 | val v = from(record.get(sourceField.name()), 238 | sourceField.schema(), 239 | targetField.schema(), 240 | parents :+ sourceField.name) 241 | newRecord.put(targetField.name(), v) 242 | } 243 | newRecord 244 | } 245 | 246 | private[sql] def fromMap(value: Any, fromSchema: Schema, 247 | targetSchema: Schema, 248 | parents: Seq[String])(implicit sqlContext: SqlContext): Any = { 249 | Option(value.asInstanceOf[java.util.Map[CharSequence, Any]]).map { map => 250 | val newMap = new util.HashMap[CharSequence, Any]() 251 | //check if there are keys for this 252 | val fields = sqlContext.getFieldsForPath(parents) 253 | val initialMap = { 254 | if (fields.exists(f => f.isLeft && f.left.get.name == "*")) { 255 | map.keySet().map(k => k.toString -> k.toString).toMap 256 | } else { 257 | Map.empty[String, String] 258 | } 259 | } 260 | 261 | fields.headOption.map { _ => 262 | fields.filterNot(f => f.isLeft && f.left.get.name != "*") 263 | .foldLeft(initialMap) { 264 | case (m, Left(f)) => m + (f.name -> f.alias) 265 | case (m, Right(f)) => m + (f -> f) 266 | } 267 | } 268 | .getOrElse(map.keySet().map(k => k.toString -> k.toString).toMap) 269 | .foreach { case (key, alias) => 270 | Option(map.get(key)).foreach { v => 271 | newMap.put( 272 | from(key, StringSchema, StringSchema, null).asInstanceOf[CharSequence], 273 | 
from(v, fromSchema.getValueType, targetSchema.getValueType, parents)) 274 | } 275 | } 276 | newMap 277 | }.orNull 278 | } 279 | 280 | private[sql] def from(from: Any, 281 | fromSchema: Schema, 282 | targetSchema: Schema, 283 | parents: Seq[String])(implicit sqlContext: SqlContext): Any = { 284 | Option(from).map { _ => 285 | implicit val s = fromSchema 286 | fromSchema.getType match { 287 | case Schema.Type.BOOLEAN | Schema.Type.NULL | 288 | Schema.Type.DOUBLE | Schema.Type.FLOAT | 289 | Schema.Type.LONG | Schema.Type.INT | 290 | Schema.Type.ENUM | Schema.Type.BYTES | Schema.Type.FIXED => from 291 | 292 | case Schema.Type.STRING => new Utf8(from.toString).asInstanceOf[Any] //yes UTF8 293 | 294 | case Schema.Type.UNION => fromUnion(from, fromSchema, targetSchema, parents) 295 | 296 | case Schema.Type.ARRAY => fromArray(from, fromSchema, targetSchema, parents) 297 | 298 | case Schema.Type.MAP => fromMap(from, fromSchema, targetSchema, parents) 299 | 300 | case Schema.Type.RECORD => fromRecord(from, fromSchema, targetSchema, parents) 301 | 302 | case other => throw new IllegalArgumentException(s"Invalid Avro schema type:$other") 303 | } 304 | }.orNull 305 | } 306 | } 307 | 308 | } 309 | -------------------------------------------------------------------------------- /src/main/scala/com/landoop/avro/sql/AvroUnpacker.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2017 Landoop. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.landoop.avro.sql 17 | 18 | import java.nio.ByteBuffer 19 | import java.text.SimpleDateFormat 20 | import java.util.{Date, TimeZone} 21 | 22 | import org.apache.avro.Conversions.{DecimalConversion, UUIDConversion} 23 | import org.apache.avro.generic.{GenericFixed, IndexedRecord} 24 | import org.apache.avro.{LogicalTypes, Schema} 25 | 26 | import scala.collection.JavaConversions._ 27 | import scala.collection.mutable.{Map => MutableMap} 28 | 29 | object AvroUnpacker { 30 | private val ISO_DATE_FORMAT: SimpleDateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'") 31 | private val TIME_FORMAT: SimpleDateFormat = new SimpleDateFormat("HH:mm:ss.SSSZ") 32 | 33 | ISO_DATE_FORMAT.setTimeZone(TimeZone.getTimeZone("UTC")) 34 | 35 | private val DecimalConversion = new DecimalConversion 36 | private val UUIDConversion = new UUIDConversion 37 | private val DECIMAL = "decimal" 38 | private val UUID = "uuid" 39 | private val DATE = "date" 40 | private val TIME_MILLIS = "time-millis" 41 | private val TIME_MICROS = "time-micros" 42 | private val TIMESTAMP_MILLIS = "timestamp-millis" 43 | private val TIMESTAMP_MICROS = "timestamp-micros" 44 | 45 | def fromBytes(value: Any)(implicit schema: Schema): Any = { 46 | val bytes = value match { 47 | case b: ByteBuffer => b.array() 48 | case a: Array[Byte] => a 49 | case other => throw new IllegalArgumentException(s"${other.getClass.getName} is not supported ") 50 | } 51 | Option(LogicalTypes.fromSchemaIgnoreInvalid(schema)).map { lt => 52 | lt.getName match { 53 | case UUID => UUIDConversion.fromCharSequence(new String(bytes), schema, lt) 54 | case DECIMAL => DecimalConversion.fromBytes(ByteBuffer.wrap(bytes), schema, lt) 55 | case _ => bytes //not supporting something else 56 | } 57 | }.getOrElse(bytes) 58 | } 59 | 60 | def fromFixed(value: Any)(implicit schema: Schema): Any = 
fromBytes(value.asInstanceOf[GenericFixed].bytes()) 61 | 62 | def fromArray(value: Any)(implicit schema: Schema): Any = { 63 | value match { 64 | case c: java.util.Collection[_] => c.map(apply(_, schema.getElementType)) 65 | case arr: Array[_] if arr.getClass.getComponentType.isPrimitive => arr 66 | case arr: Array[_] => arr.map(apply(_, schema.getElementType)) 67 | case other => throw new IllegalArgumentException(s"Unknown ARRAY type ${other.getClass.getName}") 68 | } 69 | } 70 | 71 | def fromMap(value: Any)(implicit schema: Schema): Any = { 72 | value.asInstanceOf[java.util.Map[_, _]].foldLeft(Map.empty[String, Any]) { case (map, (key, v)) => 73 | map + (key.toString -> apply(v, schema.getValueType)) 74 | } 75 | } 76 | 77 | def fromRecord(value: Any)(implicit schema: Schema): Any = { 78 | value match { 79 | case record: IndexedRecord => 80 | record.getSchema.getFields 81 | .zipWithIndex 82 | .foldLeft(MutableMap.empty[String, Any]) { case (map, (f, i)) => 83 | map + (f.name -> apply(record.get(i), f.schema)) 84 | } 85 | case other => throw new IllegalArgumentException(s"Unsupported RECORD type ${other.getClass.getName}") 86 | } 87 | } 88 | 89 | def fromUnion(value: Any)(implicit schema: Schema): Any = { 90 | schema.getTypes.toList match { 91 | case actualSchema +: Nil => apply(value, actualSchema) 92 | case List(n, actualSchema) if n.getType == Schema.Type.NULL => apply(value, actualSchema) 93 | case List(actualSchema, n) if n.getType == Schema.Type.NULL => apply(value, actualSchema) 94 | case _ => throw new IllegalArgumentException("Unions has one specific type and null") 95 | } 96 | } 97 | 98 | def fromInt(value: Any)(implicit schema: Schema): Any = { 99 | val i = value.asInstanceOf[Int] 100 | Option(LogicalTypes.fromSchemaIgnoreInvalid(schema)).map { lt => 101 | lt.getName match { 102 | case TIME_MILLIS => TIME_FORMAT.format(new Date(i.toLong)) 103 | case TIMESTAMP_MILLIS => ISO_DATE_FORMAT.format(new Date(i.toLong)) 104 | case DATE => 
ISO_DATE_FORMAT.format(new Date(i.toLong * 86400000)) 105 | } 106 | }.getOrElse(i) 107 | } 108 | 109 | 110 | def fromLong(value: Any)(implicit schema: Schema): Any = { 111 | val l = value.asInstanceOf[Long] 112 | Option(LogicalTypes.fromSchemaIgnoreInvalid(schema)).map { lt => 113 | lt.getName match { 114 | case TIME_MILLIS => TIME_FORMAT.format(new Date(l)) 115 | case TIMESTAMP_MILLIS => ISO_DATE_FORMAT.format(new Date(l)) 116 | case DATE => ISO_DATE_FORMAT.format(new Date(l * 86400000)) 117 | } 118 | }.getOrElse(l) 119 | } 120 | 121 | def apply(value: Any, schema: Schema): Any = { 122 | Option(value).map { _ => 123 | implicit val s = schema 124 | schema.getType match { 125 | case Schema.Type.BOOLEAN | Schema.Type.NULL | 126 | Schema.Type.DOUBLE | Schema.Type.FLOAT => value 127 | 128 | case Schema.Type.LONG => fromLong(value) 129 | case Schema.Type.STRING => value.toString //yes UTF8 130 | case Schema.Type.INT => fromInt(value) 131 | case Schema.Type.ENUM => value.toString 132 | case Schema.Type.UNION => fromUnion(value) 133 | case Schema.Type.ARRAY => fromArray(value) 134 | case Schema.Type.FIXED => fromFixed(value) 135 | case Schema.Type.MAP => fromMap(value) 136 | case Schema.Type.BYTES => fromBytes(value) 137 | case Schema.Type.RECORD => fromRecord(value) 138 | case other => throw new IllegalArgumentException(s"Invalid Avro schema type:$other") 139 | } 140 | }.orNull 141 | } 142 | 143 | } 144 | -------------------------------------------------------------------------------- /src/test/scala/com/landoop/sql/avro/AvroSqlTest.scala: -------------------------------------------------------------------------------- 1 | package com.landoop.sql.avro 2 | 3 | import com.landoop.avro.sql.AvroSql._ 4 | import com.sksamuel.avro4s.{RecordFormat, SchemaFor, ToRecord} 5 | import io.confluent.kafka.serializers.NonRecordContainer 6 | import org.apache.avro.Schema 7 | import org.apache.avro.generic.{GenericContainer, GenericRecord} 8 | import org.scalatest.{Matchers, WordSpec} 9 
| 10 | class AvroSqlTest extends WordSpec with Matchers { 11 | 12 | private def compare[T](actual: GenericContainer, t: T)(implicit schemaFor: SchemaFor[T], toRecord: ToRecord[T]) = { 13 | val expectedSchema = schemaFor().toString() 14 | .replace("LocalPerson", "Person") 15 | .replace("LocalAddress", "Address") 16 | .replace("LocalStreet", "Street") 17 | .replace("LocalPizza", "Pizza") 18 | .replace("LocalSimpleAddress", "SimpleAddress") 19 | 20 | actual.getSchema.toString() shouldBe expectedSchema 21 | 22 | val expectedRecord = toRecord.apply(t) 23 | actual.toString shouldBe expectedRecord.toString 24 | } 25 | 26 | "AvroSql" should { 27 | "handle null payload" in { 28 | null.asInstanceOf[GenericContainer].sql("SELECT *") shouldBe null.asInstanceOf[Any] 29 | } 30 | 31 | "throw an exception when the parameter is not a GenericRecord or NonRecordContainer" in { 32 | intercept[IllegalArgumentException] { 33 | new GenericContainer { 34 | override def getSchema = null 35 | }.sql("SELECT *") 36 | } 37 | } 38 | 39 | "handle Int avro record" in { 40 | val container = new NonRecordContainer(Schema.create(Schema.Type.INT), 2000) 41 | 42 | val expected = new NonRecordContainer(Schema.create(Schema.Type.INT), 2000) 43 | container.sql("SELECT *") shouldBe expected 44 | } 45 | 46 | "handle Nullable Int avro record with a integer value" in { 47 | val nullSchema = Schema.create(Schema.Type.NULL) 48 | val intSchema = Schema.create(Schema.Type.INT) 49 | val schema = Schema.createUnion(nullSchema, intSchema) 50 | 51 | val container = new NonRecordContainer(schema, 2000) 52 | val expected = new NonRecordContainer(schema, 2000) 53 | 54 | container.sql("SELECT *") shouldBe expected 55 | } 56 | 57 | "handle Nullable Int avro record with a null value" in { 58 | val nullSchema = Schema.create(Schema.Type.NULL) 59 | val intSchema = Schema.create(Schema.Type.INT) 60 | val schema = Schema.createUnion(nullSchema, intSchema) 61 | 62 | val container = new NonRecordContainer(schema, null) 63 | val 
expected = new NonRecordContainer(schema, null) 64 | container.sql("SELECT *") shouldBe expected 65 | } 66 | 67 | "throw an exception when trying to select field of an Int avro record" in { 68 | val expected = 2000 69 | val container = new NonRecordContainer(Schema.create(Schema.Type.INT), expected) 70 | intercept[IllegalArgumentException] { 71 | container.sql("SELECT field1") 72 | } 73 | } 74 | 75 | "handle 'SELECT name,vegan, calories ' for a record" in { 76 | val pepperoni = Pizza("pepperoni", Seq(Ingredient("pepperoni", 12, 4.4), Ingredient("onions", 1, 0.4)), false, false, 98) 77 | 78 | val schema = SchemaFor[Pizza]() 79 | val toRecord = ToRecord[Pizza] 80 | val record = RecordFormat[Pizza].to(pepperoni) 81 | 82 | val actual = record.sql("SELECT name,vegan, calories") 83 | 84 | case class LocalPizza(name: String, vegan: Boolean, calories: Int) 85 | val expected = LocalPizza(pepperoni.name, pepperoni.vegan, pepperoni.calories) 86 | 87 | compare(actual, expected) 88 | } 89 | 90 | "handle 'SELECT name as fieldName,vegan as V, calories as C' for a record" in { 91 | val pepperoni = Pizza("pepperoni", Seq(Ingredient("pepperoni", 12, 4.4), Ingredient("onions", 1, 0.4)), false, false, 98) 92 | 93 | val schema = SchemaFor[Pizza]() 94 | val toRecord = ToRecord[Pizza] 95 | val record = RecordFormat[Pizza].to(pepperoni) 96 | 97 | val actual = record.sql("SELECT name as fieldName,vegan as V, calories as C") 98 | 99 | case class LocalPizza(fieldName: String, V: Boolean, C: Int) 100 | val expected = LocalPizza(pepperoni.name, pepperoni.vegan, pepperoni.calories) 101 | 102 | compare(actual, expected) 103 | } 104 | 105 | "handle 'SELECT calories as C ,vegan as V ,name as fieldName' for a record" in { 106 | val pepperoni = Pizza("pepperoni", Seq(Ingredient("pepperoni", 12, 4.4), Ingredient("onions", 1, 0.4)), false, false, 98) 107 | 108 | val schema = SchemaFor[Pizza]() 109 | val toRecord = ToRecord[Pizza] 110 | val record = RecordFormat[Pizza].to(pepperoni) 111 | 112 | val 
actual = record.sql("SELECT calories as C,vegan as V,name as fieldName") 113 | 114 | case class LocalPizza(C: Int, V: Boolean, fieldName: String) 115 | val expected = LocalPizza(pepperoni.calories, pepperoni.vegan, pepperoni.name) 116 | 117 | compare(actual, expected) 118 | } 119 | 120 | "throw an exception when selecting arrays ' for a record" in { 121 | val pepperoni = Pizza("pepperoni", Seq(Ingredient("pepperoni", 12, 4.4), Ingredient("onions", 1, 0.4)), false, false, 98) 122 | 123 | val schema = SchemaFor[Pizza]() 124 | val toRecord = ToRecord[Pizza] 125 | val record = RecordFormat[Pizza].to(pepperoni) 126 | 127 | intercept[IllegalArgumentException] { 128 | record.sql("SELECT *, name as fieldName") 129 | } 130 | } 131 | 132 | "handle 'SELECT name, address.street.name' from record" in { 133 | val person = Person("Rick", Address(Street("Rock St"), None, "MtV", "CA", "94041", "USA")) 134 | 135 | val schema = SchemaFor[Person]() 136 | val toRecord = ToRecord[Person] 137 | val record = RecordFormat[Person].to(person) 138 | 139 | val actual = record.sql("SELECT name, address.street.name") 140 | 141 | case class LocalPerson(name: String, name_1: String) 142 | val localPerson = LocalPerson(person.name, person.address.street.name) 143 | compare(actual, localPerson) 144 | } 145 | 146 | "handle 'SELECT name, address.street.name as streetName' from record" in { 147 | val person = Person("Rick", Address(Street("Rock St"), None, "MtV", "CA", "94041", "USA")) 148 | 149 | val schema = SchemaFor[Person]() 150 | val toRecord = ToRecord[Person] 151 | val record: GenericRecord = RecordFormat[Person].to(person) 152 | 153 | val actual = record.sql("SELECT name, address.street.name as streetName") 154 | 155 | case class LocalPerson(name: String, streetName: String) 156 | val localPerson = LocalPerson(person.name, person.address.street.name) 157 | compare(actual, localPerson) 158 | } 159 | 160 | "handle 'SELECT name, address.street.name as streetName, address.street2.name as 
streetName2' from record" in { 161 | val person = Person("Rick", Address(Street("Rock St"), None, "MtV", "CA", "94041", "USA")) 162 | 163 | val schema = SchemaFor[Person]() 164 | val toRecord = ToRecord[Person] 165 | val record = RecordFormat[Person].to(person) 166 | 167 | val actual = record.sql("SELECT name, address.street.name as streetName, address.street2.name as streetName2") 168 | 169 | case class LocalPerson(name: String, streetName: String, streetName2: Option[String]) 170 | val localPerson = LocalPerson(person.name, person.address.street.name, person.address.street2.map(_.name)) 171 | compare(actual, localPerson) 172 | } 173 | 174 | "handle 'SELECT name, address.street.*, address.street2.name as streetName2' from record" in { 175 | val person = Person("Rick", Address(Street("Rock St"), None, "MtV", "CA", "94041", "USA")) 176 | 177 | val schema = SchemaFor[Person]() 178 | val toRecord = ToRecord[Person] 179 | val record = RecordFormat[Person].to(person) 180 | 181 | val actual = record.sql("SELECT name, address.street.*, address.street2.name as streetName2") 182 | 183 | case class LocalPerson(name: String, name_1: String, streetName2: Option[String]) 184 | val localPerson = LocalPerson(person.name, person.address.street.name, person.address.street2.map(_.name)) 185 | compare(actual, localPerson) 186 | } 187 | 188 | "handle 'SELECT name, address.street.*, address.street2.*' from record" in { 189 | val person = Person("Rick", Address(Street("Rock St"), None, "MtV", "CA", "94041", "USA")) 190 | 191 | val schema = SchemaFor[Person]() 192 | val toRecord = ToRecord[Person] 193 | val record = RecordFormat[Person].to(person) 194 | 195 | val actual = record.sql("SELECT name, address.street.*, address.street2.*") 196 | 197 | case class LocalPerson(name: String, name_1: String, name_2: String) 198 | val localPerson = LocalPerson(person.name, person.address.street.name, person.address.street2.map(_.name).orNull) 199 | compare(actual, localPerson) 200 | 201 | val 
person1 = Person("Rick", Address(Street("Rock St"), Some(Street("412 East")), "MtV", "CA", "94041", "USA"))
      val record1 = RecordFormat[Person].to(person1)

      // Fix: this second scenario previously queried `record` and built the
      // expectation from `person`, so the populated street2 branch was never
      // actually exercised — it silently re-ran the first scenario.
      val actual1 = record1.sql("SELECT name, address.street.*, address.street2.*")
      val localPerson1 = LocalPerson(person1.name, person1.address.street.name, person1.address.street2.map(_.name).orNull)
      compare(actual1, localPerson1)

    }

    "handle 'SELECT address.state, address.city,name, address.street.name' from record" in {
      val person = Person("Rick", Address(Street("Rock St"), None, "MtV", "CA", "94041", "USA"))

      val schema = SchemaFor[Person]()
      val toRecord = ToRecord[Person]
      val record = RecordFormat[Person].to(person)

      val actual = record.sql("SELECT address.state, address.city,name, address.street.name")

      case class LocalPerson(state: String, city: String, name: String, name_1: String)
      val localPerson = LocalPerson(person.address.state, person.address.city, person.name, person.address.street.name)
      compare(actual, localPerson)
    }

    "handle 'SELECT address.state as S, address.city as C,name, address.street.name' from record" in {
      val person = Person("Rick", Address(Street("Rock St"), None, "MtV", "CA", "94041", "USA"))

      val schema = SchemaFor[Person]()
      val toRecord = ToRecord[Person]
      val record = RecordFormat[Person].to(person)

      val actual = record.sql("SELECT address.state as S, address.city as C,name, address.street.name")

      case class LocalPerson(S: String, C: String, name: String, name_1: String)
      val localPerson = LocalPerson(person.address.state, person.address.city, person.name, person.address.street.name)
      compare(actual, localPerson)
    }

    "throw an exception if the field doesn't exist in the schema" in {
      val person = Person("Rick", Address(Street("Rock St"), None, "MtV", "CA", "94041", "USA"))

| val schema = SchemaFor[Person]() 242 | val toRecord = ToRecord[Person] 243 | val record = RecordFormat[Person].to(person) 244 | 245 | intercept[IllegalArgumentException] { 246 | record.sql("SELECT address.bam, address.city,name, address.street.name") 247 | } 248 | } 249 | 250 | 251 | "handle 'SELECT * FROM simpleAddress' from record" in { 252 | val address = SimpleAddress("Rock St", "MtV", "CA", "94041", "USA") 253 | 254 | val schema = SchemaFor[SimpleAddress]() 255 | val toRecord = ToRecord[SimpleAddress] 256 | val record = RecordFormat[SimpleAddress].to(address) 257 | 258 | val actual = record.sql("SELECT * FROM simpleAddress") 259 | actual shouldBe record 260 | } 261 | 262 | "handle 'SELECT street as S, city, state, zip as Z, country as C FROM simpleAddress' from record" in { 263 | val address = SimpleAddress("Rock St", "MtV", "CA", "94041", "USA") 264 | 265 | val schema = SchemaFor[SimpleAddress]() 266 | val toRecord = ToRecord[SimpleAddress] 267 | val record = RecordFormat[SimpleAddress].to(address) 268 | 269 | val actual = record.sql("SELECT street as S, city, state, zip as Z, country as C FROM simpleAddress") 270 | 271 | case class LocalSimpleAddress(S: String, city: String, state: String, Z: String, C: String) 272 | val expected = LocalSimpleAddress(address.street, address.city, address.state, address.zip, address.country) 273 | 274 | compare(actual, expected) 275 | } 276 | 277 | "handle 'SELECT zip as Z, * FROM simpleAddress' from record" in { 278 | val address = SimpleAddress("Rock St", "MtV", "CA", "94041", "USA") 279 | 280 | val schema = SchemaFor[SimpleAddress]() 281 | val toRecord = ToRecord[SimpleAddress] 282 | val record = RecordFormat[SimpleAddress].to(address) 283 | 284 | val actual = record.sql("SELECT zip as Z, * FROM simpleAddress") 285 | 286 | case class LocalSimpleAddress(Z: String, street: String, city: String, state: String, country: String) 287 | val expected = LocalSimpleAddress(address.zip, address.street, address.city, address.state, 
address.country) 288 | 289 | compare(actual, expected) 290 | } 291 | 292 | "handle 'SELECT zip as Z, *, state as S FROM simpleAddress' from record" in { 293 | val address = SimpleAddress("Rock St", "MtV", "CA", "94041", "USA") 294 | 295 | val schema = SchemaFor[SimpleAddress]() 296 | val toRecord = ToRecord[SimpleAddress] 297 | val record = RecordFormat[SimpleAddress].to(address) 298 | 299 | val actual = record.sql("SELECT zip as Z, *, state as S FROM simpleAddress") 300 | 301 | case class LocalSimpleAddress(Z: String, street: String, city: String, country: String, S: String) 302 | val expected = LocalSimpleAddress(address.zip, address.street, address.city, address.country, address.state) 303 | 304 | compare(actual, expected) 305 | } 306 | } 307 | } 308 | -------------------------------------------------------------------------------- /src/test/scala/com/landoop/sql/avro/AvroSqlWithRetainStructureTest.scala: -------------------------------------------------------------------------------- 1 | package com.landoop.sql.avro 2 | 3 | import com.landoop.avro.sql.AvroSql._ 4 | import com.sksamuel.avro4s.{RecordFormat, SchemaFor, ToRecord} 5 | import io.confluent.kafka.serializers.NonRecordContainer 6 | import org.apache.avro.Schema 7 | import org.apache.avro.generic.GenericContainer 8 | import org.scalatest.{Matchers, WordSpec} 9 | 10 | class AvroSqlWithRetainStructureTest extends WordSpec with Matchers { 11 | 12 | private def compare[T](actual: GenericContainer, t: T)(implicit schemaFor: SchemaFor[T], toRecord: ToRecord[T]) = { 13 | val expectedSchema = schemaFor().toString() 14 | .replace("LocalPizza", "Pizza") 15 | .replace("LocalIngredient", "Ingredient") 16 | 17 | actual.getSchema.toString() shouldBe expectedSchema 18 | 19 | val expectedRecord = toRecord.apply(t) 20 | actual.toString shouldBe expectedRecord.toString 21 | } 22 | 23 | "AvroSql" should { 24 | "handle null payload" in { 25 | null.asInstanceOf[GenericContainer].sql("SELECT * FROM topic withstructure") 
shouldBe null.asInstanceOf[Any] 26 | } 27 | 28 | "throw an exception when the parameter is not a GenericRecord or NonRecordContainer" in { 29 | intercept[IllegalArgumentException] { 30 | new GenericContainer { 31 | override def getSchema = null 32 | }.sql("SELECT * FROM topic withstructure") 33 | } 34 | } 35 | 36 | "handle Int avro record" in { 37 | val container = new NonRecordContainer(Schema.create(Schema.Type.INT), 2000) 38 | 39 | val expected = new NonRecordContainer(Schema.create(Schema.Type.INT), 2000) 40 | container.sql("SELECT * FROM topic withstructure") shouldBe expected 41 | } 42 | 43 | "handle Nullable Int avro record with a integer value" in { 44 | val nullSchema = Schema.create(Schema.Type.NULL) 45 | val intSchema = Schema.create(Schema.Type.INT) 46 | val schema = Schema.createUnion(nullSchema, intSchema) 47 | 48 | val container = new NonRecordContainer(schema, 2000) 49 | val expected = new NonRecordContainer(schema, 2000) 50 | 51 | container.sql("SELECT * FROM topic withstructure") shouldBe expected 52 | } 53 | 54 | "handle Nullable Int avro record with a null value" in { 55 | val nullSchema = Schema.create(Schema.Type.NULL) 56 | val intSchema = Schema.create(Schema.Type.INT) 57 | val schema = Schema.createUnion(nullSchema, intSchema) 58 | 59 | val container = new NonRecordContainer(schema, null) 60 | val expected = new NonRecordContainer(schema, null) 61 | container.sql("SELECT * FROM topic withstructure") shouldBe expected 62 | } 63 | 64 | "throw an exception when trying to select field of an Int avro record" in { 65 | val expected = 2000 66 | val container = new NonRecordContainer(Schema.create(Schema.Type.INT), expected) 67 | intercept[IllegalArgumentException] { 68 | container.sql("SELECT field1 FROM topic withstructure") 69 | } 70 | } 71 | 72 | "handle 'SELECT * FROM topic withstructure' for a record" in { 73 | val pepperoni = Pizza("pepperoni", Seq(Ingredient("pepperoni", 12, 4.4), Ingredient("onions", 1, 0.4)), false, false, 98) 74 | 75 | 
val schema = SchemaFor[Pizza]() 76 | val toRecord = ToRecord[Pizza] 77 | val record = RecordFormat[Pizza].to(pepperoni) 78 | 79 | val actual = record.sql("SELECT *FROM topic withstructure") 80 | actual shouldBe record 81 | } 82 | 83 | "handle 'SELECT *, name as fieldName FROM topic withstructure' for a record" in { 84 | val pepperoni = Pizza("pepperoni", Seq(Ingredient("pepperoni", 12, 4.4), Ingredient("onions", 1, 0.4)), false, false, 98) 85 | 86 | val schema = SchemaFor[Pizza]() 87 | val toRecord = ToRecord[Pizza] 88 | val record = RecordFormat[Pizza].to(pepperoni) 89 | 90 | val actual = record.sql("SELECT *, name as fieldName FROM topic withstructure") 91 | 92 | case class LocalIngredient(name: String, sugar: Double, fat: Double) 93 | case class LocalPizza(ingredients: Seq[LocalIngredient], vegetarian: Boolean, vegan: Boolean, calories: Int, fieldName: String) 94 | 95 | val newpepperoni = LocalPizza(Seq(LocalIngredient("pepperoni", 12, 4.4), LocalIngredient("onions", 1, 0.4)), false, false, 98, "pepperoni") 96 | compare(actual, newpepperoni) 97 | } 98 | 99 | "handle 'SELECT *, ingredients as stuff FROM topic withstructure' for a record" in { 100 | val pepperoni = Pizza("pepperoni", Seq(Ingredient("pepperoni", 12, 4.4), Ingredient("onions", 1, 0.4)), false, false, 98) 101 | 102 | val schema = SchemaFor[Pizza]() 103 | val toRecord = ToRecord[Pizza] 104 | val record = RecordFormat[Pizza].to(pepperoni) 105 | 106 | val actual = record.sql("SELECT *, ingredients as stuff FROM topic withstructure") 107 | 108 | case class LocalIngredient(name: String, sugar: Double, fat: Double) 109 | case class LocalPizza(name: String, vegetarian: Boolean, vegan: Boolean, calories: Int, stuff: Seq[LocalIngredient]) 110 | 111 | val newpepperoni = LocalPizza(pepperoni.name, pepperoni.vegetarian, pepperoni.vegan, pepperoni.calories, Seq(LocalIngredient("pepperoni", 12, 4.4), LocalIngredient("onions", 1, 0.4))) 112 | compare(actual, newpepperoni) 113 | } 114 | 115 | "handle 'SELECT name as 
fieldName, * FROM topic withstructure' for a record" in { 116 | val pepperoni = Pizza("pepperoni", Seq(Ingredient("pepperoni", 12, 4.4), Ingredient("onions", 1, 0.4)), false, false, 98) 117 | 118 | val schema = SchemaFor[Pizza]() 119 | val toRecord = ToRecord[Pizza] 120 | val record = RecordFormat[Pizza].to(pepperoni) 121 | 122 | val actual = record.sql("SELECT name as fieldName, * FROM topic withstructure") 123 | 124 | case class LocalIngredient(name: String, sugar: Double, fat: Double) 125 | case class LocalPizza(fieldName: String, ingredients: Seq[LocalIngredient], vegetarian: Boolean, vegan: Boolean, calories: Int) 126 | val newpepperoni = LocalPizza(pepperoni.name, Seq(LocalIngredient("pepperoni", 12, 4.4), LocalIngredient("onions", 1, 0.4)), pepperoni.vegetarian, pepperoni.vegan, pepperoni.calories) 127 | compare(actual, newpepperoni) 128 | } 129 | 130 | "handle 'SELECT vegan FROM topic withstructure' for a record" in { 131 | val pepperoni = Pizza("pepperoni", Seq(Ingredient("pepperoni", 12, 4.4), Ingredient("onions", 1, 0.4)), false, false, 98) 132 | 133 | val schema = SchemaFor[Pizza]() 134 | val toRecord = ToRecord[Pizza] 135 | val record = RecordFormat[Pizza].to(pepperoni) 136 | 137 | val actual = record.sql("SELECT vegan FROM topic withstructure") 138 | 139 | case class LocalPizza(vegan: Boolean) 140 | val newpepperoni = LocalPizza(pepperoni.vegan) 141 | compare(actual, newpepperoni) 142 | } 143 | 144 | "handle 'SELECT vegan as veganA FROM topic withstructure' for a record" in { 145 | val pepperoni = Pizza("pepperoni", Seq(Ingredient("pepperoni", 12, 4.4), Ingredient("onions", 1, 0.4)), false, false, 98) 146 | 147 | val schema = SchemaFor[Pizza]() 148 | val toRecord = ToRecord[Pizza] 149 | val record = RecordFormat[Pizza].to(pepperoni) 150 | 151 | val actual = record.sql("SELECT vegan as veganA FROM topic withstructure") 152 | 153 | case class LocalPizza(veganA: Boolean) 154 | val newpepperoni = LocalPizza(pepperoni.vegan) 155 | compare(actual, 
newpepperoni) 156 | } 157 | 158 | "handle 'SELECT ingredients.name FROM topic withstructure' for a record" in { 159 | val pepperoni = Pizza("pepperoni", Seq(Ingredient("pepperoni", 12, 4.4), Ingredient("onions", 1, 0.4)), false, false, 98) 160 | 161 | val schema = SchemaFor[Pizza]() 162 | val toRecord = ToRecord[Pizza] 163 | val record = RecordFormat[Pizza].to(pepperoni) 164 | 165 | val actual = record.sql("SELECT ingredients.name FROM topic withstructure") 166 | 167 | case class LocalIngredient(name: String) 168 | case class LocalPizza(ingredients: Seq[LocalIngredient]) 169 | val newpepperoni = LocalPizza(Seq(LocalIngredient("pepperoni"), LocalIngredient("onions"))) 170 | compare(actual, newpepperoni) 171 | } 172 | 173 | "handle 'SELECT ingredients.name, ingredients.sugar FROM topic withstructure' for a record" in { 174 | val pepperoni = Pizza("pepperoni", Seq(Ingredient("pepperoni", 12, 4.4), Ingredient("onions", 1, 0.4)), false, false, 98) 175 | 176 | val schema = SchemaFor[Pizza]() 177 | val toRecord = ToRecord[Pizza] 178 | val record = RecordFormat[Pizza].to(pepperoni) 179 | 180 | val actual = record.sql("SELECT ingredients.name, ingredients.sugar FROM topic withstructure") 181 | 182 | case class LocalIngredient(name: String, sugar: Double) 183 | case class LocalPizza(ingredients: Seq[LocalIngredient]) 184 | val newpepperoni = LocalPizza(Seq(LocalIngredient("pepperoni", 12), LocalIngredient("onions", 1))) 185 | compare(actual, newpepperoni) 186 | } 187 | 188 | "handle 'SELECT ingredients.name as fieldName, ingredients.sugar as fieldSugar FROM topic withstructure' for a record" in { 189 | val pepperoni = Pizza("pepperoni", Seq(Ingredient("pepperoni", 12, 4.4), Ingredient("onions", 1, 0.4)), false, false, 98) 190 | 191 | val schema = SchemaFor[Pizza]() 192 | val toRecord = ToRecord[Pizza] 193 | val record = RecordFormat[Pizza].to(pepperoni) 194 | 195 | val actual = record.sql("SELECT ingredients.name as fieldName, ingredients.sugar as fieldSugar FROM topic 
withstructure") 196 | 197 | case class LocalIngredient(fieldName: String, fieldSugar: Double) 198 | case class LocalPizza(ingredients: Seq[LocalIngredient]) 199 | val newpepperoni = LocalPizza(Seq(LocalIngredient("pepperoni", 12), LocalIngredient("onions", 1))) 200 | compare(actual, newpepperoni) 201 | } 202 | 203 | 204 | "handle 'SELECT ingredients.*,ingredients.name as fieldName, ingredients.sugar as fieldSugar FROM topic withstructure' for a record" in { 205 | val pepperoni = Pizza("pepperoni", Seq(Ingredient("pepperoni", 12, 4.4), Ingredient("onions", 1, 0.4)), false, false, 98) 206 | 207 | val schema = SchemaFor[Pizza]() 208 | val toRecord = ToRecord[Pizza] 209 | val record = RecordFormat[Pizza].to(pepperoni) 210 | 211 | val actual = record.sql("SELECT ingredients.*,ingredients.name as fieldName, ingredients.sugar as fieldSugar FROM topic withstructure") 212 | case class LocalIngredient(fat: Double, fieldName: String, fieldSugar: Double) 213 | case class LocalPizza(ingredients: Seq[LocalIngredient]) 214 | val newpepperoni = LocalPizza(Seq(LocalIngredient(4.4, "pepperoni", 12), LocalIngredient(0.4, "onions", 1))) 215 | compare(actual, newpepperoni) 216 | } 217 | 218 | "handle 'SELECT ingredients.name as fieldName,ingredients.*, ingredients.sugar as fieldSugar FROM topic withstructure' for a record" in { 219 | val pepperoni = Pizza("pepperoni", Seq(Ingredient("pepperoni", 12, 4.4), Ingredient("onions", 1, 0.4)), false, false, 98) 220 | 221 | val schema = SchemaFor[Pizza]() 222 | val toRecord = ToRecord[Pizza] 223 | val record = RecordFormat[Pizza].to(pepperoni) 224 | 225 | val actual = record.sql("SELECT ingredients.name as fieldName,ingredients.*, ingredients.sugar as fieldSugar FROM topic withstructure") 226 | 227 | case class LocalIngredient(fieldName: String, fat: Double, fieldSugar: Double) 228 | case class LocalPizza(ingredients: Seq[LocalIngredient]) 229 | val newpepperoni = LocalPizza(Seq(LocalIngredient("pepperoni", 4.4, 12), LocalIngredient("onions", 
0.4, 1))) 230 | compare(actual, newpepperoni) 231 | } 232 | 233 | "handle 'SELECT ingredients.name as fieldName, ingredients.sugar as fieldSugar, ingredients.* FROM topic withstructure' for a record" in { 234 | val pepperoni = Pizza("pepperoni", Seq(Ingredient("pepperoni", 12, 4.4), Ingredient("onions", 1, 0.4)), false, false, 98) 235 | 236 | val schema = SchemaFor[Pizza]() 237 | val toRecord = ToRecord[Pizza] 238 | val record = RecordFormat[Pizza].to(pepperoni) 239 | 240 | val actual = record.sql("SELECT ingredients.name as fieldName, ingredients.sugar as fieldSugar, ingredients.* FROM topic withstructure") 241 | 242 | case class LocalIngredient(fieldName: String, fieldSugar: Double, fat: Double) 243 | case class LocalPizza(ingredients: Seq[LocalIngredient]) 244 | val newpepperoni = LocalPizza(Seq(LocalIngredient("pepperoni", 12, 4.4), LocalIngredient("onions", 1, 0.4))) 245 | compare(actual, newpepperoni) 246 | } 247 | 248 | 249 | "handle 'SELECT name, ingredients.name as fieldName, ingredients.sugar as fieldSugar, ingredients.* FROM topic withstructure' for a record" in { 250 | val pepperoni = Pizza("pepperoni", Seq(Ingredient("pepperoni", 12, 4.4), Ingredient("onions", 1, 0.4)), false, false, 98) 251 | 252 | val schema = SchemaFor[Pizza]() 253 | val toRecord = ToRecord[Pizza] 254 | val record = RecordFormat[Pizza].to(pepperoni) 255 | 256 | val actual = record.sql("SELECT name, ingredients.name as fieldName, ingredients.sugar as fieldSugar, ingredients.* FROM topic withstructure") 257 | 258 | case class LocalIngredient(fieldName: String, fieldSugar: Double, fat: Double) 259 | case class LocalPizza(name: String, ingredients: Seq[LocalIngredient]) 260 | val newpepperoni = LocalPizza("pepperoni", Seq(LocalIngredient("pepperoni", 12, 4.4), LocalIngredient("onions", 1, 0.4))) 261 | compare(actual, newpepperoni) 262 | } 263 | 264 | "handle 'SELECT name, ingredients.name as fieldName, ingredients.sugar as fieldSugar, ingredients.*, calories as cals FROM topic 
withstructure' for a record" in { 265 | val pepperoni = Pizza("pepperoni", Seq(Ingredient("pepperoni", 12, 4.4), Ingredient("onions", 1, 0.4)), false, false, 98) 266 | 267 | val schema = SchemaFor[Pizza]() 268 | val toRecord = ToRecord[Pizza] 269 | val record = RecordFormat[Pizza].to(pepperoni) 270 | 271 | val actual = record.sql("SELECT name, ingredients.name as fieldName, ingredients.sugar as fieldSugar, ingredients.*, calories as cals FROM topic withstructure") 272 | case class LocalIngredient(fieldName: String, fieldSugar: Double, fat: Double) 273 | case class LocalPizza(name: String, ingredients: Seq[LocalIngredient], cals: Int) 274 | val newpepperoni = LocalPizza("pepperoni", Seq(LocalIngredient("pepperoni", 12, 4.4), LocalIngredient("onions", 1, 0.4)), 98) 275 | compare(actual, newpepperoni) 276 | } 277 | 278 | "handle 'SELECT name, ingredients.name as fieldName, calories as cals,ingredients.sugar as fieldSugar, ingredients.* FROM topic withstructure' for a record" in { 279 | val pepperoni = Pizza("pepperoni", Seq(Ingredient("pepperoni", 12, 4.4), Ingredient("onions", 1, 0.4)), false, false, 98) 280 | 281 | val schema = SchemaFor[Pizza]() 282 | val toRecord = ToRecord[Pizza] 283 | val record = RecordFormat[Pizza].to(pepperoni) 284 | 285 | val actual = record.sql("SELECT name, ingredients.name as fieldName, calories as cals, ingredients.sugar as fieldSugar, ingredients.* FROM topic withstructure") 286 | case class LocalIngredient(fieldName: String, fieldSugar: Double, fat: Double) 287 | case class LocalPizza(name: String, ingredients: Seq[LocalIngredient], cals: Int) 288 | val newpepperoni = LocalPizza("pepperoni", Seq(LocalIngredient("pepperoni", 12, 4.4), LocalIngredient("onions", 1, 0.4)), 98) 289 | compare(actual, newpepperoni) 290 | } 291 | } 292 | } 293 | -------------------------------------------------------------------------------- /src/test/scala/com/landoop/sql/avro/Person.scala: 
--------------------------------------------------------------------------------
package com.landoop.sql.avro

// Test fixture: a person with a nested address, used by the SQL projection tests.
case class Person(name: String, address: Address)

// street2 is optional so the tests can exercise projections over a missing
// nested record (None) as well as a populated one.
case class Address(street: Street, street2: Option[Street], city: String, state: String, zip: String, country: String)

// Single-field nested record, used to test 'address.street.name' style paths.
case class Street(name: String)

// Flat (non-nested) address, used by the 'SELECT ... FROM simpleAddress' tests.
case class SimpleAddress(street: String, city: String, state: String, zip: String, country: String)
--------------------------------------------------------------------------------
/src/test/scala/com/landoop/sql/avro/Pizza.scala:
--------------------------------------------------------------------------------
package com.landoop.sql.avro

// Test fixture: one pizza ingredient with per-ingredient nutrition values.
case class Ingredient(name: String, sugar: Double, fat: Double)

// Test fixture: pizza record with a nested ingredient collection, used to
// test projections over arrays of records.
case class Pizza(name: String, ingredients: Seq[Ingredient], vegetarian: Boolean, vegan: Boolean, calories: Int)

--------------------------------------------------------------------------------