├── .gitignore ├── LICENSE ├── README.md ├── pom.xml └── src ├── main ├── java │ └── com │ │ └── eclecticlogic │ │ └── orc │ │ ├── Converter.java │ │ ├── Factory.java │ │ ├── Orc.java │ │ ├── OrcConverter.java │ │ ├── OrcHandle.java │ │ ├── OrcList.java │ │ ├── OrcTemporal.java │ │ ├── OrcTemporalType.java │ │ ├── OrcWriter.java │ │ ├── Schema.java │ │ └── impl │ │ ├── AbstractOrcWriter.java │ │ ├── Column.java │ │ ├── PropertyInterceptor.java │ │ ├── ProxyManager.java │ │ ├── SchemaFilter.java │ │ ├── SchemaSpi.java │ │ ├── SchemaSpiImpl.java │ │ ├── bootstrap │ │ ├── GeneratorUtil.java │ │ └── OrcWriterBootstrap.java │ │ └── schema │ │ ├── AbstractSchemaColumn.java │ │ ├── ComplexType.java │ │ ├── GenInfo.java │ │ ├── ListChildSchemaColumn.java │ │ ├── SchemaColumn.java │ │ ├── Template.java │ │ ├── TypeDesc.java │ │ └── TypeInfo.java └── resources │ └── eclectic │ └── orc │ └── template │ ├── classShell.stg │ ├── methodCreateTypeDescription.stg │ ├── methodSpecialCaseSetup.stg │ └── methodWrite.stg └── test ├── groovy └── com │ └── eclecticlogic │ └── orc │ ├── ArrayTest.java │ ├── ChromaticConverter.groovy │ ├── Club.groovy │ ├── Color.groovy │ ├── Course.groovy │ ├── Graduate.groovy │ ├── GraduateDelegate.groovy │ ├── GraduationConverter.groovy │ ├── House.groovy │ ├── HouseConverter.groovy │ ├── Level.groovy │ ├── Power.groovy │ ├── Student.groovy │ ├── Teacher.groovy │ └── impl │ ├── TestBootstrap.groovy │ ├── TestSchemaImpl.groovy │ ├── bootstrap │ └── TestGeneratorUtil.groovy │ └── schema │ └── TestAbstractSchemaColumn.groovy └── resources ├── logback-test.xml └── orc-testng-suite.xml /.gitignore: -------------------------------------------------------------------------------- 1 | *.class 2 | 3 | # Mobile Tools for Java (J2ME) 4 | .mtj.tmp/ 5 | 6 | # Package Files # 7 | *.jar 8 | *.war 9 | *.ear 10 | eclectic-orc.iml 11 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 12 | hs_err_pid* 13 | target/ 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Eclectic-ORC 2 | ===== 3 | 4 | Eclectic-ORC is a Java object writer for creating ORC files by simply annotating your class files as necessary. 5 | The framework uses runtime code generation to create a fast customized ORC writer taking care of all the low-level details. 6 | 7 | ## Feature Highlights 8 | 9 | - Declarative Schema Definition 10 | - Annotated column specification (use `@Orc` or JPA `@Column` annotations) 11 | 12 | # Getting Started 13 | 14 | Download the eclectic-orc jar from Maven central: 15 | 16 | ``` 17 | <dependency> 18 | <groupId>com.eclecticlogic</groupId> 19 | <artifactId>eclectic-orc</artifactId> 20 | <version>1.0.9</version> 21 | </dependency> 22 | ``` 23 | 24 | Minimum dependencies that you need to provide in your application: 25 | 26 | 1. Java 8 or above (the design leverages method references and lambdas extensively) 27 | 1. 
slf4j (over logback or log4j) v1.7.23 or higher 28 | 29 | ## A simple example 30 | 31 | Consider a simple class that you want to serialize to an ORC file: 32 | 33 | ```java 34 | public class Student { 35 | int year; 36 | String name; 37 | 38 | public String getName() { 39 | ... 40 | } 41 | 42 | public int getYear() { 43 | ... 44 | } 45 | 46 | ... 47 | } 48 | ``` 49 | 50 | To write a collection of Students to an ORC file, you first have to provide a schema definition. 51 | The eclectic-orc library makes doing this trivial: 52 | 53 | ```java 54 | import com.eclecticlogic.orc.Factory; 55 | import com.eclecticlogic.orc.Schema; 56 | 57 | ... 58 | 59 | public void schemaSetup() { 60 | Schema schema = Factory.createSchema(Student.class) 61 | .column(Student::getName) // 62 | .column(Student::getYear); 63 | } 64 | ``` 65 | 66 | The above schema definition implicitly does three things: 67 | 68 | 1. It defines the order of the columns (first name then year) 69 | 1. It defines the data types of the columns (String, int) 70 | 1. It defines the names of the columns (name, year) 71 | 72 | The library allows you to customize aspects of the schema. Let us start with column names. 73 | If you want the *year* column to be called *graduationYear*, simply change the 74 | schema column definition. 75 | 76 | ```java 77 | Schema schema = Factory.createSchema(Student.class) 78 | .column(Student::getName) // 79 | .column("graduationYear", Student::getYear); 80 | ``` 81 | 82 | You can also define columns based on properties of other classes that are referenced. If the `Student` class referenced a Club class as shown below: 83 | 84 | ```java 85 | 86 | public class Club { 87 | String name; 88 | 89 | public String sanitizedClubName() { 90 | return ... 
91 | } 92 | } 93 | 94 | public class Student { 95 | Club club; 96 | 97 | public Club getClub() { 98 | return club; 99 | } 100 | } 101 | ``` 102 | You can reference the club name in your schema definition by chaining the call as `getClub().sanitizedClubName()`. 103 | The astute reader would have noticed that sanitizedClubName() is not a java-bean compliant getter. That is right. 104 | eclectic-orc does not restrict you to just java-bean getters. Any method that takes no parameters and returns a non-void 105 | type can be used for a column definition. A schema to incorporate the above definition would look like this. 106 | 107 | ```java 108 | Schema schema = Factory.createSchema(Student.class) 109 | .column(Student::getName) // 110 | .column("graduationYear", Student::getYear) 111 | .column(it -> it.getClub().sanitizedClubName()); 112 | ``` 113 | We've now defined a third column of type `String` and given it an implicit name of "sanitizedClubName." Of course, just like 114 | before you can choose to change the name to something else. The same definition in Groovy could be written as: 115 | 116 | ```groovy 117 | Schema schema = Factory.createSchema(Student) 118 | .column { it.name } 119 | .column('graduationYear') { it.year } 120 | .column { it.club.sanitizedClubName() } 121 | ``` 122 | 123 | To write a collection of `Student` objects, we simply create an OrcHandle reference, configure it, open it to get an OrcWriter 124 | reference and write our collection. 125 | 126 | ```java 127 | import org.apache.hadoop.fs.Path; 128 | 129 | // First get an OrcHandle reference. 130 | OrcHandle handle = Factory.createWriter(schema); 131 | // Customize it by calling one of the withXYZ() methods. This is optional as defaults are provided. 132 | 133 | // Create an OrcWriter by calling open. 134 | Path path = new Path("/home/kabram/temp/dp/graduate.orc"); 135 | 136 | OrcWriter writer = handle.open(path); 137 | List students = ... 
138 | // The write method may be called multiple times if you are retrieving objects in batches. 139 | writer.write(students); 140 | writer.close(); 141 | ``` 142 | 143 | In simple cases, the above code can be written as: 144 | 145 | ```java 146 | Factory.createWriter(schema) // 147 | .open(new Path("/home/kabram/temp/dp/graduate.orc")) // 148 | .write(students) // 149 | .close(); 150 | ``` 151 | 152 | ### Data Type Support 153 | 154 | The following data types are **supported** in the current release: 155 | 156 | 1. Java primitive types - `boolean`, `char`, `byte`, `short`, `int`, `long`, `float`, `double`. These map to their corresponding counterparts 157 | with the exception of `char` which maps to `varchar(1)`. The exception for `char` is because AWS Athena is currently unable to handle `char` column types. 158 | 2. `BigDecimal` mapping to ORC `Decimal` type. 159 | 3. `LocalDate` mapping to ORC `Date` type. 160 | 4. `Date`, `LocalDateTime`, `ZonedDateTime` mapping to ORC `Timestamp` type unless there is either a JPA `@Temporal` or `@OrcTemporal` annotation 161 | that defines the `TemporalType` (or `OrcTemporalType`) as `DATE`. 162 | 5. `String` mapping to ORC `string` type. 163 | 6. Any derivative of `Iterable` mapping to ORC `List` type, currently supporting only simple types as the member. See below for how to use lists. 164 | 165 | The following data types are **not supported** in the current release: 166 | 167 | 1. `Binary` data type. 168 | 2. `Map` 169 | 3. `Union` 170 | 4. Sub-structures (`Struct` within your table, map of structs, list of structs, etc.) 171 | 172 | #### Special cases 173 | 174 | ##### String length specification 175 | 176 | To specify the number of characters for a String column type, simply use the `@Orc` annotation. If the framework finds 177 | an existing JPA `@Column` annotation, it will use the length property of that as well. If both annotations are present, 178 | the `@Orc` annotation takes precedence. 
The `@Orc` annotation is only supported on methods. 179 | 180 | ```java 181 | public class Student { 182 | String name; 183 | 184 | @Orc(length = 50) 185 | public String getName() { 186 | return name; 187 | } 188 | } 189 | ``` 190 | 191 | ##### Decimal precision/scale specification 192 | 193 | You can also specify the precision and scale of `BigDecimal` data type by using the JPA `@Column` or `@Orc` annotations. 194 | By default, the precision is 38 and scale is 10. This can be changed via annotation: 195 | 196 | ```java 197 | public class Employee { 198 | BigDecimal salary; 199 | 200 | @Orc(precision = 10, scale = 2) 201 | public BigDecimal getSalary() { 202 | 203 | } 204 | } 205 | ``` 206 | 207 | ##### Converting data types 208 | 209 | There may be times you want to write a data type that is not a supported type. For example, you may have a birthday property 210 | that only records the year and month using the `java.time.YearMonth` class. You can handle these column types by defining a type 211 | converter, a class that implements the `Converter` interface. In our example, to convert `YearMonth` to `LocalDate`, 212 | defaulting to the first day of the month, we could write: 213 | 214 | ```java 215 | public class YearMonthConverter implements Converter<YearMonth, LocalDate> { 216 | 217 | @Override 218 | public Class<LocalDate> getConvertedClass() { 219 | return LocalDate.class; 220 | } 221 | 222 | 223 | @Override 224 | public LocalDate convert(YearMonth yearMonth) { 225 | return yearMonth.atDay(1); 226 | } 227 | } 228 | ``` 229 | 230 | We can now annotate the `YearMonth` accessor with the `@OrcConverter` annotation: 231 | 232 | ```java 233 | public class Employee { 234 | YearMonth birthday; 235 | 236 | @OrcConverter(YearMonthConverter.class) 237 | public YearMonth getBirthday() { 238 | ... 239 | } 240 | } 241 | 242 | ... 243 | 244 | Schema schema = Factory.createSchema(Employee.class) // 245 | .column(Employee::getBirthday) // This is now a LocalDate data type. 
246 | ``` 247 | 248 | ##### Java Enum 249 | Java Enums require special handling to convert them to a specific data type. There are three ways to handle enums. 250 | 251 | 1. Do nothing: If your schema column is an `Enum` derivative, then the column will be treated as a `String` with the `name()` 252 | method being called to get the value. 253 | 1. Annotation: Annotate a custom enum method with `@Orc`. If you have a method in your `Enum` class that provides the value 254 | you would like to store, you can add the `@Orc` annotation to it. 255 | 1. Converter: Annotate your accessor method that returns an `Enum` with `@OrcConverter` specifying a converter that takes your 256 | enum and returns a supported data-type. 257 | 258 | ##### Handling lists 259 | 260 | Eclectic-orc supports creation of list columns that can hold a single scalar data type. To include a list column in the schema 261 | definition, annotate the accessor method with the `@OrcList` annotation. Strictly speaking, any derivative of `java.lang.Iterable` 262 | is supported. The `@OrcList` annotation requires you to specify the `Class` of the entries of the `Iterable`. This is because the 263 | type information is lost at runtime due to type-erasure. You also need to specify the average number of entries you expect to 264 | see in the list. This is a technical implementation detail due to the way lists are stored in ORC files. Finally, there is 265 | a converter attribute you can use to convert each item of the `Iterable` to a different type. Note: If you annotate the list 266 | accessor with `@OrcConverter`, you will be modifying the `List`/`Iterable` itself into some other data type. 267 | 268 | If your `Iterable` consists of `Enum` instances, the existing strategy for enums is automatically used - using an enum method 269 | annotated with `@Orc` or calling `name()`. 
270 | 271 | # Custom columns 272 | 273 | If your collection member class does not have a method that gets you a column value that you need, i.e., you need to compute the value 274 | on the fly based on existing methods in the class, you can create a delegate class that accepts the collection member class as a 275 | constructor parameter and then implement your logic in the delegate class and use that method in the column definition. 276 | 277 | ```java 278 | Schema schema = Factory.createSchema(Student.class) 279 | .withDelegate(StudentDelegate.class) 280 | .delegatedColumn("someProperty", StudentDelegate::getLastFirstName) 281 | ... 282 | ``` 283 | 284 | The StudentDelegate class would be something like this: 285 | 286 | ```java 287 | class StudentDelegate { 288 | 289 | Student delegate; 290 | 291 | StudentDelegate(Student delegate) { 292 | this.delegate = delegate; 293 | } 294 | 295 | 296 | String getLastFirstName() { 297 | return delegate.getLastName() + ", " + delegate.getFirstName(); 298 | } 299 | } 300 | 301 | ``` 302 | 303 | # Release Notes 304 | 305 | # 1.0.9 306 | 307 | - Reverted usage of JOOR and brought back Javassist since JOOR cannot handle fat-jar that spring boot generates. 308 | 309 | # 1.0.6 310 | 311 | - Temporary fix for compiler classpath issue with JOOR. 312 | 313 | # 1.0.5 314 | 315 | - Switch to JOOR for runtime compilation (better support for Java 9+) 316 | - Fixed bug in array allocation for list columns. 317 | 318 | # 1.0.3 319 | 320 | - Added delegate concept for computed columns. 321 | 322 | # 1.0.2 323 | 324 | - Bug fix in bootstrap - incorrectly caching instance instead of class. 325 | - Bug fix in OrcWriter.withOptions() method. 
326 | 327 | ### 1.0.0 328 | 329 | - Initial release -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4.0.0 3 | 4 | Orc Writer 5 | com.eclecticlogic 6 | eclectic-orc 7 | 1.1.2-SNAPSHOT 8 | jar 9 | 10 | Supports writing Java objects to ORC files. 11 | https://github.com/eclecticlogic/orc 12 | 13 | 14 | The Apache Software License, Version 2.0 15 | http://www.apache.org/licenses/LICENSE-2.0.txt 16 | Repo 17 | 18 | 19 | 20 | 21 | git@github.com:eclecticlogic/eclectic-orc.gt 22 | scm:git:git@github.com:eclecticlogic/eclectic-orc.git 23 | scm:git:git@github.com:eclecticlogic/eclectic-orc.git 24 | HEAD 25 | 26 | 27 | 28 | 29 | kabram 30 | Karthik Abram 31 | karthik@eclecticlogic.com 32 | 33 | 34 | 35 | 36 | 37 | release 38 | 39 | 40 | performRelease 41 | true 42 | 43 | 44 | 45 | 46 | 47 | org.apache.maven.plugins 48 | maven-source-plugin 49 | 2.2.1 50 | 51 | 52 | attach-sources 53 | 54 | jar-no-fork 55 | 56 | 57 | 58 | 59 | 60 | org.apache.maven.plugins 61 | maven-javadoc-plugin 62 | 2.9.1 63 | 64 | 65 | attach-javadocs 66 | 67 | jar 68 | 69 | 70 | 71 | 72 | -Xdoclint:none 73 | 74 | 75 | 76 | org.apache.maven.plugins 77 | maven-gpg-plugin 78 | 79 | 80 | sign-artifacts 81 | verify 82 | 83 | sign 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | org.codehaus.gmavenplus 99 | gmavenplus-plugin 100 | 3.0.0 101 | 102 | 103 | 104 | addTestSources 105 | testCompile 106 | 107 | 108 | 109 | 110 | 111 | org.apache.maven.plugins 112 | maven-compiler-plugin 113 | 3.11.0 114 | 115 | ${env.JAVA_HOME}/bin/javac 116 | 1.8 117 | 1.8 118 | 1.8 119 | 120 | 121 | 122 | 123 | org.apache.maven.plugins 124 | maven-release-plugin 125 | 3.0.1 126 | 127 | forked-path 128 | true 129 | false 130 | release 131 | deploy 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | org.apache.maven.plugins 141 | maven-surefire-plugin 142 | 3.1.2 143 | 144 | 
${env.JAVA_HOME}/bin/java 145 | 146 | src/test/resources/orc-testng-suite.xml 147 | 148 | 149 | 150 | 151 | 152 | org.sonatype.plugins 153 | nexus-staging-maven-plugin 154 | 1.6.7 155 | true 156 | 157 | ossrh 158 | https://oss.sonatype.org/ 159 | true 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | ossrh 169 | https://oss.sonatype.org/content/repositories/snapshots 170 | 171 | 172 | 173 | 174 | 175 | org.apache.orc 176 | orc-core 177 | ${version.orc} 178 | 179 | 180 | log4j 181 | log4j 182 | 183 | 184 | org.slf4j 185 | slf4j-log4j12 186 | 187 | 188 | 189 | 190 | org.apache.hadoop 191 | hadoop-common 192 | 3.3.6 193 | 194 | 195 | org.apache.hadoop 196 | hadoop-hdfs-client 197 | 3.3.6 198 | 199 | 200 | org.apache.hive 201 | hive-storage-api 202 | 2.8.1 203 | 204 | 205 | com.google.protobuf 206 | protobuf-java 207 | 3.24.3 208 | 209 | 210 | cglib 211 | cglib-nodep 212 | 3.2.4 213 | 214 | 215 | org.antlr 216 | stringtemplate 217 | 4.0.2 218 | 219 | 220 | org.hibernate.javax.persistence 221 | hibernate-jpa-2.1-api 222 | 1.0.0.Final 223 | 224 | 225 | org.slf4j 226 | slf4j-api 227 | ${version.slf4j} 228 | jar 229 | provided 230 | 231 | 232 | org.javassist 233 | javassist 234 | 3.21.0-GA 235 | 236 | 237 | 238 | 239 | org.codehaus.groovy 240 | groovy-all 241 | 2.4.8 242 | test 243 | 244 | 245 | 246 | org.testng 247 | testng 248 | 7.5.1 249 | test 250 | 251 | 252 | ch.qos.logback 253 | logback-classic 254 | 1.3.11 255 | test 256 | 257 | 258 | 259 | 260 | 261 | 2.0.9 262 | 1.9.1 263 | 264 | 265 | -------------------------------------------------------------------------------- /src/main/java/com/eclecticlogic/orc/Converter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017 Eclectic Logic LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.eclecticlogic.orc; 18 | 19 | /** 20 | * Converts from a user-type U to an orc-compatible type T. Use this with the @OrcConverter annotation to convert custom data-types to 21 | * orc-compatible types. 22 | * @param user-data type 23 | * @param converted orc-compatible type 24 | */ 25 | public interface Converter { 26 | 27 | /** 28 | * @return Class of the orc-compatible type. 29 | */ 30 | Class getConvertedClass(); 31 | 32 | /** 33 | * @param instance Instance of your domain specific type. 34 | * @return Converted value. 35 | */ 36 | T convert(U instance); 37 | } 38 | -------------------------------------------------------------------------------- /src/main/java/com/eclecticlogic/orc/Factory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017 Eclectic Logic LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.eclecticlogic.orc; 18 | 19 | import com.eclecticlogic.orc.impl.SchemaSpi; 20 | import com.eclecticlogic.orc.impl.bootstrap.OrcWriterBootstrap; 21 | import com.eclecticlogic.orc.impl.SchemaSpiImpl; 22 | 23 | /** 24 | * This is the main class to interact with the eclectic-orc library. 25 | * Created by kabram 26 | */ 27 | public class Factory { 28 | 29 | /** 30 | * @param clz The class of objects you want to write to your orc file. 31 | * @param The type of objects you want to write. 32 | * @return Schema creator to specify the orc file schema. 33 | */ 34 | public static Schema createSchema(Class clz) { 35 | return new SchemaSpiImpl(clz); 36 | } 37 | 38 | 39 | /** 40 | * @param schema The schema for the orc file you want to create. 41 | * @param The type of the object you are working with. 42 | * @return Instance that allows you to configure, open and write to your data. 43 | */ 44 | public static OrcHandle createWriter(Schema schema) { 45 | return OrcWriterBootstrap.create((SchemaSpi)schema); 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/main/java/com/eclecticlogic/orc/Orc.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017 Eclectic Logic LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
/**
 * Supplies additional column properties used for the type definition of the annotated accessor method. Note: If your class has
 * JPA @Column annotations, those can be used in lieu of this annotation. If both are specified, this annotation takes precedence.
 *
 * Created by kabram
 */
@Inherited
@Target(ElementType.METHOD)
@Retention(RetentionPolicy.RUNTIME)
public @interface Orc {

    /**
     * @return Length of the field. Applicable only to string. When left at the default (0) the output column will be of type
     * string; otherwise the output column will be of type varchar(length).
     */
    int length() default 0;

    /**
     * @return Precision for BigDecimal type.
     */
    int precision() default 0;

    /**
     * @return Scale for BigDecimal type.
     */
    int scale() default 0;
}
15 | */ 16 | 17 | package com.eclecticlogic.orc; 18 | 19 | import java.lang.annotation.ElementType; 20 | import java.lang.annotation.Inherited; 21 | import java.lang.annotation.Retention; 22 | import java.lang.annotation.RetentionPolicy; 23 | import java.lang.annotation.Target; 24 | 25 | /** 26 | * Annotate a domain specific type with this to define a converter that can convert your data-type to an orc-compatible one. 27 | * Created by kabram 28 | */ 29 | @Retention(RetentionPolicy.RUNTIME) 30 | @Target(ElementType.METHOD) 31 | @Inherited 32 | public @interface OrcConverter { 33 | 34 | /** 35 | * @return Converter class to use for type-conversion. 36 | */ 37 | Class value(); 38 | } 39 | -------------------------------------------------------------------------------- /src/main/java/com/eclecticlogic/orc/OrcHandle.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017 Eclectic Logic LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.eclecticlogic.orc; 18 | 19 | import org.apache.hadoop.conf.Configuration; 20 | import org.apache.hadoop.fs.Path; 21 | import org.apache.orc.CompressionKind; 22 | import org.apache.orc.OrcFile.WriterOptions; 23 | 24 | /** 25 | * This is the interface to configure and open an orc file. Get an instance of this from the Factory by passing in a Schema definition. 
26 | * Created by kabram 27 | */ 28 | public interface OrcHandle { 29 | 30 | /** 31 | * @param configuration Configuration to use. This is optional. 32 | * @return self reference for fluent interface. 33 | */ 34 | OrcHandle withConfiguration(Configuration configuration); 35 | 36 | /** 37 | * @param writerOptions Writer options to use. Note: if you pass in an explicit writerOptions object, this value will not be used. 38 | * @return self reference for fluent interface. 39 | */ 40 | OrcHandle withOptions(WriterOptions writerOptions); 41 | 42 | /** 43 | * @param compressionKind Compression to use. This value will overwrite any setting passed in WriterOptions. 44 | * @return self reference for fluent interface. 45 | */ 46 | OrcHandle withCompression(CompressionKind compressionKind); 47 | 48 | /** 49 | * @param size Buffer size to use. This value will overwrite any setting passed in WriterOptions. 50 | * @return self reference for fluent interface. 51 | */ 52 | OrcHandle withBufferSize(int size); 53 | 54 | /** 55 | * @param batchSize Vector batch size to use. 56 | * @return self reference for fluent interface. 57 | */ 58 | OrcHandle withBatchSize(int batchSize); 59 | 60 | 61 | /** 62 | * @param path Path to write orc file to. 63 | * @return self reference for fluent interface. 64 | */ 65 | OrcWriter open(Path path); 66 | 67 | } 68 | -------------------------------------------------------------------------------- /src/main/java/com/eclecticlogic/orc/OrcList.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017 Eclectic Logic LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.eclecticlogic.orc; 18 | 19 | import java.lang.annotation.ElementType; 20 | import java.lang.annotation.Inherited; 21 | import java.lang.annotation.Retention; 22 | import java.lang.annotation.RetentionPolicy; 23 | import java.lang.annotation.Target; 24 | 25 | /** 26 | * Annotates a list (strictly, any derivative of java.lang.Iterable) return type to denote child data type and average collection size. 27 | * Created by kabram 28 | */ 29 | @Retention(RetentionPolicy.RUNTIME) 30 | @Target(ElementType.METHOD) 31 | @Inherited 32 | public @interface OrcList { 33 | 34 | /** 35 | * @return Type of the elements. Due to type-erasure, this information is lost at runtime in code. Therefore we attempt to explicitly 36 | * capture it. 37 | */ 38 | Class entryType(); 39 | 40 | 41 | /** 42 | * @return Average size in bytes of the elements. 43 | */ 44 | int elementSize() default 1; 45 | 46 | 47 | /** 48 | * @return Average size of elements in the collection. 49 | */ 50 | int averageSize() default 1; 51 | 52 | 53 | /** 54 | * @return A converter for each element of the class. T 55 | */ 56 | Class> converter() default DEFAULT.class; 57 | 58 | 59 | /** 60 | * An elaborate workaround for a vexing issue with not being able to use null as the default value. 
61 | * Refer to http://stackoverflow.com/questions/1178104/error-setting-a-default-null-value-for-an-annotations-field 62 | */ 63 | static abstract class DEFAULT implements Converter {} 64 | } 65 | -------------------------------------------------------------------------------- /src/main/java/com/eclecticlogic/orc/OrcTemporal.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017 Eclectic Logic LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.eclecticlogic.orc; 18 | 19 | import java.lang.annotation.Retention; 20 | import java.lang.annotation.Target; 21 | 22 | import static java.lang.annotation.ElementType.METHOD; 23 | import static java.lang.annotation.RetentionPolicy.RUNTIME; 24 | 25 | /** 26 | * Similar to the JPA temporal annotation, defines whether the date should be interpreted as including or not-including time. 27 | * If the JPA @Temporal annotation is present, its value will be used. If both annotations are present, this annotation takes precedence. 28 | * 29 | * Created by kabram. 
30 | */ 31 | @Target(METHOD) 32 | @Retention(RUNTIME) 33 | public @interface OrcTemporal { 34 | /** 35 | * Discriminate between date and timstamp 36 | */ 37 | OrcTemporalType value(); 38 | } 39 | -------------------------------------------------------------------------------- /src/main/java/com/eclecticlogic/orc/OrcTemporalType.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017 Eclectic Logic LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.eclecticlogic.orc; 18 | 19 | /** 20 | * Defines whether a java.util.Date value (or derivative) should be interpreted as a Date without time or a timestamp value. 21 | * Created by kabram. 22 | */ 23 | public enum OrcTemporalType { 24 | DATE, // 25 | TIMESTAMP 26 | } 27 | -------------------------------------------------------------------------------- /src/main/java/com/eclecticlogic/orc/OrcWriter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017 Eclectic Logic LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
/**
 * This is the interface to write your data and close the orc file.
 * Created by kabram
 *
 * @param <T> The type of the objects being written.
 */
public interface OrcWriter<T> extends Closeable {

    /**
     * This method will throw a wrapped IOException if underlying API throws an IO exception. This may be called multiple times
     * to write data to the same file.
     * @param data Data to write.
     * @return self reference so that further writes (or close) can be chained.
     */
    OrcWriter<T> write(Iterable<T> data);


    /**
     * A variant of the Closeable.close() method that calls the supplied exception handler instead of throwing an exception.
     * Useful in cases where you want to simply ignore the exception and not make your code verbose.
     * @param exceptionHandler Receives the IOException raised while closing, if any.
     */
    void close(Consumer<IOException> exceptionHandler);
}
/**
 * Interface to define your orc file schema. The columns are defined in the order in which you call the column functions.
 *
 * Created by kabram
 *
 * @param <T> The type of the objects the schema describes.
 */
public interface Schema<T> {

    /**
     * @param fieldFunction An instance of the type T will be passed to this function and you are expected to call the appropriate
     *                      method to define the column. You are not restricted to just java-bean getters. Any method that takes no
     *                      parameter and returns a non-void type can be called. You can also chain method calls (e.g. getXyz().getPqr())
     *                      to get at sub-attributes. The name of the column is derived from the last method to be invoked.
     *
     * @return Self-reference for fluent interface buildout.
     */
    Schema<T> column(Function<T, Object> fieldFunction);

    /**
     * @param name An explicit name to be used for the column.
     * @param columnFunction Same as the function accepted by {@link #column(Function)}.
     * @return self-reference for fluent interface.
     */
    Schema<T> column(String name, Function<T, Object> columnFunction);


    /**
     * @param delegate A class that accepts an instance of T in the constructor and provides "computed" functions.
     * @param <D> The delegate type.
     * @return self-reference for fluent-interface.
     */
    <D> Schema<T> withDelegate(Class<D> delegate);


    /**
     * Same as {@link #column(Function)} except that the function is handed the delegate registered through
     * {@link #withDelegate(Class)} instead of an instance of T.
     *
     * @param fieldFunction Function that calls the delegate method defining the column.
     * @param <D> The delegate type.
     * @return self-reference for fluent interface.
     */
    <D> Schema<T> delegatedColumn(Function<D, Object> fieldFunction);

    /**
     * @param name An explicit name to be used for the column.
     * @param columnFunction Same as the function accepted by {@link #delegatedColumn(Function)}.
     * @param <D> The delegate type.
     * @return self-reference for fluent interface.
     */
    <D> Schema<T> delegatedColumn(String name, Function<D, Object> columnFunction);

}
15 | */ 16 | 17 | package com.eclecticlogic.orc.impl; 18 | 19 | import com.eclecticlogic.orc.OrcHandle; 20 | import com.eclecticlogic.orc.OrcWriter; 21 | import org.apache.hadoop.conf.Configuration; 22 | import org.apache.hadoop.fs.Path; 23 | import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; 24 | import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; 25 | import org.apache.orc.CompressionKind; 26 | import org.apache.orc.OrcFile; 27 | import org.apache.orc.TypeDescription; 28 | import org.apache.orc.Writer; 29 | 30 | import java.io.IOException; 31 | import java.util.function.Consumer; 32 | 33 | /** 34 | * Created by kabram 35 | */ 36 | public abstract class AbstractOrcWriter implements OrcHandle, OrcWriter { 37 | 38 | private Configuration configuration = new Configuration(); 39 | private OrcFile.WriterOptions writerOptions; 40 | private CompressionKind compressionKind; 41 | private int bufferSize = 10 * 1024; 42 | private int batchSize = 1024; 43 | private TypeDescription _typeDescription; 44 | protected VectorizedRowBatch vectorizedRowBatch; 45 | private Writer writer; 46 | 47 | 48 | @Override 49 | public OrcHandle withConfiguration(Configuration configuration) { 50 | this.configuration = configuration; 51 | return this; 52 | } 53 | 54 | @Override 55 | public OrcHandle withOptions(OrcFile.WriterOptions writerOptions) { 56 | this.writerOptions = writerOptions; 57 | return this; 58 | } 59 | 60 | @Override 61 | public OrcHandle withCompression(CompressionKind compressionKind) { 62 | this.compressionKind = compressionKind; 63 | return this; 64 | } 65 | 66 | @Override 67 | public OrcHandle withBufferSize(int size) { 68 | this.bufferSize = size; 69 | return this; 70 | } 71 | 72 | @Override 73 | public OrcHandle withBatchSize(int batchSize) { 74 | this.batchSize = batchSize; 75 | return this; 76 | } 77 | 78 | 79 | @Override 80 | public OrcWriter open(Path path) { 81 | if (writerOptions == null) { 82 | writerOptions = 
OrcFile.writerOptions(configuration); 83 | } 84 | if (compressionKind != null) { 85 | writerOptions.compress(compressionKind); 86 | } 87 | if (bufferSize != 0) { 88 | writerOptions.bufferSize(bufferSize); 89 | } 90 | // Add the schema to the writer options. 91 | TypeDescription schema = getTypeDescription(); 92 | writerOptions.setSchema(schema); 93 | try { 94 | writer = OrcFile.createWriter(path, writerOptions); 95 | } catch (IOException e) { 96 | throw new RuntimeException(e); 97 | } 98 | vectorizedRowBatch = schema.createRowBatch(batchSize); 99 | specialCaseSetup(); 100 | return this; 101 | } 102 | 103 | 104 | @Override 105 | public OrcWriter write(Iterable data) { 106 | try { 107 | for (T datum : data) { 108 | if (vectorizedRowBatch.size == vectorizedRowBatch.getMaxSize()) { 109 | writer.addRowBatch(vectorizedRowBatch); 110 | vectorizedRowBatch.reset(); 111 | } 112 | // Write the datum to the column vectors. 113 | write(datum); 114 | vectorizedRowBatch.size++; 115 | } 116 | } catch (IOException e) { 117 | throw new RuntimeException(e); 118 | } 119 | return this; 120 | } 121 | 122 | 123 | @Override 124 | public void close() throws IOException { 125 | if (vectorizedRowBatch != null) { 126 | writer.addRowBatch(vectorizedRowBatch); 127 | vectorizedRowBatch = null; 128 | } 129 | if (writer != null) { 130 | writer.close(); 131 | writer = null; 132 | } 133 | } 134 | 135 | 136 | @Override 137 | public void close(Consumer exceptionHandler) { 138 | try { 139 | close(); 140 | } catch (IOException e) { 141 | exceptionHandler.accept(e); 142 | } 143 | } 144 | 145 | 146 | protected TypeDescription getTypeDescription() { 147 | if (_typeDescription == null) { 148 | _typeDescription = createTypeDescription(); 149 | } 150 | return _typeDescription; 151 | } 152 | 153 | 154 | /** 155 | * Helper utility to set the value of the current property to null in the vector. 
156 | * @param vector 157 | */ 158 | protected void setNull(ColumnVector vector) { 159 | vector.isNull[vectorizedRowBatch.size] = true; 160 | vector.noNulls = false; 161 | } 162 | 163 | 164 | /** 165 | * @return The schema for the orc file as computed by the property access definitions. The implementation is generated dynamically at 166 | * runtime using javassist. 167 | */ 168 | protected abstract TypeDescription createTypeDescription(); 169 | 170 | 171 | /** 172 | * Hook to setup special cases such as the modification of list child to support the full flattened size 173 | * (rows x average list column size per row) 174 | */ 175 | protected abstract void specialCaseSetup(); 176 | 177 | 178 | /** 179 | * Routine that actually populates one row of the list into the vectorized row batch. The implementation of this is generated 180 | * dynamically at runtime using javassist. 181 | * @param datum Object instance to write. 182 | */ 183 | protected abstract void write(T datum); 184 | 185 | 186 | @Override 187 | public String toString() { 188 | return getTypeDescription().toString(); 189 | } 190 | } 191 | -------------------------------------------------------------------------------- /src/main/java/com/eclecticlogic/orc/impl/Column.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017 Eclectic Logic LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.eclecticlogic.orc.impl; 18 | 19 | import java.util.function.Function; 20 | import java.util.function.Supplier; 21 | 22 | /** 23 | * Captures the elements of a schema column - name, column accessor function and sub-schema. 24 | * Created by kabram 25 | */ 26 | public class Column { 27 | Supplier nameFunction; 28 | Function columnFunction; 29 | boolean delegated; 30 | SchemaSpi subSchema; 31 | 32 | public Supplier getNameFunction() { 33 | return nameFunction; 34 | } 35 | 36 | public void setNameFunction(Supplier nameFunction) { 37 | this.nameFunction = nameFunction; 38 | } 39 | 40 | public Function getColumnFunction() { 41 | return columnFunction; 42 | } 43 | 44 | public void setColumnFunction(Function columnFunction) { 45 | this.columnFunction = columnFunction; 46 | } 47 | 48 | public boolean isDelegated() { 49 | return delegated; 50 | } 51 | 52 | public void setDelegated(boolean delegated) { 53 | this.delegated = delegated; 54 | } 55 | 56 | public SchemaSpi getSubSchema() { 57 | return subSchema; 58 | } 59 | 60 | public void setSubSchema(SchemaSpi subSchema) { 61 | this.subSchema = subSchema; 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /src/main/java/com/eclecticlogic/orc/impl/PropertyInterceptor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017 Eclectic Logic LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.eclecticlogic.orc.impl; 18 | 19 | import com.eclecticlogic.orc.impl.bootstrap.GeneratorUtil; 20 | import net.sf.cglib.proxy.MethodInterceptor; 21 | import net.sf.cglib.proxy.MethodProxy; 22 | 23 | import java.lang.reflect.Method; 24 | import java.lang.reflect.Modifier; 25 | import java.util.stream.Stream; 26 | 27 | /** 28 | * Created by kabram 29 | */ 30 | public class PropertyInterceptor implements MethodInterceptor { 31 | 32 | private final SchemaSpi schema; 33 | private final ProxyManager proxyManager; 34 | 35 | 36 | public PropertyInterceptor(ProxyManager proxyManager, SchemaSpi schema) { 37 | this.schema = schema; 38 | this.proxyManager = proxyManager; 39 | } 40 | 41 | 42 | @Override 43 | public Object intercept(Object o, Method method, Object[] objects, MethodProxy methodProxy) throws Throwable { 44 | if (schema != null) { 45 | schema.add(method); 46 | } 47 | return getReturnValue(method); 48 | } 49 | 50 | 51 | @SuppressWarnings("unchecked") 52 | Object getReturnValue(Method method) { 53 | Class returnType = method.getReturnType(); 54 | if (returnType.isPrimitive()) { 55 | return GeneratorUtil.getDefaultValueForPrimitiveType(method.getReturnType()); 56 | } else if (Modifier.isFinal(returnType.getModifiers()) || !isDefaultConstructable(returnType)) { 57 | // Cannot sub-class a final class or a class with no default constructor and therefore cannot make a proxy 58 | return null; 59 | } else { 60 | return proxyManager.generate(returnType); 61 | } 62 | } 63 | 64 | 65 | /** 66 | * Attribution: http://stackoverflow.com/questions/27810634/how-can-i-check-a-class-has-no-arguments-constructor 67 | * 68 | * @param clazz 69 | * @return true if clazz has a contructor with 0 parameters. 
70 | */ 71 | private boolean isDefaultConstructable(Class clazz) { 72 | return Stream.of(clazz.getConstructors()).anyMatch((c) -> c.getParameterCount() == 0); 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /src/main/java/com/eclecticlogic/orc/impl/ProxyManager.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017 Eclectic Logic LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.eclecticlogic.orc.impl; 18 | 19 | import net.sf.cglib.proxy.Callback; 20 | import net.sf.cglib.proxy.Enhancer; 21 | import net.sf.cglib.proxy.NoOp; 22 | 23 | /** 24 | * Created by kabram 25 | */ 26 | public class ProxyManager { 27 | final SchemaSpi schema; 28 | 29 | public ProxyManager(SchemaSpi schema) { 30 | this.schema = schema; 31 | } 32 | 33 | public T generate(Class clz) { 34 | Enhancer enhancer = new Enhancer(); 35 | enhancer.setSuperclass(clz); 36 | enhancer.setCallbacks(new Callback[]{new PropertyInterceptor<>(this, schema), NoOp.INSTANCE}); 37 | enhancer.setCallbackFilter(new SchemaFilter<>(clz)); 38 | return (T) enhancer.create(); 39 | } 40 | 41 | 42 | public R generate(Class clz, Class[] argTypes, Object ... 
args) { 43 | Enhancer enhancer = new Enhancer(); 44 | enhancer.setSuperclass(clz); 45 | enhancer.setCallbacks(new Callback[]{new PropertyInterceptor<>(this, schema), NoOp.INSTANCE}); 46 | enhancer.setCallbackFilter(new SchemaFilter<>(clz)); 47 | return (R) enhancer.create(argTypes, args); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/main/java/com/eclecticlogic/orc/impl/SchemaFilter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017 Eclectic Logic LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.eclecticlogic.orc.impl; 18 | 19 | import net.sf.cglib.proxy.CallbackFilter; 20 | 21 | import java.lang.reflect.Method; 22 | import java.util.Arrays; 23 | import java.util.HashSet; 24 | import java.util.Set; 25 | 26 | import static java.util.stream.Collectors.*; 27 | 28 | /** 29 | * Filter to decide whether to intercept the method or not. 
30 | * Created by kabram 31 | */ 32 | public class SchemaFilter implements CallbackFilter { 33 | 34 | private final Class clz; 35 | private final Set allowedMethods; 36 | private static final Set disallowed = new HashSet(Arrays.asList("getClass", "notify", "wait", "notifyAll")); 37 | 38 | public SchemaFilter(Class clz) { 39 | this.clz = clz; 40 | allowedMethods = Arrays.stream(clz.getMethods()) // 41 | // No parameters 42 | .filter(it -> it.getParameterCount() == 0) // 43 | // Not void return type 44 | .filter(it -> !it.getReturnType().isAssignableFrom(Void.class)) // 45 | .filter(it -> !disallowed.contains(it.getName())) // 46 | // Filter out groovy specific meta-methods. Primitives and arrays don't have a package name! 47 | .filter(it -> it.getReturnType().isPrimitive() || // 48 | it.getReturnType().isArray() || // 49 | !it.getReturnType().getPackage().getName().contains("groovy")) 50 | .collect(toSet()); 51 | } 52 | 53 | 54 | @Override 55 | public int accept(Method method) { 56 | return allowedMethods.contains(method) ? 0 : 1; 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /src/main/java/com/eclecticlogic/orc/impl/SchemaSpi.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017 Eclectic Logic LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.eclecticlogic.orc.impl; 18 | 19 | import com.eclecticlogic.orc.Schema; 20 | import com.eclecticlogic.orc.impl.schema.SchemaColumn; 21 | 22 | import java.lang.reflect.Method; 23 | 24 | /** 25 | * Created by kabram 26 | */ 27 | public interface SchemaSpi extends Schema { 28 | 29 | Class getSchemaClass(); 30 | 31 | SchemaColumn compile(); 32 | 33 | void add(Method method); 34 | } 35 | -------------------------------------------------------------------------------- /src/main/java/com/eclecticlogic/orc/impl/SchemaSpiImpl.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017 Eclectic Logic LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.eclecticlogic.orc.impl; 18 | 19 | import com.eclecticlogic.orc.Schema; 20 | import com.eclecticlogic.orc.impl.bootstrap.GeneratorUtil; 21 | import com.eclecticlogic.orc.impl.schema.ListChildSchemaColumn; 22 | import com.eclecticlogic.orc.impl.schema.SchemaColumn; 23 | import org.apache.orc.TypeDescription.Category; 24 | 25 | import java.lang.reflect.Method; 26 | import java.util.ArrayList; 27 | import java.util.List; 28 | import java.util.function.Function; 29 | import java.util.function.Supplier; 30 | 31 | /** 32 | * Created by kabram 33 | */ 34 | public class SchemaSpiImpl implements SchemaSpi { 35 | 36 | final Class schemaClz; 37 | final T proxy, delegateProxy; 38 | Object delegate; 39 | Class delegateClass; 40 | final List columns = new ArrayList<>(); 41 | SchemaColumn currentSchemaColumn; 42 | String lastAccessedProperty; 43 | 44 | public SchemaSpiImpl(Class clz) { 45 | schemaClz = clz; 46 | proxy = new ProxyManager<>(this).generate(clz); 47 | delegateProxy = new ProxyManager<>((SchemaSpiImpl)null).generate(clz); 48 | } 49 | 50 | 51 | @Override 52 | public Schema withDelegate(Class delegate) { 53 | this.delegateClass = delegate; 54 | this.delegate = new ProxyManager<>(this).generate(delegate, new Class[] {schemaClz}, delegateProxy); 55 | return this; 56 | } 57 | 58 | 59 | @Override 60 | public Class getSchemaClass() { 61 | return schemaClz; 62 | } 63 | 64 | 65 | @Override 66 | public Schema column(Function columnFunction) { 67 | return column(() -> lastAccessedProperty, (Function) columnFunction, false); 68 | } 69 | 70 | 71 | @Override 72 | public Schema column(String name, Function columnFunction) { 73 | return column(() -> name, (Function) columnFunction, false); 74 | } 75 | 76 | 77 | @Override 78 | public Schema delegatedColumn(Function columnFunction) { 79 | return column(() -> lastAccessedProperty, (Function) columnFunction, true); 80 | } 81 | 82 | 83 | @Override 84 | public Schema delegatedColumn(String name, Function 
columnFunction) { 85 | return column(() -> name, (Function) columnFunction, true); 86 | } 87 | 88 | 89 | Schema column(Supplier nameFunction, Function columnFunction, boolean delegated) { 90 | Column column = new Column(); 91 | column.setNameFunction(nameFunction); 92 | column.setColumnFunction(columnFunction); 93 | column.setDelegated(delegated); 94 | columns.add(column); 95 | return this; 96 | } 97 | 98 | 99 | @Override 100 | public SchemaColumn compile() { 101 | SchemaColumn struct = new SchemaColumn(); 102 | struct.setDelegateClass(delegateClass); 103 | 104 | for (Column column : columns) { 105 | lastAccessedProperty = null; 106 | currentSchemaColumn = new SchemaColumn(); 107 | currentSchemaColumn.setNeedsDelegate(column.isDelegated()); 108 | column.getColumnFunction().apply(column.isDelegated() ? delegate : proxy); 109 | currentSchemaColumn.getTypeDescription().setName(column.getNameFunction().get()); 110 | struct.getComplexType().getStructChildren().add(currentSchemaColumn); 111 | // Special types. 
112 | if (currentSchemaColumn.getCategory() == Category.LIST) { 113 | currentSchemaColumn.getComplexType().setListChild(new ListChildSchemaColumn(currentSchemaColumn)); 114 | } 115 | } 116 | computeColumnIndices(struct.getComplexType().getStructChildren(), 0); 117 | return struct; 118 | } 119 | 120 | 121 | int computeColumnIndices(List schemaColumns, int index) { 122 | for (SchemaColumn column : schemaColumns) { 123 | if (column.getCategory() == Category.STRUCT) { 124 | index = computeColumnIndices(column.getComplexType().getStructChildren(), index); 125 | } else { 126 | column.setColumnIndex(index++); 127 | } 128 | } 129 | return index; 130 | } 131 | 132 | 133 | @Override 134 | public void add(Method accessor) { 135 | currentSchemaColumn.getAccessorMethods().add(accessor); 136 | lastAccessedProperty = GeneratorUtil.getPropertyName(accessor); 137 | } 138 | 139 | 140 | } 141 | -------------------------------------------------------------------------------- /src/main/java/com/eclecticlogic/orc/impl/bootstrap/GeneratorUtil.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017 Eclectic Logic LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.eclecticlogic.orc.impl.bootstrap; 18 | 19 | import com.eclecticlogic.orc.Orc; 20 | import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; 21 | import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; 22 | import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; 23 | import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; 24 | import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector; 25 | import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; 26 | import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector; 27 | import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector; 28 | import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; 29 | import org.apache.hadoop.hive.ql.exec.vector.UnionColumnVector; 30 | import org.apache.orc.TypeDescription.Category; 31 | 32 | import java.lang.reflect.Method; 33 | import java.math.BigDecimal; 34 | import java.time.LocalDate; 35 | import java.time.LocalDateTime; 36 | import java.time.ZonedDateTime; 37 | import java.util.Arrays; 38 | import java.util.Collections; 39 | import java.util.Date; 40 | import java.util.HashMap; 41 | import java.util.Map; 42 | import java.util.Optional; 43 | 44 | import static org.apache.orc.TypeDescription.Category.*; 45 | 46 | /** 47 | * A dumping ground for various static mappings! 
48 | * Created by kabram 49 | */ 50 | public class GeneratorUtil { 51 | 52 | private final static Map, Category> categoriesByBasicType; 53 | private final static Map, Category> categoriesByAssignableType; 54 | private final static Map, String> primitiveAccessorByType; 55 | private final static Map, String> templateNameClassReinitByType; 56 | private final static Map, Object> defaultsByPrimitiveType; 57 | private final static Map typeDescriptionCreatorByCategory; 58 | private final static Map templateNameColumnSetterByCategory; 59 | private final static Map> vectorClassesByCategory; 60 | 61 | static { 62 | { 63 | Map, Category> map = new HashMap(); 64 | map.put(Boolean.TYPE, Category.BOOLEAN); 65 | map.put(Boolean.class, Category.BOOLEAN); 66 | map.put(Character.TYPE, Category.CHAR); 67 | map.put(Character.class, Category.CHAR); 68 | map.put(Byte.TYPE, Category.BYTE); 69 | map.put(Byte.class, Category.BYTE); 70 | map.put(Short.TYPE, Category.SHORT); 71 | map.put(Short.class, Category.SHORT); 72 | map.put(Integer.TYPE, Category.INT); 73 | map.put(Integer.class, Category.INT); 74 | map.put(Long.TYPE, Category.LONG); 75 | map.put(Long.class, Category.LONG); 76 | map.put(Float.TYPE, Category.FLOAT); 77 | map.put(Float.class, Category.FLOAT); 78 | map.put(Double.TYPE, Category.DOUBLE); 79 | map.put(Double.class, Category.DOUBLE); 80 | categoriesByBasicType = Collections.unmodifiableMap(map); 81 | } 82 | { 83 | Map, String> map = new HashMap(); 84 | map.put(Boolean.class, "booleanValue()"); 85 | map.put(Byte.class, "byteValue()"); 86 | map.put(Character.class, "charValue()"); 87 | map.put(Short.class, "shortValue()"); 88 | map.put(Integer.class, "intValue()"); 89 | map.put(Long.class, "longValue()"); 90 | map.put(Float.class, "floatValue()"); 91 | map.put(Double.class, "doubleValue()"); 92 | primitiveAccessorByType = Collections.unmodifiableMap(map); 93 | } 94 | { 95 | Map, String> map = new HashMap(); 96 | map.put(BytesColumnVector.class, "initBytesList"); 97 | 
map.put(LongColumnVector.class, "initLongList"); 98 | map.put(DoubleColumnVector.class, "initDoubleList"); 99 | map.put(DecimalColumnVector.class, "initDecimalList"); 100 | map.put(TimestampColumnVector.class, "initTimestampList"); 101 | templateNameClassReinitByType = Collections.unmodifiableMap(map); 102 | } 103 | { 104 | Map, Object> map = new HashMap(); 105 | map.put(Boolean.TYPE, Boolean.valueOf(false)); 106 | map.put(Character.TYPE, Character.valueOf('\u0000')); 107 | map.put(Byte.TYPE, Byte.valueOf((byte)0)); 108 | map.put(Short.TYPE, Short.valueOf((short)0)); 109 | map.put(Integer.TYPE, Integer.valueOf(0)); 110 | map.put(Long.TYPE, Long.valueOf(0L)); 111 | map.put(Float.TYPE, Float.valueOf(0.0F)); 112 | map.put(Double.TYPE, Double.valueOf(0.0D)); 113 | defaultsByPrimitiveType = Collections.unmodifiableMap(map); 114 | } 115 | { 116 | Map, Category> map = new HashMap<>(); 117 | map.put(BigDecimal.class, Category.DECIMAL); 118 | map.put(LocalDate.class, Category.DATE); 119 | map.put(LocalDateTime.class, Category.TIMESTAMP); 120 | map.put(ZonedDateTime.class, Category.TIMESTAMP); 121 | map.put(Date.class, Category.TIMESTAMP); 122 | map.put(Iterable.class, Category.LIST); 123 | categoriesByAssignableType = Collections.unmodifiableMap(map); 124 | } 125 | { 126 | Map map = new HashMap<>(); 127 | map.put(Category.BINARY, "createBinary"); 128 | map.put(Category.BOOLEAN, "createBoolean"); 129 | map.put(Category.BYTE, "createByte"); 130 | map.put(Category.CHAR, "createVarchar"); // AWS Athena doesn't seem to support char. 
131 | map.put(Category.DATE, "createDate"); 132 | map.put(Category.DECIMAL, "createDecimal"); 133 | map.put(Category.DOUBLE, "createDouble"); 134 | map.put(Category.FLOAT, "createFloat"); 135 | map.put(Category.INT, "createInt"); 136 | map.put(Category.LIST, "createList"); 137 | map.put(Category.LONG, "createLong"); 138 | map.put(Category.MAP, "createMap"); 139 | map.put(Category.SHORT, "createShort"); 140 | map.put(Category.STRING, "createString"); 141 | map.put(Category.STRUCT, "createStruct"); 142 | map.put(Category.TIMESTAMP, "createTimestamp"); 143 | map.put(Category.UNION, "createUnion"); 144 | map.put(Category.VARCHAR, "createVarchar"); 145 | typeDescriptionCreatorByCategory = Collections.unmodifiableMap(map); 146 | } 147 | { 148 | Map> map = new HashMap<>(); 149 | map.put(Category.BINARY, BytesColumnVector.class); 150 | map.put(Category.BOOLEAN, LongColumnVector.class); 151 | map.put(Category.BYTE, LongColumnVector.class); 152 | map.put(Category.CHAR, BytesColumnVector.class); 153 | map.put(Category.DATE, LongColumnVector.class); 154 | map.put(Category.DECIMAL, DecimalColumnVector.class); 155 | map.put(Category.DOUBLE, DoubleColumnVector.class); 156 | map.put(Category.FLOAT, DoubleColumnVector.class); 157 | map.put(Category.INT, LongColumnVector.class); 158 | map.put(Category.LIST, ListColumnVector.class); 159 | map.put(Category.LONG, LongColumnVector.class); 160 | map.put(Category.MAP, MapColumnVector.class); 161 | map.put(Category.SHORT, LongColumnVector.class); 162 | map.put(Category.STRING, BytesColumnVector.class); 163 | map.put(Category.STRUCT, StructColumnVector.class); 164 | map.put(Category.TIMESTAMP, TimestampColumnVector.class); 165 | map.put(Category.UNION, UnionColumnVector.class); 166 | map.put(Category.VARCHAR, BytesColumnVector.class); 167 | vectorClassesByCategory = Collections.unmodifiableMap(map); 168 | } 169 | { 170 | Map map = new HashMap<>(); 171 | map.put(Category.BINARY, "columnBinary"); 172 | map.put(Category.BOOLEAN, 
"columnBoolean"); 173 | map.put(Category.BYTE, "columnByte"); 174 | map.put(Category.CHAR, "columnChar"); // AWS Athena doesn't seem to support char. 175 | map.put(Category.DATE, "columnDate"); 176 | map.put(Category.DECIMAL, "columnDecimal"); 177 | map.put(Category.DOUBLE, "columnDouble"); 178 | map.put(Category.FLOAT, "columnFloat"); 179 | map.put(Category.INT, "columnInt"); 180 | map.put(Category.LIST, "columnList"); 181 | map.put(Category.LONG, "columnLong"); 182 | map.put(Category.MAP, "columnMap"); 183 | map.put(Category.SHORT, "columnShort"); 184 | map.put(Category.STRING, "columnVarchar"); 185 | map.put(Category.STRUCT, "columnStruct"); 186 | map.put(Category.TIMESTAMP, "columnTimestamp"); 187 | map.put(Category.UNION, "columnUnion"); 188 | map.put(Category.VARCHAR, "columnVarchar"); 189 | templateNameColumnSetterByCategory = Collections.unmodifiableMap(map); 190 | } 191 | } 192 | 193 | public static Object getDefaultValueForPrimitiveType(Class primitiveType) { 194 | return defaultsByPrimitiveType.get(primitiveType); 195 | } 196 | 197 | public static Category getCategoryByBasicType(Class clz) { 198 | return categoriesByBasicType.get(clz); 199 | } 200 | 201 | 202 | public static Category getCategoryByAssignableType(Class clz) { 203 | for (Class aClz : categoriesByAssignableType.keySet()) { 204 | if (aClz.isAssignableFrom(clz)) { 205 | return categoriesByAssignableType.get(aClz); 206 | } 207 | } 208 | return null; 209 | } 210 | 211 | 212 | public static String getPrimitiveAccessorByType(Class clz) { 213 | return primitiveAccessorByType.get(clz); 214 | } 215 | 216 | 217 | public static String getTypeDescriptionCreator(Category category) { 218 | return typeDescriptionCreatorByCategory.get(category); 219 | } 220 | 221 | 222 | public static Class getVectorClassName(Category category) { 223 | return vectorClassesByCategory.get(category); 224 | } 225 | 226 | 227 | public static String getTemplateNameColumnSetter(Category category) { 228 | return 
templateNameColumnSetterByCategory.get(category); 229 | } 230 | 231 | 232 | public static boolean isSupportsLengthSpecification(Category category) { 233 | return Arrays.stream(new Category[]{CHAR, VARCHAR, STRING}).anyMatch(it -> it == category); 234 | } 235 | 236 | 237 | public static String getTemplateNameListReinit(Category category) { 238 | return templateNameClassReinitByType.get(getVectorClassName(category)); 239 | } 240 | 241 | 242 | /** 243 | * @param method method reference 244 | * @return Javabean property name for a getter or the method name itself turned into camel case. 245 | */ 246 | public static String getPropertyName(Method method) { 247 | int index = 0; 248 | if (method.getName().startsWith("get") && // 249 | method.getName().length() > 3 && // 250 | Character.isUpperCase(method.getName().charAt(3))) { 251 | index = 3; 252 | } else if (method.getName().startsWith("is") && // 253 | method.getName().length() > 2 && // 254 | Character.isUpperCase(method.getName().charAt(2))) { 255 | index = 2; 256 | } 257 | String propertyName = method.getName().substring(index); 258 | if (Character.isUpperCase(propertyName.charAt(0))) { 259 | propertyName = Character.toLowerCase(propertyName.charAt(0)) + propertyName.substring(1); 260 | } 261 | return propertyName; 262 | } 263 | 264 | 265 | public static Optional getAnnotatedMethodInEnum(Class> clz) { 266 | return Arrays.stream(clz.getMethods()) // 267 | .filter(it -> it.isAnnotationPresent(Orc.class)) // 268 | .filter(it -> it.getParameterCount() == 0) // 269 | .filter(it -> !Void.TYPE.equals(it.getReturnType())) // 270 | .findFirst(); 271 | } 272 | } 273 | -------------------------------------------------------------------------------- /src/main/java/com/eclecticlogic/orc/impl/bootstrap/OrcWriterBootstrap.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017 Eclectic Logic LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the 
"License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.eclecticlogic.orc.impl.bootstrap; 18 | 19 | import com.eclecticlogic.orc.OrcHandle; 20 | import com.eclecticlogic.orc.impl.AbstractOrcWriter; 21 | import com.eclecticlogic.orc.impl.SchemaSpi; 22 | import com.eclecticlogic.orc.impl.schema.SchemaColumn; 23 | import javassist.*; 24 | import org.slf4j.Logger; 25 | import org.slf4j.LoggerFactory; 26 | import org.stringtemplate.v4.ST; 27 | import org.stringtemplate.v4.STGroup; 28 | import org.stringtemplate.v4.STGroupFile; 29 | 30 | import java.util.ArrayList; 31 | import java.util.List; 32 | import java.util.Stack; 33 | import java.util.concurrent.ConcurrentHashMap; 34 | import java.util.concurrent.atomic.AtomicInteger; 35 | 36 | /** 37 | * Main class responsible for bootstrapping the writer class by generating code at runtime based on the schema definition. 38 | * Created by kabram 39 | */ 40 | public class OrcWriterBootstrap { 41 | 42 | private static final String ORC_WRITER_PACKAGE = "com.eclecticlogic.eclectic.orc.impl.writer"; 43 | 44 | private final static ConcurrentHashMap, Class> writersByClass = new ConcurrentHashMap<>(); 45 | // This is used to prevent linkage error due to concurrent creation of classes. 
46 | private static AtomicInteger extractorNameSuffix = new AtomicInteger(); 47 | 48 | private static Logger logger = LoggerFactory.getLogger(OrcWriterBootstrap.class); 49 | 50 | 51 | @SuppressWarnings("unchecked") 52 | public static OrcHandle create(SchemaSpi schema) { 53 | Class clz = schema.getSchemaClass(); 54 | writersByClass.computeIfAbsent(clz, (cz) -> createWriter(schema)); 55 | try { 56 | return (OrcHandle) writersByClass.get(clz).newInstance(); 57 | } catch (InstantiationException | IllegalAccessException e) { 58 | throw new RuntimeException(e); 59 | } 60 | } 61 | 62 | 63 | @SuppressWarnings("unchecked") 64 | static Class createWriter(SchemaSpi schema) { 65 | ClassPool pool = ClassPool.getDefault(); 66 | pool.insertClassPath(new ClassClassPath(AbstractOrcWriter.class)); 67 | CtClass cc = pool.makeClass(ORC_WRITER_PACKAGE + schema.getSchemaClass().getSimpleName() + "$OrcWriter_" + extractorNameSuffix 68 | .incrementAndGet()); 69 | SchemaColumn schemaColumn = schema.compile(); 70 | try { 71 | cc.setSuperclass(pool.get(AbstractOrcWriter.class.getName())); 72 | 73 | cc.addMethod(CtNewMethod.make(createTypeDescriptionBody(schemaColumn), cc)); 74 | cc.addMethod(CtNewMethod.make(getSpecialCaseSetupBody(schemaColumn), cc)); 75 | cc.addMethod(CtNewMethod.make(getWriteBody(schemaColumn, schema.getSchemaClass()), cc)); 76 | } catch (CannotCompileException | NotFoundException e) { 77 | throw new RuntimeException(e); 78 | } 79 | 80 | try { 81 | return (Class) cc.toClass(); 82 | } catch (CannotCompileException e) { 83 | throw new RuntimeException(e); 84 | } 85 | } 86 | 87 | 88 | static String getClassShell(String packageName, String clsName, String superName) { 89 | STGroup group = new STGroupFile("eclectic/orc/template/classShell.stg"); 90 | ST st = group.getInstanceOf("classShell"); 91 | st.add("pkgName", packageName); 92 | st.add("clsName", clsName); 93 | st.add("superName", superName); 94 | String s = st.render(); 95 | logger.trace(s); 96 | return s; 97 | } 98 | 99 
| 100 | static String createTypeDescriptionBody(SchemaColumn schemaColumn) { 101 | STGroup group = new STGroupFile("eclectic/orc/template/methodCreateTypeDescription.stg"); 102 | ST st = group.getInstanceOf("methodGetTypeDescription"); 103 | st.add("schemaColumn", schemaColumn); 104 | String s = st.render(); 105 | logger.trace(s); 106 | return s; 107 | } 108 | 109 | 110 | static String getSpecialCaseSetupBody(SchemaColumn schemaColumn) { 111 | STGroup group = new STGroupFile("eclectic/orc/template/methodSpecialCaseSetup.stg"); 112 | // Special case setup is needed for lists to adjust list child vector size. 113 | List listSchemaTypes = new ArrayList<>(); 114 | Stack structTypes = new Stack<>(); 115 | structTypes.push(schemaColumn); 116 | while (!structTypes.isEmpty()) { 117 | for (SchemaColumn child : structTypes.pop().getComplexType().getStructChildren()) { 118 | if (child.getTypeInfo().isTypeStruct()) { 119 | structTypes.push(child); 120 | } else if (child.getTypeInfo().isTypeList()) { 121 | listSchemaTypes.add(child); 122 | } 123 | } 124 | } 125 | ST st = group.getInstanceOf("methodSpecialCaseSetup"); 126 | st.add("list", listSchemaTypes); 127 | String s = st.render(); 128 | logger.trace(s); 129 | return s; 130 | } 131 | 132 | 133 | static String getWriteBody(SchemaColumn schemaColumn, Class schemaClass) { 134 | STGroup group = new STGroupFile("eclectic/orc/template/methodWrite.stg"); 135 | ST st = group.getInstanceOf("methodWrite"); 136 | st.add("schemaColumn", schemaColumn); 137 | st.add("sclass", schemaClass); 138 | String s = st.render(); 139 | logger.trace(s); 140 | return s; 141 | } 142 | } 143 | -------------------------------------------------------------------------------- /src/main/java/com/eclecticlogic/orc/impl/schema/AbstractSchemaColumn.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017 Eclectic Logic LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 
| * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.eclecticlogic.orc.impl.schema; 18 | 19 | import com.eclecticlogic.orc.Converter; 20 | import com.eclecticlogic.orc.Orc; 21 | import com.eclecticlogic.orc.OrcTemporal; 22 | import com.eclecticlogic.orc.impl.bootstrap.GeneratorUtil; 23 | import org.apache.orc.TypeDescription.Category; 24 | 25 | import javax.persistence.Column; 26 | import javax.persistence.Temporal; 27 | import java.lang.reflect.Method; 28 | import java.util.ArrayList; 29 | import java.util.List; 30 | import java.util.Optional; 31 | 32 | /** 33 | * Created by kabram on 2/27/17. 34 | */ 35 | public class AbstractSchemaColumn implements GenInfo { 36 | 37 | private final List accessorMethods = new ArrayList<>(); 38 | private int columnIndex; // Column columnIndex for this if the entire structure were to be flattened. 39 | private Category _category; // lazy computed cateogry 40 | 41 | 42 | @Override 43 | public List getAccessorMethods() { 44 | return accessorMethods; 45 | } 46 | 47 | 48 | public int getColumnIndex() { 49 | return columnIndex; 50 | } 51 | 52 | 53 | public void setColumnIndex(int columnIndex) { 54 | this.columnIndex = columnIndex; 55 | } 56 | 57 | 58 | @Override 59 | public Class getColumnClassType() { 60 | Method method = getLastAccessorMethod(); 61 | Converter c = getConverter(); 62 | return c == null ? 
method.getReturnType() : c.getConvertedClass(); 63 | } 64 | 65 | 66 | @Override 67 | public Category getCategory() { 68 | if (_category == null) { 69 | _category = _getCategory(getColumnClassType()); 70 | } 71 | return _category; 72 | } 73 | 74 | 75 | @SuppressWarnings("unchecked") 76 | protected Category _getCategory(Class clz) { 77 | if (clz == null) { 78 | return Category.STRUCT; 79 | } else if (GeneratorUtil.getCategoryByBasicType(clz) != null) { 80 | return GeneratorUtil.getCategoryByBasicType(clz); 81 | } else if (String.class.isAssignableFrom(clz)) { 82 | // Return STRING vs VARCHAR based on whether size is specified or not. 83 | Orc orc = getAnnotation(Orc.class); 84 | if (orc == null) { 85 | // Check if jpa column annotation is present. 86 | Column col = getAnnotation(Column.class); 87 | if (col == null || col.length() == 0) { 88 | return Category.STRING; 89 | } 90 | return Category.VARCHAR; 91 | } 92 | return orc.length() == 0 ? Category.STRING : Category.VARCHAR; 93 | } else if (GeneratorUtil.getCategoryByAssignableType(clz) != null) { 94 | Category category = GeneratorUtil.getCategoryByAssignableType(clz); 95 | return category == Category.TIMESTAMP ? getAnnotationBasedDateCategory() : category; 96 | } else if (Enum.class.isAssignableFrom(clz)) { 97 | return getEnumCategory((Class>) clz); 98 | } 99 | return Category.STRUCT; 100 | } 101 | 102 | 103 | /** 104 | * @return DATE or TIMESTAMP based on presence of @OrcTemporal annotation. 
105 | */ 106 | protected Category getAnnotationBasedDateCategory() { 107 | OrcTemporal orcTemporal = getAnnotation(OrcTemporal.class); 108 | if (orcTemporal == null) { 109 | Temporal jpaTemporal = getAnnotation(Temporal.class); 110 | if (jpaTemporal == null) { 111 | return Category.TIMESTAMP; 112 | } 113 | switch (jpaTemporal.value()) { 114 | case DATE: 115 | return Category.DATE; 116 | case TIME: 117 | case TIMESTAMP: 118 | return Category.TIMESTAMP; 119 | } 120 | } 121 | switch (orcTemporal.value()) { 122 | case DATE: 123 | return Category.DATE; 124 | case TIMESTAMP: 125 | return Category.TIMESTAMP; 126 | } 127 | return Category.TIMESTAMP; 128 | } 129 | 130 | 131 | /** 132 | * Look for orc annotation in any of the methods. If found, return a category based on the return type of that method. 133 | * Otherwise we will call name() and therefore the Category is STRING. 134 | * @param clz 135 | * @return 136 | */ 137 | protected Category getEnumCategory(Class> clz) { 138 | Optional annotatedMethod = GeneratorUtil.getAnnotatedMethodInEnum(clz); 139 | // Assumed to be a call to name() and therefore a string 140 | return annotatedMethod.map(m -> _getCategory(m.getReturnType())).orElse(Category.STRING); 141 | } 142 | 143 | } 144 | -------------------------------------------------------------------------------- /src/main/java/com/eclecticlogic/orc/impl/schema/ComplexType.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017 Eclectic Logic LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.eclecticlogic.orc.impl.schema; 18 | 19 | import com.eclecticlogic.orc.OrcList; 20 | import com.eclecticlogic.orc.impl.bootstrap.GeneratorUtil; 21 | 22 | import java.util.ArrayList; 23 | import java.util.List; 24 | 25 | /** 26 | * Complex column types. 27 | * Created by kabram 28 | */ 29 | public class ComplexType { 30 | 31 | private final GenInfo genInfo; 32 | private List structChildren = new ArrayList<>(); 33 | private SchemaColumn listChild; 34 | private SchemaColumn mapKey; 35 | private SchemaColumn mapValue; 36 | 37 | 38 | public ComplexType(GenInfo genInfo) { 39 | this.genInfo = genInfo; 40 | } 41 | 42 | 43 | public List getStructChildren() { 44 | return structChildren; 45 | } 46 | 47 | 48 | public void setStructChildren(List structChildren) { 49 | this.structChildren = structChildren; 50 | } 51 | 52 | 53 | public SchemaColumn getListChild() { 54 | return listChild; 55 | } 56 | 57 | 58 | public void setListChild(SchemaColumn listChild) { 59 | this.listChild = listChild; 60 | } 61 | 62 | 63 | public SchemaColumn getMapKey() { 64 | return mapKey; 65 | } 66 | 67 | 68 | public void setMapKey(SchemaColumn mapKey) { 69 | this.mapKey = mapKey; 70 | } 71 | 72 | 73 | public SchemaColumn getMapValue() { 74 | return mapValue; 75 | } 76 | 77 | 78 | public void setMapValue(SchemaColumn mapValue) { 79 | this.mapValue = mapValue; 80 | } 81 | 82 | 83 | /** 84 | * @return Average size of list for list column types. 
85 | */ 86 | public int getAverageNullSize() { 87 | OrcList orcCollection = genInfo.getAnnotation(OrcList.class); 88 | if (orcCollection == null) { 89 | throw new RuntimeException("@OrcList annotation must be present for list column types."); 90 | } 91 | return orcCollection.averageSize() ; 92 | } 93 | 94 | 95 | /** 96 | * @return Average size of list for list column types. 97 | */ 98 | public int getAverageSize() { 99 | OrcList orcCollection = genInfo.getAnnotation(OrcList.class); 100 | if (orcCollection == null) { 101 | throw new RuntimeException("@OrcList annotation must be present for list column types."); 102 | } 103 | return orcCollection.averageSize() * orcCollection.elementSize(); 104 | } 105 | 106 | 107 | public String getTemplateNameListReinit() { 108 | return GeneratorUtil.getTemplateNameListReinit(genInfo.getCategory()); 109 | } 110 | 111 | } 112 | -------------------------------------------------------------------------------- /src/main/java/com/eclecticlogic/orc/impl/schema/GenInfo.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017 Eclectic Logic LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.eclecticlogic.orc.impl.schema; 18 | 19 | import com.eclecticlogic.orc.Converter; 20 | import com.eclecticlogic.orc.OrcConverter; 21 | import com.eclecticlogic.orc.impl.bootstrap.GeneratorUtil; 22 | import org.apache.orc.TypeDescription.Category; 23 | 24 | import java.lang.annotation.Annotation; 25 | import java.lang.reflect.Method; 26 | import java.util.List; 27 | 28 | /** 29 | * Basic interface implemented by SchemaColumn that other specialized views of the column depend on. 30 | * Created by kabram 31 | */ 32 | public interface GenInfo { 33 | 34 | default A getAnnotation(Class clz) { 35 | Method theMethod = null; 36 | if (Enum.class.isAssignableFrom(getColumnClassType())) { 37 | // For enums, we need to get the annotation from the enum annotated method. 38 | theMethod = GeneratorUtil.getAnnotatedMethodInEnum((Class>) getColumnClassType()) // 39 | .orElse(null); 40 | } else { 41 | theMethod = getLastAccessorMethod(); 42 | } 43 | return theMethod == null ? null : theMethod.isAnnotationPresent(clz) ? theMethod.getDeclaredAnnotation(clz) : null; 44 | } 45 | 46 | 47 | default Converter getConverter() { 48 | Method method = getLastAccessorMethod(); 49 | if (method.isAnnotationPresent(OrcConverter.class)) { 50 | try { 51 | return method.getDeclaredAnnotation(OrcConverter.class).value().newInstance(); 52 | } catch (InstantiationException | IllegalAccessException e) { 53 | throw new RuntimeException(e); 54 | } 55 | } 56 | return null; 57 | } 58 | 59 | 60 | /** 61 | * @return Category of concern. 62 | */ 63 | Category getCategory(); 64 | 65 | 66 | /** 67 | * @return Accessor methods 68 | */ 69 | List getAccessorMethods(); 70 | 71 | 72 | /** 73 | * @return The last accessor method in a chain of invocations for getting the value for the column. 74 | */ 75 | default Method getLastAccessorMethod() { 76 | try { 77 | return getAccessorMethods().isEmpty() ? 
// 78 | Object.class.getMethod("getClass") : getAccessorMethods().get(getAccessorMethods().size() - 1); 79 | } catch (NoSuchMethodException e) { 80 | throw new RuntimeException(e); 81 | } 82 | } 83 | 84 | 85 | /** 86 | * @return Type of the column class, applying converters if necessary. 87 | */ 88 | Class getColumnClassType(); 89 | } 90 | -------------------------------------------------------------------------------- /src/main/java/com/eclecticlogic/orc/impl/schema/ListChildSchemaColumn.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017 Eclectic Logic LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.eclecticlogic.orc.impl.schema; 18 | 19 | import com.eclecticlogic.orc.Converter; 20 | import com.eclecticlogic.orc.OrcList; 21 | 22 | import java.lang.reflect.Method; 23 | 24 | /** 25 | * Created by kabram 26 | */ 27 | public class ListChildSchemaColumn extends SchemaColumn { 28 | 29 | private final SchemaColumn parent; 30 | 31 | 32 | public ListChildSchemaColumn(SchemaColumn parent) { 33 | this.parent = parent; 34 | } 35 | 36 | 37 | @Override 38 | public Method getLastAccessorMethod() { 39 | return parent.getLastAccessorMethod(); 40 | } 41 | 42 | 43 | public Class getListEntryType() { 44 | OrcList orcCollection = getLastAccessorMethod().getDeclaredAnnotation(OrcList.class); 45 | if (orcCollection == null) { 46 | throw new RuntimeException("@OrcList annotation must be present for list column types."); 47 | } 48 | return orcCollection.entryType(); 49 | } 50 | 51 | 52 | @Override 53 | public Class getColumnClassType() { 54 | OrcList orcCollection = getLastAccessorMethod().getDeclaredAnnotation(OrcList.class); 55 | if (orcCollection == null) { 56 | throw new RuntimeException("@OrcList annotation must be present for list column types."); 57 | } 58 | Converter c = getConverter(); 59 | if (c == null) { 60 | return orcCollection.entryType(); 61 | } else { 62 | return c.getConvertedClass(); 63 | } 64 | } 65 | 66 | 67 | /** 68 | * @return Check if the parent @OrcList has a converter defined. 
69 | */ 70 | @Override 71 | public Converter getConverter() { 72 | OrcList orcCollection = getLastAccessorMethod().getDeclaredAnnotation(OrcList.class); 73 | if (orcCollection.converter().equals(OrcList.DEFAULT.class)) { 74 | return null; 75 | } else { 76 | try { 77 | return orcCollection.converter().newInstance(); 78 | } catch (InstantiationException | IllegalAccessException e) { 79 | throw new RuntimeException(e); 80 | } 81 | } 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /src/main/java/com/eclecticlogic/orc/impl/schema/SchemaColumn.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017 Eclectic Logic LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.eclecticlogic.orc.impl.schema; 18 | 19 | import com.eclecticlogic.orc.impl.bootstrap.GeneratorUtil; 20 | 21 | import java.lang.reflect.Method; 22 | 23 | /** 24 | * Created by kabram 25 | */ 26 | public class SchemaColumn extends AbstractSchemaColumn { 27 | 28 | private final TypeDesc typeDescription = new TypeDesc(this); 29 | private final TypeInfo typeInfo = new TypeInfo(this); 30 | private final ComplexType complexType = new ComplexType(this); 31 | private final Template template = new Template(this); 32 | private Class delegateClass; 33 | private boolean needsDelegate; 34 | 35 | public boolean isNeedsDelegate() { 36 | return needsDelegate; 37 | } 38 | 39 | public void setNeedsDelegate(boolean needsDelegate) { 40 | this.needsDelegate = needsDelegate; 41 | } 42 | 43 | public Class getDelegateClass() { 44 | return delegateClass; 45 | } 46 | 47 | public void setDelegateClass(Class delegateClass) { 48 | this.delegateClass = delegateClass; 49 | } 50 | 51 | 52 | public ComplexType getComplexType() { 53 | return complexType; 54 | } 55 | 56 | 57 | public TypeDesc getTypeDescription() { 58 | return typeDescription; 59 | } 60 | 61 | 62 | public TypeInfo getTypeInfo() { 63 | return typeInfo; 64 | } 65 | 66 | 67 | public Template getTemplate() { 68 | return template; 69 | } 70 | 71 | 72 | /** 73 | * @return true if the column vector (not the datatype) holds primitive values (e.g. LongVectorColumn holds primitive values, but 74 | * there is not a vector that holds primitive chars). 75 | */ 76 | public boolean isPrimitiveVector() { 77 | return GeneratorUtil.getPrimitiveAccessorByType(getColumnClassType()) != null; 78 | } 79 | 80 | 81 | /** 82 | * @return Fully qualified class name of the Orc ColumnVector that implements this column. 
83 | */ 84 | public String getVectorClassName() { 85 | return GeneratorUtil.getVectorClassName(getCategory()).getName(); 86 | } 87 | 88 | 89 | } 90 | -------------------------------------------------------------------------------- /src/main/java/com/eclecticlogic/orc/impl/schema/Template.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017 Eclectic Logic LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.eclecticlogic.orc.impl.schema; 18 | 19 | import com.eclecticlogic.orc.Orc; 20 | import com.eclecticlogic.orc.impl.bootstrap.GeneratorUtil; 21 | 22 | import java.lang.reflect.Method; 23 | import java.util.Arrays; 24 | import java.util.Optional; 25 | import java.util.concurrent.atomic.AtomicInteger; 26 | 27 | /** 28 | * Created by kabram on 3/1/17. 29 | */ 30 | public class Template { 31 | 32 | private static final AtomicInteger variableCounter = new AtomicInteger(); 33 | private final GenInfo genInfo; 34 | private ThreadLocal currentVariable = new ThreadLocal<>(); 35 | 36 | 37 | public Template(GenInfo genInfo) { 38 | this.genInfo = genInfo; 39 | } 40 | 41 | 42 | public String getTemplateNameColumnSetter() { 43 | return GeneratorUtil.getTemplateNameColumnSetter(genInfo.getCategory()); 44 | } 45 | 46 | 47 | /** 48 | * @return Creates a new temporary variable name and returns it. 
49 | */ 50 | public String getNewTempVariable() { 51 | currentVariable.set(variableCounter.getAndIncrement()); 52 | return "v" + currentVariable.get(); 53 | } 54 | 55 | 56 | /** 57 | * @return Returns the current temporary variable. At least one call to getNewTempVariable() from current thread should precede this 58 | * call. 59 | */ 60 | public String getTempVariable() { 61 | return "v" + currentVariable.get(); 62 | } 63 | 64 | 65 | /** 66 | * @return The method to be used to convert an enum to an orc-compatible value. 67 | */ 68 | public String getEnumMethod() { 69 | Optional annotatedMethod = GeneratorUtil.getAnnotatedMethodInEnum((Class>) genInfo.getColumnClassType()); 70 | return annotatedMethod.map(Method::getName).orElse("name"); 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /src/main/java/com/eclecticlogic/orc/impl/schema/TypeDesc.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017 Eclectic Logic LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.eclecticlogic.orc.impl.schema; 18 | 19 | import com.eclecticlogic.orc.Orc; 20 | import com.eclecticlogic.orc.impl.bootstrap.GeneratorUtil; 21 | import org.apache.orc.TypeDescription; 22 | 23 | import javax.persistence.Column; 24 | 25 | /** 26 | * Models attributes for org.apache.orc.TypeDescription 27 | * Created by kabram 28 | */ 29 | public class TypeDesc { 30 | 31 | private String name; 32 | private final GenInfo genInfo; 33 | 34 | public TypeDesc(GenInfo genInfo) { 35 | this.genInfo = genInfo; 36 | } 37 | 38 | 39 | public String getName() { 40 | return name; 41 | } 42 | 43 | 44 | public void setName(String name) { 45 | this.name = name; 46 | } 47 | 48 | 49 | public String getCreateMethod() { 50 | return GeneratorUtil.getTypeDescriptionCreator(genInfo.getCategory()); 51 | } 52 | 53 | 54 | 55 | /** 56 | * @return Length to be used for varchar (or string) data type. 57 | */ 58 | public Integer getLength() { 59 | if (genInfo.getCategory() == TypeDescription.Category.CHAR) { 60 | return 1; 61 | } else if (!GeneratorUtil.isSupportsLengthSpecification(genInfo.getCategory())) { 62 | return null; 63 | } 64 | Orc orc = genInfo.getAnnotation(Orc.class); 65 | if (orc == null) { 66 | // Check if jpa column annotation is present. 67 | Column col = genInfo.getAnnotation(Column.class); 68 | if (col == null || col.length() == 0) { 69 | return null; 70 | } 71 | return col.length(); 72 | } 73 | return orc.length() == 0 ? null : orc.length(); 74 | } 75 | 76 | 77 | /** 78 | * @return precision to be used for Decimal data type. 79 | */ 80 | public int getPrecision() { 81 | Orc orc = genInfo.getAnnotation(Orc.class); 82 | if (orc == null) { 83 | Column col = genInfo.getAnnotation(Column.class); 84 | return col == null ? 0 : col.precision(); 85 | } 86 | return orc.precision(); 87 | } 88 | 89 | 90 | /** 91 | * @return scale to be used for Decimal data type. 
92 | */ 93 | public int getScale() { 94 | Orc orc = genInfo.getAnnotation(Orc.class); 95 | if (orc == null) { 96 | Column col = genInfo.getAnnotation(Column.class); 97 | return col == null ? 0 : col.scale(); 98 | } 99 | return orc.scale(); 100 | } 101 | 102 | 103 | public boolean isPrecisionFirst() { 104 | return getPrecision() != 0 && getPrecision() > 10; 105 | } 106 | 107 | 108 | public boolean isPrecisionLast() { 109 | return getPrecision() != 0 && getPrecision() <= 10; 110 | } 111 | 112 | 113 | public boolean isScaleFirst() { 114 | return isPrecisionLast(); 115 | } 116 | 117 | 118 | public boolean isScaleLast() { 119 | return isPrecisionFirst(); 120 | } 121 | 122 | } 123 | -------------------------------------------------------------------------------- /src/main/java/com/eclecticlogic/orc/impl/schema/TypeInfo.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017 Eclectic Logic LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.eclecticlogic.orc.impl.schema; 18 | 19 | import com.eclecticlogic.orc.impl.bootstrap.GeneratorUtil; 20 | import org.apache.orc.TypeDescription; 21 | 22 | import java.time.LocalDate; 23 | import java.time.ZonedDateTime; 24 | import java.util.Collection; 25 | import java.util.Date; 26 | import java.util.List; 27 | 28 | /** 29 | * Created by kabram on 2/28/17. 
30 | */ 31 | public class TypeInfo { 32 | 33 | private final GenInfo genInfo; 34 | 35 | 36 | public TypeInfo(GenInfo genInfo) { 37 | this.genInfo = genInfo; 38 | } 39 | 40 | 41 | public boolean isPrimitive() { 42 | return genInfo.getLastAccessorMethod().getReturnType().isPrimitive(); 43 | } 44 | 45 | 46 | public boolean isTypeBinary() { 47 | return genInfo.getCategory() == TypeDescription.Category.BINARY; 48 | } 49 | 50 | 51 | public boolean isTypeChar() { 52 | return genInfo.getCategory() == TypeDescription.Category.CHAR; 53 | } 54 | 55 | 56 | public boolean isTypeVarchar() { 57 | return genInfo.getCategory() == TypeDescription.Category.VARCHAR; 58 | } 59 | 60 | 61 | public boolean isTypeString() { 62 | return genInfo.getCategory() == TypeDescription.Category.STRING; 63 | } 64 | 65 | 66 | public boolean isTypeBoolean() { 67 | return genInfo.getCategory() == TypeDescription.Category.BOOLEAN; 68 | } 69 | 70 | 71 | public boolean isTypeByte() { 72 | return genInfo.getCategory() == TypeDescription.Category.BYTE; 73 | } 74 | 75 | 76 | public boolean isTypeShort() { 77 | return genInfo.getCategory() == TypeDescription.Category.SHORT; 78 | } 79 | 80 | 81 | public boolean isTypeInt() { 82 | return genInfo.getCategory() == TypeDescription.Category.INT; 83 | } 84 | 85 | 86 | public boolean isTypeLong() { 87 | return genInfo.getCategory() == TypeDescription.Category.LONG; 88 | } 89 | 90 | 91 | public boolean isTypeDate() { 92 | return genInfo.getCategory() == TypeDescription.Category.DATE; 93 | } 94 | 95 | 96 | public boolean isTypeDecimal() { 97 | return genInfo.getCategory() == TypeDescription.Category.DECIMAL; 98 | } 99 | 100 | 101 | public boolean isTypeFloat() { 102 | return genInfo.getCategory() == TypeDescription.Category.FLOAT; 103 | } 104 | 105 | 106 | public boolean isTypeDouble() { 107 | return genInfo.getCategory() == TypeDescription.Category.DOUBLE; 108 | } 109 | 110 | 111 | public boolean isTypeList() { 112 | return genInfo.getCategory() == 
TypeDescription.Category.LIST; 113 | } 114 | 115 | 116 | public boolean isTypeMap() { 117 | return genInfo.getCategory() == TypeDescription.Category.MAP; 118 | } 119 | 120 | 121 | public boolean isTypeStruct() { 122 | return genInfo.getCategory() == TypeDescription.Category.STRUCT; 123 | } 124 | 125 | 126 | public boolean isTypeTimestamp() { 127 | return genInfo.getCategory() == TypeDescription.Category.TIMESTAMP; 128 | } 129 | 130 | 131 | public boolean isTypeUnion() { 132 | return genInfo.getCategory() == TypeDescription.Category.UNION; 133 | } 134 | 135 | 136 | public boolean isTypeLocalDate() { 137 | return LocalDate.class.isAssignableFrom(genInfo.getColumnClassType()); 138 | } 139 | 140 | 141 | public boolean isTypeZonedDateTime() { 142 | return ZonedDateTime.class.isAssignableFrom(genInfo.getColumnClassType()); 143 | } 144 | 145 | 146 | public boolean isTypeDateTime() { 147 | return Date.class.isAssignableFrom(genInfo.getColumnClassType()); 148 | } 149 | 150 | 151 | /** 152 | * @return true if the underlying type is a java util List derivative. 153 | */ 154 | public boolean isTypeJavaList() { return List.class.isAssignableFrom(genInfo.getColumnClassType()); } 155 | 156 | 157 | /** 158 | * @return true if the underlying type is a java util Collection derivative. 159 | */ 160 | public boolean isTypeJavaCollection() { return Collection.class.isAssignableFrom(genInfo.getColumnClassType()); } 161 | 162 | 163 | /** 164 | * @return true if the underlying type is an Enum derivative. 165 | */ 166 | public boolean isEnum() { return Enum.class.isAssignableFrom(genInfo.getColumnClassType()); } 167 | 168 | 169 | /** 170 | * @return Returns the method that can be used on a boxed type to get the primitive type. 
171 | */ 172 | public String getPrimitiveConversionMethod() { 173 | return GeneratorUtil.getPrimitiveAccessorByType(genInfo.getColumnClassType()); 174 | } 175 | 176 | 177 | } 178 | -------------------------------------------------------------------------------- /src/main/resources/eclectic/orc/template/classShell.stg: -------------------------------------------------------------------------------- 1 | classShell(pkgName, clsName, superName) ::= << 2 | package ; 3 | 4 | public class extends { 5 | >> -------------------------------------------------------------------------------- /src/main/resources/eclectic/orc/template/methodCreateTypeDescription.stg: -------------------------------------------------------------------------------- 1 | 2 | methodGetTypeDescription(schemaColumn) ::= << 3 | protected org.apache.orc.TypeDescription createTypeDescription() { 4 | return ; 5 | } 6 | >> 7 | 8 | typeDescription(schemaColumn) ::= << 9 | org.apache.orc.TypeDescription.() 10 | .withMaxLength() 11 | .withPrecision() 12 | .withScale() 13 | .withScale() 14 | .withPrecision() 15 | 16 | >> 17 | 18 | fieldSetup(schemaColumn) ::= << 19 | .addField("", ) 20 | >> -------------------------------------------------------------------------------- /src/main/resources/eclectic/orc/template/methodSpecialCaseSetup.stg: -------------------------------------------------------------------------------- 1 | methodSpecialCaseSetup(list) ::= << 2 | protected void specialCaseSetup() { 3 | 4 | } 5 | >> 6 | 7 | specialCaseListInstruction(schemaColumn) ::= << 8 | { 9 | vector = ()vectorizedRowBatch.cols[]; 10 | child = ()vector.child; 11 | int nullLength = 1024 * ; 12 | child.isNull = new boolean[nullLength]; 13 | vector.offsets = new long[nullLength]; 14 | int newLength = 1024 * ; 15 | <(schemaColumn.complexType.listChild.complexType.templateNameListReinit)(schemaColumn, "child", "newLength", "nullLength")> 16 | } 17 | >> 18 | 19 | initBytesList(schemaColumn, val, len, nullLength) ::= << 20 | .vector = 
new byte[][]; 21 | .start = new int[]; 22 | .length = new int[]; 23 | >> 24 | 25 | initLongList(schemaColumn, val, len, nullLength) ::= << 26 | .vector = new long[]; 27 | >> 28 | 29 | initDoubleList(schemaColumn, val, len, nullLength) ::= << 30 | .vector = new double[]; 31 | >> 32 | 33 | initDecimalList(schemaColumn, val, len, nullLength) ::= << 34 | .vector = new HiveDecimalWritable[]; 35 | >> 36 | 37 | initTimestampList(schemaColumn, val, len, nullLength) ::= << 38 | .time = new long[]; 39 | .nanos = new int[]; 40 | >> -------------------------------------------------------------------------------- /src/main/resources/eclectic/orc/template/methodWrite.stg: -------------------------------------------------------------------------------- 1 | methodWrite(schemaColumn, sclass) ::= << 2 | protected void write(Object inputRow) { 3 | datum = ()inputRow; 4 | 5 | delegateInstance = new (datum); 6 | 7 | 8 | } 9 | >> 10 | 11 | writeStruct(schemaColumn) ::= << 12 | 13 | >> 14 | 15 | writeColumn(schemaColumn) ::= << 16 | 17 | 18 | 19 | { 20 | vector = ()vectorizedRowBatch.cols[]; 21 | 22 | } 23 | 24 | >> 25 | 26 | delegateAwareWrite(schemaColumn, method, methods, var) ::= << 27 | 28 | 29 | 30 | 31 | 32 | >> 33 | 34 | nullSafeChainAccess(schemaColumn, method, methods, var) ::= << 35 | 36 | = .(); 37 | 38 | if ( == null) { 39 | setNull(vector); 40 | } else { 41 | 42 | } 43 | 44 | 45 | <(schemaColumn.template.templateNameColumnSetter)(schemaColumn, "vector", schemaColumn.template.tempVariable, "vectorizedRowBatch.size")> 46 | 47 | if ( == null) { 48 | setNull(vector); 49 | } else { 50 | <(schemaColumn.template.templateNameColumnSetter)(schemaColumn, "vector", enumConvertedValue(schemaColumn, schemaColumn.template.tempVariable), "vectorizedRowBatch.size")> 51 | } 52 | 53 | 54 | >> 55 | 56 | enumConvertedValue(schemaColumn, value) ::= << 57 | .() 58 | >> 59 | 60 | convertedValue(schemaColumn, value) ::= << new ().convert(()) >> 61 | 62 | columnBinary(schemaColumn, var, val, loc) 
::= << 63 | // TODO 64 | >> 65 | 66 | columnBoolean(schemaColumn, var, val, loc) ::= << 67 | 68 | .vector[] = . ? 1 : 0; 69 | 70 | .vector[] = ? 1 : 0; 71 | 72 | >> 73 | 74 | columnByte(schemaColumn, var, val, loc) ::= << 75 | 76 | >> 77 | 78 | columnChar(schemaColumn, var, val, loc) ::= << 79 | byte[] charArray = new byte[1]; 80 | 81 | charArray[0] = (byte).; 82 | 83 | charArray[0] = (byte); 84 | 85 | .setVal(, charArray); 86 | >> 87 | 88 | columnDate(schemaColumn, var, val, loc) ::= << 89 | 90 | .vector[] = .toEpochDay(); 91 | 92 | .vector[] = java.time.Instant.ofEpochMilli(.getTime()).atZone(java.time.ZoneId.systemDefault()).toLocalDate().toEpochDay(); 93 | 94 | >> 95 | 96 | columnDecimal(schemaColumn, var, val, loc) ::= << 97 | .set(, org.apache.hadoop.hive.common.type.HiveDecimal.create(, false)); 98 | >> 99 | 100 | columnDouble(schemaColumn, var, val, loc) ::= << 101 | 102 | .vector[] = .; 103 | 104 | .vector[] = ; 105 | 106 | >> 107 | 108 | columnFloat(schemaColumn, var, val, loc) ::= << 109 | 110 | >> 111 | 112 | columnInt(schemaColumn, var, val, loc) ::= << 113 | 114 | >> 115 | 116 | columnList(schemaColumn, var, val, loc) ::= << 117 | 118 | java.util.List list = ; 119 | 120 | java.util.List list = new java.util.ArrayList(); 121 | 122 | java.util.List list = new java.util.ArrayList(); 123 | java.util.Iterator iterator = .iterator(); 124 | while (iterator.hasNext()) { 125 | list.add(iterator.next()); 126 | } 127 | 128 | .offsets[] = .childCount; 129 | .lengths[] = list.size(); 130 | .childCount += list.size(); 131 | for (int i = 0; i \< list.size(); i++) { 132 | int location = ((int).offsets[]) + i; 133 | childVector = ().child; 134 | 135 | 136 | listElement = (); 137 | <(schemaColumn.complexType.listChild.template.templateNameColumnSetter)(schemaColumn.complexType.listChild, "childVector", "listElement", "location")> 138 | 139 | listElement = ()list.get(i); 140 | 
<(schemaColumn.complexType.listChild.template.templateNameColumnSetter)(schemaColumn.complexType.listChild, "childVector", enumConvertedValue(schemaColumn.complexType.listChild, "listElement"), "location")> 141 | 142 | } 143 | >> 144 | 145 | columnLong(schemaColumn, var, val, loc) ::= << 146 | 147 | .vector[] = .; 148 | 149 | .vector[] = ; 150 | 151 | >> 152 | 153 | columnShort(schemaColumn, var, val, loc) ::= << 154 | 155 | >> 156 | 157 | columnString(schemaColumn, var, val, loc) ::= << 158 | .setVal(, .getBytes()); 159 | >> 160 | 161 | columnTimestamp(schemaColumn, var, val, loc) ::= << 162 | 163 | .set(, java.sql.Timestamp.from(.toInstant())); 164 | 165 | .set(, new java.sql.Timestamp(.getTime())); 166 | 167 | .set(, ); 168 | 169 | >> 170 | 171 | columnVarchar(schemaColumn, var, val, loc) ::= << 172 | 173 | >> -------------------------------------------------------------------------------- /src/test/groovy/com/eclecticlogic/orc/ArrayTest.java: -------------------------------------------------------------------------------- 1 | package com.eclecticlogic.orc; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | public class ArrayTest { 7 | 8 | String name; 9 | 10 | List grades = new ArrayList<>(); 11 | 12 | 13 | public String getName() { 14 | return name; 15 | } 16 | 17 | public void setName(String name) { 18 | this.name = name; 19 | } 20 | 21 | 22 | @OrcList(entryType = String.class, averageSize = 1200, elementSize = 30) 23 | public List getGrades() { 24 | return grades; 25 | } 26 | 27 | public void setGrades(List grades) { 28 | this.grades = grades; 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /src/test/groovy/com/eclecticlogic/orc/ChromaticConverter.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017 Eclectic Logic LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this 
file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.eclecticlogic.orc 18 | 19 | /** 20 | * Created by kabram on 3/2/17. 21 | */ 22 | class ChromaticConverter implements Converter { 23 | 24 | @Override 25 | Class getConvertedClass() { 26 | return Boolean 27 | } 28 | 29 | @Override 30 | Boolean convert(Color instance) { 31 | return instance == Color.RED || instance == Color.GREEN 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/test/groovy/com/eclecticlogic/orc/Club.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017 Eclectic Logic LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.eclecticlogic.orc 18 | 19 | /** 20 | * Created by kabram on 3/1/17. 
21 | */ 22 | enum Club { 23 | PHI, OMEGA; 24 | 25 | } -------------------------------------------------------------------------------- /src/test/groovy/com/eclecticlogic/orc/Color.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017 Eclectic Logic LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.eclecticlogic.orc 18 | 19 | /** 20 | * Created by kabram on 3/1/17. 21 | */ 22 | enum Color { 23 | 24 | BLUE('B' as char), GREEN('G' as char), RED('R' as char), YELLOW('Y' as char), VIOLET('V' as char), WHITE('W' as char); 25 | 26 | Color(char code) { 27 | this.code = code 28 | } 29 | private char code; 30 | 31 | @Orc 32 | char getCode() { 33 | return code 34 | } 35 | } -------------------------------------------------------------------------------- /src/test/groovy/com/eclecticlogic/orc/Course.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017 Eclectic Logic LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.eclecticlogic.orc 18 | 19 | /** 20 | * Created by kabram on 2/28/17. 21 | */ 22 | class Course { 23 | String name; 24 | Teacher teacher; 25 | } 26 | -------------------------------------------------------------------------------- /src/test/groovy/com/eclecticlogic/orc/Graduate.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017 Eclectic Logic LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.eclecticlogic.orc 18 | 19 | import java.time.LocalDate 20 | import java.time.YearMonth 21 | 22 | /** 23 | * Created by kabram 24 | */ 25 | public class Graduate extends Student { 26 | 27 | List houses 28 | List colors 29 | Level level 30 | Club club 31 | Power power; 32 | private String major; 33 | private BigDecimal allowance; 34 | private List subjects = new ArrayList<>(); 35 | private List grades = new ArrayList<>(); 36 | Iterable courseGrades = new ArrayList<>(); 37 | Set courseDates = new HashSet<>() 38 | Queue courseAudits = new ArrayDeque<>() 39 | Course course; 40 | YearMonth graduationDate; 41 | Character subjectGrade; 42 | LocalDate initiationDate; 43 | 44 | public String getMajor() { 45 | return major; 46 | } 47 | 48 | public void setMajor(String major) { 49 | this.major = major; 50 | } 51 | 52 | @Orc(precision = 10, scale = 5) 53 | public BigDecimal getAllowance() { 54 | return allowance; 55 | } 56 | 57 | public void setAllowance(BigDecimal allowance) { 58 | this.allowance = allowance; 59 | } 60 | 61 | 62 | @OrcList(entryType = String.class, averageSize = 50) 63 | @Orc(length = 30) 64 | public List getSubjects() { 65 | return subjects; 66 | } 67 | 68 | 69 | @OrcList(entryType = Long.class, averageSize = 25) 70 | public List getGrades() { 71 | return grades; 72 | } 73 | 74 | 75 | public Course mycoursework() { 76 | return course; 77 | } 78 | 79 | 80 | @OrcConverter(GraduationConverter) 81 | YearMonth getGraduationDate() { 82 | return graduationDate 83 | } 84 | 85 | @OrcList(entryType = Character, averageSize = 5) 86 | Iterable getCourseGrades() { 87 | return courseGrades 88 | } 89 | 90 | @OrcList(entryType = Date, averageSize = 5) 91 | Set getCourseDates() { 92 | return courseDates 93 | } 94 | 95 | @OrcList(entryType = Boolean, averageSize = 5) 96 | Queue getCourseAudits() { 97 | return courseAudits 98 | } 99 | 100 | @OrcList(entryType = Color, averageSize = 5) 101 | List getColors() { 102 | return colors 103 | } 104 | 105 | 
@OrcList(entryType = House, averageSize = 10, converter = HouseConverter) 106 | List getHouses() { 107 | return houses 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /src/test/groovy/com/eclecticlogic/orc/GraduateDelegate.groovy: -------------------------------------------------------------------------------- 1 | package com.eclecticlogic.orc 2 | 3 | class GraduateDelegate { 4 | 5 | Graduate delegate 6 | 7 | GraduateDelegate(Graduate delegate) { 8 | this.delegate = delegate 9 | } 10 | 11 | 12 | String major() { 13 | return "Major " + delegate.major + " " + (delegate.course == null ? "null" : delegate.course.name); 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /src/test/groovy/com/eclecticlogic/orc/GraduationConverter.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017 Eclectic Logic LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.eclecticlogic.orc 18 | 19 | import java.time.LocalDate 20 | import java.time.YearMonth 21 | 22 | /** 23 | * Created by kabram on 2/28/17. 
24 | */ 25 | class GraduationConverter implements Converter { 26 | 27 | @Override 28 | Class getConvertedClass() { 29 | return Integer 30 | } 31 | 32 | @Override 33 | Integer convert(YearMonth instance) { 34 | return instance.year * 100 + instance.month 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /src/test/groovy/com/eclecticlogic/orc/House.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017 Eclectic Logic LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.eclecticlogic.orc 18 | 19 | /** 20 | * Created by kabram on 3/1/17. 21 | */ 22 | class House { 23 | 24 | String name 25 | } 26 | -------------------------------------------------------------------------------- /src/test/groovy/com/eclecticlogic/orc/HouseConverter.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017 Eclectic Logic LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.eclecticlogic.orc 18 | 19 | /** 20 | * Created by kabram on 3/1/17. 21 | */ 22 | class HouseConverter implements Converter { 23 | 24 | @Override 25 | Class getConvertedClass() { 26 | return String 27 | } 28 | 29 | @Override 30 | String convert(House instance) { 31 | return instance.name 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/test/groovy/com/eclecticlogic/orc/Level.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017 Eclectic Logic LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.eclecticlogic.orc 18 | 19 | /** 20 | * Created by kabram on 3/1/17. 
21 | */ 22 | enum Level { 23 | 24 | FRESHMEN('F' as char), SOPHOMORE('S' as char), JUNIOR('J' as char), SENIOR('N' as char); 25 | 26 | Level(char code) { 27 | this.code = code 28 | } 29 | private char code; 30 | 31 | @Orc 32 | char getCode() { 33 | return code 34 | } 35 | } -------------------------------------------------------------------------------- /src/test/groovy/com/eclecticlogic/orc/Power.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017 Eclectic Logic LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.eclecticlogic.orc 18 | 19 | /** 20 | * Created by kabram on 3/1/17. 21 | */ 22 | enum Power { 23 | 24 | LOW(45.0), MEDIUM(77.4), HIGH(89.9); 25 | 26 | Power(BigDecimal value) { 27 | this.value = value 28 | } 29 | private BigDecimal value 30 | 31 | @Orc(precision = 23, scale = 7) 32 | BigDecimal getValue() { 33 | return value 34 | } 35 | } -------------------------------------------------------------------------------- /src/test/groovy/com/eclecticlogic/orc/Student.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017 Eclectic Logic LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | */ 17 | package com.eclecticlogic.orc 18 | /** 19 | * Created by kabram 20 | */ 21 | class Student { 22 | 23 | Color chromaticColor 24 | String name 25 | int age 26 | BigDecimal score 27 | boolean resident 28 | 29 | 30 | @Orc(length = 20) 31 | String getName() { 32 | return name 33 | } 34 | 35 | 36 | @Orc 37 | int getAge() { 38 | return age 39 | } 40 | 41 | 42 | @Orc(precision = 10, scale = 2) 43 | BigDecimal getScore() { 44 | return score 45 | } 46 | 47 | void setName(String name) { 48 | this.name = name 49 | } 50 | 51 | void setAge(int age) { 52 | this.age = age 53 | } 54 | 55 | void setScore(BigDecimal score) { 56 | this.score = score 57 | } 58 | 59 | @OrcConverter(ChromaticConverter) 60 | Color getChromaticColor() { 61 | return chromaticColor 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /src/test/groovy/com/eclecticlogic/orc/Teacher.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017 Eclectic Logic LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.eclecticlogic.orc 18 | 19 | import java.time.YearMonth 20 | 21 | /** 22 | * Created by kabram on 2/28/17. 23 | */ 24 | class Teacher { 25 | String name; 26 | boolean tenure; 27 | YearMonth startMonth 28 | 29 | @OrcConverter(GraduationConverter) 30 | YearMonth getStartMonth() { 31 | return startMonth 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/test/groovy/com/eclecticlogic/orc/impl/TestBootstrap.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017 Eclectic Logic LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.eclecticlogic.orc.impl 18 | 19 | import com.eclecticlogic.orc.ArrayTest 20 | import com.eclecticlogic.orc.Club 21 | import com.eclecticlogic.orc.Color 22 | import com.eclecticlogic.orc.Course 23 | import com.eclecticlogic.orc.Factory 24 | import com.eclecticlogic.orc.Graduate 25 | import com.eclecticlogic.orc.GraduateDelegate 26 | import com.eclecticlogic.orc.House 27 | import com.eclecticlogic.orc.Level 28 | import com.eclecticlogic.orc.OrcHandle 29 | import com.eclecticlogic.orc.Schema 30 | import com.eclecticlogic.orc.Teacher 31 | import com.google.common.base.Strings 32 | import org.apache.hadoop.fs.Path 33 | 34 | import java.nio.file.Files 35 | import java.nio.file.Paths 36 | import java.time.LocalDate 37 | import java.time.YearMonth 38 | 39 | import org.testng.annotations.Test 40 | 41 | /** 42 | * Created by kabram 43 | */ 44 | @Test 45 | class TestBootstrap { 46 | 47 | void testOrcWriting() { 48 | Schema schema = Factory.createSchema(Graduate) 49 | .withDelegate(GraduateDelegate) 50 | .column { it.chromaticColor } 51 | .column { it.houses } 52 | .column { it.colors } 53 | .column { it.level } 54 | .column {it.club } 55 | .column { it.power } 56 | .column { it.name } 57 | .column { it.age } 58 | .column('money') { it.allowance } 59 | .column('subjects') { it.subjects } 60 | .column('gpa') { it.grades } 61 | .column('subject') { it.course.name } 62 | .column('teacher') { it.course.teacher.name } 63 | .column('tenured') { it.course.teacher.tenure } 64 | .column { it.course.teacher.startMonth } 65 | .column('advisor') { it.mycoursework().teacher.name } 66 | .column { it.courseGrades } 67 | .column { it.courseAudits } 68 | .column { it.courseDates } 69 | .column('initiation') { it.initiationDate } 70 | .delegatedColumn { it.major() } 71 | 72 | OrcHandle handle = Factory.createWriter(schema) 73 | List list = [] 74 | list << new Graduate(name: 'abc', age: 10, allowance: 150.0).with { 75 | it.chromaticColor = Color.WHITE 76 | it.houses 
= [new House(name: 'a'), new House(name: 'b')] 77 | it.colors = [Color.RED, Color.GREEN, Color.BLUE] 78 | it.level = Level.FRESHMEN 79 | it.club = Club.OMEGA 80 | it.subjects << 'english' 81 | it.subjects << 'history' 82 | it.subjects << 'math' 83 | it.grades << 2L 84 | it.grades << 3L 85 | it.grades << 4L 86 | it.subjectGrade = 'A' 87 | it.course = new Course(name: 'Mathematics', teacher: new Teacher(name: 'John Brewer')) 88 | it.graduationDate = YearMonth.of(2020, 12) 89 | it.courseAudits = new ArrayDeque<>().with { 90 | it.add(true) 91 | it.add(false) 92 | it.add(false) 93 | return it 94 | } 95 | it.courseDates = [new Date(), new Date(), new Date()] as Set 96 | it.courseGrades = ['A' as char, 'B' as char, 'Z' as char, 'W' as char] 97 | it.initiationDate = LocalDate.of(2016, 1, 1) 98 | return it 99 | } 100 | list << new Graduate(name: 'def', age: 20, allowance: 250.0).with { 101 | it.chromaticColor = Color.RED 102 | it.subjects << 'math' 103 | it.subjects << 'english' 104 | it.subjects << 'history' 105 | it.grades << 4L 106 | it.grades << 5L 107 | it.grades << 5L 108 | it.subjectGrade = 'A' 109 | it.graduationDate = YearMonth.of(2020, 12) 110 | it.courseAudits = new ArrayDeque<>().with { 111 | it.add(false) 112 | it.add(true) 113 | it.add(false) 114 | return it 115 | } 116 | it.courseDates = [new Date(), new Date(), new Date()] as Set 117 | it.courseGrades = ['A' as char, 'B' as char, 'Z' as char, 'W' as char] 118 | it.course = new Course(name: 'Physics', teacher: new Teacher(name: 'Feynman', tenure: true)) 119 | return it 120 | } 121 | list << new Graduate(name: 'aaa', age: 30, allowance: 350.0) 122 | Path path = new Path(System.getProperty('user.home'),'temp/dp/graduate.orc') 123 | try { 124 | handle.open(path).write(list).close() 125 | } finally { 126 | Files.delete(Paths.get(System.getProperty('user.home'),'temp/dp/graduate.orc')) 127 | } 128 | 129 | } 130 | 131 | 132 | void testVeryLongArray() { 133 | Schema schema = Factory.createSchema(ArrayTest) 134 | 
.column { it.name } 135 | .column { it.grades } 136 | 137 | OrcHandle handle = Factory.createWriter(schema) 138 | List list = [] 139 | 140 | String pattern = 'abcdefghijklmnopqrstuvwxyz' 141 | for (int j = 0; j < 20; j++) { 142 | ArrayTest entry = new ArrayTest(name: 'a') 143 | for (int i = 1; i < 1_000; i++) { 144 | entry.grades << Strings.repeat(pattern[new Random().nextInt(26)], new Random().nextInt(15)) 145 | } 146 | list << entry 147 | } 148 | 149 | Path path = new Path(System.getProperty('user.home'),'temp/dp/array.orc') 150 | try { 151 | handle.open(path).write(list).close() 152 | } 153 | finally { 154 | Files.delete(Paths.get(System.getProperty('user.home'),'temp/dp/array.orc')) 155 | } 156 | } 157 | 158 | } 159 | -------------------------------------------------------------------------------- /src/test/groovy/com/eclecticlogic/orc/impl/TestSchemaImpl.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017 Eclectic Logic LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.eclecticlogic.orc.impl 18 | 19 | import com.eclecticlogic.orc.Factory 20 | import com.eclecticlogic.orc.Graduate 21 | import org.testng.annotations.Test 22 | 23 | /** 24 | * Created by kabram 25 | */ 26 | @Test 27 | class TestSchemaImpl { 28 | 29 | void testFieldNames() { 30 | SchemaSpiImpl schema = Factory.createSchema(Graduate) 31 | .column() {it.name} 32 | .column() {it.age} 33 | .column('money') {it.allowance } 34 | schema.compile() 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /src/test/groovy/com/eclecticlogic/orc/impl/bootstrap/TestGeneratorUtil.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017 Eclectic Logic LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.eclecticlogic.orc.impl.bootstrap 18 | 19 | import com.eclecticlogic.orc.Orc 20 | import org.testng.annotations.Test 21 | 22 | import java.time.LocalDate 23 | import java.time.LocalDateTime 24 | import java.time.ZonedDateTime 25 | 26 | import static org.apache.orc.TypeDescription.Category.* 27 | import static org.testng.Assert.* 28 | 29 | /** 30 | * Created by kabram 31 | */ 32 | @Test 33 | class TestGeneratorUtil { 34 | 35 | class Money extends BigDecimal { 36 | Money(String var1) { 37 | super(var1) 38 | } 39 | } 40 | 41 | class MyDate extends Date {} 42 | 43 | class FunnyProperties { 44 | int getMyScore() { 0 } 45 | int notMyScore() { 0 } 46 | int CanThisBeTrue() { 0 } 47 | boolean isThisTrue() { 0 } 48 | } 49 | 50 | enum WarType { 51 | LOCAL, GLOBAL 52 | 53 | @Orc 54 | int getIntensity() { 0 } 55 | } 56 | 57 | enum GameType { 58 | RPG, MMORG 59 | } 60 | 61 | void testPrimitiveDefaultValues() { 62 | assertEquals GeneratorUtil.getDefaultValueForPrimitiveType(Boolean.TYPE) as boolean, false 63 | assertEquals GeneratorUtil.getDefaultValueForPrimitiveType(Character.TYPE), '\u0000' as char 64 | assertEquals GeneratorUtil.getDefaultValueForPrimitiveType(Byte.TYPE), 0 65 | assertEquals GeneratorUtil.getDefaultValueForPrimitiveType(Short.TYPE), 0 66 | assertEquals GeneratorUtil.getDefaultValueForPrimitiveType(Integer.TYPE) as int, 0 67 | assertEquals GeneratorUtil.getDefaultValueForPrimitiveType(Long.TYPE), 0 68 | assertEquals GeneratorUtil.getDefaultValueForPrimitiveType(Float.TYPE) as float, 0.0f 69 | assertEquals GeneratorUtil.getDefaultValueForPrimitiveType(Double.TYPE) as double, 0.0d 70 | } 71 | 72 | void testCategoriesByBasicTypes() { 73 | assertEquals GeneratorUtil.getCategoryByBasicType(Boolean.TYPE), BOOLEAN 74 | assertEquals GeneratorUtil.getCategoryByBasicType(Character.TYPE), CHAR 75 | assertEquals GeneratorUtil.getCategoryByBasicType(Byte.TYPE), BYTE 76 | assertEquals GeneratorUtil.getCategoryByBasicType(Short.TYPE), SHORT 77 | 
assertEquals GeneratorUtil.getCategoryByBasicType(Integer.TYPE), INT 78 | assertEquals GeneratorUtil.getCategoryByBasicType(Long.TYPE), LONG 79 | assertEquals GeneratorUtil.getCategoryByBasicType(Float.TYPE), FLOAT 80 | assertEquals GeneratorUtil.getCategoryByBasicType(Double.TYPE), DOUBLE 81 | } 82 | 83 | void testCategoriesByAssignableTypes() { 84 | assertEquals GeneratorUtil.getCategoryByAssignableType(Money), DECIMAL 85 | assertEquals GeneratorUtil.getCategoryByAssignableType(Set), LIST 86 | assertEquals GeneratorUtil.getCategoryByAssignableType(Date), TIMESTAMP 87 | assertEquals GeneratorUtil.getCategoryByAssignableType(LocalDate), DATE 88 | assertEquals GeneratorUtil.getCategoryByAssignableType(LocalDateTime), TIMESTAMP 89 | assertEquals GeneratorUtil.getCategoryByAssignableType(ZonedDateTime), TIMESTAMP 90 | assertEquals GeneratorUtil.getCategoryByAssignableType(MyDate), TIMESTAMP 91 | } 92 | 93 | void testPrimitiveAccessorByType() { 94 | assertEquals GeneratorUtil.getPrimitiveAccessorByType(Boolean), 'booleanValue()' 95 | assertEquals GeneratorUtil.getPrimitiveAccessorByType(Byte), 'byteValue()' 96 | assertEquals GeneratorUtil.getPrimitiveAccessorByType(Character), 'charValue()' 97 | assertEquals GeneratorUtil.getPrimitiveAccessorByType(Short), 'shortValue()' 98 | assertEquals GeneratorUtil.getPrimitiveAccessorByType(Integer), 'intValue()' 99 | assertEquals GeneratorUtil.getPrimitiveAccessorByType(Long), 'longValue()' 100 | assertEquals GeneratorUtil.getPrimitiveAccessorByType(Float), 'floatValue()' 101 | assertEquals GeneratorUtil.getPrimitiveAccessorByType(Double), 'doubleValue()' 102 | } 103 | 104 | void testPropertyName() { 105 | assertEquals GeneratorUtil.getPropertyName(FunnyProperties.getMethod('getMyScore')), 'myScore' 106 | assertEquals GeneratorUtil.getPropertyName(FunnyProperties.getMethod('notMyScore')), 'notMyScore' 107 | assertEquals GeneratorUtil.getPropertyName(FunnyProperties.getMethod('CanThisBeTrue')), 'canThisBeTrue' 108 | assertEquals 
GeneratorUtil.getPropertyName(FunnyProperties.getMethod('isThisTrue')), 'thisTrue' 109 | } 110 | 111 | void testGetAnnotatedMethodInEnum() { 112 | assertTrue GeneratorUtil.getAnnotatedMethodInEnum(WarType).present 113 | assertEquals GeneratorUtil.getAnnotatedMethodInEnum(WarType).get(), WarType.getMethod('getIntensity') 114 | assertFalse GeneratorUtil.getAnnotatedMethodInEnum(GameType).present 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /src/test/groovy/com/eclecticlogic/orc/impl/schema/TestAbstractSchemaColumn.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017 Eclectic Logic LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.eclecticlogic.orc.impl.schema 18 | 19 | import com.eclecticlogic.orc.Converter 20 | import com.eclecticlogic.orc.Orc 21 | import com.eclecticlogic.orc.OrcTemporal 22 | import com.eclecticlogic.orc.OrcTemporalType 23 | import org.testng.annotations.Test 24 | 25 | import javax.persistence.Temporal 26 | import javax.persistence.TemporalType 27 | import java.lang.annotation.Annotation 28 | import java.lang.reflect.Method 29 | import java.time.LocalDate 30 | import java.time.LocalDateTime 31 | import java.time.ZonedDateTime 32 | 33 | import static org.apache.orc.TypeDescription.Category.* 34 | import static org.testng.Assert.assertEquals 35 | 36 | /** 37 | * Created by kabram 38 | */ 39 | @Test 40 | class TestAbstractSchemaColumn { 41 | 42 | class SampleClass { 43 | String getName() { return null } 44 | } 45 | 46 | void testGetColumnClassType() { 47 | AbstractSchemaColumn underTest1 = new AbstractSchemaColumn() { 48 | @Override 49 | Converter getConverter() { 50 | return null 51 | } 52 | 53 | @Override 54 | Method getLastAccessorMethod() { 55 | return SampleClass.getMethod('getName') 56 | } 57 | } 58 | 59 | assertEquals(underTest1.columnClassType, String) 60 | 61 | AbstractSchemaColumn underTest2 = new AbstractSchemaColumn() { 62 | @Override 63 | Converter getConverter() { 64 | return new Converter() { 65 | @Override 66 | Class getConvertedClass() { 67 | return BigDecimal 68 | } 69 | 70 | @Override 71 | Object convert(Object instance) { 72 | return null 73 | } 74 | } 75 | } 76 | } 77 | 78 | assertEquals(underTest2.columnClassType, BigDecimal) 79 | } 80 | 81 | class Dollar extends BigDecimal { 82 | Dollar(String s) { super (s)} 83 | } 84 | 85 | void testGetCategory() { 86 | AbstractSchemaColumn column = new AbstractSchemaColumn() { 87 | def A getAnnotation(Class clz) { 88 | return null 89 | } 90 | } 91 | AbstractSchemaColumn column2 = new AbstractSchemaColumn() { 92 | def A getAnnotation(Class clz) { 93 | return new Orc() { 94 | @Override 95 
| int length() { 96 | return 10 97 | } 98 | 99 | @Override 100 | int precision() { 101 | return 0 102 | } 103 | 104 | @Override 105 | int scale() { 106 | return 0 107 | } 108 | 109 | @Override 110 | Class annotationType() { 111 | return null 112 | } 113 | } 114 | } 115 | } 116 | assertEquals(column._getCategory(null), STRUCT) 117 | assertEquals(column._getCategory(SampleClass), STRUCT) 118 | assertEquals(column._getCategory(String), STRING) 119 | assertEquals(column._getCategory(BigDecimal), DECIMAL) 120 | assertEquals(column._getCategory(Dollar), DECIMAL) 121 | assertEquals(column._getCategory(Boolean.TYPE), BOOLEAN) 122 | assertEquals(column2._getCategory(String), VARCHAR) 123 | assertEquals(column._getCategory(LocalDate), DATE) 124 | assertEquals(column._getCategory(LocalDateTime), TIMESTAMP) 125 | assertEquals(column._getCategory(Date), TIMESTAMP) 126 | assertEquals(column._getCategory(ZonedDateTime), TIMESTAMP) 127 | 128 | AbstractSchemaColumn column3 = new AbstractSchemaColumn() { 129 | def A getAnnotation(Class clz) { 130 | return new OrcTemporal() { 131 | @Override 132 | OrcTemporalType value() { 133 | return OrcTemporalType.DATE 134 | } 135 | 136 | @Override 137 | Class annotationType() { 138 | return null 139 | } 140 | } 141 | } 142 | } 143 | assertEquals(column3._getCategory(LocalDateTime), DATE) 144 | assertEquals(column3._getCategory(Date), DATE) 145 | assertEquals(column3._getCategory(ZonedDateTime), DATE) 146 | } 147 | 148 | 149 | void testGetAnnotationBasedDateCategory() { 150 | AbstractSchemaColumn column = new AbstractSchemaColumn() { 151 | Annotation returnValue 152 | def A getAnnotation(Class clz) { 153 | if (returnValue == null) { 154 | return null 155 | } 156 | if (clz.isAssignableFrom(returnValue.class)) { 157 | return returnValue 158 | } 159 | return null 160 | } 161 | } 162 | assertEquals(column.annotationBasedDateCategory, TIMESTAMP) 163 | column.returnValue = new Temporal() { 164 | @Override 165 | TemporalType value() { 166 | return 
TemporalType.TIMESTAMP 167 | } 168 | 169 | @Override 170 | Class annotationType() { 171 | return null 172 | } 173 | } 174 | assertEquals(column.annotationBasedDateCategory, TIMESTAMP) 175 | column.returnValue = new Temporal() { 176 | @Override 177 | TemporalType value() { 178 | return TemporalType.DATE 179 | } 180 | 181 | @Override 182 | Class annotationType() { 183 | return null 184 | } 185 | } 186 | assertEquals(column.annotationBasedDateCategory, DATE) 187 | column.returnValue = new OrcTemporal() { 188 | @Override 189 | OrcTemporalType value() { 190 | return OrcTemporalType.DATE 191 | } 192 | 193 | @Override 194 | Class annotationType() { 195 | return null 196 | } 197 | } 198 | assertEquals(column.annotationBasedDateCategory, DATE) 199 | column.returnValue = new OrcTemporal() { 200 | @Override 201 | OrcTemporalType value() { 202 | return OrcTemporalType.TIMESTAMP 203 | } 204 | 205 | @Override 206 | Class annotationType() { 207 | return null 208 | } 209 | } 210 | assertEquals(column.annotationBasedDateCategory, TIMESTAMP) 211 | } 212 | 213 | enum Month { 214 | JAN, FEB, MAR 215 | } 216 | 217 | enum Animal { 218 | DOG('D' as char), CAT('C' as char) 219 | 220 | char code 221 | 222 | Animal(char code) { 223 | this.code = code 224 | } 225 | 226 | @Orc 227 | char getCode() { 228 | return code 229 | } 230 | } 231 | 232 | void testGetEnumCategory() { 233 | AbstractSchemaColumn column = new AbstractSchemaColumn() 234 | assertEquals(column.getEnumCategory(Month), STRING) 235 | assertEquals(column.getEnumCategory(Animal), CHAR) 236 | } 237 | } 238 | -------------------------------------------------------------------------------- /src/test/resources/logback-test.xml: -------------------------------------------------------------------------------- 1 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | %d{HH:mm:ss.SSS} [%thread] %X{adjustmentJobId} %-5level %logger{36} - %msg%n 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 
-------------------------------------------------------------------------------- /src/test/resources/orc-testng-suite.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | --------------------------------------------------------------------------------