├── .gitignore
├── LICENSE
├── README.md
├── pom.xml
└── src
├── main
├── java
│ └── com
│ │ └── eclecticlogic
│ │ └── orc
│ │ ├── Converter.java
│ │ ├── Factory.java
│ │ ├── Orc.java
│ │ ├── OrcConverter.java
│ │ ├── OrcHandle.java
│ │ ├── OrcList.java
│ │ ├── OrcTemporal.java
│ │ ├── OrcTemporalType.java
│ │ ├── OrcWriter.java
│ │ ├── Schema.java
│ │ └── impl
│ │ ├── AbstractOrcWriter.java
│ │ ├── Column.java
│ │ ├── PropertyInterceptor.java
│ │ ├── ProxyManager.java
│ │ ├── SchemaFilter.java
│ │ ├── SchemaSpi.java
│ │ ├── SchemaSpiImpl.java
│ │ ├── bootstrap
│ │ ├── GeneratorUtil.java
│ │ └── OrcWriterBootstrap.java
│ │ └── schema
│ │ ├── AbstractSchemaColumn.java
│ │ ├── ComplexType.java
│ │ ├── GenInfo.java
│ │ ├── ListChildSchemaColumn.java
│ │ ├── SchemaColumn.java
│ │ ├── Template.java
│ │ ├── TypeDesc.java
│ │ └── TypeInfo.java
└── resources
│ └── eclectic
│ └── orc
│ └── template
│ ├── classShell.stg
│ ├── methodCreateTypeDescription.stg
│ ├── methodSpecialCaseSetup.stg
│ └── methodWrite.stg
└── test
├── groovy
└── com
│ └── eclecticlogic
│ └── orc
│ ├── ArrayTest.java
│ ├── ChromaticConverter.groovy
│ ├── Club.groovy
│ ├── Color.groovy
│ ├── Course.groovy
│ ├── Graduate.groovy
│ ├── GraduateDelegate.groovy
│ ├── GraduationConverter.groovy
│ ├── House.groovy
│ ├── HouseConverter.groovy
│ ├── Level.groovy
│ ├── Power.groovy
│ ├── Student.groovy
│ ├── Teacher.groovy
│ └── impl
│ ├── TestBootstrap.groovy
│ ├── TestSchemaImpl.groovy
│ ├── bootstrap
│ └── TestGeneratorUtil.groovy
│ └── schema
│ └── TestAbstractSchemaColumn.groovy
└── resources
├── logback-test.xml
└── orc-testng-suite.xml
/.gitignore:
--------------------------------------------------------------------------------
1 | *.class
2 |
3 | # Mobile Tools for Java (J2ME)
4 | .mtj.tmp/
5 |
6 | # Package Files #
7 | *.jar
8 | *.war
9 | *.ear
10 | eclectic-orc.iml
11 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
12 | hs_err_pid*
13 | target/
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "{}"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright {yyyy} {name of copyright owner}
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Eclectic-ORC
2 | =====
3 |
4 | Eclectic-ORC is a Java object writer for creating ORC files by simply annotating your class files as necessary.
5 | The framework uses runtime code generation to create a fast customized ORC writer taking care of all the low-level details.
6 |
7 | ## Feature Highlights
8 |
9 | - Declarative Schema Definition
10 | - Annotated column specification (use `@Orc` or JPA `@Column` annotations)
11 |
12 | # Getting Started
13 |
14 | Download the eclectic-orc jar from Maven central:
15 |
16 | ```
17 | <dependency>
18 |     <groupId>com.eclecticlogic</groupId>
19 |     <artifactId>eclectic-orc</artifactId>
20 |     <version>1.0.9</version>
21 | </dependency>
22 | ```
23 |
24 | Minimum dependencies that you need to provide in your application:
25 |
26 | 1. Java 8 or above (the design leverages method references and lambdas extensively)
27 | 1. slf4j (over logback or log4j) v1.7.23 or higher
28 |
29 | ## A simple example
30 |
31 | Consider a simple class that you want to serialize to an ORC file:
32 |
33 | ```java
34 | public class Student {
35 | int year;
36 | String name;
37 |
38 | public String getName() {
39 | ...
40 | }
41 |
42 | public int getYear() {
43 | ...
44 | }
45 |
46 | ...
47 | }
48 | ```
49 |
50 | To write a collection of Students to an ORC file, you first have to provide a schema definition.
51 | The eclectic-orc library makes doing this trivial:
52 |
53 | ```java
54 | import com.eclecticlogic.orc.Factory;
55 | import com.eclecticlogic.orc.Schema;
56 |
57 | ...
58 |
59 | public void schemaSetup() {
60 | Schema schema = Factory.createSchema(Student.class)
61 | .column(Student::getName) //
62 | .column(Student::getYear);
63 | }
64 | ```
65 |
66 | The above schema definition implicitly does three things:
67 |
68 | 1. It defines the order of the columns (first name then year)
69 | 1. It defines the data types of the columns (String, int)
70 | 1. It defines the names of the columns (name, year)
71 |
72 | The library allows you to customize aspects of the schema. Let us start with column names.
73 | If you want the *year* column to be called *graduationYear*, simply change the
74 | schema column definition.
75 |
76 | ```java
77 | Schema schema = Factory.createSchema(Student.class)
78 | .column(Student::getName) //
79 | .column("graduationYear", Student::getYear);
80 | ```
81 |
82 | You can also define columns based on properties of other classes that are referenced. If the `Student` class referenced a Club class as shown below:
83 |
84 | ```java
85 |
86 | public class Club {
87 | String name;
88 |
89 | public String sanitizedClubName() {
90 | return ...
91 | }
92 | }
93 |
94 | public class Student {
95 | Club club;
96 |
97 | public Club getClub() {
98 | return club;
99 | }
100 | }
101 | ```
102 | You can reference the club name in your schema definition by chaining the call as `getClub().sanitizedClubName()`.
103 | The astute reader would have noticed that sanitizedClubName() is not a java-bean compliant getter. That is right.
104 | eclectic-orc does not restrict you to just java-bean getters. Any method that takes no parameters and returns a non-void
105 | type can be used for a column definition. A schema to incorporate the above definition would look like this.
106 |
107 | ```java
108 | Schema schema = Factory.createSchema(Student.class)
109 | .column(Student::getName) //
110 | .column("graduationYear", Student::getYear)
111 | .column(it -> it.getClub().sanitizedClubName());
112 | ```
113 | We've now defined a third column of type `String` and given it an implicit name of "sanitizedClubName". Of course, just like
114 | before you can choose to change the name to something else. The same definition in Groovy could be written as:
115 |
116 | ```groovy
117 | Schema schema = Factory.createSchema(Student)
118 | .column { it.name }
119 | .column('graduationYear') { it.year }
120 | .column { it.club.sanitizedClubName() }
121 | ```
122 |
123 | To write a collection of `Student` objects, we simply create an OrcHandle reference, configure it, open it to get an OrcWriter
124 | reference and write our collection.
125 |
126 | ```java
127 | import org.apache.hadoop.fs.Path;
128 |
129 | // First get an OrcHandle reference.
130 | OrcHandle handle = Factory.createWriter(schema);
131 | // Customize it by calling one of the withXYZ() methods. This is optional as defaults are provided.
132 |
133 | // Create an OrcWriter by calling open.
134 | Path path = new Path("/home/kabram/temp/dp/graduate.orc");
135 |
136 | OrcWriter writer = handle.open(path);
137 | List<Student> students = ...
138 | // The write method may be called multiple times if you are retrieving objects in batches.
139 | writer.write(students);
140 | writer.close();
141 | ```
142 |
143 | In simple cases, the above code can be written as:
144 |
145 | ```java
146 | Factory.createWriter(schema) //
147 | .open(new Path("/home/kabram/temp/dp/graduate.orc")) //
148 | .write(students) //
149 | .close();
150 | ```
151 |
152 | ### Data Type Support
153 |
154 | The following data types are **supported** in the current release:
155 |
156 | 1. Java primitive types - `boolean`, `char`, `byte`, `short`, `int`, `long`, `float`, `double`. These map to their corresponding counterparts
157 | with the exception of `char` which maps to `varchar(1)`. The exception for `char` is because AWS Athena is currently unable to handle `char` column types.
158 | 2. `BigDecimal` mapping to ORC `Decimal` type.
159 | 3. `LocalDate` mapping to ORC `Date` type.
160 | 4. `Date`, `LocalDateTime`, `ZonedDateTime` mapping to ORC `Timestamp` type unless there is either a JPA `@Temporal` or `@OrcTemporal` annotation
161 | that defines the `TemporalType` (or `OrcTemporalType`) as `DATE`.
162 | 5. `String` mapping to ORC `string` type.
163 | 6. Any derivative of `Iterable` mapping to ORC `List` type, currently supporting only simple types as the member. See below for how to use lists.
164 |
165 | The following data types are **not supported** in the current release:
166 |
167 | 1. `Binary` data type.
168 | 2. `Map`
169 | 3. `Union`
170 | 4. Sub-structures (`Struct` within your table, map of structs, list of structs, etc.)
171 |
172 | #### Special cases
173 |
174 | ##### String length specification
175 |
176 | To specify the number of characters for a String column type, simply use the `@Orc` annotation. If the framework finds
177 | an existing JPA `@Column` annotation, it will use the length property of that as well. If both annotations are present,
178 | the `@Orc` annotation takes precedence. The `@Orc` annotation is only supported on methods.
179 |
180 | ```java
181 | public class Student {
182 | String name;
183 |
184 | @Orc(length = 50)
185 | public String getName() {
186 | return name;
187 | }
188 | }
189 | ```
190 |
191 | ##### Decimal precision/scale specification
192 |
193 | You can also specify the precision and scale of `BigDecimal` data type by using the JPA `@Column` or `@Orc` annotations.
194 | By default, the precision is 38 and scale is 10. This can be changed via annotation:
195 |
196 | ```java
197 | public class Employee {
198 | BigDecimal salary;
199 |
200 | @Orc(precision = 10, scale = 2)
201 | public BigDecimal getSalary() {
202 |
203 | }
204 | }
205 | ```
206 |
207 | ##### Converting data types
208 |
209 | There may be times you want to write a data type that is not a supported type. For example, you may have a birthday property
210 | that only records the year and month using the `java.time.YearMonth` class. You can handle these column types by defining a type
211 | converter, a class that implements the `Converter` interface. In our example, to convert `YearMonth` to `LocalDate`,
212 | defaulting to the first day of the month, we could write:
213 |
214 | ```java
215 | public class YearMonthConverter implements Converter<YearMonth, LocalDate> {
216 |
217 | @Override
218 | public Class<LocalDate> getConvertedClass() {
219 | return LocalDate.class;
220 | }
221 |
222 |
223 | @Override
224 | public LocalDate convert(YearMonth yearMonth) {
225 | return yearMonth.atDay(1);
226 | }
227 | }
228 | ```
229 |
230 | We can now annotate the `YearMonth` accessor with the `@OrcConverter` annotation:
231 |
232 | ```java
233 | public class Employee {
234 | YearMonth birthday;
235 |
236 | @OrcConverter(YearMonthConverter.class)
237 | public YearMonth getBirthday() {
238 | ...
239 | }
240 | }
241 |
242 | ...
243 |
244 | Schema schema = Factory.createSchema(Employee.class) //
245 | .column(Employee::getBirthday) // This is now a LocalDate data type.
246 | ```
247 |
248 | ##### Java Enum
249 | Java Enums require special handling to convert them to a specific data type. There are three ways to handle enums.
250 |
251 | 1. Do nothing: If your schema column is an `Enum` derivative, then the column will be treated as a `String` with the `name()`
252 | method being called to get the value.
253 | 1. Annotation: Annotate a custom enum method with `@Orc`. If you have a method in your `Enum` class that provides the value
254 | you would like to store, you can add the `@Orc` annotation to it.
255 | 1. Converter: Annotate your accessor method that returns an `Enum` with `@OrcConverter` specifying a converter that takes your
256 | enum and returns a supported data-type.
257 |
258 | ##### Handling lists
259 |
260 | Eclectic-orc supports creation of list columns that can hold a single scalar data type. To include a list column in the schema
261 | definition, annotate the accessor method with the `@OrcList` annotation. Strictly speaking, any derivative of `java.lang.Iterable`
262 | is supported. The `@OrcList` annotation requires you to specify the `Class` of the entries of the `Iterable`. This is because the
263 | type information is lost at runtime due to type-erasure. You also need to specify the average number of entries you expect to
264 | see in the list. This is a technical implementation detail due to the way lists are stored in ORC files. Finally, there is
265 | a converter attribute you can use to convert each item of the `Iterable` to a different type. Note: If you annotate the list
266 | accessor with `@OrcConverter`, you will be modifying the `List`/`Iterable` itself into some other data type.
267 |
268 | If your `Iterable` consists of `Enum` instances, the existing strategy for enums is automatically used - using an enum method
269 | annotated with `@Orc` or calling `name()`.
270 |
271 | # Custom columns
272 |
273 | If your collection member class does not have a method that gets you a column value that you need, i.e., you need to compute the value
274 | on the fly based on existing methods in the class, you can create a delegate class that accepts the collection member class as a
275 | constructor parameter and then implement your logic in the delegate class and use that method in the column definition.
276 |
277 | ```java
278 | Schema schema = Factory.createSchema(Student.class)
279 | .withDelegate(StudentDelegate.class)
280 | .delegatedColumn("someProperty", StudentDelegate::getLastFirstName)
281 | ...
282 | ```
283 |
284 | The StudentDelegate class would be something like this
285 |
286 | ```java
287 | class StudentDelegate {
288 |
289 | Student delegate
290 |
291 | StudentDelegate(Student delegate) {
292 | this.delegate = delegate
293 | }
294 |
295 |
296 | String getLastFirstName() {
297 | return delegate.getLastName() + ", " + delegate.getFirstName();
298 | }
299 | }
300 |
301 | ```
302 |
303 | # Release Notes
304 |
305 | # 1.0.9
306 |
307 | - Reverted usage of JOOR and brought back Javassist since JOOR cannot handle fat-jar that spring boot generates.
308 |
309 | # 1.0.6
310 |
311 | - Temporary fix for compiler classpath issue with JOOR.
312 |
313 | # 1.0.5
314 |
315 | - Switch to JOOR for runtime compilation (better support for Java 9+)
316 | - Fixed bug in array allocation for list columns.
317 |
318 | # 1.0.3
319 |
320 | - Added delegate concept for computed columns.
321 |
322 | # 1.0.2
323 |
324 | - Bug fix in bootstrap - incorrectly caching instance instead of class.
325 | - Bug fix in OrcWriter.withOptions() method.
326 |
327 | ### 1.0.0
328 |
329 | - Initial release
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 |
2 | 4.0.0
3 |
4 | Orc Writer
5 | com.eclecticlogic
6 | eclectic-orc
7 | 1.1.2-SNAPSHOT
8 | jar
9 |
10 | Supports writing Java objects to ORC files.
11 | https://github.com/eclecticlogic/orc
12 |
13 |
14 | The Apache Software License, Version 2.0
15 | http://www.apache.org/licenses/LICENSE-2.0.txt
16 | Repo
17 |
18 |
19 |
20 |
21 | git@github.com:eclecticlogic/eclectic-orc.git
22 | scm:git:git@github.com:eclecticlogic/eclectic-orc.git
23 | scm:git:git@github.com:eclecticlogic/eclectic-orc.git
24 | HEAD
25 |
26 |
27 |
28 |
29 | kabram
30 | Karthik Abram
31 | karthik@eclecticlogic.com
32 |
33 |
34 |
35 |
36 |
37 | release
38 |
39 |
40 | performRelease
41 | true
42 |
43 |
44 |
45 |
46 |
47 | org.apache.maven.plugins
48 | maven-source-plugin
49 | 2.2.1
50 |
51 |
52 | attach-sources
53 |
54 | jar-no-fork
55 |
56 |
57 |
58 |
59 |
60 | org.apache.maven.plugins
61 | maven-javadoc-plugin
62 | 2.9.1
63 |
64 |
65 | attach-javadocs
66 |
67 | jar
68 |
69 |
70 |
71 |
72 | -Xdoclint:none
73 |
74 |
75 |
76 | org.apache.maven.plugins
77 | maven-gpg-plugin
78 |
79 |
80 | sign-artifacts
81 | verify
82 |
83 | sign
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 | org.codehaus.gmavenplus
99 | gmavenplus-plugin
100 | 3.0.0
101 |
102 |
103 |
104 | addTestSources
105 | testCompile
106 |
107 |
108 |
109 |
110 |
111 | org.apache.maven.plugins
112 | maven-compiler-plugin
113 | 3.11.0
114 |
115 | ${env.JAVA_HOME}/bin/javac
116 | 1.8
117 | 1.8
118 | 1.8
119 |
120 |
121 |
122 |
123 | org.apache.maven.plugins
124 | maven-release-plugin
125 | 3.0.1
126 |
127 | forked-path
128 | true
129 | false
130 | release
131 | deploy
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 | org.apache.maven.plugins
141 | maven-surefire-plugin
142 | 3.1.2
143 |
144 | ${env.JAVA_HOME}/bin/java
145 |
146 | src/test/resources/orc-testng-suite.xml
147 |
148 |
149 |
150 |
151 |
152 | org.sonatype.plugins
153 | nexus-staging-maven-plugin
154 | 1.6.7
155 | true
156 |
157 | ossrh
158 | https://oss.sonatype.org/
159 | true
160 |
161 |
162 |
163 |
164 |
165 |
166 |
167 |
168 | ossrh
169 | https://oss.sonatype.org/content/repositories/snapshots
170 |
171 |
172 |
173 |
174 |
175 | org.apache.orc
176 | orc-core
177 | ${version.orc}
178 |
179 |
180 | log4j
181 | log4j
182 |
183 |
184 | org.slf4j
185 | slf4j-log4j12
186 |
187 |
188 |
189 |
190 | org.apache.hadoop
191 | hadoop-common
192 | 3.3.6
193 |
194 |
195 | org.apache.hadoop
196 | hadoop-hdfs-client
197 | 3.3.6
198 |
199 |
200 | org.apache.hive
201 | hive-storage-api
202 | 2.8.1
203 |
204 |
205 | com.google.protobuf
206 | protobuf-java
207 | 3.24.3
208 |
209 |
210 | cglib
211 | cglib-nodep
212 | 3.2.4
213 |
214 |
215 | org.antlr
216 | stringtemplate
217 | 4.0.2
218 |
219 |
220 | org.hibernate.javax.persistence
221 | hibernate-jpa-2.1-api
222 | 1.0.0.Final
223 |
224 |
225 | org.slf4j
226 | slf4j-api
227 | ${version.slf4j}
228 | jar
229 | provided
230 |
231 |
232 | org.javassist
233 | javassist
234 | 3.21.0-GA
235 |
236 |
237 |
238 |
239 | org.codehaus.groovy
240 | groovy-all
241 | 2.4.8
242 | test
243 |
244 |
245 |
246 | org.testng
247 | testng
248 | 7.5.1
249 | test
250 |
251 |
252 | ch.qos.logback
253 | logback-classic
254 | 1.3.11
255 | test
256 |
257 |
258 |
259 |
260 |
261 | 2.0.9
262 | 1.9.1
263 |
264 |
265 |
--------------------------------------------------------------------------------
/src/main/java/com/eclecticlogic/orc/Converter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2017 Eclectic Logic LLC
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.eclecticlogic.orc;
18 |
/**
 * Converts from a user-type {@code U} to an orc-compatible type {@code T}. Use this with the
 * {@code @OrcConverter} annotation to convert custom data-types to orc-compatible types.
 *
 * @param <U> user-data type
 * @param <T> converted orc-compatible type
 */
public interface Converter<U, T> {

    /**
     * @return Class of the orc-compatible type.
     */
    Class<T> getConvertedClass();

    /**
     * @param instance Instance of your domain specific type.
     * @return Converted value.
     */
    T convert(U instance);
}
38 |
--------------------------------------------------------------------------------
/src/main/java/com/eclecticlogic/orc/Factory.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2017 Eclectic Logic LLC
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.eclecticlogic.orc;
18 |
19 | import com.eclecticlogic.orc.impl.SchemaSpi;
20 | import com.eclecticlogic.orc.impl.bootstrap.OrcWriterBootstrap;
21 | import com.eclecticlogic.orc.impl.SchemaSpiImpl;
22 |
23 | /**
24 | * This is the main class to interact with the eclectic-orc library: define a Schema with createSchema, then pass it to createWriter.
25 | * Created by kabram
26 | */
27 | public class Factory {
28 |
29 | /**
30 | * @param clz The class of objects you want to write to your orc file.
31 | * @param The type of objects you want to write.
32 | * @return Schema creator to specify the orc file schema; the returned object is a SchemaSpiImpl built from clz.
33 | */
34 | public static Schema createSchema(Class clz) {
35 | return new SchemaSpiImpl(clz);
36 | }
37 |
38 |
39 | /**
40 | * @param schema The schema for the orc file you want to create. It is cast to SchemaSpi, so presumably it must come from createSchema - TODO confirm.
41 | * @param The type of the object you are working with.
42 | * @return Instance that allows you to configure, open and write to your data.
43 | */
44 | public static OrcHandle createWriter(Schema schema) {
45 | return OrcWriterBootstrap.create((SchemaSpi)schema);
46 | }
47 | }
48 |
--------------------------------------------------------------------------------
/src/main/java/com/eclecticlogic/orc/Orc.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2017 Eclectic Logic LLC
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.eclecticlogic.orc;
18 |
19 | import java.lang.annotation.*;
20 |
21 | /**
22 | * Annotates additional properties of a column for type definition. Note: If your class has JPA @Column annotations, those can be used
23 | * in lieu of this annotation. If both are specified, this annotation takes precedence.
24 | * The annotation can only be placed on methods and is retained at runtime.
25 | * Created by kabram
26 | */
27 | @Retention(RetentionPolicy.RUNTIME)
28 | @Target(ElementType.METHOD)
29 | @Inherited
30 | public @interface Orc {
31 |
32 | /**
33 | * @return Length of the field. Applicable only to string. If left at the default of 0, the output column will be of type
34 | * string. Otherwise the output column will be of type varchar(length).
35 | */
36 | int length() default 0;
37 |
38 |
39 | /**
40 | * @return Precision for BigDecimal type. Defaults to 0.
41 | */
42 | int precision() default 0;
43 |
44 |
45 | /**
46 | * @return Scale for BigDecimal type. Defaults to 0.
47 | */
48 | int scale() default 0;
49 | }
50 |
--------------------------------------------------------------------------------
/src/main/java/com/eclecticlogic/orc/OrcConverter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2017 Eclectic Logic LLC
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.eclecticlogic.orc;
18 |
19 | import java.lang.annotation.ElementType;
20 | import java.lang.annotation.Inherited;
21 | import java.lang.annotation.Retention;
22 | import java.lang.annotation.RetentionPolicy;
23 | import java.lang.annotation.Target;
24 |
25 | /**
26 |  * Place this on a method (the annotation's only target) that yields a domain specific type to define a converter that can convert your data-type to an orc-compatible one.
27 |  * Created by kabram
28 |  */
29 | @Retention(RetentionPolicy.RUNTIME)
30 | @Target(ElementType.METHOD)
31 | @Inherited
32 | public @interface OrcConverter {
33 |
34 |     /**
35 |      * @return Converter class to use for type-conversion.
36 |      */
37 |     Class<? extends Converter> value();
38 | }
39 |
--------------------------------------------------------------------------------
/src/main/java/com/eclecticlogic/orc/OrcHandle.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2017 Eclectic Logic LLC
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.eclecticlogic.orc;
18 |
19 | import org.apache.hadoop.conf.Configuration;
20 | import org.apache.hadoop.fs.Path;
21 | import org.apache.orc.CompressionKind;
22 | import org.apache.orc.OrcFile.WriterOptions;
23 |
24 | /**
25 | * This is the interface to configure and open an orc file. Get an instance of this from the Factory by passing in a Schema definition.
26 | * Created by kabram
27 | */
28 | public interface OrcHandle {
29 |
30 | /**
31 | * @param configuration Configuration to use. This is optional. Note: AbstractOrcWriter only consults it when no explicit writerOptions object was passed in.
32 | * @return self reference for fluent interface.
33 | */
34 | OrcHandle withConfiguration(Configuration configuration);
35 |
36 | /**
37 | * @param writerOptions Writer options to use. This is optional; when omitted, AbstractOrcWriter derives the options from the configuration.
38 | * @return self reference for fluent interface.
39 | */
40 | OrcHandle withOptions(WriterOptions writerOptions);
41 |
42 | /**
43 | * @param compressionKind Compression to use. This value will overwrite any setting passed in WriterOptions.
44 | * @return self reference for fluent interface.
45 | */
46 | OrcHandle withCompression(CompressionKind compressionKind);
47 |
48 | /**
49 | * @param size Buffer size to use. This value will overwrite any setting passed in WriterOptions.
50 | * @return self reference for fluent interface.
51 | */
52 | OrcHandle withBufferSize(int size);
53 |
54 | /**
55 | * @param batchSize Vector batch size to use.
56 | * @return self reference for fluent interface.
57 | */
58 | OrcHandle withBatchSize(int batchSize);
59 |
60 |
61 | /**
62 | * @param path Path to write orc file to.
63 | * @return Writer used to write data to the opened file and to close it.
64 | */
65 | OrcWriter open(Path path);
66 |
67 | }
68 |
--------------------------------------------------------------------------------
/src/main/java/com/eclecticlogic/orc/OrcList.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2017 Eclectic Logic LLC
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.eclecticlogic.orc;
18 |
19 | import java.lang.annotation.ElementType;
20 | import java.lang.annotation.Inherited;
21 | import java.lang.annotation.Retention;
22 | import java.lang.annotation.RetentionPolicy;
23 | import java.lang.annotation.Target;
24 |
25 | /**
26 |  * Annotates a list (strictly, any derivative of java.lang.Iterable) return type to denote child data type and average collection size.
27 |  * Created by kabram
28 |  */
29 | @Retention(RetentionPolicy.RUNTIME)
30 | @Target(ElementType.METHOD)
31 | @Inherited
32 | public @interface OrcList {
33 |
34 |     /**
35 |      * @return Type of the elements. Due to type-erasure, this information is lost at runtime in code. Therefore we attempt to explicitly
36 |      * capture it.
37 |      */
38 |     Class<?> entryType();
39 |
40 |
41 |     /**
42 |      * @return Average size in bytes of the elements.
43 |      */
44 |     int elementSize() default 1;
45 |
46 |
47 |     /**
48 |      * @return Average number of elements in the collection.
49 |      */
50 |     int averageSize() default 1;
51 |
52 |
53 |     /**
54 |      * @return A converter for each element of the collection. Defaults to the DEFAULT marker class, which stands in for "no converter".
55 |      */
56 |     Class<? extends Converter<?, ?>> converter() default DEFAULT.class;
57 |
58 |
59 |     /**
60 |      * An elaborate workaround for a vexing issue with not being able to use null as the default value.
61 |      * Refer to http://stackoverflow.com/questions/1178104/error-setting-a-default-null-value-for-an-annotations-field
62 |      */
63 |     static abstract class DEFAULT implements Converter<Object, Object> {}
64 | }
65 |
--------------------------------------------------------------------------------
/src/main/java/com/eclecticlogic/orc/OrcTemporal.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2017 Eclectic Logic LLC
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.eclecticlogic.orc;
18 |
19 | import java.lang.annotation.Retention;
20 | import java.lang.annotation.Target;
21 |
22 | import static java.lang.annotation.ElementType.METHOD;
23 | import static java.lang.annotation.RetentionPolicy.RUNTIME;
24 |
25 | /**
26 | * Similar to the JPA temporal annotation, defines whether the date should be interpreted as including or not-including time.
27 | * If the JPA @Temporal annotation is present, its value will be used. If both annotations are present, this annotation takes precedence.
28 | * The annotation can only be placed on methods and is retained at runtime.
29 | * Created by kabram.
30 | */
31 | @Target(METHOD)
32 | @Retention(RUNTIME)
33 | public @interface OrcTemporal {
34 | /**
35 | * @return Discriminator between date and timestamp.
36 | */
37 | OrcTemporalType value();
38 | }
39 |
--------------------------------------------------------------------------------
/src/main/java/com/eclecticlogic/orc/OrcTemporalType.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2017 Eclectic Logic LLC
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.eclecticlogic.orc;
18 |
19 | /**
20 | * Defines whether a java.util.Date value (or derivative) should be interpreted as a Date without time or a timestamp value.
21 | * Created by kabram.
22 | */
23 | public enum OrcTemporalType {
24 | DATE, // date without a time component
25 | TIMESTAMP // timestamp value, i.e. date including time
26 | }
27 |
--------------------------------------------------------------------------------
/src/main/java/com/eclecticlogic/orc/OrcWriter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2017 Eclectic Logic LLC
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.eclecticlogic.orc;
18 |
19 | import java.io.Closeable;
20 | import java.io.IOException;
21 | import java.util.function.Consumer;
22 |
23 | /**
24 |  * This is the interface to write your data and close the orc file.
25 |  * Created by kabram
26 |  *
27 |  * @param <T> Type of the objects written to the orc file.
28 |  */
29 | public interface OrcWriter<T> extends Closeable {
30 |
31 |     /**
32 |      * This method will throw a wrapped IOException if underlying API throws an IO exception. This may be called multiple times
33 |      * to write data to the same file.
34 |      * @param data Data to write.
35 |      * @return Writer reference so that calls can be chained.
36 |      */
37 |     OrcWriter<T> write(Iterable<T> data);
38 |
39 |
40 |     /**
41 |      * A variant of the Closeable.close() method that calls the supplied exception handler instead of throwing an exception.
42 |      * Useful in cases where you want to simply ignore the exception and not make your code verbose.
43 |      * @param exceptionHandler Receives the IOException raised while closing, if there is one.
44 |      */
45 |     void close(Consumer<IOException> exceptionHandler);
46 | }
47 |
--------------------------------------------------------------------------------
/src/main/java/com/eclecticlogic/orc/Schema.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2017 Eclectic Logic LLC
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.eclecticlogic.orc;
18 |
19 | import java.util.function.Function;
20 |
21 | /**
22 | * Interface to define your orc file schema. The columns are defined in the order in which you call the column functions.
23 | *
24 | * Created by kabram
25 | */
26 | public interface Schema {
27 |
28 | /**
29 | * @param fieldFunction An instance of the type T will be passed to this function and you are expected to call the appropriate
30 | * method to define the column. You are not restricted to just java-bean getters. Any method that takes no
31 | * parameter and returns a non-void type can be called. You can also chain method calls (e.g. getXyz().getPqr())
32 | * to get at sub-attributes. The name of the column is derived from the last method to be invoked.
33 | *
34 | * @return Self-reference for fluent interface buildout.
35 | */
36 | Schema column(Function fieldFunction);
37 |
38 | /**
39 | * @param name An explicit name to be used for the column.
40 | * @param columnFunction Same contract as the fieldFunction parameter of column(Function).
41 | * @return self-reference for fluent interface.
42 | */
43 | Schema column(String name, Function columnFunction);
44 |
45 |
46 | /**
47 | * @param delegate A class that accepts an instance of T in the constructor and provides "computed" functions.
48 | * @param
49 | * @return self-reference for fluent-interface.
50 | */
51 | Schema withDelegate(Class delegate);
52 |
53 | /** Delegate variant of column(Function); presumably fieldFunction is invoked on the delegate given to withDelegate - TODO confirm. */
54 | Schema delegatedColumn(Function fieldFunction);
55 |
56 | /**
57 | * @param name An explicit name to be used for the column.
58 | * @param columnFunction Same contract as the function passed to delegatedColumn(Function).
59 | * @return self-reference for fluent interface.
60 | */
61 | Schema delegatedColumn(String name, Function columnFunction);
62 |
63 | }
64 |
--------------------------------------------------------------------------------
/src/main/java/com/eclecticlogic/orc/impl/AbstractOrcWriter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2017 Eclectic Logic LLC
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.eclecticlogic.orc.impl;
18 |
19 | import com.eclecticlogic.orc.OrcHandle;
20 | import com.eclecticlogic.orc.OrcWriter;
21 | import org.apache.hadoop.conf.Configuration;
22 | import org.apache.hadoop.fs.Path;
23 | import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
24 | import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
25 | import org.apache.orc.CompressionKind;
26 | import org.apache.orc.OrcFile;
27 | import org.apache.orc.TypeDescription;
28 | import org.apache.orc.Writer;
29 |
30 | import java.io.IOException;
31 | import java.util.function.Consumer;
32 |
33 | /**
34 |  * Base class of the orc writers: it implements both the configuration side (OrcHandle) and the writing side (OrcWriter).
35 |  * The schema and the per-row population logic are supplied by subclasses through the abstract methods declared below.
36 |  *
37 |  * Created by kabram
38 |  */
39 | public abstract class AbstractOrcWriter implements OrcHandle, OrcWriter {
40 |
41 |     private Configuration configuration = new Configuration();
42 |     private OrcFile.WriterOptions writerOptions;
43 |     private CompressionKind compressionKind;
44 |     // Non-zero default: unless withBufferSize(0) is called, open() always applies this value to the writer options.
45 |     private int bufferSize = 10 * 1024;
46 |     private int batchSize = 1024;
47 |     private TypeDescription _typeDescription;
48 |     protected VectorizedRowBatch vectorizedRowBatch;
49 |     private Writer writer;
50 |
51 |
52 |     @Override
53 |     public OrcHandle withConfiguration(Configuration configuration) {
54 |         this.configuration = configuration;
55 |         return this;
56 |     }
57 |
58 |     @Override
59 |     public OrcHandle withOptions(OrcFile.WriterOptions writerOptions) {
60 |         this.writerOptions = writerOptions;
61 |         return this;
62 |     }
63 |
64 |     @Override
65 |     public OrcHandle withCompression(CompressionKind compressionKind) {
66 |         this.compressionKind = compressionKind;
67 |         return this;
68 |     }
69 |
70 |     @Override
71 |     public OrcHandle withBufferSize(int size) {
72 |         this.bufferSize = size;
73 |         return this;
74 |     }
75 |
76 |     @Override
77 |     public OrcHandle withBatchSize(int batchSize) {
78 |         this.batchSize = batchSize;
79 |         return this;
80 |     }
81 |
82 |
83 |     /**
84 |      * Creates the underlying orc writer for the given path and allocates the row batch. Writer options are derived from the
85 |      * configuration only when none were supplied; compression, buffer size and schema are then set on those options.
86 |      * An IOException raised while creating the writer is rethrown wrapped in a RuntimeException.
87 |      */
88 |     @Override
89 |     public OrcWriter open(Path path) {
90 |         if (writerOptions == null) {
91 |             writerOptions = OrcFile.writerOptions(configuration);
92 |         }
93 |         if (compressionKind != null) {
94 |             writerOptions.compress(compressionKind);
95 |         }
96 |         if (bufferSize != 0) {
97 |             writerOptions.bufferSize(bufferSize);
98 |         }
99 |         // Add the schema to the writer options.
100 |         TypeDescription schema = getTypeDescription();
101 |         writerOptions.setSchema(schema);
102 |         try {
103 |             writer = OrcFile.createWriter(path, writerOptions);
104 |         } catch (IOException e) {
105 |             throw new RuntimeException(e);
106 |         }
107 |         vectorizedRowBatch = schema.createRowBatch(batchSize);
108 |         specialCaseSetup();
109 |         return this;
110 |     }
111 |
112 |
113 |     /**
114 |      * Appends every element of data to the current row batch; a full batch is handed to the underlying writer and reset
115 |      * before the next element is added. An IOException from the underlying writer is rethrown wrapped in a RuntimeException.
116 |      */
117 |     @Override
118 |     public OrcWriter write(Iterable data) {
119 |         try {
120 |             for (T datum : data) {
121 |                 if (vectorizedRowBatch.size == vectorizedRowBatch.getMaxSize()) {
122 |                     writer.addRowBatch(vectorizedRowBatch);
123 |                     vectorizedRowBatch.reset();
124 |                 }
125 |                 // Write the datum to the column vectors.
126 |                 write(datum);
127 |                 vectorizedRowBatch.size++;
128 |             }
129 |         } catch (IOException e) {
130 |             throw new RuntimeException(e);
131 |         }
132 |         return this;
133 |     }
134 |
135 |
136 |     /**
137 |      * Flushes the rows still held in the row batch and closes the underlying writer. The writer is closed and both
138 |      * references are cleared even when that final flush fails, so a failed flush does not leave the writer open.
139 |      * @throws IOException if flushing the last batch or closing the writer fails.
140 |      */
141 |     @Override
142 |     public void close() throws IOException {
143 |         try {
144 |             if (vectorizedRowBatch != null && writer != null) {
145 |                 writer.addRowBatch(vectorizedRowBatch);
146 |             }
147 |         } finally {
148 |             vectorizedRowBatch = null;
149 |             if (writer != null) {
150 |                 try {
151 |                     writer.close();
152 |                 } finally {
153 |                     writer = null;
154 |                 }
155 |             }
156 |         }
157 |     }
158 |
159 |
160 |     /**
161 |      * Same as close(), except that an IOException is passed to the supplied handler instead of being thrown.
162 |      */
163 |     @Override
164 |     public void close(Consumer exceptionHandler) {
165 |         try {
166 |             close();
167 |         } catch (IOException e) {
168 |             exceptionHandler.accept(e);
169 |         }
170 |     }
171 |
172 |
173 |     /**
174 |      * @return The schema, created through createTypeDescription() on first use and cached for later calls.
175 |      */
176 |     protected TypeDescription getTypeDescription() {
177 |         if (_typeDescription == null) {
178 |             _typeDescription = createTypeDescription();
179 |         }
180 |         return _typeDescription;
181 |     }
182 |
183 |
184 |     /**
185 |      * Helper utility to set the value of the current property to null in the vector.
186 |      * @param vector Column vector whose entry at the current row index is flagged as null.
187 |      */
188 |     protected void setNull(ColumnVector vector) {
189 |         vector.isNull[vectorizedRowBatch.size] = true;
190 |         vector.noNulls = false;
191 |     }
192 |
193 |
194 |     /**
195 |      * @return The schema for the orc file as computed by the property access definitions. The implementation is generated dynamically at
196 |      * runtime using javassist.
197 |      */
198 |     protected abstract TypeDescription createTypeDescription();
199 |
200 |
201 |     /**
202 |      * Hook to setup special cases such as the modification of list child to support the full flattened size
203 |      * (rows x average list column size per row)
204 |      */
205 |     protected abstract void specialCaseSetup();
206 |
207 |
208 |     /**
209 |      * Routine that actually populates one row of the list into the vectorized row batch. The implementation of this is generated
210 |      * dynamically at runtime using javassist.
211 |      * @param datum Object instance to write.
212 |      */
213 |     protected abstract void write(T datum);
214 |
215 |
216 |     /**
217 |      * @return String form of the schema's type description.
218 |      */
219 |     @Override
220 |     public String toString() {
221 |         return getTypeDescription().toString();
222 |     }
223 | }
224 |
--------------------------------------------------------------------------------
/src/main/java/com/eclecticlogic/orc/impl/Column.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2017 Eclectic Logic LLC
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.eclecticlogic.orc.impl;
18 |
19 | import java.util.function.Function;
20 | import java.util.function.Supplier;
21 |
22 | /**
23 | * Captures the elements of a schema column - name, column accessor function and sub-schema.
24 | * Created by kabram
25 | */
26 | public class Column {
27 | Supplier nameFunction;
28 | Function