├── .dockerignore
├── .github
│   ├── dependabot.yml
│   └── workflows
│       ├── build.yml
│       └── release.yml
├── .gitignore
├── Dockerfile
├── LICENSE
├── README.md
├── build.gradle
├── buildSrc
│   ├── build.gradle
│   └── src
│       └── main
│           └── groovy
│               └── datafusion.java-conventions.gradle
├── datafusion-examples
│   ├── build.gradle
│   └── src
│       └── main
│           ├── java
│           │   └── org
│           │       └── apache
│           │           └── arrow
│           │               └── datafusion
│           │                   └── examples
│           │                       └── ExampleMain.java
│           └── resources
│               ├── aggregate_test_100.csv
│               ├── aggregate_test_100.parquet
│               ├── log4j2.xml
│               └── test_table.csv
├── datafusion-java
│   ├── build.gradle
│   ├── src
│   │   ├── main
│   │   │   └── java
│   │   │       └── org
│   │   │           └── apache
│   │   │               └── arrow
│   │   │                   └── datafusion
│   │   │                       ├── AbstractProxy.java
│   │   │                       ├── ArrowFormat.java
│   │   │                       ├── CsvFormat.java
│   │   │                       ├── DataFrame.java
│   │   │                       ├── DataFrames.java
│   │   │                       ├── DefaultDataFrame.java
│   │   │                       ├── DefaultRecordBatchStream.java
│   │   │                       ├── DefaultSessionContext.java
│   │   │                       ├── DefaultTableProvider.java
│   │   │                       ├── ErrorUtil.java
│   │   │                       ├── ExecutionOptions.java
│   │   │                       ├── FileFormat.java
│   │   │                       ├── FileFormats.java
│   │   │                       ├── JNILoader.java
│   │   │                       ├── ListingOptions.java
│   │   │                       ├── ListingTable.java
│   │   │                       ├── ListingTableConfig.java
│   │   │                       ├── NativeProxy.java
│   │   │                       ├── ObjectResultCallback.java
│   │   │                       ├── ParquetFormat.java
│   │   │                       ├── ParquetOptions.java
│   │   │                       ├── RecordBatchStream.java
│   │   │                       ├── Runtime.java
│   │   │                       ├── SessionConfig.java
│   │   │                       ├── SessionContext.java
│   │   │                       ├── SessionContexts.java
│   │   │                       ├── SqlParserOptions.java
│   │   │                       ├── TableProvider.java
│   │   │                       ├── TableProviders.java
│   │   │                       ├── TokioRuntime.java
│   │   │                       └── package-info.java
│   │   └── test
│   │       ├── java
│   │       │   └── org
│   │       │       └── apache
│   │       │           └── arrow
│   │       │               └── datafusion
│   │       │                   ├── ParquetWriter.java
│   │       │                   ├── TestExecuteStream.java
│   │       │                   ├── TestListingTable.java
│   │       │                   ├── TestQuery.java
│   │       │                   └── TestSessionConfig.java
│   │       └── resources
│   │           └── dictionary_data.parquet
│   └── write_test_files.py
├── datafusion-jni
│   ├── .gitignore
│   ├── Cargo.toml
│   └── src
│       ├── context.rs
│       ├── dataframe.rs
│       ├── file_formats.rs
│       ├── lib.rs
│       ├── listing_options.rs
│       ├── listing_table.rs
│       ├── listing_table_config.rs
│       ├── runtime.rs
│       ├── session_config.rs
│       ├── stream.rs
│       ├── table_provider.rs
│       └── util.rs
├── gradle.properties
├── gradle
│   └── wrapper
│       ├── gradle-wrapper.jar
│       └── gradle-wrapper.properties
├── gradlew
├── gradlew.bat
└── settings.gradle
/.dockerignore:
--------------------------------------------------------------------------------
1 | Dockerfile
2 | README.md
3 | **/build/
4 | **/target/
5 | **/out/
6 | **/bin/
7 | **/.idea/
8 | **/.gradle/
9 |
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | # To get started with Dependabot version updates, you'll need to specify which
2 | # package ecosystems to update and where the package manifests are located.
3 | # Please see the documentation for all configuration options:
4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
5 |
6 | version: 2
7 | updates:
8 |   - package-ecosystem: "gradle" # See documentation for possible values
9 |     directory: "/" # Location of package manifests
10 |     schedule:
11 |       interval: "weekly"
12 |
--------------------------------------------------------------------------------
/.github/workflows/build.yml:
--------------------------------------------------------------------------------
1 | name: Build
2 | on:
3 |   push:
4 |     branches: ["*"]
5 |   pull_request:
6 |     branches: ["*"]
7 | jobs:
8 |   rust:
9 |     strategy:
10 |       matrix:
11 |         os: ["ubuntu-latest", "macos-latest", "windows-latest"]
12 |     runs-on: ${{ matrix.os }}
13 |     steps:
14 |       - uses: actions/checkout@v3
15 | 
16 |       - name: Stable with rustfmt and clippy
17 |         uses: dtolnay/rust-toolchain@stable
18 |         with:
19 |           toolchain: stable
20 |           components: rustfmt, clippy
21 | 
22 |       - name: Set up JDK 17
23 |         uses: actions/setup-java@v3
24 |         with:
25 |           java-version: "17"
26 |           distribution: "temurin"
27 | 
28 |       - name: Validate Gradle wrapper
29 |         uses: gradle/wrapper-validation-action@v1
30 | 
31 |       - name: Cargo build
32 |         run: ./gradlew cargoReleaseBuild
33 | 
34 |       - name: Upload built artifacts
35 |         uses: actions/upload-artifact@v3
36 |         if: matrix.os == 'macos-latest'
37 |         with:
38 |           name: datafusion-jni-${{ matrix.os }}
39 |           if-no-files-found: error
40 |           path: "datafusion-jni/target/release/libdatafusion_jni.dylib"
41 |           retention-days: 3
42 | 
43 |       - name: Upload built artifacts
44 |         uses: actions/upload-artifact@v3
45 |         if: matrix.os == 'ubuntu-latest'
46 |         with:
47 |           name: datafusion-jni-${{ matrix.os }}
48 |           if-no-files-found: error
49 |           path: "datafusion-jni/target/release/libdatafusion_jni.so"
50 |           retention-days: 3
51 | 
52 |       - name: Upload built artifacts
53 |         uses: actions/upload-artifact@v3
54 |         if: matrix.os == 'windows-latest'
55 |         with:
56 |           name: datafusion-jni-${{ matrix.os }}
57 |           if-no-files-found: error
58 |           # note no "lib"
59 |           path: "datafusion-jni\\target\\release\\datafusion_jni.dll"
60 |           retention-days: 3
61 | 
62 |   java:
63 |     needs:
64 |       - rust
65 |     runs-on: ubuntu-latest
66 |     steps:
67 |       - uses: actions/checkout@v3
68 | 
69 |       - name: Set up JDK 17
70 |         uses: actions/setup-java@v3
71 |         with:
72 |           java-version: 17
73 |           distribution: "temurin"
74 | 
75 |       - name: Validate Gradle wrapper
76 |         uses: gradle/wrapper-validation-action@v1
77 | 
78 |       - name: Download ubuntu artifacts
79 |         uses: actions/download-artifact@v3
80 |         with:
81 |           name: datafusion-jni-ubuntu-latest
82 |           path: datafusion-java/build/jni_libs/linux-x86_64
83 | 
84 |       - name: Download windows artifacts
85 |         uses: actions/download-artifact@v3
86 |         with:
87 |           name: datafusion-jni-windows-latest
88 |           path: datafusion-java/build/jni_libs/windows-x86_64
89 | 
90 |       - name: Download macos artifacts
91 |         uses: actions/download-artifact@v3
92 |         with:
93 |           name: datafusion-jni-macos-latest
94 |           path: datafusion-java/build/jni_libs/osx-x86_64
95 | 
96 |       - name: List downloaded artifacts
97 |         run: tree datafusion-java/build/jni_libs
98 | 
99 |       - name: Build and test
100 |         run: ./gradlew -PJNI_PATH=${{ github.workspace }}/datafusion-java/build/jni_libs/linux-x86_64 build
101 | 
102 |       - name: Publish to Maven Local
103 |         run: ./gradlew publishToMavenLocal
104 | 
105 |       - name: Upload built artifacts
106 |         uses: actions/upload-artifact@v3
107 |         with:
108 |           name: datafusion-java
109 |           if-no-files-found: error
110 |           path: ~/.m2/repository/io/github/datafusion-contrib/datafusion-java
111 |           retention-days: 3
112 | 
--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
1 | name: Release
2 | on:
3 |   release:
4 |     types: [created]
5 |   push:
6 |     branches: [main]
7 | jobs:
8 |   rust:
9 |     strategy:
10 |       matrix:
11 |         os: ["ubuntu-latest", "macos-latest", "windows-latest"]
12 |     runs-on: ${{ matrix.os }}
13 |     steps:
14 |       - uses: actions/checkout@v3
15 | 
16 |       - name: Stable with rustfmt and clippy
17 |         uses: dtolnay/rust-toolchain@stable
18 |         with:
19 |           toolchain: stable
20 |           components: rustfmt, clippy
21 | 
22 |       - name: Set up JDK 17
23 |         uses: actions/setup-java@v3
24 |         with:
25 |           java-version: "17"
26 |           distribution: "temurin"
27 | 
28 |       - name: Validate Gradle wrapper
29 |         uses: gradle/wrapper-validation-action@v1
30 | 
31 |       - name: Cargo build
32 |         run: ./gradlew cargoReleaseBuild
33 | 
34 |       - name: Upload built artifacts
35 |         uses: actions/upload-artifact@v3
36 |         if: matrix.os == 'macos-latest'
37 |         with:
38 |           name: datafusion-jni-${{ matrix.os }}
39 |           if-no-files-found: error
40 |           path: "datafusion-jni/target/release/libdatafusion_jni.dylib"
41 |           retention-days: 3
42 | 
43 |       - name: Upload built artifacts
44 |         uses: actions/upload-artifact@v3
45 |         if: matrix.os == 'ubuntu-latest'
46 |         with:
47 |           name: datafusion-jni-${{ matrix.os }}
48 |           if-no-files-found: error
49 |           path: "datafusion-jni/target/release/libdatafusion_jni.so"
50 |           retention-days: 3
51 | 
52 |       - name: Upload built artifacts
53 |         uses: actions/upload-artifact@v3
54 |         if: matrix.os == 'windows-latest'
55 |         with:
56 |           name: datafusion-jni-${{ matrix.os }}
57 |           if-no-files-found: error
58 |           # note no "lib"
59 |           path: "datafusion-jni\\target\\release\\datafusion_jni.dll"
60 |           retention-days: 3
61 | 
62 |   java:
63 |     needs:
64 |       - rust
65 |     runs-on: ubuntu-latest
66 |     steps:
67 |       - uses: actions/checkout@v3
68 | 
69 |       - name: Set up JDK 17
70 |         uses: actions/setup-java@v3
71 |         with:
72 |           java-version: 17
73 |           distribution: "temurin"
74 | 
75 |       - name: Validate Gradle wrapper
76 |         uses: gradle/wrapper-validation-action@v1
77 | 
78 |       - name: Download ubuntu artifacts
79 |         uses: actions/download-artifact@v3
80 |         with:
81 |           name: datafusion-jni-ubuntu-latest
82 |           path: datafusion-java/build/jni_libs/linux-x86_64
83 | 
84 |       - name: Download windows artifacts
85 |         uses: actions/download-artifact@v3
86 |         with:
87 |           name: datafusion-jni-windows-latest
88 |           path: datafusion-java/build/jni_libs/windows-x86_64
89 | 
90 |       - name: Download macos artifacts
91 |         uses: actions/download-artifact@v3
92 |         with:
93 |           name: datafusion-jni-macos-latest
94 |           path: datafusion-java/build/jni_libs/osx-x86_64
95 | 
96 |       - name: List downloaded artifacts
97 |         run: tree datafusion-java/build/jni_libs
98 | 
99 |       - name: Publish to Sonatype
100 |         run: ./gradlew publish
101 |         env:
102 |           MAVEN_USERNAME: ${{ secrets.OSSRH_USERNAME }}
103 |           MAVEN_PASSWORD: ${{ secrets.OSSRH_TOKEN }}
104 |           ORG_GRADLE_PROJECT_signingKeyId: ${{ secrets.SIGNING_KEY_ID }}
105 |           ORG_GRADLE_PROJECT_signingKey: ${{ secrets.SIGNING_KEY }}
106 |           ORG_GRADLE_PROJECT_signingPassword: ${{ secrets.SIGNING_PASSWORD }}
107 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 |
2 | # Created by https://www.toptal.com/developers/gitignore/api/java,gradle
3 | # Edit at https://www.toptal.com/developers/gitignore?templates=java,gradle
4 |
5 | ### Java ###
6 | # Compiled class file
7 | *.class
8 |
9 | # Log file
10 | *.log
11 |
12 | # BlueJ files
13 | *.ctxt
14 |
15 | # Mobile Tools for Java (J2ME)
16 | .mtj.tmp/
17 |
18 | # Package Files #
19 | *.jar
20 | *.war
21 | *.nar
22 | *.ear
23 | *.zip
24 | *.tar.gz
25 | *.rar
26 |
27 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
28 | hs_err_pid*
29 |
30 | ### Gradle ###
31 | .gradle
32 | build/
33 |
34 | # Ignore Gradle GUI config
35 | gradle-app.setting
36 |
37 | # Avoid ignoring Gradle wrapper jar file (.jar files are usually ignored)
38 | !gradle-wrapper.jar
39 |
40 | # Cache of project
41 | .gradletasknamecache
42 |
43 | # # Work around https://youtrack.jetbrains.com/issue/IDEA-116898
44 | # gradle/wrapper/gradle-wrapper.properties
45 |
46 | ### Gradle Patch ###
47 | **/build/
48 |
49 | # Eclipse Gradle plugin generated files
50 | # Eclipse Core
51 | .project
52 | # JDT-specific (Eclipse Java Development Tools)
53 | .classpath
54 |
55 | # End of https://www.toptal.com/developers/gitignore/api/java,gradle
56 |
57 | # IntelliJ
58 | *.ipr
59 | *.iws
60 | *.iml
61 |
62 | .settings/
63 |
64 | .idea/
65 | out/
66 | bin/
67 | .vscode/
68 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM amazoncorretto:21
2 |
3 | RUN yum install -y gcc && \
4 |     yum clean all && \
5 |     rm -rf /var/cache/yum
6 |
7 | RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
8 |
9 | ENV PATH="/root/.cargo/bin:$PATH"
10 |
11 | WORKDIR /usr/opt/datafusion-java
12 |
13 | COPY build.gradle settings.gradle gradlew ./
14 |
15 | COPY gradle gradle
16 |
17 | RUN ./gradlew --version
18 |
19 | COPY . .
20 |
21 | RUN ./gradlew copyDevLibrary installDist
22 |
23 | # Set working directory so that the relative paths to resource files used in ExampleMain are correct
24 | WORKDIR /usr/opt/datafusion-java/datafusion-examples
25 |
26 | # Configure environment variables to allow loading datafusion-java in jshell
27 | ENV CLASSPATH="/usr/opt/datafusion-java/datafusion-examples/build/install/datafusion-examples/lib/*"
28 | ENV JDK_JAVA_OPTIONS="-Djava.library.path=/usr/opt/datafusion-java/datafusion-java/build/jni_libs/dev --add-opens=java.base/java.nio=ALL-UNNAMED"
29 |
30 | CMD ["./build/install/datafusion-examples/bin/datafusion-examples"]
31 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # datafusion-java
2 |
3 | [](https://github.com/datafusion-contrib/datafusion-java/actions/workflows/build.yml)
4 | [](https://github.com/datafusion-contrib/datafusion-java/actions/workflows/release.yml)
5 | [](https://repo.maven.apache.org/maven2/io/github/datafusion-contrib/datafusion-java/)
6 |
7 | A Java binding to [Apache DataFusion][1]
8 |
9 | ## Status
10 |
11 | This project is still a work in progress. It currently works with Arrow 14.0 and DataFusion 25.0,
12 | and it is built and verified in CI against Java 11 and 21. You can check out the [docker run instructions](#how-to-run-the-interactive-demo),
13 | which use a Java 21 `jshell` session to run the library interactively.
14 |
15 | ## How to use in your code
16 |
17 | The artifacts are [published][2] to Maven Central, so you can use datafusion-java like any other Java library:
18 |
19 | ```groovy
20 | dependencies {
21 |     implementation(
22 |         group: "io.github.datafusion-contrib",
23 |         name: "datafusion-java",
24 |         version: "0.16.0" // or the latest version; see https://github.com/datafusion-contrib/datafusion-java/releases
25 |     )
26 | }
27 | ```
28 |
29 | To test it out, you can use this piece of demo code:
30 |
31 | <details>
32 | <summary>DataFusionDemo.java</summary>
33 | 
34 | ```java
35 | package com.me;
36 | 
37 | import org.apache.arrow.datafusion.DataFrame;
38 | import org.apache.arrow.datafusion.SessionContext;
39 | import org.apache.arrow.datafusion.SessionContexts;
40 | 
41 | public class DataFusionDemo {
42 | 
43 |   public static void main(String[] args) throws Exception {
44 |     try (SessionContext sessionContext = SessionContexts.create()) {
45 |       sessionContext.sql("select sqrt(65536)").thenCompose(DataFrame::show).join();
46 |     }
47 |   }
48 | }
49 | ```
50 | 
51 | </details>
52 | 
53 | <details>
54 | <summary>build.gradle.kts</summary>
55 | 
56 | ```kotlin
57 | plugins {
58 |     java
59 |     application
60 | }
61 | 
62 | repositories {
63 |     mavenCentral()
64 |     google()
65 | }
66 | 
67 | tasks {
68 |     application {
69 |         mainClass.set("com.me.DataFusionDemo")
70 |     }
71 | }
72 | 
73 | dependencies {
74 |     implementation(
75 |         group = "io.github.datafusion-contrib",
76 |         name = "datafusion-java",
77 |         version = "0.16.0"
78 |     )
79 | }
80 |
81 | ```
82 | 
83 | </details>
84 | 
85 | <details>
86 | <summary>Run result</summary>
87 | 
88 | ```
89 | $ ./gradlew run
90 | ...
91 | > Task :compileKotlin UP-TO-DATE
92 | > Task :compileJava UP-TO-DATE
93 | > Task :processResources NO-SOURCE
94 | > Task :classes UP-TO-DATE
95 |
96 | > Task :run
97 | successfully created tokio runtime
98 | +--------------------+
99 | | sqrt(Int64(65536)) |
100 | +--------------------+
101 | | 256 |
102 | +--------------------+
103 | successfully shutdown tokio runtime
104 |
105 | BUILD SUCCESSFUL in 2s
106 | 3 actionable tasks: 1 executed, 2 up-to-date
107 | 16:43:34: Execution finished 'run'.
108 | ```
109 | 
110 | </details>
111 | 
112 | ## How to run the interactive demo
113 |
114 | ### 1. Run using Docker (with `jshell`)
115 |
116 | First, build the Docker image:
117 |
118 | ```
119 | docker build -t datafusion-example .
120 | ```
121 |
122 | Then you can run the example program using Docker:
123 |
124 | ```
125 | docker run --rm -it datafusion-example
126 | ```
127 |
128 | Or start an interactive jshell session:
129 |
130 | ```
131 | docker run --rm -it datafusion-example jshell
132 | ```
133 |
134 | <details>
135 | <summary>Example jshell session</summary>
136 |
137 | ```text
138 | Jan 11, 2024 1:49:28 AM java.util.prefs.FileSystemPreferences$1 run
139 | INFO: Created user preferences directory.
140 | | Welcome to JShell -- Version 21
141 | | For an introduction type: /help intro
142 |
143 | jshell> import org.apache.arrow.datafusion.*
144 |
145 | jshell> var context = SessionContexts.create()
146 | 01:41:05.586 [main] DEBUG org.apache.arrow.datafusion.JNILoader -- successfully loaded datafusion_jni from library path
147 | 01:41:05.589 [main] DEBUG org.apache.arrow.datafusion.JNILoader -- datafusion_jni already loaded, returning
148 | 01:41:05.590 [main] DEBUG org.apache.arrow.datafusion.AbstractProxy -- Obtaining DefaultSessionContext@7f58383b8db0
149 | 01:41:05.591 [main] DEBUG org.apache.arrow.datafusion.AbstractProxy -- Obtaining TokioRuntime@7f58383ce110
150 | context ==> org.apache.arrow.datafusion.DefaultSessionContext@2d209079
151 |
152 | jshell> var df = context.sql("select 1.1 + cos(2.0)").join()
153 | 01:41:10.961 [main] DEBUG org.apache.arrow.datafusion.AbstractProxy -- Obtaining DefaultDataFrame@7f5838209100
154 | df ==> org.apache.arrow.datafusion.DefaultDataFrame@34ce8af7
155 |
156 | jshell> import org.apache.arrow.memory.*
157 |
158 | jshell> var allocator = new RootAllocator()
159 | 01:41:22.521 [main] INFO org.apache.arrow.memory.BaseAllocator -- Debug mode disabled. Enable with the VM option -Darrow.memory.debug.allocator=true.
160 | 01:41:22.525 [main] INFO org.apache.arrow.memory.DefaultAllocationManagerOption -- allocation manager type not specified, using netty as the default type
161 | 01:41:22.525 [main] INFO org.apache.arrow.memory.CheckAllocator -- Using DefaultAllocationManager at memory-unsafe-14.0.2.jar!/org/apache/arrow/memory/DefaultAllocationManagerFactory.class
162 | 01:41:22.531 [main] DEBUG org.apache.arrow.memory.util.MemoryUtil -- Constructor for direct buffer found and made accessible
163 | 01:41:22.536 [main] DEBUG org.apache.arrow.memory.util.MemoryUtil -- direct buffer constructor: available
164 | 01:41:22.537 [main] DEBUG org.apache.arrow.memory.rounding.DefaultRoundingPolicy -- -Dorg.apache.memory.allocator.pageSize: 8192
165 | 01:41:22.537 [main] DEBUG org.apache.arrow.memory.rounding.DefaultRoundingPolicy -- -Dorg.apache.memory.allocator.maxOrder: 11
166 | allocator ==> Allocator(ROOT) 0/0/0/9223372036854775807 (res/actual/peak/limit)
167 |
168 |
169 | jshell> var r = df.collect(allocator).join()
170 | 01:41:29.635 [main] INFO org.apache.arrow.datafusion.DefaultDataFrame -- successfully completed with arr length=610
171 | r ==> org.apache.arrow.vector.ipc.ArrowFileReader@7ac7a4e4
172 |
173 | jshell> var root = r.getVectorSchemaRoot()
174 | 01:41:34.658 [main] DEBUG org.apache.arrow.vector.ipc.ReadChannel -- Reading buffer with size: 10
175 | 01:41:34.661 [main] DEBUG org.apache.arrow.vector.ipc.ArrowFileReader -- Footer starts at 416, length: 184
176 | 01:41:34.661 [main] DEBUG org.apache.arrow.vector.ipc.ReadChannel -- Reading buffer with size: 184
177 | root ==> org.apache.arrow.vector.VectorSchemaRoot@6cd28fa7
178 |
179 | jshell> r.loadNextBatch()
180 | 01:41:39.421 [main] DEBUG org.apache.arrow.vector.ipc.ArrowFileReader -- RecordBatch at 200, metadata: 192, body: 16
181 | 01:41:39.423 [main] DEBUG org.apache.arrow.vector.ipc.ReadChannel -- Reading buffer with size: 208
182 | 01:41:39.424 [main] DEBUG org.apache.arrow.vector.ipc.message.ArrowRecordBatch -- Buffer in RecordBatch at 0, length: 1
183 | 01:41:39.425 [main] DEBUG org.apache.arrow.vector.ipc.message.ArrowRecordBatch -- Buffer in RecordBatch at 8, length: 8
184 | $8 ==> true
185 |
186 | jshell> var v = root.getVector(0)
187 | v ==> [0.6838531634528577]
188 | ```
189 | 
190 | </details>
191 | 
192 | ### 2. Build from source
193 |
194 | Note that you must have local Rust and Java toolchains set up.
195 |
196 | Run the example in one line:
197 |
198 | ```bash
199 | ./gradlew run
200 | ```
201 |
202 | Or roll your own test example:
203 |
204 | ```java
205 | import org.apache.arrow.datafusion.DataFrame;
206 | import org.apache.arrow.datafusion.SessionContext;
207 | import org.apache.arrow.datafusion.SessionContexts;
208 | import org.apache.arrow.memory.BufferAllocator;
209 | import org.apache.arrow.memory.RootAllocator;
210 | import org.apache.arrow.vector.Float8Vector;
211 | import org.apache.arrow.vector.VectorSchemaRoot;
212 | import org.apache.arrow.vector.ipc.ArrowReader;
213 | import org.slf4j.Logger;
214 | import org.slf4j.LoggerFactory;
215 | 
216 | import java.io.IOException;
217 | 
218 | public class ExampleMain {
219 | 
220 |   private static final Logger logger = LoggerFactory.getLogger(ExampleMain.class);
221 | 
222 |   public static void main(String[] args) throws Exception {
223 |     try (SessionContext sessionContext = SessionContexts.create(); BufferAllocator allocator = new RootAllocator()) {
224 |       DataFrame dataFrame = sessionContext.sql("select 1.5 + sqrt(2.0)").get();
225 |       dataFrame.collect(allocator).thenAccept(ExampleMain::onReaderResult).get();
226 |     }
227 |   }
228 | 
229 |   private static void onReaderResult(ArrowReader reader) {
230 |     try {
231 |       VectorSchemaRoot root = reader.getVectorSchemaRoot();
232 |       while (reader.loadNextBatch()) {
233 |         Float8Vector vector = (Float8Vector) root.getVector(0);
234 |         for (int i = 0; i < root.getRowCount(); i += 1) {
235 |           logger.info("value {}={}", i, vector.getValueAsDouble(i));
236 |         }
237 |       }
238 |       // close to release resources
239 |       reader.close();
240 |     } catch (IOException e) {
241 |       logger.warn("got IO Exception", e);
242 |     }
243 |   }
244 | }
245 | ```
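
The same context can also query files registered as tables. Below is a minimal sketch based on the `registerCsv` call used in `datafusion-examples`; the class name and CSV path are placeholders for your own:

```java
import java.nio.file.Paths;
import org.apache.arrow.datafusion.DataFrame;
import org.apache.arrow.datafusion.SessionContext;
import org.apache.arrow.datafusion.SessionContexts;

public class CsvQueryDemo {
  public static void main(String[] args) throws Exception {
    try (SessionContext context = SessionContexts.create()) {
      // registerCsv returns a CompletableFuture; join() waits for the table registration
      context.registerCsv("test_csv", Paths.get("src/main/resources/test_table.csv")).join();
      // query the registered table and print the result
      context.sql("select * from test_csv limit 3").thenCompose(DataFrame::show).join();
    }
  }
}
```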
246 |
247 | To build the library:
248 |
249 | ```bash
250 | ./gradlew build
251 | ```
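
Note that `./gradlew build` also runs the Java tests, which load the JNI library: the `test` task depends on `copyDevLibrary`, which in turn runs `cargo build` in `datafusion-jni`, so a working cargo toolchain is required (see `datafusion-java/build.gradle`).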
252 |
253 | [1]: https://github.com/apache/datafusion
254 | [2]: https://repo.maven.apache.org/maven2/io/github/datafusion-contrib/datafusion-java/
255 |
--------------------------------------------------------------------------------
/build.gradle:
--------------------------------------------------------------------------------
1 | buildscript {
2 |     repositories {
3 |         mavenCentral()
4 |     }
5 | }
6 | 
7 | plugins {
8 |     id 'java'
9 |     id 'com.diffplug.spotless' version '6.25.0' apply false
10 |     id 'com.google.osdetector' version '1.7.3' apply false
11 | }
12 |
--------------------------------------------------------------------------------
/buildSrc/build.gradle:
--------------------------------------------------------------------------------
1 | plugins {
2 |     id 'groovy-gradle-plugin'
3 | }
4 | 
5 | repositories {
6 |     gradlePluginPortal() // so that external plugins can be resolved in the dependencies section
7 |     mavenCentral()
8 | }
9 |
--------------------------------------------------------------------------------
/buildSrc/src/main/groovy/datafusion.java-conventions.gradle:
--------------------------------------------------------------------------------
1 | plugins {
2 |     id 'java'
3 | }
4 | 
5 | group = 'io.github.datafusion-contrib'
6 | version = '0.17.0-SNAPSHOT'
7 | 
8 | repositories {
9 |     mavenCentral()
10 | }
11 | 
12 | java {
13 |     toolchain {
14 |         languageVersion = JavaLanguageVersion.of(21)
15 |     }
16 | }
17 | 
18 | tasks.withType(JavaCompile).configureEach {
19 |     // compile down to the minimum supported version (Java 8)
20 |     options.release.set(8)
21 | }
22 |
--------------------------------------------------------------------------------
/datafusion-examples/build.gradle:
--------------------------------------------------------------------------------
1 | plugins {
2 |     id 'application'
3 |     id 'datafusion.java-conventions'
4 |     id 'com.diffplug.spotless'
5 | }
6 | 
7 | spotless {
8 |     java {
9 |         googleJavaFormat()
10 |     }
11 | }
12 | 
13 | dependencies {
14 |     implementation project(':datafusion-java')
15 |     implementation 'org.slf4j:slf4j-api:2.0.16'
16 |     implementation 'ch.qos.logback:logback-classic:1.5.12'
17 |     implementation 'org.apache.arrow:arrow-format:18.0.0'
18 |     implementation 'org.apache.arrow:arrow-vector:18.1.0'
19 | }
20 | 
21 | application {
22 |     mainClass = 'org.apache.arrow.datafusion.examples.ExampleMain'
23 |     def libraryPath = findProperty("JNI_PATH") ?: "$rootDir/datafusion-java/build/jni_libs/dev"
24 |     applicationDefaultJvmArgs += ["-Djava.library.path=$libraryPath", "--add-opens=java.base/java.nio=ALL-UNNAMED"]
25 | }
26 | 
27 | tasks.named("run").configure {
28 |     dependsOn(":datafusion-java:copyDevLibrary")
29 | }
30 |
--------------------------------------------------------------------------------
/datafusion-examples/src/main/java/org/apache/arrow/datafusion/examples/ExampleMain.java:
--------------------------------------------------------------------------------
1 | package org.apache.arrow.datafusion.examples;
2 |
3 | import java.io.IOException;
4 | import java.nio.file.Files;
5 | import java.nio.file.Path;
6 | import java.nio.file.Paths;
7 | import java.util.concurrent.CompletableFuture;
8 | import org.apache.arrow.datafusion.DataFrame;
9 | import org.apache.arrow.datafusion.SessionContext;
10 | import org.apache.arrow.datafusion.SessionContexts;
11 | import org.apache.arrow.memory.BufferAllocator;
12 | import org.apache.arrow.memory.RootAllocator;
13 | import org.apache.arrow.vector.BigIntVector;
14 | import org.apache.arrow.vector.VarCharVector;
15 | import org.apache.arrow.vector.VectorSchemaRoot;
16 | import org.apache.arrow.vector.ipc.ArrowReader;
17 | import org.slf4j.Logger;
18 | import org.slf4j.LoggerFactory;
19 |
20 | public class ExampleMain {
21 |
22 | private static final Logger logger = LoggerFactory.getLogger(ExampleMain.class);
23 |
24 |   public static void main(String[] args) throws Exception {
25 |     try (SessionContext context = SessionContexts.create();
26 |         BufferAllocator allocator = new RootAllocator()) {
27 |       loadConstant(context).join();
28 | 
29 |       context.registerCsv("test_csv", Paths.get("src/main/resources/test_table.csv")).join();
30 |       context.sql("select * from test_csv limit 3").thenComposeAsync(DataFrame::show).join();
31 | 
32 |       context
33 |           .registerParquet(
34 |               "test_parquet", Paths.get("src/main/resources/aggregate_test_100.parquet"))
35 |           .join();
36 |       context.sql("select * from test_parquet limit 5").thenComposeAsync(DataFrame::show).join();
37 | 
38 |       context
39 |           .sql("select * from test_csv")
40 |           .thenComposeAsync(df -> df.collect(allocator))
41 |           .thenAccept(ExampleMain::consumeReader)
42 |           .join();
43 | 
44 |       Path tempPath = Files.createTempDirectory("datafusion-examples");
45 | 
46 |       context
47 |           .sql("select * from test_parquet limit 3")
48 |           .thenComposeAsync(df -> df.writeCsv(tempPath.resolve("csv-out")))
49 |           .join();
50 | 
51 |       context
52 |           .sql("select * from test_parquet limit 3")
53 |           .thenComposeAsync(df -> df.writeParquet(tempPath.resolve("parquet-out")))
54 |           .join();
55 | 
56 |       context
57 |           .sql("select * from test_parquet limit 3")
58 |           .thenAccept(
59 |               df -> {
60 |                 try {
61 |                   boolean previouslyRegistered =
62 |                       context.registerTable("test_parquet_limited", df.intoView()).isPresent();
63 |                   assert !previouslyRegistered;
64 |                 } catch (Exception e) {
65 |                   throw new RuntimeException(e);
66 |                 }
67 |               })
68 |           .join();
69 | 
70 |       context.sql("select * from test_parquet_limited").thenComposeAsync(DataFrame::show).join();
71 |     }
72 |   }
73 | 
74 |   private static void consumeReader(ArrowReader reader) {
75 |     try {
76 |       VectorSchemaRoot root = reader.getVectorSchemaRoot();
77 |       while (reader.loadNextBatch()) {
78 |         VarCharVector nameVector = (VarCharVector) root.getVector(0);
79 |         logger.info(
80 |             "name vector size {}, row count {}, value={}",
81 |             nameVector.getValueCount(),
82 |             root.getRowCount(),
83 |             nameVector);
84 |         BigIntVector ageVector = (BigIntVector) root.getVector(1);
85 |         logger.info(
86 |             "age vector size {}, row count {}, value={}",
87 |             ageVector.getValueCount(),
88 |             root.getRowCount(),
89 |             ageVector);
90 |       }
91 |       reader.close();
92 |     } catch (IOException e) {
93 |       logger.warn("got IO Exception", e);
94 |     }
95 |   }
96 | 
97 |   private static CompletableFuture loadConstant(SessionContext context) {
98 |     return context
99 |         .sql("select 1 + 2")
100 |         .thenComposeAsync(
101 |             dataFrame -> {
102 |               logger.info("successfully loaded data frame {}", dataFrame);
103 |               return dataFrame.show();
104 |             });
105 |   }
106 | }
107 |
--------------------------------------------------------------------------------
/datafusion-examples/src/main/resources/aggregate_test_100.csv:
--------------------------------------------------------------------------------
1 | c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13
2 | c,2,1,18109,2033001162,-6513304855495910254,25,43062,1491205016,5863949479783605708,0.110830784,0.9294097332465232,6WfVFBVGJSQb7FhA7E0lBwdvjfZnSW
3 | d,5,-40,22614,706441268,-7542719935673075327,155,14337,3373581039,11720144131976083864,0.69632107,0.3114712539863804,C2GT5KVyOPZpgKVl110TyZO0NcJ434
4 | b,1,29,-18218,994303988,5983957848665088916,204,9489,3275293996,14857091259186476033,0.53840446,0.17909035118828576,AyYVExXK6AR2qUTxNZ7qRHQOVGMLcz
5 | a,1,-85,-15154,1171968280,1919439543497968449,77,52286,774637006,12101411955859039553,0.12285209,0.6864391962767343,0keZ5G8BffGwgF2RwQD59TFzMStxCB
6 | b,5,-82,22080,1824882165,7373730676428214987,208,34331,3342719438,3330177516592499461,0.82634634,0.40975383525297016,Ig1QcuKsjHXkproePdERo2w0mYzIqd
7 | b,4,-111,-1967,-4229382,1892872227362838079,67,9832,1243785310,8382489916947120498,0.06563997,0.152498292971736,Sfx0vxv1skzZWT1PqVdoRDdO6Sb6xH
8 | e,3,104,-25136,1738331255,300633854973581194,139,20807,3577318119,13079037564113702254,0.40154034,0.7764360990307122,DuJNG8tufSqW0ZstHqWj3aGvFLMg4A
9 | a,3,13,12613,1299719633,2020498574254265315,191,17835,3998790955,14881411008939145569,0.041445434,0.8813167497816289,Amn2K87Db5Es3dFQO9cw9cvpAM6h35
10 | d,1,38,18384,-335410409,-1632237090406591229,26,57510,2712615025,1842662804748246269,0.6064476,0.6404495093354053,4HX6feIvmNXBN7XGqgO4YVBkhu8GDI
11 | a,4,-38,20744,762932956,308913475857409919,7,45465,1787652631,878137512938218976,0.7459874,0.02182578039211991,ydkwycaISlYSlEq3TlkS2m15I2pcp8
12 | d,1,57,28781,-1143802338,2662536767954229885,202,62167,879082834,4338034436871150616,0.7618384,0.42950521730777025,VY0zXmXeksCT8BzvpzpPLbmU9Kp9Y4
13 | a,4,-54,-2376,434021400,5502271306323260832,113,15777,2502326480,7966148640299601101,0.5720931,0.30585375151301186,KJFcmTVjdkCMv94wYCtfHMFhzyRsmH
14 | e,3,112,-6823,-421042466,8535335158538929274,129,32712,3759340273,9916295859593918600,0.6424343,0.6316565296547284,BsM5ZAYifRh5Lw3Y8X1r53I0cTJnfE
15 | d,2,113,3917,-108973366,-7220140168410319165,197,24380,63044568,4225581724448081782,0.11867094,0.2944158618048994,90gAtmGEeIqUTbo1ZrxCvWtsseukXC
16 | b,1,54,-18410,1413111008,-7145106120930085900,249,5382,1842680163,17818611040257178339,0.8881188,0.24899794314659673,6FPJlLAcaQ5uokyOWZ9HGdLZObFvOZ
17 | c,1,103,-22186,431378678,1346564663822463162,146,12393,3766999078,10901819591635583995,0.064453244,0.7784918983501654,2T3wSlHdEmASmO0xcXHnndkKEt6bz8
18 | e,2,49,24495,-587831330,9178511478067509438,129,12757,1289293657,10948666249269100825,0.5610077,0.5991138115095911,bgK1r6v3BCTh0aejJUhkA1Hn6idXGp
19 | d,1,-98,13630,-1991133944,1184110014998006843,220,2986,225513085,9634106610243643486,0.89651865,0.1640882545084913,y7C453hRWd4E7ImjNDWlpexB8nUqjh
20 | d,3,77,15091,-1302295658,8795481303066536947,154,35477,2093538928,17419098323248948387,0.11952883,0.7035635283169166,O66j6PaYuZhEUtqV6fuU7TyjM2WxC5
21 | e,2,97,18167,1593800404,-9112448817105133638,163,45185,3188005828,2792105417953811674,0.38175434,0.4094218353587008,ukOiFGGFnQJDHFgZxHMpvhD3zybF0M
22 | e,4,-56,-31500,1544188174,3096047390018154410,220,417,557517119,2774306934041974261,0.15459597,0.19113293583306745,IZTkHMLvIKuiLjhDjYMmIHxh166we4
23 | d,1,-99,5613,1213926989,-8863698443222021480,19,18736,4216440507,14933742247195536130,0.6067944,0.33639590659276175,aDxBtor7Icd9C5hnTvvw5NrIre740e
24 | a,5,36,-16974,623103518,6834444206535996609,71,29458,141047417,17448660630302620693,0.17100024,0.04429073092078406,OF7fQ37GzaZ5ikA2oMyvleKtgnLjXh
25 | e,4,-53,13788,2064155045,-691093532952651300,243,35106,2778168728,9463973906560740422,0.34515214,0.27159190516490006,0VVIHzxWtNOFLtnhjHEKjXaJOSLJfm
26 | c,2,-29,25305,-537142430,-7683452043175617798,150,31648,598822671,11759014161799384683,0.8315913,0.946325164889271,9UbObCsVkmYpJGcGrgfK90qOnwb2Lj
27 | a,1,-25,15295,383352709,4980135132406487265,231,102,3276123488,12763583666216333412,0.53796273,0.17592486905979987,XemNcT1xp61xcM1Qz3wZ1VECCnq06O
28 | c,4,123,16620,852509237,-3087630526856906991,196,33715,3566741189,4546434653720168472,0.07606989,0.819715865079681,8LIh0b6jmDGm87BmIyjdxNIpX4ugjD
29 | a,5,-31,-12907,586844478,-4862189775214031241,170,28086,1013876852,11005002152861474932,0.35319167,0.05573662213439634,MeSTAXq8gVxVjbEjgkvU9YLte0X9uE
30 | a,2,45,15673,-1899175111,398282800995316041,99,2555,145294611,8554426087132697832,0.17333257,0.6405262429561641,b3b9esRhTzFEawbs6XhpKnD9ojutHB
31 | b,3,17,14457,670497898,-2390782464845307388,255,24770,1538863055,12662506238151717757,0.34077626,0.7614304100703713,6x93sxYioWuq5c9Kkk8oTAAORM7cH0
32 | e,4,97,-13181,2047637360,6176835796788944083,158,53000,2042457019,9726016502640071617,0.7085086,0.12357539988406441,oHJMNvWuunsIMIWFnYG31RCfkOo2V7
33 | c,2,-60,-16312,-1808210365,-3368300253197863813,71,39635,2844041986,7045482583778080653,0.805363,0.6425694115212065,BJqx5WokrmrrezZA0dUbleMYkG5U2O
34 | e,1,36,-21481,-928766616,-3471238138418013024,150,52569,2610290479,7788847578701297242,0.2578469,0.7670021786149205,gpo8K5qtYePve6jyPt6xgJx4YOVjms
35 | b,5,-5,24896,1955646088,2430204191283109071,118,43655,2424630722,11429640193932435507,0.87989986,0.7328050041291218,JafwVLSVk5AVoXFuzclesQ000EE2k1
36 | a,3,13,32064,912707948,3826618523497875379,42,21463,2214035726,10771380284714693539,0.6133468,0.7325106678655877,i6RQVXKUh7MzuGMDaNclUYnFUAireU
37 | c,1,41,-4667,-644225469,7049620391314639084,196,48099,2125812933,15419512479294091215,0.5780736,0.9255031346434324,mzbkwXKrPeZnxg2Kn1LRF5hYSsmksS
38 | d,2,93,-12642,2053379412,6468763445799074329,147,50842,1000948272,5536487915963301239,0.4279275,0.28534428578703896,lqhzgLsXZ8JhtpeeUWWNbMz8PHI705
39 | c,3,73,-9565,-382483011,1765659477910680019,186,1535,1088543984,2906943497598597237,0.680652,0.6009475544728957,Ow5PGpfTm4dXCfTDsXAOTatXRoAydR
40 | c,3,-2,-18655,-2141999138,-3154042970870838072,251,34970,3862393166,13062025193350212516,0.034291923,0.7697753383420857,IWl0G3ZlMNf7WT8yjIB49cx7MmYOmr
41 | c,3,22,13741,-2098805236,8604102724776612452,45,2516,1362369177,196777795886465166,0.94669616,0.0494924465469434,6oIXZuIPIqEoPBvFmbt2Nxy3tryGUE
42 | b,2,63,21456,-2138770630,-2380041687053733364,181,57594,2705709344,13144161537396946288,0.09683716,0.3051364088814128,nYVJnVicpGRqKZibHyBAmtmzBXAFfT
43 | d,4,102,-24558,1991172974,-7823479531661596016,14,36599,1534194097,2240998421986827216,0.028003037,0.8824879447595726,0og6hSkhbX8AC1ktFS4kounvTzy8Vo
44 | d,1,-8,27138,-1383162419,7682021027078563072,36,64517,2861376515,9904216782086286050,0.80954456,0.9463098243875633,AFGCj7OWlEB5QfniEFgonMq90Tq5uH
45 | a,3,17,-22796,1337043149,-1282905594104562444,167,2809,754775609,732272194388185106,0.3884129,0.658671129040488,VDhtJkYjAYPykCgOU9x3v7v3t4SO1a
46 | e,2,52,23388,715235348,605432070100399212,165,56980,3314983189,7386391799827871203,0.46076488,0.980809631269599,jQimhdepw3GKmioWUlVSWeBVRKFkY3
47 | b,5,68,21576,1188285940,5717755781990389024,224,27600,974297360,9865419128970328044,0.80895734,0.7973920072996036,ioEncce3mPOXD2hWhpZpCPWGATG6GU
48 | b,2,31,23127,-800561771,-8706387435232961848,153,27034,1098639440,3343692635488765507,0.35692692,0.5590205548347534,okOkcWflkNXIy4R8LzmySyY1EC3sYd
49 | c,1,-24,-24085,-1882293856,7385529783747709716,41,48048,520189543,2402288956117186783,0.39761502,0.3600766362333053,Fi4rJeTQq4eXj8Lxg3Hja5hBVTVV5u
50 | a,4,65,-28462,-1813935549,7602389238442209730,18,363,1865307672,11378396836996498283,0.09130204,0.5593249815276734,WHmjWk2AY4c6m7DA4GitUx6nmb1yYS
51 | d,1,125,31106,-1176490478,-4306856842351827308,90,17910,3625286410,17869394731126786457,0.8882508,0.7631239070049998,dVdvo6nUD5FgCgsbOZLds28RyGTpnx
52 | b,4,17,-28070,-673237643,1904316899655860234,188,27744,933879086,3732692885824435932,0.41860116,0.40342283197779727,JHNgc2UCaiXOdmkxwDDyGhRlO0mnBQ
53 | c,2,-106,-1114,-1927628110,1080308211931669384,177,20421,141680161,7464432081248293405,0.56749094,0.565352842229935,Vp3gmWunM5A7wOC9YW2JroFqTWjvTi
54 | d,5,-59,2045,-2117946883,1170799768349713170,189,63353,1365198901,2501626630745849169,0.75173044,0.18628859265874176,F7NSTjWvQJyBburN7CXRUlbgp2dIrA
55 | d,4,55,-1471,1902023838,1252101628560265705,157,3691,811650497,1524771507450695976,0.2968701,0.5437595540422571,f9ALCzwDAKmdu7Rk2msJaB1wxe5IBX
56 | b,2,-60,-21739,-1908480893,-8897292622858103761,59,50009,2525744318,1719090662556698549,0.52930677,0.560333188635217,l7uwDoTepWwnAP0ufqtHJS3CRi7RfP
57 | d,3,-76,8809,141218956,-9110406195556445909,58,5494,1824517658,12046662515387914426,0.8557294,0.6668423897406515,Z2sWcQr0qyCJRMHDpRy3aQr7PkHtkK
58 | e,4,73,-22501,1282464673,2541794052864382235,67,21119,538589788,9575476605699527641,0.48515016,0.296036538664718,4JznSdBajNWhu4hRQwjV1FjTTxY68i
59 | b,4,-117,19316,2051224722,-5534418579506232438,133,52046,3023531799,13684453606722360110,0.62608826,0.8506721053047003,mhjME0zBHbrK6NMkytMTQzOssOa1gF
60 | a,4,-101,11640,1993193190,2992662416070659899,230,40566,466439833,16778113360088370541,0.3991115,0.574210838214554,NEhyk8uIx4kEULJGa8qIyFjjBcP2G6
61 | b,5,62,16337,41423756,-2274773899098124524,121,34206,2307004493,10575647935385523483,0.23794776,0.1754261586710173,qnPOOmslCJaT45buUisMRnM0rc77EK
62 | c,4,-79,5281,-237425046,373011991904079451,121,55620,2818832252,2464584078983135763,0.49774808,0.9237877978193884,t6fQUjJejPcjc04wHvHTPe55S65B4V
63 | b,2,68,15874,49866617,1179733259727844435,121,23948,3455216719,3898128009708892708,0.6306253,0.9185813970744787,802bgTGl6Bk5TlkPYYTxp5JkKyaYUA
64 | c,1,70,27752,1325868318,1241882478563331892,63,61637,473294098,4976799313755010034,0.13801557,0.5081765563442366,Ktb7GQ0N1DrxwkCkEUsTaIXk0xYinn
65 | e,2,-61,-2888,-1660426473,2553892468492435401,126,35429,4144173353,939909697866979632,0.4405142,0.9231889896940375,BPtQMxnuSPpxMExYV9YkDa6cAN7GP3
66 | e,4,74,-12612,-1885422396,1702850374057819332,130,3583,3198969145,10767179755613315144,0.5518061,0.5614503754617461,QEHVvcP8gxI6EMJIrvcnIhgzPNjIvv
67 | d,2,122,10130,-168758331,-3179091803916845592,30,794,4061635107,15695681119022625322,0.69592506,0.9748360509016578,OPwBqCEK5PWTjWaiOyL45u2NLTaDWv
68 | e,3,71,194,1436496767,-5639533800082367925,158,44507,3105312559,3998472996619161534,0.930117,0.6108938307533,pTeu0WMjBRTaNRT15rLCuEh3tBJVc5
69 | c,5,-94,-15880,2025611582,-3348824099853919681,5,40622,4268716378,12849419495718510869,0.34163946,0.4830878559436823,RilTlL1tKkPOUFuzmLydHAVZwv1OGl
70 | d,1,-72,25590,1188089983,3090286296481837049,241,832,3542840110,5885937420286765261,0.41980565,0.21535402343780985,wwXqSGKLyBQyPkonlzBNYUJTCo4LRS
71 | e,1,71,-5479,-1339586153,-3920238763788954243,123,53012,4229654142,10297218950720052365,0.73473036,0.5773498217058918,cBGc0kSm32ylBDnxogG727C0uhZEYZ
72 | e,4,96,-30336,427197269,7506304308750926996,95,48483,3521368277,5437030162957481122,0.58104324,0.42073125331890115,3BEOHQsMEFZ58VcNTOJYShTBpAPzbt
73 | a,2,-48,-18025,439738328,-313657814587041987,222,13763,3717551163,9135746610908713318,0.055064857,0.9800193410444061,ukyD7b0Efj7tNlFSRmzZ0IqkEzg2a8
74 | a,1,-56,8692,2106705285,-7811675384226570375,231,15573,1454057357,677091006469429514,0.42794758,0.2739938529235548,JN0VclewmjwYlSl8386MlWv5rEhWCz
75 | e,2,52,-12056,-1090239422,9011500141803970147,238,4168,2013662838,12565360638488684051,0.6694766,0.39144436569161134,xipQ93429ksjNcXPX5326VSg1xJZcW
76 | a,1,-5,12636,794623392,2909750622865366631,15,24022,2669374863,4776679784701509574,0.29877836,0.2537253407987472,waIGbOGl1PM6gnzZ4uuZt4E2yDWRHs
77 | b,1,12,7652,-1448995523,-5332734971209541785,136,49283,4076864659,15449267433866484283,0.6214579,0.05636955101974106,akiiY5N0I44CMwEnBL6RTBk7BRkxEj
78 | e,5,64,-26526,1689098844,8950618259486183091,224,45253,662099130,16127995415060805595,0.2897315,0.5759450483859969,56MZa5O1hVtX4c5sbnCfxuX5kDChqI
79 | c,4,-90,-2935,1579876740,6733733506744649678,254,12876,3593959807,4094315663314091142,0.5708688,0.5603062368164834,Ld2ej8NEv5zNcqU60FwpHeZKBhfpiV
80 | e,5,-86,32514,-467659022,-8012578250188146150,254,2684,2861911482,2126626171973341689,0.12559289,0.01479305307777301,gxfHWUF8XgY2KdFxigxvNEXe2V2XMl
81 | c,2,-117,-30187,-1222533990,-191957437217035800,136,47061,2293105904,12659011877190539078,0.2047385,0.9706712283358269,pLk3i59bZwd5KBZrI1FiweYTd5hteG
82 | a,3,14,28162,397430452,-452851601758273256,57,14722,431948861,8164671015278284913,0.40199697,0.07260475960924484,TtDKUZxzVxsq758G6AWPSYuZgVgbcl
83 | c,2,29,-3855,1354539333,4742062657200940467,81,53815,3398507249,562977550464243101,0.7124534,0.991517828651004,Oq6J4Rx6nde0YlhOIJkFsX2MsSvAQ0
84 | b,4,-59,25286,1423957796,2646602445954944051,0,61069,3570297463,15100310750150419896,0.49619365,0.04893135681998029,fuyvs0w7WsKSlXqJ1e6HFSoLmx03AG
85 | a,1,83,-14704,2143473091,-4387559599038777245,37,829,4015442341,4602675983996931623,0.89542526,0.9567595541247681,ErJFw6hzZ5fmI5r8bhE4JzlscnhKZU
86 | a,3,-12,-9168,1489733240,-1569376002217735076,206,33821,3959216334,16060348691054629425,0.9488028,0.9293883502480845,oLZ21P2JEDooxV1pU31cIxQHEeeoLu
87 | c,4,3,-30508,659422734,-6455460736227846736,133,59663,2306130875,8622584762448622224,0.16999894,0.4273123318932347,EcCuckwsF3gV1Ecgmh5v4KM8g1ozif
88 | a,3,-72,-11122,-2141451704,-2578916903971263854,83,30296,1995343206,17452974532402389080,0.94209343,0.3231750610081745,e2Gh6Ov8XkXoFdJWhl0EjwEHlMDYyG
89 | c,2,-107,-2904,-1011669561,782342092880993439,18,29527,1157161427,4403623840168496677,0.31988364,0.36936304600612724,QYlaIAnJA6r8rlAb6f59wcxvcPcWFf
90 | c,5,118,19208,-134213907,-2120241105523909127,86,57751,1229567292,16493024289408725403,0.5536642,0.9723580396501548,TTQUwpMNSXZqVBKAFvXu7OlWvKXJKX
91 | c,3,97,29106,-903316089,2874859437662206732,207,42171,3473924576,8188072741116415408,0.32792538,0.2667177795079635,HKSMQ9nTnwXCJIte1JrM1dtYnDtJ8g
92 | b,3,-101,-13217,-346989627,5456800329302529236,26,54276,243203849,17929716297117857676,0.05422181,0.09465635123783445,MXhhH1Var3OzzJCtI9VNyYvA0q8UyJ
93 | a,2,-43,13080,370975815,5881039805148485053,2,20120,2939920218,906367167997372130,0.42733806,0.16301110515739792,m6jD0LBIQWaMfenwRCTANI9eOdyyto
94 | a,5,-101,-12484,-842693467,-6140627905445351305,57,57885,2496054700,2243924747182709810,0.59520596,0.9491397432856566,QJYm7YRA3YetcBHI5wkMZeLXVmfuNy
95 | b,5,-44,15788,-629486480,5822642169425315613,13,11872,3457053821,2413406423648025909,0.44318348,0.32869374687050157,ALuRhobVWbnQTTWZdSOk0iVe8oYFhW
96 | d,4,5,-7688,702611616,6239356364381313700,4,39363,3126475872,35363005357834672,0.3766935,0.061029375346466685,H5j5ZHy1FGesOAHjkQEDYCucbpKWRu
97 | e,1,120,10837,-1331533190,6342019705133850847,245,3975,2830981072,16439861276703750332,0.6623719,0.9965400387585364,LiEBxds3X0Uw0lxiYjDqrkAaAwoiIW
98 | e,3,-95,13611,2030965207,927403809957470678,119,59134,559847112,10966649192992996919,0.5301289,0.047343434291126085,gTpyQnEODMcpsPnJMZC66gh33i3m0b
99 | d,3,123,29533,240273900,1176001466590906949,117,30972,2592330556,12883447461717956514,0.39075065,0.38870280983958583,1aOcrEGd0cOqZe2I5XBOm0nDcwtBZO
100 | b,4,47,20690,-1009656194,-2027442591571700798,200,7781,326151275,2881913079548128905,0.57360977,0.2145232647388039,52mKlRE3aHCBZtjECq6sY9OqVf8Dze
101 | e,4,30,-16110,61035129,-3356533792537910152,159,299,28774375,13526465947516666293,0.6999775,0.03968347085780355,cq4WSAIFwx3wwTUS5bp1wCe71R6U5I
--------------------------------------------------------------------------------
/datafusion-examples/src/main/resources/aggregate_test_100.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datafusion-contrib/datafusion-java/c5dee4178ff9187de4cc7f5eb2e9ddd471223444/datafusion-examples/src/main/resources/aggregate_test_100.parquet
--------------------------------------------------------------------------------
/datafusion-examples/src/main/resources/log4j2.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <Configuration status="WARN">
3 |     <Appenders>
4 |         <Console name="Console" target="SYSTEM_OUT">
5 |             <PatternLayout pattern="%d{HH:mm:ss.SSS} [%t] %-5level %logger{36} - %msg%n"/>
6 |         </Console>
7 |     </Appenders>
8 |     <Loggers>
9 |         <Root level="info">
10 |             <AppenderRef ref="Console"/>
11 |         </Root>
12 |     </Loggers>
13 | </Configuration>
--------------------------------------------------------------------------------
/datafusion-examples/src/main/resources/test_table.csv:
--------------------------------------------------------------------------------
1 | name,age
2 | John,23
3 | Alice,29
4 |
--------------------------------------------------------------------------------
/datafusion-java/build.gradle:
--------------------------------------------------------------------------------
1 | plugins {
2 | id 'java-library'
3 | id 'maven-publish'
4 | id 'signing'
5 | id 'datafusion.java-conventions'
6 | id 'com.diffplug.spotless'
7 | id 'com.google.osdetector'
8 | }
9 |
10 | dependencies {
11 | api 'org.slf4j:slf4j-api:2.0.16'
12 | api 'org.apache.arrow:arrow-format:18.1.0'
13 | api 'org.apache.arrow:arrow-vector:18.1.0'
14 | implementation 'org.apache.arrow:arrow-c-data:18.1.0'
15 | runtimeOnly 'org.apache.arrow:arrow-memory-unsafe:18.1.0'
16 | testImplementation 'org.apache.arrow:arrow-compression:18.1.0'
17 | testImplementation 'org.junit.jupiter:junit-jupiter:5.10.2'
18 | testImplementation 'org.apache.hadoop:hadoop-client:3.4.1'
19 | testImplementation 'org.apache.hadoop:hadoop-common:3.4.1'
20 | testImplementation 'org.apache.parquet:parquet-avro:1.15.0'
21 | testImplementation 'org.apache.parquet:parquet-hadoop:1.15.0'
22 | }
23 |
24 | spotless {
25 | java {
26 | googleJavaFormat()
27 | }
28 | }
29 |
30 | java {
31 | withJavadocJar()
32 | withSourcesJar()
33 |
34 | compileJava {
35 | options.compilerArgs += ["-h", "${layout.buildDirectory.asFile.get()}/target/headers"]
36 | }
37 | }
38 |
39 | javadoc {
40 | if (JavaVersion.current().isJava9Compatible()) {
41 | options.addBooleanOption('html5', true)
42 | }
43 | }
44 |
45 | test {
46 | def libraryPath = findProperty("JNI_PATH") ?: "$rootDir/datafusion-java/build/jni_libs/dev"
47 | jvmArgs += ["-Djava.library.path=$libraryPath", "--add-opens=java.base/java.nio=ALL-UNNAMED"]
48 | useJUnitPlatform()
49 | }
50 |
51 | def cargoBinary = "${System.getProperty('user.home')}/.cargo/bin/cargo"
52 |
53 | tasks.register('cargoDevBuild', Exec) {
54 | workingDir "$rootDir/datafusion-jni"
55 | executable cargoBinary
56 | args += ['build']
57 | }
58 |
59 | tasks.register('cargoReleaseBuild', Exec) {
60 | workingDir "$rootDir/datafusion-jni"
61 | executable cargoBinary
62 | args += ['build', '--release']
63 | }
64 |
65 | def extensionMapping = [
66 | "osx" : "dylib",
67 | "linux" : "so",
68 | "windows": "dll"
69 | ]
70 |
71 | tasks.register('copyDevLibrary', Sync) {
72 | def extension = extensionMapping[osdetector.os]
73 | from "${rootDir}/datafusion-jni/target/debug/libdatafusion_jni.$extension"
74 | into layout.buildDirectory.dir("jni_libs/dev")
75 | dependsOn cargoDevBuild
76 | }
77 |
78 | tasks.named("test") {
79 | dependsOn copyDevLibrary
80 | }
81 |
82 | tasks.register('copyBuiltLibrary', Copy) {
83 | def extension = extensionMapping[osdetector.os]
84 | from "${rootDir}/datafusion-jni/target/release/libdatafusion_jni.$extension"
85 | into layout.buildDirectory.dir("jni_libs/${osdetector.classifier}")
86 | dependsOn cargoReleaseBuild
87 | }
88 |
89 | def classifierOsx = 'osx-x86_64'
90 | def extensionOsx = 'dylib'
91 | def jniLibOsx = layout.buildDirectory.file("jni_libs/$classifierOsx/libdatafusion_jni.$extensionOsx")
92 |
93 | def classifierLinux = 'linux-x86_64'
94 | def extensionLinux = 'so'
95 | def jniLibLinux = layout.buildDirectory.file("jni_libs/$classifierLinux/libdatafusion_jni.$extensionLinux")
96 |
97 | def classifierWindows = "windows-x86_64"
98 | def extensionWindows = "dll"
99 | def jniLibWindows = layout.buildDirectory.file("jni_libs/$classifierWindows/datafusion_jni.$extensionWindows")
100 |
101 | tasks.register('jarWithOsxLib', Jar) {
102 | from sourceSets.main.output
103 | from jniLibOsx
104 | rename "libdatafusion_jni.$extensionOsx", "jni_libs/libdatafusion_jni.$extensionOsx"
105 | archiveClassifier.set(classifierOsx)
106 | }
107 |
108 | tasks.register('jarWithLinuxLib', Jar) {
109 | from sourceSets.main.output
110 | from jniLibLinux
111 | rename "libdatafusion_jni.$extensionLinux", "jni_libs/libdatafusion_jni.$extensionLinux"
112 | archiveClassifier.set(classifierLinux)
113 | }
114 |
115 | tasks.register('jarWithWindowsLib', Jar) {
116 | from sourceSets.main.output
117 | from jniLibWindows
118 | rename "datafusion_jni.$extensionWindows", "jni_libs/datafusion_jni.$extensionWindows"
119 | archiveClassifier.set(classifierWindows)
120 | }
121 |
122 | tasks.register('jarWithLib', Jar) {
123 | from sourceSets.main.output
124 | from jniLibOsx
125 | rename "libdatafusion_jni.$extensionOsx", "jni_libs/libdatafusion_jni.$extensionOsx"
126 | from jniLibLinux
127 | rename "libdatafusion_jni.$extensionLinux", "jni_libs/libdatafusion_jni.$extensionLinux"
128 | from jniLibWindows
129 | rename "datafusion_jni.$extensionWindows", "jni_libs/datafusion_jni.$extensionWindows"
130 | }
131 |
132 | publishing {
133 | publications {
134 | mavenJava(MavenPublication) {
135 | artifactId 'datafusion-java'
136 | artifact sourcesJar
137 | artifact javadocJar
138 | artifact jarWithLib
139 | pom {
140 | name = 'DataFusion Java'
141 | description = 'A Java binding to the Apache Arrow DataFusion library'
142 | url = 'https://github.com/datafusion-contrib/datafusion-java'
143 | licenses {
144 | license {
145 | name = 'The Apache License, Version 2.0'
146 | url = 'http://www.apache.org/licenses/LICENSE-2.0.txt'
147 | }
148 | }
149 | developers {
150 | developer {
151 | id = 'dev'
152 | name = 'Apache Arrow Developers'
153 | email = 'dev@arrow.apache.org'
154 | }
155 | }
156 | scm {
157 | connection = 'scm:git:git@github.com:datafusion-contrib/datafusion-java.git'
158 | developerConnection = 'scm:git:https://github.com/datafusion-contrib/datafusion-java.git'
159 | url = 'https://github.com/datafusion-contrib/datafusion-java'
160 | }
161 | }
162 | pom.withXml {
163 | // Dependencies don't get mapped to the pom file due to using custom artifacts,
164 | // so add them here
165 | def dependenciesNode = asNode().appendNode('dependencies')
166 | def apiDependencies = configurations.api.allDependencies
167 | Set includedDependencies = []
168 | apiDependencies.each {
169 | def dependencyNode = dependenciesNode.appendNode('dependency')
170 | dependencyNode.appendNode('groupId', it.getGroup())
171 | dependencyNode.appendNode('artifactId', it.getName())
172 | dependencyNode.appendNode('version', it.getVersion())
173 | dependencyNode.appendNode('scope', 'compile')
174 | includedDependencies.add(String.format("%s:%s", it.getGroup(), it.getName()))
175 | }
176 | def implementationDependencies = configurations.implementation.allDependencies
177 | implementationDependencies.each {
178 | if (!includedDependencies.contains(String.format("%s:%s", it.getGroup(), it.getName()))) {
179 | def dependencyNode = dependenciesNode.appendNode('dependency')
180 | dependencyNode.appendNode('groupId', it.getGroup())
181 | dependencyNode.appendNode('artifactId', it.getName())
182 | dependencyNode.appendNode('version', it.getVersion())
183 | dependencyNode.appendNode('scope', 'runtime')
184 | }
185 | }
186 | def runtimeDependencies = configurations.runtimeOnly.allDependencies
187 | runtimeDependencies.each {
188 | def dependencyNode = dependenciesNode.appendNode('dependency')
189 | dependencyNode.appendNode('groupId', it.getGroup())
190 | dependencyNode.appendNode('artifactId', it.getName())
191 | dependencyNode.appendNode('version', it.getVersion())
192 | dependencyNode.appendNode('scope', 'runtime')
193 | }
194 | }
195 | }
196 | }
197 | repositories {
198 | maven {
199 | name = "Sonatype"
200 | def releasesRepoUrl = "https://s01.oss.sonatype.org/service/local/staging/deploy/maven2/"
201 | def snapshotsRepoUrl = "https://s01.oss.sonatype.org/content/repositories/snapshots/"
202 | url = version.endsWith('SNAPSHOT') ? snapshotsRepoUrl : releasesRepoUrl
203 | credentials {
204 | username = findProperty('ossrhUsername') ?: System.getenv("MAVEN_USERNAME")
205 | password = findProperty('ossrhPassword') ?: System.getenv("MAVEN_PASSWORD")
206 | }
207 | }
208 | }
209 | }
210 |
211 | def artifacts = publishing.publications.mavenJava.artifacts
212 |
213 | if (jniLibLinux.get().asFile.exists()) {
214 | artifacts.artifact jarWithLinuxLib
215 | }
216 |
217 | if (jniLibOsx.get().asFile.exists()) {
218 | artifacts.artifact jarWithOsxLib
219 | }
220 |
221 | if (jniLibWindows.get().asFile.exists()) {
222 | artifacts.artifact jarWithWindowsLib
223 | }
224 |
225 |
226 | signing {
227 | required { !version.endsWith("SNAPSHOT") && gradle.taskGraph.hasTask("publish") }
228 | def signingKeyId = findProperty("signingKeyId")
229 | def signingKey = findProperty("signingKey")
230 | def signingPassword = findProperty("signingPassword")
231 | useInMemoryPgpKeys(signingKeyId, signingKey, signingPassword)
232 | sign publishing.publications.mavenJava
233 | }
234 |
--------------------------------------------------------------------------------
/datafusion-java/src/main/java/org/apache/arrow/datafusion/AbstractProxy.java:
--------------------------------------------------------------------------------
1 | package org.apache.arrow.datafusion;
2 |
3 | import java.util.concurrent.ConcurrentHashMap;
4 | import java.util.concurrent.ConcurrentMap;
5 | import java.util.concurrent.atomic.AtomicBoolean;
6 | import org.slf4j.Logger;
7 | import org.slf4j.LoggerFactory;
8 |
9 | abstract class AbstractProxy implements AutoCloseable, NativeProxy {
10 | private static final Logger logger = LoggerFactory.getLogger(AbstractProxy.class);
11 | private final long pointer;
12 | private final AtomicBoolean closed;
13 | private final ConcurrentMap<Long, AbstractProxy> children;
14 |
15 | protected AbstractProxy(long pointer) {
16 | this.pointer = pointer;
17 | if (logger.isDebugEnabled()) {
18 | logger.debug("Obtaining {}@{}", getClass().getSimpleName(), Long.toHexString(pointer));
19 | }
20 | this.closed = new AtomicBoolean(false);
21 | this.children = new ConcurrentHashMap<>();
22 | }
23 |
24 | /**
25 | * Register a child proxy object that should be closed when this object is closed
26 | *
27 | * @param child the child proxy to register
28 | */
29 | protected final void registerChild(AbstractProxy child) {
30 | AbstractProxy old = children.putIfAbsent(child.getPointer(), child);
31 | if (old != null) {
32 | logger.warn("duplicate registration for {}: {}", child.getPointer(), old);
33 | }
34 | }
35 |
36 | /**
37 | * @return Whether the object has been closed
38 | */
39 | protected final boolean isClosed() {
40 | return closed.get();
41 | }
42 |
43 | @Override
44 | public final long getPointer() {
45 | return pointer;
46 | }
47 |
48 | abstract void doClose(long pointer) throws Exception;
49 |
50 | // Ensure native library is loaded before any proxy object is used
51 | static {
52 | JNILoader.load();
53 | }
54 |
55 | @Override
56 | public final void close() throws Exception {
57 | if (closed.compareAndSet(false, true)) {
58 | for (AbstractProxy child : children.values()) {
59 | // only close children that are still open, to avoid closing cycles
60 | if (!child.isClosed()) {
61 | child.close();
62 | }
63 | }
64 | if (logger.isDebugEnabled()) {
65 | logger.debug("Closing {}@{}", getClass().getSimpleName(), Long.toHexString(pointer));
66 | }
67 | doClose(pointer);
68 | } else {
69 | logger.warn("{} already closed, ignoring duplicate close", getPointer());
70 | }
71 | }
72 | }
73 |
--------------------------------------------------------------------------------
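Usage note: AbstractProxy centralizes native pointer tracking and idempotent cleanup, so subclasses only pass the pointer up and implement doClose. A minimal sketch of a conforming subclass (ExampleProxy and its native destroy call are hypothetical, for illustration only):

    class ExampleProxy extends AbstractProxy {
      ExampleProxy(long pointer) {
        super(pointer); // pointer is logged and tracked for close()
      }

      @Override
      void doClose(long pointer) {
        // release the Rust-side object here, e.g. via a native destroy(pointer) function
      }
    }
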
/datafusion-java/src/main/java/org/apache/arrow/datafusion/ArrowFormat.java:
--------------------------------------------------------------------------------
1 | package org.apache.arrow.datafusion;
2 |
3 | /** The Apache Arrow IPC file format configuration. This format is also known as Feather V2 */
4 | public class ArrowFormat extends AbstractProxy implements FileFormat {
5 | /** Create a new ArrowFormat with default options */
6 | public ArrowFormat() {
7 | super(FileFormats.createArrow());
8 | }
9 |
10 | @Override
11 | void doClose(long pointer) {
12 | FileFormats.destroyFileFormat(pointer);
13 | }
14 | }
15 |
--------------------------------------------------------------------------------
/datafusion-java/src/main/java/org/apache/arrow/datafusion/CsvFormat.java:
--------------------------------------------------------------------------------
1 | package org.apache.arrow.datafusion;
2 |
3 | /** The CSV file format configuration */
4 | public class CsvFormat extends AbstractProxy implements FileFormat {
5 | /** Create new CSV format with default options */
6 | public CsvFormat() {
7 | super(FileFormats.createCsv());
8 | }
9 |
10 | @Override
11 | void doClose(long pointer) {
12 | FileFormats.destroyFileFormat(pointer);
13 | }
14 | }
15 |
--------------------------------------------------------------------------------
/datafusion-java/src/main/java/org/apache/arrow/datafusion/DataFrame.java:
--------------------------------------------------------------------------------
1 | package org.apache.arrow.datafusion;
2 |
3 | import java.nio.file.Path;
4 | import java.util.concurrent.CompletableFuture;
5 | import org.apache.arrow.memory.BufferAllocator;
6 | import org.apache.arrow.vector.ipc.ArrowReader;
7 |
8 | /**
9 |  * A dataframe represents rectangular data organized into columns and rows, which can be {@link
10 |  * #collect(BufferAllocator) collected} into {@link
11 |  * org.apache.arrow.vector.ipc.message.ArrowRecordBatch batches} and read via an {@link ArrowReader
12 |  * reader}.
13 | */
14 | public interface DataFrame extends NativeProxy {
15 | /**
16 |    * Collect this dataframe into a list of record batches
17 | *
18 | * @param allocator {@link BufferAllocator buffer allocator} to allocate vectors within Reader
19 |    * @return {@link ArrowReader reader} instance to extract the data; you are expected to {@link
20 | * ArrowReader#close()} it after usage to release memory
21 | */
22 |   CompletableFuture<ArrowReader> collect(BufferAllocator allocator);
23 |
24 | /**
25 | * Execute this DataFrame and return a stream of the result data
26 | *
27 | * @param allocator {@link BufferAllocator buffer allocator} to allocate vectors for the stream
28 | * @return Stream of results
29 | */
30 |   CompletableFuture<RecordBatchStream> executeStream(BufferAllocator allocator);
31 |
32 | /**
33 | * Print results.
34 | *
35 |    * @return Future that completes when the results have been printed
36 | */
37 |   CompletableFuture<Void> show();
38 |
39 | /**
40 | * Write results to a parquet file.
41 | *
42 | * @param path path to write parquet file to
43 |    * @return Future that completes when the Parquet file has been written
44 | */
45 |   CompletableFuture<Void> writeParquet(Path path);
46 |
47 | /**
48 | * Write results to a csv file.
49 | *
50 | * @param path path to write csv file to
51 |    * @return Future that completes when the CSV file has been written
52 | */
53 |   CompletableFuture<Void> writeCsv(Path path);
54 |
55 | /**
56 |    * Converts this DataFrame into a TableProvider that can be registered as a table view using
57 |    * {@link SessionContext#registerTable(String, TableProvider)}
58 | *
59 | * @return the table provider ready to be e.g. {@link SessionContext#registerTable(String,
60 | * TableProvider) registered}.
61 | */
62 | TableProvider intoView();
63 | }
64 |
--------------------------------------------------------------------------------
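Usage note: an end-to-end sketch of this interface. It assumes SessionContexts.create() is the session factory exposed by the SessionContexts helper in this repository; everything else uses only methods declared above or standard Arrow classes.

    import org.apache.arrow.datafusion.DataFrame;
    import org.apache.arrow.datafusion.SessionContext;
    import org.apache.arrow.datafusion.SessionContexts;
    import org.apache.arrow.memory.BufferAllocator;
    import org.apache.arrow.memory.RootAllocator;
    import org.apache.arrow.vector.ipc.ArrowReader;

    public class DataFrameExample {
      public static void main(String[] args) throws Exception {
        try (SessionContext context = SessionContexts.create(); // assumed factory
            BufferAllocator allocator = new RootAllocator()) {
          DataFrame dataFrame = context.sql("SELECT 1 AS x").join();
          // collect() materializes the full result behind an ArrowReader
          try (ArrowReader reader = dataFrame.collect(allocator).join()) {
            while (reader.loadNextBatch()) {
              System.out.print(reader.getVectorSchemaRoot().contentToTSVString());
            }
          }
        }
      }
    }
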
/datafusion-java/src/main/java/org/apache/arrow/datafusion/DataFrames.java:
--------------------------------------------------------------------------------
1 | package org.apache.arrow.datafusion;
2 |
3 | import java.util.function.BiConsumer;
4 | import java.util.function.Consumer;
5 |
6 | /** helper class that calls into native stack for {@link DataFrame} */
7 | final class DataFrames {
8 |
9 | private DataFrames() {}
10 |
11 | static native void destroyDataFrame(long pointer);
12 |
13 |   static native void showDataframe(long runtime, long dataframe, Consumer<String> callback);
14 |
15 | static native void collectDataframe(
16 |       long runtime, long dataframe, BiConsumer<String, byte[]> callback);
17 |
18 | static native void executeStream(long runtime, long dataframe, ObjectResultCallback callback);
19 |
20 | static native void writeParquet(
21 |       long runtime, long dataframe, String path, Consumer<String> callback);
22 |
23 |   static native void writeCsv(long runtime, long dataframe, String path, Consumer<String> callback);
24 |
25 | static native long intoView(long dataframe);
26 | }
27 |
--------------------------------------------------------------------------------
/datafusion-java/src/main/java/org/apache/arrow/datafusion/DefaultDataFrame.java:
--------------------------------------------------------------------------------
1 | package org.apache.arrow.datafusion;
2 |
3 | import java.nio.file.Path;
4 | import java.util.concurrent.CompletableFuture;
5 | import org.apache.arrow.memory.BufferAllocator;
6 | import org.apache.arrow.vector.ipc.ArrowFileReader;
7 | import org.apache.arrow.vector.ipc.ArrowReader;
8 | import org.apache.arrow.vector.util.ByteArrayReadableSeekableByteChannel;
9 | import org.slf4j.Logger;
10 | import org.slf4j.LoggerFactory;
11 |
12 | class DefaultDataFrame extends AbstractProxy implements DataFrame {
13 |
14 | private static final Logger logger = LoggerFactory.getLogger(DefaultDataFrame.class);
15 | private final SessionContext context;
16 |
17 | DefaultDataFrame(SessionContext context, long pointer) {
18 | super(pointer);
19 | this.context = context;
20 | }
21 |
22 | @Override
23 |   public CompletableFuture<ArrowReader> collect(BufferAllocator allocator) {
24 |     CompletableFuture<ArrowReader> result = new CompletableFuture<>();
25 | Runtime runtime = context.getRuntime();
26 | long runtimePointer = runtime.getPointer();
27 | long dataframe = getPointer();
28 | DataFrames.collectDataframe(
29 | runtimePointer,
30 | dataframe,
31 | (String errString, byte[] arr) -> {
32 | if (ErrorUtil.containsError(errString)) {
33 | result.completeExceptionally(new RuntimeException(errString));
34 | } else {
35 | logger.info("successfully completed with arr length={}", arr.length);
36 | ByteArrayReadableSeekableByteChannel byteChannel =
37 | new ByteArrayReadableSeekableByteChannel(arr);
38 | result.complete(new ArrowFileReader(byteChannel, allocator));
39 | }
40 | });
41 | return result;
42 | }
43 |
44 | @Override
45 |   public CompletableFuture<RecordBatchStream> executeStream(BufferAllocator allocator) {
46 |     CompletableFuture<RecordBatchStream> result = new CompletableFuture<>();
47 | Runtime runtime = context.getRuntime();
48 | long runtimePointer = runtime.getPointer();
49 | long dataframe = getPointer();
50 | DataFrames.executeStream(
51 | runtimePointer,
52 | dataframe,
53 | (errString, streamId) -> {
54 | if (ErrorUtil.containsError(errString)) {
55 | result.completeExceptionally(new RuntimeException(errString));
56 | } else {
57 | result.complete(new DefaultRecordBatchStream(context, streamId, allocator));
58 | }
59 | });
60 | return result;
61 | }
62 |
63 | @Override
64 |   public CompletableFuture<Void> show() {
65 | Runtime runtime = context.getRuntime();
66 | long runtimePointer = runtime.getPointer();
67 | long dataframe = getPointer();
68 |     CompletableFuture<Void> future = new CompletableFuture<>();
69 | DataFrames.showDataframe(
70 | runtimePointer,
71 | dataframe,
72 | (String errString) -> {
73 | if (ErrorUtil.containsError(errString)) {
74 | future.completeExceptionally(new RuntimeException(errString));
75 | } else {
76 | future.complete(null);
77 | }
78 | });
79 | return future;
80 | }
81 |
82 | @Override
83 |   public CompletableFuture<Void> writeParquet(Path path) {
84 | Runtime runtime = context.getRuntime();
85 | long runtimePointer = runtime.getPointer();
86 | long dataframe = getPointer();
87 |     CompletableFuture<Void> future = new CompletableFuture<>();
88 | DataFrames.writeParquet(
89 | runtimePointer,
90 | dataframe,
91 | path.toAbsolutePath().toString(),
92 | (String errString) -> {
93 | if (ErrorUtil.containsError(errString)) {
94 | future.completeExceptionally(new RuntimeException(errString));
95 | } else {
96 | future.complete(null);
97 | }
98 | });
99 | return future;
100 | }
101 |
102 | @Override
103 |   public CompletableFuture<Void> writeCsv(Path path) {
104 | Runtime runtime = context.getRuntime();
105 | long runtimePointer = runtime.getPointer();
106 | long dataframe = getPointer();
107 |     CompletableFuture<Void> future = new CompletableFuture<>();
108 | DataFrames.writeCsv(
109 | runtimePointer,
110 | dataframe,
111 | path.toAbsolutePath().toString(),
112 | (String errString) -> {
113 | if (ErrorUtil.containsError(errString)) {
114 | future.completeExceptionally(new RuntimeException(errString));
115 | } else {
116 | future.complete(null);
117 | }
118 | });
119 | return future;
120 | }
121 |
122 | @Override
123 | public TableProvider intoView() {
124 | long dataframe = getPointer();
125 | long tableProviderPointer = DataFrames.intoView(dataframe);
126 | return new DefaultTableProvider(tableProviderPointer);
127 | }
128 |
129 | @Override
130 | void doClose(long pointer) {
131 | DataFrames.destroyDataFrame(pointer);
132 | }
133 | }
134 |
--------------------------------------------------------------------------------
/datafusion-java/src/main/java/org/apache/arrow/datafusion/DefaultRecordBatchStream.java:
--------------------------------------------------------------------------------
1 | package org.apache.arrow.datafusion;
2 |
3 | import java.util.Set;
4 | import java.util.concurrent.CompletableFuture;
5 | import org.apache.arrow.c.ArrowArray;
6 | import org.apache.arrow.c.ArrowSchema;
7 | import org.apache.arrow.c.CDataDictionaryProvider;
8 | import org.apache.arrow.c.Data;
9 | import org.apache.arrow.memory.BufferAllocator;
10 | import org.apache.arrow.vector.VectorSchemaRoot;
11 | import org.apache.arrow.vector.dictionary.Dictionary;
12 | import org.apache.arrow.vector.types.pojo.Schema;
13 |
14 | class DefaultRecordBatchStream extends AbstractProxy implements RecordBatchStream {
15 | private final SessionContext context;
16 | private final BufferAllocator allocator;
17 | private final CDataDictionaryProvider dictionaryProvider;
18 | private VectorSchemaRoot vectorSchemaRoot = null;
19 | private boolean initialized = false;
20 |
21 | DefaultRecordBatchStream(SessionContext context, long pointer, BufferAllocator allocator) {
22 | super(pointer);
23 | this.context = context;
24 | this.allocator = allocator;
25 | this.dictionaryProvider = new CDataDictionaryProvider();
26 | }
27 |
28 | @Override
29 | void doClose(long pointer) {
30 | destroy(pointer);
31 | dictionaryProvider.close();
32 | if (initialized) {
33 | vectorSchemaRoot.close();
34 | }
35 | }
36 |
37 | @Override
38 | public VectorSchemaRoot getVectorSchemaRoot() {
39 | ensureInitialized();
40 | return vectorSchemaRoot;
41 | }
42 |
43 | @Override
44 |   public CompletableFuture<Boolean> loadNextBatch() {
45 | ensureInitialized();
46 | Runtime runtime = context.getRuntime();
47 | long runtimePointer = runtime.getPointer();
48 | long recordBatchStream = getPointer();
49 |     CompletableFuture<Boolean> result = new CompletableFuture<>();
50 | next(
51 | runtimePointer,
52 | recordBatchStream,
53 | (errString, arrowArrayAddress) -> {
54 | if (ErrorUtil.containsError(errString)) {
55 | result.completeExceptionally(new RuntimeException(errString));
56 | } else if (arrowArrayAddress == 0) {
57 | // Reached end of stream
58 | result.complete(false);
59 | } else {
60 | try {
61 | ArrowArray arrowArray = ArrowArray.wrap(arrowArrayAddress);
62 | Data.importIntoVectorSchemaRoot(
63 | allocator, arrowArray, vectorSchemaRoot, dictionaryProvider);
64 | result.complete(true);
65 | } catch (Exception e) {
66 | result.completeExceptionally(e);
67 | }
68 | }
69 | });
70 | return result;
71 | }
72 |
73 | @Override
74 | public Dictionary lookup(long id) {
75 | return dictionaryProvider.lookup(id);
76 | }
77 |
78 | @Override
79 |   public Set<Long> getDictionaryIds() {
80 | return dictionaryProvider.getDictionaryIds();
81 | }
82 |
83 | private void ensureInitialized() {
84 | if (!initialized) {
85 | Schema schema = getSchema();
86 |       this.vectorSchemaRoot = VectorSchemaRoot.create(schema, allocator);
87 |       initialized = true;
88 |     }
89 | }
90 |
91 | private Schema getSchema() {
92 | long recordBatchStream = getPointer();
93 | // Native method is not async, but use a future to store the result for convenience
94 |     CompletableFuture<Schema> result = new CompletableFuture<>();
95 | getSchema(
96 | recordBatchStream,
97 | (errString, arrowSchemaAddress) -> {
98 | if (ErrorUtil.containsError(errString)) {
99 | result.completeExceptionally(new RuntimeException(errString));
100 | } else {
101 | try {
102 | ArrowSchema arrowSchema = ArrowSchema.wrap(arrowSchemaAddress);
103 | Schema schema = Data.importSchema(allocator, arrowSchema, dictionaryProvider);
104 | result.complete(schema);
105 | // The FFI schema will be released from rust when it is dropped
106 | } catch (Exception e) {
107 | result.completeExceptionally(e);
108 | }
109 | }
110 | });
111 | return result.join();
112 | }
113 |
114 | private static native void getSchema(long pointer, ObjectResultCallback callback);
115 |
116 | private static native void next(long runtime, long pointer, ObjectResultCallback callback);
117 |
118 | private static native void destroy(long pointer);
119 | }
120 |
--------------------------------------------------------------------------------
/datafusion-java/src/main/java/org/apache/arrow/datafusion/DefaultSessionContext.java:
--------------------------------------------------------------------------------
1 | package org.apache.arrow.datafusion;
2 |
3 | import java.nio.file.Path;
4 | import java.util.Optional;
5 | import java.util.concurrent.CompletableFuture;
6 | import java.util.function.Consumer;
7 | import org.slf4j.Logger;
8 | import org.slf4j.LoggerFactory;
9 |
10 | class DefaultSessionContext extends AbstractProxy implements SessionContext {
11 |
12 | private static final Logger logger = LoggerFactory.getLogger(DefaultSessionContext.class);
13 |
14 | static native void querySql(
15 | long runtime, long context, String sql, ObjectResultCallback callback);
16 |
17 | static native void registerCsv(
18 |       long runtime, long context, String name, String path, Consumer<String> callback);
19 |
20 | static native void registerParquet(
21 |       long runtime, long context, String name, String path, Consumer<String> callback);
22 |
23 | static native long registerTable(long context, String name, long tableProvider) throws Exception;
24 |
25 | @Override
26 |   public CompletableFuture<DataFrame> sql(String sql) {
27 | long runtime = getRuntime().getPointer();
28 |     CompletableFuture<DataFrame> future = new CompletableFuture<>();
29 | querySql(
30 | runtime,
31 | getPointer(),
32 | sql,
33 | (errMessage, dataframeId) -> {
34 | if (null != errMessage && !errMessage.isEmpty()) {
35 | future.completeExceptionally(new RuntimeException(errMessage));
36 | } else {
37 | DefaultDataFrame frame = new DefaultDataFrame(DefaultSessionContext.this, dataframeId);
38 | future.complete(frame);
39 | }
40 | });
41 | return future;
42 | }
43 |
44 | @Override
45 |   public CompletableFuture<Void> registerCsv(String name, Path path) {
46 | long runtime = getRuntime().getPointer();
47 |     CompletableFuture<Void> future = new CompletableFuture<>();
48 | registerCsv(
49 | runtime,
50 | getPointer(),
51 | name,
52 | path.toAbsolutePath().toString(),
53 | (errMessage) -> voidCallback(future, errMessage));
54 | return future;
55 | }
56 |
57 | @Override
58 |   public CompletableFuture<Void> registerParquet(String name, Path path) {
59 | long runtime = getRuntime().getPointer();
60 |     CompletableFuture<Void> future = new CompletableFuture<>();
61 | registerParquet(
62 | runtime,
63 | getPointer(),
64 | name,
65 | path.toAbsolutePath().toString(),
66 | (errMessage) -> voidCallback(future, errMessage));
67 | return future;
68 | }
69 |
70 | @Override
71 |   public Optional<TableProvider> registerTable(String name, TableProvider tableProvider)
72 | throws Exception {
73 | long previouslyRegistered = registerTable(getPointer(), name, tableProvider.getPointer());
74 | if (previouslyRegistered == 0) {
75 | return Optional.empty();
76 | }
77 | return Optional.of(new DefaultTableProvider(previouslyRegistered));
78 | }
79 |
80 |   private void voidCallback(CompletableFuture<Void> future, String errMessage) {
81 | if (null != errMessage && !errMessage.isEmpty()) {
82 | future.completeExceptionally(new RuntimeException(errMessage));
83 | } else {
84 | future.complete(null);
85 | }
86 | }
87 |
88 | @Override
89 | public Runtime getRuntime() {
90 | return runtime;
91 | }
92 |
93 | private final TokioRuntime runtime;
94 |
95 | DefaultSessionContext(long pointer) {
96 | super(pointer);
97 | this.runtime = TokioRuntime.create();
98 | registerChild(runtime);
99 | }
100 |
101 | @Override
102 | void doClose(long pointer) throws Exception {
103 | SessionContexts.destroySessionContext(pointer);
104 | }
105 | }
106 |
--------------------------------------------------------------------------------
/datafusion-java/src/main/java/org/apache/arrow/datafusion/DefaultTableProvider.java:
--------------------------------------------------------------------------------
1 | package org.apache.arrow.datafusion;
2 |
3 | class DefaultTableProvider extends AbstractProxy implements TableProvider {
4 | DefaultTableProvider(long pointer) {
5 | super(pointer);
6 | }
7 |
8 | @Override
9 | void doClose(long pointer) throws Exception {
10 | TableProviders.destroyTableProvider(pointer);
11 | }
12 | }
13 |
--------------------------------------------------------------------------------
/datafusion-java/src/main/java/org/apache/arrow/datafusion/ErrorUtil.java:
--------------------------------------------------------------------------------
1 | package org.apache.arrow.datafusion;
2 |
3 | class ErrorUtil {
4 |
5 | private ErrorUtil() {}
6 |
7 | static boolean containsError(String errString) {
8 | return errString != null && !errString.isEmpty();
9 | }
10 | }
11 |
--------------------------------------------------------------------------------
/datafusion-java/src/main/java/org/apache/arrow/datafusion/ExecutionOptions.java:
--------------------------------------------------------------------------------
1 | package org.apache.arrow.datafusion;
2 |
3 | /** Configures options related to query execution */
4 | @SuppressWarnings("UnusedReturnValue")
5 | public class ExecutionOptions {
6 | private final SessionConfig config;
7 |
8 | ExecutionOptions(SessionConfig config) {
9 | this.config = config;
10 | }
11 |
12 | /**
13 | * Get execution options related to reading Parquet data
14 | *
15 | * @return {@link ParquetOptions} instance for this config
16 | */
17 | public ParquetOptions parquet() {
18 | return new ParquetOptions(config);
19 | }
20 |
21 | /**
22 | * Get the batch size
23 | *
24 | * @return batch size
25 | */
26 | public long batchSize() {
27 | return SessionConfig.getExecutionOptionsBatchSize(config.getPointer());
28 | }
29 |
30 | /**
31 | * Set the size of batches to use when creating new data batches
32 | *
33 | * @param batchSize the batch size to set
34 | * @return the modified {@link ExecutionOptions} instance
35 | */
36 | public ExecutionOptions withBatchSize(long batchSize) {
37 | SessionConfig.setExecutionOptionsBatchSize(config.getPointer(), batchSize);
38 | return this;
39 | }
40 |
41 | /**
42 | * Get whether batch coalescing is enabled
43 | *
44 | * @return whether batch coalescing is enabled
45 | */
46 | public boolean coalesceBatches() {
47 | return SessionConfig.getExecutionOptionsCoalesceBatches(config.getPointer());
48 | }
49 |
50 | /**
51 | * Set whether to enable batch coalescing
52 | *
53 | * @param enabled whether to enable batch coalescing
54 | * @return the modified {@link ExecutionOptions} instance
55 | */
56 | public ExecutionOptions withCoalesceBatches(boolean enabled) {
57 | SessionConfig.setExecutionOptionsCoalesceBatches(config.getPointer(), enabled);
58 | return this;
59 | }
60 |
61 | /**
62 | * Get whether statistics collection is enabled
63 | *
64 | * @return whether statistics collection is enabled
65 | */
66 | public boolean collectStatistics() {
67 | return SessionConfig.getExecutionOptionsCollectStatistics(config.getPointer());
68 | }
69 |
70 | /**
71 | * Set whether to enable statistics collection
72 | *
73 | * @param enabled whether to enable statistics collection
74 | * @return the modified {@link ExecutionOptions} instance
75 | */
76 | public ExecutionOptions withCollectStatistics(boolean enabled) {
77 | SessionConfig.setExecutionOptionsCollectStatistics(config.getPointer(), enabled);
78 | return this;
79 | }
80 |
81 | /**
82 | * Get the target number of partitions
83 | *
84 | * @return number of partitions
85 | */
86 | public long targetPartitions() {
87 | return SessionConfig.getExecutionOptionsTargetPartitions(config.getPointer());
88 | }
89 |
90 | /**
91 | * Set the target number of partitions
92 | *
93 | * @param targetPartitions the number of partitions to set
94 | * @return the modified {@link ExecutionOptions} instance
95 | */
96 | public ExecutionOptions withTargetPartitions(long targetPartitions) {
97 | SessionConfig.setExecutionOptionsTargetPartitions(config.getPointer(), targetPartitions);
98 | return this;
99 | }
100 | }
101 |
--------------------------------------------------------------------------------
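Usage note: the setters above return the same ExecutionOptions instance, so tuning can be chained on a fresh SessionConfig (defined later in this module) before a context is created from it. A sketch using only methods shown in this file and in SessionConfig.java:

    // inside a method declared `throws Exception`, since SessionConfig#close() may throw
    try (SessionConfig config = new SessionConfig()) {
      config.executionOptions()
          .withBatchSize(4096)
          .withCoalesceBatches(true)
          .withTargetPartitions(8);
      long batchSize = config.executionOptions().batchSize(); // reads back 4096
    }
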
/datafusion-java/src/main/java/org/apache/arrow/datafusion/FileFormat.java:
--------------------------------------------------------------------------------
1 | package org.apache.arrow.datafusion;
2 |
3 | /** Interface for file formats that can provide table data */
4 | public interface FileFormat extends AutoCloseable, NativeProxy {}
5 |
--------------------------------------------------------------------------------
/datafusion-java/src/main/java/org/apache/arrow/datafusion/FileFormats.java:
--------------------------------------------------------------------------------
1 | package org.apache.arrow.datafusion;
2 |
3 | class FileFormats {
4 |
5 | private FileFormats() {}
6 |
7 | static native long createArrow();
8 |
9 | static native long createCsv();
10 |
11 | static native long createParquet();
12 |
13 | static native void destroyFileFormat(long pointer);
14 | }
15 |
--------------------------------------------------------------------------------
/datafusion-java/src/main/java/org/apache/arrow/datafusion/JNILoader.java:
--------------------------------------------------------------------------------
1 | package org.apache.arrow.datafusion;
2 |
3 | import java.io.File;
4 | import java.io.FileOutputStream;
5 | import java.io.IOException;
6 | import java.io.InputStream;
7 | import java.util.concurrent.atomic.AtomicBoolean;
8 | import org.slf4j.Logger;
9 | import org.slf4j.LoggerFactory;
10 |
11 | final class JNILoader {
12 |
13 | private JNILoader() {}
14 |
15 | private static final Logger logger = LoggerFactory.getLogger(JNILoader.class);
16 |
17 | private static final AtomicBoolean loaded = new AtomicBoolean(false);
18 |
19 | private enum OsName {
20 | Windows,
21 | Osx,
22 | Linux
23 | }
24 |
25 | private static final String libraryName = "datafusion_jni";
26 |
27 | private static final String ERROR_MSG =
28 | String.format(
29 | "Unsupported OS/arch (`%s' detected), cannot find `%s' or load `%s' from system libraries. "
30 |               + "Please try building the jar from source or providing %s on your system library path.",
31 | getOsName(), getResourceName(), libraryName, libraryName);
32 |
33 | private static OsName getOsName() {
34 | String os = System.getProperty("os.name").toLowerCase().replace(' ', '_');
35 | if (os.contains("win")) {
36 | return OsName.Windows;
37 | } else if (os.startsWith("mac") || os.contains("os_x")) {
38 | return OsName.Osx;
39 | } else {
40 | return OsName.Linux;
41 | }
42 | }
43 |
44 | private static String getLibraryFileName() {
45 | String prefix = "lib";
46 | if (getOsName() == OsName.Windows) {
47 | prefix = "";
48 | }
49 | return prefix + libraryName + "." + getExtension();
50 | }
51 |
52 | /**
53 | * @return the absolute path in the jar file for the jni library
54 | */
55 | private static String getResourceName() {
56 | return "/jni_libs/" + getLibraryFileName();
57 | }
58 |
59 | private static String getExtension() {
60 | OsName osName = getOsName();
61 | if (osName == OsName.Linux) {
62 | return "so";
63 | } else if (osName == OsName.Osx) {
64 | return "dylib";
65 | } else if (osName == OsName.Windows) {
66 | return "dll";
67 | }
68 | throw new IllegalStateException("Cannot determine the extension for " + osName);
69 | }
70 |
71 | static synchronized void load() {
72 | if (loaded.get()) {
73 | logger.debug("{} already loaded, returning", libraryName);
74 | return;
75 | }
76 | InputStream is = JNILoader.class.getResourceAsStream(getResourceName());
77 | if (is == null) {
78 | try {
79 | System.loadLibrary(libraryName);
80 | loaded.set(true);
81 | logger.debug("successfully loaded {} from library path", libraryName);
82 | return;
83 | } catch (UnsatisfiedLinkError e) {
84 | UnsatisfiedLinkError err =
85 | new UnsatisfiedLinkError(String.format("%s\n%s", e.getMessage(), ERROR_MSG));
86 | err.setStackTrace(e.getStackTrace());
87 | throw err;
88 | }
89 | }
90 | final File tempFile = extractToTempFile(is);
91 |     try {
92 |       System.load(tempFile.getAbsolutePath());
93 |       loaded.set(true);
94 |       logger.debug("successfully loaded {} from extracted lib file", libraryName);
95 |     } catch (UnsatisfiedLinkError le1) {
96 |       // fall back to loading from the system library path
97 |       try {
98 |         System.loadLibrary(libraryName);
99 |         logger.debug("successfully loaded {} from system library path", libraryName);
100 |         loaded.set(true);
101 |       } catch (UnsatisfiedLinkError le2) {
102 |         // both the extracted file and the system library path failed; concatenate both messages
103 |         UnsatisfiedLinkError err = new UnsatisfiedLinkError(
104 |             String.format("%s\n%s\n%s", le1.getMessage(), le2.getMessage(), ERROR_MSG));
105 |         err.setStackTrace(le2.getStackTrace());
106 |         throw err;
107 |       }
108 |     }
109 |   }
110 |
111 | private static File extractToTempFile(InputStream is) {
112 | final File tempFile;
113 | try {
114 | tempFile = File.createTempFile(libraryName, "." + getExtension(), null);
115 | tempFile.deleteOnExit();
116 | } catch (IOException e) {
117 | throw new IllegalStateException("Cannot create temporary files", e);
118 | }
119 | try (InputStream in = is;
120 | FileOutputStream out = new FileOutputStream(tempFile)) {
121 | byte[] buf = new byte[8192];
122 | while (true) {
123 | int read = in.read(buf);
124 | if (read == -1) {
125 | break;
126 | }
127 | out.write(buf, 0, read);
128 | }
129 | } catch (IOException e) {
130 | throw new IllegalStateException("Failed to extract lib file and write to temp file", e);
131 | }
132 | return tempFile;
133 | }
134 | }
135 |
--------------------------------------------------------------------------------
/datafusion-java/src/main/java/org/apache/arrow/datafusion/ListingOptions.java:
--------------------------------------------------------------------------------
1 | package org.apache.arrow.datafusion;
2 |
3 | /** Configures options for a {@link ListingTable} */
4 | public class ListingOptions extends AbstractProxy implements AutoCloseable {
5 | /** A Builder for {@link ListingOptions} instances */
6 | public static class Builder {
7 | private final FileFormat format;
8 | private String fileExtension = "";
9 | private boolean collectStat = true;
10 |
11 | /**
12 | * Create a new {@link ListingOptions} builder
13 | *
14 | * @param format The file format used by data files in the listing table
15 | */
16 | public Builder(FileFormat format) {
17 | this.format = format;
18 | }
19 |
20 | /**
21 | * Specify a suffix used to filter files in the listing location
22 | *
23 | * @param fileExtension The file suffix to filter on
24 | * @return This builder
25 | */
26 | public Builder withFileExtension(String fileExtension) {
27 | this.fileExtension = fileExtension;
28 | return this;
29 | }
30 |
31 | /**
32 | * Specify whether to collect statistics from files
33 | *
34 | * @param collectStat whether to collect statistics
35 | * @return This builder
36 | */
37 | public Builder withCollectStat(boolean collectStat) {
38 | this.collectStat = collectStat;
39 | return this;
40 | }
41 |
42 | /**
43 | * Build a new {@link ListingOptions} instance from the configured builder
44 | *
45 | * @return The built {@link ListingOptions}
46 | */
47 | public ListingOptions build() {
48 | return new ListingOptions(this);
49 | }
50 | }
51 |
52 | /**
53 | * Create a builder for listing options
54 | *
55 | * @param format The file format used by data files in the listing table
56 | * @return A new {@link Builder} instance
57 | */
58 | public static Builder builder(FileFormat format) {
59 | return new Builder(format);
60 | }
61 |
62 | /**
63 | * Construct ListingOptions from a Builder
64 | *
65 | * @param builder The builder to use
66 | */
67 | private ListingOptions(Builder builder) {
68 | super(create(builder.format.getPointer(), builder.fileExtension, builder.collectStat));
69 | }
70 |
71 | @Override
72 | void doClose(long pointer) {
73 | destroy(pointer);
74 | }
75 |
76 | private static native long create(long format, String fileExtension, boolean collectStat);
77 |
78 | private static native void destroy(long pointer);
79 | }
80 |
--------------------------------------------------------------------------------
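Usage note: a sketch describing a directory of Parquet files; both objects are AutoCloseable because they own native resources, so a try-with-resources keeps cleanup deterministic:

    // inside a method declared `throws Exception`
    try (ParquetFormat format = new ParquetFormat();
        ListingOptions options =
            ListingOptions.builder(format)
                .withFileExtension(".parquet")
                .withCollectStat(true)
                .build()) {
      // hand options to ListingTableConfig.Builder#withListingOptions
    }
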
/datafusion-java/src/main/java/org/apache/arrow/datafusion/ListingTable.java:
--------------------------------------------------------------------------------
1 | package org.apache.arrow.datafusion;
2 |
3 | import java.util.concurrent.CompletableFuture;
4 |
5 | /** A data source composed of multiple files that share a schema */
6 | public class ListingTable extends AbstractProxy implements TableProvider {
7 | /**
8 | * Create a new listing table
9 | *
10 | * @param config The listing table configuration
11 | */
12 | public ListingTable(ListingTableConfig config) {
13 | super(createListingTable(config));
14 | }
15 |
16 | private static long createListingTable(ListingTableConfig config) {
17 |     CompletableFuture<Long> result = new CompletableFuture<>();
18 | create(
19 | config.getPointer(),
20 | (errString, tableId) -> {
21 | if (ErrorUtil.containsError(errString)) {
22 | result.completeExceptionally(new RuntimeException(errString));
23 | } else {
24 | result.complete(tableId);
25 | }
26 | });
27 | return result.join();
28 | }
29 |
30 | @Override
31 | void doClose(long pointer) {
32 | TableProviders.destroyTableProvider(pointer);
33 | }
34 |
35 | private static native void create(long config, ObjectResultCallback result);
36 | }
37 |
--------------------------------------------------------------------------------
/datafusion-java/src/main/java/org/apache/arrow/datafusion/ListingTableConfig.java:
--------------------------------------------------------------------------------
1 | package org.apache.arrow.datafusion;
2 |
3 | import java.net.URI;
4 | import java.nio.file.Path;
5 | import java.util.Arrays;
6 | import java.util.concurrent.CompletableFuture;
7 |
8 | /** Configuration for creating a {@link ListingTable} */
9 | public class ListingTableConfig extends AbstractProxy implements AutoCloseable {
10 | /** A Builder for {@link ListingTableConfig} instances */
11 | public static class Builder {
12 | private final String[] tablePaths;
13 | private ListingOptions options = null;
14 |
15 | /**
16 | * Create a new {@link Builder}
17 | *
18 | * @param tablePath The path where data files are stored. This may be a file system path or a
19 | * URL with a scheme. When no scheme is provided, glob expressions may be used to filter
20 | * files.
21 | */
22 | public Builder(String tablePath) {
23 | this(new String[] {tablePath});
24 | }
25 |
26 | /**
27 | * Create a new {@link Builder}
28 | *
29 | * @param tablePaths The paths where data files are stored. This may be an array of file system
30 | * paths or an array of URLs with a scheme. When no scheme is provided, glob expressions may
31 | * be used to filter files.
32 | */
33 | public Builder(String[] tablePaths) {
34 | this.tablePaths = tablePaths;
35 | }
36 |
37 | /**
38 | * Specify the {@link ListingOptions} to use
39 | *
40 | * @param options The {@link ListingOptions} to use
41 | * @return this Builder instance
42 | */
43 | public Builder withListingOptions(ListingOptions options) {
44 | this.options = options;
45 | return this;
46 | }
47 |
48 | /**
49 | * Create the listing table config. This is async as the schema may need to be inferred
50 | *
51 | * @param context The {@link SessionContext} to use when inferring the schema
52 | * @return Future that will complete with the table config
53 | */
54 |     public CompletableFuture<ListingTableConfig> build(SessionContext context) {
55 | return createListingTableConfig(this, context).thenApply(ListingTableConfig::new);
56 | }
57 | }
58 |
59 | /**
60 | * Create a new {@link Builder} for a {@link ListingTableConfig}
61 | *
62 | * @param tablePath The path where data files are stored. This may be a file system path or a URL
63 | * with a scheme. When no scheme is specified, glob expressions may be used to filter files.
64 | * @return A new {@link Builder} instance
65 | */
66 | public static Builder builder(String tablePath) {
67 | return new Builder(tablePath);
68 | }
69 |
70 | /**
71 | * Create a new {@link Builder} for a {@link ListingTableConfig} from a file path
72 | *
73 | * @param tablePath The path where data files are stored
74 | * @return A new {@link Builder} instance
75 | */
76 | public static Builder builder(Path tablePath) {
77 | return new Builder(tablePath.toString());
78 | }
79 |
80 | /**
81 | * Create a new {@link Builder} for a {@link ListingTableConfig} from an array of paths
82 | *
83 | * @param tablePaths The path array where data files are stored
84 | * @return A new {@link Builder} instance
85 | */
86 | public static Builder builder(Path[] tablePaths) {
87 |     String[] pathStrings =
88 |         Arrays.stream(tablePaths)
89 |             .map(Path::toString)
90 |             .toArray(String[]::new);
91 | return new Builder(pathStrings);
92 | }
93 |
94 | /**
95 | * Create a new {@link Builder} for a {@link ListingTableConfig} from a URI
96 | *
97 | * @param tablePath The location where data files are stored
98 | * @return A new {@link Builder} instance
99 | */
100 | public static Builder builder(URI tablePath) {
101 | return new Builder(tablePath.toString());
102 | }
103 |
104 | private ListingTableConfig(long pointer) {
105 | super(pointer);
106 | }
107 |
108 |   private static CompletableFuture<Long> createListingTableConfig(
109 | Builder builder, SessionContext context) {
110 |     CompletableFuture<Long> future = new CompletableFuture<>();
111 | Runtime runtime = context.getRuntime();
112 | create(
113 | runtime.getPointer(),
114 | context.getPointer(),
115 | builder.tablePaths,
116 | builder.options == null ? 0 : builder.options.getPointer(),
117 | (errMessage, configId) -> {
118 | if (ErrorUtil.containsError(errMessage)) {
119 | future.completeExceptionally(new RuntimeException(errMessage));
120 | } else {
121 | future.complete(configId);
122 | }
123 | });
124 | return future;
125 | }
126 |
127 | @Override
128 | void doClose(long pointer) {
129 | destroy(pointer);
130 | }
131 |
132 | private static native void create(
133 | long runtime, long context, String[] tablePaths, long options, ObjectResultCallback callback);
134 |
135 | private static native void destroy(long pointer);
136 | }
137 |
--------------------------------------------------------------------------------
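Usage note: an end-to-end sketch combining ListingTableConfig, ListingTable and table registration. SessionContexts.create() is an assumed factory and /data/events is a hypothetical directory of Parquet files; the remaining calls are declared in this module.

    // inside main(String[] args) throws Exception
    try (SessionContext context = SessionContexts.create(); // assumed factory
        ParquetFormat format = new ParquetFormat();
        ListingOptions options =
            ListingOptions.builder(format).withFileExtension(".parquet").build();
        ListingTableConfig config =
            ListingTableConfig.builder(Paths.get("/data/events")) // java.nio.file.Paths
                .withListingOptions(options)
                .build(context)
                .join();
        ListingTable table = new ListingTable(config)) {
      context.registerTable("events", table);
      context.sql("SELECT COUNT(*) FROM events").join().show().join();
    }
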
/datafusion-java/src/main/java/org/apache/arrow/datafusion/NativeProxy.java:
--------------------------------------------------------------------------------
1 | package org.apache.arrow.datafusion;
2 |
3 | /**
4 |  * A native proxy points to a Rust-managed object; its pointer is used to release the
5 |  * underlying resources when they are no longer required.
6 | */
7 | interface NativeProxy {
8 |
9 | /**
10 | * Get a pointer to the native object
11 | *
12 | * @return Pointer value as a long
13 | */
14 | long getPointer();
15 | }
16 |
--------------------------------------------------------------------------------
/datafusion-java/src/main/java/org/apache/arrow/datafusion/ObjectResultCallback.java:
--------------------------------------------------------------------------------
1 | package org.apache.arrow.datafusion;
2 |
3 | interface ObjectResultCallback {
4 | void callback(String errMessage, long value);
5 | }
6 |
--------------------------------------------------------------------------------
/datafusion-java/src/main/java/org/apache/arrow/datafusion/ParquetFormat.java:
--------------------------------------------------------------------------------
1 | package org.apache.arrow.datafusion;
2 |
3 | /** The Apache Parquet file format configuration */
4 | public class ParquetFormat extends AbstractProxy implements FileFormat {
5 | /** Create new ParquetFormat with default options */
6 | public ParquetFormat() {
7 | super(FileFormats.createParquet());
8 | }
9 |
10 | @Override
11 | void doClose(long pointer) {
12 | FileFormats.destroyFileFormat(pointer);
13 | }
14 | }
15 |
--------------------------------------------------------------------------------
/datafusion-java/src/main/java/org/apache/arrow/datafusion/ParquetOptions.java:
--------------------------------------------------------------------------------
1 | package org.apache.arrow.datafusion;
2 |
3 | import java.util.Optional;
4 |
5 | /** Configures options specific to reading Parquet data */
6 | @SuppressWarnings("UnusedReturnValue")
7 | public class ParquetOptions {
8 | private final SessionConfig config;
9 |
10 | ParquetOptions(SessionConfig config) {
11 | this.config = config;
12 | }
13 |
14 | /**
15 | * Get whether parquet data page level metadata (Page Index) statistics are used
16 | *
17 | * @return whether using the page index is enabled
18 | */
19 | public boolean enablePageIndex() {
20 | return SessionConfig.getParquetOptionsEnablePageIndex(config.getPointer());
21 | }
22 |
23 | /**
24 | * Set whether to use parquet data page level metadata (Page Index) statistics to reduce the
25 | * number of rows decoded.
26 | *
27 | * @param enabled whether using the page index is enabled
28 | * @return the modified {@link ParquetOptions} instance
29 | */
30 | public ParquetOptions withEnablePageIndex(boolean enabled) {
31 | SessionConfig.setParquetOptionsEnablePageIndex(config.getPointer(), enabled);
32 | return this;
33 | }
34 |
35 | /**
36 | * Get whether pruning is enabled, meaning reading row groups will be skipped based on metadata
37 | *
38 | * @return whether pruning is enabled
39 | */
40 | public boolean pruning() {
41 | return SessionConfig.getParquetOptionsPruning(config.getPointer());
42 | }
43 |
44 | /**
45 | * Set whether pruning is enabled, meaning reading row groups will be skipped based on metadata
46 | *
47 | * @param enabled whether to enable pruning
48 | * @return the modified {@link ParquetOptions} instance
49 | */
50 | public ParquetOptions withPruning(boolean enabled) {
51 | SessionConfig.setParquetOptionsPruning(config.getPointer(), enabled);
52 | return this;
53 | }
54 |
55 | /**
56 | * Get whether file metadata is skipped, to avoid schema conflicts
57 | *
58 | * @return whether metadata is skipped
59 | */
60 | public boolean skipMetadata() {
61 | return SessionConfig.getParquetOptionsSkipMetadata(config.getPointer());
62 | }
63 |
64 | /**
65 | * Set whether file metadata is skipped, to avoid schema conflicts
66 | *
67 | * @param enabled whether to skip metadata
68 | * @return the modified {@link ParquetOptions} instance
69 | */
70 | public ParquetOptions withSkipMetadata(boolean enabled) {
71 | SessionConfig.setParquetOptionsSkipMetadata(config.getPointer(), enabled);
72 | return this;
73 | }
74 |
75 | /**
76 | * Get the metadata size hint
77 | *
78 | * @return metadata size hint value
79 | */
80 |   public Optional<Long> metadataSizeHint() {
81 | long sizeHint = SessionConfig.getParquetOptionsMetadataSizeHint(config.getPointer());
82 | return sizeHint < 0 ? Optional.empty() : Optional.of(sizeHint);
83 | }
84 |
85 | /**
86 | * Set the metadata size hint, which is used to attempt to read the full metadata at once rather
87 | * than needing one read to get the metadata size and then a second read for the metadata itself.
88 | *
89 | * @param metadataSizeHint the metadata size hint
90 | * @return the modified {@link ParquetOptions} instance
91 | */
92 |   public ParquetOptions withMetadataSizeHint(Optional<Long> metadataSizeHint) {
93 | long value = -1L;
94 | if (metadataSizeHint.isPresent()) {
95 | value = metadataSizeHint.get();
96 | if (value < 0) {
97 |         throw new IllegalArgumentException("metadataSizeHint cannot be negative");
98 | }
99 | }
100 | SessionConfig.setParquetOptionsMetadataSizeHint(config.getPointer(), value);
101 | return this;
102 | }
103 |
104 | /**
105 | * Get whether filter pushdown is enabled, so filters are applied during parquet decoding
106 | *
107 | * @return whether filter pushdown is enabled
108 | */
109 | public boolean pushdownFilters() {
110 | return SessionConfig.getParquetOptionsPushdownFilters(config.getPointer());
111 | }
112 |
113 | /**
114 | * Set whether filter pushdown is enabled, so filters are applied during parquet decoding
115 | *
116 | * @param enabled whether to pushdown filters
117 | * @return the modified {@link ParquetOptions} instance
118 | */
119 | public ParquetOptions withPushdownFilters(boolean enabled) {
120 | SessionConfig.setParquetOptionsPushdownFilters(config.getPointer(), enabled);
121 | return this;
122 | }
123 |
124 | /**
125 | * Get whether filter reordering is enabled to minimize evaluation cost
126 | *
127 | * @return whether filter reordering is enabled
128 | */
129 | public boolean reorderFilters() {
130 | return SessionConfig.getParquetOptionsReorderFilters(config.getPointer());
131 | }
132 |
133 | /**
134 | * Set whether filter reordering is enabled to minimize evaluation cost
135 | *
136 | * @param enabled whether to reorder filters
137 | * @return the modified {@link ParquetOptions} instance
138 | */
139 | public ParquetOptions withReorderFilters(boolean enabled) {
140 | SessionConfig.setParquetOptionsReorderFilters(config.getPointer(), enabled);
141 | return this;
142 | }
143 | }
144 |
--------------------------------------------------------------------------------
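Usage note: a sketch of tuning Parquet reads through a SessionConfig (Optional is java.util.Optional; all methods are declared above or in SessionConfig.java):

    // inside a method declared `throws Exception`
    try (SessionConfig config = new SessionConfig()) {
      config.executionOptions().parquet()
          .withEnablePageIndex(true)
          .withPruning(true)
          .withMetadataSizeHint(Optional.of(64L * 1024)); // ~64 KiB footer hint
    }
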
/datafusion-java/src/main/java/org/apache/arrow/datafusion/RecordBatchStream.java:
--------------------------------------------------------------------------------
1 | package org.apache.arrow.datafusion;
2 |
3 | import java.util.concurrent.CompletableFuture;
4 | import org.apache.arrow.vector.VectorSchemaRoot;
5 | import org.apache.arrow.vector.dictionary.DictionaryProvider;
6 |
7 | /**
8 | * A record batch stream is a stream of tabular Arrow data that can be iterated over asynchronously
9 | */
10 | public interface RecordBatchStream extends AutoCloseable, NativeProxy, DictionaryProvider {
11 | /**
12 | * Get the VectorSchemaRoot that will be populated with data as the stream is iterated over
13 | *
14 | * @return the stream's VectorSchemaRoot
15 | */
16 | VectorSchemaRoot getVectorSchemaRoot();
17 |
18 | /**
19 | * Load the next record batch in the stream into the VectorSchemaRoot
20 | *
21 | * @return Future that will complete with true if a batch was loaded or false if the end of the
22 | * stream has been reached
23 | */
24 |   CompletableFuture<Boolean> loadNextBatch();
25 | }
26 |
--------------------------------------------------------------------------------
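Usage note: the intended iteration pattern, sketched with dataFrame and allocator as in the DataFrame example earlier (VectorSchemaRoot is org.apache.arrow.vector.VectorSchemaRoot); loadNextBatch() resolves to false once the stream is exhausted:

    try (RecordBatchStream stream = dataFrame.executeStream(allocator).join()) {
      VectorSchemaRoot root = stream.getVectorSchemaRoot();
      while (stream.loadNextBatch().join()) {
        // root is reused between batches: read or copy its data before the next call
        System.out.print(root.contentToTSVString());
      }
    }
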
/datafusion-java/src/main/java/org/apache/arrow/datafusion/Runtime.java:
--------------------------------------------------------------------------------
1 | package org.apache.arrow.datafusion;
2 |
3 | /** A runtime represents the underlying async runtime in datafusion engine */
4 | public interface Runtime extends AutoCloseable, NativeProxy {}
5 |
--------------------------------------------------------------------------------
/datafusion-java/src/main/java/org/apache/arrow/datafusion/SessionConfig.java:
--------------------------------------------------------------------------------
1 | package org.apache.arrow.datafusion;
2 |
3 | import java.util.function.Consumer;
4 |
5 | /** Configuration for creating a {@link SessionContext} using {@link SessionContexts#withConfig} */
6 | public class SessionConfig extends AbstractProxy implements AutoCloseable {
7 | /** Create a new default {@link SessionConfig} */
8 | public SessionConfig() {
9 | super(create());
10 | }
11 |
12 | /**
13 | * Get options related to query execution
14 | *
15 | * @return {@link ExecutionOptions} instance for this config
16 | */
17 | public ExecutionOptions executionOptions() {
18 | return new ExecutionOptions(this);
19 | }
20 |
21 | /**
22 | * Get options specific to parsing SQL queries
23 | *
24 | * @return {@link SqlParserOptions} instance for this config
25 | */
26 | public SqlParserOptions sqlParserOptions() {
27 | return new SqlParserOptions(this);
28 | }
29 |
30 | /**
31 | * Modify this session configuration and then return it, to simplify use in a try-with-resources
32 | * statement
33 | *
34 | * @param configurationCallback Callback used to update the configuration
35 | * @return This {@link SessionConfig} instance after being updated
36 | */
37 |   public SessionConfig withConfiguration(Consumer<SessionConfig> configurationCallback) {
38 | configurationCallback.accept(this);
39 | return this;
40 | }
41 |
42 | @Override
43 | void doClose(long pointer) {
44 | destroy(pointer);
45 | }
46 |
47 | private static native long create();
48 |
49 | private static native void destroy(long pointer);
50 |
51 | // ExecutionOptions native methods
52 |
53 | static native long getExecutionOptionsBatchSize(long pointer);
54 |
55 | static native void setExecutionOptionsBatchSize(long pointer, long batchSize);
56 |
57 | static native boolean getExecutionOptionsCoalesceBatches(long pointer);
58 |
59 | static native void setExecutionOptionsCoalesceBatches(long pointer, boolean enabled);
60 |
61 | static native boolean getExecutionOptionsCollectStatistics(long pointer);
62 |
63 | static native void setExecutionOptionsCollectStatistics(long pointer, boolean enabled);
64 |
65 | static native long getExecutionOptionsTargetPartitions(long pointer);
66 |
67 |   static native void setExecutionOptionsTargetPartitions(long pointer, long targetPartitions);
68 |
69 | // ParquetOptions native methods
70 |
71 | static native boolean getParquetOptionsEnablePageIndex(long pointer);
72 |
73 | static native void setParquetOptionsEnablePageIndex(long pointer, boolean enabled);
74 |
75 | static native boolean getParquetOptionsPruning(long pointer);
76 |
77 | static native void setParquetOptionsPruning(long pointer, boolean enabled);
78 |
79 | static native boolean getParquetOptionsSkipMetadata(long pointer);
80 |
81 | static native void setParquetOptionsSkipMetadata(long pointer, boolean enabled);
82 |
83 | static native long getParquetOptionsMetadataSizeHint(long pointer);
84 |
85 | static native void setParquetOptionsMetadataSizeHint(long pointer, long value);
86 |
87 | static native boolean getParquetOptionsPushdownFilters(long pointer);
88 |
89 | static native void setParquetOptionsPushdownFilters(long pointer, boolean enabled);
90 |
91 | static native boolean getParquetOptionsReorderFilters(long pointer);
92 |
93 | static native void setParquetOptionsReorderFilters(long pointer, boolean enabled);
94 |
95 | // SqlParserOptions native methods
96 |
97 | static native boolean getSqlParserOptionsParseFloatAsDecimal(long pointer);
98 |
99 | static native void setSqlParserOptionsParseFloatAsDecimal(long pointer, boolean enabled);
100 |
101 | static native boolean getSqlParserOptionsEnableIdentNormalization(long pointer);
102 |
103 | static native void setSqlParserOptionsEnableIdentNormalization(long pointer, boolean enabled);
104 |
105 | static native String getSqlParserOptionsDialect(long pointer);
106 |
107 | static native void setSqlParserOptionsDialect(long pointer, String dialect);
108 | }
109 |
--------------------------------------------------------------------------------
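Usage sketch for the class above (illustrative, mirroring the test suite later in this repository): withConfiguration returns the config itself, so options can be set inside the same try-with-resources statement that creates the context.

try (SessionConfig config =
        new SessionConfig()
            .withConfiguration(c -> c.executionOptions().withBatchSize(1024));
    SessionContext context = SessionContexts.withConfig(config)) {
  // queries run in this context produce record batches of at most 1024 rows
}

--------------------------------------------------------------------------------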
/datafusion-java/src/main/java/org/apache/arrow/datafusion/SessionContext.java:
--------------------------------------------------------------------------------
1 | package org.apache.arrow.datafusion;
2 |
3 | import java.nio.file.Path;
4 | import java.util.Optional;
5 | import java.util.concurrent.CompletableFuture;
6 |
7 | /** A session context holds resources and is the entry point for obtaining a {@link DataFrame} */
8 | public interface SessionContext extends AutoCloseable, NativeProxy {
9 |
10 | /**
11 | * Obtain a {@link DataFrame} by running the {@code sql} query against the DataFusion library
12 | *
13 | * @param sql The query to execute
14 | * @return DataFrame representing the query result
15 | */
16 | CompletableFuture<DataFrame> sql(String sql);
17 |
18 | /**
19 | * Register a CSV file with the context
20 | *
21 | * @param name The table name to use to refer to the data
22 | * @param path Path to the CSV file
23 | * @return Future that is completed when the CSV is registered
24 | */
25 | CompletableFuture<Void> registerCsv(String name, Path path);
26 |
27 | /**
28 | * Register a Parquet file with the context
29 | *
30 | * @param name The table name to use to refer to the data
31 | * @param path Path to the Parquet file
32 | * @return Future that is completed when the Parquet file is registered
33 | */
34 | CompletableFuture<Void> registerParquet(String name, Path path);
35 |
36 | /**
37 | * Registers a TableProvider as a table that can be referenced from SQL statements executed
38 | * against this context.
39 | *
40 | * @param name table reference
41 | * @param tableProvider table provider
42 | * @return as of Arrow 22 this is only {@link Optional#empty()}
43 | * @throws Exception when the table is already registered
44 | */
45 | Optional<TableProvider> registerTable(String name, TableProvider tableProvider) throws Exception;
46 |
47 | /**
48 | * Get the runtime associated with this context
49 | *
50 | * @return The context's runtime
51 | */
52 | Runtime getRuntime();
53 | }
54 |
--------------------------------------------------------------------------------
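Illustrative usage of the interface above (a sketch, not repository code): register a CSV file and collect query results through an ArrowReader. The file name "data.csv" is a placeholder; ArrowReader comes from org.apache.arrow.vector.ipc.

try (SessionContext context = SessionContexts.create();
    BufferAllocator allocator = new RootAllocator()) {
  context.registerCsv("example", Paths.get("data.csv")).join();
  try (ArrowReader reader =
      context
          .sql("SELECT * FROM example")
          .thenComposeAsync(df -> df.collect(allocator))
          .join()) {
    while (reader.loadNextBatch()) {
      System.out.println(reader.getVectorSchemaRoot().contentToTSVString());
    }
  }
}

--------------------------------------------------------------------------------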
/datafusion-java/src/main/java/org/apache/arrow/datafusion/SessionContexts.java:
--------------------------------------------------------------------------------
1 | package org.apache.arrow.datafusion;
2 |
3 | import java.util.function.Consumer;
4 |
5 | /** Manages session contexts */
6 | public class SessionContexts {
7 |
8 | private SessionContexts() {}
9 |
10 | /**
11 | * Create a new session context
12 | *
13 | * @return native pointer to the created session context
14 | */
15 | static native long createSessionContext();
16 |
17 | /**
18 | * Create a new session context using a SessionConfig
19 | *
20 | * @param configPointer pointer to the native session config object to use
21 | * @return native pointer to the created session context
22 | */
23 | static native long createSessionContextWithConfig(long configPointer);
24 |
25 | /**
26 | * Destroy a session context
27 | *
28 | * @param pointer native pointer to the session context to destroy
29 | */
30 | static native void destroySessionContext(long pointer);
31 |
32 | static {
33 | JNILoader.load();
34 | }
35 |
36 | /**
37 | * Create a new default session context
38 | *
39 | * @return The created context
40 | */
41 | public static SessionContext create() {
42 | long pointer = createSessionContext();
43 | return new DefaultSessionContext(pointer);
44 | }
45 |
46 | /**
47 | * Create a new session context using the provided configuration
48 | *
49 | * @param config the configuration for the session
50 | * @return The created context
51 | */
52 | public static SessionContext withConfig(SessionConfig config) {
53 | long pointer = createSessionContextWithConfig(config.getPointer());
54 | return new DefaultSessionContext(pointer);
55 | }
56 |
57 | /**
58 | * Create a new session context using the provided callback to configure the session
59 | *
60 | * @param configuration callback to modify the {@link SessionConfig} for the session
61 | * @return The created context
62 | * @throws Exception if an error is encountered closing the session config resource
63 | */
64 | public static SessionContext withConfig(Consumer<SessionConfig> configuration) throws Exception {
65 | try (SessionConfig config = new SessionConfig().withConfiguration(configuration)) {
66 | long pointer = createSessionContextWithConfig(config.getPointer());
67 | return new DefaultSessionContext(pointer);
68 | }
69 | }
70 | }
71 |
--------------------------------------------------------------------------------
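Sketch of the Consumer-based overload above (illustrative only): the SessionConfig is created and closed internally, so the caller only handles the resulting context.

try (SessionContext context =
    SessionContexts.withConfig(c -> c.executionOptions().withCollectStatistics(true))) {
  // statistics collection is enabled for tables registered in this context
}

--------------------------------------------------------------------------------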
/datafusion-java/src/main/java/org/apache/arrow/datafusion/SqlParserOptions.java:
--------------------------------------------------------------------------------
1 | package org.apache.arrow.datafusion;
2 |
3 | /** Configures options specific to parsing SQL queries */
4 | @SuppressWarnings("UnusedReturnValue")
5 | public class SqlParserOptions {
6 | private final SessionConfig config;
7 |
8 | SqlParserOptions(SessionConfig config) {
9 | this.config = config;
10 | }
11 |
12 | /**
13 | * Get whether to parse floats as decimal type
14 | *
15 | * @return whether to parse floats as decimal
16 | */
17 | public boolean parseFloatAsDecimal() {
18 | return SessionConfig.getSqlParserOptionsParseFloatAsDecimal(config.getPointer());
19 | }
20 |
21 | /**
22 | * Set whether to parse floats as decimal type
23 | *
24 | * @param enabled whether to parse floats as decimal
25 | * @return the modified {@link SqlParserOptions} instance
26 | */
27 | public SqlParserOptions withParseFloatAsDecimal(boolean enabled) {
28 | SessionConfig.setSqlParserOptionsParseFloatAsDecimal(config.getPointer(), enabled);
29 | return this;
30 | }
31 |
32 | /**
33 | * Get whether to convert identifiers to lowercase when not quoted
34 | *
35 | * @return whether ident normalization is enabled
36 | */
37 | public boolean enableIdentNormalization() {
38 | return SessionConfig.getSqlParserOptionsEnableIdentNormalization(config.getPointer());
39 | }
40 |
41 | /**
42 | * Set whether to convert identifiers to lowercase when not quoted
43 | *
44 | * @param enabled whether ident normalization is enabled
45 | * @return the modified {@link SqlParserOptions} instance
46 | */
47 | public SqlParserOptions withEnableIdentNormalization(boolean enabled) {
48 | SessionConfig.setSqlParserOptionsEnableIdentNormalization(config.getPointer(), enabled);
49 | return this;
50 | }
51 |
52 | /**
53 | * Get the SQL dialect used
54 | *
55 | * @return the SQL dialect used
56 | */
57 | public String dialect() {
58 | return SessionConfig.getSqlParserOptionsDialect(config.getPointer());
59 | }
60 |
61 | /**
62 | * Set the SQL dialect to use
63 | *
64 | * @param dialect the SQL dialect to use
65 | * @return the modified {@link SqlParserOptions} instance
66 | */
67 | public SqlParserOptions withDialect(String dialect) {
68 | SessionConfig.setSqlParserOptionsDialect(config.getPointer(), dialect);
69 | return this;
70 | }
71 | }
72 |
--------------------------------------------------------------------------------
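Usage sketch (illustrative, mirroring the test suite later in this repository): the with* methods return the same SqlParserOptions instance, so settings can be chained.

try (SessionConfig config = new SessionConfig()) {
  config
      .sqlParserOptions()
      .withDialect("PostgreSQL")
      .withEnableIdentNormalization(false);
  // config.sqlParserOptions().dialect() now returns "PostgreSQL"
}

--------------------------------------------------------------------------------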
/datafusion-java/src/main/java/org/apache/arrow/datafusion/TableProvider.java:
--------------------------------------------------------------------------------
1 | package org.apache.arrow.datafusion;
2 |
3 | /** Opaque interface that maps to {@code Arc<dyn TableProvider>}. */
4 | public interface TableProvider extends NativeProxy {}
5 |
--------------------------------------------------------------------------------
/datafusion-java/src/main/java/org/apache/arrow/datafusion/TableProviders.java:
--------------------------------------------------------------------------------
1 | package org.apache.arrow.datafusion;
2 |
3 | class TableProviders {
4 |
5 | private TableProviders() {}
6 |
7 | static native void destroyTableProvider(long pointer);
8 | }
9 |
--------------------------------------------------------------------------------
/datafusion-java/src/main/java/org/apache/arrow/datafusion/TokioRuntime.java:
--------------------------------------------------------------------------------
1 | package org.apache.arrow.datafusion;
2 |
3 | final class TokioRuntime extends AbstractProxy implements Runtime {
4 |
5 | TokioRuntime(long pointer) {
6 | super(pointer);
7 | }
8 |
9 | @Override
10 | void doClose(long pointer) {
11 | destroyTokioRuntime(pointer);
12 | }
13 |
14 | static TokioRuntime create() {
15 | long pointer = TokioRuntime.createTokioRuntime();
16 | if (pointer <= 0) {
17 | throw new IllegalStateException("failed to create runtime");
18 | }
19 | return new TokioRuntime(pointer);
20 | }
21 |
22 | static native long createTokioRuntime();
23 |
24 | static native void destroyTokioRuntime(long pointer);
25 | }
26 |
--------------------------------------------------------------------------------
/datafusion-java/src/main/java/org/apache/arrow/datafusion/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 | * This module contains a Java JNI binding to Apache Arrow DataFusion, a query engine
3 | * library for working with data in the Arrow format.
4 | */
5 | package org.apache.arrow.datafusion;
6 |
--------------------------------------------------------------------------------
/datafusion-java/src/test/java/org/apache/arrow/datafusion/ParquetWriter.java:
--------------------------------------------------------------------------------
1 | package org.apache.arrow.datafusion;
2 |
3 | import java.io.IOException;
4 | import java.nio.file.Path;
5 | import java.util.function.BiConsumer;
6 | import org.apache.avro.Schema;
7 | import org.apache.avro.generic.GenericData;
8 | import org.apache.hadoop.conf.Configuration;
9 | import org.apache.parquet.avro.AvroParquetWriter;
10 | import org.apache.parquet.column.ParquetProperties;
11 | import org.apache.parquet.hadoop.metadata.CompressionCodecName;
12 | import org.apache.parquet.hadoop.util.HadoopOutputFile;
13 | import org.apache.parquet.io.OutputFile;
14 |
15 | /** Helper class for writing test files in Parquet format using Avro records */
16 | public class ParquetWriter {
17 | public static void writeParquet(
18 | Path path, String schema, int rowCount, BiConsumer<Integer, GenericData.Record> setRecord)
19 | throws IOException {
20 | Configuration config = new Configuration();
21 | org.apache.hadoop.fs.Path hadoopFilePath = new org.apache.hadoop.fs.Path(path.toString());
22 | OutputFile outputFile = HadoopOutputFile.fromPath(hadoopFilePath, config);
23 |
24 | Schema.Parser parser = new Schema.Parser().setValidate(true);
25 | Schema avroSchema = parser.parse(schema);
26 |
27 | try (org.apache.parquet.hadoop.ParquetWriter<GenericData.Record> writer =
28 | AvroParquetWriter.<GenericData.Record>builder(outputFile)
29 | .withSchema(avroSchema)
30 | .withConf(config)
31 | .withWriterVersion(ParquetProperties.WriterVersion.PARQUET_2_0)
32 | .withCompressionCodec(CompressionCodecName.SNAPPY)
33 | .build()) {
34 | for (int i = 0; i < rowCount; ++i) {
35 | GenericData.Record record = new GenericData.Record(avroSchema);
36 | setRecord.accept(i, record);
37 | writer.write(record);
38 | }
39 | }
40 | }
41 | }
42 |
--------------------------------------------------------------------------------
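Illustrative call to the helper above (a sketch; the output path is a placeholder): write two rows of a two-column schema, with the BiConsumer filling each Avro record by row index.

String schema =
    "{\"namespace\": \"org.example\","
        + "\"type\": \"record\","
        + "\"name\": \"record_name\","
        + "\"fields\": ["
        + " {\"name\": \"x\", \"type\": \"long\"},"
        + " {\"name\": \"y\", \"type\": \"long\"}"
        + " ]}";
ParquetWriter.writeParquet(
    Paths.get("data.parquet"), // placeholder output path
    schema,
    2,
    (i, record) -> {
      record.put("x", i * 2 + 1);
      record.put("y", i * 2 + 2);
    });

--------------------------------------------------------------------------------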
/datafusion-java/src/test/java/org/apache/arrow/datafusion/TestExecuteStream.java:
--------------------------------------------------------------------------------
1 | package org.apache.arrow.datafusion;
2 |
3 | import static org.junit.jupiter.api.Assertions.*;
4 |
5 | import java.net.URL;
6 | import java.nio.charset.StandardCharsets;
7 | import java.nio.file.Files;
8 | import java.nio.file.Path;
9 | import java.nio.file.Paths;
10 | import java.util.Arrays;
11 | import java.util.List;
12 | import org.apache.arrow.memory.BufferAllocator;
13 | import org.apache.arrow.memory.RootAllocator;
14 | import org.apache.arrow.vector.BigIntVector;
15 | import org.apache.arrow.vector.Float8Vector;
16 | import org.apache.arrow.vector.VarCharVector;
17 | import org.apache.arrow.vector.VectorSchemaRoot;
18 | import org.apache.arrow.vector.dictionary.DictionaryEncoder;
19 | import org.apache.arrow.vector.types.pojo.Schema;
20 | import org.junit.jupiter.api.Test;
21 | import org.junit.jupiter.api.io.TempDir;
22 |
23 | public class TestExecuteStream {
24 | @Test
25 | public void executeStream(@TempDir Path tempDir) throws Exception {
26 | try (SessionContext context = SessionContexts.create();
27 | BufferAllocator allocator = new RootAllocator()) {
28 | Path csvFilePath = tempDir.resolve("data.csv");
29 |
30 | List<String> lines = Arrays.asList("x,y,z", "1,2,3.5", "4,5,6.5", "7,8,9.5");
31 | Files.write(csvFilePath, lines);
32 |
33 | context.registerCsv("test", csvFilePath).join();
34 |
35 | try (RecordBatchStream stream =
36 | context
37 | .sql("SELECT y,z FROM test WHERE x > 3")
38 | .thenComposeAsync(df -> df.executeStream(allocator))
39 | .join()) {
40 | VectorSchemaRoot root = stream.getVectorSchemaRoot();
41 | Schema schema = root.getSchema();
42 | assertEquals(2, schema.getFields().size());
43 | assertEquals("y", schema.getFields().get(0).getName());
44 | assertEquals("z", schema.getFields().get(1).getName());
45 |
46 | assertTrue(stream.loadNextBatch().join());
47 | assertEquals(2, root.getRowCount());
48 | BigIntVector yValues = (BigIntVector) root.getVector(0);
49 | assertEquals(5, yValues.get(0));
50 | assertEquals(8, yValues.get(1));
51 | Float8Vector zValues = (Float8Vector) root.getVector(1);
52 | assertEquals(6.5, zValues.get(0));
53 | assertEquals(9.5, zValues.get(1));
54 |
55 | assertFalse(stream.loadNextBatch().join());
56 | }
57 | }
58 | }
59 |
60 | @Test
61 | public void readDictionaryData() throws Exception {
62 | try (SessionContext context = SessionContexts.create();
63 | BufferAllocator allocator = new RootAllocator()) {
64 |
65 | URL fileUrl = this.getClass().getResource("/dictionary_data.parquet");
66 | Path parquetFilePath = Paths.get(fileUrl.getPath());
67 |
68 | context.registerParquet("test", parquetFilePath).join();
69 |
70 | try (RecordBatchStream stream =
71 | context
72 | .sql("SELECT x,y FROM test")
73 | .thenComposeAsync(df -> df.executeStream(allocator))
74 | .join()) {
75 | VectorSchemaRoot root = stream.getVectorSchemaRoot();
76 | Schema schema = root.getSchema();
77 | assertEquals(2, schema.getFields().size());
78 | assertEquals("x", schema.getFields().get(0).getName());
79 | assertEquals("y", schema.getFields().get(1).getName());
80 |
81 | int rowsRead = 0;
82 | while (stream.loadNextBatch().join()) {
83 | int batchNumRows = root.getRowCount();
84 | BigIntVector xValuesEncoded = (BigIntVector) root.getVector(0);
85 | long xDictionaryId = xValuesEncoded.getField().getDictionary().getId();
86 | try (VarCharVector xValues =
87 | (VarCharVector)
88 | DictionaryEncoder.decode(xValuesEncoded, stream.lookup(xDictionaryId))) {
89 | String[] expected = {"one", "two", "three"};
90 | for (int i = 0; i < batchNumRows; ++i) {
91 | assertEquals(
92 | new String(xValues.get(i), StandardCharsets.UTF_8), expected[(rowsRead + i) % 3]);
93 | }
94 | }
95 |
96 | BigIntVector yValuesEncoded = (BigIntVector) root.getVector(1);
97 | long yDictionaryId = yValuesEncoded.getField().getDictionary().getId();
98 | try (VarCharVector yValues =
99 | (VarCharVector)
100 | DictionaryEncoder.decode(yValuesEncoded, stream.lookup(yDictionaryId))) {
101 | String[] expected = {"four", "five", "six"};
102 | for (int i = 0; i < batchNumRows; ++i) {
103 | assertEquals(
104 | new String(yValues.get(i), StandardCharsets.UTF_8), expected[(rowsRead + i) % 3]);
105 | }
106 | }
107 | rowsRead += batchNumRows;
108 | }
109 |
110 | assertEquals(100, rowsRead);
111 | }
112 | }
113 | }
114 | }
115 |
--------------------------------------------------------------------------------
/datafusion-java/src/test/java/org/apache/arrow/datafusion/TestListingTable.java:
--------------------------------------------------------------------------------
1 | package org.apache.arrow.datafusion;
2 |
3 | import static org.junit.jupiter.api.Assertions.*;
4 |
5 | import java.io.FileOutputStream;
6 | import java.nio.charset.StandardCharsets;
7 | import java.nio.file.Files;
8 | import java.nio.file.Path;
9 | import java.time.Instant;
10 | import java.util.ArrayList;
11 | import java.util.Arrays;
12 | import java.util.List;
13 | import java.util.stream.Collectors;
14 | import org.apache.arrow.compression.CommonsCompressionFactory;
15 | import org.apache.arrow.memory.BufferAllocator;
16 | import org.apache.arrow.memory.RootAllocator;
17 | import org.apache.arrow.vector.BigIntVector;
18 | import org.apache.arrow.vector.FieldVector;
19 | import org.apache.arrow.vector.VarCharVector;
20 | import org.apache.arrow.vector.VectorSchemaRoot;
21 | import org.apache.arrow.vector.compression.CompressionCodec;
22 | import org.apache.arrow.vector.compression.CompressionUtil;
23 | import org.apache.arrow.vector.compression.NoCompressionCodec;
24 | import org.apache.arrow.vector.ipc.ArrowFileWriter;
25 | import org.apache.arrow.vector.ipc.ArrowReader;
26 | import org.apache.arrow.vector.ipc.message.IpcOption;
27 | import org.apache.arrow.vector.types.pojo.Field;
28 | import org.junit.jupiter.api.Test;
29 | import org.junit.jupiter.api.io.TempDir;
30 |
31 | public class TestListingTable {
32 | @Test
33 | public void testCsvListingTable(@TempDir Path tempDir) throws Exception {
34 | try (SessionContext context = SessionContexts.create();
35 | BufferAllocator allocator = new RootAllocator()) {
36 | Path dataDir = tempDir.resolve("data");
37 | Files.createDirectories(dataDir);
38 |
39 | Path csvFilePath0 = dataDir.resolve("0.csv");
40 | List<String> lines = Arrays.asList("x,y", "1,2", "3,4");
41 | Files.write(csvFilePath0, lines);
42 |
43 | Path csvFilePath1 = dataDir.resolve("1.csv");
44 | lines = Arrays.asList("x,y", "1,12", "3,14");
45 | Files.write(csvFilePath1, lines);
46 |
47 | try (CsvFormat format = new CsvFormat();
48 | ListingOptions listingOptions =
49 | ListingOptions.builder(format).withFileExtension(".csv").build();
50 | ListingTableConfig tableConfig =
51 | ListingTableConfig.builder(dataDir)
52 | .withListingOptions(listingOptions)
53 | .build(context)
54 | .join();
55 | ListingTable listingTable = new ListingTable(tableConfig)) {
56 | context.registerTable("test", listingTable);
57 | testQuery(context, allocator);
58 | }
59 | }
60 | }
61 |
62 | @Test
63 | public void testParquetListingTable(@TempDir Path tempDir) throws Exception {
64 | try (SessionContext context = SessionContexts.create();
65 | BufferAllocator allocator = new RootAllocator()) {
66 | Path dataDir = tempDir.resolve("data");
67 | writeParquetFiles(dataDir);
68 |
69 | try (ParquetFormat format = new ParquetFormat();
70 | ListingOptions listingOptions =
71 | ListingOptions.builder(format).withFileExtension(".parquet").build();
72 | ListingTableConfig tableConfig =
73 | ListingTableConfig.builder(dataDir)
74 | .withListingOptions(listingOptions)
75 | .build(context)
76 | .join();
77 | ListingTable listingTable = new ListingTable(tableConfig)) {
78 | context.registerTable("test", listingTable);
79 | testQuery(context, allocator);
80 | }
81 | }
82 | }
83 |
84 | @Test
85 | public void testArrowListingTable(@TempDir Path tempDir) throws Exception {
86 | try (SessionContext context = SessionContexts.create();
87 | BufferAllocator allocator = new RootAllocator()) {
88 | Path dataDir = tempDir.resolve("data");
89 | Files.createDirectories(dataDir);
90 |
91 | Path arrowFilePath0 = dataDir.resolve("0.arrow");
92 | Path arrowFilePath1 = dataDir.resolve("1.arrow");
93 |
94 | // Write data files in Arrow IPC (Feather V2) file format
95 | try (BigIntVector xVector = new BigIntVector("x", allocator);
96 | BigIntVector yVector = new BigIntVector("y", allocator)) {
97 | List<FieldVector> vectors = Arrays.asList(xVector, yVector);
98 |
99 | for (int i = 0; i < 2; i++) {
100 | xVector.setSafe(i, i * 2 + 1);
101 | yVector.setSafe(i, i * 2 + 2);
102 | }
103 | xVector.setValueCount(2);
104 | yVector.setValueCount(2);
105 | writeArrowFile(arrowFilePath0, vectors, false);
106 |
107 | xVector.reset();
108 | yVector.reset();
109 | for (int i = 0; i < 2; i++) {
110 | xVector.setSafe(i, i * 2 + 1);
111 | yVector.setSafe(i, i * 2 + 12);
112 | }
113 | xVector.setValueCount(2);
114 | yVector.setValueCount(2);
115 | writeArrowFile(arrowFilePath1, vectors, false);
116 | }
117 |
118 | try (ArrowFormat format = new ArrowFormat();
119 | ListingOptions listingOptions =
120 | ListingOptions.builder(format).withFileExtension(".arrow").build();
121 | ListingTableConfig tableConfig =
122 | ListingTableConfig.builder(dataDir)
123 | .withListingOptions(listingOptions)
124 | .build(context)
125 | .join();
126 | ListingTable listingTable = new ListingTable(tableConfig)) {
127 | context.registerTable("test", listingTable);
128 | testQuery(context, allocator);
129 | }
130 | }
131 | }
132 |
133 | @Test
134 | public void testCompressedArrowIpc(@TempDir Path tempDir) throws Exception {
135 | try (SessionContext context = SessionContexts.create();
136 | BufferAllocator allocator = new RootAllocator()) {
137 | Path dataDir = tempDir.resolve("data");
138 | Files.createDirectories(dataDir);
139 | Path arrowFilePath0 = dataDir.resolve("0.arrow");
140 |
141 | // Data needs to be reasonably large otherwise compression is not used
142 | int numRows = 10_000;
143 |
144 | // Write data files in compressed Arrow IPC (Feather V2) file format
145 | try (BigIntVector xVector = new BigIntVector("x", allocator)) {
146 | for (int i = 0; i < numRows; i++) {
147 | xVector.setSafe(i, i * 2 + 1);
148 | }
149 | xVector.setValueCount(numRows);
150 | List<FieldVector> vectors = Arrays.asList(xVector);
151 | writeArrowFile(arrowFilePath0, vectors, true);
152 | }
153 |
154 | try (ArrowFormat format = new ArrowFormat();
155 | ListingOptions listingOptions =
156 | ListingOptions.builder(format).withFileExtension(".arrow").build();
157 | ListingTableConfig tableConfig =
158 | ListingTableConfig.builder(dataDir)
159 | .withListingOptions(listingOptions)
160 | .build(context)
161 | .join();
162 | ListingTable listingTable = new ListingTable(tableConfig)) {
163 | context.registerTable("test", listingTable);
164 | try (ArrowReader reader =
165 | context
166 | .sql("SELECT x FROM test")
167 | .thenComposeAsync(df -> df.collect(allocator))
168 | .join()) {
169 |
170 | int globalRow = 0;
171 | VectorSchemaRoot root = reader.getVectorSchemaRoot();
172 | while (reader.loadNextBatch()) {
173 | BigIntVector xValues = (BigIntVector) root.getVector(0);
174 | for (int row = 0; row < root.getRowCount(); ++row, ++globalRow) {
175 | assertEquals(globalRow * 2 + 1, xValues.get(row));
176 | }
177 | }
178 | assertEquals(numRows, globalRow);
179 | }
180 | }
181 | }
182 | }
183 |
184 | @Test
185 | public void testDisableCollectStat(@TempDir Path tempDir) throws Exception {
186 | try (SessionContext context = SessionContexts.create();
187 | BufferAllocator allocator = new RootAllocator()) {
188 | Path dataDir = tempDir.resolve("data");
189 | writeParquetFiles(dataDir);
190 |
191 | try (ParquetFormat format = new ParquetFormat();
192 | ListingOptions listingOptions =
193 | ListingOptions.builder(format)
194 | .withFileExtension(".parquet")
195 | .withCollectStat(false)
196 | .build();
197 | ListingTableConfig tableConfig =
198 | ListingTableConfig.builder(dataDir)
199 | .withListingOptions(listingOptions)
200 | .build(context)
201 | .join();
202 | ListingTable listingTable = new ListingTable(tableConfig)) {
203 | context.registerTable("test", listingTable);
204 | testQuery(context, allocator);
205 | }
206 | }
207 | }
208 |
209 | @Test
210 | public void testMultiplePaths(@TempDir Path tempDir) throws Exception {
211 | try (SessionContext context = SessionContexts.create();
212 | BufferAllocator allocator = new RootAllocator()) {
213 | Path dataDir = tempDir.resolve("data");
214 | Path[] dataFiles = writeParquetFiles(dataDir);
215 |
216 | try (ParquetFormat format = new ParquetFormat();
217 | ListingOptions listingOptions =
218 | ListingOptions.builder(format).withFileExtension(".parquet").build();
219 | ListingTableConfig tableConfig =
220 | ListingTableConfig.builder(dataFiles)
221 | .withListingOptions(listingOptions)
222 | .build(context)
223 | .join();
224 | ListingTable listingTable = new ListingTable(tableConfig)) {
225 | context.registerTable("test", listingTable);
226 | testQuery(context, allocator);
227 | }
228 | }
229 | }
230 |
231 | private static Path[] writeParquetFiles(Path dataDir) throws Exception {
232 | String schema =
233 | "{\"namespace\": \"org.example\","
234 | + "\"type\": \"record\","
235 | + "\"name\": \"record_name\","
236 | + "\"fields\": ["
237 | + " {\"name\": \"x\", \"type\": \"long\"},"
238 | + " {\"name\": \"y\", \"type\": \"long\"}"
239 | + " ]}";
240 |
241 | Path parquetFilePath0 = dataDir.resolve("0.parquet");
242 | ParquetWriter.writeParquet(
243 | parquetFilePath0,
244 | schema,
245 | 2,
246 | (i, record) -> {
247 | record.put("x", i * 2 + 1);
248 | record.put("y", i * 2 + 2);
249 | });
250 |
251 | Path parquetFilePath1 = dataDir.resolve("1.parquet");
252 | ParquetWriter.writeParquet(
253 | parquetFilePath1,
254 | schema,
255 | 2,
256 | (i, record) -> {
257 | record.put("x", i * 2 + 1);
258 | record.put("y", i * 2 + 12);
259 | });
260 | return new Path[] {parquetFilePath0, parquetFilePath1};
261 | }
262 |
263 | private static void writeArrowFile(Path filePath, List<FieldVector> vectors, boolean compressed)
264 | throws Exception {
265 | List<Field> fields = vectors.stream().map(v -> v.getField()).collect(Collectors.toList());
266 | CompressionUtil.CodecType codec =
267 | compressed ? CompressionUtil.CodecType.ZSTD : CompressionUtil.CodecType.NO_COMPRESSION;
268 | CompressionCodec.Factory compressionFactory =
269 | compressed ? new CommonsCompressionFactory() : NoCompressionCodec.Factory.INSTANCE;
270 | try (VectorSchemaRoot root = new VectorSchemaRoot(fields, vectors);
271 | FileOutputStream output = new FileOutputStream(filePath.toString());
272 | ArrowFileWriter writer =
273 | new ArrowFileWriter(
274 | root,
275 | null,
276 | output.getChannel(),
277 | null,
278 | IpcOption.DEFAULT,
279 | compressionFactory,
280 | codec)) {
281 | writer.start();
282 | writer.writeBatch();
283 | writer.end();
284 | }
285 | }
286 |
287 | private static void testQuery(SessionContext context, BufferAllocator allocator)
288 | throws Exception {
289 | try (ArrowReader reader =
290 | context
291 | .sql("SELECT y FROM test WHERE x = 3 ORDER BY y")
292 | .thenComposeAsync(df -> df.collect(allocator))
293 | .join()) {
294 |
295 | long[] expectedResults = {4, 14};
296 | int globalRow = 0;
297 | VectorSchemaRoot root = reader.getVectorSchemaRoot();
298 | while (reader.loadNextBatch()) {
299 | BigIntVector yValues = (BigIntVector) root.getVector(0);
300 | for (int row = 0; row < root.getRowCount(); ++row, ++globalRow) {
301 | assertTrue(globalRow < expectedResults.length);
302 | assertEquals(expectedResults[globalRow], yValues.get(row));
303 | }
304 | }
305 | assertEquals(expectedResults.length, globalRow);
306 | }
307 | }
308 |
309 | @Test
310 | public void testParquetTimestampedStrings(@TempDir Path tempDir) throws Exception {
311 | try (SessionContext context = SessionContexts.create();
312 | BufferAllocator allocator = new RootAllocator()) {
313 | Path dataDir = tempDir.resolve("data");
314 | String schema =
315 | "{\"namespace\": \"org.example\","
316 | + "\"type\": \"record\","
317 | + "\"name\": \"record_name\","
318 | + "\"fields\": ["
319 | + " {\"name\": \"id\", \"type\": \"long\"},"
320 | + " {\"name\": \"timestamp\", \"type\": {\"type\": \"long\", \"logicalType\": \"timestamp-millis\"}},"
321 | + " {\"name\": \"text\", \"type\": \"string\"}"
322 | + " ]}";
323 |
324 | Path parquetFilePath0 = dataDir.resolve("0.parquet");
325 | Instant[] timestamps0 = {
326 | Instant.parse("2022-04-04T00:00:00Z"),
327 | Instant.parse("2022-05-04T00:00:00Z"),
328 | Instant.parse("2022-06-06T00:00:00Z"),
329 | };
330 | ParquetWriter.writeParquet(
331 | parquetFilePath0,
332 | schema,
333 | 3,
334 | (i, record) -> {
335 | record.put("id", i + 1);
336 | record.put("timestamp", (timestamps0[i].getEpochSecond() * 1_000));
337 | record.put("text", String.format("Text%d", i + 1));
338 | });
339 |
340 | Path parquetFilePath1 = dataDir.resolve("1.parquet");
341 | Instant[] timestamps1 = {
342 | Instant.parse("2023-04-04T00:00:00Z"),
343 | Instant.parse("2023-04-04T00:00:00Z"),
344 | Instant.parse("2022-08-01T00:00:00Z"),
345 | };
346 | ParquetWriter.writeParquet(
347 | parquetFilePath1,
348 | schema,
349 | 3,
350 | (i, record) -> {
351 | record.put("id", i + 4);
352 | record.put("timestamp", (timestamps1[i].getEpochSecond() * 1_000));
353 | record.put("text", String.format("Text%d", i + 4));
354 | });
355 |
356 | Path[] filePaths = {parquetFilePath0, parquetFilePath1};
357 |
358 | try (ParquetFormat format = new ParquetFormat();
359 | ListingOptions listingOptions =
360 | ListingOptions.builder(format).withFileExtension(".parquet").build();
361 | ListingTableConfig tableConfig =
362 | ListingTableConfig.builder(filePaths)
363 | .withListingOptions(listingOptions)
364 | .build(context)
365 | .join();
366 | ListingTable listingTable = new ListingTable(tableConfig)) {
367 | context.registerTable("test", listingTable);
368 | try (ArrowReader reader =
369 | context
370 | .sql(
371 | "SELECT id,text FROM test WHERE ID IN (2, 3, 4) AND timestamp < '2023-01-01T00:00:00Z' ORDER BY id")
372 | .thenComposeAsync(df -> df.collect(allocator))
373 | .join()) {
374 |
375 | Long[] expectedIds = {2L, 3L};
376 | String[] expectedText = {"Text2", "Text3"};
377 | List<Long> actualIds = new ArrayList<>();
378 | List<String> actualText = new ArrayList<>();
379 | int globalRow = 0;
380 | VectorSchemaRoot root = reader.getVectorSchemaRoot();
381 | while (reader.loadNextBatch()) {
382 | BigIntVector idValues = (BigIntVector) root.getVector(0);
383 | VarCharVector textValues = (VarCharVector) root.getVector(1);
384 | for (int row = 0; row < root.getRowCount(); ++row, ++globalRow) {
385 | actualIds.add(idValues.get(row));
386 | actualText.add(new String(textValues.get(row), StandardCharsets.UTF_8));
387 | }
388 | }
389 | assertArrayEquals(expectedIds, actualIds.toArray(new Long[0]));
390 | assertArrayEquals(expectedText, actualText.toArray(new String[0]));
391 | }
392 | }
393 | }
394 | }
395 | }
396 |
--------------------------------------------------------------------------------
/datafusion-java/src/test/java/org/apache/arrow/datafusion/TestQuery.java:
--------------------------------------------------------------------------------
1 | package org.apache.arrow.datafusion;
2 |
3 | import static org.junit.jupiter.api.Assertions.assertEquals;
4 | import static org.junit.jupiter.api.Assertions.assertFalse;
5 | import static org.junit.jupiter.api.Assertions.assertThrows;
6 | import static org.junit.jupiter.api.Assertions.assertTrue;
7 |
8 | import java.nio.file.Files;
9 | import java.nio.file.Path;
10 | import java.util.Arrays;
11 | import java.util.List;
12 | import org.apache.arrow.memory.BufferAllocator;
13 | import org.apache.arrow.memory.RootAllocator;
14 | import org.apache.arrow.vector.BigIntVector;
15 | import org.apache.arrow.vector.VectorSchemaRoot;
16 | import org.apache.arrow.vector.ipc.ArrowReader;
17 | import org.junit.jupiter.api.Test;
18 | import org.junit.jupiter.api.io.TempDir;
19 |
20 | public class TestQuery {
21 |
22 | @Test
23 | public void testQueryInMemoryTable() throws Exception {
24 | try (SessionContext context = SessionContexts.create();
25 | BufferAllocator allocator = new RootAllocator()) {
26 | DataFrame df = context.sql("SELECT * FROM (VALUES (1, 2), (3, 4)) AS t (x, y)").join();
27 | assertFalse(
28 | context.registerTable("test", df.intoView()).isPresent(),
29 | "there should not be any duplicates");
30 | testQuery(context, allocator);
31 | assertThrows(
32 | Exception.class,
33 | () -> context.registerTable("test", df.intoView()),
34 | "now there should be duplicates");
35 | }
36 | }
37 |
38 | @Test
39 | public void testQueryCsv(@TempDir Path tempDir) throws Exception {
40 | try (SessionContext context = SessionContexts.create();
41 | BufferAllocator allocator = new RootAllocator()) {
42 | Path csvFilePath = tempDir.resolve("data.csv");
43 |
44 | List<String> lines = Arrays.asList("x,y", "1,2", "3,4");
45 | Files.write(csvFilePath, lines);
46 |
47 | context.registerCsv("test", csvFilePath).join();
48 | testQuery(context, allocator);
49 | }
50 | }
51 |
52 | @Test
53 | public void testQueryParquet(@TempDir Path tempDir) throws Exception {
54 | try (SessionContext context = SessionContexts.create();
55 | BufferAllocator allocator = new RootAllocator()) {
56 | Path parquetFilePath = tempDir.resolve("data.parquet");
57 |
58 | String schema =
59 | "{\"namespace\": \"org.example\","
60 | + "\"type\": \"record\","
61 | + "\"name\": \"record_name\","
62 | + "\"fields\": ["
63 | + " {\"name\": \"x\", \"type\": \"long\"},"
64 | + " {\"name\": \"y\", \"type\": \"long\"}"
65 | + " ]}";
66 |
67 | ParquetWriter.writeParquet(
68 | parquetFilePath,
69 | schema,
70 | 2,
71 | (i, record) -> {
72 | record.put("x", i * 2 + 1);
73 | record.put("y", i * 2 + 2);
74 | });
75 |
76 | context.registerParquet("test", parquetFilePath).join();
77 | testQuery(context, allocator);
78 | }
79 | }
80 |
81 | @Test
82 | public void testInvalidQuery() throws Exception {
83 | try (SessionContext context = SessionContexts.create()) {
84 | assertThrows(
85 | RuntimeException.class,
86 | () -> context.sql("SELECT z FROM (VALUES (1, 2), (3, 4)) AS t (x, y)").join(),
87 | "invalid column name in query should raise an error");
88 | }
89 | }
90 |
91 | private static void testQuery(SessionContext context, BufferAllocator allocator)
92 | throws Exception {
93 | try (ArrowReader reader =
94 | context
95 | .sql("SELECT y FROM test WHERE x = 3")
96 | .thenComposeAsync(df -> df.collect(allocator))
97 | .join()) {
98 |
99 | VectorSchemaRoot root = reader.getVectorSchemaRoot();
100 | assertTrue(reader.loadNextBatch());
101 |
102 | assertEquals(1, root.getRowCount());
103 | BigIntVector yValues = (BigIntVector) root.getVector(0);
104 | assertEquals(4, yValues.get(0));
105 |
106 | assertFalse(reader.loadNextBatch());
107 | }
108 | }
109 | }
110 |
--------------------------------------------------------------------------------
/datafusion-java/src/test/java/org/apache/arrow/datafusion/TestSessionConfig.java:
--------------------------------------------------------------------------------
1 | package org.apache.arrow.datafusion;
2 |
3 | import static org.junit.jupiter.api.Assertions.*;
4 |
5 | import java.nio.file.Path;
6 | import java.util.Optional;
7 | import org.apache.arrow.memory.BufferAllocator;
8 | import org.apache.arrow.memory.RootAllocator;
9 | import org.apache.arrow.vector.VectorSchemaRoot;
10 | import org.junit.jupiter.api.Test;
11 | import org.junit.jupiter.api.io.TempDir;
12 |
13 | public class TestSessionConfig {
14 | @Test
15 | public void testRegisterInvalidCsvPath(@TempDir Path tempDir) throws Exception {
16 | try (SessionContext context = SessionContexts.create()) {
17 | Path filePath = tempDir.resolve("non-existent.csv");
18 | assertThrows(
19 | RuntimeException.class,
20 | () -> context.registerCsv("test", filePath).join(),
21 | "Expected an exception to be raised from an IO error");
22 | }
23 | }
24 |
25 | @Test
26 | public void testRegisterInvalidParquetPath(@TempDir Path tempDir) throws Exception {
27 | try (SessionContext context = SessionContexts.create()) {
28 | Path filePath = tempDir.resolve("non-existent.parquet");
29 | assertThrows(
30 | RuntimeException.class,
31 | () -> context.registerParquet("test", filePath).join(),
32 | "Expected an exception to be raised from an IO error");
33 | }
34 | }
35 |
36 | @Test
37 | public void testCreateSessionWithConfig() throws Exception {
38 | try (SessionContext context =
39 | SessionContexts.withConfig(
40 | (c) -> c.executionOptions().parquet().withEnablePageIndex(true))) {
41 | // Only testing we can successfully create a session context with the config
42 | }
43 | }
44 |
45 | @Test
46 | public void testParquetOptions() throws Exception {
47 | try (SessionConfig config = new SessionConfig()) {
48 | ParquetOptions parquetOptions = config.executionOptions().parquet();
49 |
50 | assertTrue(parquetOptions.enablePageIndex());
51 | parquetOptions.withEnablePageIndex(false);
52 | assertFalse(parquetOptions.enablePageIndex());
53 |
54 | assertTrue(parquetOptions.pruning());
55 | parquetOptions.withPruning(false);
56 | assertFalse(parquetOptions.pruning());
57 |
58 | assertTrue(parquetOptions.skipMetadata());
59 | parquetOptions.withSkipMetadata(false);
60 | assertFalse(parquetOptions.skipMetadata());
61 |
62 | assertFalse(parquetOptions.metadataSizeHint().isPresent());
63 | parquetOptions.withMetadataSizeHint(Optional.of(123L));
64 | Optional<Long> sizeHint = parquetOptions.metadataSizeHint();
65 | assertTrue(sizeHint.isPresent());
66 | assertEquals(123L, sizeHint.get());
67 | parquetOptions.withMetadataSizeHint(Optional.empty());
68 | assertFalse(parquetOptions.metadataSizeHint().isPresent());
69 |
70 | assertFalse(parquetOptions.pushdownFilters());
71 | parquetOptions.withPushdownFilters(true);
72 | assertTrue(parquetOptions.pushdownFilters());
73 |
74 | assertFalse(parquetOptions.reorderFilters());
75 | parquetOptions.withReorderFilters(true);
76 | assertTrue(parquetOptions.reorderFilters());
77 | }
78 | }
79 |
80 | @Test
81 | public void testSqlParserOptions() throws Exception {
82 | try (SessionConfig config = new SessionConfig()) {
83 | SqlParserOptions sqlParserOptions = config.sqlParserOptions();
84 |
85 | assertFalse(sqlParserOptions.parseFloatAsDecimal());
86 | sqlParserOptions.withParseFloatAsDecimal(true);
87 | assertTrue(sqlParserOptions.parseFloatAsDecimal());
88 |
89 | assertTrue(sqlParserOptions.enableIdentNormalization());
90 | sqlParserOptions.withEnableIdentNormalization(false);
91 | assertFalse(sqlParserOptions.enableIdentNormalization());
92 |
93 | assertEquals("generic", sqlParserOptions.dialect());
94 | sqlParserOptions.withDialect("PostgreSQL");
95 | assertEquals("PostgreSQL", sqlParserOptions.dialect());
96 | }
97 | }
98 |
99 | @Test
100 | public void testExecutionOptions() throws Exception {
101 | try (SessionConfig config = new SessionConfig()) {
102 | ExecutionOptions executionOptions = config.executionOptions();
103 |
104 | assertEquals(8192, executionOptions.batchSize());
105 | executionOptions.withBatchSize(1024);
106 | assertEquals(1024, executionOptions.batchSize());
107 |
108 | assertTrue(executionOptions.coalesceBatches());
109 | executionOptions.withCoalesceBatches(false);
110 | assertFalse(executionOptions.coalesceBatches());
111 |
112 | assertFalse(executionOptions.collectStatistics());
113 | executionOptions.withCollectStatistics(true);
114 | assertTrue(executionOptions.collectStatistics());
115 |
116 | long targetPartitions = executionOptions.targetPartitions();
117 | assertTrue(targetPartitions > 0);
118 | executionOptions.withTargetPartitions(targetPartitions * 2);
119 | assertEquals(targetPartitions * 2, executionOptions.targetPartitions());
120 | }
121 | }
122 |
123 | @Test
124 | public void testBatchSize(@TempDir Path tempDir) throws Exception {
125 | long rowCount = 1024;
126 | long batchSize = 64;
127 | try (SessionContext context =
128 | SessionContexts.withConfig((conf) -> conf.executionOptions().withBatchSize(batchSize));
129 | BufferAllocator allocator = new RootAllocator()) {
130 | Path parquetFilePath = tempDir.resolve("data.parquet");
131 |
132 | String parquetSchema =
133 | "{\"namespace\": \"org.example\","
134 | + "\"type\": \"record\","
135 | + "\"name\": \"record_name\","
136 | + "\"fields\": ["
137 | + " {\"name\": \"x\", \"type\": \"long\"}"
138 | + " ]}";
139 |
140 | ParquetWriter.writeParquet(
141 | parquetFilePath,
142 | parquetSchema,
143 | 1024,
144 | (i, record) -> {
145 | record.put("x", i);
146 | });
147 |
148 | context.registerParquet("test", parquetFilePath).join();
149 |
150 | try (RecordBatchStream stream =
151 | context
152 | .sql("SELECT * FROM test")
153 | .thenComposeAsync(df -> df.executeStream(allocator))
154 | .join()) {
155 | VectorSchemaRoot root = stream.getVectorSchemaRoot();
156 |
157 | long rowsReceived = 0;
158 | while (stream.loadNextBatch().join()) {
159 | assertTrue(root.getRowCount() <= batchSize);
160 | rowsReceived += root.getRowCount();
161 | }
162 |
163 | assertEquals(rowCount, rowsReceived);
164 | }
165 | }
166 | }
167 | }
168 |
--------------------------------------------------------------------------------
/datafusion-java/src/test/resources/dictionary_data.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datafusion-contrib/datafusion-java/c5dee4178ff9187de4cc7f5eb2e9ddd471223444/datafusion-java/src/test/resources/dictionary_data.parquet
--------------------------------------------------------------------------------
/datafusion-java/write_test_files.py:
--------------------------------------------------------------------------------
1 | import pyarrow as pa
2 | import pyarrow.parquet as pq
3 |
4 |
5 | num_rows = 100
6 |
7 | dict_array_x = pa.DictionaryArray.from_arrays(
8 | pa.array([i % 3 for i in range(num_rows)]), pa.array(["one", "two", "three"])
9 | )
10 |
11 | dict_array_y = pa.DictionaryArray.from_arrays(
12 | pa.array([i % 3 for i in range(num_rows)]), pa.array(["four", "five", "six"])
13 | )
14 |
15 | table = pa.Table.from_arrays([dict_array_x, dict_array_y], ["x", "y"])
16 | pq.write_table(table, "src/test/resources/dictionary_data.parquet")
17 |
--------------------------------------------------------------------------------
/datafusion-jni/.gitignore:
--------------------------------------------------------------------------------
1 | # Created by https://www.toptal.com/developers/gitignore/api/rust
2 | # Edit at https://www.toptal.com/developers/gitignore?templates=rust
3 |
4 | ### Rust ###
5 | # Generated by Cargo
6 | # will have compiled files and executables
7 | debug/
8 | target/
9 |
10 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
11 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
12 | Cargo.lock
13 |
14 | # These are backup files generated by rustfmt
15 | **/*.rs.bk
16 |
17 | # MSVC Windows builds of rustc generate these, which store debugging information
18 | *.pdb
19 |
20 | # End of https://www.toptal.com/developers/gitignore/api/rust
21 |
--------------------------------------------------------------------------------
/datafusion-jni/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "datafusion_jni"
3 | version = "0.16.0"
4 | homepage = "https://github.com/apache/datafusion"
5 | repository = "https://github.com/apache/datafusion"
6 | authors = ["Apache Arrow "]
7 | description = "Build and run queries against data"
8 | readme = "README.md"
9 | license = "Apache-2.0"
10 | edition = "2021"
11 |
12 | [dependencies]
13 | jni = "^0.21.0"
14 | tokio = "^1.32.0"
15 | arrow = { version = "^39.0", features = ["ffi", "ipc_compression"] }
16 | datafusion = "^25.0"
17 | futures = "0.3.28"
18 |
19 | [lib]
20 | crate_type = ["cdylib"]
21 |
22 | [profile.release]
23 | lto = true
24 | strip = true
25 |
--------------------------------------------------------------------------------
/datafusion-jni/src/context.rs:
--------------------------------------------------------------------------------
1 | use datafusion::datasource::TableProvider;
2 | use datafusion::execution::context::SessionContext;
3 | use datafusion::prelude::{CsvReadOptions, ParquetReadOptions, SessionConfig};
4 | use jni::objects::{JClass, JObject, JString};
5 | use jni::sys::jlong;
6 | use jni::JNIEnv;
7 | use std::sync::Arc;
8 | use tokio::runtime::Runtime;
9 |
10 | use crate::util::{set_error_message, set_object_result};
11 |
12 | #[no_mangle]
13 | pub extern "system" fn Java_org_apache_arrow_datafusion_DefaultSessionContext_registerCsv(
14 | mut env: JNIEnv,
15 | _class: JClass,
16 | runtime: jlong,
17 | pointer: jlong,
18 | name: JString,
19 | path: JString,
20 | callback: JObject,
21 | ) {
22 | let runtime = unsafe { &mut *(runtime as *mut Runtime) };
23 | let name: String = env
24 | .get_string(&name)
25 | .expect("Couldn't get name as string!")
26 | .into();
27 | let path: String = env
28 | .get_string(&path)
29 | .expect("Couldn't get path as string!")
30 | .into();
31 | let context = unsafe { &mut *(pointer as *mut SessionContext) };
32 | runtime.block_on(async {
33 | let register_result = context
34 | .register_csv(&name, &path, CsvReadOptions::new())
35 | .await;
36 | set_error_message(&mut env, callback, register_result);
37 | });
38 | }
39 |
40 | #[no_mangle]
41 | pub extern "system" fn Java_org_apache_arrow_datafusion_DefaultSessionContext_registerTable(
42 | mut env: JNIEnv,
43 | _class: JClass,
44 | pointer: jlong,
45 | name: JString,
46 | table_provider: jlong,
47 | ) -> jlong {
48 | let name: String = env
49 | .get_string(&name)
50 | .expect("Couldn't get name as string!")
51 | .into();
52 | let context = unsafe { &mut *(pointer as *mut SessionContext) };
53 | let table_provider = unsafe { &*(table_provider as *const Arc<dyn TableProvider>) };
54 | let result = context.register_table(&name, table_provider.clone());
55 | match result {
56 |         // TODO: to be fixed on the datafusion side; a duplicate registration will
57 |         // no longer be returned as Some but will instead produce an Err
58 | Ok(Some(v)) => Box::into_raw(Box::new(v)) as jlong,
59 | Ok(None) => 0,
60 | Err(err) => {
61 | env.throw_new("java/lang/Exception", err.to_string())
62 | .unwrap();
63 | 0
64 | }
65 | }
66 | }
67 |
68 | #[no_mangle]
69 | pub extern "system" fn Java_org_apache_arrow_datafusion_DefaultSessionContext_registerParquet(
70 | mut env: JNIEnv,
71 | _class: JClass,
72 | runtime: jlong,
73 | pointer: jlong,
74 | name: JString,
75 | path: JString,
76 | callback: JObject,
77 | ) {
78 | let runtime = unsafe { &mut *(runtime as *mut Runtime) };
79 | let name: String = env
80 | .get_string(&name)
81 | .expect("Couldn't get name as string!")
82 | .into();
83 | let path: String = env
84 | .get_string(&path)
85 | .expect("Couldn't get path as string!")
86 | .into();
87 | let context = unsafe { &mut *(pointer as *mut SessionContext) };
88 | runtime.block_on(async {
89 | let register_result = context
90 | .register_parquet(&name, &path, ParquetReadOptions::default())
91 | .await;
92 | set_error_message(&mut env, callback, register_result);
93 | });
94 | }
95 |
96 | #[no_mangle]
97 | pub extern "system" fn Java_org_apache_arrow_datafusion_DefaultSessionContext_querySql(
98 | mut env: JNIEnv,
99 | _class: JClass,
100 | runtime: jlong,
101 | pointer: jlong,
102 | sql: JString,
103 | callback: JObject,
104 | ) {
105 | let runtime = unsafe { &mut *(runtime as *mut Runtime) };
106 | let sql: String = env
107 | .get_string(&sql)
108 | .expect("Couldn't get sql as string!")
109 | .into();
110 | let context = unsafe { &mut *(pointer as *mut SessionContext) };
111 | runtime.block_on(async {
112 | let query_result = context.sql(&sql).await;
113 | set_object_result(
114 | &mut env,
115 | callback,
116 | query_result.map(|df| Box::into_raw(Box::new(df))),
117 | );
118 | });
119 | }
120 | #[no_mangle]
121 | pub extern "system" fn Java_org_apache_arrow_datafusion_SessionContexts_destroySessionContext(
122 | _env: JNIEnv,
123 | _class: JClass,
124 | pointer: jlong,
125 | ) {
126 | let _ = unsafe { Box::from_raw(pointer as *mut SessionContext) };
127 | }
128 |
129 | #[no_mangle]
130 | pub extern "system" fn Java_org_apache_arrow_datafusion_SessionContexts_createSessionContext(
131 | _env: JNIEnv,
132 | _class: JClass,
133 | ) -> jlong {
134 | let context = SessionContext::new();
135 | Box::into_raw(Box::new(context)) as jlong
136 | }
137 |
138 | #[no_mangle]
139 | pub extern "system" fn Java_org_apache_arrow_datafusion_SessionContexts_createSessionContextWithConfig(
140 | _env: JNIEnv,
141 | _class: JClass,
142 | config: jlong,
143 | ) -> jlong {
144 | let config = unsafe { &*(config as *const SessionConfig) };
145 | let context = SessionContext::with_config(config.clone());
146 | Box::into_raw(Box::new(context)) as jlong
147 | }
148 |
--------------------------------------------------------------------------------
/datafusion-jni/src/dataframe.rs:
--------------------------------------------------------------------------------
1 | use arrow::ipc::writer::FileWriter;
2 | use datafusion::dataframe::DataFrame;
3 | use jni::objects::{JClass, JObject, JString};
4 | use jni::sys::jlong;
5 | use jni::JNIEnv;
6 | use std::convert::Into;
7 | use std::io::BufWriter;
8 | use std::io::Cursor;
9 | use tokio::runtime::Runtime;
10 |
11 | use crate::util::{set_error_message, set_object_result};
12 |
13 | #[no_mangle]
14 | pub extern "system" fn Java_org_apache_arrow_datafusion_DataFrames_collectDataframe(
15 | mut env: JNIEnv,
16 | _class: JClass,
17 | runtime: jlong,
18 | dataframe: jlong,
19 | callback: JObject,
20 | ) {
21 | let runtime = unsafe { &mut *(runtime as *mut Runtime) };
22 | let dataframe = unsafe { &mut *(dataframe as *mut DataFrame) };
23 | let schema = dataframe.schema().into();
24 | runtime.block_on(async {
25 | let batches = dataframe
26 | .clone()
27 | .collect()
28 | .await
29 | .expect("failed to collect dataframe");
30 | let mut buff = Cursor::new(vec![0; 0]);
31 | {
32 | let mut writer = FileWriter::try_new(BufWriter::new(&mut buff), &schema)
33 | .expect("failed to create writer");
34 | for batch in batches {
35 | writer.write(&batch).expect("failed to write batch");
36 | }
37 | writer.finish().expect("failed to finish");
38 | }
39 | let err_message = env
40 | .new_string("".to_string())
41 | .expect("Couldn't create java string!");
42 | let ba = env
43 | .byte_array_from_slice(buff.get_ref())
44 | .expect("cannot create empty byte array");
45 | env.call_method(
46 | callback,
47 | "accept",
48 | "(Ljava/lang/Object;Ljava/lang/Object;)V",
49 | &[(&err_message).into(), (&ba).into()],
50 | )
51 | .expect("failed to call method");
52 | });
53 | }
54 |
55 | #[no_mangle]
56 | pub extern "system" fn Java_org_apache_arrow_datafusion_DataFrames_executeStream(
57 | mut env: JNIEnv,
58 | _class: JClass,
59 | runtime: jlong,
60 | dataframe: jlong,
61 | callback: JObject,
62 | ) {
63 | let runtime = unsafe { &mut *(runtime as *mut Runtime) };
64 | let dataframe = unsafe { &mut *(dataframe as *mut DataFrame) };
65 | runtime.block_on(async {
66 | let stream_result = dataframe.clone().execute_stream().await;
67 | set_object_result(
68 | &mut env,
69 | callback,
70 | stream_result.map(|stream| Box::into_raw(Box::new(stream))),
71 | );
72 | });
73 | }
74 |
75 | #[no_mangle]
76 | pub extern "system" fn Java_org_apache_arrow_datafusion_DataFrames_showDataframe(
77 | mut env: JNIEnv,
78 | _class: JClass,
79 | runtime: jlong,
80 | dataframe: jlong,
81 | callback: JObject,
82 | ) {
83 | let runtime = unsafe { &mut *(runtime as *mut Runtime) };
84 | let dataframe = unsafe { &*(dataframe as *const DataFrame) };
85 | runtime.block_on(async {
86 | let r = dataframe.clone().show().await;
87 | set_error_message(&mut env, callback, r);
88 | });
89 | }
90 |
91 | #[no_mangle]
92 | pub extern "system" fn Java_org_apache_arrow_datafusion_DataFrames_intoView(
93 | _env: JNIEnv,
94 | _class: JClass,
95 | dataframe: jlong,
96 | ) -> jlong {
97 | let dataframe = unsafe { &*(dataframe as *const DataFrame) };
98 | let provider = dataframe.clone().into_view();
99 | Box::into_raw(Box::new(provider)) as jlong
100 | }
101 |
102 | #[no_mangle]
103 | pub extern "system" fn Java_org_apache_arrow_datafusion_DataFrames_writeParquet(
104 | mut env: JNIEnv,
105 | _class: JClass,
106 | runtime: jlong,
107 | dataframe: jlong,
108 | path: JString,
109 | callback: JObject,
110 | ) {
111 | let runtime = unsafe { &mut *(runtime as *mut Runtime) };
112 | let dataframe = unsafe { &*(dataframe as *const DataFrame) };
113 | let path: String = env
114 | .get_string(&path)
115 | .expect("Couldn't get path as string!")
116 | .into();
117 | runtime.block_on(async {
118 | let r = dataframe.clone().write_parquet(&path, None).await;
119 | set_error_message(&mut env, callback, r);
120 | });
121 | }
122 |
123 | #[no_mangle]
124 | pub extern "system" fn Java_org_apache_arrow_datafusion_DataFrames_writeCsv(
125 | mut env: JNIEnv,
126 | _class: JClass,
127 | runtime: jlong,
128 | dataframe: jlong,
129 | path: JString,
130 | callback: JObject,
131 | ) {
132 | let runtime = unsafe { &mut *(runtime as *mut Runtime) };
133 | let dataframe = unsafe { &*(dataframe as *const DataFrame) };
134 | let path: String = env
135 | .get_string(&path)
136 | .expect("Couldn't get path as string!")
137 | .into();
138 | runtime.block_on(async {
139 | let r = dataframe.clone().write_csv(&path).await;
140 | set_error_message(&mut env, callback, r);
141 | });
142 | }
143 |
144 | #[no_mangle]
145 | pub extern "system" fn Java_org_apache_arrow_datafusion_DataFrames_destroyDataFrame(
146 | _env: JNIEnv,
147 | _class: JClass,
148 | pointer: jlong,
149 | ) {
150 | let _ = unsafe { Box::from_raw(pointer as *mut DataFrame) };
151 | }
152 |
--------------------------------------------------------------------------------
/datafusion-jni/src/file_formats.rs:
--------------------------------------------------------------------------------
1 | use datafusion::datasource::file_format::arrow::ArrowFormat;
2 | use datafusion::datasource::file_format::csv::CsvFormat;
3 | use datafusion::datasource::file_format::parquet::ParquetFormat;
4 | use datafusion::datasource::file_format::FileFormat;
5 | use jni::objects::JClass;
6 | use jni::sys::jlong;
7 | use jni::JNIEnv;
8 | use std::sync::Arc;
9 |
10 | #[no_mangle]
11 | pub extern "system" fn Java_org_apache_arrow_datafusion_FileFormats_createCsv(
12 | _env: JNIEnv,
13 | _class: JClass,
14 | ) -> jlong {
15 | // Return as an Arc<dyn FileFormat> rather than CsvFormat so this
16 | // can be passed into ListingOptions.create
17 | let format: Arc<dyn FileFormat> = Arc::new(CsvFormat::default());
18 | Box::into_raw(Box::new(format)) as jlong
19 | }
20 |
21 | #[no_mangle]
22 | pub extern "system" fn Java_org_apache_arrow_datafusion_FileFormats_createParquet(
23 | _env: JNIEnv,
24 | _class: JClass,
25 | ) -> jlong {
26 | // Return as an Arc<dyn FileFormat> rather than ParquetFormat so this
27 | // can be passed into ListingOptions.create
28 | let format: Arc<dyn FileFormat> = Arc::new(ParquetFormat::default());
29 | Box::into_raw(Box::new(format)) as jlong
30 | }
31 |
32 | #[no_mangle]
33 | pub extern "system" fn Java_org_apache_arrow_datafusion_FileFormats_createArrow(
34 | _env: JNIEnv,
35 | _class: JClass,
36 | ) -> jlong {
37 | let format: Arc<dyn FileFormat> = Arc::new(ArrowFormat::default());
38 | Box::into_raw(Box::new(format)) as jlong
39 | }
40 |
41 | #[no_mangle]
42 | pub extern "system" fn Java_org_apache_arrow_datafusion_FileFormats_destroyFileFormat(
43 | _env: JNIEnv,
44 | _class: JClass,
45 | pointer: jlong,
46 | ) {
47 | let _ = unsafe { Box::from_raw(pointer as *mut Arc<dyn FileFormat>) };
48 | }
49 |
--------------------------------------------------------------------------------
/datafusion-jni/src/lib.rs:
--------------------------------------------------------------------------------
1 | mod context;
2 | mod dataframe;
3 | mod file_formats;
4 | mod listing_options;
5 | mod listing_table;
6 | mod listing_table_config;
7 | mod runtime;
8 | mod session_config;
9 | mod stream;
10 | mod table_provider;
11 | mod util;
12 |
--------------------------------------------------------------------------------
/datafusion-jni/src/listing_options.rs:
--------------------------------------------------------------------------------
1 | use datafusion::datasource::file_format::FileFormat;
2 | use datafusion::datasource::listing::ListingOptions;
3 | use jni::objects::{JClass, JString};
4 | use jni::sys::{jboolean, jlong};
5 | use jni::JNIEnv;
6 | use std::sync::Arc;
7 |
8 | #[no_mangle]
9 | pub extern "system" fn Java_org_apache_arrow_datafusion_ListingOptions_create(
10 | mut env: JNIEnv,
11 | _class: JClass,
12 | format: jlong,
13 | file_extension: JString,
14 | collect_stat: jboolean,
15 | ) -> jlong {
16 | let format = unsafe { &*(format as *const Arc<dyn FileFormat>) };
17 |
18 | let file_extension: String = env
19 | .get_string(&file_extension)
20 | .expect("Couldn't get Java file_extension string")
21 | .into();
22 |
23 | let listing_options = ListingOptions::new(format.clone())
24 | .with_file_extension(file_extension)
25 | .with_collect_stat(collect_stat == 1u8);
26 | Box::into_raw(Box::new(listing_options)) as jlong
27 | }
28 |
29 | #[no_mangle]
30 | pub extern "system" fn Java_org_apache_arrow_datafusion_ListingOptions_destroy(
31 | _env: JNIEnv,
32 | _class: JClass,
33 | pointer: jlong,
34 | ) {
35 | let _ = unsafe { Box::from_raw(pointer as *mut ListingOptions) };
36 | }
37 |
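One detail worth noting: JNI passes Java booleans as jboolean, a u8 where JNI_TRUE is 1, so `collect_stat == 1u8` converts it back to a Rust bool (session_config.rs uses the equivalent `!= 0u8` form in its setters). A tiny illustrative sketch of the conversion:

fn main() {
    // jboolean is a u8 on the Rust side; JNI_TRUE == 1 and JNI_FALSE == 0.
    let collect_stat: u8 = 1; // as received from Java
    // Both forms accept JNI_TRUE; `!= 0` also tolerates other non-zero values.
    assert!(collect_stat == 1u8);
    assert!(collect_stat != 0u8);
}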
--------------------------------------------------------------------------------
/datafusion-jni/src/listing_table.rs:
--------------------------------------------------------------------------------
1 | use datafusion::datasource::listing::{ListingTable, ListingTableConfig};
2 | use datafusion::datasource::TableProvider;
3 | use jni::objects::{JClass, JObject};
4 | use jni::sys::jlong;
5 | use jni::JNIEnv;
6 | use std::sync::Arc;
7 |
8 | use crate::util::set_object_result;
9 |
10 | #[no_mangle]
11 | pub extern "system" fn Java_org_apache_arrow_datafusion_ListingTable_create(
12 | mut env: JNIEnv,
13 | _class: JClass,
14 | table_config: jlong,
15 | object_result: JObject,
16 | ) {
17 | let table_config = unsafe { &*(table_config as *const ListingTableConfig) };
18 | // Clone table config as it will be moved into ListingTable
19 | let table_config = ListingTableConfig {
20 | table_paths: table_config.table_paths.clone(),
21 | file_schema: table_config.file_schema.clone(),
22 | options: table_config.options.clone(),
23 | };
24 | let table_provider_result = ListingTable::try_new(table_config).map(|listing_table| {
25 | // Return as an Arc<dyn TableProvider> rather than ListingTable so this
26 | // can be passed into SessionContext.registerTable
27 | let table_provider: Arc<dyn TableProvider> = Arc::new(listing_table);
28 | Box::into_raw(Box::new(table_provider))
29 | });
30 | set_object_result(&mut env, object_result, table_provider_result);
31 | }
32 |
--------------------------------------------------------------------------------
/datafusion-jni/src/listing_table_config.rs:
--------------------------------------------------------------------------------
1 | use datafusion::datasource::listing::{ListingOptions, ListingTableConfig, ListingTableUrl};
2 | use datafusion::execution::context::SessionContext;
3 | use jni::objects::{JClass, JObject, JObjectArray, JString};
4 | use jni::sys::jlong;
5 | use jni::JNIEnv;
6 | use tokio::runtime::Runtime;
7 |
8 | use crate::util::{set_object_result, set_object_result_error};
9 |
10 | #[no_mangle]
11 | pub extern "system" fn Java_org_apache_arrow_datafusion_ListingTableConfig_create(
12 | mut env: JNIEnv,
13 | _class: JClass,
14 | runtime: jlong,
15 | context: jlong,
16 | table_paths: JObjectArray,
17 | listing_options: jlong,
18 | callback: JObject,
19 | ) {
20 | let runtime = unsafe { &*(runtime as *const Runtime) };
21 | let context = unsafe { &*(context as *const SessionContext) };
22 |
23 | let mut table_urls: Vec<ListingTableUrl> = Vec::new();
24 | let table_paths_length = env
25 | .get_array_length(&table_paths)
26 | .expect("Couldn't get array length of table_paths");
27 | for i in 0..table_paths_length {
28 | let table_path_str: JString = env
29 | .get_object_array_element(&table_paths, i)
30 | .expect("Couldn't get array string element")
31 | .into();
32 | let table_path: String = env
33 | .get_string(&table_path_str)
34 | .expect("Couldn't get native string source")
35 | .into();
36 | let table_url = ListingTableUrl::parse(table_path);
37 | let table_url = match table_url {
38 | Ok(url) => url,
39 | Err(err) => {
40 | set_object_result_error(&mut env, callback, &err);
41 | return;
42 | }
43 | };
44 | table_urls.push(table_url);
45 | }
46 |
47 | runtime.block_on(async {
48 | let listing_table_config = ListingTableConfig::new_with_multi_paths(table_urls);
49 |
50 | let listing_table_config = match listing_options {
51 | 0 => listing_table_config,
52 | listing_options => {
53 | let listing_options = unsafe { &*(listing_options as *const ListingOptions) };
54 | listing_table_config.with_listing_options(listing_options.clone())
55 | }
56 | };
57 |
58 | let session_state = context.state();
59 | let config_result = listing_table_config.infer_schema(&session_state).await;
60 | set_object_result(
61 | &mut env,
62 | callback,
63 | config_result.map(|config| Box::into_raw(Box::new(config))),
64 | );
65 | });
66 | }
67 |
68 | #[no_mangle]
69 | pub extern "system" fn Java_org_apache_arrow_datafusion_ListingTableConfig_destroy(
70 | _env: JNIEnv,
71 | _class: JClass,
72 | pointer: jlong,
73 | ) {
74 | let _ = unsafe { Box::from_raw(pointer as *mut ListingTableConfig) };
75 | }
76 |
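Here a listing_options handle of 0 is the crate's null convention, meaning Java supplied no ListingOptions; the match keeps the bare config in that case and otherwise clones the caller-owned options, so ownership (and the eventual destroy call) stays with the Java side. A self-contained sketch of the convention with an illustrative stand-in type:

#[derive(Clone, Default)]
struct Options {
    file_extension: String,
}

// Apply an optional handle: 0 means "no options supplied".
fn apply(handle: i64, base: Options) -> Options {
    match handle {
        0 => base,
        handle => {
            // Non-zero: treat as a pointer to caller-owned options and clone
            // them, leaving ownership with the caller.
            let options = unsafe { &*(handle as *const Options) };
            options.clone()
        }
    }
}

fn main() {
    let opts = Options { file_extension: ".parquet".into() };
    let handle = &opts as *const Options as i64;
    assert_eq!(apply(0, Options::default()).file_extension, "");
    assert_eq!(apply(handle, Options::default()).file_extension, ".parquet");
}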
--------------------------------------------------------------------------------
/datafusion-jni/src/runtime.rs:
--------------------------------------------------------------------------------
1 | use jni::objects::JClass;
2 | use jni::sys::jlong;
3 | use jni::JNIEnv;
4 | use std::time::Duration;
5 | use tokio::runtime::Runtime;
6 |
7 | #[no_mangle]
8 | pub extern "system" fn Java_org_apache_arrow_datafusion_TokioRuntime_createTokioRuntime(
9 | _env: JNIEnv,
10 | _class: JClass,
11 | ) -> jlong {
12 | if let Ok(runtime) = Runtime::new() {
13 | // println!("successfully created tokio runtime");
14 | Box::into_raw(Box::new(runtime)) as jlong
15 | } else {
16 | // TODO error handling
17 | -1
18 | }
19 | }
20 | #[no_mangle]
21 | pub extern "system" fn Java_org_apache_arrow_datafusion_TokioRuntime_destroyTokioRuntime(
22 | _env: JNIEnv,
23 | _class: JClass,
24 | pointer: jlong,
25 | ) {
26 | let runtime = unsafe { Box::from_raw(pointer as *mut Runtime) };
27 | runtime.shutdown_timeout(Duration::from_millis(100));
28 | // println!("successfully shutdown tokio runtime");
29 | }
30 |
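The Tokio runtime gets the same leaked-Box lifecycle as every other handle, with one extra step on teardown: shutdown_timeout blocks the destroying thread for up to 100 ms while background tasks wind down. A non-JNI sketch of that lifecycle (assumes the tokio crate with its runtime features enabled; illustrative only):

use std::time::Duration;
use tokio::runtime::Runtime;

fn main() {
    // createTokioRuntime equivalent: leak the runtime, keep only its address.
    let handle = Box::into_raw(Box::new(Runtime::new().unwrap())) as i64;

    // Any later native call can borrow the runtime to drive async work,
    // as the block_on calls in context.rs and dataframe.rs do.
    let runtime = unsafe { &*(handle as *const Runtime) };
    runtime.block_on(async { /* await DataFusion futures here */ });

    // destroyTokioRuntime equivalent: take ownership back and shut down,
    // waiting at most 100 ms for outstanding tasks.
    let runtime = unsafe { Box::from_raw(handle as *mut Runtime) };
    runtime.shutdown_timeout(Duration::from_millis(100));
}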
--------------------------------------------------------------------------------
/datafusion-jni/src/session_config.rs:
--------------------------------------------------------------------------------
1 | use datafusion::execution::context::SessionConfig;
2 | use jni::objects::{JClass, JString};
3 | use jni::sys::{jboolean, jlong};
4 | use jni::JNIEnv;
5 |
6 | #[no_mangle]
7 | pub extern "system" fn Java_org_apache_arrow_datafusion_SessionConfig_create(
8 | _env: JNIEnv,
9 | _class: JClass,
10 | ) -> jlong {
11 | let session_config = Box::new(SessionConfig::new());
12 | Box::into_raw(session_config) as jlong
13 | }
14 |
15 | #[no_mangle]
16 | pub extern "system" fn Java_org_apache_arrow_datafusion_SessionConfig_destroy(
17 | _env: JNIEnv,
18 | _class: JClass,
19 | pointer: jlong,
20 | ) {
21 | let _ = unsafe { Box::from_raw(pointer as *mut SessionConfig) };
22 | }
23 |
24 | // Helper macros to implement boolean options
25 |
26 | macro_rules! bool_getter {
27 | ($name:ident, $($property_path:ident).+) => {
28 | #[no_mangle]
29 | pub extern "system" fn $name(
30 | _env: JNIEnv,
31 | _class: JClass,
32 | pointer: jlong,
33 | ) -> jboolean {
34 | let config = unsafe { &*(pointer as *const SessionConfig) };
35 | let property_value = config.options().$($property_path).+;
36 | if property_value {
37 | 1u8
38 | } else {
39 | 0u8
40 | }
41 | }
42 | }
43 | }
44 |
45 | macro_rules! bool_setter {
46 | ($name:ident, $($property_path:ident).+) => {
47 | #[no_mangle]
48 | pub extern "system" fn $name(
49 | _env: JNIEnv,
50 | _class: JClass,
51 | pointer: jlong,
52 | enabled: jboolean,
53 | ) {
54 | let config = unsafe { &mut *(pointer as *mut SessionConfig) };
55 | config.options_mut().$($property_path).+ = enabled != 0u8;
56 | }
57 | }
58 | }
59 |
60 | macro_rules! usize_getter {
61 | ($name:ident, $($property_path:ident).+) => {
62 | #[no_mangle]
63 | pub extern "system" fn $name(
64 | _env: JNIEnv,
65 | _class: JClass,
66 | pointer: jlong,
67 | ) -> jlong {
68 | let config = unsafe { &*(pointer as *const SessionConfig) };
69 | let property_value = config.options().$($property_path).+;
70 | property_value as jlong
71 | }
72 | }
73 | }
74 |
75 | macro_rules! usize_setter {
76 | ($name:ident, $($property_path:ident).+) => {
77 | #[no_mangle]
78 | pub extern "system" fn $name(
79 | _env: JNIEnv,
80 | _class: JClass,
81 | pointer: jlong,
82 | value: jlong,
83 | ) {
84 | let config = unsafe { &mut *(pointer as *mut SessionConfig) };
85 | config.options_mut().$($property_path).+ = value as usize;
86 | }
87 | }
88 | }
89 |
90 | // ExecutionOptions
91 |
92 | usize_getter!(
93 | Java_org_apache_arrow_datafusion_SessionConfig_getExecutionOptionsBatchSize,
94 | execution.batch_size
95 | );
96 | usize_setter!(
97 | Java_org_apache_arrow_datafusion_SessionConfig_setExecutionOptionsBatchSize,
98 | execution.batch_size
99 | );
100 |
101 | bool_getter!(
102 | Java_org_apache_arrow_datafusion_SessionConfig_getExecutionOptionsCoalesceBatches,
103 | execution.coalesce_batches
104 | );
105 | bool_setter!(
106 | Java_org_apache_arrow_datafusion_SessionConfig_setExecutionOptionsCoalesceBatches,
107 | execution.coalesce_batches
108 | );
109 |
110 | bool_getter!(
111 | Java_org_apache_arrow_datafusion_SessionConfig_getExecutionOptionsCollectStatistics,
112 | execution.collect_statistics
113 | );
114 | bool_setter!(
115 | Java_org_apache_arrow_datafusion_SessionConfig_setExecutionOptionsCollectStatistics,
116 | execution.collect_statistics
117 | );
118 |
119 | usize_getter!(
120 | Java_org_apache_arrow_datafusion_SessionConfig_getExecutionOptionsTargetPartitions,
121 | execution.target_partitions
122 | );
123 | usize_setter!(
124 | Java_org_apache_arrow_datafusion_SessionConfig_setExecutionOptionsTargetPartitions,
125 | execution.target_partitions
126 | );
127 |
128 | // ParquetOptions
129 |
130 | bool_getter!(
131 | Java_org_apache_arrow_datafusion_SessionConfig_getParquetOptionsEnablePageIndex,
132 | execution.parquet.enable_page_index
133 | );
134 | bool_setter!(
135 | Java_org_apache_arrow_datafusion_SessionConfig_setParquetOptionsEnablePageIndex,
136 | execution.parquet.enable_page_index
137 | );
138 |
139 | bool_getter!(
140 | Java_org_apache_arrow_datafusion_SessionConfig_getParquetOptionsPruning,
141 | execution.parquet.pruning
142 | );
143 | bool_setter!(
144 | Java_org_apache_arrow_datafusion_SessionConfig_setParquetOptionsPruning,
145 | execution.parquet.pruning
146 | );
147 |
148 | bool_getter!(
149 | Java_org_apache_arrow_datafusion_SessionConfig_getParquetOptionsSkipMetadata,
150 | execution.parquet.skip_metadata
151 | );
152 | bool_setter!(
153 | Java_org_apache_arrow_datafusion_SessionConfig_setParquetOptionsSkipMetadata,
154 | execution.parquet.skip_metadata
155 | );
156 |
157 | bool_getter!(
158 | Java_org_apache_arrow_datafusion_SessionConfig_getParquetOptionsPushdownFilters,
159 | execution.parquet.pushdown_filters
160 | );
161 | bool_setter!(
162 | Java_org_apache_arrow_datafusion_SessionConfig_setParquetOptionsPushdownFilters,
163 | execution.parquet.pushdown_filters
164 | );
165 |
166 | bool_getter!(
167 | Java_org_apache_arrow_datafusion_SessionConfig_getParquetOptionsReorderFilters,
168 | execution.parquet.reorder_filters
169 | );
170 | bool_setter!(
171 | Java_org_apache_arrow_datafusion_SessionConfig_setParquetOptionsReorderFilters,
172 | execution.parquet.reorder_filters
173 | );
174 |
175 | #[no_mangle]
176 | pub extern "system" fn Java_org_apache_arrow_datafusion_SessionConfig_getParquetOptionsMetadataSizeHint(
177 | _env: JNIEnv,
178 | _class: JClass,
179 | pointer: jlong,
180 | ) -> jlong {
181 | let config = unsafe { &*(pointer as *const SessionConfig) };
182 | let size_hint = config.options().execution.parquet.metadata_size_hint;
183 | match size_hint {
184 | Some(size_hint) => size_hint as jlong,
185 | None => -1 as jlong,
186 | }
187 | }
188 |
189 | #[no_mangle]
190 | pub extern "system" fn Java_org_apache_arrow_datafusion_SessionConfig_setParquetOptionsMetadataSizeHint(
191 | _env: JNIEnv,
192 | _class: JClass,
193 | pointer: jlong,
194 | value: jlong,
195 | ) {
196 | let config = unsafe { &mut *(pointer as *mut SessionConfig) };
197 | if value >= 0 {
198 | config.options_mut().execution.parquet.metadata_size_hint = Some(value as usize);
199 | } else {
200 | config.options_mut().execution.parquet.metadata_size_hint = None;
201 | }
202 | }
203 |
204 | // SqlParserOptions
205 |
206 | bool_getter!(
207 | Java_org_apache_arrow_datafusion_SessionConfig_getSqlParserOptionsParseFloatAsDecimal,
208 | sql_parser.parse_float_as_decimal
209 | );
210 | bool_setter!(
211 | Java_org_apache_arrow_datafusion_SessionConfig_setSqlParserOptionsParseFloatAsDecimal,
212 | sql_parser.parse_float_as_decimal
213 | );
214 |
215 | bool_getter!(
216 | Java_org_apache_arrow_datafusion_SessionConfig_getSqlParserOptionsEnableIdentNormalization,
217 | sql_parser.enable_ident_normalization
218 | );
219 | bool_setter!(
220 | Java_org_apache_arrow_datafusion_SessionConfig_setSqlParserOptionsEnableIdentNormalization,
221 | sql_parser.enable_ident_normalization
222 | );
223 |
224 | #[no_mangle]
225 | pub extern "system" fn Java_org_apache_arrow_datafusion_SessionConfig_getSqlParserOptionsDialect<
226 | 'local,
227 | >(
228 | env: JNIEnv<'local>,
229 | _class: JClass<'local>,
230 | pointer: jlong,
231 | ) -> JString<'local> {
232 | let config = unsafe { &*(pointer as *const SessionConfig) };
233 | let dialect = &config.options().sql_parser.dialect;
234 | env.new_string(dialect)
235 | .expect("Couldn't create Java string")
236 | }
237 |
238 | #[no_mangle]
239 | pub extern "system" fn Java_org_apache_arrow_datafusion_SessionConfig_setSqlParserOptionsDialect(
240 | mut env: JNIEnv,
241 | _class: JClass,
242 | pointer: jlong,
243 | dialect: JString,
244 | ) {
245 | let config = unsafe { &mut *(pointer as *mut SessionConfig) };
246 | let dialect: String = env
247 | .get_string(&dialect)
248 | .expect("Couldn't get dialect string")
249 | .into();
250 | config.options_mut().sql_parser.dialect = dialect;
251 | }
252 |
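To see what these macros stamp out, the coalesce_batches pair expands to roughly the following ordinary JNI functions (a mechanical expansion shown for illustration; it is not additional source in the crate):

#[no_mangle]
pub extern "system" fn Java_org_apache_arrow_datafusion_SessionConfig_getExecutionOptionsCoalesceBatches(
    _env: JNIEnv,
    _class: JClass,
    pointer: jlong,
) -> jboolean {
    let config = unsafe { &*(pointer as *const SessionConfig) };
    if config.options().execution.coalesce_batches { 1u8 } else { 0u8 }
}

#[no_mangle]
pub extern "system" fn Java_org_apache_arrow_datafusion_SessionConfig_setExecutionOptionsCoalesceBatches(
    _env: JNIEnv,
    _class: JClass,
    pointer: jlong,
    enabled: jboolean,
) {
    let config = unsafe { &mut *(pointer as *mut SessionConfig) };
    config.options_mut().execution.coalesce_batches = enabled != 0u8;
}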
--------------------------------------------------------------------------------
/datafusion-jni/src/stream.rs:
--------------------------------------------------------------------------------
1 | use arrow::array::Array;
2 | use arrow::array::StructArray;
3 | use arrow::ffi::{FFI_ArrowArray, FFI_ArrowSchema};
4 | use datafusion::physical_plan::SendableRecordBatchStream;
5 | use futures::stream::TryStreamExt;
6 | use jni::objects::{JClass, JObject};
7 | use jni::sys::jlong;
8 | use jni::JNIEnv;
9 | use std::convert::Into;
10 | use std::ptr::addr_of_mut;
11 | use tokio::runtime::Runtime;
12 |
13 | use crate::util::{set_object_result_error, set_object_result_ok};
14 |
15 | #[no_mangle]
16 | pub extern "system" fn Java_org_apache_arrow_datafusion_DefaultRecordBatchStream_next(
17 | mut env: JNIEnv,
18 | _class: JClass,
19 | runtime: jlong,
20 | stream: jlong,
21 | callback: JObject,
22 | ) {
23 | let runtime = unsafe { &mut *(runtime as *mut Runtime) };
24 | let stream = unsafe { &mut *(stream as *mut SendableRecordBatchStream) };
25 | runtime.block_on(async {
26 | let next = stream.try_next().await;
27 | match next {
28 | Ok(Some(batch)) => {
29 | // Convert to struct array for compatibility with FFI
30 | let struct_array: StructArray = batch.into();
31 | let array_data = struct_array.into_data();
32 | let mut ffi_array = FFI_ArrowArray::new(&array_data);
33 | // ffi_array must remain alive until after the callback is called
34 | set_object_result_ok(&mut env, callback, addr_of_mut!(ffi_array));
35 | }
36 | Ok(None) => {
37 | set_object_result_ok(&mut env, callback, 0 as *mut FFI_ArrowArray);
38 | }
39 | Err(err) => {
40 | set_object_result_error(&mut env, callback, &err);
41 | }
42 | }
43 | });
44 | }
45 |
46 | #[no_mangle]
47 | pub extern "system" fn Java_org_apache_arrow_datafusion_DefaultRecordBatchStream_getSchema(
48 | mut env: JNIEnv,
49 | _class: JClass,
50 | stream: jlong,
51 | callback: JObject,
52 | ) {
53 | let stream = unsafe { &mut *(stream as *mut SendableRecordBatchStream) };
54 | let schema = stream.schema();
55 | let ffi_schema = FFI_ArrowSchema::try_from(&*schema);
56 | match ffi_schema {
57 | Ok(mut ffi_schema) => {
58 | // ffi_schema must remain alive until after the callback is called
59 | set_object_result_ok(&mut env, callback, addr_of_mut!(ffi_schema));
60 | }
61 | Err(err) => {
62 | set_object_result_error(&mut env, callback, &err);
63 | }
64 | }
65 | }
66 |
67 | #[no_mangle]
68 | pub extern "system" fn Java_org_apache_arrow_datafusion_DefaultRecordBatchStream_destroy(
69 | _env: JNIEnv,
70 | _class: JClass,
71 | pointer: jlong,
72 | ) {
73 | let _ = unsafe { Box::from_raw(pointer as *mut SendableRecordBatchStream) };
74 | }
75 |
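The FFI handoff in next and getSchema relies on a strict lifetime contract: the FFI_ArrowArray/FFI_ArrowSchema structs live on the native stack and only their address reaches the Java callback, so the callback must import the Arrow data before it returns, after which the struct is dropped. A minimal sketch of that contract with an illustrative stand-in payload:

use std::ptr::addr_of_mut;

// Illustrative stand-in for FFI_ArrowArray; holds data only until dropped.
struct Payload {
    imported: bool,
}

// Stand-in for the Java callback: it must finish with the pointer before
// returning, because the pointee dies when the caller's scope ends.
fn callback(payload: *mut Payload) {
    unsafe { (*payload).imported = true };
}

fn main() {
    let mut payload = Payload { imported: false };
    // Valid: the payload outlives the synchronous callback invocation...
    callback(addr_of_mut!(payload));
    assert!(payload.imported);
    // ...but retaining the raw pointer past this scope would dangle.
}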
--------------------------------------------------------------------------------
/datafusion-jni/src/table_provider.rs:
--------------------------------------------------------------------------------
1 | use datafusion::datasource::TableProvider;
2 | use jni::objects::JClass;
3 | use jni::sys::jlong;
4 | use jni::JNIEnv;
5 | use std::sync::Arc;
6 |
7 | #[no_mangle]
8 | pub extern "system" fn Java_org_apache_arrow_datafusion_TableProviders_destroyTableProvider(
9 | _env: JNIEnv,
10 | _class: JClass,
11 | pointer: jlong,
12 | ) {
13 | let _ = unsafe { Box::from_raw(pointer as *mut Arc<dyn TableProvider>) };
14 | }
15 |
--------------------------------------------------------------------------------
/datafusion-jni/src/util.rs:
--------------------------------------------------------------------------------
1 | use std::error::Error;
2 |
3 | use jni::objects::JObject;
4 | use jni::sys::jlong;
5 | use jni::JNIEnv;
6 |
7 | /// Set error message from a result using a Consumer Java callback
8 | pub fn set_error_message<Err: Error>(env: &mut JNIEnv, callback: JObject, result: Result<(), Err>) {
9 | match result {
10 | Ok(_) => {
11 | let err_message = JObject::null();
12 | env.call_method(
13 | callback,
14 | "accept",
15 | "(Ljava/lang/Object;)V",
16 | &[(&err_message).into()],
17 | )
18 | .expect("Failed to call error handler with null message");
19 | }
20 | Err(err) => {
21 | let err_message = env
22 | .new_string(err.to_string())
23 | .expect("Couldn't create java string for error message");
24 | env.call_method(
25 | callback,
26 | "accept",
27 | "(Ljava/lang/Object;)V",
28 | &[(&err_message).into()],
29 | )
30 | .expect("Failed to call error handler with error message");
31 | }
32 | };
33 | }
34 |
35 | /// Call an ObjectResultCallback to return either a pointer to a newly created object or an error message
36 | pub fn set_object_result<T, Err: Error>(
37 | env: &mut JNIEnv,
38 | callback: JObject,
39 | address: Result<*mut T, Err>,
40 | ) {
41 | match address {
42 | Ok(address) => set_object_result_ok(env, callback, address),
43 | Err(err) => set_object_result_error(env, callback, &err),
44 | };
45 | }
46 |
47 | /// Set success result by calling an ObjectResultCallback
48 | pub fn set_object_result_ok<T>(env: &mut JNIEnv, callback: JObject, address: *mut T) {
49 | let err_message = JObject::null();
50 | env.call_method(
51 | callback,
52 | "callback",
53 | "(Ljava/lang/String;J)V",
54 | &[(&err_message).into(), (address as jlong).into()],
55 | )
56 | .expect("Failed to call object result callback with address");
57 | }
58 |
59 | /// Set error result by calling an ObjectResultCallback
60 | pub fn set_object_result_error<T: Error>(env: &mut JNIEnv, callback: JObject, error: &T) {
61 | let err_message = env
62 | .new_string(error.to_string())
63 | .expect("Couldn't create java string for error message");
64 | let address = -1 as jlong;
65 | env.call_method(
66 | callback,
67 | "callback",
68 | "(Ljava/lang/String;J)V",
69 | &[(&err_message).into(), address.into()],
70 | )
71 | .expect("Failed to call object result callback with error");
72 | }
73 |
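The JNI descriptors above decode to `void accept(Object)` (a java.util.function.Consumer) and `void callback(String message, long pointer)` (the ObjectResultCallback interface in the Java sources), with -1 serving as the failure sentinel for the pointer. A self-contained sketch of the success/failure split these helpers implement (DemoError is an illustrative stand-in for the DataFusion errors actually passed):

use std::error::Error;
use std::fmt;

// Illustrative stand-in for the error types the real callers pass through.
#[derive(Debug)]
struct DemoError(&'static str);

impl fmt::Display for DemoError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", self.0)
    }
}
impl Error for DemoError {}

// Mirror of the ObjectResultCallback contract: a null message with a valid
// address on success, or an error string with the sentinel address -1.
fn to_callback_args<T>(result: Result<*mut T, DemoError>) -> (Option<String>, i64) {
    match result {
        Ok(address) => (None, address as i64),
        Err(err) => (Some(err.to_string()), -1),
    }
}

fn main() {
    let mut value = 7u8;
    assert_eq!(to_callback_args(Ok(&mut value as *mut u8)).0, None);
    let (message, address) = to_callback_args::<u8>(Err(DemoError("boom")));
    assert_eq!(message.as_deref(), Some("boom"));
    assert_eq!(address, -1);
}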
--------------------------------------------------------------------------------
/gradle.properties:
--------------------------------------------------------------------------------
1 | org.gradle.jvmargs=--add-exports jdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED \
2 | --add-exports jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED \
3 | --add-exports jdk.compiler/com.sun.tools.javac.parser=ALL-UNNAMED \
4 | --add-exports jdk.compiler/com.sun.tools.javac.tree=ALL-UNNAMED \
5 | --add-exports jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED
6 |
--------------------------------------------------------------------------------
/gradle/wrapper/gradle-wrapper.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datafusion-contrib/datafusion-java/c5dee4178ff9187de4cc7f5eb2e9ddd471223444/gradle/wrapper/gradle-wrapper.jar
--------------------------------------------------------------------------------
/gradle/wrapper/gradle-wrapper.properties:
--------------------------------------------------------------------------------
1 | distributionBase=GRADLE_USER_HOME
2 | distributionPath=wrapper/dists
3 | distributionUrl=https\://services.gradle.org/distributions/gradle-8.7-bin.zip
4 | networkTimeout=10000
5 | validateDistributionUrl=true
6 | zipStoreBase=GRADLE_USER_HOME
7 | zipStorePath=wrapper/dists
8 |
--------------------------------------------------------------------------------
/gradlew:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | #
4 | # Copyright © 2015-2021 the original authors.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # https://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | #
18 |
19 | ##############################################################################
20 | #
21 | # Gradle start up script for POSIX generated by Gradle.
22 | #
23 | # Important for running:
24 | #
25 | # (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is
26 | # noncompliant, but you have some other compliant shell such as ksh or
27 | # bash, then to run this script, type that shell name before the whole
28 | # command line, like:
29 | #
30 | # ksh Gradle
31 | #
32 | # Busybox and similar reduced shells will NOT work, because this script
33 | # requires all of these POSIX shell features:
34 | # * functions;
35 | # * expansions «$var», «${var}», «${var:-default}», «${var+SET}»,
36 | # «${var#prefix}», «${var%suffix}», and «$( cmd )»;
37 | # * compound commands having a testable exit status, especially «case»;
38 | # * various built-in commands including «command», «set», and «ulimit».
39 | #
40 | # Important for patching:
41 | #
42 | # (2) This script targets any POSIX shell, so it avoids extensions provided
43 | # by Bash, Ksh, etc; in particular arrays are avoided.
44 | #
45 | # The "traditional" practice of packing multiple parameters into a
46 | # space-separated string is a well documented source of bugs and security
47 | # problems, so this is (mostly) avoided, by progressively accumulating
48 | # options in "$@", and eventually passing that to Java.
49 | #
50 | # Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS,
51 | # and GRADLE_OPTS) rely on word-splitting, this is performed explicitly;
52 | # see the in-line comments for details.
53 | #
54 | # There are tweaks for specific operating systems such as AIX, CygWin,
55 | # Darwin, MinGW, and NonStop.
56 | #
57 | # (3) This script is generated from the Groovy template
58 | # https://github.com/gradle/gradle/blob/HEAD/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
59 | # within the Gradle project.
60 | #
61 | # You can find Gradle at https://github.com/gradle/gradle/.
62 | #
63 | ##############################################################################
64 |
65 | # Attempt to set APP_HOME
66 |
67 | # Resolve links: $0 may be a link
68 | app_path=$0
69 |
70 | # Need this for daisy-chained symlinks.
71 | while
72 | APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path
73 | [ -h "$app_path" ]
74 | do
75 | ls=$( ls -ld "$app_path" )
76 | link=${ls#*' -> '}
77 | case $link in #(
78 | /*) app_path=$link ;; #(
79 | *) app_path=$APP_HOME$link ;;
80 | esac
81 | done
82 |
83 | # This is normally unused
84 | # shellcheck disable=SC2034
85 | APP_BASE_NAME=${0##*/}
86 | # Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036)
87 | APP_HOME=$( cd "${APP_HOME:-./}" > /dev/null && pwd -P ) || exit
88 |
89 | # Use the maximum available, or set MAX_FD != -1 to use that value.
90 | MAX_FD=maximum
91 |
92 | warn () {
93 | echo "$*"
94 | } >&2
95 |
96 | die () {
97 | echo
98 | echo "$*"
99 | echo
100 | exit 1
101 | } >&2
102 |
103 | # OS specific support (must be 'true' or 'false').
104 | cygwin=false
105 | msys=false
106 | darwin=false
107 | nonstop=false
108 | case "$( uname )" in #(
109 | CYGWIN* ) cygwin=true ;; #(
110 | Darwin* ) darwin=true ;; #(
111 | MSYS* | MINGW* ) msys=true ;; #(
112 | NONSTOP* ) nonstop=true ;;
113 | esac
114 |
115 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
116 |
117 |
118 | # Determine the Java command to use to start the JVM.
119 | if [ -n "$JAVA_HOME" ] ; then
120 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
121 | # IBM's JDK on AIX uses strange locations for the executables
122 | JAVACMD=$JAVA_HOME/jre/sh/java
123 | else
124 | JAVACMD=$JAVA_HOME/bin/java
125 | fi
126 | if [ ! -x "$JAVACMD" ] ; then
127 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
128 |
129 | Please set the JAVA_HOME variable in your environment to match the
130 | location of your Java installation."
131 | fi
132 | else
133 | JAVACMD=java
134 | if ! command -v java >/dev/null 2>&1
135 | then
136 | die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
137 |
138 | Please set the JAVA_HOME variable in your environment to match the
139 | location of your Java installation."
140 | fi
141 | fi
142 |
143 | # Increase the maximum file descriptors if we can.
144 | if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then
145 | case $MAX_FD in #(
146 | max*)
147 | # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked.
148 | # shellcheck disable=SC2039,SC3045
149 | MAX_FD=$( ulimit -H -n ) ||
150 | warn "Could not query maximum file descriptor limit"
151 | esac
152 | case $MAX_FD in #(
153 | '' | soft) :;; #(
154 | *)
155 | # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked.
156 | # shellcheck disable=SC2039,SC3045
157 | ulimit -n "$MAX_FD" ||
158 | warn "Could not set maximum file descriptor limit to $MAX_FD"
159 | esac
160 | fi
161 |
162 | # Collect all arguments for the java command, stacking in reverse order:
163 | # * args from the command line
164 | # * the main class name
165 | # * -classpath
166 | # * -D...appname settings
167 | # * --module-path (only if needed)
168 | # * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables.
169 |
170 | # For Cygwin or MSYS, switch paths to Windows format before running java
171 | if "$cygwin" || "$msys" ; then
172 | APP_HOME=$( cygpath --path --mixed "$APP_HOME" )
173 | CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" )
174 |
175 | JAVACMD=$( cygpath --unix "$JAVACMD" )
176 |
177 | # Now convert the arguments - kludge to limit ourselves to /bin/sh
178 | for arg do
179 | if
180 | case $arg in #(
181 | -*) false ;; # don't mess with options #(
182 | /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath
183 | [ -e "$t" ] ;; #(
184 | *) false ;;
185 | esac
186 | then
187 | arg=$( cygpath --path --ignore --mixed "$arg" )
188 | fi
189 | # Roll the args list around exactly as many times as the number of
190 | # args, so each arg winds up back in the position where it started, but
191 | # possibly modified.
192 | #
193 | # NB: a `for` loop captures its iteration list before it begins, so
194 | # changing the positional parameters here affects neither the number of
195 | # iterations, nor the values presented in `arg`.
196 | shift # remove old arg
197 | set -- "$@" "$arg" # push replacement arg
198 | done
199 | fi
200 |
201 |
202 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
203 | DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
204 |
205 | # Collect all arguments for the java command:
206 | # * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS are not allowed to contain shell fragments,
207 | # and any embedded shellness will be escaped.
208 | # * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be
209 | # treated as '${Hostname}' itself on the command line.
210 |
211 | set -- \
212 | "-Dorg.gradle.appname=$APP_BASE_NAME" \
213 | -classpath "$CLASSPATH" \
214 | org.gradle.wrapper.GradleWrapperMain \
215 | "$@"
216 |
217 | # Stop when "xargs" is not available.
218 | if ! command -v xargs >/dev/null 2>&1
219 | then
220 | die "xargs is not available"
221 | fi
222 |
223 | # Use "xargs" to parse quoted args.
224 | #
225 | # With -n1 it outputs one arg per line, with the quotes and backslashes removed.
226 | #
227 | # In Bash we could simply go:
228 | #
229 | # readarray ARGS < <( xargs -n1 <<<"$var" ) &&
230 | # set -- "${ARGS[@]}" "$@"
231 | #
232 | # but POSIX shell has neither arrays nor command substitution, so instead we
233 | # post-process each arg (as a line of input to sed) to backslash-escape any
234 | # character that might be a shell metacharacter, then use eval to reverse
235 | # that process (while maintaining the separation between arguments), and wrap
236 | # the whole thing up as a single "set" statement.
237 | #
238 | # This will of course break if any of these variables contains a newline or
239 | # an unmatched quote.
240 | #
241 |
242 | eval "set -- $(
243 | printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" |
244 | xargs -n1 |
245 | sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' |
246 | tr '\n' ' '
247 | )" '"$@"'
248 |
249 | exec "$JAVACMD" "$@"
250 |
--------------------------------------------------------------------------------
/gradlew.bat:
--------------------------------------------------------------------------------
1 | @rem
2 | @rem Copyright 2015 the original author or authors.
3 | @rem
4 | @rem Licensed under the Apache License, Version 2.0 (the "License");
5 | @rem you may not use this file except in compliance with the License.
6 | @rem You may obtain a copy of the License at
7 | @rem
8 | @rem https://www.apache.org/licenses/LICENSE-2.0
9 | @rem
10 | @rem Unless required by applicable law or agreed to in writing, software
11 | @rem distributed under the License is distributed on an "AS IS" BASIS,
12 | @rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | @rem See the License for the specific language governing permissions and
14 | @rem limitations under the License.
15 | @rem
16 |
17 | @if "%DEBUG%"=="" @echo off
18 | @rem ##########################################################################
19 | @rem
20 | @rem Gradle startup script for Windows
21 | @rem
22 | @rem ##########################################################################
23 |
24 | @rem Set local scope for the variables with windows NT shell
25 | if "%OS%"=="Windows_NT" setlocal
26 |
27 | set DIRNAME=%~dp0
28 | if "%DIRNAME%"=="" set DIRNAME=.
29 | @rem This is normally unused
30 | set APP_BASE_NAME=%~n0
31 | set APP_HOME=%DIRNAME%
32 |
33 | @rem Resolve any "." and ".." in APP_HOME to make it shorter.
34 | for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi
35 |
36 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
37 | set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"
38 |
39 | @rem Find java.exe
40 | if defined JAVA_HOME goto findJavaFromJavaHome
41 |
42 | set JAVA_EXE=java.exe
43 | %JAVA_EXE% -version >NUL 2>&1
44 | if %ERRORLEVEL% equ 0 goto execute
45 |
46 | echo. 1>&2
47 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 1>&2
48 | echo. 1>&2
49 | echo Please set the JAVA_HOME variable in your environment to match the 1>&2
50 | echo location of your Java installation. 1>&2
51 |
52 | goto fail
53 |
54 | :findJavaFromJavaHome
55 | set JAVA_HOME=%JAVA_HOME:"=%
56 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe
57 |
58 | if exist "%JAVA_EXE%" goto execute
59 |
60 | echo. 1>&2
61 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 1>&2
62 | echo. 1>&2
63 | echo Please set the JAVA_HOME variable in your environment to match the 1>&2
64 | echo location of your Java installation. 1>&2
65 |
66 | goto fail
67 |
68 | :execute
69 | @rem Setup the command line
70 |
71 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
72 |
73 |
74 | @rem Execute Gradle
75 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*
76 |
77 | :end
78 | @rem End local scope for the variables with windows NT shell
79 | if %ERRORLEVEL% equ 0 goto mainEnd
80 |
81 | :fail
82 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
83 | rem the _cmd.exe /c_ return code!
84 | set EXIT_CODE=%ERRORLEVEL%
85 | if %EXIT_CODE% equ 0 set EXIT_CODE=1
86 | if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE%
87 | exit /b %EXIT_CODE%
88 |
89 | :mainEnd
90 | if "%OS%"=="Windows_NT" endlocal
91 |
92 | :omega
93 |
--------------------------------------------------------------------------------
/settings.gradle:
--------------------------------------------------------------------------------
1 | // https://docs.gradle.org/current/userguide/toolchains.html#sub:download_repositories
2 | plugins {
3 | id 'org.gradle.toolchains.foojay-resolver-convention' version '0.8.0'
4 | }
5 |
6 | rootProject.name = 'datafusion-java'
7 |
8 | include 'datafusion-java', 'datafusion-examples'
9 |
--------------------------------------------------------------------------------