├── .dockerignore ├── .github ├── dependabot.yml └── workflows │ ├── build.yml │ └── release.yml ├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── build.gradle ├── buildSrc ├── build.gradle └── src │ └── main │ └── groovy │ └── datafusion.java-conventions.gradle ├── datafusion-examples ├── build.gradle └── src │ └── main │ ├── java │ └── org │ │ └── apache │ │ └── arrow │ │ └── datafusion │ │ └── examples │ │ └── ExampleMain.java │ └── resources │ ├── aggregate_test_100.csv │ ├── aggregate_test_100.parquet │ ├── log4j2.xml │ └── test_table.csv ├── datafusion-java ├── build.gradle ├── src │ ├── main │ │ └── java │ │ │ └── org │ │ │ └── apache │ │ │ └── arrow │ │ │ └── datafusion │ │ │ ├── AbstractProxy.java │ │ │ ├── ArrowFormat.java │ │ │ ├── CsvFormat.java │ │ │ ├── DataFrame.java │ │ │ ├── DataFrames.java │ │ │ ├── DefaultDataFrame.java │ │ │ ├── DefaultRecordBatchStream.java │ │ │ ├── DefaultSessionContext.java │ │ │ ├── DefaultTableProvider.java │ │ │ ├── ErrorUtil.java │ │ │ ├── ExecutionOptions.java │ │ │ ├── FileFormat.java │ │ │ ├── FileFormats.java │ │ │ ├── JNILoader.java │ │ │ ├── ListingOptions.java │ │ │ ├── ListingTable.java │ │ │ ├── ListingTableConfig.java │ │ │ ├── NativeProxy.java │ │ │ ├── ObjectResultCallback.java │ │ │ ├── ParquetFormat.java │ │ │ ├── ParquetOptions.java │ │ │ ├── RecordBatchStream.java │ │ │ ├── Runtime.java │ │ │ ├── SessionConfig.java │ │ │ ├── SessionContext.java │ │ │ ├── SessionContexts.java │ │ │ ├── SqlParserOptions.java │ │ │ ├── TableProvider.java │ │ │ ├── TableProviders.java │ │ │ ├── TokioRuntime.java │ │ │ └── package-info.java │ └── test │ │ ├── java │ │ └── org │ │ │ └── apache │ │ │ └── arrow │ │ │ └── datafusion │ │ │ ├── ParquetWriter.java │ │ │ ├── TestExecuteStream.java │ │ │ ├── TestListingTable.java │ │ │ ├── TestQuery.java │ │ │ └── TestSessionConfig.java │ │ └── resources │ │ └── dictionary_data.parquet └── write_test_files.py ├── datafusion-jni ├── .gitignore ├── Cargo.toml └── src │ ├── context.rs │ ├── dataframe.rs │ ├── file_formats.rs │ ├── lib.rs │ ├── listing_options.rs │ ├── listing_table.rs │ ├── listing_table_config.rs │ ├── runtime.rs │ ├── session_config.rs │ ├── stream.rs │ ├── table_provider.rs │ └── util.rs ├── gradle.properties ├── gradle └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── gradlew ├── gradlew.bat └── settings.gradle /.dockerignore: -------------------------------------------------------------------------------- 1 | Dockerfile 2 | README.md 3 | **/build/ 4 | **/target/ 5 | **/out/ 6 | **/bin/ 7 | **/.idea/ 8 | **/.gradle/ 9 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 
3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "gradle" # See documentation for possible values 9 | directory: "/" # Location of package manifests 10 | schedule: 11 | interval: "weekly" 12 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build 2 | on: 3 | push: 4 | branches: ["*"] 5 | pull_request: 6 | branches: ["*"] 7 | jobs: 8 | rust: 9 | strategy: 10 | matrix: 11 | os: ["ubuntu-latest", "macos-latest", "windows-latest"] 12 | runs-on: ${{ matrix.os }} 13 | steps: 14 | - uses: actions/checkout@v3 15 | 16 | - name: Stable with rustfmt and clippy 17 | uses: dtolnay/rust-toolchain@stable 18 | with: 19 | toolchain: stable 20 | components: rustfmt, clippy 21 | 22 | - name: Set up JDK 17 23 | uses: actions/setup-java@v3 24 | with: 25 | java-version: "17" 26 | distribution: "temurin" 27 | 28 | - name: Validate Gradle wrapper 29 | uses: gradle/wrapper-validation-action@v1 30 | 31 | - name: Cargo build 32 | run: ./gradlew cargoReleaseBuild 33 | 34 | - name: Upload built artifacts 35 | uses: actions/upload-artifact@v3 36 | if: matrix.os == 'macos-latest' 37 | with: 38 | name: datafusion-jni-${{ matrix.os }} 39 | if-no-files-found: error 40 | path: "datafusion-jni/target/release/libdatafusion_jni.dylib" 41 | retention-days: 3 42 | 43 | - name: Upload built artifacts 44 | uses: actions/upload-artifact@v3 45 | if: matrix.os == 'ubuntu-latest' 46 | with: 47 | name: datafusion-jni-${{ matrix.os }} 48 | if-no-files-found: error 49 | path: "datafusion-jni/target/release/libdatafusion_jni.so" 50 | retention-days: 3 51 | 52 | - name: Upload built artifacts 53 | uses: actions/upload-artifact@v3 54 | if: matrix.os == 'windows-latest' 55 | with: 56 | name: datafusion-jni-${{ matrix.os }} 57 | if-no-files-found: error 58 | # note no "lib" 59 | path: "datafusion-jni\\target\\release\\datafusion_jni.dll" 60 | retention-days: 3 61 | 62 | java: 63 | needs: 64 | - rust 65 | runs-on: ubuntu-latest 66 | steps: 67 | - uses: actions/checkout@v3 68 | 69 | - name: Set up JDK 17 70 | uses: actions/setup-java@v3 71 | with: 72 | java-version: 17 73 | distribution: "temurin" 74 | 75 | - name: Validate Gradle wrapper 76 | uses: gradle/wrapper-validation-action@v1 77 | 78 | - name: Download ubuntu artifacts 79 | uses: actions/download-artifact@v3 80 | with: 81 | name: datafusion-jni-ubuntu-latest 82 | path: datafusion-java/build/jni_libs/linux-x86_64 83 | 84 | - name: Download windows artifacts 85 | uses: actions/download-artifact@v3 86 | with: 87 | name: datafusion-jni-windows-latest 88 | path: datafusion-java/build/jni_libs/windows-x86_64 89 | 90 | - name: Download macos artifacts 91 | uses: actions/download-artifact@v3 92 | with: 93 | name: datafusion-jni-macos-latest 94 | path: datafusion-java/build/jni_libs/osx-x86_64 95 | 96 | - name: List downloaded artifacts 97 | run: tree datafusion-java/build/jni_libs 98 | 99 | - name: Build and test 100 | run: ./gradlew -PJNI_PATH=${{ github.workspace }}/datafusion-java/build/jni_libs/linux-x86_64 build 101 | 102 | - name: Publish to Maven Local 103 | run: ./gradlew publishToMavenLocal 104 | 105 | - name: Upload built artifacts 106 | uses: actions/upload-artifact@v3 107 | with: 108 | name: datafusion-java 109 | if-no-files-found: error 110 | path: 
~/.m2/repository/io/github/datafusion-contrib/datafusion-java 111 | retention-days: 3 112 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | on: 3 | release: 4 | types: [created] 5 | push: 6 | branches: [main] 7 | jobs: 8 | rust: 9 | strategy: 10 | matrix: 11 | os: ["ubuntu-latest", "macos-latest", "windows-latest"] 12 | runs-on: ${{ matrix.os }} 13 | steps: 14 | - uses: actions/checkout@v3 15 | 16 | - name: Stable with rustfmt and clippy 17 | uses: dtolnay/rust-toolchain@stable 18 | with: 19 | toolchain: stable 20 | components: rustfmt, clippy 21 | 22 | - name: Set up JDK 17 23 | uses: actions/setup-java@v3 24 | with: 25 | java-version: "17" 26 | distribution: "temurin" 27 | 28 | - name: Validate Gradle wrapper 29 | uses: gradle/wrapper-validation-action@v1 30 | 31 | - name: Cargo build 32 | run: ./gradlew cargoReleaseBuild 33 | 34 | - name: Upload built artifacts 35 | uses: actions/upload-artifact@v3 36 | if: matrix.os == 'macos-latest' 37 | with: 38 | name: datafusion-jni-${{ matrix.os }} 39 | if-no-files-found: error 40 | path: "datafusion-jni/target/release/libdatafusion_jni.dylib" 41 | retention-days: 3 42 | 43 | - name: Upload built artifacts 44 | uses: actions/upload-artifact@v3 45 | if: matrix.os == 'ubuntu-latest' 46 | with: 47 | name: datafusion-jni-${{ matrix.os }} 48 | if-no-files-found: error 49 | path: "datafusion-jni/target/release/libdatafusion_jni.so" 50 | retention-days: 3 51 | 52 | - name: Upload built artifacts 53 | uses: actions/upload-artifact@v3 54 | if: matrix.os == 'windows-latest' 55 | with: 56 | name: datafusion-jni-${{ matrix.os }} 57 | if-no-files-found: error 58 | # note no "lib" 59 | path: "datafusion-jni\\target\\release\\datafusion_jni.dll" 60 | retention-days: 3 61 | 62 | java: 63 | needs: 64 | - rust 65 | runs-on: ubuntu-latest 66 | steps: 67 | - uses: actions/checkout@v3 68 | 69 | - name: Set up JDK 17 70 | uses: actions/setup-java@v3 71 | with: 72 | java-version: 17 73 | distribution: "temurin" 74 | 75 | - name: Validate Gradle wrapper 76 | uses: gradle/wrapper-validation-action@v1 77 | 78 | - name: Download ubuntu artifacts 79 | uses: actions/download-artifact@v3 80 | with: 81 | name: datafusion-jni-ubuntu-latest 82 | path: datafusion-java/build/jni_libs/linux-x86_64 83 | 84 | - name: Download windows artifacts 85 | uses: actions/download-artifact@v3 86 | with: 87 | name: datafusion-jni-windows-latest 88 | path: datafusion-java/build/jni_libs/windows-x86_64 89 | 90 | - name: Download macos artifacts 91 | uses: actions/download-artifact@v3 92 | with: 93 | name: datafusion-jni-macos-latest 94 | path: datafusion-java/build/jni_libs/osx-x86_64 95 | 96 | - name: List downloaded artifacts 97 | run: tree datafusion-java/build/jni_libs 98 | 99 | - name: Publish to Sonatype 100 | run: ./gradlew publish 101 | env: 102 | MAVEN_USERNAME: ${{ secrets.OSSRH_USERNAME }} 103 | MAVEN_PASSWORD: ${{ secrets.OSSRH_TOKEN }} 104 | ORG_GRADLE_PROJECT_signingKeyId: ${{ secrets.SIGNING_KEY_ID }} 105 | ORG_GRADLE_PROJECT_signingKey: ${{ secrets.SIGNING_KEY }} 106 | ORG_GRADLE_PROJECT_signingPassword: ${{ secrets.SIGNING_PASSWORD }} 107 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.toptal.com/developers/gitignore/api/java,gradle 3 | # Edit at 
https://www.toptal.com/developers/gitignore?templates=java,gradle 4 | 5 | ### Java ### 6 | # Compiled class file 7 | *.class 8 | 9 | # Log file 10 | *.log 11 | 12 | # BlueJ files 13 | *.ctxt 14 | 15 | # Mobile Tools for Java (J2ME) 16 | .mtj.tmp/ 17 | 18 | # Package Files # 19 | *.jar 20 | *.war 21 | *.nar 22 | *.ear 23 | *.zip 24 | *.tar.gz 25 | *.rar 26 | 27 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 28 | hs_err_pid* 29 | 30 | ### Gradle ### 31 | .gradle 32 | build/ 33 | 34 | # Ignore Gradle GUI config 35 | gradle-app.setting 36 | 37 | # Avoid ignoring Gradle wrapper jar file (.jar files are usually ignored) 38 | !gradle-wrapper.jar 39 | 40 | # Cache of project 41 | .gradletasknamecache 42 | 43 | # # Work around https://youtrack.jetbrains.com/issue/IDEA-116898 44 | # gradle/wrapper/gradle-wrapper.properties 45 | 46 | ### Gradle Patch ### 47 | **/build/ 48 | 49 | # Eclipse Gradle plugin generated files 50 | # Eclipse Core 51 | .project 52 | # JDT-specific (Eclipse Java Development Tools) 53 | .classpath 54 | 55 | # End of https://www.toptal.com/developers/gitignore/api/java,gradle 56 | 57 | # IntelliJ 58 | *.ipr 59 | *.iws 60 | *.iml 61 | 62 | .settings/ 63 | 64 | .idea/ 65 | out/ 66 | bin/ 67 | .vscode/ 68 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM amazoncorretto:21 2 | 3 | RUN yum install -y gcc && \ 4 | yum clean all && \ 5 | rm -rf /var/cache/yum 6 | 7 | RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y 8 | 9 | ENV PATH="/root/.cargo/bin:$PATH" 10 | 11 | WORKDIR /usr/opt/datafusion-java 12 | 13 | COPY build.gradle settings.gradle gradlew ./ 14 | 15 | COPY gradle gradle 16 | 17 | RUN ./gradlew --version 18 | 19 | COPY . . 20 | 21 | RUN ./gradlew copyDevLibrary installDist 22 | 23 | # Set working directory so that the relative paths to resource files used in ExampleMain are correct 24 | WORKDIR /usr/opt/datafusion-java/datafusion-examples 25 | 26 | # Configure environment variables to allow loading datafusion-java in jshell 27 | ENV CLASSPATH="/usr/opt/datafusion-java/datafusion-examples/build/install/datafusion-examples/lib/*" 28 | ENV JDK_JAVA_OPTIONS="-Djava.library.path=/usr/opt/datafusion-java/datafusion-java/build/jni_libs/dev --add-opens=java.base/java.nio=ALL-UNNAMED" 29 | 30 | CMD ["./build/install/datafusion-examples/bin/datafusion-examples"] 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at
194 | 
195 | http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 | 
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | # datafusion-java
2 | 
3 | [![Build](https://github.com/datafusion-contrib/datafusion-java/actions/workflows/build.yml/badge.svg)](https://github.com/datafusion-contrib/datafusion-java/actions/workflows/build.yml)
4 | [![Release](https://github.com/datafusion-contrib/datafusion-java/actions/workflows/release.yml/badge.svg)](https://github.com/datafusion-contrib/datafusion-java/actions/workflows/release.yml)
5 | [![Maven metadata URL](https://img.shields.io/maven-metadata/v?metadataUrl=https%3A%2F%2Frepo.maven.apache.org%2Fmaven2%2Fio%2Fgithub%2Fdatafusion-contrib%2Fdatafusion-java%2Fmaven-metadata.xml)](https://repo.maven.apache.org/maven2/io/github/datafusion-contrib/datafusion-java/)
6 | 
7 | A Java binding to [Apache DataFusion][1]
8 | 
9 | ## Status
10 | 
11 | This project is still a work in progress, and it currently works with Arrow 18 (as pinned in `datafusion-java/build.gradle`) and DataFusion version 25.0.
12 | It is built and verified in CI with JDK 17, and the compiled classes target Java 8 bytecode. You may check out the [docker run instructions](#how-to-run-the-interactive-demo)
13 | where Java 21 `jshell` is used to run interactively.
14 | 
15 | ## How to use in your code
16 | 
17 | The artifacts are [published][2] to Maven Central, so you can use datafusion-java like any normal Java library:
18 | 
19 | ```groovy
20 | dependencies {
21 |     implementation(
22 |         group = "io.github.datafusion-contrib",
23 |         name = "datafusion-java",
24 |         version = "0.16.0" // or latest version, check out https://github.com/datafusion-contrib/datafusion-java/releases
25 |     )
26 | }
27 | ```
28 | 
29 | To test it out, you can use this piece of demo code:
30 | 
31 | 
32 | DataFusionDemo.java 33 | 34 | ```java 35 | package com.me; 36 | 37 | import org.apache.arrow.datafusion.DataFrame; 38 | import org.apache.arrow.datafusion.SessionContext; 39 | import org.apache.arrow.datafusion.SessionContexts; 40 | 41 | public class DataFusionDemo { 42 | 43 | public static void main(String[] args) throws Exception { 44 | try (SessionContext sessionContext = SessionContexts.create()) { 45 | sessionContext.sql("select sqrt(65536)").thenCompose(DataFrame::show).join(); 46 | } 47 | } 48 | } 49 | ``` 50 | 51 |
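The demo above only prints the result table. To consume the values in Java instead, the same `DataFrame` can be collected into an Arrow reader. Below is a minimal sketch based on the `collect` API used by the `ExampleMain` shown later in this README; the class name `DataFusionCollectDemo` is just for illustration:

```java
package com.me;

import org.apache.arrow.datafusion.SessionContext;
import org.apache.arrow.datafusion.SessionContexts;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.Float8Vector;
import org.apache.arrow.vector.VectorSchemaRoot;
import org.apache.arrow.vector.ipc.ArrowReader;

public class DataFusionCollectDemo {

  public static void main(String[] args) throws Exception {
    try (SessionContext context = SessionContexts.create();
        BufferAllocator allocator = new RootAllocator()) {
      // sql() completes with a DataFrame; collect() materializes the result
      // into an ArrowReader backed by the given allocator.
      ArrowReader reader =
          context.sql("select sqrt(65536)").thenCompose(df -> df.collect(allocator)).join();
      VectorSchemaRoot root = reader.getVectorSchemaRoot();
      while (reader.loadNextBatch()) {
        Float8Vector vector = (Float8Vector) root.getVector(0);
        System.out.println(vector.getValueAsDouble(0)); // prints 256.0
      }
      reader.close(); // release the underlying Arrow buffers
    }
  }
}
```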
52 | 53 |
54 | build.gradle.kts 55 | 56 | ```kotlin 57 | plugins { 58 | java 59 | application 60 | } 61 | 62 | repositories { 63 | mavenCentral() 64 | google() 65 | } 66 | 67 | tasks { 68 | application { 69 | mainClass.set("com.me.DataFusionDemo") 70 | } 71 | } 72 | 73 | dependencies { 74 | implementation( 75 | group = "io.github.datafusion-contrib", 76 | name = "datafusion-java", 77 | version = "0.16.0" 78 | ) 79 | } 80 | 81 | ``` 82 | 83 |
84 | 85 |
86 | Run result
87 | 
88 | ```
89 | $ ./gradlew run
90 | ...
91 | > Task :compileKotlin UP-TO-DATE
92 | > Task :compileJava UP-TO-DATE
93 | > Task :processResources NO-SOURCE
94 | > Task :classes UP-TO-DATE
95 | 
96 | > Task :run
97 | successfully created tokio runtime
98 | +--------------------+
99 | | sqrt(Int64(65536)) |
100 | +--------------------+
101 | | 256                |
102 | +--------------------+
103 | successfully shutdown tokio runtime
104 | 
105 | BUILD SUCCESSFUL in 2s
106 | 3 actionable tasks: 1 executed, 2 up-to-date
107 | 16:43:34: Execution finished 'run'.
108 | ```
109 | 
111 | 112 | ## How to run the interactive demo 113 | 114 | ### 1. Run using Docker (with `jshell`) 115 | 116 | First build the docker image: 117 | 118 | ``` 119 | docker build -t datafusion-example . 120 | ``` 121 | 122 | Then you can run the example program using Docker: 123 | 124 | ``` 125 | docker run --rm -it datafusion-example 126 | ``` 127 | 128 | Or start an interactive jshell session: 129 | 130 | ``` 131 | docker run --rm -it datafusion-example jshell 132 | ``` 133 | 134 |
135 | Example jshell session 136 | 137 | ```text 138 | Jan 11, 2024 1:49:28 AM java.util.prefs.FileSystemPreferences$1 run 139 | INFO: Created user preferences directory. 140 | | Welcome to JShell -- Version 21 141 | | For an introduction type: /help intro 142 | 143 | jshell> import org.apache.arrow.datafusion.* 144 | 145 | jshell> var context = SessionContexts.create() 146 | 01:41:05.586 [main] DEBUG org.apache.arrow.datafusion.JNILoader -- successfully loaded datafusion_jni from library path 147 | 01:41:05.589 [main] DEBUG org.apache.arrow.datafusion.JNILoader -- datafusion_jni already loaded, returning 148 | 01:41:05.590 [main] DEBUG org.apache.arrow.datafusion.AbstractProxy -- Obtaining DefaultSessionContext@7f58383b8db0 149 | 01:41:05.591 [main] DEBUG org.apache.arrow.datafusion.AbstractProxy -- Obtaining TokioRuntime@7f58383ce110 150 | context ==> org.apache.arrow.datafusion.DefaultSessionContext@2d209079 151 | 152 | jshell> var df = context.sql("select 1.1 + cos(2.0)").join() 153 | 01:41:10.961 [main] DEBUG org.apache.arrow.datafusion.AbstractProxy -- Obtaining DefaultDataFrame@7f5838209100 154 | df ==> org.apache.arrow.datafusion.DefaultDataFrame@34ce8af7 155 | 156 | jshell> import org.apache.arrow.memory.* 157 | 158 | jshell> var allocator = new RootAllocator() 159 | 01:41:22.521 [main] INFO org.apache.arrow.memory.BaseAllocator -- Debug mode disabled. Enable with the VM option -Darrow.memory.debug.allocator=true. 160 | 01:41:22.525 [main] INFO org.apache.arrow.memory.DefaultAllocationManagerOption -- allocation manager type not specified, using netty as the default type 161 | 01:41:22.525 [main] INFO org.apache.arrow.memory.CheckAllocator -- Using DefaultAllocationManager at memory-unsafe-14.0.2.jar!/org/apache/arrow/memory/DefaultAllocationManagerFactory.class 162 | 01:41:22.531 [main] DEBUG org.apache.arrow.memory.util.MemoryUtil -- Constructor for direct buffer found and made accessible 163 | 01:41:22.536 [main] DEBUG org.apache.arrow.memory.util.MemoryUtil -- direct buffer constructor: available 164 | 01:41:22.537 [main] DEBUG org.apache.arrow.memory.rounding.DefaultRoundingPolicy -- -Dorg.apache.memory.allocator.pageSize: 8192 165 | 01:41:22.537 [main] DEBUG org.apache.arrow.memory.rounding.DefaultRoundingPolicy -- -Dorg.apache.memory.allocator.maxOrder: 11 166 | allocator ==> Allocator(ROOT) 0/0/0/9223372036854775807 (res/actual/peak/limit) 167 | 168 | 169 | jshell> var r = df.collect(allocator).join() 170 | 01:41:29.635 [main] INFO org.apache.arrow.datafusion.DefaultDataFrame -- successfully completed with arr length=610 171 | r ==> org.apache.arrow.vector.ipc.ArrowFileReader@7ac7a4e4 172 | 173 | jshell> var root = r.getVectorSchemaRoot() 174 | 01:41:34.658 [main] DEBUG org.apache.arrow.vector.ipc.ReadChannel -- Reading buffer with size: 10 175 | 01:41:34.661 [main] DEBUG org.apache.arrow.vector.ipc.ArrowFileReader -- Footer starts at 416, length: 184 176 | 01:41:34.661 [main] DEBUG org.apache.arrow.vector.ipc.ReadChannel -- Reading buffer with size: 184 177 | root ==> org.apache.arrow.vector.VectorSchemaRoot@6cd28fa7 178 | 179 | jshell> r.loadNextBatch() 180 | 01:41:39.421 [main] DEBUG org.apache.arrow.vector.ipc.ArrowFileReader -- RecordBatch at 200, metadata: 192, body: 16 181 | 01:41:39.423 [main] DEBUG org.apache.arrow.vector.ipc.ReadChannel -- Reading buffer with size: 208 182 | 01:41:39.424 [main] DEBUG org.apache.arrow.vector.ipc.message.ArrowRecordBatch -- Buffer in RecordBatch at 0, length: 1 183 | 01:41:39.425 [main] DEBUG 
org.apache.arrow.vector.ipc.message.ArrowRecordBatch -- Buffer in RecordBatch at 8, length: 8 184 | $8 ==> true 185 | 186 | jshell> var v = root.getVector(0) 187 | v ==> [0.6838531634528577] 188 | ``` 189 | 190 |
191 | 192 | ### 2. Build from source 193 | 194 | Note you must have a local Rust and Java environment setup. 195 | 196 | Run the example in one line: 197 | 198 | ```bash 199 | ./gradlew run 200 | ``` 201 | 202 | Or roll your own test example: 203 | 204 | ```java 205 | import org.apache.arrow.datafusion.DataFrame; 206 | import org.apache.arrow.datafusion.SessionContext; 207 | import org.apache.arrow.datafusion.SessionContexts; 208 | import org.apache.arrow.memory.BufferAllocator; 209 | import org.apache.arrow.memory.RootAllocator; 210 | import org.apache.arrow.vector.Float8Vector; 211 | import org.apache.arrow.vector.VectorSchemaRoot; 212 | import org.apache.arrow.vector.ipc.ArrowReader; 213 | import org.slf4j.Logger; 214 | import org.slf4j.LoggerFactory; 215 | 216 | import java.io.IOException; 217 | 218 | public class ExampleMain { 219 | 220 | private static final Logger logger = LoggerFactory.getLogger(ExampleMain.class); 221 | 222 | public static void main(String[] args) throws Exception { 223 | try (SessionContext sessionContext = SessionContexts.create(); BufferAllocator allocator = new RootAllocator()) { 224 | DataFrame dataFrame = sessionContext.sql("select 1.5 + sqrt(2.0)").get(); 225 | dataFrame.collect(allocator).thenAccept(ExampleMain::onReaderResult).get(); 226 | } 227 | } 228 | 229 | private static void onReaderResult(ArrowReader reader) { 230 | try { 231 | VectorSchemaRoot root = reader.getVectorSchemaRoot(); 232 | while (reader.loadNextBatch()) { 233 | Float8Vector vector = (Float8Vector) root.getVector(0); 234 | for (int i = 0; i < root.getRowCount(); i += 1) { 235 | logger.info("value {}={}", i, vector.getValueAsDouble(i)); 236 | } 237 | } 238 | // close to release resource 239 | reader.close(); 240 | } catch (IOException e) { 241 | logger.warn("got IO Exception", e); 242 | } 243 | } 244 | } 245 | ``` 246 | 247 | To build the library: 248 | 249 | ```bash 250 | ./gradlew build 251 | ``` 252 | 253 | [1]: https://github.com/apache/datafusion 254 | [2]: https://repo.maven.apache.org/maven2/io/github/datafusion-contrib/datafusion-java/ 255 | -------------------------------------------------------------------------------- /build.gradle: -------------------------------------------------------------------------------- 1 | buildscript { 2 | repositories { 3 | mavenCentral() 4 | } 5 | } 6 | 7 | plugins { 8 | id 'java' 9 | id 'com.diffplug.spotless' version '6.25.0' apply false 10 | id 'com.google.osdetector' version "1.7.3" apply false 11 | } 12 | -------------------------------------------------------------------------------- /buildSrc/build.gradle: -------------------------------------------------------------------------------- 1 | plugins { 2 | id 'groovy-gradle-plugin' 3 | } 4 | 5 | repositories { 6 | gradlePluginPortal() // so that external plugins can be resolved in dependencies section 7 | mavenCentral() 8 | } 9 | -------------------------------------------------------------------------------- /buildSrc/src/main/groovy/datafusion.java-conventions.gradle: -------------------------------------------------------------------------------- 1 | plugins { 2 | id 'java' 3 | } 4 | 5 | group = 'io.github.datafusion-contrib' 6 | version = '0.17.0-SNAPSHOT' 7 | 8 | repositories { 9 | mavenCentral() 10 | } 11 | 12 | java { 13 | toolchain { 14 | languageVersion = JavaLanguageVersion.of(21) 15 | } 16 | } 17 | 18 | tasks.withType(JavaCompile).configureEach { 19 | // down-compile to minimal version 20 | options.release.set(8) 21 | } 22 | 
-------------------------------------------------------------------------------- /datafusion-examples/build.gradle: -------------------------------------------------------------------------------- 1 | plugins { 2 | id 'application' 3 | id 'datafusion.java-conventions' 4 | id 'com.diffplug.spotless' 5 | } 6 | 7 | spotless { 8 | java { 9 | googleJavaFormat() 10 | } 11 | } 12 | 13 | dependencies { 14 | implementation project(':datafusion-java') 15 | implementation 'org.slf4j:slf4j-api:2.0.16' 16 | implementation 'ch.qos.logback:logback-classic:1.5.12' 17 | implementation 'org.apache.arrow:arrow-format:18.0.0' 18 | implementation 'org.apache.arrow:arrow-vector:18.1.0' 19 | } 20 | 21 | application { 22 | mainClass = 'org.apache.arrow.datafusion.examples.ExampleMain' 23 | def libraryPath = findProperty("JNI_PATH") ?: "$rootDir/datafusion-java/build/jni_libs/dev" 24 | applicationDefaultJvmArgs += ["-Djava.library.path=$libraryPath", "--add-opens=java.base/java.nio=ALL-UNNAMED"] 25 | } 26 | 27 | tasks.named("run").configure { 28 | dependsOn(":datafusion-java:copyDevLibrary") 29 | } 30 | -------------------------------------------------------------------------------- /datafusion-examples/src/main/java/org/apache/arrow/datafusion/examples/ExampleMain.java: -------------------------------------------------------------------------------- 1 | package org.apache.arrow.datafusion.examples; 2 | 3 | import java.io.IOException; 4 | import java.nio.file.Files; 5 | import java.nio.file.Path; 6 | import java.nio.file.Paths; 7 | import java.util.concurrent.CompletableFuture; 8 | import org.apache.arrow.datafusion.DataFrame; 9 | import org.apache.arrow.datafusion.SessionContext; 10 | import org.apache.arrow.datafusion.SessionContexts; 11 | import org.apache.arrow.memory.BufferAllocator; 12 | import org.apache.arrow.memory.RootAllocator; 13 | import org.apache.arrow.vector.BigIntVector; 14 | import org.apache.arrow.vector.VarCharVector; 15 | import org.apache.arrow.vector.VectorSchemaRoot; 16 | import org.apache.arrow.vector.ipc.ArrowReader; 17 | import org.slf4j.Logger; 18 | import org.slf4j.LoggerFactory; 19 | 20 | public class ExampleMain { 21 | 22 | private static final Logger logger = LoggerFactory.getLogger(ExampleMain.class); 23 | 24 | public static void main(String[] args) throws Exception { 25 | try (SessionContext context = SessionContexts.create(); 26 | BufferAllocator allocator = new RootAllocator()) { 27 | loadConstant(context).join(); 28 | 29 | context.registerCsv("test_csv", Paths.get("src/main/resources/test_table.csv")).join(); 30 | context.sql("select * from test_csv limit 3").thenComposeAsync(DataFrame::show).join(); 31 | 32 | context 33 | .registerParquet( 34 | "test_parquet", Paths.get("src/main/resources/aggregate_test_100.parquet")) 35 | .join(); 36 | context.sql("select * from test_parquet limit 5").thenComposeAsync(DataFrame::show).join(); 37 | 38 | context 39 | .sql("select * from test_csv") 40 | .thenComposeAsync(df -> df.collect(allocator)) 41 | .thenAccept(ExampleMain::consumeReader) 42 | .join(); 43 | 44 | Path tempPath = Files.createTempDirectory("datafusion-examples"); 45 | 46 | context 47 | .sql("select * from test_parquet limit 3") 48 | .thenComposeAsync(df -> df.writeCsv(tempPath.resolve("csv-out"))) 49 | .join(); 50 | 51 | context 52 | .sql("select * from test_parquet limit 3") 53 | .thenComposeAsync(df -> df.writeParquet(tempPath.resolve("parquet-out"))) 54 | .join(); 55 | 56 | context 57 | .sql("select * from test_parquet limit 3") 58 | .thenAccept( 59 | df -> { 60 | try { 61 
| boolean previouslyRegistered = 62 | context.registerTable("test_parquet_limited", df.intoView()).isPresent(); 63 | assert !previouslyRegistered; 64 | } catch (Exception e) { 65 | throw new RuntimeException(e); 66 | } 67 | }) 68 | .join(); 69 | 70 | context.sql("select * from test_parquet_limited").thenComposeAsync(DataFrame::show).join(); 71 | } 72 | } 73 | 74 | private static void consumeReader(ArrowReader reader) { 75 | try { 76 | VectorSchemaRoot root = reader.getVectorSchemaRoot(); 77 | while (reader.loadNextBatch()) { 78 | VarCharVector nameVector = (VarCharVector) root.getVector(0); 79 | logger.info( 80 | "name vector size {}, row count {}, value={}", 81 | nameVector.getValueCount(), 82 | root.getRowCount(), 83 | nameVector); 84 | BigIntVector ageVector = (BigIntVector) root.getVector(1); 85 | logger.info( 86 | "age vector size {}, row count {}, value={}", 87 | ageVector.getValueCount(), 88 | root.getRowCount(), 89 | ageVector); 90 | } 91 | reader.close(); 92 | } catch (IOException e) { 93 | logger.warn("got IO Exception", e); 94 | } 95 | } 96 | 97 | private static CompletableFuture loadConstant(SessionContext context) { 98 | return context 99 | .sql("select 1 + 2") 100 | .thenComposeAsync( 101 | dataFrame -> { 102 | logger.info("successfully loaded data frame {}", dataFrame); 103 | return dataFrame.show(); 104 | }); 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /datafusion-examples/src/main/resources/aggregate_test_100.csv: -------------------------------------------------------------------------------- 1 | c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13 2 | c,2,1,18109,2033001162,-6513304855495910254,25,43062,1491205016,5863949479783605708,0.110830784,0.9294097332465232,6WfVFBVGJSQb7FhA7E0lBwdvjfZnSW 3 | d,5,-40,22614,706441268,-7542719935673075327,155,14337,3373581039,11720144131976083864,0.69632107,0.3114712539863804,C2GT5KVyOPZpgKVl110TyZO0NcJ434 4 | b,1,29,-18218,994303988,5983957848665088916,204,9489,3275293996,14857091259186476033,0.53840446,0.17909035118828576,AyYVExXK6AR2qUTxNZ7qRHQOVGMLcz 5 | a,1,-85,-15154,1171968280,1919439543497968449,77,52286,774637006,12101411955859039553,0.12285209,0.6864391962767343,0keZ5G8BffGwgF2RwQD59TFzMStxCB 6 | b,5,-82,22080,1824882165,7373730676428214987,208,34331,3342719438,3330177516592499461,0.82634634,0.40975383525297016,Ig1QcuKsjHXkproePdERo2w0mYzIqd 7 | b,4,-111,-1967,-4229382,1892872227362838079,67,9832,1243785310,8382489916947120498,0.06563997,0.152498292971736,Sfx0vxv1skzZWT1PqVdoRDdO6Sb6xH 8 | e,3,104,-25136,1738331255,300633854973581194,139,20807,3577318119,13079037564113702254,0.40154034,0.7764360990307122,DuJNG8tufSqW0ZstHqWj3aGvFLMg4A 9 | a,3,13,12613,1299719633,2020498574254265315,191,17835,3998790955,14881411008939145569,0.041445434,0.8813167497816289,Amn2K87Db5Es3dFQO9cw9cvpAM6h35 10 | d,1,38,18384,-335410409,-1632237090406591229,26,57510,2712615025,1842662804748246269,0.6064476,0.6404495093354053,4HX6feIvmNXBN7XGqgO4YVBkhu8GDI 11 | a,4,-38,20744,762932956,308913475857409919,7,45465,1787652631,878137512938218976,0.7459874,0.02182578039211991,ydkwycaISlYSlEq3TlkS2m15I2pcp8 12 | d,1,57,28781,-1143802338,2662536767954229885,202,62167,879082834,4338034436871150616,0.7618384,0.42950521730777025,VY0zXmXeksCT8BzvpzpPLbmU9Kp9Y4 13 | a,4,-54,-2376,434021400,5502271306323260832,113,15777,2502326480,7966148640299601101,0.5720931,0.30585375151301186,KJFcmTVjdkCMv94wYCtfHMFhzyRsmH 14 | 
e,3,112,-6823,-421042466,8535335158538929274,129,32712,3759340273,9916295859593918600,0.6424343,0.6316565296547284,BsM5ZAYifRh5Lw3Y8X1r53I0cTJnfE 15 | d,2,113,3917,-108973366,-7220140168410319165,197,24380,63044568,4225581724448081782,0.11867094,0.2944158618048994,90gAtmGEeIqUTbo1ZrxCvWtsseukXC 16 | b,1,54,-18410,1413111008,-7145106120930085900,249,5382,1842680163,17818611040257178339,0.8881188,0.24899794314659673,6FPJlLAcaQ5uokyOWZ9HGdLZObFvOZ 17 | c,1,103,-22186,431378678,1346564663822463162,146,12393,3766999078,10901819591635583995,0.064453244,0.7784918983501654,2T3wSlHdEmASmO0xcXHnndkKEt6bz8 18 | e,2,49,24495,-587831330,9178511478067509438,129,12757,1289293657,10948666249269100825,0.5610077,0.5991138115095911,bgK1r6v3BCTh0aejJUhkA1Hn6idXGp 19 | d,1,-98,13630,-1991133944,1184110014998006843,220,2986,225513085,9634106610243643486,0.89651865,0.1640882545084913,y7C453hRWd4E7ImjNDWlpexB8nUqjh 20 | d,3,77,15091,-1302295658,8795481303066536947,154,35477,2093538928,17419098323248948387,0.11952883,0.7035635283169166,O66j6PaYuZhEUtqV6fuU7TyjM2WxC5 21 | e,2,97,18167,1593800404,-9112448817105133638,163,45185,3188005828,2792105417953811674,0.38175434,0.4094218353587008,ukOiFGGFnQJDHFgZxHMpvhD3zybF0M 22 | e,4,-56,-31500,1544188174,3096047390018154410,220,417,557517119,2774306934041974261,0.15459597,0.19113293583306745,IZTkHMLvIKuiLjhDjYMmIHxh166we4 23 | d,1,-99,5613,1213926989,-8863698443222021480,19,18736,4216440507,14933742247195536130,0.6067944,0.33639590659276175,aDxBtor7Icd9C5hnTvvw5NrIre740e 24 | a,5,36,-16974,623103518,6834444206535996609,71,29458,141047417,17448660630302620693,0.17100024,0.04429073092078406,OF7fQ37GzaZ5ikA2oMyvleKtgnLjXh 25 | e,4,-53,13788,2064155045,-691093532952651300,243,35106,2778168728,9463973906560740422,0.34515214,0.27159190516490006,0VVIHzxWtNOFLtnhjHEKjXaJOSLJfm 26 | c,2,-29,25305,-537142430,-7683452043175617798,150,31648,598822671,11759014161799384683,0.8315913,0.946325164889271,9UbObCsVkmYpJGcGrgfK90qOnwb2Lj 27 | a,1,-25,15295,383352709,4980135132406487265,231,102,3276123488,12763583666216333412,0.53796273,0.17592486905979987,XemNcT1xp61xcM1Qz3wZ1VECCnq06O 28 | c,4,123,16620,852509237,-3087630526856906991,196,33715,3566741189,4546434653720168472,0.07606989,0.819715865079681,8LIh0b6jmDGm87BmIyjdxNIpX4ugjD 29 | a,5,-31,-12907,586844478,-4862189775214031241,170,28086,1013876852,11005002152861474932,0.35319167,0.05573662213439634,MeSTAXq8gVxVjbEjgkvU9YLte0X9uE 30 | a,2,45,15673,-1899175111,398282800995316041,99,2555,145294611,8554426087132697832,0.17333257,0.6405262429561641,b3b9esRhTzFEawbs6XhpKnD9ojutHB 31 | b,3,17,14457,670497898,-2390782464845307388,255,24770,1538863055,12662506238151717757,0.34077626,0.7614304100703713,6x93sxYioWuq5c9Kkk8oTAAORM7cH0 32 | e,4,97,-13181,2047637360,6176835796788944083,158,53000,2042457019,9726016502640071617,0.7085086,0.12357539988406441,oHJMNvWuunsIMIWFnYG31RCfkOo2V7 33 | c,2,-60,-16312,-1808210365,-3368300253197863813,71,39635,2844041986,7045482583778080653,0.805363,0.6425694115212065,BJqx5WokrmrrezZA0dUbleMYkG5U2O 34 | e,1,36,-21481,-928766616,-3471238138418013024,150,52569,2610290479,7788847578701297242,0.2578469,0.7670021786149205,gpo8K5qtYePve6jyPt6xgJx4YOVjms 35 | b,5,-5,24896,1955646088,2430204191283109071,118,43655,2424630722,11429640193932435507,0.87989986,0.7328050041291218,JafwVLSVk5AVoXFuzclesQ000EE2k1 36 | a,3,13,32064,912707948,3826618523497875379,42,21463,2214035726,10771380284714693539,0.6133468,0.7325106678655877,i6RQVXKUh7MzuGMDaNclUYnFUAireU 37 | 
c,1,41,-4667,-644225469,7049620391314639084,196,48099,2125812933,15419512479294091215,0.5780736,0.9255031346434324,mzbkwXKrPeZnxg2Kn1LRF5hYSsmksS 38 | d,2,93,-12642,2053379412,6468763445799074329,147,50842,1000948272,5536487915963301239,0.4279275,0.28534428578703896,lqhzgLsXZ8JhtpeeUWWNbMz8PHI705 39 | c,3,73,-9565,-382483011,1765659477910680019,186,1535,1088543984,2906943497598597237,0.680652,0.6009475544728957,Ow5PGpfTm4dXCfTDsXAOTatXRoAydR 40 | c,3,-2,-18655,-2141999138,-3154042970870838072,251,34970,3862393166,13062025193350212516,0.034291923,0.7697753383420857,IWl0G3ZlMNf7WT8yjIB49cx7MmYOmr 41 | c,3,22,13741,-2098805236,8604102724776612452,45,2516,1362369177,196777795886465166,0.94669616,0.0494924465469434,6oIXZuIPIqEoPBvFmbt2Nxy3tryGUE 42 | b,2,63,21456,-2138770630,-2380041687053733364,181,57594,2705709344,13144161537396946288,0.09683716,0.3051364088814128,nYVJnVicpGRqKZibHyBAmtmzBXAFfT 43 | d,4,102,-24558,1991172974,-7823479531661596016,14,36599,1534194097,2240998421986827216,0.028003037,0.8824879447595726,0og6hSkhbX8AC1ktFS4kounvTzy8Vo 44 | d,1,-8,27138,-1383162419,7682021027078563072,36,64517,2861376515,9904216782086286050,0.80954456,0.9463098243875633,AFGCj7OWlEB5QfniEFgonMq90Tq5uH 45 | a,3,17,-22796,1337043149,-1282905594104562444,167,2809,754775609,732272194388185106,0.3884129,0.658671129040488,VDhtJkYjAYPykCgOU9x3v7v3t4SO1a 46 | e,2,52,23388,715235348,605432070100399212,165,56980,3314983189,7386391799827871203,0.46076488,0.980809631269599,jQimhdepw3GKmioWUlVSWeBVRKFkY3 47 | b,5,68,21576,1188285940,5717755781990389024,224,27600,974297360,9865419128970328044,0.80895734,0.7973920072996036,ioEncce3mPOXD2hWhpZpCPWGATG6GU 48 | b,2,31,23127,-800561771,-8706387435232961848,153,27034,1098639440,3343692635488765507,0.35692692,0.5590205548347534,okOkcWflkNXIy4R8LzmySyY1EC3sYd 49 | c,1,-24,-24085,-1882293856,7385529783747709716,41,48048,520189543,2402288956117186783,0.39761502,0.3600766362333053,Fi4rJeTQq4eXj8Lxg3Hja5hBVTVV5u 50 | a,4,65,-28462,-1813935549,7602389238442209730,18,363,1865307672,11378396836996498283,0.09130204,0.5593249815276734,WHmjWk2AY4c6m7DA4GitUx6nmb1yYS 51 | d,1,125,31106,-1176490478,-4306856842351827308,90,17910,3625286410,17869394731126786457,0.8882508,0.7631239070049998,dVdvo6nUD5FgCgsbOZLds28RyGTpnx 52 | b,4,17,-28070,-673237643,1904316899655860234,188,27744,933879086,3732692885824435932,0.41860116,0.40342283197779727,JHNgc2UCaiXOdmkxwDDyGhRlO0mnBQ 53 | c,2,-106,-1114,-1927628110,1080308211931669384,177,20421,141680161,7464432081248293405,0.56749094,0.565352842229935,Vp3gmWunM5A7wOC9YW2JroFqTWjvTi 54 | d,5,-59,2045,-2117946883,1170799768349713170,189,63353,1365198901,2501626630745849169,0.75173044,0.18628859265874176,F7NSTjWvQJyBburN7CXRUlbgp2dIrA 55 | d,4,55,-1471,1902023838,1252101628560265705,157,3691,811650497,1524771507450695976,0.2968701,0.5437595540422571,f9ALCzwDAKmdu7Rk2msJaB1wxe5IBX 56 | b,2,-60,-21739,-1908480893,-8897292622858103761,59,50009,2525744318,1719090662556698549,0.52930677,0.560333188635217,l7uwDoTepWwnAP0ufqtHJS3CRi7RfP 57 | d,3,-76,8809,141218956,-9110406195556445909,58,5494,1824517658,12046662515387914426,0.8557294,0.6668423897406515,Z2sWcQr0qyCJRMHDpRy3aQr7PkHtkK 58 | e,4,73,-22501,1282464673,2541794052864382235,67,21119,538589788,9575476605699527641,0.48515016,0.296036538664718,4JznSdBajNWhu4hRQwjV1FjTTxY68i 59 | b,4,-117,19316,2051224722,-5534418579506232438,133,52046,3023531799,13684453606722360110,0.62608826,0.8506721053047003,mhjME0zBHbrK6NMkytMTQzOssOa1gF 60 | 
a,4,-101,11640,1993193190,2992662416070659899,230,40566,466439833,16778113360088370541,0.3991115,0.574210838214554,NEhyk8uIx4kEULJGa8qIyFjjBcP2G6 61 | b,5,62,16337,41423756,-2274773899098124524,121,34206,2307004493,10575647935385523483,0.23794776,0.1754261586710173,qnPOOmslCJaT45buUisMRnM0rc77EK 62 | c,4,-79,5281,-237425046,373011991904079451,121,55620,2818832252,2464584078983135763,0.49774808,0.9237877978193884,t6fQUjJejPcjc04wHvHTPe55S65B4V 63 | b,2,68,15874,49866617,1179733259727844435,121,23948,3455216719,3898128009708892708,0.6306253,0.9185813970744787,802bgTGl6Bk5TlkPYYTxp5JkKyaYUA 64 | c,1,70,27752,1325868318,1241882478563331892,63,61637,473294098,4976799313755010034,0.13801557,0.5081765563442366,Ktb7GQ0N1DrxwkCkEUsTaIXk0xYinn 65 | e,2,-61,-2888,-1660426473,2553892468492435401,126,35429,4144173353,939909697866979632,0.4405142,0.9231889896940375,BPtQMxnuSPpxMExYV9YkDa6cAN7GP3 66 | e,4,74,-12612,-1885422396,1702850374057819332,130,3583,3198969145,10767179755613315144,0.5518061,0.5614503754617461,QEHVvcP8gxI6EMJIrvcnIhgzPNjIvv 67 | d,2,122,10130,-168758331,-3179091803916845592,30,794,4061635107,15695681119022625322,0.69592506,0.9748360509016578,OPwBqCEK5PWTjWaiOyL45u2NLTaDWv 68 | e,3,71,194,1436496767,-5639533800082367925,158,44507,3105312559,3998472996619161534,0.930117,0.6108938307533,pTeu0WMjBRTaNRT15rLCuEh3tBJVc5 69 | c,5,-94,-15880,2025611582,-3348824099853919681,5,40622,4268716378,12849419495718510869,0.34163946,0.4830878559436823,RilTlL1tKkPOUFuzmLydHAVZwv1OGl 70 | d,1,-72,25590,1188089983,3090286296481837049,241,832,3542840110,5885937420286765261,0.41980565,0.21535402343780985,wwXqSGKLyBQyPkonlzBNYUJTCo4LRS 71 | e,1,71,-5479,-1339586153,-3920238763788954243,123,53012,4229654142,10297218950720052365,0.73473036,0.5773498217058918,cBGc0kSm32ylBDnxogG727C0uhZEYZ 72 | e,4,96,-30336,427197269,7506304308750926996,95,48483,3521368277,5437030162957481122,0.58104324,0.42073125331890115,3BEOHQsMEFZ58VcNTOJYShTBpAPzbt 73 | a,2,-48,-18025,439738328,-313657814587041987,222,13763,3717551163,9135746610908713318,0.055064857,0.9800193410444061,ukyD7b0Efj7tNlFSRmzZ0IqkEzg2a8 74 | a,1,-56,8692,2106705285,-7811675384226570375,231,15573,1454057357,677091006469429514,0.42794758,0.2739938529235548,JN0VclewmjwYlSl8386MlWv5rEhWCz 75 | e,2,52,-12056,-1090239422,9011500141803970147,238,4168,2013662838,12565360638488684051,0.6694766,0.39144436569161134,xipQ93429ksjNcXPX5326VSg1xJZcW 76 | a,1,-5,12636,794623392,2909750622865366631,15,24022,2669374863,4776679784701509574,0.29877836,0.2537253407987472,waIGbOGl1PM6gnzZ4uuZt4E2yDWRHs 77 | b,1,12,7652,-1448995523,-5332734971209541785,136,49283,4076864659,15449267433866484283,0.6214579,0.05636955101974106,akiiY5N0I44CMwEnBL6RTBk7BRkxEj 78 | e,5,64,-26526,1689098844,8950618259486183091,224,45253,662099130,16127995415060805595,0.2897315,0.5759450483859969,56MZa5O1hVtX4c5sbnCfxuX5kDChqI 79 | c,4,-90,-2935,1579876740,6733733506744649678,254,12876,3593959807,4094315663314091142,0.5708688,0.5603062368164834,Ld2ej8NEv5zNcqU60FwpHeZKBhfpiV 80 | e,5,-86,32514,-467659022,-8012578250188146150,254,2684,2861911482,2126626171973341689,0.12559289,0.01479305307777301,gxfHWUF8XgY2KdFxigxvNEXe2V2XMl 81 | c,2,-117,-30187,-1222533990,-191957437217035800,136,47061,2293105904,12659011877190539078,0.2047385,0.9706712283358269,pLk3i59bZwd5KBZrI1FiweYTd5hteG 82 | a,3,14,28162,397430452,-452851601758273256,57,14722,431948861,8164671015278284913,0.40199697,0.07260475960924484,TtDKUZxzVxsq758G6AWPSYuZgVgbcl 83 | 
c,2,29,-3855,1354539333,4742062657200940467,81,53815,3398507249,562977550464243101,0.7124534,0.991517828651004,Oq6J4Rx6nde0YlhOIJkFsX2MsSvAQ0 84 | b,4,-59,25286,1423957796,2646602445954944051,0,61069,3570297463,15100310750150419896,0.49619365,0.04893135681998029,fuyvs0w7WsKSlXqJ1e6HFSoLmx03AG 85 | a,1,83,-14704,2143473091,-4387559599038777245,37,829,4015442341,4602675983996931623,0.89542526,0.9567595541247681,ErJFw6hzZ5fmI5r8bhE4JzlscnhKZU 86 | a,3,-12,-9168,1489733240,-1569376002217735076,206,33821,3959216334,16060348691054629425,0.9488028,0.9293883502480845,oLZ21P2JEDooxV1pU31cIxQHEeeoLu 87 | c,4,3,-30508,659422734,-6455460736227846736,133,59663,2306130875,8622584762448622224,0.16999894,0.4273123318932347,EcCuckwsF3gV1Ecgmh5v4KM8g1ozif 88 | a,3,-72,-11122,-2141451704,-2578916903971263854,83,30296,1995343206,17452974532402389080,0.94209343,0.3231750610081745,e2Gh6Ov8XkXoFdJWhl0EjwEHlMDYyG 89 | c,2,-107,-2904,-1011669561,782342092880993439,18,29527,1157161427,4403623840168496677,0.31988364,0.36936304600612724,QYlaIAnJA6r8rlAb6f59wcxvcPcWFf 90 | c,5,118,19208,-134213907,-2120241105523909127,86,57751,1229567292,16493024289408725403,0.5536642,0.9723580396501548,TTQUwpMNSXZqVBKAFvXu7OlWvKXJKX 91 | c,3,97,29106,-903316089,2874859437662206732,207,42171,3473924576,8188072741116415408,0.32792538,0.2667177795079635,HKSMQ9nTnwXCJIte1JrM1dtYnDtJ8g 92 | b,3,-101,-13217,-346989627,5456800329302529236,26,54276,243203849,17929716297117857676,0.05422181,0.09465635123783445,MXhhH1Var3OzzJCtI9VNyYvA0q8UyJ 93 | a,2,-43,13080,370975815,5881039805148485053,2,20120,2939920218,906367167997372130,0.42733806,0.16301110515739792,m6jD0LBIQWaMfenwRCTANI9eOdyyto 94 | a,5,-101,-12484,-842693467,-6140627905445351305,57,57885,2496054700,2243924747182709810,0.59520596,0.9491397432856566,QJYm7YRA3YetcBHI5wkMZeLXVmfuNy 95 | b,5,-44,15788,-629486480,5822642169425315613,13,11872,3457053821,2413406423648025909,0.44318348,0.32869374687050157,ALuRhobVWbnQTTWZdSOk0iVe8oYFhW 96 | d,4,5,-7688,702611616,6239356364381313700,4,39363,3126475872,35363005357834672,0.3766935,0.061029375346466685,H5j5ZHy1FGesOAHjkQEDYCucbpKWRu 97 | e,1,120,10837,-1331533190,6342019705133850847,245,3975,2830981072,16439861276703750332,0.6623719,0.9965400387585364,LiEBxds3X0Uw0lxiYjDqrkAaAwoiIW 98 | e,3,-95,13611,2030965207,927403809957470678,119,59134,559847112,10966649192992996919,0.5301289,0.047343434291126085,gTpyQnEODMcpsPnJMZC66gh33i3m0b 99 | d,3,123,29533,240273900,1176001466590906949,117,30972,2592330556,12883447461717956514,0.39075065,0.38870280983958583,1aOcrEGd0cOqZe2I5XBOm0nDcwtBZO 100 | b,4,47,20690,-1009656194,-2027442591571700798,200,7781,326151275,2881913079548128905,0.57360977,0.2145232647388039,52mKlRE3aHCBZtjECq6sY9OqVf8Dze 101 | e,4,30,-16110,61035129,-3356533792537910152,159,299,28774375,13526465947516666293,0.6999775,0.03968347085780355,cq4WSAIFwx3wwTUS5bp1wCe71R6U5I -------------------------------------------------------------------------------- /datafusion-examples/src/main/resources/aggregate_test_100.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datafusion-contrib/datafusion-java/c5dee4178ff9187de4cc7f5eb2e9ddd471223444/datafusion-examples/src/main/resources/aggregate_test_100.parquet -------------------------------------------------------------------------------- /datafusion-examples/src/main/resources/log4j2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 
-------------------------------------------------------------------------------- /datafusion-examples/src/main/resources/test_table.csv: -------------------------------------------------------------------------------- 1 | name,age 2 | John,23 3 | Alice,29 4 | -------------------------------------------------------------------------------- /datafusion-java/build.gradle: -------------------------------------------------------------------------------- 1 | plugins { 2 | id 'java-library' 3 | id 'maven-publish' 4 | id 'signing' 5 | id 'datafusion.java-conventions' 6 | id 'com.diffplug.spotless' 7 | id 'com.google.osdetector' 8 | } 9 | 10 | dependencies { 11 | api 'org.slf4j:slf4j-api:2.0.16' 12 | api 'org.apache.arrow:arrow-format:18.0.0' 13 | api 'org.apache.arrow:arrow-vector:18.1.0' 14 | implementation 'org.apache.arrow:arrow-c-data:18.1.0' 15 | runtimeOnly 'org.apache.arrow:arrow-memory-unsafe:18.0.0' 16 | testImplementation 'org.apache.arrow:arrow-compression:14.0.2' 17 | testImplementation 'org.junit.jupiter:junit-jupiter:5.10.2' 18 | testImplementation 'org.apache.hadoop:hadoop-client:3.4.1' 19 | testImplementation 'org.apache.hadoop:hadoop-common:3.4.1' 20 | testImplementation 'org.apache.parquet:parquet-avro:1.13.1' 21 | testImplementation 'org.apache.parquet:parquet-hadoop:1.15.0' 22 | } 23 | 24 | spotless { 25 | java { 26 | googleJavaFormat() 27 | } 28 | } 29 | 30 | java { 31 | withJavadocJar() 32 | withSourcesJar() 33 | 34 | compileJava { 35 | options.compilerArgs += ["-h", "${layout.buildDirectory.asFile.get()}/target/headers"] 36 | } 37 | } 38 | 39 | javadoc { 40 | if (JavaVersion.current().isJava9Compatible()) { 41 | options.addBooleanOption('html5', true) 42 | } 43 | } 44 | 45 | test { 46 | def libraryPath = findProperty("JNI_PATH") ?: "$rootDir/datafusion-java/build/jni_libs/dev" 47 | jvmArgs += ["-Djava.library.path=$libraryPath", "--add-opens=java.base/java.nio=ALL-UNNAMED"] 48 | useJUnitPlatform() 49 | } 50 | 51 | def cargoBinary = "${System.getProperty('user.home')}/.cargo/bin/cargo" 52 | 53 | tasks.register('cargoDevBuild', Exec) { 54 | workingDir "$rootDir/datafusion-jni" 55 | executable cargoBinary 56 | args += ['build'] 57 | } 58 | 59 | tasks.register('cargoReleaseBuild', Exec) { 60 | workingDir "$rootDir/datafusion-jni" 61 | executable cargoBinary 62 | args += ['build', '--release'] 63 | } 64 | 65 | def extensionMapping = [ 66 | "osx" : "dylib", 67 | "linux" : "so", 68 | "windows": "dll" 69 | ] 70 | 71 | tasks.register('copyDevLibrary', Sync) { 72 | def extension = extensionMapping[osdetector.os] 73 | from "${rootDir}/datafusion-jni/target/debug/libdatafusion_jni.$extension" 74 | into layout.buildDirectory.dir("jni_libs/dev") 75 | dependsOn cargoDevBuild 76 | } 77 | 78 | tasks.named("test") { 79 | dependsOn copyDevLibrary 80 | } 81 | 82 | tasks.register('copyBuiltLibrary', Copy) { 83 | def extension = extensionMapping[osdetector.os] 84 | from "${rootDir}/datafusion-jni/target/release/libdatafusion_jni.$extension" 85 | into layout.buildDirectory.dir("jni_libs/${osdetector.classifier}") 86 | dependsOn cargoReleaseBuild 87 | } 88 | 89 | def classifierOsx = 'osx-x86_64' 90 | def extensionOsx = 'dylib' 91 | def jniLibOsx = layout.buildDirectory.file("jni_libs/$classifierOsx/libdatafusion_jni.$extensionOsx") 92 | 93 | def classifierLinux = 'linux-x86_64' 94 | def extensionLinux = 'so' 95 | def jniLibLinux = layout.buildDirectory.file("jni_libs/$classifierLinux/libdatafusion_jni.$extensionLinux") 96 | 97 | def classifierWindows = "windows-x86_64" 98 | def extensionWindows = 
"dll" 99 | def jniLibWindows = layout.buildDirectory.file("jni_libs/$classifierWindows/datafusion_jni.$extensionWindows") 100 | 101 | tasks.register('jarWithOsxLib', Jar) { 102 | from sourceSets.main.output 103 | from jniLibOsx 104 | rename "libdatafusion_jni.$extensionOsx", "jni_libs/libdatafusion_jni.$extensionOsx" 105 | archiveClassifier.set(classifierOsx) 106 | } 107 | 108 | tasks.register('jarWithLinuxLib', Jar) { 109 | from sourceSets.main.output 110 | from jniLibLinux 111 | rename "libdatafusion_jni.$extensionLinux", "jni_libs/libdatafusion_jni.$extensionLinux" 112 | archiveClassifier.set(classifierLinux) 113 | } 114 | 115 | tasks.register('jarWithWindowsLib', Jar) { 116 | from sourceSets.main.output 117 | from jniLibWindows 118 | rename "datafusion_jni.$extensionWindows", "jni_libs/datafusion_jni.$extensionWindows" 119 | archiveClassifier.set(classifierWindows) 120 | } 121 | 122 | tasks.register('jarWithLib', Jar) { 123 | from sourceSets.main.output 124 | from jniLibOsx 125 | rename "libdatafusion_jni.$extensionOsx", "jni_libs/libdatafusion_jni.$extensionOsx" 126 | from jniLibLinux 127 | rename "libdatafusion_jni.$extensionLinux", "jni_libs/libdatafusion_jni.$extensionLinux" 128 | from jniLibWindows 129 | rename "datafusion_jni.$extensionWindows", "jni_libs/datafusion_jni.$extensionWindows" 130 | } 131 | 132 | publishing { 133 | publications { 134 | mavenJava(MavenPublication) { 135 | artifactId 'datafusion-java' 136 | artifact sourcesJar 137 | artifact javadocJar 138 | artifact jarWithLib 139 | pom { 140 | name = 'DataFusion Java' 141 | description = 'A Java binding to Apache Arrow DataFusion library' 142 | url = 'https://github.com/datafusion-contrib/datafusion-java' 143 | licenses { 144 | license { 145 | name = 'The Apache License, Version 2.0' 146 | url = 'http://www.apache.org/licenses/LICENSE-2.0.txt' 147 | } 148 | } 149 | developers { 150 | developer { 151 | id = 'dev' 152 | name = 'Apache Arrow Developers' 153 | email = 'dev@arrow.apache.org' 154 | } 155 | } 156 | scm { 157 | connection = 'scm:git:git@github.com:datafusion-contrib/datafusion-java.git' 158 | developerConnection = 'scm:git:https://github.com/datafusion-contrib/datafusion-java.git' 159 | url = 'https://github.com/datafusion-contrib/datafusion-java' 160 | } 161 | } 162 | pom.withXml { 163 | // Dependencies don't get mapped to the pom file due to using custom artifacts, 164 | // so add them here 165 | def dependenciesNode = asNode().appendNode('dependencies') 166 | def apiDependencies = configurations.api.allDependencies 167 | Set includedDependencies = [] 168 | apiDependencies.each { 169 | def dependencyNode = dependenciesNode.appendNode('dependency') 170 | dependencyNode.appendNode('groupId', it.getGroup()) 171 | dependencyNode.appendNode('artifactId', it.getName()) 172 | dependencyNode.appendNode('version', it.getVersion()) 173 | dependencyNode.appendNode('scope', 'compile') 174 | includedDependencies.add(String.format("%s:%s", it.getGroup(), it.getName())) 175 | } 176 | def implementationDependencies = configurations.implementation.allDependencies 177 | implementationDependencies.each { 178 | if (!includedDependencies.contains(String.format("%s:%s", it.getGroup(), it.getName()))) { 179 | def dependencyNode = dependenciesNode.appendNode('dependency') 180 | dependencyNode.appendNode('groupId', it.getGroup()) 181 | dependencyNode.appendNode('artifactId', it.getName()) 182 | dependencyNode.appendNode('version', it.getVersion()) 183 | dependencyNode.appendNode('scope', 'runtime') 184 | } 185 | } 186 | def 
runtimeDependencies = configurations.runtimeOnly.allDependencies 187 | runtimeDependencies.each { 188 | def dependencyNode = dependenciesNode.appendNode('dependency') 189 | dependencyNode.appendNode('groupId', it.getGroup()) 190 | dependencyNode.appendNode('artifactId', it.getName()) 191 | dependencyNode.appendNode('version', it.getVersion()) 192 | dependencyNode.appendNode('scope', 'runtime') 193 | } 194 | } 195 | } 196 | } 197 | repositories { 198 | maven { 199 | name = "Sonatype" 200 | def releasesRepoUrl = "https://s01.oss.sonatype.org/service/local/staging/deploy/maven2/" 201 | def snapshotsRepoUrl = "https://s01.oss.sonatype.org/content/repositories/snapshots/" 202 | url = version.endsWith('SNAPSHOT') ? snapshotsRepoUrl : releasesRepoUrl 203 | credentials { 204 | username = findProperty('ossrhUsername') ?: System.getenv("MAVEN_USERNAME") 205 | password = findProperty('ossrhPassword') ?: System.getenv("MAVEN_PASSWORD") 206 | } 207 | } 208 | } 209 | } 210 | 211 | def artifacts = publishing.publications.mavenJava.artifacts 212 | 213 | if (jniLibLinux.get().asFile.exists()) { 214 | artifacts.artifact jarWithLinuxLib 215 | } 216 | 217 | if (jniLibOsx.get().asFile.exists()) { 218 | artifacts.artifact jarWithOsxLib 219 | } 220 | 221 | if (jniLibWindows.get().asFile.exists()) { 222 | artifacts.artifact jarWithWindowsLib 223 | } 224 | 225 | 226 | signing { 227 | required { !version.endsWith("SNAPSHOT") && gradle.taskGraph.hasTask("publish") } 228 | def signingKeyId = findProperty("signingKeyId") 229 | def signingKey = findProperty("signingKey") 230 | def signingPassword = findProperty("signingPassword") 231 | useInMemoryPgpKeys(signingKeyId, signingKey, signingPassword) 232 | sign publishing.publications.mavenJava 233 | } 234 | -------------------------------------------------------------------------------- /datafusion-java/src/main/java/org/apache/arrow/datafusion/AbstractProxy.java: -------------------------------------------------------------------------------- 1 | package org.apache.arrow.datafusion; 2 | 3 | import java.util.concurrent.ConcurrentHashMap; 4 | import java.util.concurrent.ConcurrentMap; 5 | import java.util.concurrent.atomic.AtomicBoolean; 6 | import org.slf4j.Logger; 7 | import org.slf4j.LoggerFactory; 8 | 9 | abstract class AbstractProxy implements AutoCloseable, NativeProxy { 10 | private static final Logger logger = LoggerFactory.getLogger(AbstractProxy.class); 11 | private final long pointer; 12 | private final AtomicBoolean closed; 13 | private final ConcurrentMap children; 14 | 15 | protected AbstractProxy(long pointer) { 16 | this.pointer = pointer; 17 | if (logger.isDebugEnabled()) { 18 | logger.debug("Obtaining {}@{}", getClass().getSimpleName(), Long.toHexString(pointer)); 19 | } 20 | this.closed = new AtomicBoolean(false); 21 | this.children = new ConcurrentHashMap<>(); 22 | } 23 | 24 | /** 25 | * Register a child proxy object that should be closed when this object is closed 26 | * 27 | * @param child the child proxy to register 28 | */ 29 | protected final void registerChild(AbstractProxy child) { 30 | AbstractProxy old = children.putIfAbsent(child.getPointer(), child); 31 | if (old != null) { 32 | logger.warn("duplicated registry for {}: {}", child.getPointer(), old); 33 | } 34 | } 35 | 36 | /** 37 | * @return Whether the object has been closed 38 | */ 39 | protected final boolean isClosed() { 40 | return closed.get(); 41 | } 42 | 43 | @Override 44 | public final long getPointer() { 45 | return pointer; 46 | } 47 | 48 | abstract void doClose(long pointer) 
throws Exception; 49 | 50 | // Ensure native library is loaded before any proxy object is used 51 | static { 52 | JNILoader.load(); 53 | } 54 | 55 | @Override 56 | public final void close() throws Exception { 57 | if (closed.compareAndSet(false, true)) { 58 | for (AbstractProxy child : children.values()) { 59 | // check the closed state to avoid closing in a cycle 60 | if (!child.isClosed()) { 61 | child.close(); 62 | } 63 | } 64 | if (logger.isDebugEnabled()) { 65 | logger.debug("Closing {}@{}", getClass().getSimpleName(), Long.toHexString(pointer)); 66 | } 67 | doClose(pointer); 68 | } else { 69 | logger.warn("Failed to close {}, maybe already closed?", getPointer()); 70 | } 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /datafusion-java/src/main/java/org/apache/arrow/datafusion/ArrowFormat.java: -------------------------------------------------------------------------------- 1 | package org.apache.arrow.datafusion; 2 | 3 | /** The Apache Arrow IPC file format configuration. This format is also known as Feather V2 */ 4 | public class ArrowFormat extends AbstractProxy implements FileFormat { 5 | /** Create a new ArrowFormat with default options */ 6 | public ArrowFormat() { 7 | super(FileFormats.createArrow()); 8 | } 9 | 10 | @Override 11 | void doClose(long pointer) { 12 | FileFormats.destroyFileFormat(pointer); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /datafusion-java/src/main/java/org/apache/arrow/datafusion/CsvFormat.java: -------------------------------------------------------------------------------- 1 | package org.apache.arrow.datafusion; 2 | 3 | /** The CSV file format configuration */ 4 | public class CsvFormat extends AbstractProxy implements FileFormat { 5 | /** Create a new CSV format with default options */ 6 | public CsvFormat() { 7 | super(FileFormats.createCsv()); 8 | } 9 | 10 | @Override 11 | void doClose(long pointer) { 12 | FileFormats.destroyFileFormat(pointer); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /datafusion-java/src/main/java/org/apache/arrow/datafusion/DataFrame.java: -------------------------------------------------------------------------------- 1 | package org.apache.arrow.datafusion; 2 | 3 | import java.nio.file.Path; 4 | import java.util.concurrent.CompletableFuture; 5 | import org.apache.arrow.memory.BufferAllocator; 6 | import org.apache.arrow.vector.ipc.ArrowReader; 7 | 8 | /** 9 | * A DataFrame represents rectangular data organized into columns and rows. It can be {@link 10 | * #collect(BufferAllocator) collected} into {@link 11 | * org.apache.arrow.vector.ipc.message.ArrowRecordBatch batches} and read via an {@link ArrowReader 12 | * reader}.
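 *
 * <p>A minimal usage sketch (illustrative only: assumes an existing {@link SessionContext} named
 * {@code context} with a table {@code t} already registered, and {@code RootAllocator} from the
 * arrow-memory module):
 *
 * <pre>{@code
 * DataFrame df = context.sql("SELECT * FROM t").join();
 * try (BufferAllocator allocator = new RootAllocator();
 *     ArrowReader reader = df.collect(allocator).join()) {
 *   while (reader.loadNextBatch()) {
 *     // each loaded batch is available via reader.getVectorSchemaRoot()
 *   }
 * }
 * }</pre>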
13 | */ 14 | public interface DataFrame extends NativeProxy { 15 | /** 16 | * Collect the dataframe into a list of record batches 17 | * 18 | * @param allocator {@link BufferAllocator buffer allocator} to allocate vectors within the reader 19 | * @return {@link ArrowReader reader} instance to extract the data; callers are expected to {@link 20 | * ArrowReader#close()} it after use to release memory 21 | */ 22 | CompletableFuture<ArrowReader> collect(BufferAllocator allocator); 23 | 24 | /** 25 | * Execute this DataFrame and return a stream of the result data 26 | * 27 | * @param allocator {@link BufferAllocator buffer allocator} to allocate vectors for the stream 28 | * @return Stream of results 29 | */ 30 | CompletableFuture<RecordBatchStream> executeStream(BufferAllocator allocator); 31 | 32 | /** 33 | * Print results. 34 | * 35 | * @return Future that completes when the results have been printed 36 | */ 37 | CompletableFuture<Void> show(); 38 | 39 | /** 40 | * Write results to a Parquet file. 41 | * 42 | * @param path path to write the Parquet file to 43 | * @return Future that completes when the file has been written 44 | */ 45 | CompletableFuture<Void> writeParquet(Path path); 46 | 47 | /** 48 | * Write results to a CSV file. 49 | * 50 | * @param path path to write the CSV file to 51 | * @return Future that completes when the file has been written 52 | */ 53 | CompletableFuture<Void> writeCsv(Path path); 54 | 55 | /** 56 | * Converts this DataFrame into a TableProvider that can be registered as a table view using 57 | * {@link SessionContext#registerTable(String, TableProvider)} 58 | * 59 | * @return the table provider ready to be e.g. {@link SessionContext#registerTable(String, 60 | * TableProvider) registered}. 61 | */ 62 | TableProvider intoView(); 63 | } 64 | -------------------------------------------------------------------------------- /datafusion-java/src/main/java/org/apache/arrow/datafusion/DataFrames.java: -------------------------------------------------------------------------------- 1 | package org.apache.arrow.datafusion; 2 | 3 | import java.util.function.BiConsumer; 4 | import java.util.function.Consumer; 5 | 6 | /** Helper class that calls into the native stack for {@link DataFrame} */ 7 | final class DataFrames { 8 | 9 | private DataFrames() {} 10 | 11 | static native void destroyDataFrame(long pointer); 12 | 13 | static native void showDataframe(long runtime, long dataframe, Consumer<String> callback); 14 | 15 | static native void collectDataframe( 16 | long runtime, long dataframe, BiConsumer<String, byte[]> callback); 17 | 18 | static native void executeStream(long runtime, long dataframe, ObjectResultCallback callback); 19 | 20 | static native void writeParquet( 21 | long runtime, long dataframe, String path, Consumer<String> callback); 22 | 23 | static native void writeCsv(long runtime, long dataframe, String path, Consumer<String> callback); 24 | 25 | static native long intoView(long dataframe); 26 | } 27 | -------------------------------------------------------------------------------- /datafusion-java/src/main/java/org/apache/arrow/datafusion/DefaultDataFrame.java: -------------------------------------------------------------------------------- 1 | package org.apache.arrow.datafusion; 2 | 3 | import java.nio.file.Path; 4 | import java.util.concurrent.CompletableFuture; 5 | import org.apache.arrow.memory.BufferAllocator; 6 | import org.apache.arrow.vector.ipc.ArrowFileReader; 7 | import org.apache.arrow.vector.ipc.ArrowReader; 8 | import org.apache.arrow.vector.util.ByteArrayReadableSeekableByteChannel; 9 | import org.slf4j.Logger; 10 | import org.slf4j.LoggerFactory; 11 | 12 | class DefaultDataFrame extends AbstractProxy implements DataFrame { 13 | 14 | private static final Logger logger = 
LoggerFactory.getLogger(DefaultDataFrame.class); 15 | private final SessionContext context; 16 | 17 | DefaultDataFrame(SessionContext context, long pointer) { 18 | super(pointer); 19 | this.context = context; 20 | } 21 | 22 | @Override 23 | public CompletableFuture collect(BufferAllocator allocator) { 24 | CompletableFuture result = new CompletableFuture<>(); 25 | Runtime runtime = context.getRuntime(); 26 | long runtimePointer = runtime.getPointer(); 27 | long dataframe = getPointer(); 28 | DataFrames.collectDataframe( 29 | runtimePointer, 30 | dataframe, 31 | (String errString, byte[] arr) -> { 32 | if (ErrorUtil.containsError(errString)) { 33 | result.completeExceptionally(new RuntimeException(errString)); 34 | } else { 35 | logger.info("successfully completed with arr length={}", arr.length); 36 | ByteArrayReadableSeekableByteChannel byteChannel = 37 | new ByteArrayReadableSeekableByteChannel(arr); 38 | result.complete(new ArrowFileReader(byteChannel, allocator)); 39 | } 40 | }); 41 | return result; 42 | } 43 | 44 | @Override 45 | public CompletableFuture executeStream(BufferAllocator allocator) { 46 | CompletableFuture result = new CompletableFuture<>(); 47 | Runtime runtime = context.getRuntime(); 48 | long runtimePointer = runtime.getPointer(); 49 | long dataframe = getPointer(); 50 | DataFrames.executeStream( 51 | runtimePointer, 52 | dataframe, 53 | (errString, streamId) -> { 54 | if (ErrorUtil.containsError(errString)) { 55 | result.completeExceptionally(new RuntimeException(errString)); 56 | } else { 57 | result.complete(new DefaultRecordBatchStream(context, streamId, allocator)); 58 | } 59 | }); 60 | return result; 61 | } 62 | 63 | @Override 64 | public CompletableFuture show() { 65 | Runtime runtime = context.getRuntime(); 66 | long runtimePointer = runtime.getPointer(); 67 | long dataframe = getPointer(); 68 | CompletableFuture future = new CompletableFuture<>(); 69 | DataFrames.showDataframe( 70 | runtimePointer, 71 | dataframe, 72 | (String errString) -> { 73 | if (ErrorUtil.containsError(errString)) { 74 | future.completeExceptionally(new RuntimeException(errString)); 75 | } else { 76 | future.complete(null); 77 | } 78 | }); 79 | return future; 80 | } 81 | 82 | @Override 83 | public CompletableFuture writeParquet(Path path) { 84 | Runtime runtime = context.getRuntime(); 85 | long runtimePointer = runtime.getPointer(); 86 | long dataframe = getPointer(); 87 | CompletableFuture future = new CompletableFuture<>(); 88 | DataFrames.writeParquet( 89 | runtimePointer, 90 | dataframe, 91 | path.toAbsolutePath().toString(), 92 | (String errString) -> { 93 | if (ErrorUtil.containsError(errString)) { 94 | future.completeExceptionally(new RuntimeException(errString)); 95 | } else { 96 | future.complete(null); 97 | } 98 | }); 99 | return future; 100 | } 101 | 102 | @Override 103 | public CompletableFuture writeCsv(Path path) { 104 | Runtime runtime = context.getRuntime(); 105 | long runtimePointer = runtime.getPointer(); 106 | long dataframe = getPointer(); 107 | CompletableFuture future = new CompletableFuture<>(); 108 | DataFrames.writeCsv( 109 | runtimePointer, 110 | dataframe, 111 | path.toAbsolutePath().toString(), 112 | (String errString) -> { 113 | if (ErrorUtil.containsError(errString)) { 114 | future.completeExceptionally(new RuntimeException(errString)); 115 | } else { 116 | future.complete(null); 117 | } 118 | }); 119 | return future; 120 | } 121 | 122 | @Override 123 | public TableProvider intoView() { 124 | long dataframe = getPointer(); 125 | long tableProviderPointer = 
DataFrames.intoView(dataframe); 126 | return new DefaultTableProvider(tableProviderPointer); 127 | } 128 | 129 | @Override 130 | void doClose(long pointer) { 131 | DataFrames.destroyDataFrame(pointer); 132 | } 133 | } 134 | -------------------------------------------------------------------------------- /datafusion-java/src/main/java/org/apache/arrow/datafusion/DefaultRecordBatchStream.java: -------------------------------------------------------------------------------- 1 | package org.apache.arrow.datafusion; 2 | 3 | import java.util.Set; 4 | import java.util.concurrent.CompletableFuture; 5 | import org.apache.arrow.c.ArrowArray; 6 | import org.apache.arrow.c.ArrowSchema; 7 | import org.apache.arrow.c.CDataDictionaryProvider; 8 | import org.apache.arrow.c.Data; 9 | import org.apache.arrow.memory.BufferAllocator; 10 | import org.apache.arrow.vector.VectorSchemaRoot; 11 | import org.apache.arrow.vector.dictionary.Dictionary; 12 | import org.apache.arrow.vector.types.pojo.Schema; 13 | 14 | class DefaultRecordBatchStream extends AbstractProxy implements RecordBatchStream { 15 | private final SessionContext context; 16 | private final BufferAllocator allocator; 17 | private final CDataDictionaryProvider dictionaryProvider; 18 | private VectorSchemaRoot vectorSchemaRoot = null; 19 | private boolean initialized = false; 20 | 21 | DefaultRecordBatchStream(SessionContext context, long pointer, BufferAllocator allocator) { 22 | super(pointer); 23 | this.context = context; 24 | this.allocator = allocator; 25 | this.dictionaryProvider = new CDataDictionaryProvider(); 26 | } 27 | 28 | @Override 29 | void doClose(long pointer) { 30 | destroy(pointer); 31 | dictionaryProvider.close(); 32 | if (initialized) { 33 | vectorSchemaRoot.close(); 34 | } 35 | } 36 | 37 | @Override 38 | public VectorSchemaRoot getVectorSchemaRoot() { 39 | ensureInitialized(); 40 | return vectorSchemaRoot; 41 | } 42 | 43 | @Override 44 | public CompletableFuture loadNextBatch() { 45 | ensureInitialized(); 46 | Runtime runtime = context.getRuntime(); 47 | long runtimePointer = runtime.getPointer(); 48 | long recordBatchStream = getPointer(); 49 | CompletableFuture result = new CompletableFuture<>(); 50 | next( 51 | runtimePointer, 52 | recordBatchStream, 53 | (errString, arrowArrayAddress) -> { 54 | if (ErrorUtil.containsError(errString)) { 55 | result.completeExceptionally(new RuntimeException(errString)); 56 | } else if (arrowArrayAddress == 0) { 57 | // Reached end of stream 58 | result.complete(false); 59 | } else { 60 | try { 61 | ArrowArray arrowArray = ArrowArray.wrap(arrowArrayAddress); 62 | Data.importIntoVectorSchemaRoot( 63 | allocator, arrowArray, vectorSchemaRoot, dictionaryProvider); 64 | result.complete(true); 65 | } catch (Exception e) { 66 | result.completeExceptionally(e); 67 | } 68 | } 69 | }); 70 | return result; 71 | } 72 | 73 | @Override 74 | public Dictionary lookup(long id) { 75 | return dictionaryProvider.lookup(id); 76 | } 77 | 78 | @Override 79 | public Set getDictionaryIds() { 80 | return dictionaryProvider.getDictionaryIds(); 81 | } 82 | 83 | private void ensureInitialized() { 84 | if (!initialized) { 85 | Schema schema = getSchema(); 86 | this.vectorSchemaRoot = VectorSchemaRoot.create(schema, allocator); 87 | } 88 | initialized = true; 89 | } 90 | 91 | private Schema getSchema() { 92 | long recordBatchStream = getPointer(); 93 | // Native method is not async, but use a future to store the result for convenience 94 | CompletableFuture result = new CompletableFuture<>(); 95 | getSchema( 96 | 
recordBatchStream, 97 | (errString, arrowSchemaAddress) -> { 98 | if (ErrorUtil.containsError(errString)) { 99 | result.completeExceptionally(new RuntimeException(errString)); 100 | } else { 101 | try { 102 | ArrowSchema arrowSchema = ArrowSchema.wrap(arrowSchemaAddress); 103 | Schema schema = Data.importSchema(allocator, arrowSchema, dictionaryProvider); 104 | result.complete(schema); 105 | // The FFI schema will be released from rust when it is dropped 106 | } catch (Exception e) { 107 | result.completeExceptionally(e); 108 | } 109 | } 110 | }); 111 | return result.join(); 112 | } 113 | 114 | private static native void getSchema(long pointer, ObjectResultCallback callback); 115 | 116 | private static native void next(long runtime, long pointer, ObjectResultCallback callback); 117 | 118 | private static native void destroy(long pointer); 119 | } 120 | -------------------------------------------------------------------------------- /datafusion-java/src/main/java/org/apache/arrow/datafusion/DefaultSessionContext.java: -------------------------------------------------------------------------------- 1 | package org.apache.arrow.datafusion; 2 | 3 | import java.nio.file.Path; 4 | import java.util.Optional; 5 | import java.util.concurrent.CompletableFuture; 6 | import java.util.function.Consumer; 7 | import org.slf4j.Logger; 8 | import org.slf4j.LoggerFactory; 9 | 10 | class DefaultSessionContext extends AbstractProxy implements SessionContext { 11 | 12 | private static final Logger logger = LoggerFactory.getLogger(DefaultSessionContext.class); 13 | 14 | static native void querySql( 15 | long runtime, long context, String sql, ObjectResultCallback callback); 16 | 17 | static native void registerCsv( 18 | long runtime, long context, String name, String path, Consumer callback); 19 | 20 | static native void registerParquet( 21 | long runtime, long context, String name, String path, Consumer callback); 22 | 23 | static native long registerTable(long context, String name, long tableProvider) throws Exception; 24 | 25 | @Override 26 | public CompletableFuture sql(String sql) { 27 | long runtime = getRuntime().getPointer(); 28 | CompletableFuture future = new CompletableFuture<>(); 29 | querySql( 30 | runtime, 31 | getPointer(), 32 | sql, 33 | (errMessage, dataframeId) -> { 34 | if (null != errMessage && !errMessage.isEmpty()) { 35 | future.completeExceptionally(new RuntimeException(errMessage)); 36 | } else { 37 | DefaultDataFrame frame = new DefaultDataFrame(DefaultSessionContext.this, dataframeId); 38 | future.complete(frame); 39 | } 40 | }); 41 | return future; 42 | } 43 | 44 | @Override 45 | public CompletableFuture registerCsv(String name, Path path) { 46 | long runtime = getRuntime().getPointer(); 47 | CompletableFuture future = new CompletableFuture<>(); 48 | registerCsv( 49 | runtime, 50 | getPointer(), 51 | name, 52 | path.toAbsolutePath().toString(), 53 | (errMessage) -> voidCallback(future, errMessage)); 54 | return future; 55 | } 56 | 57 | @Override 58 | public CompletableFuture registerParquet(String name, Path path) { 59 | long runtime = getRuntime().getPointer(); 60 | CompletableFuture future = new CompletableFuture<>(); 61 | registerParquet( 62 | runtime, 63 | getPointer(), 64 | name, 65 | path.toAbsolutePath().toString(), 66 | (errMessage) -> voidCallback(future, errMessage)); 67 | return future; 68 | } 69 | 70 | @Override 71 | public Optional registerTable(String name, TableProvider tableProvider) 72 | throws Exception { 73 | long previouslyRegistered = registerTable(getPointer(), 
name, tableProvider.getPointer()); 74 | if (previouslyRegistered == 0) { 75 | return Optional.empty(); 76 | } 77 | return Optional.of(new DefaultTableProvider(previouslyRegistered)); 78 | } 79 | 80 | private void voidCallback(CompletableFuture future, String errMessage) { 81 | if (null != errMessage && !errMessage.isEmpty()) { 82 | future.completeExceptionally(new RuntimeException(errMessage)); 83 | } else { 84 | future.complete(null); 85 | } 86 | } 87 | 88 | @Override 89 | public Runtime getRuntime() { 90 | return runtime; 91 | } 92 | 93 | private final TokioRuntime runtime; 94 | 95 | DefaultSessionContext(long pointer) { 96 | super(pointer); 97 | this.runtime = TokioRuntime.create(); 98 | registerChild(runtime); 99 | } 100 | 101 | @Override 102 | void doClose(long pointer) throws Exception { 103 | SessionContexts.destroySessionContext(pointer); 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /datafusion-java/src/main/java/org/apache/arrow/datafusion/DefaultTableProvider.java: -------------------------------------------------------------------------------- 1 | package org.apache.arrow.datafusion; 2 | 3 | class DefaultTableProvider extends AbstractProxy implements TableProvider { 4 | DefaultTableProvider(long pointer) { 5 | super(pointer); 6 | } 7 | 8 | @Override 9 | void doClose(long pointer) throws Exception { 10 | TableProviders.destroyTableProvider(pointer); 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /datafusion-java/src/main/java/org/apache/arrow/datafusion/ErrorUtil.java: -------------------------------------------------------------------------------- 1 | package org.apache.arrow.datafusion; 2 | 3 | class ErrorUtil { 4 | 5 | private ErrorUtil() {} 6 | 7 | static boolean containsError(String errString) { 8 | return errString != null && !errString.isEmpty(); 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /datafusion-java/src/main/java/org/apache/arrow/datafusion/ExecutionOptions.java: -------------------------------------------------------------------------------- 1 | package org.apache.arrow.datafusion; 2 | 3 | /** Configures options related to query execution */ 4 | @SuppressWarnings("UnusedReturnValue") 5 | public class ExecutionOptions { 6 | private final SessionConfig config; 7 | 8 | ExecutionOptions(SessionConfig config) { 9 | this.config = config; 10 | } 11 | 12 | /** 13 | * Get execution options related to reading Parquet data 14 | * 15 | * @return {@link ParquetOptions} instance for this config 16 | */ 17 | public ParquetOptions parquet() { 18 | return new ParquetOptions(config); 19 | } 20 | 21 | /** 22 | * Get the batch size 23 | * 24 | * @return batch size 25 | */ 26 | public long batchSize() { 27 | return SessionConfig.getExecutionOptionsBatchSize(config.getPointer()); 28 | } 29 | 30 | /** 31 | * Set the size of batches to use when creating new data batches 32 | * 33 | * @param batchSize the batch size to set 34 | * @return the modified {@link ExecutionOptions} instance 35 | */ 36 | public ExecutionOptions withBatchSize(long batchSize) { 37 | SessionConfig.setExecutionOptionsBatchSize(config.getPointer(), batchSize); 38 | return this; 39 | } 40 | 41 | /** 42 | * Get whether batch coalescing is enabled 43 | * 44 | * @return whether batch coalescing is enabled 45 | */ 46 | public boolean coalesceBatches() { 47 | return SessionConfig.getExecutionOptionsCoalesceBatches(config.getPointer()); 48 | } 49 | 50 | /** 51 | 
* Set whether to enable batch coalescing 52 | * 53 | * @param enabled whether to enable batch coalescing 54 | * @return the modified {@link ExecutionOptions} instance 55 | */ 56 | public ExecutionOptions withCoalesceBatches(boolean enabled) { 57 | SessionConfig.setExecutionOptionsCoalesceBatches(config.getPointer(), enabled); 58 | return this; 59 | } 60 | 61 | /** 62 | * Get whether statistics collection is enabled 63 | * 64 | * @return whether statistics collection is enabled 65 | */ 66 | public boolean collectStatistics() { 67 | return SessionConfig.getExecutionOptionsCollectStatistics(config.getPointer()); 68 | } 69 | 70 | /** 71 | * Set whether to enable statistics collection 72 | * 73 | * @param enabled whether to enable statistics collection 74 | * @return the modified {@link ExecutionOptions} instance 75 | */ 76 | public ExecutionOptions withCollectStatistics(boolean enabled) { 77 | SessionConfig.setExecutionOptionsCollectStatistics(config.getPointer(), enabled); 78 | return this; 79 | } 80 | 81 | /** 82 | * Get the target number of partitions 83 | * 84 | * @return number of partitions 85 | */ 86 | public long targetPartitions() { 87 | return SessionConfig.getExecutionOptionsTargetPartitions(config.getPointer()); 88 | } 89 | 90 | /** 91 | * Set the target number of partitions 92 | * 93 | * @param targetPartitions the number of partitions to set 94 | * @return the modified {@link ExecutionOptions} instance 95 | */ 96 | public ExecutionOptions withTargetPartitions(long targetPartitions) { 97 | SessionConfig.setExecutionOptionsTargetPartitions(config.getPointer(), targetPartitions); 98 | return this; 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /datafusion-java/src/main/java/org/apache/arrow/datafusion/FileFormat.java: -------------------------------------------------------------------------------- 1 | package org.apache.arrow.datafusion; 2 | 3 | /** Interface for file formats that can provide table data */ 4 | public interface FileFormat extends AutoCloseable, NativeProxy {} 5 | -------------------------------------------------------------------------------- /datafusion-java/src/main/java/org/apache/arrow/datafusion/FileFormats.java: -------------------------------------------------------------------------------- 1 | package org.apache.arrow.datafusion; 2 | 3 | class FileFormats { 4 | 5 | private FileFormats() {} 6 | 7 | static native long createArrow(); 8 | 9 | static native long createCsv(); 10 | 11 | static native long createParquet(); 12 | 13 | static native void destroyFileFormat(long pointer); 14 | } 15 | -------------------------------------------------------------------------------- /datafusion-java/src/main/java/org/apache/arrow/datafusion/JNILoader.java: -------------------------------------------------------------------------------- 1 | package org.apache.arrow.datafusion; 2 | 3 | import java.io.File; 4 | import java.io.FileOutputStream; 5 | import java.io.IOException; 6 | import java.io.InputStream; 7 | import java.util.concurrent.atomic.AtomicBoolean; 8 | import org.slf4j.Logger; 9 | import org.slf4j.LoggerFactory; 10 | 11 | final class JNILoader { 12 | 13 | private JNILoader() {} 14 | 15 | private static final Logger logger = LoggerFactory.getLogger(JNILoader.class); 16 | 17 | private static final AtomicBoolean loaded = new AtomicBoolean(false); 18 | 19 | private enum OsName { 20 | Windows, 21 | Osx, 22 | Linux 23 | } 24 | 25 | private static final String libraryName = "datafusion_jni"; 26 | 27 | private static 
final String ERROR_MSG = 28 | String.format( 29 | "Unsupported OS/arch (`%s' detected), cannot find `%s' or load `%s' from system libraries. " 30 | + "Please try building from source the jar or providing %s in your system.", 31 | getOsName(), getResourceName(), libraryName, libraryName); 32 | 33 | private static OsName getOsName() { 34 | String os = System.getProperty("os.name").toLowerCase().replace(' ', '_'); 35 | if (os.contains("win")) { 36 | return OsName.Windows; 37 | } else if (os.startsWith("mac") || os.contains("os_x")) { 38 | return OsName.Osx; 39 | } else { 40 | return OsName.Linux; 41 | } 42 | } 43 | 44 | private static String getLibraryFileName() { 45 | String prefix = "lib"; 46 | if (getOsName() == OsName.Windows) { 47 | prefix = ""; 48 | } 49 | return prefix + libraryName + "." + getExtension(); 50 | } 51 | 52 | /** 53 | * @return the absolute path in the jar file for the jni library 54 | */ 55 | private static String getResourceName() { 56 | return "/jni_libs/" + getLibraryFileName(); 57 | } 58 | 59 | private static String getExtension() { 60 | OsName osName = getOsName(); 61 | if (osName == OsName.Linux) { 62 | return "so"; 63 | } else if (osName == OsName.Osx) { 64 | return "dylib"; 65 | } else if (osName == OsName.Windows) { 66 | return "dll"; 67 | } 68 | throw new IllegalStateException("Cannot determine the extension for " + osName); 69 | } 70 | 71 | static synchronized void load() { 72 | if (loaded.get()) { 73 | logger.debug("{} already loaded, returning", libraryName); 74 | return; 75 | } 76 | InputStream is = JNILoader.class.getResourceAsStream(getResourceName()); 77 | if (is == null) { 78 | try { 79 | System.loadLibrary(libraryName); 80 | loaded.set(true); 81 | logger.debug("successfully loaded {} from library path", libraryName); 82 | return; 83 | } catch (UnsatisfiedLinkError e) { 84 | UnsatisfiedLinkError err = 85 | new UnsatisfiedLinkError(String.format("%s\n%s", e.getMessage(), ERROR_MSG)); 86 | err.setStackTrace(e.getStackTrace()); 87 | throw err; 88 | } 89 | } 90 | final File tempFile = extractToTempFile(is); 91 | try { 92 | System.load(tempFile.getAbsolutePath()); 93 | } catch (UnsatisfiedLinkError le1) { 94 | // fall-back to loading from the system library path 95 | try { 96 | System.loadLibrary(libraryName); 97 | logger.debug("successfully loaded {} from extracted lib file", libraryName); 98 | loaded.set(true); 99 | } catch (UnsatisfiedLinkError le2) { 100 | // display error in case problem with loading from temp folder 101 | // and from system library path - concatenate both messages 102 | UnsatisfiedLinkError err = 103 | new UnsatisfiedLinkError( 104 | String.format("%s\n%s\n%s", le1.getMessage(), le2.getMessage(), ERROR_MSG)); 105 | err.setStackTrace(le2.getStackTrace()); 106 | throw err; 107 | } 108 | } 109 | } 110 | 111 | private static File extractToTempFile(InputStream is) { 112 | final File tempFile; 113 | try { 114 | tempFile = File.createTempFile(libraryName, "." 
+ getExtension(), null); 115 | tempFile.deleteOnExit(); 116 | } catch (IOException e) { 117 | throw new IllegalStateException("Cannot create temporary files", e); 118 | } 119 | try (InputStream in = is; 120 | FileOutputStream out = new FileOutputStream(tempFile)) { 121 | byte[] buf = new byte[8192]; 122 | while (true) { 123 | int read = in.read(buf); 124 | if (read == -1) { 125 | break; 126 | } 127 | out.write(buf, 0, read); 128 | } 129 | } catch (IOException e) { 130 | throw new IllegalStateException("Failed to extract lib file and write to temp file", e); 131 | } 132 | return tempFile; 133 | } 134 | } 135 | -------------------------------------------------------------------------------- /datafusion-java/src/main/java/org/apache/arrow/datafusion/ListingOptions.java: -------------------------------------------------------------------------------- 1 | package org.apache.arrow.datafusion; 2 | 3 | /** Configures options for a {@link ListingTable} */ 4 | public class ListingOptions extends AbstractProxy implements AutoCloseable { 5 | /** A Builder for {@link ListingOptions} instances */ 6 | public static class Builder { 7 | private final FileFormat format; 8 | private String fileExtension = ""; 9 | private boolean collectStat = true; 10 | 11 | /** 12 | * Create a new {@link ListingOptions} builder 13 | * 14 | * @param format The file format used by data files in the listing table 15 | */ 16 | public Builder(FileFormat format) { 17 | this.format = format; 18 | } 19 | 20 | /** 21 | * Specify a suffix used to filter files in the listing location 22 | * 23 | * @param fileExtension The file suffix to filter on 24 | * @return This builder 25 | */ 26 | public Builder withFileExtension(String fileExtension) { 27 | this.fileExtension = fileExtension; 28 | return this; 29 | } 30 | 31 | /** 32 | * Specify whether to collect statistics from files 33 | * 34 | * @param collectStat whether to collect statistics 35 | * @return This builder 36 | */ 37 | public Builder withCollectStat(boolean collectStat) { 38 | this.collectStat = collectStat; 39 | return this; 40 | } 41 | 42 | /** 43 | * Build a new {@link ListingOptions} instance from the configured builder 44 | * 45 | * @return The built {@link ListingOptions} 46 | */ 47 | public ListingOptions build() { 48 | return new ListingOptions(this); 49 | } 50 | } 51 | 52 | /** 53 | * Create a builder for listing options 54 | * 55 | * @param format The file format used by data files in the listing table 56 | * @return A new {@link Builder} instance 57 | */ 58 | public static Builder builder(FileFormat format) { 59 | return new Builder(format); 60 | } 61 | 62 | /** 63 | * Construct ListingOptions from a Builder 64 | * 65 | * @param builder The builder to use 66 | */ 67 | private ListingOptions(Builder builder) { 68 | super(create(builder.format.getPointer(), builder.fileExtension, builder.collectStat)); 69 | } 70 | 71 | @Override 72 | void doClose(long pointer) { 73 | destroy(pointer); 74 | } 75 | 76 | private static native long create(long format, String fileExtension, boolean collectStat); 77 | 78 | private static native void destroy(long pointer); 79 | } 80 | -------------------------------------------------------------------------------- /datafusion-java/src/main/java/org/apache/arrow/datafusion/ListingTable.java: -------------------------------------------------------------------------------- 1 | package org.apache.arrow.datafusion; 2 | 3 | import java.util.concurrent.CompletableFuture; 4 | 5 | /** A data source composed of multiple files that share a schema */ 6 
| public class ListingTable extends AbstractProxy implements TableProvider { 7 | /** 8 | * Create a new listing table 9 | * 10 | * @param config The listing table configuration 11 | */ 12 | public ListingTable(ListingTableConfig config) { 13 | super(createListingTable(config)); 14 | } 15 | 16 | private static long createListingTable(ListingTableConfig config) { 17 | CompletableFuture<Long> result = new CompletableFuture<>(); 18 | create( 19 | config.getPointer(), 20 | (errString, tableId) -> { 21 | if (ErrorUtil.containsError(errString)) { 22 | result.completeExceptionally(new RuntimeException(errString)); 23 | } else { 24 | result.complete(tableId); 25 | } 26 | }); 27 | return result.join(); 28 | } 29 | 30 | @Override 31 | void doClose(long pointer) { 32 | TableProviders.destroyTableProvider(pointer); 33 | } 34 | 35 | private static native void create(long config, ObjectResultCallback result); 36 | } 37 | -------------------------------------------------------------------------------- /datafusion-java/src/main/java/org/apache/arrow/datafusion/ListingTableConfig.java: -------------------------------------------------------------------------------- 1 | package org.apache.arrow.datafusion; 2 | 3 | import java.net.URI; 4 | import java.nio.file.Path; 5 | import java.util.Arrays; 6 | import java.util.concurrent.CompletableFuture; 7 | 8 | /** Configuration for creating a {@link ListingTable} */ 9 | public class ListingTableConfig extends AbstractProxy implements AutoCloseable { 10 | /** A Builder for {@link ListingTableConfig} instances */ 11 | public static class Builder { 12 | private final String[] tablePaths; 13 | private ListingOptions options = null; 14 | 15 | /** 16 | * Create a new {@link Builder} 17 | * 18 | * @param tablePath The path where data files are stored. This may be a file system path or a 19 | * URL with a scheme. When no scheme is provided, glob expressions may be used to filter 20 | * files. 21 | */ 22 | public Builder(String tablePath) { 23 | this(new String[] {tablePath}); 24 | } 25 | 26 | /** 27 | * Create a new {@link Builder} 28 | * 29 | * @param tablePaths The paths where data files are stored. This may be an array of file system 30 | * paths or an array of URLs with a scheme. When no scheme is provided, glob expressions may 31 | * be used to filter files. 32 | */ 33 | public Builder(String[] tablePaths) { 34 | this.tablePaths = tablePaths; 35 | } 36 | 37 | /** 38 | * Specify the {@link ListingOptions} to use 39 | * 40 | * @param options The {@link ListingOptions} to use 41 | * @return this Builder instance 42 | */ 43 | public Builder withListingOptions(ListingOptions options) { 44 | this.options = options; 45 | return this; 46 | } 47 | 48 | /** 49 | * Create the listing table config. This is async as the schema may need to be inferred 50 | * 51 | * @param context The {@link SessionContext} to use when inferring the schema 52 | * @return Future that will complete with the table config 53 | */ 54 | public CompletableFuture<ListingTableConfig> build(SessionContext context) { 55 | return createListingTableConfig(this, context).thenApply(ListingTableConfig::new); 56 | } 57 | } 58 | 59 | /** 60 | * Create a new {@link Builder} for a {@link ListingTableConfig} 61 | * 62 | * @param tablePath The path where data files are stored. This may be a file system path or a URL 63 | * with a scheme. When no scheme is specified, glob expressions may be used to filter files.
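 *
 * <p>A sketch of typical use (the path, the file extension, and {@code context} are
 * illustrative):
 *
 * <pre>{@code
 * try (ParquetFormat format = new ParquetFormat();
 *     ListingOptions options =
 *         ListingOptions.builder(format).withFileExtension(".parquet").build();
 *     ListingTableConfig config =
 *         ListingTableConfig.builder("/data/events")
 *             .withListingOptions(options)
 *             .build(context)
 *             .join();
 *     ListingTable table = new ListingTable(config)) {
 *   context.registerTable("events", table);
 * }
 * }</pre>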
64 | * @return A new {@link Builder} instance 65 | */ 66 | public static Builder builder(String tablePath) { 67 | return new Builder(tablePath); 68 | } 69 | 70 | /** 71 | * Create a new {@link Builder} for a {@link ListingTableConfig} from a file path 72 | * 73 | * @param tablePath The path where data files are stored 74 | * @return A new {@link Builder} instance 75 | */ 76 | public static Builder builder(Path tablePath) { 77 | return new Builder(tablePath.toString()); 78 | } 79 | 80 | /** 81 | * Create a new {@link Builder} for a {@link ListingTableConfig} from an array of paths 82 | * 83 | * @param tablePaths The path array where data files are stored 84 | * @return A new {@link Builder} instance 85 | */ 86 | public static Builder builder(Path[] tablePaths) { 87 | String[] pathStrings = 88 | Arrays.stream(tablePaths) 89 | .map(path -> path.toString()) 90 | .toArray(length -> new String[length]); 91 | return new Builder(pathStrings); 92 | } 93 | 94 | /** 95 | * Create a new {@link Builder} for a {@link ListingTableConfig} from a URI 96 | * 97 | * @param tablePath The location where data files are stored 98 | * @return A new {@link Builder} instance 99 | */ 100 | public static Builder builder(URI tablePath) { 101 | return new Builder(tablePath.toString()); 102 | } 103 | 104 | private ListingTableConfig(long pointer) { 105 | super(pointer); 106 | } 107 | 108 | private static CompletableFuture<Long> createListingTableConfig( 109 | Builder builder, SessionContext context) { 110 | CompletableFuture<Long> future = new CompletableFuture<>(); 111 | Runtime runtime = context.getRuntime(); 112 | create( 113 | runtime.getPointer(), 114 | context.getPointer(), 115 | builder.tablePaths, 116 | builder.options == null ? 0 : builder.options.getPointer(), 117 | (errMessage, configId) -> { 118 | if (ErrorUtil.containsError(errMessage)) { 119 | future.completeExceptionally(new RuntimeException(errMessage)); 120 | } else { 121 | future.complete(configId); 122 | } 123 | }); 124 | return future; 125 | } 126 | 127 | @Override 128 | void doClose(long pointer) { 129 | destroy(pointer); 130 | } 131 | 132 | private static native void create( 133 | long runtime, long context, String[] tablePaths, long options, ObjectResultCallback callback); 134 | 135 | private static native void destroy(long pointer); 136 | } 137 | -------------------------------------------------------------------------------- /datafusion-java/src/main/java/org/apache/arrow/datafusion/NativeProxy.java: -------------------------------------------------------------------------------- 1 | package org.apache.arrow.datafusion; 2 | 3 | /** 4 | * A native proxy points to a Rust-managed object; the stored pointer is used when the object's 5 | * resources need to be released.
6 | */ 7 | interface NativeProxy { 8 | 9 | /** 10 | * Get a pointer to the native object 11 | * 12 | * @return Pointer value as a long 13 | */ 14 | long getPointer(); 15 | } 16 | -------------------------------------------------------------------------------- /datafusion-java/src/main/java/org/apache/arrow/datafusion/ObjectResultCallback.java: -------------------------------------------------------------------------------- 1 | package org.apache.arrow.datafusion; 2 | 3 | interface ObjectResultCallback { 4 | void callback(String errMessage, long value); 5 | } 6 | -------------------------------------------------------------------------------- /datafusion-java/src/main/java/org/apache/arrow/datafusion/ParquetFormat.java: -------------------------------------------------------------------------------- 1 | package org.apache.arrow.datafusion; 2 | 3 | /** The Apache Parquet file format configuration */ 4 | public class ParquetFormat extends AbstractProxy implements FileFormat { 5 | /** Create new ParquetFormat with default options */ 6 | public ParquetFormat() { 7 | super(FileFormats.createParquet()); 8 | } 9 | 10 | @Override 11 | void doClose(long pointer) { 12 | FileFormats.destroyFileFormat(pointer); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /datafusion-java/src/main/java/org/apache/arrow/datafusion/ParquetOptions.java: -------------------------------------------------------------------------------- 1 | package org.apache.arrow.datafusion; 2 | 3 | import java.util.Optional; 4 | 5 | /** Configures options specific to reading Parquet data */ 6 | @SuppressWarnings("UnusedReturnValue") 7 | public class ParquetOptions { 8 | private final SessionConfig config; 9 | 10 | ParquetOptions(SessionConfig config) { 11 | this.config = config; 12 | } 13 | 14 | /** 15 | * Get whether parquet data page level metadata (Page Index) statistics are used 16 | * 17 | * @return whether using the page index is enabled 18 | */ 19 | public boolean enablePageIndex() { 20 | return SessionConfig.getParquetOptionsEnablePageIndex(config.getPointer()); 21 | } 22 | 23 | /** 24 | * Set whether to use parquet data page level metadata (Page Index) statistics to reduce the 25 | * number of rows decoded. 
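 *
 * <p>For example (a sketch; assumes an existing {@link SessionConfig} named {@code
 * sessionConfig}):
 *
 * <pre>{@code
 * sessionConfig.executionOptions().parquet().withEnablePageIndex(true);
 * }</pre>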
26 | * 27 | * @param enabled whether using the page index is enabled 28 | * @return the modified {@link ParquetOptions} instance 29 | */ 30 | public ParquetOptions withEnablePageIndex(boolean enabled) { 31 | SessionConfig.setParquetOptionsEnablePageIndex(config.getPointer(), enabled); 32 | return this; 33 | } 34 | 35 | /** 36 | * Get whether pruning is enabled, meaning row groups may be skipped during reading based on their metadata 37 | * 38 | * @return whether pruning is enabled 39 | */ 40 | public boolean pruning() { 41 | return SessionConfig.getParquetOptionsPruning(config.getPointer()); 42 | } 43 | 44 | /** 45 | * Set whether pruning is enabled, meaning row groups may be skipped during reading based on their metadata 46 | * 47 | * @param enabled whether to enable pruning 48 | * @return the modified {@link ParquetOptions} instance 49 | */ 50 | public ParquetOptions withPruning(boolean enabled) { 51 | SessionConfig.setParquetOptionsPruning(config.getPointer(), enabled); 52 | return this; 53 | } 54 | 55 | /** 56 | * Get whether file metadata is skipped, to avoid schema conflicts 57 | * 58 | * @return whether metadata is skipped 59 | */ 60 | public boolean skipMetadata() { 61 | return SessionConfig.getParquetOptionsSkipMetadata(config.getPointer()); 62 | } 63 | 64 | /** 65 | * Set whether file metadata is skipped, to avoid schema conflicts 66 | * 67 | * @param enabled whether to skip metadata 68 | * @return the modified {@link ParquetOptions} instance 69 | */ 70 | public ParquetOptions withSkipMetadata(boolean enabled) { 71 | SessionConfig.setParquetOptionsSkipMetadata(config.getPointer(), enabled); 72 | return this; 73 | } 74 | 75 | /** 76 | * Get the metadata size hint 77 | * 78 | * @return metadata size hint value 79 | */ 80 | public Optional<Long> metadataSizeHint() { 81 | long sizeHint = SessionConfig.getParquetOptionsMetadataSizeHint(config.getPointer()); 82 | return sizeHint < 0 ? Optional.empty() : Optional.of(sizeHint); 83 | } 84 | 85 | /** 86 | * Set the metadata size hint, which is used to attempt to read the full metadata at once rather 87 | * than needing one read to get the metadata size and then a second read for the metadata itself.
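 *
 * <p>For example, to hint 64 KiB (an illustrative value, set on a {@code parquetOptions}
 * instance):
 *
 * <pre>{@code
 * parquetOptions.withMetadataSizeHint(Optional.of(64L * 1024));
 * }</pre>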
88 | * 89 | * @param metadataSizeHint the metadata size hint 90 | * @return the modified {@link ParquetOptions} instance 91 | */ 92 | public ParquetOptions withMetadataSizeHint(Optional<Long> metadataSizeHint) { 93 | long value = -1L; 94 | if (metadataSizeHint.isPresent()) { 95 | value = metadataSizeHint.get(); 96 | if (value < 0) { 97 | throw new RuntimeException("metadataSizeHint cannot be negative"); 98 | } 99 | } 100 | SessionConfig.setParquetOptionsMetadataSizeHint(config.getPointer(), value); 101 | return this; 102 | } 103 | 104 | /** 105 | * Get whether filter pushdown is enabled, so filters are applied during parquet decoding 106 | * 107 | * @return whether filter pushdown is enabled 108 | */ 109 | public boolean pushdownFilters() { 110 | return SessionConfig.getParquetOptionsPushdownFilters(config.getPointer()); 111 | } 112 | 113 | /** 114 | * Set whether filter pushdown is enabled, so filters are applied during parquet decoding 115 | * 116 | * @param enabled whether to push down filters 117 | * @return the modified {@link ParquetOptions} instance 118 | */ 119 | public ParquetOptions withPushdownFilters(boolean enabled) { 120 | SessionConfig.setParquetOptionsPushdownFilters(config.getPointer(), enabled); 121 | return this; 122 | } 123 | 124 | /** 125 | * Get whether filter reordering is enabled to minimize evaluation cost 126 | * 127 | * @return whether filter reordering is enabled 128 | */ 129 | public boolean reorderFilters() { 130 | return SessionConfig.getParquetOptionsReorderFilters(config.getPointer()); 131 | } 132 | 133 | /** 134 | * Set whether filter reordering is enabled to minimize evaluation cost 135 | * 136 | * @param enabled whether to reorder filters 137 | * @return the modified {@link ParquetOptions} instance 138 | */ 139 | public ParquetOptions withReorderFilters(boolean enabled) { 140 | SessionConfig.setParquetOptionsReorderFilters(config.getPointer(), enabled); 141 | return this; 142 | } 143 | } 144 | -------------------------------------------------------------------------------- /datafusion-java/src/main/java/org/apache/arrow/datafusion/RecordBatchStream.java: -------------------------------------------------------------------------------- 1 | package org.apache.arrow.datafusion; 2 | 3 | import java.util.concurrent.CompletableFuture; 4 | import org.apache.arrow.vector.VectorSchemaRoot; 5 | import org.apache.arrow.vector.dictionary.DictionaryProvider; 6 | 7 | /** 8 | * A record batch stream is a stream of tabular Arrow data that can be iterated over asynchronously 9 | */ 10 | public interface RecordBatchStream extends AutoCloseable, NativeProxy, DictionaryProvider { 11 | /** 12 | * Get the VectorSchemaRoot that will be populated with data as the stream is iterated over 13 | * 14 | * @return the stream's VectorSchemaRoot 15 | */ 16 | VectorSchemaRoot getVectorSchemaRoot(); 17 | 18 | /** 19 | * Load the next record batch in the stream into the VectorSchemaRoot 20 | * 21 | * @return Future that will complete with true if a batch was loaded or false if the end of the 22 | * stream has been reached 23 | */ 24 | CompletableFuture<Boolean> loadNextBatch(); 25 | } 26 | -------------------------------------------------------------------------------- /datafusion-java/src/main/java/org/apache/arrow/datafusion/Runtime.java: -------------------------------------------------------------------------------- 1 | package org.apache.arrow.datafusion; 2 | 3 | /** A runtime represents the underlying async runtime in the DataFusion engine */ 4 | public interface Runtime extends AutoCloseable, 
NativeProxy {} 5 | -------------------------------------------------------------------------------- /datafusion-java/src/main/java/org/apache/arrow/datafusion/SessionConfig.java: -------------------------------------------------------------------------------- 1 | package org.apache.arrow.datafusion; 2 | 3 | import java.util.function.Consumer; 4 | 5 | /** Configuration for creating a {@link SessionContext} using {@link SessionContexts#withConfig} */ 6 | public class SessionConfig extends AbstractProxy implements AutoCloseable { 7 | /** Create a new default {@link SessionConfig} */ 8 | public SessionConfig() { 9 | super(create()); 10 | } 11 | 12 | /** 13 | * Get options related to query execution 14 | * 15 | * @return {@link ExecutionOptions} instance for this config 16 | */ 17 | public ExecutionOptions executionOptions() { 18 | return new ExecutionOptions(this); 19 | } 20 | 21 | /** 22 | * Get options specific to parsing SQL queries 23 | * 24 | * @return {@link SqlParserOptions} instance for this config 25 | */ 26 | public SqlParserOptions sqlParserOptions() { 27 | return new SqlParserOptions(this); 28 | } 29 | 30 | /** 31 | * Modify this session configuration and then return it, to simplify use in a try-with-resources 32 | * statement 33 | * 34 | * @param configurationCallback Callback used to update the configuration 35 | * @return This {@link SessionConfig} instance after being updated 36 | */ 37 | public SessionConfig withConfiguration(Consumer configurationCallback) { 38 | configurationCallback.accept(this); 39 | return this; 40 | } 41 | 42 | @Override 43 | void doClose(long pointer) { 44 | destroy(pointer); 45 | } 46 | 47 | private static native long create(); 48 | 49 | private static native void destroy(long pointer); 50 | 51 | // ExecutionOptions native methods 52 | 53 | static native long getExecutionOptionsBatchSize(long pointer); 54 | 55 | static native void setExecutionOptionsBatchSize(long pointer, long batchSize); 56 | 57 | static native boolean getExecutionOptionsCoalesceBatches(long pointer); 58 | 59 | static native void setExecutionOptionsCoalesceBatches(long pointer, boolean enabled); 60 | 61 | static native boolean getExecutionOptionsCollectStatistics(long pointer); 62 | 63 | static native void setExecutionOptionsCollectStatistics(long pointer, boolean enabled); 64 | 65 | static native long getExecutionOptionsTargetPartitions(long pointer); 66 | 67 | static native void setExecutionOptionsTargetPartitions(long pointer, long batchSize); 68 | 69 | // ParquetOptions native methods 70 | 71 | static native boolean getParquetOptionsEnablePageIndex(long pointer); 72 | 73 | static native void setParquetOptionsEnablePageIndex(long pointer, boolean enabled); 74 | 75 | static native boolean getParquetOptionsPruning(long pointer); 76 | 77 | static native void setParquetOptionsPruning(long pointer, boolean enabled); 78 | 79 | static native boolean getParquetOptionsSkipMetadata(long pointer); 80 | 81 | static native void setParquetOptionsSkipMetadata(long pointer, boolean enabled); 82 | 83 | static native long getParquetOptionsMetadataSizeHint(long pointer); 84 | 85 | static native void setParquetOptionsMetadataSizeHint(long pointer, long value); 86 | 87 | static native boolean getParquetOptionsPushdownFilters(long pointer); 88 | 89 | static native void setParquetOptionsPushdownFilters(long pointer, boolean enabled); 90 | 91 | static native boolean getParquetOptionsReorderFilters(long pointer); 92 | 93 | static native void setParquetOptionsReorderFilters(long pointer, boolean enabled); 
94 | 95 | // SqlParserOptions native methods 96 | 97 | static native boolean getSqlParserOptionsParseFloatAsDecimal(long pointer); 98 | 99 | static native void setSqlParserOptionsParseFloatAsDecimal(long pointer, boolean enabled); 100 | 101 | static native boolean getSqlParserOptionsEnableIdentNormalization(long pointer); 102 | 103 | static native void setSqlParserOptionsEnableIdentNormalization(long pointer, boolean enabled); 104 | 105 | static native String getSqlParserOptionsDialect(long pointer); 106 | 107 | static native void setSqlParserOptionsDialect(long pointer, String dialect); 108 | } 109 | -------------------------------------------------------------------------------- /datafusion-java/src/main/java/org/apache/arrow/datafusion/SessionContext.java: -------------------------------------------------------------------------------- 1 | package org.apache.arrow.datafusion; 2 | 3 | import java.nio.file.Path; 4 | import java.util.Optional; 5 | import java.util.concurrent.CompletableFuture; 6 | 7 | /** A session context holds resources and is the entry point for obtaining a {@link DataFrame} */ 8 | public interface SessionContext extends AutoCloseable, NativeProxy { 9 | 10 | /** 11 | * Obtain a {@link DataFrame} by running the {@code sql} query against the DataFusion library 12 | * 13 | * @param sql The query to execute 14 | * @return DataFrame representing the query result 15 | */ 16 | CompletableFuture<DataFrame> sql(String sql); 17 | 18 | /** 19 | * Register a CSV file with the context 20 | * 21 | * @param name The table name to use to refer to the data 22 | * @param path Path to the CSV file 23 | * @return Future that is completed when the CSV file is registered 24 | */ 25 | CompletableFuture<Void> registerCsv(String name, Path path); 26 | 27 | /** 28 | * Register a Parquet file with the context 29 | * 30 | * @param name The table name to use to refer to the data 31 | * @param path Path to the Parquet file 32 | * @return Future that is completed when the Parquet file is registered 33 | */ 34 | CompletableFuture<Void> registerParquet(String name, Path path); 35 | 36 | /** 37 | * Registers a TableProvider as a table that can be referenced from SQL statements executed 38 | * against this context.
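 *
 * <p>A sketch of registering a query result as a view ({@code context} and the names are
 * illustrative):
 *
 * <pre>{@code
 * DataFrame df = context.sql("SELECT 1 AS x").join();
 * context.registerTable("v", df.intoView());
 * DataFrame fromView = context.sql("SELECT x FROM v").join();
 * }</pre>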
39 | * 40 | * @param name table reference 41 | * @param tableProvider table provider 42 | * @return the previously registered table provider, if any; as of Arrow 22 this is always {@link Optional#empty()} 43 | * @throws Exception when the table is already registered 44 | */ 45 | Optional<TableProvider> registerTable(String name, TableProvider tableProvider) throws Exception; 46 | 47 | /** 48 | * Get the runtime associated with this context 49 | * 50 | * @return The context's runtime 51 | */ 52 | Runtime getRuntime(); 53 | } 54 | -------------------------------------------------------------------------------- /datafusion-java/src/main/java/org/apache/arrow/datafusion/SessionContexts.java: -------------------------------------------------------------------------------- 1 | package org.apache.arrow.datafusion; 2 | 3 | import java.util.function.Consumer; 4 | 5 | /** Manages session contexts */ 6 | public class SessionContexts { 7 | 8 | private SessionContexts() {} 9 | 10 | /** 11 | * Create a new session context 12 | * 13 | * @return native pointer to the created session context 14 | */ 15 | static native long createSessionContext(); 16 | 17 | /** 18 | * Create a new session context using a SessionConfig 19 | * 20 | * @param configPointer pointer to the native session config object to use 21 | * @return native pointer to the created session context 22 | */ 23 | static native long createSessionContextWithConfig(long configPointer); 24 | 25 | /** 26 | * Destroy a session context 27 | * 28 | * @param pointer native pointer to the session context to destroy 29 | */ 30 | static native void destroySessionContext(long pointer); 31 | 32 | static { 33 | JNILoader.load(); 34 | } 35 | 36 | /** 37 | * Create a new default session context 38 | * 39 | * @return The created context 40 | */ 41 | public static SessionContext create() { 42 | long pointer = createSessionContext(); 43 | return new DefaultSessionContext(pointer); 44 | } 45 | 46 | /** 47 | * Create a new session context using the provided configuration 48 | * 49 | * @param config the configuration for the session 50 | * @return The created context 51 | */ 52 | public static SessionContext withConfig(SessionConfig config) { 53 | long pointer = createSessionContextWithConfig(config.getPointer()); 54 | return new DefaultSessionContext(pointer); 55 | } 56 | 57 | /** 58 | * Create a new session context using the provided callback to configure the session 59 | * 60 | * @param configuration callback to modify the {@link SessionConfig} for the session 61 | * @return The created context 62 | * @throws Exception if an error is encountered closing the session config resource 63 | */ 64 | public static SessionContext withConfig(Consumer<SessionConfig> configuration) throws Exception { 65 | try (SessionConfig config = new SessionConfig().withConfiguration(configuration)) { 66 | long pointer = createSessionContextWithConfig(config.getPointer()); 67 | return new DefaultSessionContext(pointer); 68 | } 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /datafusion-java/src/main/java/org/apache/arrow/datafusion/SqlParserOptions.java: -------------------------------------------------------------------------------- 1 | package org.apache.arrow.datafusion; 2 | 3 | /** Configures options specific to parsing SQL queries */ 4 | @SuppressWarnings("UnusedReturnValue") 5 | public class SqlParserOptions { 6 | private final SessionConfig config; 7 | 8 | SqlParserOptions(SessionConfig config) { 9 | this.config = config; 10 | } 11 | 12 | /** 13 | * Get whether to parse floats as decimal type 14 | * 15 | * 
@return whether to parse floats as decimal 16 | */ 17 | public boolean parseFloatAsDecimal() { 18 | return SessionConfig.getSqlParserOptionsParseFloatAsDecimal(config.getPointer()); 19 | } 20 | 21 | /** 22 | * Set whether to parse floats as decimal type 23 | * 24 | * @param enabled whether to parse floats as decimal 25 | * @return the modified {@link SqlParserOptions} instance 26 | */ 27 | public SqlParserOptions withParseFloatAsDecimal(boolean enabled) { 28 | SessionConfig.setSqlParserOptionsParseFloatAsDecimal(config.getPointer(), enabled); 29 | return this; 30 | } 31 | 32 | /** 33 | * Get whether to convert identifiers to lowercase when not quoted 34 | * 35 | * @return whether ident normalization is enabled 36 | */ 37 | public boolean enableIdentNormalization() { 38 | return SessionConfig.getSqlParserOptionsEnableIdentNormalization(config.getPointer()); 39 | } 40 | 41 | /** 42 | * Set whether to convert identifiers to lowercase when not quoted 43 | * 44 | * @param enabled whether ident normalization is enabled 45 | * @return the modified {@link SqlParserOptions} instance 46 | */ 47 | public SqlParserOptions withEnableIdentNormalization(boolean enabled) { 48 | SessionConfig.setSqlParserOptionsEnableIdentNormalization(config.getPointer(), enabled); 49 | return this; 50 | } 51 | 52 | /** 53 | * Get the SQL dialect used 54 | * 55 | * @return the SQL dialect used 56 | */ 57 | public String dialect() { 58 | return SessionConfig.getSqlParserOptionsDialect(config.getPointer()); 59 | } 60 | 61 | /** 62 | * Set the SQL dialect to use 63 | * 64 | * @param dialect the SQL dialect to use 65 | * @return the modified {@link SqlParserOptions} instance 66 | */ 67 | public SqlParserOptions withDialect(String dialect) { 68 | SessionConfig.setSqlParserOptionsDialect(config.getPointer(), dialect); 69 | return this; 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /datafusion-java/src/main/java/org/apache/arrow/datafusion/TableProvider.java: -------------------------------------------------------------------------------- 1 | package org.apache.arrow.datafusion; 2 | 3 | /** Opaque interface that maps to a native {@code Arc<dyn TableProvider>}. 
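* <p>Editor's note (not in the original javadoc): instances are created by native code,
* e.g. a {@code ListingTable}, and can then be registered for querying via {@code
* context.registerTable("table", tableProvider)}.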
*/ 4 | public interface TableProvider extends NativeProxy {} 5 | -------------------------------------------------------------------------------- /datafusion-java/src/main/java/org/apache/arrow/datafusion/TableProviders.java: -------------------------------------------------------------------------------- 1 | package org.apache.arrow.datafusion; 2 | 3 | class TableProviders { 4 | 5 | private TableProviders() {} 6 | 7 | static native void destroyTableProvider(long pointer); 8 | } 9 | -------------------------------------------------------------------------------- /datafusion-java/src/main/java/org/apache/arrow/datafusion/TokioRuntime.java: -------------------------------------------------------------------------------- 1 | package org.apache.arrow.datafusion; 2 | 3 | final class TokioRuntime extends AbstractProxy implements Runtime { 4 | 5 | TokioRuntime(long pointer) { 6 | super(pointer); 7 | } 8 | 9 | @Override 10 | void doClose(long pointer) { 11 | destroyTokioRuntime(pointer); 12 | } 13 | 14 | static TokioRuntime create() { 15 | long pointer = TokioRuntime.createTokioRuntime(); 16 | if (pointer <= 0) { 17 | throw new IllegalStateException("failed to create runtime"); 18 | } 19 | return new TokioRuntime(pointer); 20 | } 21 | 22 | static native long createTokioRuntime(); 23 | 24 | static native void destroyTokioRuntime(long pointer); 25 | } 26 | -------------------------------------------------------------------------------- /datafusion-java/src/main/java/org/apache/arrow/datafusion/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * This module contains a Java JNI binding to Apache Arrow DataFusion, a query engine 3 | * library for working with data in the Arrow format. 4 | */ 5 | package org.apache.arrow.datafusion; 6 | -------------------------------------------------------------------------------- /datafusion-java/src/test/java/org/apache/arrow/datafusion/ParquetWriter.java: -------------------------------------------------------------------------------- 1 | package org.apache.arrow.datafusion; 2 | 3 | import java.io.IOException; 4 | import java.nio.file.Path; 5 | import java.util.function.BiConsumer; 6 | import org.apache.avro.Schema; 7 | import org.apache.avro.generic.GenericData; 8 | import org.apache.hadoop.conf.Configuration; 9 | import org.apache.parquet.avro.AvroParquetWriter; 10 | import org.apache.parquet.column.ParquetProperties; 11 | import org.apache.parquet.hadoop.metadata.CompressionCodecName; 12 | import org.apache.parquet.hadoop.util.HadoopOutputFile; 13 | import org.apache.parquet.io.OutputFile; 14 | 15 | /** Helper class for writing test files in Parquet format using Avro records */ 16 | public class ParquetWriter { 17 | public static void writeParquet( 18 | Path path, String schema, int rowCount, BiConsumer<Integer, GenericData.Record> setRecord) 19 | throws IOException { 20 | Configuration config = new Configuration(); 21 | org.apache.hadoop.fs.Path hadoopFilePath = new org.apache.hadoop.fs.Path(path.toString()); 22 | OutputFile outputFile = HadoopOutputFile.fromPath(hadoopFilePath, config); 23 | 24 | Schema.Parser parser = new Schema.Parser().setValidate(true); 25 | Schema avroSchema = parser.parse(schema); 26 | 27 | try (org.apache.parquet.hadoop.ParquetWriter<GenericData.Record> writer = 28 | AvroParquetWriter.<GenericData.Record>builder(outputFile) 29 | .withSchema(avroSchema) 30 | .withConf(config) 31 | .withWriterVersion(ParquetProperties.WriterVersion.PARQUET_2_0) 32 | .withCompressionCodec(CompressionCodecName.SNAPPY) 33 | .build()) { 34 | for (int i = 0; i < 
rowCount; ++i) { 35 | GenericData.Record record = new GenericData.Record(avroSchema); 36 | setRecord.accept(i, record); 37 | writer.write(record); 38 | } 39 | } 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /datafusion-java/src/test/java/org/apache/arrow/datafusion/TestExecuteStream.java: -------------------------------------------------------------------------------- 1 | package org.apache.arrow.datafusion; 2 | 3 | import static org.junit.jupiter.api.Assertions.*; 4 | 5 | import java.net.URL; 6 | import java.nio.charset.StandardCharsets; 7 | import java.nio.file.Files; 8 | import java.nio.file.Path; 9 | import java.nio.file.Paths; 10 | import java.util.Arrays; 11 | import java.util.List; 12 | import org.apache.arrow.memory.BufferAllocator; 13 | import org.apache.arrow.memory.RootAllocator; 14 | import org.apache.arrow.vector.BigIntVector; 15 | import org.apache.arrow.vector.Float8Vector; 16 | import org.apache.arrow.vector.VarCharVector; 17 | import org.apache.arrow.vector.VectorSchemaRoot; 18 | import org.apache.arrow.vector.dictionary.DictionaryEncoder; 19 | import org.apache.arrow.vector.types.pojo.Schema; 20 | import org.junit.jupiter.api.Test; 21 | import org.junit.jupiter.api.io.TempDir; 22 | 23 | public class TestExecuteStream { 24 | @Test 25 | public void executeStream(@TempDir Path tempDir) throws Exception { 26 | try (SessionContext context = SessionContexts.create(); 27 | BufferAllocator allocator = new RootAllocator()) { 28 | Path csvFilePath = tempDir.resolve("data.csv"); 29 | 30 | List<String> lines = Arrays.asList("x,y,z", "1,2,3.5", "4,5,6.5", "7,8,9.5"); 31 | Files.write(csvFilePath, lines); 32 | 33 | context.registerCsv("test", csvFilePath).join(); 34 | 35 | try (RecordBatchStream stream = 36 | context 37 | .sql("SELECT y,z FROM test WHERE x > 3") 38 | .thenComposeAsync(df -> df.executeStream(allocator)) 39 | .join()) { 40 | VectorSchemaRoot root = stream.getVectorSchemaRoot(); 41 | Schema schema = root.getSchema(); 42 | assertEquals(2, schema.getFields().size()); 43 | assertEquals("y", schema.getFields().get(0).getName()); 44 | assertEquals("z", schema.getFields().get(1).getName()); 45 | 46 | assertTrue(stream.loadNextBatch().join()); 47 | assertEquals(2, root.getRowCount()); 48 | BigIntVector yValues = (BigIntVector) root.getVector(0); 49 | assertEquals(5, yValues.get(0)); 50 | assertEquals(8, yValues.get(1)); 51 | Float8Vector zValues = (Float8Vector) root.getVector(1); 52 | assertEquals(6.5, zValues.get(0)); 53 | assertEquals(9.5, zValues.get(1)); 54 | 55 | assertFalse(stream.loadNextBatch().join()); 56 | } 57 | } 58 | } 59 | 60 | @Test 61 | public void readDictionaryData() throws Exception { 62 | try (SessionContext context = SessionContexts.create(); 63 | BufferAllocator allocator = new RootAllocator()) { 64 | 65 | URL fileUrl = this.getClass().getResource("/dictionary_data.parquet"); 66 | Path parquetFilePath = Paths.get(fileUrl.getPath()); 67 | 68 | context.registerParquet("test", parquetFilePath).join(); 69 | 70 | try (RecordBatchStream stream = 71 | context 72 | .sql("SELECT x,y FROM test") 73 | .thenComposeAsync(df -> df.executeStream(allocator)) 74 | .join()) { 75 | VectorSchemaRoot root = stream.getVectorSchemaRoot(); 76 | Schema schema = root.getSchema(); 77 | assertEquals(2, schema.getFields().size()); 78 | assertEquals("x", schema.getFields().get(0).getName()); 79 | assertEquals("y", schema.getFields().get(1).getName()); 80 | 81 | int rowsRead = 0; 82 | while (stream.loadNextBatch().join()) { 83 | int
batchNumRows = root.getRowCount(); 84 | BigIntVector xValuesEncoded = (BigIntVector) root.getVector(0); 85 | long xDictionaryId = xValuesEncoded.getField().getDictionary().getId(); 86 | try (VarCharVector xValues = 87 | (VarCharVector) 88 | DictionaryEncoder.decode(xValuesEncoded, stream.lookup(xDictionaryId))) { 89 | String[] expected = {"one", "two", "three"}; 90 | for (int i = 0; i < batchNumRows; ++i) { 91 | assertEquals( 92 | expected[(rowsRead + i) % 3], new String(xValues.get(i), StandardCharsets.UTF_8)); 93 | } 94 | } 95 | 96 | BigIntVector yValuesEncoded = (BigIntVector) root.getVector(1); 97 | long yDictionaryId = yValuesEncoded.getField().getDictionary().getId(); 98 | try (VarCharVector yValues = 99 | (VarCharVector) 100 | DictionaryEncoder.decode(yValuesEncoded, stream.lookup(yDictionaryId))) { 101 | String[] expected = {"four", "five", "six"}; 102 | for (int i = 0; i < batchNumRows; ++i) { 103 | assertEquals( 104 | expected[(rowsRead + i) % 3], new String(yValues.get(i), StandardCharsets.UTF_8)); 105 | } 106 | } 107 | rowsRead += batchNumRows; 108 | } 109 | 110 | assertEquals(100, rowsRead); 111 | } 112 | } 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /datafusion-java/src/test/java/org/apache/arrow/datafusion/TestListingTable.java: -------------------------------------------------------------------------------- 1 | package org.apache.arrow.datafusion; 2 | 3 | import static org.junit.jupiter.api.Assertions.*; 4 | 5 | import java.io.FileOutputStream; 6 | import java.nio.charset.StandardCharsets; 7 | import java.nio.file.Files; 8 | import java.nio.file.Path; 9 | import java.time.Instant; 10 | import java.util.ArrayList; 11 | import java.util.Arrays; 12 | import java.util.List; 13 | import java.util.stream.Collectors; 14 | import org.apache.arrow.compression.CommonsCompressionFactory; 15 | import org.apache.arrow.memory.BufferAllocator; 16 | import org.apache.arrow.memory.RootAllocator; 17 | import org.apache.arrow.vector.BigIntVector; 18 | import org.apache.arrow.vector.FieldVector; 19 | import org.apache.arrow.vector.VarCharVector; 20 | import org.apache.arrow.vector.VectorSchemaRoot; 21 | import org.apache.arrow.vector.compression.CompressionCodec; 22 | import org.apache.arrow.vector.compression.CompressionUtil; 23 | import org.apache.arrow.vector.compression.NoCompressionCodec; 24 | import org.apache.arrow.vector.ipc.ArrowFileWriter; 25 | import org.apache.arrow.vector.ipc.ArrowReader; 26 | import org.apache.arrow.vector.ipc.message.IpcOption; 27 | import org.apache.arrow.vector.types.pojo.Field; 28 | import org.junit.jupiter.api.Test; 29 | import org.junit.jupiter.api.io.TempDir; 30 | 31 | public class TestListingTable { 32 | @Test 33 | public void testCsvListingTable(@TempDir Path tempDir) throws Exception { 34 | try (SessionContext context = SessionContexts.create(); 35 | BufferAllocator allocator = new RootAllocator()) { 36 | Path dataDir = tempDir.resolve("data"); 37 | Files.createDirectories(dataDir); 38 | 39 | Path csvFilePath0 = dataDir.resolve("0.csv"); 40 | List<String> lines = Arrays.asList("x,y", "1,2", "3,4"); 41 | Files.write(csvFilePath0, lines); 42 | 43 | Path csvFilePath1 = dataDir.resolve("1.csv"); 44 | lines = Arrays.asList("x,y", "1,12", "3,14"); 45 | Files.write(csvFilePath1, lines); 46 | 47 | try (CsvFormat format = new CsvFormat(); 48 | ListingOptions listingOptions = 49 | ListingOptions.builder(format).withFileExtension(".csv").build(); 50 | ListingTableConfig tableConfig = 51 | 
ListingTableConfig.builder(dataDir) 52 | .withListingOptions(listingOptions) 53 | .build(context) 54 | .join(); 55 | ListingTable listingTable = new ListingTable(tableConfig)) { 56 | context.registerTable("test", listingTable); 57 | testQuery(context, allocator); 58 | } 59 | } 60 | } 61 | 62 | @Test 63 | public void testParquetListingTable(@TempDir Path tempDir) throws Exception { 64 | try (SessionContext context = SessionContexts.create(); 65 | BufferAllocator allocator = new RootAllocator()) { 66 | Path dataDir = tempDir.resolve("data"); 67 | writeParquetFiles(dataDir); 68 | 69 | try (ParquetFormat format = new ParquetFormat(); 70 | ListingOptions listingOptions = 71 | ListingOptions.builder(format).withFileExtension(".parquet").build(); 72 | ListingTableConfig tableConfig = 73 | ListingTableConfig.builder(dataDir) 74 | .withListingOptions(listingOptions) 75 | .build(context) 76 | .join(); 77 | ListingTable listingTable = new ListingTable(tableConfig)) { 78 | context.registerTable("test", listingTable); 79 | testQuery(context, allocator); 80 | } 81 | } 82 | } 83 | 84 | @Test 85 | public void testArrowListingTable(@TempDir Path tempDir) throws Exception { 86 | try (SessionContext context = SessionContexts.create(); 87 | BufferAllocator allocator = new RootAllocator()) { 88 | Path dataDir = tempDir.resolve("data"); 89 | Files.createDirectories(dataDir); 90 | 91 | Path arrowFilePath0 = dataDir.resolve("0.arrow"); 92 | Path arrowFilePath1 = dataDir.resolve("1.arrow"); 93 | 94 | // Write data files in Arrow IPC (Feather V2) file format 95 | try (BigIntVector xVector = new BigIntVector("x", allocator); 96 | BigIntVector yVector = new BigIntVector("y", allocator)) { 97 | List<FieldVector> vectors = Arrays.asList(xVector, yVector); 98 | 99 | for (int i = 0; i < 2; i++) { 100 | xVector.setSafe(i, i * 2 + 1); 101 | yVector.setSafe(i, i * 2 + 2); 102 | } 103 | xVector.setValueCount(2); 104 | yVector.setValueCount(2); 105 | writeArrowFile(arrowFilePath0, vectors, false); 106 | 107 | xVector.reset(); 108 | yVector.reset(); 109 | for (int i = 0; i < 2; i++) { 110 | xVector.setSafe(i, i * 2 + 1); 111 | yVector.setSafe(i, i * 2 + 12); 112 | } 113 | xVector.setValueCount(2); 114 | yVector.setValueCount(2); 115 | writeArrowFile(arrowFilePath1, vectors, false); 116 | } 117 | 118 | try (ArrowFormat format = new ArrowFormat(); 119 | ListingOptions listingOptions = 120 | ListingOptions.builder(format).withFileExtension(".arrow").build(); 121 | ListingTableConfig tableConfig = 122 | ListingTableConfig.builder(dataDir) 123 | .withListingOptions(listingOptions) 124 | .build(context) 125 | .join(); 126 | ListingTable listingTable = new ListingTable(tableConfig)) { 127 | context.registerTable("test", listingTable); 128 | testQuery(context, allocator); 129 | } 130 | } 131 | } 132 | 133 | @Test 134 | public void testCompressedArrowIpc(@TempDir Path tempDir) throws Exception { 135 | try (SessionContext context = SessionContexts.create(); 136 | BufferAllocator allocator = new RootAllocator()) { 137 | Path dataDir = tempDir.resolve("data"); 138 | Files.createDirectories(dataDir); 139 | Path arrowFilePath0 = dataDir.resolve("0.arrow"); 140 | 141 | // Data needs to be reasonably large otherwise compression is not used 142 | int numRows = 10_000; 143 | 144 | // Write data files in compressed Arrow IPC (Feather V2) file format 145 | try (BigIntVector xVector = new BigIntVector("x", allocator)) { 146 | for (int i = 0; i < numRows; i++) { 147 | xVector.setSafe(i, i * 2 + 1); 148 | } 149 | xVector.setValueCount(numRows); 150 | List<FieldVector>
vectors = Arrays.asList(xVector); 151 | writeArrowFile(arrowFilePath0, vectors, true); 152 | } 153 | 154 | try (ArrowFormat format = new ArrowFormat(); 155 | ListingOptions listingOptions = 156 | ListingOptions.builder(format).withFileExtension(".arrow").build(); 157 | ListingTableConfig tableConfig = 158 | ListingTableConfig.builder(dataDir) 159 | .withListingOptions(listingOptions) 160 | .build(context) 161 | .join(); 162 | ListingTable listingTable = new ListingTable(tableConfig)) { 163 | context.registerTable("test", listingTable); 164 | try (ArrowReader reader = 165 | context 166 | .sql("SELECT x FROM test") 167 | .thenComposeAsync(df -> df.collect(allocator)) 168 | .join()) { 169 | 170 | int globalRow = 0; 171 | VectorSchemaRoot root = reader.getVectorSchemaRoot(); 172 | while (reader.loadNextBatch()) { 173 | BigIntVector xValues = (BigIntVector) root.getVector(0); 174 | for (int row = 0; row < root.getRowCount(); ++row, ++globalRow) { 175 | assertEquals(globalRow * 2 + 1, xValues.get(row)); 176 | } 177 | } 178 | assertEquals(numRows, globalRow); 179 | } 180 | } 181 | } 182 | } 183 | 184 | @Test 185 | public void testDisableCollectStat(@TempDir Path tempDir) throws Exception { 186 | try (SessionContext context = SessionContexts.create(); 187 | BufferAllocator allocator = new RootAllocator()) { 188 | Path dataDir = tempDir.resolve("data"); 189 | writeParquetFiles(dataDir); 190 | 191 | try (ParquetFormat format = new ParquetFormat(); 192 | ListingOptions listingOptions = 193 | ListingOptions.builder(format) 194 | .withFileExtension(".parquet") 195 | .withCollectStat(false) 196 | .build(); 197 | ListingTableConfig tableConfig = 198 | ListingTableConfig.builder(dataDir) 199 | .withListingOptions(listingOptions) 200 | .build(context) 201 | .join(); 202 | ListingTable listingTable = new ListingTable(tableConfig)) { 203 | context.registerTable("test", listingTable); 204 | testQuery(context, allocator); 205 | } 206 | } 207 | } 208 | 209 | @Test 210 | public void testMultiplePaths(@TempDir Path tempDir) throws Exception { 211 | try (SessionContext context = SessionContexts.create(); 212 | BufferAllocator allocator = new RootAllocator()) { 213 | Path dataDir = tempDir.resolve("data"); 214 | Path[] dataFiles = writeParquetFiles(dataDir); 215 | 216 | try (ParquetFormat format = new ParquetFormat(); 217 | ListingOptions listingOptions = 218 | ListingOptions.builder(format).withFileExtension(".parquet").build(); 219 | ListingTableConfig tableConfig = 220 | ListingTableConfig.builder(dataFiles) 221 | .withListingOptions(listingOptions) 222 | .build(context) 223 | .join(); 224 | ListingTable listingTable = new ListingTable(tableConfig)) { 225 | context.registerTable("test", listingTable); 226 | testQuery(context, allocator); 227 | } 228 | } 229 | } 230 | 231 | private static Path[] writeParquetFiles(Path dataDir) throws Exception { 232 | String schema = 233 | "{\"namespace\": \"org.example\"," 234 | + "\"type\": \"record\"," 235 | + "\"name\": \"record_name\"," 236 | + "\"fields\": [" 237 | + " {\"name\": \"x\", \"type\": \"long\"}," 238 | + " {\"name\": \"y\", \"type\": \"long\"}" 239 | + " ]}"; 240 | 241 | Path parquetFilePath0 = dataDir.resolve("0.parquet"); 242 | ParquetWriter.writeParquet( 243 | parquetFilePath0, 244 | schema, 245 | 2, 246 | (i, record) -> { 247 | record.put("x", i * 2 + 1); 248 | record.put("y", i * 2 + 2); 249 | }); 250 | 251 | Path parquetFilePath1 = dataDir.resolve("1.parquet"); 252 | ParquetWriter.writeParquet( 253 | parquetFilePath1, 254 | schema, 255 | 2, 256 | (i, 
record) -> { 257 | record.put("x", i * 2 + 1); 258 | record.put("y", i * 2 + 12); 259 | }); 260 | return new Path[] {parquetFilePath0, parquetFilePath1}; 261 | } 262 | 263 | private static void writeArrowFile(Path filePath, List<FieldVector> vectors, boolean compressed) 264 | throws Exception { 265 | List<Field> fields = vectors.stream().map(v -> v.getField()).collect(Collectors.toList()); 266 | CompressionUtil.CodecType codec = 267 | compressed ? CompressionUtil.CodecType.ZSTD : CompressionUtil.CodecType.NO_COMPRESSION; 268 | CompressionCodec.Factory compressionFactory = 269 | compressed ? new CommonsCompressionFactory() : NoCompressionCodec.Factory.INSTANCE; 270 | try (VectorSchemaRoot root = new VectorSchemaRoot(fields, vectors); 271 | FileOutputStream output = new FileOutputStream(filePath.toString()); 272 | ArrowFileWriter writer = 273 | new ArrowFileWriter( 274 | root, 275 | null, 276 | output.getChannel(), 277 | null, 278 | IpcOption.DEFAULT, 279 | compressionFactory, 280 | codec)) { 281 | writer.start(); 282 | writer.writeBatch(); 283 | writer.end(); 284 | } 285 | } 286 | 287 | private static void testQuery(SessionContext context, BufferAllocator allocator) 288 | throws Exception { 289 | try (ArrowReader reader = 290 | context 291 | .sql("SELECT y FROM test WHERE x = 3 ORDER BY y") 292 | .thenComposeAsync(df -> df.collect(allocator)) 293 | .join()) { 294 | 295 | long[] expectedResults = {4, 14}; 296 | int globalRow = 0; 297 | VectorSchemaRoot root = reader.getVectorSchemaRoot(); 298 | while (reader.loadNextBatch()) { 299 | BigIntVector yValues = (BigIntVector) root.getVector(0); 300 | for (int row = 0; row < root.getRowCount(); ++row, ++globalRow) { 301 | assertTrue(globalRow < expectedResults.length); 302 | assertEquals(expectedResults[globalRow], yValues.get(row)); 303 | } 304 | } 305 | assertEquals(expectedResults.length, globalRow); 306 | } 307 | } 308 | 309 | @Test 310 | public void testParquetTimestampedStrings(@TempDir Path tempDir) throws Exception { 311 | try (SessionContext context = SessionContexts.create(); 312 | BufferAllocator allocator = new RootAllocator()) { 313 | Path dataDir = tempDir.resolve("data"); 314 | String schema = 315 | "{\"namespace\": \"org.example\"," 316 | + "\"type\": \"record\"," 317 | + "\"name\": \"record_name\"," 318 | + "\"fields\": [" 319 | + " {\"name\": \"id\", \"type\": \"long\"}," 320 | + " {\"name\": \"timestamp\", \"type\": {\"type\": \"long\", \"logicalType\": \"timestamp-millis\"}}," 321 | + " {\"name\": \"text\", \"type\": \"string\"}" 322 | + " ]}"; 323 | 324 | Path parquetFilePath0 = dataDir.resolve("0.parquet"); 325 | Instant[] timestamps0 = { 326 | Instant.parse("2022-04-04T00:00:00Z"), 327 | Instant.parse("2022-05-04T00:00:00Z"), 328 | Instant.parse("2022-06-06T00:00:00Z"), 329 | }; 330 | ParquetWriter.writeParquet( 331 | parquetFilePath0, 332 | schema, 333 | 3, 334 | (i, record) -> { 335 | record.put("id", i + 1); 336 | record.put("timestamp", (timestamps0[i].getEpochSecond() * 1_000)); 337 | record.put("text", String.format("Text%d", i + 1)); 338 | }); 339 | 340 | Path parquetFilePath1 = dataDir.resolve("1.parquet"); 341 | Instant[] timestamps1 = { 342 | Instant.parse("2023-04-04T00:00:00Z"), 343 | Instant.parse("2023-04-04T00:00:00Z"), 344 | Instant.parse("2022-08-01T00:00:00Z"), 345 | }; 346 | ParquetWriter.writeParquet( 347 | parquetFilePath1, 348 | schema, 349 | 3, 350 | (i, record) -> { 351 | record.put("id", i + 4); 352 | record.put("timestamp", (timestamps1[i].getEpochSecond() * 1_000)); 353 | record.put("text", String.format("Text%d", i + 
4)); 354 | }); 355 | 356 | Path[] filePaths = {parquetFilePath0, parquetFilePath1}; 357 | 358 | try (ParquetFormat format = new ParquetFormat(); 359 | ListingOptions listingOptions = 360 | ListingOptions.builder(format).withFileExtension(".parquet").build(); 361 | ListingTableConfig tableConfig = 362 | ListingTableConfig.builder(filePaths) 363 | .withListingOptions(listingOptions) 364 | .build(context) 365 | .join(); 366 | ListingTable listingTable = new ListingTable(tableConfig)) { 367 | context.registerTable("test", listingTable); 368 | try (ArrowReader reader = 369 | context 370 | .sql( 371 | "SELECT id,text FROM test WHERE ID IN (2, 3, 4) AND timestamp < '2023-01-01T00:00:00Z' ORDER BY id") 372 | .thenComposeAsync(df -> df.collect(allocator)) 373 | .join()) { 374 | 375 | Long[] expectedIds = {2L, 3L}; 376 | String[] expectedText = {"Text2", "Text3"}; 377 | List<Long> actualIds = new ArrayList<>(); 378 | List<String> actualText = new ArrayList<>(); 379 | int globalRow = 0; 380 | VectorSchemaRoot root = reader.getVectorSchemaRoot(); 381 | while (reader.loadNextBatch()) { 382 | BigIntVector idValues = (BigIntVector) root.getVector(0); 383 | VarCharVector textValues = (VarCharVector) root.getVector(1); 384 | for (int row = 0; row < root.getRowCount(); ++row, ++globalRow) { 385 | actualIds.add(idValues.get(row)); 386 | actualText.add(new String(textValues.get(row), StandardCharsets.UTF_8)); 387 | } 388 | } 389 | assertArrayEquals(expectedIds, actualIds.toArray(new Long[0])); 390 | assertArrayEquals(expectedText, actualText.toArray(new String[0])); 391 | } 392 | } 393 | } 394 | } 395 | } 396 | -------------------------------------------------------------------------------- /datafusion-java/src/test/java/org/apache/arrow/datafusion/TestQuery.java: -------------------------------------------------------------------------------- 1 | package org.apache.arrow.datafusion; 2 | 3 | import static org.junit.jupiter.api.Assertions.assertEquals; 4 | import static org.junit.jupiter.api.Assertions.assertFalse; 5 | import static org.junit.jupiter.api.Assertions.assertThrows; 6 | import static org.junit.jupiter.api.Assertions.assertTrue; 7 | 8 | import java.nio.file.Files; 9 | import java.nio.file.Path; 10 | import java.util.Arrays; 11 | import java.util.List; 12 | import org.apache.arrow.memory.BufferAllocator; 13 | import org.apache.arrow.memory.RootAllocator; 14 | import org.apache.arrow.vector.BigIntVector; 15 | import org.apache.arrow.vector.VectorSchemaRoot; 16 | import org.apache.arrow.vector.ipc.ArrowReader; 17 | import org.junit.jupiter.api.Test; 18 | import org.junit.jupiter.api.io.TempDir; 19 | 20 | public class TestQuery { 21 | 22 | @Test 23 | public void testQueryInMemoryTable() throws Exception { 24 | try (SessionContext context = SessionContexts.create(); 25 | BufferAllocator allocator = new RootAllocator()) { 26 | DataFrame df = context.sql("SELECT * FROM (VALUES (1, 2), (3, 4)) AS t (x, y)").join(); 27 | assertFalse( 28 | context.registerTable("test", df.intoView()).isPresent(), 29 | "there should not be any duplicates"); 30 | testQuery(context, allocator); 31 | assertThrows( 32 | Exception.class, 33 | () -> context.registerTable("test", df.intoView()), 34 | "registering a duplicate table name should throw"); 35 | } 36 | } 37 | 38 | @Test 39 | public void testQueryCsv(@TempDir Path tempDir) throws Exception { 40 | try (SessionContext context = SessionContexts.create(); 41 | BufferAllocator allocator = new RootAllocator()) { 42 | Path csvFilePath = tempDir.resolve("data.csv"); 43 | 44 | List<String> lines = Arrays.asList("x,y", 
"1,2", "3,4"); 45 | Files.write(csvFilePath, lines); 46 | 47 | context.registerCsv("test", csvFilePath).join(); 48 | testQuery(context, allocator); 49 | } 50 | } 51 | 52 | @Test 53 | public void testQueryParquet(@TempDir Path tempDir) throws Exception { 54 | try (SessionContext context = SessionContexts.create(); 55 | BufferAllocator allocator = new RootAllocator()) { 56 | Path parquetFilePath = tempDir.resolve("data.parquet"); 57 | 58 | String schema = 59 | "{\"namespace\": \"org.example\"," 60 | + "\"type\": \"record\"," 61 | + "\"name\": \"record_name\"," 62 | + "\"fields\": [" 63 | + " {\"name\": \"x\", \"type\": \"long\"}," 64 | + " {\"name\": \"y\", \"type\": \"long\"}" 65 | + " ]}"; 66 | 67 | ParquetWriter.writeParquet( 68 | parquetFilePath, 69 | schema, 70 | 2, 71 | (i, record) -> { 72 | record.put("x", i * 2 + 1); 73 | record.put("y", i * 2 + 2); 74 | }); 75 | 76 | context.registerParquet("test", parquetFilePath).join(); 77 | testQuery(context, allocator); 78 | } 79 | } 80 | 81 | @Test 82 | public void testInvalidQuery() throws Exception { 83 | try (SessionContext context = SessionContexts.create()) { 84 | assertThrows( 85 | RuntimeException.class, 86 | () -> context.sql("SELECT z FROM (VALUES (1, 2), (3, 4)) AS t (x, y)").join(), 87 | "invalid column name in query should raise an error"); 88 | } 89 | } 90 | 91 | private static void testQuery(SessionContext context, BufferAllocator allocator) 92 | throws Exception { 93 | try (ArrowReader reader = 94 | context 95 | .sql("SELECT y FROM test WHERE x = 3") 96 | .thenComposeAsync(df -> df.collect(allocator)) 97 | .join()) { 98 | 99 | VectorSchemaRoot root = reader.getVectorSchemaRoot(); 100 | assertTrue(reader.loadNextBatch()); 101 | 102 | assertEquals(1, root.getRowCount()); 103 | BigIntVector yValues = (BigIntVector) root.getVector(0); 104 | assertEquals(4, yValues.get(0)); 105 | 106 | assertFalse(reader.loadNextBatch()); 107 | } 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /datafusion-java/src/test/java/org/apache/arrow/datafusion/TestSessionConfig.java: -------------------------------------------------------------------------------- 1 | package org.apache.arrow.datafusion; 2 | 3 | import static org.junit.jupiter.api.Assertions.*; 4 | 5 | import java.nio.file.Path; 6 | import java.util.Optional; 7 | import org.apache.arrow.memory.BufferAllocator; 8 | import org.apache.arrow.memory.RootAllocator; 9 | import org.apache.arrow.vector.VectorSchemaRoot; 10 | import org.junit.jupiter.api.Test; 11 | import org.junit.jupiter.api.io.TempDir; 12 | 13 | public class TestSessionConfig { 14 | @Test 15 | public void testRegisterInvalidCsvPath(@TempDir Path tempDir) throws Exception { 16 | try (SessionContext context = SessionContexts.create()) { 17 | Path filePath = tempDir.resolve("non-existent.csv"); 18 | assertThrows( 19 | RuntimeException.class, 20 | () -> context.registerCsv("test", filePath).join(), 21 | "Expected an exception to be raised from an IO error"); 22 | } 23 | } 24 | 25 | @Test 26 | public void testRegisterInvalidParquetPath(@TempDir Path tempDir) throws Exception { 27 | try (SessionContext context = SessionContexts.create()) { 28 | Path filePath = tempDir.resolve("non-existent.parquet"); 29 | assertThrows( 30 | RuntimeException.class, 31 | () -> context.registerParquet("test", filePath).join(), 32 | "Expected an exception to be raised from an IO error"); 33 | } 34 | } 35 | 36 | @Test 37 | public void testCreateSessionWithConfig() throws Exception { 38 | try (SessionContext 
context = 39 | SessionContexts.withConfig( 40 | (c) -> c.executionOptions().parquet().withEnablePageIndex(true))) { 41 | // Only testing that we can successfully create a session context with the config 42 | } 43 | } 44 | 45 | @Test 46 | public void testParquetOptions() throws Exception { 47 | try (SessionConfig config = new SessionConfig()) { 48 | ParquetOptions parquetOptions = config.executionOptions().parquet(); 49 | 50 | assertTrue(parquetOptions.enablePageIndex()); 51 | parquetOptions.withEnablePageIndex(false); 52 | assertFalse(parquetOptions.enablePageIndex()); 53 | 54 | assertTrue(parquetOptions.pruning()); 55 | parquetOptions.withPruning(false); 56 | assertFalse(parquetOptions.pruning()); 57 | 58 | assertTrue(parquetOptions.skipMetadata()); 59 | parquetOptions.withSkipMetadata(false); 60 | assertFalse(parquetOptions.skipMetadata()); 61 | 62 | assertFalse(parquetOptions.metadataSizeHint().isPresent()); 63 | parquetOptions.withMetadataSizeHint(Optional.of(123L)); 64 | Optional<Long> sizeHint = parquetOptions.metadataSizeHint(); 65 | assertTrue(sizeHint.isPresent()); 66 | assertEquals(123L, sizeHint.get()); 67 | parquetOptions.withMetadataSizeHint(Optional.empty()); 68 | assertFalse(parquetOptions.metadataSizeHint().isPresent()); 69 | 70 | assertFalse(parquetOptions.pushdownFilters()); 71 | parquetOptions.withPushdownFilters(true); 72 | assertTrue(parquetOptions.pushdownFilters()); 73 | 74 | assertFalse(parquetOptions.reorderFilters()); 75 | parquetOptions.withReorderFilters(true); 76 | assertTrue(parquetOptions.reorderFilters()); 77 | } 78 | } 79 | 80 | @Test 81 | public void testSqlParserOptions() throws Exception { 82 | try (SessionConfig config = new SessionConfig()) { 83 | SqlParserOptions sqlParserOptions = config.sqlParserOptions(); 84 | 85 | assertFalse(sqlParserOptions.parseFloatAsDecimal()); 86 | sqlParserOptions.withParseFloatAsDecimal(true); 87 | assertTrue(sqlParserOptions.parseFloatAsDecimal()); 88 | 89 | assertTrue(sqlParserOptions.enableIdentNormalization()); 90 | sqlParserOptions.withEnableIdentNormalization(false); 91 | assertFalse(sqlParserOptions.enableIdentNormalization()); 92 | 93 | assertEquals("generic", sqlParserOptions.dialect()); 94 | sqlParserOptions.withDialect("PostgreSQL"); 95 | assertEquals("PostgreSQL", sqlParserOptions.dialect()); 96 | } 97 | } 98 | 99 | @Test 100 | public void testExecutionOptions() throws Exception { 101 | try (SessionConfig config = new SessionConfig()) { 102 | ExecutionOptions executionOptions = config.executionOptions(); 103 | 104 | assertEquals(8192, executionOptions.batchSize()); 105 | executionOptions.withBatchSize(1024); 106 | assertEquals(1024, executionOptions.batchSize()); 107 | 108 | assertTrue(executionOptions.coalesceBatches()); 109 | executionOptions.withCoalesceBatches(false); 110 | assertFalse(executionOptions.coalesceBatches()); 111 | 112 | assertFalse(executionOptions.collectStatistics()); 113 | executionOptions.withCollectStatistics(true); 114 | assertTrue(executionOptions.collectStatistics()); 115 | 116 | long targetPartitions = executionOptions.targetPartitions(); 117 | assertTrue(targetPartitions > 0); 118 | executionOptions.withTargetPartitions(targetPartitions * 2); 119 | assertEquals(targetPartitions * 2, executionOptions.targetPartitions()); 120 | } 121 | } 122 | 123 | @Test 124 | public void testBatchSize(@TempDir Path tempDir) throws Exception { 125 | long rowCount = 1024; 126 | long batchSize = 64; 127 | try (SessionContext context = 128 | SessionContexts.withConfig((conf) -> 
conf.executionOptions().withBatchSize(batchSize)); 129 | BufferAllocator allocator = new RootAllocator()) { 130 | Path parquetFilePath = tempDir.resolve("data.parquet"); 131 | 132 | String parquetSchema = 133 | "{\"namespace\": \"org.example\"," 134 | + "\"type\": \"record\"," 135 | + "\"name\": \"record_name\"," 136 | + "\"fields\": [" 137 | + " {\"name\": \"x\", \"type\": \"long\"}" 138 | + " ]}"; 139 | 140 | ParquetWriter.writeParquet( 141 | parquetFilePath, 142 | parquetSchema, 143 | 1024, 144 | (i, record) -> { 145 | record.put("x", i); 146 | }); 147 | 148 | context.registerParquet("test", parquetFilePath).join(); 149 | 150 | try (RecordBatchStream stream = 151 | context 152 | .sql("SELECT * FROM test") 153 | .thenComposeAsync(df -> df.executeStream(allocator)) 154 | .join()) { 155 | VectorSchemaRoot root = stream.getVectorSchemaRoot(); 156 | 157 | long rowsReceived = 0; 158 | while (stream.loadNextBatch().join()) { 159 | assertTrue(root.getRowCount() <= batchSize); 160 | rowsReceived += root.getRowCount(); 161 | } 162 | 163 | assertEquals(rowCount, rowsReceived); 164 | } 165 | } 166 | } 167 | } 168 | -------------------------------------------------------------------------------- /datafusion-java/src/test/resources/dictionary_data.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datafusion-contrib/datafusion-java/c5dee4178ff9187de4cc7f5eb2e9ddd471223444/datafusion-java/src/test/resources/dictionary_data.parquet -------------------------------------------------------------------------------- /datafusion-java/write_test_files.py: -------------------------------------------------------------------------------- 1 | import pyarrow as pa 2 | import pyarrow.parquet as pq 3 | 4 | 5 | num_rows = 100 6 | 7 | dict_array_x = pa.DictionaryArray.from_arrays( 8 | pa.array([i % 3 for i in range(num_rows)]), pa.array(["one", "two", "three"]) 9 | ) 10 | 11 | dict_array_y = pa.DictionaryArray.from_arrays( 12 | pa.array([i % 3 for i in range(num_rows)]), pa.array(["four", "five", "six"]) 13 | ) 14 | 15 | table = pa.Table.from_arrays([dict_array_x, dict_array_y], ["x", "y"]) 16 | pq.write_table(table, "src/test/resources/dictionary_data.parquet") 17 | -------------------------------------------------------------------------------- /datafusion-jni/.gitignore: -------------------------------------------------------------------------------- 1 | # Created by https://www.toptal.com/developers/gitignore/api/rust 2 | # Edit at https://www.toptal.com/developers/gitignore?templates=rust 3 | 4 | ### Rust ### 5 | # Generated by Cargo 6 | # will have compiled files and executables 7 | debug/ 8 | target/ 9 | 10 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 11 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html 12 | Cargo.lock 13 | 14 | # These are backup files generated by rustfmt 15 | **/*.rs.bk 16 | 17 | # MSVC Windows builds of rustc generate these, which store debugging information 18 | *.pdb 19 | 20 | # End of https://www.toptal.com/developers/gitignore/api/rust 21 | -------------------------------------------------------------------------------- /datafusion-jni/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "datafusion_jni" 3 | version = "0.16.0" 4 | homepage = "https://github.com/apache/datafusion" 5 | repository = "https://github.com/apache/datafusion" 6 | authors = ["Apache Arrow <dev@arrow.apache.org>"]
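# Editor's note (illustrative, not part of the original manifest): this crate is
# compiled as a cdylib (see [lib] below); a typical local build is
#   cargo build --release
# which produces the platform-specific JNI library (e.g. libdatafusion_jni.so on
# Linux) that the Java side loads through JNILoader.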
7 | description = "Build and run queries against data" 8 | readme = "README.md" 9 | license = "Apache-2.0" 10 | edition = "2021" 11 | 12 | [dependencies] 13 | jni = "^0.21.0" 14 | tokio = "^1.32.0" 15 | arrow = { version = "^39.0", features = ["ffi", "ipc_compression"] } 16 | datafusion = "^25.0" 17 | futures = "0.3.28" 18 | 19 | [lib] 20 | crate_type = ["cdylib"] 21 | 22 | [profile.release] 23 | lto = true 24 | strip = true 25 | -------------------------------------------------------------------------------- /datafusion-jni/src/context.rs: -------------------------------------------------------------------------------- 1 | use datafusion::datasource::TableProvider; 2 | use datafusion::execution::context::SessionContext; 3 | use datafusion::prelude::{CsvReadOptions, ParquetReadOptions, SessionConfig}; 4 | use jni::objects::{JClass, JObject, JString}; 5 | use jni::sys::jlong; 6 | use jni::JNIEnv; 7 | use std::sync::Arc; 8 | use tokio::runtime::Runtime; 9 | 10 | use crate::util::{set_error_message, set_object_result}; 11 | 12 | #[no_mangle] 13 | pub extern "system" fn Java_org_apache_arrow_datafusion_DefaultSessionContext_registerCsv( 14 | mut env: JNIEnv, 15 | _class: JClass, 16 | runtime: jlong, 17 | pointer: jlong, 18 | name: JString, 19 | path: JString, 20 | callback: JObject, 21 | ) { 22 | let runtime = unsafe { &mut *(runtime as *mut Runtime) }; 23 | let name: String = env 24 | .get_string(&name) 25 | .expect("Couldn't get name as string!") 26 | .into(); 27 | let path: String = env 28 | .get_string(&path) 29 | .expect("Couldn't get path as string!") 30 | .into(); 31 | let context = unsafe { &mut *(pointer as *mut SessionContext) }; 32 | runtime.block_on(async { 33 | let register_result = context 34 | .register_csv(&name, &path, CsvReadOptions::new()) 35 | .await; 36 | set_error_message(&mut env, callback, register_result); 37 | }); 38 | } 39 | 40 | #[no_mangle] 41 | pub extern "system" fn Java_org_apache_arrow_datafusion_DefaultSessionContext_registerTable( 42 | mut env: JNIEnv, 43 | _class: JClass, 44 | pointer: jlong, 45 | name: JString, 46 | table_provider: jlong, 47 | ) -> jlong { 48 | let name: String = env 49 | .get_string(&name) 50 | .expect("Couldn't get name as string!") 51 | .into(); 52 | let context = unsafe { &mut *(pointer as *mut SessionContext) }; 53 | let table_provider = unsafe { &*(table_provider as *const Arc<dyn TableProvider>) }; 54 | let result = context.register_table(&name, table_provider.clone()); 55 | match result { 56 | // TODO: to be fixed on the DataFusion side; duplicate registrations will no longer 57 | // be returned as Ok(Some(...)) but as an Err instead 58 | Ok(Some(v)) => Box::into_raw(Box::new(v)) as jlong, 59 | Ok(None) => 0, 60 | Err(err) => { 61 | env.throw_new("java/lang/Exception", err.to_string()) 62 | .unwrap(); 63 | 0 64 | } 65 | } 66 | } 67 | 68 | #[no_mangle] 69 | pub extern "system" fn Java_org_apache_arrow_datafusion_DefaultSessionContext_registerParquet( 70 | mut env: JNIEnv, 71 | _class: JClass, 72 | runtime: jlong, 73 | pointer: jlong, 74 | name: JString, 75 | path: JString, 76 | callback: JObject, 77 | ) { 78 | let runtime = unsafe { &mut *(runtime as *mut Runtime) }; 79 | let name: String = env 80 | .get_string(&name) 81 | .expect("Couldn't get name as string!") 82 | .into(); 83 | let path: String = env 84 | .get_string(&path) 85 | .expect("Couldn't get path as string!") 86 | .into(); 87 | let context = unsafe { &mut *(pointer as *mut SessionContext) }; 88 | runtime.block_on(async { 89 | let register_result = context 90 | .register_parquet(&name, &path, 
ParquetReadOptions::default()) 91 | .await; 92 | set_error_message(&mut env, callback, register_result); 93 | }); 94 | } 95 | 96 | #[no_mangle] 97 | pub extern "system" fn Java_org_apache_arrow_datafusion_DefaultSessionContext_querySql( 98 | mut env: JNIEnv, 99 | _class: JClass, 100 | runtime: jlong, 101 | pointer: jlong, 102 | sql: JString, 103 | callback: JObject, 104 | ) { 105 | let runtime = unsafe { &mut *(runtime as *mut Runtime) }; 106 | let sql: String = env 107 | .get_string(&sql) 108 | .expect("Couldn't get sql as string!") 109 | .into(); 110 | let context = unsafe { &mut *(pointer as *mut SessionContext) }; 111 | runtime.block_on(async { 112 | let query_result = context.sql(&sql).await; 113 | set_object_result( 114 | &mut env, 115 | callback, 116 | query_result.map(|df| Box::into_raw(Box::new(df))), 117 | ); 118 | }); 119 | } 120 | #[no_mangle] 121 | pub extern "system" fn Java_org_apache_arrow_datafusion_SessionContexts_destroySessionContext( 122 | _env: JNIEnv, 123 | _class: JClass, 124 | pointer: jlong, 125 | ) { 126 | let _ = unsafe { Box::from_raw(pointer as *mut SessionContext) }; 127 | } 128 | 129 | #[no_mangle] 130 | pub extern "system" fn Java_org_apache_arrow_datafusion_SessionContexts_createSessionContext( 131 | _env: JNIEnv, 132 | _class: JClass, 133 | ) -> jlong { 134 | let context = SessionContext::new(); 135 | Box::into_raw(Box::new(context)) as jlong 136 | } 137 | 138 | #[no_mangle] 139 | pub extern "system" fn Java_org_apache_arrow_datafusion_SessionContexts_createSessionContextWithConfig( 140 | _env: JNIEnv, 141 | _class: JClass, 142 | config: jlong, 143 | ) -> jlong { 144 | let config = unsafe { &*(config as *const SessionConfig) }; 145 | let context = SessionContext::with_config(config.clone()); 146 | Box::into_raw(Box::new(context)) as jlong 147 | } 148 | -------------------------------------------------------------------------------- /datafusion-jni/src/dataframe.rs: -------------------------------------------------------------------------------- 1 | use arrow::ipc::writer::FileWriter; 2 | use datafusion::dataframe::DataFrame; 3 | use jni::objects::{JClass, JObject, JString}; 4 | use jni::sys::jlong; 5 | use jni::JNIEnv; 6 | use std::convert::Into; 7 | use std::io::BufWriter; 8 | use std::io::Cursor; 9 | use tokio::runtime::Runtime; 10 | 11 | use crate::util::{set_error_message, set_object_result}; 12 | 13 | #[no_mangle] 14 | pub extern "system" fn Java_org_apache_arrow_datafusion_DataFrames_collectDataframe( 15 | mut env: JNIEnv, 16 | _class: JClass, 17 | runtime: jlong, 18 | dataframe: jlong, 19 | callback: JObject, 20 | ) { 21 | let runtime = unsafe { &mut *(runtime as *mut Runtime) }; 22 | let dataframe = unsafe { &mut *(dataframe as *mut DataFrame) }; 23 | let schema = dataframe.schema().into(); 24 | runtime.block_on(async { 25 | let batches = dataframe 26 | .clone() 27 | .collect() 28 | .await 29 | .expect("failed to collect dataframe"); 30 | let mut buff = Cursor::new(vec![0; 0]); 31 | { 32 | let mut writer = FileWriter::try_new(BufWriter::new(&mut buff), &schema) 33 | .expect("failed to create writer"); 34 | for batch in batches { 35 | writer.write(&batch).expect("failed to write batch"); 36 | } 37 | writer.finish().expect("failed to finish"); 38 | } 39 | let err_message = env 40 | .new_string("".to_string()) 41 | .expect("Couldn't create java string!"); 42 | let ba = env 43 | .byte_array_from_slice(buff.get_ref()) 44 | .expect("cannot create empty byte array"); 45 | env.call_method( 46 | callback, 47 | "accept", 48 | 
"(Ljava/lang/Object;Ljava/lang/Object;)V", 49 | &[(&err_message).into(), (&ba).into()], 50 | ) 51 | .expect("failed to call method"); 52 | }); 53 | } 54 | 55 | #[no_mangle] 56 | pub extern "system" fn Java_org_apache_arrow_datafusion_DataFrames_executeStream( 57 | mut env: JNIEnv, 58 | _class: JClass, 59 | runtime: jlong, 60 | dataframe: jlong, 61 | callback: JObject, 62 | ) { 63 | let runtime = unsafe { &mut *(runtime as *mut Runtime) }; 64 | let dataframe = unsafe { &mut *(dataframe as *mut DataFrame) }; 65 | runtime.block_on(async { 66 | let stream_result = dataframe.clone().execute_stream().await; 67 | set_object_result( 68 | &mut env, 69 | callback, 70 | stream_result.map(|stream| Box::into_raw(Box::new(stream))), 71 | ); 72 | }); 73 | } 74 | 75 | #[no_mangle] 76 | pub extern "system" fn Java_org_apache_arrow_datafusion_DataFrames_showDataframe( 77 | mut env: JNIEnv, 78 | _class: JClass, 79 | runtime: jlong, 80 | dataframe: jlong, 81 | callback: JObject, 82 | ) { 83 | let runtime = unsafe { &mut *(runtime as *mut Runtime) }; 84 | let dataframe = unsafe { &*(dataframe as *const DataFrame) }; 85 | runtime.block_on(async { 86 | let r = dataframe.clone().show().await; 87 | set_error_message(&mut env, callback, r); 88 | }); 89 | } 90 | 91 | #[no_mangle] 92 | pub extern "system" fn Java_org_apache_arrow_datafusion_DataFrames_intoView( 93 | _env: JNIEnv, 94 | _class: JClass, 95 | dataframe: jlong, 96 | ) -> jlong { 97 | let dataframe = unsafe { &*(dataframe as *const DataFrame) }; 98 | let provider = dataframe.clone().into_view(); 99 | Box::into_raw(Box::new(provider)) as jlong 100 | } 101 | 102 | #[no_mangle] 103 | pub extern "system" fn Java_org_apache_arrow_datafusion_DataFrames_writeParquet( 104 | mut env: JNIEnv, 105 | _class: JClass, 106 | runtime: jlong, 107 | dataframe: jlong, 108 | path: JString, 109 | callback: JObject, 110 | ) { 111 | let runtime = unsafe { &mut *(runtime as *mut Runtime) }; 112 | let dataframe = unsafe { &*(dataframe as *const DataFrame) }; 113 | let path: String = env 114 | .get_string(&path) 115 | .expect("Couldn't get path as string!") 116 | .into(); 117 | runtime.block_on(async { 118 | let r = dataframe.clone().write_parquet(&path, None).await; 119 | set_error_message(&mut env, callback, r); 120 | }); 121 | } 122 | 123 | #[no_mangle] 124 | pub extern "system" fn Java_org_apache_arrow_datafusion_DataFrames_writeCsv( 125 | mut env: JNIEnv, 126 | _class: JClass, 127 | runtime: jlong, 128 | dataframe: jlong, 129 | path: JString, 130 | callback: JObject, 131 | ) { 132 | let runtime = unsafe { &mut *(runtime as *mut Runtime) }; 133 | let dataframe = unsafe { &*(dataframe as *const DataFrame) }; 134 | let path: String = env 135 | .get_string(&path) 136 | .expect("Couldn't get path as string!") 137 | .into(); 138 | runtime.block_on(async { 139 | let r = dataframe.clone().write_csv(&path).await; 140 | set_error_message(&mut env, callback, r); 141 | }); 142 | } 143 | 144 | #[no_mangle] 145 | pub extern "system" fn Java_org_apache_arrow_datafusion_DataFrames_destroyDataFrame( 146 | _env: JNIEnv, 147 | _class: JClass, 148 | pointer: jlong, 149 | ) { 150 | let _ = unsafe { Box::from_raw(pointer as *mut DataFrame) }; 151 | } 152 | -------------------------------------------------------------------------------- /datafusion-jni/src/file_formats.rs: -------------------------------------------------------------------------------- 1 | use datafusion::datasource::file_format::arrow::ArrowFormat; 2 | use datafusion::datasource::file_format::csv::CsvFormat; 3 | use 
datafusion::datasource::file_format::parquet::ParquetFormat; 4 | use datafusion::datasource::file_format::FileFormat; 5 | use jni::objects::JClass; 6 | use jni::sys::jlong; 7 | use jni::JNIEnv; 8 | use std::sync::Arc; 9 | 10 | #[no_mangle] 11 | pub extern "system" fn Java_org_apache_arrow_datafusion_FileFormats_createCsv( 12 | _env: JNIEnv, 13 | _class: JClass, 14 | ) -> jlong { 15 | // Return as an Arc<dyn FileFormat> rather than CsvFormat so this 16 | // can be passed into ListingOptions.create 17 | let format: Arc<dyn FileFormat> = Arc::new(CsvFormat::default()); 18 | Box::into_raw(Box::new(format)) as jlong 19 | } 20 | 21 | #[no_mangle] 22 | pub extern "system" fn Java_org_apache_arrow_datafusion_FileFormats_createParquet( 23 | _env: JNIEnv, 24 | _class: JClass, 25 | ) -> jlong { 26 | // Return as an Arc<dyn FileFormat> rather than ParquetFormat so this 27 | // can be passed into ListingOptions.create 28 | let format: Arc<dyn FileFormat> = Arc::new(ParquetFormat::default()); 29 | Box::into_raw(Box::new(format)) as jlong 30 | } 31 | 32 | #[no_mangle] 33 | pub extern "system" fn Java_org_apache_arrow_datafusion_FileFormats_createArrow( 34 | _env: JNIEnv, 35 | _class: JClass, 36 | ) -> jlong { 37 | let format: Arc<dyn FileFormat> = Arc::new(ArrowFormat::default()); 38 | Box::into_raw(Box::new(format)) as jlong 39 | } 40 | 41 | #[no_mangle] 42 | pub extern "system" fn Java_org_apache_arrow_datafusion_FileFormats_destroyFileFormat( 43 | _env: JNIEnv, 44 | _class: JClass, 45 | pointer: jlong, 46 | ) { 47 | let _ = unsafe { Box::from_raw(pointer as *mut Arc<dyn FileFormat>) }; 48 | } 49 | -------------------------------------------------------------------------------- /datafusion-jni/src/lib.rs: -------------------------------------------------------------------------------- 1 | mod context; 2 | mod dataframe; 3 | mod file_formats; 4 | mod listing_options; 5 | mod listing_table; 6 | mod listing_table_config; 7 | mod runtime; 8 | mod session_config; 9 | mod stream; 10 | mod table_provider; 11 | mod util; 12 | -------------------------------------------------------------------------------- /datafusion-jni/src/listing_options.rs: -------------------------------------------------------------------------------- 1 | use datafusion::datasource::file_format::FileFormat; 2 | use datafusion::datasource::listing::ListingOptions; 3 | use jni::objects::{JClass, JString}; 4 | use jni::sys::{jboolean, jlong}; 5 | use jni::JNIEnv; 6 | use std::sync::Arc; 7 | 8 | #[no_mangle] 9 | pub extern "system" fn Java_org_apache_arrow_datafusion_ListingOptions_create( 10 | mut env: JNIEnv, 11 | _class: JClass, 12 | format: jlong, 13 | file_extension: JString, 14 | collect_stat: jboolean, 15 | ) -> jlong { 16 | let format = unsafe { &*(format as *const Arc<dyn FileFormat>) }; 17 | 18 | let file_extension: String = env 19 | .get_string(&file_extension) 20 | .expect("Couldn't get Java file_extension string") 21 | .into(); 22 | 23 | let listing_options = ListingOptions::new(format.clone()) 24 | .with_file_extension(file_extension) 25 | .with_collect_stat(collect_stat == 1u8); 26 | Box::into_raw(Box::new(listing_options)) as jlong 27 | } 28 | 29 | #[no_mangle] 30 | pub extern "system" fn Java_org_apache_arrow_datafusion_ListingOptions_destroy( 31 | _env: JNIEnv, 32 | _class: JClass, 33 | pointer: jlong, 34 | ) { 35 | let _ = unsafe { Box::from_raw(pointer as *mut ListingOptions) }; 36 | } 37 | -------------------------------------------------------------------------------- /datafusion-jni/src/listing_table.rs: -------------------------------------------------------------------------------- 1 | use
datafusion::datasource::listing::{ListingTable, ListingTableConfig}; 2 | use datafusion::datasource::TableProvider; 3 | use jni::objects::{JClass, JObject}; 4 | use jni::sys::jlong; 5 | use jni::JNIEnv; 6 | use std::sync::Arc; 7 | 8 | use crate::util::set_object_result; 9 | 10 | #[no_mangle] 11 | pub extern "system" fn Java_org_apache_arrow_datafusion_ListingTable_create( 12 | mut env: JNIEnv, 13 | _class: JClass, 14 | table_config: jlong, 15 | object_result: JObject, 16 | ) { 17 | let table_config = unsafe { &*(table_config as *const ListingTableConfig) }; 18 | // Clone table config as it will be moved into ListingTable 19 | let table_config = ListingTableConfig { 20 | table_paths: table_config.table_paths.clone(), 21 | file_schema: table_config.file_schema.clone(), 22 | options: table_config.options.clone(), 23 | }; 24 | let table_provider_result = ListingTable::try_new(table_config).map(|listing_table| { 25 | // Return as an Arc<dyn TableProvider> rather than ListingTable so this 26 | // can be passed into SessionContext.registerTable 27 | let table_provider: Arc<dyn TableProvider> = Arc::new(listing_table); 28 | Box::into_raw(Box::new(table_provider)) 29 | }); 30 | set_object_result(&mut env, object_result, table_provider_result); 31 | } 32 | -------------------------------------------------------------------------------- /datafusion-jni/src/listing_table_config.rs: -------------------------------------------------------------------------------- 1 | use datafusion::datasource::listing::{ListingOptions, ListingTableConfig, ListingTableUrl}; 2 | use datafusion::execution::context::SessionContext; 3 | use jni::objects::{JClass, JObject, JObjectArray, JString}; 4 | use jni::sys::jlong; 5 | use jni::JNIEnv; 6 | use tokio::runtime::Runtime; 7 | 8 | use crate::util::{set_object_result, set_object_result_error}; 9 | 10 | #[no_mangle] 11 | pub extern "system" fn Java_org_apache_arrow_datafusion_ListingTableConfig_create( 12 | mut env: JNIEnv, 13 | _class: JClass, 14 | runtime: jlong, 15 | context: jlong, 16 | table_paths: JObjectArray, 17 | listing_options: jlong, 18 | callback: JObject, 19 | ) { 20 | let runtime = unsafe { &*(runtime as *const Runtime) }; 21 | let context = unsafe { &*(context as *const SessionContext) }; 22 | 23 | let mut table_urls: Vec<ListingTableUrl> = Vec::new(); 24 | let table_paths_length = env 25 | .get_array_length(&table_paths) 26 | .expect("Couldn't get array length of table_paths"); 27 | for i in 0..table_paths_length { 28 | let table_path_str: JString = env 29 | .get_object_array_element(&table_paths, i) 30 | .expect("Couldn't get array string element") 31 | .into(); 32 | let table_path: String = env 33 | .get_string(&table_path_str) 34 | .expect("Couldn't get native string source") 35 | .into(); 36 | let table_url = ListingTableUrl::parse(table_path); 37 | let table_url = match table_url { 38 | Ok(url) => url, 39 | Err(err) => { 40 | set_object_result_error(&mut env, callback, &err); 41 | return; 42 | } 43 | }; 44 | table_urls.push(table_url); 45 | } 46 | 47 | runtime.block_on(async { 48 | let listing_table_config = ListingTableConfig::new_with_multi_paths(table_urls); 49 | 50 | let listing_table_config = match listing_options { 51 | 0 => listing_table_config, 52 | listing_options => { 53 | let listing_options = unsafe { &*(listing_options as *const ListingOptions) }; 54 | listing_table_config.with_listing_options(listing_options.clone()) 55 | } 56 | }; 57 | 58 | let session_state = context.state(); 59 | let config_result = listing_table_config.infer_schema(&session_state).await; 60 | set_object_result( 61 | &mut 
env, 62 | callback, 63 | config_result.map(|config| Box::into_raw(Box::new(config))), 64 | ); 65 | }); 66 | } 67 | 68 | #[no_mangle] 69 | pub extern "system" fn Java_org_apache_arrow_datafusion_ListingTableConfig_destroy( 70 | _env: JNIEnv, 71 | _class: JClass, 72 | pointer: jlong, 73 | ) { 74 | let _ = unsafe { Box::from_raw(pointer as *mut ListingTableConfig) }; 75 | } 76 | -------------------------------------------------------------------------------- /datafusion-jni/src/runtime.rs: -------------------------------------------------------------------------------- 1 | use jni::objects::JClass; 2 | use jni::sys::jlong; 3 | use jni::JNIEnv; 4 | use std::time::Duration; 5 | use tokio::runtime::Runtime; 6 | 7 | #[no_mangle] 8 | pub extern "system" fn Java_org_apache_arrow_datafusion_TokioRuntime_createTokioRuntime( 9 | _env: JNIEnv, 10 | _class: JClass, 11 | ) -> jlong { 12 | if let Ok(runtime) = Runtime::new() { 13 | // println!("successfully created tokio runtime"); 14 | Box::into_raw(Box::new(runtime)) as jlong 15 | } else { 16 | // TODO error handling 17 | -1 18 | } 19 | } 20 | #[no_mangle] 21 | pub extern "system" fn Java_org_apache_arrow_datafusion_TokioRuntime_destroyTokioRuntime( 22 | _env: JNIEnv, 23 | _class: JClass, 24 | pointer: jlong, 25 | ) { 26 | let runtime = unsafe { Box::from_raw(pointer as *mut Runtime) }; 27 | runtime.shutdown_timeout(Duration::from_millis(100)); 28 | // println!("successfully shutdown tokio runtime"); 29 | } 30 | -------------------------------------------------------------------------------- /datafusion-jni/src/session_config.rs: -------------------------------------------------------------------------------- 1 | use datafusion::execution::context::SessionConfig; 2 | use jni::objects::{JClass, JString}; 3 | use jni::sys::{jboolean, jlong}; 4 | use jni::JNIEnv; 5 | 6 | #[no_mangle] 7 | pub extern "system" fn Java_org_apache_arrow_datafusion_SessionConfig_create( 8 | _env: JNIEnv, 9 | _class: JClass, 10 | ) -> jlong { 11 | let session_config = Box::new(SessionConfig::new()); 12 | Box::into_raw(session_config) as jlong 13 | } 14 | 15 | #[no_mangle] 16 | pub extern "system" fn Java_org_apache_arrow_datafusion_SessionConfig_destroy( 17 | _env: JNIEnv, 18 | _class: JClass, 19 | pointer: jlong, 20 | ) { 21 | let _ = unsafe { Box::from_raw(pointer as *mut SessionConfig) }; 22 | } 23 | 24 | // Helper macros to implement boolean options 25 | 26 | macro_rules! bool_getter { 27 | ($name:ident, $($property_path:ident).+) => { 28 | #[no_mangle] 29 | pub extern "system" fn $name( 30 | _env: JNIEnv, 31 | _class: JClass, 32 | pointer: jlong, 33 | ) -> jboolean { 34 | let config = unsafe { &*(pointer as *const SessionConfig) }; 35 | let property_value = config.options().$($property_path).+; 36 | if property_value { 37 | 1u8 38 | } else { 39 | 0u8 40 | } 41 | } 42 | } 43 | } 44 | 45 | macro_rules! bool_setter { 46 | ($name:ident, $($property_path:ident).+) => { 47 | #[no_mangle] 48 | pub extern "system" fn $name( 49 | _env: JNIEnv, 50 | _class: JClass, 51 | pointer: jlong, 52 | enabled: jboolean, 53 | ) { 54 | let config = unsafe { &mut *(pointer as *mut SessionConfig) }; 55 | config.options_mut().$($property_path).+ = enabled != 0u8; 56 | } 57 | } 58 | } 59 | 60 | macro_rules! 
usize_getter { 61 | ($name:ident, $($property_path:ident).+) => { 62 | #[no_mangle] 63 | pub extern "system" fn $name( 64 | _env: JNIEnv, 65 | _class: JClass, 66 | pointer: jlong, 67 | ) -> jlong { 68 | let config = unsafe { &*(pointer as *const SessionConfig) }; 69 | let property_value = config.options().$($property_path).+; 70 | property_value as jlong 71 | } 72 | } 73 | } 74 | 75 | macro_rules! usize_setter { 76 | ($name:ident, $($property_path:ident).+) => { 77 | #[no_mangle] 78 | pub extern "system" fn $name( 79 | _env: JNIEnv, 80 | _class: JClass, 81 | pointer: jlong, 82 | value: jlong, 83 | ) { 84 | let config = unsafe { &mut *(pointer as *mut SessionConfig) }; 85 | config.options_mut().$($property_path).+ = value as usize; 86 | } 87 | } 88 | } 89 | 90 | // ExecutionOptions 91 | 92 | usize_getter!( 93 | Java_org_apache_arrow_datafusion_SessionConfig_getExecutionOptionsBatchSize, 94 | execution.batch_size 95 | ); 96 | usize_setter!( 97 | Java_org_apache_arrow_datafusion_SessionConfig_setExecutionOptionsBatchSize, 98 | execution.batch_size 99 | ); 100 | 101 | bool_getter!( 102 | Java_org_apache_arrow_datafusion_SessionConfig_getExecutionOptionsCoalesceBatches, 103 | execution.coalesce_batches 104 | ); 105 | bool_setter!( 106 | Java_org_apache_arrow_datafusion_SessionConfig_setExecutionOptionsCoalesceBatches, 107 | execution.coalesce_batches 108 | ); 109 | 110 | bool_getter!( 111 | Java_org_apache_arrow_datafusion_SessionConfig_getExecutionOptionsCollectStatistics, 112 | execution.collect_statistics 113 | ); 114 | bool_setter!( 115 | Java_org_apache_arrow_datafusion_SessionConfig_setExecutionOptionsCollectStatistics, 116 | execution.collect_statistics 117 | ); 118 | 119 | usize_getter!( 120 | Java_org_apache_arrow_datafusion_SessionConfig_getExecutionOptionsTargetPartitions, 121 | execution.target_partitions 122 | ); 123 | usize_setter!( 124 | Java_org_apache_arrow_datafusion_SessionConfig_setExecutionOptionsTargetPartitions, 125 | execution.target_partitions 126 | ); 127 | 128 | // ParquetOptions 129 | 130 | bool_getter!( 131 | Java_org_apache_arrow_datafusion_SessionConfig_getParquetOptionsEnablePageIndex, 132 | execution.parquet.enable_page_index 133 | ); 134 | bool_setter!( 135 | Java_org_apache_arrow_datafusion_SessionConfig_setParquetOptionsEnablePageIndex, 136 | execution.parquet.enable_page_index 137 | ); 138 | 139 | bool_getter!( 140 | Java_org_apache_arrow_datafusion_SessionConfig_getParquetOptionsPruning, 141 | execution.parquet.pruning 142 | ); 143 | bool_setter!( 144 | Java_org_apache_arrow_datafusion_SessionConfig_setParquetOptionsPruning, 145 | execution.parquet.pruning 146 | ); 147 | 148 | bool_getter!( 149 | Java_org_apache_arrow_datafusion_SessionConfig_getParquetOptionsSkipMetadata, 150 | execution.parquet.skip_metadata 151 | ); 152 | bool_setter!( 153 | Java_org_apache_arrow_datafusion_SessionConfig_setParquetOptionsSkipMetadata, 154 | execution.parquet.skip_metadata 155 | ); 156 | 157 | bool_getter!( 158 | Java_org_apache_arrow_datafusion_SessionConfig_getParquetOptionsPushdownFilters, 159 | execution.parquet.pushdown_filters 160 | ); 161 | bool_setter!( 162 | Java_org_apache_arrow_datafusion_SessionConfig_setParquetOptionsPushdownFilters, 163 | execution.parquet.pushdown_filters 164 | ); 165 | 166 | bool_getter!( 167 | Java_org_apache_arrow_datafusion_SessionConfig_getParquetOptionsReorderFilters, 168 | execution.parquet.reorder_filters 169 | ); 170 | bool_setter!( 171 | Java_org_apache_arrow_datafusion_SessionConfig_setParquetOptionsReorderFilters, 172 | 
execution.parquet.reorder_filters 173 | ); 174 | 175 | #[no_mangle] 176 | pub extern "system" fn Java_org_apache_arrow_datafusion_SessionConfig_getParquetOptionsMetadataSizeHint( 177 | _env: JNIEnv, 178 | _class: JClass, 179 | pointer: jlong, 180 | ) -> jlong { 181 | let config = unsafe { &*(pointer as *const SessionConfig) }; 182 | let size_hint = config.options().execution.parquet.metadata_size_hint; 183 | match size_hint { 184 | Some(size_hint) => size_hint as jlong, 185 | None => -1 as jlong, 186 | } 187 | } 188 | 189 | #[no_mangle] 190 | pub extern "system" fn Java_org_apache_arrow_datafusion_SessionConfig_setParquetOptionsMetadataSizeHint( 191 | _env: JNIEnv, 192 | _class: JClass, 193 | pointer: jlong, 194 | value: jlong, 195 | ) { 196 | let config = unsafe { &mut *(pointer as *mut SessionConfig) }; 197 | if value >= 0 { 198 | config.options_mut().execution.parquet.metadata_size_hint = Some(value as usize); 199 | } else { 200 | config.options_mut().execution.parquet.metadata_size_hint = None; 201 | } 202 | } 203 | 204 | // SqlParserOptions 205 | 206 | bool_getter!( 207 | Java_org_apache_arrow_datafusion_SessionConfig_getSqlParserOptionsParseFloatAsDecimal, 208 | sql_parser.parse_float_as_decimal 209 | ); 210 | bool_setter!( 211 | Java_org_apache_arrow_datafusion_SessionConfig_setSqlParserOptionsParseFloatAsDecimal, 212 | sql_parser.parse_float_as_decimal 213 | ); 214 | 215 | bool_getter!( 216 | Java_org_apache_arrow_datafusion_SessionConfig_getSqlParserOptionsEnableIdentNormalization, 217 | sql_parser.enable_ident_normalization 218 | ); 219 | bool_setter!( 220 | Java_org_apache_arrow_datafusion_SessionConfig_setSqlParserOptionsEnableIdentNormalization, 221 | sql_parser.enable_ident_normalization 222 | ); 223 | 224 | #[no_mangle] 225 | pub extern "system" fn Java_org_apache_arrow_datafusion_SessionConfig_getSqlParserOptionsDialect< 226 | 'local, 227 | >( 228 | env: JNIEnv<'local>, 229 | _class: JClass<'local>, 230 | pointer: jlong, 231 | ) -> JString<'local> { 232 | let config = unsafe { &*(pointer as *const SessionConfig) }; 233 | let dialect = &config.options().sql_parser.dialect; 234 | env.new_string(dialect) 235 | .expect("Couldn't create Java string") 236 | } 237 | 238 | #[no_mangle] 239 | pub extern "system" fn Java_org_apache_arrow_datafusion_SessionConfig_setSqlParserOptionsDialect( 240 | mut env: JNIEnv, 241 | _class: JClass, 242 | pointer: jlong, 243 | dialect: JString, 244 | ) { 245 | let config = unsafe { &mut *(pointer as *mut SessionConfig) }; 246 | let dialect: String = env 247 | .get_string(&dialect) 248 | .expect("Couldn't get dialect string") 249 | .into(); 250 | config.options_mut().sql_parser.dialect = dialect; 251 | } 252 | -------------------------------------------------------------------------------- /datafusion-jni/src/stream.rs: -------------------------------------------------------------------------------- 1 | use arrow::array::Array; 2 | use arrow::array::StructArray; 3 | use arrow::ffi::{FFI_ArrowArray, FFI_ArrowSchema}; 4 | use datafusion::physical_plan::SendableRecordBatchStream; 5 | use futures::stream::TryStreamExt; 6 | use jni::objects::{JClass, JObject}; 7 | use jni::sys::jlong; 8 | use jni::JNIEnv; 9 | use std::convert::Into; 10 | use std::ptr::addr_of_mut; 11 | use tokio::runtime::Runtime; 12 | 13 | use crate::util::{set_object_result_error, set_object_result_ok}; 14 | 15 | #[no_mangle] 16 | pub extern "system" fn Java_org_apache_arrow_datafusion_DefaultRecordBatchStream_next( 17 | mut env: JNIEnv, 18 | _class: JClass, 19 | runtime: jlong, 20 | 
stream: jlong, 21 | callback: JObject, 22 | ) { 23 | let runtime = unsafe { &mut *(runtime as *mut Runtime) }; 24 | let stream = unsafe { &mut *(stream as *mut SendableRecordBatchStream) }; 25 | runtime.block_on(async { 26 | let next = stream.try_next().await; 27 | match next { 28 | Ok(Some(batch)) => { 29 | // Convert to struct array for compatibility with FFI 30 | let struct_array: StructArray = batch.into(); 31 | let array_data = struct_array.into_data(); 32 | let mut ffi_array = FFI_ArrowArray::new(&array_data); 33 | // ffi_array must remain alive until after the callback is called 34 | set_object_result_ok(&mut env, callback, addr_of_mut!(ffi_array)); 35 | } 36 | Ok(None) => { 37 | set_object_result_ok(&mut env, callback, 0 as *mut FFI_ArrowSchema); 38 | } 39 | Err(err) => { 40 | set_object_result_error(&mut env, callback, &err); 41 | } 42 | } 43 | }); 44 | } 45 | 46 | #[no_mangle] 47 | pub extern "system" fn Java_org_apache_arrow_datafusion_DefaultRecordBatchStream_getSchema( 48 | mut env: JNIEnv, 49 | _class: JClass, 50 | stream: jlong, 51 | callback: JObject, 52 | ) { 53 | let stream = unsafe { &mut *(stream as *mut SendableRecordBatchStream) }; 54 | let schema = stream.schema(); 55 | let ffi_schema = FFI_ArrowSchema::try_from(&*schema); 56 | match ffi_schema { 57 | Ok(mut ffi_schema) => { 58 | // ffi_schema must remain alive until after the callback is called 59 | set_object_result_ok(&mut env, callback, addr_of_mut!(ffi_schema)); 60 | } 61 | Err(err) => { 62 | set_object_result_error(&mut env, callback, &err); 63 | } 64 | } 65 | } 66 | 67 | #[no_mangle] 68 | pub extern "system" fn Java_org_apache_arrow_datafusion_DefaultRecordBatchStream_destroy( 69 | _env: JNIEnv, 70 | _class: JClass, 71 | pointer: jlong, 72 | ) { 73 | let _ = unsafe { Box::from_raw(pointer as *mut SendableRecordBatchStream) }; 74 | } 75 | -------------------------------------------------------------------------------- /datafusion-jni/src/table_provider.rs: -------------------------------------------------------------------------------- 1 | use datafusion::datasource::TableProvider; 2 | use jni::objects::JClass; 3 | use jni::sys::jlong; 4 | use jni::JNIEnv; 5 | use std::sync::Arc; 6 | 7 | #[no_mangle] 8 | pub extern "system" fn Java_org_apache_arrow_datafusion_TableProviders_destroyTableProvider( 9 | _env: JNIEnv, 10 | _class: JClass, 11 | pointer: jlong, 12 | ) { 13 | let _ = unsafe { Box::from_raw(pointer as *mut Arc<dyn TableProvider>) }; 14 | } 15 | -------------------------------------------------------------------------------- /datafusion-jni/src/util.rs: -------------------------------------------------------------------------------- 1 | use std::error::Error; 2 | 3 | use jni::objects::JObject; 4 | use jni::sys::jlong; 5 | use jni::JNIEnv; 6 | 7 | /// Set error message from a result using a Consumer Java callback 8 | pub fn set_error_message<Err: Error>(env: &mut JNIEnv, callback: JObject, result: Result<(), Err>) { 9 | match result { 10 | Ok(_) => { 11 | let err_message = JObject::null(); 12 | env.call_method( 13 | callback, 14 | "accept", 15 | "(Ljava/lang/Object;)V", 16 | &[(&err_message).into()], 17 | ) 18 | .expect("Failed to call error handler with null message"); 19 | } 20 | Err(err) => { 21 | let err_message = env 22 | .new_string(err.to_string()) 23 | .expect("Couldn't create java string for error message"); 24 | env.call_method( 25 | callback, 26 | "accept", 27 | "(Ljava/lang/Object;)V", 28 | &[(&err_message).into()], 29 | ) 30 | .expect("Failed to call error handler with error message"); 31 | } 32 | }; 33 | } 34 | 35 | /// Call an ObjectResultCallback to return either a pointer to a newly created object or an error message 36 | pub fn set_object_result<T, Err: Error>( 37 | env: &mut JNIEnv, 38 | callback: JObject, 39 | address: Result<*mut T, Err>, 40 | ) { 41 | match address { 42 | Ok(address) => set_object_result_ok(env, callback, address), 43 | Err(err) => set_object_result_error(env, callback, &err), 44 | }; 45 | } 46 | 47 | /// Set success result by calling an ObjectResultCallback 48 | pub fn set_object_result_ok<T>(env: &mut JNIEnv, callback: JObject, address: *mut T) { 49 | let err_message = JObject::null(); 50 | env.call_method( 51 | callback, 52 | "callback", 53 | "(Ljava/lang/String;J)V", 54 | &[(&err_message).into(), (address as jlong).into()], 55 | ) 56 | .expect("Failed to call object result callback with address"); 57 | } 58 | 59 | /// Set error result by calling an ObjectResultCallback 60 | pub fn set_object_result_error<T: Error>(env: &mut JNIEnv, callback: JObject, error: &T) { 61 | let err_message = env 62 | .new_string(error.to_string()) 63 | .expect("Couldn't create java string for error message"); 64 | let address = -1 as jlong; 65 | env.call_method( 66 | callback, 67 | "callback", 68 | "(Ljava/lang/String;J)V", 69 | &[(&err_message).into(), address.into()], 70 | ) 71 | .expect("Failed to call object result callback with error"); 72 | } 73 | -------------------------------------------------------------------------------- /gradle.properties: -------------------------------------------------------------------------------- 1 | org.gradle.jvmargs=--add-exports jdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED \ 2 | --add-exports jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED \ 3 | --add-exports jdk.compiler/com.sun.tools.javac.parser=ALL-UNNAMED \ 4 | --add-exports jdk.compiler/com.sun.tools.javac.tree=ALL-UNNAMED \ 5 | --add-exports jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED 6 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datafusion-contrib/datafusion-java/c5dee4178ff9187de4cc7f5eb2e9ddd471223444/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionBase=GRADLE_USER_HOME 2 | distributionPath=wrapper/dists 3 | distributionUrl=https\://services.gradle.org/distributions/gradle-8.7-bin.zip 4 | networkTimeout=10000 5 | validateDistributionUrl=true 6 | zipStoreBase=GRADLE_USER_HOME 7 | zipStorePath=wrapper/dists 8 | -------------------------------------------------------------------------------- /gradlew: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # 4 | # Copyright © 2015-2021 the original authors. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # https://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | 19 | ############################################################################## 20 | # 21 | # Gradle start up script for POSIX generated by Gradle. 22 | # 23 | # Important for running: 24 | # 25 | # (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is 26 | # noncompliant, but you have some other compliant shell such as ksh or 27 | # bash, then to run this script, type that shell name before the whole 28 | # command line, like: 29 | # 30 | # ksh Gradle 31 | # 32 | # Busybox and similar reduced shells will NOT work, because this script 33 | # requires all of these POSIX shell features: 34 | # * functions; 35 | # * expansions «$var», «${var}», «${var:-default}», «${var+SET}», 36 | # «${var#prefix}», «${var%suffix}», and «$( cmd )»; 37 | # * compound commands having a testable exit status, especially «case»; 38 | # * various built-in commands including «command», «set», and «ulimit». 39 | # 40 | # Important for patching: 41 | # 42 | # (2) This script targets any POSIX shell, so it avoids extensions provided 43 | # by Bash, Ksh, etc; in particular arrays are avoided. 44 | # 45 | # The "traditional" practice of packing multiple parameters into a 46 | # space-separated string is a well documented source of bugs and security 47 | # problems, so this is (mostly) avoided, by progressively accumulating 48 | # options in "$@", and eventually passing that to Java. 49 | # 50 | # Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS, 51 | # and GRADLE_OPTS) rely on word-splitting, this is performed explicitly; 52 | # see the in-line comments for details. 53 | # 54 | # There are tweaks for specific operating systems such as AIX, CygWin, 55 | # Darwin, MinGW, and NonStop. 56 | # 57 | # (3) This script is generated from the Groovy template 58 | # https://github.com/gradle/gradle/blob/HEAD/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt 59 | # within the Gradle project. 60 | # 61 | # You can find Gradle at https://github.com/gradle/gradle/. 62 | # 63 | ############################################################################## 64 | 65 | # Attempt to set APP_HOME 66 | 67 | # Resolve links: $0 may be a link 68 | app_path=$0 69 | 70 | # Need this for daisy-chained symlinks. 71 | while 72 | APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path 73 | [ -h "$app_path" ] 74 | do 75 | ls=$( ls -ld "$app_path" ) 76 | link=${ls#*' -> '} 77 | case $link in #( 78 | /*) app_path=$link ;; #( 79 | *) app_path=$APP_HOME$link ;; 80 | esac 81 | done 82 | 83 | # This is normally unused 84 | # shellcheck disable=SC2034 85 | APP_BASE_NAME=${0##*/} 86 | # Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036) 87 | APP_HOME=$( cd "${APP_HOME:-./}" > /dev/null && pwd -P ) || exit 88 | 89 | # Use the maximum available, or set MAX_FD != -1 to use that value. 90 | MAX_FD=maximum 91 | 92 | warn () { 93 | echo "$*" 94 | } >&2 95 | 96 | die () { 97 | echo 98 | echo "$*" 99 | echo 100 | exit 1 101 | } >&2 102 | 103 | # OS specific support (must be 'true' or 'false'). 
104 | cygwin=false 105 | msys=false 106 | darwin=false 107 | nonstop=false 108 | case "$( uname )" in #( 109 | CYGWIN* ) cygwin=true ;; #( 110 | Darwin* ) darwin=true ;; #( 111 | MSYS* | MINGW* ) msys=true ;; #( 112 | NONSTOP* ) nonstop=true ;; 113 | esac 114 | 115 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar 116 | 117 | 118 | # Determine the Java command to use to start the JVM. 119 | if [ -n "$JAVA_HOME" ] ; then 120 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 121 | # IBM's JDK on AIX uses strange locations for the executables 122 | JAVACMD=$JAVA_HOME/jre/sh/java 123 | else 124 | JAVACMD=$JAVA_HOME/bin/java 125 | fi 126 | if [ ! -x "$JAVACMD" ] ; then 127 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME 128 | 129 | Please set the JAVA_HOME variable in your environment to match the 130 | location of your Java installation." 131 | fi 132 | else 133 | JAVACMD=java 134 | if ! command -v java >/dev/null 2>&1 135 | then 136 | die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 137 | 138 | Please set the JAVA_HOME variable in your environment to match the 139 | location of your Java installation." 140 | fi 141 | fi 142 | 143 | # Increase the maximum file descriptors if we can. 144 | if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then 145 | case $MAX_FD in #( 146 | max*) 147 | # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked. 148 | # shellcheck disable=SC2039,SC3045 149 | MAX_FD=$( ulimit -H -n ) || 150 | warn "Could not query maximum file descriptor limit" 151 | esac 152 | case $MAX_FD in #( 153 | '' | soft) :;; #( 154 | *) 155 | # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked. 156 | # shellcheck disable=SC2039,SC3045 157 | ulimit -n "$MAX_FD" || 158 | warn "Could not set maximum file descriptor limit to $MAX_FD" 159 | esac 160 | fi 161 | 162 | # Collect all arguments for the java command, stacking in reverse order: 163 | # * args from the command line 164 | # * the main class name 165 | # * -classpath 166 | # * -D...appname settings 167 | # * --module-path (only if needed) 168 | # * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. 169 | 170 | # For Cygwin or MSYS, switch paths to Windows format before running java 171 | if "$cygwin" || "$msys" ; then 172 | APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) 173 | CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" ) 174 | 175 | JAVACMD=$( cygpath --unix "$JAVACMD" ) 176 | 177 | # Now convert the arguments - kludge to limit ourselves to /bin/sh 178 | for arg do 179 | if 180 | case $arg in #( 181 | -*) false ;; # don't mess with options #( 182 | /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath 183 | [ -e "$t" ] ;; #( 184 | *) false ;; 185 | esac 186 | then 187 | arg=$( cygpath --path --ignore --mixed "$arg" ) 188 | fi 189 | # Roll the args list around exactly as many times as the number of 190 | # args, so each arg winds up back in the position where it started, but 191 | # possibly modified. 192 | # 193 | # NB: a `for` loop captures its iteration list before it begins, so 194 | # changing the positional parameters here affects neither the number of 195 | # iterations, nor the values presented in `arg`. 196 | shift # remove old arg 197 | set -- "$@" "$arg" # push replacement arg 198 | done 199 | fi 200 | 201 | 202 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 
203 | DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' 204 | 205 | # Collect all arguments for the java command: 206 | # * DEFAULT_JVM_OPTS, JAVA_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments, 207 | # and any embedded shellness will be escaped. 208 | # * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be 209 | # treated as '${Hostname}' itself on the command line. 210 | 211 | set -- \ 212 | "-Dorg.gradle.appname=$APP_BASE_NAME" \ 213 | -classpath "$CLASSPATH" \ 214 | org.gradle.wrapper.GradleWrapperMain \ 215 | "$@" 216 | 217 | # Stop when "xargs" is not available. 218 | if ! command -v xargs >/dev/null 2>&1 219 | then 220 | die "xargs is not available" 221 | fi 222 | 223 | # Use "xargs" to parse quoted args. 224 | # 225 | # With -n1 it outputs one arg per line, with the quotes and backslashes removed. 226 | # 227 | # In Bash we could simply go: 228 | # 229 | # readarray ARGS < <( xargs -n1 <<<"$var" ) && 230 | # set -- "${ARGS[@]}" "$@" 231 | # 232 | # but POSIX shell has neither arrays nor command substitution, so instead we 233 | # post-process each arg (as a line of input to sed) to backslash-escape any 234 | # character that might be a shell metacharacter, then use eval to reverse 235 | # that process (while maintaining the separation between arguments), and wrap 236 | # the whole thing up as a single "set" statement. 237 | # 238 | # This will of course break if any of these variables contains a newline or 239 | # an unmatched quote. 240 | # 241 | 242 | eval "set -- $( 243 | printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" | 244 | xargs -n1 | 245 | sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' | 246 | tr '\n' ' ' 247 | )" '"$@"' 248 | 249 | exec "$JAVACMD" "$@" 250 | -------------------------------------------------------------------------------- /gradlew.bat: -------------------------------------------------------------------------------- 1 | @rem 2 | @rem Copyright 2015 the original author or authors. 3 | @rem 4 | @rem Licensed under the Apache License, Version 2.0 (the "License"); 5 | @rem you may not use this file except in compliance with the License. 6 | @rem You may obtain a copy of the License at 7 | @rem 8 | @rem https://www.apache.org/licenses/LICENSE-2.0 9 | @rem 10 | @rem Unless required by applicable law or agreed to in writing, software 11 | @rem distributed under the License is distributed on an "AS IS" BASIS, 12 | @rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | @rem See the License for the specific language governing permissions and 14 | @rem limitations under the License. 15 | @rem 16 | 17 | @if "%DEBUG%"=="" @echo off 18 | @rem ########################################################################## 19 | @rem 20 | @rem Gradle startup script for Windows 21 | @rem 22 | @rem ########################################################################## 23 | 24 | @rem Set local scope for the variables with windows NT shell 25 | if "%OS%"=="Windows_NT" setlocal 26 | 27 | set DIRNAME=%~dp0 28 | if "%DIRNAME%"=="" set DIRNAME=. 29 | @rem This is normally unused 30 | set APP_BASE_NAME=%~n0 31 | set APP_HOME=%DIRNAME% 32 | 33 | @rem Resolve any "." and ".." in APP_HOME to make it shorter. 34 | for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi 35 | 36 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 
37 | set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" 38 | 39 | @rem Find java.exe 40 | if defined JAVA_HOME goto findJavaFromJavaHome 41 | 42 | set JAVA_EXE=java.exe 43 | %JAVA_EXE% -version >NUL 2>&1 44 | if %ERRORLEVEL% equ 0 goto execute 45 | 46 | echo. 1>&2 47 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 1>&2 48 | echo. 1>&2 49 | echo Please set the JAVA_HOME variable in your environment to match the 1>&2 50 | echo location of your Java installation. 1>&2 51 | 52 | goto fail 53 | 54 | :findJavaFromJavaHome 55 | set JAVA_HOME=%JAVA_HOME:"=% 56 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 57 | 58 | if exist "%JAVA_EXE%" goto execute 59 | 60 | echo. 1>&2 61 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 1>&2 62 | echo. 1>&2 63 | echo Please set the JAVA_HOME variable in your environment to match the 1>&2 64 | echo location of your Java installation. 1>&2 65 | 66 | goto fail 67 | 68 | :execute 69 | @rem Setup the command line 70 | 71 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 72 | 73 | 74 | @rem Execute Gradle 75 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* 76 | 77 | :end 78 | @rem End local scope for the variables with windows NT shell 79 | if %ERRORLEVEL% equ 0 goto mainEnd 80 | 81 | :fail 82 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 83 | rem the _cmd.exe /c_ return code! 84 | set EXIT_CODE=%ERRORLEVEL% 85 | if %EXIT_CODE% equ 0 set EXIT_CODE=1 86 | if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE% 87 | exit /b %EXIT_CODE% 88 | 89 | :mainEnd 90 | if "%OS%"=="Windows_NT" endlocal 91 | 92 | :omega 93 | -------------------------------------------------------------------------------- /settings.gradle: -------------------------------------------------------------------------------- 1 | // https://docs.gradle.org/current/userguide/toolchains.html#sub:download_repositories 2 | plugins { 3 | id 'org.gradle.toolchains.foojay-resolver-convention' version '0.8.0' 4 | } 5 | 6 | rootProject.name = 'datafusion-java' 7 | 8 | include 'datafusion-java', 'datafusion-examples' 9 | --------------------------------------------------------------------------------
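
A note on the ownership convention that runs through the datafusion-jni sources above: every native create function moves its result onto the heap with Box::new and hands the allocation to Java as an opaque jlong via Box::into_raw, and the paired destroy function rebuilds the Box with Box::from_raw so the value is dropped and freed on the Rust side. Trait objects such as Arc<dyn FileFormat> and Arc<dyn TableProvider> are fat (two-word) pointers and cannot be cast to a single jlong, which is why those functions box the Arc itself and pass the thin Box pointer instead. The sketch below restates the pattern outside of JNI; the Format trait, Csv type, and function names are illustrative stand-ins, not part of this repository:

use std::sync::Arc;

// Hypothetical stand-in for the trait objects above (Arc<dyn FileFormat>, ...).
trait Format {
    fn name(&self) -> &'static str;
}

struct Csv;

impl Format for Csv {
    fn name(&self) -> &'static str {
        "csv"
    }
}

// Mirrors the create* functions: double-box the fat Arc<dyn Format> so a
// single thin pointer (a jlong, in JNI terms) can cross the FFI boundary.
fn create_format() -> i64 {
    let format: Arc<dyn Format> = Arc::new(Csv);
    Box::into_raw(Box::new(format)) as i64
}

// Mirrors the borrow in ListingOptions_create: read through the handle
// without taking ownership of it.
fn format_name(handle: i64) -> &'static str {
    let format = unsafe { &*(handle as *const Arc<dyn Format>) };
    format.name()
}

// Mirrors the destroy* functions: rebuild the Box so dropping it frees the
// allocation and decrements the Arc's reference count.
// Safety: handle must come from create_format and must not be used afterwards.
fn destroy_format(handle: i64) {
    let _ = unsafe { Box::from_raw(handle as *mut Arc<dyn Format>) };
}

fn main() {
    let handle = create_format();
    // Java stores the handle in a field and passes it back into native calls.
    println!("format behind handle: {}", format_name(handle));
    destroy_format(handle);
}

Because each destroy function is the sole point where ownership returns to Rust, the Java wrappers (see AbstractProxy.java in the tree above) can guarantee each handle is released exactly once.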