├── .editorconfig
├── .github
│   └── workflows
│       └── maven-build.yml
├── .gitignore
├── .mvn
│   ├── extensions.xml
│   └── wrapper
│       ├── MavenWrapperDownloader.java
│       └── maven-wrapper.properties
├── .travis.yml
├── AUTHORS
├── LICENSE
├── NOTICE
├── README.md
├── build_jar.sh
├── mvnw
├── mvnw.cmd
├── pom.xml
└── src
    ├── main
    │   ├── java
    │   │   └── org
    │   │       └── apache
    │   │           └── arrow
    │   │               └── flight
    │   │                   └── spark
    │   │                       ├── DefaultSource.java
    │   │                       ├── FlightArrowColumnVector.java
    │   │                       ├── FlightClientFactory.java
    │   │                       ├── FlightClientMiddlewareFactory.java
    │   │                       ├── FlightClientOptions.java
    │   │                       ├── FlightColumnarPartitionReader.java
    │   │                       ├── FlightEndpointWrapper.java
    │   │                       ├── FlightPartition.java
    │   │                       ├── FlightPartitionReader.java
    │   │                       ├── FlightPartitionReaderFactory.java
    │   │                       ├── FlightScan.java
    │   │                       ├── FlightScanBuilder.java
    │   │                       ├── FlightSparkContext.java
    │   │                       ├── FlightTable.java
    │   │                       ├── TokenClientMiddleware.java
    │   │                       └── TokenClientMiddlewareFactory.java
    │   └── scala
    │       └── org
    │           └── apache
    │               └── spark
    │                   └── sql
    │                       └── execution
    │                           └── arrow
    │                               └── FlightArrowUtils.scala
    └── test
        ├── java
        │   └── org
        │       └── apache
        │           └── arrow
        │               └── flight
        │                   └── spark
        │                       └── TestConnector.java
        └── resources
            └── logback-test.xml
--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (C) 2019 The flight-spark-source Authors
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #
16 | 
17 | 
18 | root = true
19 | 
20 | [*]
21 | end_of_line = lf
22 | insert_final_newline = true
23 | indent_size = 2
24 | indent_style = space
25 | 
26 | [*.js]
27 | trim_trailing_whitespace = true
28 | 
--------------------------------------------------------------------------------
/.github/workflows/maven-build.yml:
--------------------------------------------------------------------------------
1 | # For more information see: https://help.github.com/actions/language-and-framework-guides/building-and-testing-java-with-maven
2 | 
3 | name: Java CI with Maven
4 | 
5 | on:
6 |   workflow_dispatch:
7 |   push:
8 |     branches: [ "master" ]
9 |   pull_request:
10 |     branches: [ "master" ]
11 |   release:
12 |     types:
13 |       - published
14 | 
15 | jobs:
16 |   build:
17 |     runs-on: ubuntu-latest
18 |     steps:
19 |       - uses: actions/checkout@v3
20 |       - name: Set up JDK 11
21 |         uses: actions/setup-java@v3
22 |         with:
23 |           java-version: '11'
24 |           distribution: 'temurin'
25 |           cache: maven
26 |       - name: Build with Maven
27 |         run: mvn -B -V verify -Dmaven.javadoc.skip=true -Dlicense.skip=true --file pom.xml
28 |       - name: Archive JAR artifact
29 |         uses: actions/upload-artifact@v3
30 |         with:
31 |           name: flight-spark-source-shaded-jar
32 |           path: |
33 |             target/flight-spark-source-1.0-SNAPSHOT-shaded.jar
34 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .buildpath
2 | .checkstyle
3 | .classpath
4 | .idea/
5 | .vscode/
6 | .project
7 | .mvn/wrapper/maven-wrapper.jar
8 | .profiler
9 | .settings/
10 | *~
11 | *.log
12 | *.lck
13 | *.iml
14 | *.patch
15 | *.DS_Store
16 | log.path_IS_UNDEFINED
17 | target/
18 | node_modules/
19 | node/
20 | reports/
21 | git.properties
22 | TAGS
23 | .DS_STORE
24 | contrib/native/client/build/
25 | contrib/native/client/build/*
26 | CMakeCache.txt
27 | CMakeFiles
28 | Makefile
29 | cmake_install.cmake
30 | install_manifest.txt
31 | dependency-reduced-pom.xml
32 | user.webpack.config.json
33 | .eslintcache
34 | plugins/sample-data/dremio_hive_db/*
35 | plugins/hive/metastore_db/*
36 | .npmrc
37 | .externalToolBuilders*
38 | .vscode
39 | .indexing
40 | 
--------------------------------------------------------------------------------
/.mvn/extensions.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <!--
3 | 
4 |     Copyright (C) 2019 The flight-spark-source Authors
5 | 
6 |     Licensed under the Apache License, Version 2.0 (the "License");
7 |     you may not use this file except in compliance with the License.
8 |     You may obtain a copy of the License at
9 | 
10 |     http://www.apache.org/licenses/LICENSE-2.0
11 | 
12 |     Unless required by applicable law or agreed to in writing, software
13 |     distributed under the License is distributed on an "AS IS" BASIS,
14 |     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 |     See the License for the specific language governing permissions and
16 |     limitations under the License.
17 | 
18 | -->
19 | <extensions>
20 |   <extension>
21 |     <groupId>fr.jcgay.maven</groupId>
22 |     <artifactId>maven-profiler</artifactId>
23 |     <version>2.6</version>
24 |   </extension>
25 |   <extension>
26 |     <groupId>fr.jcgay.maven</groupId>
27 |     <artifactId>maven-notifier</artifactId>
28 |     <version>1.10.1</version>
29 |   </extension>
30 | </extensions>
31 | 
--------------------------------------------------------------------------------
/.mvn/wrapper/MavenWrapperDownloader.java:
--------------------------------------------------------------------------------
1 | /*
2 |  * Copyright (C) 2019 The flight-spark-source Authors
3 |  *
4 |  * Licensed under the Apache License, Version 2.0 (the "License");
5 |  * you may not use this file except in compliance with the License.
6 |  * You may obtain a copy of the License at
7 |  *
8 |  * http://www.apache.org/licenses/LICENSE-2.0
9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 | */ 16 | import java.net.*; 17 | import java.io.*; 18 | import java.nio.channels.*; 19 | import java.util.Properties; 20 | 21 | public class MavenWrapperDownloader { 22 | 23 | private static final String WRAPPER_VERSION = "0.5.6"; 24 | /** 25 | * Default URL to download the maven-wrapper.jar from, if no 'downloadUrl' is provided. 26 | */ 27 | private static final String DEFAULT_DOWNLOAD_URL = "https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/" 28 | + WRAPPER_VERSION + "/maven-wrapper-" + WRAPPER_VERSION + ".jar"; 29 | 30 | /** 31 | * Path to the maven-wrapper.properties file, which might contain a downloadUrl property to 32 | * use instead of the default one. 33 | */ 34 | private static final String MAVEN_WRAPPER_PROPERTIES_PATH = 35 | ".mvn/wrapper/maven-wrapper.properties"; 36 | 37 | /** 38 | * Path where the maven-wrapper.jar will be saved to. 39 | */ 40 | private static final String MAVEN_WRAPPER_JAR_PATH = 41 | ".mvn/wrapper/maven-wrapper.jar"; 42 | 43 | /** 44 | * Name of the property which should be used to override the default download url for the wrapper. 45 | */ 46 | private static final String PROPERTY_NAME_WRAPPER_URL = "wrapperUrl"; 47 | 48 | public static void main(String args[]) { 49 | System.out.println("- Downloader started"); 50 | File baseDirectory = new File(args[0]); 51 | System.out.println("- Using base directory: " + baseDirectory.getAbsolutePath()); 52 | 53 | // If the maven-wrapper.properties exists, read it and check if it contains a custom 54 | // wrapperUrl parameter. 55 | File mavenWrapperPropertyFile = new File(baseDirectory, MAVEN_WRAPPER_PROPERTIES_PATH); 56 | String url = DEFAULT_DOWNLOAD_URL; 57 | if(mavenWrapperPropertyFile.exists()) { 58 | FileInputStream mavenWrapperPropertyFileInputStream = null; 59 | try { 60 | mavenWrapperPropertyFileInputStream = new FileInputStream(mavenWrapperPropertyFile); 61 | Properties mavenWrapperProperties = new Properties(); 62 | mavenWrapperProperties.load(mavenWrapperPropertyFileInputStream); 63 | url = mavenWrapperProperties.getProperty(PROPERTY_NAME_WRAPPER_URL, url); 64 | } catch (IOException e) { 65 | System.out.println("- ERROR loading '" + MAVEN_WRAPPER_PROPERTIES_PATH + "'"); 66 | } finally { 67 | try { 68 | if(mavenWrapperPropertyFileInputStream != null) { 69 | mavenWrapperPropertyFileInputStream.close(); 70 | } 71 | } catch (IOException e) { 72 | // Ignore ... 
73 | } 74 | } 75 | } 76 | System.out.println("- Downloading from: " + url); 77 | 78 | File outputFile = new File(baseDirectory.getAbsolutePath(), MAVEN_WRAPPER_JAR_PATH); 79 | if(!outputFile.getParentFile().exists()) { 80 | if(!outputFile.getParentFile().mkdirs()) { 81 | System.out.println( 82 | "- ERROR creating output directory '" + outputFile.getParentFile().getAbsolutePath() + "'"); 83 | } 84 | } 85 | System.out.println("- Downloading to: " + outputFile.getAbsolutePath()); 86 | try { 87 | downloadFileFromURL(url, outputFile); 88 | System.out.println("Done"); 89 | System.exit(0); 90 | } catch (Throwable e) { 91 | System.out.println("- Error downloading"); 92 | e.printStackTrace(); 93 | System.exit(1); 94 | } 95 | } 96 | 97 | private static void downloadFileFromURL(String urlString, File destination) throws Exception { 98 | if (System.getenv("MVNW_USERNAME") != null && System.getenv("MVNW_PASSWORD") != null) { 99 | String username = System.getenv("MVNW_USERNAME"); 100 | char[] password = System.getenv("MVNW_PASSWORD").toCharArray(); 101 | Authenticator.setDefault(new Authenticator() { 102 | @Override 103 | protected PasswordAuthentication getPasswordAuthentication() { 104 | return new PasswordAuthentication(username, password); 105 | } 106 | }); 107 | } 108 | URL website = new URL(urlString); 109 | ReadableByteChannel rbc; 110 | rbc = Channels.newChannel(website.openStream()); 111 | FileOutputStream fos = new FileOutputStream(destination); 112 | fos.getChannel().transferFrom(rbc, 0, Long.MAX_VALUE); 113 | fos.close(); 114 | rbc.close(); 115 | } 116 | 117 | } 118 | -------------------------------------------------------------------------------- /.mvn/wrapper/maven-wrapper.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2019 The flight-spark-source Authors 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.6.3/apache-maven-3.6.3-bin.zip 18 | wrapperUrl=https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar 19 | 20 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: focal 2 | language: java 3 | jdk: openjdk11 4 | cache: 5 | directories: 6 | - $HOME/.m2 7 | install: mvn install -DskipTests=true -Dmaven.javadoc.skip=true -B -V 8 | script: mvn test -B 9 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | # This is the list of flight-spark-source's significant contributors. 2 | # 3 | # This does not necessarily list everyone who has contributed code, 4 | # especially since many employees of one corporation may be contributing. 
5 | # To see the full list of contributors, see the revision history in 6 | # source control. 7 | Ryan Murray 8 | Kyle Brooks 9 | Doron Chen -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 
62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 
180 | 
181 |    To apply the Apache License to your work, attach the following
182 |    boilerplate notice, with the fields enclosed by brackets "[]"
183 |    replaced with your own identifying information. (Don't include
184 |    the brackets!) The text should be enclosed in the appropriate
185 |    comment syntax for the file format. We also recommend that a
186 |    file or class name and description of purpose be included on the
187 |    same "printed page" as the copyright notice for easier
188 |    identification within third-party archives.
189 | 
190 |    Copyright 2017 - Dremio Corporation
191 | 
192 |    Licensed under the Apache License, Version 2.0 (the "License");
193 |    you may not use this file except in compliance with the License.
194 |    You may obtain a copy of the License at
195 | 
196 |        http://www.apache.org/licenses/LICENSE-2.0
197 | 
198 |    Unless required by applicable law or agreed to in writing, software
199 |    distributed under the License is distributed on an "AS IS" BASIS,
200 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 |    See the License for the specific language governing permissions and
202 |    limitations under the License.
203 | 
--------------------------------------------------------------------------------
/NOTICE:
--------------------------------------------------------------------------------
1 | Dremio
2 | Copyright 2015-2017 Dremio Corporation
3 | 
4 | This product includes software developed at
5 | The Apache Software Foundation (http://www.apache.org/).
6 | 
7 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Spark source for Flight RPC enabled endpoints
2 | =========================================
3 | 
4 | [![Build Status](https://github.com/rymurr/flight-spark-source/actions/workflows/maven-build.yml/badge.svg)](https://github.com/rymurr/flight-spark-source/actions/workflows/maven-build.yml)
5 | 
6 | 
7 | This uses the new [Source V2 Interface](https://databricks.com/session/apache-spark-data-source-v2) to connect to
8 | [Apache Arrow Flight](https://www.dremio.com/understanding-apache-arrow-flight/) endpoints. It is a prototype of what is
9 | possible with Arrow Flight. The prototype has achieved a 50x speed-up compared to a serial JDBC driver, and it scales with the
10 | number of Flight endpoints/Spark executors run in parallel.
11 | 
12 | It currently supports:
13 | 
14 | * Columnar batch reading
15 | * Reading many Flight endpoints in parallel as Spark partitions
16 | * Filter and projection pushdown
17 | 
18 | It currently lacks:
19 | 
20 | * Support for all Spark/Arrow data types and filters
21 | * A write interface that uses `DoPut` to write Spark dataframes back to an Arrow Flight endpoint
22 | * Leveraging the transactional capabilities of the Spark Source V2 interface
23 | * Published benchmark tests
24 | 
25 | ## Usage
26 | You can choose to build the JAR locally, or use one of the archived JAR artifacts built from a [GitHub Actions workflow run](https://github.com/rymurr/flight-spark-source/actions/workflows/maven-build.yml).
27 | 
28 | 1. Take the built JAR file named `flight-spark-source-1.0-SNAPSHOT-shaded.jar` and copy it to the Spark master node. For the sake of this example, we will use the `/tmp` directory.
29 | 2. Ensure you have a Flight server running and accessible to your Spark cluster.
For an example of a Python Flight RPC server, see [this link](https://arrow.apache.org/cookbook/py/flight.html#streaming-parquet-storage-service).
30 | NOTE: you will have to add a `get_schema` end-point to that example server for the connector to work, with signature:
31 | ```def get_schema(self, context, descriptor) -> pyarrow.flight.SchemaResult```
32 | See this [link](https://arrow.apache.org/docs/python/generated/pyarrow.flight.FlightClient.html#pyarrow.flight.FlightClient.get_schema) for more details, and the sketch at the end of this README.
33 | 3. On the Spark master, start an interactive Python (or PySpark) session and run something like:
34 | ```python
35 | import os
36 | from pyspark.sql import SparkSession
37 | 
38 | # Get a Spark session and load the connector JAR
39 | spark = (SparkSession
40 |          .builder
41 |          .appName("flight client")
42 |          .config("spark.jars", "/tmp/flight-spark-source-1.0-SNAPSHOT-shaded.jar")
43 |          .getOrCreate()
44 |         )
45 | 
46 | # Read from a Flight RPC server using an arbitrary string containing either a command or path
47 | # Note - this will call the Flight RPC Server's "get_schema" end-point (which must be present to use the connector)
48 | df = (spark.read.format('cdap.org.apache.arrow.flight.spark')
49 |       .option('uri', 'grpc+tls://flight.example.com:8815')
50 |       # -------------------------------------------------------------------
51 |       # Uncomment the following line to trust the server's CA if it is self-signed
52 |       # .option('trustedCertificates', root_ca)  # In this example, root_ca is a str with the contents of a PEM-encoded cert
53 |       # -------------------------------------------------------------------
54 |       # Uncomment the following 2 lines to use authentication if your Flight RPC server supports Basic Token auth
55 |       # .option('username', 'flight_user')
56 |       # .option('password', os.environ['FLIGHT_PASSWORD'])  # Using an env var containing the password here for better security
57 |       # -------------------------------------------------------------------
58 |       # Uncomment the following 2 lines to use MTLS client certificate verification if your Flight RPC server supports it (MTLS client certs MUST be X.509 version 3 or above!)
59 |       # .option('clientCertificate', mtls_cert_chain)  # In this example, mtls_cert_chain is a str with the contents of a PEM-encoded client cert (signed by the server's verification CA)
60 |       # .option('clientKey', mtls_private_key)  # In this example, mtls_private_key is a str with the contents of a PEM-encoded client private key
61 |       # -------------------------------------------------------------------
62 |       .load('/some_path_or_command')  # A path or command supported by the Flight RPC server
63 |      )
64 | 
65 | # Pull the data from the Flight RPC Server's end-point(s) to the Spark worker(s)
66 | df.count()
67 | # or
68 | df.show(n=10)
69 | ```
70 | 
71 | ## How to build locally
72 | To build from source locally:
73 | 1. Clone the repo.
74 | 2. Make sure you have Java 11 and Maven installed.
75 | 3. Run these steps:
76 | ```shell
77 | cd flight-spark-source
78 | ./build_jar.sh
79 | ```
80 | 
81 | The target JAR will be present in the sub-directory `target`, with filename `flight-spark-source-1.0-SNAPSHOT-shaded.jar`.
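
## Example `get_schema` end-point

For reference, here is a minimal sketch of the `get_schema` end-point mentioned in the Usage section, written against the cookbook's parquet storage server. It is an illustration only, not code shipped in this repository: the `FlightServer` class and its `_repo` dataset directory are assumptions carried over from that cookbook example, and the handling of both command and path descriptors is likewise an assumption.

```python
import pathlib

import pyarrow.flight
import pyarrow.parquet


class FlightServer(pyarrow.flight.FlightServerBase):
    """The cookbook's parquet storage server, extended with get_schema."""

    def __init__(self, location="grpc://0.0.0.0:8815",
                 repo=pathlib.Path("./datasets"), **kwargs):
        super().__init__(location, **kwargs)
        self._repo = repo  # directory of parquet datasets, as in the cookbook

    def get_schema(self, context, descriptor):
        # The connector forwards the string passed to .load(); depending on
        # the descriptor type it arrives as a command or as a path element.
        if descriptor.descriptor_type == pyarrow.flight.DescriptorType.CMD:
            name = descriptor.command.decode("utf-8")
        else:
            name = descriptor.path[0].decode("utf-8")
        # Read just the parquet footer and wrap the schema in a SchemaResult.
        schema = pyarrow.parquet.read_schema(self._repo / name.lstrip("/"))
        return pyarrow.flight.SchemaResult(schema)
```

With an end-point like this in place, the connector can infer the Spark schema before any data is transferred.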
82 | -------------------------------------------------------------------------------- /build_jar.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | mvn clean package 4 | -------------------------------------------------------------------------------- /mvnw: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # Copyright (C) 2019 The flight-spark-source Authors 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | # ---------------------------------------------------------------------------- 19 | # Maven Start Up Batch script 20 | # 21 | # Required ENV vars: 22 | # ------------------ 23 | # JAVA_HOME - location of a JDK home dir 24 | # 25 | # Optional ENV vars 26 | # ----------------- 27 | # M2_HOME - location of maven2's installed home dir 28 | # MAVEN_OPTS - parameters passed to the Java VM when running Maven 29 | # e.g. to debug Maven itself, use 30 | # set MAVEN_OPTS=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000 31 | # MAVEN_SKIP_RC - flag to disable loading of mavenrc files 32 | # ---------------------------------------------------------------------------- 33 | 34 | if [ -z "$MAVEN_SKIP_RC" ] ; then 35 | 36 | if [ -f /etc/mavenrc ] ; then 37 | . /etc/mavenrc 38 | fi 39 | 40 | if [ -f "$HOME/.mavenrc" ] ; then 41 | . "$HOME/.mavenrc" 42 | fi 43 | 44 | fi 45 | 46 | # OS specific support. $var _must_ be set to either true or false. 47 | cygwin=false; 48 | darwin=false; 49 | mingw=false 50 | case "`uname`" in 51 | CYGWIN*) cygwin=true ;; 52 | MINGW*) mingw=true;; 53 | Darwin*) darwin=true 54 | # Use /usr/libexec/java_home if available, otherwise fall back to /Library/Java/Home 55 | # See https://developer.apple.com/library/mac/qa/qa1170/_index.html 56 | if [ -z "$JAVA_HOME" ]; then 57 | if [ -x "/usr/libexec/java_home" ]; then 58 | export JAVA_HOME="`/usr/libexec/java_home`" 59 | else 60 | export JAVA_HOME="/Library/Java/Home" 61 | fi 62 | fi 63 | ;; 64 | esac 65 | 66 | if [ -z "$JAVA_HOME" ] ; then 67 | if [ -r /etc/gentoo-release ] ; then 68 | JAVA_HOME=`java-config --jre-home` 69 | fi 70 | fi 71 | 72 | if [ -z "$M2_HOME" ] ; then 73 | ## resolve links - $0 may be a link to maven's home 74 | PRG="$0" 75 | 76 | # need this for relative symlinks 77 | while [ -h "$PRG" ] ; do 78 | ls=`ls -ld "$PRG"` 79 | link=`expr "$ls" : '.*-> \(.*\)$'` 80 | if expr "$link" : '/.*' > /dev/null; then 81 | PRG="$link" 82 | else 83 | PRG="`dirname "$PRG"`/$link" 84 | fi 85 | done 86 | 87 | saveddir=`pwd` 88 | 89 | M2_HOME=`dirname "$PRG"`/.. 
90 | 91 | # make it fully qualified 92 | M2_HOME=`cd "$M2_HOME" && pwd` 93 | 94 | cd "$saveddir" 95 | # echo Using m2 at $M2_HOME 96 | fi 97 | 98 | # For Cygwin, ensure paths are in UNIX format before anything is touched 99 | if $cygwin ; then 100 | [ -n "$M2_HOME" ] && 101 | M2_HOME=`cygpath --unix "$M2_HOME"` 102 | [ -n "$JAVA_HOME" ] && 103 | JAVA_HOME=`cygpath --unix "$JAVA_HOME"` 104 | [ -n "$CLASSPATH" ] && 105 | CLASSPATH=`cygpath --path --unix "$CLASSPATH"` 106 | fi 107 | 108 | # For Mingw, ensure paths are in UNIX format before anything is touched 109 | if $mingw ; then 110 | [ -n "$M2_HOME" ] && 111 | M2_HOME="`(cd "$M2_HOME"; pwd)`" 112 | [ -n "$JAVA_HOME" ] && 113 | JAVA_HOME="`(cd "$JAVA_HOME"; pwd)`" 114 | fi 115 | 116 | if [ -z "$JAVA_HOME" ]; then 117 | javaExecutable="`which javac`" 118 | if [ -n "$javaExecutable" ] && ! [ "`expr \"$javaExecutable\" : '\([^ ]*\)'`" = "no" ]; then 119 | # readlink(1) is not available as standard on Solaris 10. 120 | readLink=`which readlink` 121 | if [ ! `expr "$readLink" : '\([^ ]*\)'` = "no" ]; then 122 | if $darwin ; then 123 | javaHome="`dirname \"$javaExecutable\"`" 124 | javaExecutable="`cd \"$javaHome\" && pwd -P`/javac" 125 | else 126 | javaExecutable="`readlink -f \"$javaExecutable\"`" 127 | fi 128 | javaHome="`dirname \"$javaExecutable\"`" 129 | javaHome=`expr "$javaHome" : '\(.*\)/bin'` 130 | JAVA_HOME="$javaHome" 131 | export JAVA_HOME 132 | fi 133 | fi 134 | fi 135 | 136 | if [ -z "$JAVACMD" ] ; then 137 | if [ -n "$JAVA_HOME" ] ; then 138 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 139 | # IBM's JDK on AIX uses strange locations for the executables 140 | JAVACMD="$JAVA_HOME/jre/sh/java" 141 | else 142 | JAVACMD="$JAVA_HOME/bin/java" 143 | fi 144 | else 145 | JAVACMD="`which java`" 146 | fi 147 | fi 148 | 149 | if [ ! -x "$JAVACMD" ] ; then 150 | echo "Error: JAVA_HOME is not defined correctly." >&2 151 | echo " We cannot execute $JAVACMD" >&2 152 | exit 1 153 | fi 154 | 155 | if [ -z "$JAVA_HOME" ] ; then 156 | echo "Warning: JAVA_HOME environment variable is not set." 157 | fi 158 | 159 | CLASSWORLDS_LAUNCHER=org.codehaus.plexus.classworlds.launcher.Launcher 160 | 161 | # traverses directory structure from process work directory to filesystem root 162 | # first directory with .mvn subdirectory is considered project base directory 163 | find_maven_basedir() { 164 | 165 | if [ -z "$1" ] 166 | then 167 | echo "Path not specified to find_maven_basedir" 168 | return 1 169 | fi 170 | 171 | basedir="$1" 172 | wdir="$1" 173 | while [ "$wdir" != '/' ] ; do 174 | if [ -d "$wdir"/.mvn ] ; then 175 | basedir=$wdir 176 | break 177 | fi 178 | # workaround for JBEAP-8937 (on Solaris 10/Sparc) 179 | if [ -d "${wdir}" ]; then 180 | wdir=`cd "$wdir/.."; pwd` 181 | fi 182 | # end of workaround 183 | done 184 | echo "${basedir}" 185 | } 186 | 187 | # concatenates all lines of a file 188 | concat_lines() { 189 | if [ -f "$1" ]; then 190 | echo "$(tr -s '\n' ' ' < "$1")" 191 | fi 192 | } 193 | 194 | BASE_DIR=`find_maven_basedir "$(pwd)"` 195 | if [ -z "$BASE_DIR" ]; then 196 | exit 1; 197 | fi 198 | 199 | ########################################################################################## 200 | # Extension to allow automatically downloading the maven-wrapper.jar from Maven-central 201 | # This allows using the maven wrapper in projects that prohibit checking in binary data. 
202 | ########################################################################################## 203 | if [ -r "$BASE_DIR/.mvn/wrapper/maven-wrapper.jar" ]; then 204 | if [ "$MVNW_VERBOSE" = true ]; then 205 | echo "Found .mvn/wrapper/maven-wrapper.jar" 206 | fi 207 | else 208 | if [ "$MVNW_VERBOSE" = true ]; then 209 | echo "Couldn't find .mvn/wrapper/maven-wrapper.jar, downloading it ..." 210 | fi 211 | if [ -n "$MVNW_REPOURL" ]; then 212 | jarUrl="$MVNW_REPOURL/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar" 213 | else 214 | jarUrl="https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar" 215 | fi 216 | while IFS="=" read key value; do 217 | case "$key" in (wrapperUrl) jarUrl="$value"; break ;; 218 | esac 219 | done < "$BASE_DIR/.mvn/wrapper/maven-wrapper.properties" 220 | if [ "$MVNW_VERBOSE" = true ]; then 221 | echo "Downloading from: $jarUrl" 222 | fi 223 | wrapperJarPath="$BASE_DIR/.mvn/wrapper/maven-wrapper.jar" 224 | if $cygwin; then 225 | wrapperJarPath=`cygpath --path --windows "$wrapperJarPath"` 226 | fi 227 | 228 | if command -v wget > /dev/null; then 229 | if [ "$MVNW_VERBOSE" = true ]; then 230 | echo "Found wget ... using wget" 231 | fi 232 | if [ -z "$MVNW_USERNAME" ] || [ -z "$MVNW_PASSWORD" ]; then 233 | wget "$jarUrl" -O "$wrapperJarPath" 234 | else 235 | wget --http-user=$MVNW_USERNAME --http-password=$MVNW_PASSWORD "$jarUrl" -O "$wrapperJarPath" 236 | fi 237 | elif command -v curl > /dev/null; then 238 | if [ "$MVNW_VERBOSE" = true ]; then 239 | echo "Found curl ... using curl" 240 | fi 241 | if [ -z "$MVNW_USERNAME" ] || [ -z "$MVNW_PASSWORD" ]; then 242 | curl -o "$wrapperJarPath" "$jarUrl" -f 243 | else 244 | curl --user $MVNW_USERNAME:$MVNW_PASSWORD -o "$wrapperJarPath" "$jarUrl" -f 245 | fi 246 | 247 | else 248 | if [ "$MVNW_VERBOSE" = true ]; then 249 | echo "Falling back to using Java to download" 250 | fi 251 | javaClass="$BASE_DIR/.mvn/wrapper/MavenWrapperDownloader.java" 252 | # For Cygwin, switch paths to Windows format before running javac 253 | if $cygwin; then 254 | javaClass=`cygpath --path --windows "$javaClass"` 255 | fi 256 | if [ -e "$javaClass" ]; then 257 | if [ ! -e "$BASE_DIR/.mvn/wrapper/MavenWrapperDownloader.class" ]; then 258 | if [ "$MVNW_VERBOSE" = true ]; then 259 | echo " - Compiling MavenWrapperDownloader.java ..." 260 | fi 261 | # Compiling the Java class 262 | ("$JAVA_HOME/bin/javac" "$javaClass") 263 | fi 264 | if [ -e "$BASE_DIR/.mvn/wrapper/MavenWrapperDownloader.class" ]; then 265 | # Running the downloader 266 | if [ "$MVNW_VERBOSE" = true ]; then 267 | echo " - Running MavenWrapperDownloader.java ..." 
268 | fi 269 | ("$JAVA_HOME/bin/java" -cp .mvn/wrapper MavenWrapperDownloader "$MAVEN_PROJECTBASEDIR") 270 | fi 271 | fi 272 | fi 273 | fi 274 | ########################################################################################## 275 | # End of extension 276 | ########################################################################################## 277 | 278 | export MAVEN_PROJECTBASEDIR=${MAVEN_BASEDIR:-"$BASE_DIR"} 279 | if [ "$MVNW_VERBOSE" = true ]; then 280 | echo $MAVEN_PROJECTBASEDIR 281 | fi 282 | MAVEN_OPTS="$(concat_lines "$MAVEN_PROJECTBASEDIR/.mvn/jvm.config") $MAVEN_OPTS" 283 | 284 | # For Cygwin, switch paths to Windows format before running java 285 | if $cygwin; then 286 | [ -n "$M2_HOME" ] && 287 | M2_HOME=`cygpath --path --windows "$M2_HOME"` 288 | [ -n "$JAVA_HOME" ] && 289 | JAVA_HOME=`cygpath --path --windows "$JAVA_HOME"` 290 | [ -n "$CLASSPATH" ] && 291 | CLASSPATH=`cygpath --path --windows "$CLASSPATH"` 292 | [ -n "$MAVEN_PROJECTBASEDIR" ] && 293 | MAVEN_PROJECTBASEDIR=`cygpath --path --windows "$MAVEN_PROJECTBASEDIR"` 294 | fi 295 | 296 | # Provide a "standardized" way to retrieve the CLI args that will 297 | # work with both Windows and non-Windows executions. 298 | MAVEN_CMD_LINE_ARGS="$MAVEN_CONFIG $@" 299 | export MAVEN_CMD_LINE_ARGS 300 | 301 | WRAPPER_LAUNCHER=org.apache.maven.wrapper.MavenWrapperMain 302 | 303 | exec "$JAVACMD" \ 304 | $MAVEN_OPTS \ 305 | -classpath "$MAVEN_PROJECTBASEDIR/.mvn/wrapper/maven-wrapper.jar" \ 306 | "-Dmaven.home=${M2_HOME}" "-Dmaven.multiModuleProjectDirectory=${MAVEN_PROJECTBASEDIR}" \ 307 | ${WRAPPER_LAUNCHER} $MAVEN_CONFIG "$@" 308 | -------------------------------------------------------------------------------- /mvnw.cmd: -------------------------------------------------------------------------------- 1 | @REM 2 | @REM Copyright (C) 2019 The flight-spark-source Authors 3 | @REM 4 | @REM Licensed under the Apache License, Version 2.0 (the "License"); 5 | @REM you may not use this file except in compliance with the License. 6 | @REM You may obtain a copy of the License at 7 | @REM 8 | @REM http://www.apache.org/licenses/LICENSE-2.0 9 | @REM 10 | @REM Unless required by applicable law or agreed to in writing, software 11 | @REM distributed under the License is distributed on an "AS IS" BASIS, 12 | @REM WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | @REM See the License for the specific language governing permissions and 14 | @REM limitations under the License. 15 | @REM 16 | 17 | @REM ---------------------------------------------------------------------------- 18 | @REM Maven Start Up Batch script 19 | @REM 20 | @REM Required ENV vars: 21 | @REM JAVA_HOME - location of a JDK home dir 22 | @REM 23 | @REM Optional ENV vars 24 | @REM M2_HOME - location of maven2's installed home dir 25 | @REM MAVEN_BATCH_ECHO - set to 'on' to enable the echoing of the batch commands 26 | @REM MAVEN_BATCH_PAUSE - set to 'on' to wait for a keystroke before ending 27 | @REM MAVEN_OPTS - parameters passed to the Java VM when running Maven 28 | @REM e.g. 
to debug Maven itself, use 29 | @REM set MAVEN_OPTS=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000 30 | @REM MAVEN_SKIP_RC - flag to disable loading of mavenrc files 31 | @REM ---------------------------------------------------------------------------- 32 | 33 | @REM Begin all REM lines with '@' in case MAVEN_BATCH_ECHO is 'on' 34 | @echo off 35 | @REM set title of command window 36 | title %0 37 | @REM enable echoing by setting MAVEN_BATCH_ECHO to 'on' 38 | @if "%MAVEN_BATCH_ECHO%" == "on" echo %MAVEN_BATCH_ECHO% 39 | 40 | @REM set %HOME% to equivalent of $HOME 41 | if "%HOME%" == "" (set "HOME=%HOMEDRIVE%%HOMEPATH%") 42 | 43 | @REM Execute a user defined script before this one 44 | if not "%MAVEN_SKIP_RC%" == "" goto skipRcPre 45 | @REM check for pre script, once with legacy .bat ending and once with .cmd ending 46 | if exist "%HOME%\mavenrc_pre.bat" call "%HOME%\mavenrc_pre.bat" 47 | if exist "%HOME%\mavenrc_pre.cmd" call "%HOME%\mavenrc_pre.cmd" 48 | :skipRcPre 49 | 50 | @setlocal 51 | 52 | set ERROR_CODE=0 53 | 54 | @REM To isolate internal variables from possible post scripts, we use another setlocal 55 | @setlocal 56 | 57 | @REM ==== START VALIDATION ==== 58 | if not "%JAVA_HOME%" == "" goto OkJHome 59 | 60 | echo. 61 | echo Error: JAVA_HOME not found in your environment. >&2 62 | echo Please set the JAVA_HOME variable in your environment to match the >&2 63 | echo location of your Java installation. >&2 64 | echo. 65 | goto error 66 | 67 | :OkJHome 68 | if exist "%JAVA_HOME%\bin\java.exe" goto init 69 | 70 | echo. 71 | echo Error: JAVA_HOME is set to an invalid directory. >&2 72 | echo JAVA_HOME = "%JAVA_HOME%" >&2 73 | echo Please set the JAVA_HOME variable in your environment to match the >&2 74 | echo location of your Java installation. >&2 75 | echo. 76 | goto error 77 | 78 | @REM ==== END VALIDATION ==== 79 | 80 | :init 81 | 82 | @REM Find the project base dir, i.e. the directory that contains the folder ".mvn". 83 | @REM Fallback to current working directory if not found. 84 | 85 | set MAVEN_PROJECTBASEDIR=%MAVEN_BASEDIR% 86 | IF NOT "%MAVEN_PROJECTBASEDIR%"=="" goto endDetectBaseDir 87 | 88 | set EXEC_DIR=%CD% 89 | set WDIR=%EXEC_DIR% 90 | :findBaseDir 91 | IF EXIST "%WDIR%"\.mvn goto baseDirFound 92 | cd .. 93 | IF "%WDIR%"=="%CD%" goto baseDirNotFound 94 | set WDIR=%CD% 95 | goto findBaseDir 96 | 97 | :baseDirFound 98 | set MAVEN_PROJECTBASEDIR=%WDIR% 99 | cd "%EXEC_DIR%" 100 | goto endDetectBaseDir 101 | 102 | :baseDirNotFound 103 | set MAVEN_PROJECTBASEDIR=%EXEC_DIR% 104 | cd "%EXEC_DIR%" 105 | 106 | :endDetectBaseDir 107 | 108 | IF NOT EXIST "%MAVEN_PROJECTBASEDIR%\.mvn\jvm.config" goto endReadAdditionalConfig 109 | 110 | @setlocal EnableExtensions EnableDelayedExpansion 111 | for /F "usebackq delims=" %%a in ("%MAVEN_PROJECTBASEDIR%\.mvn\jvm.config") do set JVM_CONFIG_MAVEN_PROPS=!JVM_CONFIG_MAVEN_PROPS! 
%%a 112 | @endlocal & set JVM_CONFIG_MAVEN_PROPS=%JVM_CONFIG_MAVEN_PROPS% 113 | 114 | :endReadAdditionalConfig 115 | 116 | SET MAVEN_JAVA_EXE="%JAVA_HOME%\bin\java.exe" 117 | set WRAPPER_JAR="%MAVEN_PROJECTBASEDIR%\.mvn\wrapper\maven-wrapper.jar" 118 | set WRAPPER_LAUNCHER=org.apache.maven.wrapper.MavenWrapperMain 119 | 120 | set DOWNLOAD_URL="https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar" 121 | 122 | FOR /F "tokens=1,2 delims==" %%A IN ("%MAVEN_PROJECTBASEDIR%\.mvn\wrapper\maven-wrapper.properties") DO ( 123 | IF "%%A"=="wrapperUrl" SET DOWNLOAD_URL=%%B 124 | ) 125 | 126 | @REM Extension to allow automatically downloading the maven-wrapper.jar from Maven-central 127 | @REM This allows using the maven wrapper in projects that prohibit checking in binary data. 128 | if exist %WRAPPER_JAR% ( 129 | if "%MVNW_VERBOSE%" == "true" ( 130 | echo Found %WRAPPER_JAR% 131 | ) 132 | ) else ( 133 | if not "%MVNW_REPOURL%" == "" ( 134 | SET DOWNLOAD_URL="%MVNW_REPOURL%/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar" 135 | ) 136 | if "%MVNW_VERBOSE%" == "true" ( 137 | echo Couldn't find %WRAPPER_JAR%, downloading it ... 138 | echo Downloading from: %DOWNLOAD_URL% 139 | ) 140 | 141 | powershell -Command "&{"^ 142 | "$webclient = new-object System.Net.WebClient;"^ 143 | "if (-not ([string]::IsNullOrEmpty('%MVNW_USERNAME%') -and [string]::IsNullOrEmpty('%MVNW_PASSWORD%'))) {"^ 144 | "$webclient.Credentials = new-object System.Net.NetworkCredential('%MVNW_USERNAME%', '%MVNW_PASSWORD%');"^ 145 | "}"^ 146 | "[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12; $webclient.DownloadFile('%DOWNLOAD_URL%', '%WRAPPER_JAR%')"^ 147 | "}" 148 | if "%MVNW_VERBOSE%" == "true" ( 149 | echo Finished downloading %WRAPPER_JAR% 150 | ) 151 | ) 152 | @REM End of extension 153 | 154 | @REM Provide a "standardized" way to retrieve the CLI args that will 155 | @REM work with both Windows and non-Windows executions. 
156 | set MAVEN_CMD_LINE_ARGS=%* 157 | 158 | %MAVEN_JAVA_EXE% %JVM_CONFIG_MAVEN_PROPS% %MAVEN_OPTS% %MAVEN_DEBUG_OPTS% -classpath %WRAPPER_JAR% "-Dmaven.multiModuleProjectDirectory=%MAVEN_PROJECTBASEDIR%" %WRAPPER_LAUNCHER% %MAVEN_CONFIG% %* 159 | if ERRORLEVEL 1 goto error 160 | goto end 161 | 162 | :error 163 | set ERROR_CODE=1 164 | 165 | :end 166 | @endlocal & set ERROR_CODE=%ERROR_CODE% 167 | 168 | if not "%MAVEN_SKIP_RC%" == "" goto skipRcPost 169 | @REM check for post script, once with legacy .bat ending and once with .cmd ending 170 | if exist "%HOME%\mavenrc_post.bat" call "%HOME%\mavenrc_post.bat" 171 | if exist "%HOME%\mavenrc_post.cmd" call "%HOME%\mavenrc_post.cmd" 172 | :skipRcPost 173 | 174 | @REM pause the script if MAVEN_BATCH_PAUSE is set to 'on' 175 | if "%MAVEN_BATCH_PAUSE%" == "on" pause 176 | 177 | if "%MAVEN_TERMINATE_CMD%" == "on" exit %ERROR_CODE% 178 | 179 | exit /B %ERROR_CODE% 180 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 19 | 22 | 4.0.0 23 | 24 | org.apache.arrow.flight.spark 25 | flight-spark-source 26 | 1.0-SNAPSHOT 27 | 28 | 29 | 11 30 | 2.12.14 31 | 7.0.0 32 | 3.2.1 33 | 1.7.25 34 | 2.12.6 35 | 2.12.7.1 36 | UTF-8 37 | UTF-8 38 | 39 | 40 | 41 | 42 | kr.motd.maven 43 | os-maven-plugin 44 | 1.7.0 45 | 46 | 47 | 48 | 49 | 50 | src/main/resources 51 | true 52 | 53 | 54 | 55 | 56 | 57 | org.apache.maven.plugins 58 | maven-checkstyle-plugin 59 | 3.1.0 60 | 61 | ${project.basedir}/src/main/checkstyle/checkstyle-config.xml 62 | ${project.basedir}/src/main/checkstyle/checkstyle-suppressions.xml 63 | 64 | 65 | 66 | org.apache.maven.plugins 67 | maven-surefire-plugin 68 | 3.0.0 69 | 70 | 71 | true 72 | 73 | 74 | --add-opens=java.base/sun.nio.ch=ALL-UNNAMED 75 | --add-opens=java.base/java.nio=ALL-UNNAMED 76 | 77 | 78 | 79 | 80 | com.mycila 81 | license-maven-plugin 82 | 3.0 83 | 84 | 85 | Copyright (C) ${project.inceptionYear} 86 | 87 | Licensed under the Apache License, Version 2.0 (the "License"); 88 | you may not use this file except in compliance with the License. 89 | You may obtain a copy of the License at 90 | 91 | http://www.apache.org/licenses/LICENSE-2.0 92 | 93 | Unless required by applicable law or agreed to in writing, software 94 | distributed under the License is distributed on an "AS IS" BASIS, 95 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 96 | See the License for the specific language governing permissions and 97 | limitations under the License. 
98 | 99 | 100 | The ${project.artifactId} Authors 101 | 2019 102 | 103 | 104 | 2019 105 | 106 | true 107 | false 108 | 109 | 110 | src/** 111 | * 112 | **/.mvn/** 113 | 114 | 115 | 116 | **/*~ 117 | **/#*# 118 | **/.#* 119 | **/%*% 120 | **/._* 121 | **/.repository/** 122 | **/CVS 123 | **/CVS/** 124 | **/.cvsignore 125 | **/RCS 126 | **/RCS/** 127 | **/SCCS 128 | **/SCCS/** 129 | **/vssver.scc 130 | **/.svn 131 | **/.svn/** 132 | **/.arch-ids 133 | **/.arch-ids/** 134 | **/.bzr 135 | **/.bzr/** 136 | **/.MySCMServerInfo 137 | **/.DS_Store 138 | **/.metadata 139 | **/.metadata/** 140 | **/.hg 141 | **/.hg/** 142 | **/.hgignore 143 | **/.git 144 | **/.git/** 145 | **/.gitignore 146 | **/.gitmodules 147 | **/BitKeeper 148 | **/BitKeeper/** 149 | **/ChangeSet 150 | **/ChangeSet/** 151 | **/_darcs 152 | **/_darcs/** 153 | **/.darcsrepo 154 | **/.darcsrepo/** 155 | **/-darcs-backup* 156 | **/.darcs-temp-mail 157 | 158 | **/test-output/** 159 | **/release.properties 160 | **/dependency-reduced-pom.xml 161 | **/release-pom.xml 162 | **/pom.xml.releaseBackup 163 | **/cobertura.ser 164 | **/.clover/** 165 | **/.classpath 166 | **/.project 167 | **/.settings/** 168 | **/*.iml 169 | **/*.ipr 170 | **/*.iws 171 | .idea/** 172 | **/nb-configuration.xml 173 | **/MANIFEST.MF 174 | **/*.jpg 175 | **/*.png 176 | **/*.gif 177 | **/*.ico 178 | **/*.bmp 179 | **/*.tiff 180 | **/*.tif 181 | **/*.cr2 182 | **/*.xcf 183 | **/*.class 184 | **/*.exe 185 | **/*.dll 186 | **/*.so 187 | **/*.md5 188 | **/*.sha1 189 | **/*.jar 190 | **/*.zip 191 | **/*.rar 192 | **/*.tar 193 | **/*.tar.gz 194 | **/*.tar.bz2 195 | **/*.gz 196 | **/*.xls 197 | **/META-INF/services/** 198 | **/*.md 199 | **/*.xls 200 | **/*.doc 201 | **/*.odt 202 | **/*.ods 203 | **/*.pdf 204 | **/.travis.yml 205 | **/*.swf 206 | **/*.json 207 | 208 | **/*.eot 209 | **/*.ttf 210 | **/*.woff 211 | **/*.xlsx 212 | **/*.docx 213 | **/*.ppt 214 | **/*.pptx 215 | **/*.patch 216 | 217 | 218 | 219 | **/*.log 220 | **/*.txt 221 | **/*.csv 222 | **/*.tsv 223 | **/*.parquet 224 | **/*.jks 225 | **/*.nonformat 226 | **/*.gzip 227 | **/*.k 228 | **/*.q 229 | **/*.dat 230 | 231 | 232 | **/Jenkinsfile 233 | **/LICENSE 234 | **/NOTICE 235 | **/AUTHORS 236 | **/postinstall 237 | **/.babelrc 238 | **/.checkstyle 239 | **/.eslintcache 240 | **/.eslintignore 241 | **/.eslintrc 242 | **/git.properties 243 | **/pom.xml.versionsBackup 244 | **/q 245 | **/c.java 246 | 247 | 248 | **/node_modules/** 249 | **/.idea/** 250 | **/db/** 251 | **/*.ipynb 252 | 253 | 254 | SLASHSTAR_STYLE 255 | DOUBLEDASHES_STYLE 256 | DOUBLESLASH_STYLE 257 | DOUBLESLASH_STYLE 258 | DOUBLESLASH_STYLE 259 | SLASHSTAR_STYLE 260 | SLASHSTAR_STYLE 261 | SLASHSTAR_STYLE 262 | SLASHSTAR_STYLE 263 | SCRIPT_STYLE 264 | SCRIPT_STYLE 265 | SCRIPT_STYLE 266 | DOUBLEDASHES_STYLE 267 | SCRIPT_STYLE 268 | SLASHSTAR_STYLE 269 | SCRIPT_STYLE 270 | SCRIPT_STYLE 271 | SCRIPT_STYLE 272 | XML_STYLE 273 | SCRIPT_STYLE 274 | 275 | 276 | 277 | 278 | default-cli 279 | 280 | format 281 | 282 | 283 | 284 | verify-license-headers 285 | verify 286 | 287 | check 288 | 289 | 290 | 291 | 292 | 293 | maven-enforcer-plugin 294 | 1.4.1 295 | 296 | 297 | avoid_bad_dependencies 298 | verify 299 | 300 | enforce 301 | 302 | 303 | 304 | 305 | 306 | commons-logging 307 | javax.servlet:servlet-api 308 | org.mortbay.jetty:servlet-api 309 | org.mortbay.jetty:servlet-api-2.5 310 | log4j:log4j 311 | 312 | 313 | 314 | 315 | 316 | 317 | 318 | 319 | org.apache.maven.plugins 320 | maven-compiler-plugin 321 | 3.8.1 322 | 323 | ${java.major-version} 
324 | ${java.major-version} 325 | 326 | 327 | 328 | net.alchim31.maven 329 | scala-maven-plugin 330 | 4.6.1 331 | 332 | false 333 | 334 | 335 | 336 | scala-compile-first 337 | process-resources 338 | 339 | add-source 340 | compile 341 | 342 | 343 | 344 | scala-test-compile 345 | process-test-resources 346 | 347 | testCompile 348 | 349 | 350 | 351 | 352 | 353 | org.apache.maven.plugins 354 | maven-shade-plugin 355 | 3.3.0 356 | 357 | 358 | package 359 | 360 | shade 361 | 362 | 363 | 364 | 365 | org.apache.arrow:flight-core 366 | org.apache.arrow:flight-grpc 367 | org.apache.arrow:arrow-vector 368 | org.apache.arrow:arrow-format 369 | org.apache.arrow:arrow-memory-core 370 | org.apache.arrow:arrow-memory-netty 371 | com.google.flatbuffers:flatbuffers-java 372 | io.grpc:* 373 | io.netty:* 374 | io.opencensus:* 375 | com.google.code.gson:gson 376 | com.google.code.findbugs:jsr305 377 | com.google.code.errorprone:error_prone_annotations 378 | com.google.api.grpc:proto-google-common-protos 379 | com.google.protobuf:protobuf-java 380 | com.google.guava:guava 381 | com.google.guava:failureaccess 382 | io.perfmark:perfmark-api 383 | 384 | 385 | io.netty:netty-transport-native-unix-common 386 | io.netty:netty-transport-native-epoll 387 | 388 | 389 | 390 | 391 | 392 | *:* 393 | 394 | META-INF/*.SF 395 | META-INF/*.DSA 396 | META-INF/*.RSA 397 | 398 | 399 | 400 | 401 | 402 | com.google.protobuf 403 | cdap.com.google.protobuf 404 | 405 | 406 | org.apache.arrow 407 | cdap.org.apache.arrow 408 | 409 | 410 | io.grpc 411 | cdap.io.grpc 412 | 413 | 414 | io.netty 415 | cdap.io.netty 416 | 417 | 418 | com.google 419 | cdap.com.google 420 | 421 | 422 | 423 | META-INF.native.libnetty_ 424 | META-INF.native.libcdap_netty_ 425 | 426 | 427 | META-INF.native.netty_ 428 | META-INF.native.cdap_netty_ 429 | 430 | 431 | 432 | 433 | 434 | true 435 | shaded 436 | 437 | 438 | 439 | 440 | 441 | 442 | 443 | 444 | org.apache.spark 445 | spark-core_2.12 446 | ${spark.version} 447 | 448 | 449 | org.slf4j 450 | slf4j-log4j12 451 | 452 | 453 | commons-logging 454 | commons-logging 455 | 456 | 457 | log4j 458 | log4j 459 | 460 | 461 | org.apache.arrow 462 | arrow-format 463 | 464 | 465 | org.apache.arrow 466 | arrow-vector 467 | 468 | 469 | 470 | 471 | org.apache.spark 472 | spark-sql_2.12 473 | ${spark.version} 474 | 475 | 476 | org.slf4j 477 | slf4j-log4j12 478 | 479 | 480 | commons-logging 481 | commons-logging 482 | 483 | 484 | log4j 485 | log4j 486 | 487 | 488 | javax.servlet 489 | servlet-api 490 | 491 | 492 | org.apache.arrow 493 | arrow-format 494 | 495 | 496 | org.apache.arrow 497 | arrow-vector 498 | 499 | 500 | 501 | 502 | 503 | com.fasterxml.jackson.core 504 | jackson-core 505 | ${jackson-core.version} 506 | 507 | 508 | com.fasterxml.jackson.core 509 | jackson-databind 510 | ${jackson-databind.version} 511 | 512 | 513 | org.apache.arrow 514 | flight-core 515 | ${arrow.version} 516 | 517 | 518 | org.apache.arrow 519 | flight-grpc 520 | ${arrow.version} 521 | 522 | 523 | org.scala-lang 524 | scala-library 525 | ${scala.version} 526 | 527 | 528 | 529 | org.slf4j 530 | jul-to-slf4j 531 | ${dep.slf4j.version} 532 | test 533 | 534 | 535 | org.apache.arrow 536 | flight-core 537 | ${arrow.version} 538 | tests 539 | test 540 | 541 | 542 | 543 | org.slf4j 544 | jcl-over-slf4j 545 | ${dep.slf4j.version} 546 | test 547 | 548 | 549 | 550 | org.slf4j 551 | log4j-over-slf4j 552 | ${dep.slf4j.version} 553 | test 554 | 555 | 556 | ch.qos.logback 557 | logback-classic 558 | 1.2.9 559 | test 560 | 561 | 562 | de.huxhorn.lilith 563 | 
de.huxhorn.lilith.logback.appender.multiplex-classic
564 | 8.2.0
565 | test
566 | 
567 | 
568 | junit
569 | junit
570 | 4.13.1
571 | test
572 | 
573 | 
574 | 
575 | 
576 | 
--------------------------------------------------------------------------------
/src/main/java/org/apache/arrow/flight/spark/DefaultSource.java:
--------------------------------------------------------------------------------
1 | /*
2 |  * Copyright (C) 2019 The flight-spark-source Authors
3 |  *
4 |  * Licensed under the Apache License, Version 2.0 (the "License");
5 |  * you may not use this file except in compliance with the License.
6 |  * You may obtain a copy of the License at
7 |  *
8 |  * http://www.apache.org/licenses/LICENSE-2.0
9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package org.apache.arrow.flight.spark;
18 | 
19 | import java.net.URISyntaxException;
20 | import java.util.ArrayList;
21 | import java.util.List;
22 | import java.util.Map;
23 | 
24 | import org.apache.arrow.flight.Location;
25 | import org.apache.spark.sql.connector.catalog.TableProvider;
26 | import org.apache.spark.sql.connector.expressions.Transform;
27 | import org.apache.spark.sql.SparkSession;
28 | import org.apache.spark.sql.connector.catalog.Table;
29 | import org.apache.spark.sql.sources.DataSourceRegister;
30 | import org.apache.spark.sql.types.StructType;
31 | import org.apache.spark.sql.util.CaseInsensitiveStringMap;
32 | import org.apache.spark.api.java.JavaSparkContext;
33 | import org.apache.spark.broadcast.Broadcast;
34 | 
35 | public class DefaultSource implements TableProvider, DataSourceRegister {
36 |   private SparkSession spark;
37 | 
38 |   private SparkSession getSparkSession() {
39 |     if (spark == null) {
40 |       spark = SparkSession.getActiveSession().get();
41 |     }
42 |     return spark;
43 |   }
44 | 
45 |   private FlightTable makeTable(CaseInsensitiveStringMap options) {
46 |     String uri = options.getOrDefault("uri", "grpc://localhost:47470");
47 |     Location location;
48 |     try {
49 |       location = new Location(uri);
50 |     } catch (URISyntaxException e) {
51 |       throw new RuntimeException(e);
52 |     }
53 | 
54 |     String sql = options.getOrDefault("path", "");
55 |     String username = options.getOrDefault("username", "");
56 |     String password = options.getOrDefault("password", "");
57 |     String trustedCertificates = options.getOrDefault("trustedCertificates", "");
58 |     String clientCertificate = options.getOrDefault("clientCertificate", "");
59 |     String clientKey = options.getOrDefault("clientKey", "");
60 |     String token = options.getOrDefault("token", "");
61 |     List<FlightClientMiddlewareFactory> middleware = new ArrayList<>();
62 |     if (!token.isEmpty()) {
63 |       middleware.add(new TokenClientMiddlewareFactory(token));
64 |     }
65 | 
66 | 
67 |     Broadcast<FlightClientOptions> clientOptions = JavaSparkContext.fromSparkContext(getSparkSession().sparkContext()).broadcast(
68 |       new FlightClientOptions(username, password, trustedCertificates, clientCertificate, clientKey, middleware)
69 |     );
70 | 
71 |     return new FlightTable(
72 |       String.format("%s Location %s Command %s", shortName(), location.getUri().toString(), sql),
73 |       location,
74 |       sql,
75 |       clientOptions
76 |     );
77 |   }
78 | 
79 |   @Override
80 |   public StructType inferSchema(CaseInsensitiveStringMap options) {
81 |     return makeTable(options).schema();
82 | } 83 | 84 | @Override 85 | public String shortName() { 86 | return "flight"; 87 | } 88 | 89 | @Override 90 | public Table getTable(StructType schema, Transform[] partitioning, Map options) { 91 | return makeTable(new CaseInsensitiveStringMap(options)); 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /src/main/java/org/apache/arrow/flight/spark/FlightArrowColumnVector.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2019 The flight-spark-source Authors 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | /* 17 | * Licensed to the Apache Software Foundation (ASF) under one or more 18 | * contributor license agreements. See the NOTICE file distributed with 19 | * this work for additional information regarding copyright ownership. 20 | * The ASF licenses this file to You under the Apache License, Version 2.0 21 | * (the "License"); you may not use this file except in compliance with 22 | * the License. You may obtain a copy of the License at 23 | * 24 | * http://www.apache.org/licenses/LICENSE-2.0 25 | * 26 | * Unless required by applicable law or agreed to in writing, software 27 | * distributed under the License is distributed on an "AS IS" BASIS, 28 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 29 | * See the License for the specific language governing permissions and 30 | * limitations under the License. 
31 | */ 32 | 33 | package org.apache.arrow.flight.spark; 34 | 35 | import org.apache.arrow.vector.BigIntVector; 36 | import org.apache.arrow.vector.BitVector; 37 | import org.apache.arrow.vector.DateDayVector; 38 | import org.apache.arrow.vector.DateMilliVector; 39 | import org.apache.arrow.vector.DecimalVector; 40 | import org.apache.arrow.vector.Float4Vector; 41 | import org.apache.arrow.vector.Float8Vector; 42 | import org.apache.arrow.vector.IntVector; 43 | import org.apache.arrow.vector.SmallIntVector; 44 | import org.apache.arrow.vector.TimeStampMicroVector; 45 | import org.apache.arrow.vector.TimeStampMicroTZVector; 46 | import org.apache.arrow.vector.TimeStampMilliVector; 47 | import org.apache.arrow.vector.TimeStampVector; 48 | import org.apache.arrow.vector.TinyIntVector; 49 | import org.apache.arrow.vector.ValueVector; 50 | import org.apache.arrow.vector.VarBinaryVector; 51 | import org.apache.arrow.vector.VarCharVector; 52 | import org.apache.arrow.vector.complex.ListVector; 53 | import org.apache.arrow.vector.complex.StructVector; 54 | import org.apache.arrow.vector.holders.NullableVarCharHolder; 55 | import org.apache.arrow.memory.ArrowBuf; 56 | import org.apache.spark.sql.execution.arrow.FlightArrowUtils; 57 | import org.apache.spark.sql.types.Decimal; 58 | import org.apache.spark.sql.vectorized.ColumnVector; 59 | import org.apache.spark.sql.vectorized.ColumnarArray; 60 | import org.apache.spark.sql.vectorized.ColumnarMap; 61 | import org.apache.spark.unsafe.types.UTF8String; 62 | 63 | 64 | /** 65 | * A column vector backed by Apache Arrow. Currently calendar interval type and map type are not 66 | * supported. This is a copy of ArrowColumnVector with added support for DateMilli and TimestampMilli 67 | */ 68 | public final class FlightArrowColumnVector extends ColumnVector { 69 | 70 | private final ArrowVectorAccessor accessor; 71 | private FlightArrowColumnVector[] childColumns; 72 | 73 | @Override 74 | public boolean hasNull() { 75 | return accessor.getNullCount() > 0; 76 | } 77 | 78 | @Override 79 | public int numNulls() { 80 | return accessor.getNullCount(); 81 | } 82 | 83 | @Override 84 | public void close() { 85 | if (childColumns != null) { 86 | for (int i = 0; i < childColumns.length; i++) { 87 | childColumns[i].close(); 88 | childColumns[i] = null; 89 | } 90 | childColumns = null; 91 | } 92 | accessor.close(); 93 | } 94 | 95 | @Override 96 | public boolean isNullAt(int rowId) { 97 | return accessor.isNullAt(rowId); 98 | } 99 | 100 | @Override 101 | public boolean getBoolean(int rowId) { 102 | return accessor.getBoolean(rowId); 103 | } 104 | 105 | @Override 106 | public byte getByte(int rowId) { 107 | return accessor.getByte(rowId); 108 | } 109 | 110 | @Override 111 | public short getShort(int rowId) { 112 | return accessor.getShort(rowId); 113 | } 114 | 115 | @Override 116 | public int getInt(int rowId) { 117 | return accessor.getInt(rowId); 118 | } 119 | 120 | @Override 121 | public long getLong(int rowId) { 122 | return accessor.getLong(rowId); 123 | } 124 | 125 | @Override 126 | public float getFloat(int rowId) { 127 | return accessor.getFloat(rowId); 128 | } 129 | 130 | @Override 131 | public double getDouble(int rowId) { 132 | return accessor.getDouble(rowId); 133 | } 134 | 135 | @Override 136 | public Decimal getDecimal(int rowId, int precision, int scale) { 137 | if (isNullAt(rowId)) { 138 | return null; 139 | } 140 | return accessor.getDecimal(rowId, precision, scale); 141 | } 142 | 143 | @Override 144 | public UTF8String getUTF8String(int rowId) { 145 | if 
(isNullAt(rowId)) { 146 | return null; 147 | } 148 | return accessor.getUTF8String(rowId); 149 | } 150 | 151 | @Override 152 | public byte[] getBinary(int rowId) { 153 | if (isNullAt(rowId)) { 154 | return null; 155 | } 156 | return accessor.getBinary(rowId); 157 | } 158 | 159 | @Override 160 | public ColumnarArray getArray(int rowId) { 161 | if (isNullAt(rowId)) { 162 | return null; 163 | } 164 | return accessor.getArray(rowId); 165 | } 166 | 167 | @Override 168 | public ColumnarMap getMap(int rowId) { 169 | throw new UnsupportedOperationException(); 170 | } 171 | 172 | @Override 173 | public FlightArrowColumnVector getChild(int ordinal) { 174 | return childColumns[ordinal]; 175 | } 176 | 177 | public FlightArrowColumnVector(ValueVector vector) { 178 | super(FlightArrowUtils.fromArrowField(vector.getField())); 179 | 180 | if (vector instanceof BitVector) { 181 | accessor = new BooleanAccessor((BitVector) vector); 182 | } else if (vector instanceof TinyIntVector) { 183 | accessor = new ByteAccessor((TinyIntVector) vector); 184 | } else if (vector instanceof SmallIntVector) { 185 | accessor = new ShortAccessor((SmallIntVector) vector); 186 | } else if (vector instanceof IntVector) { 187 | accessor = new IntAccessor((IntVector) vector); 188 | } else if (vector instanceof BigIntVector) { 189 | accessor = new LongAccessor((BigIntVector) vector); 190 | } else if (vector instanceof Float4Vector) { 191 | accessor = new FloatAccessor((Float4Vector) vector); 192 | } else if (vector instanceof Float8Vector) { 193 | accessor = new DoubleAccessor((Float8Vector) vector); 194 | } else if (vector instanceof DecimalVector) { 195 | accessor = new DecimalAccessor((DecimalVector) vector); 196 | } else if (vector instanceof VarCharVector) { 197 | accessor = new StringAccessor((VarCharVector) vector); 198 | } else if (vector instanceof VarBinaryVector) { 199 | accessor = new BinaryAccessor((VarBinaryVector) vector); 200 | } else if (vector instanceof DateDayVector) { 201 | accessor = new DateAccessor((DateDayVector) vector); 202 | } else if (vector instanceof DateMilliVector) { 203 | accessor = new DateMilliAccessor((DateMilliVector) vector); 204 | } else if (vector instanceof TimeStampMicroVector) { 205 | accessor = new TimestampMicroAccessor((TimeStampMicroVector) vector); 206 | } else if (vector instanceof TimeStampMicroTZVector) { 207 | accessor = new TimestampMicroTZAccessor((TimeStampMicroTZVector) vector); 208 | } else if (vector instanceof TimeStampMilliVector) { 209 | accessor = new TimestampMilliAccessor((TimeStampMilliVector) vector); 210 | } else if (vector instanceof ListVector) { 211 | ListVector listVector = (ListVector) vector; 212 | accessor = new ArrayAccessor(listVector); 213 | } else if (vector instanceof StructVector) { 214 | StructVector structVector = (StructVector) vector; 215 | accessor = new StructAccessor(structVector); 216 | 217 | childColumns = new FlightArrowColumnVector[structVector.size()]; 218 | for (int i = 0; i < childColumns.length; ++i) { 219 | childColumns[i] = new FlightArrowColumnVector(structVector.getVectorById(i)); 220 | } 221 | } else { 222 | System.out.println(vector); 223 | throw new UnsupportedOperationException(); 224 | } 225 | } 226 | 227 | private abstract static class ArrowVectorAccessor { 228 | 229 | private final ValueVector vector; 230 | 231 | ArrowVectorAccessor(ValueVector vector) { 232 | this.vector = vector; 233 | } 234 | 235 | // TODO: should be final after removing ArrayAccessor workaround 236 | boolean isNullAt(int rowId) { 237 | return 
vector.isNull(rowId); 238 | } 239 | 240 | final int getNullCount() { 241 | return vector.getNullCount(); 242 | } 243 | 244 | final void close() { 245 | vector.close(); 246 | } 247 | 248 | boolean getBoolean(int rowId) { 249 | throw new UnsupportedOperationException(); 250 | } 251 | 252 | byte getByte(int rowId) { 253 | throw new UnsupportedOperationException(); 254 | } 255 | 256 | short getShort(int rowId) { 257 | throw new UnsupportedOperationException(); 258 | } 259 | 260 | int getInt(int rowId) { 261 | throw new UnsupportedOperationException(); 262 | } 263 | 264 | long getLong(int rowId) { 265 | throw new UnsupportedOperationException(); 266 | } 267 | 268 | float getFloat(int rowId) { 269 | throw new UnsupportedOperationException(); 270 | } 271 | 272 | double getDouble(int rowId) { 273 | throw new UnsupportedOperationException(); 274 | } 275 | 276 | Decimal getDecimal(int rowId, int precision, int scale) { 277 | throw new UnsupportedOperationException(); 278 | } 279 | 280 | UTF8String getUTF8String(int rowId) { 281 | throw new UnsupportedOperationException(); 282 | } 283 | 284 | byte[] getBinary(int rowId) { 285 | throw new UnsupportedOperationException(); 286 | } 287 | 288 | ColumnarArray getArray(int rowId) { 289 | throw new UnsupportedOperationException(); 290 | } 291 | } 292 | 293 | private static class BooleanAccessor extends ArrowVectorAccessor { 294 | 295 | private final BitVector accessor; 296 | 297 | BooleanAccessor(BitVector vector) { 298 | super(vector); 299 | this.accessor = vector; 300 | } 301 | 302 | @Override 303 | final boolean getBoolean(int rowId) { 304 | return accessor.get(rowId) == 1; 305 | } 306 | } 307 | 308 | private static class ByteAccessor extends ArrowVectorAccessor { 309 | 310 | private final TinyIntVector accessor; 311 | 312 | ByteAccessor(TinyIntVector vector) { 313 | super(vector); 314 | this.accessor = vector; 315 | } 316 | 317 | @Override 318 | final byte getByte(int rowId) { 319 | return accessor.get(rowId); 320 | } 321 | } 322 | 323 | private static class ShortAccessor extends ArrowVectorAccessor { 324 | 325 | private final SmallIntVector accessor; 326 | 327 | ShortAccessor(SmallIntVector vector) { 328 | super(vector); 329 | this.accessor = vector; 330 | } 331 | 332 | @Override 333 | final short getShort(int rowId) { 334 | return accessor.get(rowId); 335 | } 336 | } 337 | 338 | private static class IntAccessor extends ArrowVectorAccessor { 339 | 340 | private final IntVector accessor; 341 | 342 | IntAccessor(IntVector vector) { 343 | super(vector); 344 | this.accessor = vector; 345 | } 346 | 347 | @Override 348 | final int getInt(int rowId) { 349 | return accessor.get(rowId); 350 | } 351 | } 352 | 353 | private static class LongAccessor extends ArrowVectorAccessor { 354 | 355 | private final BigIntVector accessor; 356 | 357 | LongAccessor(BigIntVector vector) { 358 | super(vector); 359 | this.accessor = vector; 360 | } 361 | 362 | @Override 363 | final long getLong(int rowId) { 364 | return accessor.get(rowId); 365 | } 366 | } 367 | 368 | private static class FloatAccessor extends ArrowVectorAccessor { 369 | 370 | private final Float4Vector accessor; 371 | 372 | FloatAccessor(Float4Vector vector) { 373 | super(vector); 374 | this.accessor = vector; 375 | } 376 | 377 | @Override 378 | final float getFloat(int rowId) { 379 | return accessor.get(rowId); 380 | } 381 | } 382 | 383 | private static class DoubleAccessor extends ArrowVectorAccessor { 384 | 385 | private final Float8Vector accessor; 386 | 387 | DoubleAccessor(Float8Vector vector) { 388 | 
super(vector); 389 | this.accessor = vector; 390 | } 391 | 392 | @Override 393 | final double getDouble(int rowId) { 394 | return accessor.get(rowId); 395 | } 396 | } 397 | 398 | private static class DecimalAccessor extends ArrowVectorAccessor { 399 | 400 | private final DecimalVector accessor; 401 | 402 | DecimalAccessor(DecimalVector vector) { 403 | super(vector); 404 | this.accessor = vector; 405 | } 406 | 407 | @Override 408 | final Decimal getDecimal(int rowId, int precision, int scale) { 409 | if (isNullAt(rowId)) { 410 | return null; 411 | } 412 | return Decimal.apply(accessor.getObject(rowId), precision, scale); 413 | } 414 | } 415 | 416 | private static class StringAccessor extends ArrowVectorAccessor { 417 | 418 | private final VarCharVector accessor; 419 | private final NullableVarCharHolder stringResult = new NullableVarCharHolder(); 420 | 421 | StringAccessor(VarCharVector vector) { 422 | super(vector); 423 | this.accessor = vector; 424 | } 425 | 426 | @Override 427 | final UTF8String getUTF8String(int rowId) { 428 | accessor.get(rowId, stringResult); 429 | if (stringResult.isSet == 0) { 430 | return null; 431 | } else { 432 | return UTF8String.fromAddress(null, 433 | stringResult.buffer.memoryAddress() + stringResult.start, 434 | stringResult.end - stringResult.start); 435 | } 436 | } 437 | } 438 | 439 | private static class BinaryAccessor extends ArrowVectorAccessor { 440 | 441 | private final VarBinaryVector accessor; 442 | 443 | BinaryAccessor(VarBinaryVector vector) { 444 | super(vector); 445 | this.accessor = vector; 446 | } 447 | 448 | @Override 449 | final byte[] getBinary(int rowId) { 450 | return accessor.getObject(rowId); 451 | } 452 | } 453 | 454 | private static class DateAccessor extends ArrowVectorAccessor { 455 | 456 | private final DateDayVector accessor; 457 | 458 | DateAccessor(DateDayVector vector) { 459 | super(vector); 460 | this.accessor = vector; 461 | } 462 | 463 | @Override 464 | final int getInt(int rowId) { 465 | return accessor.get(rowId); 466 | } 467 | } 468 | 469 | private static class DateMilliAccessor extends ArrowVectorAccessor { 470 | 471 | private final DateMilliVector accessor; 472 | private final double val = 1.0 / (24. * 60. * 60. 
* 1000.); 473 | 474 | DateMilliAccessor(DateMilliVector vector) { 475 | super(vector); 476 | this.accessor = vector; 477 | } 478 | 479 | @Override 480 | final int getInt(int rowId) { 481 | // Convert epoch milliseconds to whole days since the epoch. 482 | return (int) (accessor.get(rowId) * val); 483 | } 484 | } 485 | 486 | private static class TimestampMicroAccessor extends ArrowVectorAccessor { 487 | 488 | private final TimeStampVector accessor; 489 | 490 | TimestampMicroAccessor(TimeStampMicroVector vector) { 491 | super(vector); 492 | this.accessor = vector; 493 | } 494 | 495 | @Override 496 | final long getLong(int rowId) { 497 | return accessor.get(rowId); 498 | } 499 | } 500 | 501 | private static class TimestampMicroTZAccessor extends ArrowVectorAccessor { 502 | 503 | private final TimeStampVector accessor; 504 | 505 | TimestampMicroTZAccessor(TimeStampMicroTZVector vector) { 506 | super(vector); 507 | this.accessor = vector; 508 | } 509 | 510 | @Override 511 | final long getLong(int rowId) { 512 | return accessor.get(rowId); 513 | } 514 | } 515 | 516 | private static class TimestampMilliAccessor extends ArrowVectorAccessor { 517 | 518 | private final TimeStampVector accessor; 519 | 520 | TimestampMilliAccessor(TimeStampMilliVector vector) { 521 | super(vector); 522 | this.accessor = vector; 523 | } 524 | 525 | @Override 526 | final long getLong(int rowId) { 527 | return accessor.get(rowId) * 1000; 528 | } 529 | } 530 | 531 | private static class ArrayAccessor extends ArrowVectorAccessor { 532 | 533 | private final ListVector accessor; 534 | private final FlightArrowColumnVector arrayData; 535 | 536 | ArrayAccessor(ListVector vector) { 537 | super(vector); 538 | this.accessor = vector; 539 | this.arrayData = new FlightArrowColumnVector(vector.getDataVector()); 540 | } 541 | 542 | @Override 543 | final boolean isNullAt(int rowId) { 544 | // TODO: Workaround if vector has all non-null values, see ARROW-1948 545 | if (accessor.getValueCount() > 0 && accessor.getValidityBuffer().capacity() == 0) { 546 | return false; 547 | } else { 548 | return super.isNullAt(rowId); 549 | } 550 | } 551 | 552 | @Override 553 | final ColumnarArray getArray(int rowId) { 554 | ArrowBuf offsets = accessor.getOffsetBuffer(); 555 | int index = rowId * ListVector.OFFSET_WIDTH; 556 | int start = offsets.getInt(index); 557 | int end = offsets.getInt(index + ListVector.OFFSET_WIDTH); 558 | return new ColumnarArray(arrayData, start, end - start); 559 | } 560 | } 561 | 562 | /** 563 | * Any call to "get" method will throw UnsupportedOperationException. 564 | *

565 | * Access struct values in a ArrowColumnVector doesn't use this accessor. Instead, it uses 566 | * getStruct() method defined in the parent class. Any call to "get" method in this class is a 567 | * bug in the code. 568 | */ 569 | private static class StructAccessor extends ArrowVectorAccessor { 570 | 571 | StructAccessor(StructVector vector) { 572 | super(vector); 573 | } 574 | } 575 | } 576 | -------------------------------------------------------------------------------- /src/main/java/org/apache/arrow/flight/spark/FlightClientFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2019 The flight-spark-source Authors 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package org.apache.arrow.flight.spark; 17 | 18 | import java.io.ByteArrayInputStream; 19 | import java.io.InputStream; 20 | 21 | import org.apache.arrow.flight.FlightClient; 22 | import org.apache.arrow.flight.Location; 23 | import org.apache.arrow.flight.grpc.CredentialCallOption; 24 | import org.apache.arrow.memory.BufferAllocator; 25 | import org.apache.arrow.memory.RootAllocator; 26 | 27 | public class FlightClientFactory implements AutoCloseable { 28 | private final BufferAllocator allocator = new RootAllocator(); 29 | private final Location defaultLocation; 30 | private final FlightClientOptions clientOptions; 31 | 32 | private CredentialCallOption callOption; 33 | 34 | public FlightClientFactory(Location defaultLocation, FlightClientOptions clientOptions) { 35 | this.defaultLocation = defaultLocation; 36 | this.clientOptions = clientOptions; 37 | } 38 | 39 | public FlightClient apply() { 40 | FlightClient.Builder builder = FlightClient.builder(allocator, defaultLocation); 41 | 42 | if (!clientOptions.getTrustedCertificates().isEmpty()) { 43 | builder.trustedCertificates(new ByteArrayInputStream(clientOptions.getTrustedCertificates().getBytes())); 44 | } 45 | 46 | String clientCertificate = clientOptions.getClientCertificate(); 47 | if (clientCertificate != null && !clientCertificate.isEmpty()) { 48 | InputStream clientCert = new ByteArrayInputStream(clientCertificate.getBytes()); 49 | InputStream clientKey = new ByteArrayInputStream(clientOptions.getClientKey().getBytes()); 50 | builder.clientCertificate(clientCert, clientKey); 51 | } 52 | 53 | // Add client middleware 54 | clientOptions.getMiddleware().stream().forEach(middleware -> builder.intercept(middleware)); 55 | 56 | FlightClient client = builder.build(); 57 | String username = clientOptions.getUsername(); 58 | if (username != null && !username.isEmpty()) { 59 | this.callOption = client.authenticateBasicToken(clientOptions.getUsername(), clientOptions.getPassword()).get(); 60 | } 61 | 62 | return client; 63 | } 64 | 65 | public CredentialCallOption getCallOption() { 66 | return this.callOption; 67 | } 68 | 69 | @Override 70 | public void close() { 71 | allocator.close(); 72 | } 73 | } 74 | 
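As a usage sketch (not part of the repository): FlightClientFactory above is the piece the partition readers further down drive on each executor -- build options, call apply() to open an authenticated client, reuse the bearer credentials cached by the basic-auth handshake, then pull a stream. The endpoint address, credentials, and SQL command below are illustrative assumptions, as is the existence of a Flight server that accepts basic auth:

import java.util.Collections;

import org.apache.arrow.flight.FlightClient;
import org.apache.arrow.flight.FlightDescriptor;
import org.apache.arrow.flight.FlightInfo;
import org.apache.arrow.flight.FlightStream;
import org.apache.arrow.flight.Location;
import org.apache.arrow.flight.grpc.CredentialCallOption;

public class FlightClientFactoryDemo {
  public static void main(String[] args) throws Exception {
    // Illustrative endpoint and credentials -- substitute a real Flight server.
    Location location = Location.forGrpcInsecure("localhost", 47470);
    FlightClientOptions options = new FlightClientOptions(
        "user", "secret", "", "", "", Collections.emptyList());

    try (FlightClientFactory factory = new FlightClientFactory(location, options);
         FlightClient client = factory.apply()) {
      // apply() already ran authenticateBasicToken(), so the bearer token is cached.
      CredentialCallOption token = factory.getCallOption();

      // Plan the query, then read the first endpoint's stream batch by batch.
      FlightInfo info = client.getInfo(FlightDescriptor.command("SELECT 1".getBytes()), token);
      try (FlightStream stream = client.getStream(info.getEndpoints().get(0).getTicket(), token)) {
        while (stream.next()) {
          System.out.println("rows in batch: " + stream.getRoot().getRowCount());
        }
      }
    }
  }
}

Note the ordering: closing the factory releases its shared allocator, so the client must be closed first; try-with-resources closes in reverse declaration order, which handles this.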
-------------------------------------------------------------------------------- /src/main/java/org/apache/arrow/flight/spark/FlightClientMiddlewareFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2019 The flight-spark-source Authors 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.apache.arrow.flight.spark; 18 | 19 | import java.io.Serializable; 20 | 21 | import org.apache.arrow.flight.FlightClientMiddleware; 22 | 23 | public interface FlightClientMiddlewareFactory extends FlightClientMiddleware.Factory, Serializable { 24 | 25 | } 26 | -------------------------------------------------------------------------------- /src/main/java/org/apache/arrow/flight/spark/FlightClientOptions.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2019 The flight-spark-source Authors 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package org.apache.arrow.flight.spark; 18 | 19 | import java.io.Serializable; 20 | import java.util.List; 21 | 22 | public class FlightClientOptions implements Serializable { 23 | private final String username; 24 | private final String password; 25 | private final String trustedCertificates; 26 | private final String clientCertificate; 27 | private final String clientKey; 28 | private final List middleware; 29 | 30 | public FlightClientOptions(String username, String password, String trustedCertificates, String clientCertificate, String clientKey, List middleware) { 31 | this.username = username; 32 | this.password = password; 33 | this.trustedCertificates = trustedCertificates; 34 | this.clientCertificate = clientCertificate; 35 | this.clientKey = clientKey; 36 | this.middleware = middleware; 37 | } 38 | 39 | public String getUsername() { 40 | return username; 41 | } 42 | 43 | public String getPassword() { 44 | return password; 45 | } 46 | 47 | public String getTrustedCertificates() { 48 | return trustedCertificates; 49 | } 50 | 51 | public String getClientCertificate() { 52 | return clientCertificate; 53 | } 54 | 55 | public String getClientKey() { 56 | return clientKey; 57 | } 58 | 59 | public List getMiddleware() { 60 | return middleware; 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /src/main/java/org/apache/arrow/flight/spark/FlightColumnarPartitionReader.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2019 The flight-spark-source Authors 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.apache.arrow.flight.spark; 18 | 19 | import java.io.IOException; 20 | 21 | import org.apache.arrow.flight.grpc.CredentialCallOption; 22 | import org.apache.spark.sql.connector.read.PartitionReader; 23 | import org.apache.spark.sql.vectorized.ColumnarBatch; 24 | import org.apache.arrow.flight.FlightClient; 25 | import org.apache.arrow.flight.FlightStream; 26 | import org.apache.arrow.util.AutoCloseables; 27 | import org.apache.spark.sql.vectorized.ColumnVector; 28 | 29 | public class FlightColumnarPartitionReader implements PartitionReader { 30 | private final FlightClientFactory clientFactory; 31 | private final FlightClient client; 32 | private final FlightStream stream; 33 | 34 | public FlightColumnarPartitionReader(FlightClientOptions clientOptions, FlightPartition partition) { 35 | // TODO - Should we handle multiple locations? 36 | clientFactory = new FlightClientFactory(partition.getEndpoint().get().getLocations().get(0), clientOptions); 37 | client = clientFactory.apply(); 38 | CredentialCallOption callOption = clientFactory.getCallOption(); 39 | stream = client.getStream(partition.getEndpoint().get().getTicket(), callOption); 40 | } 41 | 42 | // This is written this way because the Spark interface iterates in a different way. 43 | // E.g., .next() -> .get() vs. 
.hasNext() -> .next() 44 | @Override 45 | public boolean next() throws IOException { 46 | try { 47 | return stream.next(); 48 | } catch (RuntimeException e) { 49 | throw new IOException(e); 50 | } 51 | } 52 | 53 | @Override 54 | public ColumnarBatch get() { 55 | ColumnarBatch batch = new ColumnarBatch( 56 | stream.getRoot().getFieldVectors() 57 | .stream() 58 | .map(FlightArrowColumnVector::new) 59 | .toArray(ColumnVector[]::new) 60 | ); 61 | batch.setNumRows(stream.getRoot().getRowCount()); 62 | return batch; 63 | } 64 | 65 | @Override 66 | public void close() throws IOException { 67 | try { 68 | AutoCloseables.close(stream, client, clientFactory); 69 | } catch (Exception e) { 70 | throw new IOException(e); 71 | } 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /src/main/java/org/apache/arrow/flight/spark/FlightEndpointWrapper.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2019 The flight-spark-source Authors 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.apache.arrow.flight.spark; 18 | 19 | import java.io.IOException; 20 | import java.io.ObjectInputStream; 21 | import java.io.ObjectOutputStream; 22 | import java.io.Serializable; 23 | import java.net.URI; 24 | import java.util.ArrayList; 25 | import java.util.stream.Collectors; 26 | 27 | import org.apache.arrow.flight.FlightEndpoint; 28 | import org.apache.arrow.flight.Location; 29 | import org.apache.arrow.flight.Ticket; 30 | 31 | // This is needed for FlightEndpoint to be Serializable in spark. 32 | // org.apache.arrow.flight.FlightEndpoint is a POJO of Serializable types. 
33 | // However if spark is using build-in serialization instead of Kyro then we must implement Serializable 34 | public class FlightEndpointWrapper implements Serializable { 35 | private FlightEndpoint inner; 36 | 37 | public FlightEndpointWrapper(FlightEndpoint inner) { 38 | this.inner = inner; 39 | } 40 | 41 | public FlightEndpoint get() { 42 | return inner; 43 | } 44 | 45 | private void writeObject(ObjectOutputStream out) throws IOException { 46 | ArrayList locations = inner.getLocations().stream().map(location -> location.getUri()).collect(Collectors.toCollection(ArrayList::new)); 47 | out.writeObject(locations); 48 | out.write(inner.getTicket().getBytes()); 49 | } 50 | 51 | private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException { 52 | @SuppressWarnings("unchecked") 53 | Location[] locations = ((ArrayList) in.readObject()).stream().map(l -> new Location(l)).toArray(Location[]::new); 54 | byte[] ticket = in.readAllBytes(); 55 | this.inner = new FlightEndpoint(new Ticket(ticket), locations); 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /src/main/java/org/apache/arrow/flight/spark/FlightPartition.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2019 The flight-spark-source Authors 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.apache.arrow.flight.spark; 18 | 19 | import org.apache.spark.sql.connector.read.InputPartition; 20 | 21 | public class FlightPartition implements InputPartition { 22 | private final FlightEndpointWrapper endpoint; 23 | 24 | public FlightPartition(FlightEndpointWrapper endpoint) { 25 | this.endpoint = endpoint; 26 | } 27 | 28 | @Override 29 | public String[] preferredLocations() { 30 | return endpoint.get().getLocations().stream().map(location -> location.getUri().getHost()).toArray(String[]::new); 31 | } 32 | 33 | public FlightEndpointWrapper getEndpoint() { 34 | return endpoint; 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /src/main/java/org/apache/arrow/flight/spark/FlightPartitionReader.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2019 The flight-spark-source Authors 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package org.apache.arrow.flight.spark; 18 | 19 | import java.io.IOException; 20 | import java.util.Iterator; 21 | import java.util.Optional; 22 | 23 | import org.apache.arrow.flight.FlightClient; 24 | import org.apache.arrow.flight.FlightStream; 25 | import org.apache.arrow.flight.grpc.CredentialCallOption; 26 | import org.apache.arrow.util.AutoCloseables; 27 | import org.apache.spark.sql.catalyst.InternalRow; 28 | import org.apache.spark.sql.connector.read.PartitionReader; 29 | import org.apache.spark.sql.vectorized.ColumnVector; 30 | import org.apache.spark.sql.vectorized.ColumnarBatch; 31 | 32 | public class FlightPartitionReader implements PartitionReader<InternalRow> { 33 | private final FlightClientFactory clientFactory; 34 | private final FlightClient client; 35 | private final CredentialCallOption callOption; 36 | private final FlightStream stream; 37 | private Optional<Iterator<InternalRow>> batch = Optional.empty(); 38 | private InternalRow row; 39 | 40 | public FlightPartitionReader(FlightClientOptions clientOptions, FlightPartition partition) { 41 | // TODO - Should we handle multiple locations? 42 | clientFactory = new FlightClientFactory(partition.getEndpoint().get().getLocations().get(0), clientOptions); 43 | client = clientFactory.apply(); 44 | callOption = clientFactory.getCallOption(); 45 | stream = client.getStream(partition.getEndpoint().get().getTicket(), callOption); 46 | } 47 | 48 | private Iterator<InternalRow> getNextBatch() { 49 | ColumnarBatch batch = new ColumnarBatch( 50 | stream.getRoot().getFieldVectors() 51 | .stream() 52 | .map(FlightArrowColumnVector::new) 53 | .toArray(ColumnVector[]::new) 54 | ); 55 | batch.setNumRows(stream.getRoot().getRowCount()); 56 | return batch.rowIterator(); 57 | } 58 | 59 | // This is written this way because the Spark interface iterates in a different way. 60 | // E.g., .next() -> .get() vs. .hasNext() -> .next() 61 | @Override 62 | public boolean next() throws IOException { 63 | try { 64 | // Try the iterator first then get next batch 65 | // Not quite rust match expressions... 66 | return batch.map(currentBatch -> { 67 | // Are there still rows in this batch? 68 | if (currentBatch.hasNext()) { 69 | row = currentBatch.next(); 70 | return true; 71 | // No more rows, get the next batch 72 | } else { 73 | // Is there another batch? 74 | if (stream.next()) { 75 | // Yes, then fetch it. 76 | Iterator<InternalRow> nextBatch = getNextBatch(); 77 | batch = Optional.of(nextBatch); 78 | if (nextBatch.hasNext()) { 79 | row = nextBatch.next(); 80 | return true; 81 | // Odd, we got an empty batch 82 | } else { 83 | return false; 84 | } 85 | // This partition / stream is complete 86 | } else { 87 | return false; 88 | } 89 | } 90 | // Fetch the first batch 91 | }).orElseGet(() -> { 92 | // Is the stream empty? 93 | if (stream.next()) { 94 | // No, then fetch the first batch 95 | Iterator<InternalRow> firstBatch = getNextBatch(); 96 | batch = Optional.of(firstBatch); 97 | if (firstBatch.hasNext()) { 98 | row = firstBatch.next(); 99 | return true; 100 | // Odd, we got an empty batch 101 | } else { 102 | return false; 103 | } 104 | // The stream was empty...
105 | } else { 106 | return false; 107 | } 108 | }); 109 | } catch (RuntimeException e) { 110 | throw new IOException(e); 111 | } 112 | } 113 | 114 | @Override 115 | public InternalRow get() { 116 | return row; 117 | } 118 | 119 | @Override 120 | public void close() throws IOException { 121 | try { 122 | AutoCloseables.close(stream, client, clientFactory); 123 | } catch (Exception e) { 124 | throw new IOException(e); 125 | } 126 | } 127 | } 128 | -------------------------------------------------------------------------------- /src/main/java/org/apache/arrow/flight/spark/FlightPartitionReaderFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2019 The flight-spark-source Authors 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.apache.arrow.flight.spark; 18 | 19 | import org.apache.spark.broadcast.Broadcast; 20 | import org.apache.spark.sql.catalyst.InternalRow; 21 | import org.apache.spark.sql.connector.read.InputPartition; 22 | import org.apache.spark.sql.connector.read.PartitionReader; 23 | import org.apache.spark.sql.connector.read.PartitionReaderFactory; 24 | import org.apache.spark.sql.vectorized.ColumnarBatch; 25 | 26 | public class FlightPartitionReaderFactory implements PartitionReaderFactory { 27 | private final Broadcast clientOptions; 28 | 29 | public FlightPartitionReaderFactory(Broadcast clientOptions) { 30 | this.clientOptions = clientOptions; 31 | } 32 | 33 | @Override 34 | public PartitionReader createReader(InputPartition iPartition) { 35 | // This feels wrong but this is what upstream spark sources do to. 36 | FlightPartition partition = (FlightPartition) iPartition; 37 | return new FlightPartitionReader(clientOptions.getValue(), partition); 38 | } 39 | 40 | @Override 41 | public PartitionReader createColumnarReader(InputPartition iPartition) { 42 | // This feels wrong but this is what upstream spark sources do to. 43 | FlightPartition partition = (FlightPartition) iPartition; 44 | return new FlightColumnarPartitionReader(clientOptions.getValue(), partition); 45 | } 46 | 47 | @Override 48 | public boolean supportColumnarReads(InputPartition partition) { 49 | return true; 50 | } 51 | 52 | } 53 | -------------------------------------------------------------------------------- /src/main/java/org/apache/arrow/flight/spark/FlightScan.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2019 The flight-spark-source Authors 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.apache.arrow.flight.spark; 18 | 19 | import org.apache.spark.sql.connector.read.Scan; 20 | 21 | import org.apache.arrow.flight.FlightInfo; 22 | import org.apache.spark.broadcast.Broadcast; 23 | import org.apache.spark.sql.connector.read.Batch; 24 | import org.apache.spark.sql.connector.read.InputPartition; 25 | import org.apache.spark.sql.connector.read.PartitionReaderFactory; 26 | import org.apache.spark.sql.types.StructType; 27 | 28 | public class FlightScan implements Scan, Batch { 29 | private final StructType schema; 30 | private final FlightInfo info; 31 | private final Broadcast clientOptions; 32 | 33 | public FlightScan(StructType schema, FlightInfo info, Broadcast clientOptions) { 34 | this.schema = schema; 35 | this.info = info; 36 | this.clientOptions = clientOptions; 37 | } 38 | 39 | @Override 40 | public StructType readSchema() { 41 | return schema; 42 | } 43 | 44 | @Override 45 | public Batch toBatch() { 46 | return this; 47 | } 48 | 49 | @Override 50 | public InputPartition[] planInputPartitions() { 51 | InputPartition[] batches = info.getEndpoints().stream().map(endpoint -> { 52 | FlightEndpointWrapper endpointWrapper = new FlightEndpointWrapper(endpoint); 53 | return new FlightPartition(endpointWrapper); 54 | }).toArray(InputPartition[]::new); 55 | return batches; 56 | } 57 | 58 | @Override 59 | public PartitionReaderFactory createReaderFactory() { 60 | return new FlightPartitionReaderFactory(clientOptions); 61 | } 62 | 63 | } 64 | -------------------------------------------------------------------------------- /src/main/java/org/apache/arrow/flight/spark/FlightScanBuilder.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2019 The flight-spark-source Authors 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package org.apache.arrow.flight.spark; 18 | 19 | import java.util.List; 20 | import java.util.Map; 21 | import java.util.stream.Collectors; 22 | 23 | import org.apache.arrow.flight.FlightClient; 24 | import org.apache.arrow.flight.FlightDescriptor; 25 | import org.apache.arrow.flight.FlightInfo; 26 | import org.apache.arrow.flight.Location; 27 | import org.apache.arrow.flight.SchemaResult; 28 | import org.apache.arrow.flight.grpc.CredentialCallOption; 29 | import org.apache.arrow.util.AutoCloseables; 30 | import org.apache.arrow.vector.types.FloatingPointPrecision; 31 | import org.apache.arrow.vector.types.pojo.ArrowType; 32 | import org.apache.arrow.vector.types.pojo.FieldType; 33 | import org.apache.spark.broadcast.Broadcast; 34 | import org.apache.spark.sql.connector.read.Scan; 35 | import org.apache.spark.sql.connector.read.ScanBuilder; 36 | import org.apache.spark.sql.connector.read.SupportsPushDownFilters; 37 | import org.apache.spark.sql.connector.read.SupportsPushDownRequiredColumns; 38 | import org.apache.spark.sql.sources.*; 39 | import org.apache.spark.sql.types.*; 40 | import org.slf4j.Logger; 41 | import org.slf4j.LoggerFactory; 42 | 43 | import scala.collection.JavaConversions; 44 | 45 | import com.google.common.collect.Lists; 46 | import com.google.common.base.Joiner; 47 | 48 | public class FlightScanBuilder implements ScanBuilder, SupportsPushDownRequiredColumns, SupportsPushDownFilters { 49 | private static final Logger LOGGER = LoggerFactory.getLogger(FlightScanBuilder.class); 50 | private static final Joiner WHERE_JOINER = Joiner.on(" and "); 51 | private static final Joiner PROJ_JOINER = Joiner.on(", "); 52 | private SchemaResult flightSchema; 53 | private StructType schema; 54 | private final Location location; 55 | private final Broadcast<FlightClientOptions> clientOptions; 56 | private FlightDescriptor descriptor; 57 | private String sql; 58 | private Filter[] pushed; 59 | 60 | public FlightScanBuilder(Location location, Broadcast<FlightClientOptions> clientOptions, String sql) { 61 | this.location = location; 62 | this.clientOptions = clientOptions; 63 | this.sql = sql; 64 | descriptor = getDescriptor(sql); 65 | } 66 | 67 | private class Client implements AutoCloseable { 68 | private final FlightClientFactory clientFactory; 69 | private final FlightClient client; 70 | private final CredentialCallOption callOption; 71 | 72 | public Client(Location location, FlightClientOptions clientOptions) { 73 | this.clientFactory = new FlightClientFactory(location, clientOptions); 74 | this.client = clientFactory.apply(); 75 | this.callOption = clientFactory.getCallOption(); 76 | } 77 | 78 | public FlightClient get() { 79 | return client; 80 | } 81 | 82 | public CredentialCallOption getCallOption() { 83 | return this.callOption; 84 | } 85 | 86 | @Override 87 | public void close() throws Exception { 88 | AutoCloseables.close(client, clientFactory); 89 | } 90 | } 91 | 92 | private void getFlightSchema(FlightDescriptor descriptor) { 93 | try (Client client = new Client(location, clientOptions.getValue())) { 94 | LOGGER.info("getSchema() descriptor: {}", descriptor); 95 | flightSchema = client.get().getSchema(descriptor, client.getCallOption()); 96 | } catch (Exception e) { 97 | throw new RuntimeException(e); 98 | } 99 | } 100 | 101 | @Override 102 | public Scan build() { 103 | try (Client client = new Client(location, clientOptions.getValue())) { 104 | FlightDescriptor descriptor = FlightDescriptor.command(sql.getBytes()); 105 | LOGGER.info("getInfo() descriptor: {}", descriptor); 106 | FlightInfo info =
client.get().getInfo(descriptor, client.getCallOption()); 107 | return new FlightScan(readSchema(), info, clientOptions); 108 | } catch (Exception e) { 109 | throw new RuntimeException(e); 110 | } 111 | } 112 | 113 | private boolean canBePushed(Filter filter) { 114 | if (filter instanceof IsNotNull) { 115 | return true; 116 | } else if (filter instanceof EqualTo) { 117 | return true; 118 | } 119 | if (filter instanceof GreaterThan) { 120 | return true; 121 | } 122 | if (filter instanceof GreaterThanOrEqual) { 123 | return true; 124 | } 125 | if (filter instanceof LessThan) { 126 | return true; 127 | } 128 | if (filter instanceof LessThanOrEqual) { 129 | return true; 130 | } 131 | LOGGER.error("Can't push filter of type {}", filter); 132 | return false; 133 | } 134 | 135 | private String valueToString(Object value) { 136 | if (value instanceof String) { 137 | return String.format("'%s'", value); 138 | } 139 | return value.toString(); 140 | } 141 | 142 | private String generateWhereClause(List<Filter> pushed) { 143 | List<String> filterStr = Lists.newArrayList(); 144 | for (Filter filter : pushed) { 145 | if (filter instanceof IsNotNull) { 146 | filterStr.add(String.format("isnotnull(\"%s\")", ((IsNotNull) filter).attribute())); 147 | } else if (filter instanceof EqualTo) { 148 | filterStr.add(String.format("\"%s\" = %s", ((EqualTo) filter).attribute(), valueToString(((EqualTo) filter).value()))); 149 | } else if (filter instanceof GreaterThan) { 150 | filterStr.add(String.format("\"%s\" > %s", ((GreaterThan) filter).attribute(), valueToString(((GreaterThan) filter).value()))); 151 | } else if (filter instanceof GreaterThanOrEqual) { 152 | filterStr.add(String.format("\"%s\" >= %s", ((GreaterThanOrEqual) filter).attribute(), valueToString(((GreaterThanOrEqual) filter).value()))); 153 | } else if (filter instanceof LessThan) { 154 | filterStr.add(String.format("\"%s\" < %s", ((LessThan) filter).attribute(), valueToString(((LessThan) filter).value()))); 155 | } else if (filter instanceof LessThanOrEqual) { 156 | filterStr.add(String.format("\"%s\" <= %s", ((LessThanOrEqual) filter).attribute(), valueToString(((LessThanOrEqual) filter).value()))); 157 | } 158 | //todo fill out rest of Filter types 159 | } 160 | return WHERE_JOINER.join(filterStr); 161 | } 162 | 163 | private FlightDescriptor getDescriptor(String sql) { 164 | return FlightDescriptor.command(sql.getBytes()); 165 | } 166 | 167 | private void mergeWhereDescriptors(String whereClause) { 168 | sql = String.format("select * from (%s) as where_merge where %s", sql, whereClause); 169 | descriptor = getDescriptor(sql); 170 | } 171 | 172 | @Override 173 | public Filter[] pushFilters(Filter[] filters) { 174 | List<Filter> notPushed = Lists.newArrayList(); 175 | List<Filter> pushed = Lists.newArrayList(); 176 | for (Filter filter : filters) { 177 | boolean isPushed = canBePushed(filter); 178 | if (isPushed) { 179 | pushed.add(filter); 180 | } else { 181 | notPushed.add(filter); 182 | } 183 | } 184 | this.pushed = pushed.toArray(new Filter[0]); 185 | if (!pushed.isEmpty()) { 186 | String whereClause = generateWhereClause(pushed); 187 | mergeWhereDescriptors(whereClause); 188 | getFlightSchema(descriptor); 189 | } 190 | return notPushed.toArray(new Filter[0]); 191 | } 192 | 193 | @Override 194 | public Filter[] pushedFilters() { 195 | return pushed; 196 | } 197 | 198 | private DataType sparkFromArrow(FieldType fieldType) { 199 | switch (fieldType.getType().getTypeID()) { 200 | case Null: 201 | return DataTypes.NullType; 202 | case Struct: 203 | throw new
UnsupportedOperationException("have not implemented Struct type yet"); 204 | case List: 205 | throw new UnsupportedOperationException("have not implemented List type yet"); 206 | case FixedSizeList: 207 | throw new UnsupportedOperationException("have not implemented FixedSizeList type yet"); 208 | case Union: 209 | throw new UnsupportedOperationException("have not implemented Union type yet"); 210 | case Int: 211 | ArrowType.Int intType = (ArrowType.Int) fieldType.getType(); 212 | int bitWidth = intType.getBitWidth(); 213 | if (bitWidth == 8) { 214 | return DataTypes.ByteType; 215 | } else if (bitWidth == 16) { 216 | return DataTypes.ShortType; 217 | } else if (bitWidth == 32) { 218 | return DataTypes.IntegerType; 219 | } else if (bitWidth == 64) { 220 | return DataTypes.LongType; 221 | } 222 | throw new UnsupportedOperationException("unknown int type with bitwidth " + bitWidth); 223 | case FloatingPoint: 224 | ArrowType.FloatingPoint floatType = (ArrowType.FloatingPoint) fieldType.getType(); 225 | FloatingPointPrecision precision = floatType.getPrecision(); 226 | switch (precision) { 227 | case HALF: 228 | case SINGLE: 229 | return DataTypes.FloatType; 230 | case DOUBLE: 231 | return DataTypes.DoubleType; 232 | } 233 | case Utf8: 234 | return DataTypes.StringType; 235 | case Binary: 236 | case FixedSizeBinary: 237 | return DataTypes.BinaryType; 238 | case Bool: 239 | return DataTypes.BooleanType; 240 | case Decimal: 241 | throw new UnsupportedOperationException("have not implemented Decimal type yet"); 242 | case Date: 243 | return DataTypes.DateType; 244 | case Time: 245 | return DataTypes.TimestampType; // note i don't know what this will do! 246 | case Timestamp: 247 | return DataTypes.TimestampType; 248 | case Interval: 249 | return DataTypes.CalendarIntervalType; 250 | case NONE: 251 | return DataTypes.NullType; 252 | default: 253 | throw new IllegalStateException("Unexpected value: " + fieldType); 254 | } 255 | } 256 | 257 | private StructType readSchemaImpl() { 258 | if (flightSchema == null) { 259 | getFlightSchema(descriptor); 260 | } 261 | StructField[] fields = flightSchema.getSchema().getFields().stream() 262 | .map(field -> new StructField(field.getName(), 263 | sparkFromArrow(field.getFieldType()), 264 | field.isNullable(), 265 | Metadata.empty())) 266 | .toArray(StructField[]::new); 267 | return new StructType(fields); 268 | } 269 | 270 | public StructType readSchema() { 271 | if (schema == null) { 272 | schema = readSchemaImpl(); 273 | } 274 | return schema; 275 | } 276 | 277 | private void mergeProjDescriptors(String projClause) { 278 | sql = String.format("select %s from (%s) as proj_merge", projClause, sql); 279 | descriptor = getDescriptor(sql); 280 | } 281 | 282 | @Override 283 | public void pruneColumns(StructType requiredSchema) { 284 | if (requiredSchema.toSeq().isEmpty()) { 285 | return; 286 | } 287 | StructType schema = readSchema(); 288 | List fields = Lists.newArrayList(); 289 | List fieldsLeft = Lists.newArrayList(); 290 | Map fieldNames = JavaConversions.seqAsJavaList(schema.toSeq()).stream() 291 | .collect(Collectors.toMap(StructField::name, f -> f)); 292 | for (StructField field : JavaConversions.seqAsJavaList(requiredSchema.toSeq())) { 293 | String name = field.name(); 294 | StructField f = fieldNames.remove(name); 295 | if (f != null) { 296 | fields.add(String.format("\"%s\"", name)); 297 | fieldsLeft.add(f); 298 | } 299 | } 300 | if (!fieldNames.isEmpty()) { 301 | this.schema = new StructType(fieldsLeft.toArray(new StructField[0])); 302 | 
mergeProjDescriptors(PROJ_JOINER.join(fields)); 303 | getFlightSchema(descriptor); 304 | } 305 | } 306 | } 307 | -------------------------------------------------------------------------------- /src/main/java/org/apache/arrow/flight/spark/FlightSparkContext.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2019 The flight-spark-source Authors 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package org.apache.arrow.flight.spark; 17 | 18 | import org.apache.spark.SparkConf; 19 | import org.apache.spark.sql.DataFrameReader; 20 | import org.apache.spark.sql.Dataset; 21 | import org.apache.spark.sql.Row; 22 | import org.apache.spark.sql.SparkSession; 23 | 24 | public class FlightSparkContext { 25 | 26 | private SparkConf conf; 27 | 28 | private final DataFrameReader reader; 29 | 30 | public FlightSparkContext(SparkSession spark) { 31 | this.conf = spark.sparkContext().getConf(); 32 | reader = spark.read().format("org.apache.arrow.flight.spark"); 33 | } 34 | 35 | public Dataset read(String s) { 36 | return reader.option("port", Integer.parseInt(conf.get("spark.flight.endpoint.port"))) 37 | .option("uri", String.format( 38 | "grpc://%s:%s", 39 | conf.get("spark.flight.endpoint.host"), 40 | conf.get("spark.flight.endpoint.port"))) 41 | .option("username", conf.get("spark.flight.auth.username")) 42 | .option("password", conf.get("spark.flight.auth.password")) 43 | .load(s); 44 | } 45 | 46 | public Dataset readSql(String s) { 47 | return reader.option("port", Integer.parseInt(conf.get("spark.flight.endpoint.port"))) 48 | .option("uri", String.format( 49 | "grpc://%s:%s", 50 | conf.get("spark.flight.endpoint.host"), 51 | conf.get("spark.flight.endpoint.port"))) 52 | .option("username", conf.get("spark.flight.auth.username")) 53 | .option("password", conf.get("spark.flight.auth.password")) 54 | .load(s); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/main/java/org/apache/arrow/flight/spark/FlightTable.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2019 The flight-spark-source Authors 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package org.apache.arrow.flight.spark; 18 | 19 | import java.util.Set; 20 | 21 | import org.apache.arrow.flight.Location; 22 | import org.apache.spark.broadcast.Broadcast; 23 | import org.apache.spark.sql.connector.catalog.SupportsRead; 24 | import org.apache.spark.sql.connector.catalog.Table; 25 | import org.apache.spark.sql.connector.catalog.TableCapability; 26 | import org.apache.spark.sql.connector.read.ScanBuilder; 27 | import org.apache.spark.sql.types.StructType; 28 | import org.apache.spark.sql.util.CaseInsensitiveStringMap; 29 | 30 | public class FlightTable implements Table, SupportsRead { 31 | private static final Set CAPABILITIES = Set.of(TableCapability.BATCH_READ); 32 | private final String name; 33 | private final Location location; 34 | private final String sql; 35 | private final Broadcast clientOptions; 36 | private StructType schema; 37 | 38 | public FlightTable(String name, Location location, String sql, Broadcast clientOptions) { 39 | this.name = name; 40 | this.location = location; 41 | this.sql = sql; 42 | this.clientOptions = clientOptions; 43 | } 44 | 45 | @Override 46 | public String name() { 47 | return name; 48 | } 49 | 50 | @Override 51 | public StructType schema() { 52 | if (schema == null) { 53 | FlightScanBuilder scanBuilder = new FlightScanBuilder(location, clientOptions, sql); 54 | schema = scanBuilder.readSchema(); 55 | } 56 | return schema; 57 | } 58 | 59 | // TODO - We could probably implement partitioning() but it would require server side support 60 | 61 | @Override 62 | public Set capabilities() { 63 | // We only support reading for now 64 | return CAPABILITIES; 65 | } 66 | 67 | @Override 68 | public ScanBuilder newScanBuilder(CaseInsensitiveStringMap options) { 69 | return new FlightScanBuilder(location, clientOptions, sql); 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /src/main/java/org/apache/arrow/flight/spark/TokenClientMiddleware.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2019 The flight-spark-source Authors 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package org.apache.arrow.flight.spark; 18 | 19 | import org.apache.arrow.flight.CallHeaders; 20 | import org.apache.arrow.flight.CallStatus; 21 | import org.apache.arrow.flight.FlightClientMiddleware; 22 | 23 | public class TokenClientMiddleware implements FlightClientMiddleware { 24 | private final String token; 25 | 26 | public TokenClientMiddleware(String token) { 27 | this.token = token; 28 | } 29 | 30 | @Override 31 | public void onBeforeSendingHeaders(CallHeaders outgoingHeaders) { 32 | outgoingHeaders.insert("authorization", String.format("Bearer %s", token)); 33 | } 34 | 35 | @Override 36 | public void onHeadersReceived(CallHeaders incomingHeaders) { 37 | // Nothing needed here 38 | } 39 | 40 | @Override 41 | public void onCallCompleted(CallStatus status) { 42 | // Nothing needed here 43 | } 44 | 45 | } 46 | -------------------------------------------------------------------------------- /src/main/java/org/apache/arrow/flight/spark/TokenClientMiddlewareFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2019 The flight-spark-source Authors 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.apache.arrow.flight.spark; 18 | 19 | import org.apache.arrow.flight.CallInfo; 20 | import org.apache.arrow.flight.FlightClientMiddleware; 21 | 22 | public class TokenClientMiddlewareFactory implements FlightClientMiddlewareFactory { 23 | private final String token; 24 | 25 | public TokenClientMiddlewareFactory(String token) { 26 | this.token = token; 27 | } 28 | 29 | @Override 30 | public FlightClientMiddleware onCallStarted(CallInfo info) { 31 | return new TokenClientMiddleware(token); 32 | } 33 | 34 | } 35 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/execution/arrow/FlightArrowUtils.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2019 The flight-spark-source Authors 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package org.apache.spark.sql.execution.arrow 18 | 19 | import org.apache.arrow.memory.RootAllocator 20 | import org.apache.arrow.vector.types.pojo.{ArrowType, Field, FieldType, Schema} 21 | import org.apache.arrow.vector.types.{DateUnit, FloatingPointPrecision, TimeUnit} 22 | import org.apache.spark.sql.internal.SQLConf 23 | import org.apache.spark.sql.types._ 24 | import scala.collection.JavaConverters._ 25 | 26 | /** 27 | * FlightArrowUtils is a copy of ArrowUtils with extra support for DateMilli and TimestampMilli 28 | */ 29 | object FlightArrowUtils { 30 | 31 | val rootAllocator = new RootAllocator(Long.MaxValue) 32 | 33 | // todo: support more types. 34 | 35 | /** Maps data type from Spark to Arrow. NOTE: timeZoneId required for TimestampTypes */ 36 | def toArrowType(dt: DataType, timeZoneId: String): ArrowType = dt match { 37 | case BooleanType => ArrowType.Bool.INSTANCE 38 | case ByteType => new ArrowType.Int(8, true) 39 | case ShortType => new ArrowType.Int(8 * 2, true) 40 | case IntegerType => new ArrowType.Int(8 * 4, true) 41 | case LongType => new ArrowType.Int(8 * 8, true) 42 | case FloatType => new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE) 43 | case DoubleType => new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE) 44 | case StringType => ArrowType.Utf8.INSTANCE 45 | case BinaryType => ArrowType.Binary.INSTANCE 46 | case DecimalType.Fixed(precision, scale) => new ArrowType.Decimal(precision, scale) 47 | case DateType => new ArrowType.Date(DateUnit.DAY) 48 | case TimestampType => 49 | if (timeZoneId == null) { 50 | throw new UnsupportedOperationException( 51 | s"${TimestampType.catalogString} must supply timeZoneId parameter") 52 | } else { 53 | new ArrowType.Timestamp(TimeUnit.MICROSECOND, timeZoneId) 54 | } 55 | case _ => 56 | throw new UnsupportedOperationException(s"Unsupported data type: ${dt.catalogString}") 57 | } 58 | 59 | def fromArrowType(dt: ArrowType): DataType = dt match { 60 | case ArrowType.Bool.INSTANCE => BooleanType 61 | case int: ArrowType.Int if int.getIsSigned && int.getBitWidth == 8 => ByteType 62 | case int: ArrowType.Int if int.getIsSigned && int.getBitWidth == 8 * 2 => ShortType 63 | case int: ArrowType.Int if int.getIsSigned && int.getBitWidth == 8 * 4 => IntegerType 64 | case int: ArrowType.Int if int.getIsSigned && int.getBitWidth == 8 * 8 => LongType 65 | case float: ArrowType.FloatingPoint 66 | if float.getPrecision() == FloatingPointPrecision.SINGLE => FloatType 67 | case float: ArrowType.FloatingPoint 68 | if float.getPrecision() == FloatingPointPrecision.DOUBLE => DoubleType 69 | case ArrowType.Utf8.INSTANCE => StringType 70 | case ArrowType.Binary.INSTANCE => BinaryType 71 | case d: ArrowType.Decimal => DecimalType(d.getPrecision, d.getScale) 72 | case date: ArrowType.Date if date.getUnit == DateUnit.DAY || date.getUnit == DateUnit.MILLISECOND => DateType 73 | case ts: ArrowType.Timestamp if ts.getUnit == TimeUnit.MICROSECOND || ts.getUnit == TimeUnit.MILLISECOND => TimestampType 74 | case _ => throw new UnsupportedOperationException(s"Unsupported data type: $dt") 75 | } 76 | 77 | /** Maps field from Spark to Arrow. 
NOTE: timeZoneId required for TimestampType */ 78 | def toArrowField( 79 | name: String, dt: DataType, nullable: Boolean, timeZoneId: String): Field = { 80 | dt match { 81 | case ArrayType(elementType, containsNull) => 82 | val fieldType = new FieldType(nullable, ArrowType.List.INSTANCE, null) 83 | new Field(name, fieldType, 84 | Seq(toArrowField("element", elementType, containsNull, timeZoneId)).asJava) 85 | case StructType(fields) => 86 | val fieldType = new FieldType(nullable, ArrowType.Struct.INSTANCE, null) 87 | new Field(name, fieldType, 88 | fields.map { field => 89 | toArrowField(field.name, field.dataType, field.nullable, timeZoneId) 90 | }.toSeq.asJava) 91 | case dataType => 92 | val fieldType = new FieldType(nullable, toArrowType(dataType, timeZoneId), null) 93 | new Field(name, fieldType, Seq.empty[Field].asJava) 94 | } 95 | } 96 | 97 | def fromArrowField(field: Field): DataType = { 98 | field.getType match { 99 | case ArrowType.List.INSTANCE => 100 | val elementField = field.getChildren().get(0) 101 | val elementType = fromArrowField(elementField) 102 | ArrayType(elementType, containsNull = elementField.isNullable) 103 | case ArrowType.Struct.INSTANCE => 104 | val fields = field.getChildren().asScala.map { child => 105 | val dt = fromArrowField(child) 106 | StructField(child.getName, dt, child.isNullable) 107 | } 108 | StructType(fields) 109 | case arrowType => fromArrowType(arrowType) 110 | } 111 | } 112 | 113 | /** Maps schema from Spark to Arrow. NOTE: timeZoneId required for TimestampType in StructType */ 114 | def toArrowSchema(schema: StructType, timeZoneId: String): Schema = { 115 | new Schema(schema.map { field => 116 | toArrowField(field.name, field.dataType, field.nullable, timeZoneId) 117 | }.asJava) 118 | } 119 | 120 | def fromArrowSchema(schema: Schema): StructType = { 121 | StructType(schema.getFields.asScala.map { field => 122 | val dt = fromArrowField(field) 123 | StructField(field.getName, dt, field.isNullable) 124 | }) 125 | } 126 | 127 | /** Return Map with conf settings to be used in ArrowPythonRunner */ 128 | def getPythonRunnerConfMap(conf: SQLConf): Map[String, String] = { 129 | val timeZoneConf = Seq(SQLConf.SESSION_LOCAL_TIMEZONE.key -> 130 | conf.sessionLocalTimeZone) 131 | val pandasColsByName = Seq(SQLConf.PANDAS_GROUPED_MAP_ASSIGN_COLUMNS_BY_NAME.key -> 132 | conf.pandasGroupedMapAssignColumnsByName.toString) 133 | Map(timeZoneConf ++ pandasColsByName: _*) 134 | } 135 | } 136 | -------------------------------------------------------------------------------- /src/test/java/org/apache/arrow/flight/spark/TestConnector.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2019 The flight-spark-source Authors 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package org.apache.arrow.flight.spark; 17 | 18 | import java.io.ByteArrayOutputStream; 19 | import java.io.IOException; 20 | import java.io.ObjectOutputStream; 21 | import java.util.ArrayList; 22 | import java.util.Iterator; 23 | import java.util.List; 24 | import java.util.Optional; 25 | import java.util.function.Consumer; 26 | 27 | import org.apache.arrow.flight.Action; 28 | import org.apache.arrow.flight.FlightDescriptor; 29 | import org.apache.arrow.flight.FlightEndpoint; 30 | import org.apache.arrow.flight.FlightInfo; 31 | import org.apache.arrow.flight.FlightServer; 32 | import org.apache.arrow.flight.FlightTestUtil; 33 | import org.apache.arrow.flight.Location; 34 | import org.apache.arrow.flight.NoOpFlightProducer; 35 | import org.apache.arrow.flight.Result; 36 | import org.apache.arrow.flight.Ticket; 37 | import org.apache.arrow.flight.auth.ServerAuthHandler; 38 | import org.apache.arrow.flight.auth2.CallHeaderAuthenticator; 39 | import org.apache.arrow.flight.auth2.BasicCallHeaderAuthenticator; 40 | import org.apache.arrow.flight.auth2.GeneratedBearerTokenAuthenticator; 41 | import org.apache.arrow.memory.BufferAllocator; 42 | import org.apache.arrow.memory.RootAllocator; 43 | import org.apache.arrow.util.AutoCloseables; 44 | import org.apache.arrow.vector.BigIntVector; 45 | import org.apache.arrow.vector.Float8Vector; 46 | import org.apache.arrow.vector.VarCharVector; 47 | import org.apache.arrow.vector.VectorSchemaRoot; 48 | import org.apache.arrow.vector.types.Types; 49 | import org.apache.arrow.vector.types.pojo.Field; 50 | import org.apache.arrow.vector.types.pojo.Schema; 51 | import org.apache.arrow.vector.util.Text; 52 | import org.apache.spark.api.java.JavaSparkContext; 53 | import org.apache.spark.sql.Dataset; 54 | import org.apache.spark.sql.Row; 55 | import org.apache.spark.sql.SparkSession; 56 | import org.junit.AfterClass; 57 | import org.junit.Assert; 58 | import org.junit.BeforeClass; 59 | import org.junit.Test; 60 | import org.junit.Test.None; 61 | import org.apache.arrow.flight.CallStatus; 62 | import com.google.common.collect.ImmutableList; 63 | import com.google.common.base.Strings; 64 | 65 | public class TestConnector { 66 | private static final String USERNAME_1 = "flight1"; 67 | private static final String USERNAME_2 = "flight2"; 68 | private static final String NO_USERNAME = ""; 69 | private static final String PASSWORD_1 = "woohoo1"; 70 | private static final String PASSWORD_2 = "woohoo2"; 71 | 72 | private static final BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE); 73 | private static Location location; 74 | private static FlightServer server; 75 | private static SparkSession spark; 76 | private static FlightSparkContext csc; 77 | 78 | public static CallHeaderAuthenticator.AuthResult validate(String username, String password) { 79 | if (Strings.isNullOrEmpty(username)) { 80 | throw CallStatus.UNAUTHENTICATED.withDescription("Credentials not supplied.").toRuntimeException(); 81 | } 82 | final String identity; 83 | if (USERNAME_1.equals(username) && PASSWORD_1.equals(password)) { 84 | identity = USERNAME_1; 85 | } else if (USERNAME_2.equals(username) && PASSWORD_2.equals(password)) { 86 | identity = USERNAME_2; 87 | } else { 88 | throw CallStatus.UNAUTHENTICATED.withDescription("Username or password is invalid.").toRuntimeException(); 89 | } 90 | return () -> identity; 91 | } 92 | 93 | @BeforeClass 94 | public static void setUp() throws Exception { 95 | FlightServer.Builder builder = FlightServer.builder(allocator, 96 | 
Location.forGrpcInsecure(FlightTestUtil.LOCALHOST, /*port*/ 0), 97 | new TestProducer()); 98 | builder.headerAuthenticator( 99 | new GeneratedBearerTokenAuthenticator( 100 | new BasicCallHeaderAuthenticator(TestConnector::validate) 101 | ) 102 | ); 103 | server = builder.build(); 104 | server.start(); 105 | location = server.getLocation(); 106 | spark = SparkSession.builder() 107 | .appName("flightTest") 108 | .master("local[*]") 109 | .config("spark.driver.host", "127.0.0.1") 110 | .config("spark.driver.allowMultipleContexts", "true") 111 | .config("spark.flight.endpoint.host", location.getUri().getHost()) 112 | .config("spark.flight.endpoint.port", Integer.toString(location.getUri().getPort())) 113 | .config("spark.flight.auth.username", USERNAME_1) 114 | .config("spark.flight.auth.password", PASSWORD_1) 115 | .getOrCreate(); 116 | csc = new FlightSparkContext(spark); 117 | } 118 | 119 | @AfterClass 120 | public static void tearDown() throws Exception { 121 | AutoCloseables.close(server, allocator, spark); 122 | } 123 | 124 | private class DummyObjectOutputStream extends ObjectOutputStream { 125 | public DummyObjectOutputStream() throws IOException { 126 | super(new ByteArrayOutputStream()); 127 | } 128 | } 129 | 130 | @Test(expected = None.class) 131 | public void testFlightPartitionReaderFactorySerialization() throws IOException { 132 | List<FlightClientMiddlewareFactory> middleware = new ArrayList<>(); 133 | FlightClientOptions clientOptions = new FlightClientOptions("xxx", "yyy", "FooBar", "FooBar", "FooBar", middleware); 134 | FlightPartitionReaderFactory readerFactory = new FlightPartitionReaderFactory(JavaSparkContext.fromSparkContext(spark.sparkContext()).broadcast(clientOptions)); 135 | 136 | try (ObjectOutputStream oos = new DummyObjectOutputStream()) { 137 | oos.writeObject(readerFactory); 138 | } 139 | } 140 | 141 | @Test(expected = None.class) 142 | public void testFlightPartitionSerialization() throws IOException { 143 | Ticket ticket = new Ticket("FooBar".getBytes()); 144 | FlightEndpoint endpoint = new FlightEndpoint(ticket, location); 145 | FlightPartition partition = new FlightPartition(new FlightEndpointWrapper(endpoint)); 146 | try (ObjectOutputStream oos = new DummyObjectOutputStream()) { 147 | oos.writeObject(partition); 148 | } 149 | } 150 | 151 | @Test 152 | public void testConnect() { 153 | csc.read("test.table"); 154 | } 155 | 156 | @Test 157 | public void testRead() { 158 | long count = csc.read("test.table").count(); 159 | Assert.assertEquals(20, count); 160 | } 161 | 162 | @Test 163 | public void testSql() { 164 | long count = csc.readSql("select * from test.table").count(); 165 | Assert.assertEquals(20, count); 166 | } 167 | 168 | @Test 169 | public void testFilter() { 170 | Dataset<Row> df = csc.readSql("select * from test.table"); 171 | long count = df.filter(df.col("symbol").equalTo("USDCAD")).count(); 172 | long countOriginal = csc.readSql("select * from test.table").count(); 173 | Assert.assertTrue(count < countOriginal); 174 | } 175 | 176 | private static class SizeConsumer implements Consumer<Row> { 177 | private int length = 0; 178 | private int width = 0; 179 | 180 | @Override 181 | public void accept(Row row) { 182 | length += 1; 183 | width = row.length(); 184 | } 185 | } 186 | 187 | @Test 188 | public void testProject() { 189 | Dataset<Row> df = csc.readSql("select * from test.table"); 190 | SizeConsumer c = new SizeConsumer(); 191 | df.select("bid", "ask", "symbol").toLocalIterator().forEachRemaining(c); 192 | long count = c.width; 193 | long countOriginal = csc.readSql("select * from 
test.table").columns().length; 194 | Assert.assertTrue(count < countOriginal); 195 | } 196 | 197 | private static class TestProducer extends NoOpFlightProducer { 198 | private boolean parallel = false; 199 | 200 | @Override 201 | public void doAction(CallContext context, Action action, StreamListener<Result> listener) { 202 | parallel = true; 203 | listener.onNext(new Result("ok".getBytes())); 204 | listener.onCompleted(); 205 | } 206 | 207 | @Override 208 | public FlightInfo getFlightInfo(CallContext context, FlightDescriptor descriptor) { 209 | Schema schema; 210 | List<FlightEndpoint> endpoints; 211 | if (parallel) { 212 | endpoints = ImmutableList.of(new FlightEndpoint(new Ticket(descriptor.getCommand()), location), 213 | new FlightEndpoint(new Ticket(descriptor.getCommand()), location)); 214 | } else { 215 | endpoints = ImmutableList.of(new FlightEndpoint(new Ticket(descriptor.getCommand()), location)); 216 | } 217 | if (new String(descriptor.getCommand()).equals("select \"bid\", \"ask\", \"symbol\" from (select * from test.table))")) { 218 | schema = new Schema(ImmutableList.of( 219 | Field.nullable("bid", Types.MinorType.FLOAT8.getType()), 220 | Field.nullable("ask", Types.MinorType.FLOAT8.getType()), 221 | Field.nullable("symbol", Types.MinorType.VARCHAR.getType())) 222 | ); 223 | 224 | } else { 225 | schema = new Schema(ImmutableList.of( 226 | Field.nullable("bid", Types.MinorType.FLOAT8.getType()), 227 | Field.nullable("ask", Types.MinorType.FLOAT8.getType()), 228 | Field.nullable("symbol", Types.MinorType.VARCHAR.getType()), 229 | Field.nullable("bidsize", Types.MinorType.BIGINT.getType()), 230 | Field.nullable("asksize", Types.MinorType.BIGINT.getType())) 231 | ); 232 | } 233 | return new FlightInfo(schema, descriptor, endpoints, 1000000, 10); 234 | } 235 | 236 | @Override 237 | public void getStream(CallContext context, Ticket ticket, ServerStreamListener listener) { 238 | final int size = (new String(ticket.getBytes()).contains("USDCAD")) ? 5 : 10; 239 | 240 | if (new String(ticket.getBytes()).equals("select \"bid\", \"ask\", \"symbol\" from (select * from test.table))")) { 241 | Float8Vector b = new Float8Vector("bid", allocator); 242 | Float8Vector a = new Float8Vector("ask", allocator); 243 | VarCharVector s = new VarCharVector("symbol", allocator); 244 | 245 | VectorSchemaRoot root = VectorSchemaRoot.of(b, a, s); 246 | listener.start(root); 247 | 248 | //batch 1 249 | root.allocateNew(); 250 | for (int i = 0; i < size; i++) { 251 | b.set(i, (double) i); 252 | a.set(i, (double) i); 253 | s.set(i, (i % 2 == 0) ? new Text("USDCAD") : new Text("EURUSD")); 254 | } 255 | b.setValueCount(size); 256 | a.setValueCount(size); 257 | s.setValueCount(size); 258 | root.setRowCount(size); 259 | listener.putNext(); 260 | 261 | // batch 2 262 | 263 | root.allocateNew(); 264 | for (int i = 0; i < size; i++) { 265 | b.set(i, (double) i); 266 | a.set(i, (double) i); 267 | s.set(i, (i % 2 == 0) ? 
new Text("USDCAD") : new Text("EURUSD")); 268 | } 269 | b.setValueCount(size); 270 | a.setValueCount(size); 271 | s.setValueCount(size); 272 | root.setRowCount(size); 273 | listener.putNext(); 274 | root.clear(); 275 | listener.completed(); 276 | } else { 277 | BigIntVector bs = new BigIntVector("bidsize", allocator); 278 | BigIntVector as = new BigIntVector("asksize", allocator); 279 | Float8Vector b = new Float8Vector("bid", allocator); 280 | Float8Vector a = new Float8Vector("ask", allocator); 281 | VarCharVector s = new VarCharVector("symbol", allocator); 282 | 283 | VectorSchemaRoot root = VectorSchemaRoot.of(b, a, s, bs, as); 284 | listener.start(root); 285 | 286 | //batch 1 287 | root.allocateNew(); 288 | for (int i = 0; i < size; i++) { 289 | bs.set(i, (long) i); 290 | as.set(i, (long) i); 291 | b.set(i, (double) i); 292 | a.set(i, (double) i); 293 | s.set(i, (i % 2 == 0) ? new Text("USDCAD") : new Text("EURUSD")); 294 | } 295 | bs.setValueCount(size); 296 | as.setValueCount(size); 297 | b.setValueCount(size); 298 | a.setValueCount(size); 299 | s.setValueCount(size); 300 | root.setRowCount(size); 301 | listener.putNext(); 302 | 303 | // batch 2 304 | 305 | root.allocateNew(); 306 | for (int i = 0; i < size; i++) { 307 | bs.set(i, (long) i); 308 | as.set(i, (long) i); 309 | b.set(i, (double) i); 310 | a.set(i, (double) i); 311 | s.set(i, (i % 2 == 0) ? new Text("USDCAD") : new Text("EURUSD")); 312 | } 313 | bs.setValueCount(size); 314 | as.setValueCount(size); 315 | b.setValueCount(size); 316 | a.setValueCount(size); 317 | s.setValueCount(size); 318 | root.setRowCount(size); 319 | listener.putNext(); 320 | root.clear(); 321 | listener.completed(); 322 | } 323 | } 324 | 325 | 326 | } 327 | } 328 | -------------------------------------------------------------------------------- /src/test/resources/logback-test.xml: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="UTF-8" ?> 2 | <!-- Copyright (C) 2019 The flight-spark-source Authors. Licensed under the Apache License, Version 2.0. --> 3 | <configuration> 4 | <appender name="SOCKET" class="de.huxhorn.lilith.logback.appender.ClassicMultiplexSocketAppender"> 5 | <Compressing>true</Compressing> 6 | <ReconnectionDelay>10000</ReconnectionDelay> 7 | <IncludeCallerData>true</IncludeCallerData> 8 | <RemoteHosts>${LILITH_HOSTNAME:-localhost}</RemoteHosts> 9 | </appender> 10 | <appender name="CONSOLE" class="ch.qos.logback.core.ConsoleAppender"> 11 | <encoder> 12 | <pattern>%highlight %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern> 13 | </encoder> 14 | </appender> 15 | <!-- logger and root appender-ref elements not recoverable: the XML markup was stripped in extraction --> 16 | </configuration> --------------------------------------------------------------------------------
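
Usage sketch (not part of the repository sources above): the smallest end-to-end read through this connector, wiring up the same SparkConf keys that FlightSparkContext turns into the reader's "uri", "username", and "password" options. The host, port, credentials, and the class name FlightSourceExample are placeholder values for illustration.

import org.apache.arrow.flight.spark.FlightSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class FlightSourceExample {
  public static void main(String[] args) {
    // FlightSparkContext reads these four settings from the SparkConf and
    // builds a DataFrameReader for the "org.apache.arrow.flight.spark" format.
    SparkSession spark = SparkSession.builder()
        .appName("flight-example")
        .master("local[*]")
        .config("spark.flight.endpoint.host", "localhost") // placeholder host
        .config("spark.flight.endpoint.port", "47470")     // placeholder port
        .config("spark.flight.auth.username", "user")      // placeholder credentials
        .config("spark.flight.auth.password", "pass")
        .getOrCreate();

    FlightSparkContext csc = new FlightSparkContext(spark);

    // The query string is passed to load(); each Flight endpoint returned by
    // the server's GetFlightInfo becomes one Spark partition.
    Dataset<Row> df = csc.readSql("select * from test.table");
    df.show();

    spark.stop();
  }
}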
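
A hedged sketch of how TokenClientMiddleware attaches a bearer token to a plain FlightClient outside of Spark. It assumes FlightClientMiddlewareFactory extends FlightClientMiddleware.Factory, which its use with Arrow's client builder implies; the endpoint, token value, and the class name TokenMiddlewareExample are placeholders.

import org.apache.arrow.flight.Criteria;
import org.apache.arrow.flight.FlightClient;
import org.apache.arrow.flight.Location;
import org.apache.arrow.flight.spark.TokenClientMiddlewareFactory;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;

public class TokenMiddlewareExample {
  public static void main(String[] args) throws Exception {
    try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE)) {
      Location location = Location.forGrpcInsecure("localhost", 47470); // placeholder endpoint
      // intercept() registers the factory; onCallStarted() then hands each RPC
      // a TokenClientMiddleware, whose onBeforeSendingHeaders() inserts
      // "authorization: Bearer <token>" before the call goes out.
      try (FlightClient client = FlightClient.builder(allocator, location)
          .intercept(new TokenClientMiddlewareFactory("placeholder-token"))
          .build()) {
        client.listFlights(Criteria.ALL).forEach(System.out::println);
      }
    }
  }
}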