├── .github
│   ├── dependabot.yml
│   ├── problem-matcher.json
│   └── workflows
│       ├── main.yml
│       └── release.yml
├── .gitignore
├── .mvn
│   ├── jvm.config
│   ├── modernizer
│   │   └── violations.xml
│   └── wrapper
│       └── maven-wrapper.properties
├── LICENSE
├── README.md
├── docker
│   └── docker-compose.yml
├── mvnw
├── pom.xml
└── src
    ├── main
    │   └── java
    │       └── org
    │           └── ebyhr
    │               └── trino
    │                   └── storage
    │                       ├── ByteResponseHandler.java
    │                       ├── FileType.java
    │                       ├── ForStorage.java
    │                       ├── ListPageSource.java
    │                       ├── StorageClient.java
    │                       ├── StorageColumnHandle.java
    │                       ├── StorageConfig.java
    │                       ├── StorageConnector.java
    │                       ├── StorageConnectorFactory.java
    │                       ├── StorageMetadata.java
    │                       ├── StorageModule.java
    │                       ├── StoragePageSourceProvider.java
    │                       ├── StoragePlugin.java
    │                       ├── StorageRecordSetProvider.java
    │                       ├── StorageSplit.java
    │                       ├── StorageSplitManager.java
    │                       ├── StorageTable.java
    │                       ├── StorageTableHandle.java
    │                       ├── StorageTransactionHandle.java
    │                       ├── operator
    │                       │   ├── AvroColumnDecoder.java
    │                       │   ├── AvroPlugin.java
    │                       │   ├── AvroSchemaConverter.java
    │                       │   ├── CsvPlugin.java
    │                       │   ├── ExcelPlugin.java
    │                       │   ├── FilePlugin.java
    │                       │   ├── JsonPlugin.java
    │                       │   ├── OrcPageSource.java
    │                       │   ├── OrcPlugin.java
    │                       │   ├── OrcTypeTranslator.java
    │                       │   ├── ParquetPageSource.java
    │                       │   ├── ParquetPlugin.java
    │                       │   ├── ParquetTypeTranslator.java
    │                       │   ├── PluginFactory.java
    │                       │   ├── RawPlugin.java
    │                       │   └── TextPlugin.java
    │                       └── ptf
    │                           ├── ListTableFunction.java
    │                           └── ReadFileTableFunction.java
    └── test
        ├── java
        │   └── org
        │       └── ebyhr
        │           └── trino
        │               └── storage
        │                   ├── StorageQueryRunner.java
        │                   ├── TestRestrictedStorageConnector.java
        │                   ├── TestStorageConnector.java
        │                   ├── TestStoragePlugin.java
        │                   ├── TestingHadoopServer.java
        │                   ├── TestingMinioServer.java
        │                   └── TestingStorageServer.java
        └── resources
            ├── example-data
            │   ├── apache-lz4.orc
            │   ├── array-of-objects.json
            │   ├── avro-data.avro
            │   ├── example-metadata.json
            │   ├── lineitem-1.csv
            │   ├── lineitem-2.csv
            │   ├── newlines.json
            │   ├── numbers-1.csv
            │   ├── numbers-2.csv
            │   ├── numbers-2.ssv
            │   ├── numbers.tsv
            │   ├── orders-1.csv
            │   ├── orders-2.csv
            │   ├── parquet_data.parquet
            │   ├── quoted_fields_with_newlines.csv
            │   ├── quoted_fields_with_newlines.ssv
            │   ├── quoted_fields_with_newlines.tsv
            │   ├── quoted_fields_with_separator.csv
            │   ├── quoted_fields_with_separator.ssv
            │   ├── quoted_fields_with_separator.tsv
            │   └── sample.xlsx
            └── minio
                └── hive-core-site.xml

/.github/dependabot.yml:
--------------------------------------------------------------------------------
version: 2
updates:
  - package-ecosystem: "maven"
    directory: "/"
    schedule:
      interval: "weekly"
    groups:
      dependency-updates:
        applies-to: version-updates
        update-types:
          - major
          - minor
          - patch
      security-updates:
        applies-to: security-updates
        dependency-type: production
  - package-ecosystem: "github-actions"
    directory: "/"
    schedule:
      interval: "weekly"
--------------------------------------------------------------------------------
/.github/problem-matcher.json:
--------------------------------------------------------------------------------
{
  "problemMatcher": [
    {
      "owner": "maven",
      "pattern": [
        {
          "regexp": "^.*\\[(ERROR|WARN(?:ING)?)\\]\\s+(.*):\\[(\\d+),(\\d+)\\] (?:error: )?[\\[\\(](.*)[\\]\\)] (.*)$",
          "severity": 1,
          "file": 2,
          "line": 3,
          "column": 4,
          "message": 6,
          "code": 5
        }
      ]
    }
  ]
}
--------------------------------------------------------------------------------
/.github/workflows/main.yml:
--------------------------------------------------------------------------------
name: CI

on: [push, pull_request]

env:
  HADOOP_USER_NAME: hive

jobs:
  maven-checks:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-java@v4
        with:
          java-version: '23'
          distribution: 'temurin'
          cache: 'maven'
      - name: Configure Problem Matchers
        run: |
          echo "::add-matcher::.github/problem-matcher.json"
          echo "::remove-matcher owner=java::"
      - name: Maven Checks
        run: |
          ./mvnw -B clean install
      - name: Annotate run
        uses: trinodb/github-actions/action-surefire-report@b63800bedfbc7ab1ff2e5fe7eaecf5ab82ce6a70
        if: always()
        with:
          fail_if_no_tests: false
          skip_publishing: true
--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
name: CD

on:
  workflow_dispatch: {}

env:
  HADOOP_USER_NAME: hive

jobs:
  release:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-java@v4
        with:
          java-version: '23'
          distribution: 'temurin'
          server-id: github
          cache: 'maven'
      - name: Configure Problem Matchers
        run: |
          echo "::add-matcher::.github/problem-matcher.json"
          echo "::remove-matcher owner=java::"
      - name: Configure Git user
        run: |
          git config user.email "actions@github.com"
          git config user.name "GitHub Actions"
      - name: Prepare release
        run: ./mvnw -B release:prepare
      - name: Save version number in env
        run: |
          echo "VERSION=$(grep 'project.rel.org.ebyhr\\:trino-storage=' release.properties | cut -d'=' -f2)" >> $GITHUB_ENV
      - uses: marvinpinto/action-automatic-releases@v1.2.1
        with:
          repo_token: "${{ secrets.GITHUB_TOKEN }}"
          prerelease: false
          automatic_release_tag: v${{ env.VERSION }}
          title: v${{ env.VERSION }}
          files: |
            target/*.zip
      - name: Publish JAR
        run: ./mvnw -B release:perform -Darguments=-Dgpg.skip
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      - name: Annotate run
        uses: trinodb/github-actions/action-surefire-report@b63800bedfbc7ab1ff2e5fe7eaecf5ab82ce6a70
        if: always()
        with:
          fail_if_no_tests: false
          skip_publishing: true
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
.DS_Store

target
.idea
*.iml
--------------------------------------------------------------------------------
/.mvn/jvm.config:
--------------------------------------------------------------------------------
--add-exports jdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED
--add-exports jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED
--add-exports jdk.compiler/com.sun.tools.javac.main=ALL-UNNAMED
--add-exports jdk.compiler/com.sun.tools.javac.model=ALL-UNNAMED
--add-exports jdk.compiler/com.sun.tools.javac.parser=ALL-UNNAMED
--add-exports jdk.compiler/com.sun.tools.javac.processing=ALL-UNNAMED
--add-exports jdk.compiler/com.sun.tools.javac.tree=ALL-UNNAMED
--add-exports jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED
--add-opens jdk.compiler/com.sun.tools.javac.code=ALL-UNNAMED
--add-opens jdk.compiler/com.sun.tools.javac.comp=ALL-UNNAMED
--------------------------------------------------------------------------------
/.mvn/modernizer/violations.xml:
--------------------------------------------------------------------------------
<modernizer>

    <violation>
        <name>java/lang/Class.newInstance:()Ljava/lang/Object;</name>
        <version>1.1</version>
        <comment>Prefer Class.getConstructor().newInstance()</comment>
    </violation>

    <violation>
        <name>java/lang/String."&lt;init&gt;":([B)V</name>
        <version>1.1</version>
        <comment>Prefer new String(byte[], Charset)</comment>
    </violation>

    <violation>
        <name>java/lang/String.getBytes:()[B</name>
        <version>1.1</version>
        <comment>Prefer String.getBytes(Charset)</comment>
    </violation>

    <violation>
        <name>java/lang/String.toLowerCase:()Ljava/lang/String;</name>
        <version>1.1</version>
        <comment>Prefer String.toLowerCase(java.util.Locale)</comment>
    </violation>

    <violation>
        <name>java/lang/String.toUpperCase:()Ljava/lang/String;</name>
        <version>1.1</version>
        <comment>Prefer String.toUpperCase(java.util.Locale)</comment>
    </violation>

    <violation>
        <name>java/io/File.toString:()Ljava/lang/String;</name>
        <version>1.1</version>
        <comment>Prefer File.getPath()</comment>
    </violation>

    <violation>
        <name>com/google/common/primitives/Ints.checkedCast:(J)I</name>
        <version>1.8</version>
        <comment>Prefer Math.toIntExact(long)</comment>
    </violation>

    <violation>
        <name>org/testng/Assert.assertEquals:(Ljava/lang/Iterable;Ljava/lang/Iterable;)V</name>
        <version>1.8</version>
        <comment>Use io.trino.testing.assertions.Assert.assertEquals due to TestNG #543</comment>
    </violation>

    <violation>
        <name>org/testng/Assert.assertEquals:(Ljava/lang/Iterable;Ljava/lang/Iterable;Ljava/lang/String;)V</name>
        <version>1.8</version>
        <comment>Use io.trino.testing.assertions.Assert.assertEquals due to TestNG #543</comment>
    </violation>

    <violation>
        <name>org/testng/Assert.assertThrows:(Lorg/testng/Assert$ThrowingRunnable;)V</name>
        <version>1.8</version>
        <comment>Use AssertJ's assertThatThrownBy, see https://github.com/trinodb/trino/issues/5320 for rationale</comment>
    </violation>

    <violation>
        <name>org/testng/Assert.assertThrows:(Ljava/lang/Class;Lorg/testng/Assert$ThrowingRunnable;)V</name>
        <version>1.8</version>
        <comment>Use AssertJ's assertThatThrownBy, see https://github.com/trinodb/trino/issues/5320 for rationale</comment>
    </violation>

    <violation>
        <name>org/apache/hadoop/conf/Configuration."&lt;init&gt;":()V</name>
        <version>1.1</version>
        <comment>Prefer new Configuration(false)</comment>
    </violation>

    <violation>
        <name>java/util/TimeZone.getTimeZone:(Ljava/lang/String;)Ljava/util/TimeZone;</name>
        <version>1.8</version>
        <comment>Avoid TimeZone.getTimeZone as it returns GMT for a zone not supported by the JVM. Use TimeZone.getTimeZone(ZoneId.of(..)) instead, or TimeZone.getTimeZone(..., false).</comment>
    </violation>

    <violation>
        <name>org/joda/time/DateTimeZone.toTimeZone:()Ljava/util/TimeZone;</name>
        <version>1.8</version>
        <comment>Avoid DateTimeZone.toTimeZone as it returns GMT for a zone not supported by the JVM. Use TimeZone.getTimeZone(ZoneId.of(dtz.getId())) instead.</comment>
    </violation>

    <violation>
        <name>com/esri/core/geometry/ogc/OGCGeometry.equals:(Lcom/esri/core/geometry/ogc/OGCGeometry;)Z</name>
        <version>1.6</version>
        <comment>Prefer OGCGeometry.Equals(OGCGeometry)</comment>
    </violation>

    <violation>
        <name>com/esri/core/geometry/ogc/OGCGeometry.equals:(Ljava/lang/Object;)Z</name>
        <version>1.6</version>
        <comment>Prefer OGCGeometry.Equals(OGCGeometry)</comment>
    </violation>

    <violation>
        <name>io/airlift/units/DataSize."&lt;init&gt;":(DLio/airlift/units/DataSize$Unit;)V</name>
        <version>1.8</version>
        <comment>Use io.airlift.units.DataSize.of(long, DataSize.Unit)</comment>
    </violation>

    <violation>
        <name>io/airlift/units/DataSize.succinctDataSize:(DLio/airlift/units/DataSize$Unit;)Lio/airlift/units/DataSize;</name>
        <version>1.8</version>
        <comment>Use io.airlift.units.DataSize.of(long, DataSize.Unit).succinct() -- Note that succinct conversion only affects toString() results</comment>
    </violation>

    <violation>
        <name>io/airlift/units/DataSize.getValue:()D</name>
        <version>1.8</version>
        <comment>Use io.airlift.units.DataSize.toBytes() and Unit.inBytes() for conversion</comment>
    </violation>

    <violation>
        <name>io/airlift/units/DataSize.getValue:(Lio/airlift/units/DataSize$Unit;)D</name>
        <version>1.8</version>
        <comment>Use io.airlift.units.DataSize.toBytes() and Unit.inBytes() for conversion</comment>
    </violation>

    <violation>
        <name>io/airlift/units/DataSize.roundTo:(Lio/airlift/units/DataSize$Unit;)J</name>
        <version>1.8</version>
        <comment>Method is deprecated for removal</comment>
    </violation>

    <violation>
        <name>io/airlift/units/DataSize.convertTo:(Lio/airlift/units/DataSize$Unit;)Lio/airlift/units/DataSize;</name>
        <version>1.8</version>
        <comment>Use io.airlift.units.DataSize.to(DataSize.Unit)</comment>
    </violation>

    <violation>
        <name>io/airlift/units/DataSize.convertToMostSuccinctDataSize:()Lio/airlift/units/DataSize;</name>
        <version>1.8</version>
        <comment>Use io.airlift.units.DataSize.succinct()</comment>
    </violation>

    <violation>
        <name>io/airlift/testing/Closeables.closeQuietly:([Ljava/io/Closeable;)V</name>
        <version>1.0</version>
        <comment>Use Closeables.closeAll() or Closer.</comment>
    </violation>

    <violation>
        <name>com/google/inject/util/Modules.combine:(Ljava/lang/Iterable;)Lcom/google/inject/Module;</name>
        <version>1.8</version>
        <comment>Use io.airlift.configuration.ConfigurationAwareModule.combine</comment>
    </violation>

    <violation>
        <name>com/google/inject/util/Modules.combine:([Lcom/google/inject/Module;)Lcom/google/inject/Module;</name>
        <version>1.8</version>
        <comment>Use io.airlift.configuration.ConfigurationAwareModule.combine</comment>
    </violation>
</modernizer>
--------------------------------------------------------------------------------
/.mvn/wrapper/maven-wrapper.properties:
--------------------------------------------------------------------------------
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
wrapperVersion=3.3.2
distributionType=only-script
distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.9.8/apache-maven-3.9.8-bin.zip
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Trino Storage Connector [![Build Status](https://github.com/snowlift/trino-storage/workflows/CI/badge.svg)](https://github.com/snowlift/trino-storage/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster)
This is a [Trino](http://trino.io/) connector for accessing single files (e.g. CSV, TSV). Please keep in mind that it is not production-ready and was created for testing.

# Supported schemes
- hdfs
- s3a
- file
- http
- https

> Note: reading local files (with the `file` scheme or no scheme) can be disabled by setting `allow-local-files=false` in the catalog configuration; see the example catalog file below.

# Query
Specify the file type with the schema name and use an absolute path as the table name.
```sql
SELECT * FROM
storage.csv."file:///tmp/numbers-2.csv";

SELECT * FROM
storage.csv."https://raw.githubusercontent.com/snowlift/trino-storage/master/src/test/resources/example-data/numbers-2.csv";
```

The supported schemas are listed below.
- `tsv`
- `csv`
- `ssv`
- `txt`
- `raw`
- `excel`
- `orc`
- `parquet`
- `json`
- `avro`

The `csv` plugin splits each line into columns separated by `,`. The first line is currently used for the column names.
```sql
SELECT * FROM
storage.csv."https://raw.githubusercontent.com/snowlift/trino-storage/master/src/test/resources/example-data/numbers-2.csv";
```
```
  ten   | 10
--------+----
 eleven | 11
 twelve | 12
(2 rows)
```

Tab (`\t`) and semicolon (`;`) delimiters are also supported, using the `tsv` and `ssv` plugins, respectively.

The `txt` plugin doesn't split lines into columns; the column name is currently always `value`.
```sql
SELECT * FROM
storage.txt."https://raw.githubusercontent.com/snowlift/trino-storage/master/src/test/resources/example-data/numbers.tsv";
```
```
  value
--------
 one 1
 two 2
 three 3
(3 rows)
```

The `raw` plugin doesn't split the file at all; the column name is currently always `data`. This plugin is similar to the `txt` plugin, but while the `txt` plugin may return multiple rows, the `raw` plugin always returns a single row.
```sql
SELECT * FROM
storage.raw."https://raw.githubusercontent.com/snowlift/trino-storage/master/src/test/resources/example-data/numbers.tsv";
```
```
  data
--------
 one 1
 two 2
 three 3
(1 row)
```

The `excel` plugin currently reads only the first sheet.
```sql
SELECT * FROM
storage.excel."https://raw.githubusercontent.com/snowlift/trino-storage/master/src/test/resources/example-data/sample.xlsx";
```
```
  data
--------
 one 1
 two 2
 three 3
(1 row)
```
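# Catalog configuration
To register the connector, create a catalog properties file on each Trino node. The snippet below is a minimal sketch: the file name `etc/catalog/storage.properties` is only an illustrative choice, and `connector.name=storage` is the only required entry; `allow-local-files` (see the note above) defaults to `true` and is shown here just to demonstrate the option.
```
connector.name=storage
# illustrative hardening: disable the local file scheme (defaults to true)
allow-local-files=false
```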
# Table functions

The connector provides table functions to list directory contents and to read single files.
```sql
SELECT * FROM TABLE(storage.system.list('/tmp/trino-storage'));
```
```
     file_modified_time      | size |            name
-----------------------------+------+-----------------------------
 2023-05-03 12:14:22.107 UTC |   12 | /tmp/trino-storage/test.txt
```

```sql
SELECT * FROM TABLE(storage.system.read_file('csv', '/tmp/trino-storage/test.txt'));
```
```
     col
-------------
 hello world
```

# Build
Run all the unit test classes:
```
./mvnw test
```

Build without running tests:
```
./mvnw clean install -DskipTests
```

> Note: the tests include integration tests, which run MinIO and HDFS in Docker containers. They need to pull their images,
> which can take a while. If the tests appear to be stuck, try pulling these images before starting the tests, to see the progress.
> Look for the image names and versions in the `TestingMinioServer` and `TestingHadoopServer` test classes.
> It is also required to set the `HADOOP_USER_NAME` environment variable to `hive`, e.g. `HADOOP_USER_NAME=hive ./mvnw test`.

# Deploy
To use the storage connector in your Trino cluster, unarchive `trino-storage-{version}.zip` from the `target` directory and copy the contained JAR files into the cluster's plugin directory, as sketched below.
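For example, assuming a Trino installation under `/usr/lib/trino` (an illustrative path) and that the archive unpacks into its own `trino-storage-{version}` directory, deployment could look like the following; restart Trino afterwards so the plugin is loaded.
```
unzip target/trino-storage-{version}.zip -d /usr/lib/trino/plugin/
```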
--------------------------------------------------------------------------------
/docker/docker-compose.yml:
--------------------------------------------------------------------------------
version: '2'
services:

  hadoop-master:
    hostname: hadoop-master
    image: 'ghcr.io/trinodb/testing/hdp3.1-hive'
    ports:
      - '1180:1180'
      - '8020:8020'
      - '8042:8042'
      - '8088:8088'
      - '9000:9000'
      - '9083:9083'
      - '10000:10000'
      - '19888:19888'
      - '13306:3306'
      - '50070:50070'
      - '50075:50075'
--------------------------------------------------------------------------------
/mvnw:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # ----------------------------------------------------------------------------
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 | # ----------------------------------------------------------------------------
20 |
21 | # ----------------------------------------------------------------------------
22 | # Apache Maven Wrapper startup batch script, version 3.3.2
23 | #
24 | # Optional ENV vars
25 | # -----------------
26 | # JAVA_HOME - location of a JDK home dir, required when download maven via java source
27 | # MVNW_REPOURL - repo url base for downloading maven distribution
28 | # MVNW_USERNAME/MVNW_PASSWORD - user and password for downloading maven
29 | # MVNW_VERBOSE - true: enable verbose log; debug: trace the mvnw script; others: silence the output
30 | # ----------------------------------------------------------------------------
31 |
32 | set -euf
33 | [ "${MVNW_VERBOSE-}" != debug ] || set -x
34 |
35 | # OS specific support.
36 | native_path() { printf %s\\n "$1"; }
37 | case "$(uname)" in
38 | CYGWIN* | MINGW*)
39 |   [ -z "${JAVA_HOME-}" ] || JAVA_HOME="$(cygpath --unix "$JAVA_HOME")"
40 |   native_path() { cygpath --path --windows "$1"; }
41 |   ;;
42 | esac
43 |
44 | # set JAVACMD and JAVACCMD
45 | set_java_home() {
46 |   # For Cygwin and MinGW, ensure paths are in Unix format before anything is touched
47 |   if [ -n "${JAVA_HOME-}" ]; then
48 |     if [ -x "$JAVA_HOME/jre/sh/java" ]; then
49 |       # IBM's JDK on AIX uses strange locations for the executables
50 |       JAVACMD="$JAVA_HOME/jre/sh/java"
51 |       JAVACCMD="$JAVA_HOME/jre/sh/javac"
52 |     else
53 |       JAVACMD="$JAVA_HOME/bin/java"
54 |       JAVACCMD="$JAVA_HOME/bin/javac"
55 |
56 |       if [ ! -x "$JAVACMD" ] || [ ! -x "$JAVACCMD" ]; then
57 |         echo "The JAVA_HOME environment variable is not defined correctly, so mvnw cannot run." >&2
58 |         echo "JAVA_HOME is set to \"$JAVA_HOME\", but \"\$JAVA_HOME/bin/java\" or \"\$JAVA_HOME/bin/javac\" does not exist." >&2
59 |         return 1
60 |       fi
61 |     fi
62 |   else
63 |     JAVACMD="$(
64 |       'set' +e
65 |       'unset' -f command 2>/dev/null
66 |       'command' -v java
67 |     )" || :
68 |     JAVACCMD="$(
69 |       'set' +e
70 |       'unset' -f command 2>/dev/null
71 |       'command' -v javac
72 |     )" || :
73 |
74 |     if [ ! -x "${JAVACMD-}" ] || [ ! -x "${JAVACCMD-}" ]; then
75 |       echo "The java/javac command does not exist in PATH nor is JAVA_HOME set, so mvnw cannot run." >&2
76 |       return 1
77 |     fi
78 |   fi
79 | }
80 |
81 | # hash string like Java String::hashCode
82 | hash_string() {
83 |   str="${1:-}" h=0
84 |   while [ -n "$str" ]; do
85 |     char="${str%"${str#?}"}"
86 |     h=$(((h * 31 + $(LC_CTYPE=C printf %d "'$char")) % 4294967296))
87 |     str="${str#?}"
88 |   done
89 |   printf %x\\n $h
90 | }
91 |
92 | verbose() { :; }
93 | [ "${MVNW_VERBOSE-}" != true ] || verbose() { printf %s\\n "${1-}"; }
94 |
95 | die() {
96 |   printf %s\\n "$1" >&2
97 |   exit 1
98 | }
99 |
100 | trim() {
101 |   # MWRAPPER-139:
102 |   # Trims trailing and leading whitespace, carriage returns, tabs, and linefeeds.
103 |   # Needed for removing poorly interpreted newline sequences when running in more
104 |   # exotic environments such as mingw bash on Windows.
105 | printf "%s" "${1}" | tr -d '[:space:]' 106 | } 107 | 108 | # parse distributionUrl and optional distributionSha256Sum, requires .mvn/wrapper/maven-wrapper.properties 109 | while IFS="=" read -r key value; do 110 | case "${key-}" in 111 | distributionUrl) distributionUrl=$(trim "${value-}") ;; 112 | distributionSha256Sum) distributionSha256Sum=$(trim "${value-}") ;; 113 | esac 114 | done <"${0%/*}/.mvn/wrapper/maven-wrapper.properties" 115 | [ -n "${distributionUrl-}" ] || die "cannot read distributionUrl property in ${0%/*}/.mvn/wrapper/maven-wrapper.properties" 116 | 117 | case "${distributionUrl##*/}" in 118 | maven-mvnd-*bin.*) 119 | MVN_CMD=mvnd.sh _MVNW_REPO_PATTERN=/maven/mvnd/ 120 | case "${PROCESSOR_ARCHITECTURE-}${PROCESSOR_ARCHITEW6432-}:$(uname -a)" in 121 | *AMD64:CYGWIN* | *AMD64:MINGW*) distributionPlatform=windows-amd64 ;; 122 | :Darwin*x86_64) distributionPlatform=darwin-amd64 ;; 123 | :Darwin*arm64) distributionPlatform=darwin-aarch64 ;; 124 | :Linux*x86_64*) distributionPlatform=linux-amd64 ;; 125 | *) 126 | echo "Cannot detect native platform for mvnd on $(uname)-$(uname -m), use pure java version" >&2 127 | distributionPlatform=linux-amd64 128 | ;; 129 | esac 130 | distributionUrl="${distributionUrl%-bin.*}-$distributionPlatform.zip" 131 | ;; 132 | maven-mvnd-*) MVN_CMD=mvnd.sh _MVNW_REPO_PATTERN=/maven/mvnd/ ;; 133 | *) MVN_CMD="mvn${0##*/mvnw}" _MVNW_REPO_PATTERN=/org/apache/maven/ ;; 134 | esac 135 | 136 | # apply MVNW_REPOURL and calculate MAVEN_HOME 137 | # maven home pattern: ~/.m2/wrapper/dists/{apache-maven-,maven-mvnd--}/ 138 | [ -z "${MVNW_REPOURL-}" ] || distributionUrl="$MVNW_REPOURL$_MVNW_REPO_PATTERN${distributionUrl#*"$_MVNW_REPO_PATTERN"}" 139 | distributionUrlName="${distributionUrl##*/}" 140 | distributionUrlNameMain="${distributionUrlName%.*}" 141 | distributionUrlNameMain="${distributionUrlNameMain%-bin}" 142 | MAVEN_USER_HOME="${MAVEN_USER_HOME:-${HOME}/.m2}" 143 | MAVEN_HOME="${MAVEN_USER_HOME}/wrapper/dists/${distributionUrlNameMain-}/$(hash_string "$distributionUrl")" 144 | 145 | exec_maven() { 146 | unset MVNW_VERBOSE MVNW_USERNAME MVNW_PASSWORD MVNW_REPOURL || : 147 | exec "$MAVEN_HOME/bin/$MVN_CMD" "$@" || die "cannot exec $MAVEN_HOME/bin/$MVN_CMD" 148 | } 149 | 150 | if [ -d "$MAVEN_HOME" ]; then 151 | verbose "found existing MAVEN_HOME at $MAVEN_HOME" 152 | exec_maven "$@" 153 | fi 154 | 155 | case "${distributionUrl-}" in 156 | *?-bin.zip | *?maven-mvnd-?*-?*.zip) ;; 157 | *) die "distributionUrl is not valid, must match *-bin.zip or maven-mvnd-*.zip, but found '${distributionUrl-}'" ;; 158 | esac 159 | 160 | # prepare tmp dir 161 | if TMP_DOWNLOAD_DIR="$(mktemp -d)" && [ -d "$TMP_DOWNLOAD_DIR" ]; then 162 | clean() { rm -rf -- "$TMP_DOWNLOAD_DIR"; } 163 | trap clean HUP INT TERM EXIT 164 | else 165 | die "cannot create temp dir" 166 | fi 167 | 168 | mkdir -p -- "${MAVEN_HOME%/*}" 169 | 170 | # Download and Install Apache Maven 171 | verbose "Couldn't find MAVEN_HOME, downloading and installing it ..." 172 | verbose "Downloading from: $distributionUrl" 173 | verbose "Downloading to: $TMP_DOWNLOAD_DIR/$distributionUrlName" 174 | 175 | # select .zip or .tar.gz 176 | if ! 
command -v unzip >/dev/null; then 177 | distributionUrl="${distributionUrl%.zip}.tar.gz" 178 | distributionUrlName="${distributionUrl##*/}" 179 | fi 180 | 181 | # verbose opt 182 | __MVNW_QUIET_WGET=--quiet __MVNW_QUIET_CURL=--silent __MVNW_QUIET_UNZIP=-q __MVNW_QUIET_TAR='' 183 | [ "${MVNW_VERBOSE-}" != true ] || __MVNW_QUIET_WGET='' __MVNW_QUIET_CURL='' __MVNW_QUIET_UNZIP='' __MVNW_QUIET_TAR=v 184 | 185 | # normalize http auth 186 | case "${MVNW_PASSWORD:+has-password}" in 187 | '') MVNW_USERNAME='' MVNW_PASSWORD='' ;; 188 | has-password) [ -n "${MVNW_USERNAME-}" ] || MVNW_USERNAME='' MVNW_PASSWORD='' ;; 189 | esac 190 | 191 | if [ -z "${MVNW_USERNAME-}" ] && command -v wget >/dev/null; then 192 | verbose "Found wget ... using wget" 193 | wget ${__MVNW_QUIET_WGET:+"$__MVNW_QUIET_WGET"} "$distributionUrl" -O "$TMP_DOWNLOAD_DIR/$distributionUrlName" || die "wget: Failed to fetch $distributionUrl" 194 | elif [ -z "${MVNW_USERNAME-}" ] && command -v curl >/dev/null; then 195 | verbose "Found curl ... using curl" 196 | curl ${__MVNW_QUIET_CURL:+"$__MVNW_QUIET_CURL"} -f -L -o "$TMP_DOWNLOAD_DIR/$distributionUrlName" "$distributionUrl" || die "curl: Failed to fetch $distributionUrl" 197 | elif set_java_home; then 198 | verbose "Falling back to use Java to download" 199 | javaSource="$TMP_DOWNLOAD_DIR/Downloader.java" 200 | targetZip="$TMP_DOWNLOAD_DIR/$distributionUrlName" 201 | cat >"$javaSource" <<-END 202 | public class Downloader extends java.net.Authenticator 203 | { 204 | protected java.net.PasswordAuthentication getPasswordAuthentication() 205 | { 206 | return new java.net.PasswordAuthentication( System.getenv( "MVNW_USERNAME" ), System.getenv( "MVNW_PASSWORD" ).toCharArray() ); 207 | } 208 | public static void main( String[] args ) throws Exception 209 | { 210 | setDefault( new Downloader() ); 211 | java.nio.file.Files.copy( java.net.URI.create( args[0] ).toURL().openStream(), java.nio.file.Paths.get( args[1] ).toAbsolutePath().normalize() ); 212 | } 213 | } 214 | END 215 | # For Cygwin/MinGW, switch paths to Windows format before running javac and java 216 | verbose " - Compiling Downloader.java ..." 217 | "$(native_path "$JAVACCMD")" "$(native_path "$javaSource")" || die "Failed to compile Downloader.java" 218 | verbose " - Running Downloader.java ..." 219 | "$(native_path "$JAVACMD")" -cp "$(native_path "$TMP_DOWNLOAD_DIR")" Downloader "$distributionUrl" "$(native_path "$targetZip")" 220 | fi 221 | 222 | # If specified, validate the SHA-256 sum of the Maven distribution zip file 223 | if [ -n "${distributionSha256Sum-}" ]; then 224 | distributionSha256Result=false 225 | if [ "$MVN_CMD" = mvnd.sh ]; then 226 | echo "Checksum validation is not supported for maven-mvnd." >&2 227 | echo "Please disable validation by removing 'distributionSha256Sum' from your maven-wrapper.properties." >&2 228 | exit 1 229 | elif command -v sha256sum >/dev/null; then 230 | if echo "$distributionSha256Sum $TMP_DOWNLOAD_DIR/$distributionUrlName" | sha256sum -c >/dev/null 2>&1; then 231 | distributionSha256Result=true 232 | fi 233 | elif command -v shasum >/dev/null; then 234 | if echo "$distributionSha256Sum $TMP_DOWNLOAD_DIR/$distributionUrlName" | shasum -a 256 -c >/dev/null 2>&1; then 235 | distributionSha256Result=true 236 | fi 237 | else 238 | echo "Checksum validation was requested but neither 'sha256sum' or 'shasum' are available." >&2 239 | echo "Please install either command, or disable validation by removing 'distributionSha256Sum' from your maven-wrapper.properties." 
>&2 240 | exit 1 241 | fi 242 | if [ $distributionSha256Result = false ]; then 243 | echo "Error: Failed to validate Maven distribution SHA-256, your Maven distribution might be compromised." >&2 244 | echo "If you updated your Maven version, you need to update the specified distributionSha256Sum property." >&2 245 | exit 1 246 | fi 247 | fi 248 | 249 | # unzip and move 250 | if command -v unzip >/dev/null; then 251 | unzip ${__MVNW_QUIET_UNZIP:+"$__MVNW_QUIET_UNZIP"} "$TMP_DOWNLOAD_DIR/$distributionUrlName" -d "$TMP_DOWNLOAD_DIR" || die "failed to unzip" 252 | else 253 | tar xzf${__MVNW_QUIET_TAR:+"$__MVNW_QUIET_TAR"} "$TMP_DOWNLOAD_DIR/$distributionUrlName" -C "$TMP_DOWNLOAD_DIR" || die "failed to untar" 254 | fi 255 | printf %s\\n "$distributionUrl" >"$TMP_DOWNLOAD_DIR/$distributionUrlNameMain/mvnw.url" 256 | mv -- "$TMP_DOWNLOAD_DIR/$distributionUrlNameMain" "$MAVEN_HOME" || [ -d "$MAVEN_HOME" ] || die "fail to move MAVEN_HOME" 257 | 258 | clean || : 259 | exec_maven "$@" 260 | -------------------------------------------------------------------------------- /src/main/java/org/ebyhr/trino/storage/ByteResponseHandler.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | package org.ebyhr.trino.storage; 15 | 16 | import io.airlift.http.client.Request; 17 | import io.airlift.http.client.Response; 18 | import io.airlift.http.client.ResponseHandler; 19 | 20 | import static io.airlift.http.client.ResponseHandlerUtils.propagate; 21 | import static io.airlift.http.client.ResponseHandlerUtils.readResponseBytes; 22 | 23 | public class ByteResponseHandler 24 | implements ResponseHandler 25 | { 26 | private static final ByteResponseHandler BYTE_RESPONSE_HANDLER = new ByteResponseHandler(); 27 | 28 | public static ByteResponseHandler createByteResponseHandler() 29 | { 30 | return BYTE_RESPONSE_HANDLER; 31 | } 32 | 33 | private ByteResponseHandler() {} 34 | 35 | @Override 36 | public ByteResponse handleException(Request request, Exception exception) 37 | { 38 | throw propagate(request, exception); 39 | } 40 | 41 | @Override 42 | public ByteResponse handle(Request request, Response response) 43 | { 44 | byte[] bytes = readResponseBytes(request, response); 45 | return new ByteResponse(response.getStatusCode(), bytes); 46 | } 47 | 48 | public static class ByteResponse 49 | { 50 | private final int statusCode; 51 | private final byte[] body; 52 | 53 | public ByteResponse(int statusCode, byte[] body) 54 | { 55 | this.statusCode = statusCode; 56 | this.body = body; 57 | } 58 | 59 | public int getStatusCode() 60 | { 61 | return statusCode; 62 | } 63 | 64 | public byte[] getBody() 65 | { 66 | return body; 67 | } 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /src/main/java/org/ebyhr/trino/storage/FileType.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | package org.ebyhr.trino.storage; 15 | 16 | import static java.util.Locale.ENGLISH; 17 | 18 | public enum FileType 19 | { 20 | CSV, SSV, TSV, TXT, RAW, EXCEL, ORC, PARQUET, JSON, AVRO; 21 | 22 | @Override 23 | public String toString() 24 | { 25 | return name().toLowerCase(ENGLISH); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/main/java/org/ebyhr/trino/storage/ForStorage.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | package org.ebyhr.trino.storage; 15 | 16 | import com.google.inject.BindingAnnotation; 17 | 18 | import javax.inject.Qualifier; 19 | 20 | import java.lang.annotation.Retention; 21 | import java.lang.annotation.Target; 22 | 23 | import static java.lang.annotation.ElementType.FIELD; 24 | import static java.lang.annotation.ElementType.METHOD; 25 | import static java.lang.annotation.ElementType.PARAMETER; 26 | import static java.lang.annotation.RetentionPolicy.RUNTIME; 27 | 28 | @Retention(RUNTIME) 29 | @Target({FIELD, PARAMETER, METHOD}) 30 | @Qualifier 31 | @BindingAnnotation 32 | public @interface ForStorage 33 | { 34 | } 35 | -------------------------------------------------------------------------------- /src/main/java/org/ebyhr/trino/storage/ListPageSource.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | package org.ebyhr.trino.storage; 15 | 16 | import io.airlift.slice.Slices; 17 | import io.trino.filesystem.FileEntry; 18 | import io.trino.filesystem.FileIterator; 19 | import io.trino.spi.Page; 20 | import io.trino.spi.PageBuilder; 21 | import io.trino.spi.connector.ColumnHandle; 22 | import io.trino.spi.connector.ConnectorPageSource; 23 | import io.trino.spi.connector.ConnectorSession; 24 | 25 | import java.io.IOException; 26 | import java.io.UncheckedIOException; 27 | import java.util.List; 28 | 29 | import static io.trino.spi.type.BigintType.BIGINT; 30 | import static io.trino.spi.type.DateTimeEncoding.packDateTimeWithZone; 31 | import static io.trino.spi.type.TimeZoneKey.UTC_KEY; 32 | import static io.trino.spi.type.VarcharType.VARCHAR; 33 | 34 | public class ListPageSource 35 | implements ConnectorPageSource 36 | { 37 | private final List columns; 38 | private final long readTimeNanos; 39 | private final FileIterator fileStatuses; 40 | private boolean done; 41 | 42 | public ListPageSource(StorageClient storageClient, ConnectorSession session, String path, List columns) 43 | { 44 | this.columns = columns; 45 | long start = System.nanoTime(); 46 | this.fileStatuses = storageClient.list(session, path); 47 | readTimeNanos = System.nanoTime() - start; 48 | } 49 | 50 | @Override 51 | public long getCompletedBytes() 52 | { 53 | return 0; 54 | } 55 | 56 | @Override 57 | public long getReadTimeNanos() 58 | { 59 | return readTimeNanos; 60 | } 61 | 62 | @Override 63 | public boolean isFinished() 64 | { 65 | return done; 66 | } 67 | 68 | @Override 69 | public Page getNextPage() 70 | { 71 | if (done) { 72 | return null; 73 | } 74 | 75 | done = true; 76 | 77 | PageBuilder page = new PageBuilder(columns.stream().map(column -> ((StorageColumnHandle) column).getType()).toList()); 78 | try { 79 | while (fileStatuses.hasNext()) { 80 | FileEntry status = fileStatuses.next(); 81 | page.declarePosition(); 82 | for (int i = 0; i < columns.size(); i++) { 83 | StorageColumnHandle column = (StorageColumnHandle) columns.get(i); 84 | switch (column.getName()) { 85 
| case "file_modified_time" -> BIGINT.writeLong(page.getBlockBuilder(i), packDateTimeWithZone(status.lastModified().toEpochMilli(), UTC_KEY)); 86 | case "size" -> BIGINT.writeLong(page.getBlockBuilder(i), status.length()); 87 | case "name" -> VARCHAR.writeSlice(page.getBlockBuilder(i), Slices.utf8Slice(status.location().toString())); 88 | default -> throw new IllegalStateException("Unknown column name " + column.getName()); 89 | } 90 | } 91 | } 92 | } 93 | catch (IOException e) { 94 | throw new UncheckedIOException(e); 95 | } 96 | return page.build(); 97 | } 98 | 99 | @Override 100 | public long getMemoryUsage() 101 | { 102 | return 0; 103 | } 104 | 105 | @Override 106 | public void close() {} 107 | } 108 | -------------------------------------------------------------------------------- /src/main/java/org/ebyhr/trino/storage/StorageClient.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | package org.ebyhr.trino.storage; 15 | 16 | import com.google.inject.Inject; 17 | import io.airlift.http.client.HttpClient; 18 | import io.airlift.http.client.HttpStatus; 19 | import io.airlift.http.client.Request; 20 | import io.airlift.log.Logger; 21 | import io.trino.filesystem.FileIterator; 22 | import io.trino.filesystem.Location; 23 | import io.trino.filesystem.TrinoFileSystemFactory; 24 | import io.trino.filesystem.local.LocalFileSystem; 25 | import io.trino.spi.TrinoException; 26 | import io.trino.spi.connector.ConnectorSession; 27 | import io.trino.spi.type.VarcharType; 28 | import org.ebyhr.trino.storage.operator.FilePlugin; 29 | import org.ebyhr.trino.storage.operator.PluginFactory; 30 | 31 | import java.io.ByteArrayInputStream; 32 | import java.io.IOException; 33 | import java.io.InputStream; 34 | import java.io.UncheckedIOException; 35 | import java.net.URI; 36 | import java.nio.file.Path; 37 | import java.util.HashSet; 38 | import java.util.List; 39 | import java.util.Set; 40 | import java.util.stream.Collectors; 41 | import java.util.stream.Stream; 42 | 43 | import static io.airlift.http.client.Request.Builder.prepareGet; 44 | import static io.trino.spi.StandardErrorCode.PERMISSION_DENIED; 45 | import static java.lang.String.format; 46 | import static java.util.Objects.requireNonNull; 47 | import static org.ebyhr.trino.storage.ByteResponseHandler.createByteResponseHandler; 48 | import static org.ebyhr.trino.storage.ptf.ListTableFunction.LIST_SCHEMA_NAME; 49 | 50 | public class StorageClient 51 | { 52 | private static final Logger log = Logger.get(StorageClient.class); 53 | 54 | private final TrinoFileSystemFactory fileSystemFactory; 55 | private final HttpClient httpClient; 56 | private final boolean allowLocalFiles; 57 | 58 | @Inject 59 | public StorageClient(TrinoFileSystemFactory fileSystemFactory, @ForStorage HttpClient httpClient, StorageConfig storageConfig) 60 | { 61 | this.fileSystemFactory = requireNonNull(fileSystemFactory, "fileSystemFactory is 
null"); 62 | this.httpClient = requireNonNull(httpClient, "httpClient is null"); 63 | this.allowLocalFiles = requireNonNull(storageConfig, "storageConfig is null").getAllowLocalFiles(); 64 | } 65 | 66 | public List getSchemaNames() 67 | { 68 | return Stream.of(FileType.values()) 69 | .map(FileType::toString) 70 | .collect(Collectors.toList()); 71 | } 72 | 73 | public Set getTableNames(String schema) 74 | { 75 | requireNonNull(schema, "schema is null"); 76 | return new HashSet<>(); 77 | } 78 | 79 | public StorageTable getTable(ConnectorSession session, String schema, String tableName) 80 | { 81 | requireNonNull(schema, "schema is null"); 82 | requireNonNull(tableName, "tableName is null"); 83 | 84 | if (isLocalFile(tableName) && !allowLocalFiles) { 85 | throw new TrinoException(PERMISSION_DENIED, "Reading local files is disabled"); 86 | } 87 | if (schema.equals(LIST_SCHEMA_NAME)) { 88 | return new StorageTable(StorageSplit.Mode.LIST, tableName, List.of(new StorageColumnHandle("path", VarcharType.VARCHAR))); 89 | } 90 | 91 | FilePlugin plugin = PluginFactory.create(schema); 92 | try { 93 | List columns = plugin.getFields(tableName, path -> getInputStream(session, path)); 94 | return new StorageTable(StorageSplit.Mode.TABLE, tableName, columns); 95 | } 96 | catch (Exception e) { 97 | log.error(e, "Failed to get table: %s.%s", schema, tableName); 98 | return null; 99 | } 100 | } 101 | 102 | private boolean isLocalFile(String path) 103 | { 104 | return path.startsWith("file:") || !( 105 | path.startsWith("http://") || path.startsWith("https://") 106 | || path.startsWith("hdfs://") || path.startsWith("s3a://") || path.startsWith("s3://")); 107 | } 108 | 109 | public InputStream getInputStream(ConnectorSession session, String path) 110 | { 111 | try { 112 | if (path.startsWith("http://") || path.startsWith("https://")) { 113 | Request request = prepareGet().setUri(URI.create(path)).build(); 114 | ByteResponseHandler.ByteResponse response = httpClient.execute(request, createByteResponseHandler()); 115 | int status = response.getStatusCode(); 116 | if (status != HttpStatus.OK.code()) { 117 | throw new IllegalStateException(format("Request to '%s' returned unexpected status code: '%d'", path, status)); 118 | } 119 | return new ByteArrayInputStream(response.getBody()); 120 | } 121 | if (path.startsWith("hdfs://") || path.startsWith("s3a://") || path.startsWith("s3://")) { 122 | return fileSystemFactory.create(session).newInputFile(Location.of(path)).newStream(); 123 | } 124 | 125 | if (!allowLocalFiles) { 126 | throw new TrinoException(PERMISSION_DENIED, "Reading local files is disabled"); 127 | } 128 | if (!path.startsWith("file:")) { 129 | path = "file:" + path; 130 | } 131 | return URI.create(path).toURL().openStream(); 132 | } 133 | catch (IOException e) { 134 | throw new UncheckedIOException(format("Failed to open stream for %s", path), e); 135 | } 136 | } 137 | 138 | public FileIterator list(ConnectorSession session, String path) 139 | { 140 | try { 141 | if (path.startsWith("http://") || path.startsWith("https://")) { 142 | throw new IllegalArgumentException("Listing files over HTTP is not supported"); 143 | } 144 | if (path.startsWith("hdfs://") || path.startsWith("s3a://") || path.startsWith("s3://")) { 145 | return fileSystemFactory.create(session).listFiles(Location.of(path)); 146 | } 147 | if (!allowLocalFiles) { 148 | throw new TrinoException(PERMISSION_DENIED, "Reading local files is disabled"); 149 | } 150 | if (path.startsWith("file://")) { 151 | path = 
path.substring("file://".length()); 152 | } 153 | else if (path.startsWith("file:")) { 154 | path = path.substring("file:".length()); 155 | } 156 | return new LocalFileSystem(Path.of(path)).listFiles(Location.of("local:///")); 157 | } 158 | catch (IOException e) { 159 | throw new UncheckedIOException(e); 160 | } 161 | } 162 | } 163 | -------------------------------------------------------------------------------- /src/main/java/org/ebyhr/trino/storage/StorageColumnHandle.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | package org.ebyhr.trino.storage; 15 | 16 | import com.fasterxml.jackson.annotation.JsonCreator; 17 | import com.fasterxml.jackson.annotation.JsonProperty; 18 | import io.trino.spi.connector.ColumnHandle; 19 | import io.trino.spi.connector.ColumnMetadata; 20 | import io.trino.spi.type.Type; 21 | 22 | import java.util.Objects; 23 | 24 | import static com.google.common.base.MoreObjects.toStringHelper; 25 | import static java.util.Objects.requireNonNull; 26 | 27 | public final class StorageColumnHandle 28 | implements ColumnHandle 29 | { 30 | private final String name; 31 | private final Type type; 32 | 33 | @JsonCreator 34 | public StorageColumnHandle( 35 | @JsonProperty("name") String name, 36 | @JsonProperty("type") Type type) 37 | { 38 | this.name = requireNonNull(name, "name is null"); 39 | this.type = requireNonNull(type, "type is null"); 40 | } 41 | 42 | @JsonProperty 43 | public String getName() 44 | { 45 | return name; 46 | } 47 | 48 | @JsonProperty 49 | public Type getType() 50 | { 51 | return type; 52 | } 53 | 54 | public ColumnMetadata getColumnMetadata() 55 | { 56 | return new ColumnMetadata(name, type); 57 | } 58 | 59 | @Override 60 | public int hashCode() 61 | { 62 | return Objects.hash(name); 63 | } 64 | 65 | @Override 66 | public boolean equals(Object obj) 67 | { 68 | if (this == obj) { 69 | return true; 70 | } 71 | if ((obj == null) || (getClass() != obj.getClass())) { 72 | return false; 73 | } 74 | 75 | StorageColumnHandle other = (StorageColumnHandle) obj; 76 | return Objects.equals(this.name, other.name); 77 | } 78 | 79 | @Override 80 | public String toString() 81 | { 82 | return toStringHelper(this) 83 | .add("name", name) 84 | .add("type", type) 85 | .toString(); 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /src/main/java/org/ebyhr/trino/storage/StorageConfig.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 
4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | 15 | package org.ebyhr.trino.storage; 16 | 17 | import io.airlift.configuration.Config; 18 | import io.airlift.configuration.ConfigDescription; 19 | 20 | public class StorageConfig 21 | { 22 | private boolean allowLocalFiles = true; 23 | 24 | public boolean getAllowLocalFiles() 25 | { 26 | return allowLocalFiles; 27 | } 28 | 29 | @Config("allow-local-files") 30 | @ConfigDescription("If true, allow reading local files") 31 | public StorageConfig setAllowLocalFiles(boolean allowLocalFiles) 32 | { 33 | this.allowLocalFiles = allowLocalFiles; 34 | return this; 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /src/main/java/org/ebyhr/trino/storage/StorageConnector.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | package org.ebyhr.trino.storage; 15 | 16 | import com.google.common.collect.ImmutableSet; 17 | import com.google.inject.Inject; 18 | import io.airlift.bootstrap.LifeCycleManager; 19 | import io.airlift.log.Logger; 20 | import io.trino.spi.connector.Connector; 21 | import io.trino.spi.connector.ConnectorMetadata; 22 | import io.trino.spi.connector.ConnectorPageSourceProvider; 23 | import io.trino.spi.connector.ConnectorSession; 24 | import io.trino.spi.connector.ConnectorSplitManager; 25 | import io.trino.spi.connector.ConnectorTransactionHandle; 26 | import io.trino.spi.function.table.ConnectorTableFunction; 27 | import io.trino.spi.transaction.IsolationLevel; 28 | 29 | import java.util.Set; 30 | 31 | import static java.util.Objects.requireNonNull; 32 | import static org.ebyhr.trino.storage.StorageTransactionHandle.INSTANCE; 33 | 34 | public class StorageConnector 35 | implements Connector 36 | { 37 | private static final Logger log = Logger.get(StorageConnector.class); 38 | 39 | private final LifeCycleManager lifeCycleManager; 40 | private final StorageMetadata metadata; 41 | private final StorageSplitManager splitManager; 42 | private final StoragePageSourceProvider pageSourceProvider; 43 | private final Set connectorTableFunctions; 44 | 45 | @Inject 46 | public StorageConnector( 47 | LifeCycleManager lifeCycleManager, 48 | StorageMetadata metadata, 49 | StorageSplitManager splitManager, 50 | StoragePageSourceProvider pageSourceProvider, 51 | Set connectorTableFunctions) 52 | { 53 | this.lifeCycleManager = requireNonNull(lifeCycleManager, "lifeCycleManager is null"); 54 | this.metadata = requireNonNull(metadata, "metadata is null"); 55 | this.splitManager = requireNonNull(splitManager, "splitManager is null"); 56 | this.pageSourceProvider = requireNonNull(pageSourceProvider, "pageSourceProvider is null"); 57 | this.connectorTableFunctions = ImmutableSet.copyOf(requireNonNull(connectorTableFunctions, "connectorTableFunctions is null")); 58 | } 59 | 60 | @Override 61 | public ConnectorTransactionHandle beginTransaction(IsolationLevel isolationLevel, boolean readOnly, boolean autoCommit) 62 | { 63 | return INSTANCE; 64 | } 65 | 66 | @Override 67 | public ConnectorMetadata getMetadata(ConnectorSession session, ConnectorTransactionHandle transactionHandle) 68 | { 69 | return metadata; 70 | } 71 | 72 | @Override 73 | public ConnectorSplitManager getSplitManager() 74 | { 75 | return splitManager; 76 | } 77 | 78 | @Override 79 | public ConnectorPageSourceProvider getPageSourceProvider() 80 | { 81 | return pageSourceProvider; 82 | } 83 | 84 | @Override 85 | public Set getTableFunctions() 86 | { 87 | return connectorTableFunctions; 88 | } 89 | 90 | @Override 91 | public final void shutdown() 92 | { 93 | try { 94 | lifeCycleManager.stop(); 95 | } 96 | catch (Exception e) { 97 | log.error(e, "Error shutting down connector"); 98 | } 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /src/main/java/org/ebyhr/trino/storage/StorageConnectorFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 
4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | package org.ebyhr.trino.storage; 15 | 16 | import com.google.inject.Injector; 17 | import io.airlift.bootstrap.Bootstrap; 18 | import io.airlift.json.JsonModule; 19 | import io.trino.hdfs.HdfsModule; 20 | import io.trino.hdfs.authentication.HdfsAuthenticationModule; 21 | import io.trino.hdfs.azure.HiveAzureModule; 22 | import io.trino.hdfs.gcs.HiveGcsModule; 23 | import io.trino.hdfs.s3.HiveS3Module; 24 | import io.trino.spi.connector.Connector; 25 | import io.trino.spi.connector.ConnectorContext; 26 | import io.trino.spi.connector.ConnectorFactory; 27 | 28 | import java.util.Map; 29 | 30 | import static com.google.common.base.Throwables.throwIfUnchecked; 31 | import static java.util.Objects.requireNonNull; 32 | 33 | public class StorageConnectorFactory 34 | implements ConnectorFactory 35 | { 36 | @Override 37 | public String getName() 38 | { 39 | return "storage"; 40 | } 41 | 42 | @Override 43 | public Connector create(String catalogName, Map requiredConfig, ConnectorContext context) 44 | { 45 | requireNonNull(requiredConfig, "requiredConfig is null"); 46 | try { 47 | // A plugin is not required to use Guice; it is just very convenient 48 | Bootstrap app = new Bootstrap( 49 | new JsonModule(), 50 | new StorageModule(context.getTypeManager()), 51 | new HdfsModule(), 52 | new HiveS3Module(), 53 | new HiveGcsModule(), 54 | new HiveAzureModule(), 55 | new HdfsAuthenticationModule()); 56 | 57 | Injector injector = app 58 | .doNotInitializeLogging() 59 | .setRequiredConfigurationProperties(requiredConfig) 60 | .initialize(); 61 | 62 | return injector.getInstance(StorageConnector.class); 63 | } 64 | catch (Exception e) { 65 | throwIfUnchecked(e); 66 | throw new RuntimeException(e); 67 | } 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /src/main/java/org/ebyhr/trino/storage/StorageMetadata.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | package org.ebyhr.trino.storage; 15 | 16 | import com.google.common.collect.ImmutableMap; 17 | import com.google.inject.Inject; 18 | import io.trino.spi.StandardErrorCode; 19 | import io.trino.spi.TrinoException; 20 | import io.trino.spi.connector.ColumnHandle; 21 | import io.trino.spi.connector.ColumnMetadata; 22 | import io.trino.spi.connector.ConnectorMetadata; 23 | import io.trino.spi.connector.ConnectorSession; 24 | import io.trino.spi.connector.ConnectorTableHandle; 25 | import io.trino.spi.connector.ConnectorTableMetadata; 26 | import io.trino.spi.connector.ConnectorTableVersion; 27 | import io.trino.spi.connector.SchemaTableName; 28 | import io.trino.spi.connector.SchemaTablePrefix; 29 | import io.trino.spi.connector.TableColumnsMetadata; 30 | import io.trino.spi.connector.TableFunctionApplicationResult; 31 | import io.trino.spi.connector.TableNotFoundException; 32 | import io.trino.spi.function.table.ConnectorTableFunctionHandle; 33 | import org.ebyhr.trino.storage.ptf.ListTableFunction.QueryFunctionHandle; 34 | import org.ebyhr.trino.storage.ptf.ReadFileTableFunction.ReadFunctionHandle; 35 | 36 | import java.util.Iterator; 37 | import java.util.List; 38 | import java.util.Map; 39 | import java.util.Optional; 40 | import java.util.stream.Stream; 41 | 42 | import static com.google.common.collect.ImmutableList.toImmutableList; 43 | import static java.util.Objects.requireNonNull; 44 | import static org.ebyhr.trino.storage.ptf.ListTableFunction.COLUMNS_METADATA; 45 | import static org.ebyhr.trino.storage.ptf.ListTableFunction.COLUMN_HANDLES; 46 | import static org.ebyhr.trino.storage.ptf.ListTableFunction.LIST_SCHEMA_NAME; 47 | 48 | public class StorageMetadata 49 | implements ConnectorMetadata 50 | { 51 | private final StorageClient storageClient; 52 | 53 | @Inject 54 | public StorageMetadata(StorageClient storageClient) 55 | { 56 | this.storageClient = requireNonNull(storageClient, "storageClient is null"); 57 | } 58 | 59 | @Override 60 | public List<String> listSchemaNames(ConnectorSession session) 61 | { 62 | return listSchemaNames(); 63 | } 64 | 65 | public List<String> listSchemaNames() 66 | { 67 | return List.copyOf(storageClient.getSchemaNames()); 68 | } 69 | 70 | @Override 71 | public StorageTableHandle getTableHandle(ConnectorSession session, SchemaTableName tableName, Optional<ConnectorTableVersion> startVersion, Optional<ConnectorTableVersion> endVersion) 72 | { 73 | if (startVersion.isPresent() || endVersion.isPresent()) { 74 | throw new TrinoException(StandardErrorCode.NOT_SUPPORTED, "This connector does not support versioned tables"); 75 | } 76 | if (!listSchemaNames(session).contains(tableName.getSchemaName())) { 77 | return null; 78 | } 79 | 80 | StorageTable table = storageClient.getTable(session, tableName.getSchemaName(), tableName.getTableName()); 81 | if (table == null) { 82 | return null; 83 | } 84 | 85 | return new StorageTableHandle(table.getMode(), tableName.getSchemaName(), tableName.getTableName()); 86 | } 87 | 88 | @Override 89 | public ConnectorTableMetadata getTableMetadata(ConnectorSession session, ConnectorTableHandle table) 90 | { 91 | StorageTableHandle storageTableHandle = (StorageTableHandle) table; 92 | RemoteTableName tableName = new RemoteTableName(storageTableHandle.getSchemaName(), storageTableHandle.getTableName()); 93 | 94 | return getStorageTableMetadata(session, tableName); 95 | } 96 | 97 | @Override 98 | public List<SchemaTableName> listTables(ConnectorSession session, Optional<String> schemaNameOrNull) 99 | { 100 | SchemaTablePrefix prefix = schemaNameOrNull 101 | .map(SchemaTablePrefix::new) 102 | .orElseGet(SchemaTablePrefix::new); 103 | return listTables(prefix).map(RemoteTableName::toSchemaTableName).collect(toImmutableList()); 104 | } 105 | 106 | @Override 107 | public Map<String, ColumnHandle> getColumnHandles(ConnectorSession session, ConnectorTableHandle tableHandle) 108 | { 109 | StorageTableHandle storageTableHandle = (StorageTableHandle) tableHandle; 110 | 111 | StorageTable table = storageClient.getTable(session, storageTableHandle.getSchemaName(), storageTableHandle.getTableName()); 112 | if (table == null) { 113 | throw new TableNotFoundException(storageTableHandle.toSchemaTableName()); 114 | } 115 | 116 | ImmutableMap.Builder<String, ColumnHandle> columnHandles = ImmutableMap.builder(); 117 | for (ColumnMetadata column : table.getColumnsMetadata()) { 118 | columnHandles.put(column.getName(), new StorageColumnHandle(column.getName(), column.getType())); 119 | } 120 | return columnHandles.build(); 121 | } 122 | 123 | @Override 124 | public Map<SchemaTableName, List<ColumnMetadata>> listTableColumns(ConnectorSession session, SchemaTablePrefix prefix) 125 | { 126 | requireNonNull(prefix, "prefix is null"); 127 | ImmutableMap.Builder<SchemaTableName, List<ColumnMetadata>> columns = ImmutableMap.builder(); 128 | for (RemoteTableName tableName : listTables(prefix).toList()) { 129 | ConnectorTableMetadata tableMetadata = getStorageTableMetadata(session, tableName); 130 | // table can disappear during listing operation 131 | if (tableMetadata != null) { 132 | columns.put(tableName.toSchemaTableName(), tableMetadata.getColumns()); 133 | } 134 | } 135 | return columns.build(); 136 | } 137 | 138 | @Override 139 | public Iterator<TableColumnsMetadata> streamTableColumns(ConnectorSession session, SchemaTablePrefix prefix) 140 | { 141 | requireNonNull(prefix, "prefix is null"); 142 | return listTables(prefix) 143 | .map(table -> TableColumnsMetadata.forTable( 144 | table.toSchemaTableName(), 145 | requireNonNull(getStorageTableMetadata(session, table), "tableMetadata is null") 146 | .getColumns())) 147 | .iterator(); 148 | } 149 | 150 | private ConnectorTableMetadata getStorageTableMetadata(ConnectorSession session, RemoteTableName tableName) 151 | { 152 | if (tableName.schemaName().equals(LIST_SCHEMA_NAME)) { 153 | return new ConnectorTableMetadata(tableName.toSchemaTableName(), COLUMNS_METADATA); 154 | } 155 | 156 | if (!listSchemaNames().contains(tableName.schemaName())) { 157 | return null; 158 | } 159 | 160 | StorageTable table = storageClient.getTable(session, tableName.schemaName(), tableName.tableName()); 161 | if (table == null) { 162 | return null; 163 | } 164 | 165 | return new ConnectorTableMetadata(tableName.toSchemaTableName(), table.getColumnsMetadata()); 166 | } 167 | 168 | private Stream<RemoteTableName> listTables(SchemaTablePrefix prefix) 169 | { 170 | if (prefix.getSchema().isPresent() && prefix.getTable().isPresent()) { 171 | return Stream.of(new RemoteTableName(prefix.getSchema().get(), prefix.getTable().get())); 172 | } 173 | 174 | List<String> schemaNames = prefix.getSchema() 175 | .map(List::of) 176 | .orElseGet(storageClient::getSchemaNames); 177 | 178 | return schemaNames.stream() 179 | .flatMap(schemaName -> storageClient.getTableNames(schemaName).stream() 180 | .map(tableName -> new RemoteTableName(schemaName, tableName))); 181 | } 182 | 183 | @Override 184 | public ColumnMetadata getColumnMetadata(ConnectorSession session, ConnectorTableHandle tableHandle, ColumnHandle columnHandle) 185 | { 186 | return ((StorageColumnHandle) columnHandle).getColumnMetadata(); 187 | } 188 | 189 | @Override 190 | public Optional<TableFunctionApplicationResult<ConnectorTableHandle>> applyTableFunction(ConnectorSession session, ConnectorTableFunctionHandle handle) 191 | { 192
| if (handle instanceof ReadFunctionHandle catFunctionHandle) { 193 | return Optional.of(new TableFunctionApplicationResult<>( 194 | catFunctionHandle.getTableHandle(), 195 | catFunctionHandle.getColumns().stream() 196 | .map(column -> new StorageColumnHandle(column.getName(), column.getType())) 197 | .collect(toImmutableList()))); 198 | } 199 | if (handle instanceof QueryFunctionHandle queryFunctionHandle) { 200 | return Optional.of(new TableFunctionApplicationResult<>(queryFunctionHandle.getTableHandle(), COLUMN_HANDLES)); 201 | } 202 | return Optional.empty(); 203 | } 204 | 205 | /** 206 | * Simplified variant of {@link SchemaTableName} that doesn't case-fold. 207 | */ 208 | private record RemoteTableName(String schemaName, String tableName) 209 | { 210 | public SchemaTableName toSchemaTableName() 211 | { 212 | return new SchemaTableName(schemaName(), tableName()); 213 | } 214 | } 215 | } 216 | -------------------------------------------------------------------------------- /src/main/java/org/ebyhr/trino/storage/StorageModule.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | package org.ebyhr.trino.storage; 15 | 16 | import com.fasterxml.jackson.databind.DeserializationContext; 17 | import com.fasterxml.jackson.databind.deser.std.FromStringDeserializer; 18 | import com.google.inject.Binder; 19 | import com.google.inject.Inject; 20 | import com.google.inject.Module; 21 | import com.google.inject.Scopes; 22 | import io.airlift.http.client.HttpClientConfig; 23 | import io.opentelemetry.api.OpenTelemetry; 24 | import io.trino.filesystem.TrinoFileSystemFactory; 25 | import io.trino.filesystem.hdfs.HdfsFileSystemFactory; 26 | import io.trino.hdfs.TrinoHdfsFileSystemStats; 27 | import io.trino.spi.function.table.ConnectorTableFunction; 28 | import io.trino.spi.type.Type; 29 | import io.trino.spi.type.TypeManager; 30 | import io.trino.spi.type.TypeSignature; 31 | import org.ebyhr.trino.storage.ptf.ListTableFunction; 32 | import org.ebyhr.trino.storage.ptf.ReadFileTableFunction; 33 | 34 | import static com.google.inject.multibindings.Multibinder.newSetBinder; 35 | import static io.airlift.configuration.ConfigBinder.configBinder; 36 | import static io.airlift.http.client.HttpClientBinder.httpClientBinder; 37 | import static io.airlift.json.JsonBinder.jsonBinder; 38 | import static io.airlift.json.JsonCodec.listJsonCodec; 39 | import static io.airlift.json.JsonCodecBinder.jsonCodecBinder; 40 | import static java.util.Objects.requireNonNull; 41 | 42 | public class StorageModule 43 | implements Module 44 | { 45 | private final TypeManager typeManager; 46 | 47 | public StorageModule(TypeManager typeManager) 48 | { 49 | this.typeManager = requireNonNull(typeManager, "typeManager is null"); 50 | } 51 | 52 | @Override 53 | public void configure(Binder binder) 54 | { 55 | binder.bind(TypeManager.class).toInstance(typeManager); 56 | 57 | 
binder.bind(StorageConnector.class).in(Scopes.SINGLETON); 58 | binder.bind(StorageMetadata.class).in(Scopes.SINGLETON); 59 | binder.bind(StorageClient.class).in(Scopes.SINGLETON); 60 | binder.bind(StorageSplitManager.class).in(Scopes.SINGLETON); 61 | binder.bind(StorageRecordSetProvider.class).in(Scopes.SINGLETON); 62 | binder.bind(StoragePageSourceProvider.class).in(Scopes.SINGLETON); 63 | newSetBinder(binder, ConnectorTableFunction.class).addBinding().toProvider(ReadFileTableFunction.class).in(Scopes.SINGLETON); 64 | newSetBinder(binder, ConnectorTableFunction.class).addBinding().toProvider(ListTableFunction.class).in(Scopes.SINGLETON); 65 | binder.bind(TrinoFileSystemFactory.class).to(HdfsFileSystemFactory.class).in(Scopes.SINGLETON); 66 | binder.bind(TrinoHdfsFileSystemStats.class).in(Scopes.SINGLETON); 67 | binder.bind(OpenTelemetry.class).toInstance(OpenTelemetry.noop()); 68 | configBinder(binder).bindConfig(StorageConfig.class); 69 | 70 | jsonBinder(binder).addDeserializerBinding(Type.class).to(TypeDeserializer.class); 71 | jsonCodecBinder(binder).bindMapJsonCodec(String.class, listJsonCodec(StorageTable.class)); 72 | 73 | configBinder(binder).bindConfig(HttpClientConfig.class, ForStorage.class); 74 | httpClientBinder(binder).bindHttpClient("storage", ForStorage.class); 75 | } 76 | 77 | public static final class TypeDeserializer 78 | extends FromStringDeserializer<Type> 79 | { 80 | private final TypeManager typeManager; 81 | 82 | @Inject 83 | public TypeDeserializer(TypeManager typeManager) 84 | { 85 | super(Type.class); 86 | this.typeManager = requireNonNull(typeManager, "typeManager is null"); 87 | } 88 | 89 | @Override 90 | protected Type _deserialize(String value, DeserializationContext context) 91 | { 92 | return typeManager.getType(new TypeSignature(value)); 93 | } 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /src/main/java/org/ebyhr/trino/storage/StoragePageSourceProvider.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | package org.ebyhr.trino.storage; 15 | 16 | import com.google.inject.Inject; 17 | import io.trino.spi.Page; 18 | import io.trino.spi.connector.ColumnHandle; 19 | import io.trino.spi.connector.ConnectorPageSource; 20 | import io.trino.spi.connector.ConnectorPageSourceProvider; 21 | import io.trino.spi.connector.ConnectorSession; 22 | import io.trino.spi.connector.ConnectorSplit; 23 | import io.trino.spi.connector.ConnectorTableHandle; 24 | import io.trino.spi.connector.ConnectorTransactionHandle; 25 | import io.trino.spi.connector.DynamicFilter; 26 | import io.trino.spi.connector.FixedPageSource; 27 | import io.trino.spi.connector.RecordPageSource; 28 | import io.trino.spi.connector.RecordSet; 29 | import org.ebyhr.trino.storage.operator.FilePlugin; 30 | import org.ebyhr.trino.storage.operator.PluginFactory; 31 | 32 | import java.util.List; 33 | import java.util.stream.StreamSupport; 34 | 35 | import static java.util.Objects.requireNonNull; 36 | import static java.util.stream.Collectors.toList; 37 | import static org.ebyhr.trino.storage.StorageSplit.Mode.LIST; 38 | 39 | public class StoragePageSourceProvider 40 | implements ConnectorPageSourceProvider 41 | { 42 | private final StorageClient storageClient; 43 | private final StorageRecordSetProvider recordSetProvider; 44 | 45 | @Inject 46 | public StoragePageSourceProvider(StorageClient storageClient, StorageRecordSetProvider recordSetProvider) 47 | { 48 | this.storageClient = requireNonNull(storageClient, "storageClient is null"); 49 | this.recordSetProvider = requireNonNull(recordSetProvider, "recordSetProvider is null"); 50 | } 51 | 52 | @Override 53 | public ConnectorPageSource createPageSource( 54 | ConnectorTransactionHandle transaction, 55 | ConnectorSession session, 56 | ConnectorSplit split, 57 | ConnectorTableHandle table, 58 | List<ColumnHandle> columns, 59 | DynamicFilter dynamicFilter) 60 | { 61 | StorageSplit storageSplit = (StorageSplit) requireNonNull(split, "split is null"); 62 | if (storageSplit.getMode() == LIST) { 63 | return new ListPageSource(storageClient, session, storageSplit.getTableName(), columns); 64 | } 65 | 66 | String schemaName = storageSplit.getSchemaName(); 67 | String tableName = storageSplit.getTableName(); 68 | FilePlugin plugin = PluginFactory.create(schemaName); 69 | List<String> handles = columns.stream() 70 | .map(c -> (StorageColumnHandle) c) 71 | .map(c -> c.getName().toLowerCase()) 72 | .toList(); 73 | 74 | try { 75 | return plugin.getConnectorPageSource(tableName, handles, path -> storageClient.getInputStream(session, path)); 76 | } 77 | catch (UnsupportedOperationException ignored) { 78 | // Ignore it when a plugin doesn't implement getConnectorPageSource 79 | // and assume it implements getPagesIterator or getRecordsIterator 80 | } 81 | 82 | try { 83 | Iterable<Page> iterable = plugin.getPagesIterator(tableName, handles, path -> storageClient.getInputStream(session, path)); 84 | List<Page> pages = StreamSupport.stream(iterable.spliterator(), false) 85 | .collect(toList()); 86 | return new FixedPageSource(pages); 87 | } 88 | catch (UnsupportedOperationException ignored) { 89 | // Ignore it when a plugin doesn't implement getPagesIterator 90 | // and assume it implements getRecordsIterator for the record set below 91 | } 92 | 93 | RecordSet recordSet = recordSetProvider.getRecordSet(transaction, session, split, table, columns); 94 | return new RecordPageSource(recordSet); 95 | } 96 | } 97 | --------------------------------------------------------------------------------
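The page source provider above resolves each split in three tiers: it first asks the schema's FilePlugin for a full ConnectorPageSource, then falls back to materializing getPagesIterator into a FixedPageSource, and finally wraps the plugin's record iterator in a RecordPageSource. The sketch below shows the simplest possible plugin, one that only supports the record tier; the class name LinePlugin is hypothetical and not part of this repository (it would also need a case in PluginFactory, which dispatches on schema name), but it follows the same FilePlugin contract as the operator classes later in this listing.

package org.ebyhr.trino.storage.operator;

import org.ebyhr.trino.storage.StorageColumnHandle;

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.List;
import java.util.function.Function;
import java.util.stream.Stream;

import static io.trino.spi.type.VarcharType.VARCHAR;
import static java.nio.charset.StandardCharsets.UTF_8;

// Hypothetical sketch, not part of this repository: exposes any file as a single
// VARCHAR column with one row per line. Because only getFields and
// getRecordsIterator are implemented, StoragePageSourceProvider falls through
// its first two tiers and serves this plugin via a RecordPageSource.
public class LinePlugin
        implements FilePlugin
{
    @Override
    public List<StorageColumnHandle> getFields(String path, Function<String, InputStream> streamProvider)
    {
        return List.of(new StorageColumnHandle("line", VARCHAR));
    }

    @Override
    public Stream<List<?>> getRecordsIterator(String path, Function<String, InputStream> streamProvider)
    {
        BufferedReader reader = new BufferedReader(new InputStreamReader(streamProvider.apply(path), UTF_8));
        // One record per line; each record is a single-element row matching getFields
        return reader.lines().map(line -> List.of(line));
    }
}

--------------------------------------------------------------------------------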
/src/main/java/org/ebyhr/trino/storage/StoragePlugin.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | package org.ebyhr.trino.storage; 15 | 16 | import io.trino.spi.Plugin; 17 | import io.trino.spi.connector.ConnectorFactory; 18 | 19 | import java.util.List; 20 | 21 | public class StoragePlugin 22 | implements Plugin 23 | { 24 | @Override 25 | public Iterable<ConnectorFactory> getConnectorFactories() 26 | { 27 | return List.of(new StorageConnectorFactory()); 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /src/main/java/org/ebyhr/trino/storage/StorageRecordSetProvider.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License.
13 | */ 14 | package org.ebyhr.trino.storage; 15 | 16 | import com.google.common.collect.Iterables; 17 | import com.google.inject.Inject; 18 | import io.trino.spi.connector.ColumnHandle; 19 | import io.trino.spi.connector.ColumnMetadata; 20 | import io.trino.spi.connector.ConnectorRecordSetProvider; 21 | import io.trino.spi.connector.ConnectorSession; 22 | import io.trino.spi.connector.ConnectorSplit; 23 | import io.trino.spi.connector.ConnectorTableHandle; 24 | import io.trino.spi.connector.ConnectorTransactionHandle; 25 | import io.trino.spi.connector.InMemoryRecordSet; 26 | import io.trino.spi.connector.RecordSet; 27 | import io.trino.spi.type.Type; 28 | import org.ebyhr.trino.storage.operator.FilePlugin; 29 | import org.ebyhr.trino.storage.operator.PluginFactory; 30 | 31 | import java.util.List; 32 | import java.util.stream.Stream; 33 | 34 | import static com.google.common.base.Preconditions.checkState; 35 | import static java.util.Objects.requireNonNull; 36 | import static java.util.stream.Collectors.toList; 37 | 38 | public class StorageRecordSetProvider 39 | implements ConnectorRecordSetProvider 40 | { 41 | private final StorageClient storageClient; 42 | 43 | @Inject 44 | public StorageRecordSetProvider(StorageClient storageClient) 45 | { 46 | this.storageClient = requireNonNull(storageClient, "storageClient is null"); 47 | } 48 | 49 | @Override 50 | public RecordSet getRecordSet( 51 | ConnectorTransactionHandle transaction, 52 | ConnectorSession session, 53 | ConnectorSplit split, 54 | ConnectorTableHandle table, 55 | List<ColumnHandle> columns) 56 | { 57 | requireNonNull(split, "split is null"); 58 | StorageSplit storageSplit = (StorageSplit) split; 59 | 60 | String schemaName = storageSplit.getSchemaName(); 61 | String tableName = storageSplit.getTableName(); 62 | StorageTable storageTable = storageClient.getTable(session, schemaName, tableName); 63 | // this can happen if table is removed during a query 64 | checkState(storageTable != null, "Table %s.%s no longer exists", schemaName, tableName); 65 | 66 | FilePlugin plugin = PluginFactory.create(schemaName); 67 | Stream<List<?>> stream = plugin.getRecordsIterator(tableName, path -> storageClient.getInputStream(session, path)); 68 | Iterable<List<?>> rows = stream::iterator; 69 | 70 | List<StorageColumnHandle> handles = columns 71 | .stream() 72 | .map(c -> (StorageColumnHandle) c) 73 | .collect(toList()); 74 | List<Integer> columnIndexes = handles 75 | .stream() 76 | .map(column -> { 77 | int index = 0; 78 | for (ColumnMetadata columnMetadata : storageTable.getColumnsMetadata()) { 79 | if (columnMetadata.getName().equalsIgnoreCase(column.getName())) { 80 | return index; 81 | } 82 | index++; 83 | } 84 | throw new IllegalStateException("Unknown column: " + column.getName()); 85 | }) 86 | .collect(toList()); 87 | 88 | //noinspection StaticPseudoFunctionalStyleMethod 89 | Iterable<List<?>> mappedRows = Iterables.transform(rows, row -> columnIndexes 90 | .stream() 91 | .map(row::get) 92 | .collect(toList())); 93 | 94 | List<Type> mappedTypes = handles 95 | .stream() 96 | .map(StorageColumnHandle::getType) 97 | .collect(toList()); 98 | return new InMemoryRecordSet(mappedTypes, mappedRows); 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /src/main/java/org/ebyhr/trino/storage/StorageSplit.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | package org.ebyhr.trino.storage; 15 | 16 | import com.fasterxml.jackson.annotation.JsonCreator; 17 | import com.fasterxml.jackson.annotation.JsonProperty; 18 | import com.google.common.collect.ImmutableMap; 19 | import io.airlift.slice.SizeOf; 20 | import io.trino.spi.HostAddress; 21 | import io.trino.spi.connector.ConnectorSplit; 22 | 23 | import java.util.List; 24 | import java.util.Map; 25 | 26 | import static java.util.Objects.requireNonNull; 27 | 28 | public class StorageSplit 29 | implements ConnectorSplit 30 | { 31 | private static final int INSTANCE_SIZE = SizeOf.instanceSize(StorageSplit.class); 32 | private static final int MODE_SIZE = SizeOf.instanceSize(Mode.class); 33 | 34 | private final Mode mode; 35 | private final String schemaName; 36 | private final String tableName; 37 | 38 | @JsonCreator 39 | public StorageSplit( 40 | @JsonProperty("mode") Mode mode, 41 | @JsonProperty("schemaName") String schemaName, 42 | @JsonProperty("tableName") String tableName) 43 | { 44 | this.schemaName = requireNonNull(schemaName, "schema name is null"); 45 | this.mode = requireNonNull(mode, "mode is null"); 46 | this.tableName = requireNonNull(tableName, "table name is null"); 47 | } 48 | 49 | @JsonProperty 50 | public Mode getMode() 51 | { 52 | return mode; 53 | } 54 | 55 | @JsonProperty 56 | public String getSchemaName() 57 | { 58 | return schemaName; 59 | } 60 | 61 | @JsonProperty 62 | public String getTableName() 63 | { 64 | return tableName; 65 | } 66 | 67 | @Override 68 | public List<HostAddress> getAddresses() 69 | { 70 | return List.of(); 71 | } 72 | 73 | @Override 74 | public Map<String, String> getSplitInfo() 75 | { 76 | return ImmutableMap.<String, String>builder() 77 | .put("mode", mode.name()) 78 | .put("schemaName", schemaName) 79 | .put("tableName", tableName) 80 | .buildOrThrow(); 81 | } 82 | 83 | public long getRetainedSizeInBytes() 84 | { 85 | return INSTANCE_SIZE 86 | + MODE_SIZE 87 | + SizeOf.estimatedSizeOf(schemaName) 88 | + SizeOf.estimatedSizeOf(tableName); 89 | } 90 | 91 | public enum Mode 92 | { 93 | TABLE, 94 | LIST, 95 | /**/; 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /src/main/java/org/ebyhr/trino/storage/StorageSplitManager.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | package org.ebyhr.trino.storage; 15 | 16 | import com.google.inject.Inject; 17 | import io.trino.spi.connector.ConnectorSession; 18 | import io.trino.spi.connector.ConnectorSplit; 19 | import io.trino.spi.connector.ConnectorSplitManager; 20 | import io.trino.spi.connector.ConnectorSplitSource; 21 | import io.trino.spi.connector.ConnectorTableHandle; 22 | import io.trino.spi.connector.ConnectorTransactionHandle; 23 | import io.trino.spi.connector.Constraint; 24 | import io.trino.spi.connector.DynamicFilter; 25 | import io.trino.spi.connector.FixedSplitSource; 26 | 27 | import java.util.ArrayList; 28 | import java.util.Collections; 29 | import java.util.List; 30 | 31 | import static com.google.common.base.Preconditions.checkState; 32 | import static java.util.Objects.requireNonNull; 33 | 34 | public class StorageSplitManager 35 | implements ConnectorSplitManager 36 | { 37 | private final StorageClient storageClient; 38 | 39 | @Inject 40 | public StorageSplitManager(StorageClient storageClient) 41 | { 42 | this.storageClient = requireNonNull(storageClient, "client is null"); 43 | } 44 | 45 | @Override 46 | public ConnectorSplitSource getSplits( 47 | ConnectorTransactionHandle transaction, 48 | ConnectorSession session, 49 | ConnectorTableHandle handle, 50 | DynamicFilter dynamicFilter, 51 | Constraint constraint) 52 | { 53 | StorageTableHandle tableHandle = (StorageTableHandle) handle; 54 | StorageTable table = storageClient.getTable(session, tableHandle.getSchemaName(), tableHandle.getTableName()); 55 | // this can happen if table is removed during a query 56 | checkState(table != null, "Table %s.%s no longer exists", tableHandle.getSchemaName(), tableHandle.getTableName()); 57 | 58 | List<ConnectorSplit> splits = new ArrayList<>(); 59 | splits.add(new StorageSplit(tableHandle.getMode(), tableHandle.getSchemaName(), tableHandle.getTableName())); 60 | Collections.shuffle(splits); 61 | 62 | return new FixedSplitSource(splits); 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /src/main/java/org/ebyhr/trino/storage/StorageTable.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | package org.ebyhr.trino.storage; 15 | 16 | import com.fasterxml.jackson.annotation.JsonCreator; 17 | import com.fasterxml.jackson.annotation.JsonProperty; 18 | import com.google.common.collect.ImmutableList; 19 | import io.trino.spi.connector.ColumnMetadata; 20 | 21 | import java.util.List; 22 | 23 | import static com.google.common.base.Preconditions.checkArgument; 24 | import static com.google.common.base.Strings.isNullOrEmpty; 25 | import static java.util.Objects.requireNonNull; 26 | 27 | public class StorageTable 28 | { 29 | private final StorageSplit.Mode mode; 30 | private final String name; 31 | private final List<StorageColumnHandle> columns; 32 | private final List<ColumnMetadata> columnsMetadata; 33 | 34 | @JsonCreator 35 | public StorageTable( 36 | @JsonProperty("mode") StorageSplit.Mode mode, 37 | @JsonProperty("name") String name, 38 | @JsonProperty("columns") List<StorageColumnHandle> columns) 39 | { 40 | this.mode = requireNonNull(mode, "mode is null"); 41 | checkArgument(!isNullOrEmpty(name), "name is null or is empty"); 42 | this.name = requireNonNull(name, "name is null"); 43 | this.columns = List.copyOf(requireNonNull(columns, "columns is null")); 44 | 45 | ImmutableList.Builder<ColumnMetadata> columnsMetadata = ImmutableList.builder(); 46 | for (StorageColumnHandle column : this.columns) { 47 | columnsMetadata.add(new ColumnMetadata(column.getName(), column.getType())); 48 | } 49 | this.columnsMetadata = columnsMetadata.build(); 50 | } 51 | 52 | @JsonProperty 53 | public StorageSplit.Mode getMode() 54 | { 55 | return mode; 56 | } 57 | 58 | @JsonProperty 59 | public String getName() 60 | { 61 | return name; 62 | } 63 | 64 | @JsonProperty 65 | public List<StorageColumnHandle> getColumns() 66 | { 67 | return columns; 68 | } 69 | 70 | public List<ColumnMetadata> getColumnsMetadata() 71 | { 72 | return columnsMetadata; 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /src/main/java/org/ebyhr/trino/storage/StorageTableHandle.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License.
13 | */ 14 | package org.ebyhr.trino.storage; 15 | 16 | import com.fasterxml.jackson.annotation.JsonCreator; 17 | import com.fasterxml.jackson.annotation.JsonIgnore; 18 | import com.fasterxml.jackson.annotation.JsonProperty; 19 | import com.google.common.base.Joiner; 20 | import io.trino.spi.connector.ConnectorTableHandle; 21 | import io.trino.spi.connector.SchemaTableName; 22 | 23 | import java.util.Objects; 24 | 25 | import static java.util.Objects.requireNonNull; 26 | 27 | public final class StorageTableHandle 28 | implements ConnectorTableHandle 29 | { 30 | private final StorageSplit.Mode mode; 31 | private final String schemaName; 32 | private final String tableName; 33 | 34 | @JsonCreator 35 | public StorageTableHandle( 36 | @JsonProperty("mode") StorageSplit.Mode mode, 37 | @JsonProperty("schemaName") String schemaName, 38 | @JsonProperty("tableName") String tableName) 39 | { 40 | this.mode = requireNonNull(mode, "mode is null"); 41 | this.schemaName = requireNonNull(schemaName, "schemaName is null"); 42 | this.tableName = requireNonNull(tableName, "tableName is null"); 43 | } 44 | 45 | @JsonProperty 46 | public StorageSplit.Mode getMode() 47 | { 48 | return mode; 49 | } 50 | 51 | @JsonProperty 52 | public String getSchemaName() 53 | { 54 | return schemaName; 55 | } 56 | 57 | @JsonProperty 58 | public String getTableName() 59 | { 60 | return tableName; 61 | } 62 | 63 | @JsonIgnore 64 | public SchemaTableName toSchemaTableName() 65 | { 66 | return new SchemaTableName(schemaName, tableName); 67 | } 68 | 69 | @Override 70 | public int hashCode() 71 | { 72 | return Objects.hash(mode, schemaName, tableName); 73 | } 74 | 75 | @Override 76 | public boolean equals(Object obj) 77 | { 78 | if (this == obj) { 79 | return true; 80 | } 81 | if ((obj == null) || (getClass() != obj.getClass())) { 82 | return false; 83 | } 84 | 85 | StorageTableHandle other = (StorageTableHandle) obj; 86 | return Objects.equals(this.mode, other.mode) && 87 | Objects.equals(this.schemaName, other.schemaName) && 88 | Objects.equals(this.tableName, other.tableName); 89 | } 90 | 91 | @Override 92 | public String toString() 93 | { 94 | return Joiner.on(":").join(schemaName, tableName); 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /src/main/java/org/ebyhr/trino/storage/StorageTransactionHandle.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | package org.ebyhr.trino.storage; 15 | 16 | import io.trino.spi.connector.ConnectorTransactionHandle; 17 | 18 | public enum StorageTransactionHandle 19 | implements ConnectorTransactionHandle 20 | { 21 | INSTANCE 22 | } 23 | -------------------------------------------------------------------------------- /src/main/java/org/ebyhr/trino/storage/operator/AvroColumnDecoder.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | package org.ebyhr.trino.storage.operator; 15 | 16 | import io.airlift.slice.Slice; 17 | import io.airlift.slice.Slices; 18 | import io.trino.spi.ErrorCode; 19 | import io.trino.spi.ErrorCodeSupplier; 20 | import io.trino.spi.ErrorType; 21 | import io.trino.spi.TrinoException; 22 | import io.trino.spi.block.Block; 23 | import io.trino.spi.block.BlockBuilder; 24 | import io.trino.spi.block.MapBlockBuilder; 25 | import io.trino.spi.block.RowBlockBuilder; 26 | import io.trino.spi.block.SqlMap; 27 | import io.trino.spi.block.SqlRow; 28 | import io.trino.spi.type.ArrayType; 29 | import io.trino.spi.type.BigintType; 30 | import io.trino.spi.type.BooleanType; 31 | import io.trino.spi.type.DoubleType; 32 | import io.trino.spi.type.IntegerType; 33 | import io.trino.spi.type.MapType; 34 | import io.trino.spi.type.RealType; 35 | import io.trino.spi.type.RowType; 36 | import io.trino.spi.type.RowType.Field; 37 | import io.trino.spi.type.SmallintType; 38 | import io.trino.spi.type.TinyintType; 39 | import io.trino.spi.type.Type; 40 | import io.trino.spi.type.VarbinaryType; 41 | import io.trino.spi.type.VarcharType; 42 | import org.apache.avro.generic.GenericEnumSymbol; 43 | import org.apache.avro.generic.GenericFixed; 44 | import org.apache.avro.generic.GenericRecord; 45 | 46 | import java.nio.ByteBuffer; 47 | import java.util.List; 48 | import java.util.Map; 49 | 50 | import static com.google.common.base.Preconditions.checkState; 51 | import static io.airlift.slice.Slices.utf8Slice; 52 | import static io.trino.spi.ErrorType.EXTERNAL; 53 | import static io.trino.spi.block.MapValueBuilder.buildMapValue; 54 | import static io.trino.spi.block.RowValueBuilder.buildRowValue; 55 | import static io.trino.spi.type.Varchars.truncateToLength; 56 | import static java.lang.Float.floatToIntBits; 57 | import static java.lang.String.format; 58 | import static java.util.Objects.requireNonNull; 59 | import static org.ebyhr.trino.storage.operator.AvroColumnDecoder.DecoderErrorCode.DECODER_CONVERSION_NOT_SUPPORTED; 60 | 61 | // copied from io.trino.decoder.avro.AvroColumnDecoder 62 | public class AvroColumnDecoder 63 | { 64 | private AvroColumnDecoder() {} 65 | 66 | private static Slice getSlice(Object value, Type type, String columnName) 67 | { 68 | if (type instanceof VarcharType && (value instanceof CharSequence || value instanceof GenericEnumSymbol)) { 69 | return truncateToLength(utf8Slice(value.toString()), type); 70 | } 71 | 72 | if (type instanceof 
VarbinaryType) { 73 | if (value instanceof ByteBuffer) { 74 | return Slices.wrappedHeapBuffer((ByteBuffer) value); 75 | } 76 | if (value instanceof GenericFixed) { 77 | return Slices.wrappedBuffer(((GenericFixed) value).bytes()); 78 | } 79 | } 80 | 81 | throw new TrinoException(DECODER_CONVERSION_NOT_SUPPORTED, format("cannot decode object of '%s' as '%s' for column '%s'", value.getClass(), type, columnName)); 82 | } 83 | 84 | public static Object serializeObject(BlockBuilder builder, Object value, Type type, String columnName) 85 | { 86 | if (type instanceof ArrayType) { 87 | return serializeList(builder, value, type, columnName); 88 | } 89 | if (type instanceof MapType mapType) { 90 | return serializeMap(builder, value, mapType, columnName); 91 | } 92 | if (type instanceof RowType) { 93 | return serializeRow(builder, value, type, columnName); 94 | } 95 | serializePrimitive(builder, value, type, columnName); 96 | return null; 97 | } 98 | 99 | private static Block serializeList(BlockBuilder parentBlockBuilder, Object value, Type type, String columnName) 100 | { 101 | if (value == null) { 102 | checkState(parentBlockBuilder != null, "parentBlockBuilder is null"); 103 | parentBlockBuilder.appendNull(); 104 | return null; 105 | } 106 | List list = (List) value; 107 | List typeParameters = type.getTypeParameters(); 108 | Type elementType = typeParameters.get(0); 109 | 110 | BlockBuilder blockBuilder = elementType.createBlockBuilder(null, list.size()); 111 | for (Object element : list) { 112 | serializeObject(blockBuilder, element, elementType, columnName); 113 | } 114 | if (parentBlockBuilder != null) { 115 | type.writeObject(parentBlockBuilder, blockBuilder.build()); 116 | return null; 117 | } 118 | return blockBuilder.build(); 119 | } 120 | 121 | private static void serializePrimitive(BlockBuilder blockBuilder, Object value, Type type, String columnName) 122 | { 123 | requireNonNull(blockBuilder, "blockBuilder is null"); 124 | 125 | if (value == null) { 126 | blockBuilder.appendNull(); 127 | return; 128 | } 129 | 130 | if (type instanceof BooleanType) { 131 | type.writeBoolean(blockBuilder, (Boolean) value); 132 | return; 133 | } 134 | 135 | if ((value instanceof Integer || value instanceof Long) && (type instanceof BigintType || type instanceof IntegerType || type instanceof SmallintType || type instanceof TinyintType)) { 136 | type.writeLong(blockBuilder, ((Number) value).longValue()); 137 | return; 138 | } 139 | 140 | if (type instanceof DoubleType) { 141 | type.writeDouble(blockBuilder, (Double) value); 142 | return; 143 | } 144 | 145 | if (type instanceof RealType) { 146 | type.writeLong(blockBuilder, floatToIntBits((Float) value)); 147 | return; 148 | } 149 | 150 | if (type instanceof VarcharType || type instanceof VarbinaryType) { 151 | type.writeSlice(blockBuilder, getSlice(value, type, columnName)); 152 | return; 153 | } 154 | 155 | throw new TrinoException(DECODER_CONVERSION_NOT_SUPPORTED, format("cannot decode object of '%s' as '%s' for column '%s'", value.getClass(), type, columnName)); 156 | } 157 | 158 | private static SqlMap serializeMap(BlockBuilder parentBlockBuilder, Object value, MapType type, String columnName) 159 | { 160 | if (value == null) { 161 | checkState(parentBlockBuilder != null, "parentBlockBuilder is null"); 162 | parentBlockBuilder.appendNull(); 163 | return null; 164 | } 165 | 166 | Map map = (Map) value; 167 | Type keyType = type.getKeyType(); 168 | Type valueType = type.getValueType(); 169 | 170 | if (parentBlockBuilder != null) { 171 | ((MapBlockBuilder) 
parentBlockBuilder).buildEntry((keyBuilder, valueBuilder) -> buildMap(columnName, map, keyType, valueType, keyBuilder, valueBuilder)); 172 | return null; 173 | } 174 | return buildMapValue(type, map.size(), (keyBuilder, valueBuilder) -> buildMap(columnName, map, keyType, valueType, keyBuilder, valueBuilder)); 175 | } 176 | 177 | private static void buildMap(String columnName, Map map, Type keyType, Type valueType, BlockBuilder keyBuilder, BlockBuilder valueBuilder) 178 | { 179 | for (Map.Entry entry : map.entrySet()) { 180 | if (entry.getKey() != null) { 181 | keyType.writeSlice(keyBuilder, truncateToLength(utf8Slice(entry.getKey().toString()), keyType)); 182 | serializeObject(valueBuilder, entry.getValue(), valueType, columnName); 183 | } 184 | } 185 | } 186 | 187 | private static SqlRow serializeRow(BlockBuilder blockBuilder, Object value, Type type, String columnName) 188 | { 189 | if (value == null) { 190 | checkState(blockBuilder != null, "block builder is null"); 191 | blockBuilder.appendNull(); 192 | return null; 193 | } 194 | 195 | RowType rowType = (RowType) type; 196 | if (blockBuilder == null) { 197 | return buildRowValue(rowType, fieldBuilders -> buildRow(rowType, columnName, (GenericRecord) value, fieldBuilders)); 198 | } 199 | 200 | ((RowBlockBuilder) blockBuilder).buildEntry(fieldBuilders -> buildRow(rowType, columnName, (GenericRecord) value, fieldBuilders)); 201 | return null; 202 | } 203 | 204 | private static void buildRow(RowType type, String columnName, GenericRecord record, List fieldBuilders) 205 | { 206 | List fields = type.getFields(); 207 | for (int i = 0; i < fields.size(); i++) { 208 | Field field = fields.get(i); 209 | checkState(field.getName().isPresent(), "field name not found"); 210 | serializeObject(fieldBuilders.get(i), record.get(field.getName().get()), field.getType(), columnName); 211 | } 212 | } 213 | 214 | // copied from io.trino.decoder.DecoderErrorCode 215 | enum DecoderErrorCode 216 | implements ErrorCodeSupplier 217 | { 218 | /** 219 | * A requested data conversion is not supported. 220 | */ 221 | DECODER_CONVERSION_NOT_SUPPORTED(0, EXTERNAL); 222 | 223 | private final ErrorCode errorCode; 224 | 225 | DecoderErrorCode(int code, ErrorType type) 226 | { 227 | errorCode = new ErrorCode(code + 0x0101_0000, name(), type); 228 | } 229 | 230 | @Override 231 | public ErrorCode toErrorCode() 232 | { 233 | return errorCode; 234 | } 235 | } 236 | } 237 | -------------------------------------------------------------------------------- /src/main/java/org/ebyhr/trino/storage/operator/AvroPlugin.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | package org.ebyhr.trino.storage.operator; 15 | 16 | import io.trino.spi.Page; 17 | import io.trino.spi.TrinoException; 18 | import io.trino.spi.block.Block; 19 | import io.trino.spi.block.BlockBuilder; 20 | import io.trino.spi.type.Type; 21 | import org.apache.avro.Schema; 22 | import org.apache.avro.file.DataFileStream; 23 | import org.apache.avro.generic.GenericDatumReader; 24 | import org.apache.avro.generic.GenericRecord; 25 | import org.ebyhr.trino.storage.StorageColumnHandle; 26 | 27 | import java.io.IOException; 28 | import java.io.InputStream; 29 | import java.util.ArrayList; 30 | import java.util.List; 31 | import java.util.function.Function; 32 | 33 | import static io.trino.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR; 34 | import static java.lang.String.format; 35 | import static org.ebyhr.trino.storage.operator.AvroSchemaConverter.convert; 36 | 37 | public class AvroPlugin 38 | implements FilePlugin 39 | { 40 | private static final int INITIAL_BATCH_SIZE = 4 * 1024; // 4096 records per page 41 | 42 | @Override 43 | public List<StorageColumnHandle> getFields(String path, Function<String, InputStream> streamProvider) 44 | { 45 | try (InputStream input = streamProvider.apply(path); 46 | DataFileStream<GenericRecord> dataFileStream = new DataFileStream<>(input, new GenericDatumReader<>())) { 47 | Schema schema = dataFileStream.getSchema(); 48 | return schema.getFields().stream() 49 | .map(field -> new StorageColumnHandle( 50 | field.name(), 51 | convert(field.schema()).orElseThrow(() -> new UnsupportedOperationException(format("Field Schema %s not convertable", field.schema()))))) 52 | .toList(); 53 | } 54 | catch (IOException e) { 55 | throw new TrinoException(GENERIC_INTERNAL_ERROR, format("Failed to read Avro file: %s", path), e); 56 | } 57 | } 58 | 59 | @Override 60 | public Iterable<Page> getPagesIterator(String path, List<String> handleColumns, Function<String, InputStream> streamProvider) 61 | { 62 | try (InputStream input = streamProvider.apply(path); 63 | DataFileStream<GenericRecord> dataFileStream = new DataFileStream<>(input, new GenericDatumReader<>())) { 64 | List<Schema.Field> handledFields = dataFileStream.getSchema().getFields().stream() 65 | .filter(field -> handleColumns.contains(field.name().toLowerCase())) 66 | .toList(); 67 | /* 68 | Define BlockBuilder based on handledFields(avroTypes) and process avro record, 69 | if handleFields has a size of 0, add at least 1 field to prevent `blocks is empty` error 70 | handleFields can be empty when `select count (*)` 71 | */ 72 | if (handledFields.isEmpty()) { 73 | handledFields = dataFileStream.getSchema().getFields().stream() 74 | .limit(1) 75 | .toList(); 76 | } 77 | List<Type> avroTypes = handledFields.stream() 78 | .map(field -> convert(field.schema()).orElseThrow(() -> new UnsupportedOperationException(format("Field Schema %s not convertable", field.schema())))) 79 | .toList(); 80 | 81 | List<Page> result = new ArrayList<>(); 82 | boolean hasMoreData = true; 83 | 84 | while (hasMoreData) { 85 | BlockBuilder[] blockBuilders = new BlockBuilder[avroTypes.size()]; 86 | for (int i = 0; i < avroTypes.size(); i++) { 87 | blockBuilders[i] = avroTypes.get(i).createBlockBuilder(null, INITIAL_BATCH_SIZE); 88 | } 89 | 90 | int recordCount = 0; 91 | while (dataFileStream.hasNext() && recordCount < INITIAL_BATCH_SIZE) { 92 | if (dataFileStream.next() instanceof GenericRecord record) { 93 | processAvroRecord(record, handledFields, blockBuilders, avroTypes); 94 | recordCount++; 95 | } 96 | } 97 | 98 | if (recordCount > 0) { 99 | Block[] blocks = new Block[blockBuilders.length]; 100 | for (int i = 0; i < blockBuilders.length; i++) { 101 | blocks[i] = blockBuilders[i].build(); 102 | } 103 | result.add(new Page(blocks)); 104 | } 105 | 106 | hasMoreData = dataFileStream.hasNext(); 107 | } 108 | return result; 109 | } 110 | catch (IOException e) { 111 | throw new TrinoException(GENERIC_INTERNAL_ERROR, format("Failed to read Avro file: %s", path), e); 112 | } 113 | } 114 | 115 | private void processAvroRecord(GenericRecord record, List<Schema.Field> fields, BlockBuilder[] blockBuilders, List<Type> trinoTypes) 116 | { 117 | for (int i = 0; i < fields.size(); i++) { 118 | Schema.Field field = fields.get(i); 119 | Object value = record.get(field.name()); 120 | AvroColumnDecoder.serializeObject(blockBuilders[i], value, trinoTypes.get(i), field.name()); 121 | } 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /src/main/java/org/ebyhr/trino/storage/operator/AvroSchemaConverter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | package org.ebyhr.trino.storage.operator; 15 | 16 | import com.google.common.collect.ImmutableList; 17 | import com.google.common.collect.ImmutableSet; 18 | import io.trino.spi.type.ArrayType; 19 | import io.trino.spi.type.BigintType; 20 | import io.trino.spi.type.BooleanType; 21 | import io.trino.spi.type.DoubleType; 22 | import io.trino.spi.type.IntegerType; 23 | import io.trino.spi.type.MapType; 24 | import io.trino.spi.type.RealType; 25 | import io.trino.spi.type.RowType; 26 | import io.trino.spi.type.Type; 27 | import io.trino.spi.type.TypeOperators; 28 | import io.trino.spi.type.VarbinaryType; 29 | import io.trino.spi.type.VarcharType; 30 | import org.apache.avro.Schema; 31 | import org.apache.avro.SchemaFormatter; 32 | 33 | import java.util.List; 34 | import java.util.Optional; 35 | import java.util.Set; 36 | 37 | import static com.google.common.base.Preconditions.checkArgument; 38 | import static com.google.common.collect.ImmutableList.toImmutableList; 39 | import static com.google.common.collect.ImmutableSet.toImmutableSet; 40 | import static com.google.common.collect.Iterables.getOnlyElement; 41 | import static io.trino.spi.type.VarcharType.VARCHAR; 42 | import static java.lang.String.format; 43 | import static org.apache.avro.Schema.Type.ARRAY; 44 | import static org.apache.avro.Schema.Type.BYTES; 45 | import static org.apache.avro.Schema.Type.DOUBLE; 46 | import static org.apache.avro.Schema.Type.ENUM; 47 | import static org.apache.avro.Schema.Type.FIXED; 48 | import static org.apache.avro.Schema.Type.FLOAT; 49 | import static org.apache.avro.Schema.Type.INT; 50 | import static org.apache.avro.Schema.Type.LONG; 51 | import static org.apache.avro.Schema.Type.MAP; 52 | import static org.apache.avro.Schema.Type.NULL; 53 | import static org.apache.avro.Schema.Type.RECORD; 54 | import static org.apache.avro.Schema.Type.STRING; 55 | import static org.apache.avro.Schema.Type.UNION; 56 | 57 | /* 58 | copied from
io.trino.plugin.kafka.schema.confluent.AvroSchemaConverter 59 | 5 changes 60 | - 1. remove TypeManager variable and add TypeOperators static variable 61 | - 2. set EmptyFieldStrategy static variable and initialize it 62 | - 3. change constructor from public to private 63 | - 4. make convert method to static 64 | - 5. remove unnecessary methods 65 | */ 66 | public class AvroSchemaConverter 67 | { 68 | private static final SchemaFormatter JSON_PRETTY_FORMATTER = SchemaFormatter.getInstance("json/pretty"); 69 | 70 | public static final String DUMMY_FIELD_NAME = "$empty_field_marker"; 71 | 72 | public static final RowType DUMMY_ROW_TYPE = RowType.from(ImmutableList.of(new RowType.Field(Optional.of(DUMMY_FIELD_NAME), BooleanType.BOOLEAN))); 73 | 74 | public enum EmptyFieldStrategy 75 | { 76 | IGNORE, 77 | MARK, 78 | FAIL, 79 | } 80 | 81 | private static final Set INTEGRAL_TYPES = ImmutableSet.of(INT, LONG); 82 | private static final Set DECIMAL_TYPES = ImmutableSet.of(FLOAT, DOUBLE); 83 | private static final Set STRING_TYPES = ImmutableSet.of(STRING, ENUM); 84 | private static final Set BINARY_TYPES = ImmutableSet.of(BYTES, FIXED); 85 | 86 | // fixed 1 87 | private static final TypeOperators typeOperators = new TypeOperators(); 88 | // fixed 2 89 | private static final EmptyFieldStrategy emptyFieldStrategy = EmptyFieldStrategy.IGNORE; 90 | 91 | // fixed 3 92 | private AvroSchemaConverter() {} 93 | 94 | // fixed 4 95 | public static Optional convert(Schema schema) 96 | { 97 | switch (schema.getType()) { 98 | case INT: 99 | return Optional.of(IntegerType.INTEGER); 100 | case LONG: 101 | return Optional.of(BigintType.BIGINT); 102 | case BOOLEAN: 103 | return Optional.of(BooleanType.BOOLEAN); 104 | case FLOAT: 105 | return Optional.of(RealType.REAL); 106 | case DOUBLE: 107 | return Optional.of(DoubleType.DOUBLE); 108 | case ENUM: 109 | case STRING: 110 | return Optional.of(VarcharType.VARCHAR); 111 | case BYTES: 112 | case FIXED: 113 | return Optional.of(VarbinaryType.VARBINARY); 114 | case UNION: 115 | return convertUnion(schema); 116 | case ARRAY: 117 | return convertArray(schema); 118 | case MAP: 119 | return convertMap(schema); 120 | case RECORD: 121 | return convertRecord(schema); 122 | case NULL: 123 | // unsupported 124 | break; 125 | } 126 | throw new UnsupportedOperationException(format("Type %s not supported", schema.getType())); 127 | } 128 | 129 | private static Optional convertUnion(Schema schema) 130 | { 131 | checkArgument(schema.getType().equals(UNION), "schema is not a union schema"); 132 | // Cannot use ImmutableSet.Builder because types may contain multiple FIXED types with different sizes 133 | Set types = schema.getTypes().stream() 134 | .map(Schema::getType) 135 | .collect(toImmutableSet()); 136 | 137 | if (types.contains(NULL)) { 138 | return convertUnion(Schema.createUnion(schema.getTypes().stream() 139 | .filter(type -> type.getType() != NULL) 140 | .collect(toImmutableList()))); 141 | } 142 | if (schema.getTypes().size() == 1) { 143 | return convert(getOnlyElement(schema.getTypes())); 144 | } 145 | if (INTEGRAL_TYPES.containsAll(types)) { 146 | return Optional.of(BigintType.BIGINT); 147 | } 148 | if (DECIMAL_TYPES.containsAll(types)) { 149 | return Optional.of(DoubleType.DOUBLE); 150 | } 151 | if (STRING_TYPES.containsAll(types)) { 152 | return Optional.of(VarcharType.VARCHAR); 153 | } 154 | if (BINARY_TYPES.containsAll(types)) { 155 | return Optional.of(VarbinaryType.VARBINARY); 156 | } 157 | throw new UnsupportedOperationException(format("Incompatible UNION type: '%s'", 
JSON_PRETTY_FORMATTER.format(schema))); 158 | } 159 | 160 | private static Optional convertArray(Schema schema) 161 | { 162 | checkArgument(schema.getType() == ARRAY, "schema is not an ARRAY"); 163 | return convert(schema.getElementType()).map(ArrayType::new); 164 | } 165 | 166 | private static Optional convertMap(Schema schema) 167 | { 168 | checkArgument(schema.getType() == MAP, "schema is not a MAP"); 169 | return convert(schema.getValueType()).map(AvroSchemaConverter::createMapType); 170 | } 171 | 172 | private static Type createMapType(Type valueType) 173 | { 174 | Type keyType = VARCHAR; 175 | return new MapType(keyType, valueType, typeOperators); 176 | } 177 | 178 | private static Optional convertRecord(Schema schema) 179 | { 180 | checkArgument(schema.getType() == RECORD, "schema is not a RECORD"); 181 | List fields = schema.getFields().stream() 182 | .map(field -> convert(field.schema()).map(type -> new RowType.Field(Optional.ofNullable(field.name()), type))) 183 | .filter(Optional::isPresent) 184 | .map(Optional::get) 185 | .collect(toImmutableList()); 186 | if (fields.isEmpty()) { 187 | switch (emptyFieldStrategy) { 188 | case IGNORE: 189 | return Optional.empty(); 190 | case MARK: 191 | return Optional.of(DUMMY_ROW_TYPE); 192 | case FAIL: 193 | throw new IllegalStateException(format("Struct type has no valid fields for schema: '%s'", schema)); 194 | } 195 | throw new IllegalStateException(format("Unknown emptyFieldStrategy '%s'", emptyFieldStrategy)); 196 | } 197 | return Optional.of(RowType.from(fields)); 198 | } 199 | } 200 | -------------------------------------------------------------------------------- /src/main/java/org/ebyhr/trino/storage/operator/CsvPlugin.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | package org.ebyhr.trino.storage.operator; 15 | 16 | import com.fasterxml.jackson.databind.MappingIterator; 17 | import com.fasterxml.jackson.dataformat.csv.CsvMapper; 18 | import com.fasterxml.jackson.dataformat.csv.CsvParser; 19 | import com.fasterxml.jackson.dataformat.csv.CsvSchema; 20 | import com.google.common.collect.Streams; 21 | import org.ebyhr.trino.storage.StorageColumnHandle; 22 | 23 | import java.io.IOException; 24 | import java.io.InputStream; 25 | import java.io.UncheckedIOException; 26 | import java.util.List; 27 | import java.util.function.Function; 28 | import java.util.stream.Stream; 29 | 30 | import static com.google.common.collect.ImmutableList.toImmutableList; 31 | import static io.trino.spi.type.VarcharType.VARCHAR; 32 | 33 | public class CsvPlugin 34 | implements FilePlugin 35 | { 36 | private final CsvMapper mapper; 37 | private final CsvSchema schema; 38 | 39 | public CsvPlugin(char delimiter) 40 | { 41 | this.mapper = new CsvMapper(); 42 | this.mapper.enable(CsvParser.Feature.WRAP_AS_ARRAY).enable(CsvParser.Feature.TRIM_SPACES); 43 | this.schema = CsvSchema.emptySchema().withColumnSeparator(delimiter); 44 | } 45 | 46 | @Override 47 | public List<StorageColumnHandle> getFields(String path, Function<String, InputStream> streamProvider) 48 | { 49 | try { 50 | // Read the first line and use the values as column names 51 | MappingIterator<List<String>> it = this.mapper.readerFor(List.class).with(schema).readValues(streamProvider.apply(path)); 52 | List<String> fields = it.next(); 53 | return fields.stream() 54 | .map(field -> new StorageColumnHandle(field, VARCHAR)) 55 | .collect(toImmutableList()); 56 | } 57 | catch (IOException e) { 58 | throw new UncheckedIOException(e); 59 | } 60 | } 61 | 62 | @Override 63 | public Stream<List<?>> getRecordsIterator(String path, Function<String, InputStream> streamProvider) 64 | { 65 | try { 66 | // Read lines and skip the first one because that contains the column names 67 | MappingIterator<List<?>> it = this.mapper.readerFor(List.class).with(schema).readValues(streamProvider.apply(path)); 68 | return Streams.stream(it).skip(1); 69 | } 70 | catch (IOException e) { 71 | throw new UncheckedIOException(e); 72 | } 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /src/main/java/org/ebyhr/trino/storage/operator/ExcelPlugin.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License.
13 | */ 14 | package org.ebyhr.trino.storage.operator; 15 | 16 | import org.apache.poi.ss.usermodel.Cell; 17 | import org.apache.poi.ss.usermodel.DataFormatter; 18 | import org.apache.poi.ss.usermodel.Row; 19 | import org.apache.poi.ss.usermodel.Sheet; 20 | import org.apache.poi.ss.usermodel.Workbook; 21 | import org.apache.poi.ss.usermodel.WorkbookFactory; 22 | import org.ebyhr.trino.storage.StorageColumnHandle; 23 | 24 | import java.io.IOException; 25 | import java.io.InputStream; 26 | import java.io.UncheckedIOException; 27 | import java.util.ArrayList; 28 | import java.util.Iterator; 29 | import java.util.LinkedList; 30 | import java.util.List; 31 | import java.util.Spliterator; 32 | import java.util.Spliterators; 33 | import java.util.function.Function; 34 | import java.util.stream.Stream; 35 | import java.util.stream.StreamSupport; 36 | 37 | import static io.trino.spi.type.VarcharType.VARCHAR; 38 | 39 | public class ExcelPlugin 40 | implements FilePlugin 41 | { 42 | private static final DataFormatter DATA_FORMATTER = new DataFormatter(); 43 | 44 | @Override 45 | public List getFields(String path, Function streamProvider) 46 | { 47 | try (InputStream inputStream = streamProvider.apply(path); 48 | Workbook workbook = WorkbookFactory.create(inputStream)) { 49 | Sheet sheet = workbook.getSheetAt(0); 50 | Iterator rows = sheet.iterator(); 51 | List columnTypes = new LinkedList<>(); 52 | Row row = rows.next(); 53 | for (Cell cell : row) { 54 | String cellValue = DATA_FORMATTER.formatCellValue(cell); 55 | columnTypes.add(new StorageColumnHandle(cellValue, VARCHAR)); 56 | } 57 | return columnTypes; 58 | } 59 | catch (IOException e) { 60 | throw new UncheckedIOException(e); 61 | } 62 | } 63 | 64 | @Override 65 | public Stream> getRecordsIterator(String path, Function streamProvider) 66 | { 67 | try (InputStream inputStream = streamProvider.apply(path); 68 | Workbook workbook = WorkbookFactory.create(inputStream)) { 69 | Sheet sheet = workbook.getSheetAt(0); 70 | Spliterator spliterator = Spliterators.spliteratorUnknownSize(sheet.iterator(), 0); 71 | return StreamSupport.stream(spliterator, false) 72 | .skip(1) 73 | .map(this::splitToList); 74 | } 75 | catch (IOException e) { 76 | throw new RuntimeException(e); 77 | } 78 | } 79 | 80 | private List splitToList(Row row) 81 | { 82 | List values = new ArrayList<>(); 83 | for (Cell cell : row) { 84 | String cellValue = DATA_FORMATTER.formatCellValue(cell); 85 | values.add(cellValue); 86 | } 87 | return values; 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /src/main/java/org/ebyhr/trino/storage/operator/FilePlugin.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | package org.ebyhr.trino.storage.operator; 15 | 16 | import io.trino.spi.Page; 17 | import io.trino.spi.connector.ConnectorPageSource; 18 | import org.ebyhr.trino.storage.StorageColumnHandle; 19 | 20 | import java.io.InputStream; 21 | import java.util.List; 22 | import java.util.function.Function; 23 | import java.util.stream.Stream; 24 | 25 | public interface FilePlugin 26 | { 27 | List getFields(String path, Function streamProvider); 28 | 29 | default Stream> getRecordsIterator(String path, Function streamProvider) 30 | { 31 | throw new UnsupportedOperationException("A FilePlugin must implement getConnectorPageSource, getRecordsIterator or getPagesIterator"); 32 | } 33 | 34 | default Iterable getPagesIterator(String path, List handleColumns, Function streamProvider) 35 | { 36 | throw new UnsupportedOperationException("A FilePlugin must implement getConnectorPageSource, getRecordsIterator or getPagesIterator"); 37 | } 38 | 39 | default ConnectorPageSource getConnectorPageSource(String path, List handleColumns, Function streamProvider) 40 | { 41 | throw new UnsupportedOperationException("A FilePlugin must implement getConnectorPageSource, getRecordsIterator or getPagesIterator"); 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/main/java/org/ebyhr/trino/storage/operator/OrcPageSource.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | package org.ebyhr.trino.storage.operator; 15 | 16 | import com.google.common.collect.ImmutableMap; 17 | import com.google.common.io.Closer; 18 | import io.trino.memory.context.AggregatedMemoryContext; 19 | import io.trino.memory.context.LocalMemoryContext; 20 | import io.trino.orc.OrcCorruptionException; 21 | import io.trino.orc.OrcDataSource; 22 | import io.trino.orc.OrcDataSourceId; 23 | import io.trino.orc.OrcRecordReader; 24 | import io.trino.orc.metadata.CompressionKind; 25 | import io.trino.plugin.base.metrics.FileFormatDataSourceStats; 26 | import io.trino.plugin.base.metrics.LongCount; 27 | import io.trino.spi.TrinoException; 28 | import io.trino.spi.connector.ConnectorPageSource; 29 | import io.trino.spi.connector.SourcePage; 30 | import io.trino.spi.metrics.Metrics; 31 | 32 | import java.io.IOException; 33 | import java.io.UncheckedIOException; 34 | import java.util.Optional; 35 | import java.util.OptionalLong; 36 | 37 | import static com.google.common.base.MoreObjects.toStringHelper; 38 | import static io.trino.plugin.base.util.Closables.closeAllSuppress; 39 | import static io.trino.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR; 40 | import static java.lang.String.format; 41 | import static java.util.Objects.requireNonNull; 42 | 43 | public class OrcPageSource 44 | implements ConnectorPageSource 45 | { 46 | private static final String ORC_CODEC_METRIC_PREFIX = "OrcReaderCompressionFormat_"; 47 | 48 | private final OrcRecordReader recordReader; 49 | private final OrcDataSource orcDataSource; 50 | private final AggregatedMemoryContext memoryContext; 51 | private final LocalMemoryContext localMemoryContext; 52 | private final FileFormatDataSourceStats stats; 53 | private final CompressionKind compressionKind; 54 | private boolean closed; 55 | private long completedPositions; 56 | 57 | private Optional outstandingPage = Optional.empty(); 58 | 59 | public OrcPageSource( 60 | OrcRecordReader recordReader, 61 | OrcDataSource orcDataSource, 62 | AggregatedMemoryContext memoryContext, 63 | FileFormatDataSourceStats stats, 64 | CompressionKind compressionKind) 65 | { 66 | this.recordReader = requireNonNull(recordReader, "recordReader is null"); 67 | this.orcDataSource = requireNonNull(orcDataSource, "orcDataSource is null"); 68 | this.stats = requireNonNull(stats, "stats is null"); 69 | this.memoryContext = requireNonNull(memoryContext, "memoryContext is null"); 70 | this.localMemoryContext = memoryContext.newLocalMemoryContext(OrcPageSource.class.getSimpleName()); 71 | this.compressionKind = requireNonNull(compressionKind, "compressionKind is null"); 72 | } 73 | 74 | static TrinoException handleException(OrcDataSourceId dataSourceId, Exception exception) 75 | { 76 | if (exception instanceof TrinoException trinoException) { 77 | return trinoException; 78 | } 79 | if (exception instanceof OrcCorruptionException) { 80 | return new TrinoException(GENERIC_INTERNAL_ERROR, exception); 81 | } 82 | return new TrinoException(GENERIC_INTERNAL_ERROR, format("Failed to read ORC file: %s", dataSourceId), exception); 83 | } 84 | 85 | @Override 86 | public long getCompletedBytes() 87 | { 88 | return orcDataSource.getReadBytes(); 89 | } 90 | 91 | @Override 92 | public OptionalLong getCompletedPositions() 93 | { 94 | return OptionalLong.of(completedPositions); 95 | } 96 | 97 | @Override 98 | public long getReadTimeNanos() 99 | { 100 | return orcDataSource.getReadTimeNanos(); 101 | } 102 | 103 | @Override 104 | public boolean isFinished() 105 | { 106 | return closed; 107 | } 108 | 109 | 
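/**
 * Pull one page: a page buffered by an earlier yield (outstandingPage) is returned first;
 * otherwise the next page is fetched from the OrcRecordReader. A null result means the
 * reader is exhausted, so the source closes itself and isFinished() starts returning true.
 */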
@Override 110 | public SourcePage getNextSourcePage() 111 | { 112 | SourcePage page; 113 | try { 114 | if (outstandingPage.isPresent()) { 115 | page = outstandingPage.get(); 116 | outstandingPage = Optional.empty(); 117 | // Mark no bytes consumed by outstandingPage. 118 | // It can be set again below if reading the next page yields again. 119 | // In such case the brief period when it is set to 0 will not be observed externally as 120 | // page source memory usage is only read by engine after call to getNextSourcePage completes. 121 | localMemoryContext.setBytes(0); 122 | } 123 | else { 124 | page = recordReader.nextPage(); 125 | } 126 | } 127 | catch (IOException | RuntimeException e) { 128 | closeAllSuppress(e, this); 129 | throw handleException(orcDataSource.getId(), e); 130 | } 131 | 132 | if (page == null) { 133 | close(); 134 | return null; 135 | } 136 | 137 | completedPositions += page.getPositionCount(); 138 | 139 | return page; 140 | } 141 | 142 | @Override 143 | public void close() 144 | { 145 | // some hive input formats are broken and bad things can happen if you close them multiple times 146 | if (closed) { 147 | return; 148 | } 149 | closed = true; 150 | 151 | Closer closer = Closer.create(); 152 | 153 | closer.register(() -> { 154 | stats.addMaxCombinedBytesPerRow(recordReader.getMaxCombinedBytesPerRow()); 155 | recordReader.close(); 156 | }); 157 | 158 | try { 159 | closer.close(); 160 | } 161 | catch (IOException e) { 162 | throw new UncheckedIOException(e); 163 | } 164 | } 165 | 166 | @Override 167 | public String toString() 168 | { 169 | return toStringHelper(this) 170 | .add("orcReader", recordReader) 171 | .toString(); 172 | } 173 | 174 | @Override 175 | public long getMemoryUsage() 176 | { 177 | return memoryContext.getBytes(); 178 | } 179 | 180 | @Override 181 | public Metrics getMetrics() 182 | { 183 | return new Metrics(ImmutableMap.of(ORC_CODEC_METRIC_PREFIX + compressionKind.name(), new LongCount(recordReader.getTotalDataLength()))); 184 | } 185 | } 186 | -------------------------------------------------------------------------------- /src/main/java/org/ebyhr/trino/storage/operator/OrcPlugin.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License.
13 | */ 14 | package org.ebyhr.trino.storage.operator; 15 | 16 | import io.trino.orc.FileOrcDataSource; 17 | import io.trino.orc.OrcColumn; 18 | import io.trino.orc.OrcDataSource; 19 | import io.trino.orc.OrcPredicate; 20 | import io.trino.orc.OrcReader; 21 | import io.trino.orc.OrcReaderOptions; 22 | import io.trino.orc.OrcRecordReader; 23 | import io.trino.orc.metadata.ColumnMetadata; 24 | import io.trino.orc.metadata.CompressionKind; 25 | import io.trino.orc.metadata.OrcType; 26 | import io.trino.plugin.base.metrics.FileFormatDataSourceStats; 27 | import io.trino.spi.TrinoException; 28 | import io.trino.spi.connector.ConnectorPageSource; 29 | import io.trino.spi.type.Type; 30 | import org.ebyhr.trino.storage.StorageColumnHandle; 31 | 32 | import java.io.File; 33 | import java.io.IOException; 34 | import java.io.InputStream; 35 | import java.nio.file.Files; 36 | import java.nio.file.StandardCopyOption; 37 | import java.util.List; 38 | import java.util.Optional; 39 | import java.util.function.Function; 40 | import java.util.stream.Collectors; 41 | 42 | import static io.trino.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext; 43 | import static io.trino.orc.OrcReader.INITIAL_BATCH_SIZE; 44 | import static io.trino.orc.OrcReader.createOrcReader; 45 | import static io.trino.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR; 46 | import static java.lang.String.format; 47 | import static org.ebyhr.trino.storage.operator.OrcTypeTranslator.fromOrcType; 48 | import static org.joda.time.DateTimeZone.UTC; 49 | 50 | public class OrcPlugin 51 | implements FilePlugin 52 | { 53 | @Override 54 | public List<StorageColumnHandle> getFields(String path, Function<String, InputStream> streamProvider) 55 | { 56 | try (ClosableFile file = getLocalFile(path, streamProvider)) { 57 | OrcReader reader = getReader(file.getFile()); 58 | ColumnMetadata<OrcType> types = reader.getFooter().getTypes(); 59 | return reader.getRootColumn().getNestedColumns().stream() 60 | .map(orcColumn -> new StorageColumnHandle( 61 | orcColumn.getColumnName(), 62 | fromOrcType(types.get(orcColumn.getColumnId()), types))) 63 | .collect(Collectors.toList()); 64 | } 65 | catch (IOException e) { 66 | throw new RuntimeException(e); 67 | } 68 | } 69 | 70 | @Override 71 | public ConnectorPageSource getConnectorPageSource(String path, List<String> handleColumns, Function<String, InputStream> streamProvider) 72 | { 73 | try (ClosableFile file = getLocalFile(path, streamProvider)) { 74 | OrcReader reader = getReader(file.getFile()); 75 | OrcDataSource dataSource = new FileOrcDataSource(file.getFile(), new OrcReaderOptions()); 76 | 77 | ColumnMetadata<OrcType> types = reader.getFooter().getTypes(); 78 | List<OrcColumn> handleOrcColumns = reader.getRootColumn().getNestedColumns().stream() 79 | .filter(orcColumn -> handleColumns.contains(orcColumn.getColumnName().toLowerCase())) 80 | .toList(); 81 | List<Type> readTypes = handleOrcColumns.stream() 82 | .map(orcColumn -> fromOrcType(types.get(orcColumn.getColumnId()), types)) 83 | .collect(Collectors.toList()); 84 | OrcRecordReader recordReader = reader.createRecordReader( 85 | handleOrcColumns, 86 | readTypes, 87 | false, 88 | OrcPredicate.TRUE, 89 | UTC, 90 | newSimpleAggregatedMemoryContext(), 91 | INITIAL_BATCH_SIZE, 92 | OrcPlugin::handleException); 93 | return new OrcPageSource(recordReader, 94 | dataSource, 95 | newSimpleAggregatedMemoryContext(), 96 | new FileFormatDataSourceStats(), 97 | CompressionKind.NONE); 98 | } 99 | catch (IOException e) { 100 | throw new RuntimeException(e); 101 | } 102 | } 103 | 104 | private ClosableFile getLocalFile(String path, Function<String, InputStream> streamProvider)
105 | throws IOException 106 | { 107 | if (path.startsWith("http://") || path.startsWith("https://") || path.startsWith("hdfs://") || path.startsWith("s3a://") || path.startsWith("s3://")) { 108 | AutoDeletingTempFile tempFile = new AutoDeletingTempFile(); 109 | Files.copy(streamProvider.apply(path), tempFile.getFile().toPath(), StandardCopyOption.REPLACE_EXISTING); 110 | return tempFile; 111 | } 112 | if (path.startsWith("file:")) { 113 | return () -> new File(path.substring(5)); 114 | } 115 | throw new IllegalArgumentException(format("Unsupported scheme %s", path.split(":", 2)[0])); 116 | } 117 | 118 | private OrcReader getReader(File file) 119 | { 120 | OrcDataSource dataSource; 121 | try { 122 | dataSource = new FileOrcDataSource(file, new OrcReaderOptions()); 123 | } 124 | catch (IOException e) { 125 | throw new RuntimeException(e); 126 | } 127 | Optional<OrcReader> reader; 128 | try { 129 | reader = createOrcReader(dataSource, new OrcReaderOptions()); 130 | } 131 | catch (IOException e) { 132 | throw new RuntimeException(e); 133 | } 134 | if (reader.isEmpty()) { 135 | throw new RuntimeException("Failed to create an ORC reader"); 136 | } 137 | return reader.get(); 138 | } 139 | 140 | private static TrinoException handleException(Exception e) 141 | { 142 | return new TrinoException(GENERIC_INTERNAL_ERROR, "Failed to read temporary data", e); 143 | } 144 | 145 | public interface ClosableFile 146 | extends AutoCloseable 147 | { 148 | File getFile(); 149 | 150 | @Override 151 | default void close() 152 | throws IOException 153 | { 154 | } 155 | } 156 | 157 | public static class AutoDeletingTempFile 158 | implements ClosableFile 159 | { 160 | private final File file; 161 | 162 | public AutoDeletingTempFile() 163 | throws IOException 164 | { 165 | file = File.createTempFile("trino-storage-", ".orc"); 166 | } 167 | 168 | @Override 169 | public File getFile() 170 | { 171 | return file; 172 | } 173 | 174 | @Override 175 | public void close() 176 | throws IOException 177 | { 178 | if (!file.delete()) { 179 | throw new IOException(format("Failed to delete temp file %s", file)); 180 | } 181 | } 182 | } 183 | } 184 | -------------------------------------------------------------------------------- /src/main/java/org/ebyhr/trino/storage/operator/OrcTypeTranslator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License.
13 | */ 14 | package org.ebyhr.trino.storage.operator; 15 | 16 | import com.google.common.base.VerifyException; 17 | import com.google.common.collect.ImmutableList; 18 | import io.trino.orc.metadata.ColumnMetadata; 19 | import io.trino.orc.metadata.OrcType; 20 | import io.trino.spi.type.ArrayType; 21 | import io.trino.spi.type.BigintType; 22 | import io.trino.spi.type.BooleanType; 23 | import io.trino.spi.type.DateType; 24 | import io.trino.spi.type.DecimalType; 25 | import io.trino.spi.type.DoubleType; 26 | import io.trino.spi.type.IntegerType; 27 | import io.trino.spi.type.MapType; 28 | import io.trino.spi.type.RowType; 29 | import io.trino.spi.type.TimestampType; 30 | import io.trino.spi.type.Type; 31 | import io.trino.spi.type.TypeOperators; 32 | import io.trino.spi.type.VarbinaryType; 33 | import io.trino.spi.type.VarcharType; 34 | 35 | import static com.google.common.base.Preconditions.checkArgument; 36 | 37 | public final class OrcTypeTranslator 38 | { 39 | private OrcTypeTranslator() {} 40 | 41 | public static Type fromOrcType(OrcType orcType, ColumnMetadata<OrcType> columnMetadata) 42 | { 43 | switch (orcType.getOrcTypeKind()) { 44 | case BOOLEAN: 45 | return BooleanType.BOOLEAN; 46 | 47 | case FLOAT: 48 | case DOUBLE: 49 | return DoubleType.DOUBLE; 50 | 51 | case BYTE: 52 | return VarbinaryType.VARBINARY; 53 | 54 | case DATE: 55 | return DateType.DATE; 56 | 57 | case SHORT: 58 | case INT: 59 | return IntegerType.INTEGER; 60 | 61 | case LONG: 62 | return BigintType.BIGINT; 63 | 64 | case DECIMAL: 65 | checkArgument(orcType.getPrecision().isPresent(), "orcType.getPrecision() is not present"); 66 | checkArgument(orcType.getScale().isPresent(), "orcType.getScale() is not present"); 67 | return DecimalType.createDecimalType(orcType.getPrecision().get(), orcType.getScale().get()); 68 | 69 | case TIMESTAMP: 70 | return TimestampType.createTimestampType(orcType.getPrecision().orElse(3)); 71 | 72 | case BINARY: 73 | return VarbinaryType.VARBINARY; 74 | 75 | case CHAR: 76 | case VARCHAR: 77 | case STRING: 78 | return VarcharType.VARCHAR; 79 | 80 | case LIST: { 81 | Type elementType = fromOrcType(columnMetadata.get(orcType.getFieldTypeIndex(0)), columnMetadata); 82 | return new ArrayType(elementType); 83 | } 84 | 85 | case MAP: { 86 | Type keyType = getType(orcType, 0, columnMetadata); 87 | Type elementType = getType(orcType, 1, columnMetadata); 88 | return new MapType(keyType, elementType, new TypeOperators()); 89 | } 90 | 91 | case STRUCT: { 92 | ImmutableList.Builder<Type> fieldTypeInfo = ImmutableList.builder(); 93 | for (int fieldId = 0; fieldId < orcType.getFieldCount(); fieldId++) { 94 | fieldTypeInfo.add(getType(orcType, fieldId, columnMetadata)); 95 | } 96 | return RowType.anonymous(fieldTypeInfo.build()); 97 | } 98 | 99 | case TIMESTAMP_INSTANT: 100 | case UNION: 101 | // unsupported 102 | break; 103 | } 104 | throw new VerifyException("Unhandled ORC type: " + orcType.getOrcTypeKind()); 105 | } 106 | 107 | private static Type getType(OrcType orcType, int index, ColumnMetadata<OrcType> columnMetadata) 108 | { 109 | return fromOrcType(columnMetadata.get(orcType.getFieldTypeIndex(index)), columnMetadata); 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /src/main/java/org/ebyhr/trino/storage/operator/ParquetPageSource.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | package org.ebyhr.trino.storage.operator; 15 | 16 | import io.trino.parquet.ParquetCorruptionException; 17 | import io.trino.parquet.ParquetDataSourceId; 18 | import io.trino.parquet.reader.ParquetReader; 19 | import io.trino.spi.TrinoException; 20 | import io.trino.spi.connector.ConnectorPageSource; 21 | import io.trino.spi.connector.SourcePage; 22 | import io.trino.spi.metrics.Metrics; 23 | 24 | import java.io.IOException; 25 | import java.io.UncheckedIOException; 26 | import java.util.OptionalLong; 27 | 28 | import static io.trino.plugin.base.util.Closables.closeAllSuppress; 29 | import static io.trino.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR; 30 | import static java.lang.String.format; 31 | import static java.util.Objects.requireNonNull; 32 | 33 | public class ParquetPageSource 34 | implements ConnectorPageSource 35 | { 36 | private final ParquetReader parquetReader; 37 | 38 | private boolean closed; 39 | private long completedPositions; 40 | 41 | public ParquetPageSource(ParquetReader parquetReader) 42 | { 43 | this.parquetReader = requireNonNull(parquetReader, "parquetReader is null"); 44 | } 45 | 46 | @Override 47 | public long getCompletedBytes() 48 | { 49 | return parquetReader.getDataSource().getReadBytes(); 50 | } 51 | 52 | @Override 53 | public OptionalLong getCompletedPositions() 54 | { 55 | return OptionalLong.of(completedPositions); 56 | } 57 | 58 | @Override 59 | public long getReadTimeNanos() 60 | { 61 | return parquetReader.getDataSource().getReadTimeNanos(); 62 | } 63 | 64 | @Override 65 | public boolean isFinished() 66 | { 67 | return closed; 68 | } 69 | 70 | @Override 71 | public long getMemoryUsage() 72 | { 73 | return parquetReader.getMemoryContext().getBytes(); 74 | } 75 | 76 | @Override 77 | public SourcePage getNextSourcePage() 78 | { 79 | SourcePage page; 80 | try { 81 | page = parquetReader.nextPage(); 82 | } 83 | catch (IOException | RuntimeException e) { 84 | closeAllSuppress(e, this); 85 | throw handleException(parquetReader.getDataSource().getId(), e); 86 | } 87 | 88 | if (closed || page == null) { 89 | close(); 90 | return null; 91 | } 92 | 93 | completedPositions += page.getPositionCount(); 94 | return page; 95 | } 96 | 97 | @Override 98 | public void close() 99 | { 100 | if (closed) { 101 | return; 102 | } 103 | closed = true; 104 | 105 | try { 106 | parquetReader.close(); 107 | } 108 | catch (IOException e) { 109 | throw new UncheckedIOException(e); 110 | } 111 | } 112 | 113 | @Override 114 | public Metrics getMetrics() 115 | { 116 | return parquetReader.getMetrics(); 117 | } 118 | 119 | static TrinoException handleException(ParquetDataSourceId dataSourceId, Exception exception) 120 | { 121 | if (exception instanceof TrinoException trinoException) { 122 | return trinoException; 123 | } 124 | if (exception instanceof ParquetCorruptionException) { 125 | return new TrinoException(GENERIC_INTERNAL_ERROR, exception); 126 | } 127 | return new TrinoException(GENERIC_INTERNAL_ERROR, format("Failed to read Parquet file: %s", dataSourceId), exception); 128 | } 129 | } 130 | 
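Both OrcPageSource above and this ParquetPageSource implement the same pull contract: the engine calls getNextSourcePage() until the source reports isFinished(), then closes it. The following is a minimal illustrative sketch of that driver loop, assuming a ConnectorPageSource obtained from one of the plugins in this package; the countRows helper is a placeholder of this edit, not part of the repository:

    import java.io.IOException;

    import io.trino.spi.connector.ConnectorPageSource;
    import io.trino.spi.connector.SourcePage;

    // Illustrative sketch: Trino's engine performs the equivalent of this loop internally.
    static long countRows(ConnectorPageSource source)
            throws IOException
    {
        long totalRows = 0;
        try (source) {
            while (!source.isFinished()) {
                SourcePage page = source.getNextSourcePage();
                if (page == null) {
                    // A null page is not necessarily the end of data: isFinished() is the
                    // authoritative signal (the implementations above close themselves
                    // before returning null).
                    continue;
                }
                totalRows += page.getPositionCount();
            }
        }
        return totalRows;
    }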
-------------------------------------------------------------------------------- /src/main/java/org/ebyhr/trino/storage/operator/ParquetPlugin.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | package org.ebyhr.trino.storage.operator; 15 | 16 | import com.google.common.collect.ImmutableList; 17 | import io.trino.parquet.AbstractParquetDataSource; 18 | import io.trino.parquet.Column; 19 | import io.trino.parquet.ParquetCorruptionException; 20 | import io.trino.parquet.ParquetDataSource; 21 | import io.trino.parquet.ParquetDataSourceId; 22 | import io.trino.parquet.ParquetReaderOptions; 23 | import io.trino.parquet.metadata.BlockMetadata; 24 | import io.trino.parquet.metadata.FileMetadata; 25 | import io.trino.parquet.metadata.ParquetMetadata; 26 | import io.trino.parquet.reader.MetadataReader; 27 | import io.trino.parquet.reader.ParquetReader; 28 | import io.trino.parquet.reader.RowGroupInfo; 29 | import io.trino.spi.TrinoException; 30 | import io.trino.spi.connector.ConnectorPageSource; 31 | import org.apache.parquet.column.ColumnDescriptor; 32 | import org.apache.parquet.io.MessageColumnIO; 33 | import org.apache.parquet.schema.MessageType; 34 | import org.ebyhr.trino.storage.StorageColumnHandle; 35 | 36 | import java.io.File; 37 | import java.io.FileNotFoundException; 38 | import java.io.IOException; 39 | import java.io.InputStream; 40 | import java.io.RandomAccessFile; 41 | import java.nio.file.Files; 42 | import java.nio.file.StandardCopyOption; 43 | import java.util.List; 44 | import java.util.Map; 45 | import java.util.Optional; 46 | import java.util.function.Function; 47 | import java.util.stream.Collectors; 48 | 49 | import static com.google.common.base.Strings.nullToEmpty; 50 | import static com.google.common.base.Throwables.throwIfUnchecked; 51 | import static io.trino.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext; 52 | import static io.trino.parquet.ParquetTypeUtils.constructField; 53 | import static io.trino.parquet.ParquetTypeUtils.getColumnIO; 54 | import static io.trino.parquet.ParquetTypeUtils.getDescriptors; 55 | import static io.trino.parquet.ParquetTypeUtils.lookupColumnByName; 56 | import static io.trino.parquet.metadata.PrunedBlockMetadata.createPrunedColumnsMetadata; 57 | import static io.trino.spi.StandardErrorCode.CORRUPT_PAGE; 58 | import static io.trino.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR; 59 | import static io.trino.spi.StandardErrorCode.NOT_FOUND; 60 | import static java.lang.String.format; 61 | import static org.ebyhr.trino.storage.operator.ParquetTypeTranslator.fromParquetType; 62 | import static org.joda.time.DateTimeZone.UTC; 63 | 64 | public class ParquetPlugin 65 | implements FilePlugin 66 | { 67 | @Override 68 | public List<StorageColumnHandle> getFields(String path, Function<String, InputStream> streamProvider) 69 | { 70 | try (ClosableFile file = getLocalFile(path, streamProvider)) { 71 | MessageType schema =
getSchema(file.getFile()); 72 | return schema.getFields().stream() 73 | .map(field -> new StorageColumnHandle( 74 | field.getName(), 75 | fromParquetType(field))) 76 | .collect(Collectors.toList()); 77 | } 78 | catch (IOException e) { 79 | throw new RuntimeException(e); 80 | } 81 | } 82 | 83 | @Override 84 | public ConnectorPageSource getConnectorPageSource(String path, List<String> handleColumns, Function<String, InputStream> streamProvider) 85 | { 86 | try (ClosableFile file = getLocalFile(path, streamProvider)) { 87 | ParquetReader reader = getReader(file.getFile(), handleColumns); 88 | return new ParquetPageSource(reader); 89 | } 90 | catch (IOException e) { 91 | throw new RuntimeException(e); 92 | } 93 | } 94 | 95 | private ClosableFile getLocalFile(String path, Function<String, InputStream> streamProvider) 96 | throws IOException 97 | { 98 | if (path.startsWith("http://") || path.startsWith("https://") || path.startsWith("hdfs://") || path.startsWith("s3a://") || path.startsWith("s3://")) { 99 | AutoDeletingTempFile tempFile = new AutoDeletingTempFile(); 100 | Files.copy(streamProvider.apply(path), tempFile.getFile().toPath(), StandardCopyOption.REPLACE_EXISTING); 101 | return tempFile; 102 | } 103 | if (path.startsWith("file:")) { 104 | return () -> new File(path.substring(5)); 105 | } 106 | throw new IllegalArgumentException(format("Unsupported scheme %s", path.split(":", 2)[0])); 107 | } 108 | 109 | private MessageType getSchema(File file) 110 | { 111 | ParquetReaderOptions options = new ParquetReaderOptions(); 112 | MessageType fileSchema = null; 113 | ParquetDataSource dataSource = null; 114 | try { 115 | dataSource = new FileParquetDataSource(file, options); 116 | 117 | ParquetMetadata parquetMetadata = MetadataReader.readFooter(dataSource, Optional.empty()); 118 | FileMetadata fileMetaData = parquetMetadata.getFileMetaData(); 119 | fileSchema = fileMetaData.getSchema(); 120 | } 121 | catch (Exception e) { 122 | handleException(file, dataSource, e); 123 | } 124 | return fileSchema; 125 | } 126 | 127 | private ParquetReader getReader(File file, List<String> handleColumns) 128 | throws IOException 129 | { 130 | ParquetReaderOptions options = new ParquetReaderOptions(); 131 | ParquetDataSource dataSource; 132 | dataSource = new FileParquetDataSource(file, options); 133 | ParquetMetadata parquetMetadata = MetadataReader.readFooter(dataSource, Optional.empty()); 134 | FileMetadata fileMetaData = parquetMetadata.getFileMetaData(); 135 | MessageColumnIO messageColumnIO = getColumnIO(fileMetaData.getSchema(), fileMetaData.getSchema()); 136 | ImmutableList.Builder<Column> columnFields = ImmutableList.builder(); 137 | for (org.apache.parquet.schema.Type type : fileMetaData.getSchema().getFields()) { 138 | if (handleColumns.contains(type.getName().toLowerCase())) { 139 | columnFields.add(new Column( 140 | messageColumnIO.getName(), 141 | constructField( 142 | fromParquetType(type), 143 | lookupColumnByName(messageColumnIO, type.getName())) 144 | .orElseThrow())); 145 | } 146 | } 147 | Map<List<String>, ColumnDescriptor> descriptorsByPath = getDescriptors(fileMetaData.getSchema(), fileMetaData.getSchema()); 148 | long nextStart = 0; 149 | ImmutableList.Builder<RowGroupInfo> rowGroupInfoBuilder = ImmutableList.builder(); 150 | for (BlockMetadata block : parquetMetadata.getBlocks()) { 151 | rowGroupInfoBuilder.add( 152 | new RowGroupInfo( 153 | createPrunedColumnsMetadata( 154 | block, 155 | dataSource.getId(), 156 | descriptorsByPath), 157 | nextStart, 158 | Optional.empty())); 159 | nextStart += block.rowCount(); 160 | } 161 | return new ParquetReader( 162 |
Optional.ofNullable(fileMetaData.getCreatedBy()), 163 | columnFields.build(), 164 | false, 165 | rowGroupInfoBuilder.build(), 166 | dataSource, 167 | UTC, 168 | newSimpleAggregatedMemoryContext(), 169 | new ParquetReaderOptions(), 170 | exception -> { 171 | throwIfUnchecked(exception); 172 | return new RuntimeException(exception); 173 | }, 174 | Optional.empty(), 175 | Optional.empty()); 176 | } 177 | 178 | private void handleException(File file, ParquetDataSource dataSource, Exception e) 179 | { 180 | try { 181 | if (dataSource != null) { 182 | dataSource.close(); 183 | } 184 | } 185 | catch (IOException ignored) { 186 | } 187 | if (e instanceof TrinoException) { 188 | throw (TrinoException) e; 189 | } 190 | if (e instanceof ParquetCorruptionException) { 191 | throw new TrinoException(CORRUPT_PAGE, e); 192 | } 193 | if (nullToEmpty(e.getMessage()).trim().equals("Filesystem closed") || 194 | e instanceof FileNotFoundException) { 195 | throw new TrinoException(NOT_FOUND, e); 196 | } 197 | String message = format("Error opening Parquet file %s: %s", file, e.getMessage()); 198 | throw new TrinoException(GENERIC_INTERNAL_ERROR, message); 199 | } 200 | 201 | public interface ClosableFile 202 | extends AutoCloseable 203 | { 204 | File getFile(); 205 | 206 | @Override 207 | default void close() 208 | throws IOException 209 | { 210 | } 211 | } 212 | 213 | public static class AutoDeletingTempFile 214 | implements ClosableFile 215 | { 216 | private final File file; 217 | 218 | public AutoDeletingTempFile() 219 | throws IOException 220 | { 221 | file = File.createTempFile("trino-storage-", ".parquet"); 222 | } 223 | 224 | @Override 225 | public File getFile() 226 | { 227 | return file; 228 | } 229 | 230 | @Override 231 | public void close() 232 | throws IOException 233 | { 234 | if (!file.delete()) { 235 | throw new IOException(format("Failed to delete temp file %s", file)); 236 | } 237 | } 238 | } 239 | 240 | /** 241 | * this class is copied from io.trino.parquet.reader.FileParquetDataSource because it is in test-jar 242 | */ 243 | public static class FileParquetDataSource 244 | extends AbstractParquetDataSource 245 | { 246 | private final RandomAccessFile input; 247 | 248 | public FileParquetDataSource(File path, ParquetReaderOptions options) 249 | throws FileNotFoundException 250 | { 251 | super(new ParquetDataSourceId(path.getPath()), path.length(), options); 252 | this.input = new RandomAccessFile(path, "r"); 253 | } 254 | 255 | @Override 256 | public void close() 257 | throws IOException 258 | { 259 | super.close(); 260 | input.close(); 261 | } 262 | 263 | @Override 264 | protected void readInternal(long position, byte[] buffer, int bufferOffset, int bufferLength) 265 | throws IOException 266 | { 267 | input.seek(position); 268 | input.readFully(buffer, bufferOffset, bufferLength); 269 | } 270 | } 271 | } 272 | -------------------------------------------------------------------------------- /src/main/java/org/ebyhr/trino/storage/operator/ParquetTypeTranslator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 
4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | package org.ebyhr.trino.storage.operator; 15 | 16 | import com.google.common.collect.ImmutableList; 17 | import io.trino.spi.TrinoException; 18 | import io.trino.spi.type.ArrayType; 19 | import io.trino.spi.type.BigintType; 20 | import io.trino.spi.type.BooleanType; 21 | import io.trino.spi.type.DateType; 22 | import io.trino.spi.type.DecimalType; 23 | import io.trino.spi.type.DoubleType; 24 | import io.trino.spi.type.IntegerType; 25 | import io.trino.spi.type.MapType; 26 | import io.trino.spi.type.RealType; 27 | import io.trino.spi.type.RowType; 28 | import io.trino.spi.type.TimeType; 29 | import io.trino.spi.type.TimestampType; 30 | import io.trino.spi.type.Type; 31 | import io.trino.spi.type.TypeOperators; 32 | import io.trino.spi.type.UuidType; 33 | import io.trino.spi.type.VarbinaryType; 34 | import io.trino.spi.type.VarcharType; 35 | import org.apache.parquet.schema.GroupType; 36 | import org.apache.parquet.schema.LogicalTypeAnnotation; 37 | import org.apache.parquet.schema.PrimitiveType; 38 | 39 | import static io.trino.spi.StandardErrorCode.NOT_SUPPORTED; 40 | 41 | public final class ParquetTypeTranslator 42 | { 43 | private ParquetTypeTranslator() {} 44 | 45 | public static Type fromParquetType(org.apache.parquet.schema.Type type) 46 | { 47 | if (type.isPrimitive()) { 48 | return fromParquetType(type.asPrimitiveType()); 49 | } 50 | return fromParquetType(type.asGroupType()); 51 | } 52 | 53 | public static Type fromParquetType(GroupType groupType) 54 | { 55 | LogicalTypeAnnotation logicalTypeAnnotation = groupType.getLogicalTypeAnnotation(); 56 | if (logicalTypeAnnotation instanceof LogicalTypeAnnotation.ListLogicalTypeAnnotation) { 57 | Type elementType = fromParquetType(groupType.getType(0)); 58 | return new ArrayType(elementType); 59 | } 60 | if (logicalTypeAnnotation instanceof LogicalTypeAnnotation.MapLogicalTypeAnnotation) { 61 | Type keyType = fromParquetType(groupType.getType(0)); 62 | Type elementType = fromParquetType(groupType.getType(1)); 63 | return new MapType(keyType, elementType, new TypeOperators()); 64 | } 65 | ImmutableList.Builder<Type> fieldTypeInfo = ImmutableList.builder(); 66 | groupType.getFields().forEach(groupField -> fieldTypeInfo.add(fromParquetType(groupField))); 67 | return RowType.anonymous(fieldTypeInfo.build()); 68 | } 69 | 70 | public static Type fromParquetType(PrimitiveType parquetType) 71 | { 72 | LogicalTypeAnnotation logicalTypeAnnotation = parquetType.getLogicalTypeAnnotation(); 73 | if (logicalTypeAnnotation instanceof LogicalTypeAnnotation.StringLogicalTypeAnnotation 74 | || logicalTypeAnnotation instanceof LogicalTypeAnnotation.JsonLogicalTypeAnnotation) { 75 | return VarcharType.VARCHAR; 76 | } 77 | if (logicalTypeAnnotation instanceof LogicalTypeAnnotation.DateLogicalTypeAnnotation) { 78 | return DateType.DATE; 79 | } 80 | if (logicalTypeAnnotation instanceof LogicalTypeAnnotation.DecimalLogicalTypeAnnotation) { 81 | LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalLogicalTypeAnnotation = (LogicalTypeAnnotation.DecimalLogicalTypeAnnotation) logicalTypeAnnotation; 82 | return
DecimalType.createDecimalType(decimalLogicalTypeAnnotation.getPrecision(), decimalLogicalTypeAnnotation.getScale()); 83 | } 84 | if (logicalTypeAnnotation instanceof LogicalTypeAnnotation.TimeLogicalTypeAnnotation) { 85 | LogicalTypeAnnotation.TimeLogicalTypeAnnotation timeLogicalTypeAnnotation = (LogicalTypeAnnotation.TimeLogicalTypeAnnotation) logicalTypeAnnotation; 86 | switch (timeLogicalTypeAnnotation.getUnit()) { 87 | case MICROS: 88 | return TimeType.TIME_MICROS; 89 | case MILLIS: 90 | return TimeType.TIME_MILLIS; 91 | case NANOS: 92 | return TimeType.TIME_NANOS; 93 | } 94 | throw new TrinoException(NOT_SUPPORTED, "Unsupported column: " + logicalTypeAnnotation); 95 | } 96 | if (logicalTypeAnnotation instanceof LogicalTypeAnnotation.TimestampLogicalTypeAnnotation) { 97 | LogicalTypeAnnotation.TimestampLogicalTypeAnnotation timestampLogicalTypeAnnotation = (LogicalTypeAnnotation.TimestampLogicalTypeAnnotation) logicalTypeAnnotation; 98 | switch (timestampLogicalTypeAnnotation.getUnit()) { 99 | case MICROS: 100 | return TimestampType.TIMESTAMP_MICROS; 101 | case MILLIS: 102 | return TimestampType.TIMESTAMP_MILLIS; 103 | case NANOS: 104 | return TimestampType.TIMESTAMP_NANOS; 105 | } 106 | throw new TrinoException(NOT_SUPPORTED, "Unsupported column: " + logicalTypeAnnotation); 107 | } 108 | if (logicalTypeAnnotation instanceof LogicalTypeAnnotation.UUIDLogicalTypeAnnotation) { 109 | return UuidType.UUID; 110 | } 111 | // fall back to checking primitive types 112 | switch (parquetType.getPrimitiveTypeName()) { 113 | case BOOLEAN: 114 | return BooleanType.BOOLEAN; 115 | case INT32: 116 | return IntegerType.INTEGER; 117 | case INT64: 118 | return BigintType.BIGINT; 119 | case INT96: 120 | return TimestampType.TIMESTAMP_NANOS; 121 | case FLOAT: 122 | return RealType.REAL; 123 | case DOUBLE: 124 | return DoubleType.DOUBLE; 125 | case BINARY: 126 | return VarbinaryType.VARBINARY; 127 | case FIXED_LEN_BYTE_ARRAY: 128 | // UUID should be handled by logical type annotations, otherwise unsupported 129 | break; 130 | } 131 | throw new TrinoException(NOT_SUPPORTED, "Unsupported column: " + parquetType.getPrimitiveTypeName()); 132 | } 133 | } 134 | -------------------------------------------------------------------------------- /src/main/java/org/ebyhr/trino/storage/operator/PluginFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | package org.ebyhr.trino.storage.operator; 15 | 16 | import io.trino.spi.connector.SchemaNotFoundException; 17 | 18 | import static java.util.Locale.ENGLISH; 19 | 20 | public final class PluginFactory 21 | { 22 | private PluginFactory() {} 23 | 24 | public static FilePlugin create(String typeName) 25 | { 26 | switch (typeName.toLowerCase(ENGLISH)) { 27 | case "csv": 28 | return new CsvPlugin(','); 29 | case "tsv": 30 | return new CsvPlugin('\t'); 31 | case "ssv": 32 | return new CsvPlugin(';'); 33 | case "txt": 34 | return new TextPlugin(); 35 | case "raw": 36 | return new RawPlugin(); 37 | case "excel": 38 | return new ExcelPlugin(); 39 | case "orc": 40 | return new OrcPlugin(); 41 | case "parquet": 42 | return new ParquetPlugin(); 43 | case "json": 44 | return new JsonPlugin(); 45 | case "avro": 46 | return new AvroPlugin(); 47 | default: 48 | throw new SchemaNotFoundException(typeName); 49 | } 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/main/java/org/ebyhr/trino/storage/operator/RawPlugin.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | package org.ebyhr.trino.storage.operator; 15 | 16 | import org.ebyhr.trino.storage.StorageColumnHandle; 17 | 18 | import java.io.BufferedReader; 19 | import java.io.InputStream; 20 | import java.io.InputStreamReader; 21 | import java.util.List; 22 | import java.util.Objects; 23 | import java.util.function.Function; 24 | import java.util.stream.Collectors; 25 | import java.util.stream.Stream; 26 | 27 | import static io.trino.spi.type.VarcharType.VARCHAR; 28 | 29 | public class RawPlugin 30 | implements FilePlugin 31 | { 32 | @Override 33 | public List<StorageColumnHandle> getFields(String path, Function<String, InputStream> streamProvider) 34 | { 35 | return List.of(new StorageColumnHandle("data", VARCHAR)); 36 | } 37 | 38 | @Override 39 | public Stream<List<?>> getRecordsIterator(String path, Function<String, InputStream> streamProvider) 40 | { 41 | InputStream inputStream = streamProvider.apply(path); 42 | BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream)); 43 | String blob = reader.lines().map(Objects::toString).collect(Collectors.joining("\n")); 44 | return Stream.of(List.of(blob)); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /src/main/java/org/ebyhr/trino/storage/operator/TextPlugin.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | package org.ebyhr.trino.storage.operator; 15 | 16 | import org.ebyhr.trino.storage.StorageColumnHandle; 17 | 18 | import java.io.BufferedReader; 19 | import java.io.InputStream; 20 | import java.io.InputStreamReader; 21 | import java.util.List; 22 | import java.util.function.Function; 23 | import java.util.stream.Stream; 24 | 25 | import static io.trino.spi.type.VarcharType.VARCHAR; 26 | 27 | public class TextPlugin 28 | implements FilePlugin 29 | { 30 | @Override 31 | public List<StorageColumnHandle> getFields(String path, Function<String, InputStream> streamProvider) 32 | { 33 | return List.of(new StorageColumnHandle("value", VARCHAR)); 34 | } 35 | 36 | @Override 37 | public Stream<List<?>> getRecordsIterator(String path, Function<String, InputStream> streamProvider) 38 | { 39 | InputStream inputStream = streamProvider.apply(path); 40 | BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream)); 41 | return reader.lines().map(List::of); 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/main/java/org/ebyhr/trino/storage/ptf/ListTableFunction.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License.
13 | */ 14 | package org.ebyhr.trino.storage.ptf; 15 | 16 | import com.fasterxml.jackson.annotation.JsonCreator; 17 | import com.fasterxml.jackson.annotation.JsonProperty; 18 | import com.google.common.collect.ImmutableList; 19 | import com.google.common.collect.ImmutableMap; 20 | import com.google.inject.Provider; 21 | import io.airlift.slice.Slice; 22 | import io.trino.spi.connector.ColumnHandle; 23 | import io.trino.spi.connector.ColumnMetadata; 24 | import io.trino.spi.connector.ConnectorAccessControl; 25 | import io.trino.spi.connector.ConnectorSession; 26 | import io.trino.spi.connector.ConnectorTableHandle; 27 | import io.trino.spi.connector.ConnectorTransactionHandle; 28 | import io.trino.spi.function.table.AbstractConnectorTableFunction; 29 | import io.trino.spi.function.table.Argument; 30 | import io.trino.spi.function.table.ConnectorTableFunction; 31 | import io.trino.spi.function.table.ConnectorTableFunctionHandle; 32 | import io.trino.spi.function.table.Descriptor; 33 | import io.trino.spi.function.table.ScalarArgument; 34 | import io.trino.spi.function.table.ScalarArgumentSpecification; 35 | import io.trino.spi.function.table.TableFunctionAnalysis; 36 | import io.trino.spi.type.Type; 37 | import org.ebyhr.trino.storage.StorageColumnHandle; 38 | import org.ebyhr.trino.storage.StorageTableHandle; 39 | 40 | import java.util.List; 41 | import java.util.Map; 42 | import java.util.Optional; 43 | 44 | import static com.google.common.collect.ImmutableList.toImmutableList; 45 | import static io.trino.spi.function.table.ReturnTypeSpecification.GenericTable.GENERIC_TABLE; 46 | import static io.trino.spi.type.BigintType.BIGINT; 47 | import static io.trino.spi.type.TimestampWithTimeZoneType.TIMESTAMP_TZ_MILLIS; 48 | import static io.trino.spi.type.VarcharType.VARCHAR; 49 | import static java.util.Objects.requireNonNull; 50 | import static org.ebyhr.trino.storage.StorageSplit.Mode.LIST; 51 | 52 | public class ListTableFunction 53 | implements Provider<ConnectorTableFunction> 54 | { 55 | public static final String LIST_SCHEMA_NAME = "$trino-storage/list"; 56 | public static final Map<String, Type> COLUMN_TYPES = ImmutableMap.of( 57 | "file_modified_time", TIMESTAMP_TZ_MILLIS, 58 | "size", BIGINT, 59 | "name", VARCHAR); 60 | 61 | public static final List<ColumnMetadata> COLUMNS_METADATA = COLUMN_TYPES.entrySet().stream() 62 | .map(column -> new ColumnMetadata(column.getKey(), column.getValue())) 63 | .collect(toImmutableList()); 64 | public static final List<ColumnHandle> COLUMN_HANDLES = COLUMN_TYPES.entrySet().stream() 65 | .map(column -> new StorageColumnHandle(column.getKey(), column.getValue())) 66 | .collect(toImmutableList()); 67 | 68 | @Override 69 | public ConnectorTableFunction get() 70 | { 71 | return new QueryFunction(); 72 | } 73 | 74 | public static class QueryFunction 75 | extends AbstractConnectorTableFunction 76 | { 77 | public QueryFunction() 78 | { 79 | super( 80 | "system", 81 | "list", 82 | ImmutableList.of( 83 | ScalarArgumentSpecification.builder() 84 | .name("PATH") 85 | .type(VARCHAR) 86 | .build()), 87 | GENERIC_TABLE); 88 | } 89 | 90 | @Override 91 | public TableFunctionAnalysis analyze(ConnectorSession session, ConnectorTransactionHandle transaction, Map<String, Argument> arguments, ConnectorAccessControl accessControl) 92 | { 93 | String path = ((Slice) ((ScalarArgument) arguments.get("PATH")).getValue()).toStringUtf8(); 94 | 95 | Descriptor returnedType = new Descriptor(COLUMN_TYPES.entrySet().stream() 96 | .map(column -> new Descriptor.Field(column.getKey(), Optional.of(column.getValue()))) 97 | .collect(toImmutableList())); 98 | 99 |
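// The returned descriptor always exposes the three fixed columns declared in COLUMN_TYPES,
// and the PATH argument is carried to split generation inside a StorageTableHandle in LIST mode.
// Example invocation (the HDFS path shown here is illustrative):
//   SELECT * FROM TABLE(storage.system.list('hdfs://namenode/tmp/'))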
QueryFunctionHandle handle = new QueryFunctionHandle(new StorageTableHandle(LIST, LIST_SCHEMA_NAME, path)); 100 | 101 | return TableFunctionAnalysis.builder() 102 | .returnedType(returnedType) 103 | .handle(handle) 104 | .build(); 105 | } 106 | } 107 | 108 | public static class QueryFunctionHandle 109 | implements ConnectorTableFunctionHandle 110 | { 111 | private final StorageTableHandle tableHandle; 112 | 113 | @JsonCreator 114 | public QueryFunctionHandle(@JsonProperty("tableHandle") StorageTableHandle tableHandle) 115 | { 116 | this.tableHandle = requireNonNull(tableHandle, "tableHandle is null"); 117 | } 118 | 119 | @JsonProperty 120 | public ConnectorTableHandle getTableHandle() 121 | { 122 | return tableHandle; 123 | } 124 | } 125 | } 126 | -------------------------------------------------------------------------------- /src/main/java/org/ebyhr/trino/storage/ptf/ReadFileTableFunction.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | package org.ebyhr.trino.storage.ptf; 15 | 16 | import com.fasterxml.jackson.annotation.JsonCreator; 17 | import com.fasterxml.jackson.annotation.JsonProperty; 18 | import com.google.common.collect.ImmutableList; 19 | import com.google.inject.Inject; 20 | import com.google.inject.Provider; 21 | import io.airlift.slice.Slice; 22 | import io.trino.spi.connector.ConnectorAccessControl; 23 | import io.trino.spi.connector.ConnectorSession; 24 | import io.trino.spi.connector.ConnectorTableHandle; 25 | import io.trino.spi.connector.ConnectorTransactionHandle; 26 | import io.trino.spi.function.table.AbstractConnectorTableFunction; 27 | import io.trino.spi.function.table.Argument; 28 | import io.trino.spi.function.table.ConnectorTableFunction; 29 | import io.trino.spi.function.table.ConnectorTableFunctionHandle; 30 | import io.trino.spi.function.table.Descriptor; 31 | import io.trino.spi.function.table.ScalarArgument; 32 | import io.trino.spi.function.table.ScalarArgumentSpecification; 33 | import io.trino.spi.function.table.TableFunctionAnalysis; 34 | import org.ebyhr.trino.storage.StorageClient; 35 | import org.ebyhr.trino.storage.StorageColumnHandle; 36 | import org.ebyhr.trino.storage.StorageTable; 37 | import org.ebyhr.trino.storage.StorageTableHandle; 38 | 39 | import java.util.List; 40 | import java.util.Map; 41 | import java.util.Optional; 42 | 43 | import static com.google.common.collect.ImmutableList.toImmutableList; 44 | import static io.trino.spi.function.table.ReturnTypeSpecification.GenericTable.GENERIC_TABLE; 45 | import static io.trino.spi.type.VarcharType.VARCHAR; 46 | import static java.util.Objects.requireNonNull; 47 | import static org.ebyhr.trino.storage.StorageSplit.Mode.TABLE; 48 | 49 | public class ReadFileTableFunction 50 | implements Provider<ConnectorTableFunction> 51 | { 52 | private final StorageClient storageClient; 53 | 54 | @Inject 55 | public ReadFileTableFunction(StorageClient storageClient) 56 | { 57 | this.storageClient
= requireNonNull(storageClient, "storageClient is null"); 58 | } 59 | 60 | @Override 61 | public ConnectorTableFunction get() 62 | { 63 | return new QueryFunction(storageClient); 64 | } 65 | 66 | public static class QueryFunction 67 | extends AbstractConnectorTableFunction 68 | { 69 | private final StorageClient storageClient; 70 | 71 | public QueryFunction(StorageClient storageClient) 72 | { 73 | super( 74 | "system", 75 | "read_file", 76 | ImmutableList.of( 77 | ScalarArgumentSpecification.builder() 78 | .name("TYPE") 79 | .type(VARCHAR) 80 | .build(), 81 | ScalarArgumentSpecification.builder() 82 | .name("PATH") 83 | .type(VARCHAR) 84 | .build()), 85 | GENERIC_TABLE); 86 | this.storageClient = requireNonNull(storageClient, "storageClient is null"); 87 | } 88 | 89 | @Override 90 | public TableFunctionAnalysis analyze(ConnectorSession session, ConnectorTransactionHandle transaction, Map<String, Argument> arguments, ConnectorAccessControl accessControl) 91 | { 92 | String type = ((Slice) ((ScalarArgument) arguments.get("TYPE")).getValue()).toStringUtf8(); 93 | String path = ((Slice) ((ScalarArgument) arguments.get("PATH")).getValue()).toStringUtf8(); 94 | 95 | StorageTable table = storageClient.getTable(session, type, path); 96 | if (table == null) { 97 | throw new IllegalArgumentException("Could not read path " + path); 98 | } 99 | 100 | Descriptor returnedType = new Descriptor(table.getColumns().stream() 101 | .map(column -> new Descriptor.Field(column.getName(), Optional.of(column.getType()))) 102 | .collect(toImmutableList())); 103 | 104 | ReadFunctionHandle handle = new ReadFunctionHandle(new StorageTableHandle(TABLE, type, path), table.getColumns()); 105 | 106 | return TableFunctionAnalysis.builder() 107 | .returnedType(returnedType) 108 | .handle(handle) 109 | .build(); 110 | } 111 | } 112 | 113 | public static class ReadFunctionHandle 114 | implements ConnectorTableFunctionHandle 115 | { 116 | private final StorageTableHandle tableHandle; 117 | private final List<StorageColumnHandle> columns; 118 | 119 | @JsonCreator 120 | public ReadFunctionHandle( 121 | @JsonProperty("tableHandle") StorageTableHandle tableHandle, 122 | @JsonProperty("columns") List<StorageColumnHandle> columns) 123 | { 124 | this.tableHandle = requireNonNull(tableHandle, "tableHandle is null"); 125 | this.columns = ImmutableList.copyOf(requireNonNull(columns, "columns is null")); 126 | } 127 | 128 | @JsonProperty 129 | public ConnectorTableHandle getTableHandle() 130 | { 131 | return tableHandle; 132 | } 133 | 134 | @JsonProperty 135 | public List<StorageColumnHandle> getColumns() 136 | { 137 | return columns; 138 | } 139 | } 140 | } 141 | -------------------------------------------------------------------------------- /src/test/java/org/ebyhr/trino/storage/StorageQueryRunner.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License.
13 | */ 14 | package org.ebyhr.trino.storage; 15 | 16 | import io.airlift.log.Logger; 17 | import io.airlift.log.Logging; 18 | import io.trino.Session; 19 | import io.trino.plugin.tpch.TpchPlugin; 20 | import io.trino.spi.security.Identity; 21 | import io.trino.testing.DistributedQueryRunner; 22 | 23 | import java.util.Map; 24 | import java.util.Optional; 25 | 26 | import static io.airlift.testing.Closeables.closeAllSuppress; 27 | import static io.trino.testing.TestingSession.testSessionBuilder; 28 | 29 | public final class StorageQueryRunner 30 | { 31 | private StorageQueryRunner() {} 32 | 33 | private static final String TPCH_SCHEMA = "tpch"; 34 | 35 | public static DistributedQueryRunner createStorageQueryRunner( 36 | Optional<TestingStorageServer> storageServer, 37 | Map<String, String> extraProperties, 38 | Map<String, String> connectorProperties) 39 | throws Exception 40 | { 41 | DistributedQueryRunner queryRunner = DistributedQueryRunner.builder(createSession()) 42 | .setExtraProperties(extraProperties) 43 | .build(); 44 | try { 45 | queryRunner.installPlugin(new TpchPlugin()); 46 | queryRunner.createCatalog("tpch", "tpch"); 47 | 48 | queryRunner.installPlugin(new StoragePlugin()); 49 | queryRunner.createCatalog("storage", "storage", connectorProperties); 50 | 51 | storageServer.ifPresent(server -> { 52 | server.getHadoopServer().copyFromLocal("example-data/lineitem-1.csv", "/tmp/lineitem-1.csv", "/tmp/lineitem-1"); 53 | server.getHadoopServer().copyFromLocal("example-data/numbers.tsv", "/tmp/numbers.tsv", "/tmp/numbers.tsv"); 54 | }); 55 | 56 | return queryRunner; 57 | } 58 | catch (Throwable e) { 59 | closeAllSuppress(e, queryRunner); 60 | throw e; 61 | } 62 | } 63 | 64 | private static Session createSession() 65 | { 66 | return testSessionBuilder() 67 | .setIdentity(Identity.forUser("hive").build()) 68 | .setCatalog("storage") 69 | .setSchema(TPCH_SCHEMA) 70 | .build(); 71 | } 72 | 73 | public static final class StorageHadoopQueryRunner 74 | { 75 | public static void main(String[] args) 76 | throws Exception 77 | { 78 | Logging.initialize(); 79 | 80 | TestingStorageServer storageServer = new TestingStorageServer(); 81 | DistributedQueryRunner queryRunner = createStorageQueryRunner( 82 | Optional.of(storageServer), 83 | Map.of("http-server.http.port", "8080"), 84 | Map.of( 85 | "hive.s3.path-style-access", "true", 86 | "hive.s3.endpoint", storageServer.getMinioServer().getEndpoint(), 87 | "hive.s3.aws-access-key", TestingMinioServer.ACCESS_KEY, 88 | "hive.s3.aws-secret-key", TestingMinioServer.SECRET_KEY)); 89 | 90 | Logger log = Logger.get(StorageQueryRunner.class); 91 | log.info("======== SERVER STARTED ========"); 92 | log.info("\n====\n%s\n====", queryRunner.getCoordinator().getBaseUrl()); 93 | } 94 | } 95 | 96 | public static final class StorageLocalQueryRunner 97 | { 98 | public static void main(String[] args) 99 | throws Exception 100 | { 101 | Logging.initialize(); 102 | 103 | DistributedQueryRunner queryRunner = createStorageQueryRunner(Optional.empty(), Map.of("http-server.http.port", "8080"), Map.of()); 104 | 105 | Logger log = Logger.get(StorageQueryRunner.class); 106 | log.info("======== SERVER STARTED ========"); 107 | log.info("\n====\n%s\n====", queryRunner.getCoordinator().getBaseUrl()); 108 | } 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /src/test/java/org/ebyhr/trino/storage/TestRestrictedStorageConnector.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the
"License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | package org.ebyhr.trino.storage; 15 | 16 | import com.google.common.collect.ImmutableMap; 17 | import com.google.common.io.Resources; 18 | import io.trino.testing.AbstractTestQueryFramework; 19 | import io.trino.testing.QueryRunner; 20 | import org.junit.jupiter.api.Test; 21 | 22 | import java.util.Optional; 23 | 24 | import static java.lang.String.format; 25 | import static org.ebyhr.trino.storage.StorageQueryRunner.createStorageQueryRunner; 26 | 27 | public final class TestRestrictedStorageConnector 28 | extends AbstractTestQueryFramework 29 | { 30 | private TestingStorageServer server; 31 | 32 | @Override 33 | protected QueryRunner createQueryRunner() 34 | throws Exception 35 | { 36 | server = closeAfterClass(new TestingStorageServer()); 37 | return createStorageQueryRunner( 38 | Optional.of(server), 39 | ImmutableMap.of(), 40 | ImmutableMap.of("allow-local-files", "false")); 41 | } 42 | 43 | @Test 44 | public void testSelectOrc() 45 | { 46 | assertQueryFails( 47 | "SELECT * FROM TABLE(storage.system.read_file('orc', '" + toAbsolutePath("example-data/apache-lz4.orc") + "')) WHERE x = 1658882660", 48 | "Reading local files is disabled"); 49 | assertQuery( 50 | "SELECT * FROM TABLE(storage.system.read_file('orc', '" + toRemotePath("example-data/apache-lz4.orc") + "')) WHERE x = 1658882660", 51 | "VALUES (1658882660, 639, -5557347160648450358)"); 52 | } 53 | 54 | @Test 55 | public void testList() 56 | { 57 | assertQuery( 58 | "SELECT substr(name, strpos(name, '/', -1) + 1) FROM TABLE(storage.system.list('" + server.getHadoopServer().toHdfsPath("/tmp/") + "')) WHERE name LIKE '%numbers%'", 59 | "VALUES ('numbers.tsv')"); 60 | assertQueryFails( 61 | "SELECT substr(name, strpos(name, '/', -1) + 1) FROM TABLE(storage.system.list('" + toAbsolutePath("example-data/") + "')) WHERE name LIKE '%numbers__.csv'", 62 | "Reading local files is disabled"); 63 | } 64 | 65 | private static String toAbsolutePath(String resourceName) 66 | { 67 | return Resources.getResource(resourceName).toString(); 68 | } 69 | 70 | private static String toRemotePath(String resourceName) 71 | { 72 | return format("https://github.com/snowlift/trino-storage/raw/4c381eca1fa44b22372300659a937a57550c90b9/src/test/resources/%s", resourceName); 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /src/test/java/org/ebyhr/trino/storage/TestStorageConnector.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | package org.ebyhr.trino.storage; 15 | 16 | import com.google.common.collect.ImmutableMap; 17 | import com.google.common.io.Resources; 18 | import io.trino.testing.AbstractTestQueryFramework; 19 | import io.trino.testing.QueryRunner; 20 | import org.junit.jupiter.api.Test; 21 | 22 | import java.util.Optional; 23 | 24 | import static java.lang.String.format; 25 | import static org.ebyhr.trino.storage.StorageQueryRunner.createStorageQueryRunner; 26 | 27 | public final class TestStorageConnector 28 | extends AbstractTestQueryFramework 29 | { 30 | private TestingStorageServer server; 31 | 32 | private static String toAbsolutePath(String resourceName) 33 | { 34 | return Resources.getResource(resourceName).toString(); 35 | } 36 | 37 | private static String toRemotePath(String resourceName) 38 | { 39 | return format("https://github.com/snowlift/trino-storage/raw/4c381eca1fa44b22372300659a937a57550c90b9/src/test/resources/%s", resourceName); 40 | } 41 | 42 | @Override 43 | protected QueryRunner createQueryRunner() 44 | throws Exception 45 | { 46 | server = closeAfterClass(new TestingStorageServer()); 47 | return createStorageQueryRunner( 48 | Optional.of(server), 49 | ImmutableMap.of(), 50 | ImmutableMap.of()); 51 | } 52 | 53 | @Test 54 | public void testSelectCsv() 55 | { 56 | assertQuery( 57 | "SELECT * FROM TABLE(storage.system.read_file('csv', '" + toAbsolutePath("example-data/numbers-2.csv") + "'))", 58 | "VALUES ('eleven', '11'), ('twelve', '12')"); 59 | assertQuery( 60 | "SELECT * FROM TABLE(storage.system.read_file('csv', '" + toAbsolutePath("example-data/quoted_fields_with_separator.csv") + "'))", 61 | "VALUES ('test','2','3','4'),('test,test,test,test','3','3','5'),(' even weirder, but still valid, value with extra whitespaces that remain due to quoting / ','1','2','3'),('extra whitespaces that should get trimmed due to no quoting','1','2','3')"); 62 | assertQuery( 63 | "SELECT * FROM TABLE(storage.system.read_file('csv', '" + toAbsolutePath("example-data/quoted_fields_with_newlines.csv") + "'))", 64 | "VALUES ('test','2','3','4'),('test,test,test,test','3','3','5'),(' even weirder, but still valid, value with linebreaks and extra\n" + 65 | "whitespaces that should remain due to quoting ','1','2','3'),('extra whitespaces that should get trimmed due to no quoting','1','2','3')"); 66 | } 67 | 68 | @Test 69 | public void testSelectSsv() 70 | { 71 | assertQuery( 72 | "SELECT * FROM TABLE(storage.system.read_file('ssv', '" + toAbsolutePath("example-data/numbers-2.ssv") + "'))", 73 | "VALUES ('eleven', '11'), ('twelve', '12')"); 74 | assertQuery( 75 | "SELECT * FROM TABLE(storage.system.read_file('ssv', '" + toAbsolutePath("example-data/quoted_fields_with_separator.ssv") + "'))", 76 | "VALUES ('test','2','3','4'),('test;test;test;test','3','3','5'),(' even weirder; but still valid; value with extra whitespaces that remain due to quoting / ','1','2','3'),('extra whitespaces that should get trimmed due to no quoting','1','2','3')"); 77 | assertQuery( 78 | "SELECT * FROM TABLE(storage.system.read_file('ssv', '" + toAbsolutePath("example-data/quoted_fields_with_newlines.ssv") + "'))", 79 | "VALUES ('test','2','3','4'),('test;test;test;test','3','3','5'),(' even weirder, but still valid; value with linebreaks and extra\n" + 80 | " whitespaces that should remain due to quoting ','1','2','3'),('extra whitespaces that should get trimmed due to no quoting','1','2','3')"); 81 | } 
82 | 83 | @Test 84 | public void testSelectTsv() 85 | { 86 | assertQuery( 87 | "SELECT * FROM TABLE(storage.system.read_file('tsv', '" + toAbsolutePath("example-data/numbers.tsv") + "'))", 88 | "VALUES ('two', '2'), ('three', '3')"); 89 | assertQuery( 90 | "SELECT * FROM TABLE(storage.system.read_file('tsv', '" + server.getHadoopServer().toHdfsPath("/tmp/numbers.tsv") + "'))", 91 | "VALUES ('two', '2'), ('three', '3')"); 92 | assertQuery( 93 | "SELECT * FROM TABLE(storage.system.read_file('tsv', '" + toAbsolutePath("example-data/quoted_fields_with_separator.tsv") + "'))", 94 | "VALUES ('test','2','3','4'),('test\ttest\ttest\ttest','3','3','5'),(' even weirder\t but still valid\t value with extra whitespaces that remain due to quoting / ','1','2','3'),('extra whitespaces that should get trimmed due to no quoting','1','2','3')"); 95 | assertQuery( 96 | "SELECT * FROM TABLE(storage.system.read_file('tsv', '" + toAbsolutePath("example-data/quoted_fields_with_newlines.tsv") + "'))", 97 | "VALUES ('test','2','3','4'),('test\ttest\ttest\ttest','3','3','5'),(' even weirder, but still valid, value with linebreaks and extra\n" + 98 | " whitespaces that should remain due to quoting ','1','2','3'),('extra whitespaces that should get trimmed due to no quoting','1','2','3')"); 99 | } 100 | 101 | @Test 102 | public void testSelectExcel() 103 | { 104 | assertQuery( 105 | "SELECT * FROM TABLE(storage.system.read_file('excel', '" + toAbsolutePath("example-data/sample.xlsx") + "'))", 106 | "VALUES ('a', '1'), ('b', '2')"); 107 | } 108 | 109 | @Test 110 | public void testSelectOrc() 111 | { 112 | assertQuery( 113 | "SELECT * FROM TABLE(storage.system.read_file('orc', '" + toAbsolutePath("example-data/apache-lz4.orc") + "')) WHERE x = 1658882660", 114 | "VALUES (1658882660, 639, -5557347160648450358)"); 115 | assertQuery( 116 | "SELECT * FROM TABLE(storage.system.read_file('orc', '" + toRemotePath("example-data/apache-lz4.orc") + "')) WHERE x = 1658882660", 117 | "VALUES (1658882660, 639, -5557347160648450358)"); 118 | } 119 | 120 | @Test 121 | public void testSelectParquet() 122 | { 123 | assertQuery( 124 | format("SELECT int_col, long_col, varchar_col " + 125 | "FROM storage.parquet.\"%s\" WHERE id_col = 1", toAbsolutePath("example-data/parquet_data.parquet")), 126 | "VALUES (11, 21, 'ant')"); 127 | } 128 | 129 | @Test 130 | public void testSelectJson() 131 | { 132 | // note that empty arrays are not supported at all, because array types are inferred from the first array element 133 | assertQuery( 134 | "SELECT * FROM TABLE(storage.system.read_file('json', '" + toAbsolutePath("example-data/newlines.json") + "'))", 135 | "VALUES " + 136 | "(true, CAST(null AS VARCHAR), 'aaa', 5, CAST(123.456 AS double), ARRAY['aaa', 'bbb']), " + 137 | "(false, CAST(null AS VARCHAR), 'bbb', 10, CAST(123.456 AS double), ARRAY['ccc'])"); 138 | assertQuery( 139 | "SELECT * FROM TABLE(storage.system.read_file('json', '" + toAbsolutePath("example-data/array-of-objects.json") + "'))", 140 | "VALUES " + 141 | "(true, CAST(null AS VARCHAR), 'aaa', 5, CAST(123.456 AS double), ARRAY['aaa', 'bbb']), " + 142 | "(false, CAST(null AS VARCHAR), 'bbb', 10, CAST(123.456 AS double), ARRAY['ccc'])"); 143 | } 144 | 145 | @Test 146 | public void testList() 147 | { 148 | assertQuery( 149 | "SELECT substr(name, strpos(name, '/', -1) + 1) FROM TABLE(storage.system.list('" + server.getHadoopServer().toHdfsPath("/tmp/") + "')) WHERE name LIKE '%numbers%'", 150 | "VALUES ('numbers.tsv')"); 151 | assertQuery( 152 | "SELECT substr(name, strpos(name, '/', 
-1) + 1) FROM TABLE(storage.system.list('" + toAbsolutePath("example-data/") + "')) WHERE name LIKE '%numbers__.csv'", 153 | "VALUES ('numbers-1.csv'), ('numbers-2.csv')"); 154 | } 155 | 156 | @Test 157 | public void testSelectAvro() 158 | { 159 | assertQuery( 160 | "SELECT * FROM TABLE(storage.system.read_file('avro', '" + toAbsolutePath("example-data/avro-data.avro") + "'))", 161 | "VALUES ('Kim', 35, 1745565389925, 175.5, 70.2, true, 'MALE', ARRAY['kim@example.com', 'kim2@example.com'])"); 162 | assertQuery( 163 | "SELECT age, active, gender FROM TABLE(storage.system.read_file('avro', '" + toAbsolutePath("example-data/avro-data.avro") + "'))", 164 | "VALUES (35, true, 'MALE')"); 165 | assertQuery( 166 | "SELECT count(*) FROM TABLE(storage.system.read_file('avro', '" + toAbsolutePath("example-data/avro-data.avro") + "'))", 167 | "VALUES (1)"); 168 | assertQuery( 169 | "SELECT count(*) FROM TABLE(storage.system.read_file('avro', '" + toAbsolutePath("example-data/avro-data.avro") + "')) WHERE age = 36", 170 | "VALUES (0)"); 171 | } 172 | } 173 | -------------------------------------------------------------------------------- /src/test/java/org/ebyhr/trino/storage/TestStoragePlugin.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | package org.ebyhr.trino.storage; 15 | 16 | import io.trino.spi.connector.ConnectorFactory; 17 | import io.trino.testing.TestingConnectorContext; 18 | import org.junit.jupiter.api.Test; 19 | 20 | import java.util.Map; 21 | 22 | import static com.google.common.collect.Iterables.getOnlyElement; 23 | 24 | public class TestStoragePlugin 25 | { 26 | @Test 27 | public void testCreateConnector() 28 | { 29 | ConnectorFactory factory = getConnectorFactory(); 30 | // simplest possible configuration 31 | factory.create("test", Map.of(), new TestingConnectorContext()).shutdown(); 32 | } 33 | 34 | @Test 35 | public void testHttpClient() 36 | { 37 | ConnectorFactory factory = getConnectorFactory(); 38 | factory.create("test", Map.of("http-client.http-proxy", "http://example.com:8080"), new TestingConnectorContext()).shutdown(); 39 | } 40 | 41 | private static ConnectorFactory getConnectorFactory() 42 | { 43 | return getOnlyElement(new StoragePlugin().getConnectorFactories()); 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/test/java/org/ebyhr/trino/storage/TestingHadoopServer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 
4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | package org.ebyhr.trino.storage; 15 | 16 | import com.google.common.collect.ImmutableList; 17 | import org.apache.hadoop.net.NetUtils; 18 | import org.testcontainers.containers.GenericContainer; 19 | import org.testcontainers.containers.Network; 20 | import org.testcontainers.containers.startupcheck.IsRunningStartupCheckStrategy; 21 | import org.testcontainers.containers.wait.strategy.HostPortWaitStrategy; 22 | import org.testcontainers.utility.DockerImageName; 23 | import org.testcontainers.utility.MountableFile; 24 | 25 | import java.io.Closeable; 26 | import java.io.IOException; 27 | import java.io.UncheckedIOException; 28 | import java.net.InetAddress; 29 | import java.net.UnknownHostException; 30 | 31 | import static java.lang.String.format; 32 | 33 | public class TestingHadoopServer 34 | implements Closeable 35 | { 36 | private static final String HOSTNAME = "hadoop-master"; 37 | 38 | private final GenericContainer<?> dockerContainer; 39 | private final String hostname; 40 | 41 | public TestingHadoopServer(Network network) 42 | { 43 | dockerContainer = new GenericContainer<>(DockerImageName.parse("ghcr.io/trinodb/testing/hdp3.1-hive:108")) 44 | .withCreateContainerCmdModifier(cmd -> cmd.withHostName(HOSTNAME)) 45 | .withCopyFileToContainer(MountableFile.forClasspathResource("minio/hive-core-site.xml"), "/etc/hadoop/conf/core-site.xml") 46 | .withStartupCheckStrategy(new IsRunningStartupCheckStrategy()) 47 | .waitingFor(new HostPortWaitStrategy()) 48 | .withNetwork(network); 49 | dockerContainer.setPortBindings(ImmutableList.of("1180:1180", "9000:9000")); 50 | dockerContainer.start(); 51 | hostname = getHostName(); 52 | 53 | // Even though Hadoop is accessed by proxy, Hadoop still tries to resolve hadoop-master 54 | // (e.g: in: NameNodeProxies.createProxy) 55 | // This adds a static resolution for hadoop-master to docker container internal ip 56 | //noinspection deprecation 57 | NetUtils.addStaticResolution(HOSTNAME, dockerContainer.getContainerInfo().getNetworkSettings().getIpAddress()); 58 | } 59 | 60 | public void copyFromLocal(String resourceName, String containerPath, String hdfsPath) 61 | { 62 | try { 63 | dockerContainer.copyFileToContainer(MountableFile.forClasspathResource(resourceName), containerPath); 64 | dockerContainer.execInContainer("hdfs", "dfs", "-copyFromLocal", containerPath, hdfsPath); 65 | } 66 | catch (InterruptedException | IOException e) { 67 | throw new RuntimeException(e); 68 | } 69 | } 70 | 71 | public String getSocksProxy() 72 | { 73 | return format("%s:1180", hostname); 74 | } 75 | 76 | public String toHdfsPath(String path) 77 | { 78 | return format("hdfs://%s:9000%s", hostname, path); 79 | } 80 | 81 | private String getHostName() 82 | { 83 | try { 84 | return InetAddress.getLocalHost().getHostAddress(); 85 | } 86 | catch (UnknownHostException e) { 87 | throw new UncheckedIOException(e); 88 | } 89 | } 90 | 91 | @Override 92 | public void close() 93 | { 94 | dockerContainer.close(); 95 | } 96 | } 97 | --------------------------------------------------------------------------------
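A minimal usage sketch of the Hadoop helper above, not part of the repository: it stages a bundled fixture into HDFS with copyFromLocal and derives the hdfs:// URI that the tests feed to the read_file table function. The class name HdfsRoundTripSketch is invented for illustration; TestingStorageServer and the numbers.tsv fixture appear elsewhere in this tree.

package org.ebyhr.trino.storage;

public class HdfsRoundTripSketch // hypothetical name, for illustration only
{
    public static void main(String[] args)
            throws Exception
    {
        try (TestingStorageServer server = new TestingStorageServer()) {
            TestingHadoopServer hadoop = server.getHadoopServer();
            // Stage a classpath resource into HDFS, as StorageQueryRunner does at startup
            hadoop.copyFromLocal("example-data/numbers.tsv", "/tmp/numbers.tsv", "/tmp/numbers.tsv");
            // toHdfsPath builds the hdfs://<host>:9000<path> URI that the tests pass to read_file
            System.out.println("SELECT * FROM TABLE(storage.system.read_file('tsv', '"
                    + hadoop.toHdfsPath("/tmp/numbers.tsv") + "'))");
        }
    }
}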
/src/test/java/org/ebyhr/trino/storage/TestingMinioServer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | package org.ebyhr.trino.storage; 15 | 16 | import com.amazonaws.auth.AWSStaticCredentialsProvider; 17 | import com.amazonaws.auth.BasicAWSCredentials; 18 | import com.amazonaws.client.builder.AwsClientBuilder; 19 | import com.amazonaws.services.s3.AmazonS3; 20 | import com.amazonaws.services.s3.AmazonS3ClientBuilder; 21 | import org.testcontainers.containers.GenericContainer; 22 | import org.testcontainers.containers.Network; 23 | import org.testcontainers.containers.startupcheck.IsRunningStartupCheckStrategy; 24 | import org.testcontainers.containers.wait.strategy.HostPortWaitStrategy; 25 | import org.testcontainers.utility.DockerImageName; 26 | 27 | import java.io.Closeable; 28 | 29 | import static java.lang.String.format; 30 | 31 | public class TestingMinioServer 32 | implements Closeable 33 | { 34 | public static final String ACCESS_KEY = "accesskey"; 35 | public static final String SECRET_KEY = "secretkey"; 36 | 37 | private static final String DEFAULT_IMAGE = "minio/minio:RELEASE.2022-08-13T21-54-44Z"; 38 | private static final String HOSTNAME = "minio"; 39 | 40 | private static final int API_PORT = 4566; 41 | private static final int CONSOLE_PORT = 4567; 42 | 43 | private final GenericContainer<?> container; 44 | private final AmazonS3 s3Client; 45 | 46 | public TestingMinioServer(Network network) 47 | { 48 | container = new GenericContainer<>(DockerImageName.parse(DEFAULT_IMAGE)) 49 | .withCreateContainerCmdModifier(cmd -> cmd.withHostName(HOSTNAME)) 50 | .withCommand("server", 51 | "--address", "0.0.0.0:" + API_PORT, 52 | "--console-address", "0.0.0.0:" + CONSOLE_PORT, 53 | "/data") 54 | .withEnv("MINIO_ACCESS_KEY", ACCESS_KEY) 55 | .withEnv("MINIO_SECRET_KEY", SECRET_KEY) 56 | .withExposedPorts(API_PORT, CONSOLE_PORT) 57 | .withStartupCheckStrategy(new IsRunningStartupCheckStrategy()) 58 | .waitingFor(new HostPortWaitStrategy()) 59 | .withNetwork(network); 60 | container.start(); 61 | 62 | s3Client = AmazonS3ClientBuilder 63 | .standard() 64 | .withEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration("http://localhost:" + container.getMappedPort(API_PORT), "us-east-1")) 65 | .withCredentials(new AWSStaticCredentialsProvider(new BasicAWSCredentials(ACCESS_KEY, SECRET_KEY))) 66 | .withPathStyleAccessEnabled(true) 67 | .build(); 68 | } 69 | 70 | public String getEndpoint() 71 | { 72 | return format("http://%s:%s", container.getHost(), container.getMappedPort(API_PORT)); 73 | } 74 | 75 | public void createBucket(String name) 76 | { 77 | s3Client.createBucket(name); 78 | } 79 | 80 | public void createFile(String bucketName, String fileName) 81 | { 82 | s3Client.putObject(bucketName, fileName, "hello"); 83 | } 84 | 85 | @Override 86 | public void close() 87 | { 88 | container.close(); 89 | } 90 | } 91 |
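A brief sketch, not part of the repository, of how the MinIO helpers above would be exercised; the class, bucket, and object names are hypothetical, and getEndpoint() supplies the value that StorageQueryRunner passes as hive.s3.endpoint:

package org.ebyhr.trino.storage;

import org.testcontainers.containers.Network;

public class MinioUsageSketch // hypothetical name, for illustration only
{
    public static void main(String[] args)
            throws Exception
    {
        try (Network network = Network.newNetwork();
                TestingMinioServer minio = new TestingMinioServer(network)) {
            minio.createBucket("test-bucket");
            minio.createFile("test-bucket", "hello.txt"); // writes the literal payload "hello"
            System.out.println(minio.getEndpoint());
        }
    }
}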
-------------------------------------------------------------------------------- /src/test/java/org/ebyhr/trino/storage/TestingStorageServer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | package org.ebyhr.trino.storage; 15 | 16 | import io.trino.plugin.base.util.AutoCloseableCloser; 17 | import io.trino.testing.ResourcePresence; 18 | import org.testcontainers.containers.Network; 19 | 20 | import static org.testcontainers.containers.Network.newNetwork; 21 | 22 | public class TestingStorageServer 23 | implements AutoCloseable 24 | { 25 | private final TestingHadoopServer hadoopServer; 26 | private final TestingMinioServer minioServer; 27 | private final AutoCloseableCloser closer = AutoCloseableCloser.create(); 28 | 29 | private boolean isRunning; 30 | 31 | public TestingStorageServer() 32 | { 33 | Network network = closer.register(newNetwork()); 34 | hadoopServer = closer.register(new TestingHadoopServer(network)); 35 | minioServer = closer.register(new TestingMinioServer(network)); 36 | isRunning = true; 37 | } 38 | 39 | public TestingHadoopServer getHadoopServer() 40 | { 41 | return hadoopServer; 42 | } 43 | 44 | public TestingMinioServer getMinioServer() 45 | { 46 | return minioServer; 47 | } 48 | 49 | @Override 50 | public void close() 51 | throws Exception 52 | { 53 | closer.close(); 54 | isRunning = false; 55 | } 56 | 57 | @ResourcePresence 58 | public boolean isRunning() 59 | { 60 | return this.isRunning; 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /src/test/resources/example-data/apache-lz4.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snowlift/trino-storage/c1394cfce9aa98d9cc2bd5abd9090889f921cfca/src/test/resources/example-data/apache-lz4.orc -------------------------------------------------------------------------------- /src/test/resources/example-data/array-of-objects.json: -------------------------------------------------------------------------------- 1 | [ 2 | {"bool": true, "null": null, "string": "aaa", "int": 5, "double": 123.456, "array of strings": ["aaa", "bbb"]}, 3 | {"bool": false, "null": null, "string": "bbb", "int": 10, "double": 123.456, "array of strings": ["ccc"]} 4 | ] 5 | -------------------------------------------------------------------------------- /src/test/resources/example-data/avro-data.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snowlift/trino-storage/c1394cfce9aa98d9cc2bd5abd9090889f921cfca/src/test/resources/example-data/avro-data.avro -------------------------------------------------------------------------------- /src/test/resources/example-data/example-metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "example": [ 3 | { 4 | "name": "numbers", 5 | 
"columns": [ 6 | { 7 | "name": "text", 8 | "type": "VARCHAR" 9 | }, 10 | { 11 | "name": "value", 12 | "type": "BIGINT" 13 | } 14 | ], 15 | "sources": [ 16 | "numbers-1.csv", 17 | "numbers-2.csv" 18 | ] 19 | } 20 | ], 21 | "tpch": [ 22 | { 23 | "name": "orders", 24 | "columns": [ 25 | { 26 | "name": "orderkey", 27 | "type": "BIGINT" 28 | }, 29 | { 30 | "name": "custkey", 31 | "type": "BIGINT" 32 | }, 33 | { 34 | "name": "orderstatus", 35 | "type": "VARCHAR" 36 | }, 37 | { 38 | "name": "totalprice", 39 | "type": "DOUBLE" 40 | }, 41 | { 42 | "name": "orderdate", 43 | "type": "VARCHAR" 44 | }, 45 | { 46 | "name": "orderpriority", 47 | "type": "VARCHAR" 48 | }, 49 | { 50 | "name": "clerk", 51 | "type": "VARCHAR" 52 | }, 53 | { 54 | "name": "shippriority", 55 | "type": "INTEGER" 56 | }, 57 | { 58 | "name": "comment", 59 | "type": "VARCHAR" 60 | } 61 | ], 62 | "sources": [ 63 | "orders-1.csv", 64 | "orders-2.csv" 65 | ] 66 | }, 67 | { 68 | "name": "lineitem", 69 | "columns": [ 70 | { 71 | "name": "orderkey", 72 | "type": "BIGINT" 73 | }, 74 | { 75 | "name": "partkey", 76 | "type": "BIGINT" 77 | }, 78 | { 79 | "name": "suppkey", 80 | "type": "BIGINT" 81 | }, 82 | { 83 | "name": "linenumber", 84 | "type": "INTEGER" 85 | }, 86 | { 87 | "name": "quantity", 88 | "type": "DOUBLE" 89 | }, 90 | { 91 | "name": "discount", 92 | "type": "DOUBLE" 93 | }, 94 | { 95 | "name": "tax", 96 | "type": "DOUBLE" 97 | }, 98 | { 99 | "name": "returnflag", 100 | "type": "VARCHAR" 101 | }, 102 | { 103 | "name": "linestatus", 104 | "type": "VARCHAR" 105 | }, 106 | { 107 | "name": "shipdate", 108 | "type": "VARCHAR" 109 | }, 110 | { 111 | "name": "commitdate", 112 | "type": "VARCHAR" 113 | }, 114 | { 115 | "name": "receiptdate", 116 | "type": "VARCHAR" 117 | }, 118 | { 119 | "name": "shipinstruct", 120 | "type": "VARCHAR" 121 | }, 122 | { 123 | "name": "shipmode", 124 | "type": "VARCHAR" 125 | }, 126 | { 127 | "name": "comment", 128 | "type": "VARCHAR" 129 | } 130 | ], 131 | "sources": [ 132 | "lineitem-1.csv", 133 | "lineitem-2.csv" 134 | ] 135 | } 136 | ] 137 | } 138 | -------------------------------------------------------------------------------- /src/test/resources/example-data/newlines.json: -------------------------------------------------------------------------------- 1 | {"bool": true, "null": null, "string": "aaa", "int": 5, "double": 123.456, "array of strings": ["aaa", "bbb"]} 2 | {"bool": false, "null": null, "string": "bbb", "int": 10, "double": 123.456, "array of strings": ["ccc"]} 3 | -------------------------------------------------------------------------------- /src/test/resources/example-data/numbers-1.csv: -------------------------------------------------------------------------------- 1 | one, 1 2 | two, 2 3 | three, 3 4 | -------------------------------------------------------------------------------- /src/test/resources/example-data/numbers-2.csv: -------------------------------------------------------------------------------- 1 | ten, 10 2 | eleven, 11 3 | twelve, 12 4 | -------------------------------------------------------------------------------- /src/test/resources/example-data/numbers-2.ssv: -------------------------------------------------------------------------------- 1 | ten; 10 2 | eleven; 11 3 | twelve; 12 4 | -------------------------------------------------------------------------------- /src/test/resources/example-data/numbers.tsv: -------------------------------------------------------------------------------- 1 | one 1 2 | two 2 3 | three 3 4 | 
-------------------------------------------------------------------------------- /src/test/resources/example-data/parquet_data.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snowlift/trino-storage/c1394cfce9aa98d9cc2bd5abd9090889f921cfca/src/test/resources/example-data/parquet_data.parquet -------------------------------------------------------------------------------- /src/test/resources/example-data/quoted_fields_with_newlines.csv: -------------------------------------------------------------------------------- 1 | header_1,header_2,"weird, but valid header 3", header_4_with_extra_whitespace 2 | test,2,3,4 3 | "test,test,test,test",3,3,5 4 | " even weirder, but still valid, value with linebreaks and extra 5 | whitespaces that should remain due to quoting ",1,2,3 6 | extra whitespaces that should get trimmed due to no quoting ,1,2,3 7 | -------------------------------------------------------------------------------- /src/test/resources/example-data/quoted_fields_with_newlines.ssv: -------------------------------------------------------------------------------- 1 | header_1;header_2;"weird; but valid header 3"; header_4_with_extra_whitespace 2 | test;2;3;4 3 | "test;test;test;test";3;3;5 4 | " even weirder, but still valid; value with linebreaks and extra 5 | whitespaces that should remain due to quoting ";1;2;3 6 | extra whitespaces that should get trimmed due to no quoting ;1;2;3 7 | -------------------------------------------------------------------------------- /src/test/resources/example-data/quoted_fields_with_newlines.tsv: -------------------------------------------------------------------------------- 1 | header_1 header_2 "weird but valid header 3" header_4_with_extra_whitespace 2 | test 2 3 4 3 | "test test test test" 3 3 5 4 | " even weirder, but still valid, value with linebreaks and extra 5 | whitespaces that should remain due to quoting " 1 2 3 6 | extra whitespaces that should get trimmed due to no quoting 1 2 3 7 | -------------------------------------------------------------------------------- /src/test/resources/example-data/quoted_fields_with_separator.csv: -------------------------------------------------------------------------------- 1 | header_1,header_2,"weird, but valid header 3", header_4_with_extra_whitespace 2 | test,2,3,4 3 | "test,test,test,test",3,3,5 4 | " even weirder, but still valid, value with extra whitespaces that remain due to quoting / ",1,2,3 5 | extra whitespaces that should get trimmed due to no quoting ,1,2,3 6 | -------------------------------------------------------------------------------- /src/test/resources/example-data/quoted_fields_with_separator.ssv: -------------------------------------------------------------------------------- 1 | header_1;header_2;"weird; but valid header 3"; header_4_with_extra_whitespace 2 | test;2;3;4 3 | "test;test;test;test";3;3;5 4 | " even weirder; but still valid; value with extra whitespaces that remain due to quoting / ";1;2;3 5 | extra whitespaces that should get trimmed due to no quoting ;1;2;3 6 | -------------------------------------------------------------------------------- /src/test/resources/example-data/quoted_fields_with_separator.tsv: -------------------------------------------------------------------------------- 1 | header_1 header_2 "weird but valid header 3" header_4_with_extra_whitespace 2 | test 2 3 4 3 | "test test test test" 3 3 5 4 | " even weirder but still valid value with extra whitespaces that remain due to 
quoting / " 1 2 3 5 | extra whitespaces that should get trimmed due to no quoting 1 2 3 6 | -------------------------------------------------------------------------------- /src/test/resources/example-data/sample.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snowlift/trino-storage/c1394cfce9aa98d9cc2bd5abd9090889f921cfca/src/test/resources/example-data/sample.xlsx -------------------------------------------------------------------------------- /src/test/resources/minio/hive-core-site.xml: -------------------------------------------------------------------------------- 1 | <?xml version="1.0"?> 2 | <configuration> 3 | <property> 4 | <name>fs.defaultFS</name> 5 | <value>hdfs://hadoop-master:9000</value> 6 | </property> 7 | <property> 8 | <name>fs.s3a.endpoint</name> 9 | <value>http://minio:4566</value> 10 | </property> 11 | <property> 12 | <name>fs.s3a.access.key</name> 13 | <value>accesskey</value> 14 | </property> 15 | <property> 16 | <name>fs.s3a.secret.key</name> 17 | <value>secretkey</value> 18 | </property> 19 | <property> 20 | <name>fs.s3a.path.style.access</name> 21 | <value>true</value> 22 | </property> 23 | </configuration> 24 | --------------------------------------------------------------------------------
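Taken together, the pieces above compose into a short end-to-end run. The following sketch is not part of the repository (the class name EndToEndSketch is invented); it only combines classes and fixtures shown in this tree, mirroring TestStorageConnector.testList and testSelectTsv. Note that createStorageQueryRunner already stages /tmp/numbers.tsv into HDFS when a storage server is supplied.

package org.ebyhr.trino.storage;

import io.trino.testing.DistributedQueryRunner;

import java.util.Map;
import java.util.Optional;

import static org.ebyhr.trino.storage.StorageQueryRunner.createStorageQueryRunner;

public class EndToEndSketch // hypothetical name, for illustration only
{
    public static void main(String[] args)
            throws Exception
    {
        try (TestingStorageServer server = new TestingStorageServer();
                DistributedQueryRunner queryRunner = createStorageQueryRunner(
                        Optional.of(server),
                        Map.of(),
                        Map.of(
                                "hive.s3.path-style-access", "true",
                                "hive.s3.endpoint", server.getMinioServer().getEndpoint(),
                                "hive.s3.aws-access-key", TestingMinioServer.ACCESS_KEY,
                                "hive.s3.aws-secret-key", TestingMinioServer.SECRET_KEY))) {
            // List the staged files on HDFS, then read one back through read_file
            queryRunner.execute("SELECT name FROM TABLE(storage.system.list('"
                    + server.getHadoopServer().toHdfsPath("/tmp/") + "'))");
            queryRunner.execute("SELECT * FROM TABLE(storage.system.read_file('tsv', '"
                    + server.getHadoopServer().toHdfsPath("/tmp/numbers.tsv") + "'))");
        }
    }
}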