├── .github
│   ├── dependabot.yml
│   ├── problem-matcher.json
│   └── workflows
│       ├── main.yml
│       └── release.yml
├── .gitignore
├── .mvn
│   ├── jvm.config
│   ├── modernizer
│   │   └── violations.xml
│   └── wrapper
│       └── maven-wrapper.properties
├── LICENSE
├── README.md
├── docker
│   └── docker-compose.yml
├── mvnw
├── pom.xml
└── src
    ├── main
    │   └── java
    │       └── org
    │           └── ebyhr
    │               └── trino
    │                   └── storage
    │                       ├── ByteResponseHandler.java
    │                       ├── FileType.java
    │                       ├── ForStorage.java
    │                       ├── ListPageSource.java
    │                       ├── StorageClient.java
    │                       ├── StorageColumnHandle.java
    │                       ├── StorageConfig.java
    │                       ├── StorageConnector.java
    │                       ├── StorageConnectorFactory.java
    │                       ├── StorageMetadata.java
    │                       ├── StorageModule.java
    │                       ├── StoragePageSourceProvider.java
    │                       ├── StoragePlugin.java
    │                       ├── StorageRecordSetProvider.java
    │                       ├── StorageSplit.java
    │                       ├── StorageSplitManager.java
    │                       ├── StorageTable.java
    │                       ├── StorageTableHandle.java
    │                       ├── StorageTransactionHandle.java
    │                       ├── operator
    │                       │   ├── AvroColumnDecoder.java
    │                       │   ├── AvroPlugin.java
    │                       │   ├── AvroSchemaConverter.java
    │                       │   ├── CsvPlugin.java
    │                       │   ├── ExcelPlugin.java
    │                       │   ├── FilePlugin.java
    │                       │   ├── JsonPlugin.java
    │                       │   ├── OrcPageSource.java
    │                       │   ├── OrcPlugin.java
    │                       │   ├── OrcTypeTranslator.java
    │                       │   ├── ParquetPageSource.java
    │                       │   ├── ParquetPlugin.java
    │                       │   ├── ParquetTypeTranslator.java
    │                       │   ├── PluginFactory.java
    │                       │   ├── RawPlugin.java
    │                       │   └── TextPlugin.java
    │                       └── ptf
    │                           ├── ListTableFunction.java
    │                           └── ReadFileTableFunction.java
    └── test
        ├── java
        │   └── org
        │       └── ebyhr
        │           └── trino
        │               └── storage
        │                   ├── StorageQueryRunner.java
        │                   ├── TestRestrictedStorageConnector.java
        │                   ├── TestStorageConnector.java
        │                   ├── TestStoragePlugin.java
        │                   ├── TestingHadoopServer.java
        │                   ├── TestingMinioServer.java
        │                   └── TestingStorageServer.java
        └── resources
            ├── example-data
            │   ├── apache-lz4.orc
            │   ├── array-of-objects.json
            │   ├── avro-data.avro
            │   ├── example-metadata.json
            │   ├── lineitem-1.csv
            │   ├── lineitem-2.csv
            │   ├── newlines.json
            │   ├── numbers-1.csv
            │   ├── numbers-2.csv
            │   ├── numbers-2.ssv
            │   ├── numbers.tsv
            │   ├── orders-1.csv
            │   ├── orders-2.csv
            │   ├── parquet_data.parquet
            │   ├── quoted_fields_with_newlines.csv
            │   ├── quoted_fields_with_newlines.ssv
            │   ├── quoted_fields_with_newlines.tsv
            │   ├── quoted_fields_with_separator.csv
            │   ├── quoted_fields_with_separator.ssv
            │   ├── quoted_fields_with_separator.tsv
            │   └── sample.xlsx
            └── minio
                └── hive-core-site.xml
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 |   - package-ecosystem: "maven"
4 |     directory: "/"
5 |     schedule:
6 |       interval: "weekly"
7 |     groups:
8 |       dependency-updates:
9 |         applies-to: version-updates
10 |         update-types:
11 |           - major
12 |           - minor
13 |           - patch
14 |       security-updates:
15 |         applies-to: security-updates
16 |         dependency-type: production
17 |   - package-ecosystem: "github-actions"
18 |     directory: "/"
19 |     schedule:
20 |       interval: "weekly"
21 |
--------------------------------------------------------------------------------
/.github/problem-matcher.json:
--------------------------------------------------------------------------------
1 | {
2 |   "problemMatcher": [
3 |     {
4 |       "owner": "maven",
5 |       "pattern": [
6 |         {
7 |           "regexp": "^.*\\[(ERROR|WARN(?:ING)?)\\]\\s+(.*):\\[(\\d+),(\\d+)\\] (?:error: )?[\\[\\(](.*)[\\]\\)] (.*)$",
8 |           "severity": 1,
9 |           "file": 2,
10 |           "line": 3,
11 |           "column": 4,
12 |           "message": 6,
13 |           "code": 5
14 |         }
15 |       ]
16 |     }
17 |   ]
18 | }
19 |
--------------------------------------------------------------------------------
/.github/workflows/main.yml:
--------------------------------------------------------------------------------
1 | name: CI
2 |
3 | on: [push, pull_request]
4 |
5 | env:
6 |   HADOOP_USER_NAME: hive
7 |
8 | jobs:
9 |   maven-checks:
10 |     runs-on: ubuntu-latest
11 |     steps:
12 |       - uses: actions/checkout@v4
13 |       - uses: actions/setup-java@v4
14 |         with:
15 |           java-version: '23'
16 |           distribution: 'temurin'
17 |           cache: 'maven'
18 |       - name: Configure Problem Matchers
19 |         run: |
20 |           echo "::add-matcher::.github/problem-matcher.json"
21 |           echo "::remove-matcher owner=java::"
22 |       - name: Maven Checks
23 |         run: |
24 |           ./mvnw -B clean install
25 |       - name: Annotate run
26 |         uses: trinodb/github-actions/action-surefire-report@b63800bedfbc7ab1ff2e5fe7eaecf5ab82ce6a70
27 |         if: always()
28 |         with:
29 |           fail_if_no_tests: false
30 |           skip_publishing: true
31 |
--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
1 | name: CD
2 |
3 | on:
4 |   workflow_dispatch: {}
5 |
6 | env:
7 |   HADOOP_USER_NAME: hive
8 |
9 | jobs:
10 |   release:
11 |     runs-on: ubuntu-latest
12 |     steps:
13 |       - uses: actions/checkout@v4
14 |       - uses: actions/setup-java@v4
15 |         with:
16 |           java-version: '23'
17 |           distribution: 'temurin'
18 |           server-id: github
19 |           cache: 'maven'
20 |       - name: Configure Problem Matchers
21 |         run: |
22 |           echo "::add-matcher::.github/problem-matcher.json"
23 |           echo "::remove-matcher owner=java::"
24 |       - name: Configure Git user
25 |         run: |
26 |           git config user.email "actions@github.com"
27 |           git config user.name "GitHub Actions"
28 |       - name: Prepare release
29 |         run: ./mvnw -B release:prepare
30 |       - name: Save version number in env
31 |         run: |
32 |           echo "VERSION=$(grep 'project.rel.org.ebyhr\\:trino-storage=' release.properties | cut -d'=' -f2)" >> $GITHUB_ENV
33 |       - uses: marvinpinto/action-automatic-releases@v1.2.1
34 |         with:
35 |           repo_token: "${{ secrets.GITHUB_TOKEN }}"
36 |           prerelease: false
37 |           automatic_release_tag: v${{ env.VERSION }}
38 |           title: v${{ env.VERSION }}
39 |           files: |
40 |             target/*.zip
41 |       - name: Publish JAR
42 |         run: ./mvnw -B release:perform -Darguments=-Dgpg.skip
43 |         env:
44 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
45 |       - name: Annotate run
46 |         uses: trinodb/github-actions/action-surefire-report@b63800bedfbc7ab1ff2e5fe7eaecf5ab82ce6a70
47 |         if: always()
48 |         with:
49 |           fail_if_no_tests: false
50 |           skip_publishing: true
51 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 |
3 | target
4 | .idea
5 | *.iml
6 |
--------------------------------------------------------------------------------
/.mvn/jvm.config:
--------------------------------------------------------------------------------
1 | --add-exports jdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED
2 | --add-exports jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED
3 | --add-exports jdk.compiler/com.sun.tools.javac.main=ALL-UNNAMED
4 | --add-exports jdk.compiler/com.sun.tools.javac.model=ALL-UNNAMED
5 | --add-exports jdk.compiler/com.sun.tools.javac.parser=ALL-UNNAMED
6 | --add-exports jdk.compiler/com.sun.tools.javac.processing=ALL-UNNAMED
7 | --add-exports jdk.compiler/com.sun.tools.javac.tree=ALL-UNNAMED
8 | --add-exports jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED
9 | --add-opens jdk.compiler/com.sun.tools.javac.code=ALL-UNNAMED
10 | --add-opens jdk.compiler/com.sun.tools.javac.comp=ALL-UNNAMED
11 |
--------------------------------------------------------------------------------
/.mvn/modernizer/violations.xml:
--------------------------------------------------------------------------------
1 | <modernizer>
2 |     <violation>
3 |         <name>java/lang/Class.newInstance:()Ljava/lang/Object;</name>
4 |         <version>1.1</version>
5 |         <comment>Prefer Class.getConstructor().newInstance()</comment>
6 |     </violation>
7 |
8 |     <violation>
9 |         <name>java/lang/String."&lt;init&gt;":([B)V</name>
10 |         <version>1.1</version>
11 |         <comment>Prefer new String(byte[], Charset)</comment>
12 |     </violation>
13 |
14 |     <violation>
15 |         <name>java/lang/String.getBytes:()[B</name>
16 |         <version>1.1</version>
17 |         <comment>Prefer String.getBytes(Charset)</comment>
18 |     </violation>
19 |
20 |     <violation>
21 |         <name>java/lang/String.toLowerCase:()Ljava/lang/String;</name>
22 |         <version>1.1</version>
23 |         <comment>Prefer String.toLowerCase(java.util.Locale)</comment>
24 |     </violation>
25 |
26 |     <violation>
27 |         <name>java/lang/String.toUpperCase:()Ljava/lang/String;</name>
28 |         <version>1.1</version>
29 |         <comment>Prefer String.toUpperCase(java.util.Locale)</comment>
30 |     </violation>
31 |
32 |     <violation>
33 |         <name>java/io/File.toString:()Ljava/lang/String;</name>
34 |         <version>1.1</version>
35 |         <comment>Prefer File.getPath()</comment>
36 |     </violation>
37 |
38 |     <violation>
39 |         <name>com/google/common/primitives/Ints.checkedCast:(J)I</name>
40 |         <version>1.8</version>
41 |         <comment>Prefer Math.toIntExact(long)</comment>
42 |     </violation>
43 |
44 |     <violation>
45 |         <name>org/testng/Assert.assertEquals:(Ljava/lang/Iterable;Ljava/lang/Iterable;)V</name>
46 |         <version>1.8</version>
47 |         <comment>Use io.trino.testing.assertions.Assert.assertEquals due to TestNG #543</comment>
48 |     </violation>
49 |
50 |     <violation>
51 |         <name>org/testng/Assert.assertEquals:(Ljava/lang/Iterable;Ljava/lang/Iterable;Ljava/lang/String;)V</name>
52 |         <version>1.8</version>
53 |         <comment>Use io.trino.testing.assertions.Assert.assertEquals due to TestNG #543</comment>
54 |     </violation>
55 |
56 |     <violation>
57 |         <name>org/testng/Assert.assertThrows:(Lorg/testng/Assert$ThrowingRunnable;)V</name>
58 |         <version>1.8</version>
59 |         <comment>Use AssertJ's assertThatThrownBy, see https://github.com/trinodb/trino/issues/5320 for rationale</comment>
60 |     </violation>
61 |
62 |     <violation>
63 |         <name>org/testng/Assert.assertThrows:(Ljava/lang/Class;Lorg/testng/Assert$ThrowingRunnable;)V</name>
64 |         <version>1.8</version>
65 |         <comment>Use AssertJ's assertThatThrownBy, see https://github.com/trinodb/trino/issues/5320 for rationale</comment>
66 |     </violation>
67 |
68 |     <violation>
69 |         <name>org/apache/hadoop/conf/Configuration."&lt;init&gt;":()V</name>
70 |         <version>1.1</version>
71 |         <comment>Prefer new Configuration(false)</comment>
72 |     </violation>
73 |
74 |     <violation>
75 |         <name>java/util/TimeZone.getTimeZone:(Ljava/lang/String;)Ljava/util/TimeZone;</name>
76 |         <version>1.8</version>
77 |         <comment>Avoid TimeZone.getTimeZone as it returns GMT for a zone not supported by the JVM. Use TimeZone.getTimeZone(ZoneId.of(..)) instead, or TimeZone.getTimeZone(..., false).</comment>
78 |     </violation>
79 |
80 |     <violation>
81 |         <name>org/joda/time/DateTimeZone.toTimeZone:()Ljava/util/TimeZone;</name>
82 |         <version>1.8</version>
83 |         <comment>Avoid DateTimeZone.toTimeZone as it returns GMT for a zone not supported by the JVM. Use TimeZone.getTimeZone(ZoneId.of(dtz.getId())) instead.</comment>
84 |     </violation>
85 |
86 |     <violation>
87 |         <name>com/esri/core/geometry/ogc/OGCGeometry.equals:(Lcom/esri/core/geometry/ogc/OGCGeometry;)Z</name>
88 |         <version>1.6</version>
89 |         <comment>Prefer OGCGeometry.Equals(OGCGeometry)</comment>
90 |     </violation>
91 |
92 |     <violation>
93 |         <name>com/esri/core/geometry/ogc/OGCGeometry.equals:(Ljava/lang/Object;)Z</name>
94 |         <version>1.6</version>
95 |         <comment>Prefer OGCGeometry.Equals(OGCGeometry)</comment>
96 |     </violation>
97 |
98 |     <violation>
99 |         <name>io/airlift/units/DataSize."&lt;init&gt;":(DLio/airlift/units/DataSize$Unit;)V</name>
100 |         <version>1.8</version>
101 |         <comment>Use io.airlift.units.DataSize.of(long, DataSize.Unit)</comment>
102 |     </violation>
103 |
104 |     <violation>
105 |         <name>io/airlift/units/DataSize.succinctDataSize:(DLio/airlift/units/DataSize$Unit;)Lio/airlift/units/DataSize;</name>
106 |         <version>1.8</version>
107 |         <comment>Use io.airlift.units.DataSize.of(long, DataSize.Unit).succinct() -- Note that succinct conversion only affects toString() results</comment>
108 |     </violation>
109 |
110 |     <violation>
111 |         <name>io/airlift/units/DataSize.getValue:()D</name>
112 |         <version>1.8</version>
113 |         <comment>Use io.airlift.units.DataSize.toBytes() and Unit.inBytes() for conversion</comment>
114 |     </violation>
115 |
116 |     <violation>
117 |         <name>io/airlift/units/DataSize.getValue:(Lio/airlift/units/DataSize$Unit;)D</name>
118 |         <version>1.8</version>
119 |         <comment>Use io.airlift.units.DataSize.toBytes() and Unit.inBytes() for conversion</comment>
120 |     </violation>
121 |
122 |     <violation>
123 |         <name>io/airlift/units/DataSize.roundTo:(Lio/airlift/units/DataSize$Unit;)J</name>
124 |         <version>1.8</version>
125 |         <comment>Method is deprecated for removal</comment>
126 |     </violation>
127 |
128 |     <violation>
129 |         <name>io/airlift/units/DataSize.convertTo:(Lio/airlift/units/DataSize$Unit;)Lio/airlift/units/DataSize;</name>
130 |         <version>1.8</version>
131 |         <comment>Use io.airlift.units.DataSize.to(DataSize.Unit)</comment>
132 |     </violation>
133 |
134 |     <violation>
135 |         <name>io/airlift/units/DataSize.convertToMostSuccinctDataSize:()Lio/airlift/units/DataSize;</name>
136 |         <version>1.8</version>
137 |         <comment>Use io.airlift.units.DataSize.succinct()</comment>
138 |     </violation>
139 |
140 |     <violation>
141 |         <name>io/airlift/testing/Closeables.closeQuietly:([Ljava/io/Closeable;)V</name>
142 |         <version>1.0</version>
143 |         <comment>Use Closeables.closeAll() or Closer.</comment>
144 |     </violation>
145 |
146 |     <violation>
147 |         <name>com/google/inject/util/Modules.combine:(Ljava/lang/Iterable;)Lcom/google/inject/Module;</name>
148 |         <version>1.8</version>
149 |         <comment>Use io.airlift.configuration.ConfigurationAwareModule.combine</comment>
150 |     </violation>
151 |
152 |     <violation>
153 |         <name>com/google/inject/util/Modules.combine:([Lcom/google/inject/Module;)Lcom/google/inject/Module;</name>
154 |         <version>1.8</version>
155 |         <comment>Use io.airlift.configuration.ConfigurationAwareModule.combine</comment>
156 |     </violation>
157 | </modernizer>
158 |
--------------------------------------------------------------------------------
/.mvn/wrapper/maven-wrapper.properties:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 | wrapperVersion=3.3.2
18 | distributionType=only-script
19 | distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.9.8/apache-maven-3.9.8-bin.zip
20 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Trino Storage Connector [](https://github.com/snowlift/trino-storage/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster)
2 | This is a [Trino](http://trino.io/) connector to access single files (e.g. CSV, TSV). Please keep in mind that it is not production-ready and was created for testing.
3 |
4 | # Supported schemes
5 | - hdfs
6 | - s3a
7 | - file
8 | - http
9 | - https
10 |
11 | > Note: reading local files (with the `file` scheme or no scheme at all) can be disabled by setting `allow-local-files=false` in the catalog configuration, as in the example below.
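
A minimal catalog file sketch (the path `etc/catalog/storage.properties` is an arbitrary choice; `connector.name=storage` and `allow-local-files` come from this connector's `StorageConnectorFactory` and `StorageConfig`):

```
connector.name=storage
allow-local-files=false
```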
12 |
13 | # Query
14 | You need to specify the file type as the schema name and use an absolute path.
15 | ```sql
16 | SELECT * FROM
17 | storage.csv."file:///tmp/numbers-2.csv";
18 |
19 | SELECT * FROM
20 | storage.csv."https://raw.githubusercontent.com/snowlift/trino-storage/master/src/test/resources/example-data/numbers-2.csv";
21 | ```
22 |
23 | The supported schemas are listed below (the `FileType` enum also defines `parquet` and `avro`).
24 | - `tsv`
25 | - `csv`
26 | - `ssv`
27 | - `txt`
28 | - `raw`
29 | - `excel`
30 | - `orc`
31 | - `json`
32 |
33 | The `csv` plugin splits each line into columns separated by `,`. Currently, the first line is used for the column names.
34 | ```sql
35 | SELECT * FROM
36 | storage.csv."https://raw.githubusercontent.com/snowlift/trino-storage/master/src/test/resources/example-data/numbers-2.csv";
37 | ```
38 | ```
39 | ten | 10
40 | --------+----
41 | eleven | 11
42 | twelve | 12
43 | (2 rows)
44 | ```
45 |
46 | Tab (`\t`) and semicolon (`;`) delimiters are also supported, using the `tsv` and `ssv` plugins, respectively.
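
For example, a query of the same shape as the `csv` examples reads the tab-separated sample file in this repository via the `tsv` plugin:

```sql
SELECT * FROM
storage.tsv."https://raw.githubusercontent.com/snowlift/trino-storage/master/src/test/resources/example-data/numbers.tsv";
```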
47 |
48 | The `txt` plugin doesn't split lines into columns; each line becomes a row with a single column. Currently, the column name is always `value`.
49 | ```sql
50 | SELECT * FROM
51 | storage.txt."https://raw.githubusercontent.com/snowlift/trino-storage/master/src/test/resources/example-data/numbers.tsv";
52 | ```
53 | ```
54 | value
55 | --------
56 | one 1
57 | two 2
58 | three 3
59 | (3 rows)
60 | ```
61 |
62 | The `raw` plugin doesn't split the file at all. Currently, the column name is always `data`. This plugin is similar to the `txt` plugin.
63 | The main difference is that the `txt` plugin may return multiple rows, while the `raw` plugin always returns a single row.
64 | ```sql
65 | SELECT * FROM
66 | storage.raw."https://raw.githubusercontent.com/snowlift/trino-storage/master/src/test/resources/example-data/numbers.tsv";
67 | ```
68 | ```
69 | data
70 | --------
71 | one 1
72 | two 2
73 | three 3
74 | (1 row)
75 | ```
76 |
77 | The `excel` plugin currently reads only the first sheet.
78 | ```sql
79 | SELECT * FROM
80 | storage.excel."https://raw.githubusercontent.com/snowlift/trino-storage/master/src/test/resources/example-data/sample.xlsx";
81 | ```
82 | ```
83 | data
84 | --------
85 | one 1
86 | two 2
87 | three 3
88 | (1 row)
89 | ```
90 |
91 | # Table functions
92 |
93 | The connector provides dedicated table functions to list directory contents and to read files.
94 | ```sql
95 | SELECT * FROM TABLE(storage.system.list('/tmp/trino-storage'));
96 | ```
97 | ```
98 | file_modified_time | size | name
99 | -----------------------------+------+-----------------------------
100 | 2023-05-03 12:14:22.107 UTC | 12 | /tmp/trino-storage/test.txt
101 | ```
102 |
103 | ```sql
104 | SELECT * FROM TABLE(storage.system.read_file('csv', '/tmp/trino-storage/test.txt'));
105 | ```
106 | ```
107 | col
108 | -------------
109 | hello world
110 | ```
111 |
112 | # Build
113 | Run all the unit test classes.
114 | ```
115 | ./mvnw test
116 | ```
117 |
118 | Build without running tests
119 | ```
120 | ./mvnw clean install -DskipTests
121 | ```
122 |
123 | > Note: the tests include integration tests that run Minio and HDFS as Docker containers. They need to pull their images,
124 | > which can take a while. If the tests appear to be stuck, try pulling these images before starting the tests, to see the progress.
125 | > Look for the image names and versions in the `TestingMinioServer` and `TestingHadoopServer` test classes.
126 | > It is also required to set the `HADOOP_USER_NAME` environment variable to `hive`, for example as shown below.
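
A minimal sketch for a POSIX shell (the variable name and value come from this README and the CI workflow):

```
export HADOOP_USER_NAME=hive
./mvnw test
```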
127 |
128 | # Deploy
129 | Unarchive trino-storage-{version}.zip from the `target` directory and copy the contained jar files into your Trino cluster's plugin directory to use the storage connector.
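
For example, a sketch assuming `$TRINO_HOME` points at your Trino installation (this variable is not defined by the project):

```
unzip target/trino-storage-{version}.zip -d $TRINO_HOME/plugin/
```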
130 |
--------------------------------------------------------------------------------
/docker/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: '2'
2 | services:
3 |
4 |   hadoop-master:
5 |     hostname: hadoop-master
6 |     image: 'ghcr.io/trinodb/testing/hdp3.1-hive'
7 |     ports:
8 |       - '1180:1180'
9 |       - '8020:8020'
10 |       - '8042:8042'
11 |       - '8088:8088'
12 |       - '9000:9000'
13 |       - '9083:9083'
14 |       - '10000:10000'
15 |       - '19888:19888'
16 |       - '13306:3306'
17 |       - '50070:50070'
18 |       - '50075:50075'
19 |
--------------------------------------------------------------------------------
/mvnw:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # ----------------------------------------------------------------------------
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 | # ----------------------------------------------------------------------------
20 |
21 | # ----------------------------------------------------------------------------
22 | # Apache Maven Wrapper startup batch script, version 3.3.2
23 | #
24 | # Optional ENV vars
25 | # -----------------
26 | # JAVA_HOME - location of a JDK home dir, required when download maven via java source
27 | # MVNW_REPOURL - repo url base for downloading maven distribution
28 | # MVNW_USERNAME/MVNW_PASSWORD - user and password for downloading maven
29 | # MVNW_VERBOSE - true: enable verbose log; debug: trace the mvnw script; others: silence the output
30 | # ----------------------------------------------------------------------------
31 |
32 | set -euf
33 | [ "${MVNW_VERBOSE-}" != debug ] || set -x
34 |
35 | # OS specific support.
36 | native_path() { printf %s\\n "$1"; }
37 | case "$(uname)" in
38 | CYGWIN* | MINGW*)
39 | [ -z "${JAVA_HOME-}" ] || JAVA_HOME="$(cygpath --unix "$JAVA_HOME")"
40 | native_path() { cygpath --path --windows "$1"; }
41 | ;;
42 | esac
43 |
44 | # set JAVACMD and JAVACCMD
45 | set_java_home() {
46 | # For Cygwin and MinGW, ensure paths are in Unix format before anything is touched
47 | if [ -n "${JAVA_HOME-}" ]; then
48 | if [ -x "$JAVA_HOME/jre/sh/java" ]; then
49 | # IBM's JDK on AIX uses strange locations for the executables
50 | JAVACMD="$JAVA_HOME/jre/sh/java"
51 | JAVACCMD="$JAVA_HOME/jre/sh/javac"
52 | else
53 | JAVACMD="$JAVA_HOME/bin/java"
54 | JAVACCMD="$JAVA_HOME/bin/javac"
55 |
56 | if [ ! -x "$JAVACMD" ] || [ ! -x "$JAVACCMD" ]; then
57 | echo "The JAVA_HOME environment variable is not defined correctly, so mvnw cannot run." >&2
58 | echo "JAVA_HOME is set to \"$JAVA_HOME\", but \"\$JAVA_HOME/bin/java\" or \"\$JAVA_HOME/bin/javac\" does not exist." >&2
59 | return 1
60 | fi
61 | fi
62 | else
63 | JAVACMD="$(
64 | 'set' +e
65 | 'unset' -f command 2>/dev/null
66 | 'command' -v java
67 | )" || :
68 | JAVACCMD="$(
69 | 'set' +e
70 | 'unset' -f command 2>/dev/null
71 | 'command' -v javac
72 | )" || :
73 |
74 | if [ ! -x "${JAVACMD-}" ] || [ ! -x "${JAVACCMD-}" ]; then
75 | echo "The java/javac command does not exist in PATH nor is JAVA_HOME set, so mvnw cannot run." >&2
76 | return 1
77 | fi
78 | fi
79 | }
80 |
81 | # hash string like Java String::hashCode
82 | hash_string() {
83 | str="${1:-}" h=0
84 | while [ -n "$str" ]; do
85 | char="${str%"${str#?}"}"
86 | h=$(((h * 31 + $(LC_CTYPE=C printf %d "'$char")) % 4294967296))
87 | str="${str#?}"
88 | done
89 | printf %x\\n $h
90 | }
91 |
92 | verbose() { :; }
93 | [ "${MVNW_VERBOSE-}" != true ] || verbose() { printf %s\\n "${1-}"; }
94 |
95 | die() {
96 | printf %s\\n "$1" >&2
97 | exit 1
98 | }
99 |
100 | trim() {
101 | # MWRAPPER-139:
102 | # Trims trailing and leading whitespace, carriage returns, tabs, and linefeeds.
103 | # Needed for removing poorly interpreted newline sequences when running in more
104 | # exotic environments such as mingw bash on Windows.
105 | printf "%s" "${1}" | tr -d '[:space:]'
106 | }
107 |
108 | # parse distributionUrl and optional distributionSha256Sum, requires .mvn/wrapper/maven-wrapper.properties
109 | while IFS="=" read -r key value; do
110 | case "${key-}" in
111 | distributionUrl) distributionUrl=$(trim "${value-}") ;;
112 | distributionSha256Sum) distributionSha256Sum=$(trim "${value-}") ;;
113 | esac
114 | done <"${0%/*}/.mvn/wrapper/maven-wrapper.properties"
115 | [ -n "${distributionUrl-}" ] || die "cannot read distributionUrl property in ${0%/*}/.mvn/wrapper/maven-wrapper.properties"
116 |
117 | case "${distributionUrl##*/}" in
118 | maven-mvnd-*bin.*)
119 | MVN_CMD=mvnd.sh _MVNW_REPO_PATTERN=/maven/mvnd/
120 | case "${PROCESSOR_ARCHITECTURE-}${PROCESSOR_ARCHITEW6432-}:$(uname -a)" in
121 | *AMD64:CYGWIN* | *AMD64:MINGW*) distributionPlatform=windows-amd64 ;;
122 | :Darwin*x86_64) distributionPlatform=darwin-amd64 ;;
123 | :Darwin*arm64) distributionPlatform=darwin-aarch64 ;;
124 | :Linux*x86_64*) distributionPlatform=linux-amd64 ;;
125 | *)
126 | echo "Cannot detect native platform for mvnd on $(uname)-$(uname -m), use pure java version" >&2
127 | distributionPlatform=linux-amd64
128 | ;;
129 | esac
130 | distributionUrl="${distributionUrl%-bin.*}-$distributionPlatform.zip"
131 | ;;
132 | maven-mvnd-*) MVN_CMD=mvnd.sh _MVNW_REPO_PATTERN=/maven/mvnd/ ;;
133 | *) MVN_CMD="mvn${0##*/mvnw}" _MVNW_REPO_PATTERN=/org/apache/maven/ ;;
134 | esac
135 |
136 | # apply MVNW_REPOURL and calculate MAVEN_HOME
137 | # maven home pattern: ~/.m2/wrapper/dists/{apache-maven-<version>,maven-mvnd-<version>-<platform>}/<hash>
138 | [ -z "${MVNW_REPOURL-}" ] || distributionUrl="$MVNW_REPOURL$_MVNW_REPO_PATTERN${distributionUrl#*"$_MVNW_REPO_PATTERN"}"
139 | distributionUrlName="${distributionUrl##*/}"
140 | distributionUrlNameMain="${distributionUrlName%.*}"
141 | distributionUrlNameMain="${distributionUrlNameMain%-bin}"
142 | MAVEN_USER_HOME="${MAVEN_USER_HOME:-${HOME}/.m2}"
143 | MAVEN_HOME="${MAVEN_USER_HOME}/wrapper/dists/${distributionUrlNameMain-}/$(hash_string "$distributionUrl")"
144 |
145 | exec_maven() {
146 | unset MVNW_VERBOSE MVNW_USERNAME MVNW_PASSWORD MVNW_REPOURL || :
147 | exec "$MAVEN_HOME/bin/$MVN_CMD" "$@" || die "cannot exec $MAVEN_HOME/bin/$MVN_CMD"
148 | }
149 |
150 | if [ -d "$MAVEN_HOME" ]; then
151 | verbose "found existing MAVEN_HOME at $MAVEN_HOME"
152 | exec_maven "$@"
153 | fi
154 |
155 | case "${distributionUrl-}" in
156 | *?-bin.zip | *?maven-mvnd-?*-?*.zip) ;;
157 | *) die "distributionUrl is not valid, must match *-bin.zip or maven-mvnd-*.zip, but found '${distributionUrl-}'" ;;
158 | esac
159 |
160 | # prepare tmp dir
161 | if TMP_DOWNLOAD_DIR="$(mktemp -d)" && [ -d "$TMP_DOWNLOAD_DIR" ]; then
162 | clean() { rm -rf -- "$TMP_DOWNLOAD_DIR"; }
163 | trap clean HUP INT TERM EXIT
164 | else
165 | die "cannot create temp dir"
166 | fi
167 |
168 | mkdir -p -- "${MAVEN_HOME%/*}"
169 |
170 | # Download and Install Apache Maven
171 | verbose "Couldn't find MAVEN_HOME, downloading and installing it ..."
172 | verbose "Downloading from: $distributionUrl"
173 | verbose "Downloading to: $TMP_DOWNLOAD_DIR/$distributionUrlName"
174 |
175 | # select .zip or .tar.gz
176 | if ! command -v unzip >/dev/null; then
177 | distributionUrl="${distributionUrl%.zip}.tar.gz"
178 | distributionUrlName="${distributionUrl##*/}"
179 | fi
180 |
181 | # verbose opt
182 | __MVNW_QUIET_WGET=--quiet __MVNW_QUIET_CURL=--silent __MVNW_QUIET_UNZIP=-q __MVNW_QUIET_TAR=''
183 | [ "${MVNW_VERBOSE-}" != true ] || __MVNW_QUIET_WGET='' __MVNW_QUIET_CURL='' __MVNW_QUIET_UNZIP='' __MVNW_QUIET_TAR=v
184 |
185 | # normalize http auth
186 | case "${MVNW_PASSWORD:+has-password}" in
187 | '') MVNW_USERNAME='' MVNW_PASSWORD='' ;;
188 | has-password) [ -n "${MVNW_USERNAME-}" ] || MVNW_USERNAME='' MVNW_PASSWORD='' ;;
189 | esac
190 |
191 | if [ -z "${MVNW_USERNAME-}" ] && command -v wget >/dev/null; then
192 | verbose "Found wget ... using wget"
193 | wget ${__MVNW_QUIET_WGET:+"$__MVNW_QUIET_WGET"} "$distributionUrl" -O "$TMP_DOWNLOAD_DIR/$distributionUrlName" || die "wget: Failed to fetch $distributionUrl"
194 | elif [ -z "${MVNW_USERNAME-}" ] && command -v curl >/dev/null; then
195 | verbose "Found curl ... using curl"
196 | curl ${__MVNW_QUIET_CURL:+"$__MVNW_QUIET_CURL"} -f -L -o "$TMP_DOWNLOAD_DIR/$distributionUrlName" "$distributionUrl" || die "curl: Failed to fetch $distributionUrl"
197 | elif set_java_home; then
198 | verbose "Falling back to use Java to download"
199 | javaSource="$TMP_DOWNLOAD_DIR/Downloader.java"
200 | targetZip="$TMP_DOWNLOAD_DIR/$distributionUrlName"
201 | cat >"$javaSource" <<-END
202 | public class Downloader extends java.net.Authenticator
203 | {
204 | protected java.net.PasswordAuthentication getPasswordAuthentication()
205 | {
206 | return new java.net.PasswordAuthentication( System.getenv( "MVNW_USERNAME" ), System.getenv( "MVNW_PASSWORD" ).toCharArray() );
207 | }
208 | public static void main( String[] args ) throws Exception
209 | {
210 | setDefault( new Downloader() );
211 | java.nio.file.Files.copy( java.net.URI.create( args[0] ).toURL().openStream(), java.nio.file.Paths.get( args[1] ).toAbsolutePath().normalize() );
212 | }
213 | }
214 | END
215 | # For Cygwin/MinGW, switch paths to Windows format before running javac and java
216 | verbose " - Compiling Downloader.java ..."
217 | "$(native_path "$JAVACCMD")" "$(native_path "$javaSource")" || die "Failed to compile Downloader.java"
218 | verbose " - Running Downloader.java ..."
219 | "$(native_path "$JAVACMD")" -cp "$(native_path "$TMP_DOWNLOAD_DIR")" Downloader "$distributionUrl" "$(native_path "$targetZip")"
220 | fi
221 |
222 | # If specified, validate the SHA-256 sum of the Maven distribution zip file
223 | if [ -n "${distributionSha256Sum-}" ]; then
224 | distributionSha256Result=false
225 | if [ "$MVN_CMD" = mvnd.sh ]; then
226 | echo "Checksum validation is not supported for maven-mvnd." >&2
227 | echo "Please disable validation by removing 'distributionSha256Sum' from your maven-wrapper.properties." >&2
228 | exit 1
229 | elif command -v sha256sum >/dev/null; then
230 | if echo "$distributionSha256Sum $TMP_DOWNLOAD_DIR/$distributionUrlName" | sha256sum -c >/dev/null 2>&1; then
231 | distributionSha256Result=true
232 | fi
233 | elif command -v shasum >/dev/null; then
234 | if echo "$distributionSha256Sum $TMP_DOWNLOAD_DIR/$distributionUrlName" | shasum -a 256 -c >/dev/null 2>&1; then
235 | distributionSha256Result=true
236 | fi
237 | else
238 | echo "Checksum validation was requested but neither 'sha256sum' or 'shasum' are available." >&2
239 | echo "Please install either command, or disable validation by removing 'distributionSha256Sum' from your maven-wrapper.properties." >&2
240 | exit 1
241 | fi
242 | if [ $distributionSha256Result = false ]; then
243 | echo "Error: Failed to validate Maven distribution SHA-256, your Maven distribution might be compromised." >&2
244 | echo "If you updated your Maven version, you need to update the specified distributionSha256Sum property." >&2
245 | exit 1
246 | fi
247 | fi
248 |
249 | # unzip and move
250 | if command -v unzip >/dev/null; then
251 | unzip ${__MVNW_QUIET_UNZIP:+"$__MVNW_QUIET_UNZIP"} "$TMP_DOWNLOAD_DIR/$distributionUrlName" -d "$TMP_DOWNLOAD_DIR" || die "failed to unzip"
252 | else
253 | tar xzf${__MVNW_QUIET_TAR:+"$__MVNW_QUIET_TAR"} "$TMP_DOWNLOAD_DIR/$distributionUrlName" -C "$TMP_DOWNLOAD_DIR" || die "failed to untar"
254 | fi
255 | printf %s\\n "$distributionUrl" >"$TMP_DOWNLOAD_DIR/$distributionUrlNameMain/mvnw.url"
256 | mv -- "$TMP_DOWNLOAD_DIR/$distributionUrlNameMain" "$MAVEN_HOME" || [ -d "$MAVEN_HOME" ] || die "fail to move MAVEN_HOME"
257 |
258 | clean || :
259 | exec_maven "$@"
260 |
--------------------------------------------------------------------------------
/src/main/java/org/ebyhr/trino/storage/ByteResponseHandler.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * http://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 | package org.ebyhr.trino.storage;
15 |
16 | import io.airlift.http.client.Request;
17 | import io.airlift.http.client.Response;
18 | import io.airlift.http.client.ResponseHandler;
19 |
20 | import static io.airlift.http.client.ResponseHandlerUtils.propagate;
21 | import static io.airlift.http.client.ResponseHandlerUtils.readResponseBytes;
22 |
23 | public class ByteResponseHandler
24 | implements ResponseHandler<ByteResponse, RuntimeException>
25 | {
26 | private static final ByteResponseHandler BYTE_RESPONSE_HANDLER = new ByteResponseHandler();
27 |
28 | public static ByteResponseHandler createByteResponseHandler()
29 | {
30 | return BYTE_RESPONSE_HANDLER;
31 | }
32 |
33 | private ByteResponseHandler() {}
34 |
35 | @Override
36 | public ByteResponse handleException(Request request, Exception exception)
37 | {
38 | throw propagate(request, exception);
39 | }
40 |
41 | @Override
42 | public ByteResponse handle(Request request, Response response)
43 | {
44 | byte[] bytes = readResponseBytes(request, response);
45 | return new ByteResponse(response.getStatusCode(), bytes);
46 | }
47 |
48 | public static class ByteResponse
49 | {
50 | private final int statusCode;
51 | private final byte[] body;
52 |
53 | public ByteResponse(int statusCode, byte[] body)
54 | {
55 | this.statusCode = statusCode;
56 | this.body = body;
57 | }
58 |
59 | public int getStatusCode()
60 | {
61 | return statusCode;
62 | }
63 |
64 | public byte[] getBody()
65 | {
66 | return body;
67 | }
68 | }
69 | }
70 |
--------------------------------------------------------------------------------
/src/main/java/org/ebyhr/trino/storage/FileType.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * http://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 | package org.ebyhr.trino.storage;
15 |
16 | import static java.util.Locale.ENGLISH;
17 |
18 | public enum FileType
19 | {
20 | CSV, SSV, TSV, TXT, RAW, EXCEL, ORC, PARQUET, JSON, AVRO;
21 |
22 | @Override
23 | public String toString()
24 | {
25 | return name().toLowerCase(ENGLISH);
26 | }
27 | }
28 |
--------------------------------------------------------------------------------
/src/main/java/org/ebyhr/trino/storage/ForStorage.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * http://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 | package org.ebyhr.trino.storage;
15 |
16 | import com.google.inject.BindingAnnotation;
17 |
18 | import javax.inject.Qualifier;
19 |
20 | import java.lang.annotation.Retention;
21 | import java.lang.annotation.Target;
22 |
23 | import static java.lang.annotation.ElementType.FIELD;
24 | import static java.lang.annotation.ElementType.METHOD;
25 | import static java.lang.annotation.ElementType.PARAMETER;
26 | import static java.lang.annotation.RetentionPolicy.RUNTIME;
27 |
28 | @Retention(RUNTIME)
29 | @Target({FIELD, PARAMETER, METHOD})
30 | @Qualifier
31 | @BindingAnnotation
32 | public @interface ForStorage
33 | {
34 | }
35 |
--------------------------------------------------------------------------------
/src/main/java/org/ebyhr/trino/storage/ListPageSource.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * http://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 | package org.ebyhr.trino.storage;
15 |
16 | import io.airlift.slice.Slices;
17 | import io.trino.filesystem.FileEntry;
18 | import io.trino.filesystem.FileIterator;
19 | import io.trino.spi.Page;
20 | import io.trino.spi.PageBuilder;
21 | import io.trino.spi.connector.ColumnHandle;
22 | import io.trino.spi.connector.ConnectorPageSource;
23 | import io.trino.spi.connector.ConnectorSession;
24 |
25 | import java.io.IOException;
26 | import java.io.UncheckedIOException;
27 | import java.util.List;
28 |
29 | import static io.trino.spi.type.BigintType.BIGINT;
30 | import static io.trino.spi.type.DateTimeEncoding.packDateTimeWithZone;
31 | import static io.trino.spi.type.TimeZoneKey.UTC_KEY;
32 | import static io.trino.spi.type.VarcharType.VARCHAR;
33 |
34 | public class ListPageSource
35 | implements ConnectorPageSource
36 | {
37 | private final List<? extends ColumnHandle> columns;
38 | private final long readTimeNanos;
39 | private final FileIterator fileStatuses;
40 | private boolean done;
41 |
42 | public ListPageSource(StorageClient storageClient, ConnectorSession session, String path, List<? extends ColumnHandle> columns)
43 | {
44 | this.columns = columns;
45 | long start = System.nanoTime();
46 | this.fileStatuses = storageClient.list(session, path);
47 | readTimeNanos = System.nanoTime() - start;
48 | }
49 |
50 | @Override
51 | public long getCompletedBytes()
52 | {
53 | return 0;
54 | }
55 |
56 | @Override
57 | public long getReadTimeNanos()
58 | {
59 | return readTimeNanos;
60 | }
61 |
62 | @Override
63 | public boolean isFinished()
64 | {
65 | return done;
66 | }
67 |
68 | @Override
69 | public Page getNextPage()
70 | {
71 | if (done) {
72 | return null;
73 | }
74 |
75 | done = true;
76 |
77 | PageBuilder page = new PageBuilder(columns.stream().map(column -> ((StorageColumnHandle) column).getType()).toList());
78 | try {
79 | while (fileStatuses.hasNext()) {
80 | FileEntry status = fileStatuses.next();
81 | page.declarePosition();
82 | for (int i = 0; i < columns.size(); i++) {
83 | StorageColumnHandle column = (StorageColumnHandle) columns.get(i);
84 | switch (column.getName()) {
85 | case "file_modified_time" -> BIGINT.writeLong(page.getBlockBuilder(i), packDateTimeWithZone(status.lastModified().toEpochMilli(), UTC_KEY));
86 | case "size" -> BIGINT.writeLong(page.getBlockBuilder(i), status.length());
87 | case "name" -> VARCHAR.writeSlice(page.getBlockBuilder(i), Slices.utf8Slice(status.location().toString()));
88 | default -> throw new IllegalStateException("Unknown column name " + column.getName());
89 | }
90 | }
91 | }
92 | }
93 | catch (IOException e) {
94 | throw new UncheckedIOException(e);
95 | }
96 | return page.build();
97 | }
98 |
99 | @Override
100 | public long getMemoryUsage()
101 | {
102 | return 0;
103 | }
104 |
105 | @Override
106 | public void close() {}
107 | }
108 |
--------------------------------------------------------------------------------
/src/main/java/org/ebyhr/trino/storage/StorageClient.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * http://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 | package org.ebyhr.trino.storage;
15 |
16 | import com.google.inject.Inject;
17 | import io.airlift.http.client.HttpClient;
18 | import io.airlift.http.client.HttpStatus;
19 | import io.airlift.http.client.Request;
20 | import io.airlift.log.Logger;
21 | import io.trino.filesystem.FileIterator;
22 | import io.trino.filesystem.Location;
23 | import io.trino.filesystem.TrinoFileSystemFactory;
24 | import io.trino.filesystem.local.LocalFileSystem;
25 | import io.trino.spi.TrinoException;
26 | import io.trino.spi.connector.ConnectorSession;
27 | import io.trino.spi.type.VarcharType;
28 | import org.ebyhr.trino.storage.operator.FilePlugin;
29 | import org.ebyhr.trino.storage.operator.PluginFactory;
30 |
31 | import java.io.ByteArrayInputStream;
32 | import java.io.IOException;
33 | import java.io.InputStream;
34 | import java.io.UncheckedIOException;
35 | import java.net.URI;
36 | import java.nio.file.Path;
37 | import java.util.HashSet;
38 | import java.util.List;
39 | import java.util.Set;
40 | import java.util.stream.Collectors;
41 | import java.util.stream.Stream;
42 |
43 | import static io.airlift.http.client.Request.Builder.prepareGet;
44 | import static io.trino.spi.StandardErrorCode.PERMISSION_DENIED;
45 | import static java.lang.String.format;
46 | import static java.util.Objects.requireNonNull;
47 | import static org.ebyhr.trino.storage.ByteResponseHandler.createByteResponseHandler;
48 | import static org.ebyhr.trino.storage.ptf.ListTableFunction.LIST_SCHEMA_NAME;
49 |
50 | public class StorageClient
51 | {
52 | private static final Logger log = Logger.get(StorageClient.class);
53 |
54 | private final TrinoFileSystemFactory fileSystemFactory;
55 | private final HttpClient httpClient;
56 | private final boolean allowLocalFiles;
57 |
58 | @Inject
59 | public StorageClient(TrinoFileSystemFactory fileSystemFactory, @ForStorage HttpClient httpClient, StorageConfig storageConfig)
60 | {
61 | this.fileSystemFactory = requireNonNull(fileSystemFactory, "fileSystemFactory is null");
62 | this.httpClient = requireNonNull(httpClient, "httpClient is null");
63 | this.allowLocalFiles = requireNonNull(storageConfig, "storageConfig is null").getAllowLocalFiles();
64 | }
65 |
66 | public List<String> getSchemaNames()
67 | {
68 | return Stream.of(FileType.values())
69 | .map(FileType::toString)
70 | .collect(Collectors.toList());
71 | }
72 |
73 | public Set<String> getTableNames(String schema)
74 | {
75 | requireNonNull(schema, "schema is null");
76 | return new HashSet<>();
77 | }
78 |
79 | public StorageTable getTable(ConnectorSession session, String schema, String tableName)
80 | {
81 | requireNonNull(schema, "schema is null");
82 | requireNonNull(tableName, "tableName is null");
83 |
84 | if (isLocalFile(tableName) && !allowLocalFiles) {
85 | throw new TrinoException(PERMISSION_DENIED, "Reading local files is disabled");
86 | }
87 | if (schema.equals(LIST_SCHEMA_NAME)) {
88 | return new StorageTable(StorageSplit.Mode.LIST, tableName, List.of(new StorageColumnHandle("path", VarcharType.VARCHAR)));
89 | }
90 |
91 | FilePlugin plugin = PluginFactory.create(schema);
92 | try {
93 | List<StorageColumnHandle> columns = plugin.getFields(tableName, path -> getInputStream(session, path));
94 | return new StorageTable(StorageSplit.Mode.TABLE, tableName, columns);
95 | }
96 | catch (Exception e) {
97 | log.error(e, "Failed to get table: %s.%s", schema, tableName);
98 | return null;
99 | }
100 | }
101 |
102 | private boolean isLocalFile(String path)
103 | {
104 | return path.startsWith("file:") || !(
105 | path.startsWith("http://") || path.startsWith("https://")
106 | || path.startsWith("hdfs://") || path.startsWith("s3a://") || path.startsWith("s3://"));
107 | }
108 |
109 | public InputStream getInputStream(ConnectorSession session, String path)
110 | {
111 | try {
112 | if (path.startsWith("http://") || path.startsWith("https://")) {
113 | Request request = prepareGet().setUri(URI.create(path)).build();
114 | ByteResponseHandler.ByteResponse response = httpClient.execute(request, createByteResponseHandler());
115 | int status = response.getStatusCode();
116 | if (status != HttpStatus.OK.code()) {
117 | throw new IllegalStateException(format("Request to '%s' returned unexpected status code: '%d'", path, status));
118 | }
119 | return new ByteArrayInputStream(response.getBody());
120 | }
121 | if (path.startsWith("hdfs://") || path.startsWith("s3a://") || path.startsWith("s3://")) {
122 | return fileSystemFactory.create(session).newInputFile(Location.of(path)).newStream();
123 | }
124 |
125 | if (!allowLocalFiles) {
126 | throw new TrinoException(PERMISSION_DENIED, "Reading local files is disabled");
127 | }
128 | if (!path.startsWith("file:")) {
129 | path = "file:" + path;
130 | }
131 | return URI.create(path).toURL().openStream();
132 | }
133 | catch (IOException e) {
134 | throw new UncheckedIOException(format("Failed to open stream for %s", path), e);
135 | }
136 | }
137 |
138 | public FileIterator list(ConnectorSession session, String path)
139 | {
140 | try {
141 | if (path.startsWith("http://") || path.startsWith("https://")) {
142 | throw new IllegalArgumentException("Listing files over HTTP is not supported");
143 | }
144 | if (path.startsWith("hdfs://") || path.startsWith("s3a://") || path.startsWith("s3://")) {
145 | return fileSystemFactory.create(session).listFiles(Location.of(path));
146 | }
147 | if (!allowLocalFiles) {
148 | throw new TrinoException(PERMISSION_DENIED, "Reading local files is disabled");
149 | }
150 | if (path.startsWith("file://")) {
151 | path = path.substring("file://".length());
152 | }
153 | else if (path.startsWith("file:")) {
154 | path = path.substring("file:".length());
155 | }
156 | return new LocalFileSystem(Path.of(path)).listFiles(Location.of("local:///"));
157 | }
158 | catch (IOException e) {
159 | throw new UncheckedIOException(e);
160 | }
161 | }
162 | }
163 |
--------------------------------------------------------------------------------
/src/main/java/org/ebyhr/trino/storage/StorageColumnHandle.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * http://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 | package org.ebyhr.trino.storage;
15 |
16 | import com.fasterxml.jackson.annotation.JsonCreator;
17 | import com.fasterxml.jackson.annotation.JsonProperty;
18 | import io.trino.spi.connector.ColumnHandle;
19 | import io.trino.spi.connector.ColumnMetadata;
20 | import io.trino.spi.type.Type;
21 |
22 | import java.util.Objects;
23 |
24 | import static com.google.common.base.MoreObjects.toStringHelper;
25 | import static java.util.Objects.requireNonNull;
26 |
27 | public final class StorageColumnHandle
28 | implements ColumnHandle
29 | {
30 | private final String name;
31 | private final Type type;
32 |
33 | @JsonCreator
34 | public StorageColumnHandle(
35 | @JsonProperty("name") String name,
36 | @JsonProperty("type") Type type)
37 | {
38 | this.name = requireNonNull(name, "name is null");
39 | this.type = requireNonNull(type, "type is null");
40 | }
41 |
42 | @JsonProperty
43 | public String getName()
44 | {
45 | return name;
46 | }
47 |
48 | @JsonProperty
49 | public Type getType()
50 | {
51 | return type;
52 | }
53 |
54 | public ColumnMetadata getColumnMetadata()
55 | {
56 | return new ColumnMetadata(name, type);
57 | }
58 |
59 | @Override
60 | public int hashCode()
61 | {
62 | return Objects.hash(name);
63 | }
64 |
65 | @Override
66 | public boolean equals(Object obj)
67 | {
68 | if (this == obj) {
69 | return true;
70 | }
71 | if ((obj == null) || (getClass() != obj.getClass())) {
72 | return false;
73 | }
74 |
75 | StorageColumnHandle other = (StorageColumnHandle) obj;
76 | return Objects.equals(this.name, other.name);
77 | }
78 |
79 | @Override
80 | public String toString()
81 | {
82 | return toStringHelper(this)
83 | .add("name", name)
84 | .add("type", type)
85 | .toString();
86 | }
87 | }
88 |
--------------------------------------------------------------------------------
/src/main/java/org/ebyhr/trino/storage/StorageConfig.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * http://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 |
15 | package org.ebyhr.trino.storage;
16 |
17 | import io.airlift.configuration.Config;
18 | import io.airlift.configuration.ConfigDescription;
19 |
20 | public class StorageConfig
21 | {
22 | private boolean allowLocalFiles = true;
23 |
24 | public boolean getAllowLocalFiles()
25 | {
26 | return allowLocalFiles;
27 | }
28 |
29 | @Config("allow-local-files")
30 | @ConfigDescription("If true, allow reading local files")
31 | public StorageConfig setAllowLocalFiles(boolean allowLocalFiles)
32 | {
33 | this.allowLocalFiles = allowLocalFiles;
34 | return this;
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/src/main/java/org/ebyhr/trino/storage/StorageConnector.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * http://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 | package org.ebyhr.trino.storage;
15 |
16 | import com.google.common.collect.ImmutableSet;
17 | import com.google.inject.Inject;
18 | import io.airlift.bootstrap.LifeCycleManager;
19 | import io.airlift.log.Logger;
20 | import io.trino.spi.connector.Connector;
21 | import io.trino.spi.connector.ConnectorMetadata;
22 | import io.trino.spi.connector.ConnectorPageSourceProvider;
23 | import io.trino.spi.connector.ConnectorSession;
24 | import io.trino.spi.connector.ConnectorSplitManager;
25 | import io.trino.spi.connector.ConnectorTransactionHandle;
26 | import io.trino.spi.function.table.ConnectorTableFunction;
27 | import io.trino.spi.transaction.IsolationLevel;
28 |
29 | import java.util.Set;
30 |
31 | import static java.util.Objects.requireNonNull;
32 | import static org.ebyhr.trino.storage.StorageTransactionHandle.INSTANCE;
33 |
34 | public class StorageConnector
35 | implements Connector
36 | {
37 | private static final Logger log = Logger.get(StorageConnector.class);
38 |
39 | private final LifeCycleManager lifeCycleManager;
40 | private final StorageMetadata metadata;
41 | private final StorageSplitManager splitManager;
42 | private final StoragePageSourceProvider pageSourceProvider;
43 | private final Set<ConnectorTableFunction> connectorTableFunctions;
44 |
45 | @Inject
46 | public StorageConnector(
47 | LifeCycleManager lifeCycleManager,
48 | StorageMetadata metadata,
49 | StorageSplitManager splitManager,
50 | StoragePageSourceProvider pageSourceProvider,
51 | Set<ConnectorTableFunction> connectorTableFunctions)
52 | {
53 | this.lifeCycleManager = requireNonNull(lifeCycleManager, "lifeCycleManager is null");
54 | this.metadata = requireNonNull(metadata, "metadata is null");
55 | this.splitManager = requireNonNull(splitManager, "splitManager is null");
56 | this.pageSourceProvider = requireNonNull(pageSourceProvider, "pageSourceProvider is null");
57 | this.connectorTableFunctions = ImmutableSet.copyOf(requireNonNull(connectorTableFunctions, "connectorTableFunctions is null"));
58 | }
59 |
60 | @Override
61 | public ConnectorTransactionHandle beginTransaction(IsolationLevel isolationLevel, boolean readOnly, boolean autoCommit)
62 | {
63 | return INSTANCE;
64 | }
65 |
66 | @Override
67 | public ConnectorMetadata getMetadata(ConnectorSession session, ConnectorTransactionHandle transactionHandle)
68 | {
69 | return metadata;
70 | }
71 |
72 | @Override
73 | public ConnectorSplitManager getSplitManager()
74 | {
75 | return splitManager;
76 | }
77 |
78 | @Override
79 | public ConnectorPageSourceProvider getPageSourceProvider()
80 | {
81 | return pageSourceProvider;
82 | }
83 |
84 | @Override
85 | public Set<ConnectorTableFunction> getTableFunctions()
86 | {
87 | return connectorTableFunctions;
88 | }
89 |
90 | @Override
91 | public final void shutdown()
92 | {
93 | try {
94 | lifeCycleManager.stop();
95 | }
96 | catch (Exception e) {
97 | log.error(e, "Error shutting down connector");
98 | }
99 | }
100 | }
101 |
--------------------------------------------------------------------------------
/src/main/java/org/ebyhr/trino/storage/StorageConnectorFactory.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * http://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 | package org.ebyhr.trino.storage;
15 |
16 | import com.google.inject.Injector;
17 | import io.airlift.bootstrap.Bootstrap;
18 | import io.airlift.json.JsonModule;
19 | import io.trino.hdfs.HdfsModule;
20 | import io.trino.hdfs.authentication.HdfsAuthenticationModule;
21 | import io.trino.hdfs.azure.HiveAzureModule;
22 | import io.trino.hdfs.gcs.HiveGcsModule;
23 | import io.trino.hdfs.s3.HiveS3Module;
24 | import io.trino.spi.connector.Connector;
25 | import io.trino.spi.connector.ConnectorContext;
26 | import io.trino.spi.connector.ConnectorFactory;
27 |
28 | import java.util.Map;
29 |
30 | import static com.google.common.base.Throwables.throwIfUnchecked;
31 | import static java.util.Objects.requireNonNull;
32 |
33 | public class StorageConnectorFactory
34 | implements ConnectorFactory
35 | {
36 | @Override
37 | public String getName()
38 | {
39 | return "storage";
40 | }
41 |
42 | @Override
43 | public Connector create(String catalogName, Map<String, String> requiredConfig, ConnectorContext context)
44 | {
45 | requireNonNull(requiredConfig, "requiredConfig is null");
46 | try {
47 | // A plugin is not required to use Guice; it is just very convenient
48 | Bootstrap app = new Bootstrap(
49 | new JsonModule(),
50 | new StorageModule(context.getTypeManager()),
51 | new HdfsModule(),
52 | new HiveS3Module(),
53 | new HiveGcsModule(),
54 | new HiveAzureModule(),
55 | new HdfsAuthenticationModule());
56 |
57 | Injector injector = app
58 | .doNotInitializeLogging()
59 | .setRequiredConfigurationProperties(requiredConfig)
60 | .initialize();
61 |
62 | return injector.getInstance(StorageConnector.class);
63 | }
64 | catch (Exception e) {
65 | throwIfUnchecked(e);
66 | throw new RuntimeException(e);
67 | }
68 | }
69 | }
70 |
--------------------------------------------------------------------------------
/src/main/java/org/ebyhr/trino/storage/StorageMetadata.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * http://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 | package org.ebyhr.trino.storage;
15 |
16 | import com.google.common.collect.ImmutableMap;
17 | import com.google.inject.Inject;
18 | import io.trino.spi.StandardErrorCode;
19 | import io.trino.spi.TrinoException;
20 | import io.trino.spi.connector.ColumnHandle;
21 | import io.trino.spi.connector.ColumnMetadata;
22 | import io.trino.spi.connector.ConnectorMetadata;
23 | import io.trino.spi.connector.ConnectorSession;
24 | import io.trino.spi.connector.ConnectorTableHandle;
25 | import io.trino.spi.connector.ConnectorTableMetadata;
26 | import io.trino.spi.connector.ConnectorTableVersion;
27 | import io.trino.spi.connector.SchemaTableName;
28 | import io.trino.spi.connector.SchemaTablePrefix;
29 | import io.trino.spi.connector.TableColumnsMetadata;
30 | import io.trino.spi.connector.TableFunctionApplicationResult;
31 | import io.trino.spi.connector.TableNotFoundException;
32 | import io.trino.spi.function.table.ConnectorTableFunctionHandle;
33 | import org.ebyhr.trino.storage.ptf.ListTableFunction.QueryFunctionHandle;
34 | import org.ebyhr.trino.storage.ptf.ReadFileTableFunction.ReadFunctionHandle;
35 |
36 | import java.util.Iterator;
37 | import java.util.List;
38 | import java.util.Map;
39 | import java.util.Optional;
40 | import java.util.stream.Stream;
41 |
42 | import static com.google.common.collect.ImmutableList.toImmutableList;
43 | import static java.util.Objects.requireNonNull;
44 | import static org.ebyhr.trino.storage.ptf.ListTableFunction.COLUMNS_METADATA;
45 | import static org.ebyhr.trino.storage.ptf.ListTableFunction.COLUMN_HANDLES;
46 | import static org.ebyhr.trino.storage.ptf.ListTableFunction.LIST_SCHEMA_NAME;
47 |
48 | public class StorageMetadata
49 | implements ConnectorMetadata
50 | {
51 | private final StorageClient storageClient;
52 |
53 | @Inject
54 | public StorageMetadata(StorageClient storageClient)
55 | {
56 | this.storageClient = requireNonNull(storageClient, "storageClient is null");
57 | }
58 |
59 | @Override
60 | public List<String> listSchemaNames(ConnectorSession session)
61 | {
62 | return listSchemaNames();
63 | }
64 |
65 | public List<String> listSchemaNames()
66 | {
67 | return List.copyOf(storageClient.getSchemaNames());
68 | }
69 |
70 | @Override
71 | public StorageTableHandle getTableHandle(ConnectorSession session, SchemaTableName tableName, Optional<ConnectorTableVersion> startVersion, Optional<ConnectorTableVersion> endVersion)
72 | {
73 | if (startVersion.isPresent() || endVersion.isPresent()) {
74 | throw new TrinoException(StandardErrorCode.NOT_SUPPORTED, "This connector does not support versioned tables");
75 | }
76 | if (!listSchemaNames(session).contains(tableName.getSchemaName())) {
77 | return null;
78 | }
79 |
80 | StorageTable table = storageClient.getTable(session, tableName.getSchemaName(), tableName.getTableName());
81 | if (table == null) {
82 | return null;
83 | }
84 |
85 | return new StorageTableHandle(table.getMode(), tableName.getSchemaName(), tableName.getTableName());
86 | }
87 |
88 | @Override
89 | public ConnectorTableMetadata getTableMetadata(ConnectorSession session, ConnectorTableHandle table)
90 | {
91 | StorageTableHandle storageTableHandle = (StorageTableHandle) table;
92 | RemoteTableName tableName = new RemoteTableName(storageTableHandle.getSchemaName(), storageTableHandle.getTableName());
93 |
94 | return getStorageTableMetadata(session, tableName);
95 | }
96 |
97 | @Override
98 | public List<SchemaTableName> listTables(ConnectorSession session, Optional<String> schemaNameOrNull)
99 | {
100 | SchemaTablePrefix prefix = schemaNameOrNull
101 | .map(SchemaTablePrefix::new)
102 | .orElseGet(SchemaTablePrefix::new);
103 | return listTables(prefix).map(RemoteTableName::toSchemaTableName).collect(toImmutableList());
104 | }
105 |
106 | @Override
107 | public Map<String, ColumnHandle> getColumnHandles(ConnectorSession session, ConnectorTableHandle tableHandle)
108 | {
109 | StorageTableHandle storageTableHandle = (StorageTableHandle) tableHandle;
110 |
111 | StorageTable table = storageClient.getTable(session, storageTableHandle.getSchemaName(), storageTableHandle.getTableName());
112 | if (table == null) {
113 | throw new TableNotFoundException(storageTableHandle.toSchemaTableName());
114 | }
115 |
116 | ImmutableMap.Builder<String, ColumnHandle> columnHandles = ImmutableMap.builder();
117 | for (ColumnMetadata column : table.getColumnsMetadata()) {
118 | columnHandles.put(column.getName(), new StorageColumnHandle(column.getName(), column.getType()));
119 | }
120 | return columnHandles.build();
121 | }
122 |
123 | @Override
124 | public Map<SchemaTableName, List<ColumnMetadata>> listTableColumns(ConnectorSession session, SchemaTablePrefix prefix)
125 | {
126 | requireNonNull(prefix, "prefix is null");
127 | ImmutableMap.Builder<SchemaTableName, List<ColumnMetadata>> columns = ImmutableMap.builder();
128 | for (RemoteTableName tableName : listTables(prefix).toList()) {
129 | ConnectorTableMetadata tableMetadata = getStorageTableMetadata(session, tableName);
130 | // table can disappear during listing operation
131 | if (tableMetadata != null) {
132 | columns.put(tableName.toSchemaTableName(), tableMetadata.getColumns());
133 | }
134 | }
135 | return columns.build();
136 | }
137 |
138 | @Override
139 | public Iterator<TableColumnsMetadata> streamTableColumns(ConnectorSession session, SchemaTablePrefix prefix)
140 | {
141 | requireNonNull(prefix, "prefix is null");
142 | return listTables(prefix)
143 | .map(table -> TableColumnsMetadata.forTable(
144 | table.toSchemaTableName(),
145 | requireNonNull(getStorageTableMetadata(session, table), "tableMetadata is null")
146 | .getColumns()))
147 | .iterator();
148 | }
149 |
150 | private ConnectorTableMetadata getStorageTableMetadata(ConnectorSession session, RemoteTableName tableName)
151 | {
152 | if (tableName.schemaName().equals(LIST_SCHEMA_NAME)) {
153 | return new ConnectorTableMetadata(tableName.toSchemaTableName(), COLUMNS_METADATA);
154 | }
155 |
156 | if (!listSchemaNames().contains(tableName.schemaName())) {
157 | return null;
158 | }
159 |
160 | StorageTable table = storageClient.getTable(session, tableName.schemaName(), tableName.tableName());
161 | if (table == null) {
162 | return null;
163 | }
164 |
165 | return new ConnectorTableMetadata(tableName.toSchemaTableName(), table.getColumnsMetadata());
166 | }
167 |
168 | private Stream<RemoteTableName> listTables(SchemaTablePrefix prefix)
169 | {
170 | if (prefix.getSchema().isPresent() && prefix.getTable().isPresent()) {
171 | return Stream.of(new RemoteTableName(prefix.getSchema().get(), prefix.getTable().get()));
172 | }
173 |
174 | List<String> schemaNames = prefix.getSchema()
175 | .map(List::of)
176 | .orElseGet(storageClient::getSchemaNames);
177 |
178 | return schemaNames.stream()
179 | .flatMap(schemaName -> storageClient.getTableNames(schemaName).stream()
180 | .map(tableName -> new RemoteTableName(schemaName, tableName)));
181 | }
182 |
183 | @Override
184 | public ColumnMetadata getColumnMetadata(ConnectorSession session, ConnectorTableHandle tableHandle, ColumnHandle columnHandle)
185 | {
186 | return ((StorageColumnHandle) columnHandle).getColumnMetadata();
187 | }
188 |
189 | @Override
190 | public Optional<TableFunctionApplicationResult<ConnectorTableHandle>> applyTableFunction(ConnectorSession session, ConnectorTableFunctionHandle handle)
191 | {
192 | if (handle instanceof ReadFunctionHandle catFunctionHandle) {
193 | return Optional.of(new TableFunctionApplicationResult<>(
194 | catFunctionHandle.getTableHandle(),
195 | catFunctionHandle.getColumns().stream()
196 | .map(column -> new StorageColumnHandle(column.getName(), column.getType()))
197 | .collect(toImmutableList())));
198 | }
199 | if (handle instanceof QueryFunctionHandle queryFunctionHandle) {
200 | return Optional.of(new TableFunctionApplicationResult<>(queryFunctionHandle.getTableHandle(), COLUMN_HANDLES));
201 | }
202 | return Optional.empty();
203 | }
204 |
205 | /**
206 | * Simplified variant of {@link SchemaTableName} that doesn't case-fold.
207 | */
208 | private record RemoteTableName(String schemaName, String tableName)
209 | {
210 | public SchemaTableName toSchemaTableName()
211 | {
212 | return new SchemaTableName(schemaName(), tableName());
213 | }
214 | }
215 | }
216 |
--------------------------------------------------------------------------------
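Why StorageMetadata keeps its own RemoteTableName record: SchemaTableName canonicalizes (lower-cases) both of its parts, which would corrupt case-sensitive file names and URIs used as table names. A small sketch of the difference, assuming SchemaTableName still lower-cases in the targeted Trino version; the record is redeclared here only because the original is private to StorageMetadata:

    import io.trino.spi.connector.SchemaTableName;

    public class CaseFoldSketch
    {
        // Mirrors the private record in StorageMetadata, for illustration only
        record RemoteTableName(String schemaName, String tableName) {}

        public static void main(String[] args)
        {
            RemoteTableName remote = new RemoteTableName("csv", "MixedCase.csv");
            System.out.println(remote.tableName()); // MixedCase.csv, case preserved
            System.out.println(new SchemaTableName("csv", "MixedCase.csv").getTableName()); // mixedcase.csv
        }
    }
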
/src/main/java/org/ebyhr/trino/storage/StorageModule.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * http://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 | package org.ebyhr.trino.storage;
15 |
16 | import com.fasterxml.jackson.databind.DeserializationContext;
17 | import com.fasterxml.jackson.databind.deser.std.FromStringDeserializer;
18 | import com.google.inject.Binder;
19 | import com.google.inject.Inject;
20 | import com.google.inject.Module;
21 | import com.google.inject.Scopes;
22 | import io.airlift.http.client.HttpClientConfig;
23 | import io.opentelemetry.api.OpenTelemetry;
24 | import io.trino.filesystem.TrinoFileSystemFactory;
25 | import io.trino.filesystem.hdfs.HdfsFileSystemFactory;
26 | import io.trino.hdfs.TrinoHdfsFileSystemStats;
27 | import io.trino.spi.function.table.ConnectorTableFunction;
28 | import io.trino.spi.type.Type;
29 | import io.trino.spi.type.TypeManager;
30 | import io.trino.spi.type.TypeSignature;
31 | import org.ebyhr.trino.storage.ptf.ListTableFunction;
32 | import org.ebyhr.trino.storage.ptf.ReadFileTableFunction;
33 |
34 | import static com.google.inject.multibindings.Multibinder.newSetBinder;
35 | import static io.airlift.configuration.ConfigBinder.configBinder;
36 | import static io.airlift.http.client.HttpClientBinder.httpClientBinder;
37 | import static io.airlift.json.JsonBinder.jsonBinder;
38 | import static io.airlift.json.JsonCodec.listJsonCodec;
39 | import static io.airlift.json.JsonCodecBinder.jsonCodecBinder;
40 | import static java.util.Objects.requireNonNull;
41 |
42 | public class StorageModule
43 | implements Module
44 | {
45 | private final TypeManager typeManager;
46 |
47 | public StorageModule(TypeManager typeManager)
48 | {
49 | this.typeManager = requireNonNull(typeManager, "typeManager is null");
50 | }
51 |
52 | @Override
53 | public void configure(Binder binder)
54 | {
55 | binder.bind(TypeManager.class).toInstance(typeManager);
56 |
57 | binder.bind(StorageConnector.class).in(Scopes.SINGLETON);
58 | binder.bind(StorageMetadata.class).in(Scopes.SINGLETON);
59 | binder.bind(StorageClient.class).in(Scopes.SINGLETON);
60 | binder.bind(StorageSplitManager.class).in(Scopes.SINGLETON);
61 | binder.bind(StorageRecordSetProvider.class).in(Scopes.SINGLETON);
62 | binder.bind(StoragePageSourceProvider.class).in(Scopes.SINGLETON);
63 | newSetBinder(binder, ConnectorTableFunction.class).addBinding().toProvider(ReadFileTableFunction.class).in(Scopes.SINGLETON);
64 | newSetBinder(binder, ConnectorTableFunction.class).addBinding().toProvider(ListTableFunction.class).in(Scopes.SINGLETON);
65 | binder.bind(TrinoFileSystemFactory.class).to(HdfsFileSystemFactory.class).in(Scopes.SINGLETON);
66 | binder.bind(TrinoHdfsFileSystemStats.class).in(Scopes.SINGLETON);
67 | binder.bind(OpenTelemetry.class).toInstance(OpenTelemetry.noop());
68 | configBinder(binder).bindConfig(StorageConfig.class);
69 |
70 | jsonBinder(binder).addDeserializerBinding(Type.class).to(TypeDeserializer.class);
71 | jsonCodecBinder(binder).bindMapJsonCodec(String.class, listJsonCodec(StorageTable.class));
72 |
73 | configBinder(binder).bindConfig(HttpClientConfig.class, ForStorage.class);
74 | httpClientBinder(binder).bindHttpClient("storage", ForStorage.class);
75 | }
76 |
77 | public static final class TypeDeserializer
78 | extends FromStringDeserializer<Type>
79 | {
80 | private final TypeManager typeManager;
81 |
82 | @Inject
83 | public TypeDeserializer(TypeManager typeManager)
84 | {
85 | super(Type.class);
86 | this.typeManager = requireNonNull(typeManager, "typeManager is null");
87 | }
88 |
89 | @Override
90 | protected Type _deserialize(String value, DeserializationContext context)
91 | {
92 | return typeManager.getType(new TypeSignature(value));
93 | }
94 | }
95 | }
96 |
--------------------------------------------------------------------------------
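The TypeDeserializer above is what lets column types arrive in JSON as plain strings such as "varchar" and be resolved against the engine's type registry. The standalone helper below restates that one-line lookup; since a TypeManager is only available inside the engine, the caller must supply one (TypeLookupSketch and parseType are illustrative names):

    import io.trino.spi.type.Type;
    import io.trino.spi.type.TypeManager;
    import io.trino.spi.type.TypeSignature;

    public class TypeLookupSketch
    {
        // Same lookup as TypeDeserializer._deserialize, as a standalone helper
        static Type parseType(TypeManager typeManager, String value)
        {
            return typeManager.getType(new TypeSignature(value));
        }
    }
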
/src/main/java/org/ebyhr/trino/storage/StoragePageSourceProvider.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * http://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 | package org.ebyhr.trino.storage;
15 |
16 | import com.google.inject.Inject;
17 | import io.trino.spi.Page;
18 | import io.trino.spi.connector.ColumnHandle;
19 | import io.trino.spi.connector.ConnectorPageSource;
20 | import io.trino.spi.connector.ConnectorPageSourceProvider;
21 | import io.trino.spi.connector.ConnectorSession;
22 | import io.trino.spi.connector.ConnectorSplit;
23 | import io.trino.spi.connector.ConnectorTableHandle;
24 | import io.trino.spi.connector.ConnectorTransactionHandle;
25 | import io.trino.spi.connector.DynamicFilter;
26 | import io.trino.spi.connector.FixedPageSource;
27 | import io.trino.spi.connector.RecordPageSource;
28 | import io.trino.spi.connector.RecordSet;
29 | import org.ebyhr.trino.storage.operator.FilePlugin;
30 | import org.ebyhr.trino.storage.operator.PluginFactory;
31 |
32 | import java.util.List;
33 | import java.util.stream.StreamSupport;
34 |
35 | import static java.util.Objects.requireNonNull;
36 | import static java.util.stream.Collectors.toList;
37 | import static org.ebyhr.trino.storage.StorageSplit.Mode.LIST;
38 |
39 | public class StoragePageSourceProvider
40 | implements ConnectorPageSourceProvider
41 | {
42 | private final StorageClient storageClient;
43 | private final StorageRecordSetProvider recordSetProvider;
44 |
45 | @Inject
46 | public StoragePageSourceProvider(StorageClient storageClient, StorageRecordSetProvider recordSetProvider)
47 | {
48 | this.storageClient = requireNonNull(storageClient, "storageClient is null");
49 | this.recordSetProvider = requireNonNull(recordSetProvider, "recordSetProvider is null");
50 | }
51 |
52 | @Override
53 | public ConnectorPageSource createPageSource(
54 | ConnectorTransactionHandle transaction,
55 | ConnectorSession session,
56 | ConnectorSplit split,
57 | ConnectorTableHandle table,
58 | List<ColumnHandle> columns,
59 | DynamicFilter dynamicFilter)
60 | {
61 | StorageSplit storageSplit = (StorageSplit) requireNonNull(split, "split is null");
62 | if (storageSplit.getMode() == LIST) {
63 | return new ListPageSource(storageClient, session, storageSplit.getTableName(), columns);
64 | }
65 |
66 | String schemaName = storageSplit.getSchemaName();
67 | String tableName = storageSplit.getTableName();
68 | FilePlugin plugin = PluginFactory.create(schemaName);
69 | List<String> handles = columns.stream()
70 | .map(c -> (StorageColumnHandle) c)
71 | .map(c -> c.getName().toLowerCase())
72 | .toList();
73 |
74 | try {
75 | return plugin.getConnectorPageSource(tableName, handles, path -> storageClient.getInputStream(session, path));
76 | }
77 | catch (UnsupportedOperationException ignored) {
78 | // Ignore it when a plugin doesn't implement getConnectorPageSource
79 | // and assume it implements getPagesIterator or getRecordsIterator
80 | }
81 |
82 | try {
83 | Iterable<Page> iterable = plugin.getPagesIterator(tableName, handles, path -> storageClient.getInputStream(session, path));
84 | List<Page> pages = StreamSupport.stream(iterable.spliterator(), false)
85 | .collect(toList());
86 | return new FixedPageSource(pages);
87 | }
88 | catch (UnsupportedOperationException ignored) {
89 | // Ignore it when a plugin doesn't implement getPagesIterator
90 | // and assume it implements getRecordsIterator for the record set below
91 | }
92 |
93 | RecordSet recordSet = recordSetProvider.getRecordSet(transaction, session, split, table, columns);
94 | return new RecordPageSource(recordSet);
95 | }
96 | }
97 |
--------------------------------------------------------------------------------
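The try/catch cascade above effectively defines the FilePlugin contract: implement getConnectorPageSource for full control, getPagesIterator to emit pages, or only getRecordsIterator for simple row-oriented formats. A minimal row-oriented plugin could look like the sketch below; the method signatures are inferred from the call sites above rather than copied from FilePlugin.java, it assumes getFields is the only abstract method, and HelloPlugin is an illustrative name:

    import io.trino.spi.type.VarcharType;
    import org.ebyhr.trino.storage.StorageColumnHandle;
    import org.ebyhr.trino.storage.operator.FilePlugin;

    import java.io.InputStream;
    import java.util.List;
    import java.util.function.Function;
    import java.util.stream.Stream;

    public class HelloPlugin
            implements FilePlugin
    {
        @Override
        public List<StorageColumnHandle> getFields(String path, Function<String, InputStream> streamProvider)
        {
            return List.of(new StorageColumnHandle("greeting", VarcharType.VARCHAR));
        }

        @Override
        public Stream<List<?>> getRecordsIterator(String path, Function<String, InputStream> streamProvider)
        {
            // One row; StoragePageSourceProvider falls through its first two
            // attempts and wraps this in a RecordPageSource downstream
            return Stream.<List<?>>of(List.of("hello"));
        }
    }
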
/src/main/java/org/ebyhr/trino/storage/StoragePlugin.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * http://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 | package org.ebyhr.trino.storage;
15 |
16 | import io.trino.spi.Plugin;
17 | import io.trino.spi.connector.ConnectorFactory;
18 |
19 | import java.util.List;
20 |
21 | public class StoragePlugin
22 | implements Plugin
23 | {
24 | @Override
25 | public Iterable<ConnectorFactory> getConnectorFactories()
26 | {
27 | return List.of(new StorageConnectorFactory());
28 | }
29 | }
30 |
--------------------------------------------------------------------------------
/src/main/java/org/ebyhr/trino/storage/StorageRecordSetProvider.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * http://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 | package org.ebyhr.trino.storage;
15 |
16 | import com.google.common.collect.Iterables;
17 | import com.google.inject.Inject;
18 | import io.trino.spi.connector.ColumnHandle;
19 | import io.trino.spi.connector.ColumnMetadata;
20 | import io.trino.spi.connector.ConnectorRecordSetProvider;
21 | import io.trino.spi.connector.ConnectorSession;
22 | import io.trino.spi.connector.ConnectorSplit;
23 | import io.trino.spi.connector.ConnectorTableHandle;
24 | import io.trino.spi.connector.ConnectorTransactionHandle;
25 | import io.trino.spi.connector.InMemoryRecordSet;
26 | import io.trino.spi.connector.RecordSet;
27 | import io.trino.spi.type.Type;
28 | import org.ebyhr.trino.storage.operator.FilePlugin;
29 | import org.ebyhr.trino.storage.operator.PluginFactory;
30 |
31 | import java.util.List;
32 | import java.util.stream.Stream;
33 |
34 | import static com.google.common.base.Preconditions.checkState;
35 | import static java.util.Objects.requireNonNull;
36 | import static java.util.stream.Collectors.toList;
37 |
38 | public class StorageRecordSetProvider
39 | implements ConnectorRecordSetProvider
40 | {
41 | private final StorageClient storageClient;
42 |
43 | @Inject
44 | public StorageRecordSetProvider(StorageClient storageClient)
45 | {
46 | this.storageClient = requireNonNull(storageClient, "storageClient is null");
47 | }
48 |
49 | @Override
50 | public RecordSet getRecordSet(
51 | ConnectorTransactionHandle transaction,
52 | ConnectorSession session,
53 | ConnectorSplit split,
54 | ConnectorTableHandle table,
55 | List<? extends ColumnHandle> columns)
56 | {
57 | requireNonNull(split, "split is null");
58 | StorageSplit storageSplit = (StorageSplit) split;
59 |
60 | String schemaName = storageSplit.getSchemaName();
61 | String tableName = storageSplit.getTableName();
62 | StorageTable storageTable = storageClient.getTable(session, schemaName, tableName);
63 | // this can happen if the table is removed during a query
64 | checkState(storageTable != null, "Table %s.%s no longer exists", schemaName, tableName);
65 |
66 | FilePlugin plugin = PluginFactory.create(schemaName);
67 | Stream<List<?>> stream = plugin.getRecordsIterator(tableName, path -> storageClient.getInputStream(session, path));
68 | Iterable<List<?>> rows = stream::iterator;
69 |
70 | List<StorageColumnHandle> handles = columns
71 | .stream()
72 | .map(c -> (StorageColumnHandle) c)
73 | .collect(toList());
74 | List<Integer> columnIndexes = handles
75 | .stream()
76 | .map(column -> {
77 | int index = 0;
78 | for (ColumnMetadata columnMetadata : storageTable.getColumnsMetadata()) {
79 | if (columnMetadata.getName().equalsIgnoreCase(column.getName())) {
80 | return index;
81 | }
82 | index++;
83 | }
84 | throw new IllegalStateException("Unknown column: " + column.getName());
85 | })
86 | .collect(toList());
87 |
88 | //noinspection StaticPseudoFunctionalStyleMethod
89 | Iterable<List<?>> mappedRows = Iterables.transform(rows, row -> columnIndexes
90 | .stream()
91 | .map(row::get)
92 | .collect(toList()));
93 |
94 | List<Type> mappedTypes = handles
95 | .stream()
96 | .map(StorageColumnHandle::getType)
97 | .collect(toList());
98 | return new InMemoryRecordSet(mappedTypes, mappedRows);
99 | }
100 | }
101 |
--------------------------------------------------------------------------------
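The remapping above is worth tracing concretely: rows come back in the table's declared column order, columnIndexes records where each projected column lives, and each row is then reordered to match the query's projection. A plain-JDK toy run of the same idea, with illustrative data:

    import java.util.List;

    import static java.util.stream.Collectors.toList;

    public class ProjectionSketch
    {
        public static void main(String[] args)
        {
            // Table order: (id, name, flag); the query selected (flag, id)
            List<List<?>> rows = List.of(List.of(1, "a", true), List.of(2, "b", false));
            List<Integer> columnIndexes = List.of(2, 0);

            // Reorder every row to the projected column order
            List<List<?>> mapped = rows.stream()
                    .map(row -> columnIndexes.stream().map(row::get).collect(toList()))
                    .collect(toList());

            System.out.println(mapped); // [[true, 1], [false, 2]]
        }
    }
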
/src/main/java/org/ebyhr/trino/storage/StorageSplit.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * http://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 | package org.ebyhr.trino.storage;
15 |
16 | import com.fasterxml.jackson.annotation.JsonCreator;
17 | import com.fasterxml.jackson.annotation.JsonProperty;
18 | import com.google.common.collect.ImmutableMap;
19 | import io.airlift.slice.SizeOf;
20 | import io.trino.spi.HostAddress;
21 | import io.trino.spi.connector.ConnectorSplit;
22 |
23 | import java.util.List;
24 | import java.util.Map;
25 |
26 | import static java.util.Objects.requireNonNull;
27 |
28 | public class StorageSplit
29 | implements ConnectorSplit
30 | {
31 | private static final int INSTANCE_SIZE = SizeOf.instanceSize(StorageSplit.class);
32 | private static final int MODE_SIZE = SizeOf.instanceSize(Mode.class);
33 |
34 | private final Mode mode;
35 | private final String schemaName;
36 | private final String tableName;
37 |
38 | @JsonCreator
39 | public StorageSplit(
40 | @JsonProperty("mode") Mode mode,
41 | @JsonProperty("schemaName") String schemaName,
42 | @JsonProperty("tableName") String tableName)
43 | {
44 | this.schemaName = requireNonNull(schemaName, "schemaName is null");
45 | this.mode = requireNonNull(mode, "mode is null");
46 | this.tableName = requireNonNull(tableName, "tableName is null");
47 | }
48 |
49 | @JsonProperty
50 | public Mode getMode()
51 | {
52 | return mode;
53 | }
54 |
55 | @JsonProperty
56 | public String getSchemaName()
57 | {
58 | return schemaName;
59 | }
60 |
61 | @JsonProperty
62 | public String getTableName()
63 | {
64 | return tableName;
65 | }
66 |
67 | @Override
68 | public List<HostAddress> getAddresses()
69 | {
70 | return List.of();
71 | }
72 |
73 | @Override
74 | public Map<String, String> getSplitInfo()
75 | {
76 | return ImmutableMap.<String, String>builder()
77 | .put("mode", mode.name())
78 | .put("schemaName", schemaName)
79 | .put("tableName", tableName)
80 | .buildOrThrow();
81 | }
82 |
83 | public long getRetainedSizeInBytes()
84 | {
85 | return INSTANCE_SIZE
86 | + MODE_SIZE
87 | + SizeOf.estimatedSizeOf(schemaName)
88 | + SizeOf.estimatedSizeOf(tableName);
89 | }
90 |
91 | public enum Mode
92 | {
93 | TABLE,
94 | LIST,
95 | /**/;
96 | }
97 | }
98 |
--------------------------------------------------------------------------------
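Splits travel from the coordinator to workers as JSON, which is why the class above carries @JsonCreator/@JsonProperty annotations. A round-trip sketch using Airlift's JsonCodec, which mirrors (but is not identical to) the engine's internal serialization; the class name and file path are illustrative:

    import io.airlift.json.JsonCodec;
    import org.ebyhr.trino.storage.StorageSplit;

    public class SplitRoundTripSketch
    {
        public static void main(String[] args)
        {
            JsonCodec<StorageSplit> codec = JsonCodec.jsonCodec(StorageSplit.class);
            StorageSplit split = new StorageSplit(StorageSplit.Mode.TABLE, "csv", "file:///tmp/numbers-1.csv");

            // Serialize and deserialize, as the coordinator/worker boundary would
            String json = codec.toJson(split);
            StorageSplit copy = codec.fromJson(json);
            System.out.println(copy.getSchemaName() + ":" + copy.getTableName());
        }
    }
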
/src/main/java/org/ebyhr/trino/storage/StorageSplitManager.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * http://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 | package org.ebyhr.trino.storage;
15 |
16 | import com.google.inject.Inject;
17 | import io.trino.spi.connector.ConnectorSession;
18 | import io.trino.spi.connector.ConnectorSplit;
19 | import io.trino.spi.connector.ConnectorSplitManager;
20 | import io.trino.spi.connector.ConnectorSplitSource;
21 | import io.trino.spi.connector.ConnectorTableHandle;
22 | import io.trino.spi.connector.ConnectorTransactionHandle;
23 | import io.trino.spi.connector.Constraint;
24 | import io.trino.spi.connector.DynamicFilter;
25 | import io.trino.spi.connector.FixedSplitSource;
26 |
27 | import java.util.ArrayList;
28 | import java.util.Collections;
29 | import java.util.List;
30 |
31 | import static com.google.common.base.Preconditions.checkState;
32 | import static java.util.Objects.requireNonNull;
33 |
34 | public class StorageSplitManager
35 | implements ConnectorSplitManager
36 | {
37 | private final StorageClient storageClient;
38 |
39 | @Inject
40 | public StorageSplitManager(StorageClient storageClient)
41 | {
42 | this.storageClient = requireNonNull(storageClient, "client is null");
43 | }
44 |
45 | @Override
46 | public ConnectorSplitSource getSplits(
47 | ConnectorTransactionHandle transaction,
48 | ConnectorSession session,
49 | ConnectorTableHandle handle,
50 | DynamicFilter dynamicFilter,
51 | Constraint constraint)
52 | {
53 | StorageTableHandle tableHandle = (StorageTableHandle) handle;
54 | StorageTable table = storageClient.getTable(session, tableHandle.getSchemaName(), tableHandle.getTableName());
55 | // this can happen if the table is removed during a query
56 | checkState(table != null, "Table %s.%s no longer exists", tableHandle.getSchemaName(), tableHandle.getTableName());
57 |
58 | List<ConnectorSplit> splits = new ArrayList<>();
59 | splits.add(new StorageSplit(tableHandle.getMode(), tableHandle.getSchemaName(), tableHandle.getTableName()));
60 | Collections.shuffle(splits);
61 |
62 | return new FixedSplitSource(splits);
63 | }
64 | }
65 |
--------------------------------------------------------------------------------
/src/main/java/org/ebyhr/trino/storage/StorageTable.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * http://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 | package org.ebyhr.trino.storage;
15 |
16 | import com.fasterxml.jackson.annotation.JsonCreator;
17 | import com.fasterxml.jackson.annotation.JsonProperty;
18 | import com.google.common.collect.ImmutableList;
19 | import io.trino.spi.connector.ColumnMetadata;
20 |
21 | import java.util.List;
22 |
23 | import static com.google.common.base.Preconditions.checkArgument;
24 | import static com.google.common.base.Strings.isNullOrEmpty;
25 | import static java.util.Objects.requireNonNull;
26 |
27 | public class StorageTable
28 | {
29 | private final StorageSplit.Mode mode;
30 | private final String name;
31 | private final List<StorageColumnHandle> columns;
32 | private final List<ColumnMetadata> columnsMetadata;
33 |
34 | @JsonCreator
35 | public StorageTable(
36 | @JsonProperty("mode") StorageSplit.Mode mode,
37 | @JsonProperty("name") String name,
38 | @JsonProperty("columns") List columns)
39 | {
40 | this.mode = requireNonNull(mode, "mode is null");
41 | checkArgument(!isNullOrEmpty(name), "name is null or is empty");
42 | this.name = requireNonNull(name, "name is null");
43 | this.columns = List.copyOf(requireNonNull(columns, "columns is null"));
44 |
45 | ImmutableList.Builder<ColumnMetadata> columnsMetadata = ImmutableList.builder();
46 | for (StorageColumnHandle column : this.columns) {
47 | columnsMetadata.add(new ColumnMetadata(column.getName(), column.getType()));
48 | }
49 | this.columnsMetadata = columnsMetadata.build();
50 | }
51 |
52 | @JsonProperty
53 | public StorageSplit.Mode getMode()
54 | {
55 | return mode;
56 | }
57 |
58 | @JsonProperty
59 | public String getName()
60 | {
61 | return name;
62 | }
63 |
64 | @JsonProperty
65 | public List<StorageColumnHandle> getColumns()
66 | {
67 | return columns;
68 | }
69 |
70 | public List<ColumnMetadata> getColumnsMetadata()
71 | {
72 | return columnsMetadata;
73 | }
74 | }
75 |
--------------------------------------------------------------------------------
/src/main/java/org/ebyhr/trino/storage/StorageTableHandle.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * http://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 | package org.ebyhr.trino.storage;
15 |
16 | import com.fasterxml.jackson.annotation.JsonCreator;
17 | import com.fasterxml.jackson.annotation.JsonIgnore;
18 | import com.fasterxml.jackson.annotation.JsonProperty;
19 | import com.google.common.base.Joiner;
20 | import io.trino.spi.connector.ConnectorTableHandle;
21 | import io.trino.spi.connector.SchemaTableName;
22 |
23 | import java.util.Objects;
24 |
25 | import static java.util.Objects.requireNonNull;
26 |
27 | public final class StorageTableHandle
28 | implements ConnectorTableHandle
29 | {
30 | private final StorageSplit.Mode mode;
31 | private final String schemaName;
32 | private final String tableName;
33 |
34 | @JsonCreator
35 | public StorageTableHandle(
36 | @JsonProperty("mode") StorageSplit.Mode mode,
37 | @JsonProperty("schemaName") String schemaName,
38 | @JsonProperty("tableName") String tableName)
39 | {
40 | this.mode = requireNonNull(mode, "mode is null");
41 | this.schemaName = requireNonNull(schemaName, "schemaName is null");
42 | this.tableName = requireNonNull(tableName, "tableName is null");
43 | }
44 |
45 | @JsonProperty
46 | public StorageSplit.Mode getMode()
47 | {
48 | return mode;
49 | }
50 |
51 | @JsonProperty
52 | public String getSchemaName()
53 | {
54 | return schemaName;
55 | }
56 |
57 | @JsonProperty
58 | public String getTableName()
59 | {
60 | return tableName;
61 | }
62 |
63 | @JsonIgnore
64 | public SchemaTableName toSchemaTableName()
65 | {
66 | return new SchemaTableName(schemaName, tableName);
67 | }
68 |
69 | @Override
70 | public int hashCode()
71 | {
72 | return Objects.hash(mode, schemaName, tableName);
73 | }
74 |
75 | @Override
76 | public boolean equals(Object obj)
77 | {
78 | if (this == obj) {
79 | return true;
80 | }
81 | if ((obj == null) || (getClass() != obj.getClass())) {
82 | return false;
83 | }
84 |
85 | StorageTableHandle other = (StorageTableHandle) obj;
86 | return Objects.equals(this.mode, other.mode) &&
87 | Objects.equals(this.schemaName, other.schemaName) &&
88 | Objects.equals(this.tableName, other.tableName);
89 | }
90 |
91 | @Override
92 | public String toString()
93 | {
94 | return Joiner.on(":").join(schemaName, tableName);
95 | }
96 | }
97 |
--------------------------------------------------------------------------------
/src/main/java/org/ebyhr/trino/storage/StorageTransactionHandle.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * http://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 | package org.ebyhr.trino.storage;
15 |
16 | import io.trino.spi.connector.ConnectorTransactionHandle;
17 |
18 | public enum StorageTransactionHandle
19 | implements ConnectorTransactionHandle
20 | {
21 | INSTANCE
22 | }
23 |
--------------------------------------------------------------------------------
/src/main/java/org/ebyhr/trino/storage/operator/AvroColumnDecoder.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * http://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 | package org.ebyhr.trino.storage.operator;
15 |
16 | import io.airlift.slice.Slice;
17 | import io.airlift.slice.Slices;
18 | import io.trino.spi.ErrorCode;
19 | import io.trino.spi.ErrorCodeSupplier;
20 | import io.trino.spi.ErrorType;
21 | import io.trino.spi.TrinoException;
22 | import io.trino.spi.block.Block;
23 | import io.trino.spi.block.BlockBuilder;
24 | import io.trino.spi.block.MapBlockBuilder;
25 | import io.trino.spi.block.RowBlockBuilder;
26 | import io.trino.spi.block.SqlMap;
27 | import io.trino.spi.block.SqlRow;
28 | import io.trino.spi.type.ArrayType;
29 | import io.trino.spi.type.BigintType;
30 | import io.trino.spi.type.BooleanType;
31 | import io.trino.spi.type.DoubleType;
32 | import io.trino.spi.type.IntegerType;
33 | import io.trino.spi.type.MapType;
34 | import io.trino.spi.type.RealType;
35 | import io.trino.spi.type.RowType;
36 | import io.trino.spi.type.RowType.Field;
37 | import io.trino.spi.type.SmallintType;
38 | import io.trino.spi.type.TinyintType;
39 | import io.trino.spi.type.Type;
40 | import io.trino.spi.type.VarbinaryType;
41 | import io.trino.spi.type.VarcharType;
42 | import org.apache.avro.generic.GenericEnumSymbol;
43 | import org.apache.avro.generic.GenericFixed;
44 | import org.apache.avro.generic.GenericRecord;
45 |
46 | import java.nio.ByteBuffer;
47 | import java.util.List;
48 | import java.util.Map;
49 |
50 | import static com.google.common.base.Preconditions.checkState;
51 | import static io.airlift.slice.Slices.utf8Slice;
52 | import static io.trino.spi.ErrorType.EXTERNAL;
53 | import static io.trino.spi.block.MapValueBuilder.buildMapValue;
54 | import static io.trino.spi.block.RowValueBuilder.buildRowValue;
55 | import static io.trino.spi.type.Varchars.truncateToLength;
56 | import static java.lang.Float.floatToIntBits;
57 | import static java.lang.String.format;
58 | import static java.util.Objects.requireNonNull;
59 | import static org.ebyhr.trino.storage.operator.AvroColumnDecoder.DecoderErrorCode.DECODER_CONVERSION_NOT_SUPPORTED;
60 |
61 | // copied from io.trino.decoder.avro.AvroColumnDecoder
62 | public class AvroColumnDecoder
63 | {
64 | private AvroColumnDecoder() {}
65 |
66 | private static Slice getSlice(Object value, Type type, String columnName)
67 | {
68 | if (type instanceof VarcharType && (value instanceof CharSequence || value instanceof GenericEnumSymbol)) {
69 | return truncateToLength(utf8Slice(value.toString()), type);
70 | }
71 |
72 | if (type instanceof VarbinaryType) {
73 | if (value instanceof ByteBuffer) {
74 | return Slices.wrappedHeapBuffer((ByteBuffer) value);
75 | }
76 | if (value instanceof GenericFixed) {
77 | return Slices.wrappedBuffer(((GenericFixed) value).bytes());
78 | }
79 | }
80 |
81 | throw new TrinoException(DECODER_CONVERSION_NOT_SUPPORTED, format("cannot decode object of '%s' as '%s' for column '%s'", value.getClass(), type, columnName));
82 | }
83 |
84 | public static Object serializeObject(BlockBuilder builder, Object value, Type type, String columnName)
85 | {
86 | if (type instanceof ArrayType) {
87 | return serializeList(builder, value, type, columnName);
88 | }
89 | if (type instanceof MapType mapType) {
90 | return serializeMap(builder, value, mapType, columnName);
91 | }
92 | if (type instanceof RowType) {
93 | return serializeRow(builder, value, type, columnName);
94 | }
95 | serializePrimitive(builder, value, type, columnName);
96 | return null;
97 | }
98 |
99 | private static Block serializeList(BlockBuilder parentBlockBuilder, Object value, Type type, String columnName)
100 | {
101 | if (value == null) {
102 | checkState(parentBlockBuilder != null, "parentBlockBuilder is null");
103 | parentBlockBuilder.appendNull();
104 | return null;
105 | }
106 | List<?> list = (List<?>) value;
107 | List<Type> typeParameters = type.getTypeParameters();
108 | Type elementType = typeParameters.get(0);
109 |
110 | BlockBuilder blockBuilder = elementType.createBlockBuilder(null, list.size());
111 | for (Object element : list) {
112 | serializeObject(blockBuilder, element, elementType, columnName);
113 | }
114 | if (parentBlockBuilder != null) {
115 | type.writeObject(parentBlockBuilder, blockBuilder.build());
116 | return null;
117 | }
118 | return blockBuilder.build();
119 | }
120 |
121 | private static void serializePrimitive(BlockBuilder blockBuilder, Object value, Type type, String columnName)
122 | {
123 | requireNonNull(blockBuilder, "blockBuilder is null");
124 |
125 | if (value == null) {
126 | blockBuilder.appendNull();
127 | return;
128 | }
129 |
130 | if (type instanceof BooleanType) {
131 | type.writeBoolean(blockBuilder, (Boolean) value);
132 | return;
133 | }
134 |
135 | if ((value instanceof Integer || value instanceof Long) && (type instanceof BigintType || type instanceof IntegerType || type instanceof SmallintType || type instanceof TinyintType)) {
136 | type.writeLong(blockBuilder, ((Number) value).longValue());
137 | return;
138 | }
139 |
140 | if (type instanceof DoubleType) {
141 | type.writeDouble(blockBuilder, (Double) value);
142 | return;
143 | }
144 |
145 | if (type instanceof RealType) {
146 | type.writeLong(blockBuilder, floatToIntBits((Float) value));
147 | return;
148 | }
149 |
150 | if (type instanceof VarcharType || type instanceof VarbinaryType) {
151 | type.writeSlice(blockBuilder, getSlice(value, type, columnName));
152 | return;
153 | }
154 |
155 | throw new TrinoException(DECODER_CONVERSION_NOT_SUPPORTED, format("cannot decode object of '%s' as '%s' for column '%s'", value.getClass(), type, columnName));
156 | }
157 |
158 | private static SqlMap serializeMap(BlockBuilder parentBlockBuilder, Object value, MapType type, String columnName)
159 | {
160 | if (value == null) {
161 | checkState(parentBlockBuilder != null, "parentBlockBuilder is null");
162 | parentBlockBuilder.appendNull();
163 | return null;
164 | }
165 |
166 | Map<?, ?> map = (Map<?, ?>) value;
167 | Type keyType = type.getKeyType();
168 | Type valueType = type.getValueType();
169 |
170 | if (parentBlockBuilder != null) {
171 | ((MapBlockBuilder) parentBlockBuilder).buildEntry((keyBuilder, valueBuilder) -> buildMap(columnName, map, keyType, valueType, keyBuilder, valueBuilder));
172 | return null;
173 | }
174 | return buildMapValue(type, map.size(), (keyBuilder, valueBuilder) -> buildMap(columnName, map, keyType, valueType, keyBuilder, valueBuilder));
175 | }
176 |
177 | private static void buildMap(String columnName, Map<?, ?> map, Type keyType, Type valueType, BlockBuilder keyBuilder, BlockBuilder valueBuilder)
178 | {
179 | for (Map.Entry<?, ?> entry : map.entrySet()) {
180 | if (entry.getKey() != null) {
181 | keyType.writeSlice(keyBuilder, truncateToLength(utf8Slice(entry.getKey().toString()), keyType));
182 | serializeObject(valueBuilder, entry.getValue(), valueType, columnName);
183 | }
184 | }
185 | }
186 |
187 | private static SqlRow serializeRow(BlockBuilder blockBuilder, Object value, Type type, String columnName)
188 | {
189 | if (value == null) {
190 | checkState(blockBuilder != null, "block builder is null");
191 | blockBuilder.appendNull();
192 | return null;
193 | }
194 |
195 | RowType rowType = (RowType) type;
196 | if (blockBuilder == null) {
197 | return buildRowValue(rowType, fieldBuilders -> buildRow(rowType, columnName, (GenericRecord) value, fieldBuilders));
198 | }
199 |
200 | ((RowBlockBuilder) blockBuilder).buildEntry(fieldBuilders -> buildRow(rowType, columnName, (GenericRecord) value, fieldBuilders));
201 | return null;
202 | }
203 |
204 | private static void buildRow(RowType type, String columnName, GenericRecord record, List<BlockBuilder> fieldBuilders)
205 | {
206 | List<Field> fields = type.getFields();
207 | for (int i = 0; i < fields.size(); i++) {
208 | Field field = fields.get(i);
209 | checkState(field.getName().isPresent(), "field name not found");
210 | serializeObject(fieldBuilders.get(i), record.get(field.getName().get()), field.getType(), columnName);
211 | }
212 | }
213 |
214 | // copied from io.trino.decoder.DecoderErrorCode
215 | enum DecoderErrorCode
216 | implements ErrorCodeSupplier
217 | {
218 | /**
219 | * A requested data conversion is not supported.
220 | */
221 | DECODER_CONVERSION_NOT_SUPPORTED(0, EXTERNAL);
222 |
223 | private final ErrorCode errorCode;
224 |
225 | DecoderErrorCode(int code, ErrorType type)
226 | {
227 | errorCode = new ErrorCode(code + 0x0101_0000, name(), type);
228 | }
229 |
230 | @Override
231 | public ErrorCode toErrorCode()
232 | {
233 | return errorCode;
234 | }
235 | }
236 | }
237 |
--------------------------------------------------------------------------------
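A usage sketch for the decoder above: serializeObject writes one decoded Avro value into a Trino BlockBuilder, shown here for the simplest VARCHAR case (any CharSequence, including Avro's Utf8, takes the same getSlice path). DecoderSketch and the column name are illustrative:

    import io.trino.spi.block.Block;
    import io.trino.spi.block.BlockBuilder;
    import org.ebyhr.trino.storage.operator.AvroColumnDecoder;

    import static io.trino.spi.type.VarcharType.VARCHAR;

    public class DecoderSketch
    {
        public static void main(String[] args)
        {
            BlockBuilder builder = VARCHAR.createBlockBuilder(null, 1);
            // String implements CharSequence, so getSlice() accepts it directly
            AvroColumnDecoder.serializeObject(builder, "hello", VARCHAR, "greeting");
            Block block = builder.build();
            System.out.println(VARCHAR.getSlice(block, 0).toStringUtf8()); // hello
        }
    }
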
/src/main/java/org/ebyhr/trino/storage/operator/AvroPlugin.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * http://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 | package org.ebyhr.trino.storage.operator;
15 |
16 | import io.trino.spi.Page;
17 | import io.trino.spi.TrinoException;
18 | import io.trino.spi.block.Block;
19 | import io.trino.spi.block.BlockBuilder;
20 | import io.trino.spi.type.Type;
21 | import org.apache.avro.Schema;
22 | import org.apache.avro.file.DataFileStream;
23 | import org.apache.avro.generic.GenericDatumReader;
24 | import org.apache.avro.generic.GenericRecord;
25 | import org.ebyhr.trino.storage.StorageColumnHandle;
26 |
27 | import java.io.IOException;
28 | import java.io.InputStream;
29 | import java.util.ArrayList;
30 | import java.util.List;
31 | import java.util.function.Function;
32 |
33 | import static io.trino.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR;
34 | import static java.lang.String.format;
35 | import static org.ebyhr.trino.storage.operator.AvroSchemaConverter.convert;
36 |
37 | public class AvroPlugin
38 | implements FilePlugin
39 | {
40 | private static final int INITIAL_BATCH_SIZE = 4 * 1024; // 4 KB
41 |
42 | @Override
43 | public List<StorageColumnHandle> getFields(String path, Function<String, InputStream> streamProvider)
44 | {
45 | try (InputStream input = streamProvider.apply(path);
46 | DataFileStream