├── NOTICE
├── gradle
│   ├── wrapper
│   │   ├── gradle-wrapper.jar
│   │   └── gradle-wrapper.properties
│   └── libs.versions.toml
├── runtime
│   ├── src
│   │   └── main
│   │       └── java
│   │           └── software
│   │               └── amazon
│   │                   └── s3tables
│   │                       └── iceberg
│   │                           └── Runtime.java
│   └── build.gradle
├── .gitignore
├── CODE_OF_CONDUCT.md
├── settings.gradle
├── src
│   └── software
│       └── amazon
│           └── s3tables
│               └── iceberg
│                   ├── S3TablesAwsClientFactory.java
│                   ├── S3TablesTable.java
│                   ├── imports
│                   │   ├── RetryDetector.java
│                   │   ├── FileIOTracker.java
│                   │   ├── UrlConnectionHttpClientConfigurations.java
│                   │   ├── ApacheHttpClientConfigurations.java
│                   │   ├── HttpClientProperties.java
│                   │   └── AwsClientProperties.java
│                   ├── S3TablesProperties.java
│                   ├── S3TablesCatalogConfiguration.java
│                   ├── S3TablesAwsClientFactories.java
│                   ├── S3TablesAssumeRoleAwsClientFactory.java
│                   ├── S3TablesLocationProvider.java
│                   ├── S3TablesCatalogOperations.java
│                   └── S3TablesCatalog.java
├── tst
│   └── software
│       └── amazon
│           └── s3tables
│               └── iceberg
│                   ├── S3TablesLocationProviderTest.java
│                   └── S3TablesCatalogTest.java
├── .github
│   └── workflows
│       ├── gradle.yml
│       └── gradle-publish.yml
├── gradlew.bat
├── CONTRIBUTING.md
├── README.md
├── gradlew
├── LICENSE
└── resources
    └── META-INF
        └── THIRD-PARTY

/NOTICE: -------------------------------------------------------------------------------- 1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/s3-tables-catalog/HEAD/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /runtime/src/main/java/software/amazon/s3tables/iceberg/Runtime.java: -------------------------------------------------------------------------------- 1 | package software.amazon.s3tables.iceberg; 2 | 3 | public class Runtime { 4 | } 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /annotation-generated-src/ 2 | /annotation-generated-tst/ 3 | .DS_Store 4 | *.iml 5 | build 6 | *.log 7 | metastore_db/ 8 | .gradle 9 | /.idea/ 10 | gradle.properties 11 | /bin/ -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionBase=GRADLE_USER_HOME 2 | distributionPath=wrapper/dists 3 | distributionUrl=https\://services.gradle.org/distributions/gradle-8.11-bin.zip 4 | networkTimeout=10000 5 | validateDistributionUrl=true 6 | zipStoreBase=GRADLE_USER_HOME 7 | zipStorePath=wrapper/dists 8 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 
5 | -------------------------------------------------------------------------------- /settings.gradle: -------------------------------------------------------------------------------- 1 | plugins { 2 | // Apply the foojay-resolver plugin to allow automatic download of JDKs 3 | id 'org.gradle.toolchains.foojay-resolver-convention' version '0.8.0' 4 | } 5 | 6 | rootProject.name = 's3-tables-catalog-for-iceberg' 7 | 8 | include 'runtime' 9 | 10 | project(':runtime').name = 's3-tables-catalog-for-iceberg-runtime' 11 | -------------------------------------------------------------------------------- /gradle/libs.versions.toml: -------------------------------------------------------------------------------- 1 | [versions] 2 | commons-math3 = "3.6.1" 3 | guava = "33.2.1-jre" 4 | junit-jupiter = "5.10.3" 5 | 6 | [libraries] 7 | commons-math3 = { module = "org.apache.commons:commons-math3", version.ref = "commons-math3" } 8 | guava = { module = "com.google.guava:guava", version.ref = "guava" } 9 | junit-jupiter = { module = "org.junit.jupiter:junit-jupiter", version.ref = "junit-jupiter" } 10 | -------------------------------------------------------------------------------- /src/software/amazon/s3tables/iceberg/S3TablesAwsClientFactory.java: -------------------------------------------------------------------------------- 1 | package software.amazon.s3tables.iceberg; 2 | 3 | import software.amazon.awssdk.services.s3tables.S3TablesClient; 4 | 5 | import java.io.Serializable; 6 | import java.util.Map; 7 | 8 | public interface S3TablesAwsClientFactory extends Serializable { 9 | /** 10 | * Create an Amazon S3 Tables client. 11 | * 12 | * @return s3tables client 13 | */ 14 | S3TablesClient s3tables(); 15 | 16 | /** 17 | * Initialize AWS client factory from catalog properties. 18 | * 19 | * @param properties catalog properties 20 | */ 21 | void initialize(Map<String, String> properties); 22 | } 23 | -------------------------------------------------------------------------------- /tst/software/amazon/s3tables/iceberg/S3TablesLocationProviderTest.java: -------------------------------------------------------------------------------- 1 | package software.amazon.s3tables.iceberg; 2 | 3 | import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; 4 | import org.junit.jupiter.api.Test; 5 | import static org.assertj.core.api.Assertions.assertThat; 6 | 7 | public class S3TablesLocationProviderTest { 8 | 9 | @Test 10 | public void testNewDataLocation() { 11 | S3TablesLocationProvider locationProvider = new S3TablesLocationProvider( 12 | "s3://dummy-table-bucket/dummy-table", ImmutableMap.of() 13 | ); 14 | String hash = locationProvider.computeHash("testFile.txt"); 15 | String dataLocation = locationProvider.newDataLocation("testFile.txt"); 16 | assertThat(dataLocation) 17 | .isEqualTo(String.format("s3://dummy-table-bucket/dummy-table/data/%s-testFile.txt", hash)); 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /src/software/amazon/s3tables/iceberg/S3TablesTable.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | package software.amazon.s3tables.iceberg; 20 | 21 | import org.apache.iceberg.BaseTable; 22 | import org.apache.iceberg.TableOperations; 23 | import org.apache.iceberg.metrics.MetricsReporter; 24 | 25 | /** 26 | * A simple wrapper around BaseTable to demarcate S3 Tables tables. Some engines use this to detect 27 | * the type of the table and apply S3 Tables-specific behavior. 28 | */ 29 | public class S3TablesTable extends BaseTable { 30 | public S3TablesTable(TableOperations ops, String name, MetricsReporter reporter) { 31 | super(ops, name, reporter); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /.github/workflows/gradle.yml: -------------------------------------------------------------------------------- 1 | name: Java CI with Gradle 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | branches: [ "main" ] 8 | 9 | jobs: 10 | build: 11 | 12 | runs-on: ubuntu-latest 13 | permissions: 14 | contents: read 15 | id-token: write 16 | 17 | steps: 18 | - uses: actions/checkout@v4 19 | - name: Set up JDK 17 20 | uses: actions/setup-java@v4 21 | with: 22 | java-version: '17' 23 | distribution: 'temurin' 24 | 25 | - name: configure aws credentials 26 | uses: aws-actions/configure-aws-credentials@v2 27 | with: 28 | aws-region: us-west-2 29 | role-to-assume: ${{ secrets.GHA_ROLE_ARN }} 30 | 31 | - name: Setup Gradle 32 | uses: gradle/actions/setup-gradle@af1da67850ed9a4cedd57bfd976089dd991e2582 # v4.0.0 33 | 34 | - name: Build with Gradle Wrapper 35 | run: ./gradlew build 36 | 37 | dependency-submission: 38 | 39 | runs-on: ubuntu-latest 40 | permissions: 41 | contents: write 42 | 43 | steps: 44 | - uses: actions/checkout@v4 45 | - name: Set up JDK 17 46 | uses: actions/setup-java@v4 47 | with: 48 | java-version: '17' 49 | distribution: 'temurin' 50 | 51 | # Generates and submits a dependency graph, enabling Dependabot Alerts for all project dependencies. 52 | # See: https://github.com/gradle/actions/blob/main/dependency-submission/README.md 53 | - name: Generate and submit dependency graph 54 | uses: gradle/actions/dependency-submission@af1da67850ed9a4cedd57bfd976089dd991e2582 # v4.0.0 55 | -------------------------------------------------------------------------------- /src/software/amazon/s3tables/iceberg/imports/RetryDetector.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | package software.amazon.s3tables.iceberg.imports; 20 | 21 | import software.amazon.awssdk.core.metrics.CoreMetric; 22 | import software.amazon.awssdk.metrics.MetricCollection; 23 | import software.amazon.awssdk.metrics.MetricPublisher; 24 | 25 | import java.util.concurrent.atomic.AtomicBoolean; 26 | 27 | /** 28 | * Metrics are the only reliable way provided by the AWS SDK to determine if an API call was 29 | * retried. This class can be attached to an AWS API call and checked after to determine if retries 30 | * occurred. 31 | */ 32 | public class RetryDetector implements MetricPublisher { 33 | private final AtomicBoolean retried = new AtomicBoolean(false); 34 | 35 | @Override 36 | public void publish(MetricCollection metricCollection) { 37 | if (!retried.get()) { 38 | if (metricCollection.metricValues(CoreMetric.RETRY_COUNT).stream().anyMatch(i -> i > 0)) { 39 | retried.set(true); 40 | } else { 41 | metricCollection.children().forEach(this::publish); 42 | } 43 | } 44 | } 45 | 46 | @Override 47 | public void close() {} 48 | 49 | public boolean retried() { 50 | return retried.get(); 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/software/amazon/s3tables/iceberg/imports/FileIOTracker.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | package software.amazon.s3tables.iceberg.imports; 20 | 21 | import com.github.benmanes.caffeine.cache.Cache; 22 | import com.github.benmanes.caffeine.cache.Caffeine; 23 | import com.github.benmanes.caffeine.cache.RemovalListener; 24 | import java.io.Closeable; 25 | import org.apache.iceberg.TableOperations; 26 | import org.apache.iceberg.io.FileIO; 27 | import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; 28 | import org.apache.iceberg.relocated.com.google.common.base.Preconditions; 29 | 30 | /** 31 | * Keeps track of the {@link FileIO} instance of the given {@link TableOperations} instance and 32 | * closes the {@link FileIO} when {@link FileIOTracker#close()} gets called. 33 | */ 34 | public class FileIOTracker implements Closeable { 35 | private final Cache<TableOperations, FileIO> tracker; 36 | 37 | public FileIOTracker() { 38 | this.tracker = 39 | Caffeine.newBuilder() 40 | .weakKeys() 41 | .removalListener( 42 | (RemovalListener<TableOperations, FileIO>) 43 | (ops, fileIO, cause) -> { 44 | if (null != fileIO) { 45 | fileIO.close(); 46 | } 47 | }) 48 | .build(); 49 | } 50 | 51 | public void track(TableOperations ops) { 52 | Preconditions.checkArgument(null != ops, "Invalid table ops: null"); 53 | tracker.put(ops, ops.io()); 54 | } 55 | 56 | @VisibleForTesting 57 | Cache<TableOperations, FileIO> tracker() { 58 | return tracker; 59 | } 60 | 61 | @Override 62 | public void close() { 63 | tracker.invalidateAll(); 64 | tracker.cleanUp(); 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/software/amazon/s3tables/iceberg/S3TablesProperties.java: -------------------------------------------------------------------------------- 1 | package software.amazon.s3tables.iceberg; 2 | 3 | import software.amazon.awssdk.core.client.config.SdkAdvancedClientOption; 4 | 5 | import software.amazon.s3tables.iceberg.S3TablesAwsClientFactories.DefaultS3TablesAwsClientFactory; 6 | import software.amazon.awssdk.services.s3tables.S3TablesClientBuilder; 7 | 8 | import java.io.Serializable; 9 | import java.net.URI; 10 | import java.util.Map; 11 | 12 | public class S3TablesProperties implements Serializable { 13 | /** 14 | * This property is used to pass in the AWS client factory implementation class for S3 Tables. The 15 | * class should implement {@link S3TablesAwsClientFactory}. For example, {@link 16 | * DefaultS3TablesAwsClientFactory} implements {@link S3TablesAwsClientFactory}. If this property 17 | * is not set, {@link DefaultS3TablesAwsClientFactory} is loaded to 18 | * provide backward compatibility. 19 | */ 20 | public static final String CLIENT_FACTORY = "s3tables.client-factory-impl"; 21 | 22 | /** 23 | * Configure an alternative endpoint of the S3 Tables service to access. 24 | */ 25 | public static final String S3TABLES_ENDPOINT = "s3tables.endpoint"; 26 | public static final String S3_TABLES_ICEBERG_CATALOG = "s3tables-iceberg-catalog/0.1.5"; 27 | 28 | private String s3tablesEndpoint; 29 | 30 | public S3TablesProperties() { 31 | super(); 32 | } 33 | 34 | public S3TablesProperties(Map<String, String> properties) { 35 | this.s3tablesEndpoint = properties.get(S3TABLES_ENDPOINT); 36 | } 37 | 38 | /** 39 | * Override the endpoint for an S3 Tables client. 40 | * 41 | * <p>Sample usage: 42 | * 43 | * <pre>{@code 44 | *     S3TablesClient.builder().applyMutation(s3TablesProperties::applyS3TableEndpointConfigurations) 45 | * }</pre> 46 | */ 47 | public <T extends S3TablesClientBuilder> void applyS3TableEndpointConfigurations(T builder) { 48 | if (s3tablesEndpoint != null) { 49 | builder.endpointOverride(URI.create(s3tablesEndpoint)); 50 | } 51 | } 52 | 53 | /** 54 | * Override the user agent for an S3 Tables SDK client. 55 | * 56 | * <p>Sample usage: 57 | * 58 | * <pre>{@code 59 | *     S3TablesClient.builder().applyMutation(s3TablesProperties::applyUserAgentConfigurations) 60 | * }</pre> 61 | */ 62 | public <T extends S3TablesClientBuilder> void applyUserAgentConfigurations(T builder) { 63 | builder.overrideConfiguration( 64 | c -> c.putAdvancedOption(SdkAdvancedClientOption.USER_AGENT_PREFIX, S3_TABLES_ICEBERG_CATALOG)); 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/software/amazon/s3tables/iceberg/S3TablesCatalogConfiguration.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | package software.amazon.s3tables.iceberg; 20 | 21 | import org.apache.commons.configuration2.CombinedConfiguration; 22 | import org.apache.commons.configuration2.PropertiesConfiguration; 23 | import org.apache.commons.configuration2.SystemConfiguration; 24 | import org.apache.commons.configuration2.builder.fluent.Configurations; 25 | import org.apache.commons.configuration2.ex.ConfigurationException; 26 | import org.apache.commons.configuration2.tree.MergeCombiner; 27 | 28 | import java.io.File; 29 | import java.util.Iterator; 30 | 31 | public class S3TablesCatalogConfiguration { 32 | private final CombinedConfiguration config; 33 | 34 | public S3TablesCatalogConfiguration() { 35 | // Initialize CombinedConfiguration with a MergeCombiner 36 | this.config = new CombinedConfiguration(new MergeCombiner()); 37 | } 38 | 39 | // Load configuration from a properties file 40 | public void loadFromPropertiesFile(String filePath) throws ConfigurationException { 41 | Configurations configurations = new Configurations(); 42 | PropertiesConfiguration propertiesConfig = configurations.properties(new File(filePath)); 43 | config.addConfiguration(propertiesConfig); 44 | } 45 | 46 | // Load configuration from JVM system properties 47 | public void loadFromEnvironment() { 48 | SystemConfiguration systemConfig = new SystemConfiguration(); 49 | config.addConfiguration(systemConfig); 50 | } 51 | 52 | public void set(String key, String value) { 53 | config.setProperty(key, value); 54 | } 55 | 56 | // Get a configuration value 57 | public String get(String key) { 58 | return config.getString(key); 59 | } 60 | 61 | // Get a configuration value with a default 62 | public String getOrDefault(String key, String defaultValue) { 63 | return config.getString(key, defaultValue); 64 | } 65 | 66 | // List all configuration keys 67 | public void listAllKeys() { 68 | Iterator<String> keys = config.getKeys(); 69 | while (keys.hasNext()) { 70 | String key = keys.next(); 71 | System.out.println(key + " = " + config.getString(key)); 72 | } 73 | } 74 | 75 | } 76 | -------------------------------------------------------------------------------- /.github/workflows/gradle-publish.yml: 
-------------------------------------------------------------------------------- 1 | name: Gradle Package 2 | 3 | on: 4 | workflow_dispatch: 5 | release: 6 | types: [published] 7 | 8 | jobs: 9 | build: 10 | 11 | runs-on: ubuntu-latest 12 | permissions: 13 | contents: read 14 | packages: write 15 | id-token: write 16 | 17 | steps: 18 | - uses: actions/checkout@v4 19 | - name: Set up JDK 17 20 | uses: actions/setup-java@v4 21 | with: 22 | java-version: '17' 23 | distribution: 'temurin' 24 | server-id: github 25 | settings-path: ${{ github.workspace }} 26 | 27 | - name: configure aws credentials 28 | uses: aws-actions/configure-aws-credentials@v2 29 | with: 30 | role-to-assume: ${{ secrets.GHA_ROLE_ARN }} 31 | aws-region: ${{ secrets.AWS_REGION }} 32 | role-session-name: github-actions-session 33 | 34 | - name: Setup Gradle 35 | uses: gradle/actions/setup-gradle@af1da67850ed9a4cedd57bfd976089dd991e2582 # v4.0.0 36 | 37 | - name: Create Gradle configuration 38 | run: | 39 | aws secretsmanager get-secret-value --region ${{ secrets.AWS_REGION }} --secret-id ${{ secrets.GPG_KEY_ID }} --query SecretString --output text | jq -r .passphrase > passphrase.txt 40 | aws secretsmanager get-secret-value --region ${{ secrets.AWS_REGION }} --secret-id ${{ secrets.GPG_KEY_ID }} --query SecretString --output text | jq -r .privateKey | gpg --import --batch --passphrase-file passphrase.txt 41 | gpg --batch --pinentry-mode=loopback --keyring secring.gpg --export-secret-keys --passphrase-file passphrase.txt > $HOME/.gnupg/secring.gpg 42 | echo "signingEnabled=true" > gradle.properties 43 | echo "mavenUsername=$(aws secretsmanager get-secret-value --region ${{ secrets.AWS_REGION }} --secret-id ${{ secrets.MAVEN_TOKEN_ID }} --query SecretString --output text | jq -r .username)" >> gradle.properties 44 | echo "mavenPassword=$(aws secretsmanager get-secret-value --region ${{ secrets.AWS_REGION }} --secret-id ${{ secrets.MAVEN_TOKEN_ID }} --query SecretString --output text | jq -r .password)" >> gradle.properties 45 | echo "# Signing key: https://keys.openpgp.org/vks/v1/by-fingerprint/47941890158FC24F3F3DABEAA94D8D549FA7DD2A" >> gradle.properties 46 | echo "signing.keyId=9FA7DD2A" >> gradle.properties 47 | echo "signing.password=$(aws secretsmanager get-secret-value --region ${{ secrets.AWS_REGION }} --secret-id ${{ secrets.GPG_KEY_ID }} --query SecretString --output text | jq -r .passphrase)" >> gradle.properties 48 | echo "signing.secretKeyRingFile=$HOME/.gnupg/secring.gpg" >> gradle.properties 49 | 50 | - name: Build with Gradle 51 | run: ./gradlew build 52 | 53 | - name: Publish to GitHub Packages 54 | run: ./gradlew publish 55 | 56 | - name: Notify Central Publisher Portal 57 | run: | 58 | curl -X POST \ 59 | "https://ossrh-staging-api.central.sonatype.com/manual/upload/defaultRepository/${{ vars.MAVEN_GROUP_ID }}" \ 60 | -H "Authorization: Bearer $(aws secretsmanager get-secret-value --region ${{ secrets.AWS_REGION }} --secret-id ${{ secrets.MAVEN_TOKEN_ID }} --query SecretString --output text | jq -r .bearer)" \ 61 | -H "Content-Type: application/json" \ 62 | --fail 63 | -------------------------------------------------------------------------------- /gradlew.bat: -------------------------------------------------------------------------------- 1 | @rem 2 | @rem Copyright 2015 the original author or authors. 3 | @rem 4 | @rem Licensed under the Apache License, Version 2.0 (the "License"); 5 | @rem you may not use this file except in compliance with the License. 
6 | @rem You may obtain a copy of the License at 7 | @rem 8 | @rem https://www.apache.org/licenses/LICENSE-2.0 9 | @rem 10 | @rem Unless required by applicable law or agreed to in writing, software 11 | @rem distributed under the License is distributed on an "AS IS" BASIS, 12 | @rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | @rem See the License for the specific language governing permissions and 14 | @rem limitations under the License. 15 | @rem 16 | @rem SPDX-License-Identifier: Apache-2.0 17 | @rem 18 | 19 | @if "%DEBUG%"=="" @echo off 20 | @rem ########################################################################## 21 | @rem 22 | @rem Gradle startup script for Windows 23 | @rem 24 | @rem ########################################################################## 25 | 26 | @rem Set local scope for the variables with windows NT shell 27 | if "%OS%"=="Windows_NT" setlocal 28 | 29 | set DIRNAME=%~dp0 30 | if "%DIRNAME%"=="" set DIRNAME=. 31 | @rem This is normally unused 32 | set APP_BASE_NAME=%~n0 33 | set APP_HOME=%DIRNAME% 34 | 35 | @rem Resolve any "." and ".." in APP_HOME to make it shorter. 36 | for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi 37 | 38 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 39 | set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" 40 | 41 | @rem Find java.exe 42 | if defined JAVA_HOME goto findJavaFromJavaHome 43 | 44 | set JAVA_EXE=java.exe 45 | %JAVA_EXE% -version >NUL 2>&1 46 | if %ERRORLEVEL% equ 0 goto execute 47 | 48 | echo. 1>&2 49 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 1>&2 50 | echo. 1>&2 51 | echo Please set the JAVA_HOME variable in your environment to match the 1>&2 52 | echo location of your Java installation. 1>&2 53 | 54 | goto fail 55 | 56 | :findJavaFromJavaHome 57 | set JAVA_HOME=%JAVA_HOME:"=% 58 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 59 | 60 | if exist "%JAVA_EXE%" goto execute 61 | 62 | echo. 1>&2 63 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 1>&2 64 | echo. 1>&2 65 | echo Please set the JAVA_HOME variable in your environment to match the 1>&2 66 | echo location of your Java installation. 1>&2 67 | 68 | goto fail 69 | 70 | :execute 71 | @rem Setup the command line 72 | 73 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 74 | 75 | 76 | @rem Execute Gradle 77 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* 78 | 79 | :end 80 | @rem End local scope for the variables with windows NT shell 81 | if %ERRORLEVEL% equ 0 goto mainEnd 82 | 83 | :fail 84 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 85 | rem the _cmd.exe /c_ return code! 86 | set EXIT_CODE=%ERRORLEVEL% 87 | if %EXIT_CODE% equ 0 set EXIT_CODE=1 88 | if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE% 89 | exit /b %EXIT_CODE% 90 | 91 | :mainEnd 92 | if "%OS%"=="Windows_NT" endlocal 93 | 94 | :omega 95 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 
5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute to. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 
60 | -------------------------------------------------------------------------------- /src/software/amazon/s3tables/iceberg/S3TablesAwsClientFactories.java: -------------------------------------------------------------------------------- 1 | package software.amazon.s3tables.iceberg; 2 | 3 | import org.apache.iceberg.common.DynConstructors; 4 | import org.apache.iceberg.util.PropertyUtil; 5 | import software.amazon.awssdk.services.s3tables.S3TablesClient; 6 | import software.amazon.s3tables.iceberg.imports.AwsClientProperties; 7 | import software.amazon.s3tables.iceberg.imports.HttpClientProperties; 8 | 9 | import java.util.Map; 10 | 11 | public class S3TablesAwsClientFactories { 12 | 13 | private S3TablesAwsClientFactories() { 14 | } 15 | 16 | public static S3TablesAwsClientFactory from(Map<String, String> properties) { 17 | String factoryImpl = PropertyUtil.propertyAsString(properties, S3TablesProperties.CLIENT_FACTORY, DefaultS3TablesAwsClientFactory.class.getName()); 18 | return loadClientFactory(factoryImpl, properties); 19 | } 20 | 21 | private static S3TablesAwsClientFactory loadClientFactory(String impl, Map<String, String> properties) { 22 | DynConstructors.Ctor<S3TablesAwsClientFactory> ctor; 23 | try { 24 | ctor = DynConstructors.builder(S3TablesAwsClientFactory.class).loader(S3TablesAwsClientFactories.class.getClassLoader()).hiddenImpl(impl).buildChecked(); 25 | } catch (NoSuchMethodException e) { 26 | throw new IllegalArgumentException(String.format("Cannot initialize S3TablesAwsClientFactory, missing no-arg constructor: %s", impl), e); 27 | } 28 | 29 | S3TablesAwsClientFactory factory; 30 | try { 31 | factory = ctor.newInstance(); 32 | } catch (ClassCastException e) { 33 | throw new IllegalArgumentException(String.format("Cannot initialize S3TablesAwsClientFactory, %s does not implement S3TablesAwsClientFactory.", impl), e); 34 | } 35 | 36 | factory.initialize(properties); 37 | return factory; 38 | } 39 | 40 | public static class DefaultS3TablesAwsClientFactory implements S3TablesAwsClientFactory { 41 | protected S3TablesProperties s3TablesProperties; 42 | protected AwsClientProperties awsClientProperties; 43 | protected HttpClientProperties httpClientProperties; 44 | 45 | public DefaultS3TablesAwsClientFactory() { 46 | s3TablesProperties = new S3TablesProperties(); 47 | awsClientProperties = new AwsClientProperties(); 48 | httpClientProperties = new HttpClientProperties(); 49 | } 50 | 51 | @Override 52 | public void initialize(Map<String, String> properties) { 53 | this.s3TablesProperties = new S3TablesProperties(properties); 54 | this.awsClientProperties = new AwsClientProperties(properties); 55 | this.httpClientProperties = new HttpClientProperties(properties); 56 | } 57 | 58 | @Override 59 | public S3TablesClient s3tables() { 60 | return S3TablesClient.builder() 61 | .applyMutation(awsClientProperties::applyClientRegionConfiguration) 62 | .applyMutation(httpClientProperties::applyHttpClientConfigurations) 63 | .applyMutation(s3TablesProperties::applyUserAgentConfigurations) 64 | .applyMutation(s3TablesProperties::applyS3TableEndpointConfigurations) 65 | .applyMutation(awsClientProperties::applyClientCredentialConfigurations) 66 | .build(); 67 | } 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /src/software/amazon/s3tables/iceberg/imports/UrlConnectionHttpClientConfigurations.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. 
See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | package software.amazon.s3tables.iceberg.imports; 20 | 21 | import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; 22 | import org.apache.iceberg.util.PropertyUtil; 23 | import software.amazon.awssdk.awscore.client.builder.AwsSyncClientBuilder; 24 | import software.amazon.awssdk.http.urlconnection.ProxyConfiguration; 25 | import software.amazon.awssdk.http.urlconnection.UrlConnectionHttpClient; 26 | 27 | import java.net.URI; 28 | import java.time.Duration; 29 | import java.util.Map; 30 | 31 | class UrlConnectionHttpClientConfigurations { 32 | 33 | private Long httpClientUrlConnectionConnectionTimeoutMs; 34 | private Long httpClientUrlConnectionSocketTimeoutMs; 35 | private String proxyEndpoint; 36 | 37 | private UrlConnectionHttpClientConfigurations() {} 38 | 39 | public <T extends AwsSyncClientBuilder<?, ?>> void configureHttpClientBuilder(T awsClientBuilder) { 40 | UrlConnectionHttpClient.Builder urlConnectionHttpClientBuilder = 41 | UrlConnectionHttpClient.builder(); 42 | configureUrlConnectionHttpClientBuilder(urlConnectionHttpClientBuilder); 43 | awsClientBuilder.httpClientBuilder(urlConnectionHttpClientBuilder); 44 | } 45 | 46 | private void initialize(Map<String, String> httpClientProperties) { 47 | this.httpClientUrlConnectionConnectionTimeoutMs = 48 | PropertyUtil.propertyAsNullableLong( 49 | httpClientProperties, HttpClientProperties.URLCONNECTION_CONNECTION_TIMEOUT_MS); 50 | this.httpClientUrlConnectionSocketTimeoutMs = 51 | PropertyUtil.propertyAsNullableLong( 52 | httpClientProperties, HttpClientProperties.URLCONNECTION_SOCKET_TIMEOUT_MS); 53 | this.proxyEndpoint = 54 | PropertyUtil.propertyAsString( 55 | httpClientProperties, HttpClientProperties.PROXY_ENDPOINT, null); 56 | } 57 | 58 | @VisibleForTesting 59 | void configureUrlConnectionHttpClientBuilder( 60 | UrlConnectionHttpClient.Builder urlConnectionHttpClientBuilder) { 61 | if (httpClientUrlConnectionConnectionTimeoutMs != null) { 62 | urlConnectionHttpClientBuilder.connectionTimeout( 63 | Duration.ofMillis(httpClientUrlConnectionConnectionTimeoutMs)); 64 | } 65 | if (httpClientUrlConnectionSocketTimeoutMs != null) { 66 | urlConnectionHttpClientBuilder.socketTimeout( 67 | Duration.ofMillis(httpClientUrlConnectionSocketTimeoutMs)); 68 | } 69 | if (proxyEndpoint != null) { 70 | urlConnectionHttpClientBuilder.proxyConfiguration( 71 | ProxyConfiguration.builder().endpoint(URI.create(proxyEndpoint)).build()); 72 | } 73 | } 74 | 75 | public static UrlConnectionHttpClientConfigurations create( 76 | Map<String, String> httpClientProperties) { 77 | UrlConnectionHttpClientConfigurations configurations = 78 | new UrlConnectionHttpClientConfigurations(); 79 | configurations.initialize(httpClientProperties); 80 | return configurations; 81 | } 82 | } 83 | --------------------------------------------------------------------------------
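
The package-private HTTP client configuration classes in `imports` are not used directly; they are selected and populated by `HttpClientProperties`, which the client factories feed with the catalog's property map. A minimal sketch of that wiring follows — the authoritative key names are the constants on `HttpClientProperties` (for example `URLCONNECTION_CONNECTION_TIMEOUT_MS` above); the string literals below are assumptions for illustration:

```java
import java.util.HashMap;
import java.util.Map;
import software.amazon.awssdk.services.s3tables.S3TablesClient;
import software.amazon.s3tables.iceberg.imports.HttpClientProperties;

public class HttpClientConfigSketch {
    public static void main(String[] args) {
        // Catalog properties tuning the SDK HTTP layer; these key literals are
        // assumed and should really be read from HttpClientProperties constants.
        Map<String, String> properties = new HashMap<>();
        properties.put("http-client.type", "urlconnection");
        properties.put("http-client.urlconnection.connection-timeout-ms", "5000");

        HttpClientProperties httpClientProperties = new HttpClientProperties(properties);

        // applyMutation hands the S3TablesClientBuilder to applyHttpClientConfigurations,
        // which delegates to the UrlConnection or Apache configuration class above.
        // Region and credentials are assumed to come from the default provider chain.
        try (S3TablesClient client = S3TablesClient.builder()
                .applyMutation(httpClientProperties::applyHttpClientConfigurations)
                .build()) {
            // use the client
        }
    }
}
```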
/src/software/amazon/s3tables/iceberg/S3TablesAssumeRoleAwsClientFactory.java: -------------------------------------------------------------------------------- 1 | package software.amazon.s3tables.iceberg; 2 | 3 | import org.apache.iceberg.relocated.com.google.common.base.Preconditions; 4 | import software.amazon.awssdk.awscore.client.builder.AwsClientBuilder; 5 | import software.amazon.awssdk.awscore.client.builder.AwsSyncClientBuilder; 6 | import software.amazon.awssdk.regions.Region; 7 | import software.amazon.awssdk.services.s3tables.S3TablesClient; 8 | import software.amazon.awssdk.services.sts.StsClient; 9 | import software.amazon.awssdk.services.sts.auth.StsAssumeRoleCredentialsProvider; 10 | import software.amazon.awssdk.services.sts.model.AssumeRoleRequest; 11 | import software.amazon.s3tables.iceberg.imports.AwsClientProperties; 12 | import software.amazon.s3tables.iceberg.imports.HttpClientProperties; 13 | 14 | import java.util.Map; 15 | import java.util.UUID; 16 | 17 | public class S3TablesAssumeRoleAwsClientFactory implements S3TablesAwsClientFactory { 18 | protected S3TablesProperties s3TablesProperties; 19 | protected AwsClientProperties awsClientProperties; 20 | protected HttpClientProperties httpClientProperties; 21 | private String roleSessionName; 22 | 23 | public S3TablesAssumeRoleAwsClientFactory() { 24 | s3TablesProperties = new S3TablesProperties(); 25 | awsClientProperties = new AwsClientProperties(); 26 | httpClientProperties = new HttpClientProperties(); 27 | } 28 | 29 | @Override 30 | public void initialize(Map<String, String> properties) { 31 | s3TablesProperties = new S3TablesProperties(properties); 32 | this.httpClientProperties = new HttpClientProperties(properties); 33 | awsClientProperties = new AwsClientProperties(properties); 34 | this.roleSessionName = genSessionName(); 35 | Preconditions.checkNotNull( 36 | awsClientProperties.clientAssumeRoleArn(), 37 | "Cannot initialize AssumeRoleClientConfigFactory with null role ARN"); 38 | Preconditions.checkNotNull( 39 | awsClientProperties.clientAssumeRoleRegion(), 40 | "Cannot initialize AssumeRoleClientConfigFactory with null region"); 41 | } 42 | 43 | @Override 44 | public S3TablesClient s3tables() { 45 | return S3TablesClient.builder() 46 | .applyMutation(httpClientProperties::applyHttpClientConfigurations) 47 | .applyMutation(s3TablesProperties::applyS3TableEndpointConfigurations) 48 | .applyMutation(this::applyAssumeRoleConfigurations) 49 | .build(); 50 | } 51 | 52 | private String genSessionName() { 53 | return String.format("s3tables-aws-%s", UUID.randomUUID()); 54 | } 55 | 56 | protected <T extends AwsClientBuilder<?, ?> & AwsSyncClientBuilder<?, ?>> T applyAssumeRoleConfigurations( 57 | T clientBuilder) { 58 | AssumeRoleRequest assumeRoleRequest = 59 | AssumeRoleRequest.builder() 60 | .roleArn(awsClientProperties.clientAssumeRoleArn()) 61 | .roleSessionName(roleSessionName) 62 | .durationSeconds(awsClientProperties.clientAssumeRoleTimeoutSec()) 63 | .externalId(awsClientProperties.clientAssumeRoleExternalId()) 64 | .tags(awsClientProperties.stsClientAssumeRoleTags()) 65 | .build(); 66 | clientBuilder 67 | .credentialsProvider( 68 | StsAssumeRoleCredentialsProvider.builder() 69 | .stsClient(sts()) 70 | .refreshRequest(assumeRoleRequest) 71 | .build()) 72 | .region(Region.of(awsClientProperties.clientAssumeRoleRegion())); 73 | return clientBuilder; 74 | } 75 | 76 | private StsClient sts() { 77 | return StsClient.builder() 78 | .applyMutation(httpClientProperties::applyHttpClientConfigurations) 79 | .build(); 80 | } 81 | } 82 | 
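
Taken together, `S3TablesAwsClientFactories.from` and the factory above let a caller switch the catalog to role-based credentials purely through properties. A hedged sketch, assuming `client.assume-role.*` key names — the real keys are defined on the imported `AwsClientProperties`, and the role ARN below is a placeholder:

```java
import java.util.HashMap;
import java.util.Map;
import software.amazon.awssdk.services.s3tables.S3TablesClient;
import software.amazon.s3tables.iceberg.S3TablesAwsClientFactories;
import software.amazon.s3tables.iceberg.S3TablesAwsClientFactory;
import software.amazon.s3tables.iceberg.S3TablesProperties;

public class AssumeRoleFactorySketch {
    public static void main(String[] args) {
        Map<String, String> properties = new HashMap<>();
        // Select the factory implementation by fully qualified class name.
        properties.put(S3TablesProperties.CLIENT_FACTORY,
            "software.amazon.s3tables.iceberg.S3TablesAssumeRoleAwsClientFactory");
        // Read through AwsClientProperties; these key names are assumptions.
        properties.put("client.assume-role.arn", "arn:aws:iam::123456789012:role/example-role");
        properties.put("client.assume-role.region", "us-west-2");

        // from() reflectively instantiates the factory and calls initialize(properties),
        // which validates that the role ARN and region are present.
        S3TablesAwsClientFactory factory = S3TablesAwsClientFactories.from(properties);
        try (S3TablesClient client = factory.s3tables()) {
            client.listTableBuckets(request -> {});
        }
    }
}
```

Because `genSessionName` appends a random UUID, each factory initialization produces a distinct STS session name, which keeps sessions from separate catalog instances distinguishable in audit logs.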
-------------------------------------------------------------------------------- /src/software/amazon/s3tables/iceberg/S3TablesLocationProvider.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | package software.amazon.s3tables.iceberg; 20 | 21 | import java.nio.charset.StandardCharsets; 22 | import java.util.Map; 23 | import org.apache.iceberg.PartitionSpec; 24 | import org.apache.iceberg.StructLike; 25 | import org.apache.iceberg.TableProperties; 26 | import org.apache.iceberg.io.LocationProvider; 27 | import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; 28 | import org.apache.iceberg.relocated.com.google.common.hash.HashCode; 29 | import org.apache.iceberg.relocated.com.google.common.hash.HashFunction; 30 | import org.apache.iceberg.relocated.com.google.common.hash.Hashing; 31 | import org.apache.iceberg.util.LocationUtil; 32 | 33 | /** 34 | * This location provider provides data locations that are optimized for S3 performance. Both 35 | * General Purpose buckets and Directory buckets will see better throughput and autoscaling behavior 36 | * than using the generic ObjectStoreLocationProvider. 37 | * 38 | * <p>The data location is resolved as follows. Data files are written directly at this path with no 39 | * other intermediate directories created. 40 | * 41 | * <ol> 42 | *   <li>1. {@link TableProperties#WRITE_DATA_LOCATION} 43 | *   <li>2. tableLocation + "/data" 44 | * </ol> 45 | * 46 | * The data file is placed immediately under the data location. Partition names are not 47 | * included. The data filename is prefixed with a 24-character binary hash, which ensures that files 48 | * written to S3 are equally distributed across many prefixes in the S3 bucket. 49 | * 50 | * <p>For example, with tableLocation {@code s3://my-bucket/my-table}, an example data file 51 | * could look like 52 | * {@code s3://my-bucket/my-table/data/011101101010001111101000-00000-0-5affc076-96a4-48f2-9cd2-d5efbc9f0c94-00001.parquet}. 53 | * 54 | */ 55 | public class S3TablesLocationProvider implements LocationProvider { 56 | private static final HashFunction HASH_FUNC = Hashing.murmur3_32_fixed(); 57 | // the starting index of the lower 24-bits of a 32-bit binary string 58 | private static final int HASH_BINARY_STRING_START_INDEX = 8; 59 | private final String storageLocation; 60 | 61 | public S3TablesLocationProvider(String tableLocation, Map<String, String> properties) { 62 | this.storageLocation = LocationUtil.stripTrailingSlash(dataLocation(properties, tableLocation)); 63 | } 64 | 65 | @Override 66 | public String newDataLocation(PartitionSpec spec, StructLike partitionData, String filename) { 67 | return newDataLocation(filename); 68 | } 69 | 70 | @Override 71 | public String newDataLocation(String filename) { 72 | String hash = computeHash(filename); 73 | return String.format("%s/%s-%s", storageLocation, hash, filename); 74 | } 75 | 76 | private static String dataLocation(Map<String, String> properties, String tableLocation) { 77 | String dataLocation = properties.get(TableProperties.WRITE_DATA_LOCATION); 78 | if (dataLocation == null) { 79 | dataLocation = String.format("%s/data", tableLocation); 80 | } 81 | return dataLocation; 82 | } 83 | 84 | @VisibleForTesting 85 | String computeHash(String fileName) { 86 | HashCode hashCode = HASH_FUNC.hashString(fileName, StandardCharsets.UTF_8); 87 | int hash = hashCode.asInt(); 88 | 89 | // {@link Integer#toBinaryString} excludes leading zeros, which we want to preserve. 90 | // force the first bit to be set to get around that. 91 | String hashAsBinaryString = Integer.toBinaryString(hash | Integer.MIN_VALUE); 92 | return hashAsBinaryString.substring(HASH_BINARY_STRING_START_INDEX); 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /runtime/build.gradle: -------------------------------------------------------------------------------- 1 | apply plugin: 'java' 2 | apply plugin: 'com.gradleup.shadow' 3 | apply plugin: 'maven-publish' 4 | apply plugin: 'signing' 5 | 6 | repositories { 7 | mavenCentral() 8 | } 9 | 10 | group = 'software.amazon.s3tables' 11 | version = '0.1.8' 12 | 13 | dependencies { 14 | implementation project(':') 15 | } 16 | 17 | java { 18 | toolchain { 19 | languageVersion = JavaLanguageVersion.of(8) 20 | } 21 | withSourcesJar() 22 | withJavadocJar() 23 | } 24 | 25 | shadowJar { 26 | // exclude dependencies that are already in Iceberg engine runtimes 27 | dependencies { 28 | exclude(dependency('com.github.ben-manes.caffeine:caffeine:.*')) 29 | exclude(dependency('org.apache.iceberg:.*')) 30 | exclude(dependency('com.fasterxml.jackson.core:.*')) 31 | exclude(dependency('com.google.errorprone:.*')) 32 | exclude(dependency('io.airlift:.*')) 33 | exclude(dependency('io.netty:.*')) 34 | exclude(dependency('org.apache.avro:.*')) 35 | exclude(dependency('org.apache.httpcomponents.client5:.*')) 36 | exclude(dependency('org.apache.httpcomponents.core5:.*')) 37 | exclude(dependency('org.checkerframework:.*')) 38 | exclude(dependency('org.roaringbitmap:.*')) 39 | exclude(dependency('org.slf4j:.*')) 40 | } 41 | 42 | // relocate project specific dependencies 43 | relocate 'software.amazon.awssdk', 'software.amazon.s3tables.shaded.awssdk' 44 | relocate 'org.apache.commons', 'software.amazon.s3tables.shaded.org.apache.commons' 45 |
relocate 'io.netty', 'software.amazon.s3tables.shaded.io.netty' 46 | relocate 'org.apache.http', 'software.amazon.s3tables.shaded.org.apache.http' 47 | 48 | // relocate all dependencies that Iceberg engine runtimes already relocates 49 | relocate 'com.google.errorprone', 'org.apache.iceberg.shaded.com.google.errorprone' 50 | relocate 'com.google.flatbuffers', 'org.apache.iceberg.shaded.com.google.flatbuffers' 51 | relocate 'com.fasterxml', 'org.apache.iceberg.shaded.com.fasterxml' 52 | relocate 'com.github.benmanes', 'org.apache.iceberg.shaded.com.github.benmanes' 53 | relocate 'org.checkerframework', 'org.apache.iceberg.shaded.org.checkerframework' 54 | relocate 'org.apache.avro', 'org.apache.iceberg.shaded.org.apache.avro' 55 | relocate 'avro.shaded', 'org.apache.iceberg.shaded.org.apache.avro.shaded' 56 | relocate 'com.thoughtworks.paranamer', 'org.apache.iceberg.shaded.com.thoughtworks.paranamer' 57 | relocate 'org.apache.parquet', 'org.apache.iceberg.shaded.org.apache.parquet' 58 | relocate 'shaded.parquet', 'org.apache.iceberg.shaded.org.apache.parquet.shaded' 59 | relocate 'org.apache.orc', 'org.apache.iceberg.shaded.org.apache.orc' 60 | relocate 'io.airlift', 'org.apache.iceberg.shaded.io.airlift' 61 | relocate 'org.apache.hc.client5', 'org.apache.iceberg.shaded.org.apache.hc.client5' 62 | relocate 'org.apache.hc.core5', 'org.apache.iceberg.shaded.org.apache.hc.core5' 63 | relocate 'org.apache.arrow', 'org.apache.iceberg.shaded.org.apache.arrow' 64 | relocate 'com.carrotsearch', 'org.apache.iceberg.shaded.com.carrotsearch' 65 | relocate 'org.threeten.extra', 'org.apache.iceberg.shaded.org.threeten.extra' 66 | relocate 'org.roaringbitmap', 'org.apache.iceberg.shaded.org.roaringbitmap' 67 | 68 | archiveClassifier.set(null) 69 | } 70 | 71 | shadowJar.finalizedBy javadocJar 72 | shadowJar.finalizedBy sourcesJar 73 | 74 | publishing { 75 | publications { 76 | maven(MavenPublication) { 77 | from components.shadow 78 | artifact sourcesJar 79 | artifact javadocJar 80 | pom { 81 | name = 'Amazon S3 Tables Catalog for Apache Iceberg Runtime Jar' 82 | description = 'Amazon S3 Tables Catalog for Apache Iceberg Runtime Jar.' 
83 | url = 'https://github.com/awslabs/s3-tables-catalog' 84 | inceptionYear = '2024' 85 | scm { 86 | url = "https://github.com/awslabs/s3-tables-catalog/tree/main" 87 | connection = "scm:git:ssh://git@github.com:awslabs/s3-tables-catalog.git" 88 | developerConnection = "scm:git:ssh://git@github.com:awslabs/s3-tables-catalog.git" 89 | } 90 | 91 | licenses { 92 | license { 93 | name = 'The Apache License, Version 2.0' 94 | url = 'https://www.apache.org/licenses/LICENSE-2.0.txt' 95 | distribution = 'repo' 96 | } 97 | } 98 | 99 | developers { 100 | developer { 101 | organization = "Amazon Web Services" 102 | organizationUrl = "https://aws.amazon.com" 103 | } 104 | } 105 | } 106 | } 107 | } 108 | repositories { 109 | maven { 110 | url = 'https://ossrh-staging-api.central.sonatype.com/service/local/staging/deploy/maven2/' 111 | credentials(PasswordCredentials) 112 | } 113 | } 114 | } 115 | 116 | signing { 117 | sign publishing.publications.maven 118 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Amazon S3 Tables Catalog for Apache Iceberg 2 | 3 | 4 | 5 | The Amazon S3 Tables Catalog for Apache Iceberg is an open-source library that bridges [S3 Tables](https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-tables.html) operations to engines like [Apache Spark](https://spark.apache.org/), when used with the [Apache Iceberg](https://iceberg.apache.org/) Open Table Format. 6 | 7 | This library can: 8 | * Translate [Apache Iceberg](https://iceberg.apache.org/) operations such as table discovery, metadata reads, and updates 9 | * Add and remove tables in Amazon S3 Tables 10 | 11 | 12 | 13 | ### What are Amazon S3 Tables and table buckets? 14 | 15 | Amazon S3 Tables are built for storing tabular data, such as daily purchase transactions, streaming sensor data, or ad impressions. Tabular data represents data in columns and rows, like in a database table. Tabular data is most commonly stored in the [Apache Parquet](https://parquet.apache.org/) format. 16 | 17 | The tabular data in Amazon S3 Tables is stored in a new S3 bucket type: a **table bucket**, which stores tables as subresources. S3 Tables has built-in support for tables in the [Apache Iceberg](https://iceberg.apache.org/) format. Using standard SQL statements, you can query your tables with query engines that support Apache Iceberg, such as [Amazon Athena](https://aws.amazon.com/athena/), [Amazon Redshift](https://aws.amazon.com/pm/redshift/), and [Apache Spark](https://spark.apache.org/). 18 | 19 | ## Current Status 20 | 21 | Amazon S3 Tables Catalog for Apache Iceberg is generally available. We're always interested in feedback on features, performance, and compatibility. Please send feedback by opening a [GitHub issue](https://github.com/awslabs/s3-tables-catalog/issues/new/). 22 | 23 | If you discover a potential security issue in this project we ask that you notify Amazon Web Services (AWS) Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do not create a public GitHub issue. 24 | 25 | ## Getting Started 26 | 27 | To get started with Amazon S3 Tables, see [Tutorial: Getting started with S3 Tables](https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-tables-getting-started.html) in the *Amazon S3 User Guide*. 28 | 29 | ### Configuration 30 | 31 | - `<catalog-name>` is your Iceberg Spark session catalog name. 
Replace it with the name of 32 | your catalog, and remember to change the references throughout all configurations that 33 | are associated with this catalog. In your code, you should then refer to your Iceberg tables 34 | with the fully qualified table name, including the Spark session catalog name, as follows: 35 | `<catalog-name>.<namespace>.<table-name>` 36 | 37 | - `<catalog-name>.warehouse` points to the Amazon S3 Tables path 38 | - `<catalog-name>.catalog-impl = "software.amazon.s3tables.iceberg.S3TablesCatalog"` This key is required to point to an 39 | implementation class for any custom catalog implementation. 40 | 41 | ### Java Spark app Example 42 | 43 | Add the lines below to your pom.xml: 44 | ``` 45 | <dependency> 46 |     <groupId>software.amazon.awssdk</groupId> 47 |     <artifactId>s3tables</artifactId> 48 |     <version>2.29.26</version> 49 | </dependency> 50 | <dependency> 51 |     <groupId>software.amazon.s3tables</groupId> 52 |     <artifactId>s3-tables-catalog-for-iceberg</artifactId> 53 |     <version>0.1.8</version> 54 | </dependency> 55 | ``` 56 | Or if you are using a [BOM](https://aws.amazon.com/blogs/developer/managing-dependencies-with-aws-sdk-for-java-bill-of-materials-module-bom/) just add a dependency on the S3 Tables SDK: 57 | ``` 58 | <dependencyManagement> 59 |     <dependencies> 60 |         <dependency> 61 |             <groupId>software.amazon.awssdk</groupId> 62 |             <artifactId>bom</artifactId> 63 |             <version>2.29.26</version> 64 |             <type>pom</type> 65 |             <scope>import</scope> 66 |         </dependency> 67 |     </dependencies> 68 | </dependencyManagement> 69 | ``` 70 | 71 | Or for Gradle: 72 | 73 | ``` 74 | dependencies { 75 | implementation 'software.amazon.awssdk:s3tables:2.29.26' 76 | implementation 'software.amazon.s3tables:s3-tables-catalog-for-iceberg:0.1.8' 77 | } 78 | ``` 79 | 80 | 81 | 82 | And finally start a spark session: 83 | 84 | ``` 85 | spark = SparkSession.builder() 86 | .config("spark.sql.catalog.<catalog-name>", "org.apache.iceberg.spark.SparkCatalog") 87 | .config("spark.sql.catalog.<catalog-name>.catalog-impl", "software.amazon.s3tables.iceberg.S3TablesCatalog") 88 | .config("spark.sql.catalog.<catalog-name>.warehouse", <warehouse-path>) 89 | .getOrCreate(); 90 | ``` 91 | 92 | ## Contributions 93 | 94 | We welcome contributions to Amazon S3 Tables Catalog for Apache Iceberg! Please see the [contributing guidelines](CONTRIBUTING.md) for more information on how to report bugs, build from source code, or submit pull requests. 95 | 96 | ## Security 97 | 98 | If you discover a potential security issue in this project we ask that you notify Amazon Web Services (AWS) Security via our [vulnerability reporting](http://aws.amazon.com/security/vulnerability-reporting/) page. Please do not create a public GitHub issue. 99 | 100 | ## License 101 | 102 | This project is licensed under the [Apache-2.0 License](LICENSE). 103 | -------------------------------------------------------------------------------- /src/software/amazon/s3tables/iceberg/imports/ApacheHttpClientConfigurations.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | package software.amazon.s3tables.iceberg.imports; 20 | 21 | import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; 22 | import org.apache.iceberg.util.PropertyUtil; 23 | import software.amazon.awssdk.awscore.client.builder.AwsSyncClientBuilder; 24 | import software.amazon.awssdk.http.apache.ApacheHttpClient; 25 | import software.amazon.awssdk.http.apache.ProxyConfiguration; 26 | 27 | import java.net.URI; 28 | import java.time.Duration; 29 | import java.util.Map; 30 | 31 | class ApacheHttpClientConfigurations { 32 | private Long connectionTimeoutMs; 33 | private Long socketTimeoutMs; 34 | private Long acquisitionTimeoutMs; 35 | private Long connectionMaxIdleTimeMs; 36 | private Long connectionTimeToLiveMs; 37 | private Boolean expectContinueEnabled; 38 | private Integer maxConnections; 39 | private Boolean tcpKeepAliveEnabled; 40 | private Boolean useIdleConnectionReaperEnabled; 41 | private String proxyEndpoint; 42 | 43 | private ApacheHttpClientConfigurations() {} 44 | 45 | public <T extends AwsSyncClientBuilder<?, ?>> void configureHttpClientBuilder(T awsClientBuilder) { 46 | ApacheHttpClient.Builder apacheHttpClientBuilder = ApacheHttpClient.builder(); 47 | configureApacheHttpClientBuilder(apacheHttpClientBuilder); 48 | awsClientBuilder.httpClientBuilder(apacheHttpClientBuilder); 49 | } 50 | 51 | private void initialize(Map<String, String> httpClientProperties) { 52 | this.connectionTimeoutMs = 53 | PropertyUtil.propertyAsNullableLong( 54 | httpClientProperties, HttpClientProperties.APACHE_CONNECTION_TIMEOUT_MS); 55 | this.socketTimeoutMs = 56 | PropertyUtil.propertyAsNullableLong( 57 | httpClientProperties, HttpClientProperties.APACHE_SOCKET_TIMEOUT_MS); 58 | this.acquisitionTimeoutMs = 59 | PropertyUtil.propertyAsNullableLong( 60 | httpClientProperties, HttpClientProperties.APACHE_CONNECTION_ACQUISITION_TIMEOUT_MS); 61 | this.connectionMaxIdleTimeMs = 62 | PropertyUtil.propertyAsNullableLong( 63 | httpClientProperties, HttpClientProperties.APACHE_CONNECTION_MAX_IDLE_TIME_MS); 64 | this.connectionTimeToLiveMs = 65 | PropertyUtil.propertyAsNullableLong( 66 | httpClientProperties, HttpClientProperties.APACHE_CONNECTION_TIME_TO_LIVE_MS); 67 | this.expectContinueEnabled = 68 | PropertyUtil.propertyAsNullableBoolean( 69 | httpClientProperties, HttpClientProperties.APACHE_EXPECT_CONTINUE_ENABLED); 70 | this.maxConnections = 71 | PropertyUtil.propertyAsNullableInt( 72 | httpClientProperties, HttpClientProperties.APACHE_MAX_CONNECTIONS); 73 | this.tcpKeepAliveEnabled = 74 | PropertyUtil.propertyAsNullableBoolean( 75 | httpClientProperties, HttpClientProperties.APACHE_TCP_KEEP_ALIVE_ENABLED); 76 | this.useIdleConnectionReaperEnabled = 77 | PropertyUtil.propertyAsNullableBoolean( 78 | httpClientProperties, HttpClientProperties.APACHE_USE_IDLE_CONNECTION_REAPER_ENABLED); 79 | this.proxyEndpoint = 80 | PropertyUtil.propertyAsString( 81 | httpClientProperties, HttpClientProperties.PROXY_ENDPOINT, null); 82 | } 83 | 84 | @VisibleForTesting 85 | void configureApacheHttpClientBuilder(ApacheHttpClient.Builder apacheHttpClientBuilder) { 86 | if (connectionTimeoutMs != null) { 87 | apacheHttpClientBuilder.connectionTimeout(Duration.ofMillis(connectionTimeoutMs)); 88 | } 89 | if (socketTimeoutMs != null) { 90 | apacheHttpClientBuilder.socketTimeout(Duration.ofMillis(socketTimeoutMs)); 91 | } 92 | if (acquisitionTimeoutMs != null) { 93 | apacheHttpClientBuilder.connectionAcquisitionTimeout(Duration.ofMillis(acquisitionTimeoutMs)); 94 | } 95 | if (connectionMaxIdleTimeMs != null) { 96 |
apacheHttpClientBuilder.connectionMaxIdleTime(Duration.ofMillis(connectionMaxIdleTimeMs)); 97 | } 98 | if (connectionTimeToLiveMs != null) { 99 | apacheHttpClientBuilder.connectionTimeToLive(Duration.ofMillis(connectionTimeToLiveMs)); 100 | } 101 | if (expectContinueEnabled != null) { 102 | apacheHttpClientBuilder.expectContinueEnabled(expectContinueEnabled); 103 | } 104 | if (maxConnections != null) { 105 | apacheHttpClientBuilder.maxConnections(maxConnections); 106 | } 107 | if (tcpKeepAliveEnabled != null) { 108 | apacheHttpClientBuilder.tcpKeepAlive(tcpKeepAliveEnabled); 109 | } 110 | if (useIdleConnectionReaperEnabled != null) { 111 | apacheHttpClientBuilder.useIdleConnectionReaper(useIdleConnectionReaperEnabled); 112 | } 113 | if (proxyEndpoint != null) { 114 | apacheHttpClientBuilder.proxyConfiguration( 115 | ProxyConfiguration.builder().endpoint(URI.create(proxyEndpoint)).build()); 116 | } 117 | } 118 | 119 | public static ApacheHttpClientConfigurations create(Map properties) { 120 | ApacheHttpClientConfigurations configurations = new ApacheHttpClientConfigurations(); 121 | configurations.initialize(properties); 122 | return configurations; 123 | } 124 | } 125 | -------------------------------------------------------------------------------- /gradlew: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # 4 | # Copyright © 2015-2021 the original authors. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # https://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | # SPDX-License-Identifier: Apache-2.0 19 | # 20 | 21 | ############################################################################## 22 | # 23 | # Gradle start up script for POSIX generated by Gradle. 24 | # 25 | # Important for running: 26 | # 27 | # (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is 28 | # noncompliant, but you have some other compliant shell such as ksh or 29 | # bash, then to run this script, type that shell name before the whole 30 | # command line, like: 31 | # 32 | # ksh Gradle 33 | # 34 | # Busybox and similar reduced shells will NOT work, because this script 35 | # requires all of these POSIX shell features: 36 | # * functions; 37 | # * expansions «$var», «${var}», «${var:-default}», «${var+SET}», 38 | # «${var#prefix}», «${var%suffix}», and «$( cmd )»; 39 | # * compound commands having a testable exit status, especially «case»; 40 | # * various built-in commands including «command», «set», and «ulimit». 41 | # 42 | # Important for patching: 43 | # 44 | # (2) This script targets any POSIX shell, so it avoids extensions provided 45 | # by Bash, Ksh, etc; in particular arrays are avoided. 46 | # 47 | # The "traditional" practice of packing multiple parameters into a 48 | # space-separated string is a well documented source of bugs and security 49 | # problems, so this is (mostly) avoided, by progressively accumulating 50 | # options in "$@", and eventually passing that to Java. 
51 | # 52 | # Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS, 53 | # and GRADLE_OPTS) rely on word-splitting, this is performed explicitly; 54 | # see the in-line comments for details. 55 | # 56 | # There are tweaks for specific operating systems such as AIX, CygWin, 57 | # Darwin, MinGW, and NonStop. 58 | # 59 | # (3) This script is generated from the Groovy template 60 | # https://github.com/gradle/gradle/blob/HEAD/platforms/jvm/plugins-application/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt 61 | # within the Gradle project. 62 | # 63 | # You can find Gradle at https://github.com/gradle/gradle/. 64 | # 65 | ############################################################################## 66 | 67 | # Attempt to set APP_HOME 68 | 69 | # Resolve links: $0 may be a link 70 | app_path=$0 71 | 72 | # Need this for daisy-chained symlinks. 73 | while 74 | APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path 75 | [ -h "$app_path" ] 76 | do 77 | ls=$( ls -ld "$app_path" ) 78 | link=${ls#*' -> '} 79 | case $link in #( 80 | /*) app_path=$link ;; #( 81 | *) app_path=$APP_HOME$link ;; 82 | esac 83 | done 84 | 85 | # This is normally unused 86 | # shellcheck disable=SC2034 87 | APP_BASE_NAME=${0##*/} 88 | # Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036) 89 | APP_HOME=$( cd -P "${APP_HOME:-./}" > /dev/null && printf '%s 90 | ' "$PWD" ) || exit 91 | 92 | # Use the maximum available, or set MAX_FD != -1 to use that value. 93 | MAX_FD=maximum 94 | 95 | warn () { 96 | echo "$*" 97 | } >&2 98 | 99 | die () { 100 | echo 101 | echo "$*" 102 | echo 103 | exit 1 104 | } >&2 105 | 106 | # OS specific support (must be 'true' or 'false'). 107 | cygwin=false 108 | msys=false 109 | darwin=false 110 | nonstop=false 111 | case "$( uname )" in #( 112 | CYGWIN* ) cygwin=true ;; #( 113 | Darwin* ) darwin=true ;; #( 114 | MSYS* | MINGW* ) msys=true ;; #( 115 | NONSTOP* ) nonstop=true ;; 116 | esac 117 | 118 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar 119 | 120 | 121 | # Determine the Java command to use to start the JVM. 122 | if [ -n "$JAVA_HOME" ] ; then 123 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 124 | # IBM's JDK on AIX uses strange locations for the executables 125 | JAVACMD=$JAVA_HOME/jre/sh/java 126 | else 127 | JAVACMD=$JAVA_HOME/bin/java 128 | fi 129 | if [ ! -x "$JAVACMD" ] ; then 130 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME 131 | 132 | Please set the JAVA_HOME variable in your environment to match the 133 | location of your Java installation." 134 | fi 135 | else 136 | JAVACMD=java 137 | if ! command -v java >/dev/null 2>&1 138 | then 139 | die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 140 | 141 | Please set the JAVA_HOME variable in your environment to match the 142 | location of your Java installation." 143 | fi 144 | fi 145 | 146 | # Increase the maximum file descriptors if we can. 147 | if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then 148 | case $MAX_FD in #( 149 | max*) 150 | # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked. 151 | # shellcheck disable=SC2039,SC3045 152 | MAX_FD=$( ulimit -H -n ) || 153 | warn "Could not query maximum file descriptor limit" 154 | esac 155 | case $MAX_FD in #( 156 | '' | soft) :;; #( 157 | *) 158 | # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked. 
159 | # shellcheck disable=SC2039,SC3045 160 | ulimit -n "$MAX_FD" || 161 | warn "Could not set maximum file descriptor limit to $MAX_FD" 162 | esac 163 | fi 164 | 165 | # Collect all arguments for the java command, stacking in reverse order: 166 | # * args from the command line 167 | # * the main class name 168 | # * -classpath 169 | # * -D...appname settings 170 | # * --module-path (only if needed) 171 | # * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. 172 | 173 | # For Cygwin or MSYS, switch paths to Windows format before running java 174 | if "$cygwin" || "$msys" ; then 175 | APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) 176 | CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" ) 177 | 178 | JAVACMD=$( cygpath --unix "$JAVACMD" ) 179 | 180 | # Now convert the arguments - kludge to limit ourselves to /bin/sh 181 | for arg do 182 | if 183 | case $arg in #( 184 | -*) false ;; # don't mess with options #( 185 | /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath 186 | [ -e "$t" ] ;; #( 187 | *) false ;; 188 | esac 189 | then 190 | arg=$( cygpath --path --ignore --mixed "$arg" ) 191 | fi 192 | # Roll the args list around exactly as many times as the number of 193 | # args, so each arg winds up back in the position where it started, but 194 | # possibly modified. 195 | # 196 | # NB: a `for` loop captures its iteration list before it begins, so 197 | # changing the positional parameters here affects neither the number of 198 | # iterations, nor the values presented in `arg`. 199 | shift # remove old arg 200 | set -- "$@" "$arg" # push replacement arg 201 | done 202 | fi 203 | 204 | 205 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 206 | DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' 207 | 208 | # Collect all arguments for the java command: 209 | # * DEFAULT_JVM_OPTS, JAVA_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments, 210 | # and any embedded shellness will be escaped. 211 | # * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be 212 | # treated as '${Hostname}' itself on the command line. 213 | 214 | set -- \ 215 | "-Dorg.gradle.appname=$APP_BASE_NAME" \ 216 | -classpath "$CLASSPATH" \ 217 | org.gradle.wrapper.GradleWrapperMain \ 218 | "$@" 219 | 220 | # Stop when "xargs" is not available. 221 | if ! command -v xargs >/dev/null 2>&1 222 | then 223 | die "xargs is not available" 224 | fi 225 | 226 | # Use "xargs" to parse quoted args. 227 | # 228 | # With -n1 it outputs one arg per line, with the quotes and backslashes removed. 229 | # 230 | # In Bash we could simply go: 231 | # 232 | # readarray ARGS < <( xargs -n1 <<<"$var" ) && 233 | # set -- "${ARGS[@]}" "$@" 234 | # 235 | # but POSIX shell has neither arrays nor command substitution, so instead we 236 | # post-process each arg (as a line of input to sed) to backslash-escape any 237 | # character that might be a shell metacharacter, then use eval to reverse 238 | # that process (while maintaining the separation between arguments), and wrap 239 | # the whole thing up as a single "set" statement. 240 | # 241 | # This will of course break if any of these variables contains a newline or 242 | # an unmatched quote. 
243 | # 244 | 245 | eval "set -- $( 246 | printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" | 247 | xargs -n1 | 248 | sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' | 249 | tr '\n' ' ' 250 | )" '"$@"' 251 | 252 | exec "$JAVACMD" "$@" 253 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. 
For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | -------------------------------------------------------------------------------- /src/software/amazon/s3tables/iceberg/imports/HttpClientProperties.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | package software.amazon.s3tables.iceberg.imports; 20 | 21 | import org.apache.iceberg.common.DynMethods; 22 | import org.apache.iceberg.relocated.com.google.common.base.Strings; 23 | import org.apache.iceberg.util.PropertyUtil; 24 | import software.amazon.awssdk.awscore.client.builder.AwsSyncClientBuilder; 25 | 26 | import java.io.Serializable; 27 | import java.util.Collections; 28 | import java.util.Map; 29 | 30 | public class HttpClientProperties implements Serializable { 31 | 32 | /** 33 | * The type of {@link software.amazon.awssdk.http.SdkHttpClient} implementation used by {@link 34 | * software.amazon.s3tables.iceberg.S3TablesAwsClientFactory} If set, all AWS clients will use this specified HTTP 35 | * client. 
If not set, {@link #CLIENT_TYPE_DEFAULT} will be used. For specific types supported, see CLIENT_TYPE_* 36 | * defined below. 37 | */ 38 | public static final String CLIENT_TYPE = "http-client.type"; 39 | 40 | /** 41 | * If this is set under {@link #CLIENT_TYPE}, {@link 42 | * software.amazon.awssdk.http.apache.ApacheHttpClient} will be used as the HTTP Client in {@link 43 | * software.amazon.s3tables.iceberg.S3TablesAwsClientFactory} 44 | */ 45 | public static final String CLIENT_TYPE_APACHE = "apache"; 46 | 47 | private static final String CLIENT_PREFIX = "http-client."; 48 | 49 | /** 50 | * If this is set under {@link #CLIENT_TYPE}, {@link 51 | * software.amazon.awssdk.http.urlconnection.UrlConnectionHttpClient} will be used as the HTTP 52 | * Client in {@link software.amazon.s3tables.iceberg.S3TablesAwsClientFactory} 53 | */ 54 | public static final String CLIENT_TYPE_URLCONNECTION = "urlconnection"; 55 | 56 | public static final String CLIENT_TYPE_DEFAULT = CLIENT_TYPE_APACHE; 57 | 58 | /** 59 | * Used to configure the proxy endpoint. Used by both {@link 60 | * software.amazon.awssdk.http.urlconnection.UrlConnectionHttpClient.Builder} and {@link 61 | * software.amazon.awssdk.http.apache.ApacheHttpClient.Builder} 62 | */ 63 | public static final String PROXY_ENDPOINT = "http-client.proxy-endpoint"; 64 | 65 | /** 66 | * Used to configure the connection timeout in milliseconds for {@link 67 | * software.amazon.awssdk.http.urlconnection.UrlConnectionHttpClient.Builder}. This flag only 68 | * works when {@link #CLIENT_TYPE} is set to {@link #CLIENT_TYPE_URLCONNECTION} 69 | * 70 | *
For more details, see 71 | * https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/http/urlconnection/UrlConnectionHttpClient.Builder.html 72 | */ 73 | public static final String URLCONNECTION_CONNECTION_TIMEOUT_MS = 74 | "http-client.urlconnection.connection-timeout-ms"; 75 | 76 | /** 77 | * Used to configure the socket timeout in milliseconds for {@link 78 | * software.amazon.awssdk.http.urlconnection.UrlConnectionHttpClient.Builder}. This flag only 79 | * works when {@link #CLIENT_TYPE} is set to {@link #CLIENT_TYPE_URLCONNECTION} 80 | * 81 | *
For more details, see 82 | * https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/http/urlconnection/UrlConnectionHttpClient.Builder.html 83 | */ 84 | public static final String URLCONNECTION_SOCKET_TIMEOUT_MS = 85 | "http-client.urlconnection.socket-timeout-ms"; 86 | 87 | /** 88 | * Used to configure the connection timeout in milliseconds for {@link 89 | * software.amazon.awssdk.http.apache.ApacheHttpClient.Builder}. This flag only works when {@link 90 | * #CLIENT_TYPE} is set to {@link #CLIENT_TYPE_APACHE} 91 | * 92 | *
For more details, see 93 | * https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/http/apache/ApacheHttpClient.Builder.html 94 | */ 95 | public static final String APACHE_CONNECTION_TIMEOUT_MS = 96 | "http-client.apache.connection-timeout-ms"; 97 | 98 | /** 99 | * Used to configure the socket timeout in milliseconds for {@link 100 | * software.amazon.awssdk.http.apache.ApacheHttpClient.Builder}. This flag only works when {@link 101 | * #CLIENT_TYPE} is set to {@link #CLIENT_TYPE_APACHE} 102 | * 103 | *
For more details, see 104 | * https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/http/apache/ApacheHttpClient.Builder.html 105 | */ 106 | public static final String APACHE_SOCKET_TIMEOUT_MS = "http-client.apache.socket-timeout-ms"; 107 | 108 | /** 109 | * Used to configure the connection acquisition timeout in milliseconds for {@link 110 | * software.amazon.awssdk.http.apache.ApacheHttpClient.Builder}. This flag only works when {@link 111 | * #CLIENT_TYPE} is set to {@link #CLIENT_TYPE_APACHE} 112 | * 113 | *
For more details, see 114 | * https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/http/apache/ApacheHttpClient.Builder.html 115 | */ 116 | public static final String APACHE_CONNECTION_ACQUISITION_TIMEOUT_MS = 117 | "http-client.apache.connection-acquisition-timeout-ms"; 118 | 119 | /** 120 | * Used to configure the connection max idle time in milliseconds for {@link 121 | * software.amazon.awssdk.http.apache.ApacheHttpClient.Builder}. This flag only works when {@link 122 | * #CLIENT_TYPE} is set to {@link #CLIENT_TYPE_APACHE} 123 | * 124 | *
For more details, see 125 | * https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/http/apache/ApacheHttpClient.Builder.html 126 | */ 127 | public static final String APACHE_CONNECTION_MAX_IDLE_TIME_MS = 128 | "http-client.apache.connection-max-idle-time-ms"; 129 | 130 | /** 131 | * Used to configure the connection time to live in milliseconds for {@link 132 | * software.amazon.awssdk.http.apache.ApacheHttpClient.Builder}. This flag only works when {@link 133 | * #CLIENT_TYPE} is set to {@link #CLIENT_TYPE_APACHE} 134 | * 135 | *
For more details, see 136 | * https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/http/apache/ApacheHttpClient.Builder.html 137 | */ 138 | public static final String APACHE_CONNECTION_TIME_TO_LIVE_MS = 139 | "http-client.apache.connection-time-to-live-ms"; 140 | 141 | /** 142 | * Used to configure whether to enable the expect continue setting for {@link 143 | * software.amazon.awssdk.http.apache.ApacheHttpClient.Builder}. This flag only works when {@link 144 | * #CLIENT_TYPE} is set to {@link #CLIENT_TYPE_APACHE} 145 | * 146 | *
By default, this is disabled.
147 |    *
148 |    *
For more details, see 149 | * https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/http/apache/ApacheHttpClient.Builder.html 150 | */ 151 | public static final String APACHE_EXPECT_CONTINUE_ENABLED = 152 | "http-client.apache.expect-continue-enabled"; 153 | 154 | /** 155 | * Used to configure the max connections number for {@link 156 | * software.amazon.awssdk.http.apache.ApacheHttpClient.Builder}. This flag only works when {@link 157 | * #CLIENT_TYPE} is set to {@link #CLIENT_TYPE_APACHE} 158 | * 159 | *
For more details, see 160 | * https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/http/apache/ApacheHttpClient.Builder.html 161 | */ 162 | public static final String APACHE_MAX_CONNECTIONS = "http-client.apache.max-connections"; 163 | 164 | /** 165 | * Used to configure whether to enable the tcp keep alive setting for {@link 166 | * software.amazon.awssdk.http.apache.ApacheHttpClient.Builder}. This flag only works when {@link 167 | * #CLIENT_TYPE} is set to {@link #CLIENT_TYPE_APACHE}. 168 | * 169 | *
By default, this is disabled.
170 |    *
171 |    *
For more details, see 172 | * https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/http/apache/ApacheHttpClient.Builder.html 173 | */ 174 | public static final String APACHE_TCP_KEEP_ALIVE_ENABLED = 175 | "http-client.apache.tcp-keep-alive-enabled"; 176 | 177 | /** 178 | * Used to configure whether to use idle connection reaper for {@link 179 | * software.amazon.awssdk.http.apache.ApacheHttpClient.Builder}. This flag only works when {@link 180 | * #CLIENT_TYPE} is set to {@link #CLIENT_TYPE_APACHE}. 181 | * 182 | *
By default, this is enabled.
183 |    *
184 |    *
For more details, see
185 |    * https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/http/apache/ApacheHttpClient.Builder.html
186 |    */
187 |   public static final String APACHE_USE_IDLE_CONNECTION_REAPER_ENABLED =
188 |       "http-client.apache.use-idle-connection-reaper-enabled";
189 | 
190 |   private String httpClientType;
191 |   private final Map<String, String> httpClientProperties;
192 | 
193 |   public HttpClientProperties() {
194 |     this.httpClientType = CLIENT_TYPE_DEFAULT;
195 |     this.httpClientProperties = Collections.emptyMap();
196 |   }
197 | 
198 |   public HttpClientProperties(Map<String, String> properties) {
199 |     this.httpClientType =
200 |         PropertyUtil.propertyAsString(properties, CLIENT_TYPE, CLIENT_TYPE_DEFAULT);
201 |     this.httpClientProperties =
202 |         PropertyUtil.filterProperties(properties, key -> key.startsWith(CLIENT_PREFIX));
203 |   }
204 | 
205 |   /**
206 |    * Configure the httpClient for a client according to the HttpClientType. The two supported
207 |    * HttpClientTypes are urlconnection and apache.
208 |    *
209 |    *
Sample usage:
210 |    *
211 |    *
212 |    *     S3Client.builder().applyMutation(awsProperties::applyHttpClientConfigurations)
213 |    * 
214 |    */
215 |   public <T extends AwsSyncClientBuilder> void applyHttpClientConfigurations(T builder) {
216 |     if (Strings.isNullOrEmpty(httpClientType)) {
217 |       httpClientType = CLIENT_TYPE_DEFAULT;
218 |     }
219 | 
220 |     switch (httpClientType) {
221 |       case CLIENT_TYPE_URLCONNECTION:
222 |         UrlConnectionHttpClientConfigurations urlConnectionHttpClientConfigurations =
223 |             loadHttpClientConfigurations(UrlConnectionHttpClientConfigurations.class.getName());
224 |         urlConnectionHttpClientConfigurations.configureHttpClientBuilder(builder);
225 |         break;
226 |       case CLIENT_TYPE_APACHE:
227 |         ApacheHttpClientConfigurations apacheHttpClientConfigurations =
228 |             loadHttpClientConfigurations(ApacheHttpClientConfigurations.class.getName());
229 |         apacheHttpClientConfigurations.configureHttpClientBuilder(builder);
230 |         break;
231 |       default:
232 |         throw new IllegalArgumentException("Unrecognized HTTP client type " + httpClientType);
233 |     }
234 |   }
235 | 
236 |   /**
237 |    * Dynamically load the http client builder to avoid runtime deps requirements of both {@link
238 |    * software.amazon.awssdk.http.urlconnection.UrlConnectionHttpClient} and {@link
239 |    * software.amazon.awssdk.http.apache.ApacheHttpClient}, since including both will cause error
240 |    * described in issue#6715
241 |    */
242 |   private <T> T loadHttpClientConfigurations(String impl) {
243 |     Object httpClientConfigurations;
244 |     try {
245 |       httpClientConfigurations =
246 |           DynMethods.builder("create")
247 |               .hiddenImpl(impl, Map.class)
248 |               .buildStaticChecked()
249 |               .invoke(httpClientProperties);
250 |       return (T) httpClientConfigurations;
251 |     } catch (NoSuchMethodException e) {
252 |       throw new IllegalArgumentException(
253 |           String.format("Cannot create %s to generate and configure the http client builder", impl),
254 |           e);
255 |     }
256 |   }
257 | }
258 | 
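
The constants above are plain string catalog properties. A minimal sketch of how they are typically wired into an S3 Tables client follows; the property values, the wrapper class, and the main method are illustrative assumptions, not part of this repository:

```
import java.util.HashMap;
import java.util.Map;

import software.amazon.awssdk.services.s3tables.S3TablesClient;
import software.amazon.awssdk.services.s3tables.S3TablesClientBuilder;
import software.amazon.s3tables.iceberg.imports.HttpClientProperties;

public class HttpClientPropertiesExample {
    public static void main(String[] args) {
        // Select the Apache HTTP client and tune it through catalog properties.
        Map<String, String> catalogProperties = new HashMap<>();
        catalogProperties.put(HttpClientProperties.CLIENT_TYPE, HttpClientProperties.CLIENT_TYPE_APACHE);
        catalogProperties.put(HttpClientProperties.APACHE_MAX_CONNECTIONS, "64");
        catalogProperties.put(HttpClientProperties.APACHE_SOCKET_TIMEOUT_MS, "30000");

        HttpClientProperties httpClientProperties = new HttpClientProperties(catalogProperties);

        // applyMutation hands the builder to applyHttpClientConfigurations, which
        // dynamically loads the matching *Configurations class and installs the HTTP client.
        S3TablesClientBuilder builder = S3TablesClient.builder();
        builder.applyMutation(httpClientProperties::applyHttpClientConfigurations);
        S3TablesClient client = builder.build();
        client.close();
    }
}
```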
--------------------------------------------------------------------------------
/src/software/amazon/s3tables/iceberg/imports/AwsClientProperties.java:
--------------------------------------------------------------------------------
1 | /*
2 |  * Licensed to the Apache Software Foundation (ASF) under one
3 |  * or more contributor license agreements.  See the NOTICE file
4 |  * distributed with this work for additional information
5 |  * regarding copyright ownership.  The ASF licenses this file
6 |  * to you under the Apache License, Version 2.0 (the
7 |  * "License"); you may not use this file except in compliance
8 |  * with the License.  You may obtain a copy of the License at
9 |  *
10 |  *   http://www.apache.org/licenses/LICENSE-2.0
11 |  *
12 |  * Unless required by applicable law or agreed to in writing,
13 |  * software distributed under the License is distributed on an
14 |  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 |  * KIND, either express or implied. See the License for the
16 |  * specific language governing permissions and limitations
17 |  * under the License.
18 |  */
19 | package software.amazon.s3tables.iceberg.imports;
20 | 
21 | import org.apache.iceberg.common.DynClasses;
22 | import org.apache.iceberg.common.DynMethods;
23 | import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
24 | import org.apache.iceberg.relocated.com.google.common.base.Strings;
25 | import org.apache.iceberg.relocated.com.google.common.collect.Sets;
26 | import org.apache.iceberg.util.PropertyUtil;
27 | import software.amazon.awssdk.auth.credentials.AwsBasicCredentials;
28 | import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
29 | import software.amazon.awssdk.auth.credentials.AwsSessionCredentials;
30 | import software.amazon.awssdk.auth.credentials.DefaultCredentialsProvider;
31 | import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider;
32 | import software.amazon.awssdk.awscore.client.builder.AwsClientBuilder;
33 | import software.amazon.awssdk.regions.Region;
34 | import software.amazon.awssdk.services.sts.model.Tag;
35 | import software.amazon.s3tables.iceberg.S3TablesAssumeRoleAwsClientFactory;
36 | 
37 | import java.io.Serializable;
38 | import java.util.Map;
39 | import java.util.Set;
40 | import java.util.stream.Collectors;
41 | 
42 | public class AwsClientProperties implements Serializable {
43 |   /**
44 |    * Configure the AWS credentials provider used to create AWS clients. A fully qualified concrete
45 |    * class with package that implements the {@link AwsCredentialsProvider} interface is required.
46 |    *
47 |    *
Additionally, the implementation class must also have a create() or create(Map<String, String>) method
48 |    * implemented, which returns an instance of the class that provides an AWS credentials provider.
49 |    *
50 |    *
Example: 51 | * client.credentials-provider=software.amazon.awssdk.auth.credentials.SystemPropertyCredentialsProvider 52 | * 53 | *
When set, the default client factory {@link 54 | * software.amazon.s3tables.iceberg.S3TablesAwsClientFactory} will use this provider to get AWS credentials 55 | * provided instead of reading the default credential chain to get AWS access credentials. 56 | */ 57 | public static final String CLIENT_CREDENTIALS_PROVIDER = "client.credentials-provider"; 58 | 59 | /** 60 | * Used by the client.credentials-provider configured value that will be used by {@link 61 | * software.amazon.s3tables.iceberg.S3TablesAwsClientFactory} 62 | * to pass provider-specific properties. Each property consists of a key name and an 63 | * associated value. 64 | */ 65 | protected static final String CLIENT_CREDENTIAL_PROVIDER_PREFIX = "client.credentials-provider."; 66 | 67 | /** 68 | * Used by {@link software.amazon.s3tables.iceberg.S3TablesAwsClientFactory}. 69 | * If set, all AWS clients except STS client will use the given 70 | * region instead of the default region chain. 71 | */ 72 | public static final String CLIENT_REGION = "client.region"; 73 | 74 | /** 75 | * Used by {@link S3TablesAssumeRoleAwsClientFactory}. If set, all AWS clients will assume a role of the 76 | * given ARN, instead of using the default credential chain. 77 | */ 78 | public static final String CLIENT_ASSUME_ROLE_ARN = "client.assume-role.arn"; 79 | 80 | 81 | /** 82 | * Used by {@link S3TablesAssumeRoleAwsClientFactory}. Optional external ID used to assume an IAM role. 83 | * 84 | *
For more details, see 85 | * https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_create_for-user_externalid.html 86 | */ 87 | public static final String CLIENT_ASSUME_ROLE_EXTERNAL_ID = "client.assume-role.external-id"; 88 | 89 | /** 90 | * Used by {@link S3TablesAssumeRoleAwsClientFactory}. If set, all AWS clients except STS client will use 91 | * the given region instead of the default region chain. 92 | * 93 | *
The value must be one of {@link software.amazon.awssdk.regions.Region}, such as 'us-east-1'. 94 | * For more details, see https://docs.aws.amazon.com/general/latest/gr/rande.html 95 | */ 96 | public static final String CLIENT_ASSUME_ROLE_REGION = "client.assume-role.region"; 97 | 98 | /** 99 | * Used by {@link S3TablesAssumeRoleAwsClientFactory}. The timeout of the assume role session in seconds, 100 | * default to 1 hour. At the end of the timeout, a new set of role session credentials will be 101 | * fetched through a STS client. 102 | */ 103 | public static final String CLIENT_ASSUME_ROLE_TIMEOUT_SEC = "client.assume-role.timeout-sec"; 104 | 105 | public static final int CLIENT_ASSUME_ROLE_TIMEOUT_SEC_DEFAULT = 3600; 106 | 107 | /** 108 | * Used by {@link S3TablesAssumeRoleAwsClientFactory}. Optional session name used to assume an IAM role. 109 | * 110 | *
For more details, see
111 |    * https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_policies_iam-condition-keys.html#ck_rolesessionname
112 |    */
113 |   public static final String CLIENT_ASSUME_ROLE_SESSION_NAME = "client.assume-role.session-name";
114 | 
115 |   /**
116 |    * Used by {@link S3TablesAssumeRoleAwsClientFactory} to pass a list of session tags. Each session tag
117 |    * consists of a key name and an associated value.
118 |    */
119 |   public static final String CLIENT_ASSUME_ROLE_TAGS_PREFIX = "client.assume-role.tags.";
120 | 
121 |   private final Set<Tag> stsClientAssumeRoleTags;
122 | 
123 |   private String clientRegion;
124 |   private final String clientCredentialsProvider;
125 |   private final Map<String, String> clientCredentialsProviderProperties;
126 | 
127 |   private final String clientAssumeRoleArn;
128 |   private final String clientAssumeRoleExternalId;
129 |   private final String clientAssumeRoleRegion;
130 |   private final String clientAssumeRoleSessionName;
131 |   private final int clientAssumeRoleTimeoutSec;
132 | 
133 |   public AwsClientProperties() {
134 |     this.stsClientAssumeRoleTags = Sets.newHashSet();
135 |     this.clientRegion = null;
136 |     this.clientCredentialsProvider = null;
137 |     this.clientCredentialsProviderProperties = null;
138 | 
139 |     this.clientAssumeRoleArn = null;
140 |     this.clientAssumeRoleExternalId = null;
141 |     this.clientAssumeRoleRegion = null;
142 |     this.clientAssumeRoleSessionName = null;
143 |     this.clientAssumeRoleTimeoutSec = CLIENT_ASSUME_ROLE_TIMEOUT_SEC_DEFAULT;
144 |   }
145 | 
146 |   public String clientAssumeRoleArn() {
147 |     return clientAssumeRoleArn;
148 |   }
149 | 
150 |   public String clientAssumeRoleRegion() {
151 |     return clientAssumeRoleRegion;
152 |   }
153 | 
154 |   public int clientAssumeRoleTimeoutSec() {
155 |     return clientAssumeRoleTimeoutSec;
156 |   }
157 | 
158 |   public String clientAssumeRoleExternalId() {
159 |     return clientAssumeRoleExternalId;
160 |   }
161 | 
162 |   public AwsClientProperties(Map<String, String> properties) {
163 |     this.clientRegion = properties.get(CLIENT_REGION);
164 |     this.clientCredentialsProvider = properties.get(CLIENT_CREDENTIALS_PROVIDER);
165 |     this.clientCredentialsProviderProperties =
166 |         PropertyUtil.propertiesWithPrefix(properties, CLIENT_CREDENTIAL_PROVIDER_PREFIX);
167 |     this.clientAssumeRoleArn = properties.get(CLIENT_ASSUME_ROLE_ARN);
168 |     this.clientAssumeRoleExternalId = properties.get(CLIENT_ASSUME_ROLE_EXTERNAL_ID);
169 |     this.clientAssumeRoleTimeoutSec =
170 |         PropertyUtil.propertyAsInt(
171 |             properties, CLIENT_ASSUME_ROLE_TIMEOUT_SEC, CLIENT_ASSUME_ROLE_TIMEOUT_SEC_DEFAULT);
172 |     this.clientAssumeRoleRegion = properties.get(CLIENT_ASSUME_ROLE_REGION);
173 |     this.clientAssumeRoleSessionName = properties.get(CLIENT_ASSUME_ROLE_SESSION_NAME);
174 |     this.stsClientAssumeRoleTags = toStsTags(properties, CLIENT_ASSUME_ROLE_TAGS_PREFIX);
175 |   }
176 | 
177 |   public String clientRegion() {
178 |     return clientRegion;
179 |   }
180 | 
181 |   public void setClientRegion(String clientRegion) {
182 |     this.clientRegion = clientRegion;
183 |   }
184 | 
185 |   public Set<Tag> stsClientAssumeRoleTags() {
186 |     return stsClientAssumeRoleTags;
187 |   }
188 | 
189 |   private Set<Tag> toStsTags(
190 |       Map<String, String> properties, String prefix) {
191 |     return PropertyUtil.propertiesWithPrefix(properties, prefix).entrySet().stream()
192 |         .map(
193 |             e ->
194 |                 software.amazon.awssdk.services.sts.model.Tag.builder()
195 |                     .key(e.getKey())
196 |                     .value(e.getValue())
197 |                     .build())
198 |         .collect(Collectors.toSet());
199 |   }
200 |   /**
201 |    * Configure a client AWS region.
202 |    *
203 |    *
Sample usage:
204 |    *
205 |    *
206 |    *     S3Client.builder().applyMutation(awsClientProperties::applyClientRegionConfiguration)
207 |    * 
208 |    */
209 |   public <T extends AwsClientBuilder> void applyClientRegionConfiguration(T builder) {
210 |     if (clientRegion != null) {
211 |       builder.region(Region.of(clientRegion));
212 |     }
213 |   }
214 | 
215 |   /**
216 |    * Configure the credential provider for AWS clients.
217 |    *
218 |    *
Sample usage:
219 |    *
220 |    *
221 |    *     DynamoDbClient.builder().applyMutation(awsClientProperties::applyClientCredentialConfigurations)
222 |    * 
223 |    */
224 |   public <T extends AwsClientBuilder> void applyClientCredentialConfigurations(T builder) {
225 |     if (!Strings.isNullOrEmpty(this.clientCredentialsProvider)) {
226 |       builder.credentialsProvider(credentialsProvider(this.clientCredentialsProvider));
227 |     }
228 |   }
229 | 
230 |   /**
231 |    * Returns a credentials provider instance. If params were set, we return a new
232 |    * credentials instance. If none of the params are set, we try to dynamically load the provided
233 |    * credentials provider class. Upon loading the class, we try to invoke {@code create(Map<String,
234 |    * String>)} static method. If that fails, we fall back to {@code create()}. If credential
235 |    * provider class wasn't set, we fall back to default credentials provider.
236 |    *
237 |    * @param accessKeyId the AWS access key ID
238 |    * @param secretAccessKey the AWS secret access key
239 |    * @param sessionToken the AWS session token
240 |    * @return a credentials provider instance
241 |    */
242 |   @SuppressWarnings("checkstyle:HiddenField")
243 |   public AwsCredentialsProvider credentialsProvider(
244 |       String accessKeyId, String secretAccessKey, String sessionToken) {
245 | 
246 |     if (!Strings.isNullOrEmpty(accessKeyId) && !Strings.isNullOrEmpty(secretAccessKey)) {
247 |       if (Strings.isNullOrEmpty(sessionToken)) {
248 |         return StaticCredentialsProvider.create(
249 |             AwsBasicCredentials.create(accessKeyId, secretAccessKey));
250 |       } else {
251 |         return StaticCredentialsProvider.create(
252 |             AwsSessionCredentials.create(accessKeyId, secretAccessKey, sessionToken));
253 |       }
254 |     }
255 | 
256 |     if (!Strings.isNullOrEmpty(this.clientCredentialsProvider)) {
257 |       return credentialsProvider(this.clientCredentialsProvider);
258 |     }
259 | 
260 |     // Create a new credential provider for each client
261 |     return DefaultCredentialsProvider.builder().build();
262 |   }
263 | 
264 |   private AwsCredentialsProvider credentialsProvider(String credentialsProviderClass) {
265 |     Class<?> providerClass;
266 |     try {
267 |       providerClass = DynClasses.builder().impl(credentialsProviderClass).buildChecked();
268 |     } catch (ClassNotFoundException e) {
269 |       throw new IllegalArgumentException(
270 |           String.format(
271 |               "Cannot load class %s, it does not exist in the classpath", credentialsProviderClass),
272 |           e);
273 |     }
274 | 
275 |     Preconditions.checkArgument(
276 |         AwsCredentialsProvider.class.isAssignableFrom(providerClass),
277 |         String.format(
278 |             "Cannot initialize %s, it does not implement %s.",
279 |             credentialsProviderClass, AwsCredentialsProvider.class.getName()));
280 | 
281 |     try {
282 |       return createCredentialsProvider(providerClass);
283 |     } catch (NoSuchMethodException e) {
284 |       throw new IllegalArgumentException(
285 |           String.format(
286 |               "Cannot create an instance of %s, it does not contain a static 'create' or 'create(Map)' method",
287 |               credentialsProviderClass),
288 |           e);
289 |     }
290 |   }
291 | 
292 |   private AwsCredentialsProvider createCredentialsProvider(Class<?> providerClass)
293 |       throws NoSuchMethodException {
294 |     AwsCredentialsProvider provider;
295 |     try {
296 |       provider =
297 |           DynMethods.builder("create")
298 |               .hiddenImpl(providerClass, Map.class)
299 |               .buildStaticChecked()
300 |               .invoke(clientCredentialsProviderProperties);
301 |     } catch (NoSuchMethodException e) {
302 |       provider =
303 |           DynMethods.builder("create").hiddenImpl(providerClass).buildStaticChecked().invoke();
304 |     }
305 |     return provider;
306 |   }
307 | }
308 | 
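
The assume-role settings above are consumed the same way as the HTTP client settings. A minimal sketch, assuming placeholder values for the role ARN, region, session name, and tag; the wrapper class is likewise illustrative:

```
import java.util.HashMap;
import java.util.Map;

import software.amazon.s3tables.iceberg.imports.AwsClientProperties;

public class AwsClientPropertiesExample {
    public static void main(String[] args) {
        Map<String, String> properties = new HashMap<>();
        properties.put(AwsClientProperties.CLIENT_ASSUME_ROLE_ARN, "arn:aws:iam::123456789012:role/my-catalog-role");
        properties.put(AwsClientProperties.CLIENT_ASSUME_ROLE_REGION, "us-east-1");
        properties.put(AwsClientProperties.CLIENT_ASSUME_ROLE_SESSION_NAME, "s3tables-catalog");
        // Session tags are read from every key under the "client.assume-role.tags." prefix.
        properties.put(AwsClientProperties.CLIENT_ASSUME_ROLE_TAGS_PREFIX + "team", "data-platform");

        AwsClientProperties awsClientProperties = new AwsClientProperties(properties);
        System.out.println(awsClientProperties.clientAssumeRoleArn());
        System.out.println(awsClientProperties.stsClientAssumeRoleTags());
    }
}
```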
--------------------------------------------------------------------------------
/resources/META-INF/THIRD-PARTY:
--------------------------------------------------------------------------------
1 | ** Iceberg Spark Runtime; version 1.5.x -- https://iceberg.apache.org/
2 | 
3 |                                  Apache License
4 |                            Version 2.0, January 2004
5 |                         http://www.apache.org/licenses/
6 | 
7 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
8 | 
9 |    1. Definitions.
10 | 
11 |       "License" shall mean the terms and conditions for use, reproduction,
12 |       and distribution as defined by Sections 1 through 9 of this document.
13 | 
14 |       "Licensor" shall mean the copyright owner or entity authorized by
15 |       the copyright owner that is granting the License.
16 | 
17 |       "Legal Entity" shall mean the union of the acting entity and all
18 |       other entities that control, are controlled by, or are under common
19 |       control with that entity. For the purposes of this definition,
20 |       "control" means (i) the power, direct or indirect, to cause the
21 |       direction or management of such entity, whether by contract or
22 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
23 |       outstanding shares, or (iii) beneficial ownership of such entity.
24 | 
25 |       "You" (or "Your") shall mean an individual or Legal Entity
26 |       exercising permissions granted by this License.
27 | 
28 |       "Source" form shall mean the preferred form for making modifications,
29 |       including but not limited to software source code, documentation
30 |       source, and configuration files.
31 | 
32 |       "Object" form shall mean any form resulting from mechanical
33 |       transformation or translation of a Source form, including but
34 |       not limited to compiled object code, generated documentation,
35 |       and conversions to other media types.
36 | 
37 |       "Work" shall mean the work of authorship, whether in Source or
38 |       Object form, made available under the License, as indicated by a
39 |       copyright notice that is included in or attached to the work
40 |       (an example is provided in the Appendix below).
41 | 
42 |       "Derivative Works" shall mean any work, whether in Source or Object
43 |       form, that is based on (or derived from) the Work and for which the
44 |       editorial revisions, annotations, elaborations, or other modifications
45 |       represent, as a whole, an original work of authorship. For the purposes
46 |       of this License, Derivative Works shall not include works that remain
47 |       separable from, or merely link (or bind by name) to the interfaces of,
48 |       the Work and Derivative Works thereof.
49 | 
50 |       "Contribution" shall mean any work of authorship, including
51 |       the original version of the Work and any modifications or additions
52 |       to that Work or Derivative Works thereof, that is intentionally
53 |       submitted to Licensor for inclusion in the Work by the copyright owner
54 |       or by an individual or Legal Entity authorized to submit on behalf of
55 |       the copyright owner. For the purposes of this definition, "submitted"
56 |       means any form of electronic, verbal, or written communication sent
57 |       to the Licensor or its representatives, including but not limited to
58 |       communication on electronic mailing lists, source code control systems,
59 |       and issue tracking systems that are managed by, or on behalf of, the
60 |       Licensor for the purpose of discussing and improving the Work, but
61 |       excluding communication that is conspicuously marked or otherwise
62 |       designated in writing by the copyright owner as "Not a Contribution."
63 | 64 | "Contributor" shall mean Licensor and any individual or Legal Entity 65 | on behalf of whom a Contribution has been received by Licensor and 66 | subsequently incorporated within the Work. 67 | 68 | 2. Grant of Copyright License. Subject to the terms and conditions of 69 | this License, each Contributor hereby grants to You a perpetual, 70 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 71 | copyright license to reproduce, prepare Derivative Works of, 72 | publicly display, publicly perform, sublicense, and distribute the 73 | Work and such Derivative Works in Source or Object form. 74 | 75 | 3. Grant of Patent License. Subject to the terms and conditions of 76 | this License, each Contributor hereby grants to You a perpetual, 77 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 78 | (except as stated in this section) patent license to make, have made, 79 | use, offer to sell, sell, import, and otherwise transfer the Work, 80 | where such license applies only to those patent claims licensable 81 | by such Contributor that are necessarily infringed by their 82 | Contribution(s) alone or by combination of their Contribution(s) 83 | with the Work to which such Contribution(s) was submitted. If You 84 | institute patent litigation against any entity (including a 85 | cross-claim or counterclaim in a lawsuit) alleging that the Work 86 | or a Contribution incorporated within the Work constitutes direct 87 | or contributory patent infringement, then any patent licenses 88 | granted to You under this License for that Work shall terminate 89 | as of the date such litigation is filed. 90 | 91 | 4. Redistribution. You may reproduce and distribute copies of the 92 | Work or Derivative Works thereof in any medium, with or without 93 | modifications, and in Source or Object form, provided that You 94 | meet the following conditions: 95 | 96 | (a) You must give any other recipients of the Work or 97 | Derivative Works a copy of this License; and 98 | 99 | (b) You must cause any modified files to carry prominent notices 100 | stating that You changed the files; and 101 | 102 | (c) You must retain, in the Source form of any Derivative Works 103 | that You distribute, all copyright, patent, trademark, and 104 | attribution notices from the Source form of the Work, 105 | excluding those notices that do not pertain to any part of 106 | the Derivative Works; and 107 | 108 | (d) If the Work includes a "NOTICE" text file as part of its 109 | distribution, then any Derivative Works that You distribute must 110 | include a readable copy of the attribution notices contained 111 | within such NOTICE file, excluding those notices that do not 112 | pertain to any part of the Derivative Works, in at least one 113 | of the following places: within a NOTICE text file distributed 114 | as part of the Derivative Works; within the Source form or 115 | documentation, if provided along with the Derivative Works; or, 116 | within a display generated by the Derivative Works, if and 117 | wherever such third-party notices normally appear. The contents 118 | of the NOTICE file are for informational purposes only and 119 | do not modify the License. You may add Your own attribution 120 | notices within Derivative Works that You distribute, alongside 121 | or as an addendum to the NOTICE text from the Work, provided 122 | that such additional attribution notices cannot be construed 123 | as modifying the License. 
124 | 125 | You may add Your own copyright statement to Your modifications and 126 | may provide additional or different license terms and conditions 127 | for use, reproduction, or distribution of Your modifications, or 128 | for any such Derivative Works as a whole, provided Your use, 129 | reproduction, and distribution of the Work otherwise complies with 130 | the conditions stated in this License. 131 | 132 | 5. Submission of Contributions. Unless You explicitly state otherwise, 133 | any Contribution intentionally submitted for inclusion in the Work 134 | by You to the Licensor shall be under the terms and conditions of 135 | this License, without any additional terms or conditions. 136 | Notwithstanding the above, nothing herein shall supersede or modify 137 | the terms of any separate license agreement you may have executed 138 | with Licensor regarding such Contributions. 139 | 140 | 6. Trademarks. This License does not grant permission to use the trade 141 | names, trademarks, service marks, or product names of the Licensor, 142 | except as required for reasonable and customary use in describing the 143 | origin of the Work and reproducing the content of the NOTICE file. 144 | 145 | 7. Disclaimer of Warranty. Unless required by applicable law or 146 | agreed to in writing, Licensor provides the Work (and each 147 | Contributor provides its Contributions) on an "AS IS" BASIS, 148 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 149 | implied, including, without limitation, any warranties or conditions 150 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 151 | PARTICULAR PURPOSE. You are solely responsible for determining the 152 | appropriateness of using or redistributing the Work and assume any 153 | risks associated with Your exercise of permissions under this License. 154 | 155 | 8. Limitation of Liability. In no event and under no legal theory, 156 | whether in tort (including negligence), contract, or otherwise, 157 | unless required by applicable law (such as deliberate and grossly 158 | negligent acts) or agreed to in writing, shall any Contributor be 159 | liable to You for damages, including any direct, indirect, special, 160 | incidental, or consequential damages of any character arising as a 161 | result of this License or out of the use or inability to use the 162 | Work (including but not limited to damages for loss of goodwill, 163 | work stoppage, computer failure or malfunction, or any and all 164 | other commercial damages or losses), even if such Contributor 165 | has been advised of the possibility of such damages. 166 | 167 | 9. Accepting Warranty or Additional Liability. While redistributing 168 | the Work or Derivative Works thereof, You may choose to offer, 169 | and charge a fee for, acceptance of support, warranty, indemnity, 170 | or other liability obligations and/or rights consistent with this 171 | License. However, in accepting such obligations, You may act only 172 | on Your own behalf and on Your sole responsibility, not on behalf 173 | of any other Contributor, and only if You agree to indemnify, 174 | defend, and hold each Contributor harmless for any liability 175 | incurred by, or claims asserted against, such Contributor by reason 176 | of your accepting any such warranty or additional liability. 177 | 178 | END OF TERMS AND CONDITIONS 179 | 180 | APPENDIX: How to apply the Apache License to your work. 
181 | 182 | To apply the Apache License to your work, attach the following 183 | boilerplate notice, with the fields enclosed by brackets "[]" 184 | replaced with your own identifying information. (Don't include 185 | the brackets!) The text should be enclosed in the appropriate 186 | comment syntax for the file format. We also recommend that a 187 | file or class name and description of purpose be included on the 188 | same "printed page" as the copyright notice for easier 189 | identification within third-party archives. 190 | 191 | Copyright [yyyy] [name of copyright owner] 192 | 193 | Licensed under the Apache License, Version 2.0 (the "License"); 194 | you may not use this file except in compliance with the License. 195 | You may obtain a copy of the License at 196 | 197 | http://www.apache.org/licenses/LICENSE-2.0 198 | 199 | Unless required by applicable law or agreed to in writing, software 200 | distributed under the License is distributed on an "AS IS" BASIS, 201 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 202 | See the License for the specific language governing permissions and 203 | limitations under the License. 204 | 205 | -------------------------------------------------------------------------------- 206 | 207 | This product includes a gradle wrapper. 208 | 209 | * gradlew and gradle/wrapper/gradle-wrapper.properties 210 | 211 | Copyright: 2010-2019 Gradle Authors. 212 | Home page: https://github.com/gradle/gradle 213 | License: https://www.apache.org/licenses/LICENSE-2.0 214 | 215 | -------------------------------------------------------------------------------- 216 | 217 | This product includes code from Apache Avro. 218 | 219 | * Conversion in DecimalWriter is based on Avro's Conversions.DecimalConversion. 220 | 221 | Copyright: 2014-2017 The Apache Software Foundation. 222 | Home page: https://avro.apache.org/ 223 | License: https://www.apache.org/licenses/LICENSE-2.0 224 | 225 | -------------------------------------------------------------------------------- 226 | 227 | This product includes code from Apache Parquet. 228 | 229 | * DynMethods.java 230 | * DynConstructors.java 231 | * AssertHelpers.java 232 | * IOUtil.java readFully and tests 233 | * ByteBufferInputStream implementations and tests 234 | 235 | Copyright: 2014-2017 The Apache Software Foundation. 236 | Home page: https://parquet.apache.org/ 237 | License: https://www.apache.org/licenses/LICENSE-2.0 238 | 239 | -------------------------------------------------------------------------------- 240 | 241 | This product includes code from Cloudera Kite. 242 | 243 | * SchemaVisitor and visit methods 244 | 245 | Copyright: 2013-2017 Cloudera Inc. 246 | Home page: https://kitesdk.org/ 247 | License: https://www.apache.org/licenses/LICENSE-2.0 248 | 249 | -------------------------------------------------------------------------------- 250 | 251 | This product includes code from Presto. 252 | 253 | * Retry wait and jitter logic in Tasks.java 254 | * S3FileIO logic derived from PrestoS3FileSystem.java in S3InputStream.java 255 | and S3OutputStream.java 256 | * SQL grammar rules for parsing CALL statements in IcebergSqlExtensions.g4 257 | * some aspects of handling stored procedures 258 | 259 | Copyright: 2016 Facebook and contributors 260 | Home page: https://prestodb.io/ 261 | License: https://www.apache.org/licenses/LICENSE-2.0 262 | 263 | -------------------------------------------------------------------------------- 264 | 265 | This product includes code from Apache iBATIS. 
266 | 
267 | * Hive ScriptRunner.java
268 | 
269 | Copyright: 2004 Clinton Begin
270 | Home page: https://ibatis.apache.org/
271 | License: https://www.apache.org/licenses/LICENSE-2.0
272 | 
273 | --------------------------------------------------------------------------------
274 | 
275 | This product includes code from Apache Hive.
276 | 
277 | * Hive metastore derby schema in hive-schema-3.1.0.derby.sql
278 | 
279 | Copyright: 2011-2018 The Apache Software Foundation
280 | Home page: https://hive.apache.org/
281 | License: https://www.apache.org/licenses/LICENSE-2.0
282 | 
283 | --------------------------------------------------------------------------------
284 | 
285 | This product includes code from Apache Spark.
286 | 
287 | * dev/check-license script
288 | * vectorized reading of definition levels in
289 |   BaseVectorizedParquetValuesReader.java
290 | * portions of the extensions parser
291 | * casting logic in AssignmentAlignmentSupport
292 | * implementation of SetAccumulator.
293 | * Connector expressions.
294 | 
295 | Copyright: 2011-2018 The Apache Software Foundation
296 | Home page: https://spark.apache.org/
297 | License: https://www.apache.org/licenses/LICENSE-2.0
298 | 
299 | --------------------------------------------------------------------------------
300 | 
301 | This product includes code from Delta Lake.
302 | 
303 | * AssignmentAlignmentSupport is an independent development but
304 |   UpdateExpressionsSupport in Delta was used as a reference.
305 | 
306 | Copyright: 2020 The Delta Lake Project Authors.
307 | Home page: https://delta.io/
308 | License: https://www.apache.org/licenses/LICENSE-2.0
309 | 
310 | --------------------------------------------------------------------------------
311 | 
312 | This product includes code from Apache Commons.
313 | 
314 | * Core ArrayUtil.
315 | 
316 | Copyright: 2020 The Apache Software Foundation
317 | Home page: https://commons.apache.org/
318 | License: https://www.apache.org/licenses/LICENSE-2.0
319 | * For Iceberg Spark Runtime see also this required NOTICE:
320 |   Apache Iceberg
321 |   Copyright 2017-2022 The Apache Software Foundation
322 | 
323 |   This product includes software developed at
324 |   The Apache Software Foundation (http://www.apache.org/).
325 | 
326 | --------------------------------------------------------------------------------
327 | 
328 | This project includes code from Kite, developed at Cloudera, Inc. with
329 | the following copyright notice:
330 | 
331 | | Copyright 2013 Cloudera Inc.
332 | |
333 | | Licensed under the Apache License, Version 2.0 (the "License");
334 | | you may not use this file except in compliance with the License.
335 | | You may obtain a copy of the License at
336 | |
337 | |   http://www.apache.org/licenses/LICENSE-2.0
338 | |
339 | | Unless required by applicable law or agreed to in writing, software
340 | | distributed under the License is distributed on an "AS IS" BASIS,
341 | | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
342 | | See the License for the specific language governing permissions and
343 | | limitations under the License.
344 | 
345 | 
--------------------------------------------------------------------------------
/src/software/amazon/s3tables/iceberg/S3TablesCatalogOperations.java:
--------------------------------------------------------------------------------
1 | /*
2 |  * Licensed to the Apache Software Foundation (ASF) under one
3 |  * or more contributor license agreements.
See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | package software.amazon.s3tables.iceberg; 20 | 21 | 22 | import org.apache.commons.lang3.StringUtils; 23 | import org.apache.iceberg.BaseMetastoreTableOperations; 24 | import org.apache.iceberg.CatalogProperties; 25 | import org.apache.iceberg.CatalogUtil; 26 | import org.apache.iceberg.LocationProviders; 27 | import org.apache.iceberg.TableMetadata; 28 | import org.apache.iceberg.TableProperties; 29 | import org.apache.iceberg.aws.s3.S3FileIO; 30 | import org.apache.iceberg.exceptions.CommitFailedException; 31 | import org.apache.iceberg.exceptions.CommitStateUnknownException; 32 | import org.apache.iceberg.exceptions.NoSuchTableException; 33 | import org.apache.iceberg.io.CloseableGroup; 34 | import org.apache.iceberg.io.FileIO; 35 | import org.apache.iceberg.io.LocationProvider; 36 | import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; 37 | import org.apache.iceberg.util.PropertyUtil; 38 | import org.apache.iceberg.util.Tasks; 39 | import org.slf4j.Logger; 40 | import org.slf4j.LoggerFactory; 41 | import software.amazon.awssdk.awscore.exception.AwsServiceException; 42 | import software.amazon.awssdk.services.s3tables.S3TablesClient; 43 | import software.amazon.awssdk.services.s3tables.model.ConflictException; 44 | import software.amazon.awssdk.services.s3tables.model.DeleteTableRequest; 45 | import software.amazon.awssdk.services.s3tables.model.GetTableMetadataLocationRequest; 46 | import software.amazon.awssdk.services.s3tables.model.GetTableMetadataLocationResponse; 47 | import software.amazon.awssdk.services.s3tables.model.NotFoundException; 48 | import software.amazon.awssdk.services.s3tables.model.UpdateTableMetadataLocationRequest; 49 | import software.amazon.awssdk.services.s3tables.model.UpdateTableMetadataLocationResponse; 50 | import software.amazon.awssdk.services.s3tables.model.AccessDeniedException; 51 | import software.amazon.s3tables.iceberg.imports.RetryDetector; 52 | 53 | import java.io.Closeable; 54 | import java.io.IOException; 55 | import java.util.Map; 56 | import java.util.Objects; 57 | import java.util.concurrent.atomic.AtomicReference; 58 | 59 | import static org.apache.iceberg.TableProperties.COMMIT_NUM_STATUS_CHECKS; 60 | import static org.apache.iceberg.TableProperties.COMMIT_NUM_STATUS_CHECKS_DEFAULT; 61 | import static org.apache.iceberg.TableProperties.COMMIT_STATUS_CHECKS_MAX_WAIT_MS; 62 | import static org.apache.iceberg.TableProperties.COMMIT_STATUS_CHECKS_MAX_WAIT_MS_DEFAULT; 63 | import static org.apache.iceberg.TableProperties.COMMIT_STATUS_CHECKS_MIN_WAIT_MS; 64 | import static org.apache.iceberg.TableProperties.COMMIT_STATUS_CHECKS_MIN_WAIT_MS_DEFAULT; 65 | import static org.apache.iceberg.TableProperties.COMMIT_STATUS_CHECKS_TOTAL_WAIT_MS; 66 | import static 
org.apache.iceberg.TableProperties.COMMIT_STATUS_CHECKS_TOTAL_WAIT_MS_DEFAULT;
67 | 
68 | // https://iceberg.apache.org/docs/nightly/custom-catalog/#custom-table-operations-implementation
69 | public class S3TablesCatalogOperations extends BaseMetastoreTableOperations implements Closeable {
70 |     private static final Logger LOG = LoggerFactory.getLogger(S3TablesCatalogOperations.class);
71 | 
72 |     private FileIO fileIO;
73 |     private final Map<String, String> tableCatalogProperties;
74 |     private final S3TablesClient tablesClient;
75 | 
76 |     private final String namespaceName;
77 |     private final String tableName;
78 |     private final String tableWareHouseLocation;
79 |     private final S3TablesCatalogConfiguration conf;
80 |     private final Object hadoopConf;
81 |     private final CloseableGroup closeableGroup;
82 | 
83 |     protected S3TablesCatalogOperations(S3TablesClient s3IceClient,
84 |                                         S3TablesCatalogConfiguration conf,
85 |                                         String namespaceName,
86 |                                         String tableName,
87 |                                         String tableWareHouseLocation,
88 |                                         Map<String, String> tableCatalogProperties,
89 |                                         Object hadoopConf) {
90 |         this.tablesClient = s3IceClient;
91 |         this.conf = conf;
92 |         this.namespaceName = namespaceName;
93 |         this.tableName = tableName;
94 |         this.tableWareHouseLocation = tableWareHouseLocation;
95 |         this.tableCatalogProperties = tableCatalogProperties;
96 |         this.hadoopConf = hadoopConf;
97 |         this.closeableGroup = new CloseableGroup();
98 |         closeableGroup.setSuppressCloseFailure(true);
99 |         closeableGroup.addCloseable(tablesClient);
100 |     }
101 | 
102 |     @Override
103 |     protected String tableName() {
104 |         return tableName;
105 |     }
106 | 
107 |     @Override
108 |     public FileIO io() {
109 |         if (fileIO == null) {
110 |             try {
111 |                 fileIO = initializeFileIO(this.tableCatalogProperties, this.hadoopConf);
112 |                 closeableGroup.addCloseable(fileIO);
113 |             } catch (Exception e) {
114 |                 throw new RuntimeException(e);
115 |             }
116 |         }
117 |         return fileIO;
118 |     }
119 | 
120 |     protected static FileIO initializeFileIO(Map<String, String> properties, Object hadoopConf) {
121 |         String fileIOImpl = properties.get(CatalogProperties.FILE_IO_IMPL);
122 |         if (fileIOImpl == null) {
123 |             FileIO io = new S3FileIO();
124 |             io.initialize(properties);
125 |             return io;
126 |         } else {
127 |             return CatalogUtil.loadFileIO(fileIOImpl, properties, hadoopConf);
128 |         }
129 |     }
130 | 
131 |     /**
132 |      * For S3 Tables, we effectively turn `write.object-storage.enabled` on by default. Customers
133 |      * can still explicitly disable it as a table property, but if omitted, we default to using
134 |      * the S3TablesLocationProvider, which is a clone of the recently upstreamed changes to Iceberg's
135 |      * ObjectStoreLocationProvider.
136 |      */
137 |     @Override
138 |     public LocationProvider locationProvider() {
139 |         Map<String, String> properties = current().properties();
140 |         boolean isObjectStoreEnabled = PropertyUtil.propertyAsBoolean(properties, TableProperties.OBJECT_STORE_ENABLED, true);
141 |         if (properties.containsKey(TableProperties.WRITE_LOCATION_PROVIDER_IMPL) || !isObjectStoreEnabled) {
142 |             return LocationProviders.locationsFor(current().location(), current().properties());
143 |         } else {
144 |             return new S3TablesLocationProvider(current().location(), current().properties());
145 |         }
146 |     }
147 | 
148 |     /**
149 |      * The doRefresh method should provide an implementation for getting the metadata location.
150 |      * If the table doesn't exist, it will throw an error.
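     *
     * A minimal sketch of what a caller observes (illustrative only; assumes a configured
     * catalog and an existing table, with hypothetical identifiers):
     * <pre>{@code
     * Table table = catalog.loadTable(TableIdentifier.of("ns", "tbl"));
     * // if another process deletes the table from the table bucket...
     * table.refresh(); // ...a later refresh surfaces a NoSuchTableException
     * }</pre>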
151 |      */
152 |     @Override
153 |     public void doRefresh() {
154 |         // Example custom service which returns the metadata location given a dbName and tableName
155 |         String metadataLocation = null;
156 |         try {
157 |             GetTableMetadataLocationRequest getTableMetadataLocationRequest = GetTableMetadataLocationRequest.builder()
158 |                     .tableBucketARN(tableWareHouseLocation)
159 |                     .namespace(namespaceName)
160 |                     .name(tableName)
161 |                     .build();
162 | 
163 |             GetTableMetadataLocationResponse getTableMetadataLocationResponse = this.tablesClient.
164 |                     getTableMetadataLocation(getTableMetadataLocationRequest);
165 | 
166 |             if (StringUtils.isEmpty(getTableMetadataLocationResponse.metadataLocation())) {
167 |                 LOG.debug("Empty metadata location for table {}.{}, skipping doRefresh()", namespaceName, tableName);
168 |                 disableRefresh();
169 |                 return;
170 |             }
171 | 
172 |             metadataLocation = getTableMetadataLocationResponse.metadataLocation();
173 |         } catch (NotFoundException ex) {
174 |             LOG.debug("Empty metadata location for table {}.{}, skipping doRefresh()", namespaceName, tableName);
175 |             if (currentMetadataLocation() != null) {
176 |                 LOG.error("Cannot find S3 Table {} after refresh", tableName);
177 |                 throw new NoSuchTableException(
178 |                         "Cannot find S3 table metadata location for table %s after refresh, "
179 |                                 + "maybe another process deleted it or revoked your access permission",
180 |                         tableName());
181 |             }
182 |         }
183 | 
184 |         // When updating from a metadata file location, call the helper method
185 |         refreshFromMetadataLocation(metadataLocation);
186 |     }
187 | 
188 |     /**
189 |      * The doCommit method should provide an implementation for updating the metadata location atomically.
190 |      * @param base the base metadata, before any changes were made
191 |      * @param metadata the updated metadata, to be committed
192 |      * Will drop the temporary table if it fails to commit the data.
193 |      */
194 |     @Override
195 |     public void doCommit(TableMetadata base, TableMetadata metadata) {
196 |         boolean newTable = false;
197 |         RetryDetector retryDetector = new RetryDetector();
198 |         CustomCommitStatus commitStatus = CustomCommitStatus.FAILURE;
199 |         String newMetadataLocation = null;
200 |         String versionToken = null;
201 |         try {
202 |             LOG.debug("Committing metadata to namespace: {} with tableName {}", namespaceName, tableName);
203 | 
204 |             newTable = base == null;
205 | 
206 |             newMetadataLocation = writeNewMetadataIfRequired(newTable, metadata);
207 |             LOG.debug("Wrote new metadata to {}", newMetadataLocation);
208 | 
209 |             GetTableMetadataLocationResponse tableMetadataLocationResponse = this.tablesClient.getTableMetadataLocation(
210 |                     GetTableMetadataLocationRequest.builder()
211 |                             .name(tableName)
212 |                             .namespace(namespaceName)
213 |                             .tableBucketARN(tableWareHouseLocation)
214 |                             .build());
215 | 
216 |             versionToken = tableMetadataLocationResponse.versionToken();
217 |             if (base != null) {
218 |                 // New tables will have a base empty metadata file written by the control plane
219 |                 checkMetadataLocation(tableMetadataLocationResponse, base);
220 |                 LOG.debug("Successfully checked metadata location for {} got VersionToken {}", tableName, versionToken);
221 |             } else {
222 |                 LOG.debug("Skipped checking metadata location for {} because this is a new table", tableName);
223 |             }
224 | 
225 |             UpdateTableMetadataLocationResponse updateTableMetadataLocationResponse = this.tablesClient.updateTableMetadataLocation(
226 |                     UpdateTableMetadataLocationRequest.builder()
227 |                             .overrideConfiguration(c -> c.addMetricPublisher(retryDetector))
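                            // the RetryDetector metric publisher records whether the SDK retried this
                            // request; the RuntimeException handler below uses that to decide whether an
                            // ambiguous failure may still have committed on a retry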
228 |                             .tableBucketARN(tableWareHouseLocation)
229 |                             .namespace(namespaceName)
230 |                             .name(tableName).metadataLocation(newMetadataLocation)
231 |                             .versionToken(versionToken).build());
232 | 
233 |             versionToken = updateTableMetadataLocationResponse.versionToken();
234 | 
235 |             LOG.debug("Successfully updated metadata; new version token is: {}", versionToken);
236 |             commitStatus = CustomCommitStatus.SUCCESS;
237 |         } catch (ConflictException e) {
238 |             LOG.error("Failed to commit metadata due to conflict: ", e);
239 |             throw new CommitFailedException(e);
240 |         } catch (AccessDeniedException e) {
241 |             LOG.error("Failed to commit metadata due to access denied: ", e);
242 |             throw e;
243 |         } catch (CommitFailedException e) {
244 |             LOG.error("Failed to commit metadata: ", e);
245 |             throw e;
246 |         } catch (RuntimeException persistFailure) {
247 |             boolean isAwsServiceException = persistFailure instanceof AwsServiceException;
248 | 
249 |             if (!isAwsServiceException || retryDetector.retried()) {
250 |                 LOG.warn("Received unexpected failure when committing to {}, validating if commit ended up succeeding.",
251 |                         tableName,
252 |                         persistFailure);
253 | 
254 |                 commitStatus = checkCustomCommitStatus(newMetadataLocation, metadata);
255 |             }
256 | 
257 |             // If we got an AWS service exception that we would usually rethrow, but the status
258 |             // check found the commit succeeded on a retry that threw, skip the exception.
259 |             if (commitStatus != CustomCommitStatus.SUCCESS && isAwsServiceException) {
260 |                 LOG.error("Received unexpected failure when committing to {}", tableName, persistFailure);
261 |                 throw new RuntimeException("Persisting failure", persistFailure);
262 |             }
263 |             switch (commitStatus) {
264 |                 case SUCCESS:
265 |                     break;
266 |                 case FAILURE:
267 |                     LOG.error("Commit failed", persistFailure);
268 |                     throw new CommitFailedException(
269 |                             persistFailure, "Cannot commit %s due to unexpected exception", tableName());
270 |                 case UNKNOWN:
271 |                     LOG.error("Commit status unknown", persistFailure);
272 |                     throw new CommitStateUnknownException(persistFailure);
273 |             }
274 |         }
275 |         finally {
276 |             if (newTable && commitStatus != CustomCommitStatus.SUCCESS) {
277 |                 try {
278 |                     if (versionToken == null) {
279 |                         LOG.error("[Critical] Couldn't find version token for {}; will not try to delete the table with invalid metadata", tableName);
280 |                         // Not throwing an exception here so the original commit failure bubbles up the stack trace.
281 |                     } else {
282 |                         LOG.info("Commit failed; deleting table {} with versionToken {}", tableName, versionToken);
283 |                         this.tablesClient.deleteTable(DeleteTableRequest.builder()
284 |                                 .name(tableName)
285 |                                 .versionToken(versionToken)
286 |                                 .tableBucketARN(tableWareHouseLocation)
287 |                                 .namespace(namespaceName).build());
288 |                         LOG.info("Successfully deleted table {}", tableName);
289 |                     }
290 |                 } catch (Throwable deleteFailure) {
291 |                     // suppress this exception so we can propagate the original exception
292 |                     LOG.warn("Received unexpected failure when deleting table {}, suppressing.",
293 |                             tableName, deleteFailure);
294 |                 }
295 |             }
296 |         }
297 |     }
298 | 
299 |     private void checkMetadataLocation(GetTableMetadataLocationResponse tableMetadataLocationResponse, TableMetadata base) {
300 |         String baseMetadataLocation = base != null ? base.metadataFileLocation() : null;
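        // Optimistic-concurrency check: the commit is only valid if the service's current
        // metadata location still matches the base metadata this commit started from.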
301 |         String tableMetadataLocationInDDB = tableMetadataLocationResponse.metadataLocation();
302 |         if (!Objects.equals(baseMetadataLocation, tableMetadataLocationInDDB)) {
303 |             LOG.error("Base metadata location {} is not the same as current metadata location {} in DDB ", baseMetadataLocation, tableMetadataLocationInDDB);
304 |             throw new CommitFailedException("Base metadata location %s is not the same as current metadata location %s in DDB ",
305 |                     baseMetadataLocation, tableMetadataLocationInDDB);
306 |         }
307 |     }
308 | 
309 |     private CustomCommitStatus checkCustomCommitStatus(String newMetadataLocation, TableMetadata config) {
310 |         int maxAttempts =
311 |             PropertyUtil.propertyAsInt(
312 |                 tableCatalogProperties, COMMIT_NUM_STATUS_CHECKS, COMMIT_NUM_STATUS_CHECKS_DEFAULT);
313 |         long minWaitMs =
314 |             PropertyUtil.propertyAsLong(
315 |                 tableCatalogProperties, COMMIT_STATUS_CHECKS_MIN_WAIT_MS, COMMIT_STATUS_CHECKS_MIN_WAIT_MS_DEFAULT);
316 |         long maxWaitMs =
317 |             PropertyUtil.propertyAsLong(
318 |                 tableCatalogProperties, COMMIT_STATUS_CHECKS_MAX_WAIT_MS, COMMIT_STATUS_CHECKS_MAX_WAIT_MS_DEFAULT);
319 |         long totalRetryMs =
320 |             PropertyUtil.propertyAsLong(
321 |                 tableCatalogProperties,
322 |                 COMMIT_STATUS_CHECKS_TOTAL_WAIT_MS,
323 |                 COMMIT_STATUS_CHECKS_TOTAL_WAIT_MS_DEFAULT);
324 | 
325 |         AtomicReference<CustomCommitStatus> status = new AtomicReference<>(CustomCommitStatus.UNKNOWN);
326 | 
327 |         Tasks.foreach(newMetadataLocation)
328 |             .retry(maxAttempts)
329 |             .suppressFailureWhenFinished()
330 |             .exponentialBackoff(minWaitMs, maxWaitMs, totalRetryMs, 2.0)
331 |             .onFailure(
332 |                 (location, checkException) ->
333 |                     LOG.error("Cannot check if commit to {} exists.", tableName, checkException))
334 |             .run(
335 |                 location -> {
336 |                     boolean commitSuccess = checkCurrentMetadataLocation(newMetadataLocation);
337 | 
338 |                     if (commitSuccess) {
339 |                         LOG.info(
340 |                             "Commit status check: Commit to {} of {} succeeded",
341 |                             tableName,
342 |                             newMetadataLocation);
343 |                         status.set(CustomCommitStatus.SUCCESS);
344 |                     } else {
345 |                         LOG.warn(
346 |                             "Commit status check: Commit to {} of {} unknown, new metadata location is not current "
347 |                                 + "or in history",
348 |                             tableName,
349 |                             newMetadataLocation);
350 |                     }
351 |                 });
352 |         return status.get();
353 |     }
354 | 
355 |     private boolean checkCurrentMetadataLocation(String newMetadataLocation) {
356 |         TableMetadata metadata = refresh();
357 |         String currentMetadataFileLocation = metadata.metadataFileLocation();
358 |         return currentMetadataFileLocation.equals(newMetadataLocation)
359 |             || metadata.previousFiles().stream()
360 |                 .anyMatch(log -> log.file().equals(newMetadataLocation));
361 |     }
362 | 
363 |     @VisibleForTesting
364 |     Map<String, String> tableCatalogProperties() {
365 |         return tableCatalogProperties;
366 |     }
367 | 
368 |     @Override
369 |     public void close() throws IOException {
370 |         closeableGroup.close();
371 |     }
372 | 
373 |     public enum CustomCommitStatus {
374 |         FAILURE,
375 |         SUCCESS,
376 |         UNKNOWN
377 |     }
378 | }
379 | 
--------------------------------------------------------------------------------
/src/software/amazon/s3tables/iceberg/S3TablesCatalog.java:
--------------------------------------------------------------------------------
1 | /*
2 |  * Licensed to the Apache Software Foundation (ASF) under one
3 |  * or more contributor license agreements. See the NOTICE file
4 |  * distributed with this work for additional information
5 |  * regarding copyright ownership.
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | package software.amazon.s3tables.iceberg; 20 | 21 | import software.amazon.s3tables.iceberg.imports.FileIOTracker; 22 | import org.apache.iceberg.BaseMetastoreCatalog; 23 | import org.apache.iceberg.CatalogProperties; 24 | import org.apache.iceberg.exceptions.AlreadyExistsException; 25 | import org.apache.iceberg.MetadataTableType; 26 | import org.apache.iceberg.MetadataTableUtils; 27 | import org.apache.iceberg.Table; 28 | import org.apache.iceberg.TableOperations; 29 | import org.apache.iceberg.aws.s3.S3FileIOProperties; 30 | import org.apache.iceberg.exceptions.NoSuchTableException; 31 | import org.apache.iceberg.hadoop.Configurable; 32 | import org.apache.iceberg.catalog.Namespace; 33 | import org.apache.iceberg.catalog.SupportsNamespaces; 34 | import org.apache.iceberg.catalog.TableIdentifier; 35 | import org.apache.iceberg.exceptions.NamespaceNotEmptyException; 36 | import org.apache.iceberg.exceptions.NoSuchNamespaceException; 37 | import org.apache.iceberg.exceptions.ValidationException; 38 | import org.apache.iceberg.io.CloseableGroup; 39 | import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; 40 | import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; 41 | import org.apache.iceberg.relocated.com.google.common.collect.Maps; 42 | import org.apache.iceberg.util.PropertyUtil; 43 | import org.slf4j.Logger; 44 | import org.slf4j.LoggerFactory; 45 | import software.amazon.awssdk.awscore.exception.AwsServiceException; 46 | import software.amazon.awssdk.core.exception.SdkClientException; 47 | import software.amazon.awssdk.services.s3tables.S3TablesClient; 48 | import software.amazon.awssdk.services.s3tables.model.ConflictException; 49 | import software.amazon.awssdk.services.s3tables.model.CreateNamespaceRequest; 50 | import software.amazon.awssdk.services.s3tables.model.CreateTableRequest; 51 | import software.amazon.awssdk.services.s3tables.model.DeleteNamespaceRequest; 52 | import software.amazon.awssdk.services.s3tables.model.DeleteTableRequest; 53 | import software.amazon.awssdk.services.s3tables.model.GetNamespaceRequest; 54 | import software.amazon.awssdk.services.s3tables.model.GetNamespaceResponse; 55 | import software.amazon.awssdk.services.s3tables.model.GetTableMetadataLocationRequest; 56 | import software.amazon.awssdk.services.s3tables.model.GetTableMetadataLocationResponse; 57 | import software.amazon.awssdk.services.s3tables.model.ListNamespacesRequest; 58 | import software.amazon.awssdk.services.s3tables.model.ListNamespacesResponse; 59 | import software.amazon.awssdk.services.s3tables.model.ListTablesRequest; 60 | import software.amazon.awssdk.services.s3tables.model.ListTablesResponse; 61 | import software.amazon.awssdk.services.s3tables.model.NotFoundException; 62 | import software.amazon.awssdk.services.s3tables.model.OpenTableFormat; 63 | import 
software.amazon.awssdk.services.s3tables.model.RenameTableRequest;
64 | import software.amazon.awssdk.services.s3tables.model.AccessDeniedException;
65 | 
66 | import java.io.Closeable;
67 | import java.io.IOException;
68 | import java.util.ArrayList;
69 | import java.util.Collections;
70 | import java.util.List;
71 | import java.util.Map;
72 | import java.util.Set;
73 | import java.util.function.Function;
74 | import java.util.stream.Collectors;
75 | 
76 | public class S3TablesCatalog extends BaseMetastoreCatalog
77 |         implements Closeable, SupportsNamespaces, Configurable {
78 | 
79 |     private static final Logger LOG = LoggerFactory.getLogger(S3TablesCatalog.class);
80 | 
81 |     private String catalogName;
82 |     private Map<String, String> catalogOptions;
83 |     private CloseableGroup closeableGroup;
84 |     private FileIOTracker fileIOTracker;
85 | 
86 |     private Object hadoopConf;
87 |     private S3TablesClient tablesClient;
88 | 
89 |     private final S3TablesCatalogConfiguration configuration;
90 | 
91 |     private static final ImmutableMap<String, String> S3_TABLES_DEFAULT_PROPERTIES = ImmutableMap.of(
92 |             // S3 Tables does not support deleting objects
93 |             S3FileIOProperties.DELETE_ENABLED,
94 |             "false"
95 |     );
96 | 
97 |     // must have a no-arg constructor to be dynamically loaded
98 |     // initialize(String name, Map<String, String> properties) will be called to complete initialization
99 |     public S3TablesCatalog() {
100 |         configuration = new S3TablesCatalogConfiguration();
101 |     }
102 | 
103 |     public S3TablesCatalog(S3TablesCatalogConfiguration configuration) {
104 |         this.configuration = configuration;
105 |     }
106 | 
107 |     @Override
108 |     protected boolean isValidIdentifier(TableIdentifier tableIdentifier) {
109 |         final Namespace namespace = tableIdentifier.namespace();
110 |         return namespace != null && namespace.length() == 1;
111 |     }
112 | 
113 |     /**
114 |      * Overrides loadTable to return an instance of S3TablesTable rather than BaseTable. Some engines use this to detect
115 |      * the type of the table and apply S3 Tables-specific behavior.
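     *
     * Usage sketch (illustrative identifiers; assumes an initialized catalog):
     * <pre>{@code
     * Table table = catalog.loadTable(TableIdentifier.of("my_namespace", "my_table"));
     * // metadata tables resolve through the same entry point:
     * Table snapshots = catalog.loadTable(TableIdentifier.of("my_namespace", "my_table", "snapshots"));
     * }</pre>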
116 |      */
117 |     @Override
118 |     public Table loadTable(TableIdentifier identifier) {
119 |         Table result;
120 |         if (isValidIdentifier(identifier)) {
121 |             TableOperations ops = newTableOps(identifier);
122 |             if (ops.current() == null) {
123 |                 // the identifier may be valid for both tables and metadata tables
124 |                 if (isValidMetadataIdentifier(identifier)) {
125 |                     result = loadMetadataTable(identifier);
126 |                 } else {
127 |                     throw new NoSuchTableException("Table does not exist: %s", identifier);
128 |                 }
129 |             } else {
130 |                 result = new S3TablesTable(ops, fullTableName(name(), identifier), metricsReporter());
131 |             }
132 |         } else if (isValidMetadataIdentifier(identifier)) {
133 |             result = loadMetadataTable(identifier);
134 |         } else {
135 |             throw new NoSuchTableException("Invalid table identifier: %s", identifier);
136 |         }
137 | 
138 |         LOG.info("Table loaded by catalog: {}", result);
139 |         return result;
140 |     }
141 | 
142 |     // Copied from BaseMetastoreCatalog, but private there
143 |     private boolean isValidMetadataIdentifier(TableIdentifier identifier) {
144 |         return MetadataTableType.from(identifier.name()) != null
145 |                 && isValidIdentifier(TableIdentifier.of(identifier.namespace().levels()));
146 |     }
147 | 
148 |     // Copied from BaseMetastoreCatalog, but private there
149 |     private Table loadMetadataTable(TableIdentifier identifier) {
150 |         String tableName = identifier.name();
151 |         MetadataTableType type = MetadataTableType.from(tableName);
152 |         if (type != null) {
153 |             TableIdentifier baseTableIdentifier = TableIdentifier.of(identifier.namespace().levels());
154 |             TableOperations ops = newTableOps(baseTableIdentifier);
155 |             if (ops.current() == null) {
156 |                 throw new NoSuchTableException("Table does not exist: %s", baseTableIdentifier);
157 |             }
158 | 
159 |             return MetadataTableUtils.createMetadataTableInstance(
160 |                     ops, name(), baseTableIdentifier, identifier, type);
161 |         } else {
162 |             throw new NoSuchTableException("Table does not exist: %s", identifier);
163 |         }
164 |     }
165 | 
166 |     @Override
167 |     protected TableOperations newTableOps(TableIdentifier tableIdentifier) {
168 |         if (tableIdentifier.namespace() == null || tableIdentifier.namespace().levels().length == 0) {
169 |             throw new ValidationException("Namespace can't be null or empty");
170 |         }
171 |         validateSingleLevelNamespace(tableIdentifier.namespace());
172 |         String namespaceName = tableIdentifier.namespace().toString();
173 |         String tableName = tableIdentifier.name();
174 | 
175 |         S3TablesCatalogOperations s3TablesCatalogOperations = new S3TablesCatalogOperations(
176 |                 tablesClient,
177 |                 configuration,
178 |                 namespaceName,
179 |                 tableName,
180 |                 catalogOptions.get(CatalogProperties.WAREHOUSE_LOCATION),
181 |                 catalogOptions,
182 |                 hadoopConf
183 |         );
184 | 
185 |         fileIOTracker.track(s3TablesCatalogOperations);
186 |         return s3TablesCatalogOperations;
187 |     }
188 | 
189 |     /**
190 |      * TODO: Check if there is a better way to derive the warehouse location
191 |      * Currently just checking with the Control Plane APIs to find if a table exists, if not then create it.
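     *
     * Rough shape of the flow below (a sketch, not the literal code):
     * <pre>{@code
     * try {
     *     return getTableMetadataLocation(id).warehouseLocation(); // table already exists
     * } catch (NotFoundException e) {
     *     createTable(id); // created as OpenTableFormat.ICEBERG in the configured table bucket
     *     return getTableMetadataLocation(id).warehouseLocation(); // read back the new location
     * }
     * }</pre>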
192 |      */
193 |     @Override
194 |     protected String defaultWarehouseLocation(TableIdentifier tableIdentifier) {
195 |         validateSingleLevelNamespace(tableIdentifier.namespace());
196 |         try {
197 |             LOG.debug("Trying to get TableMetadataLocation for namespace: {}, name: {}", tableIdentifier.namespace(), tableIdentifier.name());
198 |             GetTableMetadataLocationResponse getTableMetadataLocationResponse = tablesClient.getTableMetadataLocation(GetTableMetadataLocationRequest.builder()
199 |                     .name(tableIdentifier.name())
200 |                     .namespace(tableIdentifier.namespace().toString())
201 |                     .tableBucketARN(catalogOptions.get(CatalogProperties.WAREHOUSE_LOCATION))
202 |                     .build());
203 | 
204 |             return getTableMetadataLocationResponse.warehouseLocation();
205 |         } catch (NotFoundException ex) {
206 |             LOG.info("Table {} does not exist, creating table to retrieve warehouse location", tableIdentifier.name());
207 | 
208 |             try {
209 |                 tablesClient.createTable(
210 |                         CreateTableRequest.builder()
211 |                                 .tableBucketARN(catalogOptions.get(CatalogProperties.WAREHOUSE_LOCATION))
212 |                                 .name(tableIdentifier.name())
213 |                                 .format(OpenTableFormat.ICEBERG)
214 |                                 .namespace(tableIdentifier.namespace().toString())
215 |                                 .build());
216 | 
217 |             } catch (AccessDeniedException e) {
218 |                 LOG.error("Failed to create table {} due to access denied", tableIdentifier.name(), e);
219 |                 throw e;
220 |             } catch (Exception e) {
221 |                 LOG.error("Failed to create table {}", tableIdentifier.name(), e);
222 |                 throw new RuntimeException(e);
223 |             }
224 |             try {
225 |                 GetTableMetadataLocationResponse getTableResponse = tablesClient.getTableMetadataLocation(
226 |                         GetTableMetadataLocationRequest.builder()
227 |                                 .name(tableIdentifier.name())
228 |                                 .namespace(tableIdentifier.namespace().toString())
229 |                                 .tableBucketARN(catalogOptions.get(CatalogProperties.WAREHOUSE_LOCATION))
230 |                                 .build()
231 |                 );
232 |                 return getTableResponse.warehouseLocation();
233 |             } catch (AccessDeniedException e) {
234 |                 LOG.error("Failed to get table {} due to access denied", tableIdentifier.name(), e);
235 |                 throw e;
236 |             } catch (Exception e) {
237 |                 LOG.error("Failed to get table {}", tableIdentifier.name(), e);
238 |                 throw new RuntimeException(e);
239 |             }
240 |         }
241 |     }
242 | 
243 |     @Override
244 |     public void initialize(String name, Map<String, String> properties) {
245 |         LOG.debug("initialize {}", properties);
246 |         S3TablesAwsClientFactory clientFactory = S3TablesAwsClientFactories.from(properties);
247 |         initialize(name, properties, clientFactory.s3tables());
248 |     }
249 | 
250 |     @VisibleForTesting
251 |     public void initialize(String name, Map<String, String> properties, S3TablesClient client) {
252 |         if (properties.get(CatalogProperties.WAREHOUSE_LOCATION) == null) {
253 |             throw new ValidationException("No Warehouse location provided. Please specify the warehouse location, which should be the table bucket ARN");
254 |         }
255 | 
256 |         validateUnsupportedCatalogProperties(properties);
257 | 
258 |         this.catalogOptions = ImmutableMap.<String, String>builder()
259 |                 .putAll(S3_TABLES_DEFAULT_PROPERTIES)
260 |                 .putAll(properties)
261 |                 .buildKeepingLast();
262 | 
263 |         this.closeableGroup = new CloseableGroup();
264 |         this.catalogName = name;
265 |         this.tablesClient = client;
266 |         this.fileIOTracker = new FileIOTracker();
267 | 
268 |         closeableGroup.addCloseable(this.tablesClient);
269 |         closeableGroup.addCloseable(fileIOTracker);
270 |         closeableGroup.setSuppressCloseFailure(true);
271 |     }
272 | 
273 |     /**
274 |      * Validate some common properties that aren't supported by S3 Tables. We only log warnings rather than failing
275 |      * to preserve potential forward compatibility.
276 |      */
277 |     private void validateUnsupportedCatalogProperties(Map<String, String> properties) {
278 |         if (PropertyUtil.propertyAsBoolean(properties, S3FileIOProperties.DELETE_ENABLED, false)) {
279 |             LOG.warn("S3 Tables does not support DeleteObject requests; setting {}=true will cause failures", S3FileIOProperties.DELETE_ENABLED);
280 |         }
281 |         if (!PropertyUtil.propertiesWithPrefix(properties, S3FileIOProperties.DELETE_TAGS_PREFIX).isEmpty()) {
282 |             LOG.warn("S3 Tables does not support tagging objects; setting {} properties will cause failures", S3FileIOProperties.DELETE_TAGS_PREFIX);
283 |         }
284 |         if (!PropertyUtil.propertiesWithPrefix(properties, S3FileIOProperties.WRITE_TAGS_PREFIX).isEmpty()) {
285 |             LOG.warn("S3 Tables does not support tagging objects; setting {} properties will cause failures", S3FileIOProperties.WRITE_TAGS_PREFIX);
286 |         }
287 |         if (PropertyUtil.propertyAsBoolean(properties, S3FileIOProperties.S3_ACCESS_GRANTS_ENABLED, false)) {
288 |             LOG.warn("S3 Tables does not support S3 Access Grants; setting {}=true will cause failures", S3FileIOProperties.S3_ACCESS_GRANTS_ENABLED);
289 |         }
290 |         String sseConfig = PropertyUtil.propertyAsString(properties, S3FileIOProperties.SSE_TYPE, S3FileIOProperties.SSE_TYPE_NONE);
291 |         if (!sseConfig.equals(S3FileIOProperties.SSE_TYPE_NONE) && !sseConfig.equals(S3FileIOProperties.SSE_TYPE_S3)) {
292 |             LOG.warn("S3 Tables does not support configuring SSE other than SSE-S3; setting {}={} will cause failures", S3FileIOProperties.SSE_TYPE, sseConfig);
293 |         }
294 |         String aclConfig = properties.get(S3FileIOProperties.ACL);
295 |         if (aclConfig != null) {
296 |             LOG.warn("S3 Tables does not support ACLs; setting {}={} will cause failures", S3FileIOProperties.ACL, aclConfig);
297 |         }
298 |         String storageClassConfig = properties.get(S3FileIOProperties.WRITE_STORAGE_CLASS);
299 |         if (storageClassConfig != null && !storageClassConfig.equals("STANDARD")) {
300 |             LOG.warn("S3 Tables does not support storage classes other than STANDARD; setting {}={} will cause failures", S3FileIOProperties.WRITE_STORAGE_CLASS, storageClassConfig);
301 |         }
302 |     }
303 | 
304 |     @Override
305 |     public void createNamespace(Namespace namespace, Map<String, String> metadata) {
306 |         validateSingleLevelNamespace(namespace);
307 |         LOG.info("Creating namespace {} with metadata {}", namespace, metadata);
308 |         try {
309 |             tablesClient.createNamespace(
310 |                     CreateNamespaceRequest.builder()
311 |                             .tableBucketARN(catalogOptions.get(CatalogProperties.WAREHOUSE_LOCATION))
312 |                             .namespace(Collections.singletonList(namespace.toString()))
313 |                             .build()
314 |             );
315 |         } catch (ConflictException ex) {
316 |             LOG.debug("Received exception {}", ex.toString());
317 |             LOG.info("Namespace {} already exists", namespace);
318 |             throw new AlreadyExistsException("Namespace already exists: %s", namespace);
319 |         }
320 |     }
321 | 
322 |     @Override
323 |     public List<Namespace> listNamespaces(Namespace namespace) throws NoSuchNamespaceException {
324 |         LOG.debug("Listing namespaces for {}", namespace);
325 |         if (!namespace.isEmpty()) {
326 |             LOG.error("S3TablesCatalog does not support more than 1 level of namespace");
327 |             throw new IllegalArgumentException(String.format("S3TablesCatalog does not support more than 1 level of " +
328 |                     "namespace, so can only list top-level namespaces, but got: %s", namespace));
329 |         }
330 |         List<Namespace> results = new ArrayList<>();
331 |         try {
332 |             listWithToken(continuationToken -> {
333 |                 ListNamespacesResponse response = tablesClient.listNamespaces(
334 |                         ListNamespacesRequest.builder()
335 |                                 .tableBucketARN(catalogOptions.get(CatalogProperties.WAREHOUSE_LOCATION))
336 |                                 .continuationToken(continuationToken).build()
337 |                 );
338 |                 results.addAll(
339 |                         response.namespaces().stream()
340 |                                 .map(namespaceSummary -> Namespace.of(namespaceSummary.namespace().get(0)))
341 |                                 .collect(Collectors.toList())
342 |                 );
343 |                 return response.continuationToken();
344 |             });
345 |         } catch (AccessDeniedException e) {
346 |             LOG.error("Failed to list namespaces due to access denied", e);
347 |             throw e;
348 |         } catch (Exception e) {
349 |             LOG.error("Failed to list namespaces", e);
350 |             throw new RuntimeException(e);
351 |         }
352 |         LOG.debug("Namespace results: {}", results);
353 |         return results;
354 |     }
355 | 
356 |     @Override
357 |     public Map<String, String> loadNamespaceMetadata(Namespace namespace) throws NoSuchNamespaceException {
358 |         validateSingleLevelNamespace(namespace);
359 |         try {
360 |             LOG.debug("Loading metadata for {}", namespace);
361 |             GetNamespaceResponse getNamespaceResponse = tablesClient.getNamespace(
362 |                     GetNamespaceRequest.builder()
363 |                             .namespace(namespace.toString())
364 |                             .tableBucketARN(catalogOptions.get(CatalogProperties.WAREHOUSE_LOCATION))
365 |                             .build()
366 |             );
367 |             LOG.debug("Loaded metadata {}", getNamespaceResponse.toString());
368 | 
369 |             return ImmutableMap.of("namespaceName", getNamespaceResponse.toString());
370 |         } catch (NotFoundException ex) {
371 |             throw new NoSuchNamespaceException(ex, "Namespace not found: %s", namespace);
372 |         } catch (AccessDeniedException ex) {
373 |             LOG.error("Failed to load namespace metadata due to access denied", ex);
374 |             throw ex;
375 |         } catch (Exception ex) {
376 |             LOG.error("Failed to load namespace metadata", ex);
377 |             throw new RuntimeException(String.format("Failed to load namespace metadata for %s: %s", ex.getClass().getName(), ex.getMessage()), ex);
378 |         }
379 |     }
380 | 
381 |     @Override
382 |     public boolean dropNamespace(Namespace namespace) throws NamespaceNotEmptyException {
383 |         try {
384 |             LOG.debug("Loading namespace {} in order to drop it", namespace);
385 |             GetNamespaceResponse getNamespaceResponse = this.tablesClient.getNamespace(GetNamespaceRequest.builder()
386 |                     .namespace(namespace.toString()).tableBucketARN(catalogOptions.get(CatalogProperties.WAREHOUSE_LOCATION))
387 |                     .build());
388 | 
389 |             getNamespaceResponse.namespace().forEach(name -> {
390 |                 LOG.debug("Deleting namespace {}", Namespace.of(name));
391 | 
392 |                 this.tablesClient.deleteNamespace(DeleteNamespaceRequest.builder()
393 |                         .namespace(namespace.toString()).tableBucketARN(catalogOptions.get(CatalogProperties.WAREHOUSE_LOCATION))
394 |                         .build());
395 |             });
396 | 
397 |             return true;
398 |         } catch (ConflictException | NamespaceNotEmptyException ex) {
399 |             LOG.error("Failed to delete namespace because it is not empty", ex);
400 |             throw ex;
401 |         } catch (NotFoundException ex) {
402 |             LOG.debug("Namespace: {} not found", namespace);
403 |             return false;
404 |         }
405 |         catch (Exception ex) {
406 |             LOG.error("Failed to delete namespace", ex);
407 |             throw ex;
408 |         }
409 |     }
410 | 
411 |     @Override
412 |     public boolean setProperties(Namespace namespace, Map<String, String> properties) throws NoSuchNamespaceException {
413 |         Map<String, String> newProperties = Maps.newHashMap();
414 |         newProperties.putAll(loadNamespaceMetadata(namespace));
415 |         newProperties.putAll(properties);
416 | 
417 |         // Always successful, otherwise exception is thrown
418 |         return true;
419 |     }
420 | 
421 |     @Override
422 |     public boolean removeProperties(Namespace namespace, Set<String> properties) throws NoSuchNamespaceException {
423 |         Map<String, String> metadata = Maps.newHashMap(loadNamespaceMetadata(namespace));
424 |         for (String property : properties) {
425 |             metadata.remove(property);
426 |         }
427 | 
428 |         // Always successful, otherwise exception is thrown
429 |         return true;
430 |     }
431 | 
432 |     @Override
433 |     public List<TableIdentifier> listTables(Namespace namespace) {
434 |         LOG.debug("Listing tables for {}", namespace);
435 |         namespaceExists(namespace);
436 |         List<TableIdentifier> results = new ArrayList<>();
437 |         listWithToken(continuationToken -> {
438 |             ListTablesResponse response = tablesClient.listTables(
439 |                     ListTablesRequest.builder()
440 |                             .tableBucketARN(catalogOptions.get(CatalogProperties.WAREHOUSE_LOCATION))
441 |                             .namespace(namespace.level(0))
442 |                             .maxTables(100).continuationToken(continuationToken)
443 |                             .build()
444 |             );
445 |             results.addAll(
446 |                     response.tables().stream()
447 |                             .map(tableSummary -> TableIdentifier.of(namespace, tableSummary.name()))
448 |                             .collect(Collectors.toList())
449 |             );
450 |             return response.continuationToken();
451 |         });
452 |         LOG.debug("Found {} tables", results.size());
453 |         return results;
454 |     }
455 | 
456 |     @Override
457 |     public boolean dropTable(TableIdentifier identifier, boolean purge) {
458 |         LOG.debug("Trying to delete table: {}", identifier);
459 |         if (!purge) {
460 |             LOG.error("not allowing drop table with purge=false");
461 |             throw new UnsupportedOperationException("S3 Tables does not support the dropTable operation with purge=false. Some versions of Spark always set this flag to false even when running DROP TABLE PURGE commands." +
462 |                     " You can retry with DROP TABLE PURGE or use the S3 Tables DeleteTable API to delete a table.");
463 |         }
464 |         try {
465 |             validateSingleLevelNamespace(identifier.namespace());
466 | 
467 |             tablesClient.deleteTable(
468 |                     DeleteTableRequest.builder()
469 |                             .name(identifier.name())
470 |                             .tableBucketARN(catalogOptions.get(CatalogProperties.WAREHOUSE_LOCATION))
471 |                             .namespace(identifier.namespace().toString())
472 |                             .build()
473 |             );
474 |             LOG.info("Successfully deleted {}", identifier);
475 |             return true;
476 |         } catch (NotFoundException ex) {
477 |             LOG.info("Table not found: {}", identifier);
478 |             return false;
479 |         } catch (AccessDeniedException e) {
480 |             LOG.error("Failed to drop table {} due to access denied", identifier.name(), e);
481 |             throw e;
482 |         } catch (Exception e) {
483 |             LOG.error("Failed to drop table {}", identifier, e);
484 |             throw new RuntimeException(e);
485 |         }
486 |     }
487 | 
488 |     @Override
489 |     public void renameTable(TableIdentifier from, TableIdentifier to) {
490 |         /*
491 |            `to` comes in with the full namespace, which is expected to be 2 levels,
492 |            e.g. it comes in as ice_catalog.namespace_name instead of just namespace_name.
493 |            This throws off our normal validateSingleLevelNamespace, which only accounts for namespace_name.
494 |         */
495 |         validateSingleLevelNamespace(to.namespace(), 2);
496 |         validateSingleLevelNamespace(from.namespace());
497 | 
498 |         LOG.info("Renaming table from {} to {}", from, to);
499 | 
500 |         String sourceNamespaceName = from.namespace().toString();
501 |         String targetNamespaceName = null;
502 |         /* Since Iceberg supports multiple namespace levels, the target namespace may arrive as
503 |            ice_catalog.namespace_name instead of just namespace_name. For now we derive the namespace
504 |            level from the source TableIdentifier and choose the target namespace at the same level.
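           For example (an illustrative case): if from = ns1.tbl has a 1-level namespace and
           to = ice_catalog.ns2.tbl arrives with a 2-level namespace, we take
           to.namespace().level(1), i.e. "ns2", as the target namespace name.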
505 |         */
506 |         int sourceNamespaceLevel = from.namespace().levels().length;
507 |         int targetNamespaceLevel = to.namespace().levels().length;
508 | 
509 |         if (targetNamespaceLevel > sourceNamespaceLevel) {
510 |             targetNamespaceName = to.namespace().level(sourceNamespaceLevel);
511 |         } else {
512 |             targetNamespaceName = to.namespace().toString();
513 |         }
514 |         try {
515 |             tablesClient.renameTable(
516 |                     RenameTableRequest.builder()
517 |                             .name(from.name())
518 |                             .newName(to.name())
519 |                             .namespace(sourceNamespaceName)
520 |                             .newNamespaceName(targetNamespaceName)
521 |                             .tableBucketARN(catalogOptions.get(CatalogProperties.WAREHOUSE_LOCATION))
522 |                             .build());
523 |             LOG.info("Successfully renamed table from {} to {}", from.name(), to.name());
524 |         } catch (AccessDeniedException e) {
525 |             LOG.error("Failed to rename table {} due to access denied", from.name(), e);
526 |             throw e;
527 |         } catch (AwsServiceException | SdkClientException e) {
528 |             LOG.error("Failed to rename table {}", from, e);
529 |             throw new RuntimeException(e);
530 |         }
531 |     }
532 | 
533 |     @Override
534 |     public void close() throws IOException {
535 |         closeableGroup.close();
536 |     }
537 | 
538 |     @Override
539 |     public void setConf(Object configuration) {
540 |         hadoopConf = configuration;
541 |     }
542 | 
543 |     @Override
544 |     public String name() {
545 |         return catalogName;
546 |     }
547 | 
548 |     @VisibleForTesting
549 |     static void validateSingleLevelNamespace(Namespace namespace, int maxLength) {
550 |         if (namespace != null && namespace.levels().length > maxLength) {
551 |             LOG.error("Namespace {} has {} levels and S3 Tables only supports one", namespace, namespace.levels().length);
552 |             throw new ValidationException(
553 |                     "S3TablesCatalog does not support more than 1 level of namespace");
554 |         }
555 |     }
556 | 
557 |     @VisibleForTesting
558 |     static void validateSingleLevelNamespace(Namespace namespace) {
559 |         validateSingleLevelNamespace(namespace, 1);
560 |     }
561 | 
562 |     private static void listWithToken(Function<String, String> continuationTokenGenerator) {
563 |         String continuationToken = null;
564 |         do {
565 |             continuationToken = continuationTokenGenerator.apply(continuationToken);
566 |         } while (continuationToken != null);
567 |     }
568 | 
569 |     @VisibleForTesting
570 |     S3TablesClient getS3TablesClient() {
571 |         return this.tablesClient;
572 |     }
573 | }
574 | 
--------------------------------------------------------------------------------
/tst/software/amazon/s3tables/iceberg/S3TablesCatalogTest.java:
--------------------------------------------------------------------------------
1 | package software.amazon.s3tables.iceberg;
2 | 
3 | import org.apache.iceberg.CatalogProperties;
4 | import org.apache.iceberg.PartitionSpec;
5 | import org.apache.iceberg.StructLike;
6 | import org.apache.iceberg.TableMetadata;
7 | import org.apache.iceberg.TableOperations;
8 | import org.apache.iceberg.TableProperties;
9 | import org.apache.iceberg.aws.AwsProperties;
10 | import org.apache.iceberg.aws.s3.S3FileIO;
11 | import org.apache.iceberg.catalog.Namespace;
12 | import org.apache.iceberg.catalog.TableIdentifier;
13 | import org.apache.iceberg.exceptions.CommitFailedException;
14 | import org.apache.iceberg.exceptions.ValidationException;
15 | import org.apache.iceberg.io.LocationProvider;
16 | import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
17 | import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet;
18 | import org.apache.iceberg.relocated.com.google.common.collect.Maps;
19 | import
org.junit.jupiter.api.BeforeEach; 20 | import org.junit.jupiter.api.Test; 21 | import static org.assertj.core.api.Assertions.assertThat; 22 | 23 | import org.mockito.ArgumentCaptor; 24 | import software.amazon.awssdk.services.s3tables.S3TablesClient; 25 | import software.amazon.awssdk.services.s3tables.model.ConflictException; 26 | import software.amazon.awssdk.services.s3tables.model.CreateNamespaceRequest; 27 | import software.amazon.awssdk.services.s3tables.model.CreateNamespaceResponse; 28 | import software.amazon.awssdk.services.s3tables.model.CreateTableRequest; 29 | import software.amazon.awssdk.services.s3tables.model.CreateTableResponse; 30 | import software.amazon.awssdk.services.s3tables.model.DeleteNamespaceRequest; 31 | import software.amazon.awssdk.services.s3tables.model.DeleteNamespaceResponse; 32 | import software.amazon.awssdk.services.s3tables.model.DeleteTableRequest; 33 | import software.amazon.awssdk.services.s3tables.model.DeleteTableResponse; 34 | import software.amazon.awssdk.services.s3tables.model.GetNamespaceRequest; 35 | import software.amazon.awssdk.services.s3tables.model.GetNamespaceResponse; 36 | import software.amazon.awssdk.services.s3tables.model.GetTableMetadataLocationRequest; 37 | import software.amazon.awssdk.services.s3tables.model.GetTableMetadataLocationResponse; 38 | import software.amazon.awssdk.services.s3tables.model.ListNamespacesRequest; 39 | import software.amazon.awssdk.services.s3tables.model.ListNamespacesResponse; 40 | import software.amazon.awssdk.services.s3tables.model.ListTablesRequest; 41 | import software.amazon.awssdk.services.s3tables.model.ListTablesResponse; 42 | import software.amazon.awssdk.services.s3tables.model.NamespaceSummary; 43 | import software.amazon.awssdk.services.s3tables.model.NotFoundException; 44 | import software.amazon.awssdk.services.s3tables.model.RenameTableRequest; 45 | import software.amazon.awssdk.services.s3tables.model.RenameTableResponse; 46 | import software.amazon.awssdk.services.s3tables.model.TableSummary; 47 | import software.amazon.awssdk.services.s3tables.model.UpdateTableMetadataLocationRequest; 48 | import software.amazon.awssdk.services.s3tables.model.UpdateTableMetadataLocationResponse; 49 | import software.amazon.awssdk.services.s3tables.model.AccessDeniedException; 50 | import software.amazon.s3tables.iceberg.imports.AwsClientProperties; 51 | 52 | import java.util.Arrays; 53 | import java.util.List; 54 | import java.util.Map; 55 | 56 | import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; 57 | import static org.junit.jupiter.api.Assertions.assertThrows; 58 | import static org.mockito.Mockito.times; 59 | import static org.mockito.Mockito.verify; 60 | import static org.mockito.Mockito.when; 61 | import static org.mockito.Mockito.any; 62 | import static org.mockito.Mockito.mock; 63 | 64 | public class S3TablesCatalogTest { 65 | private static final String DUMMY_WAREHOUSE_PATH = "s3://dummy_warehouse"; 66 | private static final String DUMMY_CATALOG_NAME = "s3tables_catalog"; 67 | private static final String DUMMY_ENDPOINT = "http://localhost:9090"; 68 | private static final TableIdentifier DUMMY_IDENTIFIER = TableIdentifier.of("db", "table"); 69 | private static final String DUMMY_NAMESPACE_NAME = "dummy_namespace"; 70 | private static final String DUMMY_ARN_PATH = "arn:aws:s3tables:us-east-2:012345678901:bucket/example/table/"; 71 | private static final AccessDeniedException ACCESS_DENIED = AccessDeniedException.builder() 72 | .message("Access denied") 73 | .build(); 74 | 75 | 
private S3TablesCatalog catalog;
76 |     private S3TablesClient mockClient;
77 |     private S3FileIO mockFileIO;
78 | 
79 |     @BeforeEach
80 |     public void setup() {
81 |         // To make client construction work without access to IMDS
82 |         System.setProperty("aws.region", "us-east-1");
83 | 
84 |         mockClient = mock(S3TablesClient.class);
85 |         mockFileIO = mock(S3FileIO.class);
86 |         catalog = new S3TablesCatalog();
87 |         catalog.initialize(DUMMY_CATALOG_NAME,
88 |                 ImmutableMap.of(CatalogProperties.WAREHOUSE_LOCATION, DUMMY_WAREHOUSE_PATH),
89 |                 mockClient);
90 |     }
91 | 
92 |     @Test
93 |     public void testThrowsErrorWhenNoWarehouseProvided() {
94 |         // initialization must fail fast when no warehouse (table bucket ARN) is provided
95 |         assertThrows(
96 |                 ValidationException.class,
97 |                 () -> catalog.initialize(DUMMY_CATALOG_NAME, ImmutableMap.of(), mockClient));
98 |     }
99 | 
100 | 
101 |     @Test
102 |     public void testDefaultWarehouseLocationWhenTableDoesNotExist() {
103 |         CreateTableResponse mockCreateTableResponse = mock(CreateTableResponse.class);
104 |         GetTableMetadataLocationResponse mockGetTableResponse = mock(GetTableMetadataLocationResponse.class);
105 |         TableIdentifier identifier = TableIdentifier.of("dummy-table");
106 |         when(mockClient.createTable(any(CreateTableRequest.class))).thenReturn(mockCreateTableResponse);
107 |         when(mockGetTableResponse.warehouseLocation())
108 |                 .thenReturn(String.format("%s/%s", DUMMY_WAREHOUSE_PATH, identifier.name()));
109 |         when(mockClient.getTableMetadataLocation(any(GetTableMetadataLocationRequest.class)))
110 |                 .thenThrow(NotFoundException.class)
111 |                 .thenReturn(mockGetTableResponse);
112 |         String whLocation = catalog.defaultWarehouseLocation(identifier);
113 |         assertThat(whLocation).isEqualTo(String.format("%s/%s", DUMMY_WAREHOUSE_PATH, identifier.name()));
114 |     }
115 | 
116 |     @Test
117 |     public void testDefaultWarehouseLocationWhenTableExists() {
118 |         GetTableMetadataLocationResponse mockGetTableResponse = mock(GetTableMetadataLocationResponse.class);
119 |         TableIdentifier identifier = TableIdentifier.of("dummy-table");
120 |         when(mockGetTableResponse.warehouseLocation())
121 |                 .thenReturn(String.format("%s/%s", DUMMY_WAREHOUSE_PATH, identifier.name()));
122 |         when(mockClient.getTableMetadataLocation(any(GetTableMetadataLocationRequest.class))).thenReturn(mockGetTableResponse);
123 |         String whLocation = catalog.defaultWarehouseLocation(identifier);
124 |         assertThat(whLocation).isEqualTo(String.format("%s/%s", DUMMY_WAREHOUSE_PATH, identifier.name()));
125 |         verify(mockClient, times(0)).createTable(any(CreateTableRequest.class));
126 |     }
127 | 
128 |     @Test
129 |     public void testListTables() {
130 |         Namespace namespace = Namespace.of(DUMMY_NAMESPACE_NAME);
131 |         GetNamespaceResponse mockNamespaceresponse = mock(GetNamespaceResponse.class);
132 |         when(mockNamespaceresponse.toString()).thenReturn(namespace.toString());
133 |         when(mockClient.getNamespace(any(GetNamespaceRequest.class)))
134 |                 .thenReturn(mockNamespaceresponse);
135 | 
136 |         TableSummary[] tableSummaryResponses = new TableSummary[]{
137 |                 TableSummary.builder().tableARN(DUMMY_ARN_PATH + "table1").name("table1").build(),
138 |                 TableSummary.builder().tableARN(DUMMY_ARN_PATH + "table2").name("table2").build(),
139 |                 TableSummary.builder().tableARN(DUMMY_ARN_PATH + "table3").name("table3").build(),
140 |                 TableSummary.builder().tableARN(DUMMY_ARN_PATH + "table4").name("table4").build(),
141 |                 TableSummary.builder().tableARN(DUMMY_ARN_PATH + "table5").name("table5").build()};
142 | 
143 |         ListTablesResponse response1 = ListTablesResponse.builder()
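                // first page: three tables plus a continuation token (dummy values), so
                // listTables must issue a second ListTables call for the remainder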
144 |                 .tables(Arrays.asList(tableSummaryResponses[0], tableSummaryResponses[1], tableSummaryResponses[2]))
145 |                 .continuationToken("token")
146 |                 .build();
147 | 
148 |         ListTablesResponse response2 = ListTablesResponse.builder()
149 |                 .tables(Arrays.asList(tableSummaryResponses[3], tableSummaryResponses[4]))
150 |                 .build();
151 | 
152 |         when(mockClient.listTables(any(ListTablesRequest.class))).thenReturn(response1).thenReturn(response2);
153 | 
154 |         List<TableIdentifier> identifierList = catalog.listTables(namespace);
155 |         verify(mockClient, times(2)).listTables(any(ListTablesRequest.class));
156 |         assertThat(identifierList.size()).isEqualTo(5);
157 | 
158 |         for (int i = 0; i < identifierList.size(); i++) {
159 |             assertThat(identifierList.get(i).name()).isEqualTo(tableSummaryResponses[i].name());
160 |         }
161 |     }
162 | 
163 |     @Test
164 |     public void testCreateNamespace() {
165 |         when(mockClient.createNamespace(any(CreateNamespaceRequest.class)))
166 |                 .thenReturn(CreateNamespaceResponse.builder()
167 |                         .namespace(DUMMY_NAMESPACE_NAME)
168 |                         .tableBucketARN(DUMMY_ARN_PATH + "DUMMY_NAME")
169 |                         .build());
170 |         catalog.createNamespace(Namespace.of(DUMMY_NAMESPACE_NAME), ImmutableMap.of());
171 |         verify(mockClient, times(1)).createNamespace(any(CreateNamespaceRequest.class));
172 |     }
173 | 
174 |     @Test
175 |     public void testListNamespaces() {
176 |         NamespaceSummary[] namespaceSummaryResponses = new NamespaceSummary[] {
177 |                 NamespaceSummary.builder().namespace("ns1").build(),
178 |                 NamespaceSummary.builder().namespace("ns3").build(),
179 |                 NamespaceSummary.builder().namespace("ns5").build(),
180 |                 NamespaceSummary.builder().namespace("ns6").build(),
181 |                 NamespaceSummary.builder().namespace("ns7").build(),
182 |                 NamespaceSummary.builder().namespace("ns8").build()
183 |         };
184 | 
185 |         when(mockClient.listNamespaces(any(ListNamespacesRequest.class)))
186 |                 .thenReturn(ListNamespacesResponse.builder()
187 |                         .namespaces(Arrays.asList(namespaceSummaryResponses[0], namespaceSummaryResponses[1], namespaceSummaryResponses[2]))
188 |                         .continuationToken("token")
189 |                         .build())
190 |                 .thenReturn(ListNamespacesResponse.builder()
191 |                         .namespaces(Arrays.asList(namespaceSummaryResponses[3], namespaceSummaryResponses[4], namespaceSummaryResponses[5]
192 |                         )).build());
193 | 
194 |         List<Namespace> namespaceList = catalog.listNamespaces(Namespace.empty());
195 |         verify(mockClient, times(2)).listNamespaces(any(ListNamespacesRequest.class));
196 |         assertThat(namespaceList.size()).isEqualTo(6);
197 | 
198 |         for (int i = 0; i < namespaceList.size(); i++) {
199 |             assertThat(namespaceList.get(i).levels()[0]).isEqualTo(namespaceSummaryResponses[i].namespace().get(0));
200 |         }
201 |     }
202 | 
203 |     @Test
204 |     public void testDropTable() {
205 |         when(mockClient.getTableMetadataLocation(any(GetTableMetadataLocationRequest.class))).thenReturn(mock(GetTableMetadataLocationResponse.class));
206 |         when(mockClient.deleteTable(any(DeleteTableRequest.class)))
207 |                 .thenReturn(DeleteTableResponse.builder().build());
208 |         assertThat(catalog.dropTable(TableIdentifier.of("dummy-table"), true)).isTrue();
209 |     }
210 | 
211 |     @Test
212 |     public void testDropTableWithoutPurgeThrowsException() {
213 |         when(mockClient.getTableMetadataLocation(any(GetTableMetadataLocationRequest.class))).thenReturn(mock(GetTableMetadataLocationResponse.class));
214 |         when(mockClient.deleteTable(any(DeleteTableRequest.class)))
215 |                 .thenReturn(DeleteTableResponse.builder().build());
216 |         assertThrows(UnsupportedOperationException.class, () -> catalog.dropTable(TableIdentifier.of("dummy-table"), false));
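        // purge=false is rejected up front; no DeleteTable request should reach the service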

    @Test
    public void testRenameTable() {
        ArgumentCaptor<RenameTableRequest> requestCaptor = ArgumentCaptor.forClass(RenameTableRequest.class);

        when(mockClient.getTableMetadataLocation(any(GetTableMetadataLocationRequest.class)))
                .thenReturn(mock(GetTableMetadataLocationResponse.class));
        when(mockClient.updateTableMetadataLocation(any(UpdateTableMetadataLocationRequest.class)))
                .thenReturn(mock(UpdateTableMetadataLocationResponse.class));
        when(mockClient.renameTable(any(RenameTableRequest.class))).thenReturn(mock(RenameTableResponse.class));

        catalog.renameTable(TableIdentifier.of("sourcenamespace", "sourcetable"),
                TableIdentifier.of("targetnamespace", "targettable"));

        verify(mockClient, times(1)).renameTable(requestCaptor.capture());
        RenameTableRequest capturedRequest = requestCaptor.getValue();
        assertThat(capturedRequest.newNamespaceName()).isEqualTo("targetnamespace");
        assertThat(capturedRequest.newName()).isEqualTo("targettable");
    }

    @Test
    public void testSetProperties() {
        when(mockClient.getNamespace(any(GetNamespaceRequest.class)))
                .thenReturn(GetNamespaceResponse.builder()
                        .namespace(DUMMY_NAMESPACE_NAME).build());
        assertThat(catalog.setProperties(Namespace.of(DUMMY_NAMESPACE_NAME), ImmutableMap.of())).isTrue();
    }

    @Test
    public void testRemoveProperties() {
        when(mockClient.getNamespace(any(GetNamespaceRequest.class)))
                .thenReturn(GetNamespaceResponse.builder()
                        .namespace(DUMMY_NAMESPACE_NAME).build());
        assertThat(catalog.removeProperties(Namespace.of(DUMMY_NAMESPACE_NAME), ImmutableSet.of())).isTrue();
    }

    @Test
    public void testTableLevelS3Properties() {
        Map<String, String> properties =
                ImmutableMap.of(
                        S3TablesProperties.S3TABLES_ENDPOINT, DUMMY_ENDPOINT,
                        AwsClientProperties.CLIENT_REGION, "us-east-1",
                        AwsClientProperties.CLIENT_CREDENTIALS_PROVIDER, DUMMY_ENDPOINT + "authorizer",
                        CatalogProperties.WAREHOUSE_LOCATION, DUMMY_WAREHOUSE_PATH);

        catalog.initialize(DUMMY_CATALOG_NAME, properties, mockClient);
        S3TablesCatalogOperations ops = (S3TablesCatalogOperations) catalog.newTableOps(DUMMY_IDENTIFIER);

        Map<String, String> tableCatalogProperties = ops.tableCatalogProperties();

        assertThat(tableCatalogProperties)
                .containsEntry(S3TablesProperties.S3TABLES_ENDPOINT, DUMMY_ENDPOINT)
                .containsEntry(AwsClientProperties.CLIENT_REGION, "us-east-1")
                .containsEntry(AwsClientProperties.CLIENT_CREDENTIALS_PROVIDER, DUMMY_ENDPOINT + "authorizer");
    }

    @Test
    public void testDefaultLocationProvider() {
        TableOperations mockTableOperations = mock(S3TablesCatalogOperations.class);
        TableMetadata mockTableMetadata = mock(TableMetadata.class);
        when(mockTableMetadata.location()).thenReturn("s3://amzn-s3-demo-dummybucket/");
        when(mockTableMetadata.properties()).thenReturn(ImmutableMap.of());
        when(mockTableOperations.current()).thenReturn(mockTableMetadata);
        when(mockTableOperations.locationProvider()).thenCallRealMethod();
        LocationProvider locationProvider = mockTableOperations.locationProvider();
        assertThat(locationProvider).isInstanceOf(S3TablesLocationProvider.class);
    }
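
    // Minimal LocationProvider stub used by testOverriddenLocationProvider below.
    // Iceberg resolves the class named in TableProperties.WRITE_LOCATION_PROVIDER_IMPL
    // reflectively, so the stub must be accessible and expose a compatible
    // constructor (here, the explicit no-arg one).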
    public static class TestLocationProvider implements LocationProvider {
        public TestLocationProvider() {}

        @Override
        public String newDataLocation(String s) {
            return "dummylocation";
        }

        @Override
        public String newDataLocation(PartitionSpec partitionSpec, StructLike structLike, String s) {
            return "dummylocation";
        }
    }

    @Test
    public void testOverriddenLocationProvider() {
        TableOperations mockTableOperations = mock(S3TablesCatalogOperations.class);
        TableMetadata mockTableMetadata = mock(TableMetadata.class);
        when(mockTableMetadata.location()).thenReturn("s3://amzn-s3-demo-dummybucket/");
        when(mockTableMetadata.properties()).thenReturn(ImmutableMap.of(
                TableProperties.WRITE_LOCATION_PROVIDER_IMPL,
                "software.amazon.s3tables.iceberg.S3TablesCatalogTest$TestLocationProvider"));
        when(mockTableOperations.current()).thenReturn(mockTableMetadata);
        when(mockTableOperations.locationProvider()).thenCallRealMethod();
        LocationProvider locationProvider = mockTableOperations.locationProvider();
        assertThat(locationProvider).isInstanceOf(TestLocationProvider.class);
    }

    @Test
    public void testObjectStoreEnabledLocationProvider() {
        TableOperations mockTableOperations = mock(S3TablesCatalogOperations.class);
        TableMetadata mockTableMetadata = mock(TableMetadata.class);
        when(mockTableMetadata.location()).thenReturn("s3://amzn-s3-demo-dummybucket/");
        when(mockTableMetadata.properties()).thenReturn(ImmutableMap.of(TableProperties.OBJECT_STORE_ENABLED, "true"));
        when(mockTableOperations.current()).thenReturn(mockTableMetadata);
        when(mockTableOperations.locationProvider()).thenCallRealMethod();
        LocationProvider locationProvider = mockTableOperations.locationProvider();
        assertThat(locationProvider).isInstanceOf(S3TablesLocationProvider.class);
    }

    @Test
    public void testObjectStoreDisabledLocationProvider() {
        TableOperations mockTableOperations = mock(S3TablesCatalogOperations.class);
        TableMetadata mockTableMetadata = mock(TableMetadata.class);
        when(mockTableMetadata.location()).thenReturn("s3://amzn-s3-demo-dummybucket/");
        when(mockTableMetadata.properties()).thenReturn(ImmutableMap.of(TableProperties.OBJECT_STORE_ENABLED, "false"));
        when(mockTableOperations.current()).thenReturn(mockTableMetadata);
        when(mockTableOperations.locationProvider()).thenCallRealMethod();
        LocationProvider locationProvider = mockTableOperations.locationProvider();
        // Iceberg's default location providers aren't public, so compare by class name.
        assertThat(locationProvider.getClass().getName())
                .isEqualTo("org.apache.iceberg.LocationProviders$DefaultLocationProvider");
    }

    @Test
    public void testDeleteNamespace() {
        Namespace namespace = Namespace.of(DUMMY_NAMESPACE_NAME);
        GetNamespaceResponse mockNamespaceResponse = mock(GetNamespaceResponse.class);
        when(mockNamespaceResponse.toString()).thenReturn(namespace.toString());
        when(mockNamespaceResponse.namespace()).thenReturn(Arrays.asList(DUMMY_NAMESPACE_NAME));
        when(mockClient.getNamespace(any(GetNamespaceRequest.class)))
                .thenReturn(mockNamespaceResponse);
        when(mockClient.deleteNamespace(any(DeleteNamespaceRequest.class)))
                .thenReturn(DeleteNamespaceResponse.builder().build());

        assertThat(catalog.dropNamespace(Namespace.of(DUMMY_NAMESPACE_NAME))).isTrue();
        verify(mockClient, times(1)).deleteNamespace(any(DeleteNamespaceRequest.class));
    }
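
    // Illustrative sketch, not part of the original suite: reuses the stubbing from
    // testDeleteNamespace above and additionally captures the DeleteNamespaceRequest
    // to check that the namespace name is passed through to the service call. It
    // assumes DeleteNamespaceRequest carries the namespace as a single string,
    // matching the S3 Tables API shape.
    @Test
    public void testDeleteNamespacePassesNamespaceName() {
        GetNamespaceResponse mockNamespaceResponse = mock(GetNamespaceResponse.class);
        when(mockNamespaceResponse.namespace()).thenReturn(Arrays.asList(DUMMY_NAMESPACE_NAME));
        when(mockClient.getNamespace(any(GetNamespaceRequest.class)))
                .thenReturn(mockNamespaceResponse);
        when(mockClient.deleteNamespace(any(DeleteNamespaceRequest.class)))
                .thenReturn(DeleteNamespaceResponse.builder().build());

        catalog.dropNamespace(Namespace.of(DUMMY_NAMESPACE_NAME));

        ArgumentCaptor<DeleteNamespaceRequest> requestCaptor = ArgumentCaptor.forClass(DeleteNamespaceRequest.class);
        verify(mockClient).deleteNamespace(requestCaptor.capture());
        assertThat(requestCaptor.getValue().namespace()).isEqualTo(DUMMY_NAMESPACE_NAME);
    }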

    @Test
    public void testDeleteNamespaceThrowsExpectedErrors() {
        Namespace namespace = Namespace.of(DUMMY_NAMESPACE_NAME);
        GetNamespaceResponse mockNamespaceResponse = mock(GetNamespaceResponse.class);
        when(mockNamespaceResponse.toString()).thenReturn(namespace.toString());
        when(mockNamespaceResponse.namespace()).thenReturn(Arrays.asList(DUMMY_NAMESPACE_NAME));
        when(mockClient.getNamespace(any(GetNamespaceRequest.class)))
                .thenReturn(mockNamespaceResponse);
        when(mockClient.deleteNamespace(any(DeleteNamespaceRequest.class)))
                .thenThrow(ConflictException.builder().build());

        assertThrows(ConflictException.class, () -> catalog.dropNamespace(Namespace.of(DUMMY_NAMESPACE_NAME)));
        verify(mockClient, times(1)).deleteNamespace(any(DeleteNamespaceRequest.class));

        when(mockClient.deleteNamespace(any(DeleteNamespaceRequest.class)))
                .thenThrow(new IllegalArgumentException("blah"));

        assertThrows(IllegalArgumentException.class, () -> catalog.dropNamespace(Namespace.of(DUMMY_NAMESPACE_NAME)));
    }

    @Test
    public void testValidateSingleLevelNamespace() {
        assertDoesNotThrow(() -> S3TablesCatalog.validateSingleLevelNamespace(Namespace.of("level1", "level2"), 2));
        assertThrows(ValidationException.class,
                () -> S3TablesCatalog.validateSingleLevelNamespace(Namespace.of("level1", "level2")));
    }

    @Test
    public void testUpdateMetadataExceptionHandling() {
        // Simulate an empty table to avoid reading TableMetadata.
        GetTableMetadataLocationResponse getTableResponse = mock(GetTableMetadataLocationResponse.class);
        when(getTableResponse.metadataLocation()).thenReturn(null);
        when(mockClient.getTableMetadataLocation(any(GetTableMetadataLocationRequest.class)))
                .thenThrow(NotFoundException.class)
                .thenReturn(getTableResponse);

        when(mockClient.updateTableMetadataLocation(any(UpdateTableMetadataLocationRequest.class)))
                .thenThrow(ConflictException.class);
        TableMetadata newTableMetadata = mock(TableMetadata.class);
        when(newTableMetadata.metadataFileLocation()).thenReturn("x");

        assertThrows(
                CommitFailedException.class,
                () -> catalog.newTableOps(TableIdentifier.of("a", "b"))
                        .commit(null, newTableMetadata)
        );
    }

    @Test
    public void testInitializeClientFactory() {
        S3TablesCatalog catalog = new S3TablesCatalog();
        catalog.initialize("testcatalog", ImmutableMap.of(CatalogProperties.WAREHOUSE_LOCATION, DUMMY_WAREHOUSE_PATH));
    }

    static class TestS3TablesClient implements S3TablesClient {
        @Override
        public String serviceName() {
            return "s3tables";
        }

        @Override
        public void close() {}
    }

    static class TestS3TablesAwsClientFactory implements S3TablesAwsClientFactory {
        @Override
        public S3TablesClient s3tables() {
            return new TestS3TablesClient();
        }

        @Override
        public void initialize(Map<String, String> properties) {}
    }

    @Test
    public void testInitializeClientFactoryCustom() {
        S3TablesCatalog catalog = new S3TablesCatalog();
        catalog.initialize("testcatalog", ImmutableMap.of(
                CatalogProperties.WAREHOUSE_LOCATION, DUMMY_WAREHOUSE_PATH,
                S3TablesProperties.CLIENT_FACTORY, TestS3TablesAwsClientFactory.class.getName()));

        S3TablesClient client = catalog.getS3TablesClient();
        assertThat(client).isInstanceOf(TestS3TablesClient.class);
    }
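
    // Illustrative sketch, not part of the original suite: it assumes
    // S3TablesAwsClientFactories.from(...) instantiates the class named in
    // S3TablesProperties.CLIENT_FACTORY via its no-arg constructor, as the
    // assume-role test below relies on, and checks that the resulting factory
    // hands back the stub client defined above.
    @Test
    public void testClientFactoryFromProperties() {
        S3TablesAwsClientFactory factory = S3TablesAwsClientFactories.from(
                ImmutableMap.of(S3TablesProperties.CLIENT_FACTORY, TestS3TablesAwsClientFactory.class.getName()));
        assertThat(factory).isInstanceOf(TestS3TablesAwsClientFactory.class);
        assertThat(factory.s3tables()).isInstanceOf(TestS3TablesClient.class);
    }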

    @Test
    public void testAssumeRoleAwsClientFactory() {
        Map<String, String> properties = Maps.newHashMap();
        properties.put(S3TablesProperties.CLIENT_FACTORY, S3TablesAssumeRoleAwsClientFactory.class.getName());
        properties.put(AwsProperties.CLIENT_ASSUME_ROLE_ARN, "arn::dummyarn");
        properties.put(AwsProperties.CLIENT_ASSUME_ROLE_REGION, "us-west-2");
        S3TablesAwsClientFactory clientFactory = S3TablesAwsClientFactories.from(properties);
        assertThat(clientFactory).isInstanceOf(S3TablesAssumeRoleAwsClientFactory.class);
    }

    @Test
    public void testAccessDeniedExceptionPropagatedOnListNamespaces() {
        when(mockClient.listNamespaces(any(ListNamespacesRequest.class)))
                .thenThrow(ACCESS_DENIED);

        AccessDeniedException thrown = assertThrows(AccessDeniedException.class,
                () -> catalog.listNamespaces(Namespace.empty()));
        assertThat(thrown).isSameAs(ACCESS_DENIED);
    }

    @Test
    public void testAccessDeniedExceptionPropagatedOnLoadNamespaceMetadata() {
        when(mockClient.getNamespace(any(GetNamespaceRequest.class)))
                .thenThrow(ACCESS_DENIED);

        AccessDeniedException thrown = assertThrows(AccessDeniedException.class,
                () -> catalog.loadNamespaceMetadata(Namespace.of("test")));
        assertThat(thrown).isSameAs(ACCESS_DENIED);
    }

    @Test
    public void testAccessDeniedExceptionPropagatedOnDropTable() {
        when(mockClient.deleteTable(any(DeleteTableRequest.class)))
                .thenThrow(ACCESS_DENIED);

        AccessDeniedException thrown = assertThrows(AccessDeniedException.class,
                () -> catalog.dropTable(TableIdentifier.of("test", "table"), true));
        assertThat(thrown).isSameAs(ACCESS_DENIED);
    }

    @Test
    public void testAccessDeniedExceptionPropagatedOnDefaultWarehouseLocation() {
        when(mockClient.getTableMetadataLocation(any(GetTableMetadataLocationRequest.class)))
                .thenThrow(NotFoundException.class);
        when(mockClient.createTable(any(CreateTableRequest.class)))
                .thenThrow(ACCESS_DENIED);

        AccessDeniedException thrown = assertThrows(AccessDeniedException.class,
                () -> catalog.defaultWarehouseLocation(TableIdentifier.of("test")));
        assertThat(thrown).isSameAs(ACCESS_DENIED);
    }

    @Test
    public void testAccessDeniedExceptionPropagatedOnCreateNamespace() {
        when(mockClient.createNamespace(any(CreateNamespaceRequest.class)))
                .thenThrow(ACCESS_DENIED);

        AccessDeniedException thrown = assertThrows(AccessDeniedException.class,
                () -> catalog.createNamespace(Namespace.of("test"), ImmutableMap.of()));
        assertThat(thrown).isSameAs(ACCESS_DENIED);
    }

    @Test
    public void testAccessDeniedExceptionPropagatedOnRenameTable() {
        when(mockClient.renameTable(any(RenameTableRequest.class)))
                .thenThrow(ACCESS_DENIED);

        AccessDeniedException thrown = assertThrows(AccessDeniedException.class,
                () -> catalog.renameTable(TableIdentifier.of("test", "table1"), TableIdentifier.of("test", "table2")));
        assertThat(thrown).isSameAs(ACCESS_DENIED);
    }
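
    // The remaining tests share one pattern: each S3 Tables call is stubbed to
    // throw the shared ACCESS_DENIED instance, and isSameAs() asserts that the
    // catalog rethrows that exact instance rather than wrapping or translating it.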
    @Test
    public void testAccessDeniedExceptionPropagatedOnDropNamespace() {
        GetNamespaceResponse mockNamespaceResponse = mock(GetNamespaceResponse.class);
        when(mockNamespaceResponse.namespace()).thenReturn(Arrays.asList("test"));
        when(mockClient.getNamespace(any(GetNamespaceRequest.class)))
                .thenReturn(mockNamespaceResponse);
        when(mockClient.deleteNamespace(any(DeleteNamespaceRequest.class)))
                .thenThrow(ACCESS_DENIED);

        AccessDeniedException thrown = assertThrows(AccessDeniedException.class,
                () -> catalog.dropNamespace(Namespace.of("test")));
        assertThat(thrown).isSameAs(ACCESS_DENIED);
    }

    @Test
    public void testAccessDeniedExceptionPropagatedOnListTables() {
        GetNamespaceResponse mockNamespaceResponse = mock(GetNamespaceResponse.class);
        when(mockNamespaceResponse.toString()).thenReturn("test");
        when(mockClient.getNamespace(any(GetNamespaceRequest.class)))
                .thenReturn(mockNamespaceResponse);
        when(mockClient.listTables(any(ListTablesRequest.class)))
                .thenThrow(ACCESS_DENIED);

        AccessDeniedException thrown = assertThrows(AccessDeniedException.class,
                () -> catalog.listTables(Namespace.of("test")));
        assertThat(thrown).isSameAs(ACCESS_DENIED);
    }

    @Test
    public void testAccessDeniedExceptionPropagatedOnGetTableMetadataLocation() {
        when(mockClient.getTableMetadataLocation(any(GetTableMetadataLocationRequest.class)))
                .thenThrow(ACCESS_DENIED);

        S3TablesCatalogOperations ops = (S3TablesCatalogOperations) catalog.newTableOps(TableIdentifier.of("test", "table"));
        AccessDeniedException thrown = assertThrows(AccessDeniedException.class,
                () -> ops.doRefresh());
        assertThat(thrown).isSameAs(ACCESS_DENIED);
    }

    @Test
    public void testAccessDeniedExceptionPropagatedOnUpdateTableMetadataLocation() {
        GetTableMetadataLocationResponse getTableResponse = mock(GetTableMetadataLocationResponse.class);
        when(getTableResponse.metadataLocation()).thenReturn("s3://test/metadata.json");
        when(getTableResponse.versionToken()).thenReturn("token123");
        when(mockClient.getTableMetadataLocation(any(GetTableMetadataLocationRequest.class)))
                .thenReturn(getTableResponse);
        when(mockClient.updateTableMetadataLocation(any(UpdateTableMetadataLocationRequest.class)))
                .thenThrow(ACCESS_DENIED);

        S3TablesCatalogOperations ops = (S3TablesCatalogOperations) catalog.newTableOps(TableIdentifier.of("test", "table"));
        TableMetadata mockMetadata = mock(TableMetadata.class);
        when(mockMetadata.metadataFileLocation()).thenReturn("s3://test/new-metadata.json");

        AccessDeniedException thrown = assertThrows(AccessDeniedException.class,
                () -> ops.doCommit(null, mockMetadata));
        assertThat(thrown).isSameAs(ACCESS_DENIED);
    }
}
--------------------------------------------------------------------------------