├── NOTICE ├── .github └── PULL_REQUEST_TEMPLATE.md ├── CODE_OF_CONDUCT.md ├── aws-glue-datacatalog-client-common ├── src │ ├── main │ │ └── java │ │ │ └── com │ │ │ └── amazonaws │ │ │ └── glue │ │ │ └── catalog │ │ │ ├── metastore │ │ │ ├── AWSCredentialsProviderFactory.java │ │ │ ├── GlueClientFactory.java │ │ │ ├── ExecutorServiceFactory.java │ │ │ ├── DefaultAWSCredentialsProviderFactory.java │ │ │ ├── DefaultExecutorServiceFactory.java │ │ │ ├── AWSGlueMetastoreFactory.java │ │ │ ├── SessionCredentialsProviderFactory.java │ │ │ ├── AWSGlueMetastore.java │ │ │ ├── AWSGlueClientFactory.java │ │ │ ├── AWSGlueMetastoreCacheDecorator.java │ │ │ └── AWSGlueMetastoreBaseDecorator.java │ │ │ ├── exceptions │ │ │ ├── LakeFormationException.java │ │ │ └── InvalidPartitionNameException.java │ │ │ ├── converters │ │ │ ├── ConverterUtils.java │ │ │ ├── CatalogToHiveConverterFactory.java │ │ │ ├── CatalogToHiveConverter.java │ │ │ ├── Hive3CatalogToHiveConverter.java │ │ │ ├── GlueInputConverter.java │ │ │ └── PartitionNameParser.java │ │ │ └── util │ │ │ ├── LoggingHelper.java │ │ │ ├── PartitionKey.java │ │ │ ├── PartitionUtils.java │ │ │ ├── AWSGlueConfig.java │ │ │ ├── HiveTableValidator.java │ │ │ ├── MetastoreClientUtils.java │ │ │ ├── BatchDeletePartitionsHelper.java │ │ │ └── BatchCreatePartitionsHelper.java │ └── test │ │ └── java │ │ └── com │ │ └── amazonaws │ │ └── glue │ │ └── catalog │ │ ├── converters │ │ ├── ConverterUtilsTest.java │ │ ├── CatalogToHiveConverterFactoryTest.java │ │ ├── Hive3CatalogToHiveConverterTest.java │ │ └── GlueInputConverterTest.java │ │ ├── util │ │ ├── LoggingHelperTest.java │ │ ├── PartitionUtilsTest.java │ │ ├── GlueTestClientFactory.java │ │ ├── ExprBuilder.java │ │ ├── HiveTableValidatorTest.java │ │ ├── PartitionKeyTest.java │ │ └── MetastoreClientUtilsTest.java │ │ └── metastore │ │ ├── AWSGlueMetastoreFactoryTest.java │ │ ├── AWSGlueClientFactoryTest.java │ │ └── AWSGlueMetastoreCacheDecoratorTest.java └── pom.xml ├── .gitignore ├── shims ├── pom.xml ├── hive3-shims │ ├── pom.xml │ └── src │ │ └── main │ │ └── java │ │ └── com.amazonaws.glue.shims │ │ └── AwsGlueHive3Shims.java ├── loader │ ├── src │ │ └── main │ │ │ └── java │ │ │ └── com │ │ │ └── amazonaws │ │ │ └── glue │ │ │ └── shims │ │ │ └── ShimsLoader.java │ ├── test │ │ └── main │ │ │ └── java │ │ │ └── com │ │ │ └── amazonaws │ │ │ └── glue │ │ │ └── shims │ │ │ └── ShimsLoaderTest.java │ └── pom.xml ├── spark-hive-shims │ ├── pom.xml │ └── src │ │ └── main │ │ └── java │ │ └── com │ │ └── amazonaws │ │ └── glue │ │ └── shims │ │ └── AwsGlueSparkHiveShims.java └── common │ ├── pom.xml │ └── src │ └── main │ └── java │ └── com │ └── amazonaws │ └── glue │ └── shims │ └── AwsGlueHiveShims.java ├── aws-glue-datacatalog-hive3-client ├── src │ ├── main │ │ └── java │ │ │ └── com │ │ │ └── amazonaws │ │ │ └── glue │ │ │ └── catalog │ │ │ └── metastore │ │ │ └── AWSGlueDataCatalogHiveClientFactory.java │ └── test │ │ └── java │ │ └── com │ │ └── amazonaws │ │ └── glue │ │ └── catalog │ │ └── metastore │ │ └── HiveAWSCatalogMetastoreClientTest.java └── pom.xml ├── aws-glue-datacatalog-spark-client ├── src │ ├── main │ │ └── java │ │ │ └── com │ │ │ └── amazonaws │ │ │ └── glue │ │ │ └── catalog │ │ │ └── metastore │ │ │ └── AWSGlueDataCatalogHiveClientFactory.java │ └── test │ │ └── java │ │ └── com │ │ └── amazonaws │ │ └── glue │ │ └── catalog │ │ └── metastore │ │ ├── SparkCatalogMetastoreClientTest.java │ │ └── integrationtest │ │ ├── MultipleCatalogDatabaseIntegrationTest.java │ │ ├── MultipleCatalogIntegrationTestBase.java │ │ ├── MultipleCatalogTableIntegrationTest.java │ │ ├── MultipleCatalogUserDefinedFunctionIntegrationTest.java │ │ └── MetastoreClientDatabaseIntegrationTest.java └── pom.xml ├── dev-support └── check_style.xml ├── CONTRIBUTING.md ├── pom.xml └── README.md /NOTICE: -------------------------------------------------------------------------------- 1 | AWS Glue Catalog Client For Apache Hive Metastore 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | *Issue #, if available:* 2 | 3 | *Description of changes:* 4 | 5 | 6 | By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 7 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/metastore/AWSCredentialsProviderFactory.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.catalog.metastore; 2 | 3 | import org.apache.hadoop.conf.Configuration; 4 | 5 | import com.amazonaws.auth.AWSCredentialsProvider; 6 | 7 | public interface AWSCredentialsProviderFactory { 8 | 9 | AWSCredentialsProvider buildAWSCredentialsProvider(Configuration conf); 10 | } 11 | -------------------------------------------------------------------------------- /aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/exceptions/LakeFormationException.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.catalog.exceptions; 2 | 3 | public class LakeFormationException extends RuntimeException { 4 | 5 | public LakeFormationException(String message) { 6 | super(message); 7 | } 8 | 9 | public LakeFormationException(String message, Throwable cause) { 10 | super(message, cause); 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/metastore/GlueClientFactory.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.catalog.metastore; 2 | 3 | import com.amazonaws.services.glue.AWSGlue; 4 | import org.apache.hadoop.hive.metastore.api.MetaException; 5 | 6 | /*** 7 | * Interface for creating Glue AWS Client 8 | */ 9 | public interface GlueClientFactory { 10 | 11 | AWSGlue newClient() throws MetaException; 12 | 13 | } 14 | -------------------------------------------------------------------------------- /aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/metastore/ExecutorServiceFactory.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.catalog.metastore; 2 | 3 | import org.apache.hadoop.conf.Configuration; 4 | 5 | import java.util.concurrent.ExecutorService; 6 | 7 | /* 8 | * Interface for creating an ExecutorService 9 | */ 10 | public interface ExecutorServiceFactory { 11 | public ExecutorService getExecutorService(Configuration conf); 12 | } 13 | -------------------------------------------------------------------------------- /aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/exceptions/InvalidPartitionNameException.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.catalog.exceptions; 2 | 3 | public class InvalidPartitionNameException extends RuntimeException { 4 | 5 | public InvalidPartitionNameException(String message) { 6 | super(message); 7 | } 8 | 9 | public InvalidPartitionNameException(String message, Throwable cause) { 10 | super(message, cause); 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled class file 2 | *.class 3 | 4 | # Log file 5 | *.log 6 | *.iml 7 | 8 | # Package Files # 9 | *.jar 10 | *.war 11 | *.nar 12 | *.ear 13 | *.zip 14 | *.tar.gz 15 | *.rar 16 | /hive/ 17 | /aws-glue-datacatalog-spark-client/target/ 18 | /aws-glue-datacatalog-hive3-client/target/ 19 | /aws-glue-datacatalog-client-common/target/ 20 | /shims/common/target/ 21 | /shims/hive3-shims/target/ 22 | /shims/loader/target/ 23 | /shims/spark-hive-shims/target/ 24 | /.idea/ 25 | /shims/target/ 26 | /target/ -------------------------------------------------------------------------------- /aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/metastore/DefaultAWSCredentialsProviderFactory.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.catalog.metastore; 2 | 3 | import org.apache.hadoop.conf.Configuration; 4 | 5 | import com.amazonaws.auth.AWSCredentialsProvider; 6 | import com.amazonaws.auth.DefaultAWSCredentialsProviderChain; 7 | 8 | public class DefaultAWSCredentialsProviderFactory implements 9 | AWSCredentialsProviderFactory { 10 | 11 | @Override 12 | public AWSCredentialsProvider buildAWSCredentialsProvider(Configuration conf) { 13 | return new DefaultAWSCredentialsProviderChain(); 14 | } 15 | 16 | } 17 | -------------------------------------------------------------------------------- /aws-glue-datacatalog-client-common/src/test/java/com/amazonaws/glue/catalog/converters/ConverterUtilsTest.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.catalog.converters; 2 | 3 | import com.amazonaws.glue.catalog.util.TestObjects; 4 | import com.amazonaws.services.glue.model.Table; 5 | 6 | import org.junit.Test; 7 | 8 | import static org.junit.Assert.assertEquals; 9 | 10 | public class ConverterUtilsTest { 11 | 12 | @Test 13 | public void testCoralTableToStringConversion() { 14 | Table table = TestObjects.getTestTable(); 15 | assertEquals(table, ConverterUtils.stringToCatalogTable(ConverterUtils.catalogTableToString(table))); 16 | } 17 | 18 | } 19 | -------------------------------------------------------------------------------- /shims/pom.xml: -------------------------------------------------------------------------------- 1 | 5 | 6 | com.amazonaws.glue 7 | aws-glue-datacatalog-hive-client 8 | 3.4.0-SNAPSHOT 9 | 10 | 4.0.0 11 | shims 12 | pom 13 | 14 | AwsGlueDataCatalogShims 15 | Shims for Hive 1.2.x/3x compatibility 16 | 17 | 18 | common 19 | spark-hive-shims 20 | hive3-shims 21 | loader 22 | 23 | 24 | -------------------------------------------------------------------------------- /aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/metastore/DefaultExecutorServiceFactory.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.catalog.metastore; 2 | 3 | import com.google.common.util.concurrent.ThreadFactoryBuilder; 4 | import org.apache.hadoop.conf.Configuration; 5 | 6 | import java.util.concurrent.ExecutorService; 7 | import java.util.concurrent.Executors; 8 | 9 | public class DefaultExecutorServiceFactory implements ExecutorServiceFactory { 10 | private static final int NUM_EXECUTOR_THREADS = 5; 11 | 12 | private static final ExecutorService GLUE_METASTORE_DELEGATE_THREAD_POOL = Executors.newFixedThreadPool( 13 | NUM_EXECUTOR_THREADS, new ThreadFactoryBuilder() 14 | .setNameFormat(GlueMetastoreClientDelegate.GLUE_METASTORE_DELEGATE_THREADPOOL_NAME_FORMAT) 15 | .setDaemon(true).build() 16 | ); 17 | 18 | @Override 19 | public ExecutorService getExecutorService(Configuration conf) { 20 | return GLUE_METASTORE_DELEGATE_THREAD_POOL; 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /aws-glue-datacatalog-hive3-client/src/main/java/com/amazonaws/glue/catalog/metastore/AWSGlueDataCatalogHiveClientFactory.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.catalog.metastore; 2 | 3 | import org.apache.hadoop.hive.conf.HiveConf; 4 | import org.apache.hadoop.hive.metastore.HiveMetaHookLoader; 5 | import org.apache.hadoop.hive.metastore.IMetaStoreClient; 6 | import org.apache.hadoop.hive.metastore.api.MetaException; 7 | import org.apache.hadoop.hive.ql.metadata.HiveMetaStoreClientFactory; 8 | 9 | import java.util.concurrent.ConcurrentHashMap; 10 | 11 | public class AWSGlueDataCatalogHiveClientFactory implements HiveMetaStoreClientFactory { 12 | 13 | @Override 14 | public IMetaStoreClient createMetaStoreClient( 15 | HiveConf conf, HiveMetaHookLoader hookLoader, 16 | boolean allowEmbedded, 17 | ConcurrentHashMap concurrentHashMap 18 | ) throws MetaException { 19 | AWSCatalogMetastoreClient client = new AWSCatalogMetastoreClient(conf, hookLoader); 20 | return client; 21 | } 22 | 23 | } 24 | -------------------------------------------------------------------------------- /aws-glue-datacatalog-spark-client/src/main/java/com/amazonaws/glue/catalog/metastore/AWSGlueDataCatalogHiveClientFactory.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.catalog.metastore; 2 | 3 | import org.apache.hadoop.hive.conf.HiveConf; 4 | import org.apache.hadoop.hive.metastore.HiveMetaHookLoader; 5 | import org.apache.hadoop.hive.metastore.IMetaStoreClient; 6 | import org.apache.hadoop.hive.metastore.api.MetaException; 7 | import org.apache.hadoop.hive.ql.metadata.HiveMetaStoreClientFactory; 8 | 9 | import java.util.concurrent.ConcurrentHashMap; 10 | 11 | public class AWSGlueDataCatalogHiveClientFactory implements HiveMetaStoreClientFactory { 12 | 13 | @Override 14 | public IMetaStoreClient createMetaStoreClient( 15 | HiveConf conf, 16 | HiveMetaHookLoader hookLoader, 17 | boolean allowEmbedded, 18 | ConcurrentHashMap concurrentHashMap 19 | ) throws MetaException { 20 | AWSCatalogMetastoreClient client = new AWSCatalogMetastoreClient(conf, hookLoader); 21 | return client; 22 | } 23 | 24 | } 25 | -------------------------------------------------------------------------------- /aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/converters/ConverterUtils.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.catalog.converters; 2 | 3 | import java.util.Date; 4 | import java.util.concurrent.TimeUnit; 5 | 6 | import com.amazonaws.services.glue.model.Table; 7 | 8 | import com.google.gson.Gson; 9 | 10 | public class ConverterUtils { 11 | 12 | private static final Gson gson = new Gson(); 13 | 14 | public static String catalogTableToString(final Table table) { 15 | return gson.toJson(table); 16 | } 17 | 18 | public static Table stringToCatalogTable(final String input) { 19 | return gson.fromJson(input, Table.class); 20 | } 21 | 22 | public static org.apache.hadoop.hive.metastore.api.Date dateToHiveDate(Date date) { 23 | return new org.apache.hadoop.hive.metastore.api.Date(TimeUnit.MILLISECONDS.toDays(date.getTime())); 24 | } 25 | 26 | public static Date hiveDatetoDate(org.apache.hadoop.hive.metastore.api.Date hiveDate) { 27 | return new Date(TimeUnit.DAYS.toMillis(hiveDate.getDaysSinceEpoch())); 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/converters/CatalogToHiveConverterFactory.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.catalog.converters; 2 | 3 | import com.google.common.annotations.VisibleForTesting; 4 | import org.apache.hive.common.util.HiveVersionInfo; 5 | 6 | public class CatalogToHiveConverterFactory { 7 | 8 | private static final String HIVE_3_VERSION = "3."; 9 | 10 | private static CatalogToHiveConverter catalogToHiveConverter; 11 | 12 | public static CatalogToHiveConverter getCatalogToHiveConverter() { 13 | if (catalogToHiveConverter == null) { 14 | catalogToHiveConverter = loadConverter(); 15 | } 16 | return catalogToHiveConverter; 17 | } 18 | 19 | private static CatalogToHiveConverter loadConverter() { 20 | String hiveVersion = HiveVersionInfo.getShortVersion(); 21 | 22 | if (hiveVersion.startsWith(HIVE_3_VERSION)) { 23 | return new Hive3CatalogToHiveConverter(); 24 | } else { 25 | return new BaseCatalogToHiveConverter(); 26 | } 27 | } 28 | 29 | @VisibleForTesting 30 | static void clearConverter() { 31 | catalogToHiveConverter = null; 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/util/LoggingHelper.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.catalog.util; 2 | 3 | import java.util.Collection; 4 | 5 | public class LoggingHelper { 6 | 7 | private static final int MAX_LOG_STRING_LEN = 2000; 8 | 9 | private LoggingHelper() { 10 | } 11 | 12 | public static String concatCollectionToStringForLogging(Collection collection, String delimiter) { 13 | if (collection == null) { 14 | return ""; 15 | } 16 | if (delimiter == null) { 17 | delimiter = ","; 18 | } 19 | StringBuilder bldr = new StringBuilder(); 20 | int totalLen = 0; 21 | int delimiterSize = delimiter.length(); 22 | for (String str : collection) { 23 | if (totalLen > MAX_LOG_STRING_LEN) break; 24 | if (str.length() + totalLen > MAX_LOG_STRING_LEN) { 25 | bldr.append(str.subSequence(0, (MAX_LOG_STRING_LEN-totalLen))); 26 | break; 27 | } else { 28 | bldr.append(str); 29 | bldr.append(delimiter); 30 | totalLen += str.length() + delimiterSize; 31 | } 32 | } 33 | return bldr.toString(); 34 | } 35 | 36 | } 37 | -------------------------------------------------------------------------------- /aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/util/PartitionKey.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.catalog.util; 2 | 3 | import com.amazonaws.services.glue.model.Partition; 4 | 5 | import java.util.List; 6 | 7 | public class PartitionKey { 8 | 9 | private final List partitionValues; 10 | private final int hashCode; 11 | 12 | public PartitionKey(Partition partition) { 13 | this(partition.getValues()); 14 | } 15 | 16 | public PartitionKey(List partitionValues) { 17 | if (partitionValues == null) { 18 | throw new IllegalArgumentException("Partition values cannot be null"); 19 | } 20 | this.partitionValues = partitionValues; 21 | this.hashCode = partitionValues.hashCode(); 22 | } 23 | 24 | @Override 25 | public boolean equals(Object other) { 26 | return this == other || (other != null && other instanceof PartitionKey 27 | && this.partitionValues.equals(((PartitionKey) other).partitionValues)); 28 | } 29 | 30 | @Override 31 | public int hashCode() { 32 | return hashCode; 33 | } 34 | 35 | List getValues() { 36 | return partitionValues; 37 | } 38 | 39 | } 40 | -------------------------------------------------------------------------------- /shims/hive3-shims/pom.xml: -------------------------------------------------------------------------------- 1 | 5 | 6 | com.amazonaws.glue 7 | shims 8 | 3.4.0-SNAPSHOT 9 | ../ 10 | 11 | 4.0.0 12 | hive3-shims 13 | jar 14 | 15 | Hive3Shims 16 | Shims for Hive-3.x compatibility 17 | 18 | 19 | 20 | org.apache.hive 21 | hive-exec 22 | ${hive3.version} 23 | provided 24 | 25 | 26 | com.amazonaws.glue 27 | shims-common 28 | ${project.version} 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/metastore/AWSGlueMetastoreFactory.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.catalog.metastore; 2 | 3 | import com.amazonaws.services.glue.AWSGlue; 4 | import org.apache.hadoop.conf.Configuration; 5 | import org.apache.hadoop.hive.metastore.api.MetaException; 6 | 7 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_DB_CACHE_ENABLE; 8 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_TABLE_CACHE_ENABLE; 9 | 10 | public class AWSGlueMetastoreFactory { 11 | 12 | public AWSGlueMetastore newMetastore(Configuration conf) throws MetaException { 13 | AWSGlue glueClient = new AWSGlueClientFactory(conf).newClient(); 14 | AWSGlueMetastore defaultMetastore = new DefaultAWSGlueMetastore(conf, glueClient); 15 | if(isCacheEnabled(conf)) { 16 | return new AWSGlueMetastoreCacheDecorator(conf, defaultMetastore); 17 | } 18 | return defaultMetastore; 19 | } 20 | 21 | private boolean isCacheEnabled(Configuration conf) { 22 | boolean databaseCacheEnabled = conf.getBoolean(AWS_GLUE_DB_CACHE_ENABLE, false); 23 | boolean tableCacheEnabled = conf.getBoolean(AWS_GLUE_TABLE_CACHE_ENABLE, false); 24 | return (databaseCacheEnabled || tableCacheEnabled); 25 | } 26 | } -------------------------------------------------------------------------------- /shims/loader/src/main/java/com/amazonaws/glue/shims/ShimsLoader.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.shims; 2 | 3 | import com.google.common.annotations.VisibleForTesting; 4 | import org.apache.hive.common.util.HiveVersionInfo; 5 | 6 | public final class ShimsLoader { 7 | 8 | private static AwsGlueHiveShims hiveShims; 9 | 10 | public static synchronized AwsGlueHiveShims getHiveShims() { 11 | if (hiveShims == null) { 12 | hiveShims = loadHiveShims(); 13 | } 14 | return hiveShims; 15 | } 16 | 17 | private static AwsGlueHiveShims loadHiveShims() { 18 | String hiveVersion = HiveVersionInfo.getShortVersion(); 19 | 20 | try { 21 | if (AwsGlueSparkHiveShims.supportsVersion(hiveVersion)) { 22 | return AwsGlueSparkHiveShims.class.newInstance(); 23 | } else if (AwsGlueHive3Shims.supportsVersion(hiveVersion)) { 24 | return AwsGlueHive3Shims.class.newInstance(); 25 | } 26 | } catch (InstantiationException | IllegalAccessException e) { 27 | throw new RuntimeException("unable to get instance of Hive shim class for " + hiveVersion); 28 | } 29 | 30 | 31 | throw new RuntimeException("Shim class for Hive version " + hiveVersion + " does not exist"); 32 | } 33 | 34 | @VisibleForTesting 35 | static synchronized void clearShimClass() { 36 | hiveShims = null; 37 | } 38 | 39 | } 40 | -------------------------------------------------------------------------------- /shims/spark-hive-shims/pom.xml: -------------------------------------------------------------------------------- 1 | 5 | 6 | com.amazonaws.glue 7 | shims 8 | 3.4.0-SNAPSHOT 9 | ../ 10 | 11 | 4.0.0 12 | spark-hive-shims 13 | jar 14 | 15 | spark-hive-shims 16 | 17 | 18 | 19 | org.apache.hive 20 | hive-exec 21 | ${spark-hive.version} 22 | provided 23 | 24 | 25 | org.apache.hive 26 | hive-metastore 27 | ${spark-hive.version} 28 | provided 29 | 30 | 31 | com.amazonaws.glue 32 | shims-common 33 | ${project.version} 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /shims/common/pom.xml: -------------------------------------------------------------------------------- 1 | 5 | 6 | com.amazonaws.glue 7 | shims 8 | 3.4.0-SNAPSHOT 9 | ../ 10 | 11 | 4.0.0 12 | shims-common 13 | jar 14 | 15 | ShimsCommon 16 | Common shim interface 17 | 18 | 19 | 20 | org.apache.hive 21 | hive-exec 22 | ${hive3.version} 23 | provided 24 | 25 | 26 | org.apache.hadoop 27 | hadoop-common 28 | ${hadoop.version} 29 | provided 30 | 31 | 32 | org.apache.hive 33 | hive-metastore 34 | ${hive3.version} 35 | provided 36 | 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/util/PartitionUtils.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.catalog.util; 2 | 3 | import com.amazonaws.services.glue.model.EntityNotFoundException; 4 | import com.amazonaws.services.glue.model.InvalidInputException; 5 | import com.amazonaws.services.glue.model.Partition; 6 | import com.amazonaws.services.glue.model.PartitionValueList; 7 | import com.google.common.collect.Lists; 8 | import com.google.common.collect.Maps; 9 | 10 | import java.util.List; 11 | import java.util.Map; 12 | 13 | public final class PartitionUtils { 14 | 15 | public static Map buildPartitionMap(final List partitions) { 16 | Map partitionValuesMap = Maps.newHashMap(); 17 | for (Partition partition : partitions) { 18 | partitionValuesMap.put(new PartitionKey(partition), partition); 19 | } 20 | return partitionValuesMap; 21 | } 22 | 23 | public static List getPartitionValuesList(final Map partitionMap) { 24 | List partitionValuesList = Lists.newArrayList(); 25 | for (Map.Entry entry : partitionMap.entrySet()) { 26 | partitionValuesList.add(new PartitionValueList().withValues(entry.getValue().getValues())); 27 | } 28 | return partitionValuesList; 29 | } 30 | 31 | public static boolean isInvalidUserInputException(Exception e) { 32 | // exceptions caused by invalid requests, in which case we know all partitions creation failed 33 | return e instanceof EntityNotFoundException || e instanceof InvalidInputException; 34 | } 35 | 36 | } 37 | -------------------------------------------------------------------------------- /shims/common/src/main/java/com/amazonaws/glue/shims/AwsGlueHiveShims.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.shims; 2 | 3 | import org.apache.hadoop.conf.Configuration; 4 | import org.apache.hadoop.fs.Path; 5 | import org.apache.hadoop.hive.metastore.api.Database; 6 | import org.apache.hadoop.hive.metastore.api.EnvironmentContext; 7 | import org.apache.hadoop.hive.metastore.api.FieldSchema; 8 | import org.apache.hadoop.hive.metastore.api.MetaException; 9 | import org.apache.hadoop.hive.metastore.api.Partition; 10 | import org.apache.hadoop.hive.metastore.api.Table; 11 | import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; 12 | import org.apache.hadoop.hive.metastore.Warehouse; 13 | 14 | import java.util.List; 15 | 16 | public interface AwsGlueHiveShims { 17 | 18 | ExprNodeGenericFuncDesc getDeserializeExpression(byte[] exprBytes); 19 | 20 | byte[] getSerializeExpression(ExprNodeGenericFuncDesc expr); 21 | 22 | Path getDefaultTablePath(Database db, String tableName, Warehouse warehouse) 23 | throws MetaException; 24 | 25 | boolean deleteDir(Warehouse wh, Path path, boolean recursive, boolean ifPurge) throws MetaException; 26 | 27 | boolean mkdirs(Warehouse wh, Path path) throws MetaException; 28 | 29 | boolean validateTableName(String name, Configuration conf); 30 | 31 | boolean requireCalStats(Configuration conf, Partition oldPart, Partition newPart, Table tbl, EnvironmentContext environmentContext); 32 | 33 | boolean updateTableStatsFast(Database db, Table tbl, Warehouse wh, boolean madeDir, boolean forceRecompute, EnvironmentContext environmentContext) 34 | throws MetaException; 35 | 36 | String validateTblColumns(List cols); 37 | } 38 | -------------------------------------------------------------------------------- /aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/metastore/SessionCredentialsProviderFactory.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.catalog.metastore; 2 | 3 | import com.amazonaws.auth.AWSCredentialsProvider; 4 | import com.amazonaws.auth.AWSSessionCredentials; 5 | import com.amazonaws.auth.BasicSessionCredentials; 6 | import com.amazonaws.internal.StaticCredentialsProvider; 7 | 8 | import org.apache.hadoop.conf.Configuration; 9 | 10 | import static com.google.common.base.Preconditions.checkArgument; 11 | 12 | public class SessionCredentialsProviderFactory implements AWSCredentialsProviderFactory { 13 | 14 | public final static String AWS_ACCESS_KEY_CONF_VAR = "hive.aws_session_access_id"; 15 | public final static String AWS_SECRET_KEY_CONF_VAR = "hive.aws_session_secret_key"; 16 | public final static String AWS_SESSION_TOKEN_CONF_VAR = "hive.aws_session_token"; 17 | 18 | @Override 19 | public AWSCredentialsProvider buildAWSCredentialsProvider(Configuration conf) { 20 | 21 | checkArgument(conf != null, "conf cannot be null."); 22 | 23 | String accessKey = conf.get(AWS_ACCESS_KEY_CONF_VAR); 24 | String secretKey = conf.get(AWS_SECRET_KEY_CONF_VAR); 25 | String sessionToken = conf.get(AWS_SESSION_TOKEN_CONF_VAR); 26 | 27 | checkArgument(accessKey != null, AWS_ACCESS_KEY_CONF_VAR + " must be set."); 28 | checkArgument(secretKey != null, AWS_SECRET_KEY_CONF_VAR + " must be set."); 29 | checkArgument(sessionToken != null, AWS_SESSION_TOKEN_CONF_VAR + " must be set."); 30 | 31 | AWSSessionCredentials credentials = new BasicSessionCredentials(accessKey, secretKey, sessionToken); 32 | 33 | return new StaticCredentialsProvider(credentials); 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/converters/CatalogToHiveConverter.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.catalog.converters; 2 | 3 | import com.amazonaws.services.glue.model.ErrorDetail; 4 | import com.amazonaws.services.glue.model.ColumnStatistics; 5 | import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; 6 | import org.apache.hadoop.hive.metastore.api.Database; 7 | import org.apache.hadoop.hive.metastore.api.FieldSchema; 8 | import org.apache.hadoop.hive.metastore.api.Function; 9 | import org.apache.hadoop.hive.metastore.api.Partition; 10 | import org.apache.hadoop.hive.metastore.api.Table; 11 | import org.apache.hadoop.hive.metastore.api.TableMeta; 12 | import org.apache.thrift.TException; 13 | 14 | import java.util.List; 15 | 16 | public interface CatalogToHiveConverter { 17 | 18 | TException wrapInHiveException(Throwable e); 19 | 20 | TException errorDetailToHiveException(ErrorDetail errorDetail); 21 | 22 | Database convertDatabase(com.amazonaws.services.glue.model.Database catalogDatabase); 23 | 24 | List convertFieldSchemaList(List catalogFieldSchemaList); 25 | 26 | Table convertTable(com.amazonaws.services.glue.model.Table catalogTable, String dbname); 27 | 28 | TableMeta convertTableMeta(com.amazonaws.services.glue.model.Table catalogTable, String dbName); 29 | 30 | Partition convertPartition(com.amazonaws.services.glue.model.Partition src); 31 | 32 | List convertPartitions(List src); 33 | 34 | Function convertFunction(String dbName, com.amazonaws.services.glue.model.UserDefinedFunction catalogFunction); 35 | 36 | List convertColumnStatisticsList(List catatlogColumnStatisticsList); 37 | } 38 | -------------------------------------------------------------------------------- /aws-glue-datacatalog-client-common/src/test/java/com/amazonaws/glue/catalog/converters/CatalogToHiveConverterFactoryTest.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.catalog.converters; 2 | 3 | import org.apache.hive.common.util.HiveVersionInfo; 4 | import org.junit.After; 5 | import org.junit.Before; 6 | import org.junit.Test; 7 | import org.junit.runner.RunWith; 8 | import org.powermock.core.classloader.annotations.PowerMockIgnore; 9 | import org.powermock.core.classloader.annotations.PrepareForTest; 10 | import org.powermock.modules.junit4.PowerMockRunner; 11 | 12 | import static org.junit.Assert.assertTrue; 13 | import static org.powermock.api.mockito.PowerMockito.mockStatic; 14 | import static org.powermock.api.mockito.PowerMockito.when; 15 | 16 | @PrepareForTest(HiveVersionInfo.class) 17 | @PowerMockIgnore("javax.management.*") 18 | @RunWith(PowerMockRunner.class) 19 | public class CatalogToHiveConverterFactoryTest { 20 | 21 | private static final String HIVE_1_2_VERSION = "1.2.1"; 22 | private static final String HIVE_3_VERSION = "3.1.3"; 23 | 24 | @Before 25 | public void setup() throws ClassNotFoundException { 26 | mockStatic(HiveVersionInfo.class); 27 | } 28 | 29 | @After 30 | public void reset() { 31 | CatalogToHiveConverterFactory.clearConverter(); 32 | } 33 | 34 | @Test 35 | public void testGetBaseCatalogToHiveConverter() { 36 | when(HiveVersionInfo.getShortVersion()).thenReturn(HIVE_1_2_VERSION); 37 | CatalogToHiveConverter catalogToHiveConverter = CatalogToHiveConverterFactory.getCatalogToHiveConverter(); 38 | assertTrue(BaseCatalogToHiveConverter.class.isInstance(catalogToHiveConverter)); 39 | } 40 | 41 | @Test 42 | public void testGetHive3CatalogToHiveConverter() { 43 | when(HiveVersionInfo.getShortVersion()).thenReturn(HIVE_3_VERSION); 44 | CatalogToHiveConverter catalogToHiveConverter = CatalogToHiveConverterFactory.getCatalogToHiveConverter(); 45 | assertTrue(Hive3CatalogToHiveConverter.class.isInstance(catalogToHiveConverter)); 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/util/AWSGlueConfig.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.catalog.util; 2 | 3 | import com.amazonaws.ClientConfiguration; 4 | 5 | public final class AWSGlueConfig { 6 | 7 | private AWSGlueConfig() {} 8 | 9 | public static final String AWS_GLUE_ENDPOINT = "aws.glue.endpoint"; 10 | public static final String AWS_REGION = "aws.region"; 11 | public static final String AWS_CATALOG_CREDENTIALS_PROVIDER_FACTORY_CLASS 12 | = "aws.catalog.credentials.provider.factory.class"; 13 | 14 | public static final String AWS_GLUE_MAX_RETRY = "aws.glue.max-error-retries"; 15 | public static final int DEFAULT_MAX_RETRY = 5; 16 | 17 | public static final String AWS_GLUE_MAX_CONNECTIONS = "aws.glue.max-connections"; 18 | public static final int DEFAULT_MAX_CONNECTIONS = ClientConfiguration.DEFAULT_MAX_CONNECTIONS; 19 | 20 | public static final String AWS_GLUE_CONNECTION_TIMEOUT = "aws.glue.connection-timeout"; 21 | public static final int DEFAULT_CONNECTION_TIMEOUT = ClientConfiguration.DEFAULT_CONNECTION_TIMEOUT; 22 | 23 | public static final String AWS_GLUE_SOCKET_TIMEOUT = "aws.glue.socket-timeout"; 24 | public static final int DEFAULT_SOCKET_TIMEOUT = ClientConfiguration.DEFAULT_SOCKET_TIMEOUT; 25 | 26 | public static final String AWS_GLUE_CATALOG_SEPARATOR = "aws.glue.catalog.separator"; 27 | 28 | public static final String AWS_GLUE_DISABLE_UDF = "aws.glue.disable-udf"; 29 | 30 | 31 | public static final String AWS_GLUE_DB_CACHE_ENABLE = "aws.glue.cache.db.enable"; 32 | public static final String AWS_GLUE_DB_CACHE_SIZE = "aws.glue.cache.db.size"; 33 | public static final String AWS_GLUE_DB_CACHE_TTL_MINS = "aws.glue.cache.db.ttl-mins"; 34 | 35 | public static final String AWS_GLUE_TABLE_CACHE_ENABLE = "aws.glue.cache.table.enable"; 36 | public static final String AWS_GLUE_TABLE_CACHE_SIZE = "aws.glue.cache.table.size"; 37 | public static final String AWS_GLUE_TABLE_CACHE_TTL_MINS = "aws.glue.cache.table.ttl-mins"; 38 | } -------------------------------------------------------------------------------- /aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/converters/Hive3CatalogToHiveConverter.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.catalog.converters; 2 | 3 | import org.apache.hadoop.hive.metastore.api.Database; 4 | import org.apache.hadoop.hive.metastore.api.Function; 5 | import org.apache.hadoop.hive.metastore.api.Partition; 6 | import org.apache.hadoop.hive.metastore.api.Table; 7 | import org.apache.hadoop.hive.metastore.api.TableMeta; 8 | 9 | import static org.apache.hadoop.hive.metastore.Warehouse.DEFAULT_CATALOG_NAME; 10 | 11 | public class Hive3CatalogToHiveConverter extends BaseCatalogToHiveConverter { 12 | 13 | @Override 14 | public Database convertDatabase(com.amazonaws.services.glue.model.Database catalogDatabase) { 15 | Database hiveDatabase = super.convertDatabase(catalogDatabase); 16 | hiveDatabase.setCatalogName(DEFAULT_CATALOG_NAME); 17 | return hiveDatabase; 18 | } 19 | 20 | @Override 21 | public Table convertTable(com.amazonaws.services.glue.model.Table catalogTable, String dbname) { 22 | Table hiveTable = super.convertTable(catalogTable, dbname); 23 | hiveTable.setCatName(DEFAULT_CATALOG_NAME); 24 | return hiveTable; 25 | } 26 | 27 | @Override 28 | public TableMeta convertTableMeta(com.amazonaws.services.glue.model.Table catalogTable, String dbName) { 29 | TableMeta tableMeta = super.convertTableMeta(catalogTable, dbName); 30 | tableMeta.setCatName(DEFAULT_CATALOG_NAME); 31 | return tableMeta; 32 | } 33 | 34 | @Override 35 | public Partition convertPartition(com.amazonaws.services.glue.model.Partition src) { 36 | Partition hivePartition = super.convertPartition(src); 37 | hivePartition.setCatName(DEFAULT_CATALOG_NAME); 38 | return hivePartition; 39 | } 40 | 41 | @Override 42 | public Function convertFunction(String dbName, com.amazonaws.services.glue.model.UserDefinedFunction catalogFunction) { 43 | Function hiveFunction = super.convertFunction(dbName, catalogFunction); 44 | if (hiveFunction == null) { 45 | return null; 46 | } 47 | hiveFunction.setCatName(DEFAULT_CATALOG_NAME); 48 | return hiveFunction; 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /shims/loader/test/main/java/com/amazonaws/glue/shims/ShimsLoaderTest.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.shims; 2 | 3 | import static org.junit.Assert.assertTrue; 4 | import static org.mockito.Mockito.when; 5 | import static org.powermock.api.mockito.PowerMockito.mockStatic; 6 | 7 | import org.apache.hive.common.util.HiveVersionInfo; 8 | import org.junit.Before; 9 | import org.junit.Test; 10 | import org.junit.runner.RunWith; 11 | import org.powermock.core.classloader.annotations.PowerMockIgnore; 12 | import org.powermock.core.classloader.annotations.PrepareForTest; 13 | import org.powermock.modules.junit4.PowerMockRunner; 14 | 15 | @PrepareForTest(HiveVersionInfo.class) 16 | @PowerMockIgnore("javax.management.*") 17 | @RunWith(PowerMockRunner.class) 18 | public class ShimsLoaderTest { 19 | 20 | private static final String HIVE_1_2_VERSION = "1.2.1"; 21 | 22 | private static final String HIVE_3_VERSION = "3.1.3"; 23 | 24 | @Before 25 | public void setup() throws ClassNotFoundException { 26 | mockStatic(HiveVersionInfo.class); 27 | } 28 | 29 | @Test 30 | public void hive3ShimsClassSupportsCorrectVersion() { 31 | assertTrue(AwsGlueHive3Shims.supportsVersion(HIVE_3_VERSION)); 32 | } 33 | 34 | @Test 35 | public void hive1Dot2ShimsClassSupportsCorrectVersion() { 36 | assertTrue(AwsGlueSparkHiveShims.supportsVersion(HIVE_1_2_VERSION)); 37 | } 38 | 39 | @Test 40 | public void returnsCorrectShimsImplementationForHive3() { 41 | assertGetsCorrectShimsClassForVersion(AwsGlueHive3Shims.class, HIVE_3_VERSION); 42 | } 43 | 44 | @Test 45 | public void returnsCorrectShimsImplementationForHive1Dot2() { 46 | assertGetsCorrectShimsClassForVersion(AwsGlueSparkHiveShims.class, HIVE_1_2_VERSION); 47 | } 48 | 49 | @Test(expected = RuntimeException.class) 50 | public void throwsWhenSupportingClassDoesNotExist() { 51 | when(HiveVersionInfo.getShortVersion()).thenReturn("this.is.not.a.real.hive.version"); 52 | ShimsLoader.clearShimClass(); 53 | ShimsLoader.getHiveShims(); 54 | } 55 | 56 | private void assertGetsCorrectShimsClassForVersion(Class expectedClass, String version) { 57 | when(HiveVersionInfo.getShortVersion()).thenReturn(version); 58 | AwsGlueHiveShims shims = ShimsLoader.getHiveShims(); 59 | assertTrue(expectedClass.isInstance(shims)); 60 | ShimsLoader.clearShimClass(); 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /aws-glue-datacatalog-spark-client/src/test/java/com/amazonaws/glue/catalog/metastore/SparkCatalogMetastoreClientTest.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.catalog.metastore; 2 | 3 | import com.amazonaws.glue.catalog.util.TestObjects; 4 | import com.amazonaws.services.glue.AWSGlue; 5 | import com.amazonaws.services.glue.model.Database; 6 | import com.amazonaws.services.glue.model.EntityNotFoundException; 7 | import com.amazonaws.services.glue.model.GetUserDefinedFunctionRequest; 8 | import com.amazonaws.services.glue.model.UserDefinedFunction; 9 | 10 | import org.apache.hadoop.hive.conf.HiveConf; 11 | import org.apache.hadoop.hive.metastore.Warehouse; 12 | import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; 13 | import org.junit.Before; 14 | import org.junit.Rule; 15 | import org.junit.Test; 16 | import org.junit.rules.ExpectedException; 17 | 18 | import static org.mockito.Matchers.any; 19 | import static org.mockito.Mockito.mock; 20 | import static org.mockito.Mockito.spy; 21 | import static org.mockito.Mockito.when; 22 | 23 | public class SparkCatalogMetastoreClientTest { 24 | 25 | private AWSGlue glueClient; 26 | private AWSCatalogMetastoreClient metastoreClient; 27 | private Warehouse wh; 28 | private HiveConf conf; 29 | private GlueClientFactory clientFactory; 30 | 31 | // Test objects 32 | private Database testDB; 33 | private UserDefinedFunction testFunction; 34 | 35 | @Rule 36 | public ExpectedException expectedException = ExpectedException.none(); 37 | 38 | @Before 39 | public void setUp() throws Exception{ 40 | testDB = TestObjects.getTestDatabase(); 41 | testFunction = TestObjects.getCatalogTestFunction(); 42 | 43 | conf = spy(new HiveConf()); 44 | glueClient = mock(AWSGlue.class); 45 | clientFactory = mock(GlueClientFactory.class); 46 | when(clientFactory.newClient()).thenReturn(glueClient); 47 | metastoreClient = new AWSCatalogMetastoreClient.Builder().withClientFactory(clientFactory) 48 | .withWarehouse(wh).createDefaults(false).withHiveConf(conf).build(); 49 | } 50 | 51 | @Test 52 | public void testGetFunctionNoSuchObjectExceptionMessage() throws Exception { 53 | expectedException.expect(NoSuchObjectException.class); 54 | expectedException.expectMessage(testFunction.getFunctionName() + " does not exist"); 55 | 56 | when(glueClient.getUserDefinedFunction(any(GetUserDefinedFunctionRequest.class))) 57 | .thenThrow(new EntityNotFoundException("")); 58 | metastoreClient.getFunction(testDB.getName(), testFunction.getFunctionName()); 59 | } 60 | 61 | } 62 | -------------------------------------------------------------------------------- /aws-glue-datacatalog-client-common/src/test/java/com/amazonaws/glue/catalog/util/LoggingHelperTest.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.catalog.util; 2 | 3 | import com.google.common.collect.ImmutableList; 4 | import org.junit.Test; 5 | 6 | import java.util.Collection; 7 | 8 | import static org.hamcrest.Matchers.equalTo; 9 | import static org.hamcrest.Matchers.is; 10 | import static org.junit.Assert.assertThat; 11 | 12 | public class LoggingHelperTest { 13 | 14 | @Test 15 | public void concatCollectionToStringForLoggingTest() { 16 | Collection logs = ImmutableList.of("test_log_1", "test_log_2", "test_log_3"); 17 | String delimiter = "|"; 18 | 19 | String result = LoggingHelper.concatCollectionToStringForLogging(logs, delimiter); 20 | String expected = "test_log_1|test_log_2|test_log_3|"; 21 | 22 | assertThat(result, is(equalTo(expected))); 23 | } 24 | 25 | @Test 26 | public void concatCollectionToStringForLoggingTestWithoutCollection() { 27 | String delimiter = "|"; 28 | 29 | String result = LoggingHelper.concatCollectionToStringForLogging(null, delimiter); 30 | String expected = ""; 31 | 32 | assertThat(result, is(equalTo(expected))); 33 | } 34 | 35 | @Test 36 | public void concatCollectionToStringForLoggingTestWithoutDelimiter() { 37 | Collection logs = ImmutableList.of("test_log_1", "test_log_2", "test_log_3"); 38 | 39 | String result = LoggingHelper.concatCollectionToStringForLogging(logs, null); 40 | String expected = "test_log_1,test_log_2,test_log_3,"; 41 | 42 | assertThat(result, is(equalTo(expected))); 43 | } 44 | 45 | @Test 46 | public void concatCollectionToStringForLoggingTestWithLongerThanLimitInput() { 47 | ImmutableList.Builder listBuilder = new ImmutableList.Builder<>(); 48 | 49 | final int max = 2000; 50 | final String key = "KEY"; 51 | final StringBuilder stringBuilder = new StringBuilder(); 52 | for (int i = 0; i < max; i += key.length()) { 53 | listBuilder.add(key); 54 | stringBuilder.append(key); 55 | } 56 | final String overflow = "OVERFLOW"; 57 | for (int i = 0; i < 100; i += overflow.length()) { 58 | listBuilder.add(overflow); 59 | } 60 | 61 | String result = LoggingHelper.concatCollectionToStringForLogging(listBuilder.build(), ""); 62 | String expected = stringBuilder.toString().substring(0, max); 63 | 64 | assertThat(result.length(), is(equalTo(max))); 65 | assertThat(result, is(equalTo(expected))); 66 | assertThat(expected.indexOf(overflow), is(equalTo(-1))); 67 | } 68 | 69 | } 70 | -------------------------------------------------------------------------------- /shims/loader/pom.xml: -------------------------------------------------------------------------------- 1 | 5 | 6 | com.amazonaws.glue 7 | shims 8 | 3.4.0-SNAPSHOT 9 | ../ 10 | 11 | 4.0.0 12 | shims-loader 13 | jar 14 | 15 | ShimsLoader 16 | Loader for the AwsGlueDataCatalogShim classes 17 | 18 | 19 | 20 | 21 | org.apache.hive 22 | hive-exec 23 | ${hive3.version} 24 | provided 25 | 26 | 27 | com.amazonaws.glue 28 | hive3-shims 29 | ${project.version} 30 | 31 | 32 | com.amazonaws.glue 33 | shims-common 34 | ${project.version} 35 | 36 | 37 | com.amazonaws.glue 38 | spark-hive-shims 39 | ${project.version} 40 | 41 | 42 | junit 43 | junit 44 | ${junit.version} 45 | test 46 | 47 | 48 | org.mockito 49 | mockito-core 50 | ${mockito.version} 51 | test 52 | 53 | 54 | org.powermock 55 | powermock-module-junit4 56 | ${powermock.version} 57 | test 58 | 59 | 60 | org.powermock 61 | powermock-api-mockito 62 | ${powermock.version} 63 | test 64 | 65 | 66 | 67 | -------------------------------------------------------------------------------- /aws-glue-datacatalog-client-common/src/test/java/com/amazonaws/glue/catalog/converters/Hive3CatalogToHiveConverterTest.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.catalog.converters; 2 | 3 | import com.amazonaws.glue.catalog.util.TestObjects; 4 | import com.amazonaws.services.glue.model.Database; 5 | import com.amazonaws.services.glue.model.Partition; 6 | import com.amazonaws.services.glue.model.Table; 7 | import com.amazonaws.services.glue.model.UserDefinedFunction; 8 | import com.google.common.collect.ImmutableList; 9 | import org.apache.hadoop.hive.metastore.api.TableMeta; 10 | import org.junit.Test; 11 | 12 | import static org.apache.hadoop.hive.metastore.Warehouse.DEFAULT_CATALOG_NAME; 13 | import static org.junit.Assert.assertEquals; 14 | 15 | public class Hive3CatalogToHiveConverterTest { 16 | 17 | private static final String TEST_DB_NAME = "testDb"; 18 | private static final String TEST_TBL_NAME = "testTbl"; 19 | private final CatalogToHiveConverter catalogToHiveConverter = new Hive3CatalogToHiveConverter(); 20 | 21 | @Test 22 | public void testDatabaseCatalogName() { 23 | Database catalogDb = TestObjects.getTestDatabase(); 24 | org.apache.hadoop.hive.metastore.api.Database hiveDatabase = catalogToHiveConverter 25 | .convertDatabase(catalogDb); 26 | assertEquals(DEFAULT_CATALOG_NAME, hiveDatabase.getCatalogName()); 27 | } 28 | 29 | @Test 30 | public void testTableCatalogName() { 31 | Table catalogTable = TestObjects.getTestTable(); 32 | org.apache.hadoop.hive.metastore.api.Table hiveTable = catalogToHiveConverter.convertTable(catalogTable, TEST_DB_NAME); 33 | assertEquals(DEFAULT_CATALOG_NAME, hiveTable.getCatName()); 34 | } 35 | 36 | @Test 37 | public void testTableMetaCatalogName() { 38 | Table catalogTable = TestObjects.getTestTable(); 39 | TableMeta tableMeta = catalogToHiveConverter.convertTableMeta(catalogTable, TEST_DB_NAME); 40 | assertEquals(DEFAULT_CATALOG_NAME, tableMeta.getCatName()); 41 | } 42 | 43 | @Test 44 | public void testPartitionConversion() { 45 | Partition partition = TestObjects.getTestPartition(TEST_DB_NAME, TEST_TBL_NAME, ImmutableList.of("1")); 46 | org.apache.hadoop.hive.metastore.api.Partition hivePartition = catalogToHiveConverter.convertPartition(partition); 47 | assertEquals(DEFAULT_CATALOG_NAME, hivePartition.getCatName()); 48 | } 49 | 50 | @Test 51 | public void testFunctionConversion() { 52 | UserDefinedFunction catalogFunction = TestObjects.getCatalogTestFunction(); 53 | org.apache.hadoop.hive.metastore.api.Function hiveFunction = catalogToHiveConverter.convertFunction(TEST_DB_NAME, catalogFunction); 54 | assertEquals(DEFAULT_CATALOG_NAME, hiveFunction.getCatName()); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/util/HiveTableValidator.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.catalog.util; 2 | 3 | import com.amazonaws.services.glue.model.InvalidInputException; 4 | import com.amazonaws.services.glue.model.Table; 5 | 6 | import org.apache.hadoop.hive.metastore.TableType; 7 | 8 | import static org.apache.commons.lang3.StringUtils.isNotEmpty; 9 | import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE; 10 | 11 | public enum HiveTableValidator { 12 | 13 | REQUIRED_PROPERTIES_VALIDATOR { 14 | public void validate(Table table) { 15 | String missingProperty = null; 16 | 17 | if(notApplicableTableType(table)) { 18 | return; 19 | } 20 | 21 | if (table.getTableType() == null) { 22 | missingProperty = "TableType"; 23 | } else if (table.getStorageDescriptor() == null) { 24 | missingProperty = "StorageDescriptor"; 25 | } else if (table.getStorageDescriptor().getInputFormat() == null) { 26 | missingProperty = "StorageDescriptor#InputFormat"; 27 | } else if (table.getStorageDescriptor().getOutputFormat() == null) { 28 | missingProperty = "StorageDescriptor#OutputFormat"; 29 | } else if (table.getStorageDescriptor().getSerdeInfo() == null) { 30 | missingProperty = "StorageDescriptor#SerdeInfo"; 31 | } else if (table.getStorageDescriptor().getSerdeInfo().getSerializationLibrary() == null) { 32 | missingProperty = "StorageDescriptor#SerdeInfo#SerializationLibrary"; 33 | } 34 | 35 | if (missingProperty != null) { 36 | throw new InvalidInputException(String.format("%s cannot be null for table: %s", missingProperty, table.getName())); 37 | } 38 | } 39 | }; 40 | 41 | public abstract void validate(Table table); 42 | 43 | private static boolean notApplicableTableType(Table table) { 44 | if (isNotManagedOrExternalTable(table) || 45 | isStorageHandlerType(table)) { 46 | return true; 47 | } 48 | return false; 49 | } 50 | 51 | private static boolean isNotManagedOrExternalTable(Table table) { 52 | if (table.getTableType() != null && 53 | TableType.valueOf(table.getTableType()) != TableType.MANAGED_TABLE && 54 | TableType.valueOf(table.getTableType()) != TableType.EXTERNAL_TABLE) { 55 | return true; 56 | } 57 | return false; 58 | } 59 | 60 | private static boolean isStorageHandlerType(Table table) { 61 | if (table.getParameters() != null && table.getParameters().containsKey(META_TABLE_STORAGE) && 62 | isNotEmpty(table.getParameters().get(META_TABLE_STORAGE))) { 63 | return true; 64 | } 65 | return false; 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /shims/spark-hive-shims/src/main/java/com/amazonaws/glue/shims/AwsGlueSparkHiveShims.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.shims; 2 | 3 | import org.apache.hadoop.conf.Configuration; 4 | import org.apache.hadoop.fs.Path; 5 | import org.apache.hadoop.hive.metastore.MetaStoreUtils; 6 | import org.apache.hadoop.hive.metastore.api.Database; 7 | import org.apache.hadoop.hive.metastore.api.EnvironmentContext; 8 | import org.apache.hadoop.hive.metastore.api.FieldSchema; 9 | import org.apache.hadoop.hive.metastore.api.MetaException; 10 | import org.apache.hadoop.hive.metastore.api.Partition; 11 | import org.apache.hadoop.hive.metastore.api.Table; 12 | import org.apache.hadoop.hive.metastore.Warehouse; 13 | import org.apache.hadoop.hive.ql.exec.SerializationUtilities; 14 | import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; 15 | 16 | import java.util.List; 17 | 18 | class AwsGlueSparkHiveShims implements AwsGlueHiveShims { 19 | 20 | private static final String SPARK_HIVE_VERSION = "2.3."; 21 | 22 | static boolean supportsVersion(String version) { 23 | return version.startsWith(SPARK_HIVE_VERSION); 24 | } 25 | 26 | @Override 27 | public ExprNodeGenericFuncDesc getDeserializeExpression(byte[] exprBytes) { 28 | return SerializationUtilities.deserializeExpressionFromKryo(exprBytes); 29 | } 30 | 31 | @Override 32 | public byte[] getSerializeExpression(ExprNodeGenericFuncDesc expr) { 33 | return SerializationUtilities.serializeExpressionToKryo(expr); 34 | } 35 | 36 | @Override 37 | public Path getDefaultTablePath(Database db, String tableName, Warehouse warehouse) throws MetaException { 38 | return warehouse.getDefaultTablePath(db, tableName); 39 | } 40 | 41 | @Override 42 | public boolean deleteDir(Warehouse wh, Path path, boolean recursive, boolean ifPurge) throws MetaException { 43 | return wh.deleteDir(path, recursive, ifPurge); 44 | } 45 | 46 | @Override 47 | public boolean mkdirs(Warehouse wh, Path path) throws MetaException { 48 | return wh.mkdirs(path, true); 49 | } 50 | 51 | @Override 52 | public boolean validateTableName(String name, Configuration conf) { 53 | return MetaStoreUtils.validateName(name, conf); 54 | } 55 | 56 | @Override 57 | public boolean requireCalStats( 58 | Configuration conf, 59 | Partition oldPart, 60 | Partition newPart, 61 | Table tbl, 62 | EnvironmentContext environmentContext) { 63 | return MetaStoreUtils.requireCalStats(conf, oldPart, newPart, tbl, environmentContext); 64 | } 65 | 66 | @Override 67 | public boolean updateTableStatsFast( 68 | Database db, 69 | Table tbl, 70 | Warehouse wh, 71 | boolean madeDir, 72 | boolean forceRecompute, 73 | EnvironmentContext environmentContext 74 | ) throws MetaException { 75 | return MetaStoreUtils.updateTableStatsFast(db, tbl, wh, madeDir, forceRecompute, environmentContext); 76 | } 77 | 78 | @Override 79 | public String validateTblColumns(List cols) { 80 | return MetaStoreUtils.validateTblColumns(cols); 81 | } 82 | 83 | } 84 | -------------------------------------------------------------------------------- /aws-glue-datacatalog-client-common/src/test/java/com/amazonaws/glue/catalog/util/PartitionUtilsTest.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.catalog.util; 2 | 3 | import com.amazonaws.services.glue.model.EntityNotFoundException; 4 | import com.amazonaws.services.glue.model.InternalServiceException; 5 | import com.amazonaws.services.glue.model.InvalidInputException; 6 | import com.amazonaws.services.glue.model.Partition; 7 | import com.amazonaws.services.glue.model.PartitionValueList; 8 | import com.amazonaws.services.glue.model.ResourceNumberLimitExceededException; 9 | 10 | import com.google.common.collect.Lists; 11 | import com.google.common.collect.Sets; 12 | import org.junit.Test; 13 | 14 | import java.util.List; 15 | import java.util.Map; 16 | import java.util.Set; 17 | 18 | import static org.hamcrest.CoreMatchers.equalTo; 19 | import static org.hamcrest.CoreMatchers.hasItem; 20 | import static org.junit.Assert.assertEquals; 21 | import static org.junit.Assert.assertFalse; 22 | import static org.junit.Assert.assertThat; 23 | import static org.junit.Assert.assertTrue; 24 | 25 | public class PartitionUtilsTest { 26 | 27 | @Test 28 | public void testBuildPartitionMapAndGetPartitionValuesList() { 29 | String namespaceName = "ns"; 30 | String tableName = "table"; 31 | // choose special values to make values1.hashCode() == values2.hashCode() 32 | List values1 = Lists.newArrayList("Aa"); 33 | List values2 = Lists.newArrayList("BB"); 34 | Partition partition1 = TestObjects.getTestPartition(namespaceName, tableName, values1); 35 | Partition partition2 = TestObjects.getTestPartition(namespaceName, tableName, values2); 36 | Map partitionMap = PartitionUtils.buildPartitionMap(Lists.newArrayList(partition1, partition2)); 37 | List partitionValuesList = PartitionUtils.getPartitionValuesList(partitionMap); 38 | 39 | assertEquals(2, partitionMap.size()); 40 | Set> valuesSet = Sets.newHashSet(values1, values2); 41 | for (PartitionKey partitionKey : partitionMap.keySet()) { 42 | assertThat(valuesSet, hasItem(partitionKey.getValues())); 43 | assertThat(partitionMap.get(partitionKey).getValues(), equalTo(partitionKey.getValues())); 44 | } 45 | 46 | assertEquals(2, partitionValuesList.size()); 47 | for (PartitionValueList partitionValueList : partitionValuesList) { 48 | assertThat(valuesSet, hasItem(partitionValueList.getValues())); 49 | } 50 | } 51 | 52 | @Test 53 | public void testIsInvalidUserInputException() { 54 | assertTrue(PartitionUtils.isInvalidUserInputException(new InvalidInputException("foo"))); 55 | assertTrue(PartitionUtils.isInvalidUserInputException(new EntityNotFoundException("bar"))); 56 | assertFalse(PartitionUtils.isInvalidUserInputException(new InternalServiceException("bar2"))); 57 | assertFalse(PartitionUtils.isInvalidUserInputException(new ResourceNumberLimitExceededException("bar3"))); 58 | assertFalse(PartitionUtils.isInvalidUserInputException(new NullPointerException("bar4"))); 59 | } 60 | 61 | } 62 | -------------------------------------------------------------------------------- /shims/hive3-shims/src/main/java/com.amazonaws.glue.shims/AwsGlueHive3Shims.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.shims; 2 | 3 | import org.apache.hadoop.conf.Configuration; 4 | import org.apache.hadoop.fs.Path; 5 | 6 | import org.apache.hadoop.hive.metastore.api.Database; 7 | import org.apache.hadoop.hive.metastore.api.EnvironmentContext; 8 | import org.apache.hadoop.hive.metastore.api.FieldSchema; 9 | import org.apache.hadoop.hive.metastore.api.MetaException; 10 | import org.apache.hadoop.hive.metastore.api.Partition; 11 | import org.apache.hadoop.hive.metastore.api.Table; 12 | import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; 13 | import org.apache.hadoop.hive.ql.exec.SerializationUtilities; 14 | import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; 15 | import org.apache.hadoop.hive.metastore.Warehouse; 16 | 17 | import java.util.List; 18 | 19 | final class AwsGlueHive3Shims implements AwsGlueHiveShims { 20 | 21 | private static final String HIVE_3_VERSION = "3."; 22 | 23 | static boolean supportsVersion(String version) { 24 | return version.startsWith(HIVE_3_VERSION); 25 | } 26 | 27 | @Override 28 | public ExprNodeGenericFuncDesc getDeserializeExpression(byte[] exprBytes) { 29 | return SerializationUtilities.deserializeExpressionFromKryo(exprBytes); 30 | } 31 | 32 | @Override 33 | public byte[] getSerializeExpression(ExprNodeGenericFuncDesc expr) { 34 | return SerializationUtilities.serializeExpressionToKryo(expr); 35 | } 36 | 37 | @Override 38 | public Path getDefaultTablePath(Database db, String tableName, Warehouse warehouse) throws MetaException { 39 | return warehouse.getDefaultTablePath(db, tableName, false); 40 | } 41 | 42 | @Override 43 | public boolean deleteDir(Warehouse wh, Path path, boolean recursive, boolean ifPurge) throws MetaException { 44 | return wh.deleteDir(path, recursive, ifPurge, true); 45 | } 46 | 47 | @Override 48 | public boolean mkdirs(Warehouse wh, Path path) throws MetaException { 49 | return wh.mkdirs(path); 50 | } 51 | 52 | @Override 53 | public boolean validateTableName(String name, Configuration conf) { 54 | return MetaStoreUtils.validateName(name, conf); 55 | } 56 | 57 | @Override 58 | public boolean requireCalStats( 59 | Configuration conf, 60 | Partition oldPart, 61 | Partition newPart, 62 | Table tbl, 63 | EnvironmentContext environmentContext) { 64 | return MetaStoreUtils.requireCalStats(oldPart, newPart, tbl, environmentContext); 65 | } 66 | 67 | @Override 68 | public boolean updateTableStatsFast( 69 | Database db, 70 | Table tbl, 71 | Warehouse wh, 72 | boolean madeDir, 73 | boolean forceRecompute, 74 | EnvironmentContext environmentContext 75 | ) throws MetaException { 76 | MetaStoreUtils.updateTableStatsSlow(db, tbl, wh, madeDir, forceRecompute, environmentContext); 77 | return true; 78 | } 79 | 80 | @Override 81 | public String validateTblColumns(List cols) { 82 | return MetaStoreUtils.validateTblColumns(cols); 83 | } 84 | 85 | } 86 | 87 | -------------------------------------------------------------------------------- /aws-glue-datacatalog-client-common/src/test/java/com/amazonaws/glue/catalog/util/GlueTestClientFactory.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.catalog.util; 2 | 3 | import com.amazonaws.AmazonClientException; 4 | import com.amazonaws.AmazonServiceException; 5 | import com.amazonaws.AmazonWebServiceRequest; 6 | import com.amazonaws.ClientConfiguration; 7 | import com.amazonaws.auth.DefaultAWSCredentialsProviderChain; 8 | import com.amazonaws.client.builder.AwsClientBuilder.EndpointConfiguration; 9 | import com.amazonaws.glue.catalog.metastore.AWSGlueMultipleCatalogDecorator; 10 | import com.amazonaws.glue.catalog.metastore.GlueClientFactory; 11 | import com.amazonaws.retry.PredefinedRetryPolicies; 12 | import com.amazonaws.retry.RetryPolicy; 13 | import com.amazonaws.services.glue.AWSGlue; 14 | import com.amazonaws.services.glue.AWSGlueClientBuilder; 15 | 16 | import org.apache.commons.lang3.StringUtils; 17 | import org.apache.hadoop.hive.metastore.api.MetaException; 18 | 19 | public final class GlueTestClientFactory implements GlueClientFactory { 20 | 21 | private static final int SC_GATEWAY_TIMEOUT = 504; 22 | 23 | @Override 24 | public AWSGlue newClient() throws MetaException { 25 | AWSGlueClientBuilder glueClientBuilder = AWSGlueClientBuilder.standard() 26 | .withClientConfiguration(createGatewayTimeoutRetryableConfiguration()) 27 | .withCredentials(new DefaultAWSCredentialsProviderChain()); 28 | 29 | String endpoint = System.getProperty("endpoint"); 30 | if (StringUtils.isNotBlank(endpoint)) { 31 | glueClientBuilder.setEndpointConfiguration(new EndpointConfiguration(endpoint, null)); 32 | } 33 | 34 | //Create AWSGlueMultipleCatalogDecorator with empty separator to make sure that it's compatible with 35 | //previous behaviour - all existing integration tests must be passed without any changes. 36 | return new AWSGlueMultipleCatalogDecorator(glueClientBuilder.build(), null); 37 | } 38 | 39 | private static ClientConfiguration createGatewayTimeoutRetryableConfiguration() { 40 | ClientConfiguration retryableConfig = new ClientConfiguration(); 41 | RetryPolicy.RetryCondition retryCondition = new PredefinedRetryPolicies.SDKDefaultRetryCondition() { 42 | @Override 43 | public boolean shouldRetry(AmazonWebServiceRequest originalRequest, AmazonClientException exception, 44 | int retriesAttempted) { 45 | if (super.shouldRetry(originalRequest, exception, retriesAttempted)) { 46 | return true; 47 | } 48 | if (exception != null && exception instanceof AmazonServiceException) { 49 | AmazonServiceException ase = (AmazonServiceException) exception; 50 | if (ase.getStatusCode() == SC_GATEWAY_TIMEOUT) { 51 | return true; 52 | } 53 | } 54 | return false; 55 | } 56 | }; 57 | RetryPolicy retryPolicy = new RetryPolicy(retryCondition, PredefinedRetryPolicies.DEFAULT_BACKOFF_STRATEGY, 58 | PredefinedRetryPolicies.DEFAULT_MAX_ERROR_RETRY, true); 59 | retryableConfig.setRetryPolicy(retryPolicy); 60 | return retryableConfig; 61 | } 62 | 63 | } 64 | -------------------------------------------------------------------------------- /aws-glue-datacatalog-hive3-client/src/test/java/com/amazonaws/glue/catalog/metastore/HiveAWSCatalogMetastoreClientTest.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.catalog.metastore; 2 | 3 | import com.amazonaws.glue.catalog.converters.CatalogToHiveConverter; 4 | import com.amazonaws.glue.catalog.converters.Hive3CatalogToHiveConverter; 5 | import com.amazonaws.services.glue.AWSGlue; 6 | import com.amazonaws.services.glue.model.UserDefinedFunction; 7 | import org.apache.commons.lang.RandomStringUtils; 8 | import org.apache.hadoop.conf.Configuration; 9 | import org.apache.hadoop.hive.metastore.api.ForeignKeysRequest; 10 | import org.apache.hadoop.hive.metastore.api.PrimaryKeysRequest; 11 | import org.apache.hadoop.hive.metastore.conf.MetastoreConf; 12 | import org.junit.Before; 13 | import org.junit.Test; 14 | 15 | import static com.amazonaws.glue.catalog.util.TestObjects.getCatalogTestFunction; 16 | import static com.amazonaws.glue.catalog.util.TestObjects.getTestDatabase; 17 | import static org.junit.Assert.assertEquals; 18 | import static org.junit.Assert.assertNotNull; 19 | import static org.mockito.Mockito.mock; 20 | import static org.mockito.Mockito.spy; 21 | import static org.mockito.Mockito.when; 22 | 23 | public class HiveAWSCatalogMetastoreClientTest { 24 | 25 | private AWSGlue glueClient; 26 | private AWSCatalogMetastoreClient metastoreClient; 27 | private Configuration conf; 28 | private GlueClientFactory clientFactory; 29 | private CatalogToHiveConverter catalogToHiveConverter = new Hive3CatalogToHiveConverter(); 30 | 31 | private org.apache.hadoop.hive.metastore.api.Database testDB; 32 | private org.apache.hadoop.hive.metastore.api.Function testFunction; 33 | private String catalogId; 34 | private UserDefinedFunction catalogTestFunction; 35 | 36 | @Before 37 | public void setUp() throws Exception { 38 | testDB = catalogToHiveConverter.convertDatabase(getTestDatabase()); 39 | catalogTestFunction = getCatalogTestFunction(); 40 | catalogTestFunction.setDatabaseName(testDB.getName()); 41 | testFunction = catalogToHiveConverter.convertFunction(testDB.getName(), catalogTestFunction); 42 | // catalogId is a AWS account number 43 | catalogId = RandomStringUtils.randomNumeric(12); 44 | 45 | conf = spy(MetastoreConf.newMetastoreConf()); 46 | glueClient = spy(AWSGlue.class); 47 | clientFactory = mock(GlueClientFactory.class); 48 | when(clientFactory.newClient()).thenReturn(glueClient); 49 | metastoreClient = new AWSCatalogMetastoreClient.Builder().withClientFactory(clientFactory) 50 | .createDefaults(false).withConf(conf).withCatalogId(catalogId).build(); 51 | } 52 | 53 | @Test 54 | public void testPrimaryKeys_NotNull() throws Exception { 55 | PrimaryKeysRequest request = mock(PrimaryKeysRequest.class); 56 | assertNotNull(metastoreClient.getPrimaryKeys(request)); 57 | } 58 | 59 | @Test 60 | public void testForeignKeys_NotNull() throws Exception { 61 | ForeignKeysRequest request = mock(ForeignKeysRequest.class); 62 | assertNotNull(metastoreClient.getForeignKeys(request)); 63 | } 64 | 65 | @Test 66 | public void testGetNextNotification() throws Exception { 67 | // check that we just return dummy result 68 | assertNotNull(metastoreClient.getNextNotification(0, 1, null)); 69 | } 70 | 71 | @Test 72 | public void testGetCurrentNotificationEventId() throws Exception { 73 | // check that we get dummy result with 0 eventId 74 | assertEquals(0, metastoreClient.getCurrentNotificationEventId().getEventId()); 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /dev-support/check_style.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 69 | 70 | 71 | 73 | 74 | 75 | -------------------------------------------------------------------------------- /aws-glue-datacatalog-client-common/src/test/java/com/amazonaws/glue/catalog/util/ExprBuilder.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.catalog.util; 2 | 3 | import org.apache.hadoop.hive.ql.exec.FunctionRegistry; 4 | import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; 5 | import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; 6 | import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; 7 | import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; 8 | import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; 9 | import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; 10 | 11 | import java.sql.Date; 12 | import java.sql.Timestamp; 13 | import java.util.ArrayList; 14 | import java.util.List; 15 | import java.util.Stack; 16 | 17 | /** 18 | * Utility to craft mock expression trees. Inspired by org.apache.hadoop.hive.metastore.TestMetastoreExpr 19 | */ 20 | public class ExprBuilder { 21 | 22 | private final String tblName; 23 | private final Stack stack = new Stack<>(); 24 | 25 | public ExprBuilder(String tblName) { 26 | this.tblName = tblName; 27 | } 28 | 29 | public ExprNodeGenericFuncDesc build() throws Exception { 30 | if (stack.size() != 1) { 31 | throw new Exception("Bad test: " + stack.size()); 32 | } 33 | return (ExprNodeGenericFuncDesc) stack.pop(); 34 | } 35 | 36 | public ExprBuilder pred(String name, int args) throws Exception { 37 | return fn(name, TypeInfoFactory.booleanTypeInfo, args); 38 | } 39 | 40 | private ExprBuilder fn(String name, TypeInfo ti, int args) throws Exception { 41 | List children = new ArrayList<>(); 42 | for (int i = 0; i < args; ++i) { 43 | children.add(stack.pop()); 44 | } 45 | stack.push(new ExprNodeGenericFuncDesc(ti, FunctionRegistry.getFunctionInfo(name).getGenericUDF(), children)); 46 | return this; 47 | } 48 | 49 | public ExprBuilder strCol(String col) { 50 | return colInternal(TypeInfoFactory.stringTypeInfo, col, true); 51 | } 52 | 53 | public ExprBuilder timestampCol(String col) { 54 | return colInternal(TypeInfoFactory.timestampTypeInfo, col, true); 55 | } 56 | 57 | public ExprBuilder booleanCol(String col) { 58 | return colInternal(TypeInfoFactory.booleanTypeInfo, col, true); 59 | } 60 | 61 | public ExprBuilder charCol(String col) { 62 | return colInternal(TypeInfoFactory.charTypeInfo, col, true); 63 | } 64 | 65 | public ExprBuilder dateCol(String col) { 66 | return colInternal(TypeInfoFactory.dateTypeInfo, col, true); 67 | } 68 | 69 | private ExprBuilder colInternal(TypeInfo ti, String col, boolean part) { 70 | stack.push(new ExprNodeColumnDesc(ti, col, tblName, part)); 71 | return this; 72 | } 73 | 74 | public ExprBuilder val(boolean val) { 75 | return valInternal(TypeInfoFactory.booleanTypeInfo, val); 76 | } 77 | 78 | public ExprBuilder val(String val) { 79 | return valInternal(TypeInfoFactory.stringTypeInfo, val); 80 | } 81 | 82 | public ExprBuilder vals(List vals) { 83 | for (String s : vals) { 84 | val(s); 85 | } 86 | return this; 87 | } 88 | 89 | public ExprBuilder val(Timestamp val) { 90 | return valInternal(TypeInfoFactory.timestampTypeInfo, val); 91 | } 92 | 93 | public ExprBuilder val(Character val) { 94 | return valInternal(TypeInfoFactory.charTypeInfo, val); 95 | } 96 | 97 | public ExprBuilder val(Date val) { 98 | return valInternal(TypeInfoFactory.timestampTypeInfo, val); 99 | } 100 | 101 | private ExprBuilder valInternal(TypeInfo ti, Object val) { 102 | stack.push(new ExprNodeConstantDesc(ti, val)); 103 | return this; 104 | } 105 | 106 | } 107 | -------------------------------------------------------------------------------- /aws-glue-datacatalog-spark-client/src/test/java/com/amazonaws/glue/catalog/metastore/integrationtest/MultipleCatalogDatabaseIntegrationTest.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.catalog.metastore.integrationtest; 2 | 3 | import com.amazonaws.glue.catalog.converters.BaseCatalogToHiveConverter; 4 | import com.amazonaws.glue.catalog.converters.CatalogToHiveConverter; 5 | import com.amazonaws.services.glue.model.DeleteDatabaseRequest; 6 | import com.amazonaws.services.glue.model.EntityNotFoundException; 7 | import com.amazonaws.services.glue.model.GetDatabaseRequest; 8 | import org.apache.hadoop.hive.metastore.api.Database; 9 | import org.apache.hadoop.hive.metastore.api.MetaException; 10 | import org.apache.thrift.TException; 11 | import org.junit.After; 12 | import org.junit.Before; 13 | import org.junit.Rule; 14 | import org.junit.Test; 15 | import org.junit.rules.ExpectedException; 16 | 17 | import java.util.UUID; 18 | 19 | import static com.amazonaws.glue.catalog.util.TestObjects.getTestDatabase; 20 | import static org.junit.Assert.assertEquals; 21 | 22 | public class MultipleCatalogDatabaseIntegrationTest extends MultipleCatalogIntegrationTestBase { 23 | 24 | private Database databaseInAnotherCatalog; 25 | private Database database; 26 | private CatalogToHiveConverter catalogToHiveConverter = new BaseCatalogToHiveConverter(); 27 | 28 | @Rule 29 | public ExpectedException expectedException = ExpectedException.none(); 30 | 31 | @Before 32 | public void setup() throws MetaException, TException { 33 | super.setup(); 34 | 35 | databaseInAnotherCatalog = catalogToHiveConverter.convertDatabase(getTestDatabase()); 36 | database = databaseInAnotherCatalog.deepCopy(); 37 | database.setName(constructDbName(databaseInAnotherCatalog.getName())); 38 | metastoreClient.createDatabase(database); 39 | } 40 | 41 | @After 42 | public void clean() { 43 | try { 44 | glueClient.deleteDatabase(new DeleteDatabaseRequest().withName(databaseInAnotherCatalog.getName())); 45 | } catch (EntityNotFoundException e) { 46 | } 47 | } 48 | 49 | @Test 50 | public void getDatabase() throws TException { 51 | Database createdDb = metastoreClient.getDatabase(database.getName()); 52 | assertEquals(database, createdDb); 53 | 54 | createdDb = catalogToHiveConverter.convertDatabase( 55 | glueClient.getDatabase(new GetDatabaseRequest() 56 | .withCatalogId(anotherCatalogId) 57 | .withName(databaseInAnotherCatalog.getName()) 58 | ).getDatabase()); 59 | assertEquals(databaseInAnotherCatalog, createdDb); 60 | } 61 | 62 | @Test 63 | public void alterDatabase() throws TException { 64 | String newDescription = UUID.randomUUID().toString(); 65 | database.setDescription(newDescription); 66 | databaseInAnotherCatalog.setDescription(newDescription); 67 | metastoreClient.alterDatabase(database.getName(), database); 68 | 69 | Database alteredDb = catalogToHiveConverter.convertDatabase( 70 | glueClient.getDatabase(new GetDatabaseRequest() 71 | .withCatalogId(anotherCatalogId) 72 | .withName(databaseInAnotherCatalog.getName()) 73 | ).getDatabase()); 74 | assertEquals(databaseInAnotherCatalog, alteredDb); 75 | } 76 | 77 | @Test 78 | public void dropDatabase() throws TException { 79 | metastoreClient.dropDatabase(database.getName()); 80 | 81 | expectedException.expect(EntityNotFoundException.class); 82 | glueClient.getDatabase(new GetDatabaseRequest() 83 | .withCatalogId(anotherCatalogId) 84 | .withName(databaseInAnotherCatalog.getName()) 85 | ); 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /aws-glue-datacatalog-client-common/src/test/java/com/amazonaws/glue/catalog/util/HiveTableValidatorTest.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.catalog.util; 2 | 3 | import com.amazonaws.services.glue.model.InvalidInputException; 4 | import com.amazonaws.services.glue.model.Table; 5 | import org.junit.Rule; 6 | import org.junit.Test; 7 | import org.junit.rules.ExpectedException; 8 | import org.apache.hadoop.hive.metastore.TableType; 9 | 10 | import static com.amazonaws.glue.catalog.util.HiveTableValidator.REQUIRED_PROPERTIES_VALIDATOR; 11 | import static com.amazonaws.glue.catalog.util.TestObjects.getTestTable; 12 | import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE; 13 | 14 | public class HiveTableValidatorTest { 15 | 16 | @Rule 17 | public ExpectedException thrown = ExpectedException.none(); 18 | private static final String EXPECTED_MESSAGE = "%s cannot be null"; 19 | 20 | @Test 21 | public void testRequiredProperty_TableType() { 22 | thrown.expect(InvalidInputException.class); 23 | thrown.expectMessage(String.format(EXPECTED_MESSAGE, "TableType")); 24 | Table tbl = getTestTable().withTableType(null); 25 | REQUIRED_PROPERTIES_VALIDATOR.validate(tbl); 26 | } 27 | 28 | @Test 29 | public void testRequiredProperty_StorageDescriptor() { 30 | thrown.expect(InvalidInputException.class); 31 | thrown.expectMessage(String.format(EXPECTED_MESSAGE, "StorageDescriptor")); 32 | Table tbl = getTestTable().withStorageDescriptor(null); 33 | REQUIRED_PROPERTIES_VALIDATOR.validate(tbl); 34 | } 35 | 36 | @Test 37 | public void testRequiredProperty_InputFormat() { 38 | thrown.expect(InvalidInputException.class); 39 | thrown.expectMessage(String.format(EXPECTED_MESSAGE, "StorageDescriptor#InputFormat")); 40 | Table tbl = getTestTable(); 41 | tbl.getStorageDescriptor().setInputFormat(null); 42 | REQUIRED_PROPERTIES_VALIDATOR.validate(tbl); 43 | } 44 | 45 | @Test 46 | public void testRequiredProperty_OutputFormat() { 47 | thrown.expect(InvalidInputException.class); 48 | thrown.expectMessage(String.format(EXPECTED_MESSAGE, "StorageDescriptor#OutputFormat")); 49 | Table tbl = getTestTable(); 50 | tbl.getStorageDescriptor().setOutputFormat(null); 51 | REQUIRED_PROPERTIES_VALIDATOR.validate(tbl); 52 | } 53 | 54 | @Test 55 | public void testRequiredProperty_SerdeInfo() { 56 | thrown.expect(InvalidInputException.class); 57 | thrown.expectMessage(String.format(EXPECTED_MESSAGE, "StorageDescriptor#SerdeInfo")); 58 | Table tbl = getTestTable(); 59 | tbl.getStorageDescriptor().setSerdeInfo(null); 60 | REQUIRED_PROPERTIES_VALIDATOR.validate(tbl); 61 | } 62 | 63 | @Test 64 | public void testRequiredProperty_SerializationLibrary() { 65 | thrown.expect(InvalidInputException.class); 66 | thrown.expectMessage(String.format(EXPECTED_MESSAGE, "StorageDescriptor#SerdeInfo#SerializationLibrary")); 67 | Table tbl = getTestTable(); 68 | tbl.getStorageDescriptor().getSerdeInfo().setSerializationLibrary(null); 69 | REQUIRED_PROPERTIES_VALIDATOR.validate(tbl); 70 | } 71 | 72 | @Test 73 | public void testRequiredProperty_ValidTable() { 74 | REQUIRED_PROPERTIES_VALIDATOR.validate(getTestTable()); 75 | } 76 | 77 | @Test 78 | public void testValidate_ViewTableType() { 79 | Table tbl = getTestTable(); 80 | tbl.setTableType(TableType.VIRTUAL_VIEW.name()); 81 | tbl.getStorageDescriptor().getSerdeInfo().setSerializationLibrary(null); 82 | REQUIRED_PROPERTIES_VALIDATOR.validate(tbl); 83 | } 84 | 85 | @Test 86 | public void testValidate_ExcludeStorageHandlerType() { 87 | Table tbl = getTestTable(); 88 | tbl.getParameters().put(META_TABLE_STORAGE, "org.apache.hadoop.hive.dynamodb.DynamoDBStorageHandler"); 89 | tbl.getStorageDescriptor().setInputFormat(null); 90 | REQUIRED_PROPERTIES_VALIDATOR.validate(tbl); 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check [existing open](https://github.com/awslabs/aws-glue-catalog-client-for-apache-hive-metastore/issues), or [recently closed](https://github.com/awslabs/aws-glue-catalog-client-for-apache-hive-metastore/issues?utf8=%E2%9C%93&q=is%3Aissue%20is%3Aclosed%20), issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *master* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any ['help wanted'](https://github.com/awslabs/aws-glue-catalog-client-for-apache-hive-metastore/labels/help%20wanted) issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](https://github.com/awslabs/aws-glue-catalog-client-for-apache-hive-metastore/blob/master/LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 60 | 61 | We may ask you to sign a [Contributor License Agreement (CLA)](http://en.wikipedia.org/wiki/Contributor_License_Agreement) for larger changes. 62 | -------------------------------------------------------------------------------- /aws-glue-datacatalog-spark-client/src/test/java/com/amazonaws/glue/catalog/metastore/integrationtest/MultipleCatalogIntegrationTestBase.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.catalog.metastore.integrationtest; 2 | 3 | import com.amazonaws.glue.catalog.metastore.AWSCatalogMetastoreClient; 4 | import com.amazonaws.glue.catalog.metastore.AWSGlueClientFactory; 5 | import com.amazonaws.glue.catalog.metastore.GlueClientFactory; 6 | import com.amazonaws.glue.catalog.util.AWSGlueConfig; 7 | import com.amazonaws.services.glue.AWSGlue; 8 | import com.google.common.base.Strings; 9 | import org.apache.hadoop.fs.Path; 10 | import org.apache.hadoop.hive.conf.HiveConf; 11 | import org.apache.hadoop.hive.metastore.IMetaStoreClient; 12 | import org.apache.hadoop.hive.metastore.Warehouse; 13 | import org.apache.hadoop.hive.metastore.api.MetaException; 14 | import org.apache.thrift.TException; 15 | 16 | import static org.mockito.Matchers.any; 17 | import static org.mockito.Matchers.anyString; 18 | import static org.mockito.Mockito.mock; 19 | import static org.mockito.Mockito.when; 20 | 21 | /** 22 | * Base class for integration test to check that multiple Glue catalogs can be accessed using single MetastoreClient. 23 | * To be able to run such test you have to make the following set up: 24 | * 1. Retrieve credentials of AWS account (lets say account A) which will be used to authenticate your test. 25 | * 2. Set credentials and region of account A the following variables: AWS_SECRET_KEY, AWS_ACCESS_KEY, AWS_REGION. 26 | * 3. Set up permissions in another AWS account (lets say account B) that account A has access to account B. 27 | * 3.1 Log in to AWS Console under account B, choose AWS Glue service and go to Settings. 28 | * 3.2 Set the following policy: 29 | { 30 | "Version" : "2012-10-17", 31 | "Statement" : [ { 32 | "Effect" : "Allow", 33 | "Principal" : { 34 | "AWS" : "arn:aws:iam:::root" 35 | }, 36 | "Action" : "glue:*", 37 | "Resource" : "arn:aws:glue:us-east-1::*" 38 | } ] 39 | } 40 | * 4. Set account B id as a value of ANOTHER_GLUE_CATALOG_ID variable. 41 | * 42 | * If another account preparation is not an option for you then you can skip #3 and set account A as a value of 43 | * ANOTHER_GLUE_CATALOG_ID variable. In this case test will pass but you won't test a real multiple catalog access. 44 | */ 45 | public class MultipleCatalogIntegrationTestBase { 46 | private static final String ANOTHER_GLUE_CATALOG_ID = "ANOTHER_GLUE_CATALOG_ID"; 47 | private static final String CATALOG_SEPARATOR = "/-/"; 48 | 49 | protected IMetaStoreClient metastoreClient; 50 | protected AWSGlue glueClient; 51 | protected String anotherCatalogId; 52 | 53 | protected void setup() throws MetaException, TException { 54 | HiveConf conf = new HiveConf(); 55 | conf.set(AWSGlueConfig.AWS_GLUE_CATALOG_SEPARATOR, CATALOG_SEPARATOR); 56 | Warehouse wh = mock(Warehouse.class); 57 | Path tmpPath = new Path("/db"); 58 | when(wh.getDefaultDatabasePath(anyString())).thenReturn(tmpPath); 59 | when(wh.getDnsPath(any(Path.class))).thenReturn(tmpPath); 60 | when(wh.isDir(any(Path.class))).thenReturn(true); 61 | 62 | GlueClientFactory clientFactory = new AWSGlueClientFactory(conf); 63 | glueClient = clientFactory.newClient(); 64 | 65 | metastoreClient = new AWSCatalogMetastoreClient.Builder().withHiveConf(conf).withWarehouse(wh) 66 | .withClientFactory(clientFactory).build(); 67 | 68 | anotherCatalogId = System.getenv(ANOTHER_GLUE_CATALOG_ID); 69 | if (Strings.isNullOrEmpty(anotherCatalogId)) { 70 | throw new RuntimeException(String.format( 71 | "Environment variable %s is not set. " + 72 | "Please read comment for %s class to understand what value should be set there.", 73 | ANOTHER_GLUE_CATALOG_ID, this.getClass().getName())); 74 | } 75 | } 76 | 77 | protected String constructDbName(String originalDbName) { 78 | return String.format("%s%s%s", anotherCatalogId, CATALOG_SEPARATOR, originalDbName); 79 | } 80 | 81 | } 82 | -------------------------------------------------------------------------------- /aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/converters/GlueInputConverter.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.catalog.converters; 2 | 3 | import com.amazonaws.services.glue.model.DatabaseInput; 4 | import com.amazonaws.services.glue.model.PartitionInput; 5 | import com.amazonaws.services.glue.model.TableInput; 6 | import com.amazonaws.services.glue.model.UserDefinedFunctionInput; 7 | 8 | import org.apache.hadoop.hive.metastore.api.Database; 9 | import org.apache.hadoop.hive.metastore.api.Function; 10 | import org.apache.hadoop.hive.metastore.api.Partition; 11 | import org.apache.hadoop.hive.metastore.api.Table; 12 | 13 | import java.util.ArrayList; 14 | import java.util.Collection; 15 | import java.util.List; 16 | 17 | /** 18 | * This class provides methods to convert Hive/Catalog objects to Input objects used 19 | * for Glue API parameters 20 | */ 21 | public final class GlueInputConverter { 22 | 23 | public static DatabaseInput convertToDatabaseInput(Database hiveDatabase) { 24 | return convertToDatabaseInput(HiveToCatalogConverter.convertDatabase(hiveDatabase)); 25 | } 26 | 27 | public static DatabaseInput convertToDatabaseInput(com.amazonaws.services.glue.model.Database database) { 28 | DatabaseInput input = new DatabaseInput(); 29 | 30 | input.setName(database.getName()); 31 | input.setDescription(database.getDescription()); 32 | input.setLocationUri(database.getLocationUri()); 33 | input.setParameters(database.getParameters()); 34 | 35 | return input; 36 | } 37 | 38 | public static TableInput convertToTableInput(Table hiveTable) { 39 | return convertToTableInput(HiveToCatalogConverter.convertTable(hiveTable)); 40 | } 41 | 42 | public static TableInput convertToTableInput(com.amazonaws.services.glue.model.Table table) { 43 | TableInput tableInput = new TableInput(); 44 | 45 | tableInput.setRetention(table.getRetention()); 46 | tableInput.setPartitionKeys(table.getPartitionKeys()); 47 | tableInput.setTableType(table.getTableType()); 48 | tableInput.setName(table.getName()); 49 | tableInput.setOwner(table.getOwner()); 50 | tableInput.setLastAccessTime(table.getLastAccessTime()); 51 | tableInput.setStorageDescriptor(table.getStorageDescriptor()); 52 | tableInput.setParameters(table.getParameters()); 53 | tableInput.setViewExpandedText(table.getViewExpandedText()); 54 | tableInput.setViewOriginalText(table.getViewOriginalText()); 55 | 56 | return tableInput; 57 | } 58 | 59 | public static PartitionInput convertToPartitionInput(Partition src) { 60 | return convertToPartitionInput(HiveToCatalogConverter.convertPartition(src)); 61 | } 62 | 63 | public static PartitionInput convertToPartitionInput(com.amazonaws.services.glue.model.Partition src) { 64 | PartitionInput partitionInput = new PartitionInput(); 65 | 66 | partitionInput.setLastAccessTime(src.getLastAccessTime()); 67 | partitionInput.setParameters(src.getParameters()); 68 | partitionInput.setStorageDescriptor(src.getStorageDescriptor()); 69 | partitionInput.setValues(src.getValues()); 70 | 71 | return partitionInput; 72 | } 73 | 74 | public static List convertToPartitionInputs(Collection parts) { 75 | List inputList = new ArrayList<>(); 76 | 77 | for (com.amazonaws.services.glue.model.Partition part : parts) { 78 | inputList.add(convertToPartitionInput(part)); 79 | } 80 | return inputList; 81 | } 82 | 83 | public static UserDefinedFunctionInput convertToUserDefinedFunctionInput(Function hiveFunction) { 84 | UserDefinedFunctionInput functionInput = new UserDefinedFunctionInput(); 85 | 86 | functionInput.setClassName(hiveFunction.getClassName()); 87 | functionInput.setFunctionName(hiveFunction.getFunctionName()); 88 | functionInput.setOwnerName(hiveFunction.getOwnerName()); 89 | if(hiveFunction.getOwnerType() != null) { 90 | functionInput.setOwnerType(hiveFunction.getOwnerType().name()); 91 | } 92 | functionInput.setResourceUris(HiveToCatalogConverter.covertResourceUriList(hiveFunction.getResourceUris())); 93 | return functionInput; 94 | } 95 | 96 | } 97 | -------------------------------------------------------------------------------- /aws-glue-datacatalog-client-common/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | com.amazonaws.glue 6 | aws-glue-datacatalog-hive-client 7 | 3.4.0-SNAPSHOT 8 | 9 | 4.0.0 10 | AWSGlueDataCatalogClientCommon 11 | aws-glue-datacatalog-client-common 12 | jar 13 | 14 | 15 | org.apache.hive 16 | hive-metastore 17 | ${hive3.version} 18 | provided 19 | 20 | 21 | org.apache.hive 22 | hive-exec 23 | ${hive3.version} 24 | provided 25 | 26 | 27 | com.amazonaws 28 | aws-java-sdk-glue 29 | ${aws.sdk.version} 30 | compile 31 | 32 | 33 | com.amazonaws 34 | aws-java-sdk-core 35 | ${aws.sdk.version} 36 | compile 37 | 38 | 39 | com.google.guava 40 | guava 41 | ${guava.version} 42 | 43 | 44 | org.apache.hadoop 45 | hadoop-common 46 | ${hadoop.version} 47 | provided 48 | 49 | 50 | com.amazonaws.glue 51 | shims-loader 52 | ${project.version} 53 | 54 | 55 | 56 | 57 | org.apache.hadoop 58 | hadoop-mapreduce-client-core 59 | ${hadoop.version} 60 | test 61 | 62 | 63 | junit 64 | junit 65 | ${junit.version} 66 | test 67 | 68 | 69 | org.hamcrest 70 | hamcrest-library 71 | ${hamcrest.version} 72 | test 73 | 74 | 75 | org.mockito 76 | mockito-core 77 | ${mockito.version} 78 | test 79 | 80 | 81 | org.powermock 82 | powermock-module-junit4 83 | ${powermock.version} 84 | test 85 | 86 | 87 | org.powermock 88 | powermock-api-mockito 89 | ${powermock.version} 90 | test 91 | 92 | 93 | 94 | 95 | 96 | 97 | org.apache.maven.plugins 98 | maven-jar-plugin 99 | 2.2 100 | 101 | 102 | 103 | test-jar 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | -------------------------------------------------------------------------------- /aws-glue-datacatalog-client-common/src/test/java/com/amazonaws/glue/catalog/metastore/AWSGlueMetastoreFactoryTest.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.catalog.metastore; 2 | 3 | import org.apache.hadoop.hive.conf.HiveConf; 4 | import org.junit.Before; 5 | import org.junit.Test; 6 | 7 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_DB_CACHE_ENABLE; 8 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_TABLE_CACHE_ENABLE; 9 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_ENDPOINT; 10 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_TABLE_CACHE_SIZE; 11 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_TABLE_CACHE_TTL_MINS; 12 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_DB_CACHE_SIZE; 13 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_DB_CACHE_TTL_MINS; 14 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_REGION; 15 | import static org.mockito.Mockito.atLeastOnce; 16 | import static org.mockito.Mockito.spy; 17 | import static org.mockito.Mockito.verify; 18 | import static org.mockito.Mockito.when; 19 | import static org.junit.Assert.assertTrue; 20 | 21 | public class AWSGlueMetastoreFactoryTest { 22 | 23 | private AWSGlueMetastoreFactory awsGlueMetastoreFactory; 24 | private HiveConf hiveConf; 25 | 26 | @Before 27 | public void setUp() { 28 | awsGlueMetastoreFactory = new AWSGlueMetastoreFactory(); 29 | hiveConf = spy(new HiveConf()); 30 | 31 | // these configs are needed for AWSGlueClient to get initialized 32 | System.setProperty(AWS_REGION, ""); 33 | System.setProperty(AWS_GLUE_ENDPOINT, ""); 34 | when(hiveConf.get(AWS_GLUE_ENDPOINT)).thenReturn("endpoint"); 35 | when(hiveConf.get(AWS_REGION)).thenReturn("us-west-1"); 36 | 37 | // these configs are needed for AWSGlueMetastoreCacheDecorator to get initialized 38 | when(hiveConf.getInt(AWS_GLUE_DB_CACHE_SIZE, 0)).thenReturn(1); 39 | when(hiveConf.getInt(AWS_GLUE_DB_CACHE_TTL_MINS, 0)).thenReturn(1); 40 | when(hiveConf.getInt(AWS_GLUE_TABLE_CACHE_SIZE, 0)).thenReturn(1); 41 | when(hiveConf.getInt(AWS_GLUE_TABLE_CACHE_TTL_MINS, 0)).thenReturn(1); 42 | } 43 | 44 | @Test 45 | public void testNewMetastoreWhenCacheDisabled() throws Exception { 46 | when(hiveConf.getBoolean(AWS_GLUE_DB_CACHE_ENABLE, false)).thenReturn(false); 47 | when(hiveConf.getBoolean(AWS_GLUE_TABLE_CACHE_ENABLE, false)).thenReturn(false); 48 | assertTrue(DefaultAWSGlueMetastore.class.equals( 49 | awsGlueMetastoreFactory.newMetastore(hiveConf).getClass())); 50 | verify(hiveConf, atLeastOnce()).getBoolean(AWS_GLUE_DB_CACHE_ENABLE, false); 51 | verify(hiveConf, atLeastOnce()).getBoolean(AWS_GLUE_TABLE_CACHE_ENABLE, false); 52 | } 53 | 54 | @Test 55 | public void testNewMetastoreWhenTableCacheEnabled() throws Exception { 56 | when(hiveConf.getBoolean(AWS_GLUE_DB_CACHE_ENABLE, false)).thenReturn(false); 57 | when(hiveConf.getBoolean(AWS_GLUE_TABLE_CACHE_ENABLE, false)).thenReturn(true); 58 | assertTrue(AWSGlueMetastoreCacheDecorator.class.equals( 59 | awsGlueMetastoreFactory.newMetastore(hiveConf).getClass())); 60 | verify(hiveConf, atLeastOnce()).getBoolean(AWS_GLUE_DB_CACHE_ENABLE, false); 61 | verify(hiveConf, atLeastOnce()).getBoolean(AWS_GLUE_TABLE_CACHE_ENABLE, false); 62 | } 63 | 64 | @Test 65 | public void testNewMetastoreWhenDBCacheEnabled() throws Exception { 66 | when(hiveConf.getBoolean(AWS_GLUE_DB_CACHE_ENABLE, false)).thenReturn(true); 67 | when(hiveConf.getBoolean(AWS_GLUE_TABLE_CACHE_ENABLE, false)).thenReturn(true); 68 | assertTrue(AWSGlueMetastoreCacheDecorator.class.equals( 69 | awsGlueMetastoreFactory.newMetastore(hiveConf).getClass())); 70 | verify(hiveConf, atLeastOnce()).getBoolean(AWS_GLUE_DB_CACHE_ENABLE, false); 71 | verify(hiveConf, atLeastOnce()).getBoolean(AWS_GLUE_TABLE_CACHE_ENABLE, false); 72 | } 73 | 74 | @Test 75 | public void testNewMetastoreWhenAllCacheEnabled() throws Exception { 76 | when(hiveConf.getBoolean(AWS_GLUE_DB_CACHE_ENABLE, false)).thenReturn(true); 77 | when(hiveConf.getBoolean(AWS_GLUE_TABLE_CACHE_ENABLE, false)).thenReturn(true); 78 | assertTrue(AWSGlueMetastoreCacheDecorator.class.equals( 79 | awsGlueMetastoreFactory.newMetastore(hiveConf).getClass())); 80 | verify(hiveConf, atLeastOnce()).getBoolean(AWS_GLUE_DB_CACHE_ENABLE, false); 81 | verify(hiveConf, atLeastOnce()).getBoolean(AWS_GLUE_TABLE_CACHE_ENABLE, false); 82 | } 83 | 84 | } -------------------------------------------------------------------------------- /aws-glue-datacatalog-hive3-client/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | com.amazonaws.glue 6 | aws-glue-datacatalog-hive-client 7 | 3.4.0-SNAPSHOT 8 | 9 | 4.0.0 10 | AWSGlueDataCatalogHive3Client 11 | aws-glue-datacatalog-hive3-client 12 | jar 13 | 14 | 15 | org.apache.hive 16 | hive-metastore 17 | ${hive3.version} 18 | provided 19 | 20 | 21 | org.apache.hive 22 | hive-exec 23 | ${hive3.version} 24 | provided 25 | 26 | 27 | com.amazonaws.glue 28 | aws-glue-datacatalog-client-common 29 | ${project.version} 30 | 31 | 32 | org.apache.hadoop 33 | hadoop-common 34 | ${hadoop.version} 35 | provided 36 | 37 | 38 | org.apache.httpcomponents 39 | httpclient 40 | ${httpclient.version} 41 | 42 | 43 | 44 | org.apache.hadoop 45 | hadoop-mapreduce-client-core 46 | ${hadoop.version} 47 | test 48 | 49 | 50 | com.amazonaws.glue 51 | aws-glue-datacatalog-client-common 52 | ${project.version} 53 | test 54 | tests 55 | 56 | 57 | junit 58 | junit 59 | ${junit.version} 60 | test 61 | 62 | 63 | org.hamcrest 64 | hamcrest-library 65 | ${hamcrest.version} 66 | test 67 | 68 | 69 | org.mockito 70 | mockito-core 71 | ${mockito.version} 72 | test 73 | 74 | 75 | 76 | 77 | 78 | org.apache.maven.plugins 79 | maven-shade-plugin 80 | 3.0.0 81 | 82 | false 83 | 84 | 85 | 86 | package 87 | 88 | shade 89 | 90 | 91 | 92 | 93 | com.amazonaws.glue:aws-glue-datacatalog-client-common 94 | com.amazonaws.glue:shims-common 95 | com.amazonaws.glue:hive3-shims 96 | com.amazonaws.glue:shims-loader 97 | com.amazonaws.glue:spark-hive-shims 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | -------------------------------------------------------------------------------- /aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/converters/PartitionNameParser.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.catalog.converters; 2 | 3 | import java.util.AbstractMap; 4 | import java.util.ArrayList; 5 | import java.util.LinkedHashMap; 6 | import java.util.List; 7 | import java.util.Map.Entry; 8 | import java.util.Set; 9 | import java.util.regex.Matcher; 10 | import java.util.regex.Pattern; 11 | 12 | import com.amazonaws.glue.catalog.exceptions.InvalidPartitionNameException; 13 | import com.google.common.collect.ImmutableSet; 14 | 15 | public class PartitionNameParser { 16 | 17 | private static final Pattern PARTITION_NAME_VALUE_PATTERN = Pattern.compile("([^/]+)=([^/]+)"); 18 | private static final String PARTITION_NAME_DELIMITER = "/"; 19 | 20 | private static final char STORE_AS_NUMBER = 'n'; 21 | private static final char STORE_AS_STRING = 's'; 22 | 23 | private static final Set NUMERIC_PARTITION_COLUMN_TYPES = ImmutableSet.of( 24 | "tinyint", 25 | "smallint", 26 | "int", 27 | "bigint" 28 | ); 29 | 30 | public static String getPartitionName(List partitionColumns, List partitionValues) { 31 | if (hasInvalidValues(partitionColumns, partitionValues) || hasInvalidSize(partitionColumns, partitionValues)) { 32 | throw new IllegalArgumentException("Partition is not well formed. Columns and values do no match."); 33 | } 34 | 35 | StringBuilder partitionName = new StringBuilder(); 36 | partitionName.append(getPartitionColumnName(partitionColumns.get(0), partitionValues.get(0))); 37 | 38 | for (int i = 1; i < partitionColumns.size(); i++) { 39 | partitionName.append(PARTITION_NAME_DELIMITER); 40 | partitionName.append(getPartitionColumnName(partitionColumns.get(i), partitionValues.get(i))); 41 | } 42 | 43 | return partitionName.toString(); 44 | } 45 | 46 | private static boolean hasInvalidValues(List partitionColumns, List partitionValues) { 47 | return partitionColumns == null || partitionValues == null; 48 | } 49 | 50 | private static boolean hasInvalidSize(List partitionColumns, List partitionValues) { 51 | return partitionColumns.size() != partitionValues.size(); 52 | } 53 | 54 | private static String getPartitionColumnName(String partitionColumn, String partitionValue) { 55 | return partitionColumn + "=" + partitionValue; 56 | } 57 | 58 | public static LinkedHashMap getPartitionColumns(String partitionName) { 59 | LinkedHashMap partitionColumns = new LinkedHashMap<>(); 60 | String[] partitions = partitionName.split(PARTITION_NAME_DELIMITER); 61 | for(String partition : partitions) { 62 | Entry entry = getPartitionColumnValuePair(partition); 63 | partitionColumns.put(entry.getKey(), entry.getValue()); 64 | } 65 | 66 | return partitionColumns; 67 | } 68 | 69 | /* 70 | * Copied from https://github.com/apache/hive/blob/master/common/src/java/org/apache/hadoop/hive/common/FileUtils.java 71 | */ 72 | public static String unescapePathName(String path) { 73 | int len = path.length(); 74 | //pre-allocate sb to have enough buffer size, to avoid realloc 75 | StringBuilder sb = new StringBuilder(len); 76 | for (int i = 0; i < len; i++) { 77 | char c = path.charAt(i); 78 | if (c == '%' && i + 2 < len) { 79 | int code = -1; 80 | try { 81 | code = Integer.parseInt(path.substring(i + 1, i + 3), 16); 82 | } catch (Exception e) { 83 | code = -1; 84 | } 85 | if (code >= 0) { 86 | sb.append((char) code); 87 | i += 2; 88 | continue; 89 | } 90 | } 91 | sb.append(c); 92 | } 93 | return sb.toString(); 94 | } 95 | 96 | private static AbstractMap.SimpleEntry getPartitionColumnValuePair(String partition) { 97 | String column = null; 98 | String value = null; 99 | Matcher partitionMatcher = PARTITION_NAME_VALUE_PATTERN.matcher(partition); 100 | 101 | if (partitionMatcher.matches()) { 102 | column = unescapePathName(partitionMatcher.group(1)); 103 | value = unescapePathName(partitionMatcher.group(2)); 104 | } else { 105 | throw new InvalidPartitionNameException(partition); 106 | } 107 | 108 | return new AbstractMap.SimpleEntry(column, value); 109 | } 110 | 111 | public static List getPartitionValuesFromName(String partitionName) { 112 | List partitionValues = new ArrayList<>(); 113 | String[] partitions = partitionName.split(PARTITION_NAME_DELIMITER); 114 | for(String partition : partitions) { 115 | Entry entry = getPartitionColumnValuePair(partition); 116 | partitionValues.add(entry.getValue()); 117 | } 118 | 119 | return partitionValues; 120 | } 121 | 122 | } 123 | -------------------------------------------------------------------------------- /aws-glue-datacatalog-client-common/src/test/java/com/amazonaws/glue/catalog/util/PartitionKeyTest.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.catalog.util; 2 | 3 | import com.amazonaws.services.glue.model.Partition; 4 | import com.amazonaws.services.glue.model.PartitionError; 5 | 6 | import com.google.common.collect.Lists; 7 | import org.junit.Test; 8 | 9 | import java.util.List; 10 | 11 | import static org.junit.Assert.assertEquals; 12 | import static org.junit.Assert.assertFalse; 13 | 14 | public class PartitionKeyTest { 15 | 16 | @Test(expected=IllegalArgumentException.class) 17 | public void testNull() { 18 | new PartitionKey((List) null); 19 | } 20 | 21 | @Test 22 | public void testEqualsDifferentTable() { 23 | List values1 = Lists.newArrayList("value1", "value2"); 24 | List values2 = Lists.newArrayList("value1", "value2"); 25 | Partition partition1 = TestObjects.getTestPartition("ns", "table1", values1); 26 | Partition partition2 = TestObjects.getTestPartition("ns", "table2", values2); 27 | PartitionKey partitionKey1 = new PartitionKey(partition1); 28 | PartitionKey partitionKey2 = new PartitionKey(partition2); 29 | assertEquals(partitionKey1, partitionKey2); 30 | assertEquals(partitionKey1.hashCode(), partitionKey2.hashCode()); 31 | } 32 | 33 | @Test 34 | public void testEqualsEmptyValue() { 35 | List values = Lists.newArrayList(); 36 | Partition partition1 = TestObjects.getTestPartition("ns", "table", values); 37 | Partition partition2 = TestObjects.getTestPartition("ns", "table", values); 38 | PartitionKey partitionKey1 = new PartitionKey(partition1); 39 | PartitionKey partitionKey2 = new PartitionKey(partition2); 40 | assertEquals(partitionKey1, partitionKey2); 41 | assertEquals(partitionKey1.hashCode(), partitionKey2.hashCode()); 42 | } 43 | 44 | @Test 45 | public void testEqualsDifferentClass() { 46 | List values1 = Lists.newArrayList("value1", "value2"); 47 | List values2 = Lists.newLinkedList(values1); 48 | Partition partition1 = TestObjects.getTestPartition("ns", "table", values1); 49 | Partition partition2 = TestObjects.getTestPartition("ns", "table", values2); 50 | PartitionKey partitionKey1 = new PartitionKey(partition1); 51 | PartitionKey partitionKey2 = new PartitionKey(partition2); 52 | assertEquals(partitionKey1, partitionKey2); 53 | assertEquals(partitionKey1.hashCode(), partitionKey2.hashCode()); 54 | } 55 | 56 | @Test 57 | public void testEqualsPartitionError() { 58 | List values1 = Lists.newArrayList("value1", "value2"); 59 | List values2 = Lists.newArrayList("value1", "value2"); 60 | PartitionError partitionError1 = TestObjects.getPartitionError(values1, new RuntimeException("foo")); 61 | PartitionError partitionError2 = TestObjects.getPartitionError(values2, new Exception("foo2")); 62 | PartitionKey partitionKey1 = new PartitionKey(partitionError1.getPartitionValues()); 63 | PartitionKey partitionKey2 = new PartitionKey(partitionError2.getPartitionValues()); 64 | assertEquals(partitionKey1, partitionKey2); 65 | assertEquals(partitionKey1.hashCode(), partitionKey2.hashCode()); 66 | } 67 | 68 | @Test 69 | public void testEqualsPartitionAndPartitionError() { 70 | List values1 = Lists.newArrayList("value1", "value2"); 71 | List values2 = Lists.newArrayList("value1", "value2"); 72 | Partition partition = TestObjects.getTestPartition("ns", "table", values1); 73 | PartitionError partitionError = TestObjects.getPartitionError(values2, new RuntimeException("foo")); 74 | PartitionKey partitionKey1 = new PartitionKey(partition); 75 | PartitionKey partitionKey2 = new PartitionKey(partitionError.getPartitionValues()); 76 | assertEquals(partitionKey1, partitionKey2); 77 | assertEquals(partitionKey1.hashCode(), partitionKey2.hashCode()); 78 | } 79 | 80 | @Test 81 | public void testEqualsNull() { 82 | List values = Lists.newArrayList("value1", "value2"); 83 | Partition partition = TestObjects.getTestPartition("ns", "table", values); 84 | PartitionKey partitionKey = new PartitionKey(partition); 85 | assertFalse(partitionKey.equals(null)); 86 | } 87 | 88 | @Test 89 | public void testGetValues() { 90 | List values = Lists.newArrayList("value1", "value2"); 91 | Partition partition = TestObjects.getTestPartition("ns", "table", values); 92 | PartitionKey partitionKey1 = new PartitionKey(partition); 93 | assertEquals(Lists.newArrayList(values), partitionKey1.getValues()); 94 | 95 | PartitionError partitionError = TestObjects.getPartitionError(values, new RuntimeException("foo")); 96 | PartitionKey partitionKey2 = new PartitionKey(partitionError.getPartitionValues()); 97 | assertEquals(Lists.newArrayList(values), partitionKey2.getValues()); 98 | } 99 | 100 | } -------------------------------------------------------------------------------- /aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/metastore/AWSGlueMetastore.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.catalog.metastore; 2 | 3 | import com.amazonaws.services.glue.model.ColumnStatistics; 4 | import com.amazonaws.services.glue.model.ColumnStatisticsError; 5 | import com.amazonaws.services.glue.model.Database; 6 | import com.amazonaws.services.glue.model.DatabaseInput; 7 | import com.amazonaws.services.glue.model.Partition; 8 | import com.amazonaws.services.glue.model.PartitionError; 9 | import com.amazonaws.services.glue.model.PartitionInput; 10 | import com.amazonaws.services.glue.model.PartitionValueList; 11 | import com.amazonaws.services.glue.model.Table; 12 | import com.amazonaws.services.glue.model.TableInput; 13 | import com.amazonaws.services.glue.model.UserDefinedFunction; 14 | import com.amazonaws.services.glue.model.UserDefinedFunctionInput; 15 | import org.apache.hadoop.hive.metastore.api.EnvironmentContext; 16 | import org.apache.thrift.TException; 17 | 18 | import java.util.List; 19 | import java.util.Map; 20 | 21 | /** 22 | * This is the accessor interface for using AWS Glue as a metastore. 23 | * The generic AWSGlue interface{@link com.amazonaws.services.glue.AWSGlue} 24 | * has a number of methods that are irrelevant for clients using Glue only 25 | * as a metastore. 26 | * Think of this interface as a wrapper over AWSGlue. This additional layer 27 | * of abstraction achieves the following - 28 | * a) Hides the non-metastore related operations present in AWSGlue 29 | * b) Hides away the batching and pagination related limitations of AWSGlue 30 | */ 31 | public interface AWSGlueMetastore { 32 | 33 | void createDatabase(DatabaseInput databaseInput); 34 | 35 | Database getDatabase(String dbName); 36 | 37 | List getAllDatabases(); 38 | 39 | void updateDatabase(String databaseName, DatabaseInput databaseInput); 40 | 41 | void deleteDatabase(String dbName); 42 | 43 | void createTable(String dbName, TableInput tableInput); 44 | 45 | Table getTable(String dbName, String tableName); 46 | 47 | List getTables(String dbname, String tablePattern); 48 | 49 | void updateTable(String dbName, TableInput tableInput); 50 | 51 | void updateTable(String dbName, TableInput tableInput, EnvironmentContext environmentContext); 52 | 53 | void deleteTable(String dbName, String tableName); 54 | 55 | Partition getPartition(String dbName, String tableName, List partitionValues); 56 | 57 | List getPartitionsByNames(String dbName, String tableName, 58 | List partitionsToGet); 59 | 60 | List getPartitions(String dbName, String tableName, String expression, 61 | long max) throws TException; 62 | 63 | void updatePartition(String dbName, String tableName, List partitionValues, 64 | PartitionInput partitionInput); 65 | 66 | void deletePartition(String dbName, String tableName, List partitionValues); 67 | 68 | List createPartitions(String dbName, String tableName, 69 | List partitionInputs); 70 | 71 | void createUserDefinedFunction(String dbName, UserDefinedFunctionInput functionInput); 72 | 73 | UserDefinedFunction getUserDefinedFunction(String dbName, String functionName); 74 | 75 | List getUserDefinedFunctions(String dbName, String pattern); 76 | 77 | List getUserDefinedFunctions(String pattern); 78 | 79 | void deleteUserDefinedFunction(String dbName, String functionName); 80 | 81 | void updateUserDefinedFunction(String dbName, String functionName, UserDefinedFunctionInput functionInput); 82 | 83 | void deletePartitionColumnStatistics(String dbName, String tableName, List partitionValues, String colName); 84 | 85 | void deleteTableColumnStatistics(String dbName, String tableName, String colName); 86 | 87 | Map> getPartitionColumnStatistics( 88 | String dbName, 89 | String tableName, 90 | List partitionValues, 91 | List columnNames 92 | ); 93 | 94 | List getTableColumnStatistics( 95 | String dbName, 96 | String tableName, 97 | List colNames 98 | ); 99 | 100 | List updatePartitionColumnStatistics( 101 | String dbName, 102 | String tableName, 103 | List partitionValues, 104 | List columnStatistics 105 | ); 106 | 107 | List updateTableColumnStatistics( 108 | String dbName, 109 | String tableName, 110 | List columnStatistics 111 | ); 112 | } -------------------------------------------------------------------------------- /aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/util/MetastoreClientUtils.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.catalog.util; 2 | 3 | import com.amazonaws.glue.catalog.metastore.GlueMetastoreClientDelegate; 4 | import com.amazonaws.glue.shims.AwsGlueHiveShims; 5 | import com.amazonaws.glue.shims.ShimsLoader; 6 | import com.google.common.collect.Maps; 7 | 8 | import org.apache.commons.lang3.StringUtils; 9 | import org.apache.hadoop.conf.Configuration; 10 | import org.apache.hadoop.fs.Path; 11 | import org.apache.hadoop.hive.metastore.Warehouse; 12 | import org.apache.hadoop.hive.metastore.api.InvalidObjectException; 13 | import org.apache.hadoop.hive.metastore.api.MetaException; 14 | import org.apache.hadoop.hive.metastore.api.Table; 15 | 16 | import java.util.Map; 17 | 18 | import static com.google.common.base.Preconditions.checkNotNull; 19 | import static org.apache.hadoop.hive.metastore.TableType.EXTERNAL_TABLE; 20 | 21 | public final class MetastoreClientUtils { 22 | 23 | private static final AwsGlueHiveShims hiveShims = ShimsLoader.getHiveShims(); 24 | 25 | private MetastoreClientUtils() { 26 | // static util class should not be instantiated 27 | } 28 | 29 | /** 30 | * @return boolean 31 | * true -> if directory was able to be created. 32 | * false -> if directory already exists. 33 | * @throws MetaException if directory could not be created. 34 | */ 35 | public static boolean makeDirs(Warehouse wh, Path path) throws MetaException { 36 | checkNotNull(wh, "Warehouse cannot be null"); 37 | checkNotNull(path, "Path cannot be null"); 38 | 39 | boolean madeDir = false; 40 | if (!wh.isDir(path)) { 41 | if (!hiveShims.mkdirs(wh, path)) { 42 | throw new MetaException("Unable to create path: " + path); 43 | } 44 | madeDir = true; 45 | } 46 | return madeDir; 47 | } 48 | 49 | /** 50 | * Taken from HiveMetaStore#create_table_core 51 | * https://github.com/apache/hive/blob/rel/release-2.3.0/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java#L1370-L1383 52 | */ 53 | public static void validateTableObject(Table table, Configuration conf) throws InvalidObjectException { 54 | checkNotNull(table, "table cannot be null"); 55 | checkNotNull(table.getSd(), "Table#StorageDescriptor cannot be null"); 56 | 57 | if (!hiveShims.validateTableName(table.getTableName(), conf)) { 58 | throw new InvalidObjectException(table.getTableName() + " is not a valid object name"); 59 | } 60 | String validate = hiveShims.validateTblColumns(table.getSd().getCols()); 61 | if (validate != null) { 62 | throw new InvalidObjectException("Invalid column " + validate); 63 | } 64 | 65 | if (table.getPartitionKeys() != null) { 66 | validate = hiveShims.validateTblColumns(table.getPartitionKeys()); 67 | if (validate != null) { 68 | throw new InvalidObjectException("Invalid partition column " + validate); 69 | } 70 | } 71 | } 72 | 73 | /** 74 | * Should be used when getting table from Glue that may have been created by 75 | * users manually or through Crawlers. Validates that table contains properties required by Hive/Spark. 76 | * @param table 77 | */ 78 | public static void validateGlueTable(com.amazonaws.services.glue.model.Table table) { 79 | checkNotNull(table, "table cannot be null"); 80 | 81 | for (HiveTableValidator validator : HiveTableValidator.values()) { 82 | validator.validate(table); 83 | } 84 | } 85 | 86 | public static Map deepCopyMap(Map originalMap) { 87 | Map deepCopy = Maps.newHashMap(); 88 | if (originalMap == null) { 89 | return deepCopy; 90 | } 91 | 92 | for (Map.Entry entry : originalMap.entrySet()) { 93 | deepCopy.put(entry.getKey(), entry.getValue()); 94 | } 95 | return deepCopy; 96 | } 97 | 98 | /** 99 | * Mimics MetaStoreUtils.isExternalTable 100 | * Additional logic: check Table#getTableType to see if isExternalTable 101 | */ 102 | public static boolean isExternalTable(org.apache.hadoop.hive.metastore.api.Table table) { 103 | if (table == null) { 104 | return false; 105 | } 106 | 107 | Map params = table.getParameters(); 108 | String paramsExternalStr = params == null ? null : params.get("EXTERNAL"); 109 | if (paramsExternalStr != null) { 110 | return "TRUE".equalsIgnoreCase(paramsExternalStr); 111 | } 112 | 113 | return table.getTableType() != null && EXTERNAL_TABLE.name().equalsIgnoreCase(table.getTableType()); 114 | } 115 | 116 | public static String getCatalogId(Configuration conf) { 117 | if (StringUtils.isNotEmpty(conf.get(GlueMetastoreClientDelegate.CATALOG_ID_CONF))) { 118 | return conf.get(GlueMetastoreClientDelegate.CATALOG_ID_CONF); 119 | } 120 | // This case defaults to using the caller's account Id as Catalog Id. 121 | return null; 122 | } 123 | 124 | } 125 | -------------------------------------------------------------------------------- /aws-glue-datacatalog-client-common/src/test/java/com/amazonaws/glue/catalog/util/MetastoreClientUtilsTest.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.catalog.util; 2 | 3 | import com.amazonaws.glue.catalog.converters.BaseCatalogToHiveConverter; 4 | 5 | import com.amazonaws.glue.catalog.converters.CatalogToHiveConverter; 6 | import com.google.common.collect.ImmutableMap; 7 | import org.apache.hadoop.fs.Path; 8 | import org.apache.hadoop.hive.conf.HiveConf; 9 | import org.apache.hadoop.hive.metastore.Warehouse; 10 | import org.apache.hadoop.hive.metastore.api.Database; 11 | import org.apache.hadoop.hive.metastore.api.InvalidObjectException; 12 | import org.apache.hadoop.hive.metastore.api.MetaException; 13 | import org.apache.hadoop.hive.metastore.api.Table; 14 | import org.junit.Before; 15 | import org.junit.Test; 16 | 17 | import java.util.Map; 18 | 19 | import static com.amazonaws.glue.catalog.util.TestObjects.getTestDatabase; 20 | import static com.amazonaws.glue.catalog.util.TestObjects.getTestTable; 21 | import static org.apache.hadoop.hive.metastore.TableType.EXTERNAL_TABLE; 22 | import static org.junit.Assert.assertEquals; 23 | import static org.junit.Assert.assertFalse; 24 | import static org.junit.Assert.assertNotSame; 25 | import static org.junit.Assert.assertTrue; 26 | import static org.mockito.Mockito.mock; 27 | import static org.mockito.Mockito.when; 28 | 29 | public class MetastoreClientUtilsTest { 30 | 31 | private static final String TEST_LOCATION = "s3://mybucket/"; 32 | 33 | private Warehouse wh; 34 | private HiveConf conf; 35 | 36 | private Path testPath; 37 | private Database testDb; 38 | private Table testTbl; 39 | 40 | @Before 41 | public void setup() { 42 | wh = mock(Warehouse.class); 43 | conf = new HiveConf(); 44 | testPath = new Path(TEST_LOCATION); 45 | CatalogToHiveConverter catalogToHiveConverter = new BaseCatalogToHiveConverter(); 46 | testDb = catalogToHiveConverter.convertDatabase(getTestDatabase()); 47 | testTbl = catalogToHiveConverter.convertTable(getTestTable(), testDb.getName()); 48 | } 49 | 50 | @Test(expected = NullPointerException.class) 51 | public void testMakeDirsNullWh() throws Exception { 52 | MetastoreClientUtils.makeDirs(null, testPath); 53 | } 54 | 55 | @Test(expected = NullPointerException.class) 56 | public void testMakeDirsNullPath() throws Exception { 57 | MetastoreClientUtils.makeDirs(wh, null); 58 | } 59 | 60 | @Test 61 | public void testMakeDirsAlreadyExists() throws Exception { 62 | when(wh.isDir(testPath)).thenReturn(true); 63 | assertFalse(MetastoreClientUtils.makeDirs(wh, testPath)); 64 | } 65 | 66 | @Test(expected = MetaException.class) 67 | public void testMakeDirsCannotCreateDir() throws Exception { 68 | when(wh.isDir(testPath)).thenReturn(false); 69 | when(wh.mkdirs(testPath)).thenReturn(false); 70 | MetastoreClientUtils.makeDirs(wh, testPath); 71 | } 72 | 73 | @Test(expected = InvalidObjectException.class) 74 | public void testValidateTableObjectInvalidName() throws Exception { 75 | testTbl.setTableName("!"); 76 | MetastoreClientUtils.validateTableObject(testTbl, conf); 77 | } 78 | 79 | @Test(expected = InvalidObjectException.class) 80 | public void testValidateTableObjectInvalidColumnName() throws Exception { 81 | testTbl.getSd().getCols().get(0).setType("invalidtype"); 82 | MetastoreClientUtils.validateTableObject(testTbl, conf); 83 | } 84 | 85 | @Test(expected = InvalidObjectException.class) 86 | public void testValidateTableObjectInvalidPartitionKeys() throws Exception { 87 | testTbl.getPartitionKeys().get(0).setType("invalidtype"); 88 | MetastoreClientUtils.validateTableObject(testTbl, conf); 89 | } 90 | 91 | @Test 92 | public void testDeepCopy() throws Exception { 93 | Map orig = ImmutableMap.of("key", "val"); 94 | Map deepCopy = MetastoreClientUtils.deepCopyMap(orig); 95 | assertNotSame(deepCopy, orig); 96 | assertEquals(deepCopy, orig); 97 | } 98 | 99 | @Test 100 | public void testIsExternalTableFalse() { 101 | assertFalse(MetastoreClientUtils.isExternalTable(testTbl)); 102 | } 103 | 104 | @Test 105 | public void testIsExternalTableParamTrue() { 106 | testTbl.getParameters().put("EXTERNAL", "true"); 107 | assertTrue(MetastoreClientUtils.isExternalTable(testTbl)); 108 | } 109 | 110 | @Test 111 | public void testIsExternalTableTableTypeTrue() { 112 | testTbl.setTableType(EXTERNAL_TABLE.name()); 113 | testTbl.setParameters(null); 114 | assertTrue(MetastoreClientUtils.isExternalTable(testTbl)); 115 | } 116 | 117 | @Test 118 | public void testIsExternalTableParamPriority() { 119 | // parameters has higher priority when there is conflict 120 | testTbl.getParameters().put("EXTERNAL", "false"); 121 | testTbl.setTableType(EXTERNAL_TABLE.name()); 122 | assertFalse(MetastoreClientUtils.isExternalTable(testTbl)); 123 | } 124 | 125 | @Test 126 | public void testIsExternalTableNull() { 127 | assertFalse(MetastoreClientUtils.isExternalTable(null)); 128 | } 129 | } 130 | -------------------------------------------------------------------------------- /aws-glue-datacatalog-spark-client/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | com.amazonaws.glue 7 | aws-glue-datacatalog-hive-client 8 | 3.4.0-SNAPSHOT 9 | 10 | 4.0.0 11 | AWSGlueDataCatalogSparkClient 12 | 13 | aws-glue-datacatalog-spark-client 14 | 15 | 16 | org.apache.hive 17 | hive-metastore 18 | ${spark-hive.version} 19 | provided 20 | 21 | 22 | org.apache.hive 23 | hive-exec 24 | ${spark-hive.version} 25 | provided 26 | 27 | 28 | com.amazonaws.glue 29 | aws-glue-datacatalog-client-common 30 | ${project.version} 31 | 32 | 33 | org.apache.hadoop 34 | hadoop-common 35 | ${hadoop.version} 36 | provided 37 | 38 | 39 | org.apache.httpcomponents 40 | httpclient 41 | ${httpclient.version} 42 | 43 | 44 | 45 | com.amazonaws.glue 46 | aws-glue-datacatalog-client-common 47 | ${project.version} 48 | test 49 | tests 50 | 51 | 52 | junit 53 | junit 54 | ${junit.version} 55 | test 56 | 57 | 58 | org.hamcrest 59 | hamcrest-library 60 | ${hamcrest.version} 61 | test 62 | 63 | 64 | org.mockito 65 | mockito-core 66 | ${mockito.version} 67 | test 68 | 69 | 70 | 71 | 72 | 73 | org.apache.maven.plugins 74 | maven-jar-plugin 75 | 2.2 76 | 77 | 78 | 79 | test-jar 80 | 81 | 82 | 83 | 84 | 85 | **/SparkCatalogMetastoreClientTest.* 86 | 87 | 88 | 89 | 90 | org.apache.maven.plugins 91 | maven-shade-plugin 92 | 3.0.0 93 | 94 | false 95 | 96 | 97 | 98 | package 99 | 100 | shade 101 | 102 | 103 | 104 | 105 | com.amazonaws.glue:aws-glue-datacatalog-client-common 106 | com.amazonaws.glue:shims-common 107 | com.amazonaws.glue:hive3-shims 108 | com.amazonaws.glue:shims-loader 109 | com.amazonaws.glue:spark-hive-shims 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | com.amazonaws.glue 6 | aws-glue-datacatalog-hive-client 7 | 3.4.0-SNAPSHOT 8 | 9 | aws-glue-datacatalog-client-common 10 | aws-glue-datacatalog-spark-client 11 | aws-glue-datacatalog-hive3-client 12 | shims 13 | 14 | pom 15 | 16 | AWSGlueDataCatalogHiveClient 17 | http://maven.apache.org 18 | 19 | 20 | UTF-8 21 | 28.2-jre 22 | 3.1.3 23 | 2.3.10-SNAPSHOT 24 | 1.12.31 25 | 4.11 26 | 1.10.19 27 | 1.7.1 28 | 2.15 29 | 1.6.4 30 | 3.1.0 31 | 2.9 32 | 1.3 33 | 4.5.3 34 | ${basedir}/dev-support/check_style.xml 35 | 36 | 37 | 38 | 39 | 40 | 41 | org.apache.maven.plugins 42 | maven-compiler-plugin 43 | 2.3.1 44 | 45 | 1.8 46 | 1.8 47 | UTF-8 48 | 49 | 50 | 51 | org.apache.maven.plugins 52 | maven-surefire-plugin 53 | 2.5 54 | 55 | 56 | **/*Test.java 57 | 58 | 59 | **/TestObjects.java 60 | **/*IntegrationTest.java 61 | 62 | 63 | 64 | 65 | org.apache.maven.plugins 66 | maven-eclipse-plugin 67 | ${maven.eclipse.plugin.version} 68 | 69 | true 70 | true 71 | Hive 72 | ${basedir}/dev-support/eclipse-styles.xml 73 | 74 | 75 | 76 | org.apache.maven.plugins 77 | maven-checkstyle-plugin 78 | 2.9.1 79 | 80 | 81 | verify 82 | verify 83 | 84 | UTF-8 85 | true 86 | false 87 | false 88 | 89 | 90 | check 91 | 92 | 93 | 94 | 95 | 96 | 97 | org.apache.maven.plugins 98 | maven-assembly-plugin 99 | 2.6 100 | 101 | 102 | 103 | jar-with-dependencies 104 | 105 | 106 | 107 | 108 | make-assembly 109 | 110 | package 111 | 112 | single 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | org.apache.maven.plugins 125 | maven-project-info-reports-plugin 126 | 2.7 127 | 128 | false 129 | 130 | 131 | 132 | 133 | 134 | 135 | -------------------------------------------------------------------------------- /aws-glue-datacatalog-spark-client/src/test/java/com/amazonaws/glue/catalog/metastore/integrationtest/MultipleCatalogTableIntegrationTest.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.catalog.metastore.integrationtest; 2 | 3 | import com.amazonaws.glue.catalog.converters.BaseCatalogToHiveConverter; 4 | import com.amazonaws.glue.catalog.converters.CatalogToHiveConverter; 5 | import com.amazonaws.services.glue.model.DeleteDatabaseRequest; 6 | import com.amazonaws.services.glue.model.EntityNotFoundException; 7 | import com.amazonaws.services.glue.model.GetTableRequest; 8 | import com.google.common.collect.ImmutableMap; 9 | import com.google.common.collect.ImmutableSet; 10 | import org.apache.hadoop.hive.metastore.api.Database; 11 | import org.apache.hadoop.hive.metastore.api.MetaException; 12 | import org.apache.hadoop.hive.metastore.api.Table; 13 | import org.apache.thrift.TException; 14 | import org.junit.After; 15 | import org.junit.Before; 16 | import org.junit.Rule; 17 | import org.junit.Test; 18 | import org.junit.rules.ExpectedException; 19 | 20 | import java.util.HashSet; 21 | import java.util.Map; 22 | import java.util.Set; 23 | 24 | import static com.amazonaws.glue.catalog.util.TestObjects.getTestDatabase; 25 | import static com.amazonaws.glue.catalog.util.TestObjects.getTestTable; 26 | import static org.junit.Assert.assertEquals; 27 | 28 | public class MultipleCatalogTableIntegrationTest extends MultipleCatalogIntegrationTestBase { 29 | private Database databaseInAnotherCatalog; 30 | private Database database; 31 | private Table table; 32 | private CatalogToHiveConverter catalogToHiveConverter = new BaseCatalogToHiveConverter(); 33 | 34 | @Rule 35 | public ExpectedException expectedException = ExpectedException.none(); 36 | 37 | @Before 38 | public void setup() throws MetaException, TException { 39 | super.setup(); 40 | 41 | databaseInAnotherCatalog = catalogToHiveConverter.convertDatabase(getTestDatabase()); 42 | database = databaseInAnotherCatalog.deepCopy(); 43 | database.setName(constructDbName(databaseInAnotherCatalog.getName())); 44 | metastoreClient.createDatabase(database); 45 | 46 | table = catalogToHiveConverter.convertTable(getTestTable(), database.getName()); 47 | metastoreClient.createTable(table); 48 | } 49 | 50 | @After 51 | public void clean() { 52 | glueClient.deleteDatabase(new DeleteDatabaseRequest().withName(database.getName())); 53 | } 54 | 55 | @Test 56 | public void getTable() throws TException { 57 | Table createdTable = metastoreClient.getTable(database.getName(), table.getTableName()); 58 | //time is updated on service side and can be different from what we sent 59 | table.setCreateTime(createdTable.getCreateTime()); 60 | table.setLastAccessTime(createdTable.getLastAccessTime()); 61 | assertEquals(table, createdTable); 62 | 63 | createdTable = catalogToHiveConverter.convertTable( 64 | glueClient.getTable(new GetTableRequest() 65 | .withCatalogId(anotherCatalogId) 66 | .withDatabaseName(databaseInAnotherCatalog.getName()) 67 | .withName(table.getTableName()) 68 | ).getTable(), database.getName()); 69 | assertEquals(table, createdTable); 70 | } 71 | 72 | @Test 73 | public void getAllTables() throws TException { 74 | Table table2 = catalogToHiveConverter.convertTable(getTestTable(), database.getName()); 75 | metastoreClient.createTable(table2); 76 | 77 | Table table3 = catalogToHiveConverter.convertTable(getTestTable(), database.getName()); 78 | metastoreClient.createTable(table3); 79 | 80 | Set tableNames = new HashSet<>(metastoreClient.getAllTables(database.getName())); 81 | Set expectedNames = ImmutableSet.of(table.getTableName(), table2.getTableName(), table3.getTableName()); 82 | assertEquals(expectedNames, tableNames); 83 | } 84 | 85 | @Test 86 | public void alterTable() throws TException { 87 | Map newParameters = ImmutableMap.of("param1", "newVal1"); 88 | table.setParameters(newParameters); 89 | metastoreClient.alter_table(database.getName(), table.getTableName(), table); 90 | 91 | Table alteredTable = catalogToHiveConverter.convertTable( 92 | glueClient.getTable(new GetTableRequest() 93 | .withCatalogId(anotherCatalogId) 94 | .withDatabaseName(databaseInAnotherCatalog.getName()) 95 | .withName(table.getTableName()) 96 | ).getTable(), database.getName()); 97 | //time is updated on service side and can be different from what we sent 98 | table.setCreateTime(alteredTable.getCreateTime()); 99 | table.setLastAccessTime(alteredTable.getLastAccessTime()); 100 | assertEquals(table, alteredTable); 101 | } 102 | 103 | @Test 104 | public void dropTable() throws TException { 105 | metastoreClient.dropTable(database.getName(), table.getTableName()); 106 | 107 | expectedException.expect(EntityNotFoundException.class); 108 | glueClient.getTable(new GetTableRequest() 109 | .withCatalogId(anotherCatalogId) 110 | .withDatabaseName(databaseInAnotherCatalog.getName()) 111 | .withName(table.getTableName()) 112 | ); 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/util/BatchDeletePartitionsHelper.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.catalog.util; 2 | 3 | import com.amazonaws.glue.catalog.converters.BaseCatalogToHiveConverter; 4 | import com.amazonaws.glue.catalog.converters.CatalogToHiveConverter; 5 | import com.amazonaws.glue.catalog.converters.CatalogToHiveConverterFactory; 6 | import com.amazonaws.services.glue.model.BatchDeletePartitionRequest; 7 | import com.amazonaws.services.glue.model.BatchDeletePartitionResult; 8 | import com.amazonaws.services.glue.model.EntityNotFoundException; 9 | import com.amazonaws.services.glue.model.ErrorDetail; 10 | import com.amazonaws.services.glue.model.GetPartitionRequest; 11 | import com.amazonaws.services.glue.model.GetPartitionResult; 12 | import com.amazonaws.services.glue.model.Partition; 13 | import com.amazonaws.services.glue.model.PartitionError; 14 | import com.amazonaws.services.glue.AWSGlue; 15 | 16 | import org.apache.log4j.Logger; 17 | import org.apache.thrift.TException; 18 | 19 | import java.util.Collection; 20 | import java.util.List; 21 | import java.util.Map; 22 | 23 | public final class BatchDeletePartitionsHelper { 24 | 25 | private static final Logger logger = Logger.getLogger(BatchDeletePartitionsHelper.class); 26 | 27 | private final AWSGlue client; 28 | private final String namespaceName; 29 | private final String tableName; 30 | private final String catalogId; 31 | private final List partitions; 32 | private Map partitionMap; 33 | private TException firstTException; 34 | private CatalogToHiveConverter catalogToHiveConverter; 35 | 36 | public BatchDeletePartitionsHelper(AWSGlue client, String namespaceName, String tableName, 37 | String catalogId, List partitions) { 38 | this.client = client; 39 | this.namespaceName = namespaceName; 40 | this.tableName = tableName; 41 | this.catalogId = catalogId; 42 | this.partitions = partitions; 43 | catalogToHiveConverter = CatalogToHiveConverterFactory.getCatalogToHiveConverter(); 44 | } 45 | 46 | public BatchDeletePartitionsHelper deletePartitions() { 47 | partitionMap = PartitionUtils.buildPartitionMap(partitions); 48 | 49 | BatchDeletePartitionRequest request = new BatchDeletePartitionRequest().withDatabaseName(namespaceName) 50 | .withTableName(tableName).withCatalogId(catalogId) 51 | .withPartitionsToDelete(PartitionUtils.getPartitionValuesList(partitionMap)); 52 | 53 | try { 54 | BatchDeletePartitionResult result = client.batchDeletePartition(request); 55 | processResult(result); 56 | } catch (Exception e) { 57 | logger.error("Exception thrown while deleting partitions in DataCatalog: ", e); 58 | firstTException = catalogToHiveConverter.wrapInHiveException(e); 59 | if (PartitionUtils.isInvalidUserInputException(e)) { 60 | setAllFailed(); 61 | } else { 62 | checkIfPartitionsDeleted(); 63 | } 64 | } 65 | return this; 66 | } 67 | 68 | private void setAllFailed() { 69 | partitionMap.clear(); 70 | } 71 | 72 | private void processResult(final BatchDeletePartitionResult batchDeletePartitionsResult) { 73 | List partitionErrors = batchDeletePartitionsResult.getErrors(); 74 | if (partitionErrors == null || partitionErrors.isEmpty()) { 75 | return; 76 | } 77 | 78 | logger.error(String.format("BatchDeletePartitions failed to delete %d out of %d partitions. \n", 79 | partitionErrors.size(), partitionMap.size())); 80 | 81 | for (PartitionError partitionError : partitionErrors) { 82 | partitionMap.remove(new PartitionKey(partitionError.getPartitionValues())); 83 | ErrorDetail errorDetail = partitionError.getErrorDetail(); 84 | logger.error(errorDetail.toString()); 85 | if (firstTException == null) { 86 | firstTException = catalogToHiveConverter.errorDetailToHiveException(errorDetail); 87 | } 88 | } 89 | } 90 | 91 | private void checkIfPartitionsDeleted() { 92 | for (Partition partition : partitions) { 93 | if (!partitionDeleted(partition)) { 94 | partitionMap.remove(new PartitionKey(partition)); 95 | } 96 | } 97 | } 98 | 99 | private boolean partitionDeleted(Partition partition) { 100 | GetPartitionRequest request = new GetPartitionRequest() 101 | .withDatabaseName(partition.getDatabaseName()) 102 | .withTableName(partition.getTableName()) 103 | .withPartitionValues(partition.getValues()) 104 | .withCatalogId(catalogId); 105 | 106 | try { 107 | GetPartitionResult result = client.getPartition(request); 108 | Partition partitionReturned = result.getPartition(); 109 | return partitionReturned == null; //probably always false 110 | } catch (EntityNotFoundException e) { 111 | // here we assume namespace and table exist. It is assured by calling "isInvalidUserInputException" method above 112 | return true; 113 | } catch (Exception e) { 114 | logger.error(String.format("Get partition request %s failed. ", request.toString()), e); 115 | // Partition status unknown, we assume that the partition was not deleted 116 | return false; 117 | } 118 | } 119 | 120 | public TException getFirstTException() { 121 | return firstTException; 122 | } 123 | 124 | public Collection getPartitionsDeleted() { 125 | return partitionMap.values(); 126 | } 127 | 128 | } 129 | -------------------------------------------------------------------------------- /aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/util/BatchCreatePartitionsHelper.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.catalog.util; 2 | 3 | import com.amazonaws.glue.catalog.metastore.AWSGlueMetastore; 4 | import com.amazonaws.glue.catalog.converters.CatalogToHiveConverter; 5 | import com.amazonaws.glue.catalog.converters.CatalogToHiveConverterFactory; 6 | import com.amazonaws.glue.catalog.converters.GlueInputConverter; 7 | import com.amazonaws.services.glue.model.EntityNotFoundException; 8 | import com.amazonaws.services.glue.model.Partition; 9 | import com.amazonaws.services.glue.model.PartitionError; 10 | import com.google.common.collect.Lists; 11 | 12 | import org.apache.hadoop.hive.metastore.api.AlreadyExistsException; 13 | import org.apache.log4j.Logger; 14 | import org.apache.thrift.TException; 15 | import org.apache.commons.lang3.StringUtils; 16 | 17 | import java.util.Collection; 18 | import java.util.List; 19 | import java.util.Map; 20 | 21 | import static com.amazonaws.glue.catalog.util.PartitionUtils.isInvalidUserInputException; 22 | 23 | public final class BatchCreatePartitionsHelper { 24 | 25 | private static final Logger logger = Logger.getLogger(BatchCreatePartitionsHelper.class); 26 | 27 | private final AWSGlueMetastore glueClient; 28 | private final String databaseName; 29 | private final String tableName; 30 | private final List partitions; 31 | private final boolean ifNotExists; 32 | private Map partitionMap; 33 | private List partitionsFailed; 34 | private TException firstTException; 35 | private String catalogId; 36 | private CatalogToHiveConverter catalogToHiveConverter; 37 | 38 | public BatchCreatePartitionsHelper(AWSGlueMetastore glueClient, String databaseName, String tableName, String catalogId, 39 | List partitions, boolean ifNotExists) { 40 | this.glueClient = glueClient; 41 | this.databaseName = databaseName; 42 | this.tableName = tableName; 43 | this.catalogId = catalogId; 44 | this.partitions = partitions; 45 | this.ifNotExists = ifNotExists; 46 | catalogToHiveConverter = CatalogToHiveConverterFactory.getCatalogToHiveConverter(); 47 | } 48 | 49 | public BatchCreatePartitionsHelper createPartitions() { 50 | partitionMap = PartitionUtils.buildPartitionMap(partitions); 51 | partitionsFailed = Lists.newArrayList(); 52 | 53 | try { 54 | List result = 55 | glueClient.createPartitions(databaseName, tableName, 56 | GlueInputConverter.convertToPartitionInputs(partitionMap.values())); 57 | processResult(result); 58 | } catch (Exception e) { 59 | logger.error("Exception thrown while creating partitions in DataCatalog: ", e); 60 | firstTException = catalogToHiveConverter.wrapInHiveException(e); 61 | if (isInvalidUserInputException(e)) { 62 | setAllFailed(); 63 | } else { 64 | checkIfPartitionsCreated(); 65 | } 66 | } 67 | return this; 68 | } 69 | 70 | private void setAllFailed() { 71 | partitionsFailed = partitions; 72 | partitionMap.clear(); 73 | } 74 | 75 | private void processResult(List partitionErrors) { 76 | if (partitionErrors == null || partitionErrors.isEmpty()) { 77 | return; 78 | } 79 | 80 | logger.error(String.format("BatchCreatePartitions failed to create %d out of %d partitions. \n", 81 | partitionErrors.size(), partitionMap.size())); 82 | 83 | for (PartitionError partitionError : partitionErrors) { 84 | Partition partitionFailed = partitionMap.remove(new PartitionKey(partitionError.getPartitionValues())); 85 | 86 | TException exception = catalogToHiveConverter.errorDetailToHiveException(partitionError.getErrorDetail()); 87 | if (ifNotExists && exception instanceof AlreadyExistsException) { 88 | // AlreadyExistsException is allowed, so we shouldn't add the partition to partitionsFailed list 89 | continue; 90 | } 91 | logger.error(exception); 92 | if (firstTException == null) { 93 | firstTException = exception; 94 | } 95 | partitionsFailed.add(partitionFailed); 96 | } 97 | } 98 | 99 | private void checkIfPartitionsCreated() { 100 | for (Partition partition : partitions) { 101 | if (!partitionExists(partition)) { 102 | partitionsFailed.add(partition); 103 | partitionMap.remove(new PartitionKey(partition)); 104 | } 105 | } 106 | } 107 | 108 | private boolean partitionExists(Partition partition) { 109 | try { 110 | Partition partitionReturned = glueClient.getPartition(databaseName, tableName, partition.getValues()); 111 | return partitionReturned != null; //probably always true here 112 | } catch (EntityNotFoundException e) { 113 | // here we assume namespace and table exist. It is assured by calling "isInvalidUserInputException" method above 114 | return false; 115 | } catch (Exception e) { 116 | logger.error(String.format("Get partition request %s failed. ", StringUtils.join(partition.getValues(), "/")), e); 117 | // partition status unknown, we assume that the partition was not created 118 | return false; 119 | } 120 | } 121 | 122 | public TException getFirstTException() { 123 | return firstTException; 124 | } 125 | 126 | public Collection getPartitionsCreated() { 127 | return partitionMap.values(); 128 | } 129 | 130 | public List getPartitionsFailed() { 131 | return partitionsFailed; 132 | } 133 | 134 | } -------------------------------------------------------------------------------- /aws-glue-datacatalog-spark-client/src/test/java/com/amazonaws/glue/catalog/metastore/integrationtest/MultipleCatalogUserDefinedFunctionIntegrationTest.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.catalog.metastore.integrationtest; 2 | 3 | import com.amazonaws.glue.catalog.converters.BaseCatalogToHiveConverter; 4 | import com.amazonaws.glue.catalog.converters.CatalogToHiveConverter; 5 | import com.amazonaws.services.glue.model.DeleteDatabaseRequest; 6 | import com.amazonaws.services.glue.model.EntityNotFoundException; 7 | import com.amazonaws.services.glue.model.GetUserDefinedFunctionRequest; 8 | import org.apache.hadoop.hive.metastore.api.Database; 9 | import org.apache.hadoop.hive.metastore.api.Function; 10 | import org.apache.hadoop.hive.metastore.api.MetaException; 11 | import org.apache.thrift.TException; 12 | import org.junit.After; 13 | import org.junit.Before; 14 | import org.junit.Rule; 15 | import org.junit.Test; 16 | import org.junit.rules.ExpectedException; 17 | 18 | import java.util.HashSet; 19 | import java.util.Set; 20 | 21 | import static com.amazonaws.glue.catalog.util.TestObjects.getTestDatabase; 22 | import static com.amazonaws.glue.catalog.util.TestObjects.getCatalogTestFunction; 23 | import static org.junit.Assert.assertEquals; 24 | import static org.junit.Assert.assertTrue; 25 | 26 | public class MultipleCatalogUserDefinedFunctionIntegrationTest extends MultipleCatalogIntegrationTestBase { 27 | private Database databaseInAnotherCatalog; 28 | private Database database; 29 | private Function function1; 30 | private Function function2; 31 | private Function function3; 32 | private CatalogToHiveConverter catalogToHiveConverter = new BaseCatalogToHiveConverter(); 33 | 34 | @Rule 35 | public ExpectedException expectedException = ExpectedException.none(); 36 | 37 | @Before 38 | public void setup() throws MetaException, TException { 39 | super.setup(); 40 | 41 | databaseInAnotherCatalog = catalogToHiveConverter.convertDatabase(getTestDatabase()); 42 | database = databaseInAnotherCatalog.deepCopy(); 43 | database.setName(constructDbName(databaseInAnotherCatalog.getName())); 44 | metastoreClient.createDatabase(database); 45 | 46 | function1 = catalogToHiveConverter.convertFunction(database.getName(), getCatalogTestFunction()); 47 | metastoreClient.createFunction(function1); 48 | 49 | function2 = catalogToHiveConverter.convertFunction(database.getName(), getCatalogTestFunction()); 50 | metastoreClient.createFunction(function2); 51 | 52 | function3 = catalogToHiveConverter.convertFunction(database.getName(), getCatalogTestFunction()); 53 | metastoreClient.createFunction(function3); 54 | } 55 | 56 | @After 57 | public void clean() { 58 | glueClient.deleteDatabase(new DeleteDatabaseRequest().withName(database.getName())); 59 | } 60 | 61 | @Test 62 | public void getFunction() throws TException { 63 | Function createdFunction = metastoreClient.getFunction(database.getName(), function1.getFunctionName()); 64 | function1.setCreateTime(createdFunction.getCreateTime()); //it can be changed during creation process 65 | assertEquals(function1, createdFunction); 66 | 67 | createdFunction = catalogToHiveConverter.convertFunction(database.getName(), 68 | glueClient.getUserDefinedFunction(new GetUserDefinedFunctionRequest() 69 | .withCatalogId(anotherCatalogId) 70 | .withDatabaseName(databaseInAnotherCatalog.getName()) 71 | .withFunctionName(function1.getFunctionName()) 72 | ).getUserDefinedFunction() 73 | ); 74 | assertEquals(function1, createdFunction); 75 | } 76 | 77 | @Test 78 | public void getFunctions() throws TException { 79 | Set functionNames = new HashSet<>(metastoreClient.getFunctions(database.getName(), ".*")); 80 | assertEquals(3, functionNames.size()); 81 | assertTrue(functionNames.contains(function1.getFunctionName())); 82 | assertTrue(functionNames.contains(function2.getFunctionName())); 83 | assertTrue(functionNames.contains(function3.getFunctionName())); 84 | } 85 | 86 | @Test 87 | public void updateFunction() throws TException { 88 | Function newFunction = function1.deepCopy(); 89 | String newClassName = "newClassName"; 90 | newFunction.setClassName(newClassName); 91 | metastoreClient.alterFunction(database.getName(), function1.getFunctionName(), newFunction); 92 | 93 | Function alteredFunction = catalogToHiveConverter.convertFunction(database.getName(), 94 | glueClient.getUserDefinedFunction(new GetUserDefinedFunctionRequest() 95 | .withCatalogId(anotherCatalogId) 96 | .withDatabaseName(databaseInAnotherCatalog.getName()) 97 | .withFunctionName(newFunction.getFunctionName()) 98 | ).getUserDefinedFunction() 99 | ); 100 | alteredFunction.setCreateTime(newFunction.getCreateTime()); 101 | assertEquals(newFunction, alteredFunction); 102 | } 103 | 104 | @Test 105 | public void dropFunction() throws TException { 106 | metastoreClient.dropFunction(database.getName(), function1.getFunctionName()); 107 | 108 | expectedException.expect(EntityNotFoundException.class); 109 | glueClient.getUserDefinedFunction(new GetUserDefinedFunctionRequest() 110 | .withCatalogId(anotherCatalogId) 111 | .withDatabaseName(databaseInAnotherCatalog.getName()) 112 | .withFunctionName(function1.getFunctionName()) 113 | ); 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## AWS Glue Data Catalog Client for Apache Hive Metastore 2 | The AWS Glue Data Catalog is a fully managed, Apache Hive Metastore compatible, metadata repository. Customers can use the Data Catalog as a central repository to store structural and operational metadata for their data. 3 | 4 | AWS Glue provides out-of-box integration with Amazon EMR that enables customers to use the AWS Glue Data Catalog as an external Hive Metastore. To learn more, visit our [documentation](https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-hive-metastore-glue.html). 5 | 6 | This is an open-source implementation of the Apache Hive Metastore client on Amazon EMR clusters that uses the AWS Glue Data Catalog as an external Hive Metastore. It serves as a reference implementation for building a Hive Metastore-compatible client that connects to the AWS Glue Data Catalog. It may be ported to other Hive Metastore-compatible platforms such as other Hadoop and Apache Spark distributions. 7 | 8 | This package is compatible with Spark 3 and Hive 3. 9 | 10 | **Note**: in order for this client implementation to be used with Apache Hive, a patch included in this [JIRA](https://issues.apache.org/jira/browse/HIVE-12679) must be applied to it. All versions of Apache Hive running on Amazon EMR that support the AWS Glue Data Catalog as the metastore already include this patch. Please follow **all steps listed below in the following order**. 11 | 12 | ## Patching Apache Hive and Installing It Locally 13 | 14 | Obtain a copy of Hive from GitHub at https://github.com/apache/hive. 15 | 16 | git clone https://github.com/apache/hive.git 17 | 18 | To build the Hive client, you need to first apply this [patch](https://github.com/awslabs/aws-glue-data-catalog-client-for-apache-hive-metastore/blob/branch-3.4.0/branch_3.1.patch). Download this patch and move it to your local Hive git repository you created above. This patch is included in the repository. Apply the patch and build Hive. 19 | 20 | cd 21 | git checkout branch-3.1 22 | git apply -3 branch_3.1.patch 23 | mvn clean install -DskipTests 24 | 25 | As Spark uses a fork of Hive based off the 2.3 branch, in order to build the Spark client, you need Hive 2.3 built with this [patch](https://issues.apache.org/jira/secure/attachment/12958418/HIVE-12679.branch-2.3.patch). 26 | 27 | If building off the previous Hive repo, please reset those changes: 28 | 29 | git add . 30 | git reset --hard 31 | 32 | Continue with patching the 2.3 branch: 33 | 34 | cd 35 | git checkout branch-2.3 36 | patch -p0 63 | hive.metastore.client.factory.class 64 | com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory 65 | 66 | 67 | ## Configuring Spark to Use the Spark Client 68 | 69 | Similarly, for Spark, you need to install the client jar in Spark's CLASSPATH and create or update Spark's own hive-site.xml to add the above property. On Amazon EMR, this is set in /usr/lib/spark/conf/hive-site.xml. You can also find the location of the Spark client jar in /usr/lib/spark/conf/spark-defaults.conf. 70 | 71 | ## Enabling client side caching for catalog 72 | 73 | Currently, we provide support for caching: 74 | 75 | a) Table metadata - Response from Glue's GetTable operation (https://docs.aws.amazon.com/glue/latest/webapi/API_GetTable.html#API_GetTable_ResponseSyntax) 76 | b) Database metadata - Response from Glue's GetDatabase operation (https://docs.aws.amazon.com/glue/latest/webapi/API_GetDatabase.html#API_GetDatabase_ResponseSyntax) 77 | 78 | Both these entities have dedicated caches for themselves and can be enabled/tuned individually. 79 | 80 | To enable/tune Table cache, use the following properties in your hive/spark configuration file: 81 | 82 | 83 | aws.glue.cache.table.enable 84 | true 85 | 86 | 87 | aws.glue.cache.table.size 88 | 1000 89 | 90 | 91 | aws.glue.cache.table.ttl-mins 92 | 30 93 | 94 | 95 | To enable/tune Database cache: 96 | 97 | 98 | aws.glue.cache.db.enable 99 | true 100 | 101 | 102 | aws.glue.cache.db.size 103 | 1000 104 | 105 | 106 | aws.glue.cache.db.ttl-mins 107 | 30 108 | 109 | 110 | NOTE: The caching logic is disabled by default. 111 | 112 | ## License 113 | 114 | This library is licensed under the Apache 2.0 License. -------------------------------------------------------------------------------- /aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/metastore/AWSGlueClientFactory.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.catalog.metastore; 2 | 3 | import com.amazonaws.ClientConfiguration; 4 | import com.amazonaws.auth.AWSCredentialsProvider; 5 | import com.amazonaws.client.builder.AwsClientBuilder; 6 | import com.amazonaws.regions.Region; 7 | import com.amazonaws.regions.Regions; 8 | import com.amazonaws.services.glue.AWSGlue; 9 | import com.amazonaws.services.glue.AWSGlueClientBuilder; 10 | import com.google.common.annotations.VisibleForTesting; 11 | import com.google.common.base.Preconditions; 12 | import com.google.common.base.Strings; 13 | import org.apache.commons.lang3.StringUtils; 14 | import org.apache.hadoop.conf.Configuration; 15 | import org.apache.hadoop.hive.metastore.api.MetaException; 16 | import org.apache.hadoop.security.UserGroupInformation; 17 | import org.apache.hadoop.util.ReflectionUtils; 18 | import org.apache.log4j.Logger; 19 | 20 | import java.io.IOException; 21 | 22 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_CATALOG_CREDENTIALS_PROVIDER_FACTORY_CLASS; 23 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_CATALOG_SEPARATOR; 24 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_CONNECTION_TIMEOUT; 25 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_ENDPOINT; 26 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_MAX_CONNECTIONS; 27 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_MAX_RETRY; 28 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_SOCKET_TIMEOUT; 29 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_REGION; 30 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.DEFAULT_CONNECTION_TIMEOUT; 31 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.DEFAULT_MAX_CONNECTIONS; 32 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.DEFAULT_MAX_RETRY; 33 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.DEFAULT_SOCKET_TIMEOUT; 34 | 35 | public final class AWSGlueClientFactory implements GlueClientFactory { 36 | 37 | private static final Logger logger = Logger.getLogger(AWSGlueClientFactory.class); 38 | 39 | private final Configuration conf; 40 | 41 | public AWSGlueClientFactory(Configuration conf) { 42 | Preconditions.checkNotNull(conf, "Configuration cannot be null"); 43 | this.conf = conf; 44 | } 45 | 46 | @Override 47 | public AWSGlue newClient() throws MetaException { 48 | try { 49 | AWSGlueClientBuilder glueClientBuilder = AWSGlueClientBuilder.standard() 50 | .withCredentials(getAWSCredentialsProvider(conf)); 51 | 52 | String regionStr = getProperty(AWS_REGION, conf); 53 | String glueEndpoint = getProperty(AWS_GLUE_ENDPOINT, conf); 54 | 55 | // ClientBuilder only allows one of EndpointConfiguration or Region to be set 56 | if (StringUtils.isNotBlank(glueEndpoint)) { 57 | logger.info("Setting glue service endpoint to " + glueEndpoint); 58 | glueClientBuilder.setEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(glueEndpoint, null)); 59 | } else if (StringUtils.isNotBlank(regionStr)) { 60 | logger.info("Setting region to : " + regionStr); 61 | glueClientBuilder.setRegion(regionStr); 62 | } else { 63 | Region currentRegion = Regions.getCurrentRegion(); 64 | if (currentRegion != null) { 65 | logger.info("Using region from ec2 metadata : " + currentRegion.getName()); 66 | glueClientBuilder.setRegion(currentRegion.getName()); 67 | } else { 68 | logger.info("No region info found, using SDK default region: us-east-1"); 69 | } 70 | } 71 | 72 | glueClientBuilder.setClientConfiguration(buildClientConfiguration(conf)); 73 | return decorateGlueClient(glueClientBuilder.build()); 74 | } catch (Exception e) { 75 | String message = "Unable to build AWSGlueClient: " + e; 76 | logger.error(message); 77 | throw new MetaException(message); 78 | } 79 | } 80 | 81 | private AWSGlue decorateGlueClient(AWSGlue originalGlueClient) { 82 | if (Strings.isNullOrEmpty(getProperty(AWS_GLUE_CATALOG_SEPARATOR, conf))) { 83 | return originalGlueClient; 84 | } 85 | return new AWSGlueMultipleCatalogDecorator( 86 | originalGlueClient, 87 | getProperty(AWS_GLUE_CATALOG_SEPARATOR, conf)); 88 | } 89 | 90 | @VisibleForTesting 91 | AWSCredentialsProvider getAWSCredentialsProvider(Configuration conf) { 92 | 93 | Class providerFactoryClass = conf 94 | .getClass(AWS_CATALOG_CREDENTIALS_PROVIDER_FACTORY_CLASS, 95 | DefaultAWSCredentialsProviderFactory.class).asSubclass( 96 | AWSCredentialsProviderFactory.class); 97 | AWSCredentialsProviderFactory provider = ReflectionUtils.newInstance( 98 | providerFactoryClass, conf); 99 | return provider.buildAWSCredentialsProvider(conf); 100 | } 101 | 102 | private String createUserAgent() { 103 | try { 104 | String ugi = UserGroupInformation.getCurrentUser().getUserName(); 105 | return "ugi=" + ugi; 106 | } catch (IOException e) { 107 | /* 108 | * IOException here means that the login failed according 109 | * to UserGroupInformation.getCurrentUser(). In this case, 110 | * we will throw a RuntimeException the same way as 111 | * HiveMetaStoreClient.java 112 | * If not catching IOException, the build will fail with 113 | * unreported exception IOExcetion. 114 | */ 115 | logger.error("Unable to resolve current user name " + e.getMessage()); 116 | throw new RuntimeException(e); 117 | } 118 | } 119 | 120 | private ClientConfiguration buildClientConfiguration(Configuration conf) { 121 | // Pass UserAgent to client configuration, which enable CloudTrail to audit UGI info 122 | // when using Glue Catalog as metastore 123 | ClientConfiguration clientConfiguration = new ClientConfiguration() 124 | .withUserAgent(createUserAgent()) 125 | .withMaxErrorRetry(conf.getInt(AWS_GLUE_MAX_RETRY, DEFAULT_MAX_RETRY)) 126 | .withMaxConnections(conf.getInt(AWS_GLUE_MAX_CONNECTIONS, DEFAULT_MAX_CONNECTIONS)) 127 | .withConnectionTimeout(conf.getInt(AWS_GLUE_CONNECTION_TIMEOUT, DEFAULT_CONNECTION_TIMEOUT)) 128 | .withSocketTimeout(conf.getInt(AWS_GLUE_SOCKET_TIMEOUT, DEFAULT_SOCKET_TIMEOUT)); 129 | return clientConfiguration; 130 | } 131 | 132 | private static String getProperty(String propertyName, Configuration conf) { 133 | return Strings.isNullOrEmpty(System.getProperty(propertyName)) ? 134 | conf.get(propertyName) : System.getProperty(propertyName); 135 | } 136 | } 137 | -------------------------------------------------------------------------------- /aws-glue-datacatalog-spark-client/src/test/java/com/amazonaws/glue/catalog/metastore/integrationtest/MetastoreClientDatabaseIntegrationTest.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.catalog.metastore.integrationtest; 2 | 3 | import com.amazonaws.glue.catalog.converters.BaseCatalogToHiveConverter; 4 | import com.amazonaws.glue.catalog.converters.CatalogToHiveConverter; 5 | import com.amazonaws.glue.catalog.metastore.AWSCatalogMetastoreClient; 6 | import com.amazonaws.glue.catalog.metastore.GlueClientFactory; 7 | import com.amazonaws.glue.catalog.util.GlueTestClientFactory; 8 | import com.amazonaws.services.glue.AWSGlue; 9 | import com.amazonaws.services.glue.model.DeleteDatabaseRequest; 10 | import com.amazonaws.services.glue.model.EntityNotFoundException; 11 | 12 | import com.google.common.collect.Lists; 13 | import com.google.common.collect.Maps; 14 | import org.apache.hadoop.fs.Path; 15 | import org.apache.hadoop.hive.conf.HiveConf; 16 | import org.apache.hadoop.hive.metastore.IMetaStoreClient; 17 | import org.apache.hadoop.hive.metastore.Warehouse; 18 | import org.apache.hadoop.hive.metastore.api.AlreadyExistsException; 19 | import org.apache.hadoop.hive.metastore.api.Database; 20 | import org.apache.hadoop.hive.metastore.api.MetaException; 21 | import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; 22 | import org.apache.thrift.TException; 23 | import org.junit.After; 24 | import org.junit.Before; 25 | import org.junit.Test; 26 | 27 | import java.util.List; 28 | import java.util.Map; 29 | 30 | import static com.amazonaws.glue.catalog.util.TestObjects.getTestDatabase; 31 | 32 | import static org.junit.Assert.assertEquals; 33 | import static org.junit.Assert.assertTrue; 34 | import static org.mockito.Matchers.any; 35 | import static org.mockito.Matchers.anyString; 36 | import static org.mockito.Mockito.mock; 37 | import static org.mockito.Mockito.when; 38 | 39 | public class MetastoreClientDatabaseIntegrationTest { 40 | 41 | private AWSGlue glueClient; 42 | private IMetaStoreClient metastoreClient; 43 | private Warehouse wh; 44 | private Database hiveDB; 45 | private com.amazonaws.services.glue.model.Database catalogDB; 46 | private HiveConf conf; 47 | private Path tmpPath; 48 | private List additionalDbForCleanup; 49 | private CatalogToHiveConverter catalogToHiveConverter = new BaseCatalogToHiveConverter(); 50 | 51 | @Before 52 | public void setup() throws MetaException { 53 | conf = mock(HiveConf.class); 54 | wh = mock(Warehouse.class); 55 | tmpPath = new Path("/db"); 56 | when(wh.getDefaultDatabasePath(anyString())).thenReturn(tmpPath); 57 | when(wh.getDnsPath(any(Path.class))).thenReturn(tmpPath); 58 | when(wh.isDir(any(Path.class))).thenReturn(true); 59 | when(conf.get(HiveConf.ConfVars.USERS_IN_ADMIN_ROLE.varname,"")).thenReturn(""); 60 | 61 | glueClient = new GlueTestClientFactory().newClient(); 62 | GlueClientFactory clientFactory = mock(GlueClientFactory.class); 63 | when(clientFactory.newClient()).thenReturn(glueClient); 64 | 65 | metastoreClient = new AWSCatalogMetastoreClient.Builder().withHiveConf(conf).withWarehouse(wh) 66 | .withClientFactory(clientFactory).build(); 67 | catalogDB = getTestDatabase(); 68 | hiveDB = catalogToHiveConverter.convertDatabase(catalogDB); 69 | 70 | additionalDbForCleanup = Lists.newArrayList(); 71 | } 72 | 73 | @After 74 | public void clean() { 75 | try { 76 | glueClient.deleteDatabase(new DeleteDatabaseRequest().withName(hiveDB.getName())); 77 | 78 | for (String db : additionalDbForCleanup) { 79 | glueClient.deleteDatabase(new DeleteDatabaseRequest().withName(db)); 80 | } 81 | } catch (EntityNotFoundException e) { 82 | //there will be no database to drop after drop database test, so swallow the exception 83 | } 84 | } 85 | 86 | @Test 87 | public void testDefaultDatabase() throws TException { 88 | // default db should exist 89 | metastoreClient.getDatabase("default"); 90 | } 91 | 92 | @Test 93 | public void createValidDatabase() throws TException { 94 | metastoreClient.createDatabase(hiveDB); 95 | Database db = metastoreClient.getDatabase(hiveDB.getName()); 96 | assertEquals(hiveDB, db); 97 | } 98 | 99 | @Test(expected = AlreadyExistsException.class) 100 | public void createDuplicateDatabase() throws TException { 101 | metastoreClient.createDatabase(hiveDB); 102 | metastoreClient.createDatabase(hiveDB); 103 | } 104 | 105 | @Test 106 | public void testDropValidDatabase() throws TException { 107 | metastoreClient.createDatabase(hiveDB); 108 | metastoreClient.dropDatabase(hiveDB.getName()); 109 | assertDrop(hiveDB.getName()); 110 | } 111 | 112 | @Test(expected = NoSuchObjectException.class) 113 | public void dropInvalidDatabase() throws TException { 114 | metastoreClient.dropDatabase(hiveDB.getName()); 115 | } 116 | 117 | @Test 118 | public void dropInvalidDataBaseWithIgnoreUnknownDatabase() throws TException { 119 | metastoreClient.dropDatabase("unknown_db", false, true); 120 | } 121 | 122 | @Test 123 | public void listValidDatabases() throws TException { 124 | Database database2 = catalogToHiveConverter.convertDatabase(getTestDatabase()); 125 | additionalDbForCleanup.add(database2.getName()); 126 | metastoreClient.createDatabase(hiveDB); 127 | metastoreClient.createDatabase(database2); 128 | List databaseName = metastoreClient.getAllDatabases(); 129 | assertTrue(databaseName.contains(hiveDB.getName())); 130 | assertTrue(databaseName.contains(database2.getName())); 131 | } 132 | 133 | @Test(expected = NoSuchObjectException.class) 134 | public void getInvalidDatabase() throws TException { 135 | metastoreClient.getDatabase(hiveDB.getName()); 136 | } 137 | 138 | @Test 139 | public void alterDatabase() throws TException { 140 | Map parameters = Maps.newHashMap(); 141 | parameters.put("param3", "value3"); 142 | parameters.put("param4", "value4"); 143 | 144 | metastoreClient.createDatabase(hiveDB); 145 | 146 | Database updatedDB = catalogToHiveConverter.convertDatabase(getTestDatabase()); 147 | updatedDB.setName(hiveDB.getName()); 148 | updatedDB.setParameters(parameters); 149 | 150 | metastoreClient.alterDatabase(hiveDB.getName(), updatedDB); 151 | Database afterUpdate = metastoreClient.getDatabase(hiveDB.getName()); 152 | 153 | assertTrue(afterUpdate.getParameters().containsKey("param3")); 154 | assertTrue(afterUpdate.getParameters().containsKey("param4")); 155 | } 156 | 157 | private void assertDrop(final String databaseName) throws TException { 158 | boolean dropped = false; 159 | try { 160 | metastoreClient.getDatabase(databaseName); 161 | } catch (NoSuchObjectException e) { 162 | dropped = true; 163 | } 164 | assertTrue("Unable to drop database", dropped); 165 | } 166 | 167 | } 168 | -------------------------------------------------------------------------------- /aws-glue-datacatalog-client-common/src/test/java/com/amazonaws/glue/catalog/converters/GlueInputConverterTest.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.catalog.converters; 2 | 3 | import com.amazonaws.glue.catalog.util.TestObjects; 4 | import com.amazonaws.services.glue.model.Database; 5 | import com.amazonaws.services.glue.model.DatabaseInput; 6 | import com.amazonaws.services.glue.model.Partition; 7 | import com.amazonaws.services.glue.model.PartitionInput; 8 | import com.amazonaws.services.glue.model.Table; 9 | import com.amazonaws.services.glue.model.TableInput; 10 | import com.amazonaws.services.glue.model.UserDefinedFunction; 11 | import com.amazonaws.services.glue.model.UserDefinedFunctionInput; 12 | 13 | import com.google.common.collect.Lists; 14 | import org.junit.Before; 15 | import org.junit.Test; 16 | 17 | import static org.junit.Assert.assertEquals; 18 | 19 | public class GlueInputConverterTest { 20 | 21 | private Database testDB; 22 | private Table testTable; 23 | private Partition testPartition; 24 | private UserDefinedFunction testFunction; 25 | private CatalogToHiveConverter catalogToHiveConverter; 26 | 27 | @Before 28 | public void setup() { 29 | testDB = TestObjects.getTestDatabase(); 30 | testTable = TestObjects.getTestTable(); 31 | testPartition = TestObjects.getTestPartition(testDB.getName(), testTable.getName(), Lists.newArrayList("val1")); 32 | testFunction = TestObjects.getCatalogTestFunction(); 33 | catalogToHiveConverter = new BaseCatalogToHiveConverter(); 34 | } 35 | 36 | @Test 37 | public void testConvertHiveDbToDatabaseInput() { 38 | org.apache.hadoop.hive.metastore.api.Database hivedb = catalogToHiveConverter.convertDatabase(testDB); 39 | DatabaseInput dbInput = GlueInputConverter.convertToDatabaseInput(hivedb); 40 | 41 | assertEquals(testDB.getName(), dbInput.getName()); 42 | assertEquals(testDB.getDescription(), dbInput.getDescription()); 43 | assertEquals(testDB.getLocationUri(), dbInput.getLocationUri()); 44 | assertEquals(testDB.getParameters(), dbInput.getParameters()); 45 | } 46 | 47 | @Test 48 | public void testConvertCatalogDbToDatabaseInput() { 49 | DatabaseInput dbInput = GlueInputConverter.convertToDatabaseInput(testDB); 50 | 51 | assertEquals(testDB.getName(), dbInput.getName()); 52 | assertEquals(testDB.getDescription(), dbInput.getDescription()); 53 | assertEquals(testDB.getLocationUri(), dbInput.getLocationUri()); 54 | assertEquals(testDB.getParameters(), dbInput.getParameters()); 55 | } 56 | 57 | @Test 58 | public void testConvertHiveTableToTableInput() { 59 | org.apache.hadoop.hive.metastore.api.Table hivetbl = catalogToHiveConverter.convertTable(testTable, testDB.getName()); 60 | TableInput tblInput = GlueInputConverter.convertToTableInput(hivetbl); 61 | 62 | assertEquals(testTable.getName(), tblInput.getName()); 63 | assertEquals(testTable.getOwner(), tblInput.getOwner()); 64 | assertEquals(testTable.getTableType(), tblInput.getTableType()); 65 | assertEquals(testTable.getParameters(), tblInput.getParameters()); 66 | assertEquals(testTable.getPartitionKeys(), tblInput.getPartitionKeys()); 67 | assertEquals(testTable.getRetention(), tblInput.getRetention()); 68 | assertEquals(testTable.getLastAccessTime(), tblInput.getLastAccessTime()); 69 | assertEquals(testTable.getStorageDescriptor(), tblInput.getStorageDescriptor()); 70 | assertEquals(testTable.getViewExpandedText(), tblInput.getViewExpandedText()); 71 | assertEquals(testTable.getViewOriginalText(), tblInput.getViewOriginalText()); 72 | } 73 | 74 | @Test 75 | public void testConvertCatalogTableToTableInput() { 76 | TableInput tblInput = GlueInputConverter.convertToTableInput(testTable); 77 | 78 | assertEquals(testTable.getName(), tblInput.getName()); 79 | assertEquals(testTable.getOwner(), tblInput.getOwner()); 80 | assertEquals(testTable.getTableType(), tblInput.getTableType()); 81 | assertEquals(testTable.getParameters(), tblInput.getParameters()); 82 | assertEquals(testTable.getPartitionKeys(), tblInput.getPartitionKeys()); 83 | assertEquals(testTable.getRetention(), tblInput.getRetention()); 84 | assertEquals(testTable.getLastAccessTime(), tblInput.getLastAccessTime()); 85 | assertEquals(testTable.getStorageDescriptor(), tblInput.getStorageDescriptor()); 86 | assertEquals(testTable.getViewExpandedText(), tblInput.getViewExpandedText()); 87 | assertEquals(testTable.getViewOriginalText(), tblInput.getViewOriginalText()); 88 | } 89 | 90 | @Test 91 | public void testConvertHivePartitionToPartitionInput() { 92 | org.apache.hadoop.hive.metastore.api.Partition hivePartition = catalogToHiveConverter.convertPartition(testPartition); 93 | PartitionInput partitionInput = GlueInputConverter.convertToPartitionInput(hivePartition); 94 | 95 | assertEquals(testPartition.getLastAccessTime(), partitionInput.getLastAccessTime()); 96 | assertEquals(testPartition.getParameters(), partitionInput.getParameters()); 97 | assertEquals(testPartition.getStorageDescriptor(), partitionInput.getStorageDescriptor()); 98 | assertEquals(testPartition.getValues(), partitionInput.getValues()); 99 | } 100 | 101 | @Test 102 | public void testConvertCatalogPartitionToPartitionInput() { 103 | PartitionInput partitionInput = GlueInputConverter.convertToPartitionInput(testPartition); 104 | 105 | assertEquals(testPartition.getLastAccessTime(), partitionInput.getLastAccessTime()); 106 | assertEquals(testPartition.getParameters(), partitionInput.getParameters()); 107 | assertEquals(testPartition.getStorageDescriptor(), partitionInput.getStorageDescriptor()); 108 | assertEquals(testPartition.getValues(), partitionInput.getValues()); 109 | } 110 | 111 | @Test 112 | public void testConvertHiveFunctionToFunctionInput() { 113 | org.apache.hadoop.hive.metastore.api.Function hiveFunction = 114 | catalogToHiveConverter.convertFunction(testDB.getName(), testFunction); 115 | UserDefinedFunctionInput functionInput = GlueInputConverter.convertToUserDefinedFunctionInput(hiveFunction); 116 | 117 | assertEquals(testFunction.getClassName(), functionInput.getClassName()); 118 | assertEquals(testFunction.getFunctionName(), functionInput.getFunctionName()); 119 | assertEquals(testFunction.getOwnerName(), functionInput.getOwnerName()); 120 | assertEquals(testFunction.getOwnerType(), functionInput.getOwnerType()); 121 | assertEquals(testFunction.getResourceUris(), functionInput.getResourceUris()); 122 | } 123 | 124 | @Test 125 | public void testConvertHiveFunctionToFunctionInputNullOwnerType() { 126 | org.apache.hadoop.hive.metastore.api.Function hiveFunction = 127 | catalogToHiveConverter.convertFunction(testDB.getName(), testFunction); 128 | hiveFunction.setOwnerType(null); 129 | GlueInputConverter.convertToUserDefinedFunctionInput(hiveFunction); 130 | } 131 | 132 | } 133 | -------------------------------------------------------------------------------- /aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/metastore/AWSGlueMetastoreCacheDecorator.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.catalog.metastore; 2 | 3 | import com.amazonaws.services.glue.model.Database; 4 | import com.amazonaws.services.glue.model.Table; 5 | import com.google.common.annotations.VisibleForTesting; 6 | import com.google.common.cache.Cache; 7 | import com.google.common.cache.CacheBuilder; 8 | import org.apache.hadoop.conf.Configuration; 9 | import org.apache.log4j.Logger; 10 | 11 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_DB_CACHE_ENABLE; 12 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_DB_CACHE_SIZE; 13 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_DB_CACHE_TTL_MINS; 14 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_TABLE_CACHE_ENABLE; 15 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_TABLE_CACHE_SIZE; 16 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_TABLE_CACHE_TTL_MINS; 17 | 18 | import java.util.Objects; 19 | import java.util.concurrent.TimeUnit; 20 | 21 | import static com.google.common.base.Preconditions.checkArgument; 22 | import static com.google.common.base.Preconditions.checkNotNull; 23 | 24 | public class AWSGlueMetastoreCacheDecorator extends AWSGlueMetastoreBaseDecorator { 25 | 26 | private static final Logger logger = Logger.getLogger(AWSGlueMetastoreCacheDecorator.class); 27 | 28 | private final Configuration conf; 29 | 30 | private final boolean databaseCacheEnabled; 31 | 32 | private final boolean tableCacheEnabled; 33 | 34 | @VisibleForTesting 35 | protected Cache databaseCache; 36 | @VisibleForTesting 37 | protected Cache tableCache; 38 | 39 | public AWSGlueMetastoreCacheDecorator(Configuration conf, AWSGlueMetastore awsGlueMetastore) { 40 | super(awsGlueMetastore); 41 | 42 | checkNotNull(conf, "conf can not be null"); 43 | this.conf = conf; 44 | 45 | databaseCacheEnabled = conf.getBoolean(AWS_GLUE_DB_CACHE_ENABLE, false); 46 | if(databaseCacheEnabled) { 47 | int dbCacheSize = conf.getInt(AWS_GLUE_DB_CACHE_SIZE, 0); 48 | int dbCacheTtlMins = conf.getInt(AWS_GLUE_DB_CACHE_TTL_MINS, 0); 49 | 50 | //validate config values for size and ttl 51 | validateConfigValueIsGreaterThanZero(AWS_GLUE_DB_CACHE_SIZE, dbCacheSize); 52 | validateConfigValueIsGreaterThanZero(AWS_GLUE_DB_CACHE_TTL_MINS, dbCacheTtlMins); 53 | 54 | //initialize database cache 55 | databaseCache = CacheBuilder.newBuilder().maximumSize(dbCacheSize) 56 | .expireAfterWrite(dbCacheTtlMins, TimeUnit.MINUTES).build(); 57 | } else { 58 | databaseCache = null; 59 | } 60 | 61 | tableCacheEnabled = conf.getBoolean(AWS_GLUE_TABLE_CACHE_ENABLE, false); 62 | if(tableCacheEnabled) { 63 | int tableCacheSize = conf.getInt(AWS_GLUE_TABLE_CACHE_SIZE, 0); 64 | int tableCacheTtlMins = conf.getInt(AWS_GLUE_TABLE_CACHE_TTL_MINS, 0); 65 | 66 | //validate config values for size and ttl 67 | validateConfigValueIsGreaterThanZero(AWS_GLUE_TABLE_CACHE_SIZE, tableCacheSize); 68 | validateConfigValueIsGreaterThanZero(AWS_GLUE_TABLE_CACHE_TTL_MINS, tableCacheTtlMins); 69 | 70 | //initialize table cache 71 | tableCache = CacheBuilder.newBuilder().maximumSize(tableCacheSize) 72 | .expireAfterWrite(tableCacheTtlMins, TimeUnit.MINUTES).build(); 73 | } else { 74 | tableCache = null; 75 | } 76 | 77 | logger.info("Constructed"); 78 | } 79 | 80 | private void validateConfigValueIsGreaterThanZero(String configName, int value) { 81 | checkArgument(value > 0, String.format("Invalid value for Hive Config %s. " + 82 | "Provide a value greater than zero", configName)); 83 | 84 | } 85 | 86 | @Override 87 | public Database getDatabase(String dbName) { 88 | Database result; 89 | if(databaseCacheEnabled) { 90 | Database valueFromCache = databaseCache.getIfPresent(dbName); 91 | if(valueFromCache != null) { 92 | logger.info("Cache hit for operation [getDatabase] on key [" + dbName + "]"); 93 | result = valueFromCache; 94 | } else { 95 | logger.info("Cache miss for operation [getDatabase] on key [" + dbName + "]"); 96 | result = super.getDatabase(dbName); 97 | databaseCache.put(dbName, result); 98 | } 99 | } else { 100 | result = super.getDatabase(dbName); 101 | } 102 | return result; 103 | } 104 | 105 | @Override 106 | public Table getTable(String dbName, String tableName) { 107 | Table result; 108 | if(tableCacheEnabled) { 109 | TableIdentifier key = new TableIdentifier(dbName, tableName); 110 | Table valueFromCache = tableCache.getIfPresent(key); 111 | if(valueFromCache != null) { 112 | logger.info("Cache hit for operation [getTable] on key [" + key + "]"); 113 | result = valueFromCache; 114 | } else { 115 | logger.info("Cache miss for operation [getTable] on key [" + key + "]"); 116 | result = super.getTable(dbName, tableName); 117 | tableCache.put(key, result); 118 | } 119 | } else { 120 | result = super.getTable(dbName, tableName); 121 | } 122 | return result; 123 | } 124 | 125 | static class TableIdentifier { 126 | private final String dbName; 127 | private final String tableName; 128 | 129 | public TableIdentifier(String dbName, String tableName) { 130 | this.dbName = dbName; 131 | this.tableName = tableName; 132 | } 133 | 134 | public String getDbName() { 135 | return dbName; 136 | } 137 | 138 | public String getTableName() { 139 | return tableName; 140 | } 141 | 142 | @Override 143 | public String toString() { 144 | return "TableIdentifier{" + 145 | "dbName='" + dbName + '\'' + 146 | ", tableName='" + tableName + '\'' + 147 | '}'; 148 | } 149 | 150 | @Override 151 | public boolean equals(Object o) { 152 | if (this == o) return true; 153 | if (o == null || getClass() != o.getClass()) return false; 154 | TableIdentifier that = (TableIdentifier) o; 155 | return Objects.equals(dbName, that.dbName) && 156 | Objects.equals(tableName, that.tableName); 157 | } 158 | 159 | @Override 160 | public int hashCode() { 161 | return Objects.hash(dbName, tableName); 162 | } 163 | } 164 | } -------------------------------------------------------------------------------- /aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/metastore/AWSGlueMetastoreBaseDecorator.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.catalog.metastore; 2 | 3 | import com.amazonaws.services.glue.model.ColumnStatistics; 4 | import com.amazonaws.services.glue.model.ColumnStatisticsError; 5 | import com.amazonaws.services.glue.model.Database; 6 | import com.amazonaws.services.glue.model.DatabaseInput; 7 | import com.amazonaws.services.glue.model.Partition; 8 | import com.amazonaws.services.glue.model.PartitionError; 9 | import com.amazonaws.services.glue.model.PartitionInput; 10 | import com.amazonaws.services.glue.model.PartitionValueList; 11 | import com.amazonaws.services.glue.model.Table; 12 | import com.amazonaws.services.glue.model.TableInput; 13 | import com.amazonaws.services.glue.model.UserDefinedFunction; 14 | import com.amazonaws.services.glue.model.UserDefinedFunctionInput; 15 | import org.apache.hadoop.hive.metastore.api.EnvironmentContext; 16 | import org.apache.thrift.TException; 17 | 18 | import java.util.List; 19 | import java.util.Map; 20 | 21 | import static com.google.common.base.Preconditions.checkNotNull; 22 | 23 | public class AWSGlueMetastoreBaseDecorator implements AWSGlueMetastore { 24 | 25 | private final AWSGlueMetastore awsGlueMetastore; 26 | 27 | public AWSGlueMetastoreBaseDecorator(AWSGlueMetastore awsGlueMetastore) { 28 | checkNotNull(awsGlueMetastore, "awsGlueMetastore can not be null"); 29 | this.awsGlueMetastore = awsGlueMetastore; 30 | } 31 | 32 | @Override 33 | public void createDatabase(DatabaseInput databaseInput) { 34 | awsGlueMetastore.createDatabase(databaseInput); 35 | } 36 | 37 | @Override 38 | public Database getDatabase(String dbName) { 39 | return awsGlueMetastore.getDatabase(dbName); 40 | } 41 | 42 | @Override 43 | public List getAllDatabases() { 44 | return awsGlueMetastore.getAllDatabases(); 45 | } 46 | 47 | @Override 48 | public void updateDatabase(String databaseName, DatabaseInput databaseInput) { 49 | awsGlueMetastore.updateDatabase(databaseName, databaseInput); 50 | } 51 | 52 | @Override 53 | public void deleteDatabase(String dbName) { 54 | awsGlueMetastore.deleteDatabase(dbName); 55 | } 56 | 57 | @Override 58 | public void createTable(String dbName, TableInput tableInput) { 59 | awsGlueMetastore.createTable(dbName, tableInput); 60 | } 61 | 62 | @Override 63 | public Table getTable(String dbName, String tableName) { 64 | return awsGlueMetastore.getTable(dbName, tableName); 65 | } 66 | 67 | @Override 68 | public List
getTables(String dbname, String tablePattern) { 69 | return awsGlueMetastore.getTables(dbname, tablePattern); 70 | } 71 | 72 | @Override 73 | public void updateTable(String dbName, TableInput tableInput) { 74 | awsGlueMetastore.updateTable(dbName, tableInput); 75 | } 76 | 77 | @Override 78 | public void updateTable(String dbName, TableInput tableInput, EnvironmentContext environmentContext) { 79 | awsGlueMetastore.updateTable(dbName, tableInput, environmentContext); 80 | } 81 | 82 | @Override 83 | public void deleteTable(String dbName, String tableName) { 84 | awsGlueMetastore.deleteTable(dbName, tableName); 85 | } 86 | 87 | @Override 88 | public Partition getPartition(String dbName, String tableName, List partitionValues) { 89 | return awsGlueMetastore.getPartition(dbName, tableName, partitionValues); 90 | } 91 | 92 | @Override 93 | public List getPartitionsByNames(String dbName, String tableName, List partitionsToGet) { 94 | return awsGlueMetastore.getPartitionsByNames(dbName, tableName, partitionsToGet); 95 | } 96 | 97 | @Override 98 | public List getPartitions(String dbName, String tableName, String expression, long max) throws TException { 99 | return awsGlueMetastore.getPartitions(dbName, tableName, expression, max); 100 | } 101 | 102 | @Override 103 | public void updatePartition(String dbName, String tableName, List partitionValues, PartitionInput partitionInput) { 104 | awsGlueMetastore.updatePartition(dbName, tableName, partitionValues, partitionInput); 105 | } 106 | 107 | @Override 108 | public void deletePartition(String dbName, String tableName, List partitionValues) { 109 | awsGlueMetastore.deletePartition(dbName, tableName, partitionValues); 110 | } 111 | 112 | @Override 113 | public List createPartitions(String dbName, String tableName, List partitionInputs) { 114 | return awsGlueMetastore.createPartitions(dbName, tableName, partitionInputs); 115 | } 116 | 117 | @Override 118 | public void createUserDefinedFunction(String dbName, UserDefinedFunctionInput functionInput) { 119 | awsGlueMetastore.createUserDefinedFunction(dbName, functionInput); 120 | } 121 | 122 | @Override 123 | public UserDefinedFunction getUserDefinedFunction(String dbName, String functionName) { 124 | return awsGlueMetastore.getUserDefinedFunction(dbName, functionName); 125 | } 126 | 127 | @Override 128 | public List getUserDefinedFunctions(String dbName, String pattern) { 129 | return awsGlueMetastore.getUserDefinedFunctions(dbName, pattern); 130 | } 131 | 132 | @Override 133 | public List getUserDefinedFunctions(String pattern) { 134 | return awsGlueMetastore.getUserDefinedFunctions(pattern); 135 | } 136 | 137 | @Override 138 | public void deleteUserDefinedFunction(String dbName, String functionName) { 139 | awsGlueMetastore.deleteUserDefinedFunction(dbName, functionName); 140 | } 141 | 142 | @Override 143 | public void updateUserDefinedFunction(String dbName, String functionName, UserDefinedFunctionInput functionInput) { 144 | awsGlueMetastore.updateUserDefinedFunction(dbName, functionName, functionInput); 145 | } 146 | 147 | @Override 148 | public void deletePartitionColumnStatistics(String dbName, String tableName, List partitionValues, String colName) { 149 | awsGlueMetastore.deletePartitionColumnStatistics(dbName, tableName, partitionValues, colName); 150 | } 151 | 152 | @Override 153 | public void deleteTableColumnStatistics(String dbName, String tableName, String colName) { 154 | awsGlueMetastore.deleteTableColumnStatistics(dbName, tableName, colName); 155 | } 156 | 157 | @Override 158 | public Map> getPartitionColumnStatistics(String dbName, String tableName, List partitionValues, List columnNames) { 159 | return awsGlueMetastore.getPartitionColumnStatistics(dbName, tableName, partitionValues, columnNames); 160 | } 161 | 162 | @Override 163 | public List getTableColumnStatistics(String dbName, String tableName, List colNames) { 164 | return awsGlueMetastore.getTableColumnStatistics(dbName, tableName, colNames); 165 | } 166 | 167 | @Override 168 | public List updatePartitionColumnStatistics(String dbName, String tableName, List partitionValues, List columnStatistics) { 169 | return awsGlueMetastore.updatePartitionColumnStatistics(dbName, tableName, partitionValues, columnStatistics); 170 | } 171 | 172 | @Override 173 | public List updateTableColumnStatistics(String dbName, String tableName, List columnStatistics) { 174 | return awsGlueMetastore.updateTableColumnStatistics(dbName, tableName, columnStatistics); 175 | } 176 | 177 | } -------------------------------------------------------------------------------- /aws-glue-datacatalog-client-common/src/test/java/com/amazonaws/glue/catalog/metastore/AWSGlueClientFactoryTest.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.catalog.metastore; 2 | 3 | import com.amazonaws.auth.AWSCredentials; 4 | import com.amazonaws.auth.AWSCredentialsProvider; 5 | import com.amazonaws.auth.BasicSessionCredentials; 6 | import com.amazonaws.services.glue.AWSGlue; 7 | 8 | import org.apache.hadoop.conf.Configuration; 9 | import org.hamcrest.Matchers; 10 | import org.junit.Before; 11 | import org.junit.Test; 12 | 13 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_CATALOG_CREDENTIALS_PROVIDER_FACTORY_CLASS; 14 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_CATALOG_SEPARATOR; 15 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_CONNECTION_TIMEOUT; 16 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_ENDPOINT; 17 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_MAX_CONNECTIONS; 18 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_MAX_RETRY; 19 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_SOCKET_TIMEOUT; 20 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_REGION; 21 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.DEFAULT_CONNECTION_TIMEOUT; 22 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.DEFAULT_MAX_CONNECTIONS; 23 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.DEFAULT_MAX_RETRY; 24 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.DEFAULT_SOCKET_TIMEOUT; 25 | import static org.hamcrest.Matchers.instanceOf; 26 | import static org.junit.Assert.assertEquals; 27 | import static org.junit.Assert.assertNotNull; 28 | import static org.junit.Assert.assertThat; 29 | import static org.mockito.Mockito.atLeastOnce; 30 | import static org.mockito.Mockito.never; 31 | import static org.mockito.Mockito.spy; 32 | import static org.mockito.Mockito.verify; 33 | import static org.mockito.Mockito.when; 34 | 35 | public class AWSGlueClientFactoryTest { 36 | 37 | private static final String FAKE_ACCESS_KEY = "accessKey"; 38 | private static final String FAKE_SECRET_KEY = "secretKey"; 39 | private static final String FAKE_SESSION_TOKEN = "sessionToken"; 40 | 41 | private AWSGlueClientFactory glueClientFactory; 42 | private Configuration conf; 43 | 44 | @Before 45 | public void setup() { 46 | conf = spy(new Configuration()); 47 | glueClientFactory = new AWSGlueClientFactory(conf); 48 | } 49 | 50 | @Test 51 | public void testGlueClientConstructionWithHiveConfig() throws Exception { 52 | System.setProperty(AWS_REGION, ""); 53 | System.setProperty(AWS_GLUE_ENDPOINT, ""); 54 | System.setProperty(AWS_GLUE_CATALOG_SEPARATOR, ""); 55 | when(conf.get(AWS_GLUE_ENDPOINT)).thenReturn("endpoint"); 56 | when(conf.get(AWS_REGION)).thenReturn("us-west-1"); 57 | when(conf.get(AWS_GLUE_CATALOG_SEPARATOR)).thenReturn("/"); 58 | 59 | AWSGlue glueClient = glueClientFactory.newClient(); 60 | 61 | assertNotNull(glueClient); 62 | 63 | // client reads hive conf for region & endpoint 64 | verify(conf, atLeastOnce()).get(AWS_GLUE_ENDPOINT); 65 | verify(conf, atLeastOnce()).get(AWS_REGION); 66 | verify(conf, atLeastOnce()).get(AWS_GLUE_CATALOG_SEPARATOR); 67 | } 68 | 69 | @Test 70 | public void testGlueClientContructionWithAWSConfig() throws Exception { 71 | glueClientFactory.newClient(); 72 | verify(conf, atLeastOnce()).getInt(AWS_GLUE_MAX_RETRY, DEFAULT_MAX_RETRY); 73 | verify(conf, atLeastOnce()).getInt(AWS_GLUE_MAX_CONNECTIONS, DEFAULT_MAX_CONNECTIONS); 74 | verify(conf, atLeastOnce()).getInt(AWS_GLUE_SOCKET_TIMEOUT, DEFAULT_SOCKET_TIMEOUT); 75 | verify(conf, atLeastOnce()).getInt(AWS_GLUE_CONNECTION_TIMEOUT, DEFAULT_CONNECTION_TIMEOUT); 76 | } 77 | 78 | @Test 79 | public void testGlueClientConstructionWithSystemProperty() throws Exception { 80 | System.setProperty(AWS_REGION, "us-east-1"); 81 | System.setProperty(AWS_GLUE_ENDPOINT, "endpoint"); 82 | System.setProperty(AWS_GLUE_CATALOG_SEPARATOR, "/"); 83 | 84 | AWSGlue glueClient = glueClientFactory.newClient(); 85 | 86 | assertNotNull(glueClient); 87 | 88 | // client has no interactions with the hive conf since system property is set 89 | verify(conf, never()).get(AWS_GLUE_ENDPOINT); 90 | verify(conf, never()).get(AWS_REGION); 91 | verify(conf, never()).get(AWS_GLUE_CATALOG_SEPARATOR); 92 | } 93 | 94 | @Test 95 | public void testClientConstructionWithSessionCredentialsProviderFactory() throws Exception { 96 | System.setProperty("aws.region", "us-west-2"); 97 | conf.setStrings(SessionCredentialsProviderFactory.AWS_ACCESS_KEY_CONF_VAR, FAKE_ACCESS_KEY); 98 | conf.setStrings(SessionCredentialsProviderFactory.AWS_SECRET_KEY_CONF_VAR, FAKE_SECRET_KEY); 99 | conf.setStrings(SessionCredentialsProviderFactory.AWS_SESSION_TOKEN_CONF_VAR, FAKE_SESSION_TOKEN); 100 | 101 | conf.setStrings(AWS_CATALOG_CREDENTIALS_PROVIDER_FACTORY_CLASS, 102 | SessionCredentialsProviderFactory.class.getCanonicalName()); 103 | 104 | AWSGlue glueClient = glueClientFactory.newClient(); 105 | 106 | assertNotNull(glueClient); 107 | 108 | verify(conf, atLeastOnce()).get(SessionCredentialsProviderFactory.AWS_ACCESS_KEY_CONF_VAR); 109 | verify(conf, atLeastOnce()).get(SessionCredentialsProviderFactory.AWS_SECRET_KEY_CONF_VAR); 110 | verify(conf, atLeastOnce()).get(SessionCredentialsProviderFactory.AWS_SESSION_TOKEN_CONF_VAR); 111 | } 112 | 113 | @Test 114 | public void testCredentialsCreatedBySessionCredentialsProviderFactory() throws Exception { 115 | conf.setStrings(SessionCredentialsProviderFactory.AWS_ACCESS_KEY_CONF_VAR, FAKE_ACCESS_KEY); 116 | conf.setStrings(SessionCredentialsProviderFactory.AWS_SECRET_KEY_CONF_VAR, FAKE_SECRET_KEY); 117 | conf.setStrings(SessionCredentialsProviderFactory.AWS_SESSION_TOKEN_CONF_VAR, FAKE_SESSION_TOKEN); 118 | 119 | SessionCredentialsProviderFactory factory = new SessionCredentialsProviderFactory(); 120 | AWSCredentialsProvider provider = factory.buildAWSCredentialsProvider(conf); 121 | AWSCredentials credentials = provider.getCredentials(); 122 | 123 | assertThat(credentials, instanceOf(BasicSessionCredentials.class)); 124 | 125 | BasicSessionCredentials sessionCredentials = (BasicSessionCredentials) credentials; 126 | 127 | assertEquals(FAKE_ACCESS_KEY, sessionCredentials.getAWSAccessKeyId()); 128 | assertEquals(FAKE_SECRET_KEY, sessionCredentials.getAWSSecretKey()); 129 | assertEquals(FAKE_SESSION_TOKEN, sessionCredentials.getSessionToken()); 130 | } 131 | 132 | @Test(expected = IllegalArgumentException.class) 133 | public void testMissingAccessKeyWithSessionCredentialsProviderFactory() throws Exception { 134 | SessionCredentialsProviderFactory factory = new SessionCredentialsProviderFactory(); 135 | factory.buildAWSCredentialsProvider(conf); 136 | } 137 | 138 | @Test(expected = IllegalArgumentException.class) 139 | public void testMissingSecretKey() throws Exception { 140 | SessionCredentialsProviderFactory factory = new SessionCredentialsProviderFactory(); 141 | conf.setStrings(SessionCredentialsProviderFactory.AWS_ACCESS_KEY_CONF_VAR, FAKE_ACCESS_KEY); 142 | factory.buildAWSCredentialsProvider(conf); 143 | } 144 | 145 | @Test(expected = IllegalArgumentException.class) 146 | public void testMissingSessionTokenKey() throws Exception { 147 | SessionCredentialsProviderFactory factory = new SessionCredentialsProviderFactory(); 148 | conf.setStrings(SessionCredentialsProviderFactory.AWS_ACCESS_KEY_CONF_VAR, FAKE_ACCESS_KEY); 149 | conf.setStrings(SessionCredentialsProviderFactory.AWS_SECRET_KEY_CONF_VAR, FAKE_SECRET_KEY); 150 | factory.buildAWSCredentialsProvider(conf); 151 | } 152 | 153 | } 154 | -------------------------------------------------------------------------------- /aws-glue-datacatalog-client-common/src/test/java/com/amazonaws/glue/catalog/metastore/AWSGlueMetastoreCacheDecoratorTest.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws.glue.catalog.metastore; 2 | 3 | import com.amazonaws.services.glue.model.Database; 4 | import com.amazonaws.services.glue.model.Table; 5 | import com.google.common.cache.Cache; 6 | import org.apache.hadoop.hive.conf.HiveConf; 7 | import org.junit.Before; 8 | import org.junit.Test; 9 | 10 | import static org.junit.Assert.assertEquals; 11 | import static org.junit.Assert.assertNotNull; 12 | import static org.junit.Assert.assertNull; 13 | import static org.mockito.Mockito.doNothing; 14 | import static org.mockito.Mockito.mock; 15 | import static org.mockito.Mockito.spy; 16 | import static org.mockito.Mockito.times; 17 | import static org.mockito.Mockito.verify; 18 | import static org.mockito.Mockito.when; 19 | import static org.junit.Assert.assertTrue; 20 | 21 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_DB_CACHE_ENABLE; 22 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_TABLE_CACHE_ENABLE; 23 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_TABLE_CACHE_SIZE; 24 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_TABLE_CACHE_TTL_MINS; 25 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_DB_CACHE_SIZE; 26 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_DB_CACHE_TTL_MINS; 27 | 28 | public class AWSGlueMetastoreCacheDecoratorTest { 29 | 30 | private AWSGlueMetastore glueMetastore; 31 | private HiveConf hiveConf; 32 | 33 | private static final String DB_NAME = "db"; 34 | private static final String TABLE_NAME = "table"; 35 | private static final AWSGlueMetastoreCacheDecorator.TableIdentifier TABLE_IDENTIFIER = 36 | new AWSGlueMetastoreCacheDecorator.TableIdentifier(DB_NAME, TABLE_NAME); 37 | 38 | @Before 39 | public void setUp() { 40 | glueMetastore = mock(AWSGlueMetastore.class); 41 | hiveConf = spy(new HiveConf()); 42 | when(hiveConf.getBoolean(AWS_GLUE_TABLE_CACHE_ENABLE, false)).thenReturn(true); 43 | when(hiveConf.getBoolean(AWS_GLUE_DB_CACHE_ENABLE, false)).thenReturn(true); 44 | when(hiveConf.getInt(AWS_GLUE_TABLE_CACHE_SIZE, 0)).thenReturn(100); 45 | when(hiveConf.getInt(AWS_GLUE_TABLE_CACHE_TTL_MINS, 0)).thenReturn(100); 46 | when(hiveConf.getInt(AWS_GLUE_DB_CACHE_SIZE, 0)).thenReturn(100); 47 | when(hiveConf.getInt(AWS_GLUE_DB_CACHE_TTL_MINS, 0)).thenReturn(100); 48 | 49 | } 50 | 51 | @Test(expected = NullPointerException.class) 52 | public void testConstructorWithNullConf() { 53 | new AWSGlueMetastoreCacheDecorator(null, glueMetastore); 54 | } 55 | 56 | @Test(expected = IllegalArgumentException.class) 57 | public void testConstructorWithInvalidTableCacheSize() { 58 | when(hiveConf.getInt(AWS_GLUE_TABLE_CACHE_SIZE, 0)).thenReturn(0); 59 | new AWSGlueMetastoreCacheDecorator(hiveConf, glueMetastore); 60 | } 61 | 62 | @Test(expected = IllegalArgumentException.class) 63 | public void testConstructorWithInvalidTableCacheTtl() { 64 | when(hiveConf.getInt(AWS_GLUE_TABLE_CACHE_TTL_MINS, 0)).thenReturn(0); 65 | new AWSGlueMetastoreCacheDecorator(hiveConf, glueMetastore); 66 | } 67 | 68 | @Test(expected = IllegalArgumentException.class) 69 | public void testConstructorWithInvalidDbCacheSize() { 70 | when(hiveConf.getInt(AWS_GLUE_DB_CACHE_SIZE, 0)).thenReturn(0); 71 | new AWSGlueMetastoreCacheDecorator(hiveConf, glueMetastore); 72 | } 73 | 74 | @Test(expected = IllegalArgumentException.class) 75 | public void testConstructorWithInvalidDbCacheTtl() { 76 | when(hiveConf.getInt(AWS_GLUE_DB_CACHE_TTL_MINS, 0)).thenReturn(0); 77 | new AWSGlueMetastoreCacheDecorator(hiveConf, glueMetastore); 78 | } 79 | 80 | @Test 81 | public void testGetDatabaseWhenCacheDisabled() { 82 | //disable cache 83 | when(hiveConf.getBoolean(AWS_GLUE_DB_CACHE_ENABLE, false)).thenReturn(false); 84 | Database db = new Database(); 85 | AWSGlueMetastoreCacheDecorator cacheDecorator = 86 | new AWSGlueMetastoreCacheDecorator(hiveConf, glueMetastore); 87 | when(glueMetastore.getDatabase(DB_NAME)).thenReturn(db); 88 | assertEquals(db, cacheDecorator.getDatabase(DB_NAME)); 89 | assertNull(cacheDecorator.databaseCache); 90 | verify(glueMetastore, times(1)).getDatabase(DB_NAME); 91 | } 92 | 93 | @Test 94 | public void testGetDatabaseWhenCacheEnabledAndCacheMiss() { 95 | Database db = new Database(); 96 | AWSGlueMetastoreCacheDecorator cacheDecorator = 97 | new AWSGlueMetastoreCacheDecorator(hiveConf, glueMetastore); 98 | assertNotNull(cacheDecorator.databaseCache); 99 | Cache dbCache = mock(Cache.class); 100 | cacheDecorator.databaseCache = dbCache; 101 | 102 | when(dbCache.getIfPresent(DB_NAME)).thenReturn(null); 103 | when(glueMetastore.getDatabase(DB_NAME)).thenReturn(db); 104 | doNothing().when(dbCache).put(DB_NAME, db); 105 | 106 | assertEquals(db, cacheDecorator.getDatabase(DB_NAME)); 107 | 108 | verify(glueMetastore, times(1)).getDatabase(DB_NAME); 109 | verify(dbCache, times(1)).getIfPresent(DB_NAME); 110 | verify(dbCache, times(1)).put(DB_NAME, db); 111 | } 112 | 113 | @Test 114 | public void testGetDatabaseWhenCacheEnabledAndCacheHit() { 115 | Database db = new Database(); 116 | AWSGlueMetastoreCacheDecorator cacheDecorator = 117 | new AWSGlueMetastoreCacheDecorator(hiveConf, glueMetastore); 118 | assertNotNull(cacheDecorator.databaseCache); 119 | Cache dbCache = mock(Cache.class); 120 | cacheDecorator.databaseCache = dbCache; 121 | 122 | when(dbCache.getIfPresent(DB_NAME)).thenReturn(db); 123 | 124 | assertEquals(db, cacheDecorator.getDatabase(DB_NAME)); 125 | 126 | verify(dbCache, times(1)).getIfPresent(DB_NAME); 127 | } 128 | 129 | @Test 130 | public void testGetTableWhenCacheDisabled() { 131 | //disable cache 132 | when(hiveConf.getBoolean(AWS_GLUE_TABLE_CACHE_ENABLE, false)).thenReturn(false); 133 | Table table = new Table(); 134 | AWSGlueMetastoreCacheDecorator cacheDecorator = 135 | new AWSGlueMetastoreCacheDecorator(hiveConf, glueMetastore); 136 | when(glueMetastore.getTable(DB_NAME, TABLE_NAME)).thenReturn(table); 137 | assertEquals(table, cacheDecorator.getTable(DB_NAME, TABLE_NAME)); 138 | assertNull(cacheDecorator.tableCache); 139 | verify(glueMetastore, times(1)).getTable(DB_NAME, TABLE_NAME); 140 | } 141 | 142 | @Test 143 | public void testGetTableWhenCacheEnabledAndCacheMiss() { 144 | Table table = new Table(); 145 | AWSGlueMetastoreCacheDecorator cacheDecorator = 146 | new AWSGlueMetastoreCacheDecorator(hiveConf, glueMetastore); 147 | assertNotNull(cacheDecorator.tableCache); 148 | Cache tableCache = mock(Cache.class); 149 | cacheDecorator.tableCache = tableCache; 150 | 151 | when(tableCache.getIfPresent(TABLE_IDENTIFIER)).thenReturn(null); 152 | when(glueMetastore.getTable(DB_NAME, TABLE_NAME)).thenReturn(table); 153 | doNothing().when(tableCache).put(TABLE_IDENTIFIER, table); 154 | 155 | assertEquals(table, cacheDecorator.getTable(DB_NAME, TABLE_NAME)); 156 | 157 | verify(glueMetastore, times(1)).getTable(DB_NAME, TABLE_NAME); 158 | verify(tableCache, times(1)).getIfPresent(TABLE_IDENTIFIER); 159 | verify(tableCache, times(1)).put(TABLE_IDENTIFIER, table); 160 | } 161 | 162 | @Test 163 | public void testGetTableWhenCacheEnabledAndCacheHit() { 164 | Table table = new Table(); 165 | AWSGlueMetastoreCacheDecorator cacheDecorator = 166 | new AWSGlueMetastoreCacheDecorator(hiveConf, glueMetastore); 167 | assertNotNull(cacheDecorator.tableCache); 168 | Cache tableCache = mock(Cache.class); 169 | cacheDecorator.tableCache = tableCache; 170 | 171 | when(tableCache.getIfPresent(TABLE_IDENTIFIER)).thenReturn(table); 172 | 173 | assertEquals(table, cacheDecorator.getTable(DB_NAME, TABLE_NAME)); 174 | 175 | verify(tableCache, times(1)).getIfPresent(TABLE_IDENTIFIER); 176 | } 177 | 178 | } --------------------------------------------------------------------------------