├── NOTICE
├── .github
└── PULL_REQUEST_TEMPLATE.md
├── CODE_OF_CONDUCT.md
├── aws-glue-datacatalog-client-common
├── src
│ ├── main
│ │ └── java
│ │ │ └── com
│ │ │ └── amazonaws
│ │ │ └── glue
│ │ │ └── catalog
│ │ │ ├── metastore
│ │ │ ├── AWSCredentialsProviderFactory.java
│ │ │ ├── GlueClientFactory.java
│ │ │ ├── ExecutorServiceFactory.java
│ │ │ ├── DefaultAWSCredentialsProviderFactory.java
│ │ │ ├── DefaultExecutorServiceFactory.java
│ │ │ ├── AWSGlueMetastoreFactory.java
│ │ │ ├── SessionCredentialsProviderFactory.java
│ │ │ ├── AWSGlueMetastore.java
│ │ │ ├── AWSGlueClientFactory.java
│ │ │ ├── AWSGlueMetastoreCacheDecorator.java
│ │ │ └── AWSGlueMetastoreBaseDecorator.java
│ │ │ ├── exceptions
│ │ │ ├── LakeFormationException.java
│ │ │ └── InvalidPartitionNameException.java
│ │ │ ├── converters
│ │ │ ├── ConverterUtils.java
│ │ │ ├── CatalogToHiveConverterFactory.java
│ │ │ ├── CatalogToHiveConverter.java
│ │ │ ├── Hive3CatalogToHiveConverter.java
│ │ │ ├── GlueInputConverter.java
│ │ │ └── PartitionNameParser.java
│ │ │ └── util
│ │ │ ├── LoggingHelper.java
│ │ │ ├── PartitionKey.java
│ │ │ ├── PartitionUtils.java
│ │ │ ├── AWSGlueConfig.java
│ │ │ ├── HiveTableValidator.java
│ │ │ ├── MetastoreClientUtils.java
│ │ │ ├── BatchDeletePartitionsHelper.java
│ │ │ └── BatchCreatePartitionsHelper.java
│ └── test
│ │ └── java
│ │ └── com
│ │ └── amazonaws
│ │ └── glue
│ │ └── catalog
│ │ ├── converters
│ │ ├── ConverterUtilsTest.java
│ │ ├── CatalogToHiveConverterFactoryTest.java
│ │ ├── Hive3CatalogToHiveConverterTest.java
│ │ └── GlueInputConverterTest.java
│ │ ├── util
│ │ ├── LoggingHelperTest.java
│ │ ├── PartitionUtilsTest.java
│ │ ├── GlueTestClientFactory.java
│ │ ├── ExprBuilder.java
│ │ ├── HiveTableValidatorTest.java
│ │ ├── PartitionKeyTest.java
│ │ └── MetastoreClientUtilsTest.java
│ │ └── metastore
│ │ ├── AWSGlueMetastoreFactoryTest.java
│ │ ├── AWSGlueClientFactoryTest.java
│ │ └── AWSGlueMetastoreCacheDecoratorTest.java
└── pom.xml
├── .gitignore
├── shims
├── pom.xml
├── hive3-shims
│ ├── pom.xml
│ └── src
│ │ └── main
│ │ └── java
│ │ └── com.amazonaws.glue.shims
│ │ └── AwsGlueHive3Shims.java
├── loader
│ ├── src
│ │ └── main
│ │ │ └── java
│ │ │ └── com
│ │ │ └── amazonaws
│ │ │ └── glue
│ │ │ └── shims
│ │ │ └── ShimsLoader.java
│ ├── test
│ │ └── main
│ │ │ └── java
│ │ │ └── com
│ │ │ └── amazonaws
│ │ │ └── glue
│ │ │ └── shims
│ │ │ └── ShimsLoaderTest.java
│ └── pom.xml
├── spark-hive-shims
│ ├── pom.xml
│ └── src
│ │ └── main
│ │ └── java
│ │ └── com
│ │ └── amazonaws
│ │ └── glue
│ │ └── shims
│ │ └── AwsGlueSparkHiveShims.java
└── common
│ ├── pom.xml
│ └── src
│ └── main
│ └── java
│ └── com
│ └── amazonaws
│ └── glue
│ └── shims
│ └── AwsGlueHiveShims.java
├── aws-glue-datacatalog-hive3-client
├── src
│ ├── main
│ │ └── java
│ │ │ └── com
│ │ │ └── amazonaws
│ │ │ └── glue
│ │ │ └── catalog
│ │ │ └── metastore
│ │ │ └── AWSGlueDataCatalogHiveClientFactory.java
│ └── test
│ │ └── java
│ │ └── com
│ │ └── amazonaws
│ │ └── glue
│ │ └── catalog
│ │ └── metastore
│ │ └── HiveAWSCatalogMetastoreClientTest.java
└── pom.xml
├── aws-glue-datacatalog-spark-client
├── src
│ ├── main
│ │ └── java
│ │ │ └── com
│ │ │ └── amazonaws
│ │ │ └── glue
│ │ │ └── catalog
│ │ │ └── metastore
│ │ │ └── AWSGlueDataCatalogHiveClientFactory.java
│ └── test
│ │ └── java
│ │ └── com
│ │ └── amazonaws
│ │ └── glue
│ │ └── catalog
│ │ └── metastore
│ │ ├── SparkCatalogMetastoreClientTest.java
│ │ └── integrationtest
│ │ ├── MultipleCatalogDatabaseIntegrationTest.java
│ │ ├── MultipleCatalogIntegrationTestBase.java
│ │ ├── MultipleCatalogTableIntegrationTest.java
│ │ ├── MultipleCatalogUserDefinedFunctionIntegrationTest.java
│ │ └── MetastoreClientDatabaseIntegrationTest.java
└── pom.xml
├── dev-support
└── check_style.xml
├── CONTRIBUTING.md
├── pom.xml
└── README.md
/NOTICE:
--------------------------------------------------------------------------------
1 | AWS Glue Catalog Client For Apache Hive Metastore
2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
3 |
--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
1 | *Issue #, if available:*
2 |
3 | *Description of changes:*
4 |
5 |
6 | By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license.
7 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | ## Code of Conduct
2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
4 | opensource-codeofconduct@amazon.com with any additional questions or comments.
5 |
--------------------------------------------------------------------------------
/aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/metastore/AWSCredentialsProviderFactory.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.catalog.metastore;
2 |
3 | import org.apache.hadoop.conf.Configuration;
4 |
5 | import com.amazonaws.auth.AWSCredentialsProvider;
6 |
/**
 * Factory for the {@link AWSCredentialsProvider} the Glue client authenticates with.
 */
public interface AWSCredentialsProviderFactory {

  /**
   * Builds a credentials provider, optionally reading settings from the
   * given Hadoop configuration.
   */
  AWSCredentialsProvider buildAWSCredentialsProvider(Configuration conf);
}
11 |
--------------------------------------------------------------------------------
/aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/exceptions/LakeFormationException.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.catalog.exceptions;
2 |
/**
 * Unchecked exception signalling a failure in an AWS Lake Formation related
 * operation.
 */
public class LakeFormationException extends RuntimeException {

  // Throwable is Serializable; pin the serial form so it is stable across
  // recompiles instead of relying on a compiler-generated id.
  private static final long serialVersionUID = 1L;

  public LakeFormationException(String message) {
    super(message);
  }

  public LakeFormationException(String message, Throwable cause) {
    super(message, cause);
  }
}
13 |
--------------------------------------------------------------------------------
/aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/metastore/GlueClientFactory.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.catalog.metastore;
2 |
3 | import com.amazonaws.services.glue.AWSGlue;
4 | import org.apache.hadoop.hive.metastore.api.MetaException;
5 |
/**
 * Factory for creating the AWS Glue client used by the metastore.
 */
public interface GlueClientFactory {

  /**
   * Builds a new {@link AWSGlue} client.
   *
   * @throws MetaException if the client cannot be constructed
   */
  AWSGlue newClient() throws MetaException;

}
14 |
--------------------------------------------------------------------------------
/aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/metastore/ExecutorServiceFactory.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.catalog.metastore;
2 |
3 | import org.apache.hadoop.conf.Configuration;
4 |
5 | import java.util.concurrent.ExecutorService;
6 |
7 | /*
8 | * Interface for creating an ExecutorService
9 | */
10 | public interface ExecutorServiceFactory {
11 | public ExecutorService getExecutorService(Configuration conf);
12 | }
13 |
--------------------------------------------------------------------------------
/aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/exceptions/InvalidPartitionNameException.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.catalog.exceptions;
2 |
/**
 * Unchecked exception thrown when a partition name cannot be parsed or fails
 * validation.
 */
public class InvalidPartitionNameException extends RuntimeException {

  // Throwable is Serializable; declare an explicit id so the serial form does
  // not change whenever the class is recompiled.
  private static final long serialVersionUID = 1L;

  public InvalidPartitionNameException(String message) {
    super(message);
  }

  public InvalidPartitionNameException(String message, Throwable cause) {
    super(message, cause);
  }
}
13 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Compiled class file
2 | *.class
3 |
4 | # Log file
5 | *.log
6 | *.iml
7 |
8 | # Package Files #
9 | *.jar
10 | *.war
11 | *.nar
12 | *.ear
13 | *.zip
14 | *.tar.gz
15 | *.rar
16 | /hive/
17 | /aws-glue-datacatalog-spark-client/target/
18 | /aws-glue-datacatalog-hive3-client/target/
19 | /aws-glue-datacatalog-client-common/target/
20 | /shims/common/target/
21 | /shims/hive3-shims/target/
22 | /shims/loader/target/
23 | /shims/spark-hive-shims/target/
24 | /.idea/
25 | /shims/target/
26 | /target/
--------------------------------------------------------------------------------
/aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/metastore/DefaultAWSCredentialsProviderFactory.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.catalog.metastore;
2 |
3 | import org.apache.hadoop.conf.Configuration;
4 |
5 | import com.amazonaws.auth.AWSCredentialsProvider;
6 | import com.amazonaws.auth.DefaultAWSCredentialsProviderChain;
7 |
8 | public class DefaultAWSCredentialsProviderFactory implements
9 | AWSCredentialsProviderFactory {
10 |
11 | @Override
12 | public AWSCredentialsProvider buildAWSCredentialsProvider(Configuration conf) {
13 | return new DefaultAWSCredentialsProviderChain();
14 | }
15 |
16 | }
17 |
--------------------------------------------------------------------------------
/aws-glue-datacatalog-client-common/src/test/java/com/amazonaws/glue/catalog/converters/ConverterUtilsTest.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.catalog.converters;
2 |
3 | import com.amazonaws.glue.catalog.util.TestObjects;
4 | import com.amazonaws.services.glue.model.Table;
5 |
6 | import org.junit.Test;
7 |
8 | import static org.junit.Assert.assertEquals;
9 |
10 | public class ConverterUtilsTest {
11 |
12 | @Test
13 | public void testCoralTableToStringConversion() {
14 | Table table = TestObjects.getTestTable();
15 | assertEquals(table, ConverterUtils.stringToCatalogTable(ConverterUtils.catalogTableToString(table)));
16 | }
17 |
18 | }
19 |
--------------------------------------------------------------------------------
/shims/pom.xml:
--------------------------------------------------------------------------------
1 |
5 |
6 | com.amazonaws.glue
7 | aws-glue-datacatalog-hive-client
8 | 3.4.0-SNAPSHOT
9 |
10 | 4.0.0
11 | shims
12 | pom
13 |
14 | AwsGlueDataCatalogShims
15 | Shims for Hive 1.2.x/3x compatibility
16 |
17 |
18 | common
19 | spark-hive-shims
20 | hive3-shims
21 | loader
22 |
23 |
24 |
--------------------------------------------------------------------------------
/aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/metastore/DefaultExecutorServiceFactory.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.catalog.metastore;
2 |
3 | import com.google.common.util.concurrent.ThreadFactoryBuilder;
4 | import org.apache.hadoop.conf.Configuration;
5 |
6 | import java.util.concurrent.ExecutorService;
7 | import java.util.concurrent.Executors;
8 |
9 | public class DefaultExecutorServiceFactory implements ExecutorServiceFactory {
10 | private static final int NUM_EXECUTOR_THREADS = 5;
11 |
12 | private static final ExecutorService GLUE_METASTORE_DELEGATE_THREAD_POOL = Executors.newFixedThreadPool(
13 | NUM_EXECUTOR_THREADS, new ThreadFactoryBuilder()
14 | .setNameFormat(GlueMetastoreClientDelegate.GLUE_METASTORE_DELEGATE_THREADPOOL_NAME_FORMAT)
15 | .setDaemon(true).build()
16 | );
17 |
18 | @Override
19 | public ExecutorService getExecutorService(Configuration conf) {
20 | return GLUE_METASTORE_DELEGATE_THREAD_POOL;
21 | }
22 | }
23 |
--------------------------------------------------------------------------------
/aws-glue-datacatalog-hive3-client/src/main/java/com/amazonaws/glue/catalog/metastore/AWSGlueDataCatalogHiveClientFactory.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.catalog.metastore;
2 |
3 | import org.apache.hadoop.hive.conf.HiveConf;
4 | import org.apache.hadoop.hive.metastore.HiveMetaHookLoader;
5 | import org.apache.hadoop.hive.metastore.IMetaStoreClient;
6 | import org.apache.hadoop.hive.metastore.api.MetaException;
7 | import org.apache.hadoop.hive.ql.metadata.HiveMetaStoreClientFactory;
8 |
9 | import java.util.concurrent.ConcurrentHashMap;
10 |
11 | public class AWSGlueDataCatalogHiveClientFactory implements HiveMetaStoreClientFactory {
12 |
13 | @Override
14 | public IMetaStoreClient createMetaStoreClient(
15 | HiveConf conf, HiveMetaHookLoader hookLoader,
16 | boolean allowEmbedded,
17 | ConcurrentHashMap concurrentHashMap
18 | ) throws MetaException {
19 | AWSCatalogMetastoreClient client = new AWSCatalogMetastoreClient(conf, hookLoader);
20 | return client;
21 | }
22 |
23 | }
24 |
--------------------------------------------------------------------------------
/aws-glue-datacatalog-spark-client/src/main/java/com/amazonaws/glue/catalog/metastore/AWSGlueDataCatalogHiveClientFactory.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.catalog.metastore;
2 |
3 | import org.apache.hadoop.hive.conf.HiveConf;
4 | import org.apache.hadoop.hive.metastore.HiveMetaHookLoader;
5 | import org.apache.hadoop.hive.metastore.IMetaStoreClient;
6 | import org.apache.hadoop.hive.metastore.api.MetaException;
7 | import org.apache.hadoop.hive.ql.metadata.HiveMetaStoreClientFactory;
8 |
9 | import java.util.concurrent.ConcurrentHashMap;
10 |
11 | public class AWSGlueDataCatalogHiveClientFactory implements HiveMetaStoreClientFactory {
12 |
13 | @Override
14 | public IMetaStoreClient createMetaStoreClient(
15 | HiveConf conf,
16 | HiveMetaHookLoader hookLoader,
17 | boolean allowEmbedded,
18 | ConcurrentHashMap concurrentHashMap
19 | ) throws MetaException {
20 | AWSCatalogMetastoreClient client = new AWSCatalogMetastoreClient(conf, hookLoader);
21 | return client;
22 | }
23 |
24 | }
25 |
--------------------------------------------------------------------------------
/aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/converters/ConverterUtils.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.catalog.converters;
2 |
3 | import java.util.Date;
4 | import java.util.concurrent.TimeUnit;
5 |
6 | import com.amazonaws.services.glue.model.Table;
7 |
8 | import com.google.gson.Gson;
9 |
10 | public class ConverterUtils {
11 |
12 | private static final Gson gson = new Gson();
13 |
14 | public static String catalogTableToString(final Table table) {
15 | return gson.toJson(table);
16 | }
17 |
18 | public static Table stringToCatalogTable(final String input) {
19 | return gson.fromJson(input, Table.class);
20 | }
21 |
22 | public static org.apache.hadoop.hive.metastore.api.Date dateToHiveDate(Date date) {
23 | return new org.apache.hadoop.hive.metastore.api.Date(TimeUnit.MILLISECONDS.toDays(date.getTime()));
24 | }
25 |
26 | public static Date hiveDatetoDate(org.apache.hadoop.hive.metastore.api.Date hiveDate) {
27 | return new Date(TimeUnit.DAYS.toMillis(hiveDate.getDaysSinceEpoch()));
28 | }
29 | }
30 |
--------------------------------------------------------------------------------
/aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/converters/CatalogToHiveConverterFactory.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.catalog.converters;
2 |
3 | import com.google.common.annotations.VisibleForTesting;
4 | import org.apache.hive.common.util.HiveVersionInfo;
5 |
6 | public class CatalogToHiveConverterFactory {
7 |
8 | private static final String HIVE_3_VERSION = "3.";
9 |
10 | private static CatalogToHiveConverter catalogToHiveConverter;
11 |
12 | public static CatalogToHiveConverter getCatalogToHiveConverter() {
13 | if (catalogToHiveConverter == null) {
14 | catalogToHiveConverter = loadConverter();
15 | }
16 | return catalogToHiveConverter;
17 | }
18 |
19 | private static CatalogToHiveConverter loadConverter() {
20 | String hiveVersion = HiveVersionInfo.getShortVersion();
21 |
22 | if (hiveVersion.startsWith(HIVE_3_VERSION)) {
23 | return new Hive3CatalogToHiveConverter();
24 | } else {
25 | return new BaseCatalogToHiveConverter();
26 | }
27 | }
28 |
29 | @VisibleForTesting
30 | static void clearConverter() {
31 | catalogToHiveConverter = null;
32 | }
33 | }
34 |
--------------------------------------------------------------------------------
/aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/util/LoggingHelper.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.catalog.util;
2 |
3 | import java.util.Collection;
4 |
/**
 * Helpers for safely rendering collections in log messages.
 */
public class LoggingHelper {

  // Cap on the rendered length so a huge collection cannot flood a log line.
  private static final int MAX_LOG_STRING_LEN = 2000;

  private LoggingHelper() {
  }

  /**
   * Joins the collection's elements with {@code delimiter}, truncating the
   * result at roughly {@link #MAX_LOG_STRING_LEN} characters.
   *
   * <p>Each appended element is followed by the delimiter, so a non-truncated
   * result carries a trailing delimiter (preserved existing behavior).
   *
   * @param collection elements to render; {@code null} yields {@code ""}
   * @param delimiter  separator; {@code null} defaults to {@code ","}
   */
  public static String concatCollectionToStringForLogging(Collection<String> collection, String delimiter) {
    if (collection == null) {
      return "";
    }
    if (delimiter == null) {
      delimiter = ",";
    }
    StringBuilder bldr = new StringBuilder();
    int totalLen = 0;
    int delimiterSize = delimiter.length();
    for (String str : collection) {
      if (totalLen > MAX_LOG_STRING_LEN) break;
      if (str.length() + totalLen > MAX_LOG_STRING_LEN) {
        // Append only the prefix that fits, then stop.
        bldr.append(str.subSequence(0, (MAX_LOG_STRING_LEN-totalLen)));
        break;
      } else {
        bldr.append(str);
        bldr.append(delimiter);
        totalLen += str.length() + delimiterSize;
      }
    }
    return bldr.toString();
  }

}
37 |
--------------------------------------------------------------------------------
/aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/util/PartitionKey.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.catalog.util;
2 |
3 | import com.amazonaws.services.glue.model.Partition;
4 |
5 | import java.util.List;
6 |
7 | public class PartitionKey {
8 |
9 | private final List partitionValues;
10 | private final int hashCode;
11 |
12 | public PartitionKey(Partition partition) {
13 | this(partition.getValues());
14 | }
15 |
16 | public PartitionKey(List partitionValues) {
17 | if (partitionValues == null) {
18 | throw new IllegalArgumentException("Partition values cannot be null");
19 | }
20 | this.partitionValues = partitionValues;
21 | this.hashCode = partitionValues.hashCode();
22 | }
23 |
24 | @Override
25 | public boolean equals(Object other) {
26 | return this == other || (other != null && other instanceof PartitionKey
27 | && this.partitionValues.equals(((PartitionKey) other).partitionValues));
28 | }
29 |
30 | @Override
31 | public int hashCode() {
32 | return hashCode;
33 | }
34 |
35 | List getValues() {
36 | return partitionValues;
37 | }
38 |
39 | }
40 |
--------------------------------------------------------------------------------
/shims/hive3-shims/pom.xml:
--------------------------------------------------------------------------------
1 |
5 |
6 | com.amazonaws.glue
7 | shims
8 | 3.4.0-SNAPSHOT
9 | ../
10 |
11 | 4.0.0
12 | hive3-shims
13 | jar
14 |
15 | Hive3Shims
16 | Shims for Hive-3.x compatibility
17 |
18 |
19 |
20 | org.apache.hive
21 | hive-exec
22 | ${hive3.version}
23 | provided
24 |
25 |
26 | com.amazonaws.glue
27 | shims-common
28 | ${project.version}
29 |
30 |
31 |
32 |
--------------------------------------------------------------------------------
/aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/metastore/AWSGlueMetastoreFactory.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.catalog.metastore;
2 |
3 | import com.amazonaws.services.glue.AWSGlue;
4 | import org.apache.hadoop.conf.Configuration;
5 | import org.apache.hadoop.hive.metastore.api.MetaException;
6 |
7 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_DB_CACHE_ENABLE;
8 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_TABLE_CACHE_ENABLE;
9 |
10 | public class AWSGlueMetastoreFactory {
11 |
12 | public AWSGlueMetastore newMetastore(Configuration conf) throws MetaException {
13 | AWSGlue glueClient = new AWSGlueClientFactory(conf).newClient();
14 | AWSGlueMetastore defaultMetastore = new DefaultAWSGlueMetastore(conf, glueClient);
15 | if(isCacheEnabled(conf)) {
16 | return new AWSGlueMetastoreCacheDecorator(conf, defaultMetastore);
17 | }
18 | return defaultMetastore;
19 | }
20 |
21 | private boolean isCacheEnabled(Configuration conf) {
22 | boolean databaseCacheEnabled = conf.getBoolean(AWS_GLUE_DB_CACHE_ENABLE, false);
23 | boolean tableCacheEnabled = conf.getBoolean(AWS_GLUE_TABLE_CACHE_ENABLE, false);
24 | return (databaseCacheEnabled || tableCacheEnabled);
25 | }
26 | }
--------------------------------------------------------------------------------
/shims/loader/src/main/java/com/amazonaws/glue/shims/ShimsLoader.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.shims;
2 |
3 | import com.google.common.annotations.VisibleForTesting;
4 | import org.apache.hive.common.util.HiveVersionInfo;
5 |
6 | public final class ShimsLoader {
7 |
8 | private static AwsGlueHiveShims hiveShims;
9 |
10 | public static synchronized AwsGlueHiveShims getHiveShims() {
11 | if (hiveShims == null) {
12 | hiveShims = loadHiveShims();
13 | }
14 | return hiveShims;
15 | }
16 |
17 | private static AwsGlueHiveShims loadHiveShims() {
18 | String hiveVersion = HiveVersionInfo.getShortVersion();
19 |
20 | try {
21 | if (AwsGlueSparkHiveShims.supportsVersion(hiveVersion)) {
22 | return AwsGlueSparkHiveShims.class.newInstance();
23 | } else if (AwsGlueHive3Shims.supportsVersion(hiveVersion)) {
24 | return AwsGlueHive3Shims.class.newInstance();
25 | }
26 | } catch (InstantiationException | IllegalAccessException e) {
27 | throw new RuntimeException("unable to get instance of Hive shim class for " + hiveVersion);
28 | }
29 |
30 |
31 | throw new RuntimeException("Shim class for Hive version " + hiveVersion + " does not exist");
32 | }
33 |
34 | @VisibleForTesting
35 | static synchronized void clearShimClass() {
36 | hiveShims = null;
37 | }
38 |
39 | }
40 |
--------------------------------------------------------------------------------
/shims/spark-hive-shims/pom.xml:
--------------------------------------------------------------------------------
1 |
5 |
6 | com.amazonaws.glue
7 | shims
8 | 3.4.0-SNAPSHOT
9 | ../
10 |
11 | 4.0.0
12 | spark-hive-shims
13 | jar
14 |
15 | spark-hive-shims
16 |
17 |
18 |
19 | org.apache.hive
20 | hive-exec
21 | ${spark-hive.version}
22 | provided
23 |
24 |
25 | org.apache.hive
26 | hive-metastore
27 | ${spark-hive.version}
28 | provided
29 |
30 |
31 | com.amazonaws.glue
32 | shims-common
33 | ${project.version}
34 |
35 |
36 |
37 |
--------------------------------------------------------------------------------
/shims/common/pom.xml:
--------------------------------------------------------------------------------
1 |
5 |
6 | com.amazonaws.glue
7 | shims
8 | 3.4.0-SNAPSHOT
9 | ../
10 |
11 | 4.0.0
12 | shims-common
13 | jar
14 |
15 | ShimsCommon
16 | Common shim interface
17 |
18 |
19 |
20 | org.apache.hive
21 | hive-exec
22 | ${hive3.version}
23 | provided
24 |
25 |
26 | org.apache.hadoop
27 | hadoop-common
28 | ${hadoop.version}
29 | provided
30 |
31 |
32 | org.apache.hive
33 | hive-metastore
34 | ${hive3.version}
35 | provided
36 |
37 |
38 |
39 |
40 |
--------------------------------------------------------------------------------
/aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/util/PartitionUtils.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.catalog.util;
2 |
3 | import com.amazonaws.services.glue.model.EntityNotFoundException;
4 | import com.amazonaws.services.glue.model.InvalidInputException;
5 | import com.amazonaws.services.glue.model.Partition;
6 | import com.amazonaws.services.glue.model.PartitionValueList;
7 | import com.google.common.collect.Lists;
8 | import com.google.common.collect.Maps;
9 |
10 | import java.util.List;
11 | import java.util.Map;
12 |
13 | public final class PartitionUtils {
14 |
15 | public static Map buildPartitionMap(final List partitions) {
16 | Map partitionValuesMap = Maps.newHashMap();
17 | for (Partition partition : partitions) {
18 | partitionValuesMap.put(new PartitionKey(partition), partition);
19 | }
20 | return partitionValuesMap;
21 | }
22 |
23 | public static List getPartitionValuesList(final Map partitionMap) {
24 | List partitionValuesList = Lists.newArrayList();
25 | for (Map.Entry entry : partitionMap.entrySet()) {
26 | partitionValuesList.add(new PartitionValueList().withValues(entry.getValue().getValues()));
27 | }
28 | return partitionValuesList;
29 | }
30 |
31 | public static boolean isInvalidUserInputException(Exception e) {
32 | // exceptions caused by invalid requests, in which case we know all partitions creation failed
33 | return e instanceof EntityNotFoundException || e instanceof InvalidInputException;
34 | }
35 |
36 | }
37 |
--------------------------------------------------------------------------------
/shims/common/src/main/java/com/amazonaws/glue/shims/AwsGlueHiveShims.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.shims;
2 |
3 | import org.apache.hadoop.conf.Configuration;
4 | import org.apache.hadoop.fs.Path;
5 | import org.apache.hadoop.hive.metastore.api.Database;
6 | import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
7 | import org.apache.hadoop.hive.metastore.api.FieldSchema;
8 | import org.apache.hadoop.hive.metastore.api.MetaException;
9 | import org.apache.hadoop.hive.metastore.api.Partition;
10 | import org.apache.hadoop.hive.metastore.api.Table;
11 | import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
12 | import org.apache.hadoop.hive.metastore.Warehouse;
13 |
14 | import java.util.List;
15 |
16 | public interface AwsGlueHiveShims {
17 |
18 | ExprNodeGenericFuncDesc getDeserializeExpression(byte[] exprBytes);
19 |
20 | byte[] getSerializeExpression(ExprNodeGenericFuncDesc expr);
21 |
22 | Path getDefaultTablePath(Database db, String tableName, Warehouse warehouse)
23 | throws MetaException;
24 |
25 | boolean deleteDir(Warehouse wh, Path path, boolean recursive, boolean ifPurge) throws MetaException;
26 |
27 | boolean mkdirs(Warehouse wh, Path path) throws MetaException;
28 |
29 | boolean validateTableName(String name, Configuration conf);
30 |
31 | boolean requireCalStats(Configuration conf, Partition oldPart, Partition newPart, Table tbl, EnvironmentContext environmentContext);
32 |
33 | boolean updateTableStatsFast(Database db, Table tbl, Warehouse wh, boolean madeDir, boolean forceRecompute, EnvironmentContext environmentContext)
34 | throws MetaException;
35 |
36 | String validateTblColumns(List cols);
37 | }
38 |
--------------------------------------------------------------------------------
/aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/metastore/SessionCredentialsProviderFactory.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.catalog.metastore;
2 |
3 | import com.amazonaws.auth.AWSCredentialsProvider;
4 | import com.amazonaws.auth.AWSSessionCredentials;
5 | import com.amazonaws.auth.BasicSessionCredentials;
6 | import com.amazonaws.internal.StaticCredentialsProvider;
7 |
8 | import org.apache.hadoop.conf.Configuration;
9 |
10 | import static com.google.common.base.Preconditions.checkArgument;
11 |
12 | public class SessionCredentialsProviderFactory implements AWSCredentialsProviderFactory {
13 |
14 | public final static String AWS_ACCESS_KEY_CONF_VAR = "hive.aws_session_access_id";
15 | public final static String AWS_SECRET_KEY_CONF_VAR = "hive.aws_session_secret_key";
16 | public final static String AWS_SESSION_TOKEN_CONF_VAR = "hive.aws_session_token";
17 |
18 | @Override
19 | public AWSCredentialsProvider buildAWSCredentialsProvider(Configuration conf) {
20 |
21 | checkArgument(conf != null, "conf cannot be null.");
22 |
23 | String accessKey = conf.get(AWS_ACCESS_KEY_CONF_VAR);
24 | String secretKey = conf.get(AWS_SECRET_KEY_CONF_VAR);
25 | String sessionToken = conf.get(AWS_SESSION_TOKEN_CONF_VAR);
26 |
27 | checkArgument(accessKey != null, AWS_ACCESS_KEY_CONF_VAR + " must be set.");
28 | checkArgument(secretKey != null, AWS_SECRET_KEY_CONF_VAR + " must be set.");
29 | checkArgument(sessionToken != null, AWS_SESSION_TOKEN_CONF_VAR + " must be set.");
30 |
31 | AWSSessionCredentials credentials = new BasicSessionCredentials(accessKey, secretKey, sessionToken);
32 |
33 | return new StaticCredentialsProvider(credentials);
34 | }
35 | }
36 |
--------------------------------------------------------------------------------
/aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/converters/CatalogToHiveConverter.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.catalog.converters;
2 |
3 | import com.amazonaws.services.glue.model.ErrorDetail;
4 | import com.amazonaws.services.glue.model.ColumnStatistics;
5 | import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
6 | import org.apache.hadoop.hive.metastore.api.Database;
7 | import org.apache.hadoop.hive.metastore.api.FieldSchema;
8 | import org.apache.hadoop.hive.metastore.api.Function;
9 | import org.apache.hadoop.hive.metastore.api.Partition;
10 | import org.apache.hadoop.hive.metastore.api.Table;
11 | import org.apache.hadoop.hive.metastore.api.TableMeta;
12 | import org.apache.thrift.TException;
13 |
14 | import java.util.List;
15 |
/**
 * Converts AWS Glue catalog model objects into their Hive metastore
 * equivalents, including mapping Glue error details onto Hive exceptions.
 *
 * NOTE(review): generic type parameters on the List-typed members appear to
 * have been lost in extraction; element types are implied by the method names
 * and the imported Hive types — confirm against the implementations.
 */
public interface CatalogToHiveConverter {

  /** Wraps an arbitrary failure in the Hive exception type callers expect. */
  TException wrapInHiveException(Throwable e);

  /** Maps a Glue {@link ErrorDetail} onto the corresponding Hive exception. */
  TException errorDetailToHiveException(ErrorDetail errorDetail);

  Database convertDatabase(com.amazonaws.services.glue.model.Database catalogDatabase);

  List convertFieldSchemaList(List catalogFieldSchemaList);

  Table convertTable(com.amazonaws.services.glue.model.Table catalogTable, String dbname);

  TableMeta convertTableMeta(com.amazonaws.services.glue.model.Table catalogTable, String dbName);

  Partition convertPartition(com.amazonaws.services.glue.model.Partition src);

  List convertPartitions(List src);

  Function convertFunction(String dbName, com.amazonaws.services.glue.model.UserDefinedFunction catalogFunction);

  List convertColumnStatisticsList(List catatlogColumnStatisticsList);
}
38 |
--------------------------------------------------------------------------------
/aws-glue-datacatalog-client-common/src/test/java/com/amazonaws/glue/catalog/converters/CatalogToHiveConverterFactoryTest.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.catalog.converters;
2 |
3 | import org.apache.hive.common.util.HiveVersionInfo;
4 | import org.junit.After;
5 | import org.junit.Before;
6 | import org.junit.Test;
7 | import org.junit.runner.RunWith;
8 | import org.powermock.core.classloader.annotations.PowerMockIgnore;
9 | import org.powermock.core.classloader.annotations.PrepareForTest;
10 | import org.powermock.modules.junit4.PowerMockRunner;
11 |
12 | import static org.junit.Assert.assertTrue;
13 | import static org.powermock.api.mockito.PowerMockito.mockStatic;
14 | import static org.powermock.api.mockito.PowerMockito.when;
15 |
16 | @PrepareForTest(HiveVersionInfo.class)
17 | @PowerMockIgnore("javax.management.*")
18 | @RunWith(PowerMockRunner.class)
19 | public class CatalogToHiveConverterFactoryTest {
20 |
21 | private static final String HIVE_1_2_VERSION = "1.2.1";
22 | private static final String HIVE_3_VERSION = "3.1.3";
23 |
24 | @Before
25 | public void setup() throws ClassNotFoundException {
26 | mockStatic(HiveVersionInfo.class);
27 | }
28 |
29 | @After
30 | public void reset() {
31 | CatalogToHiveConverterFactory.clearConverter();
32 | }
33 |
34 | @Test
35 | public void testGetBaseCatalogToHiveConverter() {
36 | when(HiveVersionInfo.getShortVersion()).thenReturn(HIVE_1_2_VERSION);
37 | CatalogToHiveConverter catalogToHiveConverter = CatalogToHiveConverterFactory.getCatalogToHiveConverter();
38 | assertTrue(BaseCatalogToHiveConverter.class.isInstance(catalogToHiveConverter));
39 | }
40 |
41 | @Test
42 | public void testGetHive3CatalogToHiveConverter() {
43 | when(HiveVersionInfo.getShortVersion()).thenReturn(HIVE_3_VERSION);
44 | CatalogToHiveConverter catalogToHiveConverter = CatalogToHiveConverterFactory.getCatalogToHiveConverter();
45 | assertTrue(Hive3CatalogToHiveConverter.class.isInstance(catalogToHiveConverter));
46 | }
47 | }
48 |
--------------------------------------------------------------------------------
/aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/util/AWSGlueConfig.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.catalog.util;
2 |
3 | import com.amazonaws.ClientConfiguration;
4 |
/**
 * Configuration key names (and a few defaults) used by the Glue metastore client.
 * Pure constant holder; never instantiated.
 */
public final class AWSGlueConfig {

    // Utility class: suppress instantiation.
    private AWSGlueConfig() {}

    // Conf key: custom Glue service endpoint override.
    public static final String AWS_GLUE_ENDPOINT = "aws.glue.endpoint";
    // Conf key: AWS region for the Glue client.
    public static final String AWS_REGION = "aws.region";
    // Conf key: fully-qualified class name of a custom credentials provider factory.
    public static final String AWS_CATALOG_CREDENTIALS_PROVIDER_FACTORY_CLASS
            = "aws.catalog.credentials.provider.factory.class";

    // Conf key + default for the client's max error-retry count.
    public static final String AWS_GLUE_MAX_RETRY = "aws.glue.max-error-retries";
    public static final int DEFAULT_MAX_RETRY = 5;

    // Conf key + default for max HTTP connections (default taken from the AWS SDK).
    public static final String AWS_GLUE_MAX_CONNECTIONS = "aws.glue.max-connections";
    public static final int DEFAULT_MAX_CONNECTIONS = ClientConfiguration.DEFAULT_MAX_CONNECTIONS;

    // Conf key + default for the HTTP connection timeout (AWS SDK default).
    public static final String AWS_GLUE_CONNECTION_TIMEOUT = "aws.glue.connection-timeout";
    public static final int DEFAULT_CONNECTION_TIMEOUT = ClientConfiguration.DEFAULT_CONNECTION_TIMEOUT;

    // Conf key + default for the HTTP socket timeout (AWS SDK default).
    public static final String AWS_GLUE_SOCKET_TIMEOUT = "aws.glue.socket-timeout";
    public static final int DEFAULT_SOCKET_TIMEOUT = ClientConfiguration.DEFAULT_SOCKET_TIMEOUT;

    // Conf key: separator used by the multiple-catalog decorator.
    public static final String AWS_GLUE_CATALOG_SEPARATOR = "aws.glue.catalog.separator";

    // Conf key: flag to disable UDF support.
    public static final String AWS_GLUE_DISABLE_UDF = "aws.glue.disable-udf";


    // Database cache tuning: enable flag, max size, and TTL in minutes.
    public static final String AWS_GLUE_DB_CACHE_ENABLE = "aws.glue.cache.db.enable";
    public static final String AWS_GLUE_DB_CACHE_SIZE = "aws.glue.cache.db.size";
    public static final String AWS_GLUE_DB_CACHE_TTL_MINS = "aws.glue.cache.db.ttl-mins";

    // Table cache tuning: enable flag, max size, and TTL in minutes.
    public static final String AWS_GLUE_TABLE_CACHE_ENABLE = "aws.glue.cache.table.enable";
    public static final String AWS_GLUE_TABLE_CACHE_SIZE = "aws.glue.cache.table.size";
    public static final String AWS_GLUE_TABLE_CACHE_TTL_MINS = "aws.glue.cache.table.ttl-mins";
}
--------------------------------------------------------------------------------
/aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/converters/Hive3CatalogToHiveConverter.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.catalog.converters;
2 |
3 | import org.apache.hadoop.hive.metastore.api.Database;
4 | import org.apache.hadoop.hive.metastore.api.Function;
5 | import org.apache.hadoop.hive.metastore.api.Partition;
6 | import org.apache.hadoop.hive.metastore.api.Table;
7 | import org.apache.hadoop.hive.metastore.api.TableMeta;
8 |
9 | import static org.apache.hadoop.hive.metastore.Warehouse.DEFAULT_CATALOG_NAME;
10 |
11 | public class Hive3CatalogToHiveConverter extends BaseCatalogToHiveConverter {
12 |
13 | @Override
14 | public Database convertDatabase(com.amazonaws.services.glue.model.Database catalogDatabase) {
15 | Database hiveDatabase = super.convertDatabase(catalogDatabase);
16 | hiveDatabase.setCatalogName(DEFAULT_CATALOG_NAME);
17 | return hiveDatabase;
18 | }
19 |
20 | @Override
21 | public Table convertTable(com.amazonaws.services.glue.model.Table catalogTable, String dbname) {
22 | Table hiveTable = super.convertTable(catalogTable, dbname);
23 | hiveTable.setCatName(DEFAULT_CATALOG_NAME);
24 | return hiveTable;
25 | }
26 |
27 | @Override
28 | public TableMeta convertTableMeta(com.amazonaws.services.glue.model.Table catalogTable, String dbName) {
29 | TableMeta tableMeta = super.convertTableMeta(catalogTable, dbName);
30 | tableMeta.setCatName(DEFAULT_CATALOG_NAME);
31 | return tableMeta;
32 | }
33 |
34 | @Override
35 | public Partition convertPartition(com.amazonaws.services.glue.model.Partition src) {
36 | Partition hivePartition = super.convertPartition(src);
37 | hivePartition.setCatName(DEFAULT_CATALOG_NAME);
38 | return hivePartition;
39 | }
40 |
41 | @Override
42 | public Function convertFunction(String dbName, com.amazonaws.services.glue.model.UserDefinedFunction catalogFunction) {
43 | Function hiveFunction = super.convertFunction(dbName, catalogFunction);
44 | if (hiveFunction == null) {
45 | return null;
46 | }
47 | hiveFunction.setCatName(DEFAULT_CATALOG_NAME);
48 | return hiveFunction;
49 | }
50 | }
51 |
--------------------------------------------------------------------------------
/shims/loader/test/main/java/com/amazonaws/glue/shims/ShimsLoaderTest.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.shims;
2 |
3 | import static org.junit.Assert.assertTrue;
4 | import static org.mockito.Mockito.when;
5 | import static org.powermock.api.mockito.PowerMockito.mockStatic;
6 |
7 | import org.apache.hive.common.util.HiveVersionInfo;
8 | import org.junit.Before;
9 | import org.junit.Test;
10 | import org.junit.runner.RunWith;
11 | import org.powermock.core.classloader.annotations.PowerMockIgnore;
12 | import org.powermock.core.classloader.annotations.PrepareForTest;
13 | import org.powermock.modules.junit4.PowerMockRunner;
14 |
15 | @PrepareForTest(HiveVersionInfo.class)
16 | @PowerMockIgnore("javax.management.*")
17 | @RunWith(PowerMockRunner.class)
18 | public class ShimsLoaderTest {
19 |
20 | private static final String HIVE_1_2_VERSION = "1.2.1";
21 |
22 | private static final String HIVE_3_VERSION = "3.1.3";
23 |
24 | @Before
25 | public void setup() throws ClassNotFoundException {
26 | mockStatic(HiveVersionInfo.class);
27 | }
28 |
29 | @Test
30 | public void hive3ShimsClassSupportsCorrectVersion() {
31 | assertTrue(AwsGlueHive3Shims.supportsVersion(HIVE_3_VERSION));
32 | }
33 |
34 | @Test
35 | public void hive1Dot2ShimsClassSupportsCorrectVersion() {
36 | assertTrue(AwsGlueSparkHiveShims.supportsVersion(HIVE_1_2_VERSION));
37 | }
38 |
39 | @Test
40 | public void returnsCorrectShimsImplementationForHive3() {
41 | assertGetsCorrectShimsClassForVersion(AwsGlueHive3Shims.class, HIVE_3_VERSION);
42 | }
43 |
44 | @Test
45 | public void returnsCorrectShimsImplementationForHive1Dot2() {
46 | assertGetsCorrectShimsClassForVersion(AwsGlueSparkHiveShims.class, HIVE_1_2_VERSION);
47 | }
48 |
49 | @Test(expected = RuntimeException.class)
50 | public void throwsWhenSupportingClassDoesNotExist() {
51 | when(HiveVersionInfo.getShortVersion()).thenReturn("this.is.not.a.real.hive.version");
52 | ShimsLoader.clearShimClass();
53 | ShimsLoader.getHiveShims();
54 | }
55 |
56 | private void assertGetsCorrectShimsClassForVersion(Class expectedClass, String version) {
57 | when(HiveVersionInfo.getShortVersion()).thenReturn(version);
58 | AwsGlueHiveShims shims = ShimsLoader.getHiveShims();
59 | assertTrue(expectedClass.isInstance(shims));
60 | ShimsLoader.clearShimClass();
61 | }
62 | }
63 |
--------------------------------------------------------------------------------
/aws-glue-datacatalog-spark-client/src/test/java/com/amazonaws/glue/catalog/metastore/SparkCatalogMetastoreClientTest.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.catalog.metastore;
2 |
3 | import com.amazonaws.glue.catalog.util.TestObjects;
4 | import com.amazonaws.services.glue.AWSGlue;
5 | import com.amazonaws.services.glue.model.Database;
6 | import com.amazonaws.services.glue.model.EntityNotFoundException;
7 | import com.amazonaws.services.glue.model.GetUserDefinedFunctionRequest;
8 | import com.amazonaws.services.glue.model.UserDefinedFunction;
9 |
10 | import org.apache.hadoop.hive.conf.HiveConf;
11 | import org.apache.hadoop.hive.metastore.Warehouse;
12 | import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
13 | import org.junit.Before;
14 | import org.junit.Rule;
15 | import org.junit.Test;
16 | import org.junit.rules.ExpectedException;
17 |
18 | import static org.mockito.Matchers.any;
19 | import static org.mockito.Mockito.mock;
20 | import static org.mockito.Mockito.spy;
21 | import static org.mockito.Mockito.when;
22 |
23 | public class SparkCatalogMetastoreClientTest {
24 |
25 | private AWSGlue glueClient;
26 | private AWSCatalogMetastoreClient metastoreClient;
27 | private Warehouse wh;
28 | private HiveConf conf;
29 | private GlueClientFactory clientFactory;
30 |
31 | // Test objects
32 | private Database testDB;
33 | private UserDefinedFunction testFunction;
34 |
35 | @Rule
36 | public ExpectedException expectedException = ExpectedException.none();
37 |
38 | @Before
39 | public void setUp() throws Exception{
40 | testDB = TestObjects.getTestDatabase();
41 | testFunction = TestObjects.getCatalogTestFunction();
42 |
43 | conf = spy(new HiveConf());
44 | glueClient = mock(AWSGlue.class);
45 | clientFactory = mock(GlueClientFactory.class);
46 | when(clientFactory.newClient()).thenReturn(glueClient);
47 | metastoreClient = new AWSCatalogMetastoreClient.Builder().withClientFactory(clientFactory)
48 | .withWarehouse(wh).createDefaults(false).withHiveConf(conf).build();
49 | }
50 |
51 | @Test
52 | public void testGetFunctionNoSuchObjectExceptionMessage() throws Exception {
53 | expectedException.expect(NoSuchObjectException.class);
54 | expectedException.expectMessage(testFunction.getFunctionName() + " does not exist");
55 |
56 | when(glueClient.getUserDefinedFunction(any(GetUserDefinedFunctionRequest.class)))
57 | .thenThrow(new EntityNotFoundException(""));
58 | metastoreClient.getFunction(testDB.getName(), testFunction.getFunctionName());
59 | }
60 |
61 | }
62 |
--------------------------------------------------------------------------------
/aws-glue-datacatalog-client-common/src/test/java/com/amazonaws/glue/catalog/util/LoggingHelperTest.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.catalog.util;
2 |
3 | import com.google.common.collect.ImmutableList;
4 | import org.junit.Test;
5 |
6 | import java.util.Collection;
7 |
8 | import static org.hamcrest.Matchers.equalTo;
9 | import static org.hamcrest.Matchers.is;
10 | import static org.junit.Assert.assertThat;
11 |
12 | public class LoggingHelperTest {
13 |
14 | @Test
15 | public void concatCollectionToStringForLoggingTest() {
16 | Collection logs = ImmutableList.of("test_log_1", "test_log_2", "test_log_3");
17 | String delimiter = "|";
18 |
19 | String result = LoggingHelper.concatCollectionToStringForLogging(logs, delimiter);
20 | String expected = "test_log_1|test_log_2|test_log_3|";
21 |
22 | assertThat(result, is(equalTo(expected)));
23 | }
24 |
25 | @Test
26 | public void concatCollectionToStringForLoggingTestWithoutCollection() {
27 | String delimiter = "|";
28 |
29 | String result = LoggingHelper.concatCollectionToStringForLogging(null, delimiter);
30 | String expected = "";
31 |
32 | assertThat(result, is(equalTo(expected)));
33 | }
34 |
35 | @Test
36 | public void concatCollectionToStringForLoggingTestWithoutDelimiter() {
37 | Collection logs = ImmutableList.of("test_log_1", "test_log_2", "test_log_3");
38 |
39 | String result = LoggingHelper.concatCollectionToStringForLogging(logs, null);
40 | String expected = "test_log_1,test_log_2,test_log_3,";
41 |
42 | assertThat(result, is(equalTo(expected)));
43 | }
44 |
45 | @Test
46 | public void concatCollectionToStringForLoggingTestWithLongerThanLimitInput() {
47 | ImmutableList.Builder listBuilder = new ImmutableList.Builder<>();
48 |
49 | final int max = 2000;
50 | final String key = "KEY";
51 | final StringBuilder stringBuilder = new StringBuilder();
52 | for (int i = 0; i < max; i += key.length()) {
53 | listBuilder.add(key);
54 | stringBuilder.append(key);
55 | }
56 | final String overflow = "OVERFLOW";
57 | for (int i = 0; i < 100; i += overflow.length()) {
58 | listBuilder.add(overflow);
59 | }
60 |
61 | String result = LoggingHelper.concatCollectionToStringForLogging(listBuilder.build(), "");
62 | String expected = stringBuilder.toString().substring(0, max);
63 |
64 | assertThat(result.length(), is(equalTo(max)));
65 | assertThat(result, is(equalTo(expected)));
66 | assertThat(expected.indexOf(overflow), is(equalTo(-1)));
67 | }
68 |
69 | }
70 |
--------------------------------------------------------------------------------
/shims/loader/pom.xml:
--------------------------------------------------------------------------------
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <parent>
        <groupId>com.amazonaws.glue</groupId>
        <artifactId>shims</artifactId>
        <version>3.4.0-SNAPSHOT</version>
        <relativePath>../</relativePath>
    </parent>
    <modelVersion>4.0.0</modelVersion>
    <artifactId>shims-loader</artifactId>
    <packaging>jar</packaging>

    <name>ShimsLoader</name>
    <description>Loader for the AwsGlueDataCatalogShim classes</description>

    <dependencies>
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-exec</artifactId>
            <version>${hive3.version}</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>com.amazonaws.glue</groupId>
            <artifactId>hive3-shims</artifactId>
            <version>${project.version}</version>
        </dependency>
        <dependency>
            <groupId>com.amazonaws.glue</groupId>
            <artifactId>shims-common</artifactId>
            <version>${project.version}</version>
        </dependency>
        <dependency>
            <groupId>com.amazonaws.glue</groupId>
            <artifactId>spark-hive-shims</artifactId>
            <version>${project.version}</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>${junit.version}</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.mockito</groupId>
            <artifactId>mockito-core</artifactId>
            <version>${mockito.version}</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.powermock</groupId>
            <artifactId>powermock-module-junit4</artifactId>
            <version>${powermock.version}</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.powermock</groupId>
            <artifactId>powermock-api-mockito</artifactId>
            <version>${powermock.version}</version>
            <scope>test</scope>
        </dependency>
    </dependencies>
</project>
--------------------------------------------------------------------------------
/aws-glue-datacatalog-client-common/src/test/java/com/amazonaws/glue/catalog/converters/Hive3CatalogToHiveConverterTest.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.catalog.converters;
2 |
3 | import com.amazonaws.glue.catalog.util.TestObjects;
4 | import com.amazonaws.services.glue.model.Database;
5 | import com.amazonaws.services.glue.model.Partition;
6 | import com.amazonaws.services.glue.model.Table;
7 | import com.amazonaws.services.glue.model.UserDefinedFunction;
8 | import com.google.common.collect.ImmutableList;
9 | import org.apache.hadoop.hive.metastore.api.TableMeta;
10 | import org.junit.Test;
11 |
12 | import static org.apache.hadoop.hive.metastore.Warehouse.DEFAULT_CATALOG_NAME;
13 | import static org.junit.Assert.assertEquals;
14 |
15 | public class Hive3CatalogToHiveConverterTest {
16 |
17 | private static final String TEST_DB_NAME = "testDb";
18 | private static final String TEST_TBL_NAME = "testTbl";
19 | private final CatalogToHiveConverter catalogToHiveConverter = new Hive3CatalogToHiveConverter();
20 |
21 | @Test
22 | public void testDatabaseCatalogName() {
23 | Database catalogDb = TestObjects.getTestDatabase();
24 | org.apache.hadoop.hive.metastore.api.Database hiveDatabase = catalogToHiveConverter
25 | .convertDatabase(catalogDb);
26 | assertEquals(DEFAULT_CATALOG_NAME, hiveDatabase.getCatalogName());
27 | }
28 |
29 | @Test
30 | public void testTableCatalogName() {
31 | Table catalogTable = TestObjects.getTestTable();
32 | org.apache.hadoop.hive.metastore.api.Table hiveTable = catalogToHiveConverter.convertTable(catalogTable, TEST_DB_NAME);
33 | assertEquals(DEFAULT_CATALOG_NAME, hiveTable.getCatName());
34 | }
35 |
36 | @Test
37 | public void testTableMetaCatalogName() {
38 | Table catalogTable = TestObjects.getTestTable();
39 | TableMeta tableMeta = catalogToHiveConverter.convertTableMeta(catalogTable, TEST_DB_NAME);
40 | assertEquals(DEFAULT_CATALOG_NAME, tableMeta.getCatName());
41 | }
42 |
43 | @Test
44 | public void testPartitionConversion() {
45 | Partition partition = TestObjects.getTestPartition(TEST_DB_NAME, TEST_TBL_NAME, ImmutableList.of("1"));
46 | org.apache.hadoop.hive.metastore.api.Partition hivePartition = catalogToHiveConverter.convertPartition(partition);
47 | assertEquals(DEFAULT_CATALOG_NAME, hivePartition.getCatName());
48 | }
49 |
50 | @Test
51 | public void testFunctionConversion() {
52 | UserDefinedFunction catalogFunction = TestObjects.getCatalogTestFunction();
53 | org.apache.hadoop.hive.metastore.api.Function hiveFunction = catalogToHiveConverter.convertFunction(TEST_DB_NAME, catalogFunction);
54 | assertEquals(DEFAULT_CATALOG_NAME, hiveFunction.getCatName());
55 | }
56 | }
57 |
--------------------------------------------------------------------------------
/aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/util/HiveTableValidator.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.catalog.util;
2 |
3 | import com.amazonaws.services.glue.model.InvalidInputException;
4 | import com.amazonaws.services.glue.model.Table;
5 |
6 | import org.apache.hadoop.hive.metastore.TableType;
7 |
8 | import static org.apache.commons.lang3.StringUtils.isNotEmpty;
9 | import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE;
10 |
11 | public enum HiveTableValidator {
12 |
13 | REQUIRED_PROPERTIES_VALIDATOR {
14 | public void validate(Table table) {
15 | String missingProperty = null;
16 |
17 | if(notApplicableTableType(table)) {
18 | return;
19 | }
20 |
21 | if (table.getTableType() == null) {
22 | missingProperty = "TableType";
23 | } else if (table.getStorageDescriptor() == null) {
24 | missingProperty = "StorageDescriptor";
25 | } else if (table.getStorageDescriptor().getInputFormat() == null) {
26 | missingProperty = "StorageDescriptor#InputFormat";
27 | } else if (table.getStorageDescriptor().getOutputFormat() == null) {
28 | missingProperty = "StorageDescriptor#OutputFormat";
29 | } else if (table.getStorageDescriptor().getSerdeInfo() == null) {
30 | missingProperty = "StorageDescriptor#SerdeInfo";
31 | } else if (table.getStorageDescriptor().getSerdeInfo().getSerializationLibrary() == null) {
32 | missingProperty = "StorageDescriptor#SerdeInfo#SerializationLibrary";
33 | }
34 |
35 | if (missingProperty != null) {
36 | throw new InvalidInputException(String.format("%s cannot be null for table: %s", missingProperty, table.getName()));
37 | }
38 | }
39 | };
40 |
41 | public abstract void validate(Table table);
42 |
43 | private static boolean notApplicableTableType(Table table) {
44 | if (isNotManagedOrExternalTable(table) ||
45 | isStorageHandlerType(table)) {
46 | return true;
47 | }
48 | return false;
49 | }
50 |
51 | private static boolean isNotManagedOrExternalTable(Table table) {
52 | if (table.getTableType() != null &&
53 | TableType.valueOf(table.getTableType()) != TableType.MANAGED_TABLE &&
54 | TableType.valueOf(table.getTableType()) != TableType.EXTERNAL_TABLE) {
55 | return true;
56 | }
57 | return false;
58 | }
59 |
60 | private static boolean isStorageHandlerType(Table table) {
61 | if (table.getParameters() != null && table.getParameters().containsKey(META_TABLE_STORAGE) &&
62 | isNotEmpty(table.getParameters().get(META_TABLE_STORAGE))) {
63 | return true;
64 | }
65 | return false;
66 | }
67 | }
68 |
--------------------------------------------------------------------------------
/shims/spark-hive-shims/src/main/java/com/amazonaws/glue/shims/AwsGlueSparkHiveShims.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.shims;
2 |
3 | import org.apache.hadoop.conf.Configuration;
4 | import org.apache.hadoop.fs.Path;
5 | import org.apache.hadoop.hive.metastore.MetaStoreUtils;
6 | import org.apache.hadoop.hive.metastore.api.Database;
7 | import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
8 | import org.apache.hadoop.hive.metastore.api.FieldSchema;
9 | import org.apache.hadoop.hive.metastore.api.MetaException;
10 | import org.apache.hadoop.hive.metastore.api.Partition;
11 | import org.apache.hadoop.hive.metastore.api.Table;
12 | import org.apache.hadoop.hive.metastore.Warehouse;
13 | import org.apache.hadoop.hive.ql.exec.SerializationUtilities;
14 | import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
15 |
16 | import java.util.List;
17 |
/**
 * Shim implementation for the Hive 2.3.x line bundled with Spark. Every method is a
 * thin delegate to the Hive 2.3 API shape; do not reorder arguments, as sibling shim
 * classes use version-specific overloads.
 */
class AwsGlueSparkHiveShims implements AwsGlueHiveShims {

  // Matched as a prefix: any "2.3.*" Hive version is accepted.
  private static final String SPARK_HIVE_VERSION = "2.3.";

  /** Returns true when {@code version} belongs to the Hive 2.3.x line. */
  static boolean supportsVersion(String version) {
    return version.startsWith(SPARK_HIVE_VERSION);
  }

  /** Deserializes a Kryo-encoded partition-filter expression. */
  @Override
  public ExprNodeGenericFuncDesc getDeserializeExpression(byte[] exprBytes) {
    return SerializationUtilities.deserializeExpressionFromKryo(exprBytes);
  }

  /** Serializes a partition-filter expression with Kryo. */
  @Override
  public byte[] getSerializeExpression(ExprNodeGenericFuncDesc expr) {
    return SerializationUtilities.serializeExpressionToKryo(expr);
  }

  /** Hive 2.3 overload: default table path without the isExternal flag. */
  @Override
  public Path getDefaultTablePath(Database db, String tableName, Warehouse warehouse) throws MetaException {
    return warehouse.getDefaultTablePath(db, tableName);
  }

  /** Hive 2.3 overload: deleteDir takes three arguments (no needCmRecycle flag). */
  @Override
  public boolean deleteDir(Warehouse wh, Path path, boolean recursive, boolean ifPurge) throws MetaException {
    return wh.deleteDir(path, recursive, ifPurge);
  }

  /** Hive 2.3 overload: mkdirs takes an inheritPermCandidate flag; always passes true. */
  @Override
  public boolean mkdirs(Warehouse wh, Path path) throws MetaException {
    return wh.mkdirs(path, true);
  }

  /** Validates a table name against Hive's naming rules. */
  @Override
  public boolean validateTableName(String name, Configuration conf) {
    return MetaStoreUtils.validateName(name, conf);
  }

  /** Hive 2.3 overload: requireCalStats also takes the Configuration. */
  @Override
  public boolean requireCalStats(
      Configuration conf,
      Partition oldPart,
      Partition newPart,
      Table tbl,
      EnvironmentContext environmentContext) {
    return MetaStoreUtils.requireCalStats(conf, oldPart, newPart, tbl, environmentContext);
  }

  /** Delegates to Hive 2.3's updateTableStatsFast and returns its result. */
  @Override
  public boolean updateTableStatsFast(
      Database db,
      Table tbl,
      Warehouse wh,
      boolean madeDir,
      boolean forceRecompute,
      EnvironmentContext environmentContext
  ) throws MetaException {
    return MetaStoreUtils.updateTableStatsFast(db, tbl, wh, madeDir, forceRecompute, environmentContext);
  }

  /** Validates column definitions; returns an error description or null when valid. */
  @Override
  public String validateTblColumns(List cols) {
    return MetaStoreUtils.validateTblColumns(cols);
  }

}
84 |
--------------------------------------------------------------------------------
/aws-glue-datacatalog-client-common/src/test/java/com/amazonaws/glue/catalog/util/PartitionUtilsTest.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.catalog.util;
2 |
3 | import com.amazonaws.services.glue.model.EntityNotFoundException;
4 | import com.amazonaws.services.glue.model.InternalServiceException;
5 | import com.amazonaws.services.glue.model.InvalidInputException;
6 | import com.amazonaws.services.glue.model.Partition;
7 | import com.amazonaws.services.glue.model.PartitionValueList;
8 | import com.amazonaws.services.glue.model.ResourceNumberLimitExceededException;
9 |
10 | import com.google.common.collect.Lists;
11 | import com.google.common.collect.Sets;
12 | import org.junit.Test;
13 |
14 | import java.util.List;
15 | import java.util.Map;
16 | import java.util.Set;
17 |
18 | import static org.hamcrest.CoreMatchers.equalTo;
19 | import static org.hamcrest.CoreMatchers.hasItem;
20 | import static org.junit.Assert.assertEquals;
21 | import static org.junit.Assert.assertFalse;
22 | import static org.junit.Assert.assertThat;
23 | import static org.junit.Assert.assertTrue;
24 |
25 | public class PartitionUtilsTest {
26 |
27 | @Test
28 | public void testBuildPartitionMapAndGetPartitionValuesList() {
29 | String namespaceName = "ns";
30 | String tableName = "table";
31 | // choose special values to make values1.hashCode() == values2.hashCode()
32 | List values1 = Lists.newArrayList("Aa");
33 | List values2 = Lists.newArrayList("BB");
34 | Partition partition1 = TestObjects.getTestPartition(namespaceName, tableName, values1);
35 | Partition partition2 = TestObjects.getTestPartition(namespaceName, tableName, values2);
36 | Map partitionMap = PartitionUtils.buildPartitionMap(Lists.newArrayList(partition1, partition2));
37 | List partitionValuesList = PartitionUtils.getPartitionValuesList(partitionMap);
38 |
39 | assertEquals(2, partitionMap.size());
40 | Set> valuesSet = Sets.newHashSet(values1, values2);
41 | for (PartitionKey partitionKey : partitionMap.keySet()) {
42 | assertThat(valuesSet, hasItem(partitionKey.getValues()));
43 | assertThat(partitionMap.get(partitionKey).getValues(), equalTo(partitionKey.getValues()));
44 | }
45 |
46 | assertEquals(2, partitionValuesList.size());
47 | for (PartitionValueList partitionValueList : partitionValuesList) {
48 | assertThat(valuesSet, hasItem(partitionValueList.getValues()));
49 | }
50 | }
51 |
52 | @Test
53 | public void testIsInvalidUserInputException() {
54 | assertTrue(PartitionUtils.isInvalidUserInputException(new InvalidInputException("foo")));
55 | assertTrue(PartitionUtils.isInvalidUserInputException(new EntityNotFoundException("bar")));
56 | assertFalse(PartitionUtils.isInvalidUserInputException(new InternalServiceException("bar2")));
57 | assertFalse(PartitionUtils.isInvalidUserInputException(new ResourceNumberLimitExceededException("bar3")));
58 | assertFalse(PartitionUtils.isInvalidUserInputException(new NullPointerException("bar4")));
59 | }
60 |
61 | }
62 |
--------------------------------------------------------------------------------
/shims/hive3-shims/src/main/java/com.amazonaws.glue.shims/AwsGlueHive3Shims.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.shims;
2 |
3 | import org.apache.hadoop.conf.Configuration;
4 | import org.apache.hadoop.fs.Path;
5 |
6 | import org.apache.hadoop.hive.metastore.api.Database;
7 | import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
8 | import org.apache.hadoop.hive.metastore.api.FieldSchema;
9 | import org.apache.hadoop.hive.metastore.api.MetaException;
10 | import org.apache.hadoop.hive.metastore.api.Partition;
11 | import org.apache.hadoop.hive.metastore.api.Table;
12 | import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
13 | import org.apache.hadoop.hive.ql.exec.SerializationUtilities;
14 | import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
15 | import org.apache.hadoop.hive.metastore.Warehouse;
16 |
17 | import java.util.List;
18 |
/**
 * Shim implementation for Hive 3.x. Every method delegates to the Hive 3 API shape;
 * several Hive signatures changed between 2.3 and 3.x, so the exact overloads and
 * argument lists here are deliberate.
 */
final class AwsGlueHive3Shims implements AwsGlueHiveShims {

  // Matched as a prefix: any "3.*" Hive version is accepted.
  private static final String HIVE_3_VERSION = "3.";

  /** Returns true when {@code version} belongs to the Hive 3.x line. */
  static boolean supportsVersion(String version) {
    return version.startsWith(HIVE_3_VERSION);
  }

  /** Deserializes a Kryo-encoded partition-filter expression. */
  @Override
  public ExprNodeGenericFuncDesc getDeserializeExpression(byte[] exprBytes) {
    return SerializationUtilities.deserializeExpressionFromKryo(exprBytes);
  }

  /** Serializes a partition-filter expression with Kryo. */
  @Override
  public byte[] getSerializeExpression(ExprNodeGenericFuncDesc expr) {
    return SerializationUtilities.serializeExpressionToKryo(expr);
  }

  /** Hive 3 overload: getDefaultTablePath takes an isExternal flag; always passes false. */
  @Override
  public Path getDefaultTablePath(Database db, String tableName, Warehouse warehouse) throws MetaException {
    return warehouse.getDefaultTablePath(db, tableName, false);
  }

  /** Hive 3 overload: deleteDir takes a fourth needCmRecycle flag; always passes true. */
  @Override
  public boolean deleteDir(Warehouse wh, Path path, boolean recursive, boolean ifPurge) throws MetaException {
    return wh.deleteDir(path, recursive, ifPurge, true);
  }

  /** Hive 3 overload: mkdirs takes only the path. */
  @Override
  public boolean mkdirs(Warehouse wh, Path path) throws MetaException {
    return wh.mkdirs(path);
  }

  /** Validates a table name against Hive's naming rules. */
  @Override
  public boolean validateTableName(String name, Configuration conf) {
    return MetaStoreUtils.validateName(name, conf);
  }

  /** Hive 3 overload: requireCalStats no longer takes the Configuration; conf is ignored. */
  @Override
  public boolean requireCalStats(
      Configuration conf,
      Partition oldPart,
      Partition newPart,
      Table tbl,
      EnvironmentContext environmentContext) {
    return MetaStoreUtils.requireCalStats(oldPart, newPart, tbl, environmentContext);
  }

  /**
   * Delegates to updateTableStatsSlow (the variant available in Hive 3 —
   * presumably the "fast" variant was removed; confirm against the Hive 3
   * MetaStoreUtils API) and reports success unconditionally since the Hive 3
   * method returns void.
   */
  @Override
  public boolean updateTableStatsFast(
      Database db,
      Table tbl,
      Warehouse wh,
      boolean madeDir,
      boolean forceRecompute,
      EnvironmentContext environmentContext
  ) throws MetaException {
    MetaStoreUtils.updateTableStatsSlow(db, tbl, wh, madeDir, forceRecompute, environmentContext);
    return true;
  }

  /** Validates column definitions; returns an error description or null when valid. */
  @Override
  public String validateTblColumns(List cols) {
    return MetaStoreUtils.validateTblColumns(cols);
  }

}
86 |
87 |
--------------------------------------------------------------------------------
/aws-glue-datacatalog-client-common/src/test/java/com/amazonaws/glue/catalog/util/GlueTestClientFactory.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.catalog.util;
2 |
3 | import com.amazonaws.AmazonClientException;
4 | import com.amazonaws.AmazonServiceException;
5 | import com.amazonaws.AmazonWebServiceRequest;
6 | import com.amazonaws.ClientConfiguration;
7 | import com.amazonaws.auth.DefaultAWSCredentialsProviderChain;
8 | import com.amazonaws.client.builder.AwsClientBuilder.EndpointConfiguration;
9 | import com.amazonaws.glue.catalog.metastore.AWSGlueMultipleCatalogDecorator;
10 | import com.amazonaws.glue.catalog.metastore.GlueClientFactory;
11 | import com.amazonaws.retry.PredefinedRetryPolicies;
12 | import com.amazonaws.retry.RetryPolicy;
13 | import com.amazonaws.services.glue.AWSGlue;
14 | import com.amazonaws.services.glue.AWSGlueClientBuilder;
15 |
16 | import org.apache.commons.lang3.StringUtils;
17 | import org.apache.hadoop.hive.metastore.api.MetaException;
18 |
19 | public final class GlueTestClientFactory implements GlueClientFactory {
20 |
21 | private static final int SC_GATEWAY_TIMEOUT = 504;
22 |
23 | @Override
24 | public AWSGlue newClient() throws MetaException {
25 | AWSGlueClientBuilder glueClientBuilder = AWSGlueClientBuilder.standard()
26 | .withClientConfiguration(createGatewayTimeoutRetryableConfiguration())
27 | .withCredentials(new DefaultAWSCredentialsProviderChain());
28 |
29 | String endpoint = System.getProperty("endpoint");
30 | if (StringUtils.isNotBlank(endpoint)) {
31 | glueClientBuilder.setEndpointConfiguration(new EndpointConfiguration(endpoint, null));
32 | }
33 |
34 | //Create AWSGlueMultipleCatalogDecorator with empty separator to make sure that it's compatible with
35 | //previous behaviour - all existing integration tests must be passed without any changes.
36 | return new AWSGlueMultipleCatalogDecorator(glueClientBuilder.build(), null);
37 | }
38 |
39 | private static ClientConfiguration createGatewayTimeoutRetryableConfiguration() {
40 | ClientConfiguration retryableConfig = new ClientConfiguration();
41 | RetryPolicy.RetryCondition retryCondition = new PredefinedRetryPolicies.SDKDefaultRetryCondition() {
42 | @Override
43 | public boolean shouldRetry(AmazonWebServiceRequest originalRequest, AmazonClientException exception,
44 | int retriesAttempted) {
45 | if (super.shouldRetry(originalRequest, exception, retriesAttempted)) {
46 | return true;
47 | }
48 | if (exception != null && exception instanceof AmazonServiceException) {
49 | AmazonServiceException ase = (AmazonServiceException) exception;
50 | if (ase.getStatusCode() == SC_GATEWAY_TIMEOUT) {
51 | return true;
52 | }
53 | }
54 | return false;
55 | }
56 | };
57 | RetryPolicy retryPolicy = new RetryPolicy(retryCondition, PredefinedRetryPolicies.DEFAULT_BACKOFF_STRATEGY,
58 | PredefinedRetryPolicies.DEFAULT_MAX_ERROR_RETRY, true);
59 | retryableConfig.setRetryPolicy(retryPolicy);
60 | return retryableConfig;
61 | }
62 |
63 | }
64 |
--------------------------------------------------------------------------------
/aws-glue-datacatalog-hive3-client/src/test/java/com/amazonaws/glue/catalog/metastore/HiveAWSCatalogMetastoreClientTest.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.catalog.metastore;
2 |
3 | import com.amazonaws.glue.catalog.converters.CatalogToHiveConverter;
4 | import com.amazonaws.glue.catalog.converters.Hive3CatalogToHiveConverter;
5 | import com.amazonaws.services.glue.AWSGlue;
6 | import com.amazonaws.services.glue.model.UserDefinedFunction;
7 | import org.apache.commons.lang.RandomStringUtils;
8 | import org.apache.hadoop.conf.Configuration;
9 | import org.apache.hadoop.hive.metastore.api.ForeignKeysRequest;
10 | import org.apache.hadoop.hive.metastore.api.PrimaryKeysRequest;
11 | import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
12 | import org.junit.Before;
13 | import org.junit.Test;
14 |
15 | import static com.amazonaws.glue.catalog.util.TestObjects.getCatalogTestFunction;
16 | import static com.amazonaws.glue.catalog.util.TestObjects.getTestDatabase;
17 | import static org.junit.Assert.assertEquals;
18 | import static org.junit.Assert.assertNotNull;
19 | import static org.mockito.Mockito.mock;
20 | import static org.mockito.Mockito.spy;
21 | import static org.mockito.Mockito.when;
22 |
23 | public class HiveAWSCatalogMetastoreClientTest {
24 |
25 | private AWSGlue glueClient;
26 | private AWSCatalogMetastoreClient metastoreClient;
27 | private Configuration conf;
28 | private GlueClientFactory clientFactory;
29 | private CatalogToHiveConverter catalogToHiveConverter = new Hive3CatalogToHiveConverter();
30 |
31 | private org.apache.hadoop.hive.metastore.api.Database testDB;
32 | private org.apache.hadoop.hive.metastore.api.Function testFunction;
33 | private String catalogId;
34 | private UserDefinedFunction catalogTestFunction;
35 |
36 | @Before
37 | public void setUp() throws Exception {
38 | testDB = catalogToHiveConverter.convertDatabase(getTestDatabase());
39 | catalogTestFunction = getCatalogTestFunction();
40 | catalogTestFunction.setDatabaseName(testDB.getName());
41 | testFunction = catalogToHiveConverter.convertFunction(testDB.getName(), catalogTestFunction);
42 | // catalogId is a AWS account number
43 | catalogId = RandomStringUtils.randomNumeric(12);
44 |
45 | conf = spy(MetastoreConf.newMetastoreConf());
46 | glueClient = spy(AWSGlue.class);
47 | clientFactory = mock(GlueClientFactory.class);
48 | when(clientFactory.newClient()).thenReturn(glueClient);
49 | metastoreClient = new AWSCatalogMetastoreClient.Builder().withClientFactory(clientFactory)
50 | .createDefaults(false).withConf(conf).withCatalogId(catalogId).build();
51 | }
52 |
53 | @Test
54 | public void testPrimaryKeys_NotNull() throws Exception {
55 | PrimaryKeysRequest request = mock(PrimaryKeysRequest.class);
56 | assertNotNull(metastoreClient.getPrimaryKeys(request));
57 | }
58 |
59 | @Test
60 | public void testForeignKeys_NotNull() throws Exception {
61 | ForeignKeysRequest request = mock(ForeignKeysRequest.class);
62 | assertNotNull(metastoreClient.getForeignKeys(request));
63 | }
64 |
65 | @Test
66 | public void testGetNextNotification() throws Exception {
67 | // check that we just return dummy result
68 | assertNotNull(metastoreClient.getNextNotification(0, 1, null));
69 | }
70 |
71 | @Test
72 | public void testGetCurrentNotificationEventId() throws Exception {
73 | // check that we get dummy result with 0 eventId
74 | assertEquals(0, metastoreClient.getCurrentNotificationEventId().getEventId());
75 | }
76 | }
77 |
--------------------------------------------------------------------------------
/dev-support/check_style.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
69 |
70 |
71 |
73 |
74 |
75 |
--------------------------------------------------------------------------------
/aws-glue-datacatalog-client-common/src/test/java/com/amazonaws/glue/catalog/util/ExprBuilder.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.catalog.util;
2 |
3 | import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
4 | import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
5 | import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
6 | import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
7 | import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
8 | import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
9 | import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
10 |
11 | import java.sql.Date;
12 | import java.sql.Timestamp;
13 | import java.util.ArrayList;
14 | import java.util.List;
15 | import java.util.Stack;
16 |
17 | /**
18 | * Utility to craft mock expression trees. Inspired by org.apache.hadoop.hive.metastore.TestMetastoreExpr
19 | */
20 | public class ExprBuilder {
21 |
22 | private final String tblName;
23 | private final Stack stack = new Stack<>();
24 |
25 | public ExprBuilder(String tblName) {
26 | this.tblName = tblName;
27 | }
28 |
29 | public ExprNodeGenericFuncDesc build() throws Exception {
30 | if (stack.size() != 1) {
31 | throw new Exception("Bad test: " + stack.size());
32 | }
33 | return (ExprNodeGenericFuncDesc) stack.pop();
34 | }
35 |
36 | public ExprBuilder pred(String name, int args) throws Exception {
37 | return fn(name, TypeInfoFactory.booleanTypeInfo, args);
38 | }
39 |
40 | private ExprBuilder fn(String name, TypeInfo ti, int args) throws Exception {
41 | List children = new ArrayList<>();
42 | for (int i = 0; i < args; ++i) {
43 | children.add(stack.pop());
44 | }
45 | stack.push(new ExprNodeGenericFuncDesc(ti, FunctionRegistry.getFunctionInfo(name).getGenericUDF(), children));
46 | return this;
47 | }
48 |
49 | public ExprBuilder strCol(String col) {
50 | return colInternal(TypeInfoFactory.stringTypeInfo, col, true);
51 | }
52 |
53 | public ExprBuilder timestampCol(String col) {
54 | return colInternal(TypeInfoFactory.timestampTypeInfo, col, true);
55 | }
56 |
57 | public ExprBuilder booleanCol(String col) {
58 | return colInternal(TypeInfoFactory.booleanTypeInfo, col, true);
59 | }
60 |
61 | public ExprBuilder charCol(String col) {
62 | return colInternal(TypeInfoFactory.charTypeInfo, col, true);
63 | }
64 |
65 | public ExprBuilder dateCol(String col) {
66 | return colInternal(TypeInfoFactory.dateTypeInfo, col, true);
67 | }
68 |
69 | private ExprBuilder colInternal(TypeInfo ti, String col, boolean part) {
70 | stack.push(new ExprNodeColumnDesc(ti, col, tblName, part));
71 | return this;
72 | }
73 |
74 | public ExprBuilder val(boolean val) {
75 | return valInternal(TypeInfoFactory.booleanTypeInfo, val);
76 | }
77 |
78 | public ExprBuilder val(String val) {
79 | return valInternal(TypeInfoFactory.stringTypeInfo, val);
80 | }
81 |
82 | public ExprBuilder vals(List vals) {
83 | for (String s : vals) {
84 | val(s);
85 | }
86 | return this;
87 | }
88 |
89 | public ExprBuilder val(Timestamp val) {
90 | return valInternal(TypeInfoFactory.timestampTypeInfo, val);
91 | }
92 |
93 | public ExprBuilder val(Character val) {
94 | return valInternal(TypeInfoFactory.charTypeInfo, val);
95 | }
96 |
97 | public ExprBuilder val(Date val) {
98 | return valInternal(TypeInfoFactory.timestampTypeInfo, val);
99 | }
100 |
101 | private ExprBuilder valInternal(TypeInfo ti, Object val) {
102 | stack.push(new ExprNodeConstantDesc(ti, val));
103 | return this;
104 | }
105 |
106 | }
107 |
--------------------------------------------------------------------------------
/aws-glue-datacatalog-spark-client/src/test/java/com/amazonaws/glue/catalog/metastore/integrationtest/MultipleCatalogDatabaseIntegrationTest.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.catalog.metastore.integrationtest;
2 |
3 | import com.amazonaws.glue.catalog.converters.BaseCatalogToHiveConverter;
4 | import com.amazonaws.glue.catalog.converters.CatalogToHiveConverter;
5 | import com.amazonaws.services.glue.model.DeleteDatabaseRequest;
6 | import com.amazonaws.services.glue.model.EntityNotFoundException;
7 | import com.amazonaws.services.glue.model.GetDatabaseRequest;
8 | import org.apache.hadoop.hive.metastore.api.Database;
9 | import org.apache.hadoop.hive.metastore.api.MetaException;
10 | import org.apache.thrift.TException;
11 | import org.junit.After;
12 | import org.junit.Before;
13 | import org.junit.Rule;
14 | import org.junit.Test;
15 | import org.junit.rules.ExpectedException;
16 |
17 | import java.util.UUID;
18 |
19 | import static com.amazonaws.glue.catalog.util.TestObjects.getTestDatabase;
20 | import static org.junit.Assert.assertEquals;
21 |
22 | public class MultipleCatalogDatabaseIntegrationTest extends MultipleCatalogIntegrationTestBase {
23 |
24 | private Database databaseInAnotherCatalog;
25 | private Database database;
26 | private CatalogToHiveConverter catalogToHiveConverter = new BaseCatalogToHiveConverter();
27 |
28 | @Rule
29 | public ExpectedException expectedException = ExpectedException.none();
30 |
31 | @Before
32 | public void setup() throws MetaException, TException {
33 | super.setup();
34 |
35 | databaseInAnotherCatalog = catalogToHiveConverter.convertDatabase(getTestDatabase());
36 | database = databaseInAnotherCatalog.deepCopy();
37 | database.setName(constructDbName(databaseInAnotherCatalog.getName()));
38 | metastoreClient.createDatabase(database);
39 | }
40 |
41 | @After
42 | public void clean() {
43 | try {
44 | glueClient.deleteDatabase(new DeleteDatabaseRequest().withName(databaseInAnotherCatalog.getName()));
45 | } catch (EntityNotFoundException e) {
46 | }
47 | }
48 |
49 | @Test
50 | public void getDatabase() throws TException {
51 | Database createdDb = metastoreClient.getDatabase(database.getName());
52 | assertEquals(database, createdDb);
53 |
54 | createdDb = catalogToHiveConverter.convertDatabase(
55 | glueClient.getDatabase(new GetDatabaseRequest()
56 | .withCatalogId(anotherCatalogId)
57 | .withName(databaseInAnotherCatalog.getName())
58 | ).getDatabase());
59 | assertEquals(databaseInAnotherCatalog, createdDb);
60 | }
61 |
62 | @Test
63 | public void alterDatabase() throws TException {
64 | String newDescription = UUID.randomUUID().toString();
65 | database.setDescription(newDescription);
66 | databaseInAnotherCatalog.setDescription(newDescription);
67 | metastoreClient.alterDatabase(database.getName(), database);
68 |
69 | Database alteredDb = catalogToHiveConverter.convertDatabase(
70 | glueClient.getDatabase(new GetDatabaseRequest()
71 | .withCatalogId(anotherCatalogId)
72 | .withName(databaseInAnotherCatalog.getName())
73 | ).getDatabase());
74 | assertEquals(databaseInAnotherCatalog, alteredDb);
75 | }
76 |
77 | @Test
78 | public void dropDatabase() throws TException {
79 | metastoreClient.dropDatabase(database.getName());
80 |
81 | expectedException.expect(EntityNotFoundException.class);
82 | glueClient.getDatabase(new GetDatabaseRequest()
83 | .withCatalogId(anotherCatalogId)
84 | .withName(databaseInAnotherCatalog.getName())
85 | );
86 | }
87 | }
88 |
--------------------------------------------------------------------------------
/aws-glue-datacatalog-client-common/src/test/java/com/amazonaws/glue/catalog/util/HiveTableValidatorTest.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.catalog.util;
2 |
3 | import com.amazonaws.services.glue.model.InvalidInputException;
4 | import com.amazonaws.services.glue.model.Table;
5 | import org.junit.Rule;
6 | import org.junit.Test;
7 | import org.junit.rules.ExpectedException;
8 | import org.apache.hadoop.hive.metastore.TableType;
9 |
10 | import static com.amazonaws.glue.catalog.util.HiveTableValidator.REQUIRED_PROPERTIES_VALIDATOR;
11 | import static com.amazonaws.glue.catalog.util.TestObjects.getTestTable;
12 | import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE;
13 |
14 | public class HiveTableValidatorTest {
15 |
16 | @Rule
17 | public ExpectedException thrown = ExpectedException.none();
18 | private static final String EXPECTED_MESSAGE = "%s cannot be null";
19 |
20 | @Test
21 | public void testRequiredProperty_TableType() {
22 | thrown.expect(InvalidInputException.class);
23 | thrown.expectMessage(String.format(EXPECTED_MESSAGE, "TableType"));
24 | Table tbl = getTestTable().withTableType(null);
25 | REQUIRED_PROPERTIES_VALIDATOR.validate(tbl);
26 | }
27 |
28 | @Test
29 | public void testRequiredProperty_StorageDescriptor() {
30 | thrown.expect(InvalidInputException.class);
31 | thrown.expectMessage(String.format(EXPECTED_MESSAGE, "StorageDescriptor"));
32 | Table tbl = getTestTable().withStorageDescriptor(null);
33 | REQUIRED_PROPERTIES_VALIDATOR.validate(tbl);
34 | }
35 |
36 | @Test
37 | public void testRequiredProperty_InputFormat() {
38 | thrown.expect(InvalidInputException.class);
39 | thrown.expectMessage(String.format(EXPECTED_MESSAGE, "StorageDescriptor#InputFormat"));
40 | Table tbl = getTestTable();
41 | tbl.getStorageDescriptor().setInputFormat(null);
42 | REQUIRED_PROPERTIES_VALIDATOR.validate(tbl);
43 | }
44 |
45 | @Test
46 | public void testRequiredProperty_OutputFormat() {
47 | thrown.expect(InvalidInputException.class);
48 | thrown.expectMessage(String.format(EXPECTED_MESSAGE, "StorageDescriptor#OutputFormat"));
49 | Table tbl = getTestTable();
50 | tbl.getStorageDescriptor().setOutputFormat(null);
51 | REQUIRED_PROPERTIES_VALIDATOR.validate(tbl);
52 | }
53 |
54 | @Test
55 | public void testRequiredProperty_SerdeInfo() {
56 | thrown.expect(InvalidInputException.class);
57 | thrown.expectMessage(String.format(EXPECTED_MESSAGE, "StorageDescriptor#SerdeInfo"));
58 | Table tbl = getTestTable();
59 | tbl.getStorageDescriptor().setSerdeInfo(null);
60 | REQUIRED_PROPERTIES_VALIDATOR.validate(tbl);
61 | }
62 |
63 | @Test
64 | public void testRequiredProperty_SerializationLibrary() {
65 | thrown.expect(InvalidInputException.class);
66 | thrown.expectMessage(String.format(EXPECTED_MESSAGE, "StorageDescriptor#SerdeInfo#SerializationLibrary"));
67 | Table tbl = getTestTable();
68 | tbl.getStorageDescriptor().getSerdeInfo().setSerializationLibrary(null);
69 | REQUIRED_PROPERTIES_VALIDATOR.validate(tbl);
70 | }
71 |
72 | @Test
73 | public void testRequiredProperty_ValidTable() {
74 | REQUIRED_PROPERTIES_VALIDATOR.validate(getTestTable());
75 | }
76 |
77 | @Test
78 | public void testValidate_ViewTableType() {
79 | Table tbl = getTestTable();
80 | tbl.setTableType(TableType.VIRTUAL_VIEW.name());
81 | tbl.getStorageDescriptor().getSerdeInfo().setSerializationLibrary(null);
82 | REQUIRED_PROPERTIES_VALIDATOR.validate(tbl);
83 | }
84 |
85 | @Test
86 | public void testValidate_ExcludeStorageHandlerType() {
87 | Table tbl = getTestTable();
88 | tbl.getParameters().put(META_TABLE_STORAGE, "org.apache.hadoop.hive.dynamodb.DynamoDBStorageHandler");
89 | tbl.getStorageDescriptor().setInputFormat(null);
90 | REQUIRED_PROPERTIES_VALIDATOR.validate(tbl);
91 | }
92 | }
93 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing Guidelines
2 |
3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional
4 | documentation, we greatly value feedback and contributions from our community.
5 |
6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary
7 | information to effectively respond to your bug report or contribution.
8 |
9 |
10 | ## Reporting Bugs/Feature Requests
11 |
12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features.
13 |
14 | When filing an issue, please check [existing open](https://github.com/awslabs/aws-glue-catalog-client-for-apache-hive-metastore/issues), or [recently closed](https://github.com/awslabs/aws-glue-catalog-client-for-apache-hive-metastore/issues?utf8=%E2%9C%93&q=is%3Aissue%20is%3Aclosed%20), issues to make sure somebody else hasn't already
15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful:
16 |
17 | * A reproducible test case or series of steps
18 | * The version of our code being used
19 | * Any modifications you've made relevant to the bug
20 | * Anything unusual about your environment or deployment
21 |
22 |
23 | ## Contributing via Pull Requests
24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that:
25 |
26 | 1. You are working against the latest source on the *master* branch.
27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already.
28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted.
29 |
30 | To send us a pull request, please:
31 |
32 | 1. Fork the repository.
33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change.
34 | 3. Ensure local tests pass.
35 | 4. Commit to your fork using clear commit messages.
36 | 5. Send us a pull request, answering any default questions in the pull request interface.
37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation.
38 |
GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and
40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/).
41 |
42 |
43 | ## Finding contributions to work on
44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any ['help wanted'](https://github.com/awslabs/aws-glue-catalog-client-for-apache-hive-metastore/labels/help%20wanted) issues is a great place to start.
45 |
46 |
47 | ## Code of Conduct
48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
50 | opensource-codeofconduct@amazon.com with any additional questions or comments.
51 |
52 |
53 | ## Security issue notifications
54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue.
55 |
56 |
57 | ## Licensing
58 |
59 | See the [LICENSE](https://github.com/awslabs/aws-glue-catalog-client-for-apache-hive-metastore/blob/master/LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution.
60 |
61 | We may ask you to sign a [Contributor License Agreement (CLA)](http://en.wikipedia.org/wiki/Contributor_License_Agreement) for larger changes.
62 |
--------------------------------------------------------------------------------
/aws-glue-datacatalog-spark-client/src/test/java/com/amazonaws/glue/catalog/metastore/integrationtest/MultipleCatalogIntegrationTestBase.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.catalog.metastore.integrationtest;
2 |
3 | import com.amazonaws.glue.catalog.metastore.AWSCatalogMetastoreClient;
4 | import com.amazonaws.glue.catalog.metastore.AWSGlueClientFactory;
5 | import com.amazonaws.glue.catalog.metastore.GlueClientFactory;
6 | import com.amazonaws.glue.catalog.util.AWSGlueConfig;
7 | import com.amazonaws.services.glue.AWSGlue;
8 | import com.google.common.base.Strings;
9 | import org.apache.hadoop.fs.Path;
10 | import org.apache.hadoop.hive.conf.HiveConf;
11 | import org.apache.hadoop.hive.metastore.IMetaStoreClient;
12 | import org.apache.hadoop.hive.metastore.Warehouse;
13 | import org.apache.hadoop.hive.metastore.api.MetaException;
14 | import org.apache.thrift.TException;
15 |
16 | import static org.mockito.Matchers.any;
17 | import static org.mockito.Matchers.anyString;
18 | import static org.mockito.Mockito.mock;
19 | import static org.mockito.Mockito.when;
20 |
21 | /**
22 | * Base class for integration test to check that multiple Glue catalogs can be accessed using single MetastoreClient.
23 | * To be able to run such test you have to make the following set up:
 * 1. Retrieve credentials of an AWS account (let's say account A) which will be used to authenticate your test.
 * 2. Set the credentials and region of account A in the following variables: AWS_SECRET_KEY, AWS_ACCESS_KEY, AWS_REGION.
 * 3. Set up permissions in another AWS account (let's say account B) so that account A has access to account B.
27 | * 3.1 Log in to AWS Console under account B, choose AWS Glue service and go to Settings.
28 | * 3.2 Set the following policy:
29 | {
30 | "Version" : "2012-10-17",
31 | "Statement" : [ {
32 | "Effect" : "Allow",
33 | "Principal" : {
 "AWS" : "arn:aws:iam::<account-A-id>:root"
35 | },
36 | "Action" : "glue:*",
 "Resource" : "arn:aws:glue:us-east-1:<account-B-id>:*"
38 | } ]
39 | }
40 | * 4. Set account B id as a value of ANOTHER_GLUE_CATALOG_ID variable.
41 | *
42 | * If another account preparation is not an option for you then you can skip #3 and set account A as a value of
43 | * ANOTHER_GLUE_CATALOG_ID variable. In this case test will pass but you won't test a real multiple catalog access.
44 | */
45 | public class MultipleCatalogIntegrationTestBase {
46 | private static final String ANOTHER_GLUE_CATALOG_ID = "ANOTHER_GLUE_CATALOG_ID";
47 | private static final String CATALOG_SEPARATOR = "/-/";
48 |
49 | protected IMetaStoreClient metastoreClient;
50 | protected AWSGlue glueClient;
51 | protected String anotherCatalogId;
52 |
53 | protected void setup() throws MetaException, TException {
54 | HiveConf conf = new HiveConf();
55 | conf.set(AWSGlueConfig.AWS_GLUE_CATALOG_SEPARATOR, CATALOG_SEPARATOR);
56 | Warehouse wh = mock(Warehouse.class);
57 | Path tmpPath = new Path("/db");
58 | when(wh.getDefaultDatabasePath(anyString())).thenReturn(tmpPath);
59 | when(wh.getDnsPath(any(Path.class))).thenReturn(tmpPath);
60 | when(wh.isDir(any(Path.class))).thenReturn(true);
61 |
62 | GlueClientFactory clientFactory = new AWSGlueClientFactory(conf);
63 | glueClient = clientFactory.newClient();
64 |
65 | metastoreClient = new AWSCatalogMetastoreClient.Builder().withHiveConf(conf).withWarehouse(wh)
66 | .withClientFactory(clientFactory).build();
67 |
68 | anotherCatalogId = System.getenv(ANOTHER_GLUE_CATALOG_ID);
69 | if (Strings.isNullOrEmpty(anotherCatalogId)) {
70 | throw new RuntimeException(String.format(
71 | "Environment variable %s is not set. " +
72 | "Please read comment for %s class to understand what value should be set there.",
73 | ANOTHER_GLUE_CATALOG_ID, this.getClass().getName()));
74 | }
75 | }
76 |
77 | protected String constructDbName(String originalDbName) {
78 | return String.format("%s%s%s", anotherCatalogId, CATALOG_SEPARATOR, originalDbName);
79 | }
80 |
81 | }
82 |
--------------------------------------------------------------------------------
/aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/converters/GlueInputConverter.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.catalog.converters;
2 |
3 | import com.amazonaws.services.glue.model.DatabaseInput;
4 | import com.amazonaws.services.glue.model.PartitionInput;
5 | import com.amazonaws.services.glue.model.TableInput;
6 | import com.amazonaws.services.glue.model.UserDefinedFunctionInput;
7 |
8 | import org.apache.hadoop.hive.metastore.api.Database;
9 | import org.apache.hadoop.hive.metastore.api.Function;
10 | import org.apache.hadoop.hive.metastore.api.Partition;
11 | import org.apache.hadoop.hive.metastore.api.Table;
12 |
13 | import java.util.ArrayList;
14 | import java.util.Collection;
15 | import java.util.List;
16 |
17 | /**
18 | * This class provides methods to convert Hive/Catalog objects to Input objects used
19 | * for Glue API parameters
20 | */
21 | public final class GlueInputConverter {
22 |
23 | public static DatabaseInput convertToDatabaseInput(Database hiveDatabase) {
24 | return convertToDatabaseInput(HiveToCatalogConverter.convertDatabase(hiveDatabase));
25 | }
26 |
27 | public static DatabaseInput convertToDatabaseInput(com.amazonaws.services.glue.model.Database database) {
28 | DatabaseInput input = new DatabaseInput();
29 |
30 | input.setName(database.getName());
31 | input.setDescription(database.getDescription());
32 | input.setLocationUri(database.getLocationUri());
33 | input.setParameters(database.getParameters());
34 |
35 | return input;
36 | }
37 |
38 | public static TableInput convertToTableInput(Table hiveTable) {
39 | return convertToTableInput(HiveToCatalogConverter.convertTable(hiveTable));
40 | }
41 |
42 | public static TableInput convertToTableInput(com.amazonaws.services.glue.model.Table table) {
43 | TableInput tableInput = new TableInput();
44 |
45 | tableInput.setRetention(table.getRetention());
46 | tableInput.setPartitionKeys(table.getPartitionKeys());
47 | tableInput.setTableType(table.getTableType());
48 | tableInput.setName(table.getName());
49 | tableInput.setOwner(table.getOwner());
50 | tableInput.setLastAccessTime(table.getLastAccessTime());
51 | tableInput.setStorageDescriptor(table.getStorageDescriptor());
52 | tableInput.setParameters(table.getParameters());
53 | tableInput.setViewExpandedText(table.getViewExpandedText());
54 | tableInput.setViewOriginalText(table.getViewOriginalText());
55 |
56 | return tableInput;
57 | }
58 |
59 | public static PartitionInput convertToPartitionInput(Partition src) {
60 | return convertToPartitionInput(HiveToCatalogConverter.convertPartition(src));
61 | }
62 |
63 | public static PartitionInput convertToPartitionInput(com.amazonaws.services.glue.model.Partition src) {
64 | PartitionInput partitionInput = new PartitionInput();
65 |
66 | partitionInput.setLastAccessTime(src.getLastAccessTime());
67 | partitionInput.setParameters(src.getParameters());
68 | partitionInput.setStorageDescriptor(src.getStorageDescriptor());
69 | partitionInput.setValues(src.getValues());
70 |
71 | return partitionInput;
72 | }
73 |
74 | public static List convertToPartitionInputs(Collection parts) {
75 | List inputList = new ArrayList<>();
76 |
77 | for (com.amazonaws.services.glue.model.Partition part : parts) {
78 | inputList.add(convertToPartitionInput(part));
79 | }
80 | return inputList;
81 | }
82 |
83 | public static UserDefinedFunctionInput convertToUserDefinedFunctionInput(Function hiveFunction) {
84 | UserDefinedFunctionInput functionInput = new UserDefinedFunctionInput();
85 |
86 | functionInput.setClassName(hiveFunction.getClassName());
87 | functionInput.setFunctionName(hiveFunction.getFunctionName());
88 | functionInput.setOwnerName(hiveFunction.getOwnerName());
89 | if(hiveFunction.getOwnerType() != null) {
90 | functionInput.setOwnerType(hiveFunction.getOwnerType().name());
91 | }
92 | functionInput.setResourceUris(HiveToCatalogConverter.covertResourceUriList(hiveFunction.getResourceUris()));
93 | return functionInput;
94 | }
95 |
96 | }
97 |
--------------------------------------------------------------------------------
/aws-glue-datacatalog-client-common/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
4 |
5 | com.amazonaws.glue
6 | aws-glue-datacatalog-hive-client
7 | 3.4.0-SNAPSHOT
8 |
9 | 4.0.0
10 | AWSGlueDataCatalogClientCommon
11 | aws-glue-datacatalog-client-common
12 | jar
13 |
14 |
15 | org.apache.hive
16 | hive-metastore
17 | ${hive3.version}
18 | provided
19 |
20 |
21 | org.apache.hive
22 | hive-exec
23 | ${hive3.version}
24 | provided
25 |
26 |
27 | com.amazonaws
28 | aws-java-sdk-glue
29 | ${aws.sdk.version}
30 | compile
31 |
32 |
33 | com.amazonaws
34 | aws-java-sdk-core
35 | ${aws.sdk.version}
36 | compile
37 |
38 |
39 | com.google.guava
40 | guava
41 | ${guava.version}
42 |
43 |
44 | org.apache.hadoop
45 | hadoop-common
46 | ${hadoop.version}
47 | provided
48 |
49 |
50 | com.amazonaws.glue
51 | shims-loader
52 | ${project.version}
53 |
54 |
55 |
56 |
57 | org.apache.hadoop
58 | hadoop-mapreduce-client-core
59 | ${hadoop.version}
60 | test
61 |
62 |
63 | junit
64 | junit
65 | ${junit.version}
66 | test
67 |
68 |
69 | org.hamcrest
70 | hamcrest-library
71 | ${hamcrest.version}
72 | test
73 |
74 |
75 | org.mockito
76 | mockito-core
77 | ${mockito.version}
78 | test
79 |
80 |
81 | org.powermock
82 | powermock-module-junit4
83 | ${powermock.version}
84 | test
85 |
86 |
87 | org.powermock
88 | powermock-api-mockito
89 | ${powermock.version}
90 | test
91 |
92 |
93 |
94 |
95 |
96 |
97 | org.apache.maven.plugins
98 | maven-jar-plugin
99 | 2.2
100 |
101 |
102 |
103 | test-jar
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
--------------------------------------------------------------------------------
/aws-glue-datacatalog-client-common/src/test/java/com/amazonaws/glue/catalog/metastore/AWSGlueMetastoreFactoryTest.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.catalog.metastore;
2 |
3 | import org.apache.hadoop.hive.conf.HiveConf;
4 | import org.junit.Before;
5 | import org.junit.Test;
6 |
import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_DB_CACHE_ENABLE;
import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_DB_CACHE_SIZE;
import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_DB_CACHE_TTL_MINS;
import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_ENDPOINT;
import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_TABLE_CACHE_ENABLE;
import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_TABLE_CACHE_SIZE;
import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_TABLE_CACHE_TTL_MINS;
import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_REGION;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.mockito.Mockito.atLeastOnce;
import static org.mockito.Mockito.spy;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
20 |
21 | public class AWSGlueMetastoreFactoryTest {
22 |
23 | private AWSGlueMetastoreFactory awsGlueMetastoreFactory;
24 | private HiveConf hiveConf;
25 |
@Before
// Creates a fresh factory and a spied HiveConf whose cache-related settings
// are stubbed, so each test only has to toggle the cache-enable flags.
public void setUp() {
    awsGlueMetastoreFactory = new AWSGlueMetastoreFactory();
    hiveConf = spy(new HiveConf());

    // these configs are needed for AWSGlueClient to get initialized
    System.setProperty(AWS_REGION, "");
    System.setProperty(AWS_GLUE_ENDPOINT, "");
    when(hiveConf.get(AWS_GLUE_ENDPOINT)).thenReturn("endpoint");
    when(hiveConf.get(AWS_REGION)).thenReturn("us-west-1");

    // these configs are needed for AWSGlueMetastoreCacheDecorator to get initialized
    // (a non-positive size/TTL would make the decorator's constructor fail)
    when(hiveConf.getInt(AWS_GLUE_DB_CACHE_SIZE, 0)).thenReturn(1);
    when(hiveConf.getInt(AWS_GLUE_DB_CACHE_TTL_MINS, 0)).thenReturn(1);
    when(hiveConf.getInt(AWS_GLUE_TABLE_CACHE_SIZE, 0)).thenReturn(1);
    when(hiveConf.getInt(AWS_GLUE_TABLE_CACHE_TTL_MINS, 0)).thenReturn(1);
}
43 |
44 | @Test
45 | public void testNewMetastoreWhenCacheDisabled() throws Exception {
46 | when(hiveConf.getBoolean(AWS_GLUE_DB_CACHE_ENABLE, false)).thenReturn(false);
47 | when(hiveConf.getBoolean(AWS_GLUE_TABLE_CACHE_ENABLE, false)).thenReturn(false);
48 | assertTrue(DefaultAWSGlueMetastore.class.equals(
49 | awsGlueMetastoreFactory.newMetastore(hiveConf).getClass()));
50 | verify(hiveConf, atLeastOnce()).getBoolean(AWS_GLUE_DB_CACHE_ENABLE, false);
51 | verify(hiveConf, atLeastOnce()).getBoolean(AWS_GLUE_TABLE_CACHE_ENABLE, false);
52 | }
53 |
54 | @Test
55 | public void testNewMetastoreWhenTableCacheEnabled() throws Exception {
56 | when(hiveConf.getBoolean(AWS_GLUE_DB_CACHE_ENABLE, false)).thenReturn(false);
57 | when(hiveConf.getBoolean(AWS_GLUE_TABLE_CACHE_ENABLE, false)).thenReturn(true);
58 | assertTrue(AWSGlueMetastoreCacheDecorator.class.equals(
59 | awsGlueMetastoreFactory.newMetastore(hiveConf).getClass()));
60 | verify(hiveConf, atLeastOnce()).getBoolean(AWS_GLUE_DB_CACHE_ENABLE, false);
61 | verify(hiveConf, atLeastOnce()).getBoolean(AWS_GLUE_TABLE_CACHE_ENABLE, false);
62 | }
63 |
64 | @Test
65 | public void testNewMetastoreWhenDBCacheEnabled() throws Exception {
66 | when(hiveConf.getBoolean(AWS_GLUE_DB_CACHE_ENABLE, false)).thenReturn(true);
67 | when(hiveConf.getBoolean(AWS_GLUE_TABLE_CACHE_ENABLE, false)).thenReturn(true);
68 | assertTrue(AWSGlueMetastoreCacheDecorator.class.equals(
69 | awsGlueMetastoreFactory.newMetastore(hiveConf).getClass()));
70 | verify(hiveConf, atLeastOnce()).getBoolean(AWS_GLUE_DB_CACHE_ENABLE, false);
71 | verify(hiveConf, atLeastOnce()).getBoolean(AWS_GLUE_TABLE_CACHE_ENABLE, false);
72 | }
73 |
74 | @Test
75 | public void testNewMetastoreWhenAllCacheEnabled() throws Exception {
76 | when(hiveConf.getBoolean(AWS_GLUE_DB_CACHE_ENABLE, false)).thenReturn(true);
77 | when(hiveConf.getBoolean(AWS_GLUE_TABLE_CACHE_ENABLE, false)).thenReturn(true);
78 | assertTrue(AWSGlueMetastoreCacheDecorator.class.equals(
79 | awsGlueMetastoreFactory.newMetastore(hiveConf).getClass()));
80 | verify(hiveConf, atLeastOnce()).getBoolean(AWS_GLUE_DB_CACHE_ENABLE, false);
81 | verify(hiveConf, atLeastOnce()).getBoolean(AWS_GLUE_TABLE_CACHE_ENABLE, false);
82 | }
83 |
84 | }
--------------------------------------------------------------------------------
/aws-glue-datacatalog-hive3-client/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
4 |
5 | com.amazonaws.glue
6 | aws-glue-datacatalog-hive-client
7 | 3.4.0-SNAPSHOT
8 |
9 | 4.0.0
10 | AWSGlueDataCatalogHive3Client
11 | aws-glue-datacatalog-hive3-client
12 | jar
13 |
14 |
15 | org.apache.hive
16 | hive-metastore
17 | ${hive3.version}
18 | provided
19 |
20 |
21 | org.apache.hive
22 | hive-exec
23 | ${hive3.version}
24 | provided
25 |
26 |
27 | com.amazonaws.glue
28 | aws-glue-datacatalog-client-common
29 | ${project.version}
30 |
31 |
32 | org.apache.hadoop
33 | hadoop-common
34 | ${hadoop.version}
35 | provided
36 |
37 |
38 | org.apache.httpcomponents
39 | httpclient
40 | ${httpclient.version}
41 |
42 |
43 |
44 | org.apache.hadoop
45 | hadoop-mapreduce-client-core
46 | ${hadoop.version}
47 | test
48 |
49 |
50 | com.amazonaws.glue
51 | aws-glue-datacatalog-client-common
52 | ${project.version}
53 | test
54 | tests
55 |
56 |
57 | junit
58 | junit
59 | ${junit.version}
60 | test
61 |
62 |
63 | org.hamcrest
64 | hamcrest-library
65 | ${hamcrest.version}
66 | test
67 |
68 |
69 | org.mockito
70 | mockito-core
71 | ${mockito.version}
72 | test
73 |
74 |
75 |
76 |
77 |
78 | org.apache.maven.plugins
79 | maven-shade-plugin
80 | 3.0.0
81 |
82 | false
83 |
84 |
85 |
86 | package
87 |
88 | shade
89 |
90 |
91 |
92 |
93 | com.amazonaws.glue:aws-glue-datacatalog-client-common
94 | com.amazonaws.glue:shims-common
95 | com.amazonaws.glue:hive3-shims
96 | com.amazonaws.glue:shims-loader
97 | com.amazonaws.glue:spark-hive-shims
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
--------------------------------------------------------------------------------
/aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/converters/PartitionNameParser.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.catalog.converters;
2 |
3 | import java.util.AbstractMap;
4 | import java.util.ArrayList;
5 | import java.util.LinkedHashMap;
6 | import java.util.List;
7 | import java.util.Map.Entry;
8 | import java.util.Set;
9 | import java.util.regex.Matcher;
10 | import java.util.regex.Pattern;
11 |
12 | import com.amazonaws.glue.catalog.exceptions.InvalidPartitionNameException;
13 | import com.google.common.collect.ImmutableSet;
14 |
15 | public class PartitionNameParser {
16 |
17 | private static final Pattern PARTITION_NAME_VALUE_PATTERN = Pattern.compile("([^/]+)=([^/]+)");
18 | private static final String PARTITION_NAME_DELIMITER = "/";
19 |
20 | private static final char STORE_AS_NUMBER = 'n';
21 | private static final char STORE_AS_STRING = 's';
22 |
23 | private static final Set NUMERIC_PARTITION_COLUMN_TYPES = ImmutableSet.of(
24 | "tinyint",
25 | "smallint",
26 | "int",
27 | "bigint"
28 | );
29 |
30 | public static String getPartitionName(List partitionColumns, List partitionValues) {
31 | if (hasInvalidValues(partitionColumns, partitionValues) || hasInvalidSize(partitionColumns, partitionValues)) {
32 | throw new IllegalArgumentException("Partition is not well formed. Columns and values do no match.");
33 | }
34 |
35 | StringBuilder partitionName = new StringBuilder();
36 | partitionName.append(getPartitionColumnName(partitionColumns.get(0), partitionValues.get(0)));
37 |
38 | for (int i = 1; i < partitionColumns.size(); i++) {
39 | partitionName.append(PARTITION_NAME_DELIMITER);
40 | partitionName.append(getPartitionColumnName(partitionColumns.get(i), partitionValues.get(i)));
41 | }
42 |
43 | return partitionName.toString();
44 | }
45 |
46 | private static boolean hasInvalidValues(List partitionColumns, List partitionValues) {
47 | return partitionColumns == null || partitionValues == null;
48 | }
49 |
50 | private static boolean hasInvalidSize(List partitionColumns, List partitionValues) {
51 | return partitionColumns.size() != partitionValues.size();
52 | }
53 |
54 | private static String getPartitionColumnName(String partitionColumn, String partitionValue) {
55 | return partitionColumn + "=" + partitionValue;
56 | }
57 |
58 | public static LinkedHashMap getPartitionColumns(String partitionName) {
59 | LinkedHashMap partitionColumns = new LinkedHashMap<>();
60 | String[] partitions = partitionName.split(PARTITION_NAME_DELIMITER);
61 | for(String partition : partitions) {
62 | Entry entry = getPartitionColumnValuePair(partition);
63 | partitionColumns.put(entry.getKey(), entry.getValue());
64 | }
65 |
66 | return partitionColumns;
67 | }
68 |
69 | /*
70 | * Copied from https://github.com/apache/hive/blob/master/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
71 | */
72 | public static String unescapePathName(String path) {
73 | int len = path.length();
74 | //pre-allocate sb to have enough buffer size, to avoid realloc
75 | StringBuilder sb = new StringBuilder(len);
76 | for (int i = 0; i < len; i++) {
77 | char c = path.charAt(i);
78 | if (c == '%' && i + 2 < len) {
79 | int code = -1;
80 | try {
81 | code = Integer.parseInt(path.substring(i + 1, i + 3), 16);
82 | } catch (Exception e) {
83 | code = -1;
84 | }
85 | if (code >= 0) {
86 | sb.append((char) code);
87 | i += 2;
88 | continue;
89 | }
90 | }
91 | sb.append(c);
92 | }
93 | return sb.toString();
94 | }
95 |
96 | private static AbstractMap.SimpleEntry getPartitionColumnValuePair(String partition) {
97 | String column = null;
98 | String value = null;
99 | Matcher partitionMatcher = PARTITION_NAME_VALUE_PATTERN.matcher(partition);
100 |
101 | if (partitionMatcher.matches()) {
102 | column = unescapePathName(partitionMatcher.group(1));
103 | value = unescapePathName(partitionMatcher.group(2));
104 | } else {
105 | throw new InvalidPartitionNameException(partition);
106 | }
107 |
108 | return new AbstractMap.SimpleEntry(column, value);
109 | }
110 |
111 | public static List getPartitionValuesFromName(String partitionName) {
112 | List partitionValues = new ArrayList<>();
113 | String[] partitions = partitionName.split(PARTITION_NAME_DELIMITER);
114 | for(String partition : partitions) {
115 | Entry entry = getPartitionColumnValuePair(partition);
116 | partitionValues.add(entry.getValue());
117 | }
118 |
119 | return partitionValues;
120 | }
121 |
122 | }
123 |
--------------------------------------------------------------------------------
/aws-glue-datacatalog-client-common/src/test/java/com/amazonaws/glue/catalog/util/PartitionKeyTest.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.catalog.util;
2 |
3 | import com.amazonaws.services.glue.model.Partition;
4 | import com.amazonaws.services.glue.model.PartitionError;
5 |
6 | import com.google.common.collect.Lists;
7 | import org.junit.Test;
8 |
9 | import java.util.List;
10 |
11 | import static org.junit.Assert.assertEquals;
12 | import static org.junit.Assert.assertFalse;
13 |
14 | public class PartitionKeyTest {
15 |
16 | @Test(expected=IllegalArgumentException.class)
17 | public void testNull() {
18 | new PartitionKey((List) null);
19 | }
20 |
21 | @Test
22 | public void testEqualsDifferentTable() {
23 | List values1 = Lists.newArrayList("value1", "value2");
24 | List values2 = Lists.newArrayList("value1", "value2");
25 | Partition partition1 = TestObjects.getTestPartition("ns", "table1", values1);
26 | Partition partition2 = TestObjects.getTestPartition("ns", "table2", values2);
27 | PartitionKey partitionKey1 = new PartitionKey(partition1);
28 | PartitionKey partitionKey2 = new PartitionKey(partition2);
29 | assertEquals(partitionKey1, partitionKey2);
30 | assertEquals(partitionKey1.hashCode(), partitionKey2.hashCode());
31 | }
32 |
33 | @Test
34 | public void testEqualsEmptyValue() {
35 | List values = Lists.newArrayList();
36 | Partition partition1 = TestObjects.getTestPartition("ns", "table", values);
37 | Partition partition2 = TestObjects.getTestPartition("ns", "table", values);
38 | PartitionKey partitionKey1 = new PartitionKey(partition1);
39 | PartitionKey partitionKey2 = new PartitionKey(partition2);
40 | assertEquals(partitionKey1, partitionKey2);
41 | assertEquals(partitionKey1.hashCode(), partitionKey2.hashCode());
42 | }
43 |
44 | @Test
45 | public void testEqualsDifferentClass() {
46 | List values1 = Lists.newArrayList("value1", "value2");
47 | List values2 = Lists.newLinkedList(values1);
48 | Partition partition1 = TestObjects.getTestPartition("ns", "table", values1);
49 | Partition partition2 = TestObjects.getTestPartition("ns", "table", values2);
50 | PartitionKey partitionKey1 = new PartitionKey(partition1);
51 | PartitionKey partitionKey2 = new PartitionKey(partition2);
52 | assertEquals(partitionKey1, partitionKey2);
53 | assertEquals(partitionKey1.hashCode(), partitionKey2.hashCode());
54 | }
55 |
56 | @Test
57 | public void testEqualsPartitionError() {
58 | List values1 = Lists.newArrayList("value1", "value2");
59 | List values2 = Lists.newArrayList("value1", "value2");
60 | PartitionError partitionError1 = TestObjects.getPartitionError(values1, new RuntimeException("foo"));
61 | PartitionError partitionError2 = TestObjects.getPartitionError(values2, new Exception("foo2"));
62 | PartitionKey partitionKey1 = new PartitionKey(partitionError1.getPartitionValues());
63 | PartitionKey partitionKey2 = new PartitionKey(partitionError2.getPartitionValues());
64 | assertEquals(partitionKey1, partitionKey2);
65 | assertEquals(partitionKey1.hashCode(), partitionKey2.hashCode());
66 | }
67 |
68 | @Test
69 | public void testEqualsPartitionAndPartitionError() {
70 | List values1 = Lists.newArrayList("value1", "value2");
71 | List values2 = Lists.newArrayList("value1", "value2");
72 | Partition partition = TestObjects.getTestPartition("ns", "table", values1);
73 | PartitionError partitionError = TestObjects.getPartitionError(values2, new RuntimeException("foo"));
74 | PartitionKey partitionKey1 = new PartitionKey(partition);
75 | PartitionKey partitionKey2 = new PartitionKey(partitionError.getPartitionValues());
76 | assertEquals(partitionKey1, partitionKey2);
77 | assertEquals(partitionKey1.hashCode(), partitionKey2.hashCode());
78 | }
79 |
80 | @Test
81 | public void testEqualsNull() {
82 | List values = Lists.newArrayList("value1", "value2");
83 | Partition partition = TestObjects.getTestPartition("ns", "table", values);
84 | PartitionKey partitionKey = new PartitionKey(partition);
85 | assertFalse(partitionKey.equals(null));
86 | }
87 |
88 | @Test
89 | public void testGetValues() {
90 | List values = Lists.newArrayList("value1", "value2");
91 | Partition partition = TestObjects.getTestPartition("ns", "table", values);
92 | PartitionKey partitionKey1 = new PartitionKey(partition);
93 | assertEquals(Lists.newArrayList(values), partitionKey1.getValues());
94 |
95 | PartitionError partitionError = TestObjects.getPartitionError(values, new RuntimeException("foo"));
96 | PartitionKey partitionKey2 = new PartitionKey(partitionError.getPartitionValues());
97 | assertEquals(Lists.newArrayList(values), partitionKey2.getValues());
98 | }
99 |
100 | }
--------------------------------------------------------------------------------
/aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/metastore/AWSGlueMetastore.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.catalog.metastore;
2 |
3 | import com.amazonaws.services.glue.model.ColumnStatistics;
4 | import com.amazonaws.services.glue.model.ColumnStatisticsError;
5 | import com.amazonaws.services.glue.model.Database;
6 | import com.amazonaws.services.glue.model.DatabaseInput;
7 | import com.amazonaws.services.glue.model.Partition;
8 | import com.amazonaws.services.glue.model.PartitionError;
9 | import com.amazonaws.services.glue.model.PartitionInput;
10 | import com.amazonaws.services.glue.model.PartitionValueList;
11 | import com.amazonaws.services.glue.model.Table;
12 | import com.amazonaws.services.glue.model.TableInput;
13 | import com.amazonaws.services.glue.model.UserDefinedFunction;
14 | import com.amazonaws.services.glue.model.UserDefinedFunctionInput;
15 | import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
16 | import org.apache.thrift.TException;
17 |
18 | import java.util.List;
19 | import java.util.Map;
20 |
21 | /**
22 | * This is the accessor interface for using AWS Glue as a metastore.
23 | * The generic AWSGlue interface{@link com.amazonaws.services.glue.AWSGlue}
24 | * has a number of methods that are irrelevant for clients using Glue only
25 | * as a metastore.
26 | * Think of this interface as a wrapper over AWSGlue. This additional layer
27 | * of abstraction achieves the following -
28 | * a) Hides the non-metastore related operations present in AWSGlue
29 | * b) Hides away the batching and pagination related limitations of AWSGlue
30 | */
31 | public interface AWSGlueMetastore {
32 |
33 | void createDatabase(DatabaseInput databaseInput);
34 |
35 | Database getDatabase(String dbName);
36 |
37 | List getAllDatabases();
38 |
39 | void updateDatabase(String databaseName, DatabaseInput databaseInput);
40 |
41 | void deleteDatabase(String dbName);
42 |
43 | void createTable(String dbName, TableInput tableInput);
44 |
45 | Table getTable(String dbName, String tableName);
46 |
47 | List getTables(String dbname, String tablePattern);
48 |
49 | void updateTable(String dbName, TableInput tableInput);
50 |
51 | void updateTable(String dbName, TableInput tableInput, EnvironmentContext environmentContext);
52 |
53 | void deleteTable(String dbName, String tableName);
54 |
55 | Partition getPartition(String dbName, String tableName, List partitionValues);
56 |
57 | List getPartitionsByNames(String dbName, String tableName,
58 | List partitionsToGet);
59 |
60 | List getPartitions(String dbName, String tableName, String expression,
61 | long max) throws TException;
62 |
63 | void updatePartition(String dbName, String tableName, List partitionValues,
64 | PartitionInput partitionInput);
65 |
66 | void deletePartition(String dbName, String tableName, List partitionValues);
67 |
68 | List createPartitions(String dbName, String tableName,
69 | List partitionInputs);
70 |
71 | void createUserDefinedFunction(String dbName, UserDefinedFunctionInput functionInput);
72 |
73 | UserDefinedFunction getUserDefinedFunction(String dbName, String functionName);
74 |
75 | List getUserDefinedFunctions(String dbName, String pattern);
76 |
77 | List getUserDefinedFunctions(String pattern);
78 |
79 | void deleteUserDefinedFunction(String dbName, String functionName);
80 |
81 | void updateUserDefinedFunction(String dbName, String functionName, UserDefinedFunctionInput functionInput);
82 |
83 | void deletePartitionColumnStatistics(String dbName, String tableName, List partitionValues, String colName);
84 |
85 | void deleteTableColumnStatistics(String dbName, String tableName, String colName);
86 |
87 | Map> getPartitionColumnStatistics(
88 | String dbName,
89 | String tableName,
90 | List partitionValues,
91 | List columnNames
92 | );
93 |
94 | List getTableColumnStatistics(
95 | String dbName,
96 | String tableName,
97 | List colNames
98 | );
99 |
100 | List updatePartitionColumnStatistics(
101 | String dbName,
102 | String tableName,
103 | List partitionValues,
104 | List columnStatistics
105 | );
106 |
107 | List updateTableColumnStatistics(
108 | String dbName,
109 | String tableName,
110 | List columnStatistics
111 | );
112 | }
--------------------------------------------------------------------------------
/aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/util/MetastoreClientUtils.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.catalog.util;
2 |
3 | import com.amazonaws.glue.catalog.metastore.GlueMetastoreClientDelegate;
4 | import com.amazonaws.glue.shims.AwsGlueHiveShims;
5 | import com.amazonaws.glue.shims.ShimsLoader;
6 | import com.google.common.collect.Maps;
7 |
8 | import org.apache.commons.lang3.StringUtils;
9 | import org.apache.hadoop.conf.Configuration;
10 | import org.apache.hadoop.fs.Path;
11 | import org.apache.hadoop.hive.metastore.Warehouse;
12 | import org.apache.hadoop.hive.metastore.api.InvalidObjectException;
13 | import org.apache.hadoop.hive.metastore.api.MetaException;
14 | import org.apache.hadoop.hive.metastore.api.Table;
15 |
16 | import java.util.Map;
17 |
18 | import static com.google.common.base.Preconditions.checkNotNull;
19 | import static org.apache.hadoop.hive.metastore.TableType.EXTERNAL_TABLE;
20 |
21 | public final class MetastoreClientUtils {
22 |
23 | private static final AwsGlueHiveShims hiveShims = ShimsLoader.getHiveShims();
24 |
25 | private MetastoreClientUtils() {
26 | // static util class should not be instantiated
27 | }
28 |
29 | /**
30 | * @return boolean
31 | * true -> if directory was able to be created.
32 | * false -> if directory already exists.
33 | * @throws MetaException if directory could not be created.
34 | */
35 | public static boolean makeDirs(Warehouse wh, Path path) throws MetaException {
36 | checkNotNull(wh, "Warehouse cannot be null");
37 | checkNotNull(path, "Path cannot be null");
38 |
39 | boolean madeDir = false;
40 | if (!wh.isDir(path)) {
41 | if (!hiveShims.mkdirs(wh, path)) {
42 | throw new MetaException("Unable to create path: " + path);
43 | }
44 | madeDir = true;
45 | }
46 | return madeDir;
47 | }
48 |
49 | /**
50 | * Taken from HiveMetaStore#create_table_core
51 | * https://github.com/apache/hive/blob/rel/release-2.3.0/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java#L1370-L1383
52 | */
53 | public static void validateTableObject(Table table, Configuration conf) throws InvalidObjectException {
54 | checkNotNull(table, "table cannot be null");
55 | checkNotNull(table.getSd(), "Table#StorageDescriptor cannot be null");
56 |
57 | if (!hiveShims.validateTableName(table.getTableName(), conf)) {
58 | throw new InvalidObjectException(table.getTableName() + " is not a valid object name");
59 | }
60 | String validate = hiveShims.validateTblColumns(table.getSd().getCols());
61 | if (validate != null) {
62 | throw new InvalidObjectException("Invalid column " + validate);
63 | }
64 |
65 | if (table.getPartitionKeys() != null) {
66 | validate = hiveShims.validateTblColumns(table.getPartitionKeys());
67 | if (validate != null) {
68 | throw new InvalidObjectException("Invalid partition column " + validate);
69 | }
70 | }
71 | }
72 |
73 | /**
74 | * Should be used when getting table from Glue that may have been created by
75 | * users manually or through Crawlers. Validates that table contains properties required by Hive/Spark.
76 | * @param table
77 | */
78 | public static void validateGlueTable(com.amazonaws.services.glue.model.Table table) {
79 | checkNotNull(table, "table cannot be null");
80 |
81 | for (HiveTableValidator validator : HiveTableValidator.values()) {
82 | validator.validate(table);
83 | }
84 | }
85 |
86 | public static Map deepCopyMap(Map originalMap) {
87 | Map deepCopy = Maps.newHashMap();
88 | if (originalMap == null) {
89 | return deepCopy;
90 | }
91 |
92 | for (Map.Entry entry : originalMap.entrySet()) {
93 | deepCopy.put(entry.getKey(), entry.getValue());
94 | }
95 | return deepCopy;
96 | }
97 |
98 | /**
99 | * Mimics MetaStoreUtils.isExternalTable
100 | * Additional logic: check Table#getTableType to see if isExternalTable
101 | */
102 | public static boolean isExternalTable(org.apache.hadoop.hive.metastore.api.Table table) {
103 | if (table == null) {
104 | return false;
105 | }
106 |
107 | Map params = table.getParameters();
108 | String paramsExternalStr = params == null ? null : params.get("EXTERNAL");
109 | if (paramsExternalStr != null) {
110 | return "TRUE".equalsIgnoreCase(paramsExternalStr);
111 | }
112 |
113 | return table.getTableType() != null && EXTERNAL_TABLE.name().equalsIgnoreCase(table.getTableType());
114 | }
115 |
116 | public static String getCatalogId(Configuration conf) {
117 | if (StringUtils.isNotEmpty(conf.get(GlueMetastoreClientDelegate.CATALOG_ID_CONF))) {
118 | return conf.get(GlueMetastoreClientDelegate.CATALOG_ID_CONF);
119 | }
120 | // This case defaults to using the caller's account Id as Catalog Id.
121 | return null;
122 | }
123 |
124 | }
125 |
--------------------------------------------------------------------------------
/aws-glue-datacatalog-client-common/src/test/java/com/amazonaws/glue/catalog/util/MetastoreClientUtilsTest.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.catalog.util;
2 |
3 | import com.amazonaws.glue.catalog.converters.BaseCatalogToHiveConverter;
4 |
5 | import com.amazonaws.glue.catalog.converters.CatalogToHiveConverter;
6 | import com.google.common.collect.ImmutableMap;
7 | import org.apache.hadoop.fs.Path;
8 | import org.apache.hadoop.hive.conf.HiveConf;
9 | import org.apache.hadoop.hive.metastore.Warehouse;
10 | import org.apache.hadoop.hive.metastore.api.Database;
11 | import org.apache.hadoop.hive.metastore.api.InvalidObjectException;
12 | import org.apache.hadoop.hive.metastore.api.MetaException;
13 | import org.apache.hadoop.hive.metastore.api.Table;
14 | import org.junit.Before;
15 | import org.junit.Test;
16 |
17 | import java.util.Map;
18 |
19 | import static com.amazonaws.glue.catalog.util.TestObjects.getTestDatabase;
20 | import static com.amazonaws.glue.catalog.util.TestObjects.getTestTable;
21 | import static org.apache.hadoop.hive.metastore.TableType.EXTERNAL_TABLE;
22 | import static org.junit.Assert.assertEquals;
23 | import static org.junit.Assert.assertFalse;
24 | import static org.junit.Assert.assertNotSame;
25 | import static org.junit.Assert.assertTrue;
26 | import static org.mockito.Mockito.mock;
27 | import static org.mockito.Mockito.when;
28 |
29 | public class MetastoreClientUtilsTest {
30 |
31 | private static final String TEST_LOCATION = "s3://mybucket/";
32 |
33 | private Warehouse wh;
34 | private HiveConf conf;
35 |
36 | private Path testPath;
37 | private Database testDb;
38 | private Table testTbl;
39 |
40 | @Before
41 | public void setup() {
42 | wh = mock(Warehouse.class);
43 | conf = new HiveConf();
44 | testPath = new Path(TEST_LOCATION);
45 | CatalogToHiveConverter catalogToHiveConverter = new BaseCatalogToHiveConverter();
46 | testDb = catalogToHiveConverter.convertDatabase(getTestDatabase());
47 | testTbl = catalogToHiveConverter.convertTable(getTestTable(), testDb.getName());
48 | }
49 |
50 | @Test(expected = NullPointerException.class)
51 | public void testMakeDirsNullWh() throws Exception {
52 | MetastoreClientUtils.makeDirs(null, testPath);
53 | }
54 |
55 | @Test(expected = NullPointerException.class)
56 | public void testMakeDirsNullPath() throws Exception {
57 | MetastoreClientUtils.makeDirs(wh, null);
58 | }
59 |
60 | @Test
61 | public void testMakeDirsAlreadyExists() throws Exception {
62 | when(wh.isDir(testPath)).thenReturn(true);
63 | assertFalse(MetastoreClientUtils.makeDirs(wh, testPath));
64 | }
65 |
66 | @Test(expected = MetaException.class)
67 | public void testMakeDirsCannotCreateDir() throws Exception {
68 | when(wh.isDir(testPath)).thenReturn(false);
69 | when(wh.mkdirs(testPath)).thenReturn(false);
70 | MetastoreClientUtils.makeDirs(wh, testPath);
71 | }
72 |
73 | @Test(expected = InvalidObjectException.class)
74 | public void testValidateTableObjectInvalidName() throws Exception {
75 | testTbl.setTableName("!");
76 | MetastoreClientUtils.validateTableObject(testTbl, conf);
77 | }
78 |
79 | @Test(expected = InvalidObjectException.class)
80 | public void testValidateTableObjectInvalidColumnName() throws Exception {
81 | testTbl.getSd().getCols().get(0).setType("invalidtype");
82 | MetastoreClientUtils.validateTableObject(testTbl, conf);
83 | }
84 |
85 | @Test(expected = InvalidObjectException.class)
86 | public void testValidateTableObjectInvalidPartitionKeys() throws Exception {
87 | testTbl.getPartitionKeys().get(0).setType("invalidtype");
88 | MetastoreClientUtils.validateTableObject(testTbl, conf);
89 | }
90 |
91 | @Test
92 | public void testDeepCopy() throws Exception {
93 | Map orig = ImmutableMap.of("key", "val");
94 | Map deepCopy = MetastoreClientUtils.deepCopyMap(orig);
95 | assertNotSame(deepCopy, orig);
96 | assertEquals(deepCopy, orig);
97 | }
98 |
99 | @Test
100 | public void testIsExternalTableFalse() {
101 | assertFalse(MetastoreClientUtils.isExternalTable(testTbl));
102 | }
103 |
104 | @Test
105 | public void testIsExternalTableParamTrue() {
106 | testTbl.getParameters().put("EXTERNAL", "true");
107 | assertTrue(MetastoreClientUtils.isExternalTable(testTbl));
108 | }
109 |
110 | @Test
111 | public void testIsExternalTableTableTypeTrue() {
112 | testTbl.setTableType(EXTERNAL_TABLE.name());
113 | testTbl.setParameters(null);
114 | assertTrue(MetastoreClientUtils.isExternalTable(testTbl));
115 | }
116 |
117 | @Test
118 | public void testIsExternalTableParamPriority() {
119 | // parameters has higher priority when there is conflict
120 | testTbl.getParameters().put("EXTERNAL", "false");
121 | testTbl.setTableType(EXTERNAL_TABLE.name());
122 | assertFalse(MetastoreClientUtils.isExternalTable(testTbl));
123 | }
124 |
125 | @Test
126 | public void testIsExternalTableNull() {
127 | assertFalse(MetastoreClientUtils.isExternalTable(null));
128 | }
129 | }
130 |
--------------------------------------------------------------------------------
/aws-glue-datacatalog-spark-client/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 | com.amazonaws.glue
7 | aws-glue-datacatalog-hive-client
8 | 3.4.0-SNAPSHOT
9 |
10 | 4.0.0
11 | AWSGlueDataCatalogSparkClient
12 |
13 | aws-glue-datacatalog-spark-client
14 |
15 |
16 | org.apache.hive
17 | hive-metastore
18 | ${spark-hive.version}
19 | provided
20 |
21 |
22 | org.apache.hive
23 | hive-exec
24 | ${spark-hive.version}
25 | provided
26 |
27 |
28 | com.amazonaws.glue
29 | aws-glue-datacatalog-client-common
30 | ${project.version}
31 |
32 |
33 | org.apache.hadoop
34 | hadoop-common
35 | ${hadoop.version}
36 | provided
37 |
38 |
39 | org.apache.httpcomponents
40 | httpclient
41 | ${httpclient.version}
42 |
43 |
44 |
45 | com.amazonaws.glue
46 | aws-glue-datacatalog-client-common
47 | ${project.version}
48 | test
49 | tests
50 |
51 |
52 | junit
53 | junit
54 | ${junit.version}
55 | test
56 |
57 |
58 | org.hamcrest
59 | hamcrest-library
60 | ${hamcrest.version}
61 | test
62 |
63 |
64 | org.mockito
65 | mockito-core
66 | ${mockito.version}
67 | test
68 |
69 |
70 |
71 |
72 |
73 | org.apache.maven.plugins
74 | maven-jar-plugin
75 | 2.2
76 |
77 |
78 |
79 | test-jar
80 |
81 |
82 |
83 |
84 |
85 | **/SparkCatalogMetastoreClientTest.*
86 |
87 |
88 |
89 |
90 | org.apache.maven.plugins
91 | maven-shade-plugin
92 | 3.0.0
93 |
94 | false
95 |
96 |
97 |
98 | package
99 |
100 | shade
101 |
102 |
103 |
104 |
105 | com.amazonaws.glue:aws-glue-datacatalog-client-common
106 | com.amazonaws.glue:shims-common
107 | com.amazonaws.glue:hive3-shims
108 | com.amazonaws.glue:shims-loader
109 | com.amazonaws.glue:spark-hive-shims
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 |
3 | 4.0.0
4 |
5 | com.amazonaws.glue
6 | aws-glue-datacatalog-hive-client
7 | 3.4.0-SNAPSHOT
8 |
9 | aws-glue-datacatalog-client-common
10 | aws-glue-datacatalog-spark-client
11 | aws-glue-datacatalog-hive3-client
12 | shims
13 |
14 | pom
15 |
16 | AWSGlueDataCatalogHiveClient
17 | http://maven.apache.org
18 |
19 |
20 | UTF-8
21 | 28.2-jre
22 | 3.1.3
23 | 2.3.10-SNAPSHOT
24 | 1.12.31
25 | 4.11
26 | 1.10.19
27 | 1.7.1
28 | 2.15
29 | 1.6.4
30 | 3.1.0
31 | 2.9
32 | 1.3
33 | 4.5.3
34 | ${basedir}/dev-support/check_style.xml
35 |
36 |
37 |
38 |
39 |
40 |
41 | org.apache.maven.plugins
42 | maven-compiler-plugin
43 | 2.3.1
44 |
45 | 1.8
46 | 1.8
47 | UTF-8
48 |
49 |
50 |
51 | org.apache.maven.plugins
52 | maven-surefire-plugin
53 | 2.5
54 |
55 |
56 | **/*Test.java
57 |
58 |
59 | **/TestObjects.java
60 | **/*IntegrationTest.java
61 |
62 |
63 |
64 |
65 | org.apache.maven.plugins
66 | maven-eclipse-plugin
67 | ${maven.eclipse.plugin.version}
68 |
69 | true
70 | true
71 | Hive
72 | ${basedir}/dev-support/eclipse-styles.xml
73 |
74 |
75 |
76 | org.apache.maven.plugins
77 | maven-checkstyle-plugin
78 | 2.9.1
79 |
80 |
81 | verify
82 | verify
83 |
84 | UTF-8
85 | true
86 | false
87 | false
88 |
89 |
90 | check
91 |
92 |
93 |
94 |
95 |
96 |
97 | org.apache.maven.plugins
98 | maven-assembly-plugin
99 | 2.6
100 |
101 |
102 |
103 | jar-with-dependencies
104 |
105 |
106 |
107 |
108 | make-assembly
109 |
110 | package
111 |
112 | single
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 | org.apache.maven.plugins
125 | maven-project-info-reports-plugin
126 | 2.7
127 |
128 | false
129 |
130 |
131 |
132 |
133 |
134 |
135 |
--------------------------------------------------------------------------------
/aws-glue-datacatalog-spark-client/src/test/java/com/amazonaws/glue/catalog/metastore/integrationtest/MultipleCatalogTableIntegrationTest.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.catalog.metastore.integrationtest;
2 |
3 | import com.amazonaws.glue.catalog.converters.BaseCatalogToHiveConverter;
4 | import com.amazonaws.glue.catalog.converters.CatalogToHiveConverter;
5 | import com.amazonaws.services.glue.model.DeleteDatabaseRequest;
6 | import com.amazonaws.services.glue.model.EntityNotFoundException;
7 | import com.amazonaws.services.glue.model.GetTableRequest;
8 | import com.google.common.collect.ImmutableMap;
9 | import com.google.common.collect.ImmutableSet;
10 | import org.apache.hadoop.hive.metastore.api.Database;
11 | import org.apache.hadoop.hive.metastore.api.MetaException;
12 | import org.apache.hadoop.hive.metastore.api.Table;
13 | import org.apache.thrift.TException;
14 | import org.junit.After;
15 | import org.junit.Before;
16 | import org.junit.Rule;
17 | import org.junit.Test;
18 | import org.junit.rules.ExpectedException;
19 |
20 | import java.util.HashSet;
21 | import java.util.Map;
22 | import java.util.Set;
23 |
24 | import static com.amazonaws.glue.catalog.util.TestObjects.getTestDatabase;
25 | import static com.amazonaws.glue.catalog.util.TestObjects.getTestTable;
26 | import static org.junit.Assert.assertEquals;
27 |
28 | public class MultipleCatalogTableIntegrationTest extends MultipleCatalogIntegrationTestBase {
29 | private Database databaseInAnotherCatalog;
30 | private Database database;
31 | private Table table;
32 | private CatalogToHiveConverter catalogToHiveConverter = new BaseCatalogToHiveConverter();
33 |
34 | @Rule
35 | public ExpectedException expectedException = ExpectedException.none();
36 |
37 | @Before
38 | public void setup() throws MetaException, TException {
39 | super.setup();
40 |
41 | databaseInAnotherCatalog = catalogToHiveConverter.convertDatabase(getTestDatabase());
42 | database = databaseInAnotherCatalog.deepCopy();
43 | database.setName(constructDbName(databaseInAnotherCatalog.getName()));
44 | metastoreClient.createDatabase(database);
45 |
46 | table = catalogToHiveConverter.convertTable(getTestTable(), database.getName());
47 | metastoreClient.createTable(table);
48 | }
49 |
50 | @After
51 | public void clean() {
52 | glueClient.deleteDatabase(new DeleteDatabaseRequest().withName(database.getName()));
53 | }
54 |
55 | @Test
56 | public void getTable() throws TException {
57 | Table createdTable = metastoreClient.getTable(database.getName(), table.getTableName());
58 | //time is updated on service side and can be different from what we sent
59 | table.setCreateTime(createdTable.getCreateTime());
60 | table.setLastAccessTime(createdTable.getLastAccessTime());
61 | assertEquals(table, createdTable);
62 |
63 | createdTable = catalogToHiveConverter.convertTable(
64 | glueClient.getTable(new GetTableRequest()
65 | .withCatalogId(anotherCatalogId)
66 | .withDatabaseName(databaseInAnotherCatalog.getName())
67 | .withName(table.getTableName())
68 | ).getTable(), database.getName());
69 | assertEquals(table, createdTable);
70 | }
71 |
72 | @Test
73 | public void getAllTables() throws TException {
74 | Table table2 = catalogToHiveConverter.convertTable(getTestTable(), database.getName());
75 | metastoreClient.createTable(table2);
76 |
77 | Table table3 = catalogToHiveConverter.convertTable(getTestTable(), database.getName());
78 | metastoreClient.createTable(table3);
79 |
80 | Set tableNames = new HashSet<>(metastoreClient.getAllTables(database.getName()));
81 | Set expectedNames = ImmutableSet.of(table.getTableName(), table2.getTableName(), table3.getTableName());
82 | assertEquals(expectedNames, tableNames);
83 | }
84 |
85 | @Test
86 | public void alterTable() throws TException {
87 | Map newParameters = ImmutableMap.of("param1", "newVal1");
88 | table.setParameters(newParameters);
89 | metastoreClient.alter_table(database.getName(), table.getTableName(), table);
90 |
91 | Table alteredTable = catalogToHiveConverter.convertTable(
92 | glueClient.getTable(new GetTableRequest()
93 | .withCatalogId(anotherCatalogId)
94 | .withDatabaseName(databaseInAnotherCatalog.getName())
95 | .withName(table.getTableName())
96 | ).getTable(), database.getName());
97 | //time is updated on service side and can be different from what we sent
98 | table.setCreateTime(alteredTable.getCreateTime());
99 | table.setLastAccessTime(alteredTable.getLastAccessTime());
100 | assertEquals(table, alteredTable);
101 | }
102 |
103 | @Test
104 | public void dropTable() throws TException {
105 | metastoreClient.dropTable(database.getName(), table.getTableName());
106 |
107 | expectedException.expect(EntityNotFoundException.class);
108 | glueClient.getTable(new GetTableRequest()
109 | .withCatalogId(anotherCatalogId)
110 | .withDatabaseName(databaseInAnotherCatalog.getName())
111 | .withName(table.getTableName())
112 | );
113 | }
114 | }
115 |
--------------------------------------------------------------------------------
/aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/util/BatchDeletePartitionsHelper.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.catalog.util;
2 |
3 | import com.amazonaws.glue.catalog.converters.BaseCatalogToHiveConverter;
4 | import com.amazonaws.glue.catalog.converters.CatalogToHiveConverter;
5 | import com.amazonaws.glue.catalog.converters.CatalogToHiveConverterFactory;
6 | import com.amazonaws.services.glue.model.BatchDeletePartitionRequest;
7 | import com.amazonaws.services.glue.model.BatchDeletePartitionResult;
8 | import com.amazonaws.services.glue.model.EntityNotFoundException;
9 | import com.amazonaws.services.glue.model.ErrorDetail;
10 | import com.amazonaws.services.glue.model.GetPartitionRequest;
11 | import com.amazonaws.services.glue.model.GetPartitionResult;
12 | import com.amazonaws.services.glue.model.Partition;
13 | import com.amazonaws.services.glue.model.PartitionError;
14 | import com.amazonaws.services.glue.AWSGlue;
15 |
16 | import org.apache.log4j.Logger;
17 | import org.apache.thrift.TException;
18 |
19 | import java.util.Collection;
20 | import java.util.List;
21 | import java.util.Map;
22 |
23 | public final class BatchDeletePartitionsHelper {
24 |
25 | private static final Logger logger = Logger.getLogger(BatchDeletePartitionsHelper.class);
26 |
27 | private final AWSGlue client;
28 | private final String namespaceName;
29 | private final String tableName;
30 | private final String catalogId;
31 | private final List partitions;
32 | private Map partitionMap;
33 | private TException firstTException;
34 | private CatalogToHiveConverter catalogToHiveConverter;
35 |
36 | public BatchDeletePartitionsHelper(AWSGlue client, String namespaceName, String tableName,
37 | String catalogId, List partitions) {
38 | this.client = client;
39 | this.namespaceName = namespaceName;
40 | this.tableName = tableName;
41 | this.catalogId = catalogId;
42 | this.partitions = partitions;
43 | catalogToHiveConverter = CatalogToHiveConverterFactory.getCatalogToHiveConverter();
44 | }
45 |
46 | public BatchDeletePartitionsHelper deletePartitions() {
47 | partitionMap = PartitionUtils.buildPartitionMap(partitions);
48 |
49 | BatchDeletePartitionRequest request = new BatchDeletePartitionRequest().withDatabaseName(namespaceName)
50 | .withTableName(tableName).withCatalogId(catalogId)
51 | .withPartitionsToDelete(PartitionUtils.getPartitionValuesList(partitionMap));
52 |
53 | try {
54 | BatchDeletePartitionResult result = client.batchDeletePartition(request);
55 | processResult(result);
56 | } catch (Exception e) {
57 | logger.error("Exception thrown while deleting partitions in DataCatalog: ", e);
58 | firstTException = catalogToHiveConverter.wrapInHiveException(e);
59 | if (PartitionUtils.isInvalidUserInputException(e)) {
60 | setAllFailed();
61 | } else {
62 | checkIfPartitionsDeleted();
63 | }
64 | }
65 | return this;
66 | }
67 |
68 | private void setAllFailed() {
69 | partitionMap.clear();
70 | }
71 |
72 | private void processResult(final BatchDeletePartitionResult batchDeletePartitionsResult) {
73 | List partitionErrors = batchDeletePartitionsResult.getErrors();
74 | if (partitionErrors == null || partitionErrors.isEmpty()) {
75 | return;
76 | }
77 |
78 | logger.error(String.format("BatchDeletePartitions failed to delete %d out of %d partitions. \n",
79 | partitionErrors.size(), partitionMap.size()));
80 |
81 | for (PartitionError partitionError : partitionErrors) {
82 | partitionMap.remove(new PartitionKey(partitionError.getPartitionValues()));
83 | ErrorDetail errorDetail = partitionError.getErrorDetail();
84 | logger.error(errorDetail.toString());
85 | if (firstTException == null) {
86 | firstTException = catalogToHiveConverter.errorDetailToHiveException(errorDetail);
87 | }
88 | }
89 | }
90 |
91 | private void checkIfPartitionsDeleted() {
92 | for (Partition partition : partitions) {
93 | if (!partitionDeleted(partition)) {
94 | partitionMap.remove(new PartitionKey(partition));
95 | }
96 | }
97 | }
98 |
99 | private boolean partitionDeleted(Partition partition) {
100 | GetPartitionRequest request = new GetPartitionRequest()
101 | .withDatabaseName(partition.getDatabaseName())
102 | .withTableName(partition.getTableName())
103 | .withPartitionValues(partition.getValues())
104 | .withCatalogId(catalogId);
105 |
106 | try {
107 | GetPartitionResult result = client.getPartition(request);
108 | Partition partitionReturned = result.getPartition();
109 | return partitionReturned == null; //probably always false
110 | } catch (EntityNotFoundException e) {
111 | // here we assume namespace and table exist. It is assured by calling "isInvalidUserInputException" method above
112 | return true;
113 | } catch (Exception e) {
114 | logger.error(String.format("Get partition request %s failed. ", request.toString()), e);
115 | // Partition status unknown, we assume that the partition was not deleted
116 | return false;
117 | }
118 | }
119 |
120 | public TException getFirstTException() {
121 | return firstTException;
122 | }
123 |
124 | public Collection getPartitionsDeleted() {
125 | return partitionMap.values();
126 | }
127 |
128 | }
129 |
--------------------------------------------------------------------------------
/aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/util/BatchCreatePartitionsHelper.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.catalog.util;
2 |
3 | import com.amazonaws.glue.catalog.metastore.AWSGlueMetastore;
4 | import com.amazonaws.glue.catalog.converters.CatalogToHiveConverter;
5 | import com.amazonaws.glue.catalog.converters.CatalogToHiveConverterFactory;
6 | import com.amazonaws.glue.catalog.converters.GlueInputConverter;
7 | import com.amazonaws.services.glue.model.EntityNotFoundException;
8 | import com.amazonaws.services.glue.model.Partition;
9 | import com.amazonaws.services.glue.model.PartitionError;
10 | import com.google.common.collect.Lists;
11 |
12 | import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
13 | import org.apache.log4j.Logger;
14 | import org.apache.thrift.TException;
15 | import org.apache.commons.lang3.StringUtils;
16 |
17 | import java.util.Collection;
18 | import java.util.List;
19 | import java.util.Map;
20 |
21 | import static com.amazonaws.glue.catalog.util.PartitionUtils.isInvalidUserInputException;
22 |
23 | public final class BatchCreatePartitionsHelper {
24 |
25 | private static final Logger logger = Logger.getLogger(BatchCreatePartitionsHelper.class);
26 |
27 | private final AWSGlueMetastore glueClient;
28 | private final String databaseName;
29 | private final String tableName;
30 | private final List partitions;
31 | private final boolean ifNotExists;
32 | private Map partitionMap;
33 | private List partitionsFailed;
34 | private TException firstTException;
35 | private String catalogId;
36 | private CatalogToHiveConverter catalogToHiveConverter;
37 |
38 | public BatchCreatePartitionsHelper(AWSGlueMetastore glueClient, String databaseName, String tableName, String catalogId,
39 | List partitions, boolean ifNotExists) {
40 | this.glueClient = glueClient;
41 | this.databaseName = databaseName;
42 | this.tableName = tableName;
43 | this.catalogId = catalogId;
44 | this.partitions = partitions;
45 | this.ifNotExists = ifNotExists;
46 | catalogToHiveConverter = CatalogToHiveConverterFactory.getCatalogToHiveConverter();
47 | }
48 |
49 | public BatchCreatePartitionsHelper createPartitions() {
50 | partitionMap = PartitionUtils.buildPartitionMap(partitions);
51 | partitionsFailed = Lists.newArrayList();
52 |
53 | try {
54 | List result =
55 | glueClient.createPartitions(databaseName, tableName,
56 | GlueInputConverter.convertToPartitionInputs(partitionMap.values()));
57 | processResult(result);
58 | } catch (Exception e) {
59 | logger.error("Exception thrown while creating partitions in DataCatalog: ", e);
60 | firstTException = catalogToHiveConverter.wrapInHiveException(e);
61 | if (isInvalidUserInputException(e)) {
62 | setAllFailed();
63 | } else {
64 | checkIfPartitionsCreated();
65 | }
66 | }
67 | return this;
68 | }
69 |
70 | private void setAllFailed() {
71 | partitionsFailed = partitions;
72 | partitionMap.clear();
73 | }
74 |
75 | private void processResult(List partitionErrors) {
76 | if (partitionErrors == null || partitionErrors.isEmpty()) {
77 | return;
78 | }
79 |
80 | logger.error(String.format("BatchCreatePartitions failed to create %d out of %d partitions. \n",
81 | partitionErrors.size(), partitionMap.size()));
82 |
83 | for (PartitionError partitionError : partitionErrors) {
84 | Partition partitionFailed = partitionMap.remove(new PartitionKey(partitionError.getPartitionValues()));
85 |
86 | TException exception = catalogToHiveConverter.errorDetailToHiveException(partitionError.getErrorDetail());
87 | if (ifNotExists && exception instanceof AlreadyExistsException) {
88 | // AlreadyExistsException is allowed, so we shouldn't add the partition to partitionsFailed list
89 | continue;
90 | }
91 | logger.error(exception);
92 | if (firstTException == null) {
93 | firstTException = exception;
94 | }
95 | partitionsFailed.add(partitionFailed);
96 | }
97 | }
98 |
99 | private void checkIfPartitionsCreated() {
100 | for (Partition partition : partitions) {
101 | if (!partitionExists(partition)) {
102 | partitionsFailed.add(partition);
103 | partitionMap.remove(new PartitionKey(partition));
104 | }
105 | }
106 | }
107 |
108 | private boolean partitionExists(Partition partition) {
109 | try {
110 | Partition partitionReturned = glueClient.getPartition(databaseName, tableName, partition.getValues());
111 | return partitionReturned != null; //probably always true here
112 | } catch (EntityNotFoundException e) {
113 | // here we assume namespace and table exist. It is assured by calling "isInvalidUserInputException" method above
114 | return false;
115 | } catch (Exception e) {
116 | logger.error(String.format("Get partition request %s failed. ", StringUtils.join(partition.getValues(), "/")), e);
117 | // partition status unknown, we assume that the partition was not created
118 | return false;
119 | }
120 | }
121 |
122 | public TException getFirstTException() {
123 | return firstTException;
124 | }
125 |
126 | public Collection getPartitionsCreated() {
127 | return partitionMap.values();
128 | }
129 |
130 | public List getPartitionsFailed() {
131 | return partitionsFailed;
132 | }
133 |
134 | }
--------------------------------------------------------------------------------
/aws-glue-datacatalog-spark-client/src/test/java/com/amazonaws/glue/catalog/metastore/integrationtest/MultipleCatalogUserDefinedFunctionIntegrationTest.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.catalog.metastore.integrationtest;
2 |
3 | import com.amazonaws.glue.catalog.converters.BaseCatalogToHiveConverter;
4 | import com.amazonaws.glue.catalog.converters.CatalogToHiveConverter;
5 | import com.amazonaws.services.glue.model.DeleteDatabaseRequest;
6 | import com.amazonaws.services.glue.model.EntityNotFoundException;
7 | import com.amazonaws.services.glue.model.GetUserDefinedFunctionRequest;
8 | import org.apache.hadoop.hive.metastore.api.Database;
9 | import org.apache.hadoop.hive.metastore.api.Function;
10 | import org.apache.hadoop.hive.metastore.api.MetaException;
11 | import org.apache.thrift.TException;
12 | import org.junit.After;
13 | import org.junit.Before;
14 | import org.junit.Rule;
15 | import org.junit.Test;
16 | import org.junit.rules.ExpectedException;
17 |
18 | import java.util.HashSet;
19 | import java.util.Set;
20 |
21 | import static com.amazonaws.glue.catalog.util.TestObjects.getTestDatabase;
22 | import static com.amazonaws.glue.catalog.util.TestObjects.getCatalogTestFunction;
23 | import static org.junit.Assert.assertEquals;
24 | import static org.junit.Assert.assertTrue;
25 |
26 | public class MultipleCatalogUserDefinedFunctionIntegrationTest extends MultipleCatalogIntegrationTestBase {
27 | private Database databaseInAnotherCatalog;
28 | private Database database;
29 | private Function function1;
30 | private Function function2;
31 | private Function function3;
32 | private CatalogToHiveConverter catalogToHiveConverter = new BaseCatalogToHiveConverter();
33 |
34 | @Rule
35 | public ExpectedException expectedException = ExpectedException.none();
36 |
37 | @Before
38 | public void setup() throws MetaException, TException {
39 | super.setup();
40 |
41 | databaseInAnotherCatalog = catalogToHiveConverter.convertDatabase(getTestDatabase());
42 | database = databaseInAnotherCatalog.deepCopy();
43 | database.setName(constructDbName(databaseInAnotherCatalog.getName()));
44 | metastoreClient.createDatabase(database);
45 |
46 | function1 = catalogToHiveConverter.convertFunction(database.getName(), getCatalogTestFunction());
47 | metastoreClient.createFunction(function1);
48 |
49 | function2 = catalogToHiveConverter.convertFunction(database.getName(), getCatalogTestFunction());
50 | metastoreClient.createFunction(function2);
51 |
52 | function3 = catalogToHiveConverter.convertFunction(database.getName(), getCatalogTestFunction());
53 | metastoreClient.createFunction(function3);
54 | }
55 |
56 | @After
57 | public void clean() {
58 | glueClient.deleteDatabase(new DeleteDatabaseRequest().withName(database.getName()));
59 | }
60 |
61 | @Test
62 | public void getFunction() throws TException {
63 | Function createdFunction = metastoreClient.getFunction(database.getName(), function1.getFunctionName());
64 | function1.setCreateTime(createdFunction.getCreateTime()); //it can be changed during creation process
65 | assertEquals(function1, createdFunction);
66 |
67 | createdFunction = catalogToHiveConverter.convertFunction(database.getName(),
68 | glueClient.getUserDefinedFunction(new GetUserDefinedFunctionRequest()
69 | .withCatalogId(anotherCatalogId)
70 | .withDatabaseName(databaseInAnotherCatalog.getName())
71 | .withFunctionName(function1.getFunctionName())
72 | ).getUserDefinedFunction()
73 | );
74 | assertEquals(function1, createdFunction);
75 | }
76 |
77 | @Test
78 | public void getFunctions() throws TException {
79 | Set functionNames = new HashSet<>(metastoreClient.getFunctions(database.getName(), ".*"));
80 | assertEquals(3, functionNames.size());
81 | assertTrue(functionNames.contains(function1.getFunctionName()));
82 | assertTrue(functionNames.contains(function2.getFunctionName()));
83 | assertTrue(functionNames.contains(function3.getFunctionName()));
84 | }
85 |
86 | @Test
87 | public void updateFunction() throws TException {
88 | Function newFunction = function1.deepCopy();
89 | String newClassName = "newClassName";
90 | newFunction.setClassName(newClassName);
91 | metastoreClient.alterFunction(database.getName(), function1.getFunctionName(), newFunction);
92 |
93 | Function alteredFunction = catalogToHiveConverter.convertFunction(database.getName(),
94 | glueClient.getUserDefinedFunction(new GetUserDefinedFunctionRequest()
95 | .withCatalogId(anotherCatalogId)
96 | .withDatabaseName(databaseInAnotherCatalog.getName())
97 | .withFunctionName(newFunction.getFunctionName())
98 | ).getUserDefinedFunction()
99 | );
100 | alteredFunction.setCreateTime(newFunction.getCreateTime());
101 | assertEquals(newFunction, alteredFunction);
102 | }
103 |
104 | @Test
105 | public void dropFunction() throws TException {
106 | metastoreClient.dropFunction(database.getName(), function1.getFunctionName());
107 |
108 | expectedException.expect(EntityNotFoundException.class);
109 | glueClient.getUserDefinedFunction(new GetUserDefinedFunctionRequest()
110 | .withCatalogId(anotherCatalogId)
111 | .withDatabaseName(databaseInAnotherCatalog.getName())
112 | .withFunctionName(function1.getFunctionName())
113 | );
114 | }
115 | }
116 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## AWS Glue Data Catalog Client for Apache Hive Metastore
2 | The AWS Glue Data Catalog is a fully managed, Apache Hive Metastore compatible, metadata repository. Customers can use the Data Catalog as a central repository to store structural and operational metadata for their data.
3 |
4 | AWS Glue provides out-of-the-box integration with Amazon EMR that enables customers to use the AWS Glue Data Catalog as an external Hive Metastore. To learn more, visit our [documentation](https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-hive-metastore-glue.html).
5 |
6 | This is an open-source implementation of the Apache Hive Metastore client on Amazon EMR clusters that uses the AWS Glue Data Catalog as an external Hive Metastore. It serves as a reference implementation for building a Hive Metastore-compatible client that connects to the AWS Glue Data Catalog. It may be ported to other Hive Metastore-compatible platforms such as other Hadoop and Apache Spark distributions.
7 |
8 | This package is compatible with Spark 3 and Hive 3.
9 |
10 | **Note**: in order for this client implementation to be used with Apache Hive, a patch included in this [JIRA](https://issues.apache.org/jira/browse/HIVE-12679) must be applied to it. All versions of Apache Hive running on Amazon EMR that support the AWS Glue Data Catalog as the metastore already include this patch. Please follow **all steps listed below in the following order**.
11 |
12 | ## Patching Apache Hive and Installing It Locally
13 |
14 | Obtain a copy of Hive from GitHub at https://github.com/apache/hive.
15 |
16 | git clone https://github.com/apache/hive.git
17 |
18 | To build the Hive client, you need to first apply this [patch](https://github.com/awslabs/aws-glue-data-catalog-client-for-apache-hive-metastore/blob/branch-3.4.0/branch_3.1.patch), which is also included in this repository. Download the patch and move it into the local Hive git repository you cloned above, then apply it and build Hive.
19 |
20 | cd
21 | git checkout branch-3.1
22 | git apply -3 branch_3.1.patch
23 | mvn clean install -DskipTests
24 |
25 | As Spark uses a fork of Hive based off the 2.3 branch, in order to build the Spark client, you need Hive 2.3 built with this [patch](https://issues.apache.org/jira/secure/attachment/12958418/HIVE-12679.branch-2.3.patch).
26 |
27 | If building off the previous Hive repo, please reset those changes:
28 |
29 | git add .
30 | git reset --hard
31 |
32 | Continue with patching the 2.3 branch:
33 |
34 | cd
35 | git checkout branch-2.3
36 | patch -p0
63 | hive.metastore.client.factory.class
64 | com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory
65 |
66 |
67 | ## Configuring Spark to Use the Spark Client
68 |
69 | Similarly, for Spark, you need to install the client jar in Spark's CLASSPATH and create or update Spark's own hive-site.xml to add the above property. On Amazon EMR, this is set in /usr/lib/spark/conf/hive-site.xml. You can also find the location of the Spark client jar in /usr/lib/spark/conf/spark-defaults.conf.
70 |
71 | ## Enabling client side caching for catalog
72 |
73 | Currently, we provide support for caching:
74 |
75 | a) Table metadata - Response from Glue's GetTable operation (https://docs.aws.amazon.com/glue/latest/webapi/API_GetTable.html#API_GetTable_ResponseSyntax)
76 | b) Database metadata - Response from Glue's GetDatabase operation (https://docs.aws.amazon.com/glue/latest/webapi/API_GetDatabase.html#API_GetDatabase_ResponseSyntax)
77 |
78 | Both these entities have dedicated caches for themselves and can be enabled/tuned individually.
79 |
80 | To enable/tune Table cache, use the following properties in your hive/spark configuration file:
81 |
82 |
83 | aws.glue.cache.table.enable
84 | true
85 |
86 |
87 | aws.glue.cache.table.size
88 | 1000
89 |
90 |
91 | aws.glue.cache.table.ttl-mins
92 | 30
93 |
94 |
95 | To enable/tune Database cache:
96 |
97 |
98 | aws.glue.cache.db.enable
99 | true
100 |
101 |
102 | aws.glue.cache.db.size
103 | 1000
104 |
105 |
106 | aws.glue.cache.db.ttl-mins
107 | 30
108 |
109 |
110 | NOTE: The caching logic is disabled by default.
111 |
112 | ## License
113 |
114 | This library is licensed under the Apache 2.0 License.
--------------------------------------------------------------------------------
/aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/metastore/AWSGlueClientFactory.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.catalog.metastore;
2 |
3 | import com.amazonaws.ClientConfiguration;
4 | import com.amazonaws.auth.AWSCredentialsProvider;
5 | import com.amazonaws.client.builder.AwsClientBuilder;
6 | import com.amazonaws.regions.Region;
7 | import com.amazonaws.regions.Regions;
8 | import com.amazonaws.services.glue.AWSGlue;
9 | import com.amazonaws.services.glue.AWSGlueClientBuilder;
10 | import com.google.common.annotations.VisibleForTesting;
11 | import com.google.common.base.Preconditions;
12 | import com.google.common.base.Strings;
13 | import org.apache.commons.lang3.StringUtils;
14 | import org.apache.hadoop.conf.Configuration;
15 | import org.apache.hadoop.hive.metastore.api.MetaException;
16 | import org.apache.hadoop.security.UserGroupInformation;
17 | import org.apache.hadoop.util.ReflectionUtils;
18 | import org.apache.log4j.Logger;
19 |
20 | import java.io.IOException;
21 |
22 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_CATALOG_CREDENTIALS_PROVIDER_FACTORY_CLASS;
23 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_CATALOG_SEPARATOR;
24 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_CONNECTION_TIMEOUT;
25 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_ENDPOINT;
26 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_MAX_CONNECTIONS;
27 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_MAX_RETRY;
28 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_SOCKET_TIMEOUT;
29 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_REGION;
30 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.DEFAULT_CONNECTION_TIMEOUT;
31 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.DEFAULT_MAX_CONNECTIONS;
32 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.DEFAULT_MAX_RETRY;
33 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.DEFAULT_SOCKET_TIMEOUT;
34 |
35 | public final class AWSGlueClientFactory implements GlueClientFactory {
36 |
37 | private static final Logger logger = Logger.getLogger(AWSGlueClientFactory.class);
38 |
39 | private final Configuration conf;
40 |
41 | public AWSGlueClientFactory(Configuration conf) {
42 | Preconditions.checkNotNull(conf, "Configuration cannot be null");
43 | this.conf = conf;
44 | }
45 |
46 | @Override
47 | public AWSGlue newClient() throws MetaException {
48 | try {
49 | AWSGlueClientBuilder glueClientBuilder = AWSGlueClientBuilder.standard()
50 | .withCredentials(getAWSCredentialsProvider(conf));
51 |
52 | String regionStr = getProperty(AWS_REGION, conf);
53 | String glueEndpoint = getProperty(AWS_GLUE_ENDPOINT, conf);
54 |
55 | // ClientBuilder only allows one of EndpointConfiguration or Region to be set
56 | if (StringUtils.isNotBlank(glueEndpoint)) {
57 | logger.info("Setting glue service endpoint to " + glueEndpoint);
58 | glueClientBuilder.setEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(glueEndpoint, null));
59 | } else if (StringUtils.isNotBlank(regionStr)) {
60 | logger.info("Setting region to : " + regionStr);
61 | glueClientBuilder.setRegion(regionStr);
62 | } else {
63 | Region currentRegion = Regions.getCurrentRegion();
64 | if (currentRegion != null) {
65 | logger.info("Using region from ec2 metadata : " + currentRegion.getName());
66 | glueClientBuilder.setRegion(currentRegion.getName());
67 | } else {
68 | logger.info("No region info found, using SDK default region: us-east-1");
69 | }
70 | }
71 |
72 | glueClientBuilder.setClientConfiguration(buildClientConfiguration(conf));
73 | return decorateGlueClient(glueClientBuilder.build());
74 | } catch (Exception e) {
75 | String message = "Unable to build AWSGlueClient: " + e;
76 | logger.error(message);
77 | throw new MetaException(message);
78 | }
79 | }
80 |
81 | private AWSGlue decorateGlueClient(AWSGlue originalGlueClient) {
82 | if (Strings.isNullOrEmpty(getProperty(AWS_GLUE_CATALOG_SEPARATOR, conf))) {
83 | return originalGlueClient;
84 | }
85 | return new AWSGlueMultipleCatalogDecorator(
86 | originalGlueClient,
87 | getProperty(AWS_GLUE_CATALOG_SEPARATOR, conf));
88 | }
89 |
90 | @VisibleForTesting
91 | AWSCredentialsProvider getAWSCredentialsProvider(Configuration conf) {
92 |
93 | Class extends AWSCredentialsProviderFactory> providerFactoryClass = conf
94 | .getClass(AWS_CATALOG_CREDENTIALS_PROVIDER_FACTORY_CLASS,
95 | DefaultAWSCredentialsProviderFactory.class).asSubclass(
96 | AWSCredentialsProviderFactory.class);
97 | AWSCredentialsProviderFactory provider = ReflectionUtils.newInstance(
98 | providerFactoryClass, conf);
99 | return provider.buildAWSCredentialsProvider(conf);
100 | }
101 |
102 | private String createUserAgent() {
103 | try {
104 | String ugi = UserGroupInformation.getCurrentUser().getUserName();
105 | return "ugi=" + ugi;
106 | } catch (IOException e) {
107 | /*
108 | * IOException here means that the login failed according
109 | * to UserGroupInformation.getCurrentUser(). In this case,
110 | * we will throw a RuntimeException the same way as
111 | * HiveMetaStoreClient.java
112 | * If not catching IOException, the build will fail with
113 | * unreported exception IOExcetion.
114 | */
115 | logger.error("Unable to resolve current user name " + e.getMessage());
116 | throw new RuntimeException(e);
117 | }
118 | }
119 |
120 | private ClientConfiguration buildClientConfiguration(Configuration conf) {
121 | // Pass UserAgent to client configuration, which enable CloudTrail to audit UGI info
122 | // when using Glue Catalog as metastore
123 | ClientConfiguration clientConfiguration = new ClientConfiguration()
124 | .withUserAgent(createUserAgent())
125 | .withMaxErrorRetry(conf.getInt(AWS_GLUE_MAX_RETRY, DEFAULT_MAX_RETRY))
126 | .withMaxConnections(conf.getInt(AWS_GLUE_MAX_CONNECTIONS, DEFAULT_MAX_CONNECTIONS))
127 | .withConnectionTimeout(conf.getInt(AWS_GLUE_CONNECTION_TIMEOUT, DEFAULT_CONNECTION_TIMEOUT))
128 | .withSocketTimeout(conf.getInt(AWS_GLUE_SOCKET_TIMEOUT, DEFAULT_SOCKET_TIMEOUT));
129 | return clientConfiguration;
130 | }
131 |
132 | private static String getProperty(String propertyName, Configuration conf) {
133 | return Strings.isNullOrEmpty(System.getProperty(propertyName)) ?
134 | conf.get(propertyName) : System.getProperty(propertyName);
135 | }
136 | }
137 |
--------------------------------------------------------------------------------
/aws-glue-datacatalog-spark-client/src/test/java/com/amazonaws/glue/catalog/metastore/integrationtest/MetastoreClientDatabaseIntegrationTest.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.catalog.metastore.integrationtest;
2 |
3 | import com.amazonaws.glue.catalog.converters.BaseCatalogToHiveConverter;
4 | import com.amazonaws.glue.catalog.converters.CatalogToHiveConverter;
5 | import com.amazonaws.glue.catalog.metastore.AWSCatalogMetastoreClient;
6 | import com.amazonaws.glue.catalog.metastore.GlueClientFactory;
7 | import com.amazonaws.glue.catalog.util.GlueTestClientFactory;
8 | import com.amazonaws.services.glue.AWSGlue;
9 | import com.amazonaws.services.glue.model.DeleteDatabaseRequest;
10 | import com.amazonaws.services.glue.model.EntityNotFoundException;
11 |
12 | import com.google.common.collect.Lists;
13 | import com.google.common.collect.Maps;
14 | import org.apache.hadoop.fs.Path;
15 | import org.apache.hadoop.hive.conf.HiveConf;
16 | import org.apache.hadoop.hive.metastore.IMetaStoreClient;
17 | import org.apache.hadoop.hive.metastore.Warehouse;
18 | import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
19 | import org.apache.hadoop.hive.metastore.api.Database;
20 | import org.apache.hadoop.hive.metastore.api.MetaException;
21 | import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
22 | import org.apache.thrift.TException;
23 | import org.junit.After;
24 | import org.junit.Before;
25 | import org.junit.Test;
26 |
27 | import java.util.List;
28 | import java.util.Map;
29 |
30 | import static com.amazonaws.glue.catalog.util.TestObjects.getTestDatabase;
31 |
32 | import static org.junit.Assert.assertEquals;
33 | import static org.junit.Assert.assertTrue;
34 | import static org.mockito.Matchers.any;
35 | import static org.mockito.Matchers.anyString;
36 | import static org.mockito.Mockito.mock;
37 | import static org.mockito.Mockito.when;
38 |
39 | public class MetastoreClientDatabaseIntegrationTest {
40 |
41 | private AWSGlue glueClient;
42 | private IMetaStoreClient metastoreClient;
43 | private Warehouse wh;
44 | private Database hiveDB;
45 | private com.amazonaws.services.glue.model.Database catalogDB;
46 | private HiveConf conf;
47 | private Path tmpPath;
48 | private List additionalDbForCleanup;
49 | private CatalogToHiveConverter catalogToHiveConverter = new BaseCatalogToHiveConverter();
50 |
51 | @Before
52 | public void setup() throws MetaException {
53 | conf = mock(HiveConf.class);
54 | wh = mock(Warehouse.class);
55 | tmpPath = new Path("/db");
56 | when(wh.getDefaultDatabasePath(anyString())).thenReturn(tmpPath);
57 | when(wh.getDnsPath(any(Path.class))).thenReturn(tmpPath);
58 | when(wh.isDir(any(Path.class))).thenReturn(true);
59 | when(conf.get(HiveConf.ConfVars.USERS_IN_ADMIN_ROLE.varname,"")).thenReturn("");
60 |
61 | glueClient = new GlueTestClientFactory().newClient();
62 | GlueClientFactory clientFactory = mock(GlueClientFactory.class);
63 | when(clientFactory.newClient()).thenReturn(glueClient);
64 |
65 | metastoreClient = new AWSCatalogMetastoreClient.Builder().withHiveConf(conf).withWarehouse(wh)
66 | .withClientFactory(clientFactory).build();
67 | catalogDB = getTestDatabase();
68 | hiveDB = catalogToHiveConverter.convertDatabase(catalogDB);
69 |
70 | additionalDbForCleanup = Lists.newArrayList();
71 | }
72 |
73 | @After
74 | public void clean() {
75 | try {
76 | glueClient.deleteDatabase(new DeleteDatabaseRequest().withName(hiveDB.getName()));
77 |
78 | for (String db : additionalDbForCleanup) {
79 | glueClient.deleteDatabase(new DeleteDatabaseRequest().withName(db));
80 | }
81 | } catch (EntityNotFoundException e) {
82 | //there will be no database to drop after drop database test, so swallow the exception
83 | }
84 | }
85 |
86 | @Test
87 | public void testDefaultDatabase() throws TException {
88 | // default db should exist
89 | metastoreClient.getDatabase("default");
90 | }
91 |
92 | @Test
93 | public void createValidDatabase() throws TException {
94 | metastoreClient.createDatabase(hiveDB);
95 | Database db = metastoreClient.getDatabase(hiveDB.getName());
96 | assertEquals(hiveDB, db);
97 | }
98 |
99 | @Test(expected = AlreadyExistsException.class)
100 | public void createDuplicateDatabase() throws TException {
101 | metastoreClient.createDatabase(hiveDB);
102 | metastoreClient.createDatabase(hiveDB);
103 | }
104 |
105 | @Test
106 | public void testDropValidDatabase() throws TException {
107 | metastoreClient.createDatabase(hiveDB);
108 | metastoreClient.dropDatabase(hiveDB.getName());
109 | assertDrop(hiveDB.getName());
110 | }
111 |
112 | @Test(expected = NoSuchObjectException.class)
113 | public void dropInvalidDatabase() throws TException {
114 | metastoreClient.dropDatabase(hiveDB.getName());
115 | }
116 |
117 | @Test
118 | public void dropInvalidDataBaseWithIgnoreUnknownDatabase() throws TException {
119 | metastoreClient.dropDatabase("unknown_db", false, true);
120 | }
121 |
122 | @Test
123 | public void listValidDatabases() throws TException {
124 | Database database2 = catalogToHiveConverter.convertDatabase(getTestDatabase());
125 | additionalDbForCleanup.add(database2.getName());
126 | metastoreClient.createDatabase(hiveDB);
127 | metastoreClient.createDatabase(database2);
128 | List databaseName = metastoreClient.getAllDatabases();
129 | assertTrue(databaseName.contains(hiveDB.getName()));
130 | assertTrue(databaseName.contains(database2.getName()));
131 | }
132 |
133 | @Test(expected = NoSuchObjectException.class)
134 | public void getInvalidDatabase() throws TException {
135 | metastoreClient.getDatabase(hiveDB.getName());
136 | }
137 |
138 | @Test
139 | public void alterDatabase() throws TException {
140 | Map parameters = Maps.newHashMap();
141 | parameters.put("param3", "value3");
142 | parameters.put("param4", "value4");
143 |
144 | metastoreClient.createDatabase(hiveDB);
145 |
146 | Database updatedDB = catalogToHiveConverter.convertDatabase(getTestDatabase());
147 | updatedDB.setName(hiveDB.getName());
148 | updatedDB.setParameters(parameters);
149 |
150 | metastoreClient.alterDatabase(hiveDB.getName(), updatedDB);
151 | Database afterUpdate = metastoreClient.getDatabase(hiveDB.getName());
152 |
153 | assertTrue(afterUpdate.getParameters().containsKey("param3"));
154 | assertTrue(afterUpdate.getParameters().containsKey("param4"));
155 | }
156 |
157 | private void assertDrop(final String databaseName) throws TException {
158 | boolean dropped = false;
159 | try {
160 | metastoreClient.getDatabase(databaseName);
161 | } catch (NoSuchObjectException e) {
162 | dropped = true;
163 | }
164 | assertTrue("Unable to drop database", dropped);
165 | }
166 |
167 | }
168 |
--------------------------------------------------------------------------------
/aws-glue-datacatalog-client-common/src/test/java/com/amazonaws/glue/catalog/converters/GlueInputConverterTest.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.catalog.converters;
2 |
3 | import com.amazonaws.glue.catalog.util.TestObjects;
4 | import com.amazonaws.services.glue.model.Database;
5 | import com.amazonaws.services.glue.model.DatabaseInput;
6 | import com.amazonaws.services.glue.model.Partition;
7 | import com.amazonaws.services.glue.model.PartitionInput;
8 | import com.amazonaws.services.glue.model.Table;
9 | import com.amazonaws.services.glue.model.TableInput;
10 | import com.amazonaws.services.glue.model.UserDefinedFunction;
11 | import com.amazonaws.services.glue.model.UserDefinedFunctionInput;
12 |
13 | import com.google.common.collect.Lists;
14 | import org.junit.Before;
15 | import org.junit.Test;
16 |
17 | import static org.junit.Assert.assertEquals;
18 |
19 | public class GlueInputConverterTest {
20 |
21 | private Database testDB;
22 | private Table testTable;
23 | private Partition testPartition;
24 | private UserDefinedFunction testFunction;
25 | private CatalogToHiveConverter catalogToHiveConverter;
26 |
27 | @Before
28 | public void setup() {
29 | testDB = TestObjects.getTestDatabase();
30 | testTable = TestObjects.getTestTable();
31 | testPartition = TestObjects.getTestPartition(testDB.getName(), testTable.getName(), Lists.newArrayList("val1"));
32 | testFunction = TestObjects.getCatalogTestFunction();
33 | catalogToHiveConverter = new BaseCatalogToHiveConverter();
34 | }
35 |
36 | @Test
37 | public void testConvertHiveDbToDatabaseInput() {
38 | org.apache.hadoop.hive.metastore.api.Database hivedb = catalogToHiveConverter.convertDatabase(testDB);
39 | DatabaseInput dbInput = GlueInputConverter.convertToDatabaseInput(hivedb);
40 |
41 | assertEquals(testDB.getName(), dbInput.getName());
42 | assertEquals(testDB.getDescription(), dbInput.getDescription());
43 | assertEquals(testDB.getLocationUri(), dbInput.getLocationUri());
44 | assertEquals(testDB.getParameters(), dbInput.getParameters());
45 | }
46 |
47 | @Test
48 | public void testConvertCatalogDbToDatabaseInput() {
49 | DatabaseInput dbInput = GlueInputConverter.convertToDatabaseInput(testDB);
50 |
51 | assertEquals(testDB.getName(), dbInput.getName());
52 | assertEquals(testDB.getDescription(), dbInput.getDescription());
53 | assertEquals(testDB.getLocationUri(), dbInput.getLocationUri());
54 | assertEquals(testDB.getParameters(), dbInput.getParameters());
55 | }
56 |
57 | @Test
58 | public void testConvertHiveTableToTableInput() {
59 | org.apache.hadoop.hive.metastore.api.Table hivetbl = catalogToHiveConverter.convertTable(testTable, testDB.getName());
60 | TableInput tblInput = GlueInputConverter.convertToTableInput(hivetbl);
61 |
62 | assertEquals(testTable.getName(), tblInput.getName());
63 | assertEquals(testTable.getOwner(), tblInput.getOwner());
64 | assertEquals(testTable.getTableType(), tblInput.getTableType());
65 | assertEquals(testTable.getParameters(), tblInput.getParameters());
66 | assertEquals(testTable.getPartitionKeys(), tblInput.getPartitionKeys());
67 | assertEquals(testTable.getRetention(), tblInput.getRetention());
68 | assertEquals(testTable.getLastAccessTime(), tblInput.getLastAccessTime());
69 | assertEquals(testTable.getStorageDescriptor(), tblInput.getStorageDescriptor());
70 | assertEquals(testTable.getViewExpandedText(), tblInput.getViewExpandedText());
71 | assertEquals(testTable.getViewOriginalText(), tblInput.getViewOriginalText());
72 | }
73 |
74 | @Test
75 | public void testConvertCatalogTableToTableInput() {
76 | TableInput tblInput = GlueInputConverter.convertToTableInput(testTable);
77 |
78 | assertEquals(testTable.getName(), tblInput.getName());
79 | assertEquals(testTable.getOwner(), tblInput.getOwner());
80 | assertEquals(testTable.getTableType(), tblInput.getTableType());
81 | assertEquals(testTable.getParameters(), tblInput.getParameters());
82 | assertEquals(testTable.getPartitionKeys(), tblInput.getPartitionKeys());
83 | assertEquals(testTable.getRetention(), tblInput.getRetention());
84 | assertEquals(testTable.getLastAccessTime(), tblInput.getLastAccessTime());
85 | assertEquals(testTable.getStorageDescriptor(), tblInput.getStorageDescriptor());
86 | assertEquals(testTable.getViewExpandedText(), tblInput.getViewExpandedText());
87 | assertEquals(testTable.getViewOriginalText(), tblInput.getViewOriginalText());
88 | }
89 |
90 | @Test
91 | public void testConvertHivePartitionToPartitionInput() {
92 | org.apache.hadoop.hive.metastore.api.Partition hivePartition = catalogToHiveConverter.convertPartition(testPartition);
93 | PartitionInput partitionInput = GlueInputConverter.convertToPartitionInput(hivePartition);
94 |
95 | assertEquals(testPartition.getLastAccessTime(), partitionInput.getLastAccessTime());
96 | assertEquals(testPartition.getParameters(), partitionInput.getParameters());
97 | assertEquals(testPartition.getStorageDescriptor(), partitionInput.getStorageDescriptor());
98 | assertEquals(testPartition.getValues(), partitionInput.getValues());
99 | }
100 |
101 | @Test
102 | public void testConvertCatalogPartitionToPartitionInput() {
103 | PartitionInput partitionInput = GlueInputConverter.convertToPartitionInput(testPartition);
104 |
105 | assertEquals(testPartition.getLastAccessTime(), partitionInput.getLastAccessTime());
106 | assertEquals(testPartition.getParameters(), partitionInput.getParameters());
107 | assertEquals(testPartition.getStorageDescriptor(), partitionInput.getStorageDescriptor());
108 | assertEquals(testPartition.getValues(), partitionInput.getValues());
109 | }
110 |
111 | @Test
112 | public void testConvertHiveFunctionToFunctionInput() {
113 | org.apache.hadoop.hive.metastore.api.Function hiveFunction =
114 | catalogToHiveConverter.convertFunction(testDB.getName(), testFunction);
115 | UserDefinedFunctionInput functionInput = GlueInputConverter.convertToUserDefinedFunctionInput(hiveFunction);
116 |
117 | assertEquals(testFunction.getClassName(), functionInput.getClassName());
118 | assertEquals(testFunction.getFunctionName(), functionInput.getFunctionName());
119 | assertEquals(testFunction.getOwnerName(), functionInput.getOwnerName());
120 | assertEquals(testFunction.getOwnerType(), functionInput.getOwnerType());
121 | assertEquals(testFunction.getResourceUris(), functionInput.getResourceUris());
122 | }
123 |
124 | @Test
125 | public void testConvertHiveFunctionToFunctionInputNullOwnerType() {
126 | org.apache.hadoop.hive.metastore.api.Function hiveFunction =
127 | catalogToHiveConverter.convertFunction(testDB.getName(), testFunction);
128 | hiveFunction.setOwnerType(null);
129 | GlueInputConverter.convertToUserDefinedFunctionInput(hiveFunction);
130 | }
131 |
132 | }
133 |
--------------------------------------------------------------------------------
/aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/metastore/AWSGlueMetastoreCacheDecorator.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.catalog.metastore;
2 |
3 | import com.amazonaws.services.glue.model.Database;
4 | import com.amazonaws.services.glue.model.Table;
5 | import com.google.common.annotations.VisibleForTesting;
6 | import com.google.common.cache.Cache;
7 | import com.google.common.cache.CacheBuilder;
8 | import org.apache.hadoop.conf.Configuration;
9 | import org.apache.log4j.Logger;
10 |
11 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_DB_CACHE_ENABLE;
12 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_DB_CACHE_SIZE;
13 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_DB_CACHE_TTL_MINS;
14 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_TABLE_CACHE_ENABLE;
15 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_TABLE_CACHE_SIZE;
16 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_TABLE_CACHE_TTL_MINS;
17 |
18 | import java.util.Objects;
19 | import java.util.concurrent.TimeUnit;
20 |
21 | import static com.google.common.base.Preconditions.checkArgument;
22 | import static com.google.common.base.Preconditions.checkNotNull;
23 |
24 | public class AWSGlueMetastoreCacheDecorator extends AWSGlueMetastoreBaseDecorator {
25 |
26 | private static final Logger logger = Logger.getLogger(AWSGlueMetastoreCacheDecorator.class);
27 |
28 | private final Configuration conf;
29 |
30 | private final boolean databaseCacheEnabled;
31 |
32 | private final boolean tableCacheEnabled;
33 |
34 | @VisibleForTesting
35 | protected Cache databaseCache;
36 | @VisibleForTesting
37 | protected Cache tableCache;
38 |
39 | public AWSGlueMetastoreCacheDecorator(Configuration conf, AWSGlueMetastore awsGlueMetastore) {
40 | super(awsGlueMetastore);
41 |
42 | checkNotNull(conf, "conf can not be null");
43 | this.conf = conf;
44 |
45 | databaseCacheEnabled = conf.getBoolean(AWS_GLUE_DB_CACHE_ENABLE, false);
46 | if(databaseCacheEnabled) {
47 | int dbCacheSize = conf.getInt(AWS_GLUE_DB_CACHE_SIZE, 0);
48 | int dbCacheTtlMins = conf.getInt(AWS_GLUE_DB_CACHE_TTL_MINS, 0);
49 |
50 | //validate config values for size and ttl
51 | validateConfigValueIsGreaterThanZero(AWS_GLUE_DB_CACHE_SIZE, dbCacheSize);
52 | validateConfigValueIsGreaterThanZero(AWS_GLUE_DB_CACHE_TTL_MINS, dbCacheTtlMins);
53 |
54 | //initialize database cache
55 | databaseCache = CacheBuilder.newBuilder().maximumSize(dbCacheSize)
56 | .expireAfterWrite(dbCacheTtlMins, TimeUnit.MINUTES).build();
57 | } else {
58 | databaseCache = null;
59 | }
60 |
61 | tableCacheEnabled = conf.getBoolean(AWS_GLUE_TABLE_CACHE_ENABLE, false);
62 | if(tableCacheEnabled) {
63 | int tableCacheSize = conf.getInt(AWS_GLUE_TABLE_CACHE_SIZE, 0);
64 | int tableCacheTtlMins = conf.getInt(AWS_GLUE_TABLE_CACHE_TTL_MINS, 0);
65 |
66 | //validate config values for size and ttl
67 | validateConfigValueIsGreaterThanZero(AWS_GLUE_TABLE_CACHE_SIZE, tableCacheSize);
68 | validateConfigValueIsGreaterThanZero(AWS_GLUE_TABLE_CACHE_TTL_MINS, tableCacheTtlMins);
69 |
70 | //initialize table cache
71 | tableCache = CacheBuilder.newBuilder().maximumSize(tableCacheSize)
72 | .expireAfterWrite(tableCacheTtlMins, TimeUnit.MINUTES).build();
73 | } else {
74 | tableCache = null;
75 | }
76 |
77 | logger.info("Constructed");
78 | }
79 |
80 | private void validateConfigValueIsGreaterThanZero(String configName, int value) {
81 | checkArgument(value > 0, String.format("Invalid value for Hive Config %s. " +
82 | "Provide a value greater than zero", configName));
83 |
84 | }
85 |
86 | @Override
87 | public Database getDatabase(String dbName) {
88 | Database result;
89 | if(databaseCacheEnabled) {
90 | Database valueFromCache = databaseCache.getIfPresent(dbName);
91 | if(valueFromCache != null) {
92 | logger.info("Cache hit for operation [getDatabase] on key [" + dbName + "]");
93 | result = valueFromCache;
94 | } else {
95 | logger.info("Cache miss for operation [getDatabase] on key [" + dbName + "]");
96 | result = super.getDatabase(dbName);
97 | databaseCache.put(dbName, result);
98 | }
99 | } else {
100 | result = super.getDatabase(dbName);
101 | }
102 | return result;
103 | }
104 |
105 | @Override
106 | public Table getTable(String dbName, String tableName) {
107 | Table result;
108 | if(tableCacheEnabled) {
109 | TableIdentifier key = new TableIdentifier(dbName, tableName);
110 | Table valueFromCache = tableCache.getIfPresent(key);
111 | if(valueFromCache != null) {
112 | logger.info("Cache hit for operation [getTable] on key [" + key + "]");
113 | result = valueFromCache;
114 | } else {
115 | logger.info("Cache miss for operation [getTable] on key [" + key + "]");
116 | result = super.getTable(dbName, tableName);
117 | tableCache.put(key, result);
118 | }
119 | } else {
120 | result = super.getTable(dbName, tableName);
121 | }
122 | return result;
123 | }
124 |
125 | static class TableIdentifier {
126 | private final String dbName;
127 | private final String tableName;
128 |
129 | public TableIdentifier(String dbName, String tableName) {
130 | this.dbName = dbName;
131 | this.tableName = tableName;
132 | }
133 |
134 | public String getDbName() {
135 | return dbName;
136 | }
137 |
138 | public String getTableName() {
139 | return tableName;
140 | }
141 |
142 | @Override
143 | public String toString() {
144 | return "TableIdentifier{" +
145 | "dbName='" + dbName + '\'' +
146 | ", tableName='" + tableName + '\'' +
147 | '}';
148 | }
149 |
150 | @Override
151 | public boolean equals(Object o) {
152 | if (this == o) return true;
153 | if (o == null || getClass() != o.getClass()) return false;
154 | TableIdentifier that = (TableIdentifier) o;
155 | return Objects.equals(dbName, that.dbName) &&
156 | Objects.equals(tableName, that.tableName);
157 | }
158 |
159 | @Override
160 | public int hashCode() {
161 | return Objects.hash(dbName, tableName);
162 | }
163 | }
164 | }
--------------------------------------------------------------------------------
/aws-glue-datacatalog-client-common/src/main/java/com/amazonaws/glue/catalog/metastore/AWSGlueMetastoreBaseDecorator.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.catalog.metastore;
2 |
3 | import com.amazonaws.services.glue.model.ColumnStatistics;
4 | import com.amazonaws.services.glue.model.ColumnStatisticsError;
5 | import com.amazonaws.services.glue.model.Database;
6 | import com.amazonaws.services.glue.model.DatabaseInput;
7 | import com.amazonaws.services.glue.model.Partition;
8 | import com.amazonaws.services.glue.model.PartitionError;
9 | import com.amazonaws.services.glue.model.PartitionInput;
10 | import com.amazonaws.services.glue.model.PartitionValueList;
11 | import com.amazonaws.services.glue.model.Table;
12 | import com.amazonaws.services.glue.model.TableInput;
13 | import com.amazonaws.services.glue.model.UserDefinedFunction;
14 | import com.amazonaws.services.glue.model.UserDefinedFunctionInput;
15 | import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
16 | import org.apache.thrift.TException;
17 |
18 | import java.util.List;
19 | import java.util.Map;
20 |
21 | import static com.google.common.base.Preconditions.checkNotNull;
22 |
23 | public class AWSGlueMetastoreBaseDecorator implements AWSGlueMetastore {
24 |
25 | private final AWSGlueMetastore awsGlueMetastore;
26 |
27 | public AWSGlueMetastoreBaseDecorator(AWSGlueMetastore awsGlueMetastore) {
28 | checkNotNull(awsGlueMetastore, "awsGlueMetastore can not be null");
29 | this.awsGlueMetastore = awsGlueMetastore;
30 | }
31 |
32 | @Override
33 | public void createDatabase(DatabaseInput databaseInput) {
34 | awsGlueMetastore.createDatabase(databaseInput);
35 | }
36 |
37 | @Override
38 | public Database getDatabase(String dbName) {
39 | return awsGlueMetastore.getDatabase(dbName);
40 | }
41 |
42 | @Override
43 | public List getAllDatabases() {
44 | return awsGlueMetastore.getAllDatabases();
45 | }
46 |
47 | @Override
48 | public void updateDatabase(String databaseName, DatabaseInput databaseInput) {
49 | awsGlueMetastore.updateDatabase(databaseName, databaseInput);
50 | }
51 |
52 | @Override
53 | public void deleteDatabase(String dbName) {
54 | awsGlueMetastore.deleteDatabase(dbName);
55 | }
56 |
57 | @Override
58 | public void createTable(String dbName, TableInput tableInput) {
59 | awsGlueMetastore.createTable(dbName, tableInput);
60 | }
61 |
62 | @Override
63 | public Table getTable(String dbName, String tableName) {
64 | return awsGlueMetastore.getTable(dbName, tableName);
65 | }
66 |
67 | @Override
68 | public List getTables(String dbname, String tablePattern) {
69 | return awsGlueMetastore.getTables(dbname, tablePattern);
70 | }
71 |
72 | @Override
73 | public void updateTable(String dbName, TableInput tableInput) {
74 | awsGlueMetastore.updateTable(dbName, tableInput);
75 | }
76 |
77 | @Override
78 | public void updateTable(String dbName, TableInput tableInput, EnvironmentContext environmentContext) {
79 | awsGlueMetastore.updateTable(dbName, tableInput, environmentContext);
80 | }
81 |
82 | @Override
83 | public void deleteTable(String dbName, String tableName) {
84 | awsGlueMetastore.deleteTable(dbName, tableName);
85 | }
86 |
87 | @Override
88 | public Partition getPartition(String dbName, String tableName, List partitionValues) {
89 | return awsGlueMetastore.getPartition(dbName, tableName, partitionValues);
90 | }
91 |
92 | @Override
93 | public List getPartitionsByNames(String dbName, String tableName, List partitionsToGet) {
94 | return awsGlueMetastore.getPartitionsByNames(dbName, tableName, partitionsToGet);
95 | }
96 |
97 | @Override
98 | public List getPartitions(String dbName, String tableName, String expression, long max) throws TException {
99 | return awsGlueMetastore.getPartitions(dbName, tableName, expression, max);
100 | }
101 |
102 | @Override
103 | public void updatePartition(String dbName, String tableName, List partitionValues, PartitionInput partitionInput) {
104 | awsGlueMetastore.updatePartition(dbName, tableName, partitionValues, partitionInput);
105 | }
106 |
107 | @Override
108 | public void deletePartition(String dbName, String tableName, List partitionValues) {
109 | awsGlueMetastore.deletePartition(dbName, tableName, partitionValues);
110 | }
111 |
112 | @Override
113 | public List createPartitions(String dbName, String tableName, List partitionInputs) {
114 | return awsGlueMetastore.createPartitions(dbName, tableName, partitionInputs);
115 | }
116 |
117 | @Override
118 | public void createUserDefinedFunction(String dbName, UserDefinedFunctionInput functionInput) {
119 | awsGlueMetastore.createUserDefinedFunction(dbName, functionInput);
120 | }
121 |
122 | @Override
123 | public UserDefinedFunction getUserDefinedFunction(String dbName, String functionName) {
124 | return awsGlueMetastore.getUserDefinedFunction(dbName, functionName);
125 | }
126 |
127 | @Override
128 | public List getUserDefinedFunctions(String dbName, String pattern) {
129 | return awsGlueMetastore.getUserDefinedFunctions(dbName, pattern);
130 | }
131 |
132 | @Override
133 | public List getUserDefinedFunctions(String pattern) {
134 | return awsGlueMetastore.getUserDefinedFunctions(pattern);
135 | }
136 |
137 | @Override
138 | public void deleteUserDefinedFunction(String dbName, String functionName) {
139 | awsGlueMetastore.deleteUserDefinedFunction(dbName, functionName);
140 | }
141 |
142 | @Override
143 | public void updateUserDefinedFunction(String dbName, String functionName, UserDefinedFunctionInput functionInput) {
144 | awsGlueMetastore.updateUserDefinedFunction(dbName, functionName, functionInput);
145 | }
146 |
147 | @Override
148 | public void deletePartitionColumnStatistics(String dbName, String tableName, List partitionValues, String colName) {
149 | awsGlueMetastore.deletePartitionColumnStatistics(dbName, tableName, partitionValues, colName);
150 | }
151 |
152 | @Override
153 | public void deleteTableColumnStatistics(String dbName, String tableName, String colName) {
154 | awsGlueMetastore.deleteTableColumnStatistics(dbName, tableName, colName);
155 | }
156 |
157 | @Override
158 | public Map> getPartitionColumnStatistics(String dbName, String tableName, List partitionValues, List columnNames) {
159 | return awsGlueMetastore.getPartitionColumnStatistics(dbName, tableName, partitionValues, columnNames);
160 | }
161 |
162 | @Override
163 | public List getTableColumnStatistics(String dbName, String tableName, List colNames) {
164 | return awsGlueMetastore.getTableColumnStatistics(dbName, tableName, colNames);
165 | }
166 |
167 | @Override
168 | public List updatePartitionColumnStatistics(String dbName, String tableName, List partitionValues, List columnStatistics) {
169 | return awsGlueMetastore.updatePartitionColumnStatistics(dbName, tableName, partitionValues, columnStatistics);
170 | }
171 |
172 | @Override
173 | public List updateTableColumnStatistics(String dbName, String tableName, List columnStatistics) {
174 | return awsGlueMetastore.updateTableColumnStatistics(dbName, tableName, columnStatistics);
175 | }
176 |
177 | }
--------------------------------------------------------------------------------
/aws-glue-datacatalog-client-common/src/test/java/com/amazonaws/glue/catalog/metastore/AWSGlueClientFactoryTest.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.catalog.metastore;
2 |
3 | import com.amazonaws.auth.AWSCredentials;
4 | import com.amazonaws.auth.AWSCredentialsProvider;
5 | import com.amazonaws.auth.BasicSessionCredentials;
6 | import com.amazonaws.services.glue.AWSGlue;
7 |
8 | import org.apache.hadoop.conf.Configuration;
9 | import org.hamcrest.Matchers;
10 | import org.junit.Before;
11 | import org.junit.Test;
12 |
13 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_CATALOG_CREDENTIALS_PROVIDER_FACTORY_CLASS;
14 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_CATALOG_SEPARATOR;
15 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_CONNECTION_TIMEOUT;
16 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_ENDPOINT;
17 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_MAX_CONNECTIONS;
18 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_MAX_RETRY;
19 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_SOCKET_TIMEOUT;
20 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_REGION;
21 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.DEFAULT_CONNECTION_TIMEOUT;
22 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.DEFAULT_MAX_CONNECTIONS;
23 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.DEFAULT_MAX_RETRY;
24 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.DEFAULT_SOCKET_TIMEOUT;
25 | import static org.hamcrest.Matchers.instanceOf;
26 | import static org.junit.Assert.assertEquals;
27 | import static org.junit.Assert.assertNotNull;
28 | import static org.junit.Assert.assertThat;
29 | import static org.mockito.Mockito.atLeastOnce;
30 | import static org.mockito.Mockito.never;
31 | import static org.mockito.Mockito.spy;
32 | import static org.mockito.Mockito.verify;
33 | import static org.mockito.Mockito.when;
34 |
35 | public class AWSGlueClientFactoryTest {
36 |
    // Placeholder credential values injected into the conf by the
    // SessionCredentialsProviderFactory tests below.
    private static final String FAKE_ACCESS_KEY = "accessKey";
    private static final String FAKE_SECRET_KEY = "secretKey";
    private static final String FAKE_SESSION_TOKEN = "sessionToken";

    private AWSGlueClientFactory glueClientFactory;
    // Spied (not mocked) Configuration so real get/set behavior is kept
    // while interactions can still be verified.
    private Configuration conf;
43 |
    @Before
    public void setup() {
        // Spy a real Configuration so tests can both set values and verify reads.
        conf = spy(new Configuration());
        glueClientFactory = new AWSGlueClientFactory(conf);
    }
49 |
50 | @Test
51 | public void testGlueClientConstructionWithHiveConfig() throws Exception {
52 | System.setProperty(AWS_REGION, "");
53 | System.setProperty(AWS_GLUE_ENDPOINT, "");
54 | System.setProperty(AWS_GLUE_CATALOG_SEPARATOR, "");
55 | when(conf.get(AWS_GLUE_ENDPOINT)).thenReturn("endpoint");
56 | when(conf.get(AWS_REGION)).thenReturn("us-west-1");
57 | when(conf.get(AWS_GLUE_CATALOG_SEPARATOR)).thenReturn("/");
58 |
59 | AWSGlue glueClient = glueClientFactory.newClient();
60 |
61 | assertNotNull(glueClient);
62 |
63 | // client reads hive conf for region & endpoint
64 | verify(conf, atLeastOnce()).get(AWS_GLUE_ENDPOINT);
65 | verify(conf, atLeastOnce()).get(AWS_REGION);
66 | verify(conf, atLeastOnce()).get(AWS_GLUE_CATALOG_SEPARATOR);
67 | }
68 |
    // NOTE(review): method name has a typo ("Contruction"); left unchanged to
    // avoid churn in test reports.
    @Test
    public void testGlueClientContructionWithAWSConfig() throws Exception {
        // Client construction should read retry/connection/timeout settings
        // from the Hadoop configuration, using the library defaults.
        glueClientFactory.newClient();
        verify(conf, atLeastOnce()).getInt(AWS_GLUE_MAX_RETRY, DEFAULT_MAX_RETRY);
        verify(conf, atLeastOnce()).getInt(AWS_GLUE_MAX_CONNECTIONS, DEFAULT_MAX_CONNECTIONS);
        verify(conf, atLeastOnce()).getInt(AWS_GLUE_SOCKET_TIMEOUT, DEFAULT_SOCKET_TIMEOUT);
        verify(conf, atLeastOnce()).getInt(AWS_GLUE_CONNECTION_TIMEOUT, DEFAULT_CONNECTION_TIMEOUT);
    }
77 |
78 | @Test
79 | public void testGlueClientConstructionWithSystemProperty() throws Exception {
80 | System.setProperty(AWS_REGION, "us-east-1");
81 | System.setProperty(AWS_GLUE_ENDPOINT, "endpoint");
82 | System.setProperty(AWS_GLUE_CATALOG_SEPARATOR, "/");
83 |
84 | AWSGlue glueClient = glueClientFactory.newClient();
85 |
86 | assertNotNull(glueClient);
87 |
88 | // client has no interactions with the hive conf since system property is set
89 | verify(conf, never()).get(AWS_GLUE_ENDPOINT);
90 | verify(conf, never()).get(AWS_REGION);
91 | verify(conf, never()).get(AWS_GLUE_CATALOG_SEPARATOR);
92 | }
93 |
94 | @Test
95 | public void testClientConstructionWithSessionCredentialsProviderFactory() throws Exception {
96 | System.setProperty("aws.region", "us-west-2");
97 | conf.setStrings(SessionCredentialsProviderFactory.AWS_ACCESS_KEY_CONF_VAR, FAKE_ACCESS_KEY);
98 | conf.setStrings(SessionCredentialsProviderFactory.AWS_SECRET_KEY_CONF_VAR, FAKE_SECRET_KEY);
99 | conf.setStrings(SessionCredentialsProviderFactory.AWS_SESSION_TOKEN_CONF_VAR, FAKE_SESSION_TOKEN);
100 |
101 | conf.setStrings(AWS_CATALOG_CREDENTIALS_PROVIDER_FACTORY_CLASS,
102 | SessionCredentialsProviderFactory.class.getCanonicalName());
103 |
104 | AWSGlue glueClient = glueClientFactory.newClient();
105 |
106 | assertNotNull(glueClient);
107 |
108 | verify(conf, atLeastOnce()).get(SessionCredentialsProviderFactory.AWS_ACCESS_KEY_CONF_VAR);
109 | verify(conf, atLeastOnce()).get(SessionCredentialsProviderFactory.AWS_SECRET_KEY_CONF_VAR);
110 | verify(conf, atLeastOnce()).get(SessionCredentialsProviderFactory.AWS_SESSION_TOKEN_CONF_VAR);
111 | }
112 |
113 | @Test
114 | public void testCredentialsCreatedBySessionCredentialsProviderFactory() throws Exception {
115 | conf.setStrings(SessionCredentialsProviderFactory.AWS_ACCESS_KEY_CONF_VAR, FAKE_ACCESS_KEY);
116 | conf.setStrings(SessionCredentialsProviderFactory.AWS_SECRET_KEY_CONF_VAR, FAKE_SECRET_KEY);
117 | conf.setStrings(SessionCredentialsProviderFactory.AWS_SESSION_TOKEN_CONF_VAR, FAKE_SESSION_TOKEN);
118 |
119 | SessionCredentialsProviderFactory factory = new SessionCredentialsProviderFactory();
120 | AWSCredentialsProvider provider = factory.buildAWSCredentialsProvider(conf);
121 | AWSCredentials credentials = provider.getCredentials();
122 |
123 | assertThat(credentials, instanceOf(BasicSessionCredentials.class));
124 |
125 | BasicSessionCredentials sessionCredentials = (BasicSessionCredentials) credentials;
126 |
127 | assertEquals(FAKE_ACCESS_KEY, sessionCredentials.getAWSAccessKeyId());
128 | assertEquals(FAKE_SECRET_KEY, sessionCredentials.getAWSSecretKey());
129 | assertEquals(FAKE_SESSION_TOKEN, sessionCredentials.getSessionToken());
130 | }
131 |
132 | @Test(expected = IllegalArgumentException.class)
133 | public void testMissingAccessKeyWithSessionCredentialsProviderFactory() throws Exception {
134 | SessionCredentialsProviderFactory factory = new SessionCredentialsProviderFactory();
135 | factory.buildAWSCredentialsProvider(conf);
136 | }
137 |
138 | @Test(expected = IllegalArgumentException.class)
139 | public void testMissingSecretKey() throws Exception {
140 | SessionCredentialsProviderFactory factory = new SessionCredentialsProviderFactory();
141 | conf.setStrings(SessionCredentialsProviderFactory.AWS_ACCESS_KEY_CONF_VAR, FAKE_ACCESS_KEY);
142 | factory.buildAWSCredentialsProvider(conf);
143 | }
144 |
145 | @Test(expected = IllegalArgumentException.class)
146 | public void testMissingSessionTokenKey() throws Exception {
147 | SessionCredentialsProviderFactory factory = new SessionCredentialsProviderFactory();
148 | conf.setStrings(SessionCredentialsProviderFactory.AWS_ACCESS_KEY_CONF_VAR, FAKE_ACCESS_KEY);
149 | conf.setStrings(SessionCredentialsProviderFactory.AWS_SECRET_KEY_CONF_VAR, FAKE_SECRET_KEY);
150 | factory.buildAWSCredentialsProvider(conf);
151 | }
152 |
153 | }
154 |
--------------------------------------------------------------------------------
/aws-glue-datacatalog-client-common/src/test/java/com/amazonaws/glue/catalog/metastore/AWSGlueMetastoreCacheDecoratorTest.java:
--------------------------------------------------------------------------------
1 | package com.amazonaws.glue.catalog.metastore;
2 |
3 | import com.amazonaws.services.glue.model.Database;
4 | import com.amazonaws.services.glue.model.Table;
5 | import com.google.common.cache.Cache;
6 | import org.apache.hadoop.hive.conf.HiveConf;
7 | import org.junit.Before;
8 | import org.junit.Test;
9 |
10 | import static org.junit.Assert.assertEquals;
11 | import static org.junit.Assert.assertNotNull;
12 | import static org.junit.Assert.assertNull;
13 | import static org.mockito.Mockito.doNothing;
14 | import static org.mockito.Mockito.mock;
15 | import static org.mockito.Mockito.spy;
16 | import static org.mockito.Mockito.times;
17 | import static org.mockito.Mockito.verify;
18 | import static org.mockito.Mockito.when;
19 | import static org.junit.Assert.assertTrue;
20 |
21 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_DB_CACHE_ENABLE;
22 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_TABLE_CACHE_ENABLE;
23 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_TABLE_CACHE_SIZE;
24 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_TABLE_CACHE_TTL_MINS;
25 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_DB_CACHE_SIZE;
26 | import static com.amazonaws.glue.catalog.util.AWSGlueConfig.AWS_GLUE_DB_CACHE_TTL_MINS;
27 |
28 | public class AWSGlueMetastoreCacheDecoratorTest {
29 |
30 | private AWSGlueMetastore glueMetastore;
31 | private HiveConf hiveConf;
32 |
33 | private static final String DB_NAME = "db";
34 | private static final String TABLE_NAME = "table";
35 | private static final AWSGlueMetastoreCacheDecorator.TableIdentifier TABLE_IDENTIFIER =
36 | new AWSGlueMetastoreCacheDecorator.TableIdentifier(DB_NAME, TABLE_NAME);
37 |
38 | @Before
39 | public void setUp() {
40 | glueMetastore = mock(AWSGlueMetastore.class);
41 | hiveConf = spy(new HiveConf());
42 | when(hiveConf.getBoolean(AWS_GLUE_TABLE_CACHE_ENABLE, false)).thenReturn(true);
43 | when(hiveConf.getBoolean(AWS_GLUE_DB_CACHE_ENABLE, false)).thenReturn(true);
44 | when(hiveConf.getInt(AWS_GLUE_TABLE_CACHE_SIZE, 0)).thenReturn(100);
45 | when(hiveConf.getInt(AWS_GLUE_TABLE_CACHE_TTL_MINS, 0)).thenReturn(100);
46 | when(hiveConf.getInt(AWS_GLUE_DB_CACHE_SIZE, 0)).thenReturn(100);
47 | when(hiveConf.getInt(AWS_GLUE_DB_CACHE_TTL_MINS, 0)).thenReturn(100);
48 |
49 | }
50 |
51 | @Test(expected = NullPointerException.class)
52 | public void testConstructorWithNullConf() {
53 | new AWSGlueMetastoreCacheDecorator(null, glueMetastore);
54 | }
55 |
56 | @Test(expected = IllegalArgumentException.class)
57 | public void testConstructorWithInvalidTableCacheSize() {
58 | when(hiveConf.getInt(AWS_GLUE_TABLE_CACHE_SIZE, 0)).thenReturn(0);
59 | new AWSGlueMetastoreCacheDecorator(hiveConf, glueMetastore);
60 | }
61 |
62 | @Test(expected = IllegalArgumentException.class)
63 | public void testConstructorWithInvalidTableCacheTtl() {
64 | when(hiveConf.getInt(AWS_GLUE_TABLE_CACHE_TTL_MINS, 0)).thenReturn(0);
65 | new AWSGlueMetastoreCacheDecorator(hiveConf, glueMetastore);
66 | }
67 |
68 | @Test(expected = IllegalArgumentException.class)
69 | public void testConstructorWithInvalidDbCacheSize() {
70 | when(hiveConf.getInt(AWS_GLUE_DB_CACHE_SIZE, 0)).thenReturn(0);
71 | new AWSGlueMetastoreCacheDecorator(hiveConf, glueMetastore);
72 | }
73 |
74 | @Test(expected = IllegalArgumentException.class)
75 | public void testConstructorWithInvalidDbCacheTtl() {
76 | when(hiveConf.getInt(AWS_GLUE_DB_CACHE_TTL_MINS, 0)).thenReturn(0);
77 | new AWSGlueMetastoreCacheDecorator(hiveConf, glueMetastore);
78 | }
79 |
80 | @Test
81 | public void testGetDatabaseWhenCacheDisabled() {
82 | //disable cache
83 | when(hiveConf.getBoolean(AWS_GLUE_DB_CACHE_ENABLE, false)).thenReturn(false);
84 | Database db = new Database();
85 | AWSGlueMetastoreCacheDecorator cacheDecorator =
86 | new AWSGlueMetastoreCacheDecorator(hiveConf, glueMetastore);
87 | when(glueMetastore.getDatabase(DB_NAME)).thenReturn(db);
88 | assertEquals(db, cacheDecorator.getDatabase(DB_NAME));
89 | assertNull(cacheDecorator.databaseCache);
90 | verify(glueMetastore, times(1)).getDatabase(DB_NAME);
91 | }
92 |
93 | @Test
94 | public void testGetDatabaseWhenCacheEnabledAndCacheMiss() {
95 | Database db = new Database();
96 | AWSGlueMetastoreCacheDecorator cacheDecorator =
97 | new AWSGlueMetastoreCacheDecorator(hiveConf, glueMetastore);
98 | assertNotNull(cacheDecorator.databaseCache);
99 | Cache dbCache = mock(Cache.class);
100 | cacheDecorator.databaseCache = dbCache;
101 |
102 | when(dbCache.getIfPresent(DB_NAME)).thenReturn(null);
103 | when(glueMetastore.getDatabase(DB_NAME)).thenReturn(db);
104 | doNothing().when(dbCache).put(DB_NAME, db);
105 |
106 | assertEquals(db, cacheDecorator.getDatabase(DB_NAME));
107 |
108 | verify(glueMetastore, times(1)).getDatabase(DB_NAME);
109 | verify(dbCache, times(1)).getIfPresent(DB_NAME);
110 | verify(dbCache, times(1)).put(DB_NAME, db);
111 | }
112 |
113 | @Test
114 | public void testGetDatabaseWhenCacheEnabledAndCacheHit() {
115 | Database db = new Database();
116 | AWSGlueMetastoreCacheDecorator cacheDecorator =
117 | new AWSGlueMetastoreCacheDecorator(hiveConf, glueMetastore);
118 | assertNotNull(cacheDecorator.databaseCache);
119 | Cache dbCache = mock(Cache.class);
120 | cacheDecorator.databaseCache = dbCache;
121 |
122 | when(dbCache.getIfPresent(DB_NAME)).thenReturn(db);
123 |
124 | assertEquals(db, cacheDecorator.getDatabase(DB_NAME));
125 |
126 | verify(dbCache, times(1)).getIfPresent(DB_NAME);
127 | }
128 |
129 | @Test
130 | public void testGetTableWhenCacheDisabled() {
131 | //disable cache
132 | when(hiveConf.getBoolean(AWS_GLUE_TABLE_CACHE_ENABLE, false)).thenReturn(false);
133 | Table table = new Table();
134 | AWSGlueMetastoreCacheDecorator cacheDecorator =
135 | new AWSGlueMetastoreCacheDecorator(hiveConf, glueMetastore);
136 | when(glueMetastore.getTable(DB_NAME, TABLE_NAME)).thenReturn(table);
137 | assertEquals(table, cacheDecorator.getTable(DB_NAME, TABLE_NAME));
138 | assertNull(cacheDecorator.tableCache);
139 | verify(glueMetastore, times(1)).getTable(DB_NAME, TABLE_NAME);
140 | }
141 |
142 | @Test
143 | public void testGetTableWhenCacheEnabledAndCacheMiss() {
144 | Table table = new Table();
145 | AWSGlueMetastoreCacheDecorator cacheDecorator =
146 | new AWSGlueMetastoreCacheDecorator(hiveConf, glueMetastore);
147 | assertNotNull(cacheDecorator.tableCache);
148 | Cache tableCache = mock(Cache.class);
149 | cacheDecorator.tableCache = tableCache;
150 |
151 | when(tableCache.getIfPresent(TABLE_IDENTIFIER)).thenReturn(null);
152 | when(glueMetastore.getTable(DB_NAME, TABLE_NAME)).thenReturn(table);
153 | doNothing().when(tableCache).put(TABLE_IDENTIFIER, table);
154 |
155 | assertEquals(table, cacheDecorator.getTable(DB_NAME, TABLE_NAME));
156 |
157 | verify(glueMetastore, times(1)).getTable(DB_NAME, TABLE_NAME);
158 | verify(tableCache, times(1)).getIfPresent(TABLE_IDENTIFIER);
159 | verify(tableCache, times(1)).put(TABLE_IDENTIFIER, table);
160 | }
161 |
162 | @Test
163 | public void testGetTableWhenCacheEnabledAndCacheHit() {
164 | Table table = new Table();
165 | AWSGlueMetastoreCacheDecorator cacheDecorator =
166 | new AWSGlueMetastoreCacheDecorator(hiveConf, glueMetastore);
167 | assertNotNull(cacheDecorator.tableCache);
168 | Cache tableCache = mock(Cache.class);
169 | cacheDecorator.tableCache = tableCache;
170 |
171 | when(tableCache.getIfPresent(TABLE_IDENTIFIER)).thenReturn(table);
172 |
173 | assertEquals(table, cacheDecorator.getTable(DB_NAME, TABLE_NAME));
174 |
175 | verify(tableCache, times(1)).getIfPresent(TABLE_IDENTIFIER);
176 | }
177 |
178 | }
--------------------------------------------------------------------------------