partitionsAfterAddition = glueUtil.getPartitions(glue, targetGlueCatalogId,
192 | // largeTable.getTable().getDatabaseName(), largeTable.getTable().getName());
193 | // System.out.println("Number of partitions after addition: " + partitionsAfterAddition.size());
194 | }
195 |
196 | } else if (tableStatus.isReplicated() && partitionListFromExport.size() == 0) {
197 | tableStatus.setExportHasPartitions(false);
198 | if (partitionsB4Replication.size() > 0) {
199 | // Export has no partitions but table already has some partitions. Those
200 | // partitions will be deleted in batch mode.
201 | boolean partitionsDeleted = glueUtil.deletePartitions(glue, targetGlueCatalogId,
202 | largeTable.getTable().getDatabaseName(), largeTable.getTable().getName(),
203 | partitionsB4Replication);
204 | if (partitionsDeleted) {
205 | tableStatus.setPartitionsReplicated(true);
206 | recordProcessed = true;
207 | }
208 | }
209 | }
210 | }
211 | // If there is any error in creating/updating table then send it to DLQ
212 | else {
213 | System.out.println("Table replicated but partitions were not replicated. Message will be reprocessed again.");
214 | }
215 |
216 | // Track status in DynamoDB
217 | ddbUtil.trackTableImportStatus(tableStatus, sourceGlueCatalogId, targetGlueCatalogId, importRunId,
218 | exportBatchId, ddbTblNameForTableStatusTracking);
219 | System.out.printf(
220 | "Processing of Table shcema completed. Result: Table replicated: %b, Export has partitions: %b, "
221 | + "Partitions replicated: %b, error: %b \n",
222 | tableStatus.isReplicated(), tableStatus.isExportHasPartitions(), tableStatus.isPartitionsReplicated(),
223 | tableStatus.isError());
224 |
225 | return recordProcessed;
226 | }
227 | }
--------------------------------------------------------------------------------
/src/main/java/com/amazonaws/gdcreplication/util/DBReplicationStatus.java:
--------------------------------------------------------------------------------
1 | // Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | // SPDX-License-Identifier: MIT-0
3 |
4 | package com.amazonaws.gdcreplication.util;
5 |
/**
 * Carries the outcome of replicating a single Glue database: the database
 * name, whether it was created in the target catalog, and whether the
 * replication attempt hit an error.
 */
public class DBReplicationStatus {

	// Name of the Glue database this status refers to.
	private String dbName;
	// True when the database was created in the target Glue Data Catalog.
	private boolean created;
	// True when an error occurred while replicating the database.
	private boolean error;

	public String getDbName() {
		return dbName;
	}

	public void setDbName(String dbName) {
		this.dbName = dbName;
	}

	public boolean isCreated() {
		return created;
	}

	public void setCreated(boolean created) {
		this.created = created;
	}

	public boolean isError() {
		return error;
	}

	public void setError(boolean error) {
		this.error = error;
	}
}
32 |
--------------------------------------------------------------------------------
/src/main/java/com/amazonaws/gdcreplication/util/DDBUtil.java:
--------------------------------------------------------------------------------
1 | // Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | // SPDX-License-Identifier: MIT-0
3 |
4 | package com.amazonaws.gdcreplication.util;
5 |
6 | import java.util.HashMap;
7 | import java.util.List;
8 | import java.util.Map;
9 | import java.util.Optional;
10 |
11 | import com.amazonaws.ClientConfiguration;
12 | import com.amazonaws.services.dynamodbv2.AmazonDynamoDB;
13 | import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClientBuilder;
14 | import com.amazonaws.services.dynamodbv2.document.DynamoDB;
15 | import com.amazonaws.services.dynamodbv2.document.Item;
16 | import com.amazonaws.services.dynamodbv2.document.PutItemOutcome;
17 | import com.amazonaws.services.dynamodbv2.document.Table;
18 | import com.amazonaws.services.dynamodbv2.model.BatchWriteItemRequest;
19 | import com.amazonaws.services.dynamodbv2.model.BatchWriteItemResult;
20 | import com.amazonaws.services.dynamodbv2.model.ReturnConsumedCapacity;
21 | import com.amazonaws.services.dynamodbv2.model.WriteRequest;
22 | import com.google.common.collect.Lists;
23 |
24 | /**
25 | *
26 | * This is a utility class with methods to write items to DynamoDB table.
27 | * from / to a DynamoDB table.
28 | *
29 | *
30 | * @author Ravi Itha, Amazon Web Services, Inc.
31 | *
32 | */
33 | public class DDBUtil {
34 |
35 | /**
36 | * Method to track the status of Tables imported
37 | * @param tableStatus
38 | * @param sourceGlueCatalogId
39 | * @param targetGlueCatalogId
40 | * @param importRunId
41 | * @param ddbTblName
42 | * @return
43 | */
44 | public boolean trackTableImportStatus(TableReplicationStatus tableStatus, String sourceGlueCatalogId,
45 | String targetGlueCatalogId, long importRunId, String exportBatchId, String ddbTblName) {
46 | boolean itemInserted = false;
47 |
48 | ClientConfiguration cc = new ClientConfiguration();
49 | cc.setMaxErrorRetry(10);
50 | AmazonDynamoDB client = AmazonDynamoDBClientBuilder.standard().withClientConfiguration(cc).build();
51 | DynamoDB dynamoDB = new DynamoDB(client);
52 |
53 | Table table = dynamoDB.getTable(ddbTblName);
54 | Item item = new Item().withPrimaryKey("table_id", tableStatus.getTableName().concat("|").concat(tableStatus.getDbName()))
55 | .withNumber("import_run_id", importRunId)
56 | .withString("export_batch_id", exportBatchId)
57 | .withString("table_name", tableStatus.getTableName())
58 | .withString("database_name", tableStatus.getDbName())
59 | .withString("table_schema", tableStatus.getTableSchema())
60 | .withString("target_glue_catalog_id", targetGlueCatalogId)
61 | .withString("source_glue_catalog_id", sourceGlueCatalogId)
62 | .withBoolean("table_created", tableStatus.isCreated())
63 | .withBoolean("table_updated", tableStatus.isUpdated())
64 | .withBoolean("export_has_partitions", tableStatus.isExportHasPartitions())
65 | .withBoolean("partitions_updated", tableStatus.isPartitionsReplicated());
66 | // Write the item to the table
67 | try {
68 | PutItemOutcome outcome = table.putItem(item);
69 | int statusCode = outcome.getPutItemResult().getSdkHttpMetadata().getHttpStatusCode();
70 | if (statusCode == 200) {
71 | itemInserted = true;
72 | System.out
73 | .println("Table item inserted to DynamoDB table. Table name: " + tableStatus.getTableName());
74 | }
75 | } catch(Exception e) {
76 | e.printStackTrace();
77 | System.out.println("Could not insert a Table import status to DynamoDB table: " + ddbTblName);
78 | }
79 | dynamoDB.shutdown();
80 | return itemInserted;
81 | }
82 |
83 | /**
84 | * Method to track the status of Databases imported
85 | * @param sourceGlueCatalogId
86 | * @param targetGlueCatalogId
87 | * @param ddbTblName
88 | * @param databaseName
89 | * @param importRunId
90 | * @param isCreated
91 | * @return
92 | */
93 | public boolean trackDatabaseImportStatus(String sourceGlueCatalogId, String targetGlueCatalogId, String ddbTblName, String databaseName,
94 | long importRunId, String exportBatchId, boolean isCreated) {
95 | boolean itemInserted = false;
96 |
97 | ClientConfiguration cc = new ClientConfiguration();
98 | cc.setMaxErrorRetry(10);
99 | AmazonDynamoDB client = AmazonDynamoDBClientBuilder.standard().withClientConfiguration(cc).build();
100 | DynamoDB dynamoDB = new DynamoDB(client);
101 |
102 | com.amazonaws.services.dynamodbv2.document.Table table = dynamoDB.getTable(ddbTblName);
103 | Item item = new Item().withPrimaryKey("db_id", databaseName).withNumber("import_run_id", importRunId)
104 | .withString("export_batch_id", exportBatchId).withString("target_glue_catalog_id", targetGlueCatalogId)
105 | .withString("source_glue_catalog_id", sourceGlueCatalogId).withBoolean("is_created", isCreated);
106 | // Write the item to the table
107 | try {
108 | PutItemOutcome outcome = table.putItem(item);
109 | int statusCode = outcome.getPutItemResult().getSdkHttpMetadata().getHttpStatusCode();
110 | if (statusCode == 200) {
111 | itemInserted = true;
112 | System.out
113 | .println("Database item inserted to DynamoDB table. Database name: " + databaseName);
114 | }
115 | } catch(Exception e) {
116 | e.printStackTrace();
117 | System.out.println("Could not insert a Database import status to DynamoDB table: " + ddbTblName);
118 | }
119 | dynamoDB.shutdown();
120 | return itemInserted;
121 | }
122 |
123 | /**
124 | * Method to track the status of Tables exported
125 | * @param ddbTblName
126 | * @param glueDBName
127 | * @param glueTableName
128 | * @param glueTableSchema
129 | * @param snsMsgId
130 | * @param glueCatalogId
131 | * @param exportRunId
132 | * @param isExported
133 | * @return
134 | */
135 | public boolean trackTableExportStatus(String ddbTblName, String glueDBName, String glueTableName,
136 | String glueTableSchema, String snsMsgId, String glueCatalogId, long exportRunId, String exportBatchId,
137 | boolean isExported, boolean isLargeTable, String bucketName, String objectKey) {
138 |
139 | boolean itemInserted = false;
140 | if (Optional.of(glueDBName).isPresent() && Optional.of(glueTableName).isPresent()
141 | && Optional.of(glueTableSchema).isPresent() && Optional.of(snsMsgId).isPresent()) {
142 |
143 | ClientConfiguration cc = new ClientConfiguration();
144 | cc.setMaxErrorRetry(10);
145 | AmazonDynamoDB client = AmazonDynamoDBClientBuilder.standard().withClientConfiguration(cc).build();
146 | DynamoDB dynamoDB = new DynamoDB(client);
147 |
148 | com.amazonaws.services.dynamodbv2.document.Table table = dynamoDB.getTable(ddbTblName);
149 |
150 | Item item = new Item().withPrimaryKey("table_id", glueTableName.concat("|").concat(glueDBName))
151 | .withNumber("export_run_id", exportRunId).withString("export_batch_id", exportBatchId)
152 | .withString("source_glue_catalog_id", glueCatalogId).withString("table_schema", glueTableSchema)
153 | .withString("sns_msg_id", snsMsgId).withBoolean("is_exported", isExported)
154 | .withBoolean("is_large_table", isLargeTable);
155 |
156 | if(Optional.ofNullable(bucketName).isPresent() && Optional.ofNullable(objectKey).isPresent())
157 | item.withString("s3_bucket_name", bucketName).withString("object_key", objectKey);
158 |
159 | // Write the item to the table
160 | try {
161 | PutItemOutcome outcome = table.putItem(item);
162 | int statusCode = outcome.getPutItemResult().getSdkHttpMetadata().getHttpStatusCode();
163 | if (statusCode == 200) {
164 | itemInserted = true;
165 | System.out.println("Table item inserted to DynamoDB table. Table name: " + glueTableName);
166 | }
167 | } catch(Exception e) {
168 | e.printStackTrace();
169 | System.out.println("Could not insert a Table export status to DynamoDB table: " + ddbTblName);
170 | }
171 | dynamoDB.shutdown();
172 | } else {
173 | System.out.println("Not all the values present to insert Table item to ");
174 | }
175 | return itemInserted;
176 | }
177 |
178 | /**
179 | * Method to track the status of Databases exported
180 | * @param ddbTblName
181 | * @param glueDBName
182 | * @param glueDBSchema
183 | * @param snsMsgId
184 | * @param glueCatalogId
185 | * @param exportRunId
186 | * @param isExported
187 | * @return
188 | */
189 | public boolean trackDatabaseExportStatus(String ddbTblName, String glueDBName, String glueDBSchema, String snsMsgId,
190 | String glueCatalogId, long exportRunId, String exportBatchId, boolean isExported) {
191 | boolean itemInserted = false;
192 |
193 | ClientConfiguration cc = new ClientConfiguration();
194 | cc.setMaxErrorRetry(10);
195 | AmazonDynamoDB client = AmazonDynamoDBClientBuilder.standard().withClientConfiguration(cc).build();
196 | DynamoDB dynamoDB = new DynamoDB(client);
197 |
198 | com.amazonaws.services.dynamodbv2.document.Table table = dynamoDB.getTable(ddbTblName);
199 | Item item = new Item().withPrimaryKey("db_id", glueDBName)
200 | .withNumber("export_run_id", exportRunId)
201 | .withString("export_batch_id", exportBatchId)
202 | .withString("source_glue_catalog_id", glueCatalogId)
203 | .withString("database_schema", glueDBSchema)
204 | .withString("sns_msg_id", snsMsgId)
205 | .withBoolean("is_exported", isExported);
206 | // Write the item to the table
207 | try {
208 | PutItemOutcome outcome = table.putItem(item);
209 | int statusCode = outcome.getPutItemResult().getSdkHttpMetadata().getHttpStatusCode();
210 | if (statusCode == 200) {
211 | itemInserted = true;
212 | System.out.println("Status inserted to DynamoDB table for Glue Database: " + glueDBName);
213 | }
214 | } catch(Exception e) {
215 | e.printStackTrace();
216 | System.out.println("Could not insert a Database export status to DynamoDB table: " + ddbTblName);
217 | }
218 | dynamoDB.shutdown();
219 | return itemInserted;
220 | }
221 |
222 | /**
223 | * This method inserts multiple items to a DynamoDB table using Batch Write Item API
224 | * @param itemList
225 | * @param dynamoDBTblName
226 | */
227 | public void insertIntoDynamoDB(List itemList, String dynamoDBTblName) {
228 |
229 | System.out.printf("Inserting %d items to DynamoDB using Batch API call. \n", itemList.size());
230 | AmazonDynamoDB dynamoDB = AmazonDynamoDBClientBuilder.standard().build();
231 | for (List miniBatch : Lists.partition(itemList, 25)) {
232 | Map> requestItems = new HashMap>();
233 | requestItems.put(dynamoDBTblName, miniBatch);
234 | BatchWriteItemRequest batchWriteItemRequest = new BatchWriteItemRequest()
235 | .withReturnConsumedCapacity(ReturnConsumedCapacity.TOTAL)
236 | .withRequestItems(requestItems);
237 | BatchWriteItemResult result = dynamoDB.batchWriteItem(batchWriteItemRequest);
238 | while (result.getUnprocessedItems().size() > 0) {
239 | Map> unprocessedItems = result.getUnprocessedItems();
240 | result = dynamoDB.batchWriteItem(unprocessedItems);
241 | }
242 | }
243 | dynamoDB.shutdown();
244 | }
245 |
246 | }
--------------------------------------------------------------------------------
/src/main/java/com/amazonaws/gdcreplication/util/GDCUtil.java:
--------------------------------------------------------------------------------
1 | // Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | // SPDX-License-Identifier: MIT-0
3 |
4 | package com.amazonaws.gdcreplication.util;
5 |
6 | import java.util.List;
7 | import java.util.Optional;
8 |
9 | import com.amazonaws.services.glue.AWSGlue;
10 | import com.amazonaws.services.glue.model.Database;
11 | import com.amazonaws.services.glue.model.Partition;
12 | import com.amazonaws.services.glue.model.Table;
13 | import com.amazonaws.services.sqs.AmazonSQS;
14 |
15 | public class GDCUtil {
16 |
17 | /**
18 | * This method processes a Message that belongs to Table schema
19 | * @param glue
20 | * @param sqs
21 | * @param targetGlueCatalogId
22 | * @param sourceGlueCatalogId
23 | * @param tableWithPartitions
24 | * @param message
25 | * @param ddbTblNameForTableStatusTracking
26 | * @param sqsQueueURL
27 | * @param exportBatchId
28 | * @param skipTableArchive
29 | */
30 | public void processTableSchema(AWSGlue glue, AmazonSQS sqs, String targetGlueCatalogId, String sourceGlueCatalogId,
31 | TableWithPartitions tableWithPartitions, String message, String ddbTblNameForTableStatusTracking,
32 | String sqsQueueURL, String exportBatchId, boolean skipTableArchive) {
33 |
34 | DDBUtil ddbUtil = new DDBUtil();
35 | SQSUtil sqsUtil = new SQSUtil();
36 | GlueUtil glueUtil = new GlueUtil();
37 | long importRunId = System.currentTimeMillis();
38 |
39 | // Get Table and its Partitions from Input JSON
40 | Table table = tableWithPartitions.getTable();
41 | List partitionListFromExport = tableWithPartitions.getPartitionList();
42 |
43 | // Create or update table
44 | TableReplicationStatus tableStatus = glueUtil.createOrUpdateTable(glue, table, targetGlueCatalogId,
45 | skipTableArchive);
46 | // If database not found then create one
47 | if (tableStatus.isDbNotFoundError()) {
48 | System.out.printf("Creating Database with name: '%s'. \n", table.getDatabaseName());
49 | DBReplicationStatus dbStatus = glueUtil.createGlueDatabase(glue, targetGlueCatalogId,
50 | table.getDatabaseName(),
51 | "Database Imported from Glue Data Catalog of AWS Account Id: ".concat(sourceGlueCatalogId));
52 | // Now, try to create / update table again.
53 | if (dbStatus.isCreated()) {
54 | tableStatus = glueUtil.createOrUpdateTable(glue, tableWithPartitions.getTable(), targetGlueCatalogId,
55 | skipTableArchive);
56 | }
57 | }
58 | tableStatus.setTableSchema(message);
59 |
60 | // Update table partitions
61 | if (!tableStatus.isError()) {
62 | // Get table partitions from Target Account
63 | List partitionsB4Replication = glueUtil.getPartitions(glue, targetGlueCatalogId,
64 | table.getDatabaseName(), table.getName());
65 | System.out.println("Number of partitions before replication: " + partitionsB4Replication.size());
66 |
67 | // Add Partitions to the table if the export has Partitions
68 | if (partitionListFromExport.size() > 0) {
69 | tableStatus.setExportHasPartitions(true);
70 | if (partitionsB4Replication.size() == 0) {
71 | System.out.println("Adding partitions based on the export.");
72 | boolean partitionsAdded = glueUtil.addPartitions(glue, partitionListFromExport, targetGlueCatalogId,
73 | table.getDatabaseName(), table.getName());
74 | if (partitionsAdded)
75 | tableStatus.setPartitionsReplicated(true);
76 | } else {
77 | System.out.println(
78 | "Table has partitions. They will be deleted first before adding partitions based on Export.");
79 | // delete partitions in batch mode
80 | boolean partitionsDeleted = glueUtil.deletePartitions(glue, targetGlueCatalogId,
81 | table.getDatabaseName(), table.getName(), partitionsB4Replication);
82 |
83 | // Enable the below code for debugging purpose. Check number of table partitions after deletion
84 | // List partitionsAfterDeletion = glueUtil.getPartitions(glue, targetGlueCatalogId,
85 | // table.getDatabaseName(), table.getName());
86 | // System.out.println("Number of partitions after deletion: " + partitionsAfterDeletion.size());
87 |
88 | // add partitions from S3 object
89 | boolean partitionsAdded = glueUtil.addPartitions(glue, partitionListFromExport, targetGlueCatalogId,
90 | table.getDatabaseName(), table.getName());
91 |
92 | if (partitionsDeleted && partitionsAdded)
93 | tableStatus.setPartitionsReplicated(true);
94 |
95 | // Enable the below code for debugging purpose. Check number of table partitions after addition
96 | // List partitionsAfterAddition = glueUtil.getPartitions(glue, targetGlueCatalogId,
97 | // table.getDatabaseName(), table.getName());
98 | // System.out.println("Number of partitions after addition: " + partitionsAfterAddition.size());
99 | }
100 | } else if (partitionListFromExport.size() == 0) {
101 | tableStatus.setExportHasPartitions(false);
102 | if (partitionsB4Replication.size() > 0) {
103 | // Export has no partitions but table already has some partitions. Those
104 | // partitions will be deleted in batch mode.
105 | boolean partitionsDeleted = glueUtil.deletePartitions(glue, targetGlueCatalogId,
106 | table.getDatabaseName(), table.getName(), partitionsB4Replication);
107 | if (partitionsDeleted)
108 | tableStatus.setPartitionsReplicated(true);
109 | }
110 | }
111 | }
112 | // If there is any error in creating/updating table then send it to DLQ
113 | else {
114 | System.out.println("Error in creating/updating table in the Glue Data Catalog. It will be send to DLQ.");
115 | sqsUtil.sendTableSchemaToDeadLetterQueue(sqs, sqsQueueURL, tableStatus, exportBatchId, sourceGlueCatalogId);
116 | }
117 | // Track status in DynamoDB
118 | ddbUtil.trackTableImportStatus(tableStatus, sourceGlueCatalogId, targetGlueCatalogId, importRunId,
119 | exportBatchId, ddbTblNameForTableStatusTracking);
120 | System.out.printf(
121 | "Processing of Table shcema completed. Result: Table replicated: %b, Export has partitions: %b, "
122 | + "Partitions replicated: %b, error: %b \n",
123 | tableStatus.isReplicated(), tableStatus.isExportHasPartitions(), tableStatus.isPartitionsReplicated(),
124 | tableStatus.isError());
125 | }
126 |
127 | /**
128 | * This method processes a Message that belongs to Database schema
129 | * @param glue
130 | * @param sqs
131 | * @param targetGlueCatalogId
132 | * @param db
133 | * @param message
134 | * @param sqsQueueURL
135 | * @param sourceGlueCatalogId
136 | * @param exportBatchId
137 | * @param ddbTblNameForDBStatusTracking
138 | */
139 | public void processDatabseSchema(AWSGlue glue, AmazonSQS sqs, String targetGlueCatalogId, Database db,
140 | String message, String sqsQueueURL, String sourceGlueCatalogId, String exportBatchId,
141 | String ddbTblNameForDBStatusTracking) {
142 |
143 | DDBUtil ddbUtil = new DDBUtil();
144 | GlueUtil glueUtil = new GlueUtil();
145 | SQSUtil sqsUtil = new SQSUtil();
146 |
147 | boolean isDBCreated = false;
148 | long importRunId = System.currentTimeMillis();
149 | Database database = glueUtil.getDatabaseIfExist(glue, targetGlueCatalogId, db);
150 | boolean dbExist = Optional.ofNullable(database).isPresent();
151 | if (!dbExist) {
152 | DBReplicationStatus dbStatus = glueUtil.createGlueDatabase(glue, targetGlueCatalogId, db);
153 | if (dbStatus.isError()) {
154 | System.out.println("Error in creating database in the Glue Data Catalog. It will be send to DLQ.");
155 | sqsUtil.sendDatabaseSchemaToDeadLetterQueue(sqs, sqsQueueURL, message, db.getName(), exportBatchId,
156 | sourceGlueCatalogId);
157 | } else
158 | isDBCreated = true;
159 | } else
160 | System.out.printf(
161 | "Database with name '%s' exist already in target Glue Data Catalog. No action will be taken. \n",
162 | database.getName());
163 | // Track status in DynamoDB
164 | ddbUtil.trackDatabaseImportStatus(sourceGlueCatalogId, targetGlueCatalogId, ddbTblNameForDBStatusTracking,
165 | db.getName(), importRunId, exportBatchId, isDBCreated);
166 | System.out.printf("Processing of Database shcema completed. Result: DB already exist: %b, DB created: %b. \n",
167 | dbExist, isDBCreated);
168 | }
169 | }
170 |
--------------------------------------------------------------------------------
/src/main/java/com/amazonaws/gdcreplication/util/GlueUtil.java:
--------------------------------------------------------------------------------
1 | // Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | // SPDX-License-Identifier: MIT-0
3 |
4 | package com.amazonaws.gdcreplication.util;
5 |
6 | import java.util.ArrayList;
7 | import java.util.List;
8 | import java.util.Optional;
9 | import java.util.concurrent.atomic.AtomicInteger;
10 |
11 | import com.amazonaws.services.glue.AWSGlue;
12 | import com.amazonaws.services.glue.model.BatchCreatePartitionRequest;
13 | import com.amazonaws.services.glue.model.BatchCreatePartitionResult;
14 | import com.amazonaws.services.glue.model.BatchDeletePartitionRequest;
15 | import com.amazonaws.services.glue.model.BatchDeletePartitionResult;
16 | import com.amazonaws.services.glue.model.CreateDatabaseRequest;
17 | import com.amazonaws.services.glue.model.CreateDatabaseResult;
18 | import com.amazonaws.services.glue.model.CreateTableRequest;
19 | import com.amazonaws.services.glue.model.CreateTableResult;
20 | import com.amazonaws.services.glue.model.Database;
21 | import com.amazonaws.services.glue.model.DatabaseInput;
22 | import com.amazonaws.services.glue.model.DeletePartitionRequest;
23 | import com.amazonaws.services.glue.model.DeletePartitionResult;
24 | import com.amazonaws.services.glue.model.EntityNotFoundException;
25 | import com.amazonaws.services.glue.model.GetDatabaseRequest;
26 | import com.amazonaws.services.glue.model.GetDatabaseResult;
27 | import com.amazonaws.services.glue.model.GetDatabasesRequest;
28 | import com.amazonaws.services.glue.model.GetDatabasesResult;
29 | import com.amazonaws.services.glue.model.GetPartitionsRequest;
30 | import com.amazonaws.services.glue.model.GetPartitionsResult;
31 | import com.amazonaws.services.glue.model.GetTableRequest;
32 | import com.amazonaws.services.glue.model.GetTableResult;
33 | import com.amazonaws.services.glue.model.GetTablesRequest;
34 | import com.amazonaws.services.glue.model.GetTablesResult;
35 | import com.amazonaws.services.glue.model.Partition;
36 | import com.amazonaws.services.glue.model.PartitionError;
37 | import com.amazonaws.services.glue.model.PartitionInput;
38 | import com.amazonaws.services.glue.model.PartitionValueList;
39 | import com.amazonaws.services.glue.model.StorageDescriptor;
40 | import com.amazonaws.services.glue.model.Table;
41 | import com.amazonaws.services.glue.model.TableInput;
42 | import com.amazonaws.services.glue.model.UpdateTableRequest;
43 | import com.amazonaws.services.glue.model.UpdateTableResult;
44 | import com.google.common.collect.Lists;
45 |
46 | /**
47 | * This is class has utility methods to work with AWS Glue Data Catalog
48 | *
49 | * @author Ravi Itha, Amazon Web Services, Inc.
50 | *
51 | */
52 | public class GlueUtil {
53 |
54 | /**
55 | * This method checks if a Database exist with the given name in the Glue Data
56 | * Catalog
57 | *
58 | * @param glue
59 | * @param targetCatalogId
60 | * @param db
61 | * @return
62 | */
63 | public Database getDatabaseIfExist(AWSGlue glue, String targetCatalogId, Database db) {
64 | Database database = null;
65 | GetDatabaseRequest getDatabaseRequest = new GetDatabaseRequest();
66 | getDatabaseRequest.setCatalogId(targetCatalogId);
67 | getDatabaseRequest.setName(db.getName());
68 | try {
69 | GetDatabaseResult getDatabaseResult = glue.getDatabase(getDatabaseRequest);
70 | database = getDatabaseResult.getDatabase();
71 | } catch (EntityNotFoundException e) {
72 | System.out.printf("Database '%s' not found. \n", db.getName());
73 | }
74 | return database;
75 | }
76 |
77 | /**
78 | * This method get all the databases from a given Glue Data Catalog
79 | *
80 | * @param glue
81 | * @param sourceGlueCatalogId
82 | * @return
83 | */
84 | public List getDatabases(AWSGlue glue, String sourceGlueCatalogId) {
85 | List masterDBList = new ArrayList();
86 | GetDatabasesRequest getDatabasesRequest = new GetDatabasesRequest();
87 | getDatabasesRequest.setCatalogId(sourceGlueCatalogId);
88 | GetDatabasesResult getDatabasesResult = glue.getDatabases(getDatabasesRequest);
89 | List databaseList = getDatabasesResult.getDatabaseList();
90 | masterDBList.addAll(databaseList);
91 | String databaseResultNextToken = getDatabasesResult.getNextToken();
92 | if (Optional.ofNullable(databaseResultNextToken).isPresent()) {
93 | do {
94 | // create a new GetDatabasesRequest using next token.
95 | getDatabasesRequest = new GetDatabasesRequest();
96 | getDatabasesRequest.setNextToken(databaseResultNextToken);
97 | getDatabasesResult = glue.getDatabases(getDatabasesRequest);
98 | databaseList = getDatabasesResult.getDatabaseList();
99 | masterDBList.addAll(databaseList);
100 | databaseResultNextToken = getDatabasesResult.getNextToken();
101 | } while (Optional.ofNullable(databaseResultNextToken).isPresent());
102 | }
103 | System.out.println("Total number of databases fetched: " + masterDBList.size());
104 | return masterDBList;
105 | }
106 |
107 | /**
108 | * This method creates a new Database in Glue Data Catalog
109 | *
110 | * @param glue
111 | * @param targetGlueCatalogId
112 | * @param db
113 | * @return
114 | */
115 |
116 | public DBReplicationStatus createGlueDatabase(AWSGlue glue, String targetGlueCatalogId, String dbName,
117 | String dbDescription) {
118 | DBReplicationStatus dbStatus = new DBReplicationStatus();
119 | CreateDatabaseRequest createDatabaseRequest = new CreateDatabaseRequest();
120 | DatabaseInput databaseInput = new DatabaseInput();
121 | databaseInput.setName(dbName);
122 | databaseInput.setDescription(dbDescription);
123 | createDatabaseRequest.setDatabaseInput(databaseInput);
124 | try {
125 | CreateDatabaseResult result = glue.createDatabase(createDatabaseRequest);
126 | int statusCode = result.getSdkHttpMetadata().getHttpStatusCode();
127 | if (statusCode == 200) {
128 | System.out.printf("Database created successfully. Database name: '%s'. \n", dbName);
129 | dbStatus.setCreated(true);
130 | dbStatus.setError(false);
131 | } else
132 | System.out.println("Database could not be created");
133 | } catch (Exception e) {
134 | e.printStackTrace();
135 | dbStatus.setDbName(dbName);
136 | dbStatus.setError(true);
137 | System.out.println("Exception thrown while creating Glue Database");
138 | }
139 | return dbStatus;
140 | }
141 |
142 | public DBReplicationStatus createGlueDatabase(AWSGlue glue, String targetGlueCatalogId, Database db) {
143 | DBReplicationStatus dbStatus = new DBReplicationStatus();
144 | CreateDatabaseRequest createDatabaseRequest = new CreateDatabaseRequest();
145 | DatabaseInput databaseInput = new DatabaseInput();
146 | databaseInput.setName(db.getName());
147 | databaseInput.setDescription(db.getDescription());
148 | databaseInput.setLocationUri(db.getLocationUri());
149 | databaseInput.setParameters(db.getParameters());
150 | createDatabaseRequest.setDatabaseInput(databaseInput);
151 | try {
152 | CreateDatabaseResult result = glue.createDatabase(createDatabaseRequest);
153 | int statusCode = result.getSdkHttpMetadata().getHttpStatusCode();
154 | if (statusCode == 200) {
155 | System.out.printf("Database created successfully. Database name: '%s'. \n", db.getName());
156 | dbStatus.setCreated(true);
157 | dbStatus.setError(false);
158 | } else
159 | System.out.println("Database could not be created");
160 | } catch (Exception e) {
161 | e.printStackTrace();
162 | dbStatus.setDbName(db.getName());
163 | dbStatus.setError(true);
164 | System.out.printf("Exception in creating Database with name: '%s'. \n", db.getName());
165 | }
166 | return dbStatus;
167 | }
168 |
169 | /**
170 | * This method creates a TableInput object using Table object
171 | *
172 | * @param table
173 | * @return
174 | */
175 | public TableInput createTableInput(Table table) {
176 | TableInput tableInput = new TableInput();
177 | tableInput.setDescription(table.getDescription());
178 | tableInput.setLastAccessTime(table.getLastAccessTime());
179 | tableInput.setOwner(table.getOwner());
180 | tableInput.setName(table.getName());
181 | if (Optional.ofNullable(table.getStorageDescriptor()).isPresent()) {
182 | tableInput.setStorageDescriptor(table.getStorageDescriptor());
183 | if (Optional.ofNullable(table.getStorageDescriptor().getParameters()).isPresent())
184 | tableInput.setParameters(table.getStorageDescriptor().getParameters());
185 | }
186 | tableInput.setPartitionKeys(table.getPartitionKeys());
187 | tableInput.setTableType(table.getTableType());
188 | tableInput.setViewExpandedText(table.getViewExpandedText());
189 | tableInput.setViewOriginalText(table.getViewOriginalText());
190 | tableInput.setParameters(table.getParameters());
191 | return tableInput;
192 | }
193 |
194 | /**
195 | * This method gets all the tables for a given databases from Glue Data Catalog
196 | *
197 | * @param glue
198 | * @param glueCatalogId
199 | * @param databaseName
200 | * @return
201 | */
202 | public List getTables(AWSGlue glue, String glueCatalogId, String databaseName) {
203 | System.out.printf("Start - Fetching table list for Database %s \n", databaseName);
204 | List masterTableList = new ArrayList();
205 | GetTablesRequest getTablesRequest = new GetTablesRequest();
206 | getTablesRequest.setCatalogId(glueCatalogId);
207 | getTablesRequest.setDatabaseName(databaseName);
208 | GetTablesResult getTablesResult = glue.getTables(getTablesRequest);
209 | List tableList = getTablesResult.getTableList();
210 | masterTableList.addAll(tableList);
211 | String tableResultNextToken = getTablesResult.getNextToken();
212 | if (Optional.ofNullable(tableResultNextToken).isPresent()) {
213 | do {
214 | // creating a new GetTablesResult using next token.
215 | getTablesRequest = new GetTablesRequest();
216 | getTablesRequest.setNextToken(tableResultNextToken);
217 | getTablesRequest.setCatalogId(glueCatalogId);
218 | getTablesRequest.setDatabaseName(databaseName);
219 | getTablesResult = glue.getTables(getTablesRequest);
220 | tableList = getTablesResult.getTableList();
221 | masterTableList.addAll(tableList);
222 | tableResultNextToken = getTablesResult.getNextToken();
223 | } while (Optional.ofNullable(tableResultNextToken).isPresent());
224 | }
225 | System.out.printf("Database '%s' has %d tables. \n", databaseName, masterTableList.size());
226 | System.out.printf("End - Fetching table list for Database %s \n", databaseName);
227 | return masterTableList;
228 | }
229 |
230 | /**
231 | * This method gets a Table using the given name from Glue Data Catalog. If
232 | * there is no table exist with the provided name then it returns null.
233 | *
234 | * @param glue
235 | * @param glueCatalogId
236 | * @param databaseName
237 | * @param tableName
238 | * @return
239 | */
240 | public Table getTable(AWSGlue glue, String glueCatalogId, String databaseName, String tableName) {
241 | Table table = null;
242 | GetTableRequest getTableRequest = new GetTableRequest();
243 | getTableRequest.setDatabaseName(databaseName);
244 | getTableRequest.setName(tableName);
245 | getTableRequest.setCatalogId(glueCatalogId);
246 | try {
247 | GetTableResult tableResult = glue.getTable(getTableRequest);
248 | table = tableResult.getTable();
249 | } catch (EntityNotFoundException e) {
250 | System.out.printf("Table '%s' not found. \n", tableName);
251 | }
252 | return table;
253 | }
254 |
255 | /**
256 | * This method creates or updates a Table in Glue Data Catalog
257 | *
258 | * @param glue
259 | * @param sourceTable
260 | * @param targetGlueCatalogId
261 | * @param skipTableArchive
262 | * @return
263 | */
264 | public TableReplicationStatus createOrUpdateTable(AWSGlue glue, Table sourceTable, String targetGlueCatalogId,
265 | boolean skipTableArchive) {
266 |
267 | TableReplicationStatus tableStatus = new TableReplicationStatus();
268 | tableStatus.setTableName(sourceTable.getName());
269 | tableStatus.setDbName(sourceTable.getDatabaseName());
270 | tableStatus.setReplicationTime(System.currentTimeMillis());
271 |
272 | // Check if a table exist already
273 | GetTableRequest targetTableRequest = new GetTableRequest();
274 | targetTableRequest.setCatalogId(targetGlueCatalogId);
275 | targetTableRequest.setDatabaseName(sourceTable.getDatabaseName());
276 | targetTableRequest.setName(sourceTable.getName());
277 | Table targetTable = null;
278 | try {
279 | GetTableResult targetTableResult = glue.getTable(targetTableRequest);
280 | targetTable = targetTableResult.getTable();
281 | } catch (EntityNotFoundException e) {
282 | System.out.printf("Table '%s' not found. It will be created. \n", sourceTable.getName());
283 | } catch (Exception e) {
284 | e.printStackTrace();
285 | System.out.println("Exception in getting getTable");
286 | }
287 | TableInput tableInput = createTableInput(sourceTable);
288 |
289 | // If table exist - update the table with the schema in the input message.
290 | if (Optional.ofNullable(targetTable).isPresent()) {
291 | System.out.println("Table exist. It will be updated");
292 | UpdateTableRequest updateTableRequest = new UpdateTableRequest();
293 | updateTableRequest.setTableInput(tableInput);
294 | updateTableRequest.setSkipArchive(skipTableArchive);
295 | updateTableRequest.setDatabaseName(sourceTable.getDatabaseName());
296 |
297 | try {
298 | UpdateTableResult updateTableResult = glue.updateTable(updateTableRequest);
299 | int statusCode = updateTableResult.getSdkHttpMetadata().getHttpStatusCode();
300 | if (statusCode == 200) {
301 | tableStatus.setUpdated(true);
302 | tableStatus.setReplicated(true);
303 | tableStatus.setError(false);
304 | System.out.printf("Table '%s' updated successfully. \n", sourceTable.getName());
305 | }
306 | } catch (EntityNotFoundException e) {
307 | e.printStackTrace();
308 | System.out.printf("Exception thrown while updating table '%s'. Reason: '%s' do not exist already. \n",
309 | sourceTable.getName(), sourceTable.getDatabaseName());
310 | tableStatus.setReplicated(false);
311 | tableStatus.setDbNotFoundError(true);
312 | tableStatus.setError(true);
313 | } catch (Exception e) {
314 | e.printStackTrace();
315 | System.out.printf("Exception thrown while updating table '%s'. \n", sourceTable.getName());
316 | tableStatus.setReplicated(false);
317 | tableStatus.setError(true);
318 | }
319 | }
320 | // If the table do not exist - create a new table with the schema in the input
321 | // message.
322 | else {
323 | CreateTableRequest createTableRequest = new CreateTableRequest();
324 | createTableRequest.setCatalogId(targetGlueCatalogId);
325 | createTableRequest.setDatabaseName(sourceTable.getDatabaseName());
326 | createTableRequest.setTableInput(tableInput);
327 | try {
328 | CreateTableResult createTableResult = glue.createTable(createTableRequest);
329 | int statusCode = createTableResult.getSdkHttpMetadata().getHttpStatusCode();
330 | if (statusCode == 200) {
331 | tableStatus.setCreated(true);
332 | tableStatus.setReplicated(true);
333 | tableStatus.setError(false);
334 | System.out.printf("Table '%s' created successfully. \n", sourceTable.getName());
335 | }
336 | } catch (EntityNotFoundException e) {
337 | e.printStackTrace();
338 | System.out.printf("Exception thrown while creating table '%s'. Reason: '%s' do not exist already. \n.",
339 | sourceTable.getName(), sourceTable.getDatabaseName());
340 | tableStatus.setReplicated(false);
341 | tableStatus.setDbNotFoundError(true);
342 | } catch (Exception e) {
343 | e.printStackTrace();
344 | System.out.printf("Exception thrown while creating table '%s' \n.", sourceTable.getName());
345 | tableStatus.setReplicated(false);
346 | tableStatus.setError(true);
347 | }
348 | }
349 | return tableStatus;
350 | }
351 |
352 | /**
353 | * This method gets a list of partitions for a given table.
354 | *
355 | * @param glue
356 | * @param catalogId
357 | * @param databaseName
358 | * @param tableName
359 | * @return
360 | */
361 | public List getPartitions(AWSGlue glue, String catalogId, String databaseName, String tableName) {
362 | List masterPartitionList = new ArrayList();
363 | GetPartitionsRequest getPartitionsRequest = new GetPartitionsRequest();
364 | getPartitionsRequest.setDatabaseName(databaseName);
365 | getPartitionsRequest.setCatalogId(catalogId);
366 | getPartitionsRequest.setTableName(tableName);
367 | GetPartitionsResult getPartitionResult = glue.getPartitions(getPartitionsRequest);
368 | List partitionList = getPartitionResult.getPartitions();
369 | masterPartitionList.addAll(partitionList);
370 | String partitionResultNextToken = getPartitionResult.getNextToken();
371 | if (Optional.ofNullable(partitionResultNextToken).isPresent()) {
372 | do {
373 | // create a new GetPartitionsRequest using next token.
374 | getPartitionsRequest = new GetPartitionsRequest();
375 | getPartitionsRequest.setDatabaseName(databaseName);
376 | getPartitionsRequest.setCatalogId(catalogId);
377 | getPartitionsRequest.setTableName(tableName);
378 | getPartitionsRequest.setNextToken(partitionResultNextToken);
379 | getPartitionResult = glue.getPartitions(getPartitionsRequest);
380 | partitionList = getPartitionResult.getPartitions();
381 | masterPartitionList.addAll(partitionList);
382 | partitionResultNextToken = getPartitionResult.getNextToken();
383 | } while (Optional.ofNullable(partitionResultNextToken).isPresent());
384 | }
385 | return masterPartitionList;
386 | }
387 |
388 | /**
389 | * Add partitions in batch mode
390 | * @param glue
391 | * @param partitionsToAdd
392 | * @param catalogId
393 | * @param databaseName
394 | * @param tableName
395 | * @return
396 | */
397 | public boolean addPartitions(AWSGlue glue, List partitionsToAdd, String catalogId, String databaseName,
398 | String tableName) {
399 | AtomicInteger numPartitionsAdded = new AtomicInteger();
400 | boolean partitionsAdded = false;
401 | BatchCreatePartitionRequest batchCreatePartitionRequest = new BatchCreatePartitionRequest();
402 | batchCreatePartitionRequest.setCatalogId(catalogId);
403 | batchCreatePartitionRequest.setDatabaseName(databaseName);
404 | batchCreatePartitionRequest.setTableName(tableName);
405 |
406 | List partitionInputList = new ArrayList();
407 | for (Partition p : partitionsToAdd) {
408 | PartitionInput pi = new PartitionInput();
409 | StorageDescriptor storageDescriptor = p.getStorageDescriptor();
410 | pi.setStorageDescriptor(storageDescriptor);
411 | pi.setValues(p.getValues());
412 | partitionInputList.add(pi);
413 | }
414 | System.out.println("Partition Input List Size: " + partitionInputList.size());
415 | if(partitionInputList.size() > 100)
416 | System.out.println("The input has more than 100 partitions, it will be sliced into smaller lists with 100 partitions each.");
417 |
418 | List> listofSmallerLists = Lists.partition(partitionInputList, 100);
419 | for(List partInputList : listofSmallerLists) {
420 | batchCreatePartitionRequest.setPartitionInputList(partInputList);
421 | try {
422 | BatchCreatePartitionResult result = glue.batchCreatePartition(batchCreatePartitionRequest);
423 | int statusCode = result.getSdkHttpMetadata().getHttpStatusCode();
424 | List partErrors = result.getErrors();
425 | if (statusCode == 200 && partErrors.size() == 0) {
426 | System.out.printf("%d partitions were added to table '%s' of database '%s'. \n", partInputList.size(),
427 | tableName, databaseName);
428 | partitionsAdded = true;
429 | numPartitionsAdded.getAndAdd(partInputList.size());
430 | System.out.printf("%d of %d partitions added so far. \n", numPartitionsAdded.get(), partitionInputList.size());
431 | } else {
432 | System.out.printf("Not all partitions were added. Status Code: %d, Number of partition errors: %d \n",
433 | statusCode, partErrors.size());
434 | for (PartitionError pe : partErrors) {
435 | System.out.println("Partition Error Message: " + pe.getErrorDetail().getErrorMessage());
436 | List pv = pe.getPartitionValues();
437 | for (String v : pv) {
438 | System.out.println("Partition error value: " + v);
439 | }
440 | }
441 | }
442 | } catch(Exception e) {
443 | e.printStackTrace();
444 | System.out.printf("Exception in adding partitions. \n");
445 | System.out.printf("%d of %d partitions added so far. \n", numPartitionsAdded.get(), partitionInputList.size());
446 | // TODO - what to do when there are exceptions here?
447 | }
448 | }
449 | System.out.println("Total partitions added: " + numPartitionsAdded.get());
450 | return partitionsAdded;
451 | }
452 |
453 | /**
454 | * Delete a single partition
455 | * @param glue
456 | * @param catalogId
457 | * @param databaseName
458 | * @param tableName
459 | * @param partition
460 | * @return
461 | */
462 | public boolean deletePartition(AWSGlue glue, String catalogId, String databaseName, String tableName,
463 | Partition partition) {
464 | boolean partitionDeleted = false;
465 | DeletePartitionRequest deletePartitionRequest = new DeletePartitionRequest();
466 | deletePartitionRequest.setCatalogId(catalogId);
467 | deletePartitionRequest.setDatabaseName(databaseName);
468 | deletePartitionRequest.setTableName(tableName);
469 | deletePartitionRequest.setPartitionValues(partition.getValues());
470 |
471 | DeletePartitionResult result = glue.deletePartition(deletePartitionRequest);
472 | int statusCode = result.getSdkHttpMetadata().getHttpStatusCode();
473 | if (statusCode == 200) {
474 | System.out.printf("Partition deleted from table '%s' of database '%s' \n", tableName, databaseName);
475 | partitionDeleted = true;
476 | }
477 | return partitionDeleted;
478 | }
479 |
480 | /**
481 | * Delete partitions using Batch mode
482 | * @param glue
483 | * @param catalogId
484 | * @param databaseName
485 | * @param tableName
486 | * @param partitionsToDelete
487 | * @return
488 | */
489 | public boolean deletePartitions(AWSGlue glue, String catalogId, String databaseName, String tableName,
490 | List partitionsToDelete) {
491 |
492 | boolean partitionsDeleted = false;
493 |
494 | BatchDeletePartitionRequest batchDeletePartitionRequest = new BatchDeletePartitionRequest();
495 | batchDeletePartitionRequest.setCatalogId(catalogId);
496 | batchDeletePartitionRequest.setDatabaseName(databaseName);
497 | batchDeletePartitionRequest.setTableName(tableName);
498 |
499 | // Prepare a List of PartitionValueList
500 | List listOfPartitionValueList = new ArrayList();
501 |
502 | // For each Partition, get its values, add create a PartitionValueList, and add
503 | // them to List of PartitionValueList
504 | for (Partition p : partitionsToDelete) {
505 | PartitionValueList pvList = new PartitionValueList();
506 | pvList.setValues(p.getValues());
507 | listOfPartitionValueList.add(pvList);
508 | }
509 |
510 | System.out.println("Size of List of PartitionValueList: " + listOfPartitionValueList.size());
511 | List> listofSmallerLists = Lists.partition(listOfPartitionValueList, 25);
512 | for (List smallerList : listofSmallerLists) {
513 | // Add List of PartitionValueList to BatchDeletePartitionRequest
514 | batchDeletePartitionRequest.setPartitionsToDelete(smallerList);
515 | try {
516 | BatchDeletePartitionResult batchDeletePartitionResult = glue
517 | .batchDeletePartition(batchDeletePartitionRequest);
518 | int statusCode = batchDeletePartitionResult.getSdkHttpMetadata().getHttpStatusCode();
519 | List partErrors = batchDeletePartitionResult.getErrors();
520 | if (statusCode == 200 && partErrors.size() == 0) {
521 | System.out.printf("%d partitions from table '%s' of database '%s' were deleted. \n",
522 | smallerList.size(), tableName, databaseName);
523 | partitionsDeleted = true;
524 | } else {
525 | System.out.printf(
526 | "Not all partitions were deleted. Status Code: %d, Number of partition errors: %d \n",
527 | statusCode, partErrors.size());
528 |
529 | for (PartitionError pe : partErrors) {
530 | System.out.println("Partition Error Message: " + pe.getErrorDetail().getErrorMessage());
531 | List pv = pe.getPartitionValues();
532 | for (String v : pv) {
533 | System.out.println("Partition value: " + v);
534 | }
535 | }
536 | }
537 | } catch (Exception e) {
538 | System.out.println("Exception in deleting partitions.");
539 | e.printStackTrace();
540 | }
541 | }
542 | return partitionsDeleted;
543 | }
544 | }
545 |
--------------------------------------------------------------------------------
/src/main/java/com/amazonaws/gdcreplication/util/LargeTable.java:
--------------------------------------------------------------------------------
1 | // Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | // SPDX-License-Identifier: MIT-0
3 |
4 | package com.amazonaws.gdcreplication.util;
5 |
6 | import com.amazonaws.services.glue.model.Table;
7 |
8 | /**
9 | * This is a POJO class for Glue Database Table
10 | *
11 | * @author Ravi Itha, Amazon Web Services, Inc.
12 | *
13 | */
14 | public class LargeTable {
15 |
16 | private String catalogId;
17 | private boolean largeTable;
18 | private int numberOfPartitions;
19 | private Table table;
20 | private String s3ObjectKey;
21 | private String s3BucketName;
22 |
23 | public Table getTable() {
24 | return table;
25 | }
26 |
27 | public void setTable(Table table) {
28 | this.table = table;
29 | }
30 |
31 | public String getS3BucketName() {
32 | return s3BucketName;
33 | }
34 |
35 | public void setS3BucketName(String s3BucketName) {
36 | this.s3BucketName = s3BucketName;
37 | }
38 |
39 | public String getS3ObjectKey() {
40 | return s3ObjectKey;
41 | }
42 |
43 | public void setS3ObjectKey(String s3ObjectKey) {
44 | this.s3ObjectKey = s3ObjectKey;
45 | }
46 |
47 | public String getCatalogId() {
48 | return catalogId;
49 | }
50 |
51 | public void setCatalogId(String catalogId) {
52 | this.catalogId = catalogId;
53 | }
54 |
55 | public boolean isLargeTable() {
56 | return largeTable;
57 | }
58 |
59 | public void setLargeTable(boolean largeTable) {
60 | this.largeTable = largeTable;
61 | }
62 |
63 | public int getNumberOfPartitions() {
64 | return numberOfPartitions;
65 | }
66 |
67 | public void setNumberOfPartitions(int numberOfPartitions) {
68 | this.numberOfPartitions = numberOfPartitions;
69 | }
70 | }
71 |
--------------------------------------------------------------------------------
/src/main/java/com/amazonaws/gdcreplication/util/S3Util.java:
--------------------------------------------------------------------------------
1 | // Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | // SPDX-License-Identifier: MIT-0
3 |
4 | package com.amazonaws.gdcreplication.util;
5 |
6 | import java.io.BufferedReader;
7 | import java.io.ByteArrayInputStream;
8 | import java.io.File;
9 | import java.io.IOException;
10 | import java.io.InputStream;
11 | import java.io.InputStreamReader;
12 | import java.nio.charset.StandardCharsets;
13 | import java.util.ArrayList;
14 | import java.util.List;
15 |
16 | import com.amazonaws.AmazonServiceException;
17 | import com.amazonaws.SdkClientException;
18 | import com.amazonaws.auth.profile.ProfileCredentialsProvider;
19 | import com.amazonaws.services.glue.model.Partition;
20 | import com.amazonaws.services.s3.AmazonS3;
21 | import com.amazonaws.services.s3.AmazonS3ClientBuilder;
22 | import com.amazonaws.services.s3.model.GetObjectRequest;
23 | import com.amazonaws.services.s3.model.ObjectMetadata;
24 | import com.amazonaws.services.s3.model.PutObjectRequest;
25 | import com.amazonaws.services.s3.model.ResponseHeaderOverrides;
26 | import com.amazonaws.services.s3.model.S3Object;
27 | import com.google.gson.Gson;
28 | import com.google.gson.JsonSyntaxException;
29 |
30 | public class S3Util {
31 |
32 |
33 | /**
34 | *
35 | * Create an object in S3 with list of partitions.
36 | * @param region
37 | * @param bucket
38 | * @param objectKey
39 | * @param content
40 | * @return
41 | *
42 | * Wrote this method based on inputs from Zoran Ivanovic of AWS
43 | */
44 | public boolean createS3Object(String region, String bucket, String objectKey, String content) {
45 | boolean objectCreated = false;
46 | AmazonS3 s3 = AmazonS3ClientBuilder.standard().withRegion(region).build();
47 |
48 | byte[] contentBytes = content.getBytes(StandardCharsets.UTF_8);
49 | InputStream inputStream = new ByteArrayInputStream(contentBytes);
50 |
51 | ObjectMetadata metadata = new ObjectMetadata();
52 | metadata.setContentLength(contentBytes.length);
53 | PutObjectRequest putObjectRequest = new PutObjectRequest(bucket, objectKey, inputStream, metadata);
54 | // send request to S3 to create an object with the content
55 | try {
56 | s3.putObject(putObjectRequest);
57 | objectCreated = true;
58 | System.out.println("Partition Object uploaded to S3. Object key: " + objectKey);
59 | } catch (AmazonServiceException e) {
60 | System.err.println(e.getErrorMessage());
61 | } catch (Exception e) {
62 | e.printStackTrace();
63 | }
64 | try {
65 | inputStream.close();
66 | } catch(Exception e) {
67 | e.printStackTrace();
68 | System.out.println("Exception thrown while closing InputStream.");
69 | }
70 | return objectCreated;
71 | }
72 |
73 | /**
74 | * Upload a file as an object to S3.
75 | * @param region
76 | * @param bucketName
77 | * @param objKeyName
78 | * @param localFilePath
79 | * @return
80 | * @throws IOException
81 | */
82 | public boolean uploadObject(String region, String bucketName, String objKeyName, String localFilePath)
83 | throws IOException {
84 |
85 | System.out.println("Uploading file to S3.");
86 | boolean objectUploaded = false;
87 | AmazonS3 s3Client = AmazonS3ClientBuilder.standard().withRegion(region).build();
88 |
89 | try {
90 | // Upload a text string as a new object.
91 | s3Client.putObject(bucketName, objKeyName, "Uploaded String Object");
92 | // Upload a file as a new object with ContentType and title specified.
93 | PutObjectRequest request = new PutObjectRequest(bucketName, objKeyName, new File(localFilePath));
94 | ObjectMetadata metadata = new ObjectMetadata();
95 | metadata.setContentType("plain/text");
96 | metadata.addUserMetadata("x-amz-meta-title", "PartitionFile");
97 | request.setMetadata(metadata);
98 | s3Client.putObject(request);
99 | objectUploaded = true;
100 | } catch (AmazonServiceException e) {
101 | // The call was transmitted successfully, but Amazon S3 couldn't process
102 | // it, so it returned an error response.
103 | e.printStackTrace();
104 | } catch (SdkClientException e) {
105 | // Amazon S3 couldn't be contacted for a response, or the client
106 | // couldn't parse the response from Amazon S3.
107 | e.printStackTrace();
108 | }
109 |
110 | return objectUploaded;
111 | }
112 |
113 | public boolean createObject(String region, String bucketName, String tableDDL, String stringObjKeyName)
114 | throws IOException {
115 |
116 | boolean objectCreated = false;
117 |
118 | try {
119 | AmazonS3 s3Client = AmazonS3ClientBuilder.standard().withRegion(region).build();
120 |
121 | // Upload a text string as a new object.
122 | s3Client.putObject(bucketName, stringObjKeyName, tableDDL);
123 | objectCreated = true;
124 |
125 | } catch (AmazonServiceException e) {
126 | // The call was transmitted successfully, but Amazon S3 couldn't process
127 | // it, so it returned an error response.
128 | e.printStackTrace();
129 | } catch (SdkClientException e) {
130 | // Amazon S3 couldn't be contacted for a response, or the client
131 | // couldn't parse the response from Amazon S3.
132 | e.printStackTrace();
133 | }
134 | return objectCreated;
135 | }
136 |
137 | public void getObject(String region, String bucketName, String key) throws IOException {
138 |
139 | S3Object fullObject = null, objectPortion = null, headerOverrideObject = null;
140 | try {
141 | AmazonS3 s3Client = AmazonS3ClientBuilder.standard().withRegion(region)
142 | .withCredentials(new ProfileCredentialsProvider()).build();
143 |
144 | // Get an object and print its contents.
145 | System.out.println("Downloading an object");
146 | fullObject = s3Client.getObject(new GetObjectRequest(bucketName, key));
147 | System.out.println("Content-Type: " + fullObject.getObjectMetadata().getContentType());
148 | System.out.println("Content: ");
149 | displayTextInputStream(fullObject.getObjectContent());
150 |
151 | // Get a range of bytes from an object and print the bytes.
152 | GetObjectRequest rangeObjectRequest = new GetObjectRequest(bucketName, key).withRange(0, 9);
153 | objectPortion = s3Client.getObject(rangeObjectRequest);
154 | System.out.println("Printing bytes retrieved.");
155 |
156 | displayTextInputStream(objectPortion.getObjectContent());
157 |
158 | // Get an entire object, overriding the specified response headers, and print
159 | // the object's content.
160 |
161 | ResponseHeaderOverrides headerOverrides = new ResponseHeaderOverrides().withCacheControl("No-cache")
162 | .withContentDisposition("attachment; filename=example.txt");
163 | GetObjectRequest getObjectRequestHeaderOverride = new GetObjectRequest(bucketName, key)
164 | .withResponseHeaders(headerOverrides);
165 | headerOverrideObject = s3Client.getObject(getObjectRequestHeaderOverride);
166 | displayTextInputStream(headerOverrideObject.getObjectContent());
167 | } catch (AmazonServiceException e) {
168 | // The call was transmitted successfully, but Amazon S3 couldn't process
169 | // it, so it returned an error response.
170 | e.printStackTrace();
171 | } catch (SdkClientException e) {
172 | // Amazon S3 couldn't be contacted for a response, or the client
173 | // couldn't parse the response from Amazon S3.
174 | e.printStackTrace();
175 | } finally {
176 | // To ensure that the network connection doesn't remain open, close any open
177 | // input streams.
178 | if (fullObject != null) {
179 | fullObject.close();
180 | }
181 | if (objectPortion != null) {
182 | objectPortion.close();
183 | }
184 | if (headerOverrideObject != null) {
185 | headerOverrideObject.close();
186 | }
187 | }
188 |
189 | }
190 |
191 | public static void displayTextInputStream(InputStream input) throws IOException {
192 | // Read the text input stream one line at a time and display each line.
193 | BufferedReader reader = new BufferedReader(new InputStreamReader(input));
194 | String line = null;
195 | while ((line = reader.readLine()) != null) {
196 | System.out.println(line);
197 | }
198 | System.out.println();
199 | }
200 |
201 | public List getPartitionsFromS3(String region, String bucket, String key) {
202 |
203 | String contentType = "";
204 | Gson gson = new Gson();
205 | S3Object fullObject = null;
206 | AmazonS3 s3 = AmazonS3ClientBuilder.standard().withRegion(region).build();
207 | System.out.printf("Bucket Name: %s, Object Key: %s \n", bucket, key);
208 |
209 | try {
210 | fullObject = s3.getObject(new GetObjectRequest(bucket, key));
211 | } catch (Exception e) {
212 | e.printStackTrace();
213 | System.out.println("Exception thrown while reading object from S3");
214 | }
215 |
216 | InputStream input = fullObject.getObjectContent();
217 | contentType = fullObject.getObjectMetadata().getContentType();
218 | System.out.println("CONTENT TYPE: " + contentType);
219 |
220 | // Read the text input stream one line at a time and display each line.
221 | List partitionList = new ArrayList();
222 |
223 | BufferedReader reader = new BufferedReader(new InputStreamReader(input));
224 | String line = null;
225 | try {
226 | while ((line = reader.readLine()) != null) {
227 | Partition partition = gson.fromJson(line, Partition.class);
228 | partitionList.add(partition);
229 | }
230 | } catch (JsonSyntaxException | IOException e) {
231 | System.out.println("Exception occured while reading partition information from S3 object.");
232 | e.printStackTrace();
233 | }
234 | System.out.println("Number of partitions read from S3: " + partitionList.size());
235 | return partitionList;
236 | }
237 |
238 | }
239 |
--------------------------------------------------------------------------------
/src/main/java/com/amazonaws/gdcreplication/util/SNSUtil.java:
--------------------------------------------------------------------------------
1 | // Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | // SPDX-License-Identifier: MIT-0
3 |
4 | package com.amazonaws.gdcreplication.util;
5 |
6 | import java.util.HashMap;
7 | import java.util.List;
8 | import java.util.Map;
9 | import java.util.concurrent.atomic.AtomicInteger;
10 |
11 | import com.amazonaws.services.glue.model.Database;
12 | import com.amazonaws.services.glue.model.Table;
13 | import com.amazonaws.services.sns.AmazonSNS;
14 | import com.amazonaws.services.sns.model.MessageAttributeValue;
15 | import com.amazonaws.services.sns.model.PublishRequest;
16 | import com.amazonaws.services.sns.model.PublishResult;
17 | import com.google.gson.Gson;
18 |
19 | /**
20 | * This is class has utility methods to work with Amazon SNS
21 | * @author Ravi Itha, Amazon Web Services, Inc.
22 | *
23 | */
24 | public class SNSUtil {
25 |
26 |
27 | public PublishResult publishLargeTableSchemaToSNS(AmazonSNS sns, String topicArn, String region, String bucketName, String message,
28 | String sourceGlueCatalogId, String exportBatchId, String messageType) {
29 |
30 | PublishResult publishResponse = null;
31 |
32 | PublishRequest publishRequest = new PublishRequest(topicArn, message);
33 |
34 | Map messageAttributes = new HashMap();
35 | messageAttributes.put("source_catalog_id", createStringAttribute(sourceGlueCatalogId));
36 | messageAttributes.put("message_type", createStringAttribute(messageType));
37 | messageAttributes.put("export_batch_id", createStringAttribute(exportBatchId));
38 | messageAttributes.put("bucket_name", createStringAttribute(bucketName));
39 | messageAttributes.put("region_name", createStringAttribute(region));
40 | publishRequest.setMessageAttributes(messageAttributes);
41 | try {
42 | publishResponse = sns.publish(publishRequest);
43 | } catch (Exception e) {
44 | e.printStackTrace();
45 | System.out.println("Large Table message could not be published to SNS Topic. Topic ARN: " + topicArn);
46 | System.out.println("Message to be published: " + message);
47 | }
48 | return publishResponse;
49 | }
50 |
51 | /**
52 | * This method publishes one Table Schema (DDL) to SNS Topic
53 | *
54 | * @param sns
55 | * @param topicArn
56 | * @param databaseDDL
57 | * @param sourceGlueCatalogId
58 | * @return
59 | */
60 | public PublishResult publishDatabaseSchemaToSNS(AmazonSNS sns, String topicArn, String databaseDDL,
61 | String sourceGlueCatalogId, String exportBatchId) {
62 | PublishResult publishResponse = null;
63 | PublishRequest publishRequest = new PublishRequest(topicArn, databaseDDL);
64 | Map messageAttributes = new HashMap();
65 | messageAttributes.put("source_catalog_id", createStringAttribute(sourceGlueCatalogId));
66 | messageAttributes.put("message_type", createStringAttribute("database"));
67 | messageAttributes.put("export_batch_id", createStringAttribute(exportBatchId));
68 | publishRequest.setMessageAttributes(messageAttributes);
69 | try {
70 | publishResponse = sns.publish(publishRequest);
71 | } catch (Exception e) {
72 | System.out.println("Database schema could not be published to SNS Topic.");
73 | }
74 | return publishResponse;
75 | }
76 |
77 | /**
78 | * This method publishes all Database Schemas (DDL) to SNS Topic and tracks the
79 | * status in a DynamoDB table.
80 | *
81 | * @param sns
82 | * @param dBList
83 | * @param snsTopicArn
84 | * @param ddbUtil
85 | * @param ddbTblName
86 | * @param sourceGlueCatalogId
87 | * @return
88 | */
89 | public int publishDatabaseSchemasToSNS(AmazonSNS sns, List masterDBList, String snsTopicArn,
90 | DDBUtil ddbUtil, String ddbTblName, String sourceGlueCatalogId) {
91 | long exportRunId = System.currentTimeMillis();
92 | String exportBatchId = Long.toString(exportRunId);
93 | AtomicInteger numberOfDatabasesExported = new AtomicInteger();
94 | // Create Message Attributes
95 | MessageAttributeValue sourceCatalogIdMA = createStringAttribute(sourceGlueCatalogId);
96 | MessageAttributeValue msgTypeMA = createStringAttribute("database");
97 | MessageAttributeValue exportBatchIdMA = createStringAttribute(exportBatchId);
98 | // Convert databases to JSON Messages and publish them to SNS Topic
99 | for (Database db : masterDBList) {
100 | // Convert Glue Database to JSON String
101 | Gson gson = new Gson();
102 | String databaseDDL = gson.toJson(db);
103 | // Publish JSON String to Amazon SNS topic
104 | PublishRequest publishRequest = new PublishRequest(snsTopicArn, databaseDDL);
105 | Map messageAttributes = new HashMap();
106 | messageAttributes.put("source_catalog_id", sourceCatalogIdMA);
107 | messageAttributes.put("message_type", msgTypeMA);
108 | messageAttributes.put("export_batch_id", exportBatchIdMA);
109 | publishRequest.setMessageAttributes(messageAttributes);
110 | try {
111 | PublishResult publishResponse = sns.publish(publishRequest);
112 | numberOfDatabasesExported.getAndIncrement();
113 | System.out.printf("Schema for Database '%s' published to SNS Topic. Message_Id: %s. \n",
114 | db.getName(), publishResponse.getMessageId());
115 | ddbUtil.trackDatabaseExportStatus(ddbTblName, db.getName(), databaseDDL, publishResponse.getMessageId(),
116 | sourceGlueCatalogId, exportRunId, exportBatchId, true);
117 | } catch (Exception e) {
118 | e.printStackTrace();
119 | System.out.printf(
120 | "Schema for Database '%s' could not be published to SNS Topic. It will be audited in DynamoDB table. \n",
121 | db.getName());
122 | ddbUtil.trackDatabaseExportStatus(ddbTblName, db.getName(), databaseDDL, "", sourceGlueCatalogId,
123 | exportRunId, exportBatchId, false);
124 | }
125 | }
126 | System.out.println("Number of databases exported to SNS: " + numberOfDatabasesExported.get());
127 | return numberOfDatabasesExported.get();
128 | }
129 |
130 | /**
131 | * This method publishes Table Schema (DDL) to SNS Topic
132 | *
133 | * @param sns
134 | * @param topicArn
135 | * @param tableDDL
136 | * @param sourceGlueCatalogId
137 | * @return
138 | */
139 | public PublishResult publishTableSchemaToSNS(AmazonSNS sns, String topicArn, Table table, String tableDDL,
140 | String sourceGlueCatalogId, String exportBatchId) {
141 | PublishResult publishResponse = null;
142 | PublishRequest publishRequest = new PublishRequest(topicArn, tableDDL);
143 | Map messageAttributes = new HashMap();
144 | messageAttributes.put("source_catalog_id", createStringAttribute(sourceGlueCatalogId));
145 | messageAttributes.put("message_type", createStringAttribute("table"));
146 | messageAttributes.put("export_batch_id", createStringAttribute(exportBatchId));
147 | publishRequest.setMessageAttributes(messageAttributes);
148 | try {
149 | publishResponse = sns.publish(publishRequest);
150 | System.out.printf("Table schema for Table '%s' of database '%s' published to SNS Topic. Message_Id: %s. \n", table.getName(),
151 | table.getDatabaseName(), publishResponse.getMessageId());
152 | } catch (Exception e) {
153 | e.printStackTrace();
154 | System.out.printf(
155 | "Table schema for Table '%s' of database '%s' could not be published to SNS Topic. This will be tracked in DynamoDB table. \n",
156 | table.getName(), table.getDatabaseName());
157 | }
158 | return publishResponse;
159 | }
160 |
161 | /**
162 | * This method creates MessageAttributeValue using a String value
163 | *
164 | * @param attributeValue
165 | * @return
166 | */
167 | public MessageAttributeValue createStringAttribute(final String attributeValue) {
168 | final MessageAttributeValue messageAttributeValue = new MessageAttributeValue().withDataType("String")
169 | .withStringValue(attributeValue);
170 | return messageAttributeValue;
171 | }
172 |
173 | }
174 |
--------------------------------------------------------------------------------
/src/main/java/com/amazonaws/gdcreplication/util/SQSUtil.java:
--------------------------------------------------------------------------------
1 | // Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | // SPDX-License-Identifier: MIT-0
3 |
4 | package com.amazonaws.gdcreplication.util;
5 |
6 | import java.util.HashMap;
7 | import java.util.Map;
8 |
9 | import com.amazonaws.services.sqs.AmazonSQS;
10 | import com.amazonaws.services.sqs.model.MessageAttributeValue;
11 | import com.amazonaws.services.sqs.model.SendMessageRequest;
12 | import com.amazonaws.services.sqs.model.SendMessageResult;
13 | import com.google.gson.Gson;
14 |
15 | public class SQSUtil {
16 |
17 | public boolean sendTableSchemaToSQSQueue(AmazonSQS sqs, String queueUrl, LargeTable largeTable,
18 | String exportBatchId, String sourceGlueCatalogId) {
19 |
20 | Gson gson = new Gson();
21 | String tableInfo = gson.toJson(largeTable);
22 | System.out.println(tableInfo);
23 |
24 | int statusCode = 400;
25 | boolean messageSentToSQS = false;
26 | Map messageAttributes = new HashMap<>();
27 | messageAttributes.put("ExportBatchId",
28 | new MessageAttributeValue().withDataType("String.ExportBatchId").withStringValue(exportBatchId));
29 | messageAttributes.put("SourceGlueDataCatalogId", new MessageAttributeValue()
30 | .withDataType("String.SourceGlueDataCatalogId").withStringValue(sourceGlueCatalogId));
31 | messageAttributes.put("SchemaType",
32 | new MessageAttributeValue().withDataType("String.SchemaType").withStringValue("largeTable"));
33 |
34 | SendMessageRequest req = new SendMessageRequest().withQueueUrl(queueUrl)
35 | .withMessageBody(tableInfo).withMessageAttributes(messageAttributes);
36 |
37 | try {
38 | SendMessageResult sendMsgRes = sqs.sendMessage(req);
39 | statusCode = sendMsgRes.getSdkHttpMetadata().getHttpStatusCode();
40 | } catch (Exception e) {
41 | e.printStackTrace();
42 | System.out.println("Exception thrown while writing message to SQS. " + e.getLocalizedMessage());
43 | }
44 | if (statusCode == 200) {
45 | messageSentToSQS = true;
46 | System.out.printf("Table details for table '%s' of database '%s' sent to SQS. \n",
47 | largeTable.getTable().getName(), largeTable.getTable().getDatabaseName());
48 | }
49 | return messageSentToSQS;
50 |
51 | }
52 |
53 | public void sendLargeTableSchemaToSQS(AmazonSQS sqs, String queueUrl,
54 | String exportBatchId, String sourceGlueCatalogId, String message, LargeTable largeTable) {
55 |
56 | int statusCode = 400;
57 | Map messageAttributes = new HashMap<>();
58 | messageAttributes.put("ExportBatchId",
59 | new MessageAttributeValue().withDataType("String.ExportBatchId").withStringValue(exportBatchId));
60 | messageAttributes.put("SourceGlueDataCatalogId", new MessageAttributeValue()
61 | .withDataType("String.SourceGlueDataCatalogId").withStringValue(sourceGlueCatalogId));
62 | messageAttributes.put("SchemaType",
63 | new MessageAttributeValue().withDataType("String.SchemaType").withStringValue("largeTable"));
64 |
65 | SendMessageRequest req = new SendMessageRequest().withQueueUrl(queueUrl)
66 | .withMessageBody(message).withMessageAttributes(messageAttributes);
67 |
68 | try {
69 | SendMessageResult sendMsgRes = sqs.sendMessage(req);
70 | statusCode = sendMsgRes.getSdkHttpMetadata().getHttpStatusCode();
71 | } catch (Exception e) {
72 | e.printStackTrace();
73 | System.out.println("Exception thrown while writing message to SQS. " + e.getLocalizedMessage());
74 | }
75 | if (statusCode == 200)
76 | System.out.printf("Large Table schema for table '%s' of database '%s' sent to SQS. \n",
77 | largeTable.getTable().getName(), largeTable.getTable().getDatabaseName());
78 |
79 | }
80 |
81 | public void sendTableSchemaToDeadLetterQueue(AmazonSQS sqs, String queueUrl, TableReplicationStatus tableStatus,
82 | String exportBatchId, String sourceGlueCatalogId) {
83 |
84 | int statusCode = 400;
85 | Map messageAttributes = new HashMap<>();
86 | messageAttributes.put("ExportBatchId",
87 | new MessageAttributeValue().withDataType("String.ExportBatchId").withStringValue(exportBatchId));
88 | messageAttributes.put("SourceGlueDataCatalogId", new MessageAttributeValue()
89 | .withDataType("String.SourceGlueDataCatalogId").withStringValue(sourceGlueCatalogId));
90 | messageAttributes.put("SchemaType",
91 | new MessageAttributeValue().withDataType("String.SchemaType").withStringValue("Table"));
92 |
93 | SendMessageRequest req = new SendMessageRequest().withQueueUrl(queueUrl)
94 | .withMessageBody(tableStatus.getTableSchema()).withMessageAttributes(messageAttributes);
95 |
96 | try {
97 | SendMessageResult sendMsgRes = sqs.sendMessage(req);
98 | statusCode = sendMsgRes.getSdkHttpMetadata().getHttpStatusCode();
99 | } catch (Exception e) {
100 | e.printStackTrace();
101 | System.out.println("Exception thrown while writing message to SQS. " + e.getLocalizedMessage());
102 | }
103 | if (statusCode == 200)
104 | System.out.printf("Table schema for table '%s' of database '%s' sent to SQS. \n",
105 | tableStatus.getTableName(), tableStatus.getDbName());
106 |
107 | }
108 |
109 | public void sendDatabaseSchemaToDeadLetterQueue(AmazonSQS sqs, String queueUrl, String databaseDDL,
110 | String databaseName, String exportBatchId, String sourceGlueCatalogId) {
111 |
112 | int statusCode = 400;
113 | Map messageAttributes = new HashMap<>();
114 | messageAttributes.put("ExportBatchId",
115 | new MessageAttributeValue().withDataType("String.ExportBatchId").withStringValue(exportBatchId));
116 | messageAttributes.put("SourceGlueDataCatalogId", new MessageAttributeValue()
117 | .withDataType("String.SourceGlueDataCatalogId").withStringValue(sourceGlueCatalogId));
118 | messageAttributes.put("SchemaType",
119 | new MessageAttributeValue().withDataType("String.SchemaType").withStringValue("Database"));
120 |
121 | SendMessageRequest req = new SendMessageRequest().withQueueUrl(queueUrl).withMessageBody(databaseDDL)
122 | .withMessageAttributes(messageAttributes);
123 |
124 | try {
125 | SendMessageResult sendMsgRes = sqs.sendMessage(req);
126 | statusCode = sendMsgRes.getSdkHttpMetadata().getHttpStatusCode();
127 | } catch (Exception e) {
128 | e.printStackTrace();
129 | System.out.println("Exception thrown while writing message to SQS. " + e.getLocalizedMessage());
130 | }
131 | if (statusCode == 200)
132 | System.out.printf("Database schema for database '%s' sent to SQS. \n", databaseName);
133 |
134 | }
135 |
136 | }
137 |
--------------------------------------------------------------------------------
/src/main/java/com/amazonaws/gdcreplication/util/TableReplicationStatus.java:
--------------------------------------------------------------------------------
1 | // Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | // SPDX-License-Identifier: MIT-0
3 |
4 | package com.amazonaws.gdcreplication.util;
5 |
6 | /**
7 | *
8 | * This is a POJO class for Glue Database Table Replication Status
9 | * @author Ravi Itha, Amazon Web Services, Inc.
10 | *
11 | */
12 |
13 | public class TableReplicationStatus {
14 |
15 | private String dbName;
16 | private String tableName;
17 | private String replicationDay;
18 | private String tableSchema;
19 | private long replicationTime;
20 | private boolean created;
21 | private boolean updated;
22 | private boolean replicated;
23 | private boolean exportHasPartitions;
24 | private boolean partitionsReplicated;
25 | private boolean error;
26 | private boolean dbNotFoundError;
27 |
28 | public boolean isDbNotFoundError() {
29 | return dbNotFoundError;
30 | }
31 | public void setDbNotFoundError(boolean dbNotFoundError) {
32 | this.dbNotFoundError = dbNotFoundError;
33 | }
34 | public boolean isError() {
35 | return error;
36 | }
37 | public void setError(boolean error) {
38 | this.error = error;
39 | }
40 | public String getTableSchema() {
41 | return tableSchema;
42 | }
43 | public void setTableSchema(String tableSchema) {
44 | this.tableSchema = tableSchema;
45 | }
46 | public String getDbName() {
47 | return dbName;
48 | }
49 | public void setDbName(String dbName) {
50 | this.dbName = dbName;
51 | }
52 | public String getTableName() {
53 | return tableName;
54 | }
55 | public void setTableName(String tableName) {
56 | this.tableName = tableName;
57 | }
58 | public boolean isReplicated() {
59 | return replicated;
60 | }
61 | public void setReplicated(boolean replicated) {
62 | this.replicated = replicated;
63 | }
64 | public String getReplicationDay() {
65 | return replicationDay;
66 | }
67 | public void setReplicationDay(String replicationDay) {
68 | this.replicationDay = replicationDay;
69 | }
70 | public long getReplicationTime() {
71 | return replicationTime;
72 | }
73 | public void setReplicationTime(long replicationTime) {
74 | this.replicationTime = replicationTime;
75 | }
76 | public boolean isCreated() {
77 | return created;
78 | }
79 | public void setCreated(boolean created) {
80 | this.created = created;
81 | }
82 | public boolean isUpdated() {
83 | return updated;
84 | }
85 | public void setUpdated(boolean updated) {
86 | this.updated = updated;
87 | }
88 | public boolean isExportHasPartitions() {
89 | return exportHasPartitions;
90 | }
91 | public void setExportHasPartitions(boolean exportHasPartitions) {
92 | this.exportHasPartitions = exportHasPartitions;
93 | }
94 | public boolean isPartitionsReplicated() {
95 | return partitionsReplicated;
96 | }
97 | public void setPartitionsReplicated(boolean partitionsReplicated) {
98 | this.partitionsReplicated = partitionsReplicated;
99 | }
100 |
101 | }
102 |
--------------------------------------------------------------------------------
/src/main/java/com/amazonaws/gdcreplication/util/TableWithPartitions.java:
--------------------------------------------------------------------------------
1 | // Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | // SPDX-License-Identifier: MIT-0
3 | package com.amazonaws.gdcreplication.util;
4 |
5 | import java.util.List;
6 |
7 | import com.amazonaws.services.glue.model.Partition;
8 | import com.amazonaws.services.glue.model.Table;
9 |
10 | public class TableWithPartitions {
11 |
12 | private Table table;
13 | private List partitionList;
14 |
15 | public Table getTable() {
16 | return table;
17 | }
18 | public void setTable(Table table) {
19 | this.table = table;
20 | }
21 | public List getPartitionList() {
22 | return partitionList;
23 | }
24 | public void setPartitionList(List partitionList) {
25 | this.partitionList = partitionList;
26 | }
27 |
28 |
29 |
30 | }
31 |
--------------------------------------------------------------------------------
/src/test/resources/Glue_Replication.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/aws-glue-data-catalog-replication-utility/be40a89f45b0b0285c3cfcf806e638b701c69e03/src/test/resources/Glue_Replication.png
--------------------------------------------------------------------------------
/src/test/resources/Glue_Table_Anatomy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/aws-glue-data-catalog-replication-utility/be40a89f45b0b0285c3cfcf806e638b701c69e03/src/test/resources/Glue_Table_Anatomy.png
--------------------------------------------------------------------------------
/src/test/resources/SNS_Cross_Account_Permissions.txt:
--------------------------------------------------------------------------------
1 | ## Sample commands necessary to set up cross-account permissions for AWS Lambda and Amazon SNS Topic
2 |
3 | Command 1: Run this command in Source AWS Account. Grant permission to Target AWS Account to subscribe to the topic:
4 |
5 | aws sns add-permission --label lambda-access --aws-account-id TargetAccount \
6 | --topic-arn arn:aws:sns:us-east-1:SourceAccount:SchemaDistributionSNSTopic \
7 | --action-name Subscribe ListSubscriptionsByTopic Receive
8 |
9 | Command 2: Run this command in Target AWS Account. Add Lambda permissions to allow invocation from Amazon SNS Topic in Source Account.
10 |
11 | aws lambda add-permission --function-name ImportLambda \
12 | --source-arn arn:aws:sns:us-east-1:SourceAccount:SchemaDistributionSNSTopic \
13 | --statement-id sns-x-account --action "lambda:InvokeFunction" \
14 | --principal sns.amazonaws.com
15 |
16 | Command 3: Subscribe Lambda function to the Amazon SNS Topic in Source Account.
17 |
18 | aws sns subscribe --protocol lambda \
19 | --topic-arn arn:aws:sns:us-east-1:SourceAccount:SchemaDistributionSNSTopic \
20 | --notification-endpoint arn:aws:lambda:us-east-1:TargetAccount:function:ImportLambda
21 |
22 | For more detailed explanation, refer AWS documentation at: https://docs.aws.amazon.com/lambda/latest/dg/with-sns-example.html#with-sns-create-x-account-permissions
23 |
--------------------------------------------------------------------------------
/src/test/resources/sample_database_schema.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "database_in_account_a",
3 | "createTime": "Sep 10, 2019 10:16:55 PM"
4 | }
5 |
--------------------------------------------------------------------------------
/src/test/resources/sample_ddb_policy_source_and_target_accounts.json:
--------------------------------------------------------------------------------
1 | {
2 | "Version": "2012-10-17",
3 | "Statement": [
4 | {
5 | "Sid": "VisualEditor0",
6 | "Effect": "Allow",
7 | "Action": [
8 | "dynamodb:BatchWriteItem",
9 | "dynamodb:PutItem"
10 | ],
11 | "Resource": "*"
12 | }
13 | ]
14 | }
--------------------------------------------------------------------------------
/src/test/resources/sample_glue_policy_source_account.json:
--------------------------------------------------------------------------------
1 | {
2 | "Version": "2012-10-17",
3 | "Statement": [
4 | {
5 | "Sid": "VisualEditor0",
6 | "Effect": "Allow",
7 | "Action": [
8 | "glue:GetDatabase",
9 | "glue:GetPartition",
10 | "glue:GetTableVersion",
11 | "glue:GetTables",
12 | "glue:GetTableVersions",
13 | "glue:GetPartitions",
14 | "glue:BatchDeleteTableVersion",
15 | "glue:BatchGetPartition",
16 | "glue:GetDatabases",
17 | "glue:GetTable"
18 | ],
19 | "Resource": "*"
20 | }
21 | ]
22 | }
--------------------------------------------------------------------------------
/src/test/resources/sample_glue_policy_target_account.json:
--------------------------------------------------------------------------------
1 | {
2 | "Version": "2012-10-17",
3 | "Statement": [
4 | {
5 | "Sid": "VisualEditor0",
6 | "Effect": "Allow",
7 | "Action": [
8 | "glue:SearchTables",
9 | "glue:BatchCreatePartition",
10 | "glue:GetDataCatalogEncryptionSettings",
11 | "glue:GetTableVersions",
12 | "glue:GetPartitions",
13 | "glue:BatchDeletePartition",
14 | "glue:DeleteTableVersion",
15 | "glue:UpdateTable",
16 | "glue:GetSecurityConfiguration",
17 | "glue:GetResourcePolicy",
18 | "glue:GetTableVersion",
19 | "glue:CreatePartition",
20 | "glue:UpdatePartition",
21 | "glue:UpdateDatabase",
22 | "glue:CreateTable",
23 | "glue:GetTables",
24 | "glue:BatchGetPartition",
25 | "glue:GetSecurityConfigurations",
26 | "glue:GetDatabases",
27 | "glue:GetTable",
28 | "glue:GetDatabase",
29 | "glue:GetPartition",
30 | "glue:CreateDatabase",
31 | "glue:BatchDeleteTableVersion",
32 | "glue:DeletePartition"
33 | ],
34 | "Resource": "*"
35 | }
36 | ]
37 | }
--------------------------------------------------------------------------------
/src/test/resources/sample_large-table_message_payload.json:
--------------------------------------------------------------------------------
1 | {
2 | "largeTable": true,
3 | "numberOfPartitions": 31,
4 | "s3ObjectKey": "2019-10-18_1571431520830_01234567890_database_in_account_a_data",
5 | "s3BucketName": "bucket_name",
6 | "table": {
7 | "name": "data",
8 | "databaseName": "database_in_account_a",
9 | "owner": "owner",
10 | "createTime": "Sep 25, 2019 6:31:08 PM",
11 | "updateTime": "Sep 25, 2019 6:31:08 PM",
12 | "lastAccessTime": "Sep 25, 2019 6:31:08 PM",
13 | "retention": 0,
14 | "storageDescriptor": {
15 | "columns": [
16 | {
17 | "name": "id",
18 | "type": "string"
19 | },
20 | {
21 | "name": "type",
22 | "type": "string"
23 | },
24 | {
25 | "name": "actor",
26 | "type": "struct\u003cid:int,login:string,display_login:string,gravatar_id:string,url:string,avatar_url:string\u003e"
27 | },
28 | {
29 | "name": "repo",
30 | "type": "struct\u003cid:int,name:string,url:string\u003e"
31 | },
32 | {
33 | "name": "payload",
34 | "type": "struct\u003cpush_id:int,size:int,distinct_size:int,ref:string,head:string,before:string,commits:array\u003cstruct\u003csha:string,author:struct\u003cname:string,email:string\u003e,message:string,distinct:boolean,url:string\u003e\u003e,ref_type:string,master_branch:string,description:string,pusher_type:string,action:string,number:int,pull_request:struct\u003curl:string,id:int,html_url:string,diff_url:string,patch_url:string,issue_url:string,number:int,state:string,locked:boolean,title:string,user:struct\u003clogin:string,id:int,avatar_url:string,gravatar_id:string,url:string,html_url:string,followers_url:string,following_url:string,gists_url:string,starred_url:string,subscriptions_url:string,organizations_url:string,repos_url:string,events_url:string,received_events_url:string,type:string,site_admin:boolean\u003e,body:string,created_at:string,updated_at:string,closed_at:string,merged_at:string,merge_commit_sha:string,assignee:struct\u003clogin:string,id:int,avatar_url:string,gravatar_id:string,url:string,html_url:string,followers_url:string,following_url:string,gists_url:string,starred_url:string,subscriptions_url:string,organizations_url:string,repos_url:string,events_url:string,received_events_url:string,type:string,site_admin:boolean\u003e,assignees:array\u003cstruct\u003clogin:string,id:int,avatar_url:string,gravatar_id:string,url:string,html_url:string,followers_url:string,following_url:string,gists_url:string,starred_url:string,subscriptions_url:string,organizations_url:string,repos_url:string,events_url:string,received_events_url:string,type:string,site_admin:boolean\u003e\u003e,milestone:struct\u003curl:string,html_url:string,labels_url:string,id:int,number:int,title:string,description:string,creator:struct\u003clogin:string,id:int,avatar_url:string,gravatar_id:string,url:string,html_url:string,followers_url:string,following_url:string,gists_url:string,starred_url:string,subscriptions_url:string,organizations_url:string,repos_url:string,events_u
rl:string,received_events_url:string,type:string,site_admin:boolean\u003e,open_issues:int,closed_issues:int,state:string,created_at:string,updated_at:string,due_on:string,closed_at:string\u003e,commits_url:string,review_comments_url:string,review_comment_url:string,comments_url:string,statuses_url:string,head:struct\u003clabel:string,ref:string,sha:string,user:struct\u003clogin:string,id:int,avatar_url:string,gravatar_id:string,url:string,html_url:string,followers_url:string,following_url:string,gists_url:string,starred_url:string,subscriptions_url:string,organizations_url:string,repos_url:string,events_url:string,received_events_url:string,type:string,site_admin:boolean\u003e,repo:struct\u003cid:int,name:string,full_name:string,owner:struct\u003clogin:string,id:int,avatar_url:string,gravatar_id:string,url:string,html_url:string,followers_url:string,following_url:string,gists_url:string,starred_url:string,subscriptions_url:string,organizations_url:string,repos_url:string,events_url:string,received_events_url:string,type:string,site_admin:boolean\u003e,private:boolean,html_url:string,description:string,fork:boolean,url:string,forks_url:string,keys_url:string,collaborators_url:string,teams_url:string,hooks_url:string,issue_events_url:string,events_url:string,assignees_url:string,branches_url:string,tags_url:string,blobs_url:string,git_tags_url:string,git_refs_url:string,trees_url:string,statuses_url:string,languages_url:string,stargazers_url:string,contributors_url:string,subscribers_url:string,subscription_url:string,commits_url:string,git_commits_url:string,comments_url:string,issue_comment_url:string,contents_url:string,compare_url:string,merges_url:string,archive_url:string,downloads_url:string,issues_url:string,pulls_url:string,milestones_url:string,notifications_url:string,labels_url:string,releases_url:string,deployments_url:string,created_at:string,updated_at:string,pushed_at:string,git_url:string,ssh_url:string,clone_url:string,svn_url:string,homepage:string,
size:int,stargazers_count:int,watchers_count:int,language:string,has_issues:boolean,has_downloads:boolean,has_wiki:boolean,has_pages:boolean,forks_count:int,mirror_url:string,open_issues_count:int,forks:int,open_issues:int,watchers:int,default_branch:string\u003e\u003e,base:struct\u003clabel:string,ref:string,sha:string,user:struct\u003clogin:string,id:int,avatar_url:string,gravatar_id:string,url:string,html_url:string,followers_url:string,following_url:string,gists_url:string,starred_url:string,subscriptions_url:string,organizations_url:string,repos_url:string,events_url:string,received_events_url:string,type:string,site_admin:boolean\u003e,repo:struct\u003cid:int,name:string,full_name:string,owner:struct\u003clogin:string,id:int,avatar_url:string,gravatar_id:string,url:string,html_url:string,followers_url:string,following_url:string,gists_url:string,starred_url:string,subscriptions_url:string,organizations_url:string,repos_url:string,events_url:string,received_events_url:string,type:string,site_admin:boolean\u003e,private:boolean,html_url:string,description:string,fork:boolean,url:string,forks_url:string,keys_url:string,collaborators_url:string,teams_url:string,hooks_url:string,issue_events_url:string,events_url:string,assignees_url:string,branches_url:string,tags_url:string,blobs_url:string,git_tags_url:string,git_refs_url:string,trees_url:string,statuses_url:string,languages_url:string,stargazers_url:string,contributors_url:string,subscribers_url:string,subscription_url:string,commits_url:string,git_commits_url:string,comments_url:string,issue_comment_url:string,contents_url:string,compare_url:string,merges_url:string,archive_url:string,downloads_url:string,issues_url:string,pulls_url:string,milestones_url:string,notifications_url:string,labels_url:string,releases_url:string,deployments_url:string,created_at:string,updated_at:string,pushed_at:string,git_url:string,ssh_url:string,clone_url:string,svn_url:string,homepage:string,size:int,stargazers_count:int,watche
rs_count:int,language:string,has_issues:boolean,has_downloads:boolean,has_wiki:boolean,has_pages:boolean,forks_count:int,mirror_url:string,open_issues_count:int,forks:int,open_issues:int,watchers:int,default_branch:string\u003e\u003e,_links:struct\u003cself:struct\u003chref:string\u003e,html:struct\u003chref:string\u003e,issue:struct\u003chref:string\u003e,comments:struct\u003chref:string\u003e,review_comments:struct\u003chref:string\u003e,review_comment:struct\u003chref:string\u003e,commits:struct\u003chref:string\u003e,statuses:struct\u003chref:string\u003e\u003e,merged:boolean,mergeable:boolean,mergeable_state:string,merged_by:struct\u003clogin:string,id:int,avatar_url:string,gravatar_id:string,url:string,html_url:string,followers_url:string,following_url:string,gists_url:string,starred_url:string,subscriptions_url:string,organizations_url:string,repos_url:string,events_url:string,received_events_url:string,type:string,site_admin:boolean\u003e,comments:int,review_comments:int,maintainer_can_modify:boolean,commits:int,additions:int,deletions:int,changed_files:int\u003e,issue:struct\u003curl:string,repository_url:string,labels_url:string,comments_url:string,events_url:string,html_url:string,id:int,number:int,title:string,user:struct\u003clogin:string,id:int,avatar_url:string,gravatar_id:string,url:string,html_url:string,followers_url:string,following_url:string,gists_url:string,starred_url:string,subscriptions_url:string,organizations_url:string,repos_url:string,events_url:string,received_events_url:string,type:string,site_admin:boolean\u003e,labels:array\u003cstruct\u003cid:int,url:string,name:string,color:string,default:boolean\u003e\u003e,state:string,locked:boolean,assignee:struct\u003clogin:string,id:int,avatar_url:string,gravatar_id:string,url:string,html_url:string,followers_url:string,following_url:string,gists_url:string,starred_url:string,subscriptions_url:string,organizations_url:string,repos_url:string,events_url:string,received_events_url:string,type:s
tring,site_admin:boolean\u003e,assignees:array\u003cstruct\u003clogin:string,id:int,avatar_url:string,gravatar_id:string,url:string,html_url:string,followers_url:string,following_url:string,gists_url:string,starred_url:string,subscriptions_url:string,organizations_url:string,repos_url:string,events_url:string,received_events_url:string,type:string,site_admin:boolean\u003e\u003e,milestone:struct\u003curl:string,html_url:string,labels_url:string,id:int,number:int,title:string,description:string,creator:struct\u003clogin:string,id:int,avatar_url:string,gravatar_id:string,url:string,html_url:string,followers_url:string,following_url:string,gists_url:string,starred_url:string,subscriptions_url:string,organizations_url:string,repos_url:string,events_url:string,received_events_url:string,type:string,site_admin:boolean\u003e,open_issues:int,closed_issues:int,state:string,created_at:string,updated_at:string,due_on:string,closed_at:string\u003e,comments:int,created_at:string,updated_at:string,closed_at:string,body:string,pull_request:struct\u003curl:string,html_url:string,diff_url:string,patch_url:string\u003e\u003e,comment:struct\u003curl:string,html_url:string,issue_url:string,id:int,user:struct\u003clogin:string,id:int,avatar_url:string,gravatar_id:string,url:string,html_url:string,followers_url:string,following_url:string,gists_url:string,starred_url:string,subscriptions_url:string,organizations_url:string,repos_url:string,events_url:string,received_events_url:string,type:string,site_admin:boolean\u003e,created_at:string,updated_at:string,body:string,pull_request_review_id:int,diff_hunk:string,path:string,position:int,original_position:int,commit_id:string,original_commit_id:string,pull_request_url:string,_links:struct\u003cself:struct\u003chref:string\u003e,html:struct\u003chref:string\u003e,pull_request:struct\u003chref:string\u003e\u003e,line:int\u003e,forkee:struct\u003cid:int,name:string,full_name:string,owner:struct\u003clogin:string,id:int,avatar_url:string,gravata
r_id:string,url:string,html_url:string,followers_url:string,following_url:string,gists_url:string,starred_url:string,subscriptions_url:string,organizations_url:string,repos_url:string,events_url:string,received_events_url:string,type:string,site_admin:boolean\u003e,private:boolean,html_url:string,description:string,fork:boolean,url:string,forks_url:string,keys_url:string,collaborators_url:string,teams_url:string,hooks_url:string,issue_events_url:string,events_url:string,assignees_url:string,branches_url:string,tags_url:string,blobs_url:string,git_tags_url:string,git_refs_url:string,trees_url:string,statuses_url:string,languages_url:string,stargazers_url:string,contributors_url:string,subscribers_url:string,subscription_url:string,commits_url:string,git_commits_url:string,comments_url:string,issue_comment_url:string,contents_url:string,compare_url:string,merges_url:string,archive_url:string,downloads_url:string,issues_url:string,pulls_url:string,milestones_url:string,notifications_url:string,labels_url:string,releases_url:string,deployments_url:string,created_at:string,updated_at:string,pushed_at:string,git_url:string,ssh_url:string,clone_url:string,svn_url:string,homepage:string,size:int,stargazers_count:int,watchers_count:int,language:string,has_issues:boolean,has_downloads:boolean,has_wiki:boolean,has_pages:boolean,forks_count:int,mirror_url:string,open_issues_count:int,forks:int,open_issues:int,watchers:int,default_branch:string,public:boolean\u003e,pages:array\u003cstruct\u003cpage_name:string,title:string,summary:string,action:string,sha:string,html_url:string\u003e\u003e,release:struct\u003curl:string,assets_url:string,upload_url:string,html_url:string,id:int,tag_name:string,target_commitish:string,name:string,draft:boolean,author:struct\u003clogin:string,id:int,avatar_url:string,gravatar_id:string,url:string,html_url:string,followers_url:string,following_url:string,gists_url:string,starred_url:string,subscriptions_url:string,organizations_url:string,repos_url
:string,events_url:string,received_events_url:string,type:string,site_admin:boolean\u003e,prerelease:boolean,created_at:string,published_at:string,assets:array\u003cstruct\u003curl:string,id:int,name:string,label:string,uploader:struct\u003clogin:string,id:int,avatar_url:string,gravatar_id:string,url:string,html_url:string,followers_url:string,following_url:string,gists_url:string,starred_url:string,subscriptions_url:string,organizations_url:string,repos_url:string,events_url:string,received_events_url:string,type:string,site_admin:boolean\u003e,content_type:string,state:string,size:int,download_count:int,created_at:string,updated_at:string,browser_download_url:string\u003e\u003e,tarball_url:string,zipball_url:string,body:string\u003e,member:struct\u003clogin:string,id:int,avatar_url:string,gravatar_id:string,url:string,html_url:string,followers_url:string,following_url:string,gists_url:string,starred_url:string,subscriptions_url:string,organizations_url:string,repos_url:string,events_url:string,received_events_url:string,type:string,site_admin:boolean\u003e\u003e"
35 | },
36 | {
37 | "name": "public",
38 | "type": "boolean"
39 | },
40 | {
41 | "name": "created_at",
42 | "type": "string"
43 | },
44 | {
45 | "name": "org",
46 | "type": "struct\u003cid:int,login:string,gravatar_id:string,url:string,avatar_url:string\u003e"
47 | }
48 | ],
49 | "location": "s3://aws-glue-datasets-us-east-1/examples/githubarchive/month/data/",
50 | "inputFormat": "org.apache.hadoop.mapred.TextInputFormat",
51 | "outputFormat": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
52 | "compressed": true,
53 | "numberOfBuckets": -1,
54 | "serdeInfo": {
55 | "serializationLibrary": "org.openx.data.jsonserde.JsonSerDe",
56 | "parameters": {
57 | "paths": "actor,created_at,id,org,payload,public,repo,type"
58 | }
59 | },
60 | "bucketColumns": [
61 | ],
62 | "sortColumns": [
63 | ],
64 | "parameters": {
65 | "sizeKey": "11026096613",
66 | "objectCount": "744",
67 | "UPDATED_BY_CRAWLER": "Github",
68 | "CrawlerSchemaSerializerVersion": "1.0",
69 | "recordCount": "4795154",
70 | "averageRecordSize": "2471",
71 | "CrawlerSchemaDeserializerVersion": "1.0",
72 | "compressionType": "gzip",
73 | "classification": "json",
74 | "typeOfData": "file"
75 | },
76 | "storedAsSubDirectories": false
77 | },
78 | "partitionKeys": [
79 | {
80 | "name": "partition_0",
81 | "type": "string"
82 | },
83 | {
84 | "name": "partition_1",
85 | "type": "string"
86 | },
87 | {
88 | "name": "partition_2",
89 | "type": "string"
90 | }
91 | ],
92 | "tableType": "EXTERNAL_TABLE",
93 | "parameters": {
94 | "sizeKey": "11026096613",
95 | "objectCount": "744",
96 | "UPDATED_BY_CRAWLER": "Github",
97 | "CrawlerSchemaSerializerVersion": "1.0",
98 | "recordCount": "4795154",
99 | "averageRecordSize": "2471",
100 | "CrawlerSchemaDeserializerVersion": "1.0",
101 | "compressionType": "gzip",
102 | "classification": "json",
103 | "typeOfData": "file"
104 | },
105 | "createdBy": "arn:aws:sts::01234567890:assumed-role/AWSGlueServiceRole-abc/AWS-Crawler"
106 | }
107 | }
--------------------------------------------------------------------------------
/src/test/resources/sample_sns_policy_source_account.json:
--------------------------------------------------------------------------------
1 | {
2 | "Version": "2012-10-17",
3 | "Statement": [
4 | {
5 | "Sid": "VisualEditor0",
6 | "Effect": "Allow",
7 | "Action": "sns:Publish",
8 | "Resource": "*"
9 | }
10 | ]
11 | }
--------------------------------------------------------------------------------
/src/test/resources/sample_sqs_policy_source_and_target_accounts.json:
--------------------------------------------------------------------------------
1 | {
2 | "Version": "2012-10-17",
3 | "Statement": [
4 | {
5 | "Sid": "VisualEditor0",
6 | "Effect": "Allow",
7 | "Action": [
8 | "sqs:DeleteMessage",
9 | "sqs:ListQueues",
10 | "sqs:GetQueueUrl",
11 | "sqs:ListDeadLetterSourceQueues",
12 | "sqs:ChangeMessageVisibility",
13 | "sqs:DeleteMessageBatch",
14 | "sqs:SendMessageBatch",
15 | "sqs:ReceiveMessage",
16 | "sqs:SendMessage",
17 | "sqs:GetQueueAttributes",
18 | "sqs:ListQueueTags"
19 | ],
20 | "Resource": "*"
21 | }
22 | ]
23 | }
--------------------------------------------------------------------------------
/src/test/resources/sample_table_schema.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "data",
3 | "databaseName": "database_in_account_a",
4 | "owner": "owner",
5 | "createTime": "Sep 25, 2019 1:31:08 PM",
6 | "updateTime": "Sep 25, 2019 1:31:08 PM",
7 | "lastAccessTime": "Sep 25, 2019 1:31:08 PM",
8 | "retention": 0,
9 | "storageDescriptor": {
10 | "columns": [
11 | {
12 | "name": "id",
13 | "type": "string"
14 | },
15 | {
16 | "name": "type",
17 | "type": "string"
18 | },
19 | {
20 | "name": "actor",
21 | "type": "struct\u003cid:int,login:string,display_login:string,gravatar_id:string,url:string,avatar_url:string\u003e"
22 | },
23 | {
24 | "name": "repo",
25 | "type": "struct\u003cid:int,name:string,url:string\u003e"
26 | },
27 | {
28 | "name": "payload",
29 | "type": "struct\u003cpush_id:int,size:int,distinct_size:int,ref:string,head:string,before:string,commits:array\u003cstruct\u003csha:string,author:struct\u003cname:string,email:string\u003e,message:string,distinct:boolean,url:string\u003e\u003e,ref_type:string,master_branch:string,description:string,pusher_type:string,action:string,number:int,pull_request:struct\u003curl:string,id:int,html_url:string,diff_url:string,patch_url:string,issue_url:string,number:int,state:string,locked:boolean,title:string,user:struct\u003clogin:string,id:int,avatar_url:string,gravatar_id:string,url:string,html_url:string,followers_url:string,following_url:string,gists_url:string,starred_url:string,subscriptions_url:string,organizations_url:string,repos_url:string,events_url:string,received_events_url:string,type:string,site_admin:boolean\u003e,body:string,created_at:string,updated_at:string,closed_at:string,merged_at:string,merge_commit_sha:string,assignee:struct\u003clogin:string,id:int,avatar_url:string,gravatar_id:string,url:string,html_url:string,followers_url:string,following_url:string,gists_url:string,starred_url:string,subscriptions_url:string,organizations_url:string,repos_url:string,events_url:string,received_events_url:string,type:string,site_admin:boolean\u003e,assignees:array\u003cstruct\u003clogin:string,id:int,avatar_url:string,gravatar_id:string,url:string,html_url:string,followers_url:string,following_url:string,gists_url:string,starred_url:string,subscriptions_url:string,organizations_url:string,repos_url:string,events_url:string,received_events_url:string,type:string,site_admin:boolean\u003e\u003e,milestone:struct\u003curl:string,html_url:string,labels_url:string,id:int,number:int,title:string,description:string,creator:struct\u003clogin:string,id:int,avatar_url:string,gravatar_id:string,url:string,html_url:string,followers_url:string,following_url:string,gists_url:string,starred_url:string,subscriptions_url:string,organizations_url:string,repos_url:string,events_u
rl:string,received_events_url:string,type:string,site_admin:boolean\u003e,open_issues:int,closed_issues:int,state:string,created_at:string,updated_at:string,due_on:string,closed_at:string\u003e,commits_url:string,review_comments_url:string,review_comment_url:string,comments_url:string,statuses_url:string,head:struct\u003clabel:string,ref:string,sha:string,user:struct\u003clogin:string,id:int,avatar_url:string,gravatar_id:string,url:string,html_url:string,followers_url:string,following_url:string,gists_url:string,starred_url:string,subscriptions_url:string,organizations_url:string,repos_url:string,events_url:string,received_events_url:string,type:string,site_admin:boolean\u003e,repo:struct\u003cid:int,name:string,full_name:string,owner:struct\u003clogin:string,id:int,avatar_url:string,gravatar_id:string,url:string,html_url:string,followers_url:string,following_url:string,gists_url:string,starred_url:string,subscriptions_url:string,organizations_url:string,repos_url:string,events_url:string,received_events_url:string,type:string,site_admin:boolean\u003e,private:boolean,html_url:string,description:string,fork:boolean,url:string,forks_url:string,keys_url:string,collaborators_url:string,teams_url:string,hooks_url:string,issue_events_url:string,events_url:string,assignees_url:string,branches_url:string,tags_url:string,blobs_url:string,git_tags_url:string,git_refs_url:string,trees_url:string,statuses_url:string,languages_url:string,stargazers_url:string,contributors_url:string,subscribers_url:string,subscription_url:string,commits_url:string,git_commits_url:string,comments_url:string,issue_comment_url:string,contents_url:string,compare_url:string,merges_url:string,archive_url:string,downloads_url:string,issues_url:string,pulls_url:string,milestones_url:string,notifications_url:string,labels_url:string,releases_url:string,deployments_url:string,created_at:string,updated_at:string,pushed_at:string,git_url:string,ssh_url:string,clone_url:string,svn_url:string,homepage:string,
size:int,stargazers_count:int,watchers_count:int,language:string,has_issues:boolean,has_downloads:boolean,has_wiki:boolean,has_pages:boolean,forks_count:int,mirror_url:string,open_issues_count:int,forks:int,open_issues:int,watchers:int,default_branch:string\u003e\u003e,base:struct\u003clabel:string,ref:string,sha:string,user:struct\u003clogin:string,id:int,avatar_url:string,gravatar_id:string,url:string,html_url:string,followers_url:string,following_url:string,gists_url:string,starred_url:string,subscriptions_url:string,organizations_url:string,repos_url:string,events_url:string,received_events_url:string,type:string,site_admin:boolean\u003e,repo:struct\u003cid:int,name:string,full_name:string,owner:struct\u003clogin:string,id:int,avatar_url:string,gravatar_id:string,url:string,html_url:string,followers_url:string,following_url:string,gists_url:string,starred_url:string,subscriptions_url:string,organizations_url:string,repos_url:string,events_url:string,received_events_url:string,type:string,site_admin:boolean\u003e,private:boolean,html_url:string,description:string,fork:boolean,url:string,forks_url:string,keys_url:string,collaborators_url:string,teams_url:string,hooks_url:string,issue_events_url:string,events_url:string,assignees_url:string,branches_url:string,tags_url:string,blobs_url:string,git_tags_url:string,git_refs_url:string,trees_url:string,statuses_url:string,languages_url:string,stargazers_url:string,contributors_url:string,subscribers_url:string,subscription_url:string,commits_url:string,git_commits_url:string,comments_url:string,issue_comment_url:string,contents_url:string,compare_url:string,merges_url:string,archive_url:string,downloads_url:string,issues_url:string,pulls_url:string,milestones_url:string,notifications_url:string,labels_url:string,releases_url:string,deployments_url:string,created_at:string,updated_at:string,pushed_at:string,git_url:string,ssh_url:string,clone_url:string,svn_url:string,homepage:string,size:int,stargazers_count:int,watche
rs_count:int,language:string,has_issues:boolean,has_downloads:boolean,has_wiki:boolean,has_pages:boolean,forks_count:int,mirror_url:string,open_issues_count:int,forks:int,open_issues:int,watchers:int,default_branch:string\u003e\u003e,_links:struct\u003cself:struct\u003chref:string\u003e,html:struct\u003chref:string\u003e,issue:struct\u003chref:string\u003e,comments:struct\u003chref:string\u003e,review_comments:struct\u003chref:string\u003e,review_comment:struct\u003chref:string\u003e,commits:struct\u003chref:string\u003e,statuses:struct\u003chref:string\u003e\u003e,merged:boolean,mergeable:boolean,mergeable_state:string,merged_by:struct\u003clogin:string,id:int,avatar_url:string,gravatar_id:string,url:string,html_url:string,followers_url:string,following_url:string,gists_url:string,starred_url:string,subscriptions_url:string,organizations_url:string,repos_url:string,events_url:string,received_events_url:string,type:string,site_admin:boolean\u003e,comments:int,review_comments:int,maintainer_can_modify:boolean,commits:int,additions:int,deletions:int,changed_files:int\u003e,issue:struct\u003curl:string,repository_url:string,labels_url:string,comments_url:string,events_url:string,html_url:string,id:int,number:int,title:string,user:struct\u003clogin:string,id:int,avatar_url:string,gravatar_id:string,url:string,html_url:string,followers_url:string,following_url:string,gists_url:string,starred_url:string,subscriptions_url:string,organizations_url:string,repos_url:string,events_url:string,received_events_url:string,type:string,site_admin:boolean\u003e,labels:array\u003cstruct\u003cid:int,url:string,name:string,color:string,default:boolean\u003e\u003e,state:string,locked:boolean,assignee:struct\u003clogin:string,id:int,avatar_url:string,gravatar_id:string,url:string,html_url:string,followers_url:string,following_url:string,gists_url:string,starred_url:string,subscriptions_url:string,organizations_url:string,repos_url:string,events_url:string,received_events_url:string,type:s
tring,site_admin:boolean\u003e,assignees:array\u003cstruct\u003clogin:string,id:int,avatar_url:string,gravatar_id:string,url:string,html_url:string,followers_url:string,following_url:string,gists_url:string,starred_url:string,subscriptions_url:string,organizations_url:string,repos_url:string,events_url:string,received_events_url:string,type:string,site_admin:boolean\u003e\u003e,milestone:struct\u003curl:string,html_url:string,labels_url:string,id:int,number:int,title:string,description:string,creator:struct\u003clogin:string,id:int,avatar_url:string,gravatar_id:string,url:string,html_url:string,followers_url:string,following_url:string,gists_url:string,starred_url:string,subscriptions_url:string,organizations_url:string,repos_url:string,events_url:string,received_events_url:string,type:string,site_admin:boolean\u003e,open_issues:int,closed_issues:int,state:string,created_at:string,updated_at:string,due_on:string,closed_at:string\u003e,comments:int,created_at:string,updated_at:string,closed_at:string,body:string,pull_request:struct\u003curl:string,html_url:string,diff_url:string,patch_url:string\u003e\u003e,comment:struct\u003curl:string,html_url:string,issue_url:string,id:int,user:struct\u003clogin:string,id:int,avatar_url:string,gravatar_id:string,url:string,html_url:string,followers_url:string,following_url:string,gists_url:string,starred_url:string,subscriptions_url:string,organizations_url:string,repos_url:string,events_url:string,received_events_url:string,type:string,site_admin:boolean\u003e,created_at:string,updated_at:string,body:string,pull_request_review_id:int,diff_hunk:string,path:string,position:int,original_position:int,commit_id:string,original_commit_id:string,pull_request_url:string,_links:struct\u003cself:struct\u003chref:string\u003e,html:struct\u003chref:string\u003e,pull_request:struct\u003chref:string\u003e\u003e,line:int\u003e,forkee:struct\u003cid:int,name:string,full_name:string,owner:struct\u003clogin:string,id:int,avatar_url:string,gravata
r_id:string,url:string,html_url:string,followers_url:string,following_url:string,gists_url:string,starred_url:string,subscriptions_url:string,organizations_url:string,repos_url:string,events_url:string,received_events_url:string,type:string,site_admin:boolean\u003e,private:boolean,html_url:string,description:string,fork:boolean,url:string,forks_url:string,keys_url:string,collaborators_url:string,teams_url:string,hooks_url:string,issue_events_url:string,events_url:string,assignees_url:string,branches_url:string,tags_url:string,blobs_url:string,git_tags_url:string,git_refs_url:string,trees_url:string,statuses_url:string,languages_url:string,stargazers_url:string,contributors_url:string,subscribers_url:string,subscription_url:string,commits_url:string,git_commits_url:string,comments_url:string,issue_comment_url:string,contents_url:string,compare_url:string,merges_url:string,archive_url:string,downloads_url:string,issues_url:string,pulls_url:string,milestones_url:string,notifications_url:string,labels_url:string,releases_url:string,deployments_url:string,created_at:string,updated_at:string,pushed_at:string,git_url:string,ssh_url:string,clone_url:string,svn_url:string,homepage:string,size:int,stargazers_count:int,watchers_count:int,language:string,has_issues:boolean,has_downloads:boolean,has_wiki:boolean,has_pages:boolean,forks_count:int,mirror_url:string,open_issues_count:int,forks:int,open_issues:int,watchers:int,default_branch:string,public:boolean\u003e,pages:array\u003cstruct\u003cpage_name:string,title:string,summary:string,action:string,sha:string,html_url:string\u003e\u003e,release:struct\u003curl:string,assets_url:string,upload_url:string,html_url:string,id:int,tag_name:string,target_commitish:string,name:string,draft:boolean,author:struct\u003clogin:string,id:int,avatar_url:string,gravatar_id:string,url:string,html_url:string,followers_url:string,following_url:string,gists_url:string,starred_url:string,subscriptions_url:string,organizations_url:string,repos_url
:string,events_url:string,received_events_url:string,type:string,site_admin:boolean\u003e,prerelease:boolean,created_at:string,published_at:string,assets:array\u003cstruct\u003curl:string,id:int,name:string,label:string,uploader:struct\u003clogin:string,id:int,avatar_url:string,gravatar_id:string,url:string,html_url:string,followers_url:string,following_url:string,gists_url:string,starred_url:string,subscriptions_url:string,organizations_url:string,repos_url:string,events_url:string,received_events_url:string,type:string,site_admin:boolean\u003e,content_type:string,state:string,size:int,download_count:int,created_at:string,updated_at:string,browser_download_url:string\u003e\u003e,tarball_url:string,zipball_url:string,body:string\u003e,member:struct\u003clogin:string,id:int,avatar_url:string,gravatar_id:string,url:string,html_url:string,followers_url:string,following_url:string,gists_url:string,starred_url:string,subscriptions_url:string,organizations_url:string,repos_url:string,events_url:string,received_events_url:string,type:string,site_admin:boolean\u003e\u003e"
30 | },
31 | {
32 | "name": "public",
33 | "type": "boolean"
34 | },
35 | {
36 | "name": "created_at",
37 | "type": "string"
38 | },
39 | {
40 | "name": "org",
41 | "type": "struct\u003cid:int,login:string,gravatar_id:string,url:string,avatar_url:string\u003e"
42 | }
43 | ],
44 | "location": "s3://aws-glue-datasets-us-east-1/examples/githubarchive/month/data/",
45 | "inputFormat": "org.apache.hadoop.mapred.TextInputFormat",
46 | "outputFormat": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
47 | "compressed": true,
48 | "numberOfBuckets": -1,
49 | "serdeInfo": {
50 | "serializationLibrary": "org.openx.data.jsonserde.JsonSerDe",
51 | "parameters": {
52 | "paths": "actor,created_at,id,org,payload,public,repo,type"
53 | }
54 | },
55 | "bucketColumns": [
56 | ],
57 | "sortColumns": [
58 | ],
59 | "parameters": {
60 | "sizeKey": "11026096613",
61 | "objectCount": "744",
62 | "UPDATED_BY_CRAWLER": "Github",
63 | "CrawlerSchemaSerializerVersion": "1.0",
64 | "recordCount": "4795154",
65 | "averageRecordSize": "2471",
66 | "CrawlerSchemaDeserializerVersion": "1.0",
67 | "compressionType": "gzip",
68 | "classification": "json",
69 | "typeOfData": "file"
70 | },
71 | "storedAsSubDirectories": false
72 | },
73 | "partitionKeys": [
74 | {
75 | "name": "partition_0",
76 | "type": "string"
77 | },
78 | {
79 | "name": "partition_1",
80 | "type": "string"
81 | },
82 | {
83 | "name": "partition_2",
84 | "type": "string"
85 | }
86 | ],
87 | "tableType": "EXTERNAL_TABLE",
88 | "parameters": {
89 | "sizeKey": "11026096613",
90 | "objectCount": "744",
91 | "UPDATED_BY_CRAWLER": "Github",
92 | "CrawlerSchemaSerializerVersion": "1.0",
93 | "recordCount": "4795154",
94 | "averageRecordSize": "2471",
95 | "CrawlerSchemaDeserializerVersion": "1.0",
96 | "compressionType": "gzip",
97 | "classification": "json",
98 | "typeOfData": "file"
99 | },
100 | "createdBy": "arn:aws:sts::0123456789:assumed-role/AWSGlueServiceRole-abc/AWS-Crawler"
101 | }
102 |
--------------------------------------------------------------------------------