├── .gitignore
├── src
└── main
│ ├── resources
│ ├── AWS_Glue_Table_versions_cleanup_utility.png
│ ├── table_versions_cleanup_planner_lambda_sqs_policy.json
│ ├── table_versions_cleanup_cloudwatch_logs_policy.json
│ ├── table_versions_cleanup_planner_lambda_glue_policy.json
│ ├── table_versions_cleanup_lambda_dynamodb_policy.json
│ ├── table_versions_cleanup_lambda_glue_policy.json
│ ├── table_versions_cleanup_lambda_sqs_policy.json
│ └── AWS_Glue_Table_versions_cleanup_utility.drawio
│ └── java
│ └── software
│ └── aws
│ └── glue
│ └── tableversions
│ ├── utils
│ ├── GlueTable.java
│ ├── Test.java
│ ├── TableVersionStatus.java
│ ├── TestDDBUtil.java
│ ├── SQSUtil.java
│ ├── DDBUtil.java
│ └── GlueUtil.java
│ └── lambda
│ ├── TableVersionsCleanupLambda.java
│ └── TableVersionsCleanupPlannerLambda.java
├── CODE_OF_CONDUCT.md
├── .project
├── LICENSE
├── dependency-reduced-pom.xml
├── .classpath
├── CONTRIBUTING.md
├── pom.xml
└── README.md
/.gitignore:
--------------------------------------------------------------------------------
1 | target/
2 | .settings/
3 | build/
4 | target/classes/
--------------------------------------------------------------------------------
/src/main/resources/AWS_Glue_Table_versions_cleanup_utility.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/aws-glue-table-versions-cleanup-utility/HEAD/src/main/resources/AWS_Glue_Table_versions_cleanup_utility.png
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | ## Code of Conduct
2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
4 | opensource-codeofconduct@amazon.com with any additional questions or comments.
5 |
--------------------------------------------------------------------------------
/src/main/resources/table_versions_cleanup_planner_lambda_sqs_policy.json:
--------------------------------------------------------------------------------
1 | {
2 | "Version": "2012-10-17",
3 | "Statement": [
4 | {
5 | "Sid": "VisualEditor0",
6 | "Effect": "Allow",
7 | "Action": [
8 | "sqs:SendMessageBatch",
9 | "sqs:SendMessage"
10 | ],
11 | "Resource": "*"
12 | }
13 | ]
14 | }
--------------------------------------------------------------------------------
/src/main/resources/table_versions_cleanup_cloudwatch_logs_policy.json:
--------------------------------------------------------------------------------
1 | {
2 | "Version": "2012-10-17",
3 | "Statement": [
4 | {
5 | "Sid": "VisualEditor0",
6 | "Effect": "Allow",
7 | "Action": [
8 | "logs:CreateLogStream",
9 | "logs:CreateLogGroup",
10 | "logs:PutLogEvents"
11 | ],
12 | "Resource": "*"
13 | }
14 | ]
15 | }
--------------------------------------------------------------------------------
/src/main/resources/table_versions_cleanup_planner_lambda_glue_policy.json:
--------------------------------------------------------------------------------
1 | {
2 | "Version": "2012-10-17",
3 | "Statement": [
4 | {
5 | "Sid": "VisualEditor0",
6 | "Effect": "Allow",
7 | "Action": [
8 | "glue:GetDatabase",
9 | "glue:GetTables",
10 | "glue:GetDatabases",
11 | "glue:GetTable"
12 | ],
13 | "Resource": "*"
14 | }
15 | ]
16 | }
--------------------------------------------------------------------------------
/src/main/resources/table_versions_cleanup_lambda_dynamodb_policy.json:
--------------------------------------------------------------------------------
1 | {
2 | "Version": "2012-10-17",
3 | "Statement": [
4 | {
5 | "Sid": "VisualEditor0",
6 | "Effect": "Allow",
7 | "Action": [
8 | "dynamodb:BatchWriteItem",
9 | "dynamodb:PutItem",
10 | "dynamodb:DeleteItem",
11 | "dynamodb:UpdateItem"
12 | ],
13 | "Resource": "*"
14 | }
15 | ]
16 | }
--------------------------------------------------------------------------------
/src/main/resources/table_versions_cleanup_lambda_glue_policy.json:
--------------------------------------------------------------------------------
1 | {
2 | "Version": "2012-10-17",
3 | "Statement": [
4 | {
5 | "Sid": "VisualEditor0",
6 | "Effect": "Allow",
7 | "Action": [
8 | "glue:GetTableVersion",
9 | "glue:GetTableVersions",
10 | "glue:DeleteTableVersion",
11 | "glue:BatchDeleteTableVersion"
12 | ],
13 | "Resource": "*"
14 | }
15 | ]
16 | }
--------------------------------------------------------------------------------
/src/main/resources/table_versions_cleanup_lambda_sqs_policy.json:
--------------------------------------------------------------------------------
1 | {
2 | "Version": "2012-10-17",
3 | "Statement": [
4 | {
5 | "Sid": "VisualEditor0",
6 | "Effect": "Allow",
7 | "Action": [
8 | "sqs:DeleteMessage",
9 | "sqs:GetQueueUrl",
10 | "sqs:ListDeadLetterSourceQueues",
11 | "sqs:ReceiveMessage",
12 | "sqs:GetQueueAttributes"
13 | ],
14 | "Resource": "*"
15 | }
16 | ]
17 | }
--------------------------------------------------------------------------------
/.project:
--------------------------------------------------------------------------------
1 |
2 |
3 | GlueTableVersionCleanup_2
4 |
5 |
6 |
7 |
8 |
9 | org.eclipse.jdt.core.javabuilder
10 |
11 |
12 |
13 |
14 | org.eclipse.m2e.core.maven2Builder
15 |
16 |
17 |
18 |
19 |
20 | org.eclipse.jdt.core.javanature
21 | org.eclipse.m2e.core.maven2Nature
22 |
23 |
24 |
--------------------------------------------------------------------------------
/src/main/java/software/aws/glue/tableversions/utils/GlueTable.java:
--------------------------------------------------------------------------------
1 | // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | // SPDX-License-Identifier: MIT-0
3 |
4 | package software.aws.glue.tableversions.utils;
5 |
/**
 * Simple mutable holder that identifies a Glue table by its database name and
 * table name.
 */
public class GlueTable {

    private String databaseName;
    private String tableName;

    /**
     * @return the table name, or {@code null} if none has been set
     */
    public String getTableName() {
        return this.tableName;
    }

    /**
     * @param tableName the table name to store
     */
    public void setTableName(String tableName) {
        this.tableName = tableName;
    }

    /**
     * @return the database name, or {@code null} if none has been set
     */
    public String getDatabaseName() {
        return this.databaseName;
    }

    /**
     * @param databaseName the database name to store
     */
    public void setDatabaseName(String databaseName) {
        this.databaseName = databaseName;
    }
}
25 |
--------------------------------------------------------------------------------
/src/main/java/software/aws/glue/tableversions/utils/Test.java:
--------------------------------------------------------------------------------
1 | package software.aws.glue.tableversions.utils;
2 |
3 | import com.amazonaws.services.securitytoken.AWSSecurityTokenService;
4 | import com.amazonaws.services.securitytoken.AWSSecurityTokenServiceClientBuilder;
5 | import com.amazonaws.services.securitytoken.model.GetCallerIdentityRequest;
6 | import com.amazonaws.services.securitytoken.model.GetCallerIdentityResult;
7 |
8 | public class Test {
9 |
10 | public static void main(String[] args) {
11 | AWSSecurityTokenService client = AWSSecurityTokenServiceClientBuilder.standard().build();
12 | GetCallerIdentityRequest request = new GetCallerIdentityRequest();
13 | GetCallerIdentityResult response = client.getCallerIdentity(request);
14 | System.out.println("Account Id: " + response.getAccount());
15 |
16 |
17 | }
18 |
19 | }
20 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this
4 | software and associated documentation files (the "Software"), to deal in the Software
5 | without restriction, including without limitation the rights to use, copy, modify,
6 | merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
7 | permit persons to whom the Software is furnished to do so.
8 |
9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
10 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
11 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
12 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
13 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
14 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--------------------------------------------------------------------------------
/src/main/java/software/aws/glue/tableversions/utils/TableVersionStatus.java:
--------------------------------------------------------------------------------
1 | // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | // SPDX-License-Identifier: MIT-0
3 |
4 | package software.aws.glue.tableversions.utils;
5 |
/**
 * Mutable holder for one table version: the database and table it belongs to,
 * its version id, and a flag recording whether it was deleted.
 */
public class TableVersionStatus {

    private String tableName;
    private String databaseName;
    private String versionId;
    private boolean deleted;

    /**
     * @return the database name, or {@code null} if none has been set
     */
    public String getDatabaseName() {
        return this.databaseName;
    }

    /**
     * @param databaseName the database name to store
     */
    public void setDatabaseName(String databaseName) {
        this.databaseName = databaseName;
    }

    /**
     * @return the table name, or {@code null} if none has been set
     */
    public String getTableName() {
        return this.tableName;
    }

    /**
     * @param tableName the table name to store
     */
    public void setTableName(String tableName) {
        this.tableName = tableName;
    }

    /**
     * @return the version id, or {@code null} if none has been set
     */
    public String getVersionId() {
        return this.versionId;
    }

    /**
     * @param versionId the version id to store
     */
    public void setVersionId(String versionId) {
        this.versionId = versionId;
    }

    /**
     * @return the deleted flag; {@code false} until set otherwise
     */
    public boolean isDeleted() {
        return this.deleted;
    }

    /**
     * @param deleted the new value of the deleted flag
     */
    public void setDeleted(boolean deleted) {
        this.deleted = deleted;
    }

}
39 |
--------------------------------------------------------------------------------
/src/main/java/software/aws/glue/tableversions/utils/TestDDBUtil.java:
--------------------------------------------------------------------------------
1 | package software.aws.glue.tableversions.utils;
2 |
3 | import com.amazonaws.services.dynamodbv2.AmazonDynamoDB;
4 | import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClientBuilder;
5 |
6 | public class TestDDBUtil {
7 |
8 | public static void main(String[] args) {
9 |
10 | String ddbTableName_1 = "glue_table_version_cleanup_planner";
11 | String ddbTableName_2 = "glue_table_version_cleanup_statistics";
12 | long executionId = System.currentTimeMillis();
13 |
14 | DDBUtil ddbUtil = new DDBUtil();
15 |
16 | String hashKey_1 = "execution_batch_id";
17 | String rangeKey_1 = "database_name_table_name";
18 |
19 | String hashKey_2 = "execution_id";
20 | String rangeKey_2 = "execution_batch_id";
21 |
22 | String databaseName = "test_db";
23 | String tableName = "test_table";
24 | int numTableVersionsB4Cleanup = 20;
25 | int numVersionsRetained = 10;
26 | int numDeletedVersions = 10;
27 |
28 | long executionBatchId = System.currentTimeMillis();
29 | AmazonDynamoDB ddbClient = AmazonDynamoDBClientBuilder.standard().withRegion("us-east-1").build();
30 | String notificationTime = new java.util.Date().toString();
31 |
32 | ddbUtil.insertTableDetailsToDynamoDB(ddbClient, ddbTableName_1, hashKey_1, rangeKey_1, executionBatchId,
33 | databaseName, tableName, notificationTime);
34 |
35 | ddbUtil.insertCleanupStatusToDynamoDB(ddbClient, ddbTableName_2, hashKey_2, rangeKey_2, executionId,
36 | Long.toString(executionBatchId), databaseName, tableName, numTableVersionsB4Cleanup,
37 | numVersionsRetained, numDeletedVersions);
38 | }
39 |
40 | }
41 |
--------------------------------------------------------------------------------
/src/main/java/software/aws/glue/tableversions/utils/SQSUtil.java:
--------------------------------------------------------------------------------
1 | //Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | //SPDX-License-Identifier: MIT-0
3 |
4 | package software.aws.glue.tableversions.utils;
5 |
6 | import java.util.HashMap;
7 | import java.util.Map;
8 |
9 | import com.amazonaws.services.sqs.AmazonSQS;
10 | import com.amazonaws.services.sqs.model.MessageAttributeValue;
11 | import com.amazonaws.services.sqs.model.SendMessageRequest;
12 | import com.amazonaws.services.sqs.model.SendMessageResult;
13 |
14 | public class SQSUtil {
15 |
16 | /**
17 | * This method send a message to SQS queue.
18 | * @param sqs
19 | * @param queueURI
20 | * @param message
21 | * @param executionBatchId
22 | * @param databaseName
23 | * @return
24 | */
25 | public boolean sendTableSchemaToSQSQueue(AmazonSQS sqs, String queueURI, String message, long executionBatchId,
26 | String databaseName) {
27 | int statusCode = 400;
28 | boolean messageSentToSQS = false;
29 | Map messageAttributes = new HashMap<>();
30 | messageAttributes.put("ExecutionBatchId", new MessageAttributeValue().withDataType("String.ExecutionBatchId")
31 | .withStringValue(Long.toString(executionBatchId)));
32 | SendMessageRequest req = new SendMessageRequest().withQueueUrl(queueURI).withMessageBody(message)
33 | .withMessageGroupId(databaseName).withMessageAttributes(messageAttributes);
34 | try {
35 | SendMessageResult sendMsgRes = sqs.sendMessage(req);
36 | statusCode = sendMsgRes.getSdkHttpMetadata().getHttpStatusCode();
37 | } catch (Exception e) {
38 | e.printStackTrace();
39 | System.out.println("Exception thrown while writing message to SQS. " + e.getLocalizedMessage());
40 | }
41 | if (statusCode == 200) {
42 | messageSentToSQS = true;
43 | } else
44 | System.out.printf("Cannot write Table schema %s to SQS queue. \n", message);
45 | return messageSentToSQS;
46 | }
47 |
48 | }
49 |
--------------------------------------------------------------------------------
/src/main/resources/AWS_Glue_Table_versions_cleanup_utility.drawio:
--------------------------------------------------------------------------------
1 | 7VrbcuI4EP0aHkPJF26PYEJmtpKdXGaTmaeUsITRjGw5shwgX7+SLWMbGxaSQLzZJVXBbrV1aZ0+rW7Tshx/ecFhOL9iCNOWCdCyZY1bpjmwu/K/EqxSQdfQAo8TlIqMXHBHXrAWAi2NCcJRSVEwRgUJy0KXBQF2RUkGOWeLstqM0fKoIfRwRXDnQlqVPhAk5qm03wG5/Asm3jwb2QC6xYeZshZEc4jYoiCyzluWwxkT6ZW/dDBVtsvscvP1/Pby/tcTv/1jSDvugHZ+Xp2lnU0OeWS9BI4D8equp+xPNBzyn07vr9AdTvrgGxlnXUdildkLI2k+fcu4mDOPBZCe59IRZ3GAsOoVyLtc55KxUAoNKfyFhVhpLMBYMCmaC5/q1hkLRDaEtq0adWO//mGx2cxZzF28S0/DVUDuYbHDFPmWSlfAzMeCr+RzHFMoyHN5dlCD0lvr5YaXF9r2B+xD5/99yExhv3EfkkeHnMNVQSFkJBBRoedrJZAKmubMnp32qFlOCjb2NO0x3+H11PbadNgRi8mXqx8L7z64/tZd3bxc3Geme4Y01stqmV0qbTOKQhiU4NB9ihXNJFt2psVD1b+AlLiyIVeRV576vsBqfQgKOIWRZGCzC32Ji1EwjcJUjabqk3S09KlMPOVvHf891wIDlEBnSpOF1E98w4EEXooy3jmOyIvqQ7uMBoXU7oxanbEahxIvkAJXYh5zKXjGXBAZTIa6wScIJc5H4RTTEXR/e4kbOowynoxrzZJPvXup7vCyLu7paeXRpuh42ymj6ie69zPQ7oLM5zSoz8z09jBXyn0lU2GzWaTAtcF5h3lEfTgClW08AQ9GkpPEUJ01pCBgAc5kE6Kmn3SLA5RpuBRGUQJUKdQqjaDTesVBE/jU7naOz6dmhU+/c+mhCZ6giKP/HEUYW840unvQVoGutE9GgxhiJ+4Lmzx8uGs5ZmsoFwsuoT9FsKUsJBUtQ+3h98RwJrjHPCIskDAADsUwiEN5dU2hNC6vQIPFgpIAO+usBOjtKmwjSD5S7nGICM7bNIfMJDcU1Meg4xi9hFs4+403lBGM5msSyzB1qSB0zSIi5MRl25QJwfwa0AlFdVVsJvhKac8wK3iTD0RhuroZWarBU7xjfv6MU9gn9DiHoXrAX3oqQ2zDRWS3aWLox1kcuOncRocRXAXmW1Fq2oMSRC2jn94v8nQuy+bmhUwuk73lrF6/gt5HHdbrsFdDIe8XZPr7Bplubbw7QTa1c94FknAoi9EDFO5cyhN4y+/bWBHD8T1/5BhWp3tSz99M845CAlhpPAri40eVY6AjskDmzY0hgUE1DvnwhalUZ7wKoM/G1ZTkTdCSbfa4Z06sQtuYcKz5V6KJK0tsYk9+7NGggr3iKaRxAKwFmzyaJXz11U3CjbxNr8paKLE9mh4RitlZdn22tcHJsFh7zLM+Kh4dI9vZXpvbo3Zknijq7JpkHSPc3dzJ/zcxjl8Xb+TfRM2kjhQmlm33zcNIYUtA+qykED1Faz6oya/2g+nefHDK2FSf9n1IVb/hVZTtfNHYIkrPKifnJymiWBUWa3oRZatfv75ustvhQdsEVr+0NWe63NWIwskuqG/UTSovBdYVlPq3BVteC4QQIRJ4aR0fhEtVgEl686VnkaAg36zs7yrOfMqSzEkSs23VmfcPfj3Qa1bwG3x47MsjWzH8GeXwpyGWx74ttZzjxT7733CmzmptBdq6xQISdaj+ofiFRSKZjJtWdJ7XNJLzVfEVIsIUC8U3jCJV9S080LSwVgRDRt7vGOO2nGjWIc7od41yiMtOI82NcXZ9jAMX6t4EYyjUqwFHflHmvW8iVh9fOkMLjDqfL7540qCP6qcFj6425unSKxPsWW6x2xl7HEBL8jb/RVeKy/xncdb53w==
--------------------------------------------------------------------------------
/dependency-reduced-pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | 4.0.0
4 | software.aws.glue
5 | glue-tableversions-cleanup
6 | aws-glue-table-versions-cleanup-utility
7 | 0.1
8 |
9 |
10 |
11 | maven-compiler-plugin
12 | 3.7.0
13 |
14 | 1.8
15 | 1.8
16 |
17 |
18 |
19 | maven-shade-plugin
20 | 3.1.0
21 |
22 |
23 | package
24 |
25 | shade
26 |
27 |
28 |
29 |
30 | software.aws.glue.tableversions.lambda.TableVersionsCleanupPlannerLambda
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 | junit
42 | junit
43 | 4.12
44 | test
45 |
46 |
47 | hamcrest-core
48 | org.hamcrest
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 | com.amazonaws
57 | aws-java-sdk-bom
58 | 1.11.873
59 | pom
60 | import
61 |
62 |
63 |
64 |
65 |
66 |
--------------------------------------------------------------------------------
/.classpath:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing Guidelines
2 |
3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional
4 | documentation, we greatly value feedback and contributions from our community.
5 |
6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary
7 | information to effectively respond to your bug report or contribution.
8 |
9 |
10 | ## Reporting Bugs/Feature Requests
11 |
12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features.
13 |
14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already
15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful:
16 |
17 | * A reproducible test case or series of steps
18 | * The version of our code being used
19 | * Any modifications you've made relevant to the bug
20 | * Anything unusual about your environment or deployment
21 |
22 |
23 | ## Contributing via Pull Requests
24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that:
25 |
26 | 1. You are working against the latest source on the *main* branch.
27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already.
28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted.
29 |
30 | To send us a pull request, please:
31 |
32 | 1. Fork the repository.
33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change.
34 | 3. Ensure local tests pass.
35 | 4. Commit to your fork using clear commit messages.
36 | 5. Send us a pull request, answering any default questions in the pull request interface.
37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation.
38 |
39 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and
40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/).
41 |
42 |
43 | ## Finding contributions to work on
44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start.
45 |
46 |
47 | ## Code of Conduct
48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
50 | opensource-codeofconduct@amazon.com with any additional questions or comments.
51 |
52 |
53 | ## Security issue notifications
54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue.
55 |
56 |
57 | ## Licensing
58 |
59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution.
60 |
--------------------------------------------------------------------------------
/src/main/java/software/aws/glue/tableversions/utils/DDBUtil.java:
--------------------------------------------------------------------------------
1 | // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | // SPDX-License-Identifier: MIT-0
3 |
4 | package software.aws.glue.tableversions.utils;
5 |
6 | import com.amazonaws.services.dynamodbv2.AmazonDynamoDB;
7 | import com.amazonaws.services.dynamodbv2.document.DynamoDB;
8 | import com.amazonaws.services.dynamodbv2.document.Item;
9 | import com.amazonaws.services.dynamodbv2.document.PutItemOutcome;
10 | import com.amazonaws.services.dynamodbv2.document.Table;
11 |
12 | /**
13 | * This is a utility class with methods to write items to DynamoDB table. from /
14 | * to a DynamoDB table.
15 | *
16 | * @author Ravi Itha, Amazon Web Services, Inc.
17 | *
18 | */
19 | public class DDBUtil {
20 |
21 | public boolean insertCleanupStatusToDynamoDB(AmazonDynamoDB ddbClient, String ddbTableName, String hashKey,
22 | String rangeKey, long executionId, String executionBatchId, String databaseName, String tableName,
23 | int numTableVersionsB4Cleanup, int numVersionsRetained, int numDeletedVersions) {
24 |
25 | boolean itemInserted = false;
26 | DynamoDB dynamoDB = new DynamoDB(ddbClient);
27 | Table table = dynamoDB.getTable(ddbTableName);
28 | Item item = new Item()
29 | .withPrimaryKey(hashKey, executionId)
30 | .withNumber(rangeKey, Long.parseLong(executionBatchId))
31 | .withString("table_name", tableName)
32 | .withString("database_name", databaseName)
33 | .withNumber("number_of_versions_before_cleanup", numTableVersionsB4Cleanup)
34 | .withNumber("number_of_versions_retained", numVersionsRetained)
35 | .withNumber("number_of_versions_deleted", numDeletedVersions);
36 | // Write the item to the table
37 | PutItemOutcome outcome = table.putItem(item);
38 | int statusCode = outcome.getPutItemResult().getSdkHttpMetadata().getHttpStatusCode();
39 | if (statusCode == 200) {
40 | itemInserted = true;
41 | System.out.println("Table version inserted to DynamoDB table: " + ddbTableName);
42 | }
43 | return itemInserted;
44 | }
45 |
46 | /**
47 | * Method to write Table version details to a DynamoDB table.
48 | *
49 | * @param dynamoDBClient
50 | * @param dynamoDBTblName
51 | * @param primaryPartKey
52 | * @param primarySortKey
53 | * @param executionBatchId
54 | * @param databaseName
55 | * @param tableName
56 | * @param notificationTime
57 | * @return
58 | */
59 | public boolean insertTableDetailsToDynamoDB(AmazonDynamoDB ddbClient, String ddbTableName, String hashKey,
60 | String rangeKey, long executionBatchId, String databaseName, String tableName, String messageSentTime) {
61 |
62 | boolean itemInserted = false;
63 | DynamoDB dynamoDB = new DynamoDB(ddbClient);
64 | Table table = dynamoDB.getTable(ddbTableName);
65 | Item item = new Item().withPrimaryKey(hashKey, executionBatchId)
66 | .withString(rangeKey, databaseName.concat("|").concat(tableName)).withString("table_name", tableName)
67 | .withString("database_name", databaseName).withString("message_sent_time", messageSentTime);
68 | // Write the item to the table
69 | PutItemOutcome outcome = table.putItem(item);
70 | int statusCode = outcome.getPutItemResult().getSdkHttpMetadata().getHttpStatusCode();
71 | if (statusCode == 200) {
72 | itemInserted = true;
73 | System.out.println("Table version inserted to DynamoDB table: " + ddbTableName);
74 | }
75 | return itemInserted;
76 | }
77 |
78 | }
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 |
2 | 4.0.0
3 | software.aws.glue
4 | glue-tableversions-cleanup
5 | 0.1
6 | aws-glue-table-versions-cleanup-utility
7 |
8 |
9 |
10 |
11 | org.apache.maven.plugins
12 | maven-compiler-plugin
13 | 3.7.0
14 |
15 | 1.8
16 | 1.8
17 |
18 |
19 |
20 | org.apache.maven.plugins
21 | maven-shade-plugin
22 | 3.1.0
23 |
24 |
25 | package
26 |
27 | shade
28 |
29 |
30 |
31 |
33 | software.aws.glue.tableversions.lambda.TableVersionsCleanupPlannerLambda
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 | com.amazonaws
47 | aws-java-sdk-bom
48 | 1.11.873
49 | pom
50 | import
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 | com.amazonaws
60 | aws-java-sdk-glue
61 |
62 |
63 |
64 |
65 | com.amazonaws
66 | aws-java-sdk-sqs
67 |
68 |
69 |
70 | com.amazonaws
71 | aws-java-sdk-sts
72 |
73 |
74 |
75 |
76 | com.amazonaws
77 | aws-java-sdk-dynamodb
78 |
79 |
80 |
81 |
82 | junit
83 | junit
84 | 4.13.1
85 | test
86 |
87 |
88 |
89 |
90 | com.amazonaws
91 | aws-lambda-java-events
92 | 2.2.7
93 |
94 |
95 | com.amazonaws
96 | aws-lambda-java-core
97 | 1.2.0
98 |
99 |
100 |
101 |
102 | com.google.guava
103 | guava
104 | 30.0-jre
105 |
106 |
107 |
108 |
109 | com.google.code.gson
110 | gson
111 | 2.8.9
112 |
113 |
114 |
115 |
--------------------------------------------------------------------------------
/src/main/java/software/aws/glue/tableversions/lambda/TableVersionsCleanupLambda.java:
--------------------------------------------------------------------------------
1 | // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | // SPDX-License-Identifier: MIT-0
3 |
4 | package software.aws.glue.tableversions.lambda;
5 |
6 | import java.util.ArrayList;
7 | import java.util.List;
8 | import java.util.Map.Entry;
9 | import java.util.Optional;
10 |
11 | import com.amazonaws.services.dynamodbv2.AmazonDynamoDB;
12 | import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClientBuilder;
13 | import com.amazonaws.services.glue.AWSGlue;
14 | import com.amazonaws.services.glue.AWSGlueClientBuilder;
15 | import com.amazonaws.services.glue.model.TableVersion;
16 | import com.amazonaws.services.lambda.runtime.Context;
17 | import com.amazonaws.services.lambda.runtime.RequestHandler;
18 | import com.amazonaws.services.lambda.runtime.events.SQSEvent;
19 | import com.amazonaws.services.lambda.runtime.events.SQSEvent.MessageAttribute;
20 | import com.amazonaws.services.lambda.runtime.events.SQSEvent.SQSMessage;
21 | import com.google.common.primitives.Ints;
22 | import com.google.gson.Gson;
23 |
24 | import software.aws.glue.tableversions.utils.DDBUtil;
25 | import software.aws.glue.tableversions.utils.GlueTable;
26 | import software.aws.glue.tableversions.utils.GlueUtil;
27 | import software.aws.glue.tableversions.utils.TableVersionStatus;
28 |
29 | /**
30 | * This class has AWS Lambda Handler method. Upon invocation, it gets an event
31 | * from source SQS queue, gets the message(s).
32 | *
33 | * For each message, it takes the following actions: 1. Parse the message and
34 | * get Table name 2. Fetch list of table versions 3. Determine the list of table
35 | * versions to retains 4. Delete old table versions 5. Insert a record into
36 | * DynamoDB table with the statistics
37 | *
38 | * @author Ravi Itha, Amazon Web Services, Inc.
39 | *
40 | */
41 | public class TableVersionsCleanupLambda implements RequestHandler {
42 |
43 | @Override
44 | public String handleRequest(SQSEvent event, Context context) {
45 |
46 | String region = Optional.ofNullable(System.getenv("region")).orElse("us-east-1");
47 | String ddbTableName = Optional.ofNullable(System.getenv("ddb_table_name"))
48 | .orElse("glue_table_version_cleanup_statistics");
49 | String hashKey = Optional.ofNullable(System.getenv("hash_key")).orElse("execution_id");
50 | String rangeKey = Optional.ofNullable(System.getenv("range_key")).orElse("execution_batch_id");
51 | int numberofVersionsToRetain = Ints
52 | .tryParse(Optional.ofNullable(System.getenv("number_of_versions_to_retain")).orElse("100"));
53 |
54 | System.out.println("Region: " + region);
55 | System.out.println("Number of table versions to retain: " + numberofVersionsToRetain);
56 | System.out.println("DynamoDB Table to track statistics: " + ddbTableName);
57 |
58 | AWSGlue glueClient = AWSGlueClientBuilder.standard().withRegion(region).build();
59 | AmazonDynamoDB dynamoDBClient = AmazonDynamoDBClientBuilder.standard().withRegion(region).build();
60 |
61 | if (numberofVersionsToRetain < 50) {
62 | throw new RuntimeException();
63 | } else {
64 | System.out.println("Number of messages in SQS Event: " + event.getRecords().size());
65 | List sqsMessages = event.getRecords();
66 | processEvent(glueClient, dynamoDBClient, sqsMessages, numberofVersionsToRetain, ddbTableName, hashKey,
67 | rangeKey);
68 | }
69 | return "SNS event to Lambda processed successfully!";
70 | }
71 |
72 | /**
73 | * This method processes SQS event
74 | *
75 | * @param glueClient
76 | * @param dynamoDBClient
77 | * @param sqsMessages
78 | * @param numberofVersionsToRetain
79 | * @param dynamoDBTableName
80 | * @param primaryPartKey
81 | * @param primarySortKey
82 | */
83 | public void processEvent(AWSGlue glueClient, AmazonDynamoDB dynamoDBClient, List sqsMessages,
84 | int numberofVersionsToRetain, String dynamoDBTableName, String hashKey, String rangeKey) {
85 |
86 | DDBUtil ddbUtil = new DDBUtil();
87 | GlueUtil glueUtil = new GlueUtil();
88 | List tblVersionsNotDeletedMasterList = new ArrayList();
89 |
90 | for (SQSMessage sqsMessage : sqsMessages) {
91 | long executionId = System.currentTimeMillis();
92 | // get Execution Batch Id from Message Attributes
93 | String executionBatchId = "";
94 | for (Entry entry : sqsMessage.getMessageAttributes().entrySet()) {
95 | if ("ExecutionBatchId".equalsIgnoreCase(entry.getKey())) {
96 | executionBatchId = entry.getValue().getStringValue();
97 | System.out.println("Execution Batch Id: " + executionBatchId);
98 | }
99 | }
100 |
101 | // de-serialize SQS message to GlueTable
102 | Gson gson = new Gson();
103 | String message = new String(sqsMessage.getBody());
104 | GlueTable glueTable = gson.fromJson(message, GlueTable.class);
105 | System.out.printf("Process event for table '%s' under database '%s' \n", glueTable.getTableName(),
106 | glueTable.getDatabaseName());
107 |
108 | // get table versions
109 | List tableVersionList = glueUtil.getTableVersions(glueClient, glueTable.getTableName(),
110 | glueTable.getDatabaseName());
111 |
112 | if (tableVersionList.size() > numberofVersionsToRetain) {
113 | // identify the versions that are older than numberofVersionsToRetain
114 | List> lists = glueUtil.determineOldVersions(tableVersionList, glueTable.getTableName(),
115 | glueTable.getDatabaseName(), numberofVersionsToRetain);
116 | List versionsToKeep = lists.get(0);
117 | List versionsToDelete = lists.get(1);
118 |
119 | System.out.printf("For table '%s', versions to be deleted: %d, versions to be retaind: %d \n",
120 | glueTable.getTableName(), versionsToDelete.size(), versionsToKeep.size());
121 |
122 | // delete older versions
123 | List tblVersionsNotDeletedList = glueUtil.deleteTableVersions(glueClient,
124 | versionsToDelete, glueTable.getTableName(), glueTable.getDatabaseName());
125 |
126 | int numTableVersionsB4Cleanup = tableVersionList.size();
127 | int numDeletedVersions = versionsToDelete.size() - tblVersionsNotDeletedList.size();
128 |
129 | boolean itemInserted = ddbUtil.insertCleanupStatusToDynamoDB(dynamoDBClient, dynamoDBTableName, hashKey,
130 | rangeKey, executionId, executionBatchId, glueTable.getDatabaseName(), glueTable.getTableName(),
131 | numTableVersionsB4Cleanup, versionsToKeep.size(), numDeletedVersions);
132 |
133 | if (tblVersionsNotDeletedList.size() == 0)
134 | System.out.printf("Older versions of table '%s' under database '%s' were deleted. \n",
135 | glueTable.getTableName(), glueTable.getDatabaseName());
136 | else
137 | tblVersionsNotDeletedMasterList.addAll(tblVersionsNotDeletedList);
138 | } else {
139 | System.out.printf("Table '%s' does not have more than %d versions. Skipping. \n",
140 | glueTable.getTableName(), numberofVersionsToRetain);
141 | }
142 | }
143 | }
144 | }
--------------------------------------------------------------------------------
/src/main/java/software/aws/glue/tableversions/lambda/TableVersionsCleanupPlannerLambda.java:
--------------------------------------------------------------------------------
1 | // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | // SPDX-License-Identifier: MIT-0
3 |
4 | package software.aws.glue.tableversions.lambda;
5 |
6 | import java.util.ArrayList;
7 | import java.util.Collections;
8 | import java.util.Date;
9 | import java.util.List;
10 | import java.util.Optional;
11 | import java.util.StringTokenizer;
12 | import java.util.concurrent.atomic.AtomicInteger;
13 | import java.util.stream.Collectors;
14 |
15 | import com.amazonaws.regions.Regions;
16 | import com.amazonaws.services.dynamodbv2.AmazonDynamoDB;
17 | import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClientBuilder;
18 | import com.amazonaws.services.glue.AWSGlue;
19 | import com.amazonaws.services.glue.AWSGlueClientBuilder;
20 | import com.amazonaws.services.glue.model.Database;
21 | import com.amazonaws.services.glue.model.Table;
22 | import com.amazonaws.services.lambda.runtime.Context;
23 | import com.amazonaws.services.lambda.runtime.RequestHandler;
24 | import com.amazonaws.services.securitytoken.AWSSecurityTokenService;
25 | import com.amazonaws.services.securitytoken.AWSSecurityTokenServiceClientBuilder;
26 | import com.amazonaws.services.securitytoken.model.GetCallerIdentityRequest;
27 | import com.amazonaws.services.securitytoken.model.GetCallerIdentityResult;
28 | import com.amazonaws.services.sqs.AmazonSQS;
29 | import com.amazonaws.services.sqs.AmazonSQSClientBuilder;
30 | import com.google.gson.Gson;
31 |
32 | import software.aws.glue.tableversions.utils.DDBUtil;
33 | import software.aws.glue.tableversions.utils.GlueTable;
34 | import software.aws.glue.tableversions.utils.GlueUtil;
35 | import software.aws.glue.tableversions.utils.SQSUtil;
36 |
37 | /**
38 | * This class has AWS Lambda Handler method. Upon invocation, it takes the
39 | * following actions: 1. it fetches all databases from Glue Catalog 2. for each
40 | * database, fetches all of its tables 3. for each table, it publishes table
41 | * and database names to SQS queue.
42 | *
43 | * @author Ravi Itha, Amazon Web Services, Inc.
44 | *
45 | */
46 | public class TableVersionsCleanupPlannerLambda implements RequestHandler