├── .envrc ├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.kr.md ├── README.md ├── app.py ├── assets ├── amazon-es-access-control.png ├── amazon-es-config-domain.png ├── amazon-es-deployment-type.png ├── amazon-es-encryption-config.png ├── amazon-es-json-access-policy.png ├── aws-analytics-immersion-day-steps-extra.drawio ├── aws-analytics-system-arch.drawio ├── aws-analytics-system-build-steps-extra.svg ├── aws-analytics-system-build-steps.drawio ├── aws-analytics-system-build-steps.svg ├── aws-athena-create-database.png ├── aws-athena-ctas-lambda-add-trigger.png ├── aws-athena-ctas-lambda-create-function.png ├── aws-athena-ctas-lambda-execution-iam-role.png ├── aws-athena-ctas-lambda-iam-role-policies.png ├── aws-athena-setup-query-results-location-01.png ├── aws-athena-setup-query-results-location-02.png ├── aws-ec2-choose-ami.png ├── aws-ec2-choose-instance-type.png ├── aws-ec2-configure-instance-details.png ├── aws-ec2-configure-security-group.png ├── aws-ec2-connect.png ├── aws-ec2-launch-instance.png ├── aws-ec2-security-group-for-bastion.png ├── aws-ec2-security-group-for-es-client.png ├── aws-ec2-security-group-for-es-cluster.png ├── aws-ec2-select-keypair.png ├── aws-ec2-ssm.png ├── aws-ec2instance-modify-iam-role.png ├── aws-kinesis-firehose-create_new_iam_role.png ├── aws-lambda-add-layer-to-function-layer-version-arn.png ├── aws-lambda-add-layer-to-function.png ├── aws-lambda-create-function.png ├── aws-lambda-create-layer.png ├── aws-lambda-execution-iam-role.png ├── aws-lambda-iam-role-policies.png ├── aws-lambda-kinesis.png ├── aws-quicksight-access-s3.png ├── aws-quicksight-athena-choose_your_table.png ├── aws-quicksight-athena_data_source.png ├── aws-quicksight-bar-chart.png ├── aws-quicksight-choose-s3-bucket.png ├── aws-quicksight-finish-dataset-creation.png ├── aws-quicksight-new_data_sets.png ├── aws-quicksight-quarterly-graph.png ├── aws-quicksight-share-analysis-users.png ├── aws-quicksight-share-analysis.png 
├── aws-quicksight-user-email-click-to-view.png ├── aws-quicksight-user-email.png ├── aws-quicksight-user-invitation.png ├── aws_athena_select_all_limit_10.png ├── cfn-stacks-create-stack.png ├── cfn-step1-create-stack.png ├── iam-user-download.png ├── iam-user-policy.png ├── iam-user.png ├── kfh_create_new_iam_role.png ├── kibana-01-add_data.png ├── kibana-02a-create-index-pattern.png ├── kibana-02b-create-index-pattern-configure-settings.png ├── kibana-02c-create-index-pattern-review.png ├── kibana-02d-management-advanced-setting.png ├── kibana-03-discover.png ├── kibana-04-discover-visualize.png ├── kibana-05-discover-change-metrics.png ├── kibana-08-visualize-save.png ├── kibana-09-dashboards.png ├── kibana-10-import-visualization.png ├── kibana-12-discover-save-dashboard.png ├── kibana-13-complete.png ├── ops-create-firehose_role.png ├── ops-create-index-pattern-configure-setting.png ├── ops-create-index-pattern.png ├── ops-dashboards-sidebar-menu-security.png ├── ops-dashboards-sidebar-menu.png ├── ops-entries-for-firehose_role.png ├── ops-management-advanced-setting.png └── ops-role-mappings.png ├── aws-analytics-system-arch.svg ├── cdk.context.json ├── cdk.json ├── data_analytics_system ├── __init__.py ├── bastion_host.py ├── elasticsearch.py ├── firehose.py ├── glue_catalog_database.py ├── kds.py ├── lake_formation.py ├── merge_small_files_lambda.py ├── ops.py ├── upsert_to_es_lambda.py └── vpc.py ├── doc_sources ├── athena_sample_query.md ├── prerequisites.en.md └── prerequisites.kr.md ├── requirements-dev.txt ├── requirements.txt ├── set-up-hands-on-lab.sh ├── setup.py ├── source.bat ├── src └── main │ └── python │ ├── MergeSmallFiles │ └── athena_ctas.py │ ├── UpsertToES │ └── upsert_to_es.py │ └── utils │ ├── gen_kinesis_data.py │ └── kinesis_consumer.py ├── vpc_us_east_1.yaml ├── vpc_us_east_2.yaml └── vpc_us_west_2.yaml /.envrc: -------------------------------------------------------------------------------- 1 | export USE_DEFAULT_VPC=true 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | .DS_Store 107 | .idea/ 108 | bin/ 109 | lib64 110 | pyvenv.cfg 111 | *.bak 112 | share/ 113 | cdk.out/ 114 | 
cdk.context.json 115 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. 
You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). 
Please do **not** create a public GitHub issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 60 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software is furnished to do so. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 10 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 11 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 12 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 13 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 14 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
15 | 16 | -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- encoding: utf-8 -*- 3 | # vim: tabstop=2 shiftwidth=2 softtabstop=2 expandtab 4 | 5 | import os 6 | 7 | import aws_cdk as cdk 8 | 9 | from data_analytics_system import ( 10 | VpcStack, 11 | BastionHostStack, 12 | KinesisDataStreamStack, 13 | # ElasticSearchStack, 14 | ##XXX: For using Amazon OpenSearch Service, remove comments from both the below code 15 | OpenSearchStack, 16 | KinesisFirehoseStack, 17 | UpsertToESStack, 18 | MergeSmallFilesLambdaStack, 19 | GlueCatalogDatabaseStack, 20 | DataLakePermissionsStack 21 | ) 22 | 23 | ACCOUNT = os.getenv('CDK_DEFAULT_ACCOUNT', '') 24 | REGION = os.getenv('CDK_DEFAULT_REGION', 'us-east-1') 25 | AWS_ENV = cdk.Environment(account=ACCOUNT, region=REGION) 26 | 27 | app = cdk.App() 28 | vpc_stack = VpcStack(app, 'DataAnalyticsVpcStack', 29 | env=AWS_ENV) 30 | 31 | bastion_host_stack = BastionHostStack(app, 'DataAnalyticsBastionHostStack', 32 | vpc_stack.vpc, 33 | #XXX: YOU SHOULD pass `region` and `account` values in the `env` of the StackProps 34 | # in order to prevent the following error: 35 | # Cross stack references are only supported for stacks deployed 36 | # to the same environment or between nested stacks and their parent stack 37 | env=AWS_ENV) 38 | bastion_host_stack.add_dependency(vpc_stack) 39 | 40 | kds_stack = KinesisDataStreamStack(app, 'DataAnalyticsKinesisStreamStack') 41 | kds_stack.add_dependency(vpc_stack) 42 | 43 | firehose_stack = KinesisFirehoseStack(app, 'DataAnalyticsFirehoseStack', 44 | kds_stack.kinesis_stream) 45 | firehose_stack.add_dependency(kds_stack) 46 | 47 | # search_stack = ElasticSearchStack(app, 'DataAnalyticsElasticSearchStack', 48 | # vpc_stack.vpc, 49 | # bastion_host_stack.sg_bastion_host, 50 | # #XXX: YOU SHOULD pass `region` and `account` values in the `env` of the 
StackProps 51 | # # in order to prevent the following error: 52 | # # Cross stack references are only supported for stacks deployed 53 | # # to the same environment or between nested stacks and their parent stack 54 | # env=AWS_ENV) 55 | 56 | #XXX: For using Amazon OpenSearch Service, 57 | # remove comments from both the below codes and the dependent codes, 58 | # then comments out `search_stack = ElasticSearchStack(...)` codes 59 | # 60 | search_stack = OpenSearchStack(app, 'DataAnalyticsOpenSearchStack', 61 | vpc_stack.vpc, 62 | bastion_host_stack.sg_bastion_host, 63 | #XXX: YOU SHOULD pass `region` and `account` values in the `env` of the StackProps 64 | # in order to prevent the following error: 65 | # Cross stack references are only supported for stacks deployed 66 | # to the same environment or between nested stacks and their parent stack 67 | env=AWS_ENV) 68 | search_stack.add_dependency(firehose_stack) 69 | 70 | upsert_to_es_stack = UpsertToESStack(app, 'DataAnalyticsUpsertToESStack', 71 | vpc_stack.vpc, 72 | kds_stack.kinesis_stream, 73 | search_stack.sg_search_client, 74 | search_stack.search_domain_endpoint, 75 | search_stack.search_domain_arn, 76 | env=AWS_ENV 77 | ) 78 | upsert_to_es_stack.add_dependency(search_stack) 79 | 80 | merge_small_files_stack = MergeSmallFilesLambdaStack(app, 'DataAnalyticsMergeSmallFilesStack', 81 | firehose_stack.s3_bucket_name 82 | ) 83 | merge_small_files_stack.add_dependency(upsert_to_es_stack) 84 | 85 | athena_databases = GlueCatalogDatabaseStack(app, 'DataAnalyticsGlueDatabases') 86 | athena_databases.add_dependency(merge_small_files_stack) 87 | 88 | lakeformation_grant_permissions = DataLakePermissionsStack(app, 89 | 'DataAnalyticsGrantLFPermissionsOnMergeFilesJob', 90 | merge_small_files_stack.lambda_exec_role 91 | ) 92 | lakeformation_grant_permissions.add_dependency(athena_databases) 93 | 94 | app.synth() 95 | 96 | -------------------------------------------------------------------------------- 
/assets/amazon-es-access-control.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/amazon-es-access-control.png -------------------------------------------------------------------------------- /assets/amazon-es-config-domain.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/amazon-es-config-domain.png -------------------------------------------------------------------------------- /assets/amazon-es-deployment-type.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/amazon-es-deployment-type.png -------------------------------------------------------------------------------- /assets/amazon-es-encryption-config.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/amazon-es-encryption-config.png -------------------------------------------------------------------------------- /assets/amazon-es-json-access-policy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/amazon-es-json-access-policy.png -------------------------------------------------------------------------------- /assets/aws-analytics-immersion-day-steps-extra.drawio: -------------------------------------------------------------------------------- 1 | 
7V3dc9q4Fv9rmGkfwvhLlv0YSNPdudm73aF77/aJEbYwbgyitkhI//qVbAmwJcAUbCBJ8hAsy8fyOb/zqWPSsfvT5ecUzSd/kBAnHcsIlx37rmNZpmkY7A8feSlGfA8UA1Eah2LSemAQ/8RiUFwXLeIQZ6WJlJCExvPyYEBmMxzQ0hhKU/JcnjYmSfmucxRhZWAQoEQd/X8c0ol8Ltdfn/gNx9FE3NqzYHFiiuRk8STZBIXkeWPI/tSx+ykhtPg0XfZxwpkn+VJcd7/l7GphKZ7ROhf0zWf7B7V/j8ZfH4Lg898Py283N4LKE0oW4oHFYumL5ABZ0CSe4f6KwUbH7o3JjPZJQtJ8js1+7/lde1GKwhivz83IDPPpcZJsToeQcZCNZzQlj7gyOUTZBIfiRk84pTETxwMa4eQLyWIakxk7NyKUkunGhNskjvgJSuZsFImjgK0FM9q9CZ0m7NgUaxdAMy15LJ6X3xJl8+JBx/GSr6M3JzGn8umJEcsEESbNOb9guow48LvoOXO6o0XwiOnwOaaTIRl9Z1TY9J4qKMl1tnS83BgSgvuMyRTT9IVNEWddw+gKvRF65Dhecfy8RiUUUyYbeIRiGhJ6EK1or6HCPgi0HIAcS0HOwM6Xw1bDdDTGKQcXg6SbsKX0RuxDRHNuFAOc67neSlS4PxakmGAHgWFww7EeKq5NUMq0VRBgqy5o1KfLqY7HKl02X4prN3E2vPEgFUVhsqRlpGnhvakLYkhBaxXU0zgM+W16z5OY4sEcBfyezwx5bCwli1m4UpgToA34TglrwDYUrPmGijU57RisDe6+/T3943vvIQo+f6Hkv099y7pxDIXZOGRmWhwKLpb5QFI6IRGZMdNBuEnIBfIdU/oidB8tKCmLayvr+M12Mi7FCaLxU9ln6NggLv3CLcqa4bZjlRhuAVAmQTnwqbhq08ZXCFm+WyXUrZDKyCINsEIql8vqiY4QlepRQvwUB8yH13Is2xxI2d8AB/Rcp46Gscke9E3fvUTHonUibPEL9liIr2coeadzQscruim1WMLFhIqiryK4klexm9J0Nf7Y1PS1Un9ajzao+IWy7FovaMdCOG7ZJK8sxh4LcZum6GVjWo6ibIcl8vX3Wcu0oHham2FflNAbt/YVHltmRZZbTPReUJie3Qoo5IK3ras6X66rWRA5iuNRUBUx0Mzr281VVolGkoLR0SJCPrhVcb86e2pp7Kl/gih9p33a4Mp/mM/NYiZu4w5RxP4MaIrRVPXPlxrP7hb/fuf3Us6M9snGaiyqdfcj9qjku8M1EdzdWxvn7uKUESpCnRm3mmq8BG5towcUcbMz4/znaiKpFBeW9PeAr6fHDotP5VmPhT4MQ6YNw0zoQhO4kzG6qeBOQvHUibt+xd75DaXjVDyK76vKaLdqKP13Zbw0ZRwz9kxIhg/QRm8LQrdro3tebQTGfgd9L/lw3R66vmx2eGidUWjMQ4Pd2ejV151cWI5XbdvuGobvQeh6pg09zylTrJuYQNuo0i0T2pKYnCoXAHbDxvx9W+NE2xrm3jCgqU0NPXCc163wAFQV3v81DQdSUtsINa3h4HULSrGgTr0y/n4T74AuM1KG7xuQpbvQaVds7usWm2d6p9Evz91D6HSC0vcFtKNfeBnTf/jlXSCOvm2cuVsKyvnBizhY+dHjKvdAlOYKRu5ixamhc5yHgkpoo+y7M9NhHLcdXtmzz6YoX5m6Sa5s6ysb51eSshy9SW576ia5p8kkG9skBxdQ14Ey0F8HdHL/eU+C7cPqRvXpGPNe2rmE0g6iEzxD9esFoH694FzFG1ct3twWT3ktZu9Yzsuz5WgVQo3Wu21WbtwajY1NG0MP7i9y601hU0yx3g3hBRjCH4s4eMwKeddVSbd+fHI2Y6gW3f7iTzrI733VBvEA7ouz5eoIVNsA2rWGr7xaUs2WTc8qk6jbPwmdPYQa7p50a7QEvCrJuZWC5C9LrlrZbF1yal7eesDhAFh5
S8CUbYKlhk5NRd2vlJdOZ3pqZKUNbMXcGaBvwte3FZOg6ShEw/FiJmKq2i4MbkHZ9pjerpm6NxdRqIn7Q/78bOxecuC6A4v6UpGhhKZRRqfQjcUSUM15Wzd0rl1x07KD+GztQ7DpF+neU6s6qRULRjK2vAyjNJgMM5zytyfqq+MK2/Xbh1TgtWoioZrS/znHs0HOADY+ECy4ait5uFhuymXoszcSwRoNKW2Xo3RGU1uOaio0lOt5t5nnrcuzRPGFG80DFNLeAsrLqUZB3YsPI3TtpfkDOK+vRKnhY1OVKO1m+gW8pAOt8maF7k36xnJkLVNUqJ7WDr6nyFUw7cTmZWbI2iWr5cLrTZCPlMkZ8mN9u5D6Muv1lmc9CLqG6/v8a2081/PKNVbHg13I7KIJWULu+9VXkmv3uplW9ctNGGELQuAZBjRN6FiVHdGmm9/UutMH8yMboGkcRQz5+1WqvqqwqCz+KXyXIe1c/pyg1wF3XPr8i0+Y4e2h4DHKsaQLSWsqlLktgDjYjxpdwwLllEL2yh3bpVy+gIzHGW5G0DVqWQ143l7ftMErfE8A8xlDGk/xcISy/Nr6cVztvNVxVBPfqtM11Vpfzhs29OEre/ibHn/4j1fjeA+XCCiXV4BV8zX35pyumld8sD7m7EcpFwzzqMxbLnGwoJibOor4+3U/FpjfXHS7prJpdcx4bxlTnEbxLOJERPsrk4zma2xe1X4shBbzvMBzLcP1/OrurPOrbyFAWaeUzbLVHqeGPbqlq7+VGqTXApXd0PzETZZL6JZNML35crNV2hijoHzBVzQhU6S2U5sf0Mc31x5tK50Uanu0ozERp2iP1kNAV3poDwKjtwcBxyvXnixXzZbbhYCu0NIeBIK3BwHXdC8MAroGq/YgEL49CED5Ev0KAs6ZIaDr1GoPAvjtQcDzK28Fnd0K6JrS2oKAdXWxwDiJ5//bl5kcEBdUvnXSNM8dGupeEGsPDteWHZwYDq5nXhYcbF1VsD04XFuYeGI4VJ3F+eGga+1rL14YXxkcTgEBq7wdBEBjEGCH6//AUFSb1v/Hwv70Lw== -------------------------------------------------------------------------------- /assets/aws-analytics-system-arch.drawio: -------------------------------------------------------------------------------- 1 | 
7Vtbl6I4EP41Po4HCNfHti+zu9M727vOnn30RImQbiQuxFb312+QRIEEhR7Qdqa7H5RKCEl9VV9VijgAt4vN5wQuw9+Jj6KBofmbAbgbGIauaxr7yCTbXOK5Vi4IEuzzTgfBGP+HuJDfF6ywj9JSR0pIRPGyLJyROEYzWpLBJCHrcrc5icpPXcIASYLxDEay9B/s01Csy/YODb8gHIT80a7h5A0LKDrzlaQh9Mm6IAL3A3CbEELzb4vNLYoy5Qm95Pc91LTuJ5agmDa5wQaJ7z3f//1g6JMQBWCD8OMn3eWTo1uxYuQzBfDLmMTsY5SQVeyjbByNXZGEhiQgMYweCVkyoc6Ez4jSLYcPrihhopAuIt4qz5VPP3tYQcBn/hmRBaLJlnVIUAQpfi2jATmowb7f/tYngtkjDI0bIABGfgs3P1PAI4agMAkQ5XcVtVcZyHDt6kBDqzxUSlbJDElDsS+FFR1EO3haQMUN7hVGK64NH73iGXOOKoJkRSMco9u9S2SoBQn0MVP/LYlIcoB2Tg6igQEs0xrZJpOnNCEvqNoZR1Ghs+t4umczuQ/TcG8eryihmDnQI5yi6ImkmGISs7YpoZQsCh1uIhxkDTQzohHkVzM2R5SUzSebJLct3RDXfL3ZI2G6zBc6x5tsHiPma8uscbEJMloawnVqDtnkV2xZMJvPROgOjJYZYCi5f2UPTncPrDPWbOJoc9RcRaurlawFuBy9dYFDBDWGBfpwQL2FlwzqiPV8/W2Bv460YD6Jou23p790b0Q+eZpkJkVHP/j0/UHao9/nvnLE2oWf9k0QpmWWkLJ0sxFB3CQJ3Ba67awoPUJErvo5B0zzEd9KGUrQhdW9E9B7J/uKjvfXJxj6pFEAyzqLUYjrunlJ/fm8ejUiz5QCj2RVATOaZXPe3KdrcCpG0AZKixAL1+2KohR8aij41HO/n0+P8lNBK19YzE0xg1u7gxSyjzFNEFzI8Zkpg5Zd5mS05SIpSFZj6QL7/s6L1yGmaLyEO5JdsyAoebYM13H4Twc/Do7jNsNGyDrHBminLVaZI1WSIfb/kD1Wyp1Ym2dadw9Goe0OJ2ygPNWJM9aU8yXrBmgjS4Kbtcx3f1eTSSUoZ9JfZ9l8Ruwy/1bu9ZL7w8Rn3jBJuS/0YXciRdcluxOmWEqxOqAE9YxlSjg7UZqgHCFMR1aKpSm00gVRKrUC3q6V41p+u66KxATOGTT0D2J6d8Q0Z+oJSYqaMxOos8B6ZrLPxkzqgGicTlYehB6uOltpgc2RbEVFCl1kK2pCs98dQe6rt4akGyCme54sW5dUcZ7y5FGk+t7H7rNlsdsxh5rmuY5juzpw3LfuanWnsntsVunoqn6piySnr+CnLlIajqPrdh1rvbuwJlcja0LddDV7QXSyxjSckOkzG6VNci04p3EIMy05hFlnDGHG8frlj8YEIn2vBJ3GBSxQ8vTqMD17unj6T4JVDb2a9htpusL+pj1kBKZ5nuawJgc0e4HVFZYKkv4JsDRs46gDNcUS6MfH6Rk8IJdux7JvXlWC3z5y2eXSsSLfP2u6bzaoAbRK9+sU0j7dd5ySprShIuarSkfSm/DuCibyxlVS1kfBpPeCCaQhimFzJ93b+GknvVhJxJIs6yZf5VXTYwvNi9ZytuE4Cq+3z0qQDfaJlyFIoMuxQ02IfdEh+KDDd0CH/67w7CXN8W7smM3PEF2MEl3Juv7MVjrePfu6abH1CS6vwoqX5URPhqYpJ3b3atHchwZRQBRnpQpqMT1zqKgL7Q+7dV8YsiVNXHKDyoHq/diarlXAaLi1lLe63qmRej7XasiZ2M+IqO65HSF6cqDuAFWTbQNAWyVwgvy6eeOvKejJ8/rK2BroooeXHneadas7P95Ljwgupj6czFcxz0gbJwBWjQ3V74tAw/JHb/mYolL0uFs/kz0IDVx3WtYcFZGIqc7RKxy6v91p18eZvp/cDr85qPC+fPTrzMedGpxs+Niu9r5dZXlJ
yqaXIpjMwkmKkuyXLy2ctP1xJ3BZ4lQcOfxjieLxTgFMPuYquG7ubA3Lp/JL4YsffDLfvqntmUpN3T5JpcrCX29pZIOo88Gk/b8HYfvLbUalLdy0zijfUd1P/mXpFzyFV/8qpLnm1TU/OdXsqObHLg8/+8533Ycfz4P7/wE= -------------------------------------------------------------------------------- /assets/aws-analytics-system-build-steps.drawio: -------------------------------------------------------------------------------- 1 | 7Vxbk6I4FP41XbX70BYhQOCxtbtnt7ZnZ6bcra15sqJEYBqNA/HS++s3SCJCQGFGUNfuflBOIJdzvnzn5CR4BwezzYcIL/yP1CXhna65mzv4eKfrAGga/0gkb6nEsc1U4EWBK27KBMPgXyKE4jlvGbgkzt3IKA1ZsMgLJ3Q+JxOWk+Eoouv8bVMa5ltdYI8oguEEh6r0n8BlvhyX5WQFv5HA80XTto7SghmWN4uRxD526XpPBJ/u4CCilKXfZpsBCRPlSb2kzz1XlO46FpE5q/PA8PHr37OP3/ov3uTDZ0b/XA10/R6KYaxwuBQjFr1lb1IFXkSXC7U10YEViRjZlNkCj2UN2XA5TgidERa98fvkU47QkICIborrdaZwZIl++Xu61k2BJSyM7O3qzvTAvwhVNFCLoSlaIC7Hhbic0zn/6HO9zF2S1KPxKxoxn3p0jsMXShdcCLjwG2HsTaAaLxnlIp/NQlGqKvWgkZIu7ClaVWhEQsyCVR66ZcoRj36mAW84M4Sp5wwBAMxXwXDkESae2odasSLNKlbUK5gqpstoQpSqttbajegnDCjRmeHaJatgwqmkaFi6ZGEwJ4MdgSTG9CLsBtwqAxrSKLP4lGaiOx2ahtm3DC6PWURfSfHmIAz3braRAxyLy10c+zvUJNMn4HTzgsck/EzjgAV0zsvGlDE627vhIQy8pIAl2OpjcTXhfSRRHlVJJwXkgC6vxXiTJnG8SAc6DTZJP/qcmRZJ4WzjJSTew+vY6PHOL/mwcNKfkdQd7C8Si5HoacUbjrcNNsOwQhcqigVokKmwwM6b7LMAgtU4/zkSUKlwnwSy+f6USTvnhHQeHRqF2Q156MjIs7i01RHyeIgi/LZ32xZg8YF2tPJ2MkunNZ6WTuAVQKF1Cxc0jwoGrqD0o0gxQCdAQdrhXmmlvWoXVsYPR1/NSLZ5TAbzHryMjPUSMnbstsjYVFT1B/faccARoD1ihvnHkEUEz1QPz3XB8jPrqL8WIsXNFr3xLHDd7WRf+wEjwwXecvGau1GFAGracIeJ+o7SrmcbXWvLNtZxGJdGWYVwiv8/J80q0Rcvcwzz8VnfK3sMIl5RGizNE3JVIy7zAWp9UzE3L5lu/64mFotIyq2/T5L+9Pll+i1/12s6H0Yunw2jWMyFNnAnGRMouJNQzMVnrVECulT21M28LwEmUlRlaiW6ao8+7dPqaqf7H9fVPl3BTl1JjZzHO111S1dTrh6fxqQBX9kVCKyeldZ5+UqmtQ6FMM9SD9cdw9S3zYEYpowUWothTDVhdG6C3OXQdUU30LE6JExTVcUZs6HSUq1nQ/V8WhoaPU1zbIQsG0Bk20a+wrqrX1BY/cJCPRWr31MtOU3YsvMrT37qCAFgVbHWxbk1NctZ4erGy8krYaN1wPwRHX/jtTQIuc2qLZRKLjDUZbhcmXfjwoybYgKZCvrBRJfMGlZU0/ZMN2/KVhX0KnMTjWm6wP6G1eMEpjmOhngRgqhbW1q3aEtdbvr+5DyE4HA9bRtPzVMM1f2Eqwrwm3suq5BPVuP9bsP9E+dDKhXSPNxHKKcprVfi88tSR6i4xX46Zb0nTC4hYYKZT+a4wSStvwo/V0rEUlMi
D+kor5seG+emQD7aQKhk1ltdEqR14nzI6QgSAtV3lBNiW6rR3+nwAujw+zKYvMapvetOTKt+3HI2SlQTNF+SkQ63bV81LTbQvih1Cqx4Zk682OMaulXmMAzH6JVkh0DxZNbpFHRRKQdprraXqQbYxejNFpjqgtc5VlPLx2atGic5bsCiwLFPZNGjFbVt0BOfkdhR4Gn2/bUSenKcttipxpq/ha2PR80cAPT/2/oI8Wzs4tF0ORdxae1ZXLU1euA0Ys0kSHtRmZoCedmOn8uepQauOzirbxUZjjmKTUondGvxGFKzB+cmN/mUXXTk6lm5bg89oRrr+fdFa+uLVh6XxLx7McHRxB/FJEreq6k/SXeIr3/oCZ6XOJGaLPm0IPPhVgFcPhQquGrubG6Wez3HDmc//oRqHAs5D5UawDpKpaXpv7bCSFQjC/DOpO3vhvD15VtCpQ2mKawA5eVk/1DZay5jfO0bIg00X575U0PNtjJ/i2i4ZF9fbLe/CmafptHX1RdyXxY8WSETEOclUyxewrS+L5MX+vt/YZ/OcHadM54UJg/fx9sJ88BvAPZis/+E5SWfvwD8q2yNdz5tMC26blA0eCVYlkqjF5Ip+znPLmGhxjZdwmL8Dou0FGp5WABdDac6xYVK4l3iwr16XDTeJ0KFF66gfWZmKMujd4cAcnsIsB1YQIC6TOgUAWWJ9+4QMLk9BFggf8QQlgSNnSKgbLuhMwTo1x8dNH87wgE5BHDJeREAyhLU3UHg+tcNzUnAvjQInHXpqN+gH7AdvQABNQV+Igjwy+z3+9It++xXEOHTfw== -------------------------------------------------------------------------------- /assets/aws-athena-create-database.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/aws-athena-create-database.png -------------------------------------------------------------------------------- /assets/aws-athena-ctas-lambda-add-trigger.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/aws-athena-ctas-lambda-add-trigger.png -------------------------------------------------------------------------------- /assets/aws-athena-ctas-lambda-create-function.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/aws-athena-ctas-lambda-create-function.png 
-------------------------------------------------------------------------------- /assets/aws-athena-ctas-lambda-execution-iam-role.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/aws-athena-ctas-lambda-execution-iam-role.png -------------------------------------------------------------------------------- /assets/aws-athena-ctas-lambda-iam-role-policies.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/aws-athena-ctas-lambda-iam-role-policies.png -------------------------------------------------------------------------------- /assets/aws-athena-setup-query-results-location-01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/aws-athena-setup-query-results-location-01.png -------------------------------------------------------------------------------- /assets/aws-athena-setup-query-results-location-02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/aws-athena-setup-query-results-location-02.png -------------------------------------------------------------------------------- /assets/aws-ec2-choose-ami.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/aws-ec2-choose-ami.png -------------------------------------------------------------------------------- 
/assets/aws-ec2-choose-instance-type.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/aws-ec2-choose-instance-type.png -------------------------------------------------------------------------------- /assets/aws-ec2-configure-instance-details.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/aws-ec2-configure-instance-details.png -------------------------------------------------------------------------------- /assets/aws-ec2-configure-security-group.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/aws-ec2-configure-security-group.png -------------------------------------------------------------------------------- /assets/aws-ec2-connect.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/aws-ec2-connect.png -------------------------------------------------------------------------------- /assets/aws-ec2-launch-instance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/aws-ec2-launch-instance.png -------------------------------------------------------------------------------- /assets/aws-ec2-security-group-for-bastion.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/aws-ec2-security-group-for-bastion.png -------------------------------------------------------------------------------- /assets/aws-ec2-security-group-for-es-client.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/aws-ec2-security-group-for-es-client.png -------------------------------------------------------------------------------- /assets/aws-ec2-security-group-for-es-cluster.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/aws-ec2-security-group-for-es-cluster.png -------------------------------------------------------------------------------- /assets/aws-ec2-select-keypair.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/aws-ec2-select-keypair.png -------------------------------------------------------------------------------- /assets/aws-ec2-ssm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/aws-ec2-ssm.png -------------------------------------------------------------------------------- /assets/aws-ec2instance-modify-iam-role.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/aws-ec2instance-modify-iam-role.png 
-------------------------------------------------------------------------------- /assets/aws-kinesis-firehose-create_new_iam_role.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/aws-kinesis-firehose-create_new_iam_role.png -------------------------------------------------------------------------------- /assets/aws-lambda-add-layer-to-function-layer-version-arn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/aws-lambda-add-layer-to-function-layer-version-arn.png -------------------------------------------------------------------------------- /assets/aws-lambda-add-layer-to-function.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/aws-lambda-add-layer-to-function.png -------------------------------------------------------------------------------- /assets/aws-lambda-create-function.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/aws-lambda-create-function.png -------------------------------------------------------------------------------- /assets/aws-lambda-create-layer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/aws-lambda-create-layer.png -------------------------------------------------------------------------------- /assets/aws-lambda-execution-iam-role.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/aws-lambda-execution-iam-role.png -------------------------------------------------------------------------------- /assets/aws-lambda-iam-role-policies.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/aws-lambda-iam-role-policies.png -------------------------------------------------------------------------------- /assets/aws-lambda-kinesis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/aws-lambda-kinesis.png -------------------------------------------------------------------------------- /assets/aws-quicksight-access-s3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/aws-quicksight-access-s3.png -------------------------------------------------------------------------------- /assets/aws-quicksight-athena-choose_your_table.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/aws-quicksight-athena-choose_your_table.png -------------------------------------------------------------------------------- /assets/aws-quicksight-athena_data_source.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/aws-quicksight-athena_data_source.png -------------------------------------------------------------------------------- /assets/aws-quicksight-bar-chart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/aws-quicksight-bar-chart.png -------------------------------------------------------------------------------- /assets/aws-quicksight-choose-s3-bucket.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/aws-quicksight-choose-s3-bucket.png -------------------------------------------------------------------------------- /assets/aws-quicksight-finish-dataset-creation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/aws-quicksight-finish-dataset-creation.png -------------------------------------------------------------------------------- /assets/aws-quicksight-new_data_sets.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/aws-quicksight-new_data_sets.png -------------------------------------------------------------------------------- /assets/aws-quicksight-quarterly-graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/aws-quicksight-quarterly-graph.png 
-------------------------------------------------------------------------------- /assets/aws-quicksight-share-analysis-users.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/aws-quicksight-share-analysis-users.png -------------------------------------------------------------------------------- /assets/aws-quicksight-share-analysis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/aws-quicksight-share-analysis.png -------------------------------------------------------------------------------- /assets/aws-quicksight-user-email-click-to-view.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/aws-quicksight-user-email-click-to-view.png -------------------------------------------------------------------------------- /assets/aws-quicksight-user-email.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/aws-quicksight-user-email.png -------------------------------------------------------------------------------- /assets/aws-quicksight-user-invitation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/aws-quicksight-user-invitation.png -------------------------------------------------------------------------------- /assets/aws_athena_select_all_limit_10.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/aws_athena_select_all_limit_10.png -------------------------------------------------------------------------------- /assets/cfn-stacks-create-stack.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/cfn-stacks-create-stack.png -------------------------------------------------------------------------------- /assets/cfn-step1-create-stack.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/cfn-step1-create-stack.png -------------------------------------------------------------------------------- /assets/iam-user-download.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/iam-user-download.png -------------------------------------------------------------------------------- /assets/iam-user-policy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/iam-user-policy.png -------------------------------------------------------------------------------- /assets/iam-user.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/iam-user.png 
-------------------------------------------------------------------------------- /assets/kfh_create_new_iam_role.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/kfh_create_new_iam_role.png -------------------------------------------------------------------------------- /assets/kibana-01-add_data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/kibana-01-add_data.png -------------------------------------------------------------------------------- /assets/kibana-02a-create-index-pattern.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/kibana-02a-create-index-pattern.png -------------------------------------------------------------------------------- /assets/kibana-02b-create-index-pattern-configure-settings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/kibana-02b-create-index-pattern-configure-settings.png -------------------------------------------------------------------------------- /assets/kibana-02c-create-index-pattern-review.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/kibana-02c-create-index-pattern-review.png -------------------------------------------------------------------------------- /assets/kibana-02d-management-advanced-setting.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/kibana-02d-management-advanced-setting.png -------------------------------------------------------------------------------- /assets/kibana-03-discover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/kibana-03-discover.png -------------------------------------------------------------------------------- /assets/kibana-04-discover-visualize.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/kibana-04-discover-visualize.png -------------------------------------------------------------------------------- /assets/kibana-05-discover-change-metrics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/kibana-05-discover-change-metrics.png -------------------------------------------------------------------------------- /assets/kibana-08-visualize-save.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/kibana-08-visualize-save.png -------------------------------------------------------------------------------- /assets/kibana-09-dashboards.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/kibana-09-dashboards.png -------------------------------------------------------------------------------- /assets/kibana-10-import-visualization.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/kibana-10-import-visualization.png -------------------------------------------------------------------------------- /assets/kibana-12-discover-save-dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/kibana-12-discover-save-dashboard.png -------------------------------------------------------------------------------- /assets/kibana-13-complete.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/kibana-13-complete.png -------------------------------------------------------------------------------- /assets/ops-create-firehose_role.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/ops-create-firehose_role.png -------------------------------------------------------------------------------- /assets/ops-create-index-pattern-configure-setting.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/ops-create-index-pattern-configure-setting.png 
-------------------------------------------------------------------------------- /assets/ops-create-index-pattern.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/ops-create-index-pattern.png -------------------------------------------------------------------------------- /assets/ops-dashboards-sidebar-menu-security.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/ops-dashboards-sidebar-menu-security.png -------------------------------------------------------------------------------- /assets/ops-dashboards-sidebar-menu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/ops-dashboards-sidebar-menu.png -------------------------------------------------------------------------------- /assets/ops-entries-for-firehose_role.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/ops-entries-for-firehose_role.png -------------------------------------------------------------------------------- /assets/ops-management-advanced-setting.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/ops-management-advanced-setting.png -------------------------------------------------------------------------------- /assets/ops-role-mappings.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-analytics-immersion-day/7d25b4d4a83bc17a2c2f34a750e2086b1f575e68/assets/ops-role-mappings.png -------------------------------------------------------------------------------- /aws-analytics-system-arch.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 |
devices
devices
Kinesis Data Streams
Kinesis Data Streams
Kinesis Data Firehose
Kinesis Data Firehose
S3
S3
Athena
Athena
QuickSight
QuickSight
Lambda Function
Lambda Function
OpenSearch Service
OpenSearch Service
Kibana
Kibana
Text is not SVG - cannot display
-------------------------------------------------------------------------------- /cdk.context.json: -------------------------------------------------------------------------------- 1 | { 2 | "merge_small_files_lambda_env": { 3 | "OLD_DATABASE": "mydatabase", 4 | "OLD_TABLE_NAME": "retail_trans_json", 5 | "NEW_DATABASE": "mydatabase", 6 | "NEW_TABLE_NAME": "ctas_retail_trans_parquet", 7 | "NEW_TABLE_S3_FOLDER_NAME": "parquet-retail-trans", 8 | "COLUMN_NAMES": "invoice,stockcode,description,quantity,invoicedate,price,customer_id,country" 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /cdk.json: -------------------------------------------------------------------------------- 1 | { 2 | "app": "python3 app.py", 3 | "watch": { 4 | "include": [ 5 | "**" 6 | ], 7 | "exclude": [ 8 | "README.md", 9 | "cdk*.json", 10 | "requirements*.txt", 11 | "source.bat", 12 | "**/__init__.py", 13 | "python/__pycache__", 14 | "tests" 15 | ] 16 | }, 17 | "context": { 18 | "@aws-cdk/customresources:installLatestAwsSdkDefault": false, 19 | "@aws-cdk/aws-apigateway:usagePlanKeyOrderInsensitiveId": true, 20 | "@aws-cdk/core:stackRelativeExports": true, 21 | "@aws-cdk/aws-rds:lowercaseDbIdentifier": true, 22 | "@aws-cdk/aws-lambda:recognizeVersionProps": true, 23 | "@aws-cdk/aws-lambda:recognizeLayerVersion": true, 24 | "@aws-cdk/aws-cloudfront:defaultSecurityPolicyTLSv1.2_2021": true, 25 | "@aws-cdk-containers/ecs-service-extensions:enableDefaultLogDriver": true, 26 | "@aws-cdk/aws-ec2:uniqueImdsv2TemplateName": true, 27 | "@aws-cdk/core:checkSecretUsage": false, 28 | "@aws-cdk/aws-iam:minimizePolicies": true, 29 | "@aws-cdk/aws-ecs:arnFormatIncludesClusterName": true, 30 | "@aws-cdk/core:validateSnapshotRemovalPolicy": true, 31 | "@aws-cdk/aws-codepipeline:crossAccountKeyAliasStackSafeResourceName": true, 32 | "@aws-cdk/aws-s3:createDefaultLoggingPolicy": true, 33 | "@aws-cdk/aws-sns-subscriptions:restrictSqsDescryption": true, 34 | 
class BastionHostStack(Stack):
  """Provision a Linux bastion host in a public subnet of the given VPC.

  The instance carries Kinesis Data Streams and Data Firehose permissions so
  it can be used to generate and send test data into the pipeline.  The
  bastion's security group is exposed as ``sg_bastion_host`` so downstream
  stacks can whitelist traffic originating from it.
  """

  def __init__(self, scope: Construct, construct_id: str, vpc,
               ssh_ingress_cidr: str = "0.0.0.0/0", **kwargs) -> None:
    """
    :param vpc: VPC in which to place the bastion host.
    :param ssh_ingress_cidr: CIDR block allowed to open SSH connections to
      the bastion.  Defaults to ``0.0.0.0/0`` for backward compatibility,
      but SHOULD be restricted to a known address range in production.
    """
    super().__init__(scope, construct_id, **kwargs)

    sg_bastion_host = aws_ec2.SecurityGroup(self, "BastionHostSG",
      vpc=vpc,
      allow_all_outbound=True,
      description='security group for an bastion host',
      security_group_name='bastion-host-sg'
    )
    cdk.Tags.of(sg_bastion_host).add('Name', 'bastion-host-sg')

    #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceClass.html
    #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceSize.html#aws_cdk.aws_ec2.InstanceSize
    ec2_instance_type = aws_ec2.InstanceType.of(aws_ec2.InstanceClass.BURSTABLE3, aws_ec2.InstanceSize.MEDIUM)

    #XXX: As there are no SSH public keys deployed on this machine,
    # you need to use EC2 Instance Connect with the command
    # 'aws ec2-instance-connect send-ssh-public-key' to provide your SSH public key.
    # https://aws.amazon.com/de/blogs/compute/new-using-amazon-ec2-instance-connect-for-ssh-access-to-your-ec2-instances/
    bastion_host = aws_ec2.BastionHostLinux(self, "BastionHost",
      vpc=vpc,
      instance_type=ec2_instance_type,
      subnet_selection=aws_ec2.SubnetSelection(subnet_type=aws_ec2.SubnetType.PUBLIC),
      security_group=sg_bastion_host
    )

    # Generalized from a hard-coded "0.0.0.0/0" (see previous TODO): callers
    # may now restrict the SSH source range; the default keeps old behavior.
    bastion_host.allow_ssh_access_from(aws_ec2.Peer.ipv4(ssh_ingress_cidr))

    #XXX: In order to test the data pipeline, grant the bastion host broad
    # Kinesis and Firehose permissions (workshop convenience, not least-privilege).
    bastion_host.role.add_to_policy(aws_iam.PolicyStatement(
      effect=aws_iam.Effect.ALLOW,
      resources=["*"],
      actions=["kinesis:*"]))
    bastion_host.role.add_to_policy(aws_iam.PolicyStatement(
      effect=aws_iam.Effect.ALLOW,
      resources=["*"],
      actions=["firehose:*"]))

    # Exposed for downstream stacks (e.g. the search-cluster security group).
    self.sg_bastion_host = sg_bastion_host

    cdk.CfnOutput(self, 'BastionHostId', value=bastion_host.instance_id, export_name='BastionHostId')
    cdk.CfnOutput(self, 'BastionHostPublicDNSName', value=bastion_host.instance_public_dns_name, export_name='BastionHostPublicDNSName')
class ElasticSearchStack(Stack):
  """Stand up a VPC-resident Amazon Elasticsearch 7.10 domain for the workshop.

  Publishes ``sg_search_client`` (attach to any instance that must reach the
  cluster) and ``search_domain_endpoint`` for downstream stacks.
  """

  def __init__(self, scope: Construct, construct_id: str, vpc, sg_bastion_host, **kwargs) -> None:
    super().__init__(scope, construct_id, **kwargs)

    # Security group handed out to clients of the cluster.
    sg_use_es = aws_ec2.SecurityGroup(self, "ElasticSearchClientSG",
      vpc=vpc,
      allow_all_outbound=True,
      description='security group for an elasticsearch client',
      security_group_name='use-es-cluster-sg'
    )
    cdk.Tags.of(sg_use_es).add('Name', 'use-es-cluster-sg')

    # Security group attached to the cluster itself.
    sg_es = aws_ec2.SecurityGroup(self, "ElasticSearchSG",
      vpc=vpc,
      allow_all_outbound=True,
      description='security group for an elasticsearch cluster',
      security_group_name='es-cluster-sg'
    )
    cdk.Tags.of(sg_es).add('Name', 'es-cluster-sg')

    # Allow all TCP into the cluster from itself, its clients, and the bastion.
    for ingress_peer, rule_desc in (
        (sg_es, 'es-cluster-sg'),
        (sg_use_es, 'use-es-cluster-sg'),
        (sg_bastion_host, 'bastion-host-sg')):
      sg_es.add_ingress_rule(peer=ingress_peer,
        connection=aws_ec2.Port.all_tcp(), description=rule_desc)

    es_domain_name = 'retail'

    #XXX: Amazon OpenSearch Service - Current generation instance types
    # https://docs.aws.amazon.com/opensearch-service/latest/developerguide/supported-instance-types.html#latest-gen
    cluster_config = {
      "dedicatedMasterCount": 3,
      "dedicatedMasterEnabled": True,
      "dedicatedMasterType": "t3.medium.elasticsearch",
      "instanceCount": 3,
      "instanceType": "t3.medium.elasticsearch",
      "zoneAwarenessConfig": {
        #XXX: az_count must be equal to vpc subnets count.
        "availabilityZoneCount": 3,
      },
      "zoneAwarenessEnabled": True
    }

    # Read/HTTP access to the domain; network reachability is still gated by
    # the security groups above since the domain lives inside the VPC.
    access_policy_doc = {
      "Version": "2012-10-17",
      "Statement": [{
        "Effect": "Allow",
        "Principal": {
          "AWS": "*"
        },
        "Action": [
          "es:Describe*",
          "es:List*",
          "es:Get*",
          "es:ESHttp*"
        ],
        "Resource": self.format_arn(service="es", resource="domain",
          resource_name="{}/*".format(es_domain_name))
      }]
    }

    #XXX: aws cdk elastsearch example - https://github.com/aws/aws-cdk/issues/2873
    es_cfn_domain = aws_elasticsearch.CfnDomain(self, "ElasticSearch",
      elasticsearch_cluster_config=cluster_config,
      ebs_options={
        "ebsEnabled": True,
        "volumeSize": 10,
        "volumeType": "gp3"
      },
      domain_name=es_domain_name,
      #XXX: Supported versions of OpenSearch and Elasticsearch
      # https://docs.aws.amazon.com/opensearch-service/latest/developerguide/what-is.html#choosing-version
      elasticsearch_version="7.10",
      encryption_at_rest_options={
        "enabled": False
      },
      access_policies=access_policy_doc,
      #XXX: For domains running OpenSearch or Elasticsearch 5.3 and later, OpenSearch Service takes hourly automated snapshots
      # Only applies for Elasticsearch versions below 5.3
      # snapshot_options={
      #   "automatedSnapshotStartHour": 17
      # },
      vpc_options={
        "securityGroupIds": [sg_es.security_group_id],
        #XXX: az_count must be equal to vpc subnets count.
        "subnetIds": vpc.select_subnets(subnet_type=aws_ec2.SubnetType.PRIVATE_WITH_EGRESS).subnet_ids
      }
    )
    cdk.Tags.of(es_cfn_domain).add('Name', 'analytics-workshop-es')

    # Exposed for downstream stacks.
    self.sg_search_client = sg_use_es
    self.search_domain_endpoint = es_cfn_domain.attr_domain_endpoint

    cdk.CfnOutput(self, 'ESDomainEndpoint',
      value=self.search_domain_endpoint, export_name='ESDomainEndpoint')
    cdk.CfnOutput(self, 'ESDashboardsURL',
      value=f"{self.search_domain_endpoint}/_dashboards/", export_name='ESDashboardsURL')
class KinesisFirehoseStack(Stack):
  """Create the S3 destination bucket and a Kinesis Data Firehose delivery
  stream that drains the transaction Kinesis stream into S3 as uncompressed,
  time-partitioned JSON objects.

  Exposes ``s3_bucket_name`` for downstream stacks.
  """

  def __init__(self, scope: Construct, construct_id: str, trans_kinesis_stream, **kwargs) -> None:
    """
    :param trans_kinesis_stream: the Kinesis data stream that Firehose reads
      from (``KinesisStreamAsSource``).
    """
    super().__init__(scope, construct_id, **kwargs)

    # Destination bucket; region/account in the name for global uniqueness.
    s3_bucket = s3.Bucket(self, "s3bucket",
      bucket_name="aws-analytics-immersion-day-{region}-{account}".format(
        region=cdk.Aws.REGION, account=cdk.Aws.ACCOUNT_ID))

    firehose_role_policy_doc = aws_iam.PolicyDocument()

    # S3 permissions on the destination bucket.
    # NOTE: rewritten from a `PolicyStatement(**{...})` dict-unpacking call to
    # plain keyword arguments, consistent with the other statements below.
    firehose_role_policy_doc.add_statements(aws_iam.PolicyStatement(
      effect=aws_iam.Effect.ALLOW,
      resources=[s3_bucket.bucket_arn, "{}/*".format(s3_bucket.bucket_arn)],
      actions=["s3:AbortMultipartUpload",
        "s3:GetBucketLocation",
        "s3:GetObject",
        "s3:ListBucket",
        "s3:ListBucketMultipartUploads",
        "s3:PutObject"]
    ))

    # Glue table read permissions.
    firehose_role_policy_doc.add_statements(aws_iam.PolicyStatement(
      effect=aws_iam.Effect.ALLOW,
      resources=["*"],
      actions=["glue:GetTable",
        "glue:GetTableVersion",
        "glue:GetTableVersions"]
    ))

    # Permission to read records from the source Kinesis stream.
    firehose_role_policy_doc.add_statements(aws_iam.PolicyStatement(
      effect=aws_iam.Effect.ALLOW,
      resources=[trans_kinesis_stream.stream_arn],
      # resources=[kinesis_stream_arn],
      actions=["kinesis:DescribeStream",
        "kinesis:GetShardIterator",
        "kinesis:GetRecords"]
    ))

    # Permission to write delivery logs to CloudWatch Logs.
    firehose_log_group_name = "/aws/kinesisfirehose/retail-trans"
    firehose_role_policy_doc.add_statements(aws_iam.PolicyStatement(
      effect=aws_iam.Effect.ALLOW,
      #XXX: The ARN will be formatted as follows:
      # arn:{partition}:{service}:{region}:{account}:{resource}{sep}}{resource-name}
      resources=[self.format_arn(service="logs", resource="log-group",
        resource_name="{}:log-stream:*".format(firehose_log_group_name),
        arn_format=cdk.ArnFormat.COLON_RESOURCE_NAME)],
      actions=["logs:PutLogEvents"]
    ))

    firehose_role = aws_iam.Role(self, "FirehoseDeliveryRole",
      role_name="FirehoseDeliveryRole",
      assumed_by=aws_iam.ServicePrincipal("firehose.amazonaws.com"),
      #XXX: use inline_policies to work around https://github.com/aws/aws-cdk/issues/5221
      inline_policies={
        "firehose_role_policy": firehose_role_policy_doc
      }
    )

    trans_to_s3_delivery_stream = aws_kinesisfirehose.CfnDeliveryStream(self, "KinesisFirehoseToS3",
      delivery_stream_name="retail-trans",
      delivery_stream_type="KinesisStreamAsSource",
      kinesis_stream_source_configuration={
        "kinesisStreamArn": trans_kinesis_stream.stream_arn,
        # "kinesisStreamArn": kinesis_stream_arn,
        "roleArn": firehose_role.role_arn
      },
      extended_s3_destination_configuration={
        "bucketArn": s3_bucket.bucket_arn,
        # Flush whichever comes first: 60 seconds or 1 MiB buffered.
        "bufferingHints": {
          "intervalInSeconds": 60,
          "sizeInMBs": 1
        },
        "cloudWatchLoggingOptions": {
          "enabled": True,
          "logGroupName": firehose_log_group_name,
          "logStreamName": "S3Delivery"
        },
        "compressionFormat": "UNCOMPRESSED", # [GZIP | HADOOP_SNAPPY | Snappy | UNCOMPRESSED | ZIP]
        # Hive-style time partitions derived from the record timestamp.
        "prefix": "json-data/year=!{timestamp:yyyy}/month=!{timestamp:MM}/day=!{timestamp:dd}/hour=!{timestamp:HH}/",
        "errorOutputPrefix": "error-json/year=!{timestamp:yyyy}/month=!{timestamp:MM}/day=!{timestamp:dd}/hour=!{timestamp:HH}/!{firehose:error-output-type}",
        "roleArn": firehose_role.role_arn
      }
    )

    # Exposed for downstream stacks.
    self.s3_bucket_name = s3_bucket.bucket_name

    cdk.CfnOutput(self, '{}_S3DestBucket'.format(self.stack_name), value=s3_bucket.bucket_name, export_name='S3DestBucket')
    cdk.CfnOutput(self, 'FirehoseRoleArn', value=firehose_role.role_arn, export_name='FirehoseRoleArn')
"cloudWatchLoggingOptions": { 88 | "enabled": True, 89 | "logGroupName": firehose_log_group_name, 90 | "logStreamName": "S3Delivery" 91 | }, 92 | "compressionFormat": "UNCOMPRESSED", # [GZIP | HADOOP_SNAPPY | Snappy | UNCOMPRESSED | ZIP] 93 | "prefix": "json-data/year=!{timestamp:yyyy}/month=!{timestamp:MM}/day=!{timestamp:dd}/hour=!{timestamp:HH}/", 94 | "errorOutputPrefix": "error-json/year=!{timestamp:yyyy}/month=!{timestamp:MM}/day=!{timestamp:dd}/hour=!{timestamp:HH}/!{firehose:error-output-type}", 95 | "roleArn": firehose_role.role_arn 96 | } 97 | ) 98 | 99 | self.s3_bucket_name = s3_bucket.bucket_name 100 | 101 | cdk.CfnOutput(self, '{}_S3DestBucket'.format(self.stack_name), value=s3_bucket.bucket_name, export_name='S3DestBucket') 102 | cdk.CfnOutput(self, 'FirehoseRoleArn', value=firehose_role.role_arn, export_name='FirehoseRoleArn') 103 | 104 | -------------------------------------------------------------------------------- /data_analytics_system/glue_catalog_database.py: -------------------------------------------------------------------------------- 1 | import aws_cdk as cdk 2 | 3 | from aws_cdk import ( 4 | Stack, 5 | aws_glue 6 | ) 7 | from constructs import Construct 8 | 9 | 10 | class GlueCatalogDatabaseStack(Stack): 11 | 12 | def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None: 13 | super().__init__(scope, construct_id, **kwargs) 14 | 15 | athena_database_info = self.node.try_get_context('merge_small_files_lambda_env') 16 | old_database_name = athena_database_info['OLD_DATABASE'] 17 | new_database_name = athena_database_info['NEW_DATABASE'] 18 | 19 | for idx, database_name in enumerate(list(set([old_database_name, new_database_name]))): 20 | cfn_database = aws_glue.CfnDatabase(self, f"GlueCfnDatabase{idx}", 21 | catalog_id=cdk.Aws.ACCOUNT_ID, 22 | database_input=aws_glue.CfnDatabase.DatabaseInputProperty( 23 | name=database_name 24 | ) 25 | ) 26 | cfn_database.apply_removal_policy(cdk.RemovalPolicy.DESTROY) 27 | 28 | 
cdk.CfnOutput(self, f'{self.stack_name}_GlueDatabaseName{idx}', 29 | value=cfn_database.database_input.name) 30 | -------------------------------------------------------------------------------- /data_analytics_system/kds.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- encoding: utf-8 -*- 3 | # vim: tabstop=2 shiftwidth=2 softtabstop=2 expandtab 4 | 5 | import aws_cdk as cdk 6 | 7 | from aws_cdk import ( 8 | Stack, 9 | aws_kinesis as kinesis 10 | ) 11 | from constructs import Construct 12 | 13 | class KinesisDataStreamStack(Stack): 14 | 15 | def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None: 16 | super().__init__(scope, construct_id, **kwargs) 17 | 18 | trans_kinesis_stream = kinesis.Stream(self, "AnalyticsWorkshopKinesisStreams", 19 | # specify the ON-DEMAND capacity mode. 20 | # default: StreamMode.PROVISIONED 21 | stream_mode=kinesis.StreamMode.ON_DEMAND, 22 | stream_name='retail-trans') 23 | 24 | self.kinesis_stream_name = trans_kinesis_stream.stream_name 25 | self.kinesis_stream_arn = trans_kinesis_stream.stream_arn 26 | self.kinesis_stream = trans_kinesis_stream 27 | 28 | cdk.CfnOutput(self, '{}_KinesisStreamName'.format(self.stack_name), value=self.kinesis_stream.stream_name, 29 | export_name='KinesisStreamName') 30 | cdk.CfnOutput(self, '{}_KinesisStreamArn'.format(self.stack_name), value=self.kinesis_stream.stream_arn, 31 | export_name='KinesisStreamArn') 32 | 33 | -------------------------------------------------------------------------------- /data_analytics_system/lake_formation.py: -------------------------------------------------------------------------------- 1 | import aws_cdk as cdk 2 | 3 | from aws_cdk import ( 4 | Stack, 5 | aws_lakeformation 6 | ) 7 | from constructs import Construct 8 | 9 | class DataLakePermissionsStack(Stack): 10 | 11 | def __init__(self, scope: Construct, construct_id: str, job_role, **kwargs) -> None: 12 | 
super().__init__(scope, construct_id, **kwargs) 13 | 14 | athena_database_info = self.node.try_get_context('merge_small_files_lambda_env') 15 | old_database_name = athena_database_info['OLD_DATABASE'] 16 | new_database_name = athena_database_info['NEW_DATABASE'] 17 | 18 | database_list = list(set([old_database_name, new_database_name])) 19 | 20 | #XXXX: The role assumed by cdk is not a data lake administrator. 21 | # So, deploying PrincipalPermissions meets the error such as: 22 | # "Resource does not exist or requester is not authorized to access requested permissions." 23 | # In order to solve the error, it is necessary to promote the cdk execution role to the data lake administrator. 24 | # For example, https://github.com/aws-samples/data-lake-as-code/blob/mainline/lib/stacks/datalake-stack.ts#L68 25 | cfn_data_lake_settings = aws_lakeformation.CfnDataLakeSettings(self, "CfnDataLakeSettings", 26 | admins=[aws_lakeformation.CfnDataLakeSettings.DataLakePrincipalProperty( 27 | data_lake_principal_identifier=cdk.Fn.sub(self.synthesizer.cloud_formation_execution_role_arn) 28 | )] 29 | ) 30 | 31 | for idx, database_name in enumerate(database_list): 32 | lf_permissions_on_database = aws_lakeformation.CfnPrincipalPermissions(self, f"LFPermissionsOnDatabase{idx}", 33 | permissions=["CREATE_TABLE", "DROP", "ALTER", "DESCRIBE"], 34 | permissions_with_grant_option=[], 35 | principal=aws_lakeformation.CfnPrincipalPermissions.DataLakePrincipalProperty( 36 | data_lake_principal_identifier=job_role.role_arn 37 | ), 38 | resource=aws_lakeformation.CfnPrincipalPermissions.ResourceProperty( 39 | database=aws_lakeformation.CfnPrincipalPermissions.DatabaseResourceProperty( 40 | catalog_id=cdk.Aws.ACCOUNT_ID, 41 | name=database_name 42 | ) 43 | ) 44 | ) 45 | lf_permissions_on_database.apply_removal_policy(cdk.RemovalPolicy.DESTROY) 46 | 47 | #XXX: In order to keep resource destruction order, 48 | # set dependency between CfnDataLakeSettings and CfnPrincipalPermissions 49 | 
lf_permissions_on_database.add_dependency(cfn_data_lake_settings) 50 | 51 | for idx, database_name in enumerate(database_list): 52 | lf_permissions_on_table = aws_lakeformation.CfnPrincipalPermissions(self, f"LFPermissionsOnTable{idx}", 53 | permissions=["SELECT", "INSERT", "DELETE", "DESCRIBE", "ALTER"], 54 | permissions_with_grant_option=[], 55 | principal=aws_lakeformation.CfnPrincipalPermissions.DataLakePrincipalProperty( 56 | data_lake_principal_identifier=job_role.role_arn 57 | ), 58 | resource=aws_lakeformation.CfnPrincipalPermissions.ResourceProperty( 59 | #XXX: Can't specify a TableWithColumns resource and a Table resource 60 | table=aws_lakeformation.CfnPrincipalPermissions.TableResourceProperty( 61 | catalog_id=cdk.Aws.ACCOUNT_ID, 62 | database_name=database_name, 63 | table_wildcard={} 64 | ) 65 | ) 66 | ) 67 | lf_permissions_on_table.apply_removal_policy(cdk.RemovalPolicy.DESTROY) 68 | lf_permissions_on_table.add_dependency(cfn_data_lake_settings) 69 | -------------------------------------------------------------------------------- /data_analytics_system/merge_small_files_lambda.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- encoding: utf-8 -*- 3 | # vim: tabstop=2 shiftwidth=2 softtabstop=2 expandtab 4 | 5 | import os 6 | 7 | import aws_cdk as cdk 8 | 9 | from aws_cdk import ( 10 | Stack, 11 | aws_iam, 12 | aws_lambda as _lambda, 13 | aws_logs, 14 | aws_events, 15 | aws_events_targets 16 | ) 17 | from constructs import Construct 18 | 19 | class MergeSmallFilesLambdaStack(Stack): 20 | 21 | def __init__(self, scope: Construct, construct_id: str, s3_bucket_name, **kwargs) -> None: 22 | super().__init__(scope, construct_id, **kwargs) 23 | 24 | _lambda_env = self.node.try_get_context('merge_small_files_lambda_env') 25 | 26 | LAMBDA_ENV_VARS = [ 27 | 'OLD_DATABASE', 28 | 'OLD_TABLE_NAME', 29 | 'NEW_DATABASE', 30 | 'NEW_TABLE_NAME', 31 | 'WORK_GROUP', 32 | 'COLUMN_NAMES' 33 | ] 34 | 
35 | lambda_fn_env = {k: v for k, v in _lambda_env.items() if k in LAMBDA_ENV_VARS} 36 | additional_lambda_fn_env = { 37 | 'WORK_GROUP': 'primary', 38 | 'OLD_TABLE_LOCATION_PREFIX': 's3://{}'.format(os.path.join(s3_bucket_name, 'json-data')), 39 | 'OUTPUT_PREFIX': 's3://{}'.format(os.path.join(s3_bucket_name, _lambda_env['NEW_TABLE_S3_FOLDER_NAME'])), 40 | 'STAGING_OUTPUT_PREFIX': 's3://{}'.format(os.path.join(s3_bucket_name, 'tmp')), 41 | 'REGION_NAME': cdk.Aws.REGION 42 | } 43 | lambda_fn_env.update(additional_lambda_fn_env) 44 | 45 | merge_small_files_lambda_fn = _lambda.Function(self, "MergeSmallFiles", 46 | runtime=_lambda.Runtime.PYTHON_3_11, 47 | function_name="MergeSmallFiles", 48 | handler="athena_ctas.lambda_handler", 49 | description="Merge small files in S3", 50 | code=_lambda.Code.from_asset("./src/main/python/MergeSmallFiles"), 51 | environment=lambda_fn_env, 52 | timeout=cdk.Duration.minutes(5) 53 | ) 54 | 55 | merge_small_files_lambda_fn.add_to_role_policy(aws_iam.PolicyStatement( 56 | effect=aws_iam.Effect.ALLOW, 57 | resources=["*"], 58 | actions=["athena:*"])) 59 | merge_small_files_lambda_fn.add_to_role_policy(aws_iam.PolicyStatement( 60 | effect=aws_iam.Effect.ALLOW, 61 | resources=["*"], 62 | actions=["s3:Get*", 63 | "s3:List*", 64 | "s3:AbortMultipartUpload", 65 | "s3:PutObject", 66 | ])) 67 | merge_small_files_lambda_fn.add_to_role_policy(aws_iam.PolicyStatement( 68 | effect=aws_iam.Effect.ALLOW, 69 | resources=["*"], 70 | actions=["glue:CreateDatabase", 71 | "glue:DeleteDatabase", 72 | "glue:GetDatabase", 73 | "glue:GetDatabases", 74 | "glue:UpdateDatabase", 75 | "glue:CreateTable", 76 | "glue:DeleteTable", 77 | "glue:BatchDeleteTable", 78 | "glue:UpdateTable", 79 | "glue:GetTable", 80 | "glue:GetTables", 81 | "glue:BatchCreatePartition", 82 | "glue:CreatePartition", 83 | "glue:DeletePartition", 84 | "glue:BatchDeletePartition", 85 | "glue:UpdatePartition", 86 | "glue:GetPartition", 87 | "glue:GetPartitions", 88 | "glue:BatchGetPartition" 
89 | ])) 90 | merge_small_files_lambda_fn.add_to_role_policy(aws_iam.PolicyStatement( 91 | effect=aws_iam.Effect.ALLOW, 92 | resources=["*"], 93 | actions=["lakeformation:GetDataAccess"])) 94 | 95 | lambda_fn_target = aws_events_targets.LambdaFunction(merge_small_files_lambda_fn) 96 | aws_events.Rule(self, "ScheduleRule", 97 | schedule=aws_events.Schedule.cron(minute="5"), 98 | targets=[lambda_fn_target] 99 | ) 100 | 101 | log_group = aws_logs.LogGroup(self, "MergeSmallFilesLogGroup", 102 | log_group_name="/aws/lambda/MergeSmallFiles", 103 | removal_policy=cdk.RemovalPolicy.DESTROY, #XXX: for testing 104 | retention=aws_logs.RetentionDays.THREE_DAYS) 105 | log_group.grant_write(merge_small_files_lambda_fn) 106 | 107 | self.lambda_exec_role = merge_small_files_lambda_fn.role 108 | 109 | cdk.CfnOutput(self, f'{self.stack_name}_MergeFilesFuncName', 110 | value=merge_small_files_lambda_fn.function_name) 111 | 112 | cdk.CfnOutput(self, f'{self.stack_name}_LambdaExecRoleArn', 113 | value=self.lambda_exec_role.role_arn) 114 | -------------------------------------------------------------------------------- /data_analytics_system/ops.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- encoding: utf-8 -*- 3 | # vim: tabstop=2 shiftwidth=2 softtabstop=2 expandtab 4 | 5 | import json 6 | 7 | import aws_cdk as cdk 8 | 9 | from aws_cdk import ( 10 | Stack, 11 | aws_ec2, 12 | aws_opensearchservice, 13 | aws_secretsmanager 14 | ) 15 | from constructs import Construct 16 | 17 | class OpenSearchStack(Stack): 18 | 19 | def __init__(self, scope: Construct, construct_id: str, vpc, sg_bastion_host, **kwargs) -> None: 20 | super().__init__(scope, construct_id, **kwargs) 21 | 22 | sg_use_opensearch = aws_ec2.SecurityGroup(self, "OpenSearchClientSG", 23 | vpc=vpc, 24 | allow_all_outbound=True, 25 | description='security group for an opensearch client', 26 | security_group_name='use-ops-cluster-sg' 27 | ) 28 | 
cdk.Tags.of(sg_use_opensearch).add('Name', 'use-ops-cluster-sg') 29 | 30 | sg_opensearch_cluster = aws_ec2.SecurityGroup(self, "OpenSearchSG", 31 | vpc=vpc, 32 | allow_all_outbound=True, 33 | description='security group for an opensearch cluster', 34 | security_group_name='ops-cluster-sg' 35 | ) 36 | cdk.Tags.of(sg_opensearch_cluster).add('Name', 'ops-cluster-sg') 37 | 38 | sg_opensearch_cluster.add_ingress_rule(peer=sg_opensearch_cluster, connection=aws_ec2.Port.all_tcp(), description='ops-cluster-sg') 39 | 40 | sg_opensearch_cluster.add_ingress_rule(peer=sg_use_opensearch, connection=aws_ec2.Port.tcp(443), description='use-ops-cluster-sg') 41 | sg_opensearch_cluster.add_ingress_rule(peer=sg_use_opensearch, connection=aws_ec2.Port.tcp_range(9200, 9300), description='use-ops-cluster-sg') 42 | 43 | sg_opensearch_cluster.add_ingress_rule(peer=sg_bastion_host, connection=aws_ec2.Port.tcp(443), description='bastion-host-sg') 44 | sg_opensearch_cluster.add_ingress_rule(peer=sg_bastion_host, connection=aws_ec2.Port.tcp_range(9200, 9300), description='bastion-host-sg') 45 | 46 | ops_domain_name = 'retail' 47 | 48 | master_user_secret = aws_secretsmanager.Secret(self, "OpenSearchMasterUserSecret", 49 | generate_secret_string=aws_secretsmanager.SecretStringGenerator( 50 | secret_string_template=json.dumps({"username": "admin"}), 51 | generate_string_key="password", 52 | # Master password must be at least 8 characters long and contain at least one uppercase letter, 53 | # one lowercase letter, one number, and one special character. 
54 | password_length=8 55 | ) 56 | ) 57 | 58 | #XXX: aws cdk elastsearch example - https://github.com/aws/aws-cdk/issues/2873 59 | # You should camelCase the property names instead of PascalCase 60 | ops_domain = aws_opensearchservice.Domain(self, "OpenSearch", 61 | domain_name=ops_domain_name, 62 | #XXX: Supported versions of OpenSearch and Elasticsearch 63 | # https://docs.aws.amazon.com/opensearch-service/latest/developerguide/what-is.html#choosing-version 64 | version=aws_opensearchservice.EngineVersion.OPENSEARCH_2_3, 65 | #XXX: Amazon OpenSearch Service - Current generation instance types 66 | # https://docs.aws.amazon.com/opensearch-service/latest/developerguide/supported-instance-types.html#latest-gen 67 | capacity={ 68 | "master_nodes": 3, 69 | "master_node_instance_type": "r6g.large.search", 70 | "data_nodes": 3, 71 | "data_node_instance_type": "r6g.large.search" 72 | }, 73 | ebs={ 74 | "volume_size": 10, 75 | "volume_type": aws_ec2.EbsDeviceVolumeType.GP3 76 | }, 77 | #XXX: az_count must be equal to vpc subnets count. 78 | zone_awareness={ 79 | "availability_zone_count": 3, 80 | "enabled": True 81 | }, 82 | logging={ 83 | "slow_search_log_enabled": True, 84 | "app_log_enabled": True, 85 | "slow_index_log_enabled": True 86 | }, 87 | fine_grained_access_control=aws_opensearchservice.AdvancedSecurityOptions( 88 | master_user_name=master_user_secret.secret_value_from_json("username").to_string(), 89 | master_user_password=master_user_secret.secret_value_from_json("password") 90 | ), 91 | # Enforce HTTPS is required when fine-grained access control is enabled. 92 | enforce_https=True, 93 | # Node-to-node encryption is required when fine-grained access control is enabled 94 | node_to_node_encryption=True, 95 | # Encryption-at-rest is required when fine-grained access control is enabled. 
96 | encryption_at_rest={ 97 | "enabled": True 98 | }, 99 | use_unsigned_basic_auth=True, 100 | security_groups=[sg_opensearch_cluster], 101 | #XXX: For domains running OpenSearch or Elasticsearch 5.3 and later, OpenSearch Service takes hourly automated snapshots 102 | # Only applies for Elasticsearch versions below 5.3 103 | # automated_snapshot_start_hour=17, # 2 AM (GTM+9) 104 | vpc=vpc, 105 | #XXX: az_count must be equal to vpc subnets count. 106 | vpc_subnets=[aws_ec2.SubnetSelection(one_per_az=True, subnet_type=aws_ec2.SubnetType.PRIVATE_WITH_EGRESS)], 107 | removal_policy=cdk.RemovalPolicy.DESTROY # default: cdk.RemovalPolicy.RETAIN 108 | ) 109 | cdk.Tags.of(ops_domain).add('Name', 'analytics-workshop-es') 110 | 111 | self.sg_search_client = sg_use_opensearch 112 | self.search_domain_endpoint = ops_domain.domain_endpoint 113 | self.search_domain_arn = ops_domain.domain_arn 114 | 115 | cdk.CfnOutput(self, 'OPSDomainEndpoint', value=self.search_domain_endpoint, export_name='OPSDomainEndpoint') 116 | cdk.CfnOutput(self, 'OPSDashboardsURL', value=f"{self.search_domain_endpoint}/_dashboards/", export_name='OPSDashboardsURL') 117 | -------------------------------------------------------------------------------- /data_analytics_system/upsert_to_es_lambda.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- encoding: utf-8 -*- 3 | # vim: tabstop=2 shiftwidth=2 softtabstop=2 expandtab 4 | 5 | import os 6 | 7 | import aws_cdk as cdk 8 | 9 | from aws_cdk import ( 10 | Stack, 11 | aws_s3 as s3, 12 | aws_iam, 13 | aws_lambda as _lambda, 14 | aws_logs 15 | ) 16 | from constructs import Construct 17 | 18 | from aws_cdk.aws_lambda_event_sources import ( 19 | KinesisEventSource 20 | ) 21 | 22 | S3_BUCKET_LAMBDA_LAYER_LIB = os.getenv('S3_BUCKET_LAMBDA_LAYER_LIB', 'lambda-layer-resources-use1') 23 | 24 | class UpsertToESStack(Stack): 25 | 26 | def __init__(self, scope: Construct, construct_id: str, vpc, 
trans_kinesis_stream, sg_use_opensearch, ops_domain_endpoint, ops_domain_arn, **kwargs) -> None: 27 | super().__init__(scope, construct_id, **kwargs) 28 | 29 | #XXX: https://github.com/aws/aws-cdk/issues/1342 30 | s3_lib_bucket = s3.Bucket.from_bucket_name(self, construct_id, S3_BUCKET_LAMBDA_LAYER_LIB) 31 | es_lib_layer = _lambda.LayerVersion(self, "ESLib", 32 | layer_version_name="es-lib", 33 | compatible_runtimes=[_lambda.Runtime.PYTHON_3_11], 34 | code=_lambda.Code.from_bucket(s3_lib_bucket, "var/es-lib.zip") 35 | ) 36 | 37 | ES_INDEX_NAME = 'retail' 38 | ES_TYPE_NAME = 'trans' 39 | 40 | #XXX: add more than 2 security groups 41 | # https://github.com/aws/aws-cdk/blob/ea10f0d141a48819ec0000cd7905feda993870a9/packages/%40aws-cdk/aws-lambda/lib/function.ts#L387 42 | # https://github.com/aws/aws-cdk/issues/1555 43 | # https://github.com/aws/aws-cdk/pull/5049 44 | #XXX: Deploy lambda in VPC - https://github.com/aws/aws-cdk/issues/1342 45 | #XXX: Lambda Runtimes - https://docs.aws.amazon.com/lambda/latest/dg/lambda-runtimes.html 46 | upsert_to_es_lambda_fn = _lambda.Function(self, "UpsertToES", 47 | runtime=_lambda.Runtime.PYTHON_3_11, 48 | function_name="UpsertToES", 49 | handler="upsert_to_es.lambda_handler", 50 | description="Upsert records into Amazon OpenSearch Service", 51 | code=_lambda.Code.from_asset("./src/main/python/UpsertToES"), 52 | environment={ 53 | 'ES_HOST': ops_domain_endpoint, 54 | #TODO: MUST set appropriate environment variables for your workloads. 
55 | 'ES_INDEX': ES_INDEX_NAME, 56 | 'ES_TYPE': ES_TYPE_NAME, 57 | 'REQUIRED_FIELDS': 'Invoice,StockCode,Customer_ID', 58 | 'REGION_NAME': kwargs['env'].region, 59 | 'DATE_TYPE_FIELDS': 'InvoiceDate' 60 | }, 61 | timeout=cdk.Duration.minutes(5), 62 | layers=[es_lib_layer], 63 | security_groups=[sg_use_opensearch], 64 | vpc=vpc 65 | ) 66 | 67 | trans_kinesis_event_source = KinesisEventSource(trans_kinesis_stream, batch_size=1000, starting_position=_lambda.StartingPosition.LATEST) 68 | upsert_to_es_lambda_fn.add_event_source(trans_kinesis_event_source) 69 | 70 | upsert_to_es_lambda_fn.add_to_role_policy(aws_iam.PolicyStatement( 71 | effect=aws_iam.Effect.ALLOW, 72 | resources=[ops_domain_arn, "{}/*".format(ops_domain_arn)], 73 | actions=["es:DescribeElasticsearchDomain", 74 | "es:DescribeElasticsearchDomains", 75 | "es:DescribeElasticsearchDomainConfig", 76 | "es:ESHttpPost", 77 | "es:ESHttpPut"] 78 | )) 79 | 80 | upsert_to_es_lambda_fn.add_to_role_policy(aws_iam.PolicyStatement( 81 | effect=aws_iam.Effect.ALLOW, 82 | #XXX: https://aws.amazon.com/premiumsupport/knowledge-center/kinesis-data-firehose-delivery-failure/ 83 | resources=[ 84 | ops_domain_arn, 85 | f"{ops_domain_arn}/_all/_settings", 86 | f"{ops_domain_arn}/_cluster/stats", 87 | f"{ops_domain_arn}/{ES_INDEX_NAME}*/_mapping/{ES_TYPE_NAME}", 88 | f"{ops_domain_arn}/_nodes", 89 | f"{ops_domain_arn}/_nodes/stats", 90 | f"{ops_domain_arn}/_nodes/*/stats", 91 | f"{ops_domain_arn}/_stats", 92 | f"{ops_domain_arn}/{ES_INDEX_NAME}*/_stats" 93 | ], 94 | actions=["es:ESHttpGet"] 95 | )) 96 | 97 | log_group = aws_logs.LogGroup(self, "UpsertToESLogGroup", 98 | log_group_name="/aws/lambda/UpsertToES", 99 | removal_policy=cdk.RemovalPolicy.DESTROY, #XXX: for testing 100 | retention=aws_logs.RetentionDays.THREE_DAYS) 101 | log_group.grant_write(upsert_to_es_lambda_fn) 102 | 103 | cdk.CfnOutput(self, f'{self.stack_name}_LambdaRoleArn', value=upsert_to_es_lambda_fn.role.role_arn) 104 | 
-------------------------------------------------------------------------------- /data_analytics_system/vpc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- encoding: utf-8 -*- 3 | # vim: tabstop=2 shiftwidth=2 softtabstop=2 expandtab 4 | 5 | import os 6 | import aws_cdk as cdk 7 | 8 | from aws_cdk import ( 9 | Stack, 10 | aws_ec2 11 | ) 12 | from constructs import Construct 13 | 14 | class VpcStack(Stack): 15 | 16 | def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None: 17 | super().__init__(scope, construct_id, **kwargs) 18 | 19 | #XXX: For creating this CDK Stack in the existing VPC, 20 | # remove comments from the below codes and 21 | # comments out vpc = aws_ec2.Vpc(..) codes, 22 | # then pass -c vpc_name=your-existing-vpc to cdk command 23 | # for example, 24 | # cdk -c vpc_name=your-existing-vpc syth 25 | # 26 | if str(os.environ.get('USE_DEFAULT_VPC', 'false')).lower() == 'true': 27 | vpc_name = self.node.try_get_context('vpc_name') or "default" 28 | self.vpc = aws_ec2.Vpc.from_lookup(self, 'ExistingVPC', 29 | is_default=True, 30 | vpc_name=vpc_name 31 | ) 32 | else: 33 | self.vpc = aws_ec2.Vpc(self, "AnalyticsWorkshopVPC", 34 | ip_addresses=aws_ec2.IpAddresses.cidr("10.0.0.0/21"), 35 | max_azs=3, 36 | 37 | # 'subnetConfiguration' specifies the "subnet groups" to create. 38 | # Every subnet group will have a subnet for each AZ, so this 39 | # configuration will create `2 groups × 3 AZs = 6` subnets. 
40 | subnet_configuration=[ 41 | { 42 | "cidrMask": 24, 43 | "name": "Public", 44 | "subnetType": aws_ec2.SubnetType.PUBLIC, 45 | }, 46 | { 47 | "cidrMask": 24, 48 | "name": "Private", 49 | "subnetType": aws_ec2.SubnetType.PRIVATE_WITH_EGRESS 50 | } 51 | ], 52 | gateway_endpoints={ 53 | "S3": aws_ec2.GatewayVpcEndpointOptions( 54 | service=aws_ec2.GatewayVpcEndpointAwsService.S3 55 | ) 56 | } 57 | ) 58 | 59 | 60 | cdk.CfnOutput(self, 'VPCID', value=self.vpc.vpc_id, 61 | export_name=f'{self.stack_name}-VPCID') 62 | 63 | -------------------------------------------------------------------------------- /doc_sources/athena_sample_query.md: -------------------------------------------------------------------------------- 1 | ### 데이터베이스 생성 2 | 3 | ```text 4 | CREATE DATABASE mydatabase; 5 | ``` 6 | 7 | ### json 데이터 포맷으로 저장된 테이블 생성 8 | ```text 9 | CREATE EXTERNAL TABLE IF NOT EXISTS mydatabase.retail_trans_json ( 10 | `Invoice` string, 11 | `StockCode` string, 12 | `Description` string, 13 | `Quantity` int, 14 | `InvoiceDate` timestamp, 15 | `Price` float, 16 | `Customer_ID` string, 17 | `Country` string 18 | ) 19 | ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe' 20 | WITH SERDEPROPERTIES ( 21 | 'serialization.format' = '1' 22 | ) LOCATION 's3://aws-analytics-immersion-day-xxxxxxxx/json-data/' 23 | TBLPROPERTIES ('has_encrypted_data'='false'); 24 | ``` 25 | 26 | ### Monthly Revenue 계산 쿼리 27 | ```text 28 | SELECT date_trunc('month', invoicedate) invoice_year_month, sum(price*quantity) revenue 29 | FROM mydatabase.retail_trans_json 30 | WHERE invoicedate 31 | BETWEEN timestamp '2010-01-01' 32 | AND timestamp '2010-12-31' 33 | GROUP BY date_trunc('month', invoicedate); 34 | 35 | 36 | SELECT date_trunc('month', invoicedate) invoice_year_month, sum(price*quantity) revenue 37 | FROM mydatabase.retail_parquet_ctas_table 38 | WHERE invoicedate 39 | BETWEEN timestamp '2010-01-01' 40 | AND timestamp '2010-12-31' 41 | GROUP BY date_trunc('month', invoicedate); 42 | 43 | 44 | 
SELECT date_trunc('month', invoicedate) invoice_year_month, sum(price*quantity) revenue 45 | FROM mydatabase.retail_parquet_snappy_ctas_table 46 | WHERE invoicedate 47 | BETWEEN timestamp '2010-01-01' 48 | AND timestamp '2010-12-31' 49 | GROUP BY date_trunc('month', invoicedate); 50 | ``` 51 | 52 | ### CTAS 예제 53 | ```text 54 | -- Parquet 데이터로 새 테이블을 생성 55 | CREATE TABLE retail_parquet_ctas_table 56 | WITH ( 57 | external_location = 's3://aws-analytics-immersion-day-xxxxxxxx/parquet-data/', 58 | format = 'PARQUET') 59 | AS SELECT * 60 | FROM retail_trans_json; 61 | 62 | 63 | -- 데이터를 Snappy로 압축해서 Parquet 데이터로 새 테이블을 생성 64 | CREATE TABLE retail_parquet_snappy_ctas_table 65 | WITH ( 66 | external_location = 's3://aws-analytics-immersion-day-xxxxxxxx/parquet-snappy-data/', 67 | format = 'PARQUET', 68 | parquet_compression = 'SNAPPY') 69 | AS SELECT * 70 | FROM retail_trans_json; 71 | ``` 72 | 73 | ### Reference 74 | - https://docs.aws.amazon.com/ko_kr/athena/latest/ug/ctas-examples.html#ctas-example-format 75 | - https://docs.aws.amazon.com/ko_kr/athena/latest/ug/ctas-examples.html#ctas-example-compression 76 | -------------------------------------------------------------------------------- /doc_sources/prerequisites.en.md: -------------------------------------------------------------------------------- 1 | # Lab setup 2 | Before starting the lab, create and configure EC2, the IAM user you need. 3 | 4 | ## Table of Contents 5 | * [Creating an IAM User](#iam-user) 6 | * [Creating a new VPC](#vpc) 7 | * [Creating Security Groups](#security-group) 8 | * [Launch an EC2 Instance](#ec2-launch) 9 | * [Configuring your EC2 Instance](#ec2-user-configuration) 10 | 11 | ## Creating an IAM User 12 | Let’s create an IAM User to use during the lab. 13 | 14 | 1. Log in to the AWS Management Console and access the IAM service. 15 | 2. Select **Users** from the left menu. 16 | 3. Click the **Add user** button to enter the Add User page. 17 | 4. 
Enter `<user name>` in User name, and then choose both **Programmatic access** and **AWS Management Console access**. Next, enter `<password>` in **Console password**,
43 | 44 | \[[Top](#top)\] 45 | 46 | ## Creating Security Groups 47 | ### Security Groups to create an EC2 instance for a bastion host 48 | Create and configure a security group of EC2 instance. 49 | 50 | 1. Connect to the EC2 service in the AWS Management Console. 51 | 2. Select the **Security Groups** item from the **NETWORK & SECURITY** menu. 52 | 3. Click **\[Create Security Group\]**. 53 | 4. On the **Create Security Group** screen, enter the necessary information for the **Security Group**, and then **\[Create\]** a new security group. 54 | + Security group name : `bastion` 55 | + Description : `security group for bastion` 56 | 57 | Enter the following in **Inbound** of the security group rules. 58 | + Type : SSH 59 | + Protocol : TCP 60 | + Port Range : 22 61 | + Source : `0.0.0.0/0` 62 | 63 | ![aws-ec2-security-group-for-bastion](../assets/aws-ec2-security-group-for-bastion.png) 64 | 65 | \[[Top](#top)\] 66 | 67 | ### Security Groups created for use in Elasticsearch Service 68 | Create and configure a security group for Elasticsearch Service. 69 | 70 | 1. Connect to EC2 service in AWS Management Console. 71 | 2. Select the **Security Groups** item from the **NETWORK & SECURITY** menu. 72 | 3. Click **\[Create Security Group\]**. 73 | 4. On the **Create Security Group** screen, enter the necessary information for the Security Group, and then **\[Create\]** a new security group. 74 | + Security group name : `use-es-cluster-sg` 75 | + Description : `security group for an es client` 76 | 77 | Enter nothing in **Inbound** of the security group rules. 78 | 79 | ![aws-ec2-security-group-for-es-client](../assets/aws-ec2-security-group-for-es-client.png) 80 | 5. Click **\[Create Security Group\]** again to go to the **Create Security Group** screen. After entering the necessary information for the security group, **\[Create\]** a new security group. 
81 | + Security group name : `es-cluster-sg` 82 | + Description : `security group for an es cluster` 83 | 84 | Enter the following in **Inbound** of the security group rules. 85 | + Type : All TCP 86 | + Protocol : TCP 87 | + Port Range : 0-65535 88 | + Source : `use-es-cluster-sg` 의 security group id ex) sg-038b632ef1825cb7f 89 | 90 | ![aws-ec2-security-group-for-es-cluster](../assets/aws-ec2-security-group-for-es-cluster.png) 91 | 92 | \[[Top](#top)\] 93 | 94 | ## Launch an EC2 Instance 95 | Create an EC2 instance that will generate the data needed for the lab in real time. 96 | 97 | 1. Connect to EC2 service in AWS Management Console. 98 | 2. In the upper right, select your region (e.g., **N. Virginia**). 99 | 3. Select **Instances** from the left **INSTANCES** menu and click **\[Launch Instance\]** to start creating a new instance. 100 | ![aws-ec2-launch-instance](../assets/aws-ec2-launch-instance.png) 101 | 4. Step 1: On the **Choose an Amazon Machine Image (AMI)** screen, choose **Amazon Linux 2 AMI (HVM), SSD Volume Type**. 102 | ![aws-ec2-choose-ami](../assets/aws-ec2-choose-ami.png) 103 | 5. Step 2: On the **Choose an Instance Type** screen, select `t2.micro` as the instance type. Click **\[Next: Configure Instance Details\]**. 104 | ![aws-ec2-choose-instance-type](../assets/aws-ec2-choose-instance-type.png) 105 | 6. Step 3: On the **Configure Instance Details** screen, select **Enable** for **Auto-assign Public IP**, and click **\[Next: Add Storage\]**. 106 | ![aws-ec2-configure-instance-details](../assets/aws-ec2-configure-instance-details.png) 107 | 7. Step 4: On the **Add Storage** screen, leave the defaults and click **\[Next: Add Tags\]**. 108 | 8. Step 5: On the **Add Tags** screen, click **\[Next: Configure Security Group\]**. 109 | 9. 
Step 6: On the **Configure Security Group** screen, select **Select an existing security group** from **Assign a security group**, and then select `bastion` and `use-es-cluster-sg` from the **Security Group** and click **\[Review and Launch\]**. 110 | ![aws-ec2-configure-security-group](../assets/aws-ec2-configure-security-group.png) 111 | 10. Step 7: click **\[Launch\]** on the **Review Instance Launch** screen. 112 | 11. Create a key pair to access EC2 Instance. 113 | Select Create a new key pair, enter `analytics-hol` as the Key pair name, and click **Download Key Pair**. 114 | Save the Key Pair to any location on your PC and click **\[Launch Instances\]**. (EC2 Instance startup may take several minutes.) 115 | ![aws-ec2-select-keypair](../assets/aws-ec2-select-keypair.png) 116 | 12. For MacOS users, Change the File Permission of the downloaded Key Pair file to 400. 117 | ```shell script 118 | $ chmod 400 ./analytics-hol.pem 119 | $ ls -lat analytics-hol.pem 120 | -r-------- 1 ****** ****** 1692 Jun 25 11:49 analytics-hol.pem 121 | ``` 122 | 123 | For Windows OS users, Please refer to [Use PuTTY to connect to your Linux instance from Windows](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/putty.html). 124 | 125 | \[[Top](#top)\] 126 | 127 | ## Configuring your EC2 Instance 128 | Configure the EC2 instances to access and control other AWS resources as follows: 129 | 1. Log into the EC2 instance by ssh. 130 | ```shell script 131 | ssh -i "" ec2-user@ 132 | ``` 133 | or, open Amazon EC2 console at [https://console.aws.amazon.com/ec2/](https://console.aws.amazon.com/ec2/).
134 | In the navigation pane, select the ec2 instance previously created, and then click **\[Connect\]**.
135 | ![aws-ec2-connect](../assets/aws-ec2-connect.png) 136 | When the following screen appears, press **\[EC2 Instance Connect\]** to connect to your instance. 137 | ![aws-ec2-ssm](../assets/aws-ec2-ssm.png) 138 | 139 | 2. Perform the following actions in order on the EC2 instance connected with ssh. 140 | 141 | (1) Download the source code. 142 | ```shell script 143 | wget 'https://github.com/aws-samples/aws-analytics-immersion-day/archive/refs/heads/main.zip' 144 | ``` 145 | (2) Extract the downloaded source code. 146 | ```shell script 147 | unzip -u main.zip 148 | ``` 149 | (3) Grant execution authority to the practice environment setting script. 150 | ```shell script 151 | chmod +x ./aws-analytics-immersion-day-main/set-up-hands-on-lab.sh 152 | ``` 153 | (4) Execute the setup script to set the lab environment. 154 | ```shell script 155 | ./aws-analytics-immersion-day-main/set-up-hands-on-lab.sh 156 | ``` 157 | (5) Make sure the files necessary for the lab are normally created after running the configuration script. For example, check if the source code and necessary files exist as shown below. 158 | ```shell script 159 | [ec2-user@ip-172-31-2-252 ~]$ ls -1 160 | athena_ctas.py 161 | aws-analytics-immersion-day-main 162 | gen_kinesis_data.py 163 | main.zip 164 | upsert_to_es.py 165 | ``` 166 | 167 | 3. In order to run the Python synthentic data generator script (`gen_kinesis_data.py`), we need to set user credentials by following the instructions: 168 | 169 | 1. Perform `aws configure` to access other AWS resources. At this time, the IAM User data created earlier is used. 170 | Open the previously downloaded **.csv** file, check the `Access key ID` and `Secret access key`, and enter them. 171 | ```shell script 172 | $ aws configure 173 | AWS Access Key ID [None]: 174 | AWS Secret Access Key [None]: 175 | Default region name [None]: us-west-2 176 | Default output format [None]: 177 | ``` 178 | 179 | 2. 
If the setting is complete, the information entered as follows will be masked. 180 | ```shell script 181 | $ aws configure 182 | AWS Access Key ID [****************EETA]: 183 | AWS Secret Access Key [****************CixY]: 184 | Default region name [None]: us-west-2 185 | Default output format [None]: 186 | ``` 187 | 188 | :information_source: Instead of using user credentials, you can attach an IAM role to the EC2 instance to run `gen_kinesis_data.py`. The IAM role should have IAM policies to read/write Amazon Kinesis Data Streams and Kinesis Data Firehose. For the purpose of this hands-on lab, you can create an IAM role attached with `AmazonKinesisFullAccess` and `AmazonKinesisFirehoseFullAccess`. 189 | 190 | \[[Top](#top)\] 191 | -------------------------------------------------------------------------------- /doc_sources/prerequisites.kr.md: -------------------------------------------------------------------------------- 1 | # 실습 준비 작업 2 | 실습을 시작 하기 전에 필요한 IAM User, EC2를 생성하고 및 구성합니다. 3 | 4 | ## Table of Contents 5 | * [IAM User 생성](#iam-user) 6 | * [VPC 생성](#vpc) 7 | * [Security Groups 생성](#security-group) 8 | * [EC2 생성](#ec2-launch) 9 | * [EC2 설정](#ec2-user-configuration) 10 | 11 | ## IAM User 생성 12 | 실습 하는 동안 사용할 IAM User를 생성합니다. 13 | 14 | 1. AWS Management Console에 로그인 한 뒤 IAM 서비스에 접속합니다. 15 | 2. 왼쪽 메뉴에서 Users를 선택합니다. 16 | 3. Add user 버튼을 클릭하여 사용자 추가 페이지로 들어갑니다. 17 | 4. User name에 `<사용자 이름>` 을 입력하고, Access type에 Programmatic access와 18 | AWS Management Console access 둘 모두를 선택합니다. Console password에 `<패스워드>` 를 입력하고, 19 | 마지막 Require password reset의 체크는 해제합니다. 20 | ![iam-user](../assets/iam-user.png) 21 | 5. **\[Next: Permissions\]** 버튼을 클릭하고 Attach existing policies directly를 선택한 뒤 AdministratorAccess 권한을 추가해줍니다. 22 | ![iam-user-policy](../assets/iam-user-policy.png) 23 | 6. **\[Next: Review\]** 버튼을 클릭하고 정보를 확인한 뒤 Create user 버튼을 클릭하여 사용자 생성을 완료합니다. 24 | 7. Download.csv 버튼을 클릭하여 생성한 사용자의 정보를 다운 받습니다. EC2 설정에 꼭 필요한 파일이므로 기억하기 쉬운 위치에 저장합니다. 
25 | ![iam-user-download](../assets/iam-user-download.png) 26 | 27 | \[[Top](#top)\] 28 | 29 | ## VPC 생성 30 | 실습 하는 동안 사용할 새로운 VPC를 생성합니다. 31 | 1. AWS Management Console에 로그인 한 뒤 CloudFormation 서비스에 접속합니다. 32 | 2. 왼쪽 메뉴에서 **Create stack** 을 선택합니다. 33 | 3. Dropdown 목록에서 **With new resources (standard)** 을 선택합니다. 34 | ![cfn-stacks-create-stack](../assets/cfn-stacks-create-stack.png) 35 | 4. **Specify template** 섹션에서 **Upload a template file** 를 선택합니다. 36 | 5. **Choose file** 을 클릭해서, [https://github.com/aws-samples/aws-analytics-immersion-day](https://github.com/aws-samples/aws-analytics-immersion-day)에서 로컬 PC에 다운로드 받은 CloudFormation Template 파일을 업로드 합니다. 37 | ![cfn-step1-create-stack](../assets/cfn-step1-create-stack.png) 38 | ::alert[현재 실습 중인 AWS Region에서 사용할 수 있는 Cloudformation Template 파일을 업로드 해야합니다.]{type="info"} 39 | 6. **Next** 버튼을 클릭해서 다음 단계로 이동합니다. 40 | 7. CloudFormation 스택 이름(예: `vpc`)을 입력하고, **Next** 을 클릭해서 다음 단계로 진행합니다. 41 | 8. 나머지 설정을 그대로 두고, 마지막 단계로 이동합니다. 42 | 9. **Submit** 버튼을 클릭해서 VPC를 생성합니다. 43 | 44 | \[[Top](#top)\] 45 | 46 | ## Security Groups 생성 47 | ### bastion host로 사용할 EC2 인스턴스를 위한 Security Groups 생성 48 | 실습용 EC2 인스턴에서 사용할 security group을 생성하고 구성합니다. 49 | 50 | 1. AWS Management Console에서 EC2 서비스에 접속합니다. 51 | 2. **NETWORK & SECURITY** 메뉴에서 **Security Groups** 항목을 선택합니다. 52 | 3. **\[Create Security Group\]** 을 클릭합니다. 53 | 4. Create Security Group 화면에서 Security Group에 필요한 정보를 입력한 후, 새로운 security group을 **\[Create\]** 합니다. 54 | + Security group name : `bastion` 55 | + Description : `security group for bastion` 56 | 57 | Security group rules의 **Inbound** 에 아래 내용을 입력합니다. 58 | + Type : SSH 59 | + Protocol : TCP 60 | + Port Range : 22 61 | + Source : `0.0.0.0/0` 62 | 63 | ![aws-ec2-security-group-for-bastion](../assets/aws-ec2-security-group-for-bastion.png) 64 | 65 | \[[Top](#top)\] 66 | 67 | ### Elasicsearch Service에서 사용할 Security Groups 생성 68 | Elasticsearch Service을 위한 security group을 생성하고 구성합니다. 69 | 1. AWS Management Console에서 EC2 서비스에 접속합니다. 
70 | 2. **NETWORK & SECURITY** 메뉴에서 **Security Groups** 항목을 선택합니다. 71 | 3. **\[Create Security Group\]** 을 클릭합니다. 72 | 4. Create Security Group 화면에서 Security Group에 필요한 정보를 입력한 후, 새로운 security group을 **\[Create\]** 합니다. 73 | + Security group name : `use-es-cluster-sg` 74 | + Description : `security group for an es client` 75 | 76 | Security group rules의 **Inbound** 은 아무것도 입력하지 않습니다. 77 | 78 | ![aws-ec2-security-group-for-es-client](../assets/aws-ec2-security-group-for-es-client.png) 79 | 5. 다시 **\[Create Security Group\]** 클릭해서 Create Security Group 화면으로 이동합니다. 80 | Security Group에 필요한 정보를 입력한 후, 새로운 security group을 **\[Create\]** 합니다. 81 | + Security group name : `es-cluster-sg` 82 | + Description : `security group for an es cluster` 83 | 84 | Security group rules의 **Inbound** 에 아래 내용을 입력합니다. 85 | + Type : All TCP 86 | + Protocol : TCP 87 | + Port Range : 0-65535 88 | + Source : `use-es-cluster-sg` 의 security group id ex) sg-038b632ef1825cb7f 89 | 90 | ![aws-ec2-security-group-for-es-cluster](../assets/aws-ec2-security-group-for-es-cluster.png) 91 | 92 | \[[Top](#top)\] 93 | 94 | ## EC2 생성 95 | 실습에 필요한 데이터를 실시간으로 발생시킬 EC2 인스턴스를 생성합니다. 96 | 1. AWS Management Console에서 EC2 서비스에 접속합니다. 97 | 2. 우측 상단에서 Region은 US West (Oregon)를 선택합니다. 98 | 3. 좌측 **INSTANCES** 메뉴에서 **Instances** 를 선택한 후, **\[Launch Instance\]** 를 클릭 해서 새로운 인스턴스 생성을 시작합니다. 99 | ![aws-ec2-launch-instance](../assets/aws-ec2-launch-instance.png) 100 | 4. Step 1: Choose an Amazon Machine Image (AMI) 화면에서 **Amazon Linux AMI 2018.03.0 (HVM), SSD Volume Type** 을 선택합니다. 101 | ![aws-ec2-choose-ami](../assets/aws-ec2-choose-ami.png) 102 | 5. Step 2 : Choose an Instance Type 화면에서 인스턴스 타입은 t2.micro를 선택합니다. **\[Next: Configure Instance Details\]** 을 클릭합니다. 103 | ![aws-ec2-choose-instance-type](../assets/aws-ec2-choose-instance-type.png) 104 | 6. Step 3: Configure Instance Details 화면에서 **Auto-assign Public IP** 를 **Enable** 선택하고, **\[Next: Add Storage\]** 을 클릭합니다. 
105 | ![aws-ec2-configure-instance-details](../assets/aws-ec2-configure-instance-details.png) 106 | 7. Step 4: Add Storage 화면에서 기본값을 그대로 두고 **\[Next: Add Tags\]** 를 클릭합니다. 107 | 8. Step 5: Add Tags 화면에서 **\[Next: Configure Security Group\]** 을 클릭합니다. 108 | 9. Step 6: Configure Security Group 화면에서 Assign a security group 에서 Select an **existing** security group를 선택하고, 109 | Security Group 중에서 Name이 `bastion`과 `use-es-cluster-sg` 를 선택 한 후 **\[Review and Launch\]** 를 클릭합니다. 110 | ![aws-ec2-configure-security-group](../assets/aws-ec2-configure-security-group.png) 111 | 10. Step 7: Review Instance Launch 화면에서 **\[Launch\]** 를 클릭합니다. 112 | 11. EC2 Instance에 접속하기 위한 Key pair를 생성합니다. 113 | Create a new key pair를 선택하고 Key pair name은 `analytics-hol` 을 입력한 후 Download Key Pair를 클릭합니다. 114 | Key Pair를 PC의 임의 위치에 저장한 후 **\[Launch Instances\]** 를 클릭합니다. (인스턴스 기동에 몇 분이 소요될 수 있습니다.) 115 | ![aws-ec2-select-keypair](../assets/aws-ec2-select-keypair.png) 116 | 12. (MacOS 사용자) 다운로드 받은 Key Pair 파일의 File Permission을 400으로 변경합니다. 117 | ```shell script 118 | $ chmod 400 ./analytics-hol.pem 119 | $ ls -lat analytics-hol.pem 120 | -r-------- 1 ****** ****** 1692 Jun 25 11:49 analytics-hol.pem 121 | ``` 122 | Windows OS 사용자의 경우, [PuTTY를 사용하여 Windows에서 Linux 인스턴스에 연결](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/putty.html) 123 | 를 참고하십시요. 124 | 125 | \[[Top](#top)\] 126 | 127 | ## EC2 설정 128 | 생성한 EC2 인스턴스가 다른 AWS 리소스에 접근 및 제어할 수 있도록 다음과 같이 구성합니다. 129 | 1. 생성한 인스턴스의 Public IP로 SSH 접속을 합니다. 130 | ```shell script 131 | ssh -i "" ec2-user@ 132 | ``` 133 | 또는, [https://console.aws.amazon.com/ec2/](https://console.aws.amazon.com/ec2/)에서 Amazon EC2 콘솔을 엽니다.
134 | 탐색 창에서 방금 생성한 **인스턴스**를 선택하고, **Connect**를 클릭합니다.
135 | ![aws-ec2-connect](../assets/aws-ec2-connect.png) 136 | 다음과 같은 화면이 뜨면, **Connect** 를 눌러 인스턴스에 접속합니다. 137 | ![aws-ec2-ssm](../assets/aws-ec2-ssm.png) 138 | 139 | 2. ssh로 접속한 EC2 인스턴스에서 다음 작업을 순서대로 수행 합니다. 140 | 141 | (1) 소스 코드를 다운로드 받는다. 142 | ```shell script 143 | wget 'https://github.com/aws-samples/aws-analytics-immersion-day/archive/refs/heads/main.zip' 144 | ``` 145 | (2) 다운로드 받은 소스 코드의 압축을 해제한다. 146 | ```shell script 147 | unzip -u main.zip 148 | ``` 149 | (3) 실습 환경 설정 스크립트에 실행 권한을 부여한다. 150 | ```shell script 151 | chmod +x ./aws-analytics-immersion-day-main/set-up-hands-on-lab.sh 152 | ``` 153 | (4) 실습 환경 설정 스크립트를 실행한다. 154 | ```shell script 155 | ./aws-analytics-immersion-day-main/set-up-hands-on-lab.sh 156 | ``` 157 | (5) 실습 환경 설정 스크립트 실행 후, 실습에 필요한 파일들이 정상적으로 생성되었는지 확인한다. 158 | 예를 들어 아래와 같이 소스 코드와 필요한 파일들이 존재하는지 확인한다. 159 | ```shell script 160 | [ec2-user@ip-172-31-2-252 ~]$ ls -1 161 | athena_ctas.py 162 | aws-analytics-immersion-day-main 163 | gen_kinesis_data.py 164 | main.zip 165 | upsert_to_es.py 166 | ``` 167 | 168 | 3. 테스트 데이터를 생성하는 `gen_kinesis_data.py` 를 실행하기 위해서, AWS User credentials를 아래와 같은 방법으로 설정해야 합니다. 169 | 170 | 1. AWS의 다른 리소스 접근을 위해 AWS Configure를 진행합니다. 이때 앞서 생성한 IAM User 데이터를 활용합니다. 171 | 이전에 다운로드 받은 .csv 파일을 열어 `Access key ID`와 `Secret access key`를 확인하고 입력합니다. 172 | ```shell script 173 | $ aws configure 174 | AWS Access Key ID [None]: 175 | AWS Secret Access Key [None]: 176 | Default region name [None]: us-west-2 177 | Default output format [None]: 178 | ``` 179 | 2. 설정이 완료 되었다면 다음과 같이 입력하신 정보가 마스킹 되어 보이게 됩니다. 
180 | ```shell script 181 | $ aws configure 182 | AWS Access Key ID [****************EETA]: 183 | AWS Secret Access Key [****************CixY]: 184 | Default region name [None]: us-west-2 185 | Default output format [None]: 186 | ``` 187 | 188 | :information_source: AWS User credentials을 설정하지 않고, `gen_kinesis_data.py` 를 실행하고자 하는 경우, Amazon Kinesis Data Streams와 Kinesis Data Firehose에 Read/Write할 수 있는 권한을 갖는 IAM Role을 EC2 인스턴스에 부여하는 방법이 있습니다. 이번 실습을 위해서 `AmazonKinesisFullAccess` 과 `AmazonKinesisFirehoseFullAccess` IAM Policy를 포함하는 IAM Role을 생성해서 아래와 같이 EC2 인스턴스에 연결할 수 있습니다. 189 | ![aws-ec2instance-modify-iam-role](../assets/aws-ec2instance-modify-iam-role.png) 190 | 191 | \[[Top](#top)\] 192 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | # pip install boto3 2 | boto3==1.34.49 3 | botocore==1.34.49 4 | 5 | # pip install opensearch 6 | opensearch-py==2.0.1 7 | requests==2.32.0 8 | requests-aws4auth==1.1.2 9 | 10 | # packages for python synthetic data generator 11 | mimesis==4.1.3 -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # pip install aws-cdk 2 | aws-cdk-lib==2.130.0 3 | constructs>=10.0.0,<11.0.0 -------------------------------------------------------------------------------- /set-up-hands-on-lab.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash - 2 | 3 | WORK_DIR=$(cd $(dirname $0); pwd) 4 | TARGET_DIR=$(dirname ${WORK_DIR}) 5 | 6 | OS_NAME=$(cat /etc/os-release | awk -F "=" '$1 == "NAME" { print $2 }') 7 | if [[ z"${OS_NAME}" == z"\"Amazon Linux AMI\"" ]]; 8 | then 9 | sudo yum -y update 10 | sudo yum -y install python36 11 | sudo yum -y install python3-pip 12 | 13 | sudo pip-3.6 install -U boto3 14 | sudo pip-3.6 install csvkit 15 
| sudo pip-3.6 install mimesis==4.1.3 16 | elif [[ z"${OS_NAME}" == z"\"Amazon Linux\"" ]]; 17 | then 18 | sudo yum -y update 19 | sudo yum -y install python3 20 | sudo yum -y install python3-pip 21 | 22 | sudo pip3 install -U boto3 23 | sudo pip3 install csvkit 24 | sudo pip3 install mimesis==4.1.3 25 | elif [[ z"${OS_NAME}" == z"\"Ubuntu\"" ]]; 26 | then 27 | sudo apt-get -y update 28 | sudo apt-get -y install python3.6 29 | sudo apt-get -y install python3-pip 30 | 31 | sudo pip3 install -U boto3 32 | sudo pip3 install csvkit 33 | sudo pip3 install mimesis==4.1.3 34 | else 35 | echo "[Unknown OS] You should install python3.6+, pip3+ for yourself!" 36 | exit 0 37 | fi 38 | 39 | ln -sf ${WORK_DIR}/src/main/python/UpsertToES/upsert_to_es.py ${TARGET_DIR}/upsert_to_es.py 40 | ln -sf ${WORK_DIR}/src/main/python/MergeSmallFiles/athena_ctas.py ${TARGET_DIR}/athena_ctas.py 41 | ln -sf ${WORK_DIR}/src/main/python/utils/gen_kinesis_data.py ${TARGET_DIR}/gen_kinesis_data.py 42 | 43 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | 4 | with open("README.md") as fp: 5 | long_description = fp.read() 6 | 7 | 8 | setuptools.setup( 9 | name="data_analytics_system", 10 | version="0.0.1", 11 | 12 | description="An empty CDK Python app", 13 | long_description=long_description, 14 | long_description_content_type="text/markdown", 15 | 16 | author="author", 17 | 18 | package_dir={"": "data_analytics_system"}, 19 | packages=setuptools.find_packages(where="data_analytics_system"), 20 | 21 | install_requires=[ 22 | "aws-cdk-lib", 23 | "constructs" 24 | ], 25 | 26 | python_requires=">=3.6", 27 | 28 | classifiers=[ 29 | "Development Status :: 4 - Beta", 30 | 31 | "Intended Audience :: Developers", 32 | 33 | "License :: OSI Approved :: Apache Software License", 34 | 35 | "Programming Language :: JavaScript", 36 | "Programming Language :: 
Python :: 3 :: Only", 37 | "Programming Language :: Python :: 3.6", 38 | "Programming Language :: Python :: 3.7", 39 | "Programming Language :: Python :: 3.8", 40 | 41 | "Topic :: Software Development :: Code Generators", 42 | "Topic :: Utilities", 43 | 44 | "Typing :: Typed", 45 | ], 46 | ) 47 | -------------------------------------------------------------------------------- /source.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | 3 | rem The sole purpose of this script is to make the command 4 | rem 5 | rem source .env/bin/activate 6 | rem 7 | rem (which activates a Python virtualenv on Linux or Mac OS X) work on Windows. 8 | rem On Windows, this command just runs this batch file (the argument is ignored). 9 | rem 10 | rem Now we don't need to document a Windows command for activating a virtualenv. 11 | 12 | echo Executing .env\Scripts\activate.bat for you 13 | .env\Scripts\activate.bat 14 | -------------------------------------------------------------------------------- /src/main/python/MergeSmallFiles/athena_ctas.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- encoding: utf-8 -*- 3 | # vim: tabstop=2 shiftwidth=2 softtabstop=2 expandtab 4 | 5 | import sys 6 | import os 7 | import datetime 8 | import time 9 | import random 10 | 11 | import boto3 12 | 13 | random.seed(47) 14 | 15 | DRY_RUN = (os.getenv('DRY_RUN', 'false').lower() == 'true') 16 | AWS_REGION = os.getenv('REGION_NAME', 'us-east-1') 17 | 18 | OLD_DATABASE = os.getenv('OLD_DATABASE') 19 | OLD_TABLE_NAME = os.getenv('OLD_TABLE_NAME') 20 | NEW_DATABASE = os.getenv('NEW_DATABASE') 21 | NEW_TABLE_NAME = os.getenv('NEW_TABLE_NAME') 22 | WORK_GROUP = os.getenv('WORK_GROUP', 'primary') 23 | OLD_TABLE_LOCATION_PREFIX = os.getenv('OLD_TABLE_LOCATION_PREFIX') 24 | OUTPUT_PREFIX = os.getenv('OUTPUT_PREFIX') 25 | STAGING_OUTPUT_PREFIX = os.getenv('STAGING_OUTPUT_PREFIX') 26 | 
COLUMN_NAMES = os.getenv('COLUMN_NAMES', '*') 27 | 28 | EXTERNAL_LOCATION_FMT = '''{output_prefix}/year={year}/month={month:02}/day={day:02}/hour={hour:02}/''' 29 | 30 | CTAS_QUERY_FMT = '''CREATE TABLE {new_database}.tmp_{new_table_name} 31 | WITH ( 32 | external_location='{location}', 33 | format = 'PARQUET', 34 | parquet_compression = 'SNAPPY') 35 | AS SELECT {columns} 36 | FROM {old_database}.{old_table_name} 37 | WHERE year={year} AND month={month} AND day={day} AND hour={hour} 38 | WITH DATA 39 | ''' 40 | 41 | def run_alter_table_add_partition(athena_client, basic_dt, database_name, table_name, output_prefix): 42 | year, month, day, hour = (basic_dt.year, basic_dt.month, basic_dt.day, basic_dt.hour) 43 | 44 | tmp_table_name = '{table}_{year}{month:02}{day:02}{hour:02}'.format(table=table_name, 45 | year=year, month=month, day=day, hour=hour) 46 | 47 | output_location = '{}/alter_table_{}'.format(STAGING_OUTPUT_PREFIX, tmp_table_name) 48 | 49 | alter_table_stmt = '''ALTER TABLE {database}.{table_name} ADD if NOT EXISTS'''.format(database=database_name, 50 | table_name=table_name) 51 | 52 | partition_expr = '''PARTITION (year={year}, month={month}, day={day}, hour={hour}) LOCATION "{output_prefix}/year={year}/month={month:02}/day={day:02}/hour={hour:02}/"''' 53 | 54 | partition_expr_list = [] 55 | for i in (1, 0, -1): 56 | dt = basic_dt - datetime.timedelta(hours=i) 57 | year, month, day, hour = (dt.year, dt.month, dt.day, dt.hour) 58 | part_expr = partition_expr.format(year=year, month=month, day=day, hour=hour, output_prefix=output_prefix) 59 | partition_expr_list.append(part_expr) 60 | 61 | query = '{} {}'.format(alter_table_stmt, '\n'.join(partition_expr_list)) 62 | print('[INFO] QueryString:\n{}'.format(query), file=sys.stderr) 63 | print('[INFO] OutputLocation: {}'.format(output_location), file=sys.stderr) 64 | 65 | if DRY_RUN: 66 | print('[INFO] End of dry-run', file=sys.stderr) 67 | return 68 | 69 | response = athena_client.start_query_execution( 70 | 
QueryString=query, 71 | ResultConfiguration={ 72 | 'OutputLocation': output_location 73 | }, 74 | WorkGroup=WORK_GROUP 75 | ) 76 | print('[INFO] QueryExecutionId: {}'.format(response['QueryExecutionId']), file=sys.stderr) 77 | 78 | 79 | def run_drop_tmp_table(athena_client, basic_dt): 80 | year, month, day, hour = (basic_dt.year, basic_dt.month, basic_dt.day, basic_dt.hour) 81 | 82 | tmp_table_name = '{table}_{year}{month:02}{day:02}{hour:02}'.format(table=NEW_TABLE_NAME, 83 | year=year, month=month, day=day, hour=hour) 84 | 85 | output_location = '{}/tmp_{}'.format(STAGING_OUTPUT_PREFIX, tmp_table_name) 86 | query = 'DROP TABLE IF EXISTS {database}.tmp_{table_name}'.format(database=NEW_DATABASE, 87 | table_name=tmp_table_name) 88 | 89 | print('[INFO] QueryString:\n{}'.format(query), file=sys.stderr) 90 | print('[INFO] OutputLocation: {}'.format(output_location), file=sys.stderr) 91 | 92 | if DRY_RUN: 93 | print('[INFO] End of dry-run', file=sys.stderr) 94 | return 95 | 96 | response = athena_client.start_query_execution( 97 | QueryString=query, 98 | ResultConfiguration={ 99 | 'OutputLocation': output_location 100 | }, 101 | WorkGroup=WORK_GROUP 102 | ) 103 | print('[INFO] QueryExecutionId: {}'.format(response['QueryExecutionId']), file=sys.stderr) 104 | 105 | 106 | def run_ctas(athena_client, basic_dt): 107 | year, month, day, hour = (basic_dt.year, basic_dt.month, basic_dt.day, basic_dt.hour) 108 | 109 | new_table_name = '{table}_{year}{month:02}{day:02}{hour:02}'.format(table=NEW_TABLE_NAME, 110 | year=year, month=month, day=day, hour=hour) 111 | 112 | output_location = '{}/tmp_{}'.format(STAGING_OUTPUT_PREFIX, new_table_name) 113 | external_location = EXTERNAL_LOCATION_FMT.format(output_prefix=OUTPUT_PREFIX, 114 | year=year, month=month, day=day, hour=hour) 115 | 116 | query = CTAS_QUERY_FMT.format(new_database=NEW_DATABASE, new_table_name=new_table_name, 117 | old_database=OLD_DATABASE, old_table_name=OLD_TABLE_NAME, columns=COLUMN_NAMES, 118 | year=year, 
month=month, day=day, hour=hour, location=external_location) 119 | 120 | print('[INFO] QueryString:\n{}'.format(query), file=sys.stderr) 121 | print('[INFO] ExternalLocation: {}'.format(external_location), file=sys.stderr) 122 | print('[INFO] OutputLocation: {}'.format(output_location), file=sys.stderr) 123 | 124 | if DRY_RUN: 125 | print('[INFO] End of dry-run', file=sys.stderr) 126 | return 127 | 128 | response = athena_client.start_query_execution( 129 | QueryString=query, 130 | QueryExecutionContext={ 131 | 'Database': NEW_DATABASE 132 | }, 133 | ResultConfiguration={ 134 | 'OutputLocation': output_location 135 | }, 136 | WorkGroup=WORK_GROUP 137 | ) 138 | print('[INFO] QueryExecutionId: {}'.format(response['QueryExecutionId']), file=sys.stderr) 139 | 140 | 141 | def lambda_handler(event, context): 142 | event_dt = datetime.datetime.strptime(event['time'], "%Y-%m-%dT%H:%M:%SZ") 143 | prev_basic_dt, basic_dt = [event_dt - datetime.timedelta(hours=e) for e in (2, 1)] 144 | 145 | client = boto3.client('athena', region_name=AWS_REGION) 146 | run_drop_tmp_table(client, prev_basic_dt) 147 | 148 | if not DRY_RUN: 149 | print('[INFO] Wait for a few seconds until dropping old table', file=sys.stderr) 150 | time.sleep(10) 151 | 152 | run_alter_table_add_partition(client, basic_dt, 153 | database_name=OLD_DATABASE, 154 | table_name=OLD_TABLE_NAME, 155 | output_prefix=OLD_TABLE_LOCATION_PREFIX) 156 | 157 | if not DRY_RUN: 158 | print('[INFO] Wait for a few seconds until adding partitions to table: %s.%s' % (OLD_DATABASE, OLD_TABLE_NAME), file=sys.stderr) 159 | time.sleep(10) 160 | 161 | run_alter_table_add_partition(client, basic_dt, 162 | database_name=NEW_DATABASE, 163 | table_name=NEW_TABLE_NAME, 164 | output_prefix=OUTPUT_PREFIX) 165 | 166 | if not DRY_RUN: 167 | print('[INFO] Wait for a few seconds until adding partitions to table: %s.%s' % (NEW_DATABASE, NEW_TABLE_NAME), file=sys.stderr) 168 | time.sleep(10) 169 | 170 | run_ctas(client, basic_dt) 171 | 172 | 173 | if 
__name__ == '__main__': 174 | import argparse 175 | 176 | parser = argparse.ArgumentParser() 177 | parser.add_argument('-dt', '--basic-datetime', default=datetime.datetime.today().strftime('%Y-%m-%dT%H:05:00Z'), 178 | help='The scheduled event occurrence time ex) 2020-02-28T03:05:00Z') 179 | parser.add_argument('--region-name', default='us-east-1', 180 | help='aws region name') 181 | parser.add_argument('--old-database', default='mydatabase', 182 | help='aws athena source database name used by ctas query') 183 | parser.add_argument('--old-table-name', default='retail_trans_json', 184 | help='aws athena source table name used by ctas query') 185 | parser.add_argument('--new-database', default='mydatabase', 186 | help='aws athena target database name for merged files') 187 | parser.add_argument('--new-table-name', default='ctas_retail_trans_parquet', 188 | help='aws athena target table name for merged files') 189 | parser.add_argument('--work-group', default='primary', 190 | help='aws athena work group') 191 | parser.add_argument('--old-table-location-prefix', required=True, 192 | help='s3 path for aws athena source table') 193 | parser.add_argument('--output-prefix', required=True, 194 | help='s3 path for aws athena target table') 195 | parser.add_argument('--staging-output-prefix', required=True, 196 | help='s3 path for aws athena tmp table') 197 | parser.add_argument('--column-names', default='*', 198 | help='selectable column names of aws athena source table') 199 | parser.add_argument('--run', action='store_true', 200 | help='run ctas query') 201 | 202 | options = parser.parse_args() 203 | 204 | DRY_RUN = False if options.run else True 205 | AWS_REGION = options.region_name 206 | OLD_DATABASE = options.old_database 207 | OLD_TABLE_NAME= options.old_table_name 208 | NEW_DATABASE = options.new_database 209 | NEW_TABLE_NAME = options.new_table_name 210 | WORK_GROUP = options.work_group 211 | OLD_TABLE_LOCATION_PREFIX = options.old_table_location_prefix 212 | 
OUTPUT_PREFIX = options.output_prefix 213 | STAGING_OUTPUT_PREFIX = options.staging_output_prefix 214 | COLUMN_NAMES = options.column_names 215 | 216 | event = { 217 | "id": "cdc73f9d-aea9-11e3-9d5a-835b769c0d9c", 218 | "detail-type": "Scheduled Event", 219 | "source": "aws.events", 220 | "account": "123456789012", 221 | "time": options.basic_datetime, # ex) "2020-02-28T03:05:00Z" 222 | "region": AWS_REGION, # ex) "us-east-1" 223 | "resources": [ 224 | f"arn:aws:events:{AWS_REGION}:123456789012:rule/ExampleRule" 225 | ], 226 | "detail": {} 227 | } 228 | print('[DEBUG] event:\n{}'.format(event), file=sys.stderr) 229 | lambda_handler(event, {}) 230 | -------------------------------------------------------------------------------- /src/main/python/UpsertToES/upsert_to_es.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- encoding: utf-8 -*- 3 | # vim: tabstop=2 shiftwidth=2 softtabstop=2 expandtab 4 | 5 | import sys 6 | import json 7 | import os 8 | import base64 9 | import traceback 10 | import hashlib 11 | import datetime 12 | 13 | import boto3 14 | from opensearchpy import OpenSearch 15 | from opensearchpy import RequestsHttpConnection 16 | from requests_aws4auth import AWS4Auth 17 | 18 | ES_INDEX, ES_TYPE = (os.getenv('ES_INDEX', 'retail'), os.getenv('ES_TYPE', 'trans')) 19 | ES_HOST = os.getenv('ES_HOST') 20 | REQUIRED_FIELDS = [e for e in os.getenv('REQUIRED_FIELDS', '').split(',') if e] 21 | DATE_TYPE_FIELDS = [e for e in os.getenv('DATE_TYPE_FIELDS', '').split(',') if e] 22 | DATE_FORMAT = os.getenv('DATE_FORMAT', '%Y-%m-%d %H:%M:%S') 23 | 24 | AWS_REGION = os.getenv('REGION_NAME', 'us-east-1') 25 | 26 | session = boto3.Session(region_name=AWS_REGION) 27 | credentials = session.get_credentials() 28 | credentials = credentials.get_frozen_credentials() 29 | access_key, secret_key, session_token = (credentials.access_key, credentials.secret_key, credentials.token) 30 | 31 | aws_auth = 
AWS4Auth( 32 | access_key, 33 | secret_key, 34 | AWS_REGION, 35 | 'es', 36 | session_token=session_token 37 | ) 38 | 39 | ops_client = OpenSearch( 40 | hosts = [{'host': ES_HOST, 'port': 443}], 41 | http_auth=aws_auth, 42 | use_ssl=True, 43 | verify_certs=True, 44 | connection_class=RequestsHttpConnection 45 | ) 46 | print('[INFO] OpenSearch Service', json.dumps(ops_client.info(), indent=2), file=sys.stderr) 47 | 48 | 49 | def lambda_handler(event, context): 50 | import collections 51 | 52 | counter = collections.OrderedDict([('reads', 0), 53 | ('writes', 0), 54 | ('invalid', 0), 55 | ('index_errors', 0), 56 | ('errors', 0)]) 57 | 58 | doc_list = [] 59 | for record in event['Records']: 60 | try: 61 | counter['reads'] += 1 62 | payload = base64.b64decode(record['kinesis']['data']).decode('utf-8') 63 | json_data = json.loads(payload) 64 | 65 | if not any([json_data.get(k, None) for k in REQUIRED_FIELDS]): 66 | counter['invalid'] += 1 67 | continue 68 | 69 | doc_id = ':'.join([json_data.get(k, '') for k in REQUIRED_FIELDS if k]) 70 | json_data['doc_id'] = hashlib.md5(doc_id.encode('utf-8')).hexdigest()[:8] 71 | 72 | for k in DATE_TYPE_FIELDS: 73 | if k in json_data: 74 | dt = datetime.datetime.strptime(json_data[k], DATE_FORMAT) 75 | json_data[k] = dt.strftime("%Y-%m-%dT%H:%M:%SZ") 76 | 77 | # es_index_action_meta = {"index": {"_index": ES_INDEX, "_type": ES_TYPE, "_id": json_data['doc_id']}} 78 | es_index_action_meta = {"index": {"_index": ES_INDEX, "_id": json_data['doc_id']}} 79 | doc_list.append(es_index_action_meta) 80 | doc_list.append(json_data) 81 | 82 | counter['writes'] += 1 83 | except Exception as ex: 84 | counter['errors'] += 1 85 | traceback.print_exc() 86 | 87 | if doc_list: 88 | try: 89 | es_bulk_body = '\n'.join([json.dumps(e) for e in doc_list]) 90 | res = ops_client.bulk(body=es_bulk_body, index=ES_INDEX, refresh=True) 91 | except Exception as ex: 92 | counter['index_errors'] += 1 93 | traceback.print_exc() 94 | 95 | print('[INFO]', ', 
'.join(['{}={}'.format(k, v) for k, v in counter.items()]), file=sys.stderr) 96 | 97 | 98 | if __name__ == '__main__': 99 | kinesis_data = [ 100 | '''{"Invoice": "489434", "StockCode": "85048", "Description": "15CM CHRISTMAS GLASS BALL 20 LIGHTS", "Quantity": 12, "InvoiceDate": "2009-12-01 07:45:00", "Price": 6.95, "Customer_ID": "13085.0", "Country": "United Kingdom"}''', 101 | '''{"Invoice": "489435", "StockCode": "22350", "Description": "CAT BOWL ", "Quantity": 12, "InvoiceDate": "2009-12-01 07:46:00", "Price": 2.55, "Customer_ID": "13085.0", "Country": "United Kingdom"}''', 102 | '''{"Invoice": "489436", "StockCode": "48173C", "Description": "DOOR MAT BLACK FLOCK ", "Quantity": 10, "InvoiceDate": "2009-12-01 09:06:00", "Price": 5.95, "Customer_ID": "13078.0", "Country": "United Kingdom"}''', 103 | '''{"Invoice": "491970", "StockCode": "21218", "Description": "RED SPOTTY BISCUIT TIN", "Quantity": 2, "InvoiceDate": "2009-12-14 18:03:00", "Price": 8.65, "Customer_ID": "", "Country": "United Kingdom"}''', 104 | ] 105 | 106 | records = [{ 107 | "eventID": "shardId-000000000000:49545115243490985018280067714973144582180062593244200961", 108 | "eventVersion": "1.0", 109 | "kinesis": { 110 | "approximateArrivalTimestamp": 1428537600, 111 | "partitionKey": "partitionKey-1", 112 | "data": base64.b64encode(e.encode('utf-8')), 113 | "kinesisSchemaVersion": "1.0", 114 | "sequenceNumber": "49545115243490985018280067714973144582180062593244200961" 115 | }, 116 | "invokeIdentityArn": "arn:aws:iam::EXAMPLE", 117 | "eventName": "aws:kinesis:record", 118 | "eventSourceARN": "arn:aws:kinesis:EXAMPLE", 119 | "eventSource": "aws:kinesis", 120 | "awsRegion": "us-east-1" 121 | } for e in kinesis_data] 122 | event = {"Records": records} 123 | lambda_handler(event, {}) 124 | -------------------------------------------------------------------------------- /src/main/python/utils/gen_kinesis_data.py: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env python3 2 | # -*- encoding: utf-8 -*- 3 | # vim: tabstop=2 shiftwidth=2 softtabstop=2 expandtab 4 | 5 | import sys 6 | import json 7 | import argparse 8 | import string 9 | import traceback 10 | import random 11 | import time 12 | import typing 13 | import datetime 14 | 15 | import boto3 16 | 17 | import mimesis 18 | 19 | # Mimesis 5.0 supports Python 3.8, 3.9, and 3.10. 20 | # The Mimesis 4.1.3 is the last to support Python 3.6 and 3.7 21 | # For more information, see https://mimesis.name/en/latest/changelog.html#version-5-0-0 22 | assert mimesis.__version__ == '4.1.3' 23 | 24 | from mimesis import locales 25 | from mimesis.schema import Field, Schema 26 | from mimesis.providers.base import BaseProvider 27 | 28 | random.seed(47) 29 | 30 | 31 | class CustomDatetimeProvider(BaseProvider): 32 | class Meta: 33 | """Class for metadata.""" 34 | name = "custom_datetime" 35 | 36 | def __init__(self, seed=47) -> None: 37 | super().__init__(seed=seed) 38 | self.random = random.Random(seed) 39 | 40 | def formated_datetime(self, fmt='%Y-%m-%dT%H:%M:%SZ', lt_now=False) -> str: 41 | CURRENT_YEAR = datetime.datetime.now().year 42 | CURRENT_MONTH = datetime.datetime.now().month 43 | CURRENT_DAY = datetime.datetime.now().day 44 | CURRENT_HOUR = datetime.datetime.now().hour 45 | CURRENT_MINUTE = datetime.datetime.now().minute 46 | CURRENT_SECOND = datetime.datetime.now().second 47 | 48 | if lt_now: 49 | random_time = datetime.time( 50 | self.random.randint(0, CURRENT_HOUR), 51 | self.random.randint(0, max(0, CURRENT_MINUTE-1)), 52 | self.random.randint(0, max(0, CURRENT_SECOND-1)), 53 | self.random.randint(0, 999999) 54 | ) 55 | else: 56 | random_time = datetime.time( 57 | CURRENT_HOUR, 58 | CURRENT_MINUTE, 59 | self.random.randint(CURRENT_SECOND, 59), 60 | self.random.randint(0, 999999) 61 | ) 62 | 63 | datetime_obj = datetime.datetime.combine( 64 | date=datetime.date(CURRENT_YEAR, CURRENT_MONTH, CURRENT_DAY), 65 | time=random_time, 66 | ) 67 | 68 | return 
datetime_obj.strftime(fmt) 69 | 70 | 71 | def gen_records(options): 72 | _CURRENT_YEAR = datetime.datetime.now().year 73 | 74 | #XXX: For more information about synthetic data schema, see 75 | # https://github.com/aws-samples/aws-glue-streaming-etl-blog/blob/master/config/generate_data.py 76 | _ = Field(locale=locales.EN, providers=[CustomDatetimeProvider]) 77 | 78 | _schema = Schema(schema=lambda: { 79 | "Invoice": _("pin", mask='######'), 80 | "StockCode": f"{_('pin', mask='#####')}{_('choice', items=string.ascii_uppercase, length=1)}", 81 | "Description": ', '.join(_("words")), 82 | "Quantity": _("integer_number", start=1, end=10), 83 | "InvoiceDate": _("custom_datetime.formated_datetime", fmt="%Y-%m-%d %H:%M:%S", lt_now=True), 84 | "Price": _("float_number", start=0.1, end=100.0, precision=2), 85 | "Customer_ID": f"{_('pin', mask='#####')}", 86 | "Country": _("country") 87 | }) 88 | 89 | ret = [[f"{json.dumps(record)}\n"] for record in _schema.create(options.max_count)] 90 | return ret 91 | 92 | 93 | def put_records_to_firehose(client, options, records): 94 | MAX_RETRY_COUNT = 3 95 | 96 | for data in records: 97 | if options.dry_run: 98 | print(data) 99 | continue 100 | 101 | for _ in range(MAX_RETRY_COUNT): 102 | try: 103 | response = client.put_record( 104 | DeliveryStreamName=options.stream_name, 105 | Record={ 106 | 'Data': '{}\n'.format(data) 107 | } 108 | ) 109 | if options.verbose: 110 | print('[FIREHOSE]', response, file=sys.stderr) 111 | break 112 | except Exception as ex: 113 | traceback.print_exc() 114 | time.sleep(random.randint(1, 10)) 115 | else: 116 | raise RuntimeError('[ERROR] Failed to put_records into stream: {}'.format(options.stream_name)) 117 | 118 | 119 | def put_records_to_kinesis(client, options, records): 120 | MAX_RETRY_COUNT = 3 121 | 122 | payload_list = [] 123 | for data in records: 124 | partition_key = 'part-{:05}'.format(random.randint(1, 1024)) 125 | payload_list.append({'Data': data, 'PartitionKey': partition_key}) 126 | 127 | 
if options.dry_run: 128 | print(json.dumps(payload_list, ensure_ascii=False)) 129 | return 130 | 131 | for _ in range(MAX_RETRY_COUNT): 132 | try: 133 | response = client.put_records(Records=payload_list, StreamName=options.stream_name) 134 | if options.verbose: 135 | print('[KINESIS]', response, file=sys.stderr) 136 | break 137 | except Exception as ex: 138 | traceback.print_exc() 139 | time.sleep(random.randint(1, 10)) 140 | else: 141 | raise RuntimeError('[ERROR] Failed to put_records into stream: {}'.format(options.stream_name)) 142 | 143 | 144 | def main(): 145 | parser = argparse.ArgumentParser() 146 | 147 | parser.add_argument('--region-name', action='store', default='us-east-1', 148 | help='aws region name (default: us-east-1)') 149 | parser.add_argument('--service-name', required=True, choices=['kinesis', 'firehose', 'console']) 150 | parser.add_argument('--stream-name', help='The name of the stream to put the data record into.') 151 | parser.add_argument('--max-count', default=10, type=int, help='The max number of records to put.') 152 | parser.add_argument('--dry-run', action='store_true') 153 | parser.add_argument('--verbose', action='store_true', help='Show debug logs') 154 | 155 | options = parser.parse_args() 156 | COUNT_STEP = 10 if options.dry_run else 100 157 | 158 | client = boto3.client(options.service_name, region_name=options.region_name) if options.service_name != 'console' else None 159 | counter = 0 160 | for records in gen_records(options): 161 | if options.service_name == 'kinesis': 162 | put_records_to_kinesis(client, options, records) 163 | elif options.service_name == 'firehose': 164 | put_records_to_firehose(client, options, records) 165 | else: 166 | print('\n'.join([e for e in records])) 167 | 168 | counter += len(records) 169 | if counter % COUNT_STEP == 0: 170 | print('[INFO] {} records are processed'.format(counter), file=sys.stderr) 171 | 172 | time.sleep(random.choices([0.01, 0.03, 0.05, 0.07, 0.1])[-1]) 173 | print('[INFO] 
Total {} records are processed'.format(counter), file=sys.stderr) 174 | 175 | 176 | if __name__ == '__main__': 177 | main() 178 | -------------------------------------------------------------------------------- /src/main/python/utils/kinesis_consumer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- encoding: utf-8 -*- 3 | # vim: tabstop=2 shiftwidth=2 softtabstop=2 expandtab 4 | 5 | import sys 6 | import time 7 | import json 8 | import pprint 9 | import argparse 10 | 11 | import boto3 12 | 13 | SHARD_ITER_TYPE = ('TRIM_HORIZON', 'LATEST') 14 | 15 | def main(): 16 | parser = argparse.ArgumentParser() 17 | 18 | parser.add_argument('--region-name', action='store', default='us-east-1', help='region name') 19 | parser.add_argument('--stream-name', action='store', help='kinesis stream-name') 20 | parser.add_argument('--iter-type', choices=SHARD_ITER_TYPE, default='TRIM_HORIZON', 21 | help='kinesis stream shard iterator type: [{}]'.format(', '.join(SHARD_ITER_TYPE))) 22 | 23 | options = parser.parse_args() 24 | 25 | stream_name, shard_iter_type = options.stream_name, options.iter_type 26 | 27 | kinesis_client = boto3.client('kinesis', region_name=options.region_name) 28 | response = kinesis_client.describe_stream(StreamName=stream_name) 29 | shard_id = response['StreamDescription']['Shards'][0]['ShardId'] 30 | 31 | shard_iterator = kinesis_client.get_shard_iterator(StreamName=stream_name, 32 | ShardId=shard_id, 33 | ShardIteratorType=shard_iter_type) 34 | 35 | shard_iter = shard_iterator['ShardIterator'] 36 | record_response = kinesis_client.get_records(ShardIterator=shard_iter, Limit=123) 37 | print(record_response.get('Records', {})) 38 | 39 | while 'NextShardIterator' in record_response: 40 | record_response = kinesis_client.get_records(ShardIterator=record_response['NextShardIterator'], Limit=123) 41 | print(record_response.get('Records', {})) 42 | 43 | # wait for 5 seconds 44 | time.sleep(5) 45 
| 46 | if __name__ == '__main__': 47 | main() 48 | -------------------------------------------------------------------------------- /vpc_us_east_1.yaml: -------------------------------------------------------------------------------- 1 | Resources: 2 | VpcStack0F17A1B7: 3 | Type: AWS::EC2::VPC 4 | Properties: 5 | CidrBlock: 10.0.0.0/16 6 | EnableDnsHostnames: true 7 | EnableDnsSupport: true 8 | InstanceTenancy: default 9 | Tags: 10 | - Key: Name 11 | Value: vpc/VpcStack 12 | Metadata: 13 | aws:cdk:path: vpc/VpcStack/Resource 14 | VpcStackPublicSubnet1Subnet3ACDE2AA: 15 | Type: AWS::EC2::Subnet 16 | Properties: 17 | VpcId: 18 | Ref: VpcStack0F17A1B7 19 | AvailabilityZone: us-east-1a 20 | CidrBlock: 10.0.0.0/20 21 | MapPublicIpOnLaunch: true 22 | Tags: 23 | - Key: aws-cdk:subnet-name 24 | Value: Public 25 | - Key: aws-cdk:subnet-type 26 | Value: Public 27 | - Key: Name 28 | Value: vpc/VpcStack/PublicSubnet1 29 | Metadata: 30 | aws:cdk:path: vpc/VpcStack/PublicSubnet1/Subnet 31 | VpcStackPublicSubnet1RouteTable4FE62862: 32 | Type: AWS::EC2::RouteTable 33 | Properties: 34 | VpcId: 35 | Ref: VpcStack0F17A1B7 36 | Tags: 37 | - Key: Name 38 | Value: vpc/VpcStack/PublicSubnet1 39 | Metadata: 40 | aws:cdk:path: vpc/VpcStack/PublicSubnet1/RouteTable 41 | VpcStackPublicSubnet1RouteTableAssociation3B798904: 42 | Type: AWS::EC2::SubnetRouteTableAssociation 43 | Properties: 44 | RouteTableId: 45 | Ref: VpcStackPublicSubnet1RouteTable4FE62862 46 | SubnetId: 47 | Ref: VpcStackPublicSubnet1Subnet3ACDE2AA 48 | Metadata: 49 | aws:cdk:path: vpc/VpcStack/PublicSubnet1/RouteTableAssociation 50 | VpcStackPublicSubnet1DefaultRoute4E14C6B4: 51 | Type: AWS::EC2::Route 52 | Properties: 53 | RouteTableId: 54 | Ref: VpcStackPublicSubnet1RouteTable4FE62862 55 | DestinationCidrBlock: 0.0.0.0/0 56 | GatewayId: 57 | Ref: VpcStackIGW299C5A78 58 | DependsOn: 59 | - VpcStackVPCGW565B132F 60 | Metadata: 61 | aws:cdk:path: vpc/VpcStack/PublicSubnet1/DefaultRoute 62 | VpcStackPublicSubnet1EIPA71BA69D: 
63 | Type: AWS::EC2::EIP 64 | Properties: 65 | Domain: vpc 66 | Tags: 67 | - Key: Name 68 | Value: vpc/VpcStack/PublicSubnet1 69 | Metadata: 70 | aws:cdk:path: vpc/VpcStack/PublicSubnet1/EIP 71 | VpcStackPublicSubnet1NATGatewayB384AF90: 72 | Type: AWS::EC2::NatGateway 73 | Properties: 74 | SubnetId: 75 | Ref: VpcStackPublicSubnet1Subnet3ACDE2AA 76 | AllocationId: 77 | Fn::GetAtt: 78 | - VpcStackPublicSubnet1EIPA71BA69D 79 | - AllocationId 80 | Tags: 81 | - Key: Name 82 | Value: vpc/VpcStack/PublicSubnet1 83 | DependsOn: 84 | - VpcStackPublicSubnet1DefaultRoute4E14C6B4 85 | - VpcStackPublicSubnet1RouteTableAssociation3B798904 86 | Metadata: 87 | aws:cdk:path: vpc/VpcStack/PublicSubnet1/NATGateway 88 | VpcStackPublicSubnet2Subnet1F6A08E8: 89 | Type: AWS::EC2::Subnet 90 | Properties: 91 | VpcId: 92 | Ref: VpcStack0F17A1B7 93 | AvailabilityZone: us-east-1b 94 | CidrBlock: 10.0.16.0/20 95 | MapPublicIpOnLaunch: true 96 | Tags: 97 | - Key: aws-cdk:subnet-name 98 | Value: Public 99 | - Key: aws-cdk:subnet-type 100 | Value: Public 101 | - Key: Name 102 | Value: vpc/VpcStack/PublicSubnet2 103 | Metadata: 104 | aws:cdk:path: vpc/VpcStack/PublicSubnet2/Subnet 105 | VpcStackPublicSubnet2RouteTable5DDEE0F5: 106 | Type: AWS::EC2::RouteTable 107 | Properties: 108 | VpcId: 109 | Ref: VpcStack0F17A1B7 110 | Tags: 111 | - Key: Name 112 | Value: vpc/VpcStack/PublicSubnet2 113 | Metadata: 114 | aws:cdk:path: vpc/VpcStack/PublicSubnet2/RouteTable 115 | VpcStackPublicSubnet2RouteTableAssociationFF440051: 116 | Type: AWS::EC2::SubnetRouteTableAssociation 117 | Properties: 118 | RouteTableId: 119 | Ref: VpcStackPublicSubnet2RouteTable5DDEE0F5 120 | SubnetId: 121 | Ref: VpcStackPublicSubnet2Subnet1F6A08E8 122 | Metadata: 123 | aws:cdk:path: vpc/VpcStack/PublicSubnet2/RouteTableAssociation 124 | VpcStackPublicSubnet2DefaultRoute8B9803F6: 125 | Type: AWS::EC2::Route 126 | Properties: 127 | RouteTableId: 128 | Ref: VpcStackPublicSubnet2RouteTable5DDEE0F5 129 | DestinationCidrBlock: 0.0.0.0/0 
130 | GatewayId: 131 | Ref: VpcStackIGW299C5A78 132 | DependsOn: 133 | - VpcStackVPCGW565B132F 134 | Metadata: 135 | aws:cdk:path: vpc/VpcStack/PublicSubnet2/DefaultRoute 136 | VpcStackPublicSubnet2EIPBE670D17: 137 | Type: AWS::EC2::EIP 138 | Properties: 139 | Domain: vpc 140 | Tags: 141 | - Key: Name 142 | Value: vpc/VpcStack/PublicSubnet2 143 | Metadata: 144 | aws:cdk:path: vpc/VpcStack/PublicSubnet2/EIP 145 | VpcStackPublicSubnet2NATGateway8FDE910A: 146 | Type: AWS::EC2::NatGateway 147 | Properties: 148 | SubnetId: 149 | Ref: VpcStackPublicSubnet2Subnet1F6A08E8 150 | AllocationId: 151 | Fn::GetAtt: 152 | - VpcStackPublicSubnet2EIPBE670D17 153 | - AllocationId 154 | Tags: 155 | - Key: Name 156 | Value: vpc/VpcStack/PublicSubnet2 157 | DependsOn: 158 | - VpcStackPublicSubnet2DefaultRoute8B9803F6 159 | - VpcStackPublicSubnet2RouteTableAssociationFF440051 160 | Metadata: 161 | aws:cdk:path: vpc/VpcStack/PublicSubnet2/NATGateway 162 | VpcStackPublicSubnet3Subnet290057FA: 163 | Type: AWS::EC2::Subnet 164 | Properties: 165 | VpcId: 166 | Ref: VpcStack0F17A1B7 167 | AvailabilityZone: us-east-1c 168 | CidrBlock: 10.0.32.0/20 169 | MapPublicIpOnLaunch: true 170 | Tags: 171 | - Key: aws-cdk:subnet-name 172 | Value: Public 173 | - Key: aws-cdk:subnet-type 174 | Value: Public 175 | - Key: Name 176 | Value: vpc/VpcStack/PublicSubnet3 177 | Metadata: 178 | aws:cdk:path: vpc/VpcStack/PublicSubnet3/Subnet 179 | VpcStackPublicSubnet3RouteTableFEDDCBC0: 180 | Type: AWS::EC2::RouteTable 181 | Properties: 182 | VpcId: 183 | Ref: VpcStack0F17A1B7 184 | Tags: 185 | - Key: Name 186 | Value: vpc/VpcStack/PublicSubnet3 187 | Metadata: 188 | aws:cdk:path: vpc/VpcStack/PublicSubnet3/RouteTable 189 | VpcStackPublicSubnet3RouteTableAssociationF0D2CAD0: 190 | Type: AWS::EC2::SubnetRouteTableAssociation 191 | Properties: 192 | RouteTableId: 193 | Ref: VpcStackPublicSubnet3RouteTableFEDDCBC0 194 | SubnetId: 195 | Ref: VpcStackPublicSubnet3Subnet290057FA 196 | Metadata: 197 | aws:cdk:path: 
vpc/VpcStack/PublicSubnet3/RouteTableAssociation 198 | VpcStackPublicSubnet3DefaultRoute8F939BA2: 199 | Type: AWS::EC2::Route 200 | Properties: 201 | RouteTableId: 202 | Ref: VpcStackPublicSubnet3RouteTableFEDDCBC0 203 | DestinationCidrBlock: 0.0.0.0/0 204 | GatewayId: 205 | Ref: VpcStackIGW299C5A78 206 | DependsOn: 207 | - VpcStackVPCGW565B132F 208 | Metadata: 209 | aws:cdk:path: vpc/VpcStack/PublicSubnet3/DefaultRoute 210 | VpcStackPublicSubnet3EIP0521B737: 211 | Type: AWS::EC2::EIP 212 | Properties: 213 | Domain: vpc 214 | Tags: 215 | - Key: Name 216 | Value: vpc/VpcStack/PublicSubnet3 217 | Metadata: 218 | aws:cdk:path: vpc/VpcStack/PublicSubnet3/EIP 219 | VpcStackPublicSubnet3NATGatewayB101FD95: 220 | Type: AWS::EC2::NatGateway 221 | Properties: 222 | SubnetId: 223 | Ref: VpcStackPublicSubnet3Subnet290057FA 224 | AllocationId: 225 | Fn::GetAtt: 226 | - VpcStackPublicSubnet3EIP0521B737 227 | - AllocationId 228 | Tags: 229 | - Key: Name 230 | Value: vpc/VpcStack/PublicSubnet3 231 | DependsOn: 232 | - VpcStackPublicSubnet3DefaultRoute8F939BA2 233 | - VpcStackPublicSubnet3RouteTableAssociationF0D2CAD0 234 | Metadata: 235 | aws:cdk:path: vpc/VpcStack/PublicSubnet3/NATGateway 236 | VpcStackPrivateSubnet1SubnetECAA9AC8: 237 | Type: AWS::EC2::Subnet 238 | Properties: 239 | VpcId: 240 | Ref: VpcStack0F17A1B7 241 | AvailabilityZone: us-east-1a 242 | CidrBlock: 10.0.48.0/20 243 | MapPublicIpOnLaunch: false 244 | Tags: 245 | - Key: aws-cdk:subnet-name 246 | Value: Private 247 | - Key: aws-cdk:subnet-type 248 | Value: Private 249 | - Key: Name 250 | Value: vpc/VpcStack/PrivateSubnet1 251 | Metadata: 252 | aws:cdk:path: vpc/VpcStack/PrivateSubnet1/Subnet 253 | VpcStackPrivateSubnet1RouteTable25FDF5F8: 254 | Type: AWS::EC2::RouteTable 255 | Properties: 256 | VpcId: 257 | Ref: VpcStack0F17A1B7 258 | Tags: 259 | - Key: Name 260 | Value: vpc/VpcStack/PrivateSubnet1 261 | Metadata: 262 | aws:cdk:path: vpc/VpcStack/PrivateSubnet1/RouteTable 263 | 
VpcStackPrivateSubnet1RouteTableAssociation0FFF3070: 264 | Type: AWS::EC2::SubnetRouteTableAssociation 265 | Properties: 266 | RouteTableId: 267 | Ref: VpcStackPrivateSubnet1RouteTable25FDF5F8 268 | SubnetId: 269 | Ref: VpcStackPrivateSubnet1SubnetECAA9AC8 270 | Metadata: 271 | aws:cdk:path: vpc/VpcStack/PrivateSubnet1/RouteTableAssociation 272 | VpcStackPrivateSubnet1DefaultRoute983E1C4C: 273 | Type: AWS::EC2::Route 274 | Properties: 275 | RouteTableId: 276 | Ref: VpcStackPrivateSubnet1RouteTable25FDF5F8 277 | DestinationCidrBlock: 0.0.0.0/0 278 | NatGatewayId: 279 | Ref: VpcStackPublicSubnet1NATGatewayB384AF90 280 | Metadata: 281 | aws:cdk:path: vpc/VpcStack/PrivateSubnet1/DefaultRoute 282 | VpcStackPrivateSubnet2SubnetFBEE5372: 283 | Type: AWS::EC2::Subnet 284 | Properties: 285 | VpcId: 286 | Ref: VpcStack0F17A1B7 287 | AvailabilityZone: us-east-1b 288 | CidrBlock: 10.0.64.0/20 289 | MapPublicIpOnLaunch: false 290 | Tags: 291 | - Key: aws-cdk:subnet-name 292 | Value: Private 293 | - Key: aws-cdk:subnet-type 294 | Value: Private 295 | - Key: Name 296 | Value: vpc/VpcStack/PrivateSubnet2 297 | Metadata: 298 | aws:cdk:path: vpc/VpcStack/PrivateSubnet2/Subnet 299 | VpcStackPrivateSubnet2RouteTable83187A33: 300 | Type: AWS::EC2::RouteTable 301 | Properties: 302 | VpcId: 303 | Ref: VpcStack0F17A1B7 304 | Tags: 305 | - Key: Name 306 | Value: vpc/VpcStack/PrivateSubnet2 307 | Metadata: 308 | aws:cdk:path: vpc/VpcStack/PrivateSubnet2/RouteTable 309 | VpcStackPrivateSubnet2RouteTableAssociation5B7FAA59: 310 | Type: AWS::EC2::SubnetRouteTableAssociation 311 | Properties: 312 | RouteTableId: 313 | Ref: VpcStackPrivateSubnet2RouteTable83187A33 314 | SubnetId: 315 | Ref: VpcStackPrivateSubnet2SubnetFBEE5372 316 | Metadata: 317 | aws:cdk:path: vpc/VpcStack/PrivateSubnet2/RouteTableAssociation 318 | VpcStackPrivateSubnet2DefaultRoute8D3C16A0: 319 | Type: AWS::EC2::Route 320 | Properties: 321 | RouteTableId: 322 | Ref: VpcStackPrivateSubnet2RouteTable83187A33 323 | 
DestinationCidrBlock: 0.0.0.0/0 324 | NatGatewayId: 325 | Ref: VpcStackPublicSubnet2NATGateway8FDE910A 326 | Metadata: 327 | aws:cdk:path: vpc/VpcStack/PrivateSubnet2/DefaultRoute 328 | VpcStackPrivateSubnet3SubnetB7C69E7D: 329 | Type: AWS::EC2::Subnet 330 | Properties: 331 | VpcId: 332 | Ref: VpcStack0F17A1B7 333 | AvailabilityZone: us-east-1c 334 | CidrBlock: 10.0.80.0/20 335 | MapPublicIpOnLaunch: false 336 | Tags: 337 | - Key: aws-cdk:subnet-name 338 | Value: Private 339 | - Key: aws-cdk:subnet-type 340 | Value: Private 341 | - Key: Name 342 | Value: vpc/VpcStack/PrivateSubnet3 343 | Metadata: 344 | aws:cdk:path: vpc/VpcStack/PrivateSubnet3/Subnet 345 | VpcStackPrivateSubnet3RouteTable9C543BD3: 346 | Type: AWS::EC2::RouteTable 347 | Properties: 348 | VpcId: 349 | Ref: VpcStack0F17A1B7 350 | Tags: 351 | - Key: Name 352 | Value: vpc/VpcStack/PrivateSubnet3 353 | Metadata: 354 | aws:cdk:path: vpc/VpcStack/PrivateSubnet3/RouteTable 355 | VpcStackPrivateSubnet3RouteTableAssociation7E077F1D: 356 | Type: AWS::EC2::SubnetRouteTableAssociation 357 | Properties: 358 | RouteTableId: 359 | Ref: VpcStackPrivateSubnet3RouteTable9C543BD3 360 | SubnetId: 361 | Ref: VpcStackPrivateSubnet3SubnetB7C69E7D 362 | Metadata: 363 | aws:cdk:path: vpc/VpcStack/PrivateSubnet3/RouteTableAssociation 364 | VpcStackPrivateSubnet3DefaultRoute33B85BED: 365 | Type: AWS::EC2::Route 366 | Properties: 367 | RouteTableId: 368 | Ref: VpcStackPrivateSubnet3RouteTable9C543BD3 369 | DestinationCidrBlock: 0.0.0.0/0 370 | NatGatewayId: 371 | Ref: VpcStackPublicSubnet3NATGatewayB101FD95 372 | Metadata: 373 | aws:cdk:path: vpc/VpcStack/PrivateSubnet3/DefaultRoute 374 | VpcStackIGW299C5A78: 375 | Type: AWS::EC2::InternetGateway 376 | Properties: 377 | Tags: 378 | - Key: Name 379 | Value: vpc/VpcStack 380 | Metadata: 381 | aws:cdk:path: vpc/VpcStack/IGW 382 | VpcStackVPCGW565B132F: 383 | Type: AWS::EC2::VPCGatewayAttachment 384 | Properties: 385 | VpcId: 386 | Ref: VpcStack0F17A1B7 387 | InternetGatewayId: 
388 | Ref: VpcStackIGW299C5A78 389 | Metadata: 390 | aws:cdk:path: vpc/VpcStack/VPCGW 391 | VpcStackS348B4C9B9: 392 | Type: AWS::EC2::VPCEndpoint 393 | Properties: 394 | ServiceName: 395 | Fn::Join: 396 | - "" 397 | - - com.amazonaws. 398 | - Ref: AWS::Region 399 | - .s3 400 | VpcId: 401 | Ref: VpcStack0F17A1B7 402 | RouteTableIds: 403 | - Ref: VpcStackPrivateSubnet1RouteTable25FDF5F8 404 | - Ref: VpcStackPrivateSubnet2RouteTable83187A33 405 | - Ref: VpcStackPrivateSubnet3RouteTable9C543BD3 406 | - Ref: VpcStackPublicSubnet1RouteTable4FE62862 407 | - Ref: VpcStackPublicSubnet2RouteTable5DDEE0F5 408 | - Ref: VpcStackPublicSubnet3RouteTableFEDDCBC0 409 | VpcEndpointType: Gateway 410 | Metadata: 411 | aws:cdk:path: vpc/VpcStack/S3/Resource 412 | -------------------------------------------------------------------------------- /vpc_us_east_2.yaml: -------------------------------------------------------------------------------- 1 | Resources: 2 | VpcStack0F17A1B7: 3 | Type: AWS::EC2::VPC 4 | Properties: 5 | CidrBlock: 10.0.0.0/16 6 | EnableDnsHostnames: true 7 | EnableDnsSupport: true 8 | InstanceTenancy: default 9 | Tags: 10 | - Key: Name 11 | Value: vpc/VpcStack 12 | Metadata: 13 | aws:cdk:path: vpc/VpcStack/Resource 14 | VpcStackPublicSubnet1Subnet3ACDE2AA: 15 | Type: AWS::EC2::Subnet 16 | Properties: 17 | VpcId: 18 | Ref: VpcStack0F17A1B7 19 | AvailabilityZone: us-east-2a 20 | CidrBlock: 10.0.0.0/20 21 | MapPublicIpOnLaunch: true 22 | Tags: 23 | - Key: aws-cdk:subnet-name 24 | Value: Public 25 | - Key: aws-cdk:subnet-type 26 | Value: Public 27 | - Key: Name 28 | Value: vpc/VpcStack/PublicSubnet1 29 | Metadata: 30 | aws:cdk:path: vpc/VpcStack/PublicSubnet1/Subnet 31 | VpcStackPublicSubnet1RouteTable4FE62862: 32 | Type: AWS::EC2::RouteTable 33 | Properties: 34 | VpcId: 35 | Ref: VpcStack0F17A1B7 36 | Tags: 37 | - Key: Name 38 | Value: vpc/VpcStack/PublicSubnet1 39 | Metadata: 40 | aws:cdk:path: vpc/VpcStack/PublicSubnet1/RouteTable 41 | 
VpcStackPublicSubnet1RouteTableAssociation3B798904: 42 | Type: AWS::EC2::SubnetRouteTableAssociation 43 | Properties: 44 | RouteTableId: 45 | Ref: VpcStackPublicSubnet1RouteTable4FE62862 46 | SubnetId: 47 | Ref: VpcStackPublicSubnet1Subnet3ACDE2AA 48 | Metadata: 49 | aws:cdk:path: vpc/VpcStack/PublicSubnet1/RouteTableAssociation 50 | VpcStackPublicSubnet1DefaultRoute4E14C6B4: 51 | Type: AWS::EC2::Route 52 | Properties: 53 | RouteTableId: 54 | Ref: VpcStackPublicSubnet1RouteTable4FE62862 55 | DestinationCidrBlock: 0.0.0.0/0 56 | GatewayId: 57 | Ref: VpcStackIGW299C5A78 58 | DependsOn: 59 | - VpcStackVPCGW565B132F 60 | Metadata: 61 | aws:cdk:path: vpc/VpcStack/PublicSubnet1/DefaultRoute 62 | VpcStackPublicSubnet1EIPA71BA69D: 63 | Type: AWS::EC2::EIP 64 | Properties: 65 | Domain: vpc 66 | Tags: 67 | - Key: Name 68 | Value: vpc/VpcStack/PublicSubnet1 69 | Metadata: 70 | aws:cdk:path: vpc/VpcStack/PublicSubnet1/EIP 71 | VpcStackPublicSubnet1NATGatewayB384AF90: 72 | Type: AWS::EC2::NatGateway 73 | Properties: 74 | SubnetId: 75 | Ref: VpcStackPublicSubnet1Subnet3ACDE2AA 76 | AllocationId: 77 | Fn::GetAtt: 78 | - VpcStackPublicSubnet1EIPA71BA69D 79 | - AllocationId 80 | Tags: 81 | - Key: Name 82 | Value: vpc/VpcStack/PublicSubnet1 83 | DependsOn: 84 | - VpcStackPublicSubnet1DefaultRoute4E14C6B4 85 | - VpcStackPublicSubnet1RouteTableAssociation3B798904 86 | Metadata: 87 | aws:cdk:path: vpc/VpcStack/PublicSubnet1/NATGateway 88 | VpcStackPublicSubnet2Subnet1F6A08E8: 89 | Type: AWS::EC2::Subnet 90 | Properties: 91 | VpcId: 92 | Ref: VpcStack0F17A1B7 93 | AvailabilityZone: us-east-2b 94 | CidrBlock: 10.0.16.0/20 95 | MapPublicIpOnLaunch: true 96 | Tags: 97 | - Key: aws-cdk:subnet-name 98 | Value: Public 99 | - Key: aws-cdk:subnet-type 100 | Value: Public 101 | - Key: Name 102 | Value: vpc/VpcStack/PublicSubnet2 103 | Metadata: 104 | aws:cdk:path: vpc/VpcStack/PublicSubnet2/Subnet 105 | VpcStackPublicSubnet2RouteTable5DDEE0F5: 106 | Type: AWS::EC2::RouteTable 107 | Properties: 
108 | VpcId: 109 | Ref: VpcStack0F17A1B7 110 | Tags: 111 | - Key: Name 112 | Value: vpc/VpcStack/PublicSubnet2 113 | Metadata: 114 | aws:cdk:path: vpc/VpcStack/PublicSubnet2/RouteTable 115 | VpcStackPublicSubnet2RouteTableAssociationFF440051: 116 | Type: AWS::EC2::SubnetRouteTableAssociation 117 | Properties: 118 | RouteTableId: 119 | Ref: VpcStackPublicSubnet2RouteTable5DDEE0F5 120 | SubnetId: 121 | Ref: VpcStackPublicSubnet2Subnet1F6A08E8 122 | Metadata: 123 | aws:cdk:path: vpc/VpcStack/PublicSubnet2/RouteTableAssociation 124 | VpcStackPublicSubnet2DefaultRoute8B9803F6: 125 | Type: AWS::EC2::Route 126 | Properties: 127 | RouteTableId: 128 | Ref: VpcStackPublicSubnet2RouteTable5DDEE0F5 129 | DestinationCidrBlock: 0.0.0.0/0 130 | GatewayId: 131 | Ref: VpcStackIGW299C5A78 132 | DependsOn: 133 | - VpcStackVPCGW565B132F 134 | Metadata: 135 | aws:cdk:path: vpc/VpcStack/PublicSubnet2/DefaultRoute 136 | VpcStackPublicSubnet2EIPBE670D17: 137 | Type: AWS::EC2::EIP 138 | Properties: 139 | Domain: vpc 140 | Tags: 141 | - Key: Name 142 | Value: vpc/VpcStack/PublicSubnet2 143 | Metadata: 144 | aws:cdk:path: vpc/VpcStack/PublicSubnet2/EIP 145 | VpcStackPublicSubnet2NATGateway8FDE910A: 146 | Type: AWS::EC2::NatGateway 147 | Properties: 148 | SubnetId: 149 | Ref: VpcStackPublicSubnet2Subnet1F6A08E8 150 | AllocationId: 151 | Fn::GetAtt: 152 | - VpcStackPublicSubnet2EIPBE670D17 153 | - AllocationId 154 | Tags: 155 | - Key: Name 156 | Value: vpc/VpcStack/PublicSubnet2 157 | DependsOn: 158 | - VpcStackPublicSubnet2DefaultRoute8B9803F6 159 | - VpcStackPublicSubnet2RouteTableAssociationFF440051 160 | Metadata: 161 | aws:cdk:path: vpc/VpcStack/PublicSubnet2/NATGateway 162 | VpcStackPublicSubnet3Subnet290057FA: 163 | Type: AWS::EC2::Subnet 164 | Properties: 165 | VpcId: 166 | Ref: VpcStack0F17A1B7 167 | AvailabilityZone: us-east-2c 168 | CidrBlock: 10.0.32.0/20 169 | MapPublicIpOnLaunch: true 170 | Tags: 171 | - Key: aws-cdk:subnet-name 172 | Value: Public 173 | - Key: 
aws-cdk:subnet-type 174 | Value: Public 175 | - Key: Name 176 | Value: vpc/VpcStack/PublicSubnet3 177 | Metadata: 178 | aws:cdk:path: vpc/VpcStack/PublicSubnet3/Subnet 179 | VpcStackPublicSubnet3RouteTableFEDDCBC0: 180 | Type: AWS::EC2::RouteTable 181 | Properties: 182 | VpcId: 183 | Ref: VpcStack0F17A1B7 184 | Tags: 185 | - Key: Name 186 | Value: vpc/VpcStack/PublicSubnet3 187 | Metadata: 188 | aws:cdk:path: vpc/VpcStack/PublicSubnet3/RouteTable 189 | VpcStackPublicSubnet3RouteTableAssociationF0D2CAD0: 190 | Type: AWS::EC2::SubnetRouteTableAssociation 191 | Properties: 192 | RouteTableId: 193 | Ref: VpcStackPublicSubnet3RouteTableFEDDCBC0 194 | SubnetId: 195 | Ref: VpcStackPublicSubnet3Subnet290057FA 196 | Metadata: 197 | aws:cdk:path: vpc/VpcStack/PublicSubnet3/RouteTableAssociation 198 | VpcStackPublicSubnet3DefaultRoute8F939BA2: 199 | Type: AWS::EC2::Route 200 | Properties: 201 | RouteTableId: 202 | Ref: VpcStackPublicSubnet3RouteTableFEDDCBC0 203 | DestinationCidrBlock: 0.0.0.0/0 204 | GatewayId: 205 | Ref: VpcStackIGW299C5A78 206 | DependsOn: 207 | - VpcStackVPCGW565B132F 208 | Metadata: 209 | aws:cdk:path: vpc/VpcStack/PublicSubnet3/DefaultRoute 210 | VpcStackPublicSubnet3EIP0521B737: 211 | Type: AWS::EC2::EIP 212 | Properties: 213 | Domain: vpc 214 | Tags: 215 | - Key: Name 216 | Value: vpc/VpcStack/PublicSubnet3 217 | Metadata: 218 | aws:cdk:path: vpc/VpcStack/PublicSubnet3/EIP 219 | VpcStackPublicSubnet3NATGatewayB101FD95: 220 | Type: AWS::EC2::NatGateway 221 | Properties: 222 | SubnetId: 223 | Ref: VpcStackPublicSubnet3Subnet290057FA 224 | AllocationId: 225 | Fn::GetAtt: 226 | - VpcStackPublicSubnet3EIP0521B737 227 | - AllocationId 228 | Tags: 229 | - Key: Name 230 | Value: vpc/VpcStack/PublicSubnet3 231 | DependsOn: 232 | - VpcStackPublicSubnet3DefaultRoute8F939BA2 233 | - VpcStackPublicSubnet3RouteTableAssociationF0D2CAD0 234 | Metadata: 235 | aws:cdk:path: vpc/VpcStack/PublicSubnet3/NATGateway 236 | VpcStackPrivateSubnet1SubnetECAA9AC8: 237 | Type: 
AWS::EC2::Subnet 238 | Properties: 239 | VpcId: 240 | Ref: VpcStack0F17A1B7 241 | AvailabilityZone: us-east-2a 242 | CidrBlock: 10.0.48.0/20 243 | MapPublicIpOnLaunch: false 244 | Tags: 245 | - Key: aws-cdk:subnet-name 246 | Value: Private 247 | - Key: aws-cdk:subnet-type 248 | Value: Private 249 | - Key: Name 250 | Value: vpc/VpcStack/PrivateSubnet1 251 | Metadata: 252 | aws:cdk:path: vpc/VpcStack/PrivateSubnet1/Subnet 253 | VpcStackPrivateSubnet1RouteTable25FDF5F8: 254 | Type: AWS::EC2::RouteTable 255 | Properties: 256 | VpcId: 257 | Ref: VpcStack0F17A1B7 258 | Tags: 259 | - Key: Name 260 | Value: vpc/VpcStack/PrivateSubnet1 261 | Metadata: 262 | aws:cdk:path: vpc/VpcStack/PrivateSubnet1/RouteTable 263 | VpcStackPrivateSubnet1RouteTableAssociation0FFF3070: 264 | Type: AWS::EC2::SubnetRouteTableAssociation 265 | Properties: 266 | RouteTableId: 267 | Ref: VpcStackPrivateSubnet1RouteTable25FDF5F8 268 | SubnetId: 269 | Ref: VpcStackPrivateSubnet1SubnetECAA9AC8 270 | Metadata: 271 | aws:cdk:path: vpc/VpcStack/PrivateSubnet1/RouteTableAssociation 272 | VpcStackPrivateSubnet1DefaultRoute983E1C4C: 273 | Type: AWS::EC2::Route 274 | Properties: 275 | RouteTableId: 276 | Ref: VpcStackPrivateSubnet1RouteTable25FDF5F8 277 | DestinationCidrBlock: 0.0.0.0/0 278 | NatGatewayId: 279 | Ref: VpcStackPublicSubnet1NATGatewayB384AF90 280 | Metadata: 281 | aws:cdk:path: vpc/VpcStack/PrivateSubnet1/DefaultRoute 282 | VpcStackPrivateSubnet2SubnetFBEE5372: 283 | Type: AWS::EC2::Subnet 284 | Properties: 285 | VpcId: 286 | Ref: VpcStack0F17A1B7 287 | AvailabilityZone: us-east-2b 288 | CidrBlock: 10.0.64.0/20 289 | MapPublicIpOnLaunch: false 290 | Tags: 291 | - Key: aws-cdk:subnet-name 292 | Value: Private 293 | - Key: aws-cdk:subnet-type 294 | Value: Private 295 | - Key: Name 296 | Value: vpc/VpcStack/PrivateSubnet2 297 | Metadata: 298 | aws:cdk:path: vpc/VpcStack/PrivateSubnet2/Subnet 299 | VpcStackPrivateSubnet2RouteTable83187A33: 300 | Type: AWS::EC2::RouteTable 301 | Properties: 302 | 
VpcId: 303 | Ref: VpcStack0F17A1B7 304 | Tags: 305 | - Key: Name 306 | Value: vpc/VpcStack/PrivateSubnet2 307 | Metadata: 308 | aws:cdk:path: vpc/VpcStack/PrivateSubnet2/RouteTable 309 | VpcStackPrivateSubnet2RouteTableAssociation5B7FAA59: 310 | Type: AWS::EC2::SubnetRouteTableAssociation 311 | Properties: 312 | RouteTableId: 313 | Ref: VpcStackPrivateSubnet2RouteTable83187A33 314 | SubnetId: 315 | Ref: VpcStackPrivateSubnet2SubnetFBEE5372 316 | Metadata: 317 | aws:cdk:path: vpc/VpcStack/PrivateSubnet2/RouteTableAssociation 318 | VpcStackPrivateSubnet2DefaultRoute8D3C16A0: 319 | Type: AWS::EC2::Route 320 | Properties: 321 | RouteTableId: 322 | Ref: VpcStackPrivateSubnet2RouteTable83187A33 323 | DestinationCidrBlock: 0.0.0.0/0 324 | NatGatewayId: 325 | Ref: VpcStackPublicSubnet2NATGateway8FDE910A 326 | Metadata: 327 | aws:cdk:path: vpc/VpcStack/PrivateSubnet2/DefaultRoute 328 | VpcStackPrivateSubnet3SubnetB7C69E7D: 329 | Type: AWS::EC2::Subnet 330 | Properties: 331 | VpcId: 332 | Ref: VpcStack0F17A1B7 333 | AvailabilityZone: us-east-2c 334 | CidrBlock: 10.0.80.0/20 335 | MapPublicIpOnLaunch: false 336 | Tags: 337 | - Key: aws-cdk:subnet-name 338 | Value: Private 339 | - Key: aws-cdk:subnet-type 340 | Value: Private 341 | - Key: Name 342 | Value: vpc/VpcStack/PrivateSubnet3 343 | Metadata: 344 | aws:cdk:path: vpc/VpcStack/PrivateSubnet3/Subnet 345 | VpcStackPrivateSubnet3RouteTable9C543BD3: 346 | Type: AWS::EC2::RouteTable 347 | Properties: 348 | VpcId: 349 | Ref: VpcStack0F17A1B7 350 | Tags: 351 | - Key: Name 352 | Value: vpc/VpcStack/PrivateSubnet3 353 | Metadata: 354 | aws:cdk:path: vpc/VpcStack/PrivateSubnet3/RouteTable 355 | VpcStackPrivateSubnet3RouteTableAssociation7E077F1D: 356 | Type: AWS::EC2::SubnetRouteTableAssociation 357 | Properties: 358 | RouteTableId: 359 | Ref: VpcStackPrivateSubnet3RouteTable9C543BD3 360 | SubnetId: 361 | Ref: VpcStackPrivateSubnet3SubnetB7C69E7D 362 | Metadata: 363 | aws:cdk:path: vpc/VpcStack/PrivateSubnet3/RouteTableAssociation 
364 | VpcStackPrivateSubnet3DefaultRoute33B85BED: 365 | Type: AWS::EC2::Route 366 | Properties: 367 | RouteTableId: 368 | Ref: VpcStackPrivateSubnet3RouteTable9C543BD3 369 | DestinationCidrBlock: 0.0.0.0/0 370 | NatGatewayId: 371 | Ref: VpcStackPublicSubnet3NATGatewayB101FD95 372 | Metadata: 373 | aws:cdk:path: vpc/VpcStack/PrivateSubnet3/DefaultRoute 374 | VpcStackIGW299C5A78: 375 | Type: AWS::EC2::InternetGateway 376 | Properties: 377 | Tags: 378 | - Key: Name 379 | Value: vpc/VpcStack 380 | Metadata: 381 | aws:cdk:path: vpc/VpcStack/IGW 382 | VpcStackVPCGW565B132F: 383 | Type: AWS::EC2::VPCGatewayAttachment 384 | Properties: 385 | VpcId: 386 | Ref: VpcStack0F17A1B7 387 | InternetGatewayId: 388 | Ref: VpcStackIGW299C5A78 389 | Metadata: 390 | aws:cdk:path: vpc/VpcStack/VPCGW 391 | VpcStackS348B4C9B9: 392 | Type: AWS::EC2::VPCEndpoint 393 | Properties: 394 | ServiceName: 395 | Fn::Join: 396 | - "" 397 | - - com.amazonaws. 398 | - Ref: AWS::Region 399 | - .s3 400 | VpcId: 401 | Ref: VpcStack0F17A1B7 402 | RouteTableIds: 403 | - Ref: VpcStackPrivateSubnet1RouteTable25FDF5F8 404 | - Ref: VpcStackPrivateSubnet2RouteTable83187A33 405 | - Ref: VpcStackPrivateSubnet3RouteTable9C543BD3 406 | - Ref: VpcStackPublicSubnet1RouteTable4FE62862 407 | - Ref: VpcStackPublicSubnet2RouteTable5DDEE0F5 408 | - Ref: VpcStackPublicSubnet3RouteTableFEDDCBC0 409 | VpcEndpointType: Gateway 410 | Metadata: 411 | aws:cdk:path: vpc/VpcStack/S3/Resource 412 | -------------------------------------------------------------------------------- /vpc_us_west_2.yaml: -------------------------------------------------------------------------------- 1 | Resources: 2 | VpcStack0F17A1B7: 3 | Type: AWS::EC2::VPC 4 | Properties: 5 | CidrBlock: 10.0.0.0/16 6 | EnableDnsHostnames: true 7 | EnableDnsSupport: true 8 | InstanceTenancy: default 9 | Tags: 10 | - Key: Name 11 | Value: vpc/VpcStack 12 | Metadata: 13 | aws:cdk:path: vpc/VpcStack/Resource 14 | VpcStackPublicSubnet1Subnet3ACDE2AA: 15 | Type: 
AWS::EC2::Subnet 16 | Properties: 17 | VpcId: 18 | Ref: VpcStack0F17A1B7 19 | AvailabilityZone: us-west-2a 20 | CidrBlock: 10.0.0.0/20 21 | MapPublicIpOnLaunch: true 22 | Tags: 23 | - Key: aws-cdk:subnet-name 24 | Value: Public 25 | - Key: aws-cdk:subnet-type 26 | Value: Public 27 | - Key: Name 28 | Value: vpc/VpcStack/PublicSubnet1 29 | Metadata: 30 | aws:cdk:path: vpc/VpcStack/PublicSubnet1/Subnet 31 | VpcStackPublicSubnet1RouteTable4FE62862: 32 | Type: AWS::EC2::RouteTable 33 | Properties: 34 | VpcId: 35 | Ref: VpcStack0F17A1B7 36 | Tags: 37 | - Key: Name 38 | Value: vpc/VpcStack/PublicSubnet1 39 | Metadata: 40 | aws:cdk:path: vpc/VpcStack/PublicSubnet1/RouteTable 41 | VpcStackPublicSubnet1RouteTableAssociation3B798904: 42 | Type: AWS::EC2::SubnetRouteTableAssociation 43 | Properties: 44 | RouteTableId: 45 | Ref: VpcStackPublicSubnet1RouteTable4FE62862 46 | SubnetId: 47 | Ref: VpcStackPublicSubnet1Subnet3ACDE2AA 48 | Metadata: 49 | aws:cdk:path: vpc/VpcStack/PublicSubnet1/RouteTableAssociation 50 | VpcStackPublicSubnet1DefaultRoute4E14C6B4: 51 | Type: AWS::EC2::Route 52 | Properties: 53 | RouteTableId: 54 | Ref: VpcStackPublicSubnet1RouteTable4FE62862 55 | DestinationCidrBlock: 0.0.0.0/0 56 | GatewayId: 57 | Ref: VpcStackIGW299C5A78 58 | DependsOn: 59 | - VpcStackVPCGW565B132F 60 | Metadata: 61 | aws:cdk:path: vpc/VpcStack/PublicSubnet1/DefaultRoute 62 | VpcStackPublicSubnet1EIPA71BA69D: 63 | Type: AWS::EC2::EIP 64 | Properties: 65 | Domain: vpc 66 | Tags: 67 | - Key: Name 68 | Value: vpc/VpcStack/PublicSubnet1 69 | Metadata: 70 | aws:cdk:path: vpc/VpcStack/PublicSubnet1/EIP 71 | VpcStackPublicSubnet1NATGatewayB384AF90: 72 | Type: AWS::EC2::NatGateway 73 | Properties: 74 | SubnetId: 75 | Ref: VpcStackPublicSubnet1Subnet3ACDE2AA 76 | AllocationId: 77 | Fn::GetAtt: 78 | - VpcStackPublicSubnet1EIPA71BA69D 79 | - AllocationId 80 | Tags: 81 | - Key: Name 82 | Value: vpc/VpcStack/PublicSubnet1 83 | DependsOn: 84 | - VpcStackPublicSubnet1DefaultRoute4E14C6B4 85 | - 
VpcStackPublicSubnet1RouteTableAssociation3B798904 86 | Metadata: 87 | aws:cdk:path: vpc/VpcStack/PublicSubnet1/NATGateway 88 | VpcStackPublicSubnet2Subnet1F6A08E8: 89 | Type: AWS::EC2::Subnet 90 | Properties: 91 | VpcId: 92 | Ref: VpcStack0F17A1B7 93 | AvailabilityZone: us-west-2b 94 | CidrBlock: 10.0.16.0/20 95 | MapPublicIpOnLaunch: true 96 | Tags: 97 | - Key: aws-cdk:subnet-name 98 | Value: Public 99 | - Key: aws-cdk:subnet-type 100 | Value: Public 101 | - Key: Name 102 | Value: vpc/VpcStack/PublicSubnet2 103 | Metadata: 104 | aws:cdk:path: vpc/VpcStack/PublicSubnet2/Subnet 105 | VpcStackPublicSubnet2RouteTable5DDEE0F5: 106 | Type: AWS::EC2::RouteTable 107 | Properties: 108 | VpcId: 109 | Ref: VpcStack0F17A1B7 110 | Tags: 111 | - Key: Name 112 | Value: vpc/VpcStack/PublicSubnet2 113 | Metadata: 114 | aws:cdk:path: vpc/VpcStack/PublicSubnet2/RouteTable 115 | VpcStackPublicSubnet2RouteTableAssociationFF440051: 116 | Type: AWS::EC2::SubnetRouteTableAssociation 117 | Properties: 118 | RouteTableId: 119 | Ref: VpcStackPublicSubnet2RouteTable5DDEE0F5 120 | SubnetId: 121 | Ref: VpcStackPublicSubnet2Subnet1F6A08E8 122 | Metadata: 123 | aws:cdk:path: vpc/VpcStack/PublicSubnet2/RouteTableAssociation 124 | VpcStackPublicSubnet2DefaultRoute8B9803F6: 125 | Type: AWS::EC2::Route 126 | Properties: 127 | RouteTableId: 128 | Ref: VpcStackPublicSubnet2RouteTable5DDEE0F5 129 | DestinationCidrBlock: 0.0.0.0/0 130 | GatewayId: 131 | Ref: VpcStackIGW299C5A78 132 | DependsOn: 133 | - VpcStackVPCGW565B132F 134 | Metadata: 135 | aws:cdk:path: vpc/VpcStack/PublicSubnet2/DefaultRoute 136 | VpcStackPublicSubnet2EIPBE670D17: 137 | Type: AWS::EC2::EIP 138 | Properties: 139 | Domain: vpc 140 | Tags: 141 | - Key: Name 142 | Value: vpc/VpcStack/PublicSubnet2 143 | Metadata: 144 | aws:cdk:path: vpc/VpcStack/PublicSubnet2/EIP 145 | VpcStackPublicSubnet2NATGateway8FDE910A: 146 | Type: AWS::EC2::NatGateway 147 | Properties: 148 | SubnetId: 149 | Ref: VpcStackPublicSubnet2Subnet1F6A08E8 150 | 
AllocationId: 151 | Fn::GetAtt: 152 | - VpcStackPublicSubnet2EIPBE670D17 153 | - AllocationId 154 | Tags: 155 | - Key: Name 156 | Value: vpc/VpcStack/PublicSubnet2 157 | DependsOn: 158 | - VpcStackPublicSubnet2DefaultRoute8B9803F6 159 | - VpcStackPublicSubnet2RouteTableAssociationFF440051 160 | Metadata: 161 | aws:cdk:path: vpc/VpcStack/PublicSubnet2/NATGateway 162 | VpcStackPublicSubnet3Subnet290057FA: 163 | Type: AWS::EC2::Subnet 164 | Properties: 165 | VpcId: 166 | Ref: VpcStack0F17A1B7 167 | AvailabilityZone: us-west-2c 168 | CidrBlock: 10.0.32.0/20 169 | MapPublicIpOnLaunch: true 170 | Tags: 171 | - Key: aws-cdk:subnet-name 172 | Value: Public 173 | - Key: aws-cdk:subnet-type 174 | Value: Public 175 | - Key: Name 176 | Value: vpc/VpcStack/PublicSubnet3 177 | Metadata: 178 | aws:cdk:path: vpc/VpcStack/PublicSubnet3/Subnet 179 | VpcStackPublicSubnet3RouteTableFEDDCBC0: 180 | Type: AWS::EC2::RouteTable 181 | Properties: 182 | VpcId: 183 | Ref: VpcStack0F17A1B7 184 | Tags: 185 | - Key: Name 186 | Value: vpc/VpcStack/PublicSubnet3 187 | Metadata: 188 | aws:cdk:path: vpc/VpcStack/PublicSubnet3/RouteTable 189 | VpcStackPublicSubnet3RouteTableAssociationF0D2CAD0: 190 | Type: AWS::EC2::SubnetRouteTableAssociation 191 | Properties: 192 | RouteTableId: 193 | Ref: VpcStackPublicSubnet3RouteTableFEDDCBC0 194 | SubnetId: 195 | Ref: VpcStackPublicSubnet3Subnet290057FA 196 | Metadata: 197 | aws:cdk:path: vpc/VpcStack/PublicSubnet3/RouteTableAssociation 198 | VpcStackPublicSubnet3DefaultRoute8F939BA2: 199 | Type: AWS::EC2::Route 200 | Properties: 201 | RouteTableId: 202 | Ref: VpcStackPublicSubnet3RouteTableFEDDCBC0 203 | DestinationCidrBlock: 0.0.0.0/0 204 | GatewayId: 205 | Ref: VpcStackIGW299C5A78 206 | DependsOn: 207 | - VpcStackVPCGW565B132F 208 | Metadata: 209 | aws:cdk:path: vpc/VpcStack/PublicSubnet3/DefaultRoute 210 | VpcStackPublicSubnet3EIP0521B737: 211 | Type: AWS::EC2::EIP 212 | Properties: 213 | Domain: vpc 214 | Tags: 215 | - Key: Name 216 | Value: 
vpc/VpcStack/PublicSubnet3 217 | Metadata: 218 | aws:cdk:path: vpc/VpcStack/PublicSubnet3/EIP 219 | VpcStackPublicSubnet3NATGatewayB101FD95: 220 | Type: AWS::EC2::NatGateway 221 | Properties: 222 | SubnetId: 223 | Ref: VpcStackPublicSubnet3Subnet290057FA 224 | AllocationId: 225 | Fn::GetAtt: 226 | - VpcStackPublicSubnet3EIP0521B737 227 | - AllocationId 228 | Tags: 229 | - Key: Name 230 | Value: vpc/VpcStack/PublicSubnet3 231 | DependsOn: 232 | - VpcStackPublicSubnet3DefaultRoute8F939BA2 233 | - VpcStackPublicSubnet3RouteTableAssociationF0D2CAD0 234 | Metadata: 235 | aws:cdk:path: vpc/VpcStack/PublicSubnet3/NATGateway 236 | VpcStackPrivateSubnet1SubnetECAA9AC8: 237 | Type: AWS::EC2::Subnet 238 | Properties: 239 | VpcId: 240 | Ref: VpcStack0F17A1B7 241 | AvailabilityZone: us-west-2a 242 | CidrBlock: 10.0.48.0/20 243 | MapPublicIpOnLaunch: false 244 | Tags: 245 | - Key: aws-cdk:subnet-name 246 | Value: Private 247 | - Key: aws-cdk:subnet-type 248 | Value: Private 249 | - Key: Name 250 | Value: vpc/VpcStack/PrivateSubnet1 251 | Metadata: 252 | aws:cdk:path: vpc/VpcStack/PrivateSubnet1/Subnet 253 | VpcStackPrivateSubnet1RouteTable25FDF5F8: 254 | Type: AWS::EC2::RouteTable 255 | Properties: 256 | VpcId: 257 | Ref: VpcStack0F17A1B7 258 | Tags: 259 | - Key: Name 260 | Value: vpc/VpcStack/PrivateSubnet1 261 | Metadata: 262 | aws:cdk:path: vpc/VpcStack/PrivateSubnet1/RouteTable 263 | VpcStackPrivateSubnet1RouteTableAssociation0FFF3070: 264 | Type: AWS::EC2::SubnetRouteTableAssociation 265 | Properties: 266 | RouteTableId: 267 | Ref: VpcStackPrivateSubnet1RouteTable25FDF5F8 268 | SubnetId: 269 | Ref: VpcStackPrivateSubnet1SubnetECAA9AC8 270 | Metadata: 271 | aws:cdk:path: vpc/VpcStack/PrivateSubnet1/RouteTableAssociation 272 | VpcStackPrivateSubnet1DefaultRoute983E1C4C: 273 | Type: AWS::EC2::Route 274 | Properties: 275 | RouteTableId: 276 | Ref: VpcStackPrivateSubnet1RouteTable25FDF5F8 277 | DestinationCidrBlock: 0.0.0.0/0 278 | NatGatewayId: 279 | Ref: 
VpcStackPublicSubnet1NATGatewayB384AF90 280 | Metadata: 281 | aws:cdk:path: vpc/VpcStack/PrivateSubnet1/DefaultRoute 282 | VpcStackPrivateSubnet2SubnetFBEE5372: 283 | Type: AWS::EC2::Subnet 284 | Properties: 285 | VpcId: 286 | Ref: VpcStack0F17A1B7 287 | AvailabilityZone: us-west-2b 288 | CidrBlock: 10.0.64.0/20 289 | MapPublicIpOnLaunch: false 290 | Tags: 291 | - Key: aws-cdk:subnet-name 292 | Value: Private 293 | - Key: aws-cdk:subnet-type 294 | Value: Private 295 | - Key: Name 296 | Value: vpc/VpcStack/PrivateSubnet2 297 | Metadata: 298 | aws:cdk:path: vpc/VpcStack/PrivateSubnet2/Subnet 299 | VpcStackPrivateSubnet2RouteTable83187A33: 300 | Type: AWS::EC2::RouteTable 301 | Properties: 302 | VpcId: 303 | Ref: VpcStack0F17A1B7 304 | Tags: 305 | - Key: Name 306 | Value: vpc/VpcStack/PrivateSubnet2 307 | Metadata: 308 | aws:cdk:path: vpc/VpcStack/PrivateSubnet2/RouteTable 309 | VpcStackPrivateSubnet2RouteTableAssociation5B7FAA59: 310 | Type: AWS::EC2::SubnetRouteTableAssociation 311 | Properties: 312 | RouteTableId: 313 | Ref: VpcStackPrivateSubnet2RouteTable83187A33 314 | SubnetId: 315 | Ref: VpcStackPrivateSubnet2SubnetFBEE5372 316 | Metadata: 317 | aws:cdk:path: vpc/VpcStack/PrivateSubnet2/RouteTableAssociation 318 | VpcStackPrivateSubnet2DefaultRoute8D3C16A0: 319 | Type: AWS::EC2::Route 320 | Properties: 321 | RouteTableId: 322 | Ref: VpcStackPrivateSubnet2RouteTable83187A33 323 | DestinationCidrBlock: 0.0.0.0/0 324 | NatGatewayId: 325 | Ref: VpcStackPublicSubnet2NATGateway8FDE910A 326 | Metadata: 327 | aws:cdk:path: vpc/VpcStack/PrivateSubnet2/DefaultRoute 328 | VpcStackPrivateSubnet3SubnetB7C69E7D: 329 | Type: AWS::EC2::Subnet 330 | Properties: 331 | VpcId: 332 | Ref: VpcStack0F17A1B7 333 | AvailabilityZone: us-west-2c 334 | CidrBlock: 10.0.80.0/20 335 | MapPublicIpOnLaunch: false 336 | Tags: 337 | - Key: aws-cdk:subnet-name 338 | Value: Private 339 | - Key: aws-cdk:subnet-type 340 | Value: Private 341 | - Key: Name 342 | Value: vpc/VpcStack/PrivateSubnet3 343 
| Metadata: 344 | aws:cdk:path: vpc/VpcStack/PrivateSubnet3/Subnet 345 | VpcStackPrivateSubnet3RouteTable9C543BD3: 346 | Type: AWS::EC2::RouteTable 347 | Properties: 348 | VpcId: 349 | Ref: VpcStack0F17A1B7 350 | Tags: 351 | - Key: Name 352 | Value: vpc/VpcStack/PrivateSubnet3 353 | Metadata: 354 | aws:cdk:path: vpc/VpcStack/PrivateSubnet3/RouteTable 355 | VpcStackPrivateSubnet3RouteTableAssociation7E077F1D: 356 | Type: AWS::EC2::SubnetRouteTableAssociation 357 | Properties: 358 | RouteTableId: 359 | Ref: VpcStackPrivateSubnet3RouteTable9C543BD3 360 | SubnetId: 361 | Ref: VpcStackPrivateSubnet3SubnetB7C69E7D 362 | Metadata: 363 | aws:cdk:path: vpc/VpcStack/PrivateSubnet3/RouteTableAssociation 364 | VpcStackPrivateSubnet3DefaultRoute33B85BED: 365 | Type: AWS::EC2::Route 366 | Properties: 367 | RouteTableId: 368 | Ref: VpcStackPrivateSubnet3RouteTable9C543BD3 369 | DestinationCidrBlock: 0.0.0.0/0 370 | NatGatewayId: 371 | Ref: VpcStackPublicSubnet3NATGatewayB101FD95 372 | Metadata: 373 | aws:cdk:path: vpc/VpcStack/PrivateSubnet3/DefaultRoute 374 | VpcStackIGW299C5A78: 375 | Type: AWS::EC2::InternetGateway 376 | Properties: 377 | Tags: 378 | - Key: Name 379 | Value: vpc/VpcStack 380 | Metadata: 381 | aws:cdk:path: vpc/VpcStack/IGW 382 | VpcStackVPCGW565B132F: 383 | Type: AWS::EC2::VPCGatewayAttachment 384 | Properties: 385 | VpcId: 386 | Ref: VpcStack0F17A1B7 387 | InternetGatewayId: 388 | Ref: VpcStackIGW299C5A78 389 | Metadata: 390 | aws:cdk:path: vpc/VpcStack/VPCGW 391 | VpcStackS348B4C9B9: 392 | Type: AWS::EC2::VPCEndpoint 393 | Properties: 394 | ServiceName: 395 | Fn::Join: 396 | - "" 397 | - - com.amazonaws. 
398 | - Ref: AWS::Region 399 | - .s3 400 | VpcId: 401 | Ref: VpcStack0F17A1B7 402 | RouteTableIds: 403 | - Ref: VpcStackPrivateSubnet1RouteTable25FDF5F8 404 | - Ref: VpcStackPrivateSubnet2RouteTable83187A33 405 | - Ref: VpcStackPrivateSubnet3RouteTable9C543BD3 406 | - Ref: VpcStackPublicSubnet1RouteTable4FE62862 407 | - Ref: VpcStackPublicSubnet2RouteTable5DDEE0F5 408 | - Ref: VpcStackPublicSubnet3RouteTableFEDDCBC0 409 | VpcEndpointType: Gateway 410 | Metadata: 411 | aws:cdk:path: vpc/VpcStack/S3/Resource 412 | --------------------------------------------------------------------------------