├── blueprints ├── examples │ ├── Lambda │ │ ├── __init__.py │ │ ├── image │ │ │ ├── README.md │ │ │ ├── requirements.txt │ │ │ ├── ecrpolicy.json │ │ │ ├── Dockerfile │ │ │ └── Makefile │ │ ├── dags │ │ │ ├── .airflowignore │ │ │ └── requirements │ │ │ │ └── requirements.txt │ │ ├── DAGtasks.png │ │ └── LambdaArchitecture.png │ ├── ECS │ │ ├── infra │ │ │ ├── cdk │ │ │ │ ├── __init__.py │ │ │ │ └── common │ │ │ │ │ └── constants.py │ │ │ ├── requirements.txt │ │ │ └── spark_image │ │ │ │ ├── Dockerfile │ │ │ │ └── app.py │ │ ├── mwaa │ │ │ ├── requirements │ │ │ │ └── requirements.txt │ │ │ └── dags │ │ │ │ ├── delete_ecs_cluster_dag.py │ │ │ │ ├── create_ecs_cluster_dag.py │ │ │ │ └── deregister_ecs_task_definition_dag.py │ │ ├── app.py │ │ └── cdk.json │ ├── EKS │ │ ├── infra │ │ │ └── cdk │ │ │ │ ├── cdk │ │ │ │ ├── __init__.py │ │ │ │ └── cdk_stack.py │ │ │ │ ├── stacks │ │ │ │ ├── __init__.py │ │ │ │ ├── common │ │ │ │ │ └── constants.py │ │ │ │ ├── s3_deploy.py │ │ │ │ ├── mwaa_environment.py │ │ │ │ └── network_configuration_and_logging.py │ │ │ │ ├── tests │ │ │ │ ├── __init__.py │ │ │ │ └── unit │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── test_cdk_stack.py │ │ │ │ ├── requirements-dev.txt │ │ │ │ ├── requirements.txt │ │ │ │ ├── .gitignore │ │ │ │ ├── Makefile │ │ │ │ ├── source.bat │ │ │ │ ├── cdk.json │ │ │ │ ├── app.py │ │ │ │ └── README.md │ │ ├── images │ │ │ ├── dags.png │ │ │ ├── variables.png │ │ │ ├── requirements.png │ │ │ ├── initialize_vars.png │ │ │ ├── edit_environment.png │ │ │ └── nodegroup_variable.png │ │ ├── dags │ │ │ ├── requirements.txt │ │ │ ├── run_pod.py │ │ │ └── delete_nodegroup_cluster.py │ │ └── Makefile │ ├── EMR_on_EKS │ │ ├── infra │ │ │ ├── cdk │ │ │ │ ├── cdk │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── cdk_stack.py │ │ │ │ ├── stacks │ │ │ │ │ └── __init__.py │ │ │ │ ├── requirements.txt │ │ │ │ ├── app.py │ │ │ │ ├── cdk.json │ │ │ │ └── setup.py │ │ │ └── terraform │ │ │ │ ├── versions.tf │ │ │ │ ├── outputs.tf │ │ │ │ └── 
variables.tf │ │ ├── Makefile │ │ └── pre_termination.sh │ ├── AWSGlue │ │ ├── infra │ │ │ └── terraform │ │ │ │ ├── locals.tf │ │ │ │ ├── variables.tf │ │ │ │ └── outputs.tf │ │ ├── pre_termination.sh │ │ ├── Makefile │ │ └── post_provision.sh │ └── EMR │ │ ├── Makefile │ │ ├── pre_termination.sh │ │ ├── post_provision.sh │ │ ├── README.md │ │ └── spark │ │ └── nyc_aggregations.py └── Makefile ├── usecases ├── mwaa-glue-bedrock │ ├── src │ │ ├── lambda │ │ │ ├── __init__.py │ │ │ └── requirements.txt │ │ └── dags │ │ │ └── mwaa_config.py │ ├── requirements.txt │ ├── .gitignore │ ├── stack_config.py │ └── events │ │ └── event.json ├── mwaa-with-codeartifact │ ├── infra │ │ ├── __init__.py │ │ ├── codeartifact_stack.py │ │ └── s3_stack.py │ ├── mwaa-ca-bucket-content │ │ ├── dags │ │ │ ├── codeartifact.txt │ │ │ └── tutorial.py │ │ └── requirements.txt │ ├── requirements.txt │ ├── .env │ ├── requirements-dev.txt │ ├── docs │ │ └── architecture.png │ ├── .gitignore │ ├── app.py │ └── Makefile ├── metadata-migration │ ├── .airflowignore │ └── README.md ├── image-processing │ ├── .gitignore │ ├── images │ │ ├── package-lock.json │ │ ├── 4_no_face.jpg │ │ ├── 1_happy_face.jpg │ │ ├── 2_sunglass_face.jpg │ │ └── 3_multiple_faces.jpg │ ├── dags │ │ ├── 1.10 │ │ │ └── requirements.txt │ │ └── 2.0 │ │ │ └── requirements.txt │ ├── graphview.png │ └── lambda │ │ ├── Makefile │ │ └── package.json ├── mwaa-glue-athena │ ├── ArtifactBucket │ │ ├── dqrules │ │ │ ├── person │ │ │ └── sporting_event_ticket │ │ ├── athenasql │ │ │ ├── sporting_event_info_agg │ │ │ └── sporting_event_ticket_info_agg │ │ └── gluescripts │ │ │ └── convert_to_parquet.py │ ├── MWAAEnvironmentBucket │ │ └── dags │ │ │ ├── sampledb │ │ │ ├── data │ │ │ │ └── csv │ │ │ │ │ ├── sport_league.csv │ │ │ │ │ ├── sport_type.csv │ │ │ │ │ ├── seat_type.csv │ │ │ │ │ └── sport_division.csv │ │ │ ├── user │ │ │ │ └── create-user.sql │ │ │ └── schema │ │ │ │ ├── functions │ │ │ │ ├── esubstr.sql │ │ │ │ ├── 
setnflhomefield.sql │ │ │ │ ├── loadmlbteams.sql │ │ │ │ ├── selltickets.sql │ │ │ │ └── loadmlbplayers.sql │ │ │ │ └── create-view.sql │ │ │ ├── mwaa_config.py │ │ │ ├── run_athena_mapped_dynamic.py │ │ │ ├── run_athena_mapped_dynamic-dataset.py │ │ │ ├── run_athena.py │ │ │ └── create_resources.py │ └── README.md ├── start-stop-mwaa-environment │ ├── mwaairflow │ │ └── assets │ │ │ └── requirements.txt │ ├── .huskyrc.json │ ├── .eslintignore │ ├── lib │ │ ├── lambda │ │ │ ├── tsconfig.json │ │ │ ├── package.json │ │ │ ├── mwaa-update-environment-function.ts │ │ │ ├── mwaa-status-poller-function.ts │ │ │ ├── mwaa-new-environment-function.ts │ │ │ ├── mwaa-update-environment-function.test.ts │ │ │ ├── dags-trigger-function.test.ts │ │ │ └── dags-trigger-function.ts │ │ ├── infrastructure │ │ │ ├── mwaa-base-stack.ts │ │ │ ├── mwaa-base-stack.test.ts │ │ │ └── mwaa-polling-stack.test.ts │ │ └── commons │ │ │ └── prepare-test-environment.ts │ ├── .npmignore │ ├── .prettierrc.json │ ├── design │ │ ├── Architecture.png │ │ ├── DeploymentDiagram.png │ │ └── ExecutionDiagram.png │ ├── .lintstagedrc.json │ ├── .editorconfig │ ├── CODE_OF_CONDUCT.md │ ├── tsconfig.json │ ├── LICENSE.md │ ├── .eslintrc.json │ ├── bin │ │ └── mwaa-pause-resume.ts │ ├── jest.config.js │ └── cdk.json ├── mwaa-public-webserver-custom-domain │ ├── src │ │ ├── lambda-edge │ │ │ ├── http-headers │ │ │ │ ├── .npmignore │ │ │ │ ├── package.json │ │ │ │ └── index.ts │ │ │ ├── tsconfig.json │ │ │ ├── shared │ │ │ │ ├── error-page │ │ │ │ │ └── html.d.ts │ │ │ │ └── https.ts │ │ │ ├── sign-out │ │ │ │ └── package.json │ │ │ ├── check-auth │ │ │ │ ├── package.json │ │ │ │ └── configuration.json │ │ │ ├── refresh-auth │ │ │ │ └── package.json │ │ │ └── parse-auth │ │ │ │ └── package.json │ │ └── cfn-custom-resources │ │ │ ├── us-east-1-lambda-stack │ │ │ ├── package.json │ │ │ ├── https.ts │ │ │ └── cfn-response.ts │ │ │ ├── user-pool-client │ │ │ ├── package.json │ │ │ └── cfn-response.ts │ │ │ ├── 
user-pool-domain │ │ │ ├── package.json │ │ │ └── cfn-response.ts │ │ │ ├── client-secret-retrieval │ │ │ ├── package.json │ │ │ └── cfn-response.ts │ │ │ ├── generate-secret │ │ │ ├── package.json │ │ │ └── cfn-response.ts │ │ │ └── lambda-code-update │ │ │ ├── package.json │ │ │ ├── https.ts │ │ │ └── cfn-response.ts │ ├── images │ │ ├── sign-in-attempt.png │ │ ├── redirect-and-access.png │ │ └── auth-and-verification.png │ ├── parameters.txt.sample │ ├── tsconfig.json │ └── webpack.config.js ├── mwaa-dag-factory-example │ ├── requirements │ │ └── requirements.txt │ └── dags │ │ ├── dag_factory_loader.py │ │ └── .airflowignore ├── mwaa-snowflake-integration │ ├── mwaa_snowflake_queries │ │ ├── create_schema.sql │ │ ├── create_database.sql │ │ ├── create_stage.sql │ │ └── create_storage_int.sql │ ├── requirements.txt │ └── README.md ├── mwaa-cognito-cdk │ ├── docs │ │ └── images │ │ │ ├── stacks_diagram.png │ │ │ └── authentication_workflow.png │ ├── .gitignore │ ├── tsconfig.json │ ├── test │ │ ├── vpc-stack.test.ts │ │ ├── mwaa-stack.test.ts │ │ └── storage-stack.test.ts │ ├── LICENSE │ ├── Makefile │ └── dags │ │ └── sample.py ├── mwaa-observability-enhancement │ ├── images │ │ └── mwaa_observability.png │ └── README.md ├── local-runner-on-ecs-fargate │ ├── terraform │ │ └── ecs │ │ │ └── output.tf │ └── cloudformation │ │ └── parameter-values.json ├── mwaa_utilization_cw_metric │ ├── Makefile │ ├── infra │ │ └── terraform │ │ │ ├── variables.tf │ │ │ └── outputs.tf │ ├── pre_termination.sh │ ├── README.md │ └── post_provision.sh └── README.md ├── dags ├── xgboost-ml-pipeline │ ├── requirements │ │ ├── 1.10 │ │ │ └── requirements.txt │ │ └── 2.0 │ │ │ └── requirements.txt │ ├── 2.0 │ │ └── config.py │ ├── 1.10 │ │ └── config.py │ └── glue │ │ └── glue_etl.py ├── airflow-243-examples │ ├── dynamic_task_mapping │ │ └── data │ │ │ ├── 1.txt │ │ │ ├── 2.txt │ │ │ └── 3.txt │ ├── README.md │ └── python_version_checker │ │ ├── README.md │ │ └── python-version.py ├── 
airflow-272-examples │ ├── setup_teardown_tasks │ │ ├── image_1.png │ │ ├── image_2.png │ │ └── image_3.png │ ├── deferrable_operators │ │ ├── source-dataset.zip │ │ └── images │ │ │ ├── mwaa-deferrable-operators-sample-dag.png │ │ │ ├── mwaa-deferrable-operators-sample-poking.png │ │ │ ├── mwaa-deferrable-operators-sample-polling.png │ │ │ └── mwaa-deferrable-operators-sample-deferred-state.png │ └── README.md ├── duplicate_role │ └── README.md ├── bash_operator_script │ └── 1.10 │ │ └── bash_operator.py └── get_dag_id │ └── README.md ├── infra ├── terraform │ ├── versions.tf │ ├── variables.tf │ ├── outputs.tf │ └── README.md └── cloudformation │ └── README.md ├── CODE_OF_CONDUCT.md ├── LICENSE ├── requirements ├── amazon_backport │ ├── 1.10 │ │ └── requirements_amazon_backport.txt │ └── README.md └── gcp_backport │ ├── 1.10 │ └── requirements_gcp_backport.txt │ └── README.md └── .gitignore /blueprints/examples/Lambda/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /blueprints/examples/Lambda/image/README.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /blueprints/examples/ECS/infra/cdk/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /blueprints/examples/EKS/infra/cdk/cdk/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /blueprints/examples/EKS/infra/cdk/stacks/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/blueprints/examples/EKS/infra/cdk/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /usecases/mwaa-glue-bedrock/src/lambda/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /usecases/mwaa-with-codeartifact/infra/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /blueprints/examples/EKS/infra/cdk/tests/unit/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /blueprints/examples/EMR_on_EKS/infra/cdk/cdk/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /blueprints/examples/EMR_on_EKS/infra/cdk/stacks/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /usecases/metadata-migration/.airflowignore: -------------------------------------------------------------------------------- 1 | common_package/.* -------------------------------------------------------------------------------- /usecases/mwaa-glue-bedrock/src/lambda/requirements.txt: -------------------------------------------------------------------------------- 1 | requests -------------------------------------------------------------------------------- /blueprints/examples/EKS/infra/cdk/requirements-dev.txt: -------------------------------------------------------------------------------- 1 | pytest==6.2.5 2 | 
-------------------------------------------------------------------------------- /blueprints/examples/Lambda/image/requirements.txt: -------------------------------------------------------------------------------- 1 | pyarrow 2 | s3fs 3 | pandas -------------------------------------------------------------------------------- /usecases/mwaa-glue-bedrock/requirements.txt: -------------------------------------------------------------------------------- 1 | cryptography 2 | pyjwt 3 | requests -------------------------------------------------------------------------------- /usecases/mwaa-with-codeartifact/mwaa-ca-bucket-content/dags/codeartifact.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /blueprints/examples/Lambda/dags/.airflowignore: -------------------------------------------------------------------------------- 1 | custom_libraries 2 | requirements -------------------------------------------------------------------------------- /usecases/image-processing/.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | *.toml 3 | .DS_Store 4 | .aws-sam -------------------------------------------------------------------------------- /dags/xgboost-ml-pipeline/requirements/1.10/requirements.txt: -------------------------------------------------------------------------------- 1 | sagemaker==1.72.0 2 | s3fs==0.5.1 -------------------------------------------------------------------------------- /usecases/image-processing/images/package-lock.json: -------------------------------------------------------------------------------- 1 | { 2 | "lockfileVersion": 1 3 | } 4 | -------------------------------------------------------------------------------- /usecases/mwaa-with-codeartifact/requirements.txt: -------------------------------------------------------------------------------- 1 | aws-cdk-lib>=2.100.0 
2 | constructs>=10.0.0 -------------------------------------------------------------------------------- /usecases/mwaa-glue-athena/ArtifactBucket/dqrules/person: -------------------------------------------------------------------------------- 1 | Rules = [ 2 | IsComplete "full_name" 3 | ] -------------------------------------------------------------------------------- /usecases/mwaa-with-codeartifact/.env: -------------------------------------------------------------------------------- 1 | AWS_REGION= 2 | BUCKET_NAME= 3 | AIRFLOW_VERSION=2.10.3 4 | -------------------------------------------------------------------------------- /usecases/start-stop-mwaa-environment/mwaairflow/assets/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3 2 | smart-open 3 | -------------------------------------------------------------------------------- /usecases/image-processing/dags/1.10/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3 >= 1.17.4 2 | apache-airflow-providers-amazon -------------------------------------------------------------------------------- /usecases/image-processing/dags/2.0/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3 >= 1.17.4 2 | apache-airflow-providers-amazon -------------------------------------------------------------------------------- /usecases/mwaa-public-webserver-custom-domain/src/lambda-edge/http-headers/.npmignore: -------------------------------------------------------------------------------- 1 | * 2 | !bundle.* -------------------------------------------------------------------------------- /usecases/mwaa-glue-bedrock/.gitignore: -------------------------------------------------------------------------------- 1 | .aws-sam 2 | .DS_Store 3 | samconfig.toml 4 | cert 5 | src/salesforce 6 | -------------------------------------------------------------------------------- 
/blueprints/examples/ECS/mwaa/requirements/requirements.txt: -------------------------------------------------------------------------------- 1 | apache-airflow-providers-amazon==7.2.1 2 | boto3==1.26.70 -------------------------------------------------------------------------------- /dags/airflow-243-examples/dynamic_task_mapping/data/1.txt: -------------------------------------------------------------------------------- 1 | Say 2 | Hello 3 | to Airflow 4 | Dynamic 5 | Task Mapping -------------------------------------------------------------------------------- /blueprints/examples/ECS/infra/requirements.txt: -------------------------------------------------------------------------------- 1 | aws-cdk-lib==2.72.1 2 | constructs>=10.0.0,<11.0.0 3 | boto3==1.26.70 4 | -------------------------------------------------------------------------------- /blueprints/examples/EMR_on_EKS/infra/cdk/requirements.txt: -------------------------------------------------------------------------------- 1 | aws-cdk-lib==2.31.2 2 | boto3 3 | constructs>=10.0.0,<11.0.0 4 | -------------------------------------------------------------------------------- /dags/xgboost-ml-pipeline/requirements/2.0/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3==1.17.49 2 | sagemaker==1.72.0 3 | s3fs==0.5.1 4 | 5 | -------------------------------------------------------------------------------- /usecases/mwaa-with-codeartifact/requirements-dev.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | 3 | black>=23.0.0 4 | typing-extensions>=4.0.0 -------------------------------------------------------------------------------- /blueprints/examples/EKS/infra/cdk/requirements.txt: -------------------------------------------------------------------------------- 1 | aws-cdk-lib==2.51.1 2 | boto3==1.24.77 3 | constructs>=10.0.0,<11.0.0 4 | 
-------------------------------------------------------------------------------- /usecases/mwaa-dag-factory-example/requirements/requirements.txt: -------------------------------------------------------------------------------- 1 | apache-airflow-providers-amazon==9.15.0 2 | dag-factory==1.0.1 -------------------------------------------------------------------------------- /usecases/mwaa-snowflake-integration/mwaa_snowflake_queries/create_schema.sql: -------------------------------------------------------------------------------- 1 | create schema IF NOT EXISTS {{params.schema_name}}; -------------------------------------------------------------------------------- /usecases/start-stop-mwaa-environment/.huskyrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "hooks": { 3 | "pre-commit": "npx lint-staged" 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /usecases/mwaa-with-codeartifact/mwaa-ca-bucket-content/requirements.txt: -------------------------------------------------------------------------------- 1 | -r /usr/local/airflow/dags/codeartifact.txt 2 | numpy>=1.24.0 -------------------------------------------------------------------------------- /usecases/mwaa-snowflake-integration/mwaa_snowflake_queries/create_database.sql: -------------------------------------------------------------------------------- 1 | create database IF NOT EXISTS {{params.database_name}} 2 | -------------------------------------------------------------------------------- /usecases/start-stop-mwaa-environment/.eslintignore: -------------------------------------------------------------------------------- 1 | reports/ 2 | node_modules/ 3 | coverage/ 4 | bin/ 5 | 6 | **/*.d.ts 7 | **/*.js 8 | -------------------------------------------------------------------------------- /blueprints/examples/EKS/images/dags.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/amazon-mwaa-examples/HEAD/blueprints/examples/EKS/images/dags.png -------------------------------------------------------------------------------- /blueprints/examples/Lambda/DAGtasks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-mwaa-examples/HEAD/blueprints/examples/Lambda/DAGtasks.png -------------------------------------------------------------------------------- /usecases/image-processing/graphview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-mwaa-examples/HEAD/usecases/image-processing/graphview.png -------------------------------------------------------------------------------- /blueprints/examples/ECS/infra/cdk/common/constants.py: -------------------------------------------------------------------------------- 1 | VERSION = 'V1' 2 | ID = f'mwaa-blueprint-{VERSION}' 3 | CLUSTER_NAME = f'{ID}-eks-{VERSION}' 4 | -------------------------------------------------------------------------------- /blueprints/examples/EKS/images/variables.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-mwaa-examples/HEAD/blueprints/examples/EKS/images/variables.png -------------------------------------------------------------------------------- /usecases/mwaa-public-webserver-custom-domain/src/lambda-edge/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "../../tsconfig.json", 3 | "include": ["./**/*"] 4 | } 5 | -------------------------------------------------------------------------------- /usecases/start-stop-mwaa-environment/lib/lambda/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "../../tsconfig.json", 3 | "include": [ 4 | 
"*.ts" 5 | ] 6 | } 7 | -------------------------------------------------------------------------------- /blueprints/examples/EKS/images/requirements.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-mwaa-examples/HEAD/blueprints/examples/EKS/images/requirements.png -------------------------------------------------------------------------------- /usecases/image-processing/images/4_no_face.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-mwaa-examples/HEAD/usecases/image-processing/images/4_no_face.jpg -------------------------------------------------------------------------------- /usecases/start-stop-mwaa-environment/.npmignore: -------------------------------------------------------------------------------- 1 | *.ts 2 | !*.d.ts 3 | 4 | # CDK asset staging directory 5 | cdk.context.json 6 | .cdk.staging/ 7 | cdk.out/ 8 | -------------------------------------------------------------------------------- /blueprints/examples/EKS/images/initialize_vars.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-mwaa-examples/HEAD/blueprints/examples/EKS/images/initialize_vars.png -------------------------------------------------------------------------------- /blueprints/examples/Lambda/LambdaArchitecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-mwaa-examples/HEAD/blueprints/examples/Lambda/LambdaArchitecture.png -------------------------------------------------------------------------------- /usecases/image-processing/images/1_happy_face.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/amazon-mwaa-examples/HEAD/usecases/image-processing/images/1_happy_face.jpg -------------------------------------------------------------------------------- /blueprints/examples/EKS/images/edit_environment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-mwaa-examples/HEAD/blueprints/examples/EKS/images/edit_environment.png -------------------------------------------------------------------------------- /usecases/image-processing/images/2_sunglass_face.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-mwaa-examples/HEAD/usecases/image-processing/images/2_sunglass_face.jpg -------------------------------------------------------------------------------- /blueprints/examples/EKS/images/nodegroup_variable.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-mwaa-examples/HEAD/blueprints/examples/EKS/images/nodegroup_variable.png -------------------------------------------------------------------------------- /usecases/image-processing/images/3_multiple_faces.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-mwaa-examples/HEAD/usecases/image-processing/images/3_multiple_faces.jpg -------------------------------------------------------------------------------- /usecases/mwaa-with-codeartifact/docs/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-mwaa-examples/HEAD/usecases/mwaa-with-codeartifact/docs/architecture.png -------------------------------------------------------------------------------- /usecases/mwaa-cognito-cdk/docs/images/stacks_diagram.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-mwaa-examples/HEAD/usecases/mwaa-cognito-cdk/docs/images/stacks_diagram.png -------------------------------------------------------------------------------- /usecases/mwaa-public-webserver-custom-domain/src/lambda-edge/shared/error-page/html.d.ts: -------------------------------------------------------------------------------- 1 | declare module "*.html" { 2 | const content: string; 3 | export default content; 4 | } 5 | -------------------------------------------------------------------------------- /dags/airflow-272-examples/setup_teardown_tasks/image_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-mwaa-examples/HEAD/dags/airflow-272-examples/setup_teardown_tasks/image_1.png -------------------------------------------------------------------------------- /dags/airflow-272-examples/setup_teardown_tasks/image_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-mwaa-examples/HEAD/dags/airflow-272-examples/setup_teardown_tasks/image_2.png -------------------------------------------------------------------------------- /dags/airflow-272-examples/setup_teardown_tasks/image_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-mwaa-examples/HEAD/dags/airflow-272-examples/setup_teardown_tasks/image_3.png -------------------------------------------------------------------------------- /usecases/start-stop-mwaa-environment/.prettierrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "trailingComma": "es5", 3 | "tabWidth": 2, 4 | "semi": true, 5 | "singleQuote": true, 6 | "printWidth": 140 7 | } 8 | 
-------------------------------------------------------------------------------- /usecases/start-stop-mwaa-environment/design/Architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-mwaa-examples/HEAD/usecases/start-stop-mwaa-environment/design/Architecture.png -------------------------------------------------------------------------------- /usecases/start-stop-mwaa-environment/.lintstagedrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "*.{js,ts,json,md}": ["prettier --write", "git add"], 3 | "*.ts": ["npm run lint:fix", "prettier --write", "git add"] 4 | } 5 | -------------------------------------------------------------------------------- /dags/airflow-272-examples/deferrable_operators/source-dataset.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-mwaa-examples/HEAD/dags/airflow-272-examples/deferrable_operators/source-dataset.zip -------------------------------------------------------------------------------- /usecases/mwaa-cognito-cdk/docs/images/authentication_workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-mwaa-examples/HEAD/usecases/mwaa-cognito-cdk/docs/images/authentication_workflow.png -------------------------------------------------------------------------------- /usecases/start-stop-mwaa-environment/design/DeploymentDiagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-mwaa-examples/HEAD/usecases/start-stop-mwaa-environment/design/DeploymentDiagram.png -------------------------------------------------------------------------------- /usecases/start-stop-mwaa-environment/design/ExecutionDiagram.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-mwaa-examples/HEAD/usecases/start-stop-mwaa-environment/design/ExecutionDiagram.png -------------------------------------------------------------------------------- /usecases/metadata-migration/README.md: -------------------------------------------------------------------------------- 1 | ## Airflow sample migration scripts 2 | 3 | Metadata import and export scripts are now part of the [MWAA Disaster Recovery project](https://pypi.org/project/mwaa-dr/). -------------------------------------------------------------------------------- /usecases/mwaa-observability-enhancement/images/mwaa_observability.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-mwaa-examples/HEAD/usecases/mwaa-observability-enhancement/images/mwaa_observability.png -------------------------------------------------------------------------------- /blueprints/examples/AWSGlue/infra/terraform/locals.tf: -------------------------------------------------------------------------------- 1 | 2 | locals { 3 | azs = slice(data.aws_availability_zones.available.names,0,2) 4 | account = data.aws_caller_identity.current.account_id 5 | 6 | } 7 | -------------------------------------------------------------------------------- /blueprints/examples/EKS/dags/requirements.txt: -------------------------------------------------------------------------------- 1 | --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.5.12/constraints-3.10.txt" 2 | apache-airflow-providers-cncf-kubernetes==3.0.0 3 | 4 | -------------------------------------------------------------------------------- /usecases/mwaa-public-webserver-custom-domain/images/sign-in-attempt.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/amazon-mwaa-examples/HEAD/usecases/mwaa-public-webserver-custom-domain/images/sign-in-attempt.png -------------------------------------------------------------------------------- /blueprints/examples/EKS/infra/cdk/.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | package-lock.json 3 | __pycache__ 4 | .pytest_cache 5 | .venv 6 | *.egg-info 7 | 8 | # CDK asset staging directory 9 | .cdk.staging 10 | cdk.out 11 | -------------------------------------------------------------------------------- /blueprints/examples/Lambda/dags/requirements/requirements.txt: -------------------------------------------------------------------------------- 1 | --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.2.2/constraints-3.7.txt" 2 | apache-airflow[ssh]==2.2.2 3 | retry==0.9.2 4 | -------------------------------------------------------------------------------- /usecases/mwaa-glue-athena/ArtifactBucket/dqrules/sporting_event_ticket: -------------------------------------------------------------------------------- 1 | Rules = [ 2 | IsComplete "sport_location_id", 3 | IsComplete "sporting_event_id", 4 | ColumnValues "ticket_price" > 0 5 | ] -------------------------------------------------------------------------------- /usecases/mwaa-public-webserver-custom-domain/images/redirect-and-access.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-mwaa-examples/HEAD/usecases/mwaa-public-webserver-custom-domain/images/redirect-and-access.png -------------------------------------------------------------------------------- /usecases/mwaa-public-webserver-custom-domain/images/auth-and-verification.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/amazon-mwaa-examples/HEAD/usecases/mwaa-public-webserver-custom-domain/images/auth-and-verification.png -------------------------------------------------------------------------------- /infra/terraform/versions.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | required_version = ">= 1.0.0" 3 | 4 | required_providers { 5 | aws = { 6 | source = "hashicorp/aws" 7 | version = "~> 4.65.0" 8 | } 9 | } 10 | } -------------------------------------------------------------------------------- /usecases/image-processing/lambda/Makefile: -------------------------------------------------------------------------------- 1 | build-ThumbnailLambdaFunction: 2 | cp *.json $(ARTIFACTS_DIR) 3 | npm install --production --prefix $(ARTIFACTS_DIR) 4 | cp *.js $(ARTIFACTS_DIR) 5 | rm $(ARTIFACTS_DIR)/*.json 6 | -------------------------------------------------------------------------------- /usecases/mwaa-snowflake-integration/mwaa_snowflake_queries/create_stage.sql: -------------------------------------------------------------------------------- 1 | create stage IF NOT EXISTS {{params.stage_name}} 2 | STORAGE_INTEGRATION = {{params.storage_int_name}} 3 | URL = '{{params.destination_bucket_path}}' -------------------------------------------------------------------------------- /blueprints/examples/AWSGlue/infra/terraform/variables.tf: -------------------------------------------------------------------------------- 1 | 2 | variable "tags" { 3 | description = "(Optional) A map of resource tags to associate with the resource" 4 | type = map(string) 5 | default = {} 6 | } 7 | -------------------------------------------------------------------------------- /dags/airflow-272-examples/deferrable_operators/images/mwaa-deferrable-operators-sample-dag.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/amazon-mwaa-examples/HEAD/dags/airflow-272-examples/deferrable_operators/images/mwaa-deferrable-operators-sample-dag.png -------------------------------------------------------------------------------- /usecases/mwaa-glue-bedrock/stack_config.py: -------------------------------------------------------------------------------- 1 | # Static Configurations 2 | STACK_NAME = "eb-mwaa-glue-bedrock" 3 | MIN_CLI_VERSION = "1.32.80" 4 | 5 | # Salesforce 6 | SALESFORCE_API_VERSION = "v58.0" 7 | SALESFORCE_APP_NAME = "AWSEventBridgeConnectedApp" 8 | -------------------------------------------------------------------------------- /usecases/mwaa-snowflake-integration/requirements.txt: -------------------------------------------------------------------------------- 1 | --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.6.3/constraints-3.10.txt" 2 | snowflake-connector-python 3 | apache-airflow-providers-snowflake 4 | snowflake-sqlalchemy 5 | -------------------------------------------------------------------------------- /dags/airflow-272-examples/deferrable_operators/images/mwaa-deferrable-operators-sample-poking.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-mwaa-examples/HEAD/dags/airflow-272-examples/deferrable_operators/images/mwaa-deferrable-operators-sample-poking.png -------------------------------------------------------------------------------- /dags/airflow-272-examples/deferrable_operators/images/mwaa-deferrable-operators-sample-polling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-mwaa-examples/HEAD/dags/airflow-272-examples/deferrable_operators/images/mwaa-deferrable-operators-sample-polling.png -------------------------------------------------------------------------------- 
/usecases/mwaa-glue-athena/MWAAEnvironmentBucket/dags/sampledb/data/csv/sport_league.csv: -------------------------------------------------------------------------------- 1 | 2 | baseball,MLB,Major League Baseball,Professional baseball league in North America. 3 | football,NFL,National Footbal League,Professional football league in North America. 4 | -------------------------------------------------------------------------------- /dags/airflow-243-examples/dynamic_task_mapping/data/2.txt: -------------------------------------------------------------------------------- 1 | Dynamic Task Mapping allows 2 | a way for a workflow to create a number of tasks at runtime 3 | based upon current data, 4 | rather than the DAG author having to know in advance how many tasks would be needed. -------------------------------------------------------------------------------- /dags/airflow-272-examples/deferrable_operators/images/mwaa-deferrable-operators-sample-deferred-state.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-mwaa-examples/HEAD/dags/airflow-272-examples/deferrable_operators/images/mwaa-deferrable-operators-sample-deferred-state.png -------------------------------------------------------------------------------- /usecases/mwaa-cognito-cdk/.gitignore: -------------------------------------------------------------------------------- 1 | # Files 2 | *.js 3 | !jest.config.js 4 | *.d.ts 5 | .cdk.staging 6 | package-lock.json 7 | test-report.xml 8 | 9 | # Folders 10 | cdk.out/ 11 | coverage/ 12 | node_modules/ 13 | 14 | # Certificates 15 | cert.pem 16 | key.pem 17 | -------------------------------------------------------------------------------- /usecases/start-stop-mwaa-environment/.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | indent_style = space 5 | indent_size = 2 6 | end_of_line = lf 7 | charset = 
utf-8 8 | trim_trailing_whitespace = true 9 | insert_final_newline = true 10 | 11 | [.py] 12 | indent_size = 4 13 | -------------------------------------------------------------------------------- /usecases/mwaa-glue-athena/MWAAEnvironmentBucket/dags/sampledb/data/csv/sport_type.csv: -------------------------------------------------------------------------------- 1 | 2 | baseball,"A sport with 9 players, bats, and balls - what could possibly go wrong?" 3 | football,Teams of 11 players attempt to move an oblong ball 100 yards while beating the snot out of each other. 4 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 
5 | -------------------------------------------------------------------------------- /usecases/mwaa-public-webserver-custom-domain/src/lambda-edge/sign-out/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "sign-out", 3 | "version": "1.0.0", 4 | "description": "", 5 | "main": "index.js", 6 | "scripts": { 7 | "test": "echo \"Error: no test specified\" && exit 1" 8 | }, 9 | "keywords": [], 10 | "author": "" 11 | } 12 | -------------------------------------------------------------------------------- /usecases/mwaa-with-codeartifact/.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | package-lock.json 3 | __pycache__ 4 | .pytest_cache 5 | .venv 6 | venv/ 7 | *.egg-info 8 | 9 | # Environment files 10 | .env 11 | *.env 12 | 13 | # CDK asset staging directory 14 | .cdk.staging 15 | cdk.out 16 | 17 | # OS 18 | .DS_Store 19 | 20 | # IDE 21 | .idea/ 22 | .vscode/ -------------------------------------------------------------------------------- /usecases/mwaa-public-webserver-custom-domain/src/lambda-edge/check-auth/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "check-auth", 3 | "version": "1.0.0", 4 | "description": "", 5 | "main": "index.js", 6 | "scripts": { 7 | "test": "echo \"Error: no test specified\" && exit 1" 8 | }, 9 | "keywords": [], 10 | "author": "" 11 | } 12 | -------------------------------------------------------------------------------- /usecases/mwaa-public-webserver-custom-domain/src/lambda-edge/refresh-auth/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "refresh-auth", 3 | "version": "1.0.0", 4 | "description": "", 5 | "main": "index.js", 6 | "scripts": { 7 | "test": "echo \"Error: no test specified\" && exit 1" 8 | }, 9 | "keywords": [], 10 | "author": "" 11 | } 12 | 
-------------------------------------------------------------------------------- /usecases/mwaa-public-webserver-custom-domain/src/lambda-edge/http-headers/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "http-headers", 3 | "version": "1.0.0", 4 | "description": "", 5 | "main": "bundle.js", 6 | "scripts": { 7 | "test": "echo \"Error: no test specified\" && exit 1" 8 | }, 9 | "keywords": [], 10 | "author": "" 11 | } 12 | -------------------------------------------------------------------------------- /usecases/mwaa-snowflake-integration/mwaa_snowflake_queries/create_storage_int.sql: -------------------------------------------------------------------------------- 1 | CREATE STORAGE INTEGRATION IF NOT EXISTS {{params.storage_int_name}} 2 | TYPE = EXTERNAL_STAGE 3 | STORAGE_PROVIDER = 'S3' 4 | ENABLED = TRUE 5 | STORAGE_AWS_ROLE_ARN = '{{params.aws_role_arn}}' 6 | STORAGE_ALLOWED_LOCATIONS = ('{{params.destination_bucket_path}}'); -------------------------------------------------------------------------------- /blueprints/examples/EMR/Makefile: -------------------------------------------------------------------------------- 1 | 2 | 3 | deploy: 4 | chmod 700 ./post_provision.sh 5 | ./post_provision.sh $(mwaa_bucket) $(mwaa_execution_role_name) $(mwaa_env_name) $(emr_data_bucket) 6 | 7 | undeploy: 8 | chmod 700 ./pre_termination.sh 9 | ./pre_termination.sh $(mwaa_bucket) $(mwaa_execution_role_name) $(mwaa_env_name) $(emr_data_bucket) 10 | -------------------------------------------------------------------------------- /usecases/mwaa-dag-factory-example/dags/dag_factory_loader.py: -------------------------------------------------------------------------------- 1 | """ 2 | DAG Factory Loader 3 | This file dynamically generates Airflow DAGs from YAML configuration files. 
4 | """ 5 | from dagfactory import load_yaml_dags 6 | 7 | # Load all YAML files with .yml or .yaml extension in the dags folder 8 | load_yaml_dags(globals_dict=globals(), suffix=['.yml', '.yaml']) 9 | -------------------------------------------------------------------------------- /dags/airflow-243-examples/dynamic_task_mapping/data/3.txt: -------------------------------------------------------------------------------- 1 | This is similar to defining your tasks in a for loop, 2 | but instead of having the DAG file fetch the data and do that itself, 3 | the scheduler can do this based on the output of a previous task. 4 | Right before a mapped task is executed 5 | the scheduler will create n copies of the task, 6 | one for each input. -------------------------------------------------------------------------------- /usecases/mwaa-public-webserver-custom-domain/src/cfn-custom-resources/us-east-1-lambda-stack/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "us-east-1-lambda-stack", 3 | "version": "1.0.0", 4 | "description": "", 5 | "main": "index.js", 6 | "scripts": { 7 | "test": "echo \"Error: no test specified\" && exit 1" 8 | }, 9 | "keywords": [], 10 | "author": "" 11 | } 12 | -------------------------------------------------------------------------------- /usecases/start-stop-mwaa-environment/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | 3 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 4 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 5 | opensource-codeofconduct@amazon.com with any additional questions or comments. 
6 | -------------------------------------------------------------------------------- /usecases/mwaa-glue-athena/MWAAEnvironmentBucket/dags/sampledb/user/create-user.sql: -------------------------------------------------------------------------------- 1 | DROP USER IF EXISTS dms_user; 2 | CREATE USER dms_user WITH PASSWORD 'dms_user'; 3 | GRANT ALL PRIVILEGES ON SCHEMA dms_sample TO dms_user; 4 | GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA dms_sample TO dms_user; 5 | GRANT ALL PRIVILEGES ON ALL SEQUENCES IN SCHEMA dms_sample TO dms_user; 6 | -------------------------------------------------------------------------------- /usecases/mwaa-public-webserver-custom-domain/src/cfn-custom-resources/user-pool-client/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "user-pool-client", 3 | "version": "1.0.0", 4 | "description": "", 5 | "main": "index.js", 6 | "scripts": { 7 | "test": "echo \"Error: no test specified\" && exit 1" 8 | }, 9 | "keywords": [], 10 | "author": "", 11 | "devDependencies": {} 12 | } 13 | -------------------------------------------------------------------------------- /usecases/mwaa-public-webserver-custom-domain/src/cfn-custom-resources/user-pool-domain/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "user-pool-domain", 3 | "version": "1.0.0", 4 | "description": "", 5 | "main": "index.js", 6 | "scripts": { 7 | "test": "echo \"Error: no test specified\" && exit 1" 8 | }, 9 | "keywords": [], 10 | "author": "", 11 | "devDependencies": {} 12 | } 13 | -------------------------------------------------------------------------------- /usecases/mwaa-public-webserver-custom-domain/src/lambda-edge/parse-auth/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "parse-auth", 3 | "version": "1.0.0", 4 | "description": "", 5 | "main": "index.js", 6 | "dependencies": {}, 7 | 
"devDependencies": {}, 8 | "scripts": { 9 | "test": "echo \"Error: no test specified\" && exit 1" 10 | }, 11 | "keywords": [], 12 | "author": "" 13 | } 14 | -------------------------------------------------------------------------------- /usecases/mwaa-public-webserver-custom-domain/src/cfn-custom-resources/client-secret-retrieval/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "client-secret-retrieval", 3 | "version": "1.0.0", 4 | "description": "", 5 | "main": "index.js", 6 | "scripts": { 7 | "test": "echo \"Error: no test specified\" && exit 1" 8 | }, 9 | "keywords": [], 10 | "author": "", 11 | "devDependencies": {} 12 | } 13 | -------------------------------------------------------------------------------- /usecases/mwaa-public-webserver-custom-domain/src/cfn-custom-resources/generate-secret/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "generate-secret", 3 | "version": "1.0.0", 4 | "description": "", 5 | "main": "index.js", 6 | "scripts": { 7 | "test": "echo \"Error: no test specified\" && exit 1" 8 | }, 9 | "keywords": [], 10 | "author": "", 11 | "dependencies": {}, 12 | "devDependencies": {} 13 | } 14 | -------------------------------------------------------------------------------- /usecases/start-stop-mwaa-environment/lib/lambda/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "mwaa-pause-resume-lambdas", 3 | "version": "0.1.0", 4 | "description": "Lambdas for MWAA pause resume stack", 5 | "private": true, 6 | "devDependencies": { 7 | "@types/node": "*", 8 | "@types/uuid": "*" 9 | }, 10 | "dependencies": { 11 | "@aws-sdk/client-mwaa": "*", 12 | "uuid": "*", 13 | "axios": "*" 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /blueprints/examples/AWSGlue/pre_termination.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | glue_mwaa_iam_policy_arn=$(terraform -chdir="./infra/terraform" output -raw glue_mwaa_iam_policy_arn) 4 | aws iam detach-role-policy --policy-arn $glue_mwaa_iam_policy_arn --role-name $2 5 | 6 | data_bucket=$(terraform -chdir="./infra/terraform" output -raw glue_data_bucket_name) 7 | aws s3 rm s3://$1/dags/weatherdata_processing.py 8 | aws s3 rm s3://$data_bucket/ --recursive 9 | 10 | -------------------------------------------------------------------------------- /blueprints/examples/EMR_on_EKS/infra/terraform/versions.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | required_version = ">= 1.0.0" 3 | 4 | required_providers { 5 | aws = { 6 | source = "hashicorp/aws" 7 | version = ">= 3.72" 8 | } 9 | kubernetes = { 10 | source = "hashicorp/kubernetes" 11 | version = ">= 2.10" 12 | } 13 | null = { 14 | source = "hashicorp/null" 15 | version = ">= 3.1" 16 | } 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /blueprints/examples/Lambda/image/ecrpolicy.json: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2008-10-17", 3 | "Statement": [ 4 | { 5 | "Sid": "LambdaECRImageRetrievalPolicy", 6 | "Effect": "Allow", 7 | "Principal": { 8 | "Service": "lambda.amazonaws.com" 9 | }, 10 | "Action": [ 11 | "ecr:BatchCheckLayerAvailability", 12 | "ecr:BatchGetImage", 13 | "ecr:GetDownloadUrlForLayer" 14 | ] 15 | } 16 | ] 17 | } -------------------------------------------------------------------------------- /usecases/mwaa-glue-athena/MWAAEnvironmentBucket/dags/sampledb/data/csv/seat_type.csv: -------------------------------------------------------------------------------- 1 | 2 | luxury,"Excellent seats - box seats, behind the plate, etc. 
etc.",1 3 | obstructed,"Pretty crappy, partially obstructed seats",5 4 | premium,"Really good seats - first level, mid field, etc. etc.",2 5 | standard,Standard seats - not super awesome but pretty good,3 6 | standing,"Really? That's not a seat at all now, is it?",10 7 | sub-standard,"End zone, nose bleed, etc.",4 8 | -------------------------------------------------------------------------------- /blueprints/examples/EKS/infra/cdk/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: diff list vpc s3 mwaa infra eks-role s3-deploy 2 | 3 | diff: 4 | @cdk diff --profile=${AWS_PROFILE} 5 | 6 | list: 7 | @cdk list --profile=${AWS_PROFILE} 8 | 9 | eks-role: 10 | @cdk deploy mwaa-blueprint-v1-eks-deps --profile=${AWS_PROFILE} 11 | 12 | s3-deploy: 13 | @cdk deploy mwaa-blueprint-v1-s3-deploy --profile=${AWS_PROFILE} 14 | 15 | infra: 16 | @cdk deploy mwaa-blueprint-v1-infra-* --profile=${AWS_PROFILE} -------------------------------------------------------------------------------- /usecases/mwaa-glue-bedrock/src/dags/mwaa_config.py: -------------------------------------------------------------------------------- 1 | from airflow.models import Variable 2 | import os, json 3 | 4 | mwaa_vars = json.loads(Variable.get("mwaa")) 5 | 6 | BUCKET_NAME = mwaa_vars.get('artifactBucket') 7 | GLUE_EXECUTION_ROLE = mwaa_vars.get('glueExecutionRole') 8 | STACK_NAME = mwaa_vars.get("stackName") 9 | 10 | SALESFORCE_CONNECTION = "salesforce_connection" 11 | SALESFORCE_OBJECT = "CaseChangeEvent" 12 | REGION=os.getenv("AWS_DEFAULT_REGION") 13 | 14 | -------------------------------------------------------------------------------- /usecases/local-runner-on-ecs-fargate/terraform/ecs/output.tf: -------------------------------------------------------------------------------- 1 | output loadbalancer_url { 2 | value = aws_lb.loadbalancer.dns_name 3 | } 4 | 5 | output database_name { 6 | value = aws_rds_cluster.mwaa-local-runner-cluster.database_name 7 
| } 8 | 9 | output rds_endpoint { 10 | value = aws_rds_cluster.mwaa-local-runner-cluster.endpoint 11 | } 12 | 13 | output db_passsword { 14 | value = aws_rds_cluster.mwaa-local-runner-cluster.master_password 15 | sensitive = true 16 | } -------------------------------------------------------------------------------- /usecases/mwaa-public-webserver-custom-domain/src/cfn-custom-resources/lambda-code-update/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "lambda-code-update", 3 | "version": "1.0.0", 4 | "description": "", 5 | "main": "index.js", 6 | "scripts": { 7 | "test": "echo \"Error: no test specified\" && exit 1" 8 | }, 9 | "keywords": [], 10 | "author": "", 11 | "dependencies": { 12 | "adm-zip": "^0.4.13" 13 | }, 14 | "devDependencies": { 15 | "@types/adm-zip": "^0.4.32" 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /blueprints/examples/Lambda/image/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM public.ecr.aws/lambda/python:3.8 2 | 3 | # Copy function code 4 | COPY app.py ${LAMBDA_TASK_ROOT} 5 | 6 | # Install the function's dependencies using file requirements.txt 7 | # from your project folder. 8 | 9 | COPY requirements.txt . 
10 | RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}" 11 | 12 | # Set the CMD to your handler (could also be done as a parameter override outside of the Dockerfile) 13 | CMD [ "app.lambda_handler" ] -------------------------------------------------------------------------------- /usecases/mwaa-dag-factory-example/dags/.airflowignore: -------------------------------------------------------------------------------- 1 | # Ignore plugins directory (contains wheel files) 2 | plugins/ 3 | 4 | # Ignore Python cache 5 | __pycache__/ 6 | *.pyc 7 | *.pyo 8 | *.pyd 9 | .Python 10 | 11 | # Ignore IDE files 12 | .vscode/ 13 | .idea/ 14 | *.swp 15 | *.swo 16 | *~ 17 | 18 | # Ignore test files 19 | test_*.py 20 | *_test.py 21 | 22 | # Ignore documentation 23 | *.md 24 | README* 25 | 26 | # Ignore YAML output from converter (if stored in dags folder) 27 | converted/ 28 | -------------------------------------------------------------------------------- /usecases/mwaa-public-webserver-custom-domain/parameters.txt.sample: -------------------------------------------------------------------------------- 1 | EmailAddress= 2 | AlternateDomainNames= 3 | CloudFrontAccessLogsBucket=.s3.amazonaws.com 4 | MwaaEnvironmentName= 5 | MwaaPublicEndpoint= 6 | ACMCertificateArn= 7 | Version=1.0 8 | -------------------------------------------------------------------------------- /blueprints/examples/EKS/infra/cdk/source.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | 3 | rem The sole purpose of this script is to make the command 4 | rem 5 | rem source .venv/bin/activate 6 | rem 7 | rem (which activates a Python virtualenv on Linux or Mac OS X) work on Windows. 8 | rem On Windows, this command just runs this batch file (the argument is ignored). 9 | rem 10 | rem Now we don't need to document a Windows command for activating a virtualenv. 
11 | 12 | echo Executing .venv\Scripts\activate.bat for you 13 | .venv\Scripts\activate.bat 14 | -------------------------------------------------------------------------------- /blueprints/examples/EKS/infra/cdk/stacks/common/constants.py: -------------------------------------------------------------------------------- 1 | VERSION = 'v1' 2 | DAGS_S3_LOCATION = f'mwaa-s3-bucket-placeholder-{VERSION}' 3 | MWAA_ENV = f'poc-{VERSION}' 4 | AIRFLOW_VERSION = '2.2.2' 5 | CIDR = '10.129.0.0/22' 6 | CIDR_MASK = 24 7 | MAZ_AZS = 2 8 | ID = f'mwaa-blueprint-{VERSION}' 9 | REQUIREMENTS_FILE = 'requirements.txt' 10 | PLUGINS_FILE = 'plugins/eks_airflow.zip' 11 | CLUSTER_NAME = f'{ID}-eks-{VERSION}' 12 | NODEGROUP_SUFFIX = '-nodegroup' 13 | NODEGROUP_NAME = f'{CLUSTER_NAME}{NODEGROUP_SUFFIX}-{VERSION}' 14 | -------------------------------------------------------------------------------- /usecases/mwaa_utilization_cw_metric/Makefile: -------------------------------------------------------------------------------- 1 | 2 | .PHONY: all 3 | deploy: ## terraform 4 | terraform -chdir="./infra/terraform" init 5 | terraform -chdir="./infra/terraform" plan 6 | terraform -chdir="./infra/terraform" apply 7 | $(MAKE) post-provision 8 | 9 | post-provision: 10 | chmod 700 ./post_provision.sh 11 | ./post_provision.sh MWAA_Metric_Environment 12 | 13 | undeploy: 14 | chmod 700 ./pre_termination.sh 15 | ./pre_termination.sh MWAA_Metric_Environment 16 | terraform -chdir="./infra/terraform" destroy 17 | 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /usecases/mwaa-glue-athena/ArtifactBucket/athenasql/sporting_event_info_agg: -------------------------------------------------------------------------------- 1 | CREATE TABLE IF NOT EXISTS curated_db.sporting_event_info_agg 2 | as 3 | 4 | SELECT 5 | e.id AS event_id, 6 | e.sport_type_name AS sport, 7 | e.start_date_time AS event_date_time, 8 | h.name AS home_team, 9 | a.name AS away_team, 10 | l.name 
AS location, 11 | l.city 12 | FROM curated_db.sporting_event e, 13 | curated_db.sport_team h, 14 | curated_db.sport_team a, 15 | curated_db.sport_location l 16 | WHERE 17 | e.home_team_id = h.id 18 | AND e.away_team_id = a.id 19 | AND e.location_id = l.id; 20 | -------------------------------------------------------------------------------- /blueprints/examples/EKS/infra/cdk/tests/unit/test_cdk_stack.py: -------------------------------------------------------------------------------- 1 | import aws_cdk as core 2 | import aws_cdk.assertions as assertions 3 | 4 | from cdk.cdk_stack import CdkStack 5 | 6 | # example tests. To run these tests, uncomment this file along with the example 7 | # resource in cdk/cdk_stack.py 8 | def test_sqs_queue_created(): 9 | app = core.App() 10 | stack = CdkStack(app, "cdk") 11 | template = assertions.Template.from_stack(stack) 12 | 13 | # template.has_resource_properties("AWS::SQS::Queue", { 14 | # "VisibilityTimeout": 300 15 | # }) 16 | -------------------------------------------------------------------------------- /blueprints/examples/EMR_on_EKS/infra/cdk/app.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | # SPDX-License-Identifier: MIT-0 3 | 4 | #!/usr/bin/env python3 5 | 6 | import aws_cdk as cdk 7 | from stacks.EKSStack import EmrEksCdkStack 8 | import os 9 | 10 | default_env = cdk.Environment( 11 | region=os.environ['CDK_DEFAULT_REGION'], 12 | account=os.environ['CDK_DEFAULT_ACCOUNT'] 13 | ) 14 | 15 | 16 | config = { 17 | } 18 | 19 | app = cdk.App() 20 | eks = EmrEksCdkStack(app, "emr-eks-cdk", env=default_env, props=config) 21 | 22 | app.synth() 23 | -------------------------------------------------------------------------------- /usecases/mwaa-glue-athena/MWAAEnvironmentBucket/dags/mwaa_config.py: -------------------------------------------------------------------------------- 1 | from airflow.models import Variable 2 | 3 | import os 4 | 5 | BUCKET_NAME = Variable.get("artifact_bucket") 6 | CRAWLER_CONFIG = {"Name": "curated_layer_crawler"} 7 | ATHENA_KEY = 'athenasql/' 8 | DATASET_NAME = "data-pipeline-mapped-dynamic-dataset" 9 | REGION=os.getenv("AWS_DEFAULT_REGION") 10 | RESULTS_LOCATION=f's3://{BUCKET_NAME}/athena/results/' 11 | GLUE_CONCURRENCY=10 12 | GLUE_POOL="glue_pool" 13 | GLUE_CRAWLER_CONCURRENCY=1 14 | GLUE_CRAWLER_POOL="glue_crawler_pool" 15 | POSTGRES_CONNECTION="postgres_default" 16 | -------------------------------------------------------------------------------- /blueprints/examples/AWSGlue/Makefile: -------------------------------------------------------------------------------- 1 | 2 | .PHONY: all 3 | deploy: ## terraform 4 | terraform -chdir="./infra/terraform" init 5 | terraform -chdir="./infra/terraform" plan 6 | terraform -chdir="./infra/terraform" apply 7 | $(MAKE) post-provision 8 | 9 | post-provision: 10 | chmod 700 ./post_provision.sh 11 | ./post_provision.sh $(mwaa_bucket) $(mwaa_execution_role_name) $(mwaa_env_name) 12 | 13 | undeploy: 14 | chmod 700 ./pre_termination.sh 15 | ./pre_termination.sh $(mwaa_bucket) $(mwaa_execution_role_name) 16 | terraform -chdir="./infra/terraform" destroy 17 | 18 | 19 | 20 | 21 | 
-------------------------------------------------------------------------------- /blueprints/examples/EKS/infra/cdk/cdk/cdk_stack.py: -------------------------------------------------------------------------------- 1 | from aws_cdk import ( 2 | # Duration, 3 | Stack, 4 | # aws_sqs as sqs, 5 | ) 6 | from constructs import Construct 7 | 8 | class CdkStack(Stack): 9 | 10 | def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None: 11 | super().__init__(scope, construct_id, **kwargs) 12 | 13 | # The code that defines your stack goes here 14 | 15 | # example resource 16 | # queue = sqs.Queue( 17 | # self, "CdkQueue", 18 | # visibility_timeout=Duration.seconds(300), 19 | # ) 20 | -------------------------------------------------------------------------------- /usecases/mwaa_utilization_cw_metric/infra/terraform/variables.tf: -------------------------------------------------------------------------------- 1 | variable "vpc_cidr" { 2 | description = "(Required) The name of the Apache Airflow MWAA Environment" 3 | type = string 4 | default = "10.1.0.0/16" 5 | 6 | } 7 | 8 | variable "tags" { 9 | description = "(Optional) A map of resource tags to associate with the resource" 10 | type = map(string) 11 | default = {} 12 | } 13 | 14 | variable "name" { 15 | description = "(Required) The name of the Apache Airflow MWAA Environment" 16 | type = string 17 | default = "MWAA_Metric_Environment" 18 | } 19 | -------------------------------------------------------------------------------- /blueprints/examples/EMR_on_EKS/infra/cdk/cdk/cdk_stack.py: -------------------------------------------------------------------------------- 1 | from aws_cdk import ( 2 | # Duration, 3 | Stack, 4 | # aws_sqs as sqs, 5 | ) 6 | from constructs import Construct 7 | 8 | class CdkStack(Stack): 9 | 10 | def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None: 11 | super().__init__(scope, construct_id, **kwargs) 12 | 13 | # The code that defines your stack goes here 14 | 15 
| # example resource 16 | # queue = sqs.Queue( 17 | # self, "CdkQueue", 18 | # visibility_timeout=Duration.seconds(300), 19 | # ) 20 | -------------------------------------------------------------------------------- /blueprints/examples/AWSGlue/infra/terraform/outputs.tf: -------------------------------------------------------------------------------- 1 | output "glue_data_bucket_name" { 2 | description = "The data bucket name" 3 | value = aws_s3_bucket.aws_glue_mwaa_bucket.id 4 | } 5 | 6 | output "glue_service_role_arn" { 7 | description = "Glue Service Role" 8 | value = aws_iam_role.glue_service_role.arn 9 | } 10 | output "glue_service_role_name" { 11 | description = "Glue Service Role" 12 | value = aws_iam_role.glue_service_role.name 13 | } 14 | output "glue_mwaa_iam_policy_arn" { 15 | description = "Glue Service Role" 16 | value = aws_iam_policy.glue_mwaa_iam_policy.arn 17 | } -------------------------------------------------------------------------------- /usecases/mwaa-glue-athena/ArtifactBucket/athenasql/sporting_event_ticket_info_agg: -------------------------------------------------------------------------------- 1 | CREATE TABLE IF NOT EXISTS curated_db.sporting_event_ticket_info_agg 2 | as 3 | 4 | SELECT t.id AS ticket_id, 5 | e.event_id, 6 | e.sport, 7 | e.event_date_time, 8 | e.home_team, 9 | e.away_team, 10 | e.location, 11 | e.city, 12 | t.seat_level, 13 | t.seat_section, 14 | t.seat_row, 15 | t.seat, 16 | t.ticket_price, 17 | p.full_name AS ticketholder 18 | FROM curated_db.sporting_event_info e, 19 | curated_db.sporting_event_ticket t, 20 | curated_db.person p 21 | WHERE 22 | t.sporting_event_id = e.event_id 23 | AND t.ticketholder_id = p.id 24 | -------------------------------------------------------------------------------- /usecases/mwaa-glue-athena/MWAAEnvironmentBucket/dags/sampledb/schema/functions/esubstr.sql: -------------------------------------------------------------------------------- 1 | CREATE OR REPLACE FUNCTION esubstr( 2 | str 
Complete set of sample use cases including documentation, infrastructure as code and dependent resources. Follow the README.md in each use case to get started.
# Input variables for the base MWAA environment.

variable "vpc_cidr" {
  # Fixed: the description was copy-pasted from the "name" variable.
  description = "(Optional) CIDR block of the VPC that hosts the Amazon MWAA environment"
  type        = string
  default     = "10.1.0.0/16"
}

variable "tags" {
  description = "(Optional) A map of resource tags to associate with the resource"
  type        = map(string)
  default     = {}
}

variable "name" {
  description = "(Required) The name of the Apache Airflow MWAA Environment"
  type        = string
  default     = "MWAA_Environment"
}

variable "mwaa_version" {
  # Fixed: the description was copy-pasted from the "name" variable.
  description = "(Optional) The Apache Airflow version of the Amazon MWAA environment"
  type        = string
  default     = "2.5.1"
}
"ECSClusterName": "mwaa-local-runner-cluster", 4 | "VpcId": "your-mwaa-vpc-id", 5 | "ECRImageURI" : "123456789.dkr.ecr.us-east-1.amazonaws.com/mwaa-local-runner:latest", 6 | "SecurityGroups" : "security-group-id", 7 | "PrivateSubnetIds" : "subnet-mwaapvtsubnetid1,subnet-mwaapvtsubnetid2", 8 | "PublicSubnetIds" : "subnet-mwaapublicsubnetid1,subnet-mwaapublicsubnetid2", 9 | "S3BucketURI" : "s3://your-mwaa-bucket-path", 10 | "ECSTaskExecutionRoleArn": "arn:aws:iam::123456789:role/service-role/mwaaExecutionRoleName", 11 | "AssignPublicIpToTask" : "yes" 12 | } 13 | } -------------------------------------------------------------------------------- /usecases/mwaa-public-webserver-custom-domain/src/lambda-edge/http-headers/index.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | // SPDX-License-Identifier: MIT-0 3 | 4 | import { CloudFrontResponseHandler } from "aws-lambda"; 5 | import { getConfigWithHeaders } from "../shared/shared"; 6 | 7 | const CONFIG = getConfigWithHeaders(); 8 | CONFIG.logger.debug("Configuration loaded:", CONFIG); 9 | 10 | export const handler: CloudFrontResponseHandler = async (event) => { 11 | CONFIG.logger.debug("Event:", event); 12 | const response = event.Records[0].cf.response; 13 | Object.assign(response.headers, CONFIG.cloudFrontHeaders); 14 | CONFIG.logger.debug("Returning response:\n", response); 15 | return response; 16 | }; 17 | -------------------------------------------------------------------------------- /blueprints/examples/EMR/pre_termination.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | aws s3 rm s3://$1/dags/emr.py 4 | aws iam detach-role-policy --policy-arn arn:aws:iam::aws:policy/AmazonS3FullAccess --role-name $2 5 | aws iam detach-role-policy --policy-arn arn:aws:iam::aws:policy/AmazonEMRFullAccessPolicy_v2 --role-name $2 6 | aws s3 rm 
#!/bin/bash
#
# Post-provision setup for the EMR blueprint.
#
# Usage: ./post_provision.sh <mwaa_bucket> <mwaa_execution_role_name> <mwaa_env_name> <data_bucket>
#
# Copies the DAG and the Spark job into S3, attaches the IAM policies the
# MWAA execution role needs for EMR, and sets the EMR_DATA_BUCKET Airflow
# variable through the MWAA CLI endpoint.

mwaa_bucket="$1"
mwaa_role_name="$2"
mwaa_env_name="$3"
data_bucket="$4"

# Upload the DAG to the MWAA environment bucket.
aws s3 cp dags/emr.py "s3://${mwaa_bucket}/dags/"

# Grant the MWAA execution role access to S3 and EMR.
aws iam attach-role-policy --policy-arn arn:aws:iam::aws:policy/AmazonS3FullAccess --role-name "$mwaa_role_name"
aws iam attach-role-policy --policy-arn arn:aws:iam::aws:policy/AmazonEMRFullAccessPolicy_v2 --role-name "$mwaa_role_name"

# Upload the Spark job to the data bucket.
aws s3 cp spark/nyc_aggregations.py "s3://${data_bucket}/spark/emr/"

# Obtain a short-lived CLI token and set the EMR_DATA_BUCKET Airflow
# variable via the MWAA CLI API (quoted expansions prevent word splitting).
mwaa_cli_json=$(aws mwaa create-cli-token --name "$mwaa_env_name")
CLI_TOKEN=$(echo "$mwaa_cli_json" | jq -r '.CliToken')
WEB_SERVER_HOSTNAME=$(echo "$mwaa_cli_json" | jq -r '.WebServerHostname')
CLIRESULTS=$(curl --request POST "https://$WEB_SERVER_HOSTNAME/aws_mwaa/cli" \
  --header "Authorization: Bearer $CLI_TOKEN" \
  --header "Content-Type: text/plain" \
  --data-raw "variables set EMR_DATA_BUCKET $data_bucket")
from aws_cdk import Stack, aws_s3, aws_s3_deployment
from constructs import Construct


class S3SDeploytack(Stack):
    """Stack that syncs the local ``dags`` directory into the MWAA bucket.

    NOTE(review): the class name looks like a typo of ``S3DeployStack``; it is
    kept unchanged because renaming it would break existing callers.
    """

    def __init__(self, scope: Construct, construct_id: str, bucket: aws_s3.Bucket, **kwargs):
        super().__init__(scope, construct_id, **kwargs)

        # Upload everything under ../../dags to s3://<bucket>/dags/ without
        # deleting objects that already exist there (prune=False), while still
        # allowing the deployed objects to be removed on stack destroy.
        deployment_options = {
            "sources": [aws_s3_deployment.Source.asset("../../dags")],
            "destination_bucket": bucket,
            "destination_key_prefix": "dags",
            "prune": False,
            "retain_on_delete": False,
        }
        aws_s3_deployment.BucketDeployment(self, "DeployDAG", **deployment_options)
import os
from datetime import datetime

from airflow.models.dag import DAG
from airflow.providers.amazon.aws.operators.eks import EksPodOperator

# Name of the target EKS cluster, exposed to Airflow as an environment
# variable by the CDK stack.
CLUSTER_NAME = os.getenv('AIRFLOW__CDK__CLUSTER_NAME')

with DAG(
    dag_id='eks_run_pod',
    schedule_interval=None,
    start_date=datetime(2022, 11, 1),
    tags=['eks', 'eks-operator'],
    catchup=False,
) as dag:
    # Run a one-off `ls` inside an aws-cli container on the EKS cluster,
    # stream its logs back, and delete the pod when it finishes.
    run_pod = EksPodOperator(
        task_id="run_pod",
        cluster_name=CLUSTER_NAME,
        pod_name="run_pod",
        image="amazon/aws-cli:latest",
        cmds=["sh", "-c", "ls"],
        labels={"demo": "EksPodOperator"},
        get_logs=True,
        region=os.environ['AWS_DEFAULT_REGION'],
        is_delete_operator_pod=True,
    )
import aws_cdk as cdk
from aws_cdk import aws_codeartifact as codeartifact
from constructs import Construct


class CodeArtifactStack(cdk.Stack):
    """Provisions a CodeArtifact domain plus a pip repository for MWAA.

    The repository is connected to the public PyPI upstream through the
    ``mwaa`` domain, so Python dependencies can be resolved via CodeArtifact.
    """

    def __init__(self, scope: Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # Domain that owns the repository.
        domain = codeartifact.CfnDomain(
            self, "mwaa_codeartifact_domain", domain_name="mwaa"
        )

        # Repository with the public PyPI external connection.
        self._repo = codeartifact.CfnRepository(
            self,
            "mwaa_codeartifact_repo",
            domain_name=domain.domain_name,
            repository_name="mwaa_repo",
            external_connections=["public:pypi"],
            description="This is demo repo for MWAA.",
        )
        # The repository cannot be created until the domain exists.
        self._repo.add_dependency(domain)

    @property
    def repo(self) -> cdk.CfnResource:
        """The CodeArtifact repository resource, for cross-stack references."""
        return self._repo
15 | 16 | -------------------------------------------------------------------------------- /usecases/start-stop-mwaa-environment/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2018", 4 | "module": "CommonJs", 5 | "moduleResolution": "node", 6 | "lib": [ 7 | "ESNext" 8 | ], 9 | "esModuleInterop": true, 10 | "skipLibCheck": true, 11 | "declaration": true, 12 | "strict": true, 13 | "noImplicitAny": true, 14 | "strictNullChecks": true, 15 | "noImplicitThis": true, 16 | "alwaysStrict": true, 17 | "noUnusedLocals": true, 18 | "noUnusedParameters": true, 19 | "noImplicitReturns": true, 20 | "noFallthroughCasesInSwitch": false, 21 | "inlineSourceMap": true, 22 | "inlineSources": true, 23 | "experimentalDecorators": true, 24 | "strictPropertyInitialization": false, 25 | "useUnknownInCatchVariables": false, 26 | "resolveJsonModule": true, 27 | "typeRoots": [ 28 | "./node_modules/@types" 29 | ] 30 | }, 31 | "exclude": [ 32 | "node_modules", 33 | "cdk.out" 34 | ] 35 | } 36 | -------------------------------------------------------------------------------- /usecases/start-stop-mwaa-environment/LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software is furnished to do so. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 10 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 11 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR 12 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 13 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 14 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 15 | -------------------------------------------------------------------------------- /usecases/mwaa-with-codeartifact/Makefile: -------------------------------------------------------------------------------- 1 | # export environment variables from .env 2 | include .env 3 | export 4 | 5 | VENV=infra/venv/bin 6 | WITH_VENV=. $(VENV)/activate; 7 | 8 | .PHONY: all 9 | all: 10 | @echo "make venv - create a virtual environment" 11 | @echo "make synth - test CDK stacks with .env variables" 12 | @echo "make deploy - deploy the infrastructure" 13 | @echo "make destroy - destroy the infrastructure" 14 | 15 | .PHONY: venv 16 | venv: infra/venv/bin/activate 17 | 18 | infra/venv/bin/activate: requirements-dev.txt 19 | @echo "Installing dependencies" 20 | @test -d infra/venv || python3 -m venv infra/venv 21 | @$(VENV)/pip install --upgrade pip 22 | @$(VENV)/pip install -Ur requirements-dev.txt 23 | @touch $(VENV)/activate 24 | 25 | .PHONY: synth 26 | synth: venv 27 | @$(WITH_VENV) cdk synth "*" 28 | 29 | .PHONY: deploy 30 | deploy: venv 31 | @$(WITH_VENV) cdk bootstrap 32 | @$(WITH_VENV) cdk deploy --all 33 | 34 | .PHONY: destroy 35 | destroy: venv 36 | @$(WITH_VENV) cdk destroy --all -------------------------------------------------------------------------------- /blueprints/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: debug cdk-install-requirements cdk-setup-vpc cdk-deploy-infra cdk-deploy-to-bucket cdk-list-stacks cdk-setup-mwaa-env cdk-diff cdk-setup-eks-role 2 | 3 | install-cdk-requirements: ## install the python dependencies needed to run cdk IaC commands 4 | @pip install -r infra/cdk/requirements.txt 5 | 6 | cdk-list: ## list all the stacks. 
due to SDK dependencies, this fails if run prior to S3 bucket creation 7 | @$(MAKE) -C infra/cdk list 8 | 9 | cdk-diff: ## list the local changes in cdk compared to the previously installed infrastructure 10 | @$(MAKE) -C infra/cdk diff 11 | 12 | cdk-deploy-infra: 13 | @S3_FLAG=False $(MAKE) -C infra/cdk infra 14 | 15 | cdk-deploy-to-bucket: ## setup VPC needed for the mwaa infrastructure using CDK 16 | @$(MAKE) -C infra/cdk s3-deploy 17 | 18 | cdk-setup-eks-role: ## setup the infrastructure dependencies for EKS cluster (eg: IAM Role) 19 | @$(MAKE) -C infra/cdk eks-role 20 | 21 | help: 22 | @grep -E '^[a-zA-Z_-]+:.*?#.*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' -------------------------------------------------------------------------------- /blueprints/examples/EKS/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: debug cdk-install-requirements cdk-setup-vpc cdk-deploy-infra cdk-deploy-to-bucket cdk-list-stacks cdk-setup-mwaa-env cdk-diff cdk-setup-eks-role 2 | 3 | install-cdk-requirements: ## install the python dependencies needed to run cdk IaC commands 4 | @pip install -r infra/cdk/requirements.txt 5 | 6 | cdk-list: ## list all the stacks. 
due to SDK dependencies, this fails if run prior to S3 bucket creation 7 | @$(MAKE) -C infra/cdk list 8 | 9 | cdk-diff: ## list the local changes in cdk compared to the previously installed infrastructure 10 | @$(MAKE) -C infra/cdk diff 11 | 12 | cdk-deploy-infra: 13 | @S3_FLAG=False $(MAKE) -C infra/cdk infra 14 | 15 | cdk-deploy-to-bucket: ## setup VPC needed for the mwaa infrastructure using CDK 16 | @$(MAKE) -C infra/cdk s3-deploy 17 | 18 | cdk-setup-eks-role: ## setup the infrastructure dependencies for EKS cluster (eg: IAM Role) 19 | @$(MAKE) -C infra/cdk eks-role 20 | 21 | help: 22 | @grep -E '^[a-zA-Z_-]+:.*?#.*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' -------------------------------------------------------------------------------- /blueprints/examples/EMR_on_EKS/infra/cdk/cdk.json: -------------------------------------------------------------------------------- 1 | { 2 | "app": "python3 app.py", 3 | "watch": { 4 | "include": [ 5 | "**" 6 | ], 7 | "exclude": [ 8 | "README.md", 9 | "cdk*.json", 10 | "requirements*.txt", 11 | "source.bat", 12 | "**/__init__.py", 13 | "python/__pycache__", 14 | "tests" 15 | ] 16 | }, 17 | "context": { 18 | "@aws-cdk/core:stackRelativeExports": "true", 19 | "@aws-cdk/aws-ecr-assets:dockerIgnoreSupport": true, 20 | "@aws-cdk/aws-secretsmanager:parseOwnedSecretName": true, 21 | "@aws-cdk/core:validateSnapshotRemovalPolicy": true, 22 | "@aws-cdk/aws-iam:minimizePolicies": true, 23 | "@aws-cdk/aws-kms:defaultKeyPolicies": true, 24 | "@aws-cdk/aws-s3:grantWriteWithoutAcl": true, 25 | "@aws-cdk/core:checkSecretUsage": true, 26 | "@aws-cdk/core:target-partitions": [ 27 | "aws", 28 | "aws-cn" 29 | ], 30 | "vpc_id": "vpc-0c014b58004953f8c", 31 | "cluster_version": "1.23", 32 | "eks_cluster_id": "eks_cluster_cdk", 33 | "instance_types": "m5.large" 34 | } 35 | } 36 | -------------------------------------------------------------------------------- 
-- View flattening each sporting event with its sport, teams and venue.
create or replace view sporting_event_info as
select e.id as event_id
, e.sport_type_name as sport
, e.start_date_time as event_date_time
, h.name as home_team
, a.name as away_team
, l.name as location
, l.city as city
-- sport_team is joined twice: once as the home team (h), once as away (a).
from sporting_event e, sport_team h, sport_team a, sport_location l
where e.home_team_id = h.id
and e.away_team_id = a.id
and e.location_id = l.id;


-- View joining every ticket to its event details (via sporting_event_info)
-- and to the person holding it.
create or replace view sporting_event_ticket_info as
select t.id as ticket_id
,e.event_id
,e.sport
,e.event_date_time
,e.home_team
,e.away_team
,e.location
,e.city
,t.seat_level
,t.seat_section
,t.seat_row
,t.seat
,t.ticket_price
,p.full_name as ticketholder
from sporting_event_info e
, sporting_event_ticket t
, person p
where t.sporting_event_id = e.event_id
and t.ticketholder_id = p.id;
import sys
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.job import Job

# Glue ETL script: reads raw CSV data for one table from S3 and rewrites it
# to the curated zone in Parquet format.
#
# Job arguments:
#   JOB_NAME    - Glue job name (standard Glue argument)
#   table_name  - table/folder name under the raw/ and curated/ prefixes
#   bucket_name - S3 bucket holding both the raw and curated zones

# Create the Glue/Spark contexts and initialize the job.
args = getResolvedOptions(sys.argv, ["JOB_NAME", "table_name", "bucket_name"])
sc = SparkContext()
glueContext = GlueContext(sc)
spark = glueContext.spark_session
job = Job(glueContext)
job.init(args["JOB_NAME"], args)

# Fetch bucket and table name from the job arguments.
bucket_name = args["bucket_name"]
table_name = args["table_name"]

# Read the raw CSV data from s3://<bucket>/raw/<table>/ with inferred schema.
input_df = (
    spark.read.format("csv")
    .option("header", "true")
    .option("inferSchema", "true")
    .load(f"s3://{bucket_name}/raw/{table_name}/")
)

# Write the curated data in Parquet format to s3://<bucket>/curated/<table>/.
(
    input_df.write
    .format("parquet")
    .mode("overwrite")
    .save(f"s3://{bucket_name}/curated/{table_name}/")
)

# Signal successful completion; required as the final call of a Glue job so
# job bookmarks (if enabled) are advanced.
job.commit()
-- Sets each NFL team's home_field_id by matching rows from
-- dms_sample.nfl_stadium_data against dms_sample.sport_team by team name.
CREATE OR REPLACE FUNCTION dms_sample.setnflteamhomefield()
RETURNS void
AS
$BODY$
DECLARE
    var_v_sport_location_id INTEGER;
    var_v_team VARCHAR(40);
    var_v_loc VARCHAR(40);
    -- NOTE(review): declared but never read below; re-typed BOOLEAN to match
    -- its FALSE default (was VARCHAR(40)).
    var_done BOOLEAN DEFAULT FALSE;
    nsd_cur CURSOR FOR
    SELECT
        sport_location_id, team, location
        FROM dms_sample.nfl_stadium_data;
BEGIN
    OPEN nsd_cur;

    -- Restored the <<read_loop>> label (garbled in the source); the
    -- EXIT read_loop statement below requires it.
    <<read_loop>>
    LOOP
        FETCH FROM nsd_cur INTO var_v_sport_location_id, var_v_team, var_v_loc;

        IF NOT FOUND THEN
            EXIT read_loop;
        END IF;
        UPDATE dms_sample.sport_team AS s
        SET home_field_id = var_v_sport_location_id::SMALLINT
        WHERE LOWER(s.name) = LOWER(var_v_team::VARCHAR(30)) AND LOWER(s.sport_league_short_name) = LOWER('NFL'::VARCHAR(10)) AND LOWER(s.sport_type_name) = LOWER('football'::VARCHAR(15));
    END LOOP;
    CLOSE nsd_cur;
END;
$BODY$
LANGUAGE plpgsql;
ARN of the MWAA Environment" 8 | value = module.mwaa.mwaa_arn 9 | } 10 | 11 | output "mwaa_service_role_arn" { 12 | description = "The Service Role ARN of the Amazon MWAA Environment" 13 | value = module.mwaa.mwaa_service_role_arn 14 | } 15 | 16 | output "mwaa_status" { 17 | description = "The status of the Amazon MWAA Environment" 18 | value = module.mwaa.mwaa_status 19 | } 20 | 21 | output "mwaa_role_arn" { 22 | description = "IAM Role ARN of the MWAA Environment" 23 | value = module.mwaa.mwaa_role_arn 24 | } 25 | 26 | output "aws_s3_bucket_name" { 27 | description = "S3 bucket Name of the MWAA Environment" 28 | value = module.mwaa.aws_s3_bucket_name 29 | } 30 | 31 | output "vpc_id" { 32 | description = "MWAA VPC ID" 33 | value = module.vpc.vpc_id 34 | } 35 | output "subnets" { 36 | description = "MWAA VPC private_subnets" 37 | value = module.vpc.private_subnets 38 | } 39 | -------------------------------------------------------------------------------- /usecases/mwaa-cognito-cdk/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Konstantin 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /dags/xgboost-ml-pipeline/2.0/config.py: -------------------------------------------------------------------------------- 1 | 2 | # COMMON 3 | REGION_NAME="us-east-1" 4 | 5 | # AIRFLOW 6 | AIRFLOW_DAG_ID="mwaa-sm-customer-churn-dag" 7 | 8 | # GLUE 9 | GLUE_ROLE_NAME="AmazonMWAA-Glue-Role" 10 | GLUE_JOB_NAME_PREFIX="mwaa-xgboost-preprocess" 11 | GLUE_JOB_SCRIPT_S3_BUCKET="glue-scripts-XXXXXXXXXXXX-us-east-1" 12 | GLUE_JOB_SCRIPT_S3_KEY="mwaa-xgboost/preprocess-data/glue_etl.py" 13 | DATA_S3_SOURCE="s3://datalake-XXXXXXXXXXXX-us-east-1/customer-churn/customer-churn.csv" 14 | DATA_S3_DEST="s3://mlops-XXXXXXXXXXXX-us-east-1/mwaa-xgboost/processed/" 15 | 16 | # SAGEMAKER 17 | SAGEMAKER_ROLE_NAME="AmazonMWAA-SageMaker-Role" 18 | SAGEMAKER_TRAINING_JOB_NAME_PREFIX="mwaa-sm-training-job" 19 | SAGEMAKER_TRAINING_DATA_S3_SOURCE="s3://mlops-XXXXXXXXXXXX-us-east-1/mwaa-xgboost/processed/train/" 20 | SAGEMAKER_VALIDATION_DATA_S3_SOURCE="s3://mlops-XXXXXXXXXXXX-us-east-1/mwaa-xgboost/processed/validation/" 21 | SAGEMAKER_CONTENT_TYPE="text/csv" 22 | SAGEMAKER_MODEL_NAME_PREFIX="mwaa-sm-customer-churn-model" 23 | SAGEMAKER_ENDPOINT_NAME_PREFIX="mwaa-sm-endpoint" # endpoint names have a 63 max char limit 24 | SAGEMAKER_MODEL_S3_DEST="s3://mlops-XXXXXXXXXXXX-us-east-1/mwaa-xgboost/model/" 25 | -------------------------------------------------------------------------------- /dags/xgboost-ml-pipeline/1.10/config.py: -------------------------------------------------------------------------------- 1 | 2 | # COMMON 3 | REGION_NAME="us-east-1" 4 | 5 | # AIRFLOW 6 | AIRFLOW_DAG_ID="mwaa-sm-customer-churn-dag" 7 | 8 | # GLUE 9 | 
GLUE_ROLE_NAME="AmazonMWAA-Glue-Role" 10 | GLUE_JOB_NAME_PREFIX="mwaa-xgboost-preprocess" 11 | GLUE_JOB_SCRIPT_S3_BUCKET="glue-scripts-XXXXXXXXXXXX-us-east-1" 12 | GLUE_JOB_SCRIPT_S3_KEY="mwaa-xgboost/preprocess-data/glue_etl.py" 13 | DATA_S3_SOURCE="s3://datalake-XXXXXXXXXXXX-us-east-1/customer-churn/customer-churn.csv" 14 | DATA_S3_DEST="s3://mlops-XXXXXXXXXXXX-us-east-1/mwaa-xgboost/processed/" 15 | 16 | # SAGEMAKER 17 | SAGEMAKER_ROLE_NAME="AmazonMWAA-SageMaker-Role" 18 | SAGEMAKER_TRAINING_JOB_NAME_PREFIX="mwaa-sm-training-job" 19 | SAGEMAKER_TRAINING_DATA_S3_SOURCE="s3://mlops-XXXXXXXXXXXX-us-east-1/mwaa-xgboost/processed/train/" 20 | SAGEMAKER_VALIDATION_DATA_S3_SOURCE="s3://mlops-XXXXXXXXXXXX-us-east-1/mwaa-xgboost/processed/validation/" 21 | SAGEMAKER_CONTENT_TYPE="text/csv" 22 | SAGEMAKER_MODEL_NAME_PREFIX="mwaa-sm-customer-churn-model" 23 | SAGEMAKER_ENDPOINT_NAME_PREFIX="mwaa-sm-endpoint" # endpoint names have a 63 max char limit 24 | SAGEMAKER_MODEL_S3_DEST="s3://mlops-XXXXXXXXXXXX-us-east-1/mwaa-xgboost/model/" 25 | -------------------------------------------------------------------------------- /blueprints/examples/Lambda/image/Makefile: -------------------------------------------------------------------------------- 1 | BASE := $(shell /bin/pwd) 2 | 3 | 4 | 5 | build: ##=> Same as package except that we don't create a ZIP 6 | docker build -t taxi_analysis . 
7 | docker tag taxi_analysis:latest $(ACCOUNT).dkr.ecr.$(REGION).amazonaws.com/taxi_analysis:latest 8 | 9 | login: 10 | aws ecr get-login-password --region $(REGION) | docker login --username AWS --password-stdin $(ACCOUNT).dkr.ecr.$(REGION).amazonaws.com 11 | 12 | deploy: ##=> Deploy app using previously saved SAM CLI configuration 13 | aws ecr create-repository --repository-name taxi_analysis --region $(REGION) --image-scanning-configuration scanOnPush=true --image-tag-mutability MUTABLE 14 | aws ecr set-repository-policy --repository-name taxi_analysis --policy-text file://ecrpolicy.json 15 | docker push $(ACCOUNT).dkr.ecr.$(REGION).amazonaws.com/taxi_analysis:latest 16 | 17 | ci: ##=> Run full workflow - Install deps, build deps, and deploy 18 | $(MAKE) login 19 | $(MAKE) build 20 | $(MAKE) deploy 21 | update: 22 | $(MAKE) login 23 | $(MAKE) build 24 | $(MAKE) deploy 25 | cleanup: 26 | $(MAKE) login 27 | aws ecr delete-repository --repository-name taxi_analysis --region $(REGION) 28 | 29 | -------------------------------------------------------------------------------- /requirements/gcp_backport/1.10/requirements_gcp_backport.txt: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | # this software and associated documentation files (the "Software"), to deal in 5 | # the Software without restriction, including without limitation the rights to 6 | # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | # the Software, and to permit persons to whom the Software is furnished to do so. 8 | # 9 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 10 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 11 | # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR 12 | # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 13 | # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 14 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 15 | 16 | grpcio==1.27.2 17 | cython==0.29.21 18 | pandas-gbq==0.13.3 19 | cryptography==3.3.2 20 | apache-airflow-backport-providers-amazon[google] 21 | -------------------------------------------------------------------------------- /requirements/amazon_backport/README.md: -------------------------------------------------------------------------------- 1 | ### Amazon Managed Workflows for Apache Airflow (MWAA) and Backport Providers 2 | 3 | Use Amazon Managed Workflows for Apache Airflow (MWAA) with Apache Airflow Amazon backport providers. 4 | 5 | ### Versions Supported 6 | 7 | Apache Airflow 1.10.12 on Amazon MWAA 8 | 9 | ### Setup 10 | 11 | Copy the file into your MWAA S3 bucket and update your environment to use this version. May be combined with other requirements. 12 | See [Amazon MWAA documentation](https://docs.aws.amazon.com/mwaa/latest/userguide/working-dags-dependencies.html) for more details. 13 | 14 | ### Files 15 | 16 | * [1.10/requirements_amazon_backport.txt](1.10/requirements_amazon_backport.txt) 17 | 18 | ### Explanation 19 | 20 | Just one line. See [Apache Airflow Backport Providers](https://airflow.apache.org/docs/apache-airflow/stable/backport-providers.html) for details. 21 | ``` 22 | apache-airflow-backport-providers-amazon 23 | ``` 24 | ## Security 25 | 26 | See [CONTRIBUTING](../../blob/main/CONTRIBUTING.md#security-issue-notifications) for more information. 27 | 28 | ## License 29 | 30 | This library is licensed under the MIT-0 License. See the [LICENSE](../../blob/main/LICENSE) file. 
31 | 32 | -------------------------------------------------------------------------------- /usecases/mwaa-glue-athena/MWAAEnvironmentBucket/dags/sampledb/data/csv/sport_division.csv: -------------------------------------------------------------------------------- 1 | 2 | baseball,MLB,AL Central,American League Central,American League Central 3 | baseball,MLB,AL East,American League East,American League East 4 | baseball,MLB,AL West,American League West,American League West 5 | baseball,MLB,NL Central,National League Central,National League Central 6 | baseball,MLB,NL East,National League East,National League East 7 | baseball,MLB,NL West,National League West,National League West 8 | football,NFL,AFC East,American Football Conference East,American Football Conference East 9 | football,NFL,AFC North,American Football Conference North,American Football Conference North 10 | football,NFL,AFC South,American Football Conference South,American Football Conference South 11 | football,NFL,AFC West,American Football Conference West,American Football Conference West 12 | football,NFL,NFC East,National Football Conference East,National Football Conference East 13 | football,NFL,NFC North,National Football Conference North,National Football Conference North 14 | football,NFL,NFC South,National Football Conference South,National Football Conference South 15 | football,NFL,NFC West,National Football Conference West,National Football Conference West 16 | -------------------------------------------------------------------------------- /usecases/mwaa-cognito-cdk/test/mwaa-stack.test.ts: -------------------------------------------------------------------------------- 1 | import { App } from "aws-cdk-lib"; 2 | import { Template } from "aws-cdk-lib/assertions"; 3 | import { MwaaStack } from "../lib/mwaa-stack"; 4 | 5 | describe("Execution unit tests for dev account", () => { 6 | // Generate Storage stack 7 | const stageName = "dev"; 8 | const mockApp = new App(); 9 | const stack = new 
MwaaStack(mockApp, "unittest-mwaa", { 10 | description: "Main MWAA cluster", 11 | dagBucketArn: "fake-s3-arn", 12 | dagBucketKmsKeyArn: "fake-s3-arn", 13 | mwaaEnvironmentName: `${stageName}-mwaa-cluster`, 14 | mwaaPrivateSubnetIds: ["fake-subnet-id1", "fake-subnet-id2"], 15 | mwaaSecurityGroupIds: ["fake-sg-id1"], 16 | stage: stageName, 17 | }); 18 | 19 | const template = Template.fromStack(stack); 20 | 21 | // Execute tests for to confirm setup 22 | test("There is one role in the stack", () => { 23 | // Test preparation 24 | const expectedNumber = 1; 25 | // Test execution 26 | template.resourceCountIs("AWS::IAM::Role", expectedNumber); 27 | }); 28 | 29 | test("MWAA configuration setup", () => { 30 | // Test execution 31 | template.hasResourceProperties("AWS::MWAA::Environment", { 32 | DagS3Path: "dags/", 33 | }); 34 | }); 35 | }); 36 | -------------------------------------------------------------------------------- /usecases/mwaa-with-codeartifact/infra/s3_stack.py: -------------------------------------------------------------------------------- 1 | import os 2 | import secrets 3 | 4 | import aws_cdk as cdk 5 | from aws_cdk import aws_s3 as s3 6 | from aws_cdk import aws_s3_deployment as s3_deploy 7 | from constructs import Construct 8 | 9 | 10 | class S3Stack(cdk.Stack): 11 | def __init__(self, scope: Construct, id: str, **kwargs) -> None: 12 | super().__init__(scope, id, **kwargs) 13 | 14 | rand_int = secrets.randbelow(1000001) 15 | self._instance = s3.Bucket( 16 | self, 17 | "mwaa-ca-bucket", 18 | bucket_name=os.environ.get("BUCKET_NAME", f"mwaa-ca-{rand_int}"), 19 | block_public_access=s3.BlockPublicAccess.BLOCK_ALL, 20 | removal_policy=cdk.RemovalPolicy.DESTROY, 21 | auto_delete_objects=True, 22 | versioned=True, 23 | ) 24 | 25 | # Deploy files to an S3 bucket (MWAA DAGs) 26 | s3_deploy.BucketDeployment( 27 | self, 28 | "mwaa-dags-deployment", 29 | destination_bucket=self.instance, 30 | sources=[s3_deploy.Source.asset("./mwaa-ca-bucket-content")], 31 | 
retain_on_delete=False, 32 | ) 33 | 34 | @property 35 | def instance(self) -> s3.Bucket: 36 | return self._instance -------------------------------------------------------------------------------- /usecases/mwaa-public-webserver-custom-domain/src/cfn-custom-resources/lambda-code-update/https.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | // SPDX-License-Identifier: MIT-0 3 | 4 | import { request } from "https"; 5 | import { Writable, pipeline } from "stream"; 6 | 7 | export async function fetch(uri: string) { 8 | return new Promise((resolve, reject) => { 9 | const req = request(uri, (res) => 10 | pipeline([res, collectBuffer(resolve)], done) 11 | ); 12 | 13 | function done(error?: Error | null) { 14 | if (!error) return; 15 | req.destroy(error); 16 | reject(error); 17 | } 18 | 19 | req.on("error", done); 20 | 21 | req.end(); 22 | }); 23 | } 24 | 25 | const collectBuffer = (callback: (collectedBuffer: Buffer) => void) => { 26 | const chunks = [] as Buffer[]; 27 | return new Writable({ 28 | write: (chunk, _encoding, done) => { 29 | try { 30 | chunks.push(chunk); 31 | done(); 32 | } catch (err) { 33 | done(err as Error); 34 | } 35 | }, 36 | final: (done) => { 37 | try { 38 | callback(Buffer.concat(chunks)); 39 | done(); 40 | } catch (err) { 41 | done(err as Error); 42 | } 43 | }, 44 | }); 45 | }; 46 | -------------------------------------------------------------------------------- /blueprints/examples/EKS/infra/cdk/cdk.json: -------------------------------------------------------------------------------- 1 | { 2 | "app": "python3 app.py", 3 | "watch": { 4 | "include": [ 5 | "**" 6 | ], 7 | "exclude": [ 8 | "README.md", 9 | "cdk*.json", 10 | "requirements*.txt", 11 | "source.bat", 12 | "**/__init__.py", 13 | "python/__pycache__", 14 | "tests" 15 | ] 16 | }, 17 | "context": { 18 | "@aws-cdk/aws-apigateway:usagePlanKeyOrderInsensitiveId": true, 19 | 
"@aws-cdk/core:stackRelativeExports": true, 20 | "@aws-cdk/aws-rds:lowercaseDbIdentifier": true, 21 | "@aws-cdk/aws-lambda:recognizeVersionProps": true, 22 | "@aws-cdk/aws-lambda:recognizeLayerVersion": true, 23 | "@aws-cdk/aws-cloudfront:defaultSecurityPolicyTLSv1.2_2021": true, 24 | "@aws-cdk-containers/ecs-service-extensions:enableDefaultLogDriver": true, 25 | "@aws-cdk/aws-ec2:uniqueImdsv2TemplateName": true, 26 | "@aws-cdk/core:checkSecretUsage": true, 27 | "@aws-cdk/aws-iam:minimizePolicies": true, 28 | "@aws-cdk/core:validateSnapshotRemovalPolicy": true, 29 | "@aws-cdk/aws-codepipeline:crossAccountKeyAliasStackSafeResourceName": true, 30 | "@aws-cdk/aws-s3:createDefaultLoggingPolicy": true, 31 | "@aws-cdk/core:target-partitions": [ 32 | "aws", 33 | "aws-cn" 34 | ] 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /usecases/mwaa-public-webserver-custom-domain/src/cfn-custom-resources/us-east-1-lambda-stack/https.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | // SPDX-License-Identifier: MIT-0 3 | 4 | import { request } from "https"; 5 | import { Writable, pipeline } from "stream"; 6 | 7 | export async function fetch(uri: string) { 8 | return new Promise((resolve, reject) => { 9 | const req = request(uri, (res) => 10 | pipeline([res, collectBuffer(resolve)], done) 11 | ); 12 | 13 | function done(error?: Error | null) { 14 | if (!error) return; 15 | req.destroy(error); 16 | reject(error); 17 | } 18 | 19 | req.on("error", done); 20 | 21 | req.end(); 22 | }); 23 | } 24 | 25 | const collectBuffer = (callback: (collectedBuffer: Buffer) => void) => { 26 | const chunks = [] as Buffer[]; 27 | return new Writable({ 28 | write: (chunk, _encoding, done) => { 29 | try { 30 | chunks.push(chunk); 31 | done(); 32 | } catch (err) { 33 | done(err as Error); 34 | } 35 | }, 36 | final: (done) => { 37 | try { 38 | callback(Buffer.concat(chunks)); 39 | done(); 40 | } catch (err) { 41 | done(err as Error); 42 | } 43 | }, 44 | }); 45 | }; 46 | -------------------------------------------------------------------------------- /usecases/mwaa-cognito-cdk/Makefile: -------------------------------------------------------------------------------- 1 | # Execute a sequence of actions 2 | all: install build linting security unittest cooling 3 | 4 | # Pipeline sequence 5 | ## The warming step pre-warms the evironment with: 6 | install: 7 | @echo "Install modules local" 8 | npm i 9 | installci: 10 | @echo "Install modules CI/CD pipeline" 11 | npm i 12 | 13 | ## This step builds applications and creates deliverable items 14 | build: 15 | npm run cdk synth 16 | buildci: 17 | export NODE_OPTIONS=--max_old_space_size=8192 18 | npm run cdk synth 19 | ## This step checks the code base with linting tools 20 | linting: 21 | @echo "Check and fix liting locally" 22 | npm run format 23 | lintingci: 24 | @echo "Liniting code check in CI/CD pipeline" 25 | npm run eslint 26 | ## This step executes unit tests for the code base 27 | unittest: 28 | @echo "Unit test 
local execution" 29 | npm run test 30 | unittestci: 31 | @echo "Unit test in CI/CD pipeline" 32 | npm run test 33 | security: 34 | @echo "Check security locally" 35 | cfn_nag_scan -i ./cdk.out -t .\*.template.json 36 | 37 | # Manual execution 38 | ## Cleanup the whole environment. Remove all temporary files 39 | clean: 40 | git clean -xdf 41 | ## Deploy application to the Dev AWS account manually 42 | deploy: 43 | npm run cdk -- deploy dev-* --require-approval never 44 | -------------------------------------------------------------------------------- /usecases/mwaa-glue-athena/MWAAEnvironmentBucket/dags/run_athena_mapped_dynamic.py: -------------------------------------------------------------------------------- 1 | 2 | from airflow.decorators import dag, task 3 | from airflow.providers.amazon.aws.hooks.s3 import S3Hook 4 | from airflow.providers.amazon.aws.operators.athena import AthenaOperator 5 | from datetime import datetime 6 | import os, mwaa_config 7 | 8 | DAG_ID = os.path.basename(__file__).replace(".py", "") 9 | 10 | @task() 11 | def get_queries_fn(): 12 | s3 = S3Hook() 13 | data = s3.list_keys(bucket_name=mwaa_config.BUCKET_NAME, prefix=mwaa_config.ATHENA_KEY, start_after_key=mwaa_config.ATHENA_KEY) 14 | return data 15 | 16 | @task() 17 | def data_from_s3(key_name): 18 | s3 = S3Hook() 19 | data = s3.read_key(bucket_name=mwaa_config.BUCKET_NAME, key=key_name) 20 | return data 21 | 22 | @dag( 23 | dag_id = DAG_ID, 24 | start_date=datetime(2022, 1, 1), 25 | catchup=False, 26 | schedule_interval = None, 27 | ) 28 | def sql_dag(): 29 | get_queries=get_queries_fn() 30 | get_data_from_s3=data_from_s3.expand(key_name=get_queries) 31 | create_table_agg = AthenaOperator.partial( 32 | task_id="run_sql_athena", 33 | database='curated_db', 34 | output_location=mwaa_config.RESULTS_LOCATION 35 | ).expand(query=get_data_from_s3) 36 | 37 | sql_dag_instance = sql_dag() 38 | -------------------------------------------------------------------------------- 
/dags/airflow-243-examples/README.md: -------------------------------------------------------------------------------- 1 | ## Amazon Managed Workflows for Apache Airflow (MWAA) using Apache Airflow 2.4.3 Examples 2 | 3 | This examples in this directory demonstrates how to use the below new features of Apache Airflow 2.4.3 in an Amazon MWAA environment: 4 | - Data-aware scheduling using Datasets 5 | - Dynamic Task Mapping 6 | 7 | There is also an example DAG code that will allow you to verify the Python version that is pre-installed in your MWAA environment. 8 | 9 | ### Blog 10 | This code is intended to support the AWS blog post: What’s new with Amazon MWAA support for Apache Airflow version 2.4.3 11 | 12 | ### Versions Supported 13 | Apache Airflow 2.4.3 on Amazon MWAA. 14 | 15 | ### Setup 16 | The respective folders have instructions as to how you can setup the DAGs to enable the features in your own Amazon MWAA Airflow 2.4.3 environment. 17 | 18 | ### Requirements.txt needed 19 | None 20 | 21 | ### Plugins needed 22 | None 23 | 24 | ### Explanation 25 | Each of the folders has necessary explanations of the DAG code used to implement the feature. 26 | 27 | ## Security 28 | 29 | See [CONTRIBUTING](../blob/main/CONTRIBUTING.md#security-issue-notifications) for more information. 30 | 31 | ## License 32 | 33 | This library is licensed under the MIT-0 License. See the [LICENSE](../blob/main/LICENSE) file. 
34 | -------------------------------------------------------------------------------- /blueprints/examples/ECS/infra/spark_image/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM alpine:3.10 2 | 3 | ARG SPARK_VERSION=3.1.3 4 | ARG HADOOP_VERSION_SHORT=3.2 5 | ARG HADOOP_VERSION=3.2.0 6 | ARG AWS_SDK_VERSION=1.11.375 7 | 8 | RUN apk add --no-cache bash openjdk8-jre python3 9 | 10 | # Download and install spark 11 | RUN wget -qO- https://www-eu.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION_SHORT}.tgz | tar zx -C /opt && \ 12 | mv /opt/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION_SHORT} /opt/spark 13 | 14 | # Configure spark to use IAM Role of container 15 | RUN echo spark.hadoop.fs.s3a.aws.credentials.provider=com.amazonaws.auth.EC2ContainerCredentialsProviderWrapper > /opt/spark/conf/spark-defaults.conf 16 | 17 | # Install aws-cdk and hadoop-cdk 18 | RUN wget https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/${HADOOP_VERSION}/hadoop-aws-${HADOOP_VERSION}.jar -P /opt/spark/jars/ && \ 19 | wget https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/${AWS_SDK_VERSION}/aws-java-sdk-bundle-${AWS_SDK_VERSION}.jar -P /opt/spark/jars/ 20 | 21 | ENV PATH="/opt/spark/bin:${PATH}" 22 | ENV PYSPARK_PYTHON=python3 23 | 24 | COPY app.py / 25 | 26 | # Setting host name and running spark application 27 | ENTRYPOINT ["/bin/sh", "-c", "echo 127.0.0.1 $HOSTNAME >> /etc/hosts; spark-submit app.py"] -------------------------------------------------------------------------------- /dags/xgboost-ml-pipeline/glue/glue_etl.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from awsglue.transforms import * 3 | from awsglue.utils import getResolvedOptions 4 | from pyspark.context import SparkContext 5 | from awsglue.context import GlueContext 6 | from awsglue.job import Job 7 | from awsglue.dynamicframe import 
DynamicFrame 8 | 9 | #Retrieve parameters for the Glue job. 10 | args = getResolvedOptions(sys.argv, ['JOB_NAME', 'S3_SOURCE', 'S3_DEST', 11 | 'TRAIN_KEY', 'VAL_KEY']) 12 | 13 | sc = SparkContext() 14 | glueContext = GlueContext(sc) 15 | spark = glueContext.spark_session 16 | job = Job(glueContext) 17 | job.init(args['JOB_NAME'], args) 18 | 19 | #Create a PySpark dataframe from the source table. 20 | source_data_frame = spark.read.load(args['S3_SOURCE'], format='csv', 21 | inferSchema=True, header=False) 22 | 23 | #Split the dataframe in to training and validation dataframes. 24 | train_data, val_data = source_data_frame.randomSplit([.7,.3]) 25 | 26 | #Write both dataframes to the destination datastore. 27 | train_path = args['S3_DEST'] + args['TRAIN_KEY'] 28 | val_path = args['S3_DEST'] + args['VAL_KEY'] 29 | 30 | train_data.write.save(train_path, format='csv', mode='overwrite') 31 | val_data.write.save(val_path, format='csv', mode='overwrite') 32 | 33 | #Complete the job. 34 | job.commit() -------------------------------------------------------------------------------- /dags/airflow-272-examples/README.md: -------------------------------------------------------------------------------- 1 | ## Examples for Amazon Managed Workflows for Apache Airflow (Amazon MWAA) using Apache Airflow 2.7.2 2 | 3 | The examples in this directory demonstrate how to use the following new features of Apache Airflow 2.7.2 in an Amazon MWAA environment: 4 | - [Deferrable operators](./deferrable_operators/README.md) 5 | - [Setup and Teardown tasks](./setup_teardown_tasks/README.md) 6 | 7 | ### Blog 8 | This code is intended to support the AWS blog post "Introducing Amazon MWAA support for Apache Airflow version 2.7.2 and deferrable operators" 9 | 10 | ### Versions Supported 11 | Apache Airflow 2.7.2 on Amazon MWAA. 12 | 13 | ### Setup 14 | The respective folders have instructions as to how you can setup the DAGs to enable the features in your own Amazon MWAA Airflow 2.7.2 environment. 
15 | 16 | ### Requirements.txt needed 17 | None 18 | 19 | ### Plugins needed 20 | None 21 | 22 | ### Explanation 23 | For easier readability, the respective DAG file code has inline comments to help with explanation. 24 | 25 | ## Security 26 | 27 | See [CONTRIBUTING](https://github.com/aws-samples/amazon-mwaa-examples/blob/main/CONTRIBUTING.md) for more information. 28 | 29 | ## License 30 | 31 | This library is licensed under the MIT-0 License. See the [LICENSE](https://github.com/aws-samples/amazon-mwaa-examples/blob/main/LICENSE) file. 32 | -------------------------------------------------------------------------------- /usecases/mwaa-glue-athena/MWAAEnvironmentBucket/dags/run_athena_mapped_dynamic-dataset.py: -------------------------------------------------------------------------------- 1 | 2 | from airflow.decorators import dag, task 3 | from airflow.providers.amazon.aws.hooks.s3 import S3Hook 4 | from airflow.providers.amazon.aws.operators.athena import AthenaOperator 5 | from airflow.datasets import Dataset 6 | from datetime import datetime 7 | import os, mwaa_config 8 | 9 | DAG_ID = os.path.basename(__file__).replace(".py", "") 10 | 11 | @task() 12 | def get_queries_fn(): 13 | s3 = S3Hook() 14 | data = s3.list_keys(bucket_name=mwaa_config.BUCKET_NAME, prefix=mwaa_config.ATHENA_KEY, start_after_key=mwaa_config.ATHENA_KEY) 15 | return data 16 | 17 | @task() 18 | def data_from_s3(key_name): 19 | s3 = S3Hook() 20 | data = s3.read_key(bucket_name=mwaa_config.BUCKET_NAME, key=key_name) 21 | return data 22 | 23 | @dag( 24 | dag_id = DAG_ID, 25 | start_date=datetime(2022, 1, 1), 26 | catchup=False, 27 | schedule=[Dataset(mwaa_config.DATASET_NAME)], 28 | ) 29 | def sql_dag(): 30 | get_queries=get_queries_fn() 31 | get_data_from_s3=data_from_s3.expand(key_name=get_queries) 32 | create_table_agg = AthenaOperator.partial( 33 | task_id="run_sql_athena", 34 | database='curated_db', 35 | output_location=mwaa_config.RESULTS_LOCATION 36 | ).expand(query=get_data_from_s3) 
37 | 38 | sql_dag_instance = sql_dag() 39 | -------------------------------------------------------------------------------- /usecases/start-stop-mwaa-environment/.eslintrc.json: -------------------------------------------------------------------------------- 1 | 2 | 3 | { 4 | "env": { 5 | "browser": true, 6 | "es6": true 7 | }, 8 | "extends": ["prettier", "plugin:@typescript-eslint/recommended"], 9 | "globals": { 10 | "Atomics": "readonly", 11 | "SharedArrayBuffer": "readonly" 12 | }, 13 | "parser": "@typescript-eslint/parser", 14 | "parserOptions": { 15 | "ecmaVersion": 2018 16 | }, 17 | "plugins": ["@typescript-eslint", "prettier"], 18 | "rules": { 19 | "prettier/prettier": ["error"], 20 | "@typescript-eslint/camelcase": "off", 21 | "@typescript-eslint/no-non-null-assertion": "off", 22 | "import/prefer-default-export": "off", 23 | "no-console": "off", 24 | "@typescript-eslint/no-unused-vars": "off", 25 | "no-use-before-define": "off", 26 | "no-unused-vars": "off", 27 | "@typescript-eslint/no-var-requires": "off" 28 | }, 29 | "overrides": [ 30 | { 31 | "files": ["**/test/**/*.test.ts"], 32 | "env": { 33 | "jest": true 34 | }, 35 | "plugins": ["jest"], 36 | "rules": { 37 | "@typescript-eslint/no-unused-vars": "off", 38 | "@typescript-eslint/no-explicit-any": "off", 39 | "@typescript-eslint/no-non-null-assertion": "off", 40 | "@typescript-eslint/explicit-function-return-type": "off", 41 | "@typescript-eslint/no-var-requires": "off" 42 | } 43 | } 44 | ] 45 | } 46 | -------------------------------------------------------------------------------- /requirements/gcp_backport/README.md: -------------------------------------------------------------------------------- 1 | ### Amazon Managed Workflows for Apache Airflow (MWAA) and GCP Backport Providers 2 | 3 | Use Amazon Managed Workflows for Apache Airflow (MWAA) with Apache Airflow GCP backport providers. 
4 | 5 | ### Versions Supported 6 | 7 | Apache Airflow 1.10.12 on Amazon MWAA 8 | 9 | ### Setup 10 | 11 | Copy the file into your MWAA S3 bucket and update your environment to use this version. May be combined with other requirements. 12 | See [Amazon MWAA documentation](https://docs.aws.amazon.com/mwaa/latest/userguide/working-dags-dependencies.html) for more details. 13 | 14 | ### Files 15 | 16 | * [1.10/requirements_gcp_backport.txt](1.10/requirements_gcp_backport.txt) 17 | 18 | ### Explanation 19 | 20 | This file specifies specific versions that are known to work on Apache Airflow 1.10.12 on Amazon Linux. For more details on backport providers 21 | see [Apache Airflow Backport Providers](https://airflow.apache.org/docs/apache-airflow/stable/backport-providers.html) for details. 22 | ``` 23 | grpcio==1.27.2 24 | cython==0.29.21 25 | pandas-gbq==0.13.3 26 | cryptography==3.3.2 27 | apache-airflow-backport-providers-amazon[google] 28 | ``` 29 | ## Security 30 | 31 | See [CONTRIBUTING](../../blob/main/CONTRIBUTING.md#security-issue-notifications) for more information. 32 | 33 | ## License 34 | 35 | This library is licensed under the MIT-0 License. See the [LICENSE](../../LICENSE) file. 36 | 37 | -------------------------------------------------------------------------------- /usecases/mwaa-public-webserver-custom-domain/src/cfn-custom-resources/generate-secret/cfn-response.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | // SPDX-License-Identifier: MIT-0 3 | 4 | import { request } from "https"; 5 | 6 | export enum Status { 7 | "SUCCESS" = "SUCCESS", 8 | "FAILED" = "FAILED", 9 | } 10 | 11 | export async function sendCfnResponse(props: { 12 | event: { 13 | StackId: string; 14 | RequestId: string; 15 | LogicalResourceId: string; 16 | ResponseURL: string; 17 | }; 18 | status: Status; 19 | reason?: string; 20 | data?: { 21 | [key: string]: string; 22 | }; 23 | physicalResourceId?: string; 24 | }) { 25 | const response = { 26 | Status: props.status, 27 | Reason: props.reason?.toString() || "See CloudWatch logs", 28 | PhysicalResourceId: props.physicalResourceId || "no-explicit-id", 29 | StackId: props.event.StackId, 30 | RequestId: props.event.RequestId, 31 | LogicalResourceId: props.event.LogicalResourceId, 32 | Data: props.data || {}, 33 | }; 34 | 35 | await new Promise((resolve, reject) => { 36 | const options = { 37 | method: "PUT", 38 | headers: { "content-type": "" }, 39 | }; 40 | request(props.event.ResponseURL, options) 41 | .on("error", (err) => { 42 | reject(err); 43 | }) 44 | .end(JSON.stringify(response), "utf8", resolve); 45 | }); 46 | } 47 | -------------------------------------------------------------------------------- /usecases/mwaa-public-webserver-custom-domain/src/cfn-custom-resources/lambda-code-update/cfn-response.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | // SPDX-License-Identifier: MIT-0 3 | 4 | import { request } from "https"; 5 | 6 | export enum Status { 7 | "SUCCESS" = "SUCCESS", 8 | "FAILED" = "FAILED", 9 | } 10 | 11 | export async function sendCfnResponse(props: { 12 | event: { 13 | StackId: string; 14 | RequestId: string; 15 | LogicalResourceId: string; 16 | ResponseURL: string; 17 | }; 18 | status: Status; 19 | reason?: string; 20 | data?: { 21 | [key: string]: string; 22 | }; 23 | physicalResourceId?: string; 24 | }) { 25 | const response = { 26 | Status: props.status, 27 | Reason: props.reason?.toString() || "See CloudWatch logs", 28 | PhysicalResourceId: props.physicalResourceId || "no-explicit-id", 29 | StackId: props.event.StackId, 30 | RequestId: props.event.RequestId, 31 | LogicalResourceId: props.event.LogicalResourceId, 32 | Data: props.data || {}, 33 | }; 34 | 35 | await new Promise((resolve, reject) => { 36 | const options = { 37 | method: "PUT", 38 | headers: { "content-type": "" }, 39 | }; 40 | request(props.event.ResponseURL, options) 41 | .on("error", (err) => { 42 | reject(err); 43 | }) 44 | .end(JSON.stringify(response), "utf8", resolve); 45 | }); 46 | } 47 | -------------------------------------------------------------------------------- /usecases/mwaa-public-webserver-custom-domain/src/cfn-custom-resources/user-pool-client/cfn-response.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | // SPDX-License-Identifier: MIT-0 3 | 4 | import { request } from "https"; 5 | 6 | export enum Status { 7 | "SUCCESS" = "SUCCESS", 8 | "FAILED" = "FAILED", 9 | } 10 | 11 | export async function sendCfnResponse(props: { 12 | event: { 13 | StackId: string; 14 | RequestId: string; 15 | LogicalResourceId: string; 16 | ResponseURL: string; 17 | }; 18 | status: Status; 19 | reason?: string; 20 | data?: { 21 | [key: string]: string; 22 | }; 23 | physicalResourceId?: string; 24 | }) { 25 | const response = { 26 | Status: props.status, 27 | Reason: props.reason?.toString() || "See CloudWatch logs", 28 | PhysicalResourceId: props.physicalResourceId || "no-explicit-id", 29 | StackId: props.event.StackId, 30 | RequestId: props.event.RequestId, 31 | LogicalResourceId: props.event.LogicalResourceId, 32 | Data: props.data || {}, 33 | }; 34 | 35 | await new Promise((resolve, reject) => { 36 | const options = { 37 | method: "PUT", 38 | headers: { "content-type": "" }, 39 | }; 40 | request(props.event.ResponseURL, options) 41 | .on("error", (err) => { 42 | reject(err); 43 | }) 44 | .end(JSON.stringify(response), "utf8", resolve); 45 | }); 46 | } 47 | -------------------------------------------------------------------------------- /usecases/mwaa-public-webserver-custom-domain/src/cfn-custom-resources/user-pool-domain/cfn-response.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | // SPDX-License-Identifier: MIT-0 3 | 4 | import { request } from "https"; 5 | 6 | export enum Status { 7 | "SUCCESS" = "SUCCESS", 8 | "FAILED" = "FAILED", 9 | } 10 | 11 | export async function sendCfnResponse(props: { 12 | event: { 13 | StackId: string; 14 | RequestId: string; 15 | LogicalResourceId: string; 16 | ResponseURL: string; 17 | }; 18 | status: Status; 19 | reason?: string; 20 | data?: { 21 | [key: string]: string; 22 | }; 23 | physicalResourceId?: string; 24 | }) { 25 | const response = { 26 | Status: props.status, 27 | Reason: props.reason?.toString() || "See CloudWatch logs", 28 | PhysicalResourceId: props.physicalResourceId || "no-explicit-id", 29 | StackId: props.event.StackId, 30 | RequestId: props.event.RequestId, 31 | LogicalResourceId: props.event.LogicalResourceId, 32 | Data: props.data || {}, 33 | }; 34 | 35 | await new Promise((resolve, reject) => { 36 | const options = { 37 | method: "PUT", 38 | headers: { "content-type": "" }, 39 | }; 40 | request(props.event.ResponseURL, options) 41 | .on("error", (err) => { 42 | reject(err); 43 | }) 44 | .end(JSON.stringify(response), "utf8", resolve); 45 | }); 46 | } 47 | -------------------------------------------------------------------------------- /usecases/mwaa-public-webserver-custom-domain/src/cfn-custom-resources/client-secret-retrieval/cfn-response.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | // SPDX-License-Identifier: MIT-0 3 | 4 | import { request } from "https"; 5 | 6 | export enum Status { 7 | "SUCCESS" = "SUCCESS", 8 | "FAILED" = "FAILED", 9 | } 10 | 11 | export async function sendCfnResponse(props: { 12 | event: { 13 | StackId: string; 14 | RequestId: string; 15 | LogicalResourceId: string; 16 | ResponseURL: string; 17 | }; 18 | status: Status; 19 | reason?: string; 20 | data?: { 21 | [key: string]: string; 22 | }; 23 | physicalResourceId?: string; 24 | }) { 25 | const response = { 26 | Status: props.status, 27 | Reason: props.reason?.toString() || "See CloudWatch logs", 28 | PhysicalResourceId: props.physicalResourceId || "no-explicit-id", 29 | StackId: props.event.StackId, 30 | RequestId: props.event.RequestId, 31 | LogicalResourceId: props.event.LogicalResourceId, 32 | Data: props.data || {}, 33 | }; 34 | 35 | await new Promise((resolve, reject) => { 36 | const options = { 37 | method: "PUT", 38 | headers: { "content-type": "" }, 39 | }; 40 | request(props.event.ResponseURL, options) 41 | .on("error", (err) => { 42 | reject(err); 43 | }) 44 | .end(JSON.stringify(response), "utf8", resolve); 45 | }); 46 | } 47 | -------------------------------------------------------------------------------- /usecases/mwaa-public-webserver-custom-domain/src/cfn-custom-resources/us-east-1-lambda-stack/cfn-response.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | // SPDX-License-Identifier: MIT-0 3 | 4 | import { request } from "https"; 5 | 6 | export enum Status { 7 | "SUCCESS" = "SUCCESS", 8 | "FAILED" = "FAILED", 9 | } 10 | 11 | export async function sendCfnResponse(props: { 12 | event: { 13 | StackId: string; 14 | RequestId: string; 15 | LogicalResourceId: string; 16 | ResponseURL: string; 17 | }; 18 | status: Status; 19 | reason?: string; 20 | data?: { 21 | [key: string]: string; 22 | }; 23 | physicalResourceId?: string; 24 | }) { 25 | const response = { 26 | Status: props.status, 27 | Reason: props.reason?.toString() || "See CloudWatch logs", 28 | PhysicalResourceId: props.physicalResourceId || "no-explicit-id", 29 | StackId: props.event.StackId, 30 | RequestId: props.event.RequestId, 31 | LogicalResourceId: props.event.LogicalResourceId, 32 | Data: props.data || {}, 33 | }; 34 | 35 | await new Promise((resolve, reject) => { 36 | const options = { 37 | method: "PUT", 38 | headers: { "content-type": "" }, 39 | }; 40 | request(props.event.ResponseURL, options) 41 | .on("error", (err) => { 42 | reject(err); 43 | }) 44 | .end(JSON.stringify(response), "utf8", resolve); 45 | }); 46 | } 47 | -------------------------------------------------------------------------------- /dags/bash_operator_script/1.10/bash_operator.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy of 5 | this software and associated documentation files (the "Software"), to deal in 6 | the Software without restriction, including without limitation the rights to 7 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 8 | the Software, and to permit persons to whom the Software is furnished to do so. 
9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 11 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 12 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 13 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 14 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 15 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 16 | """ 17 | 18 | import os 19 | from airflow import DAG 20 | from airflow.operators.bash_operator import BashOperator 21 | from airflow.utils.dates import days_ago 22 | DAG_ID = os.path.basename(__file__).replace(".py", "") 23 | with DAG(dag_id=DAG_ID, schedule_interval=None, catchup=False, start_date=days_ago(1)) as dag: 24 | cli_command = BashOperator( 25 | task_id="bash_command", 26 | bash_command='{{ dag_run.conf["command"] if dag_run else "" }}' 27 | ) 28 | -------------------------------------------------------------------------------- /blueprints/examples/EMR_on_EKS/infra/cdk/setup.py: -------------------------------------------------------------------------------- 1 | 2 | import setuptools 3 | 4 | 5 | setuptools.setup( 6 | name="emr_eks_cdk", 7 | version="0.0.1", 8 | 9 | description="EMR on EKS app", 10 | long_description="EMR on EKS ", 11 | long_description_content_type="text/markdown", 12 | 13 | author="author", 14 | 15 | package_dir={"": "stacks"}, 16 | packages=setuptools.find_packages(where="stacks"), 17 | 18 | install_requires=[ 19 | "aws-cdk.core==2.31.2", 20 | "aws-cdk.aws-emrcontainers==2.31.2", 21 | "aws-cdk.aws-eks==2.31.2", 22 | "aws-cdk.aws-ec2==2.31.2", 23 | "aws-cdk.aws-emr==2.31.2", 24 | "aws-cdk.aws_acmpca==2.31.2", 25 | "aws-cdk.aws-s3-deployment==2.31.2", 26 | "pyOpenSSL", 27 | "boto3", 28 | "awscli" 29 | ], 30 | 31 | python_requires=">=3.6", 32 | 33 | classifiers=[ 34 | "Development Status :: 4 - Beta", 35 | 36 | "Intended Audience :: Developers", 37 
| 38 | "License :: OSI Approved :: Apache Software License", 39 | 40 | "Programming Language :: JavaScript", 41 | "Programming Language :: Python :: 3 :: Only", 42 | "Programming Language :: Python :: 3.6", 43 | "Programming Language :: Python :: 3.7", 44 | "Programming Language :: Python :: 3.8", 45 | 46 | "Topic :: Software Development :: Code Generators", 47 | "Topic :: Utilities", 48 | 49 | "Typing :: Typed", 50 | ], 51 | ) 52 | -------------------------------------------------------------------------------- /usecases/mwaa_utilization_cw_metric/infra/terraform/outputs.tf: -------------------------------------------------------------------------------- 1 | output "mwaa_webserver_url" { 2 | description = "The webserver URL of the MWAA Environment" 3 | value = module.mwaa.mwaa_webserver_url 4 | } 5 | 6 | output "mwaa_arn" { 7 | description = "The ARN of the MWAA Environment" 8 | value = module.mwaa.mwaa_arn 9 | } 10 | 11 | 12 | output "mwaa_role_arn" { 13 | description = "The ARN of the MWAA Environment" 14 | value = module.mwaa.mwaa_role_arn 15 | } 16 | 17 | output "mwaa_role_name" { 18 | description = "The name of the MWAA Environment" 19 | value = module.mwaa.mwaa_role_name 20 | } 21 | 22 | output "mwaa_security_group_id" { 23 | description = "The ARN of the MWAA Environment" 24 | value = module.mwaa.mwaa_security_group_id 25 | } 26 | 27 | output "glue_data_bucket_name" { 28 | description = "The data bucket name" 29 | value = aws_s3_bucket.aws_glue_mwaa_bucket.id 30 | } 31 | 32 | output "mwaa_bucket_name" { 33 | description = "The mwaa bucket name" 34 | value = aws_s3_bucket.mwaa_bucket.id 35 | } 36 | output "glue_service_role_arn" { 37 | description = "Glue Service Role" 38 | value = aws_iam_role.glue_service_role.arn 39 | } 40 | output "glue_service_role_name" { 41 | description = "Glue Service Role" 42 | value = aws_iam_role.glue_service_role.name 43 | } 44 | output "glue_mwaa_iam_policy_arn" { 45 | description = "Glue Service Role" 46 | value = 
aws_iam_policy.glue_mwaa_iam_policy.arn 47 | } 48 | -------------------------------------------------------------------------------- /infra/cloudformation/README.md: -------------------------------------------------------------------------------- 1 | 2 | # Welcome to your MWAA Cloudformation project! 3 | 4 | Welcome to Amazon MWAA Blueprints ! 5 | 6 | This repository contains a collection of code that aim to make it easier and faster for customers to adopt Amazon MWAA. It can be used by AWS customers, partners, and internal AWS teams to configure and manage complete MWAA environment that are fully bootstrapped with the operational software that is needed to deploy and operate workloads. 7 | 8 | 9 | 10 | ## Getting Started 11 | 12 | ### Prerequisites 13 | 14 | First, ensure that you have installed the following tools locally. 15 | 16 | 1. [aws cli](https://docs.aws.amazon.com/cli/latest/userguide/install-cliv2.html) 17 | 2. [MWAA deployer Permissions](https://docs.aws.amazon.com/mwaa/latest/userguide/access-policies.html#full-access-policy) 18 | 19 | ### Deployment Steps 20 | 1. To create MWAA environment with VPC with NAT/IGW 21 | https://docs.aws.amazon.com/mwaa/latest/userguide/quick-start.html 22 | 2. 
To create MWAA environment with no NAT/IGW but with VPC endpoints

- Run the command below after replacing your_bucket_name with the S3 Bucket where DAGs are present
```
aws cloudformation create-stack --stack-name mwaa-environment-private-network --template-body file://template.yaml --parameters ParameterKey=S3Bucket,ParameterValue=your_bucket_name --capabilities CAPABILITY_IAM

```
### Cleanup
```
aws cloudformation delete-stack --stack-name mwaa-environment-private-network
```
--------------------------------------------------------------------------------
/dags/airflow-243-examples/python_version_checker/README.md:
--------------------------------------------------------------------------------
## Amazon Managed Workflows for Apache Airflow (MWAA) CloudFormation Templates

Example CloudFormation templates for Amazon MWAA. See [AWS CloudFormation documentation](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-mwaa-environment.html) for details.

### Versions Supported
Apache Airflow 2.4.3 on Amazon MWAA.

### Setup
**Pre-Requisites**
- An Amazon MWAA environment configured with Apache Airflow v2.4.3.

**Steps**
1. Upload the `python-version.py` file to the S3 Bucket that is configured for your MWAA environment.
2. Enable the DAG with id **"python_version_checker"** once it appears in the Airflow UI.
3. Manually trigger the DAG with id **"python_version_checker"** from your Airflow UI.
4. Wait for it to complete execution. Review the Airflow logs. You will find the statement "Python 3.10.8". This represents the installed Python version on your Amazon MWAA environment.

### Files
python-version.py

### Requirements.txt needed
None

### Plugins needed
None

### Explanation
For easier readability, the respective DAG file code has inline comments to help with explanation.
29 | 30 | ## Security 31 | 32 | See [CONTRIBUTING](../blob/main/CONTRIBUTING.md#security-issue-notifications) for more information. 33 | 34 | ## License 35 | 36 | This library is licensed under the MIT-0 License. See the [LICENSE](../blob/main/LICENSE) file. 37 | -------------------------------------------------------------------------------- /blueprints/examples/EMR_on_EKS/infra/terraform/variables.tf: -------------------------------------------------------------------------------- 1 | variable "eks_cluster_id" { 2 | description = "EKS Cluster ID" 3 | type = string 4 | default = "emr_eks_cluster" 5 | } 6 | 7 | variable "cluster_version" { 8 | description = "EKS cluster version" 9 | type = string 10 | default = "1.23" 11 | } 12 | 13 | variable "name" { 14 | description = "name" 15 | type = string 16 | default = "emr_eks_blueprint" 17 | } 18 | 19 | variable "data_bucket_name" { 20 | description = "S3 data bucket for EMR on EKS" 21 | type = string 22 | default = null 23 | } 24 | variable "tags" { 25 | description = "Common Tags for AWS resources" 26 | type = map(string) 27 | default = {} 28 | } 29 | 30 | variable "vpc_id" { 31 | description = "VPC of the EKS cluster" 32 | type = string 33 | } 34 | 35 | variable "private_subnet_ids" { 36 | description = <<-EOD 37 | (Required) The private subnet IDs in which EKS cluster should be created - format ["a","b"]. 
38 | EOD 39 | type = list(string) 40 | } 41 | 42 | variable "instance_types" { 43 | description = "managed node instance type " 44 | type = list 45 | default = ["m5.large"] 46 | } 47 | 48 | variable "iam_role_path" { 49 | description = "IAM role path" 50 | type = string 51 | default = "/" 52 | } 53 | 54 | variable "iam_role_permissions_boundary" { 55 | description = "ARN of the policy that is used to set the permissions boundary for the IAM role" 56 | type = string 57 | default = null 58 | } 59 | -------------------------------------------------------------------------------- /usecases/start-stop-mwaa-environment/lib/infrastructure/mwaa-base-stack.ts: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy of 5 | this software and associated documentation files (the "Software"), to deal in 6 | the Software without restriction, including without limitation the rights to 7 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 8 | the Software, and to permit persons to whom the Software is furnished to do so. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 11 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 12 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 13 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 14 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 15 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
16 | */ 17 | 18 | import * as cdk from 'aws-cdk-lib'; 19 | import * as construct from 'constructs'; 20 | 21 | export class MwaaBaseStack extends cdk.NestedStack { 22 | readonly prefix: string; 23 | 24 | constructor(scope: construct.Construct, id: string, props?: cdk.StackProps) { 25 | super(scope, id, props); 26 | this.prefix = id; 27 | } 28 | 29 | getName(resourceName: string): string { 30 | const newPrefix = this.prefix.replace(/-stack/g, ''); 31 | return `${newPrefix}-${resourceName}`; 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /usecases/start-stop-mwaa-environment/bin/mwaa-pause-resume.ts: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | /* 4 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of 7 | this software and associated documentation files (the "Software"), to deal in 8 | the Software without restriction, including without limitation the rights to 9 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 10 | the Software, and to permit persons to whom the Software is furnished to do so. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 14 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 15 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 16 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 17 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
18 | */ 19 | 20 | import * as cdk from 'aws-cdk-lib'; 21 | 22 | import configuration from '../lib/commons/config'; 23 | import { MwaaMainStack } from '../lib/infrastructure/mwaa-main-stack'; 24 | 25 | const config = configuration(); 26 | const app = new cdk.App(); 27 | 28 | const mwaaMainStackName = config.mainStackName; 29 | new MwaaMainStack(app, mwaaMainStackName, { 30 | stackName: mwaaMainStackName, 31 | ...config, 32 | env: { 33 | account: config.account, 34 | region: config.region, 35 | }, 36 | }); 37 | -------------------------------------------------------------------------------- /usecases/mwaa-public-webserver-custom-domain/src/lambda-edge/check-auth/configuration.json: -------------------------------------------------------------------------------- 1 | { 2 | "userPoolArn": "arn:aws:cognito-idp:ap-southeast-1:915455005342:userpool/ap-southeast-1_K5oNQcV4u", 3 | "clientId": "4ebim9squ4p2gsll467dl2k1j0", 4 | "oauthScopes": [ 5 | "phone", 6 | "email", 7 | "profile", 8 | "openid", 9 | "aws.cognito.signin.user.admin" 10 | ], 11 | "cognitoAuthDomain": "auth-b2661cc0-83f7-11ec-a9ac-063b73cbe662.auth.ap-southeast-1.amazoncognito.com", 12 | "redirectPathSignIn": "/parseauth", 13 | "redirectPathSignOut": "/", 14 | "redirectPathAuthRefresh": "/refreshauth", 15 | "cookieSettings": { 16 | "idToken": null, 17 | "accessToken": null, 18 | "refreshToken": null, 19 | "nonce": null 20 | }, 21 | "httpHeaders": { 22 | "Content-Security-Policy": "default-src 'none'; img-src 'self'; script-src 'self' https://code.jquery.com https://stackpath.bootstrapcdn.com; style-src 'self' 'unsafe-inline' https://stackpath.bootstrapcdn.com; object-src 'none'; connect-src 'self' https://*.amazonaws.com https://*.amazoncognito.com", 23 | "Strict-Transport-Security": "max-age=31536000; includeSubdomains; preload", 24 | "Referrer-Policy": "same-origin", 25 | "X-XSS-Protection": "1; mode=block", 26 | "X-Frame-Options": "DENY", 27 | "X-Content-Type-Options": "nosniff" 28 | }, 29 | "logLevel": 
"debug", 30 | "nonceSigningSecret": "oqKWVxMZlys87v.t", 31 | "cookieCompatibility": "amplify", 32 | "additionalCookies": {}, 33 | "requiredGroup": "", 34 | "mwaaEnvironmentName": "Airflow_2_Environment" 35 | } 36 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | package-lock.json 3 | __pycache__ 4 | .pytest_cache 5 | venv/ 6 | *.egg-info 7 | 8 | 9 | # IDE 10 | .idea/ 11 | .vscode/ 12 | .venv/ 13 | tmp/ 14 | **/.terraform/* 15 | **/.terraform* 16 | # .tfstate files 17 | *.tfstate 18 | *.tfstate.* 19 | terraform.out 20 | # Crash log files 21 | crash.log 22 | crash.*.log 23 | # Exclude all .tfvars files, which are likely to contain sensitive data, such as 24 | # password, private keys, and other secrets. These should not be part of version 25 | # control as they are data points which are potentially sensitive and subject 26 | # to change depending on the environment. 
27 | *.tfvars 28 | *.tfvars.json 29 | 30 | # Ignore override files as they are usually used to override resources locally and so 31 | # are not checked in 32 | override.tf 33 | override.tf.json 34 | *_override.tf 35 | *_override.tf.json 36 | 37 | # Include override files you do wish to add to version control using negated pattern 38 | # !example_override.tf 39 | 40 | # Include tfplan files to ignore the plan output of command: terraform plan -out=tfplan 41 | # example: *tfplan* 42 | 43 | # Ignore CLI configuration files 44 | .terraformrc 45 | terraform.rc 46 | # Dependency directories 47 | node_modules/ 48 | jspm_packages/ 49 | # Optional npm cache directory 50 | .npm 51 | 52 | # Optional eslint cache 53 | .eslintcache 54 | **/cdk.out 55 | cdk.out 56 | **/cdk.context.json 57 | package-lock.json 58 | # Optional REPL history 59 | .node_repl_history 60 | # Output of 'npm pack' 61 | *.tgz 62 | # General 63 | .DS_Store 64 | .AppleDouble 65 | .LSOverride 66 | __pycache__ 67 | -------------------------------------------------------------------------------- /usecases/start-stop-mwaa-environment/jest.config.js: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy of 5 | this software and associated documentation files (the "Software"), to deal in 6 | the Software without restriction, including without limitation the rights to 7 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 8 | the Software, and to permit persons to whom the Software is furnished to do so. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 11 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 12 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR 13 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 14 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 15 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 16 | */ 17 | 18 | module.exports = { 19 | preset: 'ts-jest', 20 | transform: { 21 | '^.+\\.tsx?$': 'ts-jest', 22 | }, 23 | testRegex: '(/__tests__/.*|(\\.|/)(test|spec))\\.(tsx?)$', 24 | moduleFileExtensions: ['ts', 'tsx', 'js', 'jsx', 'json'], 25 | collectCoverage: true, 26 | collectCoverageFrom: ['lib/**/*.{ts,tsx}'], 27 | coveragePathIgnorePatterns: ['.d.ts'], 28 | coverageReporters: ['clover', 'json', 'text', 'lcov', 'cobertura'], 29 | reporters: ['default', ['jest-junit', { suiteName: 'unit tests' }]], 30 | testEnvironment: 'node', 31 | }; 32 | -------------------------------------------------------------------------------- /blueprints/examples/EKS/infra/cdk/app.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | 4 | from aws_cdk import ( 5 | aws_ec2, 6 | App, 7 | Environment 8 | ) 9 | 10 | from stacks.eks_setup import EKSRoleStack 11 | from stacks.mwaa_cdk_backend import VpcStack 12 | from stacks.mwaa_cdk_env import MwaaCdkStackEnv 13 | from stacks.s3 import S3Stack 14 | from stacks.s3_deploy import S3SDeploytack 15 | 16 | from stacks.common import constants 17 | 18 | default_env = Environment( 19 | region=os.environ['CDK_DEFAULT_REGION'], 20 | account=os.environ['CDK_DEFAULT_ACCOUNT'] 21 | ) 22 | 23 | app = App() 24 | 25 | vpc: aws_ec2.Vpc = VpcStack( 26 | scope=app, 27 | construct_id=f'{constants.ID}-infra-vpc', 28 | env=default_env, 29 | cidr=constants.CIDR, 30 | max_azs=constants.MAZ_AZS, 31 | cidr_mask=constants.CIDR_MASK 32 | ).vpc 33 | 34 | b_stack = S3Stack( 35 | scope=app, 36 | construct_id=f'{constants.ID}-infra-bucket', 37 | env=default_env, 38 | bucket_name=constants.DAGS_S3_LOCATION, 39 | vpc=vpc 40 | 
) 41 | 42 | mwaa_env = MwaaCdkStackEnv( 43 | scope=app, 44 | construct_id=f'{constants.ID}-infra-env', 45 | env=default_env, 46 | vpc=vpc 47 | ) 48 | mwaa_env.add_dependency(b_stack) 49 | 50 | deploy = S3SDeploytack(app, f'{constants.ID}-s3-deploy', env=default_env, bucket=b_stack.bucket) 51 | deploy.add_dependency(b_stack) 52 | 53 | # create a role for the EKS nodegroup 54 | eks_nodegroup_role = EKSRoleStack(app, f'{constants.ID}-eks-deps', env=default_env) 55 | eks_nodegroup_role.add_dependency(mwaa_env) 56 | 57 | app.synth() 58 | -------------------------------------------------------------------------------- /usecases/mwaa-snowflake-integration/README.md: -------------------------------------------------------------------------------- 1 | # Using Snowflake with Amazon MWAA for Orchestrating Data Pipelines 2 | 3 | Customers rely on data from different sources such as mobile applications, clickstream events from websites, historical data and more to deduce meaningful patterns in order to optimize their products, services and processes. Using a data pipeline, which is a set of tasks used to automate the movement and transformation of data between different systems can reduce the time and effort needed to gain insights from the data. Apache Airflow and Snowflake have emerged as powerful technologies for data management and analysis. 4 | 5 | Amazon Managed Workflows for Apache Airflow (Amazon MWAA) is a managed workflow orchestration service for Apache Airflow that makes it simple to set up and operate end-to-end data pipelines in the cloud at scale. Snowflake Data Cloud platform provides a single source of truth for all your data needs and allows organizations to store, analyze and share large amounts of data with ease. The Apache Airflow open-source community provides over 1,000 pre-built operators (plugins that simplify connections to services) for Apache Airflow to build data pipelines. 
6 | 7 | Scripts in this Repo are based on [blog post](https://), where we provide an overview of orchestrating your data pipeline using Snowflake operators in your Amazon MWAA environment. We will define the steps needed to setup the integration between Amazon MWAA and Snowflake. The solution will provide an end-to-end automated workflow which includes data ingestion, transformation, analytics and consumption. 8 | 9 | -------------------------------------------------------------------------------- /usecases/mwaa-observability-enhancement/README.md: -------------------------------------------------------------------------------- 1 | # Improve Observability across MWAA tasks 2 | 3 | Observability across the different processes within the data pipeline is a key component to monitor the success and/or failure of the pipeline. While scheduling the execution of tasks within the data pipeline is controlled by Airflow, the execution of the task itself (transforming, normalizing and/or aggregating data) is done by different services based on the use case. Having an end-to-end view of the data flow is a challenge due to multiple touch points in the data pipeline. 4 | 5 | The DAG file and scripts in this project define how we can use a correlation_id across tasks within a DAG. Having the unique identifier helps improve the end-to-end observability for a DAG run. It helps to reduce the time to look through different log sources and enables for faster troubleshooting of a DAG run. 

Refer to the following [blog]() for more details

## Architecture

![Correlation ID across DAG run](./images/mwaa_observability.png)

## Prerequisites for Deployment
Execute the [MWAA analytics workshop](https://catalog.us-east-1.prod.workshops.aws/workshops/795e88bb-17e2-498f-82d1-2104f4824168/en-US) and then use the scripts present in the GitHub repo to gain more observability of your DAG run

- Place the DAG file 'data_pieplie.py' in the S3 bucket 'dags' folder
- Place the script files in the S3 bucket 'scripts' folder

---

## License

This library is licensed under the MIT-0 License. See the [LICENSE](LICENSE) file.
--------------------------------------------------------------------------------
/blueprints/examples/ECS/infra/spark_image/app.py:
--------------------------------------------------------------------------------
"""
-*- coding: utf-8 -*-
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.

# Permission is hereby granted, free of charge, to any person obtaining a copy of
# this software and associated documentation files (the "Software"), to deal in
# the Software without restriction, including without limitation the rights to
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
# the Software, and to permit persons to whom the Software is furnished to do so.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
17 | """ 18 | 19 | from pyspark.sql.functions import col, expr 20 | from pyspark.sql import SparkSession 21 | SPARK = SparkSession.builder.appName('MWAASparkBluePrint.com').getOrCreate() 22 | 23 | 24 | DATA = [("2019-01-23", 1), ("2019-06-24", 2), ("2019-09-20", 3)] 25 | # Create Spark data frame by incrementing it by 1 26 | SPARK.createDataFrame(DATA).toDF("date", "increment") \ 27 | .select(col("date"), col("increment"), \ 28 | expr("add_months(to_date(date,'yyyy-MM-dd'), cast(increment as int))").alias("inc_date")) \ 29 | .show() 30 | -------------------------------------------------------------------------------- /usecases/mwaa-glue-athena/MWAAEnvironmentBucket/dags/run_athena.py: -------------------------------------------------------------------------------- 1 | from airflow.decorators import dag, task 2 | from airflow.providers.amazon.aws.hooks.s3 import S3Hook 3 | from airflow.providers.amazon.aws.operators.athena import AthenaOperator 4 | from airflow.datasets import Dataset 5 | from datetime import datetime 6 | import os, mwaa_config 7 | 8 | DAG_ID = os.path.basename(__file__).replace(".py", "") 9 | 10 | def data_from_s3(bucket_name,table_name): 11 | s3 = S3Hook() 12 | data = s3.read_key(bucket_name = mwaa_config.BUCKET_NAME, key = mwaa_config.ATHENA_KEY+table_name) 13 | return data 14 | 15 | @dag( 16 | dag_id = DAG_ID, 17 | start_date=datetime(2022, 1, 1), 18 | catchup=False, 19 | schedule=None, 20 | ) 21 | def sql_dag(): 22 | create_table_sporting_event_info_agg = AthenaOperator( 23 | task_id="create_table_sporting_event_info_agg", 24 | query=data_from_s3(mwaa_config.BUCKET_NAME,'sporting_event_info_agg'), 25 | database='curated_db', 26 | output_location=f"s3://" + mwaa_config.BUCKET_NAME + "/athena/create_table_sporting_event_info_agg/", 27 | ) 28 | 29 | create_table_sporting_event_ticket_info_agg = AthenaOperator( 30 | task_id="create_table_sporting_event_ticket_info_agg", 31 | 
query=data_from_s3(mwaa_config.BUCKET_NAME,'sporting_event_ticket_info_agg'), 32 | database='curated_db', 33 | output_location=f"s3://" + mwaa_config.BUCKET_NAME + "/athena/create_table_sporting_event_ticket_info_agg/", 34 | ) 35 | 36 | [create_table_sporting_event_info_agg,create_table_sporting_event_ticket_info_agg] 37 | 38 | sql_dag_instance = sql_dag() 39 | -------------------------------------------------------------------------------- /usecases/start-stop-mwaa-environment/lib/lambda/mwaa-update-environment-function.ts: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy of 5 | this software and associated documentation files (the "Software"), to deal in 6 | the Software without restriction, including without limitation the rights to 7 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 8 | the Software, and to permit persons to whom the Software is furnished to do so. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 11 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 12 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 13 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 14 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 15 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
16 | */ 17 | 18 | import { UpdateEnvironmentCommand, UpdateEnvironmentCommandInput, UpdateEnvironmentCommandOutput, MWAAClient } from '@aws-sdk/client-mwaa'; 19 | 20 | const client = new MWAAClient({}); 21 | 22 | export const handler = async (event: UpdateEnvironmentCommandInput): Promise => { 23 | console.info('Update MWAA Environment Event', event); 24 | 25 | const createEnvCommand = new UpdateEnvironmentCommand(event); 26 | const result = await client.send(createEnvCommand); 27 | 28 | console.info('Update MWAA Environment Result', result); 29 | return result; 30 | }; 31 | -------------------------------------------------------------------------------- /blueprints/examples/AWSGlue/post_provision.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | data_bucket=$(terraform -chdir="./infra/terraform" output -raw glue_data_bucket_name) 5 | 6 | aws s3 cp dags/weatherdata_processing.py s3://$1/dags/ 7 | aws s3 cp scripts/noaa_weatherdata_transform.py s3://$data_bucket/scripts/ 8 | 9 | ### Attach Glue MWAA policy to MWAA execution role 10 | glue_mwaa_iam_policy_arn=$(terraform -chdir="./infra/terraform" output -raw glue_mwaa_iam_policy_arn) 11 | aws iam attach-role-policy --policy-arn $glue_mwaa_iam_policy_arn --role-name $2 12 | 13 | 14 | ### Create MWAA env variables 15 | glue_service_role_arn=$(terraform -chdir="./infra/terraform" output -raw glue_service_role_arn) 16 | glue_service_role_name=$(terraform -chdir="./infra/terraform" output -raw glue_service_role_name) 17 | 18 | mwaa_cli_json=$(aws mwaa create-cli-token --name $3) 19 | CLI_TOKEN=$(echo $mwaa_cli_json | jq -r '.CliToken') 20 | WEB_SERVER_HOSTNAME=$(echo $mwaa_cli_json | jq -r '.WebServerHostname') 21 | CLIRESULTS=$(curl --request POST "https://$WEB_SERVER_HOSTNAME/aws_mwaa/cli" --header "Authorization: Bearer $CLI_TOKEN" --header "Content-Type: text/plain" --data-raw "variables set GLUE_SERVICE_ROLE_ARN $glue_service_role_arn") 22 | 
CLIRESULTS=$(curl --request POST "https://$WEB_SERVER_HOSTNAME/aws_mwaa/cli" --header "Authorization: Bearer $CLI_TOKEN" --header "Content-Type: text/plain" --data-raw "variables set DATA_BUCKET $data_bucket") 23 | CLIRESULTS=$(curl --request POST "https://$WEB_SERVER_HOSTNAME/aws_mwaa/cli" --header "Authorization: Bearer $CLI_TOKEN" --header "Content-Type: text/plain" --data-raw "variables set GLUE_SERVICE_ROLE_NAME $glue_service_role_name") 24 | 25 | 26 | -------------------------------------------------------------------------------- /usecases/mwaa-glue-athena/MWAAEnvironmentBucket/dags/sampledb/schema/functions/loadmlbteams.sql: -------------------------------------------------------------------------------- 1 | CREATE OR REPLACE FUNCTION dms_sample.loadmlbteams() 2 | RETURNS void 3 | AS 4 | $BODY$ 5 | DECLARE 6 | v_div DMS_SAMPLE.SPORT_DIVISION.short_name%TYPE; 7 | mlb_teams CURSOR FOR 8 | SELECT DISTINCT 9 | CASE TRIM(mlb_team) 10 | WHEN 'AAA' THEN 'LAA' 11 | ELSE mlb_team 12 | END AS a_name, 13 | CASE TRIM(mlb_team_long) 14 | WHEN 'Anaheim Angels' THEN 'Los Angeles Angels' 15 | ELSE mlb_team_long 16 | END AS l_name 17 | FROM dms_sample.mlb_data; 18 | BEGIN 19 | FOR trec IN mlb_teams LOOP 20 | CASE 21 | WHEN trec.a_name IN ('BAL', 'BOS', 'TOR', 'TB', 'NYY') THEN 22 | v_div := 'AL East'; 23 | WHEN trec.a_name IN ('CLE', 'DET', 'KC', 'CWS', 'MIN') THEN 24 | v_div := 'AL Central'; 25 | WHEN trec.a_name IN ('TEX', 'SEA', 'HOU', 'OAK', 'LAA') THEN 26 | v_div := 'AL West'; 27 | WHEN trec.a_name IN ('WSH', 'MIA', 'NYM', 'PHI', 'ATL') THEN 28 | v_div := 'NL East'; 29 | WHEN trec.a_name IN ('CHC', 'STL', 'PIT', 'MIL', 'CIN') THEN 30 | v_div := 'NL Central'; 31 | WHEN trec.a_name IN ('COL', 'SD', 'LAD', 'SF', 'ARI') THEN 32 | v_div := 'NL West'; 33 | END CASE; 34 | INSERT INTO dms_sample.sport_team (name, abbreviated_name, sport_type_name, sport_league_short_name, sport_division_short_name) 35 | VALUES (trec.l_name, trec.a_name, 'baseball', 'MLB', v_div); 36 | END 
LOOP; 37 | END; 38 | $BODY$ 39 | LANGUAGE plpgsql; 40 | -------------------------------------------------------------------------------- /usecases/mwaa-glue-bedrock/events/event.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0", 3 | "id": "5dab3fd5-9db0-935e-350d-f3d8aa453950", 4 | "detail-type": "CaseChangeEvent", 5 | "source": "aws.partner/salesforce.com/00D4K000005OuoUUAS/0YLJ9000000003POAQ", 6 | "account": "515232103838", 7 | "time": "2024-09-17T09:22:12Z", 8 | "region": "us-east-1", 9 | "resources": [], 10 | "detail": { 11 | "payload": { 12 | "Origin": "Phone", 13 | "LastModifiedDate": "2024-09-17T09:22:10.000Z", 14 | "Description": "This is a test description", 15 | "IsClosed": false, 16 | "BusinessHoursId": "01m4K000000CFNLQA4", 17 | "OwnerId": "0054K000003K5EAQA0", 18 | "CreatedById": "0054K000003K5EAQA0", 19 | "CaseNumber": "00001037", 20 | "IsClosedOnCreate": false, 21 | "Status": "New", 22 | "Priority": "Medium", 23 | "Subject": "sadfdsfa", 24 | "IsEscalated": false, 25 | "ChangeEventHeader": { 26 | "commitNumber": 1675935024583, 27 | "commitUser": "0054K000003K5EAQA0", 28 | "sequenceNumber": 1, 29 | "entityName": "Case", 30 | "changeType": "CREATE", 31 | "changedFields": [], 32 | "changeOrigin": "com/salesforce/api/soap/61.0;client=SfdcInternalAPI/", 33 | "transactionKey": "00033cd8-1c87-287c-3abb-69cbae6bb643", 34 | "commitTimestamp": 1726564930000, 35 | "recordIds": [ 36 | "500J9000005b3zgIAA" 37 | ] 38 | }, 39 | "CreatedDate": "2024-09-17T09:22:10.000Z", 40 | "LastModifiedById": "0054K000003K5EAQA0" 41 | }, 42 | "schemaId": "lji40tf8aFHQpZ3TTlGKYA", 43 | "id": "e06900d8-e41a-4a3e-822d-a4b337d232da" 44 | } 45 | } -------------------------------------------------------------------------------- /usecases/start-stop-mwaa-environment/lib/infrastructure/mwaa-base-stack.test.ts: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright Amazon.com, Inc. 
or its affiliates. All Rights Reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy of 5 | this software and associated documentation files (the "Software"), to deal in 6 | the Software without restriction, including without limitation the rights to 7 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 8 | the Software, and to permit persons to whom the Software is furnished to do so. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 11 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 12 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 13 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 14 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 15 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 16 | */ 17 | 18 | import * as cdk from 'aws-cdk-lib'; 19 | import * as base from '../../lib/infrastructure/mwaa-base-stack'; 20 | 21 | describe('mwaa-base-stack', () => { 22 | describe('getName', () => { 23 | it('should return the resource name stripping -stack keyword from the stack name', () => { 24 | const app = new cdk.App(); 25 | const mainStack = new cdk.Stack(app, 'mwaa-main-stack'); 26 | const stack = new base.MwaaBaseStack(mainStack, 'mwaa-base-stack'); 27 | const resourceName = stack.getName('sns-topic'); 28 | expect(resourceName).toEqual('mwaa-base-sns-topic'); 29 | }); 30 | }); 31 | }); 32 | -------------------------------------------------------------------------------- /usecases/mwaa-public-webserver-custom-domain/src/lambda-edge/shared/https.ts: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: MIT-0

import { IncomingHttpHeaders } from "http";
import { request, RequestOptions } from "https";
import { Writable, pipeline } from "stream";

/**
 * Minimal HTTPS fetch helper: issues a request to `uri`, optionally writing
 * `data` as the request body, and resolves with the status code, response
 * headers, and the full response body collected into a single Buffer.
 * Rejects on request or pipeline errors.
 */
export async function fetch(
  uri: string,
  data?: Buffer,
  options?: RequestOptions
) {
  return new Promise<{
    status?: number;
    headers: IncomingHttpHeaders;
    data: Buffer;
  }>((resolve, reject) => {
    const req = request(uri, options ?? {}, (res) =>
      // Pipe the response into a Writable that buffers every chunk; resolve
      // fires from collectBuffer's callback once the stream is finalized.
      pipeline(
        [
          res,
          collectBuffer((data) =>
            resolve({ status: res.statusCode, headers: res.headers, data })
          ),
        ],
        done
      )
    );

    // Shared completion callback: on error, tear down the request and reject;
    // a null/undefined error (success path) is a no-op because resolve()
    // has already been called above.
    function done(error?: Error | null) {
      if (!error) return;
      req.destroy(error);
      reject(error);
    }

    req.on("error", done);

    // end() both writes the optional body and signals end-of-request.
    req.end(data);
  });
}

// Returns a Writable that accumulates all written chunks and invokes
// `callback` with the concatenated Buffer when the stream is finalized.
const collectBuffer = (callback: (collectedBuffer: Buffer) => void) => {
  const chunks = [] as Buffer[];
  return new Writable({
    write: (chunk, _encoding, done) => {
      try {
        chunks.push(chunk);
        done();
      } catch (err) {
        done(err as Error);
      }
    },
    final: (done) => {
      try {
        callback(Buffer.concat(chunks));
        done();
      } catch (err) {
        done(err as Error);
      }
    },
  });
};
import os
import boto3
from airflow import DAG
from airflow.providers.amazon.aws.operators.ecs import EcsDeleteClusterOperator
from airflow.utils.dates import days_ago

# Name of the ECS cluster to delete, injected as an environment variable by
# the CDK deployment.
CLUSTER_NAME = os.environ.get('AIRFLOW__CDK__CLUSTER_NAME')

# Manually-triggered DAG (no schedule) that deletes the ECS cluster and
# blocks until the deletion has completed.
with DAG(
    dag_id="delete_ecs_cluster_dag",
    schedule_interval=None,
    catchup=False,
    start_date=days_ago(1),
) as dag:
    DELETE_CLUSTER_TASK = EcsDeleteClusterOperator(
        task_id="DELETE_CLUSTER_TASK",
        cluster_name=CLUSTER_NAME,
        wait_for_completion=True,
    )
3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy of 5 | this software and associated documentation files (the "Software"), to deal in 6 | the Software without restriction, including without limitation the rights to 7 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 8 | the Software, and to permit persons to whom the Software is furnished to do so. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 11 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 12 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 13 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 14 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 15 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 16 | */ 17 | 18 | import { MwaaPollingResult, MwaaStatusPoller } from './mwaa-status-poller'; 19 | 20 | const MWAA_ENV_NAME = process.env.MWAA_ENV_NAME || ''; 21 | const mwaaPoller = new MwaaStatusPoller(MWAA_ENV_NAME); 22 | 23 | export const handler = async (event: Record): Promise => { 24 | console.info('Event', event); 25 | 26 | switch (event.command) { 27 | case 'checkCreated': 28 | return await mwaaPoller.checkCreated(); 29 | case 'checkDeleted': 30 | return await mwaaPoller.checkDeleted(); 31 | default: 32 | throw new Error(`Unsupported polling command [${event.command}]!`); 33 | } 34 | }; 35 | -------------------------------------------------------------------------------- /dags/airflow-243-examples/python_version_checker/python-version.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
from airflow import DAG, Dataset
from airflow.decorators import dag, task
import pendulum
from airflow.operators.bash import BashOperator


# Demo DAG: a single BashOperator task that prints the worker's Python
# interpreter version to the task log. Manually triggered (schedule=None).
@dag(
    dag_id="python_version_checker",
    description="This dag demonstrates the use of BashOperator to find the Python version",
    start_date=pendulum.datetime(2023, 1, 1, tz="UTC"),
    schedule=None,
    tags=["airflow2.4", "python-version"],
)
def python_version_checker():
    # Only task in the DAG; no dependencies to wire up.
    BashOperator(
        task_id="check_python_version",
        bash_command="python3 --version",
    )


python_version_checker()
# CDK entry point: synthesizes the MWAA infrastructure stack and the ECS
# orchestration stack into a single app.
import aws_cdk as cdk
from infra.cdk.ecs_orchestration_stack import MWAAOrchestrationStack
from infra.cdk.infra_stack import MWAAInfraStack

# CDK environment (account/region resolved from the CLI context).
ENVIRONMENT = cdk.Environment()

APP = cdk.App()

# Base infrastructure for the MWAA environment, including the VPC.
INFRA = MWAAInfraStack(scope=APP, construct_id="MWAAInfraStack")

# The orchestration stack reuses the VPC created by the infrastructure stack.
MWAAOrchestrationStack(
    scope=APP,
    construct_id="MWAAOrchestrationStack",
    vpc=INFRA.airflow_vpc,
    env=ENVIRONMENT,
)

APP.synth()
import os
from datetime import datetime

from airflow.models.dag import DAG
from airflow.providers.amazon.aws.hooks.eks import ClusterStates, NodegroupStates
from airflow.providers.amazon.aws.operators.eks import (
    EksDeleteClusterOperator,
    EksDeleteNodegroupOperator,
)
from airflow.providers.amazon.aws.sensors.eks import EksClusterStateSensor, EksNodegroupStateSensor

# Fix: ``os`` was imported twice (at the top of the file and again after the
# sensor import); the duplicate has been removed.

# Cluster/nodegroup names are injected as environment variables by the CDK
# deployment.
CLUSTER_NAME = os.environ.get('AIRFLOW__CDK__CLUSTER_NAME')
NODEGROUP_NAME = os.environ.get('AIRFLOW__CDK__NODEGROUP_NAME')

# Manually-triggered DAG that deletes the nodegroup, waits until it no longer
# exists, then deletes the cluster and waits for that too.
with DAG(
    dag_id='delete_eks_cluster_nodegroup',
    schedule_interval=None,
    start_date=datetime(2022, 11, 1),
    tags=['eks', 'eks-operator'],
    catchup=False,
) as dag:
    delete_nodegroup = EksDeleteNodegroupOperator(
        task_id='delete_eks_nodegroup',
        cluster_name=CLUSTER_NAME,
        nodegroup_name=NODEGROUP_NAME,
    )

    # Block until the nodegroup has been fully removed.
    await_delete_nodegroup = EksNodegroupStateSensor(
        task_id='wait_for_delete_nodegroup',
        cluster_name=CLUSTER_NAME,
        nodegroup_name=NODEGROUP_NAME,
        target_state=NodegroupStates.NONEXISTENT,
    )

    delete_cluster = EksDeleteClusterOperator(
        task_id='delete_eks_cluster',
        cluster_name=CLUSTER_NAME,
    )

    # Block until the cluster has been fully removed.
    await_delete_cluster = EksClusterStateSensor(
        task_id='wait_for_delete_cluster',
        cluster_name=CLUSTER_NAME,
        target_state=ClusterStates.NONEXISTENT,
    )

    (
        delete_nodegroup
        >> await_delete_nodegroup
        >> delete_cluster
        >> await_delete_cluster
    )
27 | "@aws-cdk/aws-iam:minimizePolicies": true, 28 | "@aws-cdk/core:validateSnapshotRemovalPolicy": true, 29 | "@aws-cdk/aws-codepipeline:crossAccountKeyAliasStackSafeResourceName": true, 30 | "@aws-cdk/aws-s3:createDefaultLoggingPolicy": true, 31 | "@aws-cdk/aws-sns-subscriptions:restrictSqsDescryption": true, 32 | "@aws-cdk/aws-apigateway:disableCloudWatchRole": true, 33 | "@aws-cdk/core:enablePartitionLiterals": true, 34 | "@aws-cdk/aws-events:eventsTargetQueueSameAccount": true, 35 | "@aws-cdk/aws-iam:standardizedServicePrincipals": true, 36 | "@aws-cdk/aws-ecs:disableExplicitDeploymentControllerForCircuitBreaker": true, 37 | "@aws-cdk/aws-iam:importedRoleStackSafeDefaultPolicyName": true, 38 | "@aws-cdk/aws-s3:serverAccessLogsUseBucketPolicy": true, 39 | "@aws-cdk/aws-route53-patters:useCertificate": true, 40 | "@aws-cdk/customresources:installLatestAwsSdkDefault": false, 41 | "@aws-cdk/aws-rds:databaseProxyUniqueResourceName": true, 42 | "@aws-cdk/aws-codedeploy:removeAlarmsFromDeploymentGroup": true 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /infra/terraform/README.md: -------------------------------------------------------------------------------- 1 | 2 | # Welcome to your MWAA Terrafom project! 3 | 4 | Welcome to Amazon MWAA Blueprints ! 5 | 6 | This repository contains a collection of code that aim to make it easier and faster for customers to adopt Amazon MWAA. It can be used by AWS customers, partners, and internal AWS teams to configure and manage complete MWAA environment that are fully bootstrapped with the operational software that is needed to deploy and operate workloads. 7 | 8 | 9 | 10 | ## Getting Started 11 | 12 | ### Prerequisites 13 | 14 | First, ensure that you have installed the following tools locally. 15 | 16 | 1. [aws cli](https://docs.aws.amazon.com/cli/latest/userguide/install-cliv2.html) 17 | 2. [terraform](https://learn.hashicorp.com/tutorials/terraform/install-cli) 18 | 3. 
```sh
aws mwaa list-environments
```
*/ 9 | "strictPropertyInitialization": true, /* Enable strict checking of property initialization in classes. */ 10 | "noImplicitThis": true, /* Raise error on 'this' expressions with an implied 'any' type. */ 11 | "alwaysStrict": true, /* Parse in strict mode and emit "use strict" for each source file. */ 12 | "noUnusedLocals": true, /* Report errors on unused locals. */ 13 | "noUnusedParameters": true, /* Report errors on unused parameters. */ 14 | "noFallthroughCasesInSwitch": true, /* Report errors for fallthrough cases in switch statement. */ 15 | "esModuleInterop": true /* Enables emit interoperability between CommonJS and ES Modules via creation of namespace objects for all imports. Implies 'allowSyntheticDefaultImports'. */ 16 | }, 17 | "include": [ 18 | /* Specify files to transpile explicitly because other ts files in this project are transpiled directly by webpack */ 19 | "src/cfn-custom-resources" 20 | ] 21 | } 22 | -------------------------------------------------------------------------------- /usecases/start-stop-mwaa-environment/lib/commons/prepare-test-environment.ts: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy of 5 | this software and associated documentation files (the "Software"), to deal in 6 | the Software without restriction, including without limitation the rights to 7 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 8 | the Software, and to permit persons to whom the Software is furnished to do so. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 11 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 12 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR 13 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 14 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 15 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 16 | */ 17 | 18 | export function prepareTestEnvironment(updateExecutionRole = 'yes') { 19 | process.env.CDK_DEFAULT_ACCOUNT = '111222333444'; 20 | process.env.CDK_DEFAULT_REGION = 'us-east-1'; 21 | process.env.MWAA_MAIN_STACK_NAME = 'mwaa-main-stack'; 22 | process.env.MWAA_ENV_NAME = 'mwaa-my-env'; 23 | process.env.MWAA_ENV_VERSION = '2.4.3'; 24 | process.env.MWAA_SOURCE_BUCKET_NAME = 'mwaa-my-env-bucket'; 25 | process.env.MWAA_EXECUTION_ROLE_ARN = 'arn:aws:iam::111222333444:role/service-role/mwaa-my-env-1U3X48JADEAC'; 26 | process.env.MWAA_UPDATE_EXECUTION_ROLE = updateExecutionRole; 27 | process.env.MWAA_PAUSE_CRON_SCHEDULE = '0 20 ? * MON-FRI *'; 28 | process.env.MWAA_RESUME_CRON_SCHEDULE = '0 6 ? * MON-FRI *'; 29 | process.env.MWAA_SCHEDULE_TIME_ZONE = 'America/Indiana/Indianapolis'; 30 | } 31 | -------------------------------------------------------------------------------- /blueprints/examples/ECS/mwaa/dags/create_ecs_cluster_dag.py: -------------------------------------------------------------------------------- 1 | """ 2 | -*- coding: utf-8 -*- 3 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | # this software and associated documentation files (the "Software"), to deal in 7 | # the Software without restriction, including without limitation the rights to 8 | # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | # the Software, and to permit persons to whom the Software is furnished to do so. 
import os
import boto3
from airflow import DAG
from airflow.providers.amazon.aws.operators.ecs import EcsCreateClusterOperator
from airflow.utils.dates import days_ago

# Name of the ECS cluster to create, injected as an environment variable by
# the CDK deployment.
CLUSTER_NAME = os.environ.get('AIRFLOW__CDK__CLUSTER_NAME')

# Extra kwargs forwarded to create_cluster: sets executeCommandConfiguration
# logging to "DEFAULT".
CLUSTER_CONFIG = {"configuration": {"executeCommandConfiguration": {"logging": "DEFAULT"}}}

# Manually-triggered DAG (no schedule) that creates the ECS cluster and
# blocks until creation has completed.
with DAG(
    dag_id="create_ecs_cluster_dag",
    schedule_interval=None,
    catchup=False,
    start_date=days_ago(1),
) as dag:
    CREATE_CLUSTER_TASK = EcsCreateClusterOperator(
        task_id="CREATE_CLUSTER_TASK",
        cluster_name=CLUSTER_NAME,
        create_cluster_kwargs=CLUSTER_CONFIG,
        wait_for_completion=True,
    )
glue_mwaa_iam_policy_arn) 12 | mwaa_role_name=$(terraform -chdir="./infra/terraform" output -raw mwaa_role_name) 13 | aws iam attach-role-policy --policy-arn $glue_mwaa_iam_policy_arn --role-name $mwaa_role_name 14 | 15 | 16 | ### Create MWAA env variables 17 | glue_service_role_arn=$(terraform -chdir="./infra/terraform" output -raw glue_service_role_arn) 18 | glue_service_role_name=$(terraform -chdir="./infra/terraform" output -raw glue_service_role_name) 19 | 20 | mwaa_cli_json=$(aws mwaa create-cli-token --name $1) 21 | CLI_TOKEN=$(echo $mwaa_cli_json | jq -r '.CliToken') 22 | WEB_SERVER_HOSTNAME=$(echo $mwaa_cli_json | jq -r '.WebServerHostname') 23 | CLIRESULTS=$(curl --request POST "https://$WEB_SERVER_HOSTNAME/aws_mwaa/cli" --header "Authorization: Bearer $CLI_TOKEN" --header "Content-Type: text/plain" --data-raw "variables set GLUE_SERVICE_ROLE_ARN $glue_service_role_arn") 24 | CLIRESULTS=$(curl --request POST "https://$WEB_SERVER_HOSTNAME/aws_mwaa/cli" --header "Authorization: Bearer $CLI_TOKEN" --header "Content-Type: text/plain" --data-raw "variables set DATA_BUCKET $data_bucket") 25 | CLIRESULTS=$(curl --request POST "https://$WEB_SERVER_HOSTNAME/aws_mwaa/cli" --header "Authorization: Bearer $CLI_TOKEN" --header "Content-Type: text/plain" --data-raw "variables set GLUE_SERVICE_ROLE_NAME $glue_service_role_name") 26 | 27 | 28 | -------------------------------------------------------------------------------- /blueprints/examples/EKS/infra/cdk/stacks/network_configuration_and_logging.py: -------------------------------------------------------------------------------- 1 | from aws_cdk import ( 2 | aws_ec2, 3 | aws_mwaa, 4 | CfnOutput 5 | ) 6 | 7 | from constructs import Construct 8 | 9 | 10 | def create_network_configuration(scope: Construct, id: str, 11 | vpc: aws_ec2.Vpc) -> aws_mwaa.CfnEnvironment.NetworkConfigurationProperty: 12 | security_group = aws_ec2.SecurityGroup( 13 | scope, 14 | id=f'{id}-sg', 15 | vpc=vpc, 16 | ) 17 | 18 | security_group_id = 
def create_logging_config(scope: Construct, id: str) -> aws_mwaa.CfnEnvironment.LoggingConfigurationProperty:
    """Build an MWAA logging configuration with INFO-level logging enabled."""
    info_logging = aws_mwaa.CfnEnvironment.ModuleLoggingConfigurationProperty(
        enabled=True,
        log_level='INFO',
    )

    # The same module-level logging settings apply to every MWAA component.
    return aws_mwaa.CfnEnvironment.LoggingConfigurationProperty(
        task_logs=info_logging,
        worker_logs=info_logging,
        scheduler_logs=info_logging,
        dag_processing_logs=info_logging,
        webserver_logs=info_logging,
    )


def network_output(scope: Construct, id: str, security_group_id: str):
    """Export the MWAA security group id as a CloudFormation stack output."""
    CfnOutput(
        scope=scope,
        id=f'{id}-mwaa-sg',
        value=security_group_id,
        description='security group name for MWAA',
    )
seat_section, seat_row 16 | FROM dms_sample.sporting_event_ticket 17 | WHERE sporting_event_id = par_p_event_id 18 | ORDER BY seat_level NULLS FIRST, LOWER(seat_section) NULLS FIRST, LOWER(seat_row) NULLS FIRST; 19 | BEGIN 20 | var_v_time_of_sale := (clock_timestamp()::TIMESTAMP)::TIMESTAMP WITHOUT TIME ZONE; 21 | OPEN t_cur; 22 | 23 | <> 24 | LOOP 25 | FETCH t_cur INTO var_v_ticket_id, var_v_seat_level, var_v_seat_section, var_v_seat_row; 26 | UPDATE dms_sample.sporting_event_ticket 27 | SET ticketholder_id = par_p_person_id 28 | WHERE id = var_v_ticket_id; 29 | INSERT INTO dms_sample.ticket_purchase_hist (sporting_event_ticket_id, purchased_by_id, transaction_date_time, purchase_price) 30 | SELECT 31 | id, ticketholder_id, var_v_time_of_sale, ticket_price 32 | FROM dms_sample.sporting_event_ticket 33 | WHERE id = var_v_ticket_id; 34 | var_v_tickets_sold := (var_v_tickets_sold::NUMERIC + 1::NUMERIC)::SMALLINT; 35 | 36 | IF var_v_tickets_sold = par_p_quantity THEN 37 | CLOSE t_cur; 38 | 39 | EXIT tik_loop; 40 | END IF; 41 | END LOOP; 42 | END; 43 | $BODY$ 44 | LANGUAGE plpgsql; 45 | -------------------------------------------------------------------------------- /usecases/mwaa-cognito-cdk/test/storage-stack.test.ts: -------------------------------------------------------------------------------- 1 | import { App } from "aws-cdk-lib"; 2 | import { Match, Template } from "aws-cdk-lib/assertions"; 3 | import { StorageStack } from "../lib/storage-stack"; 4 | 5 | describe("Execution unit tests for dev account", () => { 6 | // Generate Storage stack 7 | const stageName = "dev"; 8 | const mockApp = new App(); 9 | const stack = new StorageStack(mockApp, "unittest-storage", { 10 | stage: stageName, 11 | }); 12 | const template = Template.fromStack(stack); 13 | 14 | // Execute tests for to confirm setup 15 | test("There are two S3 buckets in the stack", () => { 16 | // Test preparation 17 | const expectedNumber = 2; 18 | // Test execution 19 | 
2. [Make](https://www.gnu.org/software/make/)
make deploy mwaa_bucket={MWAA_BUCKET} mwaa_execution_role_name={MWAA_EXEC_ROLE} mwaa_env_name={MWAA_ENV_NAME} emr_data_bucket={EMR_DATA_BUCKET}
11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 13 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 14 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 15 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 16 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 17 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 18 | ''' 19 | from __future__ import print_function 20 | import sys 21 | from pyspark.sql import SparkSession 22 | from pyspark.sql.functions import sum 23 | 24 | if __name__ == "__main__": 25 | if len(sys.argv) != 3: 26 | print(""" 27 | Usage: nyc_aggregations.py 28 | """, file=sys.stderr) 29 | sys.exit(-1) 30 | 31 | input_path = sys.argv[1] 32 | output_path = sys.argv[2] 33 | 34 | spark = SparkSession\ 35 | .builder\ 36 | .appName("NYC Aggregations")\ 37 | .getOrCreate() 38 | 39 | sc = spark.sparkContext 40 | 41 | df = spark.read.parquet(input_path) 42 | df.printSchema 43 | df_out = df.groupBy('pulocationid', 'trip_type', 'payment_type').agg(sum('fare_amount').alias('total_fare_amount')) 44 | 45 | df_out.write.mode('overwrite').parquet(output_path) 46 | 47 | spark.stop() 48 | 49 | -------------------------------------------------------------------------------- /blueprints/examples/EKS/infra/cdk/README.md: -------------------------------------------------------------------------------- 1 | 2 | # Welcome to your CDK Python project! 3 | 4 | This is a blank project for CDK development with Python. 5 | 6 | The `cdk.json` file tells the CDK Toolkit how to execute your app. 7 | 8 | This project is set up like a standard Python project. The initialization 9 | process also creates a virtualenv within this project, stored under the `.venv` 10 | directory. 
If you are on a Windows platform, activate the virtualenv like this:
3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy of 5 | this software and associated documentation files (the "Software"), to deal in 6 | the Software without restriction, including without limitation the rights to 7 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 8 | the Software, and to permit persons to whom the Software is furnished to do so. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 11 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 12 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 13 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 14 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 15 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 16 | */ 17 | 18 | import { CreateEnvironmentCommand, CreateEnvironmentCommandInput, CreateEnvironmentCommandOutput, MWAAClient } from '@aws-sdk/client-mwaa'; 19 | 20 | const client = new MWAAClient({}); 21 | 22 | export const handler = async (event: Record): Promise => { 23 | console.info('Create MWAA Environment Event', event); 24 | 25 | const newEnvInput = event.environment; 26 | sanitizeInput(newEnvInput); 27 | 28 | const createEnvCommand = new CreateEnvironmentCommand(newEnvInput); 29 | const result = await client.send(createEnvCommand); 30 | 31 | console.info('Create MWAA Environment Result', result); 32 | return result; 33 | }; 34 | 35 | function sanitizeInput(input: CreateEnvironmentCommandInput) { 36 | if (Object.keys(input.Tags || {}).length === 0) { 37 | input.Tags = undefined; 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /usecases/mwaa-glue-athena/MWAAEnvironmentBucket/dags/sampledb/schema/functions/loadmlbplayers.sql: -------------------------------------------------------------------------------- 1 | 
-- Populates dms_sample.player from the raw dms_sample.mlb_data rows:
-- normalises the team name, splits the player name into parts (via the
-- esubstr helper — presumably a substring function, see schema/functions),
-- resolves the team id and inserts one player row per entry.
CREATE OR REPLACE FUNCTION dms_sample.loadmlbplayers()
RETURNS void
AS
$BODY$
DECLARE
    var_v_sport_team_id INTEGER;
    var_v_last_name VARCHAR(30);
    var_v_first_name VARCHAR(30);
    var_v_full_name VARCHAR(30);
    var_v_team_name VARCHAR(60);
    var_done VARCHAR(10) DEFAULT FALSE;
    mlb_players CURSOR FOR
    SELECT DISTINCT
        -- The 'Anaheim Angels' franchise was renamed; map to the current name.
        CASE LOWER(LTRIM(RTRIM(mlb_team_long::VARCHAR)::VARCHAR))
            WHEN LOWER('Anaheim Angels') THEN 'Los Angeles Angels'
            ELSE LTRIM(RTRIM(mlb_team_long::VARCHAR)::VARCHAR)
        END AS mlb_team_long,
        LTRIM(RTRIM(mlb_name::VARCHAR)::VARCHAR) AS name,
        esubstr(LTRIM(RTRIM(mlb_name::VARCHAR)::VARCHAR)::VARCHAR, 1::INT, POSITION(' '::VARCHAR IN mlb_name::VARCHAR)::INT) AS t_name,
        esubstr(LTRIM(RTRIM(mlb_name::VARCHAR)::VARCHAR)::VARCHAR, POSITION(' '::VARCHAR IN mlb_name::VARCHAR)::INT, LENGTH(mlb_name::VARCHAR)::INT) AS f_name
    FROM dms_sample.mlb_data;
BEGIN
    OPEN mlb_players;

    -- Fixed: the loop label had been mangled to '<>'; the EXIT read_loop
    -- statement below requires the label to be declared as <<read_loop>>.
    <<read_loop>>
    LOOP
        FETCH FROM mlb_players INTO var_v_team_name, var_v_last_name, var_v_first_name, var_v_full_name;

        IF NOT FOUND THEN
            EXIT read_loop;
        END IF;
        SELECT
            id::INTEGER
            INTO var_v_sport_team_id
            FROM dms_sample.sport_team
            WHERE LOWER(sport_type_name) = LOWER('baseball'::VARCHAR(15)) AND LOWER(sport_league_short_name) = LOWER('MLB'::VARCHAR(10)) AND LOWER(name) = LOWER(var_v_team_name::VARCHAR(30));
        INSERT INTO dms_sample.player (sport_team_id, last_name, first_name, full_name)
            VALUES (var_v_sport_team_id, var_v_last_name, var_v_first_name, var_v_full_name);
    END LOOP;
    CLOSE mlb_players;
END;
$BODY$
LANGUAGE plpgsql;
create-cli-token --name $3) 6 | CLI_TOKEN=$(echo $mwaa_cli_json | jq -r '.CliToken') 7 | WEB_SERVER_HOSTNAME=$(echo $mwaa_cli_json | jq -r '.WebServerHostname') 8 | CLIRESULTS=$(curl --request POST "https://$WEB_SERVER_HOSTNAME/aws_mwaa/cli" --header "Authorization: Bearer $CLI_TOKEN" --header "Content-Type: text/plain" --data-raw "variables delete JOB_ROLE_ARN ") 9 | CLIRESULTS=$(curl --request POST "https://$WEB_SERVER_HOSTNAME/aws_mwaa/cli" --header "Authorization: Bearer $CLI_TOKEN" --header "Content-Type: text/plain" --data-raw "variables delete DATA_BUCKET ") 10 | CLIRESULTS=$(curl --request POST "https://$WEB_SERVER_HOSTNAME/aws_mwaa/cli" --header "Authorization: Bearer $CLI_TOKEN" --header "Content-Type: text/plain" --data-raw "variables delete EMR_VIRTUAL_CLUSTER_ID ") 11 | 12 | emr_on_eks_data_bucket='None' 13 | if [[ "$4" == CDK ]] 14 | then 15 | export CDK_DEFAULT_ACCOUNT=$(aws sts get-caller-identity --query 'Account' --output text) 16 | export CDK_DEFAULT_REGION=$AWS_DEFAULT_REGION 17 | cd ./infra/cdk 18 | outputs=$(aws cloudformation describe-stacks --stack-name emr-eks-cdk --query "Stacks[0].Outputs") 19 | emr_on_eks_data_bucket=$(echo $outputs | jq -rc '.[] | select(.OutputKey=="emroneksdatabucket") | .OutputValue') 20 | aws iam detach-role-policy --policy-arn $(echo $outputs | jq -rc '.[] | select(.OutputKey=="emroneksmwaaiampolicyarn") | .OutputValue') --role-name $2 21 | aws s3 rm s3://$emr_on_eks_data_bucket/ --recursive 22 | cdk destroy 23 | cd ../../ 24 | else 25 | aws iam detach-role-policy --policy-arn $(terraform -chdir="./infra/terraform" output -raw emr_on_eks_mwaa_iam_policy_arn) --role-name $2 26 | emr_on_eks_data_bucket=$(terraform -chdir="./infra/terraform" output -raw emr_on_eks_data_bucket) 27 | aws s3 rm s3://$emr_on_eks_data_bucket/ --recursive 28 | 29 | fi 30 | -------------------------------------------------------------------------------- 
/usecases/start-stop-mwaa-environment/lib/lambda/mwaa-update-environment-function.test.ts: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy of 5 | this software and associated documentation files (the "Software"), to deal in 6 | the Software without restriction, including without limitation the rights to 7 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 8 | the Software, and to permit persons to whom the Software is furnished to do so. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 11 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 12 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 13 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 14 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 15 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
16 | */ 17 | 18 | import { mockClient, AwsClientStub } from 'aws-sdk-client-mock'; 19 | import 'aws-sdk-client-mock-jest'; 20 | import { UpdateEnvironmentCommand, MWAAClient } from '@aws-sdk/client-mwaa'; 21 | import { handler } from '../../lib/lambda/mwaa-update-environment-function'; 22 | 23 | let mwaaMock: AwsClientStub; 24 | 25 | describe('Update Environment Lambda Function', () => { 26 | beforeEach(() => { 27 | mwaaMock = mockClient(MWAAClient); 28 | }); 29 | 30 | afterEach(() => { 31 | mwaaMock.restore(); 32 | }); 33 | 34 | it('should make an API call to update the MWAA environment', async () => { 35 | mwaaMock.on(UpdateEnvironmentCommand).resolves({ Arn: 'my-env-arn' }); 36 | 37 | const result = await handler({ Name: 'my-env' }); 38 | 39 | expect(mwaaMock).toHaveReceivedCommand(UpdateEnvironmentCommand); 40 | expect(result).toEqual({ Arn: 'my-env-arn' }); 41 | }); 42 | }); 43 | -------------------------------------------------------------------------------- /usecases/mwaa-glue-athena/MWAAEnvironmentBucket/dags/create_resources.py: -------------------------------------------------------------------------------- 1 | from airflow.decorators import dag, task 2 | from airflow.operators.bash_operator import BashOperator 3 | # from airflow.providers.postgres.operators.postgres import PostgresOperator 4 | from airflow.providers.postgres.hooks.postgres import PostgresHook 5 | 6 | from airflow.models.connection import Connection 7 | from airflow import settings 8 | 9 | from datetime import datetime 10 | import os, mwaa_config 11 | 12 | DAG_ID = os.path.basename(__file__).replace(".py", "") 13 | 14 | @task() 15 | def get_connection(): 16 | conn=PostgresHook.get_connection(mwaa_config.POSTGRES_CONNECTION) 17 | bash_command=f"cd /usr/local/airflow/dags/sampledb; export PGPASSWORD={conn.password}; psql --host={conn.host} --port={conn.port} --dbname={conn.schema} --username={conn.login} -f /usr/local/airflow/dags/sampledb/install-postgresql.sql" 18 | 19 | return 
@dag(
    dag_id=DAG_ID,
    start_date=datetime(2022, 1, 1),
    catchup=False,
    schedule_interval="@once",
    template_searchpath=["/usr/local/airflow/dags/sampledb"],
)
def create_resources_dag():
    """One-shot DAG that loads the sample schema and creates the Glue pools."""
    psql_command = get_connection()

    # Runs the psql bootstrap script against the sample database.
    create_tables = BashOperator(
        task_id="create_tables",
        bash_command=psql_command,
    )

    pool_description = "This pool limits glue jobs to the maximum concurrency of the Glue service"

    # Pools cap how many Glue job / crawler tasks may run concurrently.
    create_glue_pool = BashOperator(
        task_id="create_glue_pool",
        bash_command=f'airflow pools set -o plain -v {mwaa_config.GLUE_POOL} {mwaa_config.GLUE_CONCURRENCY} "{pool_description}"',
    )

    create_glue_crawler_pool = BashOperator(
        task_id="create_glue_crawler_pool",
        bash_command=f'airflow pools set -o plain -v {mwaa_config.GLUE_CRAWLER_POOL} {mwaa_config.GLUE_CRAWLER_CONCURRENCY} "{pool_description}"',
    )


create_resources_instance = create_resources_dag()
this_dag_file=["dag_id_defined_in_this_code","another_dag_id_in_this_code"]
3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy of 5 | this software and associated documentation files (the "Software"), to deal in 6 | the Software without restriction, including without limitation the rights to 7 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 8 | the Software, and to permit persons to whom the Software is furnished to do so. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 11 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 12 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 13 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 14 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 15 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 16 | */ 17 | 18 | import { DagsCli } from './dags-cli'; 19 | import { handler } from './dags-trigger-function'; 20 | 21 | describe('Dags Trigger Function', () => { 22 | afterEach(() => { 23 | jest.resetAllMocks(); 24 | }); 25 | 26 | it('should unpause and trigger the supplied dag', async () => { 27 | const expectedResult = { 28 | stdOut: 'output', 29 | stdError: '', 30 | }; 31 | 32 | const unpauseSpy = jest.spyOn(DagsCli.prototype, 'unpauseDag').mockResolvedValue(expectedResult); 33 | 34 | const triggerSpy = jest.spyOn(DagsCli.prototype, 'triggerDag').mockResolvedValue(expectedResult); 35 | 36 | const result = await handler({ 37 | taskToken: 'a-token', 38 | dag: 'a-dag', 39 | bucket: 'a-bucket', 40 | }); 41 | 42 | expect(result).toEqual(expectedResult); 43 | expect(unpauseSpy).toHaveBeenLastCalledWith('a-dag'); 44 | expect(triggerSpy).toHaveBeenLastCalledWith('a-dag', { taskToken: 'a-token', bucket: 'a-bucket' }); 45 | }); 46 | }); 47 | -------------------------------------------------------------------------------- 
/blueprints/examples/ECS/mwaa/dags/deregister_ecs_task_definition_dag.py: -------------------------------------------------------------------------------- 1 | """ 2 | -*- coding: utf-8 -*- 3 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | # this software and associated documentation files (the "Software"), to deal in 7 | # the Software without restriction, including without limitation the rights to 8 | # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | # the Software, and to permit persons to whom the Software is furnished to do so. 10 | 11 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 13 | # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 14 | # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 15 | # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 16 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
17 | """ 18 | import os 19 | import boto3 20 | from airflow import DAG 21 | from airflow.providers.amazon.aws.operators.ecs import EcsDeregisterTaskDefinitionOperator 22 | from airflow.utils.dates import days_ago 23 | 24 | # Task Name 25 | TASK_NAME = os.environ.get('AIRFLOW__CDK__STACK_NAME') 26 | 27 | # Deregister ECS Task Definition 28 | with DAG(dag_id="deregister_ecs_task_definition_dag", schedule_interval=None, \ 29 | catchup=False, start_date=days_ago(1)) as dag: 30 | # Get Latest Task definition 31 | ECS_CLIENT = boto3.client('ecs') 32 | TASK_DEFINITION = ':'.join( 33 | ECS_CLIENT.describe_task_definition( 34 | taskDefinition=TASK_NAME 35 | )['taskDefinition']['taskDefinitionArn'].split(':')[-2:]).split('/')[-1] 36 | # Airflow Task 37 | DEREGISTER_TASK_DEFINITION = EcsDeregisterTaskDefinitionOperator(task_id="DEREGISTER_TASK_DEFINITION", \ 38 | task_definition=TASK_DEFINITION, wait_for_completion=True) 39 | -------------------------------------------------------------------------------- /usecases/mwaa-public-webserver-custom-domain/webpack.config.js: -------------------------------------------------------------------------------- 1 | const path = require("path"); 2 | const TerserPlugin = require("terser-webpack-plugin"); 3 | 4 | module.exports = { 5 | mode: "production", 6 | target: "node", 7 | node: { 8 | __dirname: false, 9 | }, 10 | entry: { 11 | "src/lambda-edge/parse-auth/bundle": path.resolve( 12 | __dirname, 13 | "./src/lambda-edge/parse-auth/index.ts" 14 | ), 15 | "src/lambda-edge/check-auth/bundle": path.resolve( 16 | __dirname, 17 | "./src/lambda-edge/check-auth/index.ts" 18 | ), 19 | "src/lambda-edge/refresh-auth/bundle": path.resolve( 20 | __dirname, 21 | "./src/lambda-edge/refresh-auth/index.ts" 22 | ), 23 | "src/lambda-edge/sign-out/bundle": path.resolve( 24 | __dirname, 25 | "./src/lambda-edge/sign-out/index.ts" 26 | ), 27 | }, 28 | module: { 29 | rules: [ 30 | { 31 | test: /\.ts$/, 32 | loader: "ts-loader", 33 | exclude: /node_modules/, 34 | 
options: { 35 | configFile: "src/lambda-edge/tsconfig.json", 36 | }, 37 | }, 38 | { 39 | test: /\.html$/i, 40 | loader: "html-loader", 41 | options: { 42 | minimize: true, 43 | }, 44 | }, 45 | ], 46 | }, 47 | resolve: { 48 | extensions: [".ts", ".js"], 49 | }, 50 | output: { 51 | path: path.resolve(__dirname), 52 | filename: "[name].js", 53 | libraryTarget: "commonjs", 54 | }, 55 | externals: [ 56 | /^aws-sdk/, // Don't include the aws-sdk in bundles as it is already present in the Lambda runtime 57 | ], 58 | performance: { 59 | hints: "error", 60 | maxAssetSize: 1048576, // Max size of deployment bundle in Lambda@Edge Viewer Request 61 | maxEntrypointSize: 1048576, // Max size of deployment bundle in Lambda@Edge Viewer Request 62 | }, 63 | optimization: { 64 | minimizer: [ 65 | new TerserPlugin({ 66 | parallel: true, 67 | extractComments: true, 68 | }), 69 | ], 70 | }, 71 | }; 72 | -------------------------------------------------------------------------------- /usecases/start-stop-mwaa-environment/cdk.json: -------------------------------------------------------------------------------- 1 | { 2 | "app": "npx ts-node --prefer-ts-exts bin/mwaa-pause-resume.ts", 3 | "watch": { 4 | "include": ["**"], 5 | "exclude": ["README.md", "cdk*.json", "**/*.d.ts", "**/*.js", "tsconfig.json", "package*.json", "yarn.lock", "node_modules", "test"] 6 | }, 7 | "context": { 8 | "@aws-cdk/aws-lambda:recognizeLayerVersion": true, 9 | "@aws-cdk/core:checkSecretUsage": true, 10 | "@aws-cdk/core:target-partitions": ["aws", "aws-cn"], 11 | "@aws-cdk-containers/ecs-service-extensions:enableDefaultLogDriver": true, 12 | "@aws-cdk/aws-ec2:uniqueImdsv2TemplateName": true, 13 | "@aws-cdk/aws-ecs:arnFormatIncludesClusterName": true, 14 | "@aws-cdk/aws-iam:minimizePolicies": true, 15 | "@aws-cdk/core:validateSnapshotRemovalPolicy": true, 16 | "@aws-cdk/aws-codepipeline:crossAccountKeyAliasStackSafeResourceName": true, 17 | "@aws-cdk/aws-s3:createDefaultLoggingPolicy": true, 18 | 
"@aws-cdk/aws-sns-subscriptions:restrictSqsDescryption": true, 19 | "@aws-cdk/aws-apigateway:disableCloudWatchRole": true, 20 | "@aws-cdk/core:enablePartitionLiterals": true, 21 | "@aws-cdk/aws-events:eventsTargetQueueSameAccount": true, 22 | "@aws-cdk/aws-iam:standardizedServicePrincipals": true, 23 | "@aws-cdk/aws-ecs:disableExplicitDeploymentControllerForCircuitBreaker": true, 24 | "@aws-cdk/aws-iam:importedRoleStackSafeDefaultPolicyName": true, 25 | "@aws-cdk/aws-s3:serverAccessLogsUseBucketPolicy": true, 26 | "@aws-cdk/aws-route53-patters:useCertificate": true, 27 | "@aws-cdk/customresources:installLatestAwsSdkDefault": false, 28 | "@aws-cdk/aws-rds:databaseProxyUniqueResourceName": true, 29 | "@aws-cdk/aws-codedeploy:removeAlarmsFromDeploymentGroup": true, 30 | "@aws-cdk/aws-apigateway:authorizerChangeDeploymentLogicalId": true, 31 | "@aws-cdk/aws-ec2:launchTemplateDefaultUserData": true, 32 | "@aws-cdk/aws-secretsmanager:useAttachedSecretResourcePolicyForSecretTargetAttachments": true, 33 | "@aws-cdk/aws-redshift:columnId": true 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /usecases/start-stop-mwaa-environment/lib/infrastructure/mwaa-polling-stack.test.ts: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy of 5 | this software and associated documentation files (the "Software"), to deal in 6 | the Software without restriction, including without limitation the rights to 7 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 8 | the Software, and to permit persons to whom the Software is furnished to do so. 
9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 11 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 12 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 13 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 14 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 15 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 16 | */ 17 | 18 | import * as cdk from 'aws-cdk-lib'; 19 | import { Template } from 'aws-cdk-lib/assertions'; 20 | 21 | import configuration from '../../lib/commons/config'; 22 | import { MwaaMainStack } from '../../lib/infrastructure/mwaa-main-stack'; 23 | import { prepareTestEnvironment } from '../commons/prepare-test-environment'; 24 | 25 | describe('MwaaPollingStack', () => { 26 | prepareTestEnvironment(); 27 | const config = configuration(); 28 | const app = new cdk.App(); 29 | const mainStack = new MwaaMainStack(app, 'mwaa-main-stack', { 30 | ...config, 31 | env: { 32 | account: config.account, 33 | region: config.region, 34 | }, 35 | }); 36 | const pollingStack = mainStack.pollingStack; 37 | 38 | it('should define a state machine and a polling function', () => { 39 | const template = Template.fromStack(pollingStack); 40 | template.resourceCountIs('AWS::StepFunctions::StateMachine', 1); 41 | template.resourceCountIs('AWS::Lambda::Function', 1); 42 | }); 43 | }); 44 | -------------------------------------------------------------------------------- /usecases/mwaa-cognito-cdk/dags/sample.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | from datetime import datetime, timedelta 3 | from textwrap import dedent 4 | 5 | from airflow import DAG 6 | from airflow.operators.bash import BashOperator 7 | 8 | 9 | with DAG( 10 | "tutorial", 11 | default_args={ 12 | "depends_on_past": False, 13 | "email": 
["airflow@example.com"], 14 | "email_on_failure": False, 15 | "email_on_retry": False, 16 | "retries": 1, 17 | "retry_delay": timedelta(minutes=5), 18 | }, 19 | description="A simple tutorial DAG", 20 | schedule=timedelta(days=1), 21 | start_date=datetime(2021, 1, 1), 22 | catchup=False, 23 | tags=["example"], 24 | ) as dag: 25 | 26 | t1 = BashOperator( 27 | task_id="print_date", 28 | bash_command="date", 29 | ) 30 | 31 | t2 = BashOperator( 32 | task_id="sleep", 33 | depends_on_past=False, 34 | bash_command="sleep 5", 35 | retries=3, 36 | ) 37 | 38 | t1.doc_md = dedent( 39 | """\ 40 | #### Task Documentation 41 | You can document your task using the attributes `doc_md` (markdown), 42 | `doc` (plain text), `doc_rst`, `doc_json`, `doc_yaml` which gets 43 | rendered in the UI's Task Instance Details page. 44 | ![img](http://montcs.bloomu.edu/~bobmon/Semesters/2012-01/491/import%20soul.png) 45 | **Image Credit:** Randall Munroe, [XKCD](https://xkcd.com/license.html) 46 | """ 47 | ) 48 | 49 | dag.doc_md = __doc__ # providing that you have a docstring at the beginning of the DAG; OR 50 | dag.doc_md = """ 51 | This is a documentation placed anywhere 52 | """ # otherwise, type it like this 53 | 54 | templated_command = dedent( 55 | """ 56 | {% for i in range(5) %} 57 | echo "{{ ds }}" 58 | echo "{{ macros.ds_add(ds, 7)}}" 59 | {% endfor %} 60 | """ 61 | ) 62 | 63 | t3 = BashOperator( 64 | task_id="templated", 65 | depends_on_past=False, 66 | bash_command=templated_command, 67 | ) 68 | 69 | t1 >> [t2, t3] 70 | -------------------------------------------------------------------------------- /usecases/start-stop-mwaa-environment/lib/lambda/dags-trigger-function.ts: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy of 5 | this software and associated documentation files (the "Software"), to deal in 6 | the Software without restriction, including without limitation the rights to 7 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 8 | the Software, and to permit persons to whom the Software is furnished to do so. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 11 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 12 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 13 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 14 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 15 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 16 | */ 17 | 18 | import { DagsCliResult, DagsCli } from './dags-cli'; 19 | 20 | const envName = process.env.MWAA_ENV_NAME || ''; 21 | const envVersion = process.env.MWAA_ENV_VERSION || ''; 22 | 23 | export const handler = async (event: Record): Promise => { 24 | console.info('Event', event); 25 | const dagsCli = new DagsCli(envName, envVersion); 26 | 27 | const taskToken = event['taskToken'] as string; 28 | const dag = event['dag'] as string; 29 | const bucket = event['bucket'] as string; 30 | 31 | console.info(`Unpausing ${dag} ...`); 32 | try { 33 | const mwaaResult = await dagsCli.unpauseDag(dag); 34 | console.info('DAG unpause result:', mwaaResult); 35 | } catch (error: any) { 36 | console.error('Error unpause DAG:', error); 37 | } 38 | 39 | const config = { taskToken, bucket }; 40 | console.info(`Trigerring DAG ${dag} with config: `, config); 41 | const mwaaResult = await dagsCli.triggerDag(dag, config); 42 | console.info('DAG trigger result: ', mwaaResult); 43 | 44 | return mwaaResult; 45 | }; 46 | 
-------------------------------------------------------------------------------- /usecases/mwaa-with-codeartifact/mwaa-ca-bucket-content/dags/tutorial.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from datetime import timedelta 4 | from airflow import DAG 5 | from airflow.operators.bash import BashOperator 6 | from airflow.operators.python import PythonOperator 7 | from airflow.utils.dates import days_ago 8 | 9 | 10 | default_args = { 11 | "owner": "airflow", 12 | "depends_on_past": False, 13 | "start_date": days_ago(2), 14 | "email": ["airflow@example.com"], 15 | "email_on_failure": False, 16 | "email_on_retry": False, 17 | "retries": 1, 18 | "retry_delay": timedelta(minutes=5), 19 | } 20 | dag = DAG( 21 | "example_dag", 22 | default_args=default_args, 23 | description="A simple tutorial DAG", 24 | schedule_interval=None, 25 | is_paused_upon_creation=False, 26 | ) 27 | 28 | t1 = BashOperator( 29 | task_id="print_date", 30 | bash_command="date", 31 | dag=dag, 32 | ) 33 | 34 | t2 = BashOperator( 35 | task_id="sleep", 36 | depends_on_past=False, 37 | bash_command="sleep 5", 38 | retries=3, 39 | dag=dag, 40 | ) 41 | dag.doc_md = __doc__ 42 | 43 | t1.doc_md = """\ 44 | #### Task Documentation 45 | You can document your task using the attributes `doc_md` (markdown), 46 | `doc` (plain text), `doc_rst`, `doc_json`, `doc_yaml` which gets 47 | rendered in the UI's Task Instance Details page. 
48 | ![img](http://montcs.bloomu.edu/~bobmon/Semesters/2012-01/491/import%20soul.png) 49 | """ 50 | templated_command = """ 51 | {% for i in range(5) %} 52 | echo "{{ ds }}" 53 | echo "{{ macros.ds_add(ds, 7)}}" 54 | echo "{{ params.my_param }}" 55 | {% endfor %} 56 | """ 57 | 58 | t3 = BashOperator( 59 | task_id="templated", 60 | depends_on_past=False, 61 | bash_command=templated_command, 62 | params={"my_param": "Parameter I passed in"}, 63 | dag=dag, 64 | ) 65 | 66 | 67 | def example_with_numpy(**kwargs): 68 | print(np.zeros(5)) 69 | 70 | 71 | print_matrix = PythonOperator( 72 | task_id="print_matrix", 73 | python_callable=example_with_numpy, 74 | dag=dag, 75 | ) 76 | 77 | t1 >> print_matrix >> t2 >> t3 -------------------------------------------------------------------------------- /usecases/mwaa-glue-athena/README.md: -------------------------------------------------------------------------------- 1 | ## Amazon MWAA with AWS Glue and Amazon Athena 2 | 3 | ### Overview 4 | 5 | This solution demonstrates different ways to orchestrate Glue jobs and Athena queries from Amazon MWAA 6 | 7 | ### Instructions 8 | 9 | Note: Please run this tutorial in an account and region where AWS Lake Formation is not enabled. 10 | 11 | 1. Create resources by utilising cloud formation template with name "RDSMWAA.yaml". Add a CFN name when prompted and leave rest of the settings as default. Wait for CFN to complete. 12 | 13 | 2. Get the artifact bucket name from cloud formation (CFN) output (check for "artifactBucket" key in CFN output). Copy the "ArtifactBucket" folder contents to the corresponding S3 bucket. You can select all of the sub-folders and then drag and drop. 14 | 15 | 3. Get the MWAA environment bucket name from CFN output (check for "MWAAEnvironmentBucket" key in CFN output). Copy the "MWAAEnvironmentBucket" folder contents to the corresponding S3 bucket. You can select all of the sub-folders and then drag and drop. 16 | 17 | 4. 
From the Airflow user interface: The "create resources" DAG should run automatically to create the database. Before running any Athena DAGs, be sure to trigger the data-pipeline DAG, which ingests data from RDS and loads it into S3. Trigger the run-athena DAG to execute Athena queries. 18 | 19 | ### Clean up 20 | 21 | 1. Empty the contents of both S3 buckets - the MWAA environment bucket and the artifact bucket 22 | 23 | 2. Delete the CloudFormation stack 24 | 25 | 26 | ### Resources 27 | 28 | - Amazon MWAA docs https://docs.aws.amazon.com/mwaa 29 | - #airflow-aws Slack Channel: https://apache-airflow.slack.com 30 | - MWAA workshop: https://amazon-mwaa-for-analytics.workshop.aws/en/ 31 | - MWAA local development and testing: https://github.com/aws/aws-mwaa-local-runner 32 | - Glue Docs: https://docs.aws.amazon.com/glue/latest/dg/how-it-works.html 33 | - Athena Docs: https://docs.aws.amazon.com/athena/latest/ug/what-is.html 34 | - MWAA at Scale: https://s12d.com/MWAAatScale 35 | - Airflow at Scale: https://s12d.com/AirflowAtScale 36 | - Reducing Airflow Costs: https://s12d.com/ReducingAirflowCosts 37 | --------------------------------------------------------------------------------