├── .github └── workflows │ └── build.yaml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── THIRD-PARTY-LICENSES.txt ├── apps ├── java-datastream │ ├── README.md │ └── kds-to-s3-datastream-java │ │ ├── README.md │ │ ├── cdk-infra │ │ ├── .gitignore │ │ ├── README.md │ │ ├── bin │ │ │ └── main.ts │ │ ├── cdk.json │ │ ├── cfn.yaml │ │ ├── jest.config.js │ │ ├── lib │ │ │ └── cdk-infra-kds-to-s3-stack.ts │ │ ├── package.json │ │ ├── test │ │ │ └── cdk-infra-kda-kafka-to-s3.test.ts │ │ └── tsconfig.json │ │ ├── dependency-reduced-pom.xml │ │ ├── img │ │ ├── .$msk-kda-s3.drawio.bkp │ │ ├── .$msk-kda-s3.drawio.dtmp │ │ ├── kds-kda-s3.drawio │ │ └── kds-kda-s3.png │ │ ├── package.json │ │ ├── pom.xml │ │ ├── src │ │ └── main │ │ │ ├── java │ │ │ └── com │ │ │ │ └── amazonaws │ │ │ │ └── services │ │ │ │ └── kinesisanalytics │ │ │ │ ├── StreamingJob.java │ │ │ │ └── stock │ │ │ │ ├── Stock.java │ │ │ │ ├── StockDateBucketAssigner.java │ │ │ │ └── StockDeserializationSchema.java │ │ │ └── resources │ │ │ ├── OrderSchema.avsc │ │ │ └── log4j2.properties │ │ └── test-create.sh └── studio │ ├── README.md │ └── msk-to-studio │ ├── README.md │ ├── cdk-infra │ ├── .gitignore │ ├── CFN-README.md │ ├── README.md │ ├── bin │ │ └── main.ts │ ├── cdk.json │ ├── jest.config.js │ ├── lib │ │ └── cdk-infra-kafka-to-studio-stack.ts │ ├── package.json │ ├── test │ │ └── cdk-infra-kda-kafka-to-s3.test.ts │ └── tsconfig.json │ └── img │ ├── .$msk-studio.drawio.bkp │ ├── .$msk-studio.drawio.dtmp │ ├── msk-studio.drawio │ └── msk-studio.png ├── bootstrap-cdk ├── .gitignore ├── .npmignore ├── README.md ├── bin │ └── main.ts ├── cdk.json ├── jest.config.js ├── lib │ └── bootstrap-cdk-stack.ts ├── package.json ├── test │ └── bootstrap-cdk.test.ts └── tsconfig.json ├── cdk-infra └── shared │ ├── README.md │ ├── bin │ ├── main.d.ts │ ├── main.js │ └── main.ts │ ├── cdk.json │ ├── jest.config.js │ ├── lambda │ └── aws-lambda-helpers │ │ ├── .gitignore │ │ ├── README.md │ │ ├── aws-lambda-helpers.iml │ │ ├── pom.xml │ │ └── src │ │ ├── main │ │ └── java │ │ │ └── com │ │ │ └── amazonaws │ │ │ ├── App.java │ │ │ ├── DataPayload.java │ │ │ ├── MSKDataGen.java │ │ │ ├── MSKDataGenHandler.java │ │ │ ├── ResourceProperties.java │ │ │ ├── Stock.java │ │ │ └── TopicGenHandler.java │ │ └── test │ │ └── java │ │ └── com │ │ └── amazonaws │ │ └── AppTest.java │ ├── lib │ ├── .gitignore │ ├── app-start-lambda-construct.ts │ ├── copy-assets-lambda-construct.ts │ ├── create-studio-app-lambda-construct.ts │ ├── flink-msk-zep-construct.ts │ ├── kda-construct.ts │ ├── kda-zep-construct.ts │ ├── kds-datagen-lambda-construct.ts │ ├── msf-java-app-construct.ts │ ├── msk-construct.ts │ ├── msk-get-bootstrap-broker-string.ts │ ├── msk-serverless-construct.ts │ ├── msk-topic-creation-lambda-construct.ts │ └── zeppelin-note-run-lambda-construct.ts │ ├── package.json │ ├── test │ ├── cdk-infra-msf-kafka-to-s3.test.d.ts │ ├── cdk-infra-msf-kafka-to-s3.test.js │ └── cdk-infra-msf-kafka-to-s3.test.ts │ └── tsconfig.json ├── datagen ├── README.md ├── orders-datagen │ ├── DATAGEN-KDS.md │ ├── DATAGEN-MSK.md │ ├── README.md │ ├── kds-datagen.ipynb │ ├── local │ │ ├── pyflink_datagen.py │ │ └── pyflink_kafkaread.py │ ├── msk-iam-datagen.ipynb │ └── msk-iam-datagen.zpln └── stock-ticker-datagen │ └── kds-datagen.zpln ├── img ├── blueprint-diagram.png └── msf-icon.png ├── notes ├── contribute.md ├── installation.md └── modify.md └── python ├── README.md ├── lambda_copy_assets_to_s3.py ├── lambda_create_studio_app.py 
├── lambda_kds_datagen.py ├── lambda_msf_app_start.py ├── lambda_run_studio_notebook ├── bundle-lambda-for-release.sh ├── cfnresponse.py ├── lambda_function.py └── requirements.txt ├── local_kds_datagen.py ├── msf_java_app_custom_resource_handler.py ├── requirements.txt ├── test_kda_java_app_custom_resource_handler.py └── test_lambda_kda_app_start.py /.github/workflows/build.yaml: -------------------------------------------------------------------------------- 1 | name: Build and Release 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | release: 9 | types: [created] 10 | 11 | jobs: 12 | build-and-release: 13 | name: Build and Release 14 | runs-on: ubuntu-latest 15 | steps: 16 | - name: Install NodeJS 17 | uses: actions/setup-node@v3 18 | with: 19 | node-version: v18.16.1 20 | 21 | - name: Install Python 22 | uses: actions/setup-python@v4 23 | with: 24 | python-version: 3.8.16 25 | 26 | - name: Install Java 27 | uses: actions/setup-java@v3 28 | with: 29 | distribution: corretto 30 | java-version: 11 31 | 32 | - name: Install AWS CDK 33 | run: | 34 | npm install -g aws-cdk 35 | 36 | - name: Checkout source 37 | uses: actions/checkout@v2 38 | 39 | - name: Test Python modules 40 | working-directory: python 41 | run: | 42 | python -m pip install -r requirements.txt 43 | python -m pytest 44 | 45 | - name: Create artifacts directory 46 | run: | 47 | mkdir artifacts 48 | 49 | - name: Build shared assets 50 | working-directory: cdk-infra/shared 51 | run: | 52 | npm install 53 | cd lambda/aws-lambda-helpers 54 | mvn clean package shade:shade 55 | cp target/aws-lambda-helpers-1.0.jar ../../../../artifacts/ 56 | 57 | - name: Build bootstrap template 58 | working-directory: bootstrap-cdk 59 | run: | 60 | npm install 61 | cdk synth -j > ../artifacts/BootstrapCdkStack.template.json 62 | 63 | - name: Build kds-to-s3-datastream-java 64 | working-directory: apps/java-datastream/kds-to-s3-datastream-java 65 | run: | 66 | mvn package 67 | cd cdk-infra 68 | npm install 69 | cdk synth -j > ../target/kds-to-s3-datastream-java.json 70 | cd .. 71 | cp target/kds-to-s3-datastream-java-1.0.1.jar ../../../artifacts/ 72 | cp target/kds-to-s3-datastream-java.json ../../../artifacts/ 73 | 74 | - name: Build kafka-to-studio 75 | run: | 76 | cd apps/studio/msk-to-studio/cdk-infra 77 | npm install 78 | cdk synth -j | grep -Ev '^\[Warning' > ../../../../artifacts/CdkInfraKafkaToStudioStack.template.json 79 | cd ../../../.. 80 | cd python/lambda_run_studio_notebook 81 | ./bundle-lambda-for-release.sh 82 | cp my-deployment.zip ../../artifacts/ 83 | 84 | 85 | - name: List artifacts 86 | working-directory: artifacts 87 | run: | 88 | ls -l 89 | 90 | - name: Publish artifacts 91 | if: github.event_name == 'release' 92 | uses: skx/github-action-publish-binaries@master 93 | env: 94 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 95 | with: 96 | args: "artifacts/*" 97 | 98 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 
5 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to work on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing.
We will ask you to confirm the licensing of your contribution. 60 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Managed Service for Apache Flink 2 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | # Managed Service for Apache Flink Blueprints
3 | 4 | 5 | 6 | Managed Service for Apache Flink Blueprints are a curated collection of Apache Flink applications. Each blueprint will walk you through how to solve a practical problem related to stream processing using Apache Flink. These blueprints can be leveraged to create more complex applications to solve your business challenges in Apache Flink, and they are designed to be extensible. We will feature examples for both the DataStream and Table API where possible. 7 | 8 |
9 | 10 | ## Get started with Blueprints 11 | 12 | Within this repo, you will find examples of Apache Flink applications that can be run locally, on an open source Apache Flink cluster, or on a Managed Service for Apache Flink cluster. Clone the repository to get started. 13 | 14 | | Description | Flink API | Language | 15 | | --- | --- | --- | 16 | | **[Reading from Kinesis Data Streams and writing to Amazon S3](apps/java-datastream/kds-to-s3-datastream-java)** | DataStream | Java | 17 | | **[Reading from MSK Serverless into Managed Service for Apache Flink Studio](apps/studio/msk-to-studio/)** | Flink SQL | SQL | 18 | 19 | 20 | 21 | ## Installation 22 | 23 | Follow the installation instructions [here](notes/installation.md) to install the shared libraries and begin developing. 24 | 25 | ## Contributing 26 | Have a good idea for a new Managed Service for Apache Flink Blueprint? Check out the instructions [here](notes/contribute.md) for how to contribute. 27 | 28 | ## Modifying 29 | Need to modify a blueprint for your needs? Check out the instructions [here](notes/modify.md) for more details. -------------------------------------------------------------------------------- /apps/java-datastream/README.md: -------------------------------------------------------------------------------- 1 | # Java DataStream apps 2 | 3 | | Description | 4 | | --- | 5 | | [Reading from Kinesis Data Streams and writing to Amazon S3](./kds-to-s3-datastream-java/README.md) | -------------------------------------------------------------------------------- /apps/java-datastream/kds-to-s3-datastream-java/README.md: -------------------------------------------------------------------------------- 1 | # KDS to S3 (Java DataStream API) 2 | 3 | This blueprint deploys an MSF app that reads from Kinesis Data Streams (KDS) using IAM auth and writes to S3 using the Java DataStream API: 4 | 5 | ![Arch diagram](img/kds-kda-s3.png) 6 | 7 | ## Project details 8 | 9 | 1. Flink version: `1.20.0` 10 | 2. Java version: `11` 11 | 12 | ## Key components used 13 | 14 | 1. `FlinkKinesisConsumer`. 15 | 2. `FileSink` (`StreamingFileSink` is slated to be deprecated). See the appendix at the end of this README for a sketch of how these components fit together. 16 | 17 | ## High-level deployment steps 18 | 19 | 1. Build the app and copy the resulting JAR to an S3 location 20 | 2. Deploy the associated infra (KDS and MSF) using the CDK script 21 | - If using existing resources, you can simply update the app properties in MSF. 22 | 3. Perform data generation 23 | 24 | ## Prerequisites 25 | 26 | 1. Maven 27 | 2. AWS SDK v2 28 | 3. AWS CDK v2 - for deploying the associated infra (KDS stream and MSF app) 29 | 30 | ## Step-by-step deployment walkthrough 31 | 32 | 1. First, let's set up some environment variables to make the deployment easier. Replace these values with your own S3 bucket, app name, etc. 33 | 34 | ```bash 35 | export AWS_PROFILE=<> 36 | export APP_NAME=<> 37 | export S3_BUCKET=<> 38 | export S3_FILE_KEY=<> 39 | ``` 40 | 41 | 2. Build the Java Flink application locally. 42 | 43 | From the root directory of this project, run: 44 | 45 | ``` 46 | mvn clean package 47 | ``` 48 | 49 | 3. Copy the jar to S3 so it can be referenced in the CDK deployment 50 | 51 | ```bash 52 | aws s3 cp target/<> s3://${S3_BUCKET}/${S3_FILE_KEY} 53 | ``` 54 | 55 | 4. Follow the instructions in the [`cdk-infra`](cdk-infra/README.md) folder to deploy the infrastructure associated with this app - such as the source KDS stream and the Managed Service for Apache Flink application. 56 | 57 | 5.
Follow the instructions in [orders-datagen](../../../datagen/orders-datagen/README.md) to generate data into the source KDS stream. 57 | 58 | 6. Start your Managed Service for Apache Flink application from the AWS console. 59 | 60 | 7. Run a Flink query or S3 Select query against the sink bucket to view the data written to S3. 61 | 62 | 63 | 64 | 65 | ## Launching via CloudFormation with pre-synthesized templates: 66 | 67 | 1. First, navigate to [the bootstrapping folder](/bootstrap-cdk/): `/bootstrap-cdk` and synthesize the template: `cdk synth` 68 | 2. Next, navigate to this blueprint's cdk-infra folder and type `cdk synth` to synthesize the template. 69 | 3. Finally, navigate to the root of the project: `/` 70 | 71 | #### Bootstrap your account (run in the root of the project and change variables accordingly) 72 | 73 | Copy and paste this command into a terminal in the root of the project to bootstrap the assets into your account. 74 | 75 | ```bash 76 | export timestampToLetters=$(date +%s) 77 | export BucketName=myblueprintdemoassets-${timestampToLetters} 78 | export BootstrapStackName=bootstrap-my-account-${timestampToLetters}-stack 79 | export BlueprintStackName=kds-to-s3-blueprint-${timestampToLetters}-stack 80 | export AppName=kds-to-s3-demo-${timestampToLetters}-app 81 | export StreamName=kds-to-s3-demo-${timestampToLetters}-stream 82 | export CloudWatchLogGroupName=blueprints/managed-flink/${AppName} 83 | export CloudWatchLogStreamName=managed-flink-log-stream 84 | export RoleName=kds-to-s3-demo-${timestampToLetters}-role 85 | 86 | aws cloudformation create-stack --template-body file://./bootstrap-cdk/cdk.out/BootstrapCdkStack.template.json --stack-name ${BootstrapStackName} --parameters ParameterKey=AssetBucket,ParameterValue=$BucketName ParameterKey=AssetList,ParameterValue="https://data-streaming-labs.s3.amazonaws.com/blueprint-test/kds-to-s3-datastream-java-1.0.1.jar\,https://data-streaming-labs.s3.amazonaws.com/blueprint-test/kds-to-s3-datastream-java.json" --capabilities CAPABILITY_IAM 87 | ``` 88 | 89 | ### Once bootstrapping finishes (check the stack status in your AWS Console), run the next command from the terminal: 90 | 91 | ```bash 92 | aws cloudformation create-stack --template-url https://${BucketName}.s3.amazonaws.com/kds-to-s3-datastream-java.json --stack-name $BlueprintStackName --parameters ParameterKey=AppName,ParameterValue=$AppName ParameterKey=CloudWatchLogGroupName,ParameterValue=$CloudWatchLogGroupName ParameterKey=CloudWatchLogStreamName,ParameterValue=$CloudWatchLogStreamName ParameterKey=StreamName,ParameterValue=$StreamName ParameterKey=BucketName,ParameterValue=$BucketName ParameterKey=BootstrapStackName,ParameterValue=$BootstrapStackName ParameterKey=RoleName,ParameterValue=$RoleName --capabilities CAPABILITY_NAMED_IAM 93 | ``` 94 | 95 | Now the blueprint will be launched in your account.
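
## Appendix: how the pieces fit together (illustrative sketch)

The key components listed above (`FlinkKinesisConsumer`, `FileSink`) and the classes under `src/main/java` (`Stock`, `StockDeserializationSchema`, `StockDateBucketAssigner`) wire together roughly as follows. This is a minimal sketch for orientation only, not the actual `StreamingJob` source; the stream name, region, and S3 path are placeholder assumptions, and the deployed app would read such values from its MSF runtime properties (the pom pulls in `aws-kinesisanalytics-runtime` for that) rather than hard-coding them.

```java
import java.util.Properties;

import org.apache.flink.api.common.serialization.SimpleStringEncoder;
import org.apache.flink.connector.file.sink.FileSink;
import org.apache.flink.core.fs.Path;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kinesis.FlinkKinesisConsumer;
import org.apache.flink.streaming.connectors.kinesis.config.ConsumerConfigConstants;

import com.amazonaws.services.kinesisanalytics.stock.Stock;
import com.amazonaws.services.kinesisanalytics.stock.StockDateBucketAssigner;
import com.amazonaws.services.kinesisanalytics.stock.StockDeserializationSchema;

public class StreamingJobSketch {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Source: consume JSON-encoded Stock records from the KDS stream
        // (placeholder stream name and region).
        Properties consumerConfig = new Properties();
        consumerConfig.setProperty(ConsumerConfigConstants.AWS_REGION, "us-east-1");
        consumerConfig.setProperty(ConsumerConfigConstants.STREAM_INITIAL_POSITION, "TRIM_HORIZON");

        DataStream<Stock> stocks = env.addSource(new FlinkKinesisConsumer<>(
                "my-source-stream", new StockDeserializationSchema(), consumerConfig));

        // Sink: write row-formatted output to S3 (placeholder bucket), with the
        // key space partitioned by event time via StockDateBucketAssigner
        // (partition format first, then key prefix, matching its constructor).
        FileSink<Stock> sink = FileSink
                .forRowFormat(new Path("s3a://my-sink-bucket/output/"),
                        new SimpleStringEncoder<Stock>("UTF-8"))
                .withBucketAssigner(new StockDateBucketAssigner("yyyy-MM-dd", "stock/"))
                .build();

        stocks.sinkTo(sink);
        env.execute("kds-to-s3-datastream-java");
    }
}
```

With the bucket assigner above, output objects land under keys like `stock/ts=2023-06-01/part-...`, which is what the `getBucketId` implementation in `StockDateBucketAssigner` produces.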
-------------------------------------------------------------------------------- /apps/java-datastream/kds-to-s3-datastream-java/cdk-infra/.gitignore: -------------------------------------------------------------------------------- 1 | *.js 2 | !jest.config.js 3 | *.d.ts 4 | node_modules 5 | 6 | # CDK asset staging directory 7 | .cdk.staging 8 | cdk.out 9 | -------------------------------------------------------------------------------- /apps/java-datastream/kds-to-s3-datastream-java/cdk-infra/README.md: -------------------------------------------------------------------------------- 1 | # CDK Infrastructure associated with KDS to S3 MSF blueprint (Java) 2 | 3 | This CDK script deploys the following components: 4 | 5 | 1. Kinesis Data Stream that serves as the source for the application. 6 | 2. Managed Service for Apache Flink Java DataStream API application. 7 | 3. CloudWatch Log Group and Log Stream linked to the application. 8 | 4. IAM permissions for the role associated with the Managed Service for Apache Flink application. 9 | 10 | This CDK script expects you to supply the following *existing* resources: 11 | 12 | 1. S3 bucket where the application jar will be uploaded (`appBucket` below). 13 | 2. S3 bucket that will function as the sink (`appSinkBucket` below). 14 | 3. Glue database (`glueDatabaseName` below). 15 | 16 | ## CDK runtime context key/value pairs that need to be supplied 17 | 18 | Open up `cdk.json` and fill in appropriate values for each of these CDK context values: 19 | 20 | | Context value name | Purpose | Notes | 21 | | --- | --- | --- | 22 | | `msfAppName` | The name of the Managed Service for Apache Flink application | MSF app *will be created* | 23 | | `appBucket` | The S3 bucket where the application payload will be stored | *Must be pre-existing* | 24 | | `appSinkBucket` | The bucket to which the KDS to S3 Flink app will write output files | *Must be pre-existing* | 25 | | `runtimeEnvironment` | The Managed Service for Apache Flink runtime environment | For instance, `FLINK-1_15` | 26 | | `deployDataGen` | `true` if you want Zeppelin-based interactive MSF for data generation to be deployed; `false` otherwise | N/A | 27 | | `glueDatabaseName` | The AWS Glue database that will be used by the MSF Studio datagen app | *Must be pre-existing* | 28 | | `msfLogGroup` | The name for the CloudWatch Log Group that will be linked to the MSF Flink app | Log group *will be created* | 29 | | `msfLogStream` | The name for the CloudWatch Log Stream that will be linked to the MSF Flink app | Log stream *will be created* | 30 | | `sourceKinesisStreamName` | The name for the source Kinesis Data Stream | KDS stream *will be created* | 31 | 32 | For more information on CDK Runtime Context, please see [Runtime Context](https://docs.aws.amazon.com/cdk/v2/guide/context.html). 33 | 34 | 35 | ## Deploying the blueprint 36 | 37 | ``` 38 | cdk deploy 39 | ``` 40 | 41 | This will launch a CloudFormation Stack containing all the resources required for the blueprint. 42 | 43 | ## Generating a CloudFormation script using `cdk synth`: 44 | 45 | Instead of deploying directly, you could also generate an intermediate CFN script using the command below. 46 | 47 | ``` 48 | cdk synth 49 | ``` 50 | 51 | ## Deleting the blueprint 52 | 53 | To avoid ongoing charges, please make sure that you delete the blueprint and associated AWS resources using the following command.
54 | 55 | ``` 56 | cdk destroy 57 | ``` -------------------------------------------------------------------------------- /apps/java-datastream/kds-to-s3-datastream-java/cdk-infra/bin/main.ts: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | /* 3 | * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | * Apache-2.0 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this 7 | * software and associated documentation files (the "Software"), to deal in the Software 8 | * without restriction, including without limitation the rights to use, copy, modify, 9 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 10 | * permit persons to whom the Software is furnished to do so. 11 | * 12 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 13 | * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 14 | * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 15 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 16 | * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 17 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 18 | */ 19 | 20 | import 'source-map-support/register'; 21 | import * as cdk from 'aws-cdk-lib'; 22 | import { CdkInfraKdsToS3Stack } from '../lib/cdk-infra-kds-to-s3-stack'; 23 | import { BootstraplessStackSynthesizer } from 'cdk-bootstrapless-synthesizer'; 24 | 25 | const app = new cdk.App(); 26 | 27 | // NOTE: We're not creating a bucket to hold the application jar; we 28 | // expect there to be a pre-existing bucket. You can modify this stack 29 | // to also create a bucket instead. 30 | // Same goes for the bucket that this app will be writing to. 
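// A hypothetical sketch of that modification (it belongs in the stack
// constructor in lib/cdk-infra-kds-to-s3-stack.ts rather than here; the
// bucket id and props are illustrative assumptions, not part of this
// blueprint):
//
//   import * as s3 from 'aws-cdk-lib/aws-s3';
//   const appBucket = new s3.Bucket(this, 'AppBucket', {
//     removalPolicy: cdk.RemovalPolicy.DESTROY,
//     autoDeleteObjects: true,
//   });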
31 | new CdkInfraKdsToS3Stack(app, 'CdkInfraMSFKdsToS3Stack', { 32 | synthesizer: new BootstraplessStackSynthesizer({ 33 | templateBucketName: 'cfn-template-bucket', 34 | 35 | fileAssetBucketName: 'file-asset-bucket-${AWS::Region}', 36 | fileAssetRegionSet: ['us-west-1', 'us-west-2'], 37 | fileAssetPrefix: 'file-asset-prefix/latest/' 38 | }), 39 | }); -------------------------------------------------------------------------------- /apps/java-datastream/kds-to-s3-datastream-java/cdk-infra/cdk.json: -------------------------------------------------------------------------------- 1 | { 2 | "app": "npx ts-node --prefer-ts-exts bin/main.ts", 3 | "watch": { 4 | "include": [ 5 | "**" 6 | ], 7 | "exclude": [ 8 | "README.md", 9 | "cdk*.json", 10 | "**/*.d.ts", 11 | "**/*.js", 12 | "tsconfig.json", 13 | "package*.json", 14 | "yarn.lock", 15 | "node_modules", 16 | "test" 17 | ] 18 | }, 19 | "context": { 20 | "@aws-cdk/aws-apigateway:usagePlanKeyOrderInsensitiveId": true, 21 | "@aws-cdk/core:stackRelativeExports": true, 22 | "@aws-cdk/aws-rds:lowercaseDbIdentifier": true, 23 | "@aws-cdk/aws-lambda:recognizeVersionProps": true, 24 | "@aws-cdk/aws-lambda:recognizeLayerVersion": true, 25 | "@aws-cdk/aws-cloudfront:defaultSecurityPolicyTLSv1.2_2021": true, 26 | "@aws-cdk-containers/ecs-service-extensions:enableDefaultLogDriver": true, 27 | "@aws-cdk/aws-ec2:uniqueImdsv2TemplateName": true, 28 | "@aws-cdk/core:checkSecretUsage": true, 29 | "@aws-cdk/aws-iam:minimizePolicies": true, 30 | "@aws-cdk/aws-ecs:arnFormatIncludesClusterName": true, 31 | "@aws-cdk/core:validateSnapshotRemovalPolicy": true, 32 | "@aws-cdk/aws-codepipeline:crossAccountKeyAliasStackSafeResourceName": true, 33 | "@aws-cdk/aws-s3:createDefaultLoggingPolicy": true, 34 | "@aws-cdk/aws-sns-subscriptions:restrictSqsDescryption": true, 35 | "@aws-cdk/aws-apigateway:disableCloudWatchRole": true, 36 | "@aws-cdk/core:enablePartitionLiterals": true, 37 | "@aws-cdk/core:target-partitions": [ 38 | "aws", 39 | "aws-cn" 40 | ], 41 | "msfAppName": "", 42 | "appBucket": "", 43 | "appFileKeyOnS3": "", 44 | "appSinkBucket": "", 45 | "runtimeEnvironment": "", 46 | "glueDatabaseName": "", 47 | "flinkVersion": "1.15.2", 48 | "RuntimeEnvironment": "1.13.2", 49 | "deployDataGen": "false", 50 | "msfLogGroup": "", 51 | "msfLogStream": "", 52 | "sourceKinesisStreamName": "" 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /apps/java-datastream/kds-to-s3-datastream-java/cdk-infra/cfn.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/managed-service-for-apache-flink-blueprints/5c8da457d749d32069e99f2e64687613a6e099ad/apps/java-datastream/kds-to-s3-datastream-java/cdk-infra/cfn.yaml -------------------------------------------------------------------------------- /apps/java-datastream/kds-to-s3-datastream-java/cdk-infra/jest.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | testEnvironment: 'node', 3 | roots: ['/test'], 4 | testMatch: ['**/*.test.ts'], 5 | transform: { 6 | '^.+\\.tsx?$': 'ts-jest' 7 | } 8 | }; 9 | -------------------------------------------------------------------------------- /apps/java-datastream/kds-to-s3-datastream-java/cdk-infra/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "cdk-infra-msf-kafka-to-s3", 3 | "version": "0.1.0", 4 | "bin": { 5 | "cdk-infra-msf-kafka-to-s3": 
"bin/cdk-infra-msf-kafka-to-s3.js" 6 | }, 7 | "scripts": { 8 | "build": "tsc", 9 | "watch": "tsc -w", 10 | "test": "jest", 11 | "cdk": "cdk" 12 | }, 13 | "devDependencies": { 14 | "@types/jest": "29.5.1", 15 | "@types/node": "18.16.0", 16 | "@types/prettier": "2.7.2", 17 | "aws-cdk": "2.76.0", 18 | "jest": "29.5.0", 19 | "ts-jest": "29.1.0", 20 | "ts-node": "10.9.1", 21 | "typescript": "5.0.4" 22 | }, 23 | "dependencies": { 24 | "aws-cdk-lib": "^2.85.0", 25 | "cdk-bootstrapless-synthesizer": "^2.3.2", 26 | "constructs": "10.2.4", 27 | "source-map-support": "0.5.21" 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /apps/java-datastream/kds-to-s3-datastream-java/cdk-infra/test/cdk-infra-kda-kafka-to-s3.test.ts: -------------------------------------------------------------------------------- 1 | // import * as cdk from 'aws-cdk-lib'; 2 | // import { Template } from 'aws-cdk-lib/assertions'; 3 | 4 | // example test. To run these tests, uncomment this file along with the 5 | // example resource in lib/cdk-infra-msf-kafka-to-s3-stack.ts 6 | test('SQS Queue Created', () => { 7 | // const app = new cdk.App(); 8 | // // WHEN 9 | // // THEN 10 | // const template = Template.fromStack(stack); 11 | 12 | // template.hasResourceProperties('AWS::SQS::Queue', { 13 | // VisibilityTimeout: 300 14 | // }); 15 | }); 16 | -------------------------------------------------------------------------------- /apps/java-datastream/kds-to-s3-datastream-java/cdk-infra/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2018", 4 | "module": "commonjs", 5 | "lib": [ 6 | "es2018" 7 | ], 8 | "declaration": true, 9 | "strict": true, 10 | "noImplicitAny": true, 11 | "strictNullChecks": true, 12 | "noImplicitThis": true, 13 | "alwaysStrict": true, 14 | "noUnusedLocals": false, 15 | "noUnusedParameters": false, 16 | "noImplicitReturns": true, 17 | "noFallthroughCasesInSwitch": false, 18 | "inlineSourceMap": true, 19 | "inlineSources": true, 20 | "experimentalDecorators": true, 21 | "strictPropertyInitialization": false, 22 | "typeRoots": [ 23 | "./node_modules/@types" 24 | ] 25 | }, 26 | "exclude": [ 27 | "node_modules", 28 | "cdk.out" 29 | ] 30 | } 31 | -------------------------------------------------------------------------------- /apps/java-datastream/kds-to-s3-datastream-java/dependency-reduced-pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4.0.0 4 | com.amazonaws.services.kinesisanalytics 5 | kds-to-s3-datastream-java 6 | KDS to S3 DataStream App 7 | 1.0.1 8 | 9 | 10 | 11 | 12 | org.eclipse.m2e 13 | lifecycle-mapping 14 | 1.0.0 15 | 16 | 17 | 18 | 19 | 20 | org.apache.maven.plugins 21 | maven-shade-plugin 22 | [3.1.1,) 23 | 24 | shade 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | org.apache.maven.plugins 34 | maven-compiler-plugin 35 | [3.1,) 36 | 37 | testCompile 38 | compile 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | maven-compiler-plugin 54 | 3.8.0 55 | 56 | ${jdk.version} 57 | ${jdk.version} 58 | 59 | 60 | 61 | maven-shade-plugin 62 | 3.1.1 63 | 64 | 65 | package 66 | 67 | shade 68 | 69 | 70 | 71 | 72 | org.apache.flink:force-shading 73 | com.google.code.findbugs:jsr305 74 | org.slf4j:* 75 | org.apache.logging.log4j:* 76 | 77 | 78 | 79 | 80 | *:* 81 | 82 | META-INF/*.SF 83 | META-INF/*.DSA 84 | META-INF/*.RSA 85 | 86 | 87 | 88 | 89 | 90 | com.amazonaws.services.kinesisanalytics.StreamingJob 91 
| 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | false 103 | 104 | 105 | apache.snapshots 106 | Apache Development Snapshot Repository 107 | https://repository.apache.org/content/repositories/snapshots/ 108 | 109 | 110 | 111 | 112 | com.amazonaws 113 | aws-kinesisanalytics-runtime 114 | 1.2.0 115 | provided 116 | 117 | 118 | org.apache.logging.log4j 119 | log4j-slf4j-impl 120 | 2.17.1 121 | runtime 122 | 123 | 124 | org.apache.logging.log4j 125 | log4j-api 126 | 2.17.1 127 | runtime 128 | 129 | 130 | org.apache.logging.log4j 131 | log4j-core 132 | 2.17.1 133 | runtime 134 | 135 | 136 | 137 | 2.12 138 | 1.12.2 139 | 1.11 140 | 2.13.4.2 141 | 2.13.4 142 | 1.15.4 143 | 2.4.1 144 | 1.11 145 | UTF-8 146 | 1.9.2 147 | 2.17.1 148 | 2.0.0 149 | 1.11 150 | 11 151 | 1.2.0 152 | 153 | 154 | -------------------------------------------------------------------------------- /apps/java-datastream/kds-to-s3-datastream-java/img/.$msk-kda-s3.drawio.bkp: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /apps/java-datastream/kds-to-s3-datastream-java/img/.$msk-kda-s3.drawio.dtmp: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /apps/java-datastream/kds-to-s3-datastream-java/img/kds-kda-s3.drawio: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /apps/java-datastream/kds-to-s3-datastream-java/img/kds-kda-s3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/managed-service-for-apache-flink-blueprints/5c8da457d749d32069e99f2e64687613a6e099ad/apps/java-datastream/kds-to-s3-datastream-java/img/kds-kda-s3.png -------------------------------------------------------------------------------- /apps/java-datastream/kds-to-s3-datastream-java/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "devDependencies": { 3 | "ts-node": "10.9.1", 4 | "typescript": "4.9.5" 5 | }, 6 | "dependencies": { 7 | "aws-cdk": "2.64" 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /apps/java-datastream/kds-to-s3-datastream-java/src/main/java/com/amazonaws/services/kinesisanalytics/stock/Stock.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | * Apache-2.0 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this 6 | * software and associated documentation files (the "Software"), to deal in the Software 7 | * without restriction, including without limitation the rights to use, copy, modify, 8 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 9 | * permit persons to whom the Software is furnished to do so. 
10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 12 | * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 13 | * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 14 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 15 | * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 16 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 17 | */ 18 | 19 | package com.amazonaws.services.kinesisanalytics.stock; 20 | 21 | import java.util.Objects; 22 | 23 | public class Stock { 24 | 25 | private String event_time; 26 | private String ticker; 27 | private float price; 28 | 29 | public Stock() {} 30 | 31 | public void setEvent_time(String event_time) { 32 | this.event_time = event_time; 33 | } 34 | 35 | public String getEvent_time() { 36 | return this.event_time; 37 | } 38 | 39 | public void setTicker(String ticker) { 40 | this.ticker = ticker; 41 | } 42 | 43 | public String getTicker() { 44 | return this.ticker; 45 | } 46 | 47 | public void setPrice(float price) { 48 | this.price = price; 49 | } 50 | 51 | public float getPrice() { 52 | return this.price; 53 | } 54 | 55 | @Override 56 | public String toString() { 57 | return "Stock{" + "ticker=" + ticker + ", price='" + price + '\'' + ", time=" + event_time + '}'; 58 | } 59 | 60 | @Override 61 | public boolean equals(Object o) { 62 | if (this == o) { 63 | return true; 64 | } 65 | if (o == null || getClass() != o.getClass()) { 66 | return false; 67 | } 68 | Stock stock = (Stock) o; 69 | return ticker.equals(stock.ticker) && 70 | price == stock.price && 71 | event_time.equals(stock.event_time); 72 | } 73 | 74 | @Override 75 | public int hashCode() { 76 | return Objects.hash(ticker, price, event_time); 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /apps/java-datastream/kds-to-s3-datastream-java/src/main/java/com/amazonaws/services/kinesisanalytics/stock/StockDateBucketAssigner.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | * Apache-2.0 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this 6 | * software and associated documentation files (the "Software"), to deal in the Software 7 | * without restriction, including without limitation the rights to use, copy, modify, 8 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 9 | * permit persons to whom the Software is furnished to do so. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 12 | * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 13 | * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 14 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 15 | * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 16 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
17 | */ 18 | 19 | package com.amazonaws.services.kinesisanalytics.stock; 20 | 21 | import org.apache.flink.core.io.SimpleVersionedSerializer; 22 | import org.apache.flink.streaming.api.functions.sink.filesystem.BucketAssigner; 23 | import org.apache.flink.streaming.api.functions.sink.filesystem.bucketassigners.SimpleVersionedStringSerializer; 24 | 25 | import java.time.LocalDateTime; 26 | import java.time.format.DateTimeFormatter; 27 | 28 | public class StockDateBucketAssigner implements BucketAssigner { 29 | private final String prefix; 30 | private final String partitionFormat; 31 | private transient DateTimeFormatter dtFormatForWrite; 32 | 33 | public StockDateBucketAssigner(String partitionFormat, String prefix) { 34 | this.prefix = prefix; 35 | this.partitionFormat = partitionFormat; 36 | } 37 | 38 | @Override 39 | public String getBucketId(Stock stock, Context context) { 40 | this.dtFormatForWrite = DateTimeFormatter.ofPattern(partitionFormat); 41 | 42 | String eventTimeStr = stock.getEvent_time(); 43 | LocalDateTime eventTime = LocalDateTime.parse(eventTimeStr.replace(" ", "T")); 44 | 45 | String formattedDate = eventTime.format(this.dtFormatForWrite); 46 | 47 | return String.format("%sts=%s", 48 | prefix, 49 | formattedDate 50 | ); 51 | } 52 | 53 | @Override 54 | public SimpleVersionedSerializer getSerializer() { 55 | return SimpleVersionedStringSerializer.INSTANCE; 56 | } 57 | } -------------------------------------------------------------------------------- /apps/java-datastream/kds-to-s3-datastream-java/src/main/java/com/amazonaws/services/kinesisanalytics/stock/StockDeserializationSchema.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | * Apache-2.0 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this 6 | * software and associated documentation files (the "Software"), to deal in the Software 7 | * without restriction, including without limitation the rights to use, copy, modify, 8 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 9 | * permit persons to whom the Software is furnished to do so. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 12 | * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 13 | * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 14 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 15 | * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 16 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
17 | */ 18 | 19 | package com.amazonaws.services.kinesisanalytics.stock; 20 | 21 | import com.fasterxml.jackson.databind.ObjectMapper; 22 | import com.fasterxml.jackson.databind.json.JsonMapper; 23 | import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule; 24 | import org.apache.flink.api.common.serialization.AbstractDeserializationSchema; 25 | 26 | import java.io.IOException; 27 | 28 | public class StockDeserializationSchema extends AbstractDeserializationSchema { 29 | private static final long serialVersionUID = 1L; 30 | 31 | private transient ObjectMapper objectMapper; 32 | 33 | @Override 34 | public void open(InitializationContext context) { 35 | objectMapper = JsonMapper.builder().build().registerModule(new JavaTimeModule()); 36 | } 37 | 38 | @Override 39 | public Stock deserialize(byte[] bytes) throws IOException { 40 | return objectMapper.readValue(bytes, Stock.class); 41 | } 42 | } // class -------------------------------------------------------------------------------- /apps/java-datastream/kds-to-s3-datastream-java/src/main/resources/OrderSchema.avsc: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy of 5 | this software and associated documentation files (the "Software"), to deal in 6 | the Software without restriction, including without limitation the rights to 7 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 8 | the Software, and to permit persons to whom the Software is furnished to do so. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 11 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 12 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 13 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 14 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 15 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 16 | */ 17 | { 18 | "name": "Order", 19 | "namespace": "com.amazonaws.services.kinesisanalytics.orders", 20 | "type": "record", 21 | "fields": [ 22 | { 23 | "name": "product_id", 24 | "type": "long" 25 | }, 26 | { 27 | "name": "order_number", 28 | "type": "long" 29 | }, 30 | { 31 | "name": "quantity", 32 | "type": "int" 33 | }, 34 | { 35 | "name": "price", 36 | "type": "double" 37 | }, 38 | { 39 | "name": "buyer", 40 | "type": "string" 41 | }, 42 | { 43 | "name": "order_time", 44 | "type": "string" 45 | } 46 | ] 47 | } 48 | -------------------------------------------------------------------------------- /apps/java-datastream/kds-to-s3-datastream-java/src/main/resources/log4j2.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. 
You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | ################################################################################ 18 | 19 | rootLogger.level = INFO 20 | rootLogger.appenderRef.console.ref = ConsoleAppender 21 | 22 | appender.console.name = ConsoleAppender 23 | appender.console.type = CONSOLE 24 | appender.console.layout.type = PatternLayout 25 | appender.console.layout.pattern = %d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n 26 | -------------------------------------------------------------------------------- /apps/java-datastream/kds-to-s3-datastream-java/test-create.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -x 4 | 5 | AWS_ACCOUNT_ID=$(aws sts get-caller-identity | jq -r ".Account") 6 | AWS_REGION=$(aws configure get region) 7 | BUCKET_NAME="msf-blueprints-kds-to-s3-${AWS_ACCOUNT_ID}-${AWS_REGION}" 8 | APP_NAME=kds-to-s3-datastream-java 9 | JAR_FILE=$APP_NAME-1.0.1.jar 10 | 11 | # Create required S3 buckets if they don't already exist 12 | # Buckets are created in the default region. Use AWS_REGION 13 | # environment variable to change where they are created. 14 | aws s3api head-bucket --bucket $BUCKET_NAME --region $AWS_REGION >/dev/null 2>&1 15 | if [ $? -ne 0 ]; then 16 | aws s3api create-bucket --bucket $BUCKET_NAME --region $AWS_REGION --create-bucket-configuration LocationConstraint=$AWS_REGION | cat 17 | fi 18 | 19 | 20 | # After this point any failure should stop execution 21 | set -e 22 | 23 | # Build Flink app 24 | mvn clean 25 | mvn package 26 | 27 | # Build CFN 28 | cd cdk-infra 29 | cdk synth -j > ../target/$APP_NAME.json 30 | cd .. 31 | 32 | # Upload artifacts 33 | aws s3 cp target/$JAR_FILE s3://$BUCKET_NAME/$JAR_FILE 34 | 35 | # Create CFN stack 36 | aws cloudformation deploy --stack-name $APP_NAME --parameter-overrides "AppName=${APP_NAME}" "BucketName=${BUCKET_NAME}" "StreamName=${APP_NAME}" "RoleName=${APP_NAME}" "GlueDatabaseName=default" "CloudWatchLogGroupName=blueprints/msf/${APP_NAME}" "CloudWatchLogStreamName=log-stream-${APP_NAME}" "BootstrapStackName=test-script" --capabilities CAPABILITY_NAMED_IAM --template-file target/$APP_NAME.json 37 | -------------------------------------------------------------------------------- /apps/studio/README.md: -------------------------------------------------------------------------------- 1 | # Studio apps 2 | 3 | | Description | 4 | | --- | 5 | | [Reading from MSK Serverless using Studio](msk-to-studio/README.md) | 6 | 7 | 8 | -------------------------------------------------------------------------------- /apps/studio/msk-to-studio/README.md: -------------------------------------------------------------------------------- 1 | # MSK to Studio 2 | 3 | This blueprint deploys a Studio app that reads from MSK Serverless using IAM auth, via the Table API: 4 | 5 | ![Arch diagram](img/msk-studio.png) 6 | 7 | ## Project details 8 | 9 | 1. Flink version: `1.15.2` 10 | 2. Python version: `3.8` 11 | 12 | ## Key components used 13 | 14 | 1. New `KafkaSource` connector (the legacy `FlinkKafkaConsumer` is slated to be deprecated); see the sketch below.
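
While this blueprint drives the connector from Flink SQL in a Studio notebook, the underlying `KafkaSource` wiring, including the IAM auth properties for MSK Serverless, looks roughly like this in the Java API. This is a minimal sketch under stated assumptions: the bootstrap servers and consumer group are placeholders, and only the topic name `sourceTopic` comes from this blueprint's defaults.

```java
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.connector.kafka.source.KafkaSource;
import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class MskSourceSketch {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        KafkaSource<String> source = KafkaSource.<String>builder()
                // Placeholder MSK Serverless endpoint (IAM auth uses port 9098).
                .setBootstrapServers("boot-xxxxxxxx.c1.kafka-serverless.us-east-1.amazonaws.com:9098")
                .setTopics("sourceTopic")
                .setGroupId("msk-to-studio-sketch")
                .setStartingOffsets(OffsetsInitializer.earliest())
                .setValueOnlyDeserializer(new SimpleStringSchema())
                // IAM auth properties, provided by the aws-msk-iam-auth library.
                .setProperty("security.protocol", "SASL_SSL")
                .setProperty("sasl.mechanism", "AWS_MSK_IAM")
                .setProperty("sasl.jaas.config",
                        "software.amazon.msk.auth.iam.IAMLoginModule required;")
                .setProperty("sasl.client.callback.handler.class",
                        "software.amazon.msk.auth.iam.IAMClientCallbackHandler")
                .build();

        DataStream<String> records =
                env.fromSource(source, WatermarkStrategy.noWatermarks(), "msk-source");
        records.print();

        env.execute("msk-to-studio-sketch");
    }
}
```

In Studio, the same connector options surface as `WITH (...)` properties on a Flink SQL `CREATE TABLE` statement instead of Java builder calls.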
15 | 16 | ## High-level deployment steps 17 | 18 | 1. Deploy the associated infra (MSK and MSF Studio) using the CDK script 19 | 2. Run a Studio query to read from the MSK topic 20 | 21 | ## Prerequisites 22 | 23 | 1. Maven 24 | 2. AWS SDK v2 25 | 3. AWS CDK v2 - for deploying the associated infra (MSK and MSF app) 26 | 27 | ## Step-by-step deployment walkthrough 28 | 29 | 1. First, let's set up some environment variables to make the deployment easier. Replace these values with your own AWS profile and app name. 30 | 31 | ```bash 32 | export AWS_PROFILE=<> 33 | export APP_NAME=<> 34 | ``` 35 | 36 | 2. Follow the instructions in the [`cdk-infra`](cdk-infra/README.md) folder to *deploy* the infrastructure associated with this app - such as MSK Serverless and the Managed Service for Apache Flink Studio application. 37 | 38 | 3. Start your Managed Service for Apache Flink Studio application from the AWS console. 39 | 40 | 4. Run a Flink SQL query in the Studio notebook to read from the MSK topic. 41 | 42 | 43 | ## Launching via CloudFormation with pre-synthesized templates: 44 | 45 | 1. First, navigate to [the bootstrapping folder](/bootstrap-cdk/): `/bootstrap-cdk` and synthesize the template: `cdk synth` 46 | 2. Next, navigate to this blueprint's cdk-infra folder and type `cdk synth` to synthesize the template. 47 | 3. Finally, navigate to the root of the project: `/` 48 | 49 | #### Bootstrap your account (run in the root of the project and change variables accordingly) 50 | 51 | Copy and paste this command into a terminal in the root of the project to bootstrap the assets into your account. 52 | 53 | 54 | ```bash 55 | export timestampToLetters=$(date +%s) 56 | export BucketName=myblueprintdemoassets-${timestampToLetters} 57 | export BootstrapStackName=bootstrap-my-account-${timestampToLetters}-stack 58 | export BlueprintStackName=studio-demo-msk-studio-blueprint-${timestampToLetters}-stack 59 | export AppName=studio-demo-${timestampToLetters}-app 60 | export ClusterName=studio-demo-${timestampToLetters}-cluster 61 | export GlueDatabaseName=studio_demo_${timestampToLetters}_db 62 | export CloudWatchLogGroupName=blueprints/managed-flink/${AppName} 63 | export CloudWatchLogStreamName=managed-flink-log-stream 64 | export RoleName=studio-demo-${timestampToLetters}-role 65 | export RuntimeEnvironment=ZEPPELIN-FLINK-3_0 66 | 67 | aws cloudformation create-stack --template-body file://./bootstrap-cdk/cdk.out/BootstrapCdkStack.template.json --stack-name ${BootstrapStackName} --parameters ParameterKey=AssetBucket,ParameterValue=$BucketName ParameterKey=AssetList,ParameterValue="https://data-streaming-labs.s3.amazonaws.com/blueprint-test/aws-lambda-helpers-1.0.jar\,https://data-streaming-labs.s3.amazonaws.com/blueprint-test/CdkInfraKafkaToStudioStack.template.json\,https://data-streaming-labs.s3.amazonaws.com/blueprint-test/my-deployment.zip" --capabilities CAPABILITY_IAM 68 | ``` 69 | 70 | ### Once bootstrapping finishes (check the stack status in your AWS Console), run the next command from the terminal: 71 | 72 | ```bash 73 | aws cloudformation create-stack --template-url https://${BucketName}.s3.amazonaws.com/CdkInfraKafkaToStudioStack.template.json --stack-name $BlueprintStackName --parameters ParameterKey=AppName,ParameterValue=$AppName ParameterKey=GlueDatabaseName,ParameterValue=$GlueDatabaseName ParameterKey=RuntimeEnvironment,ParameterValue=$RuntimeEnvironment ParameterKey=CloudWatchLogGroupName,ParameterValue=$CloudWatchLogGroupName ParameterKey=CloudWatchLogStreamName,ParameterValue=$CloudWatchLogStreamName
ParameterKey=ClusterName,ParameterValue=$ClusterName ParameterKey=BucketName,ParameterValue=$BucketName ParameterKey=RoleName,ParameterValue=$RoleName ParameterKey=BootstrapStackName,ParameterValue=$BootstrapStackName --capabilities CAPABILITY_NAMED_IAM --disable-rollback 74 | ``` 75 | 76 | Now the blueprint will be launched in your account. -------------------------------------------------------------------------------- /apps/studio/msk-to-studio/cdk-infra/.gitignore: -------------------------------------------------------------------------------- 1 | *.js 2 | !jest.config.js 3 | *.d.ts 4 | node_modules 5 | 6 | # CDK asset staging directory 7 | .cdk.staging 8 | cdk.out 9 | -------------------------------------------------------------------------------- /apps/studio/msk-to-studio/cdk-infra/CFN-README.md: -------------------------------------------------------------------------------- 1 | # Steps for deploying CFN 2 | 3 | 1. Deploy `bootstrap-template.yaml` 2 | 2. Copy the `zip` and `jar` files to the shared S3 bucket 3 | 3. Deploy `out.yaml` with all params supplied. Note `lambdaZipsS3Bucket`: this is the name of the bucket where the zip and jar files are expected to be. -------------------------------------------------------------------------------- /apps/studio/msk-to-studio/cdk-infra/README.md: -------------------------------------------------------------------------------- 1 | # CDK Infrastructure associated with MSK Serverless to Studio MSF blueprint 2 | 3 | This CDK script deploys the following components: 4 | 5 | 1. VPC for MSK Serverless and the Managed Service for Apache Flink Studio application. 6 | 2. MSK Serverless. 7 | 3. Managed Service for Apache Flink Studio application. 8 | 4. IAM permissions for the role associated with the Managed Service for Apache Flink Studio application. 9 | 10 | This CDK script expects you to supply the following *existing* resource: 11 | 12 | 1. Glue database (`glueDatabaseName` below). 13 | 14 |
15 | 16 | ## CDK runtime context key/value pairs that need to be supplied 17 | 18 | Open up `cdk.json` and fill in appropriate values for each of these CDK context values: 19 | 20 | | Context value name | Purpose | Notes | 21 | | --- | --- | --- | 22 | | `studioAppName` | The name of the Managed Service for Apache Flink Studio application | Studio app *will be created* | 23 | | `glueDatabaseName` | The AWS Glue database that will be used by the MSF Studio application | *Must be pre-existing* | 24 | | `RuntimeEnvironment` | The Managed Service for Apache Flink Studio runtime environment | For instance, `ZEPPELIN-FLINK-3_0` | 25 | | `msfLogGroup` | The name for the CloudWatch Log Group that will be linked to the Studio app | Log group *will be created* | 26 | | `studioLogStream` | The name for the CloudWatch Log Stream that will be linked to the Studio app | Log stream *will be created* | 27 | | `mskClusterName` | The name for the source MSK Serverless cluster | MSK Serverless cluster *will be created* | 28 | | `SourceTopicName` | The name of the source Kafka topic the Studio app reads from | N/A | 29 | 30 | 31 | 32 | For more information on CDK Runtime Context, please see [Runtime Context](https://docs.aws.amazon.com/cdk/v2/guide/context.html). 33 | 34 | 35 | ## Deploying the blueprint 36 | 37 | ``` 38 | cdk deploy 39 | ``` 40 | 41 | This will launch a CloudFormation Stack containing all the resources required for the blueprint. 42 | 43 | ## Generating a CloudFormation script using `cdk synth`: 44 | 45 | Instead of deploying directly, you could also generate an intermediate CFN script using the command below. 46 | 47 | ``` 48 | cdk synth 49 | ``` 50 | 51 | ## Deleting the blueprint 52 | 53 | To avoid ongoing charges, please make sure that you delete the blueprint and associated AWS resources using the following command. 54 | 55 | ``` 56 | cdk destroy 57 | ``` -------------------------------------------------------------------------------- /apps/studio/msk-to-studio/cdk-infra/bin/main.ts: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | /* 3 | * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | * Apache-2.0 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this 7 | * software and associated documentation files (the "Software"), to deal in the Software 8 | * without restriction, including without limitation the rights to use, copy, modify, 9 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 10 | * permit persons to whom the Software is furnished to do so. 11 | * 12 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 13 | * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 14 | * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 15 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 16 | * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 17 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------------- /apps/studio/msk-to-studio/cdk-infra/bin/main.ts: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | /* 3 | * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | * Apache-2.0 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this 7 | * software and associated documentation files (the "Software"), to deal in the Software 8 | * without restriction, including without limitation the rights to use, copy, modify, 9 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 10 | * permit persons to whom the Software is furnished to do so. 11 | * 12 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 13 | * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 14 | * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 15 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 16 | * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 17 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 18 | */ 19 | 20 | import 'source-map-support/register'; 21 | import * as cdk from 'aws-cdk-lib'; 22 | import { CdkInfraKafkaToStudioStack } from '../lib/cdk-infra-kafka-to-studio-stack'; 23 | import { BootstraplessStackSynthesizer } from 'cdk-bootstrapless-synthesizer'; 24 | 25 | 26 | const app = new cdk.App(); 27 | 28 | const studioAppName = app.node.tryGetContext('studioAppName'); 29 | const glueDatabaseName = app.node.tryGetContext('glueDatabaseName'); 30 | const RuntimeEnvironment = app.node.tryGetContext('RuntimeEnvironment'); 31 | const msfLogGroup = app.node.tryGetContext('msfLogGroup'); 32 | const studioLogStream = app.node.tryGetContext('studioLogStream'); 33 | const mskClusterName = app.node.tryGetContext('mskClusterName'); 34 | const SourceTopicName = app.node.tryGetContext('SourceTopicName'); 35 | const blueprintName = "MSK_STUDIO"; 36 | 37 | // NOTE: We're not creating a bucket to hold the application jar; we 38 | // expect there to be a pre-existing bucket. You can modify this stack 39 | // to also create a bucket instead. 40 | // Same goes for the bucket that this app will be writing to. 41 | new CdkInfraKafkaToStudioStack(app, 'CdkInfraKafkaToStudioStack', { 42 | synthesizer: new BootstraplessStackSynthesizer({ 43 | templateBucketName: 'cfn-template-bucket', 44 | 45 | fileAssetBucketName: 'file-asset-bucket-${AWS::Region}', 46 | fileAssetRegionSet: ['us-west-1', 'us-west-2'], 47 | fileAssetPrefix: 'file-asset-prefix/latest/' 48 | }), 49 | studioAppName: studioAppName, 50 | glueDatabaseName: glueDatabaseName, 51 | RuntimeEnvironment: RuntimeEnvironment, 52 | msfLogGroup: msfLogGroup, 53 | studioLogStream: studioLogStream, 54 | mskClusterName: mskClusterName, 55 | SourceTopicName: SourceTopicName, 56 | blueprintName: blueprintName, 57 | }); -------------------------------------------------------------------------------- /apps/studio/msk-to-studio/cdk-infra/cdk.json: -------------------------------------------------------------------------------- 1 | { 2 | "app": "npx ts-node --prefer-ts-exts bin/main.ts", 3 | "watch": { 4 | "include": [ 5 | "**" 6 | ], 7 | "exclude": [ 8 | "README.md", 9 | "cdk*.json", 10 | "**/*.d.ts", 11 | "**/*.js", 12 | "tsconfig.json", 13 | "package*.json", 14 | "yarn.lock", 15 | "node_modules", 16 | "test" 17 | ] 18 | }, 19 | "context": { 20 | "@aws-cdk/aws-apigateway:usagePlanKeyOrderInsensitiveId": true, 21 | "@aws-cdk/core:stackRelativeExports": true, 22 | "@aws-cdk/aws-rds:lowercaseDbIdentifier": true, 23 | "@aws-cdk/aws-lambda:recognizeVersionProps": true, 24 | "@aws-cdk/aws-lambda:recognizeLayerVersion": true, 25 | "@aws-cdk/aws-cloudfront:defaultSecurityPolicyTLSv1.2_2021": true, 26 | "@aws-cdk-containers/ecs-service-extensions:enableDefaultLogDriver": true, 27 | "@aws-cdk/aws-ec2:uniqueImdsv2TemplateName": true, 28 | "@aws-cdk/core:checkSecretUsage": true, 29 | "@aws-cdk/aws-iam:minimizePolicies": true, 30 | "@aws-cdk/aws-ecs:arnFormatIncludesClusterName": true, 31 | "@aws-cdk/core:validateSnapshotRemovalPolicy": true, 32 | "@aws-cdk/aws-codepipeline:crossAccountKeyAliasStackSafeResourceName": true, 33 | "@aws-cdk/aws-s3:createDefaultLoggingPolicy": true, 34 | "@aws-cdk/aws-sns-subscriptions:restrictSqsDescryption": true, 35 | "@aws-cdk/aws-apigateway:disableCloudWatchRole": true, 36 | "@aws-cdk/core:enablePartitionLiterals": true, 37 | "@aws-cdk/core:target-partitions": [ 38 | "aws", 39 | "aws-cn" 40 | ], 41 | "studioAppName": "abc123", 42 | "runtimeEnvironment": "abc123", 43 | "glueDatabaseName": "abc123", 44 | "RuntimeEnvironment": "ZEPPELIN-FLINK-3_0", 45 | "msfLogGroup": "abc123", 46 | "studioLogStream": "abc123", 47 | "mskClusterName": "abc123", 48 | "SourceTopicName": "sourceTopic" 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /apps/studio/msk-to-studio/cdk-infra/jest.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | testEnvironment: 'node', 3 | roots: ['<rootDir>/test'], 4 | testMatch: ['**/*.test.ts'], 5 | transform: { 6 | '^.+\\.tsx?$': 'ts-jest' 7 | } 8 | }; 9 | -------------------------------------------------------------------------------- /apps/studio/msk-to-studio/cdk-infra/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "cdk-infra-msf-kafka-to-s3", 3 | "version": "0.1.0", 4 | "bin": { 5 | "cdk-infra-msf-kafka-to-s3": "bin/cdk-infra-msf-kafka-to-s3.js" 6 | }, 7 | "scripts": { 8 | "build": "tsc", 9 | "watch": "tsc -w", 10 | "test": "jest", 11 | "cdk": "cdk" 12 | }, 13 | "devDependencies": { 14 | "@types/jest": "29.5.1", 15 | "@types/node": "18.16.0", 16 | "@types/prettier": "2.7.2", 17 | "aws-cdk": "2.76.0", 18 | "jest": "29.5.0", 19 | "ts-jest": "29.1.0", 20 | "ts-node": "10.9.1", 21 | "typescript": "5.0.4" 22 | }, 23 | "dependencies": { 24 | "aws-cdk-lib": "2.85.0", 25 | "constructs": "10.2.4", 26 | "source-map-support": "0.5.21", 27 | "cdk-bootstrapless-synthesizer": "^2.3.2" 28 | 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /apps/studio/msk-to-studio/cdk-infra/test/cdk-infra-kda-kafka-to-s3.test.ts: -------------------------------------------------------------------------------- 1 | // import * as cdk from 'aws-cdk-lib'; 2 | // import { Template } from 'aws-cdk-lib/assertions'; 3 | 4 | // example test.
To run these tests, uncomment this file along with the 5 | // example resource in lib/cdk-infra-msf-kafka-to-s3-stack.ts 6 | test('SQS Queue Created', () => { 7 | // const app = new cdk.App(); 8 | // // WHEN 9 | // // THEN 10 | // const template = Template.fromStack(stack); 11 | 12 | // template.hasResourceProperties('AWS::SQS::Queue', { 13 | // VisibilityTimeout: 300 14 | // }); 15 | }); 16 | -------------------------------------------------------------------------------- /apps/studio/msk-to-studio/cdk-infra/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2018", 4 | "module": "commonjs", 5 | "lib": [ 6 | "es2018" 7 | ], 8 | "declaration": true, 9 | "strict": true, 10 | "noImplicitAny": true, 11 | "strictNullChecks": true, 12 | "noImplicitThis": true, 13 | "alwaysStrict": true, 14 | "noUnusedLocals": false, 15 | "noUnusedParameters": false, 16 | "noImplicitReturns": true, 17 | "noFallthroughCasesInSwitch": false, 18 | "inlineSourceMap": true, 19 | "inlineSources": true, 20 | "experimentalDecorators": true, 21 | "strictPropertyInitialization": false, 22 | "typeRoots": [ 23 | "./node_modules/@types" 24 | ] 25 | }, 26 | "exclude": [ 27 | "node_modules", 28 | "cdk.out" 29 | ] 30 | } 31 | -------------------------------------------------------------------------------- /apps/studio/msk-to-studio/img/.$msk-studio.drawio.bkp: -------------------------------------------------------------------------------- 1 | 7VjbctowEP0aHpvxFchjbCBNk87Q0mnTJ4+wha0gW44swO7Xd2XLYGOH0A4PaScJjLVHq9VqL0fAwHTj/JajNPrMAkwHhhbkA3MyMAxdM4bwkEhRISPLrICQk6CCtAOwIL9wvVKhGxLgTGEVJBijgqRt0GdJgn3RwhDnbNdWWzEatIAUhbgDLHxEu+gPEoioQse2dsA/YhJG9c66pmZiVCsrIItQwHYNyJwOTJczJqpRnLuYyuC14zJ7YXbvGMeJOGeBvfz2dfb8ZTrTnc2dcEbPiWt9UFa2iG7Ugb/PXeWvKOogZGssfHkabWA6bCMoSbC7D7gEQ44CAo64jDIOWMISWOtEIqYg6TDcRUTgRYp8aXMHxQLYiiVCpVw3alltK61CyFI5jvNQVtcV2mXWVcjZJi23vIOk985629SXywVna1y7NDDM8ehavx7KjQilR65uMRcE8n5DSSitCiY3QUqieCWkRfCfJOFDKU1MTfnct0WAsggH6iDdXNWBh11x3oBU7m4xi7HgBaio2bEqo6It7g5FaY3GFRY1CtKwlCJSjRDuLR9qBQaqXP6gdIadMsEBtI4SGRcRC1mC6PSAOpCcJNhH5aDzwGS0yzp5wkIUqijQRrB2FeGciMfG+Kc0dWUraZIry6VQNIQ55gSOjflrCcnYhvv4xLENRUKIh1ic0LMqPRmTk+nlmCJBtm26uXiyjE6f30MPZyQDcIIEgscN5KGAFgBoSGWxLzmMQjn6hLZSQ7pQqS8ExyiWi+Z3p9kiZSQR5WlsB15gw63eNqi6Erky7B6wDxt1Qb2rBg+9b4djsA8bdUG9qyal2us22IeN7K7Hx6v1ntX60Wp4vUC+RxQE/zNZGh1Shrlry57MjMbchHAwREoiTWRDHrEjrLFvTM2x+/h0Vf4dk13NpA9oiemcZUSZXzIhWPwq1frglezTZtu/dlGgLK3CsSK59KP/5uC46u7q3nBA7LtB1lVjeAHUuYf2TXEREjfHwxaL68MujfexeI1dnBfsf5nEIQ+8eGwKDUtSPJgqpaIpXe5GMM+8EYw3dSOYnRvh8+IegEV13HdSfyf1/4jUYyDyEAdeVn5wgY/w3opxb41Wa3QZZt8z+VthdqvT3wvzL77YnVuG6lvUUZWZs/HQsDpVppTfXIGVvIb5dIsretNfKrrlxofIeTsiIo8tn8DKhT4gDI0zysi+SBmBePjxoZxr/IRjTn8D -------------------------------------------------------------------------------- /apps/studio/msk-to-studio/img/.$msk-studio.drawio.dtmp: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /apps/studio/msk-to-studio/img/msk-studio.drawio: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /apps/studio/msk-to-studio/img/msk-studio.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/managed-service-for-apache-flink-blueprints/5c8da457d749d32069e99f2e64687613a6e099ad/apps/studio/msk-to-studio/img/msk-studio.png -------------------------------------------------------------------------------- /bootstrap-cdk/.gitignore: -------------------------------------------------------------------------------- 1 | *.js 2 | !jest.config.js 3 | *.d.ts 4 | node_modules 5 | 6 | # CDK asset staging directory 7 | .cdk.staging 8 | cdk.out 9 | -------------------------------------------------------------------------------- /bootstrap-cdk/.npmignore: -------------------------------------------------------------------------------- 1 | *.ts 2 | !*.d.ts 3 | 4 | # CDK asset staging directory 5 | .cdk.staging 6 | cdk.out 7 | -------------------------------------------------------------------------------- /bootstrap-cdk/README.md: -------------------------------------------------------------------------------- 1 | Run this command to launch this template: 2 | 3 | ```bash 4 | aws cloudformation create-stack --template-body file://./bootstrap-cdk/cdk.out/BootstrapCdkStack.template.json --stack-name bootstrap-my-account --parameters ParameterKey=AssetBucket,ParameterValue=myblueprintdemoassets12345 ParameterKey=AssetList,ParameterValue="https://data-streaming-labs.s3.amazonaws.com/blueprint-test/aws-lambda-helpers-1.0.jar\,https://data-streaming-labs.s3.amazonaws.com/blueprint-test/CdkInfraMsfKafkaToS3Stack.template.json\,https://data-streaming-labs.s3.amazonaws.com/blueprint-test/my-deployment.zip" --capabilities CAPABILITY_IAM 5 | ``` -------------------------------------------------------------------------------- /bootstrap-cdk/bin/main.ts: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | /* 3 | * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | * Apache-2.0 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this 7 | * software and associated documentation files (the "Software"), to deal in the Software 8 | * without restriction, including without limitation the rights to use, copy, modify, 9 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 10 | * permit persons to whom the Software is furnished to do so. 11 | * 12 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 13 | * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 14 | * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 15 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 16 | * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 17 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 18 | */ 19 | 20 | import 'source-map-support/register'; 21 | import * as cdk from 'aws-cdk-lib'; 22 | import { BootstrapCdkStack } from '../lib/bootstrap-cdk-stack'; 23 | import { BootstraplessStackSynthesizer } from 'cdk-bootstrapless-synthesizer'; 24 | 25 | 26 | 27 | const app = new cdk.App(); 28 | 29 | const AssetBucket = app.node.tryGetContext('AssetBucket'); 30 | 31 | // list of links (assets) to download 32 | const AssetList = app.node.tryGetContext('AssetList'); 33 | 34 | const stackName = app.node.tryGetContext('stackName') ? 
app.node.tryGetContext('stackName') : "BootstrapCdkStack" 35 | 36 | new BootstrapCdkStack(app, stackName, { 37 | synthesizer: new BootstraplessStackSynthesizer({ 38 | templateBucketName: 'cfn-template-bucket', 39 | 40 | fileAssetBucketName: 'file-asset-bucket-${AWS::Region}', 41 | fileAssetRegionSet: ['us-west-1', 'us-west-2'], 42 | fileAssetPrefix: 'file-asset-prefix/latest/' 43 | }), 44 | AssetBucket: AssetBucket, 45 | AssetList: AssetList, 46 | }); -------------------------------------------------------------------------------- /bootstrap-cdk/cdk.json: -------------------------------------------------------------------------------- 1 | { 2 | "app": "npx ts-node --prefer-ts-exts bin/main.ts", 3 | "watch": { 4 | "include": [ 5 | "**" 6 | ], 7 | "exclude": [ 8 | "README.md", 9 | "cdk*.json", 10 | "**/*.d.ts", 11 | "**/*.js", 12 | "tsconfig.json", 13 | "package*.json", 14 | "yarn.lock", 15 | "node_modules", 16 | "test" 17 | ] 18 | }, 19 | "context": { 20 | "@aws-cdk/aws-lambda:recognizeLayerVersion": true, 21 | "@aws-cdk/core:checkSecretUsage": true, 22 | "@aws-cdk/core:target-partitions": [ 23 | "aws", 24 | "aws-cn" 25 | ], 26 | "@aws-cdk-containers/ecs-service-extensions:enableDefaultLogDriver": true, 27 | "@aws-cdk/aws-ec2:uniqueImdsv2TemplateName": true, 28 | "@aws-cdk/aws-ecs:arnFormatIncludesClusterName": true, 29 | "@aws-cdk/aws-iam:minimizePolicies": true, 30 | "@aws-cdk/core:validateSnapshotRemovalPolicy": true, 31 | "@aws-cdk/aws-codepipeline:crossAccountKeyAliasStackSafeResourceName": true, 32 | "@aws-cdk/aws-s3:createDefaultLoggingPolicy": true, 33 | "@aws-cdk/aws-sns-subscriptions:restrictSqsDescryption": true, 34 | "@aws-cdk/aws-apigateway:disableCloudWatchRole": true, 35 | "@aws-cdk/core:enablePartitionLiterals": true, 36 | "@aws-cdk/aws-events:eventsTargetQueueSameAccount": true, 37 | "@aws-cdk/aws-iam:standardizedServicePrincipals": true, 38 | "@aws-cdk/aws-ecs:disableExplicitDeploymentControllerForCircuitBreaker": true, 39 | "@aws-cdk/aws-iam:importedRoleStackSafeDefaultPolicyName": true, 40 | "@aws-cdk/aws-s3:serverAccessLogsUseBucketPolicy": true, 41 | "@aws-cdk/aws-route53-patters:useCertificate": true, 42 | "@aws-cdk/customresources:installLatestAwsSdkDefault": false, 43 | "@aws-cdk/aws-rds:databaseProxyUniqueResourceName": true, 44 | "@aws-cdk/aws-codedeploy:removeAlarmsFromDeploymentGroup": true, 45 | "@aws-cdk/aws-apigateway:authorizerChangeDeploymentLogicalId": true, 46 | "@aws-cdk/aws-ec2:launchTemplateDefaultUserData": true, 47 | "@aws-cdk/aws-secretsmanager:useAttachedSecretResourcePolicyForSecretTargetAttachments": true, 48 | "@aws-cdk/aws-redshift:columnId": true, 49 | "@aws-cdk/aws-stepfunctions-tasks:enableEmrServicePolicyV2": true, 50 | "@aws-cdk/aws-ec2:restrictDefaultSecurityGroup": true, 51 | "@aws-cdk/aws-apigateway:requestValidatorUniqueId": true, 52 | "@aws-cdk/aws-kms:aliasNameRef": true, 53 | "@aws-cdk/core:includePrefixInUniqueNameGeneration": true 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /bootstrap-cdk/jest.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | testEnvironment: 'node', 3 | roots: ['<rootDir>/test'], 4 | testMatch: ['**/*.test.ts'], 5 | transform: { 6 | '^.+\\.tsx?$': 'ts-jest' 7 | } 8 | }; 9 | -------------------------------------------------------------------------------- /bootstrap-cdk/lib/bootstrap-cdk-stack.ts: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | *
Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | * Apache-2.0 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this 7 | * software and associated documentation files (the "Software"), to deal in the Software 8 | * without restriction, including without limitation the rights to use, copy, modify, 9 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 10 | * permit persons to whom the Software is furnished to do so. 11 | * 12 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 13 | * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 14 | * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 15 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 16 | * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 17 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 18 | */ 19 | 20 | import * as cdk from 'aws-cdk-lib'; 21 | import { Construct } from 'constructs'; 22 | import { StackProps } from 'aws-cdk-lib'; 23 | import { CopyAssetsLambdaConstruct } from "../../cdk-infra/shared/lib/copy-assets-lambda-construct"; 24 | 25 | 26 | 27 | 28 | export interface GlobalProps extends StackProps { 29 | AssetBucket: string, 30 | AssetList: string, 31 | } 32 | 33 | 34 | 35 | export class BootstrapCdkStack extends cdk.Stack { 36 | constructor(scope: Construct, id: string, props?: GlobalProps) { 37 | super(scope, id, props); 38 | 39 | // we'll be generating a CloudFormation template so we need CFN params 40 | let cfnParams = this.getParams(props); 41 | 42 | 43 | // this construct creates an S3 bucket then copies all the assets passed 44 | const copyAssetsLambdaFn = new CopyAssetsLambdaConstruct(this, 'CopyAssetsLambda', { 45 | account: this.account, 46 | region: this.region, 47 | AssetBucket: cfnParams.get("AssetBucket")!.valueAsString, 48 | AssetList: cfnParams.get("AssetList")!.valueAsString 49 | }); 50 | 51 | 52 | 53 | } // constructor 54 | 55 | 56 | 57 | getParams(props?: GlobalProps): Map<string, cdk.CfnParameter> { 58 | let params = new Map<string, cdk.CfnParameter>(); 59 | const AssetBucket = new cdk.CfnParameter(this, "AssetBucket", { 60 | type: "String", 61 | description: "The S3 bucket to create that will hold the CFN template and assets."
62 | }); 63 | params.set("AssetBucket", AssetBucket); 64 | 65 | const AssetList = new cdk.CfnParameter(this, "AssetList", { 66 | type: "String", 67 | description: "The comma-separated list of asset URLs to copy into the S3 asset bucket."}); 68 | 69 | params.set("AssetList", AssetList); 70 | return params; 71 | 72 | } 73 | 74 | } // class 75 | -------------------------------------------------------------------------------- /bootstrap-cdk/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "bootstrap-cdk", 3 | "version": "0.1.0", 4 | "bin": { 5 | "bootstrap-cdk": "bin/bootstrap-cdk.js" 6 | }, 7 | "scripts": { 8 | "build": "tsc", 9 | "watch": "tsc -w", 10 | "test": "jest", 11 | "cdk": "cdk" 12 | }, 13 | "devDependencies": { 14 | "@types/jest": "29.5.1", 15 | "@types/node": "18.16.0", 16 | "@types/prettier": "2.7.2", 17 | "aws-cdk": "2.76.0", 18 | "jest": "29.5.0", 19 | "ts-jest": "29.1.0", 20 | "ts-node": "10.9.1", 21 | "typescript": "5.0.4" 22 | }, 23 | "dependencies": { 24 | "aws-cdk-lib": "2.85.0", 25 | "constructs": "10.2.4", 26 | "source-map-support": "0.5.21", 27 | "cdk-bootstrapless-synthesizer": "^2.3.2" 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /bootstrap-cdk/test/bootstrap-cdk.test.ts: -------------------------------------------------------------------------------- 1 | // import * as cdk from 'aws-cdk-lib'; 2 | // import { Template } from 'aws-cdk-lib/assertions'; 3 | // import * as BootstrapCdk from '../lib/bootstrap-cdk-stack'; 4 | 5 | // example test. To run these tests, uncomment this file along with the 6 | // example resource in lib/bootstrap-cdk-stack.ts 7 | test('SQS Queue Created', () => { 8 | // const app = new cdk.App(); 9 | // // WHEN 10 | // const stack = new BootstrapCdk.BootstrapCdkStack(app, 'MyTestStack'); 11 | // // THEN 12 | // const template = Template.fromStack(stack); 13 | 14 | // template.hasResourceProperties('AWS::SQS::Queue', { 15 | // VisibilityTimeout: 300 16 | // }); 17 | }); 18 | -------------------------------------------------------------------------------- /bootstrap-cdk/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2020", 4 | "module": "commonjs", 5 | "lib": [ 6 | "es2020", 7 | "dom" 8 | ], 9 | "declaration": true, 10 | "strict": true, 11 | "noImplicitAny": true, 12 | "strictNullChecks": true, 13 | "noImplicitThis": true, 14 | "alwaysStrict": true, 15 | "noUnusedLocals": false, 16 | "noUnusedParameters": false, 17 | "noImplicitReturns": true, 18 | "noFallthroughCasesInSwitch": false, 19 | "inlineSourceMap": true, 20 | "inlineSources": true, 21 | "experimentalDecorators": true, 22 | "strictPropertyInitialization": false, 23 | "typeRoots": [ 24 | "./node_modules/@types" 25 | ] 26 | }, 27 | "exclude": [ 28 | "node_modules", 29 | "cdk.out" 30 | ] 31 | } 32 | -------------------------------------------------------------------------------- /cdk-infra/shared/README.md: -------------------------------------------------------------------------------- 1 | # Welcome to your CDK TypeScript project 2 | 3 | This project contains the shared CDK TypeScript constructs and helpers used by the blueprints. 4 | 5 | The `cdk.json` file tells the CDK Toolkit how to execute your app.
6 | 7 | ## Useful commands 8 | 9 | * `npm run build` compile typescript to js 10 | * `npm run watch` watch for changes and compile 11 | * `npm run test` perform the jest unit tests 12 | * `cdk deploy` deploy this stack to your default AWS account/region 13 | * `cdk diff` compare deployed stack with current state 14 | * `cdk synth` emits the synthesized CloudFormation template 15 | -------------------------------------------------------------------------------- /cdk-infra/shared/bin/main.d.ts: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | import 'source-map-support/register'; 3 | -------------------------------------------------------------------------------- /cdk-infra/shared/bin/main.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | "use strict"; 3 | Object.defineProperty(exports, "__esModule", { value: true }); 4 | require("source-map-support/register"); 5 | const cdk = require("aws-cdk-lib"); 6 | const app = new cdk.App(); 7 | //# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoibWFpbi5qcyIsInNvdXJjZVJvb3QiOiIiLCJzb3VyY2VzIjpbIm1haW4udHMiXSwibmFtZXMiOltdLCJtYXBwaW5ncyI6Ijs7O0FBQ0EsdUNBQXFDO0FBQ3JDLG1DQUFtQztBQUVuQyxNQUFNLEdBQUcsR0FBRyxJQUFJLEdBQUcsQ0FBQyxHQUFHLEVBQUUsQ0FBQyIsInNvdXJjZXNDb250ZW50IjpbIiMhL3Vzci9iaW4vZW52IG5vZGVcbmltcG9ydCAnc291cmNlLW1hcC1zdXBwb3J0L3JlZ2lzdGVyJztcbmltcG9ydCAqIGFzIGNkayBmcm9tICdhd3MtY2RrLWxpYic7XG5cbmNvbnN0IGFwcCA9IG5ldyBjZGsuQXBwKCk7Il19 -------------------------------------------------------------------------------- /cdk-infra/shared/bin/main.ts: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | /* 3 | * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | * Apache-2.0 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this 7 | * software and associated documentation files (the "Software"), to deal in the Software 8 | * without restriction, including without limitation the rights to use, copy, modify, 9 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 10 | * permit persons to whom the Software is furnished to do so. 11 | * 12 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 13 | * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 14 | * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 15 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 16 | * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 17 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
18 | */ 19 | 20 | import 'source-map-support/register'; 21 | import * as cdk from 'aws-cdk-lib'; 22 | 23 | const app = new cdk.App(); -------------------------------------------------------------------------------- /cdk-infra/shared/cdk.json: -------------------------------------------------------------------------------- 1 | { 2 | "app": "npx ts-node --prefer-ts-exts bin/main.ts", 3 | "watch": { 4 | "include": ["**"], 5 | "exclude": [ 6 | "README.md", 7 | "cdk*.json", 8 | "**/*.d.ts", 9 | "**/*.js", 10 | "tsconfig.json", 11 | "package*.json", 12 | "yarn.lock", 13 | "node_modules", 14 | "test" 15 | ] 16 | }, 17 | "context": { 18 | "@aws-cdk/aws-apigateway:usagePlanKeyOrderInsensitiveId": true, 19 | "@aws-cdk/core:stackRelativeExports": true, 20 | "@aws-cdk/aws-rds:lowercaseDbIdentifier": true, 21 | "@aws-cdk/aws-lambda:recognizeVersionProps": true, 22 | "@aws-cdk/aws-lambda:recognizeLayerVersion": true, 23 | "@aws-cdk/aws-cloudfront:defaultSecurityPolicyTLSv1.2_2021": true, 24 | "@aws-cdk-containers/ecs-service-extensions:enableDefaultLogDriver": true, 25 | "@aws-cdk/aws-ec2:uniqueImdsv2TemplateName": true, 26 | "@aws-cdk/core:checkSecretUsage": true, 27 | "@aws-cdk/aws-iam:minimizePolicies": true, 28 | "@aws-cdk/aws-ecs:arnFormatIncludesClusterName": true, 29 | "@aws-cdk/core:validateSnapshotRemovalPolicy": true, 30 | "@aws-cdk/aws-codepipeline:crossAccountKeyAliasStackSafeResourceName": true, 31 | "@aws-cdk/aws-s3:createDefaultLoggingPolicy": true, 32 | "@aws-cdk/aws-sns-subscriptions:restrictSqsDescryption": true, 33 | "@aws-cdk/aws-apigateway:disableCloudWatchRole": true, 34 | "@aws-cdk/core:enablePartitionLiterals": true, 35 | "@aws-cdk/core:target-partitions": ["aws", "aws-cn"] 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /cdk-infra/shared/jest.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | testEnvironment: 'node', 3 | roots: ['<rootDir>/test'], 4 | testMatch: ['**/*.test.ts'], 5 | transform: { 6 | '^.+\\.tsx?$': 'ts-jest' 7 | } 8 | }; 9 | -------------------------------------------------------------------------------- /cdk-infra/shared/lambda/aws-lambda-helpers/.gitignore: -------------------------------------------------------------------------------- 1 | # maven 2 | 3 | target/ 4 | pom.xml.tag 5 | pom.xml.releaseBackup 6 | pom.xml.versionsBackup 7 | pom.xml.next 8 | release.properties 9 | dependency-reduced-pom.xml 10 | buildNumber.properties 11 | .mvn/timing.properties 12 | # https://github.com/takari/maven-wrapper#usage-without-binary-jar 13 | .mvn/wrapper/maven-wrapper.jar 14 | 15 | # Eclipse m2e generated files 16 | # Eclipse Core 17 | .project 18 | # JDT-specific (Eclipse Java Development Tools) 19 | .classpath 20 | 21 | # intellij 22 | 23 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider 24 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 25 | 26 | # User-specific stuff 27 | .idea/**/workspace.xml 28 | .idea/**/tasks.xml 29 | .idea/**/usage.statistics.xml 30 | .idea/**/dictionaries 31 | .idea/**/shelf 32 | 33 | # AWS User-specific 34 | .idea/**/aws.xml 35 | 36 | # Generated files 37 | .idea/**/contentModel.xml 38 | 39 | # Sensitive or high-churn files 40 | .idea/**/dataSources/ 41 | .idea/**/dataSources.ids 42 | .idea/**/dataSources.local.xml 43 | .idea/**/sqlDataSources.xml 44 | .idea/**/dynamic.xml 45 | .idea/**/uiDesigner.xml 46 | .idea/**/dbnavigator.xml 47
| 48 | # Gradle 49 | .idea/**/gradle.xml 50 | .idea/**/libraries 51 | 52 | # Gradle and Maven with auto-import 53 | # When using Gradle or Maven with auto-import, you should exclude module files, 54 | # since they will be recreated, and may cause churn. Uncomment if using 55 | # auto-import. 56 | # .idea/artifacts 57 | # .idea/compiler.xml 58 | # .idea/jarRepositories.xml 59 | # .idea/modules.xml 60 | # .idea/*.iml 61 | # .idea/modules 62 | # *.iml 63 | # *.ipr 64 | 65 | # CMake 66 | cmake-build-*/ 67 | 68 | # Mongo Explorer plugin 69 | .idea/**/mongoSettings.xml 70 | 71 | # File-based project format 72 | *.iws 73 | 74 | # IntelliJ 75 | out/ 76 | 77 | # mpeltonen/sbt-idea plugin 78 | .idea_modules/ 79 | 80 | # JIRA plugin 81 | atlassian-ide-plugin.xml 82 | 83 | # Cursive Clojure plugin 84 | .idea/replstate.xml 85 | 86 | # SonarLint plugin 87 | .idea/sonarlint/ 88 | 89 | # Crashlytics plugin (for Android Studio and IntelliJ) 90 | com_crashlytics_export_strings.xml 91 | crashlytics.properties 92 | crashlytics-build.properties 93 | fabric.properties 94 | 95 | # Editor-based Rest Client 96 | .idea/httpRequests 97 | 98 | # Android studio 3.1+ serialized cache file 99 | .idea/caches/build_file_checksums.ser -------------------------------------------------------------------------------- /cdk-infra/shared/lambda/aws-lambda-helpers/README.md: -------------------------------------------------------------------------------- 1 | # Lambda code (in Java) for creating MSK topics 2 | 3 | This project contains Java code for creating topics against MSK, and is meant to be used in AWS Lambda functions. Note that you don't have to build this project or otherwise interact with it to use the CDK templates that reference the output of this project. With that said, here's the info for building this project and using it in the referencing CDK template. 
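As a rough sketch of how the jar is typically wired up on the CDK side (the authoritative wiring lives in the constructs under `cdk-infra/shared/lib`, such as `msk-topic-creation-lambda-construct.ts`), a stack might attach the shaded jar to a Java Lambda function as below. The asset path and handler class here are illustrative assumptions, not values taken from those constructs:

```
import * as cdk from 'aws-cdk-lib';
import * as lambda from 'aws-cdk-lib/aws-lambda';
import { Construct } from 'constructs';

// Sketch only: wires the uber jar built by this project into a Java 11 Lambda.
export class TopicGenLambdaSketch extends Construct {
  public readonly fn: lambda.Function;

  constructor(scope: Construct, id: string) {
    super(scope, id);

    this.fn = new lambda.Function(this, 'TopicGenFunction', {
      runtime: lambda.Runtime.JAVA_11,
      // Path to the uber jar produced by `mvn clean package shade:shade`
      // (assumed location, for illustration only)
      code: lambda.Code.fromAsset('lambda/aws-lambda-helpers-1.0.jar'),
      // Fully qualified handler class; TopicGenHandler lives in this project
      handler: 'com.amazonaws.TopicGenHandler',
      timeout: cdk.Duration.minutes(5),
      memorySize: 1024,
    });
  }
}
```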
4 | 5 | ## Building this project 6 | 7 | - Run `mvn clean package shade:shade` 8 | - Copy the uber jar named `aws-lambda-helpers-1.0.jar` from the `target` folder to the level above this project -------------------------------------------------------------------------------- /cdk-infra/shared/lambda/aws-lambda-helpers/pom.xml: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="UTF-8"?> 2 | <project xmlns="http://maven.apache.org/POM/4.0.0" 3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> 5 |   <modelVersion>4.0.0</modelVersion> 6 | 7 |   <groupId>com.amazonaws</groupId> 8 |   <artifactId>aws-lambda-helpers</artifactId> 9 |   <version>1.0</version> 10 | 11 |   <name>aws-lambda-helpers</name> 12 | 13 |   <properties> 14 |     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> 15 |     <maven.compiler.source>1.11</maven.compiler.source> 16 |     <jdk.version>11</jdk.version> 17 |     <maven.compiler.target>1.11</maven.compiler.target> 18 |     <java.version>1.11</java.version> 19 |     <kafka.clients.version>2.7.2</kafka.clients.version> 20 |   </properties> 21 | 22 |   <dependencies> 23 |     <dependency> 24 |       <groupId>com.amazonaws</groupId> 25 |       <artifactId>aws-lambda-java-core</artifactId> 26 |       <version>1.2.1</version> 27 |     </dependency> 28 |     <dependency> 29 |       <groupId>org.apache.kafka</groupId> 30 |       <artifactId>kafka-clients</artifactId> 31 |       <version>${kafka.clients.version}</version> 32 |     </dependency> 33 |     <dependency> 34 |       <groupId>software.amazon.lambda</groupId> 35 |       <artifactId>powertools-cloudformation</artifactId> 36 |       <version>1.15.0</version> 37 |     </dependency> 38 | 39 |     <dependency> 40 |       <groupId>software.amazon.msk</groupId> 41 |       <artifactId>aws-msk-iam-auth</artifactId> 42 |       <version>1.1.5</version> 43 |     </dependency> 44 |     <dependency> 45 |       <groupId>com.google.code.gson</groupId> 46 |       <artifactId>gson</artifactId> 47 |       <version>2.10.1</version> 48 |     </dependency> 49 |     <dependency> 50 |       <groupId>junit</groupId> 51 |       <artifactId>junit</artifactId> 52 |       <version>4.13.1</version> 53 |       <scope>test</scope> 54 |     </dependency> 55 |   </dependencies> 56 | 57 |   <build> 58 |     <plugins> 59 |       <plugin> 60 |         <groupId>org.apache.maven.plugins</groupId> 61 |         <artifactId>maven-compiler-plugin</artifactId> 62 |         <version>3.8.0</version> 63 |         <configuration> 64 |           <source>${jdk.version}</source> 65 |           <target>${jdk.version}</target> 66 |         </configuration> 67 |       </plugin> 68 |       <plugin> 69 |         <artifactId>maven-surefire-plugin</artifactId> 70 |         <version>2.22.1</version> 71 |       </plugin> 72 |       <plugin> 73 |         <groupId>org.apache.maven.plugins</groupId> 74 |         <artifactId>maven-shade-plugin</artifactId> 75 |         <version>3.1.1</version> 76 |         <executions> 77 |           <execution> 78 |             <phase>package</phase> 79 |             <goals> 80 |               <goal>shade</goal> 81 |             </goals> 82 |             <configuration> 83 |               <filters> 84 |                 <filter> 85 |                   <artifact>*:*</artifact> 86 |                   <excludes> 87 |                     <exclude>META-INF/*.SF</exclude> 88 |                     <exclude>META-INF/*.DSA</exclude> 89 |                     <exclude>META-INF/*.RSA</exclude> 90 |                   </excludes> 91 |                 </filter> 92 |               </filters> 93 |               <transformers> 94 |                 <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer"> 95 |                   <mainClass>com.amazonaws.App</mainClass> 96 |                 </transformer> 97 |               </transformers> 98 |             </configuration> 99 |           </execution> 100 |         </executions> 101 |       </plugin> 102 |     </plugins> 103 |   </build> 104 | </project> -------------------------------------------------------------------------------- /cdk-infra/shared/lambda/aws-lambda-helpers/src/main/java/com/amazonaws/App.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | * Apache-2.0 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this 6 | * software and associated documentation files (the "Software"), to deal in the Software 7 | * without restriction, including without limitation the rights to use, copy, modify, 8 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 9 | * permit persons to whom the Software is furnished to do so. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 12 | * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 13 | * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 14 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 15 | * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 16 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
17 | */ 18 | 19 | package com.amazonaws; 20 | 21 | public class App { 22 | public static void main(String[] args) throws Exception { 23 | System.out.println("Starting program..."); 24 | 25 | String bootstrapServers = "boot-8vkw6qcx.c1.kafka-serverless.us-east-1.amazonaws.com:9098"; 26 | String topic = "sourceTopic"; 27 | MSKDataGen mskDataGen = new MSKDataGen(bootstrapServers, topic, 100); 28 | mskDataGen.run(); 29 | } 30 | } -------------------------------------------------------------------------------- /cdk-infra/shared/lambda/aws-lambda-helpers/src/main/java/com/amazonaws/DataPayload.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | * Apache-2.0 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this 6 | * software and associated documentation files (the "Software"), to deal in the Software 7 | * without restriction, including without limitation the rights to use, copy, modify, 8 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 9 | * permit persons to whom the Software is furnished to do so. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 12 | * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 13 | * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 14 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 15 | * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 16 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 17 | */ 18 | 19 | package com.amazonaws; 20 | 21 | import com.google.gson.Gson; 22 | 23 | /* 24 | The purpose of this hierarchy of classes is to ensure that we properly structure our response for 25 | CDK: {'Data': {'attributes': {'Response': <response>}}} 26 | */ 27 | public class DataPayload { 28 | public AttributesPayload Data; 29 | 30 | public DataPayload() { 31 | } 32 | 33 | public DataPayload(String response) { 34 | this.Data = new AttributesPayload(response); 35 | } 36 | 37 | public String asJson() { 38 | Gson gson = new Gson(); 39 | return gson.toJson(this); 40 | } 41 | 42 | public static class AttributesPayload { 43 | public ResponsePayload attributes; 44 | 45 | public AttributesPayload(String response) { 46 | this.attributes = new ResponsePayload(response); 47 | } 48 | } 49 | 50 | public static class ResponsePayload { 51 | public ResponsePayload(String response) { 52 | this.Response = response; 53 | } 54 | 55 | public String Response; 56 | } 57 | } -------------------------------------------------------------------------------- /cdk-infra/shared/lambda/aws-lambda-helpers/src/main/java/com/amazonaws/MSKDataGenHandler.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | * Apache-2.0 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this 6 | * software and associated documentation files (the "Software"), to deal in the Software 7 | * without restriction, including without limitation the rights to use, copy, modify, 8 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 9 | * permit persons to whom the Software is furnished to do so.
10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 12 | * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 13 | * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 14 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 15 | * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 16 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 17 | */ 18 | 19 | package com.amazonaws; 20 | 21 | import com.amazonaws.services.lambda.runtime.Context; 22 | import com.amazonaws.services.lambda.runtime.LambdaLogger; 23 | import com.amazonaws.services.lambda.runtime.RequestHandler; 24 | import com.google.gson.Gson; 25 | import com.google.gson.GsonBuilder; 26 | import com.google.gson.reflect.TypeToken; 27 | 28 | import java.lang.reflect.Type; 29 | import java.util.Map; 30 | 31 | public class MSKDataGenHandler implements RequestHandler<Map<String, String>, String> { 32 | Gson gson = new GsonBuilder().setPrettyPrinting().create(); 33 | 34 | @Override 35 | public String handleRequest(Map<String, String> event, Context context) { 36 | LambdaLogger logger = context.getLogger(); 37 | 38 | String response = new String("200 OK"); 39 | // log execution details 40 | logger.log("ENVIRONMENT VARIABLES: " + gson.toJson(System.getenv())); 41 | logger.log("CONTEXT: " + gson.toJson(context)); 42 | // process event 43 | logger.log("EVENT : " + gson.toJson(event) + "\n"); 44 | logger.log("EVENT TYPE: " + event.getClass().toString() + "\n"); 45 | 46 | try { 47 | String broker = event.get("Broker"); 48 | String topic = event.get("Topic"); 49 | 50 | logger.log("Broker: " + broker + "\n"); 51 | logger.log("Topic: " + topic + "\n"); 52 | 53 | MSKDataGen dataGen = new MSKDataGen(broker, topic); 54 | dataGen.run(); 55 | 56 | return new String("Success"); 57 | } catch (Exception ex) { 58 | logger.log(ex.getMessage()); 59 | return new String("Failed"); 60 | } 61 | } // handleRequest 62 | } // class MSKDataGenHandler -------------------------------------------------------------------------------- /cdk-infra/shared/lambda/aws-lambda-helpers/src/main/java/com/amazonaws/ResourceProperties.java: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | * Apache-2.0 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this 7 | * software and associated documentation files (the "Software"), to deal in the Software 8 | * without restriction, including without limitation the rights to use, copy, modify, 9 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 10 | * permit persons to whom the Software is furnished to do so. 11 | * 12 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 13 | * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 14 | * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 15 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 16 | * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 17 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
18 | */ 19 | 20 | package com.amazonaws; 21 | 22 | import java.util.Map; 23 | 24 | public class ResourceProperties { 25 | public String Broker; 26 | 27 | public String Topic; 28 | 29 | public int NumPartitions; 30 | 31 | public short ReplicationFactor; 32 | 33 | public static ResourceProperties fromMap(Map<String, Object> resourceProperties) { 34 | ResourceProperties retVal = new ResourceProperties(); 35 | 36 | retVal.Broker = resourceProperties.get("Broker").toString(); 37 | retVal.Topic = resourceProperties.get("Topic").toString(); 38 | retVal.NumPartitions = Integer.parseInt(resourceProperties.get("NumPartitions").toString()); 39 | retVal.ReplicationFactor = Short.parseShort(resourceProperties.get("ReplicationFactor").toString()); 40 | 41 | return retVal; 42 | } 43 | } -------------------------------------------------------------------------------- /cdk-infra/shared/lambda/aws-lambda-helpers/src/main/java/com/amazonaws/Stock.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | * Apache-2.0 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this 6 | * software and associated documentation files (the "Software"), to deal in the Software 7 | * without restriction, including without limitation the rights to use, copy, modify, 8 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 9 | * permit persons to whom the Software is furnished to do so. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 12 | * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 13 | * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 14 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 15 | * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 16 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 17 | */ 18 | 19 | package com.amazonaws; 20 | 21 | public class Stock { 22 | 23 | private String event_time; 24 | private String ticker; 25 | private float price; 26 | 27 | public void setEvent_time(String event_time) { 28 | this.event_time = event_time; 29 | } 30 | 31 | public void setTicker(String ticker) { 32 | this.ticker = ticker; 33 | } 34 | 35 | public void setPrice(float price) { 36 | this.price = price; 37 | } 38 | 39 | } 40 | -------------------------------------------------------------------------------- /cdk-infra/shared/lambda/aws-lambda-helpers/src/test/java/com/amazonaws/AppTest.java: -------------------------------------------------------------------------------- 1 | package com.amazonaws; 2 | 3 | import static org.junit.Assert.assertTrue; 4 | 5 | import org.junit.Test; 6 | 7 | /** 8 | * Unit test for simple App.
9 | */ 10 | public class AppTest 11 | { 12 | @Test 13 | public void gsonShouldSerializeResponse() { 14 | DataPayload dataPayload = new DataPayload("My Response"); 15 | String asJson = dataPayload.asJson(); 16 | assertTrue( asJson.equals("{\"Data\":{\"attributes\":{\"Response\":\"My Response\"}}}") ); 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /cdk-infra/shared/lib/.gitignore: -------------------------------------------------------------------------------- 1 | *.d.ts 2 | *.js 3 | -------------------------------------------------------------------------------- /cdk-infra/shared/lib/app-start-lambda-construct.ts: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | * Apache-2.0 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this 6 | * software and associated documentation files (the "Software"), to deal in the Software 7 | * without restriction, including without limitation the rights to use, copy, modify, 8 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 9 | * permit persons to whom the Software is furnished to do so. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 12 | * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 13 | * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 14 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 15 | * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 16 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
17 | */ 18 | 19 | import { readFileSync } from "fs"; 20 | import { StackProps } from 'aws-cdk-lib'; 21 | import * as cdk from 'aws-cdk-lib'; 22 | import { Construct } from 'constructs'; 23 | import * as ec2 from 'aws-cdk-lib/aws-ec2'; 24 | import * as iam from 'aws-cdk-lib/aws-iam'; 25 | import * as lambda from 'aws-cdk-lib/aws-lambda'; 26 | 27 | export interface AppStartLambdaConstructProps extends StackProps { 28 | account: string, 29 | region: string, 30 | appName: string, 31 | } 32 | 33 | export class AppStartLambdaConstruct extends Construct { 34 | public appStartLambdaFn: lambda.SingletonFunction; 35 | 36 | constructor(scope: Construct, id: string, props: AppStartLambdaConstructProps) { 37 | super(scope, id); 38 | 39 | 40 | // Run app start lambda 41 | this.appStartLambdaFn = new lambda.SingletonFunction(this, 'AppStartFunction', { 42 | uuid: '97e4f730-4ee1-11e8-3c2d-fa7ae01b6ebc', 43 | lambdaPurpose: "Start MSF Application", 44 | code: lambda.Code.fromInline(readFileSync(`${__dirname}/../../../python/lambda_msf_app_start.py`, "utf-8")), 45 | handler: "index.handler", 46 | initialPolicy: [ 47 | new iam.PolicyStatement( 48 | { 49 | actions: ['kinesisanalytics:DescribeApplication', 50 | 'kinesisanalytics:StartApplication',], 51 | 52 | resources: ['arn:aws:kinesisanalytics:' + props.region + ':' + props.account + ':application/' + props.appName] 53 | }) 54 | ], 55 | timeout: cdk.Duration.seconds(600), 56 | runtime: lambda.Runtime.PYTHON_3_9, 57 | memorySize: 1024, // need extra memory for kafka-client 58 | }); 59 | } 60 | } -------------------------------------------------------------------------------- /cdk-infra/shared/lib/copy-assets-lambda-construct.ts: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | * Apache-2.0 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this 6 | * software and associated documentation files (the "Software"), to deal in the Software 7 | * without restriction, including without limitation the rights to use, copy, modify, 8 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 9 | * permit persons to whom the Software is furnished to do so. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 12 | * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 13 | * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 14 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 15 | * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 16 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
17 | */ 18 | 19 | import { StackProps } from 'aws-cdk-lib'; 20 | import * as cdk from 'aws-cdk-lib'; 21 | import { Construct } from 'constructs'; 22 | import * as iam from 'aws-cdk-lib/aws-iam'; 23 | import * as lambda from 'aws-cdk-lib/aws-lambda'; 24 | import { aws_s3 as s3 } from 'aws-cdk-lib'; 25 | import { readFileSync } from "fs"; 26 | 27 | 28 | 29 | export interface CopyAssetsConstructProps extends StackProps { 30 | account: string, 31 | region: string, 32 | AssetBucket: string, 33 | AssetList: string, 34 | } 35 | 36 | export class CopyAssetsLambdaConstruct extends Construct { 37 | public copyAssetsLambdaFn: lambda.SingletonFunction; 38 | public s3_bucket: s3.Bucket; 39 | 40 | 41 | constructor(scope: Construct, id: string, props: CopyAssetsConstructProps) { 42 | super(scope, id); 43 | 44 | 45 | 46 | // app package s3 bucket 47 | this.s3_bucket = new s3.Bucket(this, 'AssetsS3Bucket', { 48 | bucketName: props.AssetBucket, 49 | encryption: s3.BucketEncryption.S3_MANAGED, 50 | enforceSSL: true, 51 | versioned: false, 52 | removalPolicy: cdk.RemovalPolicy.DESTROY, 53 | objectOwnership: s3.ObjectOwnership.BUCKET_OWNER_PREFERRED, 54 | 55 | }) 56 | 57 | 58 | // Run copy assets creation lambda 59 | this.copyAssetsLambdaFn = new lambda.SingletonFunction(this, 'CopyAssetsFunction', { 60 | uuid: '97e4f730-4ee1-11e8-3c2d-fa7ae01b6ebc', 61 | code: lambda.Code.fromInline(readFileSync(`${__dirname}/../../../python/lambda_copy_assets_to_s3.py`, "utf-8")), 62 | handler: "index.handler", 63 | initialPolicy: [ 64 | new iam.PolicyStatement( 65 | { 66 | actions: ["s3:PutObject", 67 | "s3:PutObjectAcl", 68 | "s3:GetObject", 69 | "s3:GetObjectAcl", 70 | "s3:DeleteObject", 71 | "s3:ListBucket", 72 | "s3:GetBucketLocation"], 73 | resources: ['arn:aws:s3:::' + props.AssetBucket + '/*', 74 | 'arn:aws:s3:::' + props.AssetBucket] 75 | }) 76 | ], 77 | timeout: cdk.Duration.seconds(300), 78 | runtime: lambda.Runtime.PYTHON_3_9, 79 | memorySize: 256, 80 | environment: { 81 | AssetList: props.AssetList, 82 | bucketName: props.AssetBucket, 83 | }, 84 | 85 | }); 86 | 87 | this.s3_bucket.grantPutAcl(this.copyAssetsLambdaFn); 88 | 89 | const resource = new cdk.CustomResource(this, 'CopyAssetsLambdaResource', { 90 | serviceToken: this.copyAssetsLambdaFn.functionArn 91 | }); 92 | 93 | resource.node.addDependency(this.copyAssetsLambdaFn); 94 | resource.node.addDependency(this.s3_bucket); 95 | } 96 | } -------------------------------------------------------------------------------- /cdk-infra/shared/lib/create-studio-app-lambda-construct.ts: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | * Apache-2.0 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this 6 | * software and associated documentation files (the "Software"), to deal in the Software 7 | * without restriction, including without limitation the rights to use, copy, modify, 8 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 9 | * permit persons to whom the Software is furnished to do so. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 12 | * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 13 | * PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 14 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 15 | * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 16 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 17 | */ 18 | 19 | import { IResolvable, StackProps } from "aws-cdk-lib"; 20 | import * as cdk from 'aws-cdk-lib'; 21 | import { Construct } from 'constructs'; 22 | import * as ec2 from 'aws-cdk-lib/aws-ec2'; 23 | import * as iam from 'aws-cdk-lib/aws-iam'; 24 | import * as lambda from 'aws-cdk-lib/aws-lambda'; 25 | import { aws_logs as logs } from "aws-cdk-lib"; 26 | import * as kinesisanalyticsv2 from "aws-cdk-lib/aws-kinesisanalyticsv2"; 27 | import { readFileSync } from "fs"; 28 | 29 | 30 | 31 | 32 | export interface CreateStudioAppProps extends StackProps { 33 | account?: string; 34 | region?: string; 35 | vpc: ec2.Vpc | undefined | null, 36 | mskSG: ec2.SecurityGroup | undefined | null, 37 | logGroup: logs.LogGroup; 38 | logStream: logs.LogStream; 39 | msfAppName: string; 40 | glueDatabaseName: string; 41 | serviceExecutionRole: string; 42 | RuntimeEnvironment: string; 43 | bootstrapString: string; 44 | SourceTopicName: string; 45 | blueprintName: string; 46 | bootstrapStackName: string; 47 | } 48 | 49 | export class CreateStudioApp extends Construct { 50 | public createStudioAppFn: lambda.SingletonFunction; 51 | 52 | constructor(scope: Construct, id: string, props: CreateStudioAppProps) { 53 | super(scope, id); 54 | 55 | 56 | const stack = cdk.Stack.of(this); 57 | 58 | const subnet1 = props.vpc!.selectSubnets({ 59 | subnetType: ec2.SubnetType.PRIVATE_WITH_EGRESS}).subnetIds.at(0)!.toString(); 60 | 61 | // Run app creation lambda 62 | this.createStudioAppFn = new lambda.SingletonFunction(this, 'CreateStudioAppFn', { 63 | uuid: 'a0b1c0c0-bc70-44bb-a514-ff763aa4182f', 64 | lambdaPurpose: "Create MSF Studio Application", 65 | code: lambda.Code.fromInline(readFileSync(`${__dirname}/../../../python/lambda_create_studio_app.py`, "utf-8")), 66 | handler: "index.handler", 67 | initialPolicy: [ 68 | new iam.PolicyStatement( 69 | { 70 | actions: ["kinesisanalytics:DeleteApplicationVpcConfiguration", 71 | "iam:PassRole", 72 | "kinesisanalytics:DeleteApplication", 73 | "kinesisanalytics:DescribeApplication", 74 | "kinesisanalytics:StartApplication", 75 | "kinesisanalytics:CreateApplication"], 76 | resources: ['arn:aws:kinesisanalytics:' + props.region + ':' + props.account + ':application/' + props.msfAppName, 77 | props.serviceExecutionRole], 78 | conditions: { 79 | StringEqualsIfExists: { 80 | "iam:PassedToService": "kinesisanalytics.amazonaws.com", 81 | }, 82 | ArnEqualsIfExists: { 83 | "iam:AssociatedResourceARN": "arn:aws:kinesisanalytics:" + props.region + ":" + props.account + ":application/" + props.msfAppName 84 | } 85 | } 86 | }), 87 | 88 | ], 89 | timeout: cdk.Duration.seconds(300), 90 | runtime: lambda.Runtime.PYTHON_3_9, 91 | memorySize: 512, 92 | environment: { 93 | app_name: props.msfAppName, 94 | bootstrap_string: props.bootstrapString, 95 | execution_role: props.serviceExecutionRole, 96 | glue_db_arn: `arn:aws:glue:${props.region}:${props.account}:database/${props.glueDatabaseName}`, 97 | log_stream_arn: `arn:aws:logs:${props.region}` + 98 | `:${props.account}:log-group:` + 99 | `${props.logGroup.logGroupName}:log-stream:${props.logStream.logStreamName}`, 100 | security_group: props.mskSG!.securityGroupId, 101 | source_topic_name: props.SourceTopicName, 102 | subnet_1: subnet1, 103 | RuntimeEnvironment: 
props.RuntimeEnvironment, 104 | stackId: stack.stackId, 105 | blueprintName: props!.blueprintName, 106 | bootstrapStackName: props!.bootstrapStackName, 107 | }, 108 | }); 109 | } 110 | } -------------------------------------------------------------------------------- /cdk-infra/shared/lib/kda-construct.ts: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | * Apache-2.0 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this 6 | * software and associated documentation files (the "Software"), to deal in the Software 7 | * without restriction, including without limitation the rights to use, copy, modify, 8 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 9 | * permit persons to whom the Software is furnished to do so. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 12 | * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 13 | * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 14 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 15 | * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 16 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 17 | */ 18 | 19 | import { IResolvable, Stack, StackProps } from 'aws-cdk-lib'; 20 | import { Construct } from 'constructs'; 21 | import * as ec2 from 'aws-cdk-lib/aws-ec2'; 22 | import * as kinesisanalyticsv2 from 'aws-cdk-lib/aws-kinesisanalyticsv2'; 23 | import { aws_logs as logs } from 'aws-cdk-lib'; 24 | 25 | 26 | export interface MSFContructProps extends StackProps { 27 | account: string, 28 | region: string, 29 | vpc: ec2.Vpc | undefined | null, 30 | mskSG: ec2.SecurityGroup | undefined | null, 31 | logGroup: logs.LogGroup, 32 | logStream: logs.LogStream, 33 | msfAppName: string, 34 | appBucket: string, 35 | appFileKeyOnS3: string, 36 | runtimeEnvironment: string, 37 | serviceExecutionRole: string, 38 | flinkApplicationProperties: { [key: string]: string; } | undefined, 39 | pyFlinkRunOptions: { [key: string]: string; } | undefined | null, 40 | } 41 | 42 | export class MSFConstruct extends Construct { 43 | public cfnApplicationProps: kinesisanalyticsv2.CfnApplicationProps; 44 | public msfApp: kinesisanalyticsv2.CfnApplication; 45 | public cwlogsOption: kinesisanalyticsv2.CfnApplicationCloudWatchLoggingOption; 46 | 47 | constructor(scope: Construct, id: string, props: MSFContructProps) { 48 | super(scope, id); 49 | 50 | let propertyGroups = [ 51 | { 52 | propertyGroupId: "BlueprintMetadata", 53 | propertyMap: props.flinkApplicationProperties 54 | } 55 | ]; 56 | 57 | if(props!.pyFlinkRunOptions != null) { 58 | propertyGroups.push({ 59 | propertyGroupId: "kinesis.analytics.flink.run.options", 60 | propertyMap: props!.pyFlinkRunOptions 61 | }); 62 | } 63 | 64 | let vpcConfigurations = undefined as IResolvable | (IResolvable | kinesisanalyticsv2.CfnApplication.VpcConfigurationProperty)[] | undefined; 65 | if(props!.vpc != undefined && 66 | props!.mskSG != undefined) { 67 | vpcConfigurations = [ 68 | { 69 | subnetIds: props.vpc.selectSubnets({ 70 | subnetType: ec2.SubnetType.PRIVATE_WITH_EGRESS, 71 | }).subnetIds, 72 | securityGroupIds: [props.mskSG.securityGroupId] 73 | } 74 | ] 75 | } 76 | 77 | // application properties (actual app is below) 78 | this.cfnApplicationProps = { 79 | 
runtimeEnvironment: props.runtimeEnvironment, 80 | 81 | serviceExecutionRole: props.serviceExecutionRole, 82 | applicationName: props.msfAppName, 83 | 84 | applicationConfiguration: { 85 | flinkApplicationConfiguration: { 86 | checkpointConfiguration: { 87 | configurationType: 'CUSTOM', 88 | checkpointingEnabled: true, 89 | checkpointInterval: 60000, 90 | minPauseBetweenCheckpoints: 5000 91 | }, 92 | monitoringConfiguration: { 93 | configurationType: "CUSTOM", 94 | metricsLevel: "OPERATOR", 95 | logLevel: "INFO" 96 | }, 97 | parallelismConfiguration: { 98 | configurationType: "CUSTOM", 99 | parallelism: 2, 100 | parallelismPerKpu: 1, 101 | autoScalingEnabled: false 102 | } 103 | }, 104 | vpcConfigurations: vpcConfigurations, 105 | environmentProperties: { 106 | propertyGroups: propertyGroups 107 | }, 108 | applicationCodeConfiguration: { 109 | codeContent: { 110 | s3ContentLocation: { 111 | bucketArn: `arn:aws:s3:::${props.appBucket}`, 112 | fileKey: props.appFileKeyOnS3 113 | } 114 | }, 115 | codeContentType: "ZIPFILE" 116 | }, 117 | applicationSnapshotConfiguration: { 118 | snapshotsEnabled: false 119 | } 120 | } 121 | } 122 | 123 | // application 124 | this.msfApp = 125 | new kinesisanalyticsv2.CfnApplication(this, 'MSFApp', this.cfnApplicationProps); 126 | 127 | // https://docs.aws.amazon.com/AmazonCloudWatch/latest/logs/iam-access-control-overview-cwl.html 128 | const logStreamArn = `arn:aws:logs:${props.region}` + 129 | `:${props.account}:log-group:` + 130 | `${props.logGroup.logGroupName}:log-stream:${props.logStream.logStreamName}`; 131 | 132 | // cw logging config for app 133 | this.cwlogsOption = new kinesisanalyticsv2.CfnApplicationCloudWatchLoggingOption( 134 | this, 135 | 'MSFCWLogs', 136 | { 137 | applicationName: props.msfAppName, 138 | cloudWatchLoggingOption: { 139 | logStreamArn: logStreamArn 140 | } 141 | } 142 | ); 143 | 144 | this.cwlogsOption.addDependency(this.msfApp); 145 | 146 | } 147 | } -------------------------------------------------------------------------------- /cdk-infra/shared/lib/kds-datagen-lambda-construct.ts: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | * Apache-2.0 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this 6 | * software and associated documentation files (the "Software"), to deal in the Software 7 | * without restriction, including without limitation the rights to use, copy, modify, 8 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 9 | * permit persons to whom the Software is furnished to do so. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 12 | * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 13 | * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 14 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 15 | * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 16 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
17 | */ 18 | 19 | import {readFileSync} from 'fs'; 20 | import { StackProps } from 'aws-cdk-lib'; 21 | import * as cdk from 'aws-cdk-lib'; 22 | import { Construct } from 'constructs'; 23 | import * as iam from 'aws-cdk-lib/aws-iam'; 24 | import * as lambda from 'aws-cdk-lib/aws-lambda'; 25 | 26 | 27 | export interface KdsDataGenLambdaConstructProps extends StackProps { 28 | streamArn: string, 29 | numberOfItems: number, 30 | } 31 | 32 | export class KdsDataGenLambdaConstruct extends Construct { 33 | public kdsDataGenLambdaFn: lambda.SingletonFunction; 34 | 35 | constructor(scope: Construct, id: string, props: KdsDataGenLambdaConstructProps) { 36 | super(scope, id); 37 | 38 | // Run KDS DataGen Lambda 39 | this.kdsDataGenLambdaFn = new lambda.SingletonFunction(this, 'KdsDataGenFunction', { 40 | uuid: "e7e4ed0b-1438-4552-94ae-5edfb84ac21c", 41 | code: lambda.Code.fromInline(readFileSync(`${__dirname}/../../../python/lambda_kds_datagen.py`, "utf-8")), 42 | handler: "index.handler", 43 | initialPolicy: [ 44 | new iam.PolicyStatement( 45 | { 46 | actions: ["kinesis:PutRecord"], 47 | resources: [props.streamArn] 48 | }) 49 | ], 50 | timeout: cdk.Duration.seconds(300), 51 | runtime: lambda.Runtime.PYTHON_3_9, 52 | memorySize: 1024, 53 | }); 54 | 55 | const resource = new cdk.CustomResource(this, 'KdsDataGenResource', { 56 | serviceToken: this.kdsDataGenLambdaFn.functionArn, 57 | properties: { 58 | StreamArn: props.streamArn, 59 | NumberOfItems: props.numberOfItems 60 | } 61 | }); 62 | 63 | resource.node.addDependency(this.kdsDataGenLambdaFn); 64 | } 65 | } -------------------------------------------------------------------------------- /cdk-infra/shared/lib/msf-java-app-construct.ts: -------------------------------------------------------------------------------- 1 | import { readFileSync } from 'fs'; 2 | import { StackProps } from "aws-cdk-lib"; 3 | import { Construct } from "constructs"; 4 | import * as cdk from 'aws-cdk-lib'; 5 | import * as lambda from 'aws-cdk-lib/aws-lambda'; 6 | import * as iam from 'aws-cdk-lib/aws-iam'; 7 | 8 | export enum MsfRuntimeEnvironment { 9 | FLINK_1_11 = "FLINK-1_11", 10 | FLINK_1_13 = "FLINK-1_13", 11 | FLINK_1_15 = "FLINK-1_15", 12 | FLINK_1_18 = "FLINK-1_18", 13 | FLINK_1_19 = "FLINK-1_19", 14 | FLINK_1_6 = "FLINK-1_6", 15 | FLINK_1_8 = "FLINK-1_8", 16 | FLINK_1_20 = "FLINK-1_20", 17 | 18 | } 19 | export interface MsfJavaAppProps extends StackProps { 20 | account: string; 21 | region: string; 22 | partition: string; 23 | appName: string; 24 | runtimeEnvironment: string, 25 | serviceExecutionRole: string; 26 | bucketName: string; 27 | jarFile: string; 28 | logStreamName: string; 29 | logGroupName: string; 30 | subnets?: string[]; 31 | securityGroups?: string[]; 32 | parallelism?: Number; 33 | parallelismPerKpu?: Number; 34 | autoscalingEnabled?: Boolean; 35 | checkpointInterval?: Number; 36 | minPauseBetweenCheckpoints?: Number; 37 | applicationProperties?: Object; 38 | } 39 | 40 | // MsfJavaApp construct is used to create a new Java blueprint application. 41 | // This construct is used instead of official CDK construct because official 42 | // CDK construct does not support configuring CW logs during creation. 43 | // Configuring CW logs with official CDK construct results in an update 44 | // to the application which changes its initial version to 2. This is not 45 | // desired for blueprints functionality in AWS console. 
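// Illustrative usage from within a stack (hypothetical -- the identifiers and
// values below are placeholders, not taken from this repository; it assumes
// `this` is a Stack and that the role, bucket, log group, and log stream
// already exist):
//
//   new MsfJavaApp(this, 'BlueprintJavaApp', {
//     account: this.account,
//     region: this.region,
//     partition: this.partition,
//     appName: 'my-blueprint-app',
//     runtimeEnvironment: MsfRuntimeEnvironment.FLINK_1_20,
//     serviceExecutionRole: serviceRole.roleArn,
//     bucketName: appBucket.bucketName,
//     jarFile: 'my-app-1.0.jar',
//     logGroupName: logGroup.logGroupName,
//     logStreamName: logStream.logStreamName,
//   });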
46 | export class MsfJavaApp extends Construct { 47 | constructor(scope: Construct, id: string, props: MsfJavaAppProps) { 48 | super(scope, id); 49 | 50 | const fn = new lambda.SingletonFunction(this, 'MsfJavaAppCustomResourceHandler', { 51 | uuid: 'c4e1d42d-595a-4bd6-99e9-c299b61f2358', 52 | lambdaPurpose: "Deploy an MSF app created with Java", 53 | code: lambda.Code.fromInline(readFileSync(`${__dirname}/../../../python/msf_java_app_custom_resource_handler.py`, "utf-8")), 54 | handler: "index.handler", 55 | initialPolicy: [ 56 | new iam.PolicyStatement( 57 | { 58 | actions: ['iam:PassRole'], 59 | resources: [props.serviceExecutionRole], 60 | conditions: { 61 | StringEqualsIfExists: { 62 | "iam:PassedToService": "kinesisanalytics.amazonaws.com" 63 | }, 64 | ArnEqualsIfExists: { 65 | "iam:AssociatedResourceARN": `arn:${props.partition}:kinesisanalytics:${props.region}:${props.account}:application/${props.appName}` 66 | } 67 | } 68 | }), 69 | ], 70 | timeout: cdk.Duration.seconds(360), 71 | runtime: lambda.Runtime.PYTHON_3_9, 72 | memorySize: 1024, 73 | }); 74 | 75 | fn.addToRolePolicy(new iam.PolicyStatement( 76 | { 77 | actions: [ 78 | 'kinesisanalytics:DescribeApplication', 79 | 'kinesisanalytics:CreateApplication', 80 | 'kinesisanalytics:DeleteApplication', 81 | ], 82 | resources: ['arn:aws:kinesisanalytics:' + props.region + ':' + props.account + ':application/' + props.appName] 83 | })); 84 | 85 | const defaultProps = { 86 | parallelism: 2, 87 | parallelismPerKpu: 1, 88 | autoscalingEnabled: false, 89 | checkpointInterval: 60000, 90 | minPauseBetweenCheckpoints: 5000, 91 | applicationProperties: {} 92 | }; 93 | 94 | props = { ...defaultProps, ...props }; 95 | 96 | const logStreamArn = `arn:${props.partition}:logs:${props.region}:${props.account}:log-group:${props.logGroupName}:log-stream:${props.logStreamName}`; 97 | const bucketArn = `arn:${props.partition}:s3:::${props.bucketName}`; 98 | new cdk.CustomResource(this, `MSFJavaApp${id}`, { 99 | serviceToken: fn.functionArn, 100 | properties: 101 | { 102 | AppName: props.appName, 103 | RuntimeEnvironment: props.runtimeEnvironment, 104 | ServiceExecutionRole: props.serviceExecutionRole, 105 | BucketArn: bucketArn, 106 | FileKey: props.jarFile, 107 | LogStreamArn: logStreamArn, 108 | Subnets: props.subnets, 109 | SecurityGroups: props.securityGroups, 110 | Parallelism: props.parallelism, 111 | ParallelismPerKpu: props.parallelismPerKpu, 112 | AutoscalingEnabled: props.autoscalingEnabled, 113 | CheckpointInterval: props.checkpointInterval, 114 | MinPauseBetweenCheckpoints: props.minPauseBetweenCheckpoints, 115 | ApplicationProperties: props.applicationProperties 116 | } 117 | }); 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /cdk-infra/shared/lib/msk-construct.ts: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | * Apache-2.0 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this 6 | * software and associated documentation files (the "Software"), to deal in the Software 7 | * without restriction, including without limitation the rights to use, copy, modify, 8 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 9 | * permit persons to whom the Software is furnished to do so. 
10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 12 | * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 13 | * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 14 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 15 | * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 16 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 17 | */ 18 | 19 | import { CfnOutput, SecretValue, Stack, StackProps } from 'aws-cdk-lib'; 20 | import * as cdk from 'aws-cdk-lib'; 21 | import { Construct } from 'constructs'; 22 | import * as ec2 from 'aws-cdk-lib/aws-ec2'; 23 | import { aws_msk as msk } from 'aws-cdk-lib'; 24 | import * as cr from 'aws-cdk-lib/custom-resources'; 25 | 26 | export interface MSKContructProps extends StackProps { 27 | account: string, 28 | region: string, 29 | vpc: ec2.Vpc, 30 | clusterName: string, 31 | kafkaVersion: string, 32 | instanceType: string, 33 | mskSG: ec2.SecurityGroup, 34 | sshSG: ec2.SecurityGroup, 35 | } 36 | 37 | export class MSKContruct extends Construct { 38 | public cfnMskCluster: msk.CfnCluster; 39 | public cfnClusterArnOutput: CfnOutput; 40 | public bootstrapServersOutput: CfnOutput; 41 | 42 | constructor(scope: Construct, id: string, props: MSKContructProps) { 43 | super(scope, id); 44 | 45 | // msk cluster 46 | this.cfnMskCluster = new msk.CfnCluster(this, 'MSKCluster', { 47 | clusterName: props.clusterName, 48 | kafkaVersion: props.kafkaVersion, 49 | 50 | numberOfBrokerNodes: 3, 51 | 52 | // unauthenticated 53 | clientAuthentication: { 54 | unauthenticated: { 55 | enabled: true, 56 | }, 57 | }, 58 | 59 | encryptionInfo: { 60 | encryptionInTransit: { 61 | clientBroker: 'TLS_PLAINTEXT', 62 | inCluster: true, 63 | } 64 | }, 65 | 66 | brokerNodeGroupInfo: { 67 | instanceType: props.instanceType, 68 | clientSubnets: props.vpc.selectSubnets({ 69 | subnetType: ec2.SubnetType.PRIVATE_WITH_EGRESS, 70 | }).subnetIds, 71 | securityGroups: [props.mskSG.securityGroupId], 72 | storageInfo: { 73 | ebsStorageInfo: { 74 | volumeSize: 512, 75 | }, 76 | }, 77 | } // brokerNodeGroupInfo 78 | 79 | }); // CfnCluster 80 | 81 | // 👇 create an output for cluster ARN 82 | this.cfnClusterArnOutput = new cdk.CfnOutput(this, 'ClusterArnOutput', { 83 | value: this.cfnMskCluster.attrArn, 84 | description: 'The ARN of MSK cluster: ' + props!.clusterName, 85 | exportName: 'MSKClusterARN-' + props!.clusterName, 86 | }); 87 | 88 | this.cfnClusterArnOutput.node.addDependency(this.cfnMskCluster); 89 | 90 | // custom resource policy to get bootstrap brokers for our cluster 91 | const getBootstrapBrokers = new cr.AwsCustomResource(this, 'BootstrapBrokersLookup', { 92 | onUpdate: { // will also be called for a CREATE event 93 | service: 'Kafka', 94 | action: 'getBootstrapBrokers', 95 | parameters: { 96 | ClusterArn: this.cfnMskCluster.attrArn 97 | }, 98 | region: props.region, 99 | physicalResourceId: cr.PhysicalResourceId.of(Date.now().toString()) 100 | }, 101 | policy: cr.AwsCustomResourcePolicy.fromSdkCalls({ resources: cr.AwsCustomResourcePolicy.ANY_RESOURCE }) 102 | }); 103 | 104 | getBootstrapBrokers.node.addDependency(this.cfnMskCluster); 105 | 106 | // 👇 create an output for bootstrap servers 107 | this.bootstrapServersOutput = new cdk.CfnOutput(this, 'BootstrapServersOutput', { 108 | value: getBootstrapBrokers.getResponseField('BootstrapBrokerString'), 109 | description: 'List of bootstrap servers for our MSK 
cluster - ' + props!.clusterName, 110 | exportName: 'MSKBootstrapServers-' + props!.clusterName, 111 | }); 112 | 113 | this.bootstrapServersOutput.node.addDependency(getBootstrapBrokers); 114 | 115 | } // constructor 116 | } // class MSKConstruct -------------------------------------------------------------------------------- /cdk-infra/shared/lib/msk-get-bootstrap-broker-string.ts: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | * Apache-2.0 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this 6 | * software and associated documentation files (the "Software"), to deal in the Software 7 | * without restriction, including without limitation the rights to use, copy, modify, 8 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 9 | * permit persons to whom the Software is furnished to do so. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 12 | * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 13 | * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 14 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 15 | * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 16 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 17 | */ 18 | 19 | import { StackProps } from 'aws-cdk-lib'; 20 | import * as cdk from 'aws-cdk-lib'; 21 | import { Construct } from 'constructs'; 22 | import * as ec2 from 'aws-cdk-lib/aws-ec2'; 23 | import * as iam from 'aws-cdk-lib/aws-iam'; 24 | import * as lambda from 'aws-cdk-lib/aws-lambda'; 25 | 26 | export interface MSKGetBootstrapBrokerStringProps extends StackProps { 27 | mskClusterArn: string 28 | } 29 | 30 | export class MSKGetBootstrapBrokerStringConstruct extends Construct { 31 | public getBootstrapBrokerFn: lambda.SingletonFunction; 32 | 33 | constructor(scope: Construct, id: string, props: MSKGetBootstrapBrokerStringProps) { 34 | super(scope, id); 35 | 36 | // Run topic creation lambda 37 | this.getBootstrapBrokerFn = new lambda.SingletonFunction(this, 'MSKGetBootstrapBrokerStringFunction', { 38 | uuid: 'e28123c0-1b6b-11ee-be56-0242ac120002', 39 | lambdaPurpose: "GetBootstrapBrokerString", 40 | code: lambda.Code.fromInline(` 41 | import boto3 42 | import os 43 | import json 44 | import cfnresponse 45 | import datetime 46 | 47 | def handler(event, context): 48 | try: 49 | print("Received Event:" + json.dumps(event)) 50 | 51 | if(event["RequestType"] == "Create"): 52 | client = boto3.client('kafka') 53 | response = client.get_bootstrap_brokers( 54 | ClusterArn=os.environ['cluster_arn']) 55 | print(response["BootstrapBrokerStringSaslIam"]) 56 | cfnresponse.send(event, context, cfnresponse.SUCCESS, responseData={"BootstrapBrokerString": response["BootstrapBrokerStringSaslIam"]}) 57 | elif(event["RequestType"] == "Delete"): 58 | cfnresponse.send(event, context, cfnresponse.SUCCESS, responseData={"response": "successfully deleted custom resource"}) 59 | except Exception as err: 60 | print(err) 61 | cfnresponse.send(event, context, cfnresponse.FAILED, err) 62 | `), 63 | handler: "index.handler", 64 | initialPolicy: [ 65 | new iam.PolicyStatement( 66 | { 67 | actions: ['kafka:getBootstrapBrokers'], 68 | resources: ['*'] 69 | }) 70 | ], 71 | timeout: cdk.Duration.seconds(300), 72 | runtime: 
lambda.Runtime.PYTHON_3_9, 73 | memorySize: 256, 74 | environment: 75 | { 76 | cluster_arn: props.mskClusterArn 77 | } 78 | }); 79 | 80 | //deletes SGs and such before deleting lambda (with dependencies) 81 | const fixVpcDeletion = (handler: lambda.IFunction): void => { 82 | if (!handler.isBoundToVpc) { 83 | return 84 | } 85 | handler.connections.securityGroups.forEach(sg => { 86 | if (handler.role) { 87 | handler.role.node.children.forEach(child => { 88 | if ( 89 | child.node.defaultChild && 90 | (child.node.defaultChild as iam.CfnPolicy).cfnResourceType === 'AWS::IAM::Policy' 91 | ) { 92 | sg.node.addDependency(child); 93 | } 94 | }); 95 | } 96 | }); 97 | }; 98 | 99 | fixVpcDeletion(this.getBootstrapBrokerFn) 100 | } 101 | } -------------------------------------------------------------------------------- /cdk-infra/shared/lib/msk-topic-creation-lambda-construct.ts: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | * Apache-2.0 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this 6 | * software and associated documentation files (the "Software"), to deal in the Software 7 | * without restriction, including without limitation the rights to use, copy, modify, 8 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 9 | * permit persons to whom the Software is furnished to do so. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 12 | * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 13 | * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 14 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 15 | * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 16 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
17 | */ 18 | 19 | import { StackProps } from 'aws-cdk-lib'; 20 | import * as cdk from 'aws-cdk-lib'; 21 | import { Construct } from 'constructs'; 22 | import * as ec2 from 'aws-cdk-lib/aws-ec2'; 23 | import * as iam from 'aws-cdk-lib/aws-iam'; 24 | import * as lambda from 'aws-cdk-lib/aws-lambda'; 25 | import * as s3 from 'aws-cdk-lib/aws-s3' 26 | 27 | export interface TopicCreationLambdaConstructProps extends StackProps { 28 | account: string, 29 | region: string, 30 | vpc: ec2.Vpc, 31 | clusterNamesForPermission: string[], 32 | mskSG: ec2.SecurityGroup, 33 | bucketName: string, 34 | lambdaAssetLocation: string, 35 | } 36 | 37 | export class TopicCreationLambdaConstruct extends Construct { 38 | public onEventLambdaFn: lambda.SingletonFunction; 39 | 40 | 41 | 42 | constructor(scope: Construct, id: string, props: TopicCreationLambdaConstructProps) { 43 | super(scope, id); 44 | 45 | 46 | 47 | 48 | 49 | let mskResourcesForPolicy = []; 50 | for(let i = 0; i < props!.clusterNamesForPermission.length; i++) { 51 | let clusterResource = `arn:aws:kafka:${props!.region}:${props!.account}:cluster/${props!.clusterNamesForPermission[i]}/*`; 52 | let topicResource = `arn:aws:kafka:${props!.region}:${props!.account}:topic/${props!.clusterNamesForPermission[i]}/*`; 53 | mskResourcesForPolicy.push(clusterResource); 54 | mskResourcesForPolicy.push(topicResource); 55 | } 56 | 57 | const mskSecurityGroup = ec2.SecurityGroup.fromSecurityGroupId(this, 'existingMskSG', props.mskSG.securityGroupId, { 58 | mutable: false 59 | }); 60 | 61 | 62 | 63 | const lambdaIAMPolicy = new iam.PolicyStatement( 64 | { 65 | actions: ['kafka-cluster:Connect', 66 | 'kafka-cluster:CreateTopic', 67 | 'kafka-cluster:DescribeTopic', 68 | 'kafka-cluster:DeleteTopic', 69 | 'kafka-cluster:WriteData', 70 | 'kafka-cluster:ReadData', 71 | 'kafka-cluster:*Topic*',], 72 | resources: mskResourcesForPolicy 73 | }); 74 | 75 | 76 | const AssetBucketObject = s3.Bucket.fromBucketName( 77 | this, 78 | "lambda-assets-bucket", 79 | props.bucketName, 80 | ); 81 | 82 | // Run topic creation lambda 83 | this.onEventLambdaFn = new lambda.SingletonFunction(this, 'TopicCreationFunction', { 84 | uuid: 'f7d4f730-4ee1-11e8-9c2d-fa7ae01bbebc', 85 | lambdaPurpose: "Create MSK Topic", 86 | code: lambda.Code.fromBucket(AssetBucketObject, props.lambdaAssetLocation), 87 | handler: "com.amazonaws.TopicGenHandler", 88 | initialPolicy: [ 89 | lambdaIAMPolicy 90 | ], 91 | timeout: cdk.Duration.seconds(300), 92 | runtime: lambda.Runtime.JAVA_11, 93 | memorySize: 1024, // need extra memory for kafka-client 94 | vpc: props!.vpc, 95 | // 👇 place lambda in private subnet so 96 | // we can reach MSK broker 97 | vpcSubnets: { 98 | subnetType: ec2.SubnetType.PRIVATE_WITH_EGRESS, 99 | }, 100 | securityGroups: [mskSecurityGroup], 101 | }); 102 | 103 | 104 | 105 | } 106 | 107 | 108 | } -------------------------------------------------------------------------------- /cdk-infra/shared/lib/zeppelin-note-run-lambda-construct.ts: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
3 | * Apache-2.0 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this 6 | * software and associated documentation files (the "Software"), to deal in the Software 7 | * without restriction, including without limitation the rights to use, copy, modify, 8 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 9 | * permit persons to whom the Software is furnished to do so. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 12 | * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 13 | * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 14 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 15 | * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 16 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 17 | */ 18 | import { StackProps } from 'aws-cdk-lib'; 19 | import * as cdk from 'aws-cdk-lib'; 20 | import { Construct } from 'constructs'; 21 | import * as iam from 'aws-cdk-lib/aws-iam'; 22 | import * as lambda from 'aws-cdk-lib/aws-lambda'; 23 | import * as s3 from 'aws-cdk-lib/aws-s3' 24 | 25 | export interface ZeppelinNoteRunConstructProps extends StackProps { 26 | account: string, 27 | region: string, 28 | codeBucket: string, 29 | codeKey: string, 30 | appName: string 31 | } 32 | 33 | export class ZeppelinNoteRunConstruct extends Construct { 34 | public zeppelinNoteRunFn: lambda.SingletonFunction; 35 | 36 | constructor(scope: Construct, id: string, props: ZeppelinNoteRunConstructProps) { 37 | super(scope, id); 38 | 39 | 40 | const AssetBucketObject = s3.Bucket.fromBucketName( 41 | this, 42 | "lambda-assets-bucket", 43 | props.codeBucket, 44 | ); 45 | 46 | // Run zeppelin note run lambda 47 | this.zeppelinNoteRunFn = new lambda.SingletonFunction(this, 'RunZeppelinNoteFunction', { 48 | uuid: '97e4f730-4ee1-11e8-3c2d-fa7ae01b6ebc', 49 | lambdaPurpose: "Run Zeppelin Note", 50 | code: lambda.Code.fromBucket(AssetBucketObject, props.codeKey), 51 | handler: "lambda_function.lambda_handler", 52 | initialPolicy: [ 53 | new iam.PolicyStatement( 54 | { 55 | actions: ['kinesisanalytics:CreateApplicationPresignedUrl',], 56 | resources: ['arn:aws:kinesisanalytics:' + props.region + ':' + props.account + ':application/' + props.appName] 57 | }) 58 | ], 59 | timeout: cdk.Duration.seconds(120), 60 | runtime: lambda.Runtime.PYTHON_3_9, 61 | memorySize: 256, 62 | environment: { 63 | AppName: props.appName 64 | } 65 | }); 66 | } 67 | } -------------------------------------------------------------------------------- /cdk-infra/shared/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "cdk-infra-msf-kafka-to-s3", 3 | "version": "0.1.0", 4 | "bin": { 5 | "cdk-infra-msf-kafka-to-s3": "bin/cdk-infra-msf-kafka-to-s3.js" 6 | }, 7 | "scripts": { 8 | "build": "tsc", 9 | "watch": "tsc -w", 10 | "test": "jest", 11 | "cdk": "cdk" 12 | }, 13 | "devDependencies": { 14 | "@types/jest": "29.5.1", 15 | "@types/node": "18.16.0", 16 | "@types/prettier": "2.7.2", 17 | "aws-cdk": "2.76.0", 18 | "jest": "29.5.0", 19 | "ts-jest": "29.1.0", 20 | "ts-node": "10.9.1", 21 | "typescript": "5.0.4" 22 | }, 23 | "dependencies": { 24 | "@aws-cdk/aws-glue": "1.199.0", 25 | "aws-cdk-lib": "2.85.0", 26 | "constructs": "10.2.4", 27 | "source-map-support": "0.5.21" 28 | 29 | } 30 | } 31 | 
-------------------------------------------------------------------------------- /cdk-infra/shared/test/cdk-infra-msf-kafka-to-s3.test.d.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/managed-service-for-apache-flink-blueprints/5c8da457d749d32069e99f2e64687613a6e099ad/cdk-infra/shared/test/cdk-infra-msf-kafka-to-s3.test.d.ts -------------------------------------------------------------------------------- /cdk-infra/shared/test/cdk-infra-msf-kafka-to-s3.test.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | // import * as cdk from 'aws-cdk-lib'; 3 | // import { Template } from 'aws-cdk-lib/assertions'; 4 | // example test. To run these tests, uncomment this file along with the 5 | // example resource in lib/cdk-infra-msf-kafka-to-s3-stack.ts 6 | test('SQS Queue Created', () => { 7 | // const app = new cdk.App(); 8 | // // WHEN 9 | // // THEN 10 | // const template = Template.fromStack(stack); 11 | // template.hasResourceProperties('AWS::SQS::Queue', { 12 | // VisibilityTimeout: 300 13 | // }); 14 | }); 15 | //# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoiY2RrLWluZnJhLWtkYS1rYWZrYS10by1zMy50ZXN0LmpzIiwic291cmNlUm9vdCI6IiIsInNvdXJjZXMiOlsiY2RrLWluZnJhLWtkYS1rYWZrYS10by1zMy50ZXN0LnRzIl0sIm5hbWVzIjpbXSwibWFwcGluZ3MiOiI7QUFBQSxzQ0FBc0M7QUFDdEMscURBQXFEO0FBQ3JELGtGQUFrRjtBQUVsRix1RUFBdUU7QUFDdkUsNkRBQTZEO0FBQzdELElBQUksQ0FBQyxtQkFBbUIsRUFBRSxHQUFHLEVBQUU7SUFDL0IsK0JBQStCO0lBQy9CLGNBQWM7SUFDZCwwRkFBMEY7SUFDMUYsY0FBYztJQUNkLGdEQUFnRDtJQUVoRCx3REFBd0Q7SUFDeEQsNkJBQTZCO0lBQzdCLFFBQVE7QUFDUixDQUFDLENBQUMsQ0FBQyIsInNvdXJjZXNDb250ZW50IjpbIi8vIGltcG9ydCAqIGFzIGNkayBmcm9tICdhd3MtY2RrLWxpYic7XG4vLyBpbXBvcnQgeyBUZW1wbGF0ZSB9IGZyb20gJ2F3cy1jZGstbGliL2Fzc2VydGlvbnMnO1xuLy8gaW1wb3J0ICogYXMgQ2RrSW5mcmFLZGFLYWZrYVRvUzMgZnJvbSAnLi4vbGliL2Nkay1pbmZyYS1rZGEta2Fma2EtdG8tczMtc3RhY2snO1xuXG4vLyBleGFtcGxlIHRlc3QuIFRvIHJ1biB0aGVzZSB0ZXN0cywgdW5jb21tZW50IHRoaXMgZmlsZSBhbG9uZyB3aXRoIHRoZVxuLy8gZXhhbXBsZSByZXNvdXJjZSBpbiBsaWIvY2RrLWluZnJhLWtkYS1rYWZrYS10by1zMy1zdGFjay50c1xudGVzdCgnU1FTIFF1ZXVlIENyZWF0ZWQnLCAoKSA9PiB7XG4vLyAgIGNvbnN0IGFwcCA9IG5ldyBjZGsuQXBwKCk7XG4vLyAgICAgLy8gV0hFTlxuLy8gICBjb25zdCBzdGFjayA9IG5ldyBDZGtJbmZyYUtkYUthZmthVG9TMy5DZGtJbmZyYUtkYUthZmthVG9TM1N0YWNrKGFwcCwgJ015VGVzdFN0YWNrJyk7XG4vLyAgICAgLy8gVEhFTlxuLy8gICBjb25zdCB0ZW1wbGF0ZSA9IFRlbXBsYXRlLmZyb21TdGFjayhzdGFjayk7XG5cbi8vICAgdGVtcGxhdGUuaGFzUmVzb3VyY2VQcm9wZXJ0aWVzKCdBV1M6OlNRUzo6UXVldWUnLCB7XG4vLyAgICAgVmlzaWJpbGl0eVRpbWVvdXQ6IDMwMFxuLy8gICB9KTtcbn0pO1xuIl19 -------------------------------------------------------------------------------- /cdk-infra/shared/test/cdk-infra-msf-kafka-to-s3.test.ts: -------------------------------------------------------------------------------- 1 | // import * as cdk from 'aws-cdk-lib'; 2 | // import { Template } from 'aws-cdk-lib/assertions'; 3 | 4 | // example test. 
To run these tests, uncomment this file along with the 5 | // example resource in lib/cdk-infra-msf-kafka-to-s3-stack.ts 6 | test('SQS Queue Created', () => { 7 | // const app = new cdk.App(); 8 | // // WHEN 9 | // // THEN 10 | // const template = Template.fromStack(stack); 11 | 12 | // template.hasResourceProperties('AWS::SQS::Queue', { 13 | // VisibilityTimeout: 300 14 | // }); 15 | }); 16 | -------------------------------------------------------------------------------- /cdk-infra/shared/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2018", 4 | "module": "commonjs", 5 | "lib": [ 6 | "es2018" 7 | ], 8 | "declaration": true, 9 | "strict": true, 10 | "noImplicitAny": true, 11 | "strictNullChecks": true, 12 | "noImplicitThis": true, 13 | "alwaysStrict": true, 14 | "noUnusedLocals": false, 15 | "noUnusedParameters": false, 16 | "noImplicitReturns": true, 17 | "noFallthroughCasesInSwitch": false, 18 | "inlineSourceMap": true, 19 | "inlineSources": true, 20 | "experimentalDecorators": true, 21 | "strictPropertyInitialization": false, 22 | "typeRoots": [ 23 | "./node_modules/@types" 24 | ] 25 | }, 26 | "exclude": [ 27 | "node_modules", 28 | "cdk.out" 29 | ] 30 | } 31 | -------------------------------------------------------------------------------- /datagen/README.md: -------------------------------------------------------------------------------- 1 | # Datagen projects 2 | 3 | This folder contains various data generation applications referenced by blueprints. 4 | 5 | - [Orders DataGen - Simple MSF Studio Application that generates a stream of "orders"](orders-datagen/README.md) 6 | 7 | NOTE: If you're new to Managed Service for Apache Flink Studio, we recommend that you review the [Creating a Studio Notebook in Managed Service for Apache Flink Tutorial](https://docs.aws.amazon.com/kinesisanalytics/latest/java/example-notebook.html) first. 8 | -------------------------------------------------------------------------------- /datagen/orders-datagen/DATAGEN-KDS.md: -------------------------------------------------------------------------------- 1 | ### Create KDS streaming table into which data will be written 2 | 3 | Let's now create a table against our source Kinesis data stream that we'll write to from the datagen source. Run the following query in a notebook cell: 4 | 5 | IMPORTANT: Make sure that you replace the placeholders for the `stream` and `aws.region` properties with your values. If you're performing data generation to try out one of the blueprints in this repo, then you can get these values from the CloudFormation Outputs tab. 
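If you prefer to look these values up programmatically rather than in the console, here is a minimal sketch using boto3 (the stack name is a placeholder, and output keys vary by blueprint, so inspect the printed list rather than relying on specific key names). Once you have the values, plug them into the query below.

```python
import boto3

# Hypothetical stack name -- replace with the name of the blueprint stack you deployed.
STACK_NAME = "my-blueprint-stack"

cfn = boto3.client("cloudformation")
stack = cfn.describe_stacks(StackName=STACK_NAME)["Stacks"][0]

# Print every output so you can pick out the stream name and region.
for output in stack.get("Outputs", []):
    print(f'{output["OutputKey"]}: {output["OutputValue"]}')
```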
6 | 7 | ```SQL 8 | %flink.ssql 9 | 10 | -- IMPORTANT!!: Please replace 11 | -- <> with your source stream 12 | -- <> with your region 13 | 14 | DROP TABLE IF EXISTS orders_kds; 15 | 16 | CREATE TABLE orders_kds ( 17 | product_id BIGINT, 18 | order_number BIGINT, 19 | quantity INT, 20 | price DECIMAL(32, 2), 21 | buyer STRING, 22 | order_time TIMESTAMP(3) 23 | ) 24 | WITH ( 25 | 'connector'= 'kinesis', 26 | 'stream' = '<>', 27 | 'aws.region' = '<>', 28 | 'format' = 'json' 29 | ); 30 | ``` 31 | 32 | ### Run continuous data generation query 33 | 34 | Now let's run the continuous data generation using the following SQL statement in a new notebook cell: 35 | 36 | ```SQL 37 | %flink.ssql(parallelism=1) 38 | 39 | INSERT INTO orders_kds 40 | SELECT 41 | product_id, 42 | order_number, 43 | quantity, 44 | price, 45 | buyer, 46 | order_time 47 | FROM orders_datagen_source; 48 | ``` 49 | 50 | The above statement starts a continuously running Flink job that populates synthetic data into your Kinesis data stream. Any consumers reading from that stream will see data in the following format: 51 | 52 | ```json 53 | { 54 | "product_id": 2343, 55 | "order_number": 54, 56 | "quantity": 4, 57 | "price": 43.23, 58 | "buyer": "random_string", 59 | "order_time": 23942349823 60 | } 61 | ``` 62 | 63 | ### (Optional) Query to view generated data 64 | 65 | We can view this stream of data using another query. Run the following in a new notebook cell: 66 | 67 | NOTE: The following query is *not* necessary for data generation. It's simply used here to validate that we're indeed generating data. 68 | 69 | ```SQL 70 | %flink.ssql(type=update, parallelism=1) 71 | 72 | select * from orders_kds; 73 | ``` -------------------------------------------------------------------------------- /datagen/orders-datagen/DATAGEN-MSK.md: -------------------------------------------------------------------------------- 1 | ### Create MSK streaming table into which data will be written 2 | 3 | Let's now create a table against our source MSK cluster that we'll write to from the datagen source. Run the following query in a notebook cell: 4 | 5 | IMPORTANT: Make sure that you replace the placeholders for the `topic` and `properties.bootstrap.servers` with your values. If you're performing data generation to try out one of the blueprints in this repo, then you can get these values from the CloudFormation Outputs tab. 
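You can also fetch the bootstrap broker string programmatically. The sketch below uses the same `get_bootstrap_brokers` call that this repository's helper Lambda relies on; the cluster ARN is a placeholder you would copy from your stack's outputs. Once you have the values, plug them into the query below.

```python
import boto3

# Hypothetical cluster ARN -- copy the real one from your CloudFormation outputs.
CLUSTER_ARN = "arn:aws:kafka:us-east-1:111122223333:cluster/my-cluster/abc-123"

kafka = boto3.client("kafka")
brokers = kafka.get_bootstrap_brokers(ClusterArn=CLUSTER_ARN)

# These blueprints use IAM auth, so read the SASL/IAM bootstrap string.
print(brokers["BootstrapBrokerStringSaslIam"])
```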
6 | 7 | ```SQL 8 | %flink.ssql 9 | 10 | -- IMPORTANT!!: Please replace 11 | -- <> with your source topic 12 | -- <> with your broker 13 | 14 | DROP TABLE IF EXISTS orders_msk; 15 | 16 | CREATE TABLE orders_msk ( 17 | product_id BIGINT, 18 | order_number BIGINT, 19 | quantity INT, 20 | price DECIMAL(32, 2), 21 | buyer STRING, 22 | order_time TIMESTAMP(3) 23 | ) 24 | WITH ( 25 | 'connector'= 'kafka', 26 | 'topic' = '<>', 27 | 'format' = 'json', 28 | 'scan.startup.mode' = 'earliest-offset', 29 | 'properties.bootstrap.servers' = '<>', 30 | 'properties.security.protocol' = 'SASL_SSL', 31 | 'properties.sasl.mechanism' = 'AWS_MSK_IAM', 32 | 'properties.sasl.jaas.config' = 'software.amazon.msk.auth.iam.IAMLoginModule required;', 33 | 'properties.sasl.client.callback.handler.class' = 'software.amazon.msk.auth.iam.IAMClientCallbackHandler' 34 | ); 35 | ``` 36 | 37 | ### Run continuous data generation query 38 | 39 | Now let's run the continuous data generation using the following SQL statement in a new notebook cell: 40 | 41 | ```SQL 42 | %flink.ssql(parallelism=1) 43 | 44 | INSERT INTO orders_msk 45 | SELECT 46 | product_id, 47 | order_number, 48 | quantity, 49 | price, 50 | buyer, 51 | order_time 52 | FROM orders_datagen_source; 53 | ``` 54 | 55 | The above statement starts a continuously running Flink job that populates synthetic data into your MSK cluster/topic. Any consumers reading from that topic will see data in the following format: 56 | 57 | ```json 58 | { 59 | "product_id": 2343, 60 | "order_number": 54, 61 | "quantity": 4, 62 | "price": 43.23, 63 | "buyer": "random_string", 64 | "order_time": 23942349823 65 | } 66 | ``` 67 | 68 | ### (Optional) Query to view generated data 69 | 70 | We can view this stream of data using another query. Run the following in a new notebook cell: 71 | 72 | NOTE: The following query is *not* necessary for data generation. It's simply used here to validate that we're indeed generating data. 
73 | 74 | ```SQL 75 | %flink.ssql(type=update, parallelism=1) 76 | 77 | select * from orders_msk; 78 | ``` -------------------------------------------------------------------------------- /datagen/orders-datagen/kds-datagen.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "raw", 5 | "metadata": { 6 | "format": "text/plain" 7 | }, 8 | "source": [ 9 | "%flink\n", 10 | "\n", 11 | "import org.apache.kafka.clients.admin.AdminClient\n", 12 | "import org.apache.kafka.clients.admin.AdminClientConfig\n", 13 | "import java.util.Properties\n", 14 | "\n", 15 | "// replace with your brokers, etc...\n", 16 | "val bootstrapServers : String = \"boot-jh3g3srn.c3.kafka-serverless.us-east-2.amazonaws.com:9098\"\n", 17 | "var config = new Properties()\n", 18 | "config.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers)\n", 19 | "config.put(\"security.protocol\", \"SASL_SSL\")\n", 20 | "config.put(\"sasl.mechanism\", \"AWS_MSK_IAM\")\n", 21 | "config.put(\"sasl.jaas.config\", \"software.amazon.msk.auth.iam.IAMLoginModule required;\")\n", 22 | "config.put(\"sasl.client.callback.handler.class\", \"software.amazon.msk.auth.iam.IAMClientCallbackHandler\")\n", 23 | "var admin = AdminClient.create(config)\n" 24 | ] 25 | }, 26 | { 27 | "cell_type": "raw", 28 | "metadata": { 29 | "format": "text/plain" 30 | }, 31 | "source": [ 32 | "%flink\n", 33 | "\n", 34 | "// list topics\n", 35 | "var topicListing = admin.listTopics().listings().get()" 36 | ] 37 | }, 38 | { 39 | "cell_type": "raw", 40 | "metadata": { 41 | "format": "text/plain" 42 | }, 43 | "source": [ 44 | "%flink\n", 45 | "\n", 46 | "import org.apache.kafka.clients.admin.NewTopic\n", 47 | "\n", 48 | "// 3 partitions and replication factor of 1\n", 49 | "var newTopic = new NewTopic(\"MyOrdersTopic\", 3, 1.toShort);\n", 50 | "admin.createTopics(Collections.singleton(newTopic));" 51 | ] 52 | }, 53 | { 54 | "cell_type": "raw", 55 | "metadata": { 56 | "format": "text/plain" 57 | }, 58 | "source": [ 59 | "%flink\n", 60 | "\n", 61 | "admin.deleteTopics(Collections.singleton(\"DatagenJsonTopic2\"))" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 4, 67 | "metadata": { 68 | "autoscroll": "auto" 69 | }, 70 | "outputs": [], 71 | "source": [ 72 | "%flink.ssql\n", 73 | "\n", 74 | "DROP TABLE IF EXISTS orders_datagen_source;\n", 75 | "\n", 76 | "CREATE TABLE orders_datagen_source (\n", 77 | " product_id BIGINT,\n", 78 | " order_number BIGINT,\n", 79 | " quantity INT,\n", 80 | " price_int INT,\n", 81 | " price AS CAST(price_int/100.0 AS DECIMAL(32, 2)),\n", 82 | " buyer STRING,\n", 83 | " order_time TIMESTAMP(3)\n", 84 | ")\n", 85 | "WITH (\n", 86 | " 'connector'= 'datagen',\n", 87 | " 'fields.product_id.min' = '1',\n", 88 | " 'fields.product_id.max' = '99999',\n", 89 | " 'fields.quantity.min' = '1',\n", 90 | " 'fields.quantity.max' = '25',\n", 91 | " 'fields.price_int.min' = '29',\n", 92 | " 'fields.price_int.max' = '99999999',\n", 93 | " 'fields.order_number.min' = '1',\n", 94 | " 'fields.order_number.max' = '9999999999',\n", 95 | " 'fields.buyer.length' = '15'\n", 96 | ");\n" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 5, 102 | "metadata": { 103 | "autoscroll": "auto" 104 | }, 105 | "outputs": [], 106 | "source": [ 107 | "%flink.ssql\n", 108 | "\n", 109 | "DROP TABLE IF EXISTS orders_msk;\n", 110 | "\n", 111 | "CREATE TABLE orders_msk (\n", 112 | " product_id BIGINT,\n", 113 | " order_number BIGINT,\n", 114 | " quantity 
INT,\n", 115 | " price DECIMAL(32, 2),\n", 116 | " buyer STRING,\n", 117 | " order_time TIMESTAMP(3)\n", 118 | ")\n", 119 | "WITH (\n", 120 | " 'connector'= 'kafka',\n", 121 | " 'topic' = 'MyOrdersTopic',\n", 122 | " 'format' = 'json',\n", 123 | " 'scan.startup.mode' = 'earliest-offset',\n", 124 | " 'properties.bootstrap.servers' = 'boot-jh3g3srn.c3.kafka-serverless.us-east-2.amazonaws.com:9098',\n", 125 | " 'properties.security.protocol' = 'SASL_SSL',\n", 126 | " 'properties.sasl.mechanism' = 'AWS_MSK_IAM',\n", 127 | " 'properties.sasl.jaas.config' = 'software.amazon.msk.auth.iam.IAMLoginModule required;',\n", 128 | " 'properties.sasl.client.callback.handler.class' = 'software.amazon.msk.auth.iam.IAMClientCallbackHandler'\n", 129 | ");\n" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 6, 135 | "metadata": { 136 | "autoscroll": "auto" 137 | }, 138 | "outputs": [], 139 | "source": [ 140 | "%flink.pyflink\n", 141 | "\n", 142 | "s_env.disable_operator_chaining()\n" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 7, 148 | "metadata": { 149 | "autoscroll": "auto" 150 | }, 151 | "outputs": [], 152 | "source": [ 153 | "%flink.ssql(parallelism=2)\n", 154 | "\n", 155 | "INSERT INTO orders_msk\n", 156 | "SELECT \n", 157 | " product_id,\n", 158 | " order_number,\n", 159 | " quantity,\n", 160 | " price,\n", 161 | " buyer,\n", 162 | " order_time\n", 163 | "FROM orders_datagen_source;" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 8, 169 | "metadata": { 170 | "autoscroll": "auto" 171 | }, 172 | "outputs": [], 173 | "source": [ 174 | "%flink.ssql(type=update, parallelism=2)\n", 175 | "\n", 176 | "select * from orders_msk;" 177 | ] 178 | } 179 | ], 180 | "metadata": { 181 | "kernelspec": { 182 | "display_name": "Spark 2.0.0", 183 | "language": "python", 184 | "name": "spark2" 185 | }, 186 | "language_info": { 187 | "codemirror_mode": "text/python", 188 | "file_extension": ".py", 189 | "mimetype": "text/python", 190 | "name": "scala", 191 | "pygments_lexer": "python", 192 | "version": "3.6" 193 | } 194 | }, 195 | "nbformat": 4, 196 | "nbformat_minor": 2 197 | } 198 | -------------------------------------------------------------------------------- /datagen/orders-datagen/local/pyflink_datagen.py: -------------------------------------------------------------------------------- 1 | from pyflink.table import EnvironmentSettings, StreamTableEnvironment, TableEnvironment 2 | from pyflink.datastream import StreamExecutionEnvironment 3 | from pyflink.table import DataTypes 4 | from pyflink.table.udf import udf 5 | from pyflink.table.expressions import lit, col, call 6 | import os 7 | import pathlib 8 | from pathlib import Path 9 | import json 10 | 11 | 12 | env_settings = EnvironmentSettings \ 13 | .new_instance() \ 14 | .in_streaming_mode() \ 15 | .build() 16 | 17 | s_env = StreamExecutionEnvironment.get_execution_environment() 18 | table_env = StreamTableEnvironment.create(s_env, environment_settings=env_settings) 19 | 20 | APPLICATION_PROPERTIES_FILE_PATH = "/etc/flink/application_properties.json" # on msf 21 | 22 | is_local = ( 23 | # set this env var in your local environment 24 | True if os.environ.get("IS_LOCAL") else False 25 | ) 26 | 27 | 28 | 29 | if is_local: 30 | print("Running in local mode...") 31 | # only for local, overwrite variable to properties and pass in your jars delimited by a semicolon (;) 32 | APPLICATION_PROPERTIES_FILE_PATH = "application_properties.json" # local 33 | 34 | CURRENT_DIR = 
os.path.dirname(os.path.realpath(__file__)) 35 | pipeline_jars_var = "file://" + str(CURRENT_DIR) + "/lib/flink-sql-connector-kafka-1.15.2.jar" 36 | 37 | print(pipeline_jars_var) 38 | 39 | table_env.get_config().get_configuration().set_string( 40 | "pipeline.jars", 41 | pipeline_jars_var 42 | ) 43 | 44 | def get_application_properties(): 45 | if os.path.isfile(APPLICATION_PROPERTIES_FILE_PATH): 46 | with open(APPLICATION_PROPERTIES_FILE_PATH, "r") as file: 47 | contents = file.read() 48 | properties = json.loads(contents) 49 | return properties 50 | else: 51 | print('A file at "{}" was not found'.format(APPLICATION_PROPERTIES_FILE_PATH)) 52 | 53 | def property_map(props, property_group_id): 54 | for prop in props: 55 | if prop["PropertyGroupId"] == property_group_id: 56 | return prop["PropertyMap"] 57 | 58 | def kafka_dest_main(): 59 | 60 | s_env.disable_operator_chaining() 61 | 62 | table_env.execute_sql("DROP TABLE IF EXISTS sink_kafka") 63 | sink_ddl = f""" 64 | CREATE TABLE IF NOT EXISTS sink_kafka ( 65 | product_id BIGINT, 66 | order_number BIGINT, 67 | quantity INT, 68 | price DECIMAL(32,2), 69 | buyer STRING, 70 | order_time TIMESTAMP(3) 71 | ) 72 | WITH ( 73 | 'connector'= 'kafka', 74 | 'format' = 'json', 75 | 'topic' = 'DatagenTopic', 76 | 'properties.bootstrap.servers' = 'localhost:9092' 77 | ) 78 | """ 79 | 80 | table_env.execute_sql("DROP TABLE IF EXISTS datagen_source") 81 | source_ddl = f""" 82 | CREATE TABLE IF NOT EXISTS datagen_source ( 83 | product_id BIGINT, 84 | order_number BIGINT, 85 | quantity INT, 86 | price_int INT, 87 | price AS CAST(price_int/100.0 AS DECIMAL(32, 2)), 88 | buyer STRING, 89 | order_time TIMESTAMP(3) 90 | ) 91 | WITH ( 92 | 'connector'= 'datagen', 93 | 'fields.product_id.min' = '1', 94 | 'fields.product_id.max' = '99999', 95 | 'fields.quantity.min' = '1', 96 | 'fields.quantity.max' = '25', 97 | 'fields.price_int.min' = '29', 98 | 'fields.price_int.max' = '99999999', 99 | 'fields.order_number.min' = '1', 100 | 'fields.order_number.max' = '9999999999', 101 | 'fields.buyer.length' = '15' 102 | ) 103 | """ 104 | 105 | final_load_query = """ 106 | INSERT INTO sink_kafka 107 | SELECT 108 | product_id, 109 | order_number, 110 | quantity, 111 | price, 112 | buyer, 113 | order_time 114 | FROM datagen_source 115 | """ 116 | 117 | table_env.execute_sql(sink_ddl) 118 | table_env.execute_sql(source_ddl) 119 | 120 | exec_response = table_env.execute_sql(final_load_query) 121 | if is_local: 122 | exec_response.wait() 123 | 124 | 125 | def main(): 126 | kafka_dest_main() 127 | 128 | 129 | if __name__ == "__main__": 130 | main() -------------------------------------------------------------------------------- /datagen/orders-datagen/local/pyflink_kafkaread.py: -------------------------------------------------------------------------------- 1 | from pyflink.table import EnvironmentSettings, StreamTableEnvironment, TableEnvironment 2 | from pyflink.datastream import StreamExecutionEnvironment 3 | from pyflink.table import DataTypes 4 | from pyflink.table.udf import udf 5 | from pyflink.table.expressions import lit, col, call 6 | import os 7 | import pathlib 8 | from pathlib import Path 9 | import json 10 | 11 | 12 | env_settings = EnvironmentSettings \ 13 | .new_instance() \ 14 | .in_streaming_mode() \ 15 | .build() 16 | 17 | s_env = StreamExecutionEnvironment.get_execution_environment() 18 | table_env = StreamTableEnvironment.create(s_env, environment_settings=env_settings) 19 | 20 | APPLICATION_PROPERTIES_FILE_PATH = "/etc/flink/application_properties.json" # on msf 
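# For reference, the runtime properties file parsed by get_application_properties()
# below is a JSON list of property groups. A hypothetical example (group names and
# values are placeholders, but the shape matches what property_map() expects):
#
# [
#   {
#     "PropertyGroupId": "BlueprintMetadata",
#     "PropertyMap": {"StackId": "...", "BlueprintName": "..."}
#   }
# ]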
21 | 22 | is_local = ( 23 | # set this env var in your local environment 24 | True if os.environ.get("IS_LOCAL") else False 25 | ) 26 | 27 | if is_local: 28 | print("Running in local mode...") 29 | # only for local, overwrite variable to properties and pass in your jars delimited by a semicolon (;) 30 | APPLICATION_PROPERTIES_FILE_PATH = "application_properties.json" # local 31 | 32 | CURRENT_DIR = os.path.dirname(os.path.realpath(__file__)) 33 | pipeline_jars_var = "file://" + str(CURRENT_DIR) + "/lib/flink-sql-connector-kafka-1.15.2.jar" 34 | 35 | print(pipeline_jars_var) 36 | 37 | table_env.get_config().get_configuration().set_string( 38 | "pipeline.jars", 39 | pipeline_jars_var 40 | ) 41 | 42 | def get_application_properties(): 43 | if os.path.isfile(APPLICATION_PROPERTIES_FILE_PATH): 44 | with open(APPLICATION_PROPERTIES_FILE_PATH, "r") as file: 45 | contents = file.read() 46 | properties = json.loads(contents) 47 | return properties 48 | else: 49 | print('A file at "{}" was not found'.format(APPLICATION_PROPERTIES_FILE_PATH)) 50 | 51 | def property_map(props, property_group_id): 52 | for prop in props: 53 | if prop["PropertyGroupId"] == property_group_id: 54 | return prop["PropertyMap"] 55 | 56 | def kafka_source_main(): 57 | 58 | s_env.disable_operator_chaining() 59 | 60 | table_env.execute_sql("DROP TABLE IF EXISTS source_kafka") 61 | source_ddl = f""" 62 | CREATE TABLE IF NOT EXISTS source_kafka ( 63 | product_id BIGINT, 64 | order_number BIGINT, 65 | quantity INT, 66 | price DECIMAL(32,2), 67 | buyer STRING, 68 | order_time TIMESTAMP(3) 69 | ) 70 | WITH ( 71 | 'connector'= 'kafka', 72 | 'format' = 'json', 73 | 'topic' = 'DatagenTopic', 74 | 'scan.startup.mode' = 'earliest-offset', 75 | 'properties.bootstrap.servers' = 'localhost:9092' 76 | ) 77 | """ 78 | 79 | table_env.execute_sql("DROP TABLE IF EXISTS sink_print") 80 | sink_print_ddl = f""" 81 | CREATE TABLE IF NOT EXISTS sink_print 82 | WITH ( 83 | 'connector'= 'print' 84 | ) 85 | LIKE source_kafka (EXCLUDING ALL) 86 | """ 87 | 88 | print_query = """ 89 | INSERT INTO sink_print 90 | SELECT * 91 | FROM source_kafka 92 | """ 93 | 94 | table_env.execute_sql(source_ddl) 95 | table_env.execute_sql(sink_print_ddl) 96 | 97 | exec_response = table_env.execute_sql(print_query) 98 | if is_local: 99 | exec_response.wait() 100 | 101 | 102 | def main(): 103 | kafka_source_main() 104 | 105 | 106 | if __name__ == "__main__": 107 | main() -------------------------------------------------------------------------------- /datagen/orders-datagen/msk-iam-datagen.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "raw", 5 | "metadata": { 6 | "format": "text/plain" 7 | }, 8 | "source": [ 9 | "%flink\n", 10 | "\n", 11 | "import org.apache.kafka.clients.admin.AdminClient\n", 12 | "import org.apache.kafka.clients.admin.AdminClientConfig\n", 13 | "import java.util.Properties\n", 14 | "\n", 15 | "// replace with your brokers, etc...\n", 16 | "val bootstrapServers : String = \"boot-jh3g3srn.c3.kafka-serverless.us-east-2.amazonaws.com:9098\"\n", 17 | "var config = new Properties()\n", 18 | "config.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers)\n", 19 | "config.put(\"security.protocol\", \"SASL_SSL\")\n", 20 | "config.put(\"sasl.mechanism\", \"AWS_MSK_IAM\")\n", 21 | "config.put(\"sasl.jaas.config\", \"software.amazon.msk.auth.iam.IAMLoginModule required;\")\n", 22 | "config.put(\"sasl.client.callback.handler.class\", 
\"software.amazon.msk.auth.iam.IAMClientCallbackHandler\")\n", 23 | "var admin = AdminClient.create(config)\n" 24 | ] 25 | }, 26 | { 27 | "cell_type": "raw", 28 | "metadata": { 29 | "format": "text/plain" 30 | }, 31 | "source": [ 32 | "%flink\n", 33 | "\n", 34 | "// list topics\n", 35 | "var topicListing = admin.listTopics().listings().get()" 36 | ] 37 | }, 38 | { 39 | "cell_type": "raw", 40 | "metadata": { 41 | "format": "text/plain" 42 | }, 43 | "source": [ 44 | "%flink\n", 45 | "\n", 46 | "import org.apache.kafka.clients.admin.NewTopic\n", 47 | "\n", 48 | "// 3 partitions and replication factor of 1\n", 49 | "var newTopic = new NewTopic(\"MyOrdersTopic\", 3, 1.toShort);\n", 50 | "admin.createTopics(Collections.singleton(newTopic));" 51 | ] 52 | }, 53 | { 54 | "cell_type": "raw", 55 | "metadata": { 56 | "format": "text/plain" 57 | }, 58 | "source": [ 59 | "%flink\n", 60 | "\n", 61 | "admin.deleteTopics(Collections.singleton(\"DatagenJsonTopic2\"))" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 4, 67 | "metadata": { 68 | "autoscroll": "auto" 69 | }, 70 | "outputs": [], 71 | "source": [ 72 | "%flink.ssql\n", 73 | "\n", 74 | "DROP TABLE IF EXISTS orders_datagen_source;\n", 75 | "\n", 76 | "CREATE TABLE orders_datagen_source (\n", 77 | " product_id BIGINT,\n", 78 | " order_number BIGINT,\n", 79 | " quantity INT,\n", 80 | " price_int INT,\n", 81 | " price AS CAST(price_int/100.0 AS DECIMAL(32, 2)),\n", 82 | " buyer STRING,\n", 83 | " order_time TIMESTAMP(3)\n", 84 | ")\n", 85 | "WITH (\n", 86 | " 'connector'= 'datagen',\n", 87 | " 'fields.product_id.min' = '1',\n", 88 | " 'fields.product_id.max' = '99999',\n", 89 | " 'fields.quantity.min' = '1',\n", 90 | " 'fields.quantity.max' = '25',\n", 91 | " 'fields.price_int.min' = '29',\n", 92 | " 'fields.price_int.max' = '99999999',\n", 93 | " 'fields.order_number.min' = '1',\n", 94 | " 'fields.order_number.max' = '9999999999',\n", 95 | " 'fields.buyer.length' = '15'\n", 96 | ");\n" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 5, 102 | "metadata": { 103 | "autoscroll": "auto" 104 | }, 105 | "outputs": [], 106 | "source": [ 107 | "%flink.ssql\n", 108 | "\n", 109 | "DROP TABLE IF EXISTS orders_msk;\n", 110 | "\n", 111 | "CREATE TABLE orders_msk (\n", 112 | " product_id BIGINT,\n", 113 | " order_number BIGINT,\n", 114 | " quantity INT,\n", 115 | " price DECIMAL(32, 2),\n", 116 | " buyer STRING,\n", 117 | " order_time TIMESTAMP(3)\n", 118 | ")\n", 119 | "WITH (\n", 120 | " 'connector'= 'kafka',\n", 121 | " 'topic' = 'MyOrdersTopic',\n", 122 | " 'format' = 'json',\n", 123 | " 'scan.startup.mode' = 'earliest-offset',\n", 124 | " 'properties.bootstrap.servers' = 'boot-jh3g3srn.c3.kafka-serverless.us-east-2.amazonaws.com:9098',\n", 125 | " 'properties.security.protocol' = 'SASL_SSL',\n", 126 | " 'properties.sasl.mechanism' = 'AWS_MSK_IAM',\n", 127 | " 'properties.sasl.jaas.config' = 'software.amazon.msk.auth.iam.IAMLoginModule required;',\n", 128 | " 'properties.sasl.client.callback.handler.class' = 'software.amazon.msk.auth.iam.IAMClientCallbackHandler'\n", 129 | ");\n" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 6, 135 | "metadata": { 136 | "autoscroll": "auto" 137 | }, 138 | "outputs": [], 139 | "source": [ 140 | "%flink.pyflink\n", 141 | "\n", 142 | "s_env.disable_operator_chaining()\n" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 7, 148 | "metadata": { 149 | "autoscroll": "auto" 150 | }, 151 | "outputs": [], 152 | "source": [ 153 | 
"%flink.ssql(parallelism=2)\n", 154 | "\n", 155 | "INSERT INTO orders_msk\n", 156 | "SELECT \n", 157 | " product_id,\n", 158 | " order_number,\n", 159 | " quantity,\n", 160 | " price,\n", 161 | " buyer,\n", 162 | " order_time\n", 163 | "FROM orders_datagen_source;" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 8, 169 | "metadata": { 170 | "autoscroll": "auto" 171 | }, 172 | "outputs": [], 173 | "source": [ 174 | "%flink.ssql(type=update, parallelism=2)\n", 175 | "\n", 176 | "select * from orders_msk;" 177 | ] 178 | } 179 | ], 180 | "metadata": { 181 | "kernelspec": { 182 | "display_name": "Spark 2.0.0", 183 | "language": "python", 184 | "name": "spark2" 185 | }, 186 | "language_info": { 187 | "codemirror_mode": "text/python", 188 | "file_extension": ".py", 189 | "mimetype": "text/python", 190 | "name": "scala", 191 | "pygments_lexer": "python", 192 | "version": "3.6" 193 | } 194 | }, 195 | "nbformat": 4, 196 | "nbformat_minor": 2 197 | } 198 | -------------------------------------------------------------------------------- /datagen/stock-ticker-datagen/kds-datagen.zpln: -------------------------------------------------------------------------------- 1 | { 2 | "paragraphs": [ 3 | { 4 | "text": "%flink.ssql\nDROP TABLE IF EXISTS stock_price_source;\n\nCREATE TABLE stock_price_source (\n ticker VARCHAR(6),\n price DOUBLE,\n event_time TIMESTAMP(3)\n)\nPARTITIONED BY (ticker)\nWITH (\n 'connector' = 'datagen',\n 'fields.ticker.length' = '4',\n 'fields.price.min' = '0.50',\n 'fields.price.max' = '9999'\n) ", 5 | "user": "anonymous", 6 | "dateUpdated": "2023-07-20T08:18:06+0000", 7 | "progress": 0, 8 | "config": { 9 | "editorSetting": { 10 | "language": "sql", 11 | "editOnDblClick": false, 12 | "completionKey": "TAB", 13 | "completionSupport": true 14 | }, 15 | "colWidth": 12, 16 | "editorMode": "ace/mode/sql", 17 | "fontSize": 9, 18 | "results": {}, 19 | "enabled": true 20 | }, 21 | "settings": { 22 | "params": {}, 23 | "forms": {} 24 | }, 25 | "results": {}, 26 | "apps": [], 27 | "runtimeInfos": {}, 28 | "progressUpdateIntervalMs": 500, 29 | "jobName": "paragraph_1689840936418_1983694021", 30 | "id": "paragraph_1689840936418_1983694021", 31 | "dateCreated": "2023-07-20T08:15:36+0000", 32 | "dateStarted": "2023-07-20T08:18:06+0000", 33 | "dateFinished": "2023-07-20T08:18:53+0000", 34 | "status": "FINISHED", 35 | "focus": true, 36 | "$$hashKey": "object:741" 37 | }, 38 | { 39 | "text": "%flink.ssql\nDROP TABLE IF EXISTS stock_price;\n\nCREATE TABLE stock_price (\n ticker VARCHAR(6),\n price DOUBLE,\n event_time TIMESTAMP(3)\n)\nPARTITIONED BY (ticker)\nWITH (\n 'connector' = 'kinesis',\n 'stream' = '<>',\n 'aws.region' = '<>',\n 'format' = 'json',\n 'json.timestamp-format.standard' = 'ISO-8601'\n)", 40 | "user": "anonymous", 41 | "dateUpdated": "2023-07-20T08:22:11+0000", 42 | "progress": 0, 43 | "config": { 44 | "editorSetting": { 45 | "language": "sql", 46 | "editOnDblClick": false, 47 | "completionKey": "TAB", 48 | "completionSupport": true 49 | }, 50 | "colWidth": 12, 51 | "editorMode": "ace/mode/sql", 52 | "fontSize": 9, 53 | "results": {}, 54 | "enabled": true 55 | }, 56 | "settings": { 57 | "params": {}, 58 | "forms": {} 59 | }, 60 | "apps": [], 61 | "runtimeInfos": {}, 62 | "progressUpdateIntervalMs": 500, 63 | "jobName": "paragraph_1689840466366_567508497", 64 | "id": "paragraph_1689840466366_567508497", 65 | "dateCreated": "2023-07-20T08:07:46+0000", 66 | "status": "FINISHED", 67 | "$$hashKey": "object:742", 68 | "dateFinished": 
"2023-07-20T08:22:12+0000", 69 | "dateStarted": "2023-07-20T08:22:11+0000", 70 | "results": {} 71 | }, 72 | { 73 | "text": "%flink.ssql(parallelism=1)\n\nINSERT INTO stock_price\nSELECT \n ticker,\n price,\n event_time\nFROM stock_price_source;", 74 | "user": "anonymous", 75 | "dateUpdated": "2023-07-20T08:22:22+0000", 76 | "progress": 0, 77 | "config": { 78 | "editorSetting": { 79 | "language": "sql", 80 | "editOnDblClick": false, 81 | "completionKey": "TAB", 82 | "completionSupport": true 83 | }, 84 | "colWidth": 12, 85 | "editorMode": "ace/mode/sql", 86 | "fontSize": 9, 87 | "results": {}, 88 | "enabled": true 89 | }, 90 | "settings": { 91 | "params": {}, 92 | "forms": {} 93 | }, 94 | "apps": [], 95 | "runtimeInfos": {}, 96 | "progressUpdateIntervalMs": 500, 97 | "jobName": "paragraph_1689840435173_480061085", 98 | "id": "paragraph_1689840435173_480061085", 99 | "dateCreated": "2023-07-20T08:07:15+0000", 100 | "status": "ERROR", 101 | "$$hashKey": "object:743", 102 | "dateFinished": "2023-07-20T08:22:24+0000", 103 | "dateStarted": "2023-07-20T08:22:22+0000" 104 | } 105 | ], 106 | "name": "KDS DataGen", 107 | "id": "ABCDEFGHI", 108 | "defaultInterpreterGroup": "flink", 109 | "version": "0.9.0", 110 | "noteParams": {}, 111 | "noteForms": {}, 112 | "angularObjects": {}, 113 | "config": { 114 | "isZeppelinNotebookCronEnable": false, 115 | "looknfeel": "default", 116 | "personalizedMode": "false" 117 | }, 118 | "info": {}, 119 | "path": "/KDS DataGen" 120 | } 121 | -------------------------------------------------------------------------------- /img/blueprint-diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/managed-service-for-apache-flink-blueprints/5c8da457d749d32069e99f2e64687613a6e099ad/img/blueprint-diagram.png -------------------------------------------------------------------------------- /img/msf-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/managed-service-for-apache-flink-blueprints/5c8da457d749d32069e99f2e64687613a6e099ad/img/msf-icon.png -------------------------------------------------------------------------------- /notes/contribute.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | 4 | Want to create a new blueprint that will one day end up on the AWS management console and help other customers get started with Apache Flink? 5 | 6 | Blueprints for Managed Service for Apache Flink need to meet certain criteria that will be outlined here. Once this criteria has been met, you can submit a pull request to this repository 7 | 8 | ## The Big Picture 9 | 10 | ![Blueprints Diagram](../img/blueprint-diagram.png) 11 | 12 | Blueprints are essentially comprised of a CDK Script containing all resources needed for an end-to-end streaming pipeline including Managed Service for Apache Flink. Your blueprint should aid in users getting started with this service by removing any undifferentiated heavy lifting they might encounter. 13 | 14 | There are some limitations in how blueprints should work, and one of those is how customers will launch blueprints. The blueprints are launched via the AWS management console, via the front end web client. This means that the front end team must launch the CloudFormation template in the user's account on their behalf. 
Because of limitations of the `create-stack` API, we must use the bootstrapping construct that exists within the Blueprints project to copy existing assets into a user's AWS account, rather than depending on the CloudFormation template to contain those assets itself. 15 | 16 | 1. The bootstrap template copies existing assets hosted on GitHub to a user S3 bucket (specified in the bootstrap template). 17 | 2. The blueprint references the S3 bucket that the bootstrap template defined. 18 | 19 | ## The Criteria 20 | Every blueprint in this repository must be: 21 | - Stored under the `apps/` directory, within the API layer it relates to. 22 | - Implemented in CDK (any supported language) 23 | - Able to synthesize its CDK script into a standalone CloudFormation template (no bootstrapping required) 24 | - Compatible with the Bootstrap CDK module located in the root of the project (i.e., it references assets in an existing S3 bucket) 25 | - Documented with a clear contract between what the template expects and what the front end should provide: 26 | - What parameters must be provided, etc. 27 | - In the Managed Service for Apache Flink application created, the application properties must include a property group, `BlueprintMetadata`, containing two fields, `StackId` and `BlueprintName`, to assist with front-end programming: 28 | ```python 29 | 'PropertyGroupId': 'BlueprintMetadata', 30 | 'PropertyMap': { 31 | 'StackId': stack_id, 32 | 'BlueprintName': blueprint_name 33 | } 34 | ``` 35 | 36 | ## Open Project Ideas 37 | - Using existing resources 38 | - Ideally, we'd like a way for a blueprint to know whether a customer has already provisioned a resource (like an MSK cluster) before provisioning one for them, and to connect the Managed Service for Apache Flink application to that resource. 39 | - A way to add additional components to the Managed Service for Apache Flink application, such as monitoring, alerting, scaling, and CI/CD. 40 | - Other blueprints 41 | - KDS to S3 42 | - PyFlink Table API 43 | - PyFlink Datastream API 44 | - Others... 45 | Create a PR to signify that you would like to begin work on one of these (or another) blueprint, and the maintainers will be in touch. -------------------------------------------------------------------------------- /notes/installation.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | To work with blueprints, first install the prerequisites (below) so that you can synthesize and test templates on your local machine. 3 | 4 | ### Prerequisites 5 | The following items must be installed prior to working with the blueprints in this repository. 6 | 7 | - [Install Java](https://www.java.com/en/download/help/download_options.html) 8 | - [Install Maven](https://maven.apache.org/install.html) 9 | - [Install Node.js](https://nodejs.org/en/download/) 10 | - [Install and Bootstrap CDK](https://docs.aws.amazon.com/cdk/v2/guide/getting_started.html) 11 | - [Install Git](https://github.com/git-guides/install-git) 12 | - [Install AWS CLI](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html) 13 | 14 | ### Ensure that npm packages associated with CDK are up to date 15 | 16 | 1. In the shared CDK folder, run `npm update`. 17 | 2. In the CDK folder of your blueprint, run `npm update`. 18 | 19 | For example, let's say you want to deploy the MSK Serverless -> S3 blueprint.
Here are the steps you would follow: 20 | 21 | Navigate to the shared CDK folder (from the root of this repo) 22 | ``` 23 | > cd cdk-infra/shared 24 | > npm update 25 | 26 | up to date, audited 457 packages in 12s 27 | 28 | 30 packages are looking for funding 29 | run `npm fund` for details 30 | 31 | found 0 vulnerabilities 32 | ``` 33 | 34 | Navigate to your blueprint folder (from the root of this repo) 35 | 36 | ``` 37 | > cd apps/java-datastream/msk-serverless-to-s3-datastream-java 38 | > npm install 39 | ... 40 | > npm update 41 | 42 | up to date, audited 457 packages in 12s 43 | 44 | 30 packages are looking for funding 45 | run `npm fund` for details 46 | 47 | found 0 vulnerabilities 48 | ``` 49 | 50 | Now, you're ready to [deploy blueprints](#how-do-i-use-these-blueprints). 51 | 52 | NOTE: If `npm update` doesn't actually update your dependency versions, you might have to run `npx npm-check-updates` (`ncu`) and manually update the dependency versions in the `package.json` files in each of the above locations. 53 | 54 | ## How do I use these blueprints? 55 | 56 | - To get started with a blueprint, first ensure you have the [necessary prerequisites](#prerequisites) installed. 57 | - Then clone this repo using the command shown below. 58 | 59 | ``` 60 | git clone https://github.com/awslabs/managed-service-for-apache-flink-blueprints 61 | ``` 62 | - Open a terminal session and navigate to the [blueprint](/README.md#get-started-with-blueprints) of your choice within the project structure; once there, follow the blueprint-specific instructions. 63 | 64 | ### Experimentation 65 | 66 | - Once you have begun sending data through your blueprint, you have successfully launched and tested it! 67 | - You can now take the blueprints in this repo, copy them to your own project structure, and begin to [modify](modify.md) them for your specific needs. 68 | -------------------------------------------------------------------------------- /notes/modify.md: -------------------------------------------------------------------------------- 1 | # Modifying Blueprints for your needs 2 | 3 | The general strategy for modifying blueprints is the same for all blueprints aside from Managed Service for Apache Flink ***Studio*** notebooks, as those can be modified in place in the AWS Management Console. 4 | 5 | ### Modifying Workflow 6 | 1. Clone this repo 7 | 2. Identify the blueprint that you would like to modify and navigate to its respective folder under the `apps/` directory 8 | 3. The code for the project itself is located under the `src/` directory, which you can replace or modify with your own code. 9 | a. It is strongly encouraged to test your new codebase [locally](https://github.com/jeremyber-aws/local-development-with-flink-and-kinesis) before running the blueprint deploy scripts. 10 | 11 | 12 | 13 | ## Ready to deploy your new blueprint? 14 | 15 | 16 | First, you will need to synthesize the template required to launch your blueprint in your account. 17 | 18 | - The README of the blueprint you are modifying contains instructions on how to build the project and upload the assets to an S3 location that the CloudFormation template can access. 19 | 20 | 21 | Example: 22 | 23 | Let's say I am modifying the [KDS-to-S3](/apps/java-datastream/kds-to-s3-datastream-java/) blueprint to calculate some average statistic as the data is read in.
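For illustration, here is a rough sketch of what such a change might look like inside the blueprint's `StreamingJob.java`. This is hypothetical code rather than code that ships with the repo: it assumes a `DataStream<Stock>` named `stockStream` built from the blueprint's Kinesis source, and that the existing `Stock` POJO exposes `getTicker()` and `getPrice()` accessors.

```java
import org.apache.flink.api.common.functions.AggregateFunction;
import org.apache.flink.api.java.tuple.Tuple2;

// Incrementally averages Stock.price; the accumulator is a (sum, count) pair,
// so state stays constant-size no matter how many records a window sees.
public class AveragePrice implements AggregateFunction<Stock, Tuple2<Double, Long>, Double> {
    @Override
    public Tuple2<Double, Long> createAccumulator() {
        return Tuple2.of(0.0, 0L);
    }

    @Override
    public Tuple2<Double, Long> add(Stock value, Tuple2<Double, Long> acc) {
        return Tuple2.of(acc.f0 + value.getPrice(), acc.f1 + 1);
    }

    @Override
    public Double getResult(Tuple2<Double, Long> acc) {
        return acc.f1 == 0 ? 0.0 : acc.f0 / acc.f1;
    }

    @Override
    public Tuple2<Double, Long> merge(Tuple2<Double, Long> a, Tuple2<Double, Long> b) {
        return Tuple2.of(a.f0 + b.f0, a.f1 + b.f1);
    }
}
```

The job graph would then apply it over a keyed window, for example:

```java
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;

// Inside StreamingJob: per-ticker average price over one-minute
// processing-time windows ("stockStream" is assumed, as noted above).
DataStream<Double> averagePrices = stockStream
        .keyBy(Stock::getTicker)
        .window(TumblingProcessingTimeWindows.of(Time.minutes(1)))
        .aggregate(new AveragePrice());
```

An `AggregateFunction` is the usual choice for statistics like this because the window only has to keep the running (sum, count) pair in state instead of buffering every record. With a change along these lines in place, the deployment workflow below is unchanged: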
24 | 25 | - First, I would clone the repo and navigate to `apps/java-datastream/kds-to-s3-datastream-java/src/main/java/` and make the proper modifications to the code in `StreamingJob.java`. 26 | 27 | - Once I am satisfied with the changes, I would compile the application: 28 | 29 | ```bash 30 | mvn compile package 31 | ``` 32 | which creates a file in the `target/` directory of the project called `kds-to-s3-datastream-java-1.0.1.jar`. 33 | 34 | - Now, I can synthesize my blueprint's template like so: 35 | 36 | ```bash 37 | # from the root of the repo, navigate to the kds-to-s3 CDK app 38 | cd apps/java-datastream/kds-to-s3-datastream-java/cdk-infra 39 | 40 | # synthesize the blueprint template to JSON 41 | cdk synth -j > ../target/kds-to-s3-datastream-java.json 42 | ``` 43 | 44 | And now I have all the assets (the application JAR and the synthesized template) I need to deploy my updated blueprint. **Upload these files to an S3 bucket in your AWS account.** 45 | 46 | Navigate to the root of the repository and run: 47 | 48 | ```bash 49 | export timestampToLetters=$(date +%s) 50 | export BucketName=<> 51 | export BlueprintStackName=kds-to-s3-blueprint-${timestampToLetters}-stack 52 | export AppName=kds-to-s3-demo-${timestampToLetters}-app 53 | export StreamName=kds-to-s3-demo-${timestampToLetters}-stream 54 | export CloudWatchLogGroupName=blueprints/managed-flink-analytics/${AppName} 55 | export CloudWatchLogStreamName=managed-flink-log-stream 56 | export RoleName=kds-to-s3-demo-${timestampToLetters}-role 57 | 58 | aws cloudformation create-stack --template-url https://${BucketName}.s3.amazonaws.com/kds-to-s3-datastream-java.json --stack-name $BlueprintStackName --parameters ParameterKey=AppName,ParameterValue=$AppName ParameterKey=CloudWatchLogGroupName,ParameterValue=$CloudWatchLogGroupName ParameterKey=CloudWatchLogStreamName,ParameterValue=$CloudWatchLogStreamName ParameterKey=StreamName,ParameterValue=$StreamName ParameterKey=BucketName,ParameterValue=$BucketName ParameterKey=RoleName,ParameterValue=$RoleName --capabilities CAPABILITY_NAMED_IAM 59 | ``` 60 | -------------------------------------------------------------------------------- /python/README.md: -------------------------------------------------------------------------------- 1 | # Python Scripts 2 | 3 | This directory contains Python scripts that are bundled into CloudFormation stacks. 4 | 5 | ## Setup development environment 6 | 7 | ``` 8 | python -m pip install -r requirements.txt 9 | ``` 10 | 11 | ## Running unit tests 12 | 13 | ``` 14 | python -m pytest 15 | ``` 16 | -------------------------------------------------------------------------------- /python/lambda_copy_assets_to_s3.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | # Apache-2.0 3 | 4 | import cfnresponse 5 | import json 6 | import os 7 | import urllib.request 8 | from urllib.parse import urlparse 9 | import boto3 10 | 11 | 12 | def handler(event, context): 13 | 14 | try: 15 | print("REQUEST RECEIVED:" + json.dumps(event)) 16 | if (event["RequestType"] == "Create"): 17 | 18 | asset_list = os.environ.get("AssetList") 19 | 20 | # S3 bucket details 21 | bucket_name = os.environ.get("bucketName") 22 | 23 | file_list = asset_list.split(",") 24 | 25 | for file_string in file_list: 26 | 27 | parsed_url = urlparse(file_string) 28 | domain = parsed_url.netloc 29 | sub_domain = parsed_url.path.rsplit('/')[1]  # first path segment, e.g. the GitHub org 30 | 31 | if domain != "github.com" and domain != "data-streaming-labs.s3.amazonaws.com": 32 | if sub_domain != "awslabs" and sub_domain != "blueprint-test": 33 | raise Exception( 34 | f"Unrecognized String in Bootstrapping List: {file_string}") 35 | 36 | print(file_string) 37 | 38 | s3_key = file_string.rsplit('/', 1)[-1] 39 | 40 | # Download the JAR file 41 | jar_file_path = '/tmp/file.jar' 42 | urllib.request.urlretrieve(file_string, jar_file_path) 43 | 44 | # Upload the JAR file to S3 45 | s3_client = boto3.client('s3') 46 | s3_client.upload_file(jar_file_path, bucket_name, s3_key) 47 | 48 | # Remove the local JAR file 49 | os.remove(jar_file_path) 50 | 51 | # Print the completion message 52 | print('JAR file uploaded to S3 successfully.') 53 | 54 | print("CREATE RESPONSE", "create_response") 55 | cfnresponse.send(event, context, cfnresponse.SUCCESS, { 56 | "Message": "Resource creation successful!"}) 57 | elif (event["RequestType"] == "Delete"): 58 | print("DELETE" + str("delete_response")) 59 | s3 = boto3.resource('s3') 60 | bucket_name = os.environ.get("bucketName") 61 | bucket = s3.Bucket(bucket_name) 62 | for obj in bucket.objects.filter(): 63 | s3.Object(bucket.name, obj.key).delete() 64 | cfnresponse.send(event, context, cfnresponse.SUCCESS, { 65 | "Message": "Resource deletion successful!"}) 66 | else: 67 | cfnresponse.send(event, context, cfnresponse.FAILED, { 68 | "Message": f"Unsupported RequestType: {event['RequestType']}"}) 69 | except Exception as err: 70 | print(err) 71 | cfnresponse.send(event, context, cfnresponse.FAILED, 72 | {"Message": str(err)}) 73 | 74 | 75 | def test(): 76 | # Manual smoke check for the allow-list logic above ("yes" means the URL would be rejected). 77 | file_string = "https://example.com/other/file.jar" 78 | parsed_url = urlparse(file_string) 79 | domain = parsed_url.netloc 80 | path_parts = parsed_url.path.split('/') 81 | sub_domain = path_parts[1] if len(path_parts) > 1 else "" 82 | if domain != "github.com" and domain != "data-streaming-labs.s3.amazonaws.com": 83 | if sub_domain != "awslabs" and sub_domain != "blueprint-test": 84 | print("yes") 85 | -------------------------------------------------------------------------------- /python/lambda_kds_datagen.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | # Apache-2.0 3 | 4 | import cfnresponse 5 | import logging 6 | import signal 7 | import boto3 8 | import datetime 9 | import random 10 | import json 11 | 12 | LOGGER = logging.getLogger() 13 | LOGGER.setLevel(logging.INFO) 14 | 15 | timeout_seconds = 120 16 | 17 | 18 | def get_data(): 19 | return { 20 | 'event_time': datetime.datetime.now().isoformat(), 21 | 'ticker': random.choice(['AAPL', 'AMZN', 'MSFT', 'INTC', 'TBV']), 22 | 'price': round(random.random() * 100, 2), 23 | } 24 | 25 | 26 | def generate_records(streamArn, numberOfItems): 27 | client = boto3.client('kinesis') 28 | for _ in range(numberOfItems): 29 | data = get_data() 30 | client.put_record( 31 | StreamARN=streamArn, 32 | Data=json.dumps(data), 33 | PartitionKey=data["ticker"]) 34 | 35 | 36 | def handler(event, context): 37 | # Setup alarm for remaining runtime minus a second 38 | signal.alarm(timeout_seconds) 39 | try: 40 | LOGGER.info('Request Event: %s', event) 41 | LOGGER.info('Request Context: %s', context) 42 | if event['RequestType'] == 'Create': 43 | generate_records(event['ResourceProperties']['StreamArn'], int( 44 | event['ResourceProperties']['NumberOfItems'])) 45 | cfnresponse.send(event, context, cfnresponse.SUCCESS, { 46 | "Message": "Resource created"}) 47 | elif event['RequestType'] == 'Update': 48 | cfnresponse.send(event, context, cfnresponse.SUCCESS, { 49 | "Message": "Resource updated"}) 50 | elif event['RequestType'] == 'Delete': 51 | cfnresponse.send(event, context, cfnresponse.SUCCESS, { 52 | "Message": "Resource deleted"}) 53 | else: 54 | err = f"Unknown RequestType: {event['RequestType']}" 55 | LOGGER.error(err) 56 | cfnresponse.send( 57 | event, context, cfnresponse.FAILED, {"Message": err}) 58 | except Exception as e: 59 | LOGGER.error("Failed %s", e) 60 | cfnresponse.send(event, context, cfnresponse.FAILED, 61 | {"Message": str(e)}) 62 | 63 | 64 | def timeout_handler(_signal, _frame): 65 | '''Handle SIGALRM''' 66 | raise Exception('Operation timed out') 67 | 68 | 69 | signal.signal(signal.SIGALRM, timeout_handler) 70 | -------------------------------------------------------------------------------- /python/lambda_msf_app_start.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | # Apache-2.0 3 | 4 | import cfnresponse 5 | import json 6 | import logging 7 | import signal 8 | import boto3 9 | import time 10 | 11 | LOGGER = logging.getLogger() 12 | LOGGER.setLevel(logging.INFO) 13 | 14 | timeout_seconds = 550 15 | poll_interval_seconds = 1 16 | 17 | 18 | def handler(event, context): 19 | # Setup alarm for remaining runtime minus a second 20 | signal.alarm(timeout_seconds) 21 | try: 22 | LOGGER.info('Request Event: %s', event) 23 | LOGGER.info('Request Context: %s', context) 24 | if event['RequestType'] == 'Create': 25 | start_app(event['ResourceProperties']['AppName']) 26 | cfnresponse.send(event, context, cfnresponse.SUCCESS, { 27 | "Message": "Resource created"}) 28 | elif event['RequestType'] == 'Update': 29 | start_app(event['ResourceProperties']['AppName']) 30 | cfnresponse.send(event, context, cfnresponse.SUCCESS, { 31 | "Message": "Resource updated"}) 32 | elif event['RequestType'] == 'Delete': 33 | cfnresponse.send(event, context, cfnresponse.SUCCESS, { 34 | "Message": "Resource deleted"}) 35 | else: 36 | err = f"Unknown RequestType: {event['RequestType']}" 37 | LOGGER.error(err) 38 | cfnresponse.send( 39 | event, context, cfnresponse.FAILED, {"Message": err}) 40 | except Exception as e: 41 | LOGGER.error("Failed %s", e) 42 | cfnresponse.send(event, context, cfnresponse.FAILED, 43 | {"Message": str(e)}) 44 | 45 | 46 | def start_app(appName): 47 | client = boto3.client('kinesisanalyticsv2') 48 | desc_response = client.describe_application(ApplicationName=appName) 49 | status = desc_response['ApplicationDetail']['ApplicationStatus'] 50 | if status == "READY": 51 | # We assume that after a successful invocation of this API 52 | # application would not be in READY state. 53 | client.start_application(ApplicationName=appName) 54 | while (True): 55 | desc_response = client.describe_application(ApplicationName=appName) 56 | status = desc_response['ApplicationDetail']['ApplicationStatus'] 57 | if status != "STARTING": 58 | if status != "RUNNING": 59 | raise Exception(f"Unable to start the app in state: {status}") 60 | LOGGER.info(f"Application status changed: {status}") 61 | break 62 | else: 63 | time.sleep(poll_interval_seconds) 64 | 65 | 66 | def timeout_handler(_signal, _frame): 67 | '''Handle SIGALRM''' 68 | raise Exception('Operation timed out') 69 | 70 | 71 | signal.signal(signal.SIGALRM, timeout_handler) 72 | -------------------------------------------------------------------------------- /python/lambda_run_studio_notebook/bundle-lambda-for-release.sh: -------------------------------------------------------------------------------- 1 | # run this to package up lambda 2 | 3 | # install dependencies in folder 4 | pip install -r requirements.txt -t dependencies 5 | 6 | cd dependencies 7 | 8 | zip -r ../my-deployment.zip .; 9 | 10 | cd ../ 11 | 12 | zip my-deployment.zip lambda_function.py 13 | 14 | zip my-deployment.zip cfnresponse.py -------------------------------------------------------------------------------- /python/lambda_run_studio_notebook/cfnresponse.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | # Apache-2.0 3 | 4 | from __future__ import print_function 5 | import urllib3 6 | import json 7 | 8 | SUCCESS = "SUCCESS" 9 | FAILED = "FAILED" 10 | 11 | http = urllib3.PoolManager() 12 | 13 | 14 | def send(event, context, responseStatus, responseData, physicalResourceId=None, noEcho=False, reason=None): 15 | responseUrl = event['ResponseURL'] 16 | 17 | print(responseUrl) 18 | 19 | responseBody = { 20 | 'Status': responseStatus, 21 | 'Reason': reason or "See the details in CloudWatch Log Stream: {}".format(context.log_stream_name), 22 | 'PhysicalResourceId': physicalResourceId or context.log_stream_name, 23 | 'StackId': event['StackId'], 24 | 'RequestId': event['RequestId'], 25 | 'LogicalResourceId': event['LogicalResourceId'], 26 | 'NoEcho': noEcho, 27 | 'Data': responseData 28 | } 29 | 30 | json_responseBody = json.dumps(responseBody) 31 | 32 | print("Response body:") 33 | print(json_responseBody) 34 | 35 | headers = { 36 | 'content-type': '', 37 | 'content-length': str(len(json_responseBody)) 38 | } 39 | 40 | try: 41 | response = http.request( 42 | 'PUT', responseUrl, headers=headers, body=json_responseBody) 43 | print("Status code:", response.status) 44 | 45 | except Exception as e: 46 | 47 | print("send(..) failed executing http.request(..):", e) 48 | -------------------------------------------------------------------------------- /python/lambda_run_studio_notebook/lambda_function.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # Apache-2.0 3 | 4 | import cfnresponse 5 | import logging 6 | import requests 7 | import signal 8 | import json 9 | import boto3 10 | import os 11 | 12 | LOGGER = logging.getLogger() 13 | LOGGER.setLevel(logging.INFO) 14 | 15 | timeout_seconds = 120 16 | note_url_id = "ABCDEFGHI" 17 | 18 | 19 | def run_all_paragraphs(my_msf_appname): 20 | 21 | msf = boto3.client('kinesisanalyticsv2') 22 | 23 | response = msf.create_application_presigned_url( 24 | ApplicationName=my_msf_appname, 25 | UrlType='ZEPPELIN_UI_URL', 26 | SessionExpirationDurationInSeconds=1800 27 | ) 28 | 29 | responseString = '' 30 | 31 | if not response or 'AuthorizedUrl' not in response: 32 | # app is invalid 33 | # or MSF app is not running 34 | responseString = "Unable to get pre signed url for app" 35 | raise Exception(responseString) 36 | 37 | rest_api_url = response['AuthorizedUrl'] 38 | 39 | note_url = rest_api_url.replace( 40 | '/zeppelin/', '/zeppelin/api/notebook/job/' + note_url_id) 41 | 42 | s = requests.Session() 43 | 44 | # send a GET request to the note_url 45 | # This does NOT run the note. 
It does the auth for us with the endpoint 46 | s.get(note_url) 47 | 48 | # send post request now that we have the VerifiedAuthToken cookie 49 | # split the request url so that we do not have anything past '?auth' 50 | url_array = note_url.split('?auth') 51 | 52 | second_response = s.post(url_array[0]) 53 | 54 | if 'exception' in second_response.json(): 55 | # there was an issue running the note 56 | response_json = second_response.json()['message'] 57 | else: 58 | response_json = json.loads(second_response.text) 59 | 60 | return response_json 61 | 62 | 63 | def lambda_handler(event, context): 64 | # Setup alarm for remaining runtime minus a second 65 | signal.alarm(timeout_seconds) 66 | try: 67 | LOGGER.info('Request Event: %s', event) 68 | LOGGER.info('Request Context: %s', context) 69 | 70 | env_app_name = os.environ["AppName"] 71 | 72 | if event['RequestType'] == 'Create': 73 | note_response = run_all_paragraphs(env_app_name) 74 | cfnresponse.send(event, context, cfnresponse.SUCCESS, { 75 | "Message": str(note_response)}) 76 | elif event['RequestType'] == 'Update': 77 | cfnresponse.send(event, context, cfnresponse.SUCCESS, { 78 | "Message": "Resource updated"}) 79 | elif event['RequestType'] == 'Delete': 80 | cfnresponse.send(event, context, cfnresponse.SUCCESS, { 81 | "Message": "Resource deleted"}) 82 | else: 83 | err = f"Unknown RequestType: {event['RequestType']}" 84 | LOGGER.error(err) 85 | cfnresponse.send( 86 | event, context, cfnresponse.FAILED, {"Message": err}) 87 | except Exception as e: 88 | LOGGER.error("Failed %s", e) 89 | cfnresponse.send(event, context, cfnresponse.FAILED, 90 | {"Message": str(e)}) 91 | 92 | 93 | def timeout_handler(_signal, _frame): 94 | '''Handle SIGALRM''' 95 | raise Exception('Operation timed out') 96 | 97 | 98 | signal.signal(signal.SIGALRM, timeout_handler) 99 | -------------------------------------------------------------------------------- /python/lambda_run_studio_notebook/requirements.txt: -------------------------------------------------------------------------------- 1 | requests==2.28.2 2 | -------------------------------------------------------------------------------- /python/local_kds_datagen.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import json 3 | import datetime 4 | import random 5 | import argparse 6 | 7 | def get_data(): 8 | return { 9 | 'event_time': datetime.datetime.now().isoformat(), 10 | 'ticker': random.choice(['AAPL', 'AMZN', 'MSFT', 'INTC', 'TBV']), 11 | 'price': round(random.random() * 100, 2), 12 | } 13 | 14 | 15 | def generate_records(streamArn, numberOfItems, region): 16 | client = boto3.client('kinesis', region_name=region) 17 | for _ in range(numberOfItems): 18 | data = get_data() 19 | client.put_record( 20 | StreamARN=streamArn, 21 | Data=json.dumps(data), 22 | PartitionKey=data["ticker"]) 23 | 24 | 25 | def main(): 26 | parser = argparse.ArgumentParser() 27 | parser.add_argument("--stream-arn", required=True, help="ARN of the Kinesis data stream to produce test records into") 28 | parser.add_argument("--count", type=int, required=True, help="Number of test records to produce") 29 | parser.add_argument("--region", required=True, help="AWS region of the Kinesis data stream specified via --stream-arn") 30 | args = parser.parse_args() 31 | print(f"Producing {args.count} records into {args.stream_arn}") 32 | generate_records(args.stream_arn, args.count, args.region) 33 | print("done, bye") 34 | 35 | if __name__ == "__main__": main() --------------------------------------------------------------------------------
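A quick usage note for `local_kds_datagen.py` above: with AWS credentials configured, it can be run directly, e.g. `python local_kds_datagen.py --stream-arn <your-stream-arn> --count 100 --region us-east-1` (the stream ARN and region here are placeholders). Because the entry point sits behind an `if __name__ == "__main__":` guard, `generate_records` can also be imported and reused from other scripts without triggering the CLI.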
/python/msf_java_app_custom_resource_handler.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import botocore 3 | import cfnresponse 4 | import logging 5 | import signal 6 | 7 | LOGGER = logging.getLogger() 8 | LOGGER.setLevel(logging.INFO) 9 | 10 | timeout_seconds = 300 11 | 12 | 13 | def handler(event, context): 14 | 15 | # setup alarm for remaining runtime minus a second 16 | signal.alarm(timeout_seconds) 17 | 18 | try: 19 | LOGGER.info('REQUEST RECEIVED: %s', event) 20 | LOGGER.info('REQUEST Context: %s', context) 21 | client = boto3.client('kinesisanalyticsv2') 22 | props = event['ResourceProperties'] 23 | 24 | # set up env vars 25 | if event['RequestType'] == 'Create': 26 | LOGGER.info('Creating MSF Java app') 27 | create_app(client=client, props=props) 28 | cfnresponse.send(event, context, cfnresponse.SUCCESS, { "Message": "Successfully Created Application"}) 29 | elif event['RequestType'] == 'Update': 30 | LOGGER.info('Nothing to update') 31 | cfnresponse.send(event, context, cfnresponse.SUCCESS, { "Message": "Successfully Updated Application"}) 32 | elif event['RequestType'] == 'Delete': 33 | delete_app(client=client, props=props) 34 | cfnresponse.send(event, context, cfnresponse.SUCCESS, { "Message": "Successfully Deleted Application"}) 35 | 36 | except Exception as e: 37 | LOGGER.error(str(e)) 38 | cfnresponse.send(event, context, cfnresponse.FAILED, {"Message": str(e)}) 39 | 40 | 41 | def create_app(client, props): 42 | # check if app already exists 43 | try: 44 | describe_response = client.describe_application( 45 | ApplicationName=props['AppName']) 46 | LOGGER.info("App already exists %s", describe_response) 47 | return 48 | except botocore.exceptions.ClientError as e: 49 | if e.response["Error"]["Code"] != "ResourceNotFoundException": 50 | raise e 51 | LOGGER.info("App doesn't exist yet so I am creating it") 52 | 53 | response = client.create_application( 54 | ApplicationName=props['AppName'], 55 | ApplicationDescription="MSF blueprint Java application", 56 | RuntimeEnvironment=props['RuntimeEnvironment'], 57 | ServiceExecutionRole=props['ServiceExecutionRole'], 58 | ApplicationConfiguration={ 59 | "FlinkApplicationConfiguration": { 60 | "ParallelismConfiguration": { 61 | "ConfigurationType": "CUSTOM", 62 | "Parallelism": int(props['Parallelism']), 63 | "ParallelismPerKPU": int(props['ParallelismPerKpu']), 64 | "AutoScalingEnabled": bool(props['AutoscalingEnabled']), 65 | }, 66 | "CheckpointConfiguration": { 67 | "ConfigurationType": "CUSTOM", 68 | 'CheckpointingEnabled': True, 69 | 'CheckpointInterval': int(props["CheckpointInterval"]), 70 | 'MinPauseBetweenCheckpoints': int(props["MinPauseBetweenCheckpoints"]) 71 | }, 72 | }, 73 | 'EnvironmentProperties': { 74 | 'PropertyGroups': [ 75 | { 76 | 'PropertyGroupId': 'BlueprintMetadata', 77 | 'PropertyMap': props['ApplicationProperties'] 78 | }, 79 | ] 80 | }, 81 | "ApplicationCodeConfiguration": { 82 | "CodeContent": { 83 | "S3ContentLocation": { 84 | "BucketARN": props['BucketArn'], 85 | "FileKey": props['FileKey'] 86 | }, 87 | }, 88 | "CodeContentType": "ZIPFILE", 89 | }, 90 | }, 91 | CloudWatchLoggingOptions=[ 92 | {"LogStreamARN": props['LogStreamArn']}, 93 | ]) 94 | 95 | LOGGER.info("Create response %s", response) 96 | 97 | 98 | def delete_app(client, props): 99 | # check if app already deleted 100 | describe_response = "" 101 | try: 102 | describe_response = client.describe_application(ApplicationName=props['AppName']) 103 | LOGGER.info("App exists, going to delete 
it %s", describe_response) 104 | except botocore.exceptions.ClientError as e: 105 | if e.response["Error"]["Code"] != "ResourceNotFoundException": 106 | raise e 107 | LOGGER.info("App doesn't exist or already deleted %s", e) 108 | return 109 | 110 | create_timestamp = describe_response["ApplicationDetail"]["CreateTimestamp"] 111 | 112 | delete_response = client.delete_application(ApplicationName=props['AppName'], CreateTimestamp=create_timestamp) 113 | LOGGER.info("Delete response %s", delete_response) 114 | 115 | 116 | def timeout_handler(_signal, _frame): 117 | '''Handle SIGALRM''' 118 | raise Exception('Operation timed out') 119 | 120 | 121 | signal.signal(signal.SIGALRM, timeout_handler) 122 | -------------------------------------------------------------------------------- /python/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3==1.26.62 2 | urllib3==1.26.2 3 | requests==2.25.0 4 | pytest==6.0.0 5 | cfnresponse==1.1.2 6 | --------------------------------------------------------------------------------