├── test ├── index.ts ├── pipelines │ ├── index.ts │ ├── green.ts │ ├── yellow.ts │ ├── reviews.ts │ └── iot-data.ts ├── data-product.test.ts ├── test-stack.ts ├── lambda │ └── iot-data-generator │ │ └── index.py ├── datalake.test.ts ├── code │ └── iot_data │ │ └── streaming_convert_to_parquet.py └── __snapshots__ │ └── datalake.test.ts.snap ├── .github ├── pull_request_template.md └── workflows │ ├── auto-approve.yml │ ├── pull-request-lint.yml │ ├── stale.yml │ ├── codeql-analysis.yml │ ├── upgrade-main.yml │ ├── build.yml │ └── release.yml ├── lambda ├── create-tags-handler │ ├── requirements.txt │ └── index.py ├── enable-hybrid-catalog │ ├── requirements.txt │ └── index.py └── download-data │ └── index.py ├── assets └── images │ ├── glue-catalog.png │ ├── dl-settings-page.png │ ├── consumer-cross-table-access.png │ └── data-mesh-central-governance.png ├── src ├── global │ ├── interfaces.ts │ └── enums.ts ├── index.ts ├── personas │ ├── data-lake-creator.ts │ ├── data-lake-analyst.ts │ └── data-lake-admin.ts ├── data-product.ts ├── data-lake-bucket.ts ├── etl │ ├── glue-table.ts │ ├── glue-crawler.ts │ ├── glue-job-ops.ts │ └── glue-job.ts ├── workflows │ └── scheduled-job-workflow.ts ├── utils.ts ├── pipeline.ts ├── data-streams │ ├── kinesis-stream.ts │ ├── s3-delivery-stream.ts │ └── kinesis-ops.ts ├── data-lake.ts └── data-lake-strategy.ts ├── .npmignore ├── .projen ├── files.json ├── deps.json └── tasks.json ├── .mergify.yml ├── .gitattributes ├── tsconfig.dev.json ├── LICENSE ├── .gitignore ├── .projenrc.js ├── package.json ├── .eslintrc.json └── README.md /test/index.ts: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | Fixes # -------------------------------------------------------------------------------- 
/lambda/create-tags-handler/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3 2 | botocore -------------------------------------------------------------------------------- /lambda/enable-hybrid-catalog/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3 2 | botocore -------------------------------------------------------------------------------- /assets/images/glue-catalog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randyridgley/cdk-datalake-constructs/HEAD/assets/images/glue-catalog.png -------------------------------------------------------------------------------- /assets/images/dl-settings-page.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randyridgley/cdk-datalake-constructs/HEAD/assets/images/dl-settings-page.png -------------------------------------------------------------------------------- /test/pipelines/index.ts: -------------------------------------------------------------------------------- 1 | export * from './green'; 2 | export * from './yellow'; 3 | export * from './iot-data'; 4 | export * from './reviews'; -------------------------------------------------------------------------------- /assets/images/consumer-cross-table-access.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randyridgley/cdk-datalake-constructs/HEAD/assets/images/consumer-cross-table-access.png -------------------------------------------------------------------------------- /assets/images/data-mesh-central-governance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/randyridgley/cdk-datalake-constructs/HEAD/assets/images/data-mesh-central-governance.png 
-------------------------------------------------------------------------------- /src/global/interfaces.ts: -------------------------------------------------------------------------------- 1 | export interface DataSetResult { 2 | readonly destinationPrefix: string; 3 | readonly sourceBucketName: string | undefined; 4 | readonly sourceKeys: string[] | undefined; 5 | readonly rawBucketName: string; 6 | readonly trustedBucketName: string; 7 | readonly refinedBucketName: string; 8 | readonly destinationBucketName: string; 9 | } 10 | -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | # ~~ Generated by projen. To modify, edit .projenrc.js and run "npx projen". 2 | /.projen/ 3 | /test-reports/ 4 | junit.xml 5 | /coverage/ 6 | /dist/changelog.md 7 | /dist/version.txt 8 | /.mergify.yml 9 | /test/ 10 | /tsconfig.dev.json 11 | /src/ 12 | !/lib/ 13 | !/lib/**/*.js 14 | !/lib/**/*.d.ts 15 | dist 16 | /tsconfig.json 17 | /.github/ 18 | /.vscode/ 19 | /.idea/ 20 | /.projenrc.js 21 | tsconfig.tsbuildinfo 22 | /.eslintrc.json 23 | !.jsii 24 | cdk.out 25 | cdk.context.json 26 | yarn-error.log 27 | .DS_Store 28 | coverage 29 | .metals 30 | maven_release* 31 | examples* 32 | -------------------------------------------------------------------------------- /test/pipelines/green.ts: -------------------------------------------------------------------------------- 1 | import { DataPipelineType, DataTier } from '../../src/global/enums'; 2 | import { Pipeline } from '../../src/pipeline'; 3 | 4 | export function GreenPipeline() { 5 | return new Pipeline({ 6 | type: DataPipelineType.S3, 7 | name: 'taxi-green', 8 | destinationPrefix: 'green/', 9 | dataDropTier: DataTier.RAW, 10 | s3Properties: { 11 | sourceBucketName: 'nyc-tlc', 12 | sourceKeys: [ 13 | 'trip data/green_tripdata_2020-11.csv', 14 | 'trip data/green_tripdata_2020-12.csv', 15 | ], 16 | }, 17 | }); 18 | } 
19 | -------------------------------------------------------------------------------- /test/pipelines/yellow.ts: -------------------------------------------------------------------------------- 1 | 2 | import { DataPipelineType, DataTier } from '../../src/global/enums'; 3 | import { Pipeline } from '../../src/pipeline'; 4 | 5 | export function YellowPipeline() { 6 | return new Pipeline({ 7 | type: DataPipelineType.S3, 8 | name: 'taxi-yellow', 9 | destinationPrefix: 'yellow/', 10 | dataDropTier: DataTier.RAW, 11 | s3Properties: { 12 | sourceBucketName: 'nyc-tlc', 13 | sourceKeys: [ 14 | 'trip data/yellow_tripdata_2020-11.csv', 15 | 'trip data/yellow_tripdata_2020-12.csv', 16 | ], 17 | }, 18 | }); 19 | } 20 | -------------------------------------------------------------------------------- /.projen/files.json: -------------------------------------------------------------------------------- 1 | { 2 | "files": [ 3 | ".eslintrc.json", 4 | ".gitattributes", 5 | ".github/pull_request_template.md", 6 | ".github/workflows/auto-approve.yml", 7 | ".github/workflows/build.yml", 8 | ".github/workflows/pull-request-lint.yml", 9 | ".github/workflows/release.yml", 10 | ".github/workflows/stale.yml", 11 | ".github/workflows/upgrade-main.yml", 12 | ".gitignore", 13 | ".mergify.yml", 14 | ".projen/deps.json", 15 | ".projen/files.json", 16 | ".projen/tasks.json", 17 | "LICENSE", 18 | "tsconfig.dev.json" 19 | ], 20 | "//": "~~ Generated by projen. To modify, edit .projenrc.js and run \"npx projen\"." 21 | } 22 | -------------------------------------------------------------------------------- /.github/workflows/auto-approve.yml: -------------------------------------------------------------------------------- 1 | # ~~ Generated by projen. To modify, edit .projenrc.js and run "npx projen". 
2 | 3 | name: auto-approve 4 | on: 5 | pull_request_target: 6 | types: 7 | - labeled 8 | - opened 9 | - synchronize 10 | - reopened 11 | - ready_for_review 12 | jobs: 13 | approve: 14 | runs-on: ubuntu-latest 15 | permissions: 16 | pull-requests: write 17 | if: contains(github.event.pull_request.labels.*.name, 'auto-approve') && (github.event.pull_request.user.login == 'randyridgley') 18 | steps: 19 | - uses: hmarr/auto-approve-action@v2.1.0 20 | with: 21 | github-token: ${{ secrets.GITHUB_TOKEN }} 22 | -------------------------------------------------------------------------------- /test/pipelines/reviews.ts: -------------------------------------------------------------------------------- 1 | 2 | import { DataPipelineType, DataTier } from '../../src/global/enums'; 3 | import { Pipeline } from '../../src/pipeline'; 4 | 5 | export function ReviewsPipeline() { 6 | return new Pipeline({ 7 | type: DataPipelineType.S3, 8 | name: 'reviews', 9 | destinationPrefix: 'reviews/', 10 | dataDropTier: DataTier.REFINED, 11 | s3Properties: { 12 | sourceBucketName: 'amazon-reviews-pds', 13 | sourceKeys: [ 14 | 'parquet/product_category=Toys/part-00000-495c48e6-96d6-4650-aa65-3c36a3516ddd.c000.snappy.parquet', 15 | 'parquet/product_category=Toys/part-00001-495c48e6-96d6-4650-aa65-3c36a3516ddd.c000.snappy.parquet', 16 | ], 17 | }, 18 | }); 19 | } 20 | -------------------------------------------------------------------------------- /test/data-product.test.ts: -------------------------------------------------------------------------------- 1 | import { Pipeline, DataProduct } from '../src'; 2 | import * as pipelines from '../test/pipelines'; 3 | 4 | const dataProductAccount = '123456789012'; 5 | const databaseName = 'data-product'; 6 | 7 | const taxiPipes: Array = [ 8 | pipelines.YellowPipeline(), 9 | pipelines.GreenPipeline(), 10 | ]; 11 | 12 | test('Check Resources', () => { 13 | const dataProduct = new DataProduct({ 14 | pipelines: taxiPipes, 15 | accountId: dataProductAccount, 16 | 
databaseName: databaseName, 17 | }); 18 | expect(dataProduct.pipelines.length).toEqual(2); 19 | expect(dataProduct.accountId).toMatch(dataProductAccount); 20 | expect(dataProduct.databaseName).toMatch(databaseName); 21 | }); 22 | -------------------------------------------------------------------------------- /.github/workflows/pull-request-lint.yml: -------------------------------------------------------------------------------- 1 | # ~~ Generated by projen. To modify, edit .projenrc.js and run "npx projen". 2 | 3 | name: pull-request-lint 4 | on: 5 | pull_request_target: 6 | types: 7 | - labeled 8 | - opened 9 | - synchronize 10 | - reopened 11 | - ready_for_review 12 | - edited 13 | jobs: 14 | validate: 15 | name: Validate PR title 16 | runs-on: ubuntu-latest 17 | permissions: 18 | pull-requests: write 19 | steps: 20 | - uses: amannn/action-semantic-pull-request@v3.4.6 21 | env: 22 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 23 | with: 24 | types: |- 25 | feat 26 | fix 27 | chore 28 | requireScope: false 29 | -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- 1 | export * from './data-lake'; 2 | export * from './data-product'; 3 | export * from './pipeline'; 4 | export * from './data-lake-bucket'; 5 | export * from './utils'; 6 | export * from './data-streams/s3-delivery-stream'; 7 | export * from './data-streams/kinesis-stream'; 8 | export * from './data-streams/kinesis-ops'; 9 | export * from './etl/glue-crawler'; 10 | export * from './etl/glue-job'; 11 | export * from './etl/glue-job-ops'; 12 | export * from './etl/glue-table'; 13 | export * from './global/enums'; 14 | export * from './global/interfaces'; 15 | // export * from './etl/glue-notebook'; 16 | export * from './personas/data-lake-admin'; 17 | export * from './personas/data-lake-analyst'; 18 | export * from './personas/data-lake-creator'; 19 | // export * from 
'./emr-cluster-sc' 20 | // export * from './emr-studio' 21 | -------------------------------------------------------------------------------- /.mergify.yml: -------------------------------------------------------------------------------- 1 | # ~~ Generated by projen. To modify, edit .projenrc.js and run "npx projen". 2 | 3 | queue_rules: 4 | - name: default 5 | conditions: 6 | - "#approved-reviews-by>=1" 7 | - -label~=(do-not-merge) 8 | - status-success=build 9 | - status-success=package-js 10 | - status-success=package-java 11 | - status-success=package-python 12 | pull_request_rules: 13 | - name: Automatic merge on approval and successful build 14 | actions: 15 | delete_head_branch: {} 16 | queue: 17 | method: squash 18 | name: default 19 | commit_message_template: |- 20 | {{ title }} (#{{ number }}) 21 | 22 | {{ body }} 23 | conditions: 24 | - "#approved-reviews-by>=1" 25 | - -label~=(do-not-merge) 26 | - status-success=build 27 | - status-success=package-js 28 | - status-success=package-java 29 | - status-success=package-python 30 | -------------------------------------------------------------------------------- /src/global/enums.ts: -------------------------------------------------------------------------------- 1 | export enum Stage { 2 | ALPHA = 'alpha', 3 | BETA = 'beta', 4 | GAMMA = 'gamma', 5 | PROD = 'prod', 6 | } 7 | 8 | export enum Permissions { 9 | ALTER = 'ALTER', 10 | CREATE_DATABASE = 'CREATE_DATABASE', 11 | CREATE_TABLE = 'CREATE_TABLE', 12 | DATA_LOCATION_ACCESS = 'DATA_LOCATION_ACCESS', 13 | DELETE = 'DELETE', 14 | DESCRIBE = 'DESCRIBE', 15 | DROP = 'DROP', 16 | INSERT = 'INSERT', 17 | SELECT = 'SELECT', 18 | ASSOCIATE = 'ASSOCIATE', 19 | CREATE_TABLE_READ_WRITE = 'CREATE_TABLE_READ_WRITE', 20 | } 21 | 22 | export enum LakeKind { 23 | DATA_PRODUCT = 'DATA_PRODUCT', 24 | CENTRAL_CATALOG = 'CENTRAL_CATALOG', 25 | CONSUMER = 'CONSUMER', 26 | DATA_PRODUCT_AND_CATALOG = 'DATA_PRODUCT_AND_CATALOG', 27 | } 28 | 29 | export enum DataTier { 30 | RAW = 
'raw', 31 | REFINED = 'refined', 32 | TRUSTED = 'trusted', 33 | } 34 | 35 | export enum DataPipelineType { 36 | STREAM = 'stream', 37 | JDBC = 'jdbc', 38 | S3 = 's3' 39 | } 40 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # ~~ Generated by projen. To modify, edit .projenrc.js and run "npx projen". 2 | 3 | *.snap linguist-generated 4 | /.eslintrc.json linguist-generated 5 | /.gitattributes linguist-generated 6 | /.github/pull_request_template.md linguist-generated 7 | /.github/workflows/auto-approve.yml linguist-generated 8 | /.github/workflows/build.yml linguist-generated 9 | /.github/workflows/pull-request-lint.yml linguist-generated 10 | /.github/workflows/release.yml linguist-generated 11 | /.github/workflows/stale.yml linguist-generated 12 | /.github/workflows/upgrade-main.yml linguist-generated 13 | /.gitignore linguist-generated 14 | /.mergify.yml linguist-generated 15 | /.npmignore linguist-generated 16 | /.projen/** linguist-generated 17 | /.projen/deps.json linguist-generated 18 | /.projen/files.json linguist-generated 19 | /.projen/tasks.json linguist-generated 20 | /LICENSE linguist-generated 21 | /package.json linguist-generated 22 | /tsconfig.dev.json linguist-generated 23 | /yarn.lock linguist-generated -------------------------------------------------------------------------------- /tsconfig.dev.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "alwaysStrict": true, 4 | "declaration": true, 5 | "esModuleInterop": true, 6 | "experimentalDecorators": true, 7 | "inlineSourceMap": true, 8 | "inlineSources": true, 9 | "lib": [ 10 | "es2019" 11 | ], 12 | "module": "CommonJS", 13 | "noEmitOnError": false, 14 | "noFallthroughCasesInSwitch": true, 15 | "noImplicitAny": true, 16 | "noImplicitReturns": true, 17 | "noImplicitThis": true, 18 | 
"noUnusedLocals": true, 19 | "noUnusedParameters": true, 20 | "resolveJsonModule": true, 21 | "strict": true, 22 | "strictNullChecks": true, 23 | "strictPropertyInitialization": true, 24 | "stripInternal": true, 25 | "target": "ES2019" 26 | }, 27 | "include": [ 28 | ".projenrc.js", 29 | "src/**/*.ts", 30 | "test/**/*.ts" 31 | ], 32 | "exclude": [ 33 | "node_modules" 34 | ], 35 | "//": "~~ Generated by projen. To modify, edit .projenrc.js and run \"npx projen\"." 36 | } 37 | -------------------------------------------------------------------------------- /test/test-stack.ts: -------------------------------------------------------------------------------- 1 | import { Stack, StackProps } from 'aws-cdk-lib'; 2 | // import { NagSuppressions } from 'cdk-nag'; 3 | import { Construct } from 'constructs'; 4 | import { DataLake, DataProduct } from '../src'; 5 | import { LakeKind, Stage } from '../src/global/enums'; 6 | 7 | export interface TestStackProps extends StackProps { 8 | readonly stage: Stage; 9 | readonly dataProducts?: DataProduct[]; 10 | } 11 | 12 | export class CdkTestStack extends Stack { 13 | public readonly datalake: DataLake; 14 | 15 | constructor(scope: Construct, id: string, props: TestStackProps) { 16 | super(scope, id, props); 17 | // NagSuppressions.addStackSuppressions(this, [ 18 | // { 19 | // id: 'AwsSolutions-S1', 20 | // reason: 'Demonstrate a stack level suppression.', 21 | // }, 22 | // ]); 23 | 24 | this.datalake = new DataLake(this, 'datalake', { 25 | name: 'test-lake', 26 | stageName: props.stage, 27 | dataProducts: props.dataProducts, 28 | lakeKind: LakeKind.DATA_PRODUCT_AND_CATALOG, 29 | createAthenaWorkgroup: true, 30 | }); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2022 Randy Ridgley 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of 
this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 
20 | -------------------------------------------------------------------------------- /test/lambda/iot-data-generator/index.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import os 4 | import boto3 5 | import random 6 | import datetime 7 | 8 | def get_random_data(): 9 | current_temperature = round(10 + random.random() * 170, 2) 10 | if current_temperature > 160: 11 | status = "ERROR" 12 | elif current_temperature > 140 or random.randrange(1, 100) > 80: 13 | status = random.choice(["WARNING","ERROR"]) 14 | else: 15 | status = "OK" 16 | return { 17 | 'sensor_id': random.randrange(1, 100), 18 | 'current_temperature': current_temperature, 19 | 'status': status, 20 | 'event_time': datetime.datetime.now().isoformat() 21 | } 22 | 23 | 24 | def send_data(stream_name, kinesis_client): 25 | for lp in range(1000): 26 | data = get_random_data() 27 | partition_key = str(data["sensor_id"]) 28 | print(data) 29 | kinesis_client.put_record( 30 | StreamName=stream_name, 31 | Data=json.dumps(data), 32 | PartitionKey=partition_key) 33 | 34 | 35 | def handler(event, context): 36 | kinesis_client = boto3.client('kinesis') 37 | stream = os.getenv('KINESIS_STREAM') 38 | send_data(stream, kinesis_client) -------------------------------------------------------------------------------- /src/personas/data-lake-creator.ts: -------------------------------------------------------------------------------- 1 | import { CfnOutput } from 'aws-cdk-lib'; 2 | import * as iam from 'aws-cdk-lib/aws-iam'; 3 | import { Construct } from 'constructs'; 4 | 5 | export interface DataLakeCreatorProperties { 6 | readonly name: string; 7 | } 8 | 9 | export class DataLakeCreator extends Construct { 10 | public readonly role: iam.IRole; 11 | 12 | constructor(scope: Construct, id: string, props: DataLakeCreatorProperties) { 13 | super(scope, id); 14 | 15 | this.role = new iam.Role(this, `AWSDBCreatorServiceRole-${props.name}`, { 16 | 
roleName: props.name, 17 | assumedBy: new iam.CompositePrincipal( 18 | new iam.ServicePrincipal('glue.amazonaws.com'), 19 | ), 20 | managedPolicies: [ 21 | iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSGlueServiceRole'), 22 | iam.ManagedPolicy.fromAwsManagedPolicyName('AWSLakeFormationDataAdmin'), 23 | iam.ManagedPolicy.fromAwsManagedPolicyName('AmazonS3FullAccess'), 24 | ], 25 | path: '/service-role/', 26 | }); 27 | 28 | this.role.addToPrincipalPolicy(new iam.PolicyStatement({ 29 | actions: [ 30 | 'lakeformation:GetDataAccess', 31 | ], 32 | resources: ['*'], 33 | })); 34 | new CfnOutput(this, 'DataLakeDatabaseCreatorRole', { value: this.role.roleName }); 35 | } 36 | } -------------------------------------------------------------------------------- /src/data-product.ts: -------------------------------------------------------------------------------- 1 | import { Duration, RemovalPolicy } from 'aws-cdk-lib'; 2 | import * as s3 from 'aws-cdk-lib/aws-s3'; 3 | import { IDependable } from 'constructs'; 4 | import { Pipeline } from './pipeline'; 5 | 6 | export interface DataProductProperties { 7 | readonly accountId: string; 8 | readonly dataCatalogAccountId?: string; 9 | readonly databaseName: string; 10 | readonly pipelines: Pipeline[]; 11 | readonly s3BucketProps?: s3.BucketProps; 12 | } 13 | 14 | export class DataProduct implements IDependable { 15 | readonly accountId: string; 16 | readonly dataCatalogAccountId?: string; 17 | readonly databaseName: string; 18 | readonly pipelines: Pipeline[]; 19 | readonly s3BucketProps?: s3.BucketProps; 20 | 21 | constructor(props: DataProductProperties) { 22 | this.accountId = props.accountId; 23 | this.dataCatalogAccountId = props.dataCatalogAccountId; 24 | this.databaseName = props.databaseName; 25 | this.pipelines = props.pipelines; 26 | 27 | if (props.s3BucketProps) { 28 | this.s3BucketProps = props.s3BucketProps; 29 | } else { 30 | this.s3BucketProps = { 31 | removalPolicy: RemovalPolicy.DESTROY, 32 | 
autoDeleteObjects: true, 33 | lifecycleRules: [ 34 | { 35 | expiration: Duration.days(30), 36 | }, 37 | ], 38 | }; 39 | } 40 | } 41 | } -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # ~~ Generated by projen. To modify, edit .projenrc.js and run "npx projen". 2 | !/.gitattributes 3 | !/.projen/tasks.json 4 | !/.projen/deps.json 5 | !/.projen/files.json 6 | !/.github/workflows/pull-request-lint.yml 7 | !/.github/workflows/auto-approve.yml 8 | !/.github/workflows/stale.yml 9 | !/package.json 10 | !/LICENSE 11 | !/.npmignore 12 | logs 13 | *.log 14 | npm-debug.log* 15 | yarn-debug.log* 16 | yarn-error.log* 17 | lerna-debug.log* 18 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json 19 | pids 20 | *.pid 21 | *.seed 22 | *.pid.lock 23 | lib-cov 24 | *.lcov 25 | .nyc_output 26 | build/Release 27 | node_modules/ 28 | jspm_packages/ 29 | *.tsbuildinfo 30 | .eslintcache 31 | *.tgz 32 | .yarn-integrity 33 | .cache 34 | src/emr-studio.ts 35 | src/emr-cluster-sc.ts 36 | src/etl/kda-studio.ts 37 | src/etl/glue-notebook.ts 38 | workflows/* 39 | *.DS_Store 40 | *cdk.context.json 41 | !/.projenrc.js 42 | /test-reports/ 43 | junit.xml 44 | /coverage/ 45 | !/.github/workflows/build.yml 46 | /dist/changelog.md 47 | /dist/version.txt 48 | !/.github/workflows/release.yml 49 | !/.mergify.yml 50 | !/.github/pull_request_template.md 51 | !/test/ 52 | !/tsconfig.dev.json 53 | !/src/ 54 | /lib 55 | /dist/ 56 | !/.eslintrc.json 57 | .jsii 58 | tsconfig.json 59 | !/API.md 60 | cdk.out 61 | cdk.context.json 62 | yarn-error.log 63 | .DS_Store 64 | coverage 65 | .metals 66 | !/.github/workflows/upgrade-main.yml 67 | -------------------------------------------------------------------------------- /src/data-lake-bucket.ts: -------------------------------------------------------------------------------- 1 | // import * as iam from 'aws-cdk-lib/aws-iam'; 2 | import * as s3 from 
'aws-cdk-lib/aws-s3'; 3 | import { Construct } from 'constructs'; 4 | 5 | export interface DataLakeBucketProps { 6 | readonly bucketName: string; 7 | readonly dataCatalogAccountId: string; 8 | readonly logBucket: s3.Bucket; 9 | readonly crossAccount: boolean; 10 | readonly s3Properties: s3.BucketProps | undefined; 11 | } 12 | 13 | export class DataLakeBucket extends Construct { 14 | public readonly bucket: s3.Bucket; 15 | 16 | constructor(scope: Construct, id: string, props: DataLakeBucketProps) { 17 | super(scope, id); 18 | 19 | this.bucket = new s3.Bucket(this, 'datalake-bucket', { 20 | bucketName: props.bucketName, 21 | ...props.s3Properties, 22 | serverAccessLogsBucket: props.logBucket, 23 | }); 24 | 25 | // if (props.crossAccount) { 26 | // // TODO: revisit this bucket policy for cross account access. 27 | // this.bucket.addToResourcePolicy( 28 | // new iam.PolicyStatement({ 29 | // resources: [ 30 | // this.bucket.arnForObjects('*'), 31 | // this.bucket.bucketArn, 32 | // ], 33 | // actions: ['s3:List*', 's3:Get*'], 34 | // principals: [ 35 | // new iam.ArnPrincipal(`arn:aws:iam::${props.dataCatalogAccountId}:root`), 36 | // ], 37 | // }), 38 | // ); 39 | // } 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /.github/workflows/stale.yml: -------------------------------------------------------------------------------- 1 | # ~~ Generated by projen. To modify, edit .projenrc.js and run "npx projen". 2 | 3 | name: stale 4 | on: 5 | schedule: 6 | - cron: 0 1 * * * 7 | workflow_dispatch: {} 8 | jobs: 9 | stale: 10 | runs-on: ubuntu-latest 11 | permissions: 12 | issues: write 13 | pull-requests: write 14 | steps: 15 | - uses: actions/stale@v4 16 | with: 17 | days-before-stale: -1 18 | days-before-close: -1 19 | days-before-pr-stale: 14 20 | days-before-pr-close: 2 21 | stale-pr-message: This pull request is now marked as stale because it hasn't seen activity for a while. Add a comment or it will be closed soon. 
If you wish to exclude this issue from being marked as stale, add the "backlog" label. 22 | close-pr-message: Closing this pull request as it hasn't seen activity for a while. Please add a comment @mentioning a maintainer to reopen. If you wish to exclude this issue from being marked as stale, add the "backlog" label. 23 | stale-pr-label: stale 24 | exempt-pr-labels: backlog 25 | days-before-issue-stale: 60 26 | days-before-issue-close: 7 27 | stale-issue-message: This issue is now marked as stale because it hasn't seen activity for a while. Add a comment or it will be closed soon. If you wish to exclude this issue from being marked as stale, add the "backlog" label. 28 | close-issue-message: Closing this issue as it hasn't seen activity for a while. Please add a comment @mentioning a maintainer to reopen. If you wish to exclude this issue from being marked as stale, add the "backlog" label. 29 | stale-issue-label: stale 30 | exempt-issue-labels: backlog 31 | -------------------------------------------------------------------------------- /src/etl/glue-table.ts: -------------------------------------------------------------------------------- 1 | import { IResolvable } from 'aws-cdk-lib'; 2 | import * as glue from 'aws-cdk-lib/aws-glue'; 3 | import { Construct } from 'constructs'; 4 | 5 | export interface IGlueTableProperties { 6 | tableName: string; 7 | description: string; 8 | partitionKeys: Array | IResolvable; 9 | columns: Array | IResolvable; 10 | parameters: {[param: string]: any}; 11 | databaseName: string; 12 | s3Location: string; 13 | serializationLibrary: string; 14 | serdeParameters: {[param: string]: any}; 15 | inputFormat: string; 16 | outputFormat: string; 17 | catalogId: string; 18 | } 19 | 20 | export class GlueTable extends Construct { 21 | readonly table: glue.CfnTable; 22 | readonly tableName: string; 23 | 24 | constructor(scope: Construct, id: string, props: IGlueTableProperties) { 25 | super(scope, id); 26 | 27 | this.tableName = 
props.tableName; 28 | 29 | this.table = new glue.CfnTable(this, `${props.tableName}-glue-table`, { 30 | catalogId: props.catalogId, 31 | databaseName: props.databaseName, 32 | tableInput: { 33 | description: props.description, 34 | name: props.tableName, 35 | tableType: 'EXTERNAL_TABLE', 36 | partitionKeys: props.partitionKeys, 37 | parameters: { 38 | EXTERNAL: true, 39 | has_encrypted_data: false, 40 | ...props.parameters, 41 | }, 42 | storageDescriptor: { 43 | columns: props.columns, 44 | location: props.s3Location, 45 | serdeInfo: { 46 | serializationLibrary: props.serializationLibrary, 47 | parameters: { 48 | ...props.serdeParameters, 49 | }, 50 | }, 51 | inputFormat: props.inputFormat, 52 | outputFormat: props.outputFormat, 53 | parameters: { 54 | 'serialization.format': '1', 55 | }, 56 | }, 57 | }, 58 | }); 59 | } 60 | } -------------------------------------------------------------------------------- /lambda/download-data/index.py: -------------------------------------------------------------------------------- 1 | import boto3, json 2 | from botocore.exceptions import ClientError 3 | import json 4 | 5 | def handler(event, context): 6 | print(event) 7 | request_type = event["RequestType"] 8 | if request_type == "Create": 9 | return on_create(event) 10 | if request_type == "Update": 11 | return on_update(event) 12 | if request_type == "Delete": 13 | return on_delete(event) 14 | raise Exception("Invalid request type: %s" % request_type) 15 | 16 | def on_create(event): 17 | props = event["ResourceProperties"] 18 | print("create new resource with props %s" % props) 19 | dataSets = props["dataSets"] 20 | print(dataSets) 21 | s3 = boto3.resource('s3') 22 | 23 | for key in dataSets: 24 | # # if not check_tag_exists(key, catalogId): 25 | # print(key, '->', dataSets[key]) 26 | values = dataSets[key] 27 | 28 | for file in values["sourceKeys"]: 29 | print(file) 30 | copy_source = { 31 | 'Bucket': values["sourceBucketName"], 32 | 'Key': file 33 | } 34 | key = 
file.split("/")[-1] 35 | s3.meta.client.copy(copy_source, values["destinationBucketName"], values["destinationPrefix"] + key) 36 | 37 | stack_name = props["stackName"] 38 | output = {} 39 | print(output) 40 | 41 | # add your create code here... 42 | physical_id = stack_name 43 | return {"PhysicalResourceId": physical_id, "Data": output} 44 | 45 | def on_update(event): 46 | physical_id = event["PhysicalResourceId"] 47 | props = event["ResourceProperties"] 48 | print("update resource %s with props %s" % (physical_id, props)) 49 | stack_name = props["stackName"] 50 | region_name = props["regionName"] 51 | print("on_update describing %s from %s", stack_name, region_name) 52 | output = {} 53 | print(output) 54 | 55 | # add your create code here... 56 | physical_id = stack_name 57 | return {"PhysicalResourceId": physical_id, "Data": output} 58 | 59 | def on_delete(event): 60 | physical_id = event["PhysicalResourceId"] 61 | print("delete resource %s" % physical_id) 62 | # ... -------------------------------------------------------------------------------- /src/personas/data-lake-analyst.ts: -------------------------------------------------------------------------------- 1 | import { SecretValue } from 'aws-cdk-lib'; 2 | import * as iam from 'aws-cdk-lib/aws-iam'; 3 | import * as s3 from 'aws-cdk-lib/aws-s3'; 4 | import { Construct } from 'constructs'; 5 | 6 | export interface DataLakeAnalystProps { 7 | readonly name: string; 8 | readonly readAccessBuckets?: s3.IBucket[]; 9 | readonly writeAccessBuckets?: s3.IBucket[]; 10 | } 11 | 12 | export class DataLakeAnalyst extends Construct { 13 | public readonly user: iam.User; 14 | 15 | constructor(scope: Construct, id: string, props: DataLakeAnalystProps) { 16 | super(scope, id); 17 | 18 | this.user = new iam.User(this, 'DataAnalystUser', { 19 | userName: props.name, 20 | password: SecretValue.plainText(this.node.tryGetContext('initialPassword')), 21 | passwordResetRequired: true, 22 | managedPolicies: [ 23 | 
iam.ManagedPolicy.fromAwsManagedPolicyName('AmazonAthenaFullAccess'), 24 | iam.ManagedPolicy.fromAwsManagedPolicyName('IAMUserChangePassword'), 25 | ], 26 | }); 27 | 28 | // need to add access to Athena worgroup output S3 bucket 29 | this.user.attachInlinePolicy(new iam.Policy(this, 'DataAnalystPermissions', { 30 | policyName: 'DataAnalystPermissions', 31 | statements: [ 32 | new iam.PolicyStatement({ 33 | effect: iam.Effect.ALLOW, 34 | actions: [ 35 | 'lakeformation:GetDataAccess', 36 | 'glue:GetTable', 37 | 'glue:GetTables', 38 | 'glue:SearchTables', 39 | 'glue:GetDatabase', 40 | 'glue:GetDatabases', 41 | 'glue:GetPartitions', 42 | ], 43 | resources: ['*'], 44 | }), 45 | new iam.PolicyStatement({ 46 | effect: iam.Effect.ALLOW, 47 | actions: [ 48 | 'lambda:InvokeFunction', 49 | ], 50 | resources: ['*'], // can I shrink this down to only the Athena Lambda UDFs 51 | }), 52 | ], 53 | })); 54 | 55 | if (props.readAccessBuckets) { 56 | props.readAccessBuckets.forEach(bucket => { 57 | bucket.grantRead(this.user); 58 | }); 59 | } 60 | 61 | if (props.writeAccessBuckets) { 62 | props.writeAccessBuckets.forEach(bucket => { 63 | bucket.grantWrite(this.user); 64 | }); 65 | } 66 | } 67 | } -------------------------------------------------------------------------------- /test/datalake.test.ts: -------------------------------------------------------------------------------- 1 | import { Template } from 'aws-cdk-lib/assertions'; 2 | import { App, Aspects } from 'aws-cdk-lib/core'; 3 | import { AwsSolutionsChecks } from 'cdk-nag'; 4 | import { DataLake, Pipeline, DataProduct } from '../src'; 5 | import { Stage } from '../src/global/enums'; 6 | import * as pipelines from '../test/pipelines'; 7 | import { CdkTestStack } from './test-stack'; 8 | 9 | const stage = Stage.ALPHA; 10 | const dataProductAccountId = '123456789012'; 11 | 12 | const pipes: Array = [ 13 | pipelines.ReviewsPipeline(), 14 | //pipelines.IoTDataPipeline(stage), 15 | ]; 16 | 17 | const taxiPipes: Array = [ 18 | 
pipelines.YellowPipeline(), 19 | pipelines.GreenPipeline(), 20 | ]; 21 | 22 | const dataProducts: Array = [ 23 | { 24 | pipelines: pipes, 25 | accountId: dataProductAccountId, 26 | databaseName: 'reviews-product', 27 | }, 28 | { 29 | pipelines: taxiPipes, 30 | accountId: dataProductAccountId, 31 | databaseName: 'taxi-product', 32 | }, 33 | ]; 34 | 35 | describe('cdk-nag AwsSolutions Pack', () => { 36 | let stack: CdkTestStack; 37 | let app: App; 38 | let datalake: DataLake; 39 | 40 | beforeAll(() => { 41 | // GIVEN 42 | app = new App(); 43 | stack = new CdkTestStack(app, 'test', { 44 | dataProducts: dataProducts, 45 | stage: stage, 46 | }); 47 | datalake = stack.datalake; 48 | // WHEN 49 | Aspects.of(stack).add(new AwsSolutionsChecks({ verbose: true })); 50 | }); 51 | 52 | test('Check Resources', () => { 53 | expect(datalake.stageName).toMatch(Stage.ALPHA); 54 | // expect(stack).toHaveResource('AWS::S3::Bucket'); 55 | // expect(SynthUtils.toCloudFormation(stack)).toMatchSnapshot(); 56 | }); 57 | // THEN 58 | // test('No unsuppressed Warnings', () => { 59 | // const warnings = Annotations.fromStack(stack).findWarning( 60 | // '*', 61 | // Match.stringLikeRegexp('AwsSolutions-.*') 62 | // ); 63 | // if(warnings.length > 0) { 64 | // warnings.forEach(e => console.log(e['entry'])); 65 | // } 66 | // expect(warnings).toHaveLength(0); 67 | // }); 68 | 69 | // test('No unsuppressed Errors', () => { 70 | // const errors = Annotations.fromStack(stack).findError( 71 | // '*', 72 | // Match.stringLikeRegexp('AwsSolutions-.*') 73 | // ); 74 | // if(errors.length > 0) { 75 | // errors.forEach(e => console.log(e.id + '\n' + e['entry']['data'])); 76 | // } 77 | // expect(errors).toHaveLength(0); 78 | // }); 79 | 80 | it('Should match snapshot', () => { 81 | // When 82 | const t = Template.fromStack(stack); 83 | expect(t).toMatchSnapshot(); 84 | }); 85 | }); -------------------------------------------------------------------------------- /.github/workflows/codeql-analysis.yml: 
-------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | # 7 | # ******** NOTE ******** 8 | # We have attempted to detect the languages in your repository. Please check 9 | # the `language` matrix defined below to confirm you have the correct set of 10 | # supported CodeQL languages. 11 | # 12 | name: "CodeQL" 13 | 14 | on: 15 | push: 16 | branches: [ main ] 17 | pull_request: 18 | # The branches below must be a subset of the branches above 19 | branches: [ main ] 20 | schedule: 21 | - cron: '32 23 * * 3' 22 | 23 | jobs: 24 | analyze: 25 | name: Analyze 26 | runs-on: ubuntu-latest 27 | permissions: 28 | actions: read 29 | contents: read 30 | security-events: write 31 | 32 | strategy: 33 | fail-fast: false 34 | matrix: 35 | language: [ 'javascript', 'python' ] 36 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ] 37 | # Learn more: 38 | # https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed 39 | 40 | steps: 41 | - name: Checkout repository 42 | uses: actions/checkout@v2 43 | 44 | # Initializes the CodeQL tools for scanning. 45 | - name: Initialize CodeQL 46 | uses: github/codeql-action/init@v1 47 | with: 48 | languages: ${{ matrix.language }} 49 | # If you wish to specify custom queries, you can do so here or in a config file. 50 | # By default, queries listed here will override any specified in a config file. 51 | # Prefix the list here with "+" to use these queries and those in the config file. 
52 | # queries: ./path/to/local/query, your-org/your-repo/queries@main 53 | 54 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 55 | # If this step fails, then you should remove it and run the build manually (see below) 56 | - name: Autobuild 57 | uses: github/codeql-action/autobuild@v1 58 | 59 | # ℹ️ Command-line programs to run using the OS shell. 60 | # 📚 https://git.io/JvXDl 61 | 62 | # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines 63 | # and modify them (or add more) to build your code if your project 64 | # uses a compiled language 65 | 66 | #- run: | 67 | # make bootstrap 68 | # make release 69 | 70 | - name: Perform CodeQL Analysis 71 | uses: github/codeql-action/analyze@v1 72 | -------------------------------------------------------------------------------- /.projen/deps.json: -------------------------------------------------------------------------------- 1 | { 2 | "dependencies": [ 3 | { 4 | "name": "@types/jest", 5 | "type": "build" 6 | }, 7 | { 8 | "name": "@types/node", 9 | "version": "^12", 10 | "type": "build" 11 | }, 12 | { 13 | "name": "@typescript-eslint/eslint-plugin", 14 | "version": "^5", 15 | "type": "build" 16 | }, 17 | { 18 | "name": "@typescript-eslint/parser", 19 | "version": "^5", 20 | "type": "build" 21 | }, 22 | { 23 | "name": "cdk-nag", 24 | "type": "build" 25 | }, 26 | { 27 | "name": "constructs", 28 | "version": "10.0.5", 29 | "type": "build" 30 | }, 31 | { 32 | "name": "eslint-import-resolver-node", 33 | "type": "build" 34 | }, 35 | { 36 | "name": "eslint-import-resolver-typescript", 37 | "type": "build" 38 | }, 39 | { 40 | "name": "eslint-plugin-import", 41 | "type": "build" 42 | }, 43 | { 44 | "name": "eslint", 45 | "version": "^8", 46 | "type": "build" 47 | }, 48 | { 49 | "name": "jest", 50 | "type": "build" 51 | }, 52 | { 53 | "name": "jest-junit", 54 | "version": "^13", 55 | "type": "build" 56 | }, 57 | { 58 | "name": "jsii", 59 | "type": "build" 60 | }, 61 | { 
62 | "name": "jsii-diff", 63 | "type": "build" 64 | }, 65 | { 66 | "name": "jsii-docgen", 67 | "type": "build" 68 | }, 69 | { 70 | "name": "json-schema", 71 | "type": "build" 72 | }, 73 | { 74 | "name": "npm-check-updates", 75 | "version": "^12", 76 | "type": "build" 77 | }, 78 | { 79 | "name": "projen", 80 | "type": "build" 81 | }, 82 | { 83 | "name": "standard-version", 84 | "version": "^9", 85 | "type": "build" 86 | }, 87 | { 88 | "name": "ts-jest", 89 | "type": "build" 90 | }, 91 | { 92 | "name": "typescript", 93 | "type": "build" 94 | }, 95 | { 96 | "name": "@aws-cdk/aws-glue-alpha", 97 | "type": "peer" 98 | }, 99 | { 100 | "name": "aws-cdk-lib", 101 | "version": "^2.13.0", 102 | "type": "peer" 103 | }, 104 | { 105 | "name": "constructs", 106 | "version": "^10.0.5", 107 | "type": "peer" 108 | }, 109 | { 110 | "name": "@aws-cdk/aws-glue-alpha", 111 | "type": "runtime" 112 | }, 113 | { 114 | "name": "@aws-cdk/aws-lambda-python-alpha", 115 | "type": "runtime" 116 | }, 117 | { 118 | "name": "aws-cdk-lib", 119 | "type": "runtime" 120 | } 121 | ], 122 | "//": "~~ Generated by projen. To modify, edit .projenrc.js and run \"npx projen\"." 
123 | } 124 | -------------------------------------------------------------------------------- /test/code/iot_data/streaming_convert_to_parquet.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from awsglue.transforms import * 3 | from awsglue.utils import getResolvedOptions 4 | from pyspark.context import SparkContext 5 | from awsglue.context import GlueContext 6 | from awsglue.job import Job 7 | from pyspark.sql import DataFrame, Row 8 | import datetime 9 | from awsglue import DynamicFrame 10 | 11 | ## @params: [JOB_NAME] 12 | args = getResolvedOptions(sys.argv, ['JOB_NAME', 'SOURCE_DATABASE', 'SOURCE_TABLE', 'STREAM_BATCH_TIME_SECS', 'DESTINATION_DATABASE', 'DESTINATION_TABLE', 'DESTINATION_BUCKET']) 13 | 14 | sc = SparkContext() 15 | glueContext = GlueContext(sc) 16 | spark = glueContext.spark_session 17 | job = Job(glueContext) 18 | job.init(args['JOB_NAME'], args) 19 | ## @type: DataSource 20 | ## @args: [database = "local", additionalOptions = {"inferSchema":"true","startingPosition":"TRIM_HORIZON"}, stream_batch_time = 100 seconds, stream_type = Kinesis, table_name = "iot-data-kinesis"] 21 | ## @return: DataSource0 22 | ## @inputs: [] 23 | data_frame_DataSource0 = glueContext.create_data_frame.from_catalog(database = args['SOURCE_DATABASE'], table_name = args['SOURCE_TABLE'], transformation_ctx = "DataSource0", additional_options = {"inferSchema":"true","startingPosition":"TRIM_HORIZON"}) 24 | def processBatch(data_frame, batchId): 25 | if (data_frame.count() > 0): 26 | DataSource0 = DynamicFrame.fromDF(data_frame, glueContext, "from_data_frame") 27 | ## @type: DataSink 28 | ## @args: [path = "s3://PATH/parquet", connection_type = "s3", catalog_database_name = "local", updateBehavior = "UPDATE_IN_DATABASE", stream_batch_time = "100 seconds", format = "glueparquet", enableUpdateCatalog = true, catalog_table_name = "p_iot_data", transformation_ctx = "DataSink0"] 29 | ## @return: DataSink0 30 | ## @inputs: 
[frame = Transform0] 31 | now = datetime.datetime.now() 32 | year = now.year 33 | month = now.month 34 | day = now.day 35 | hour = now.hour 36 | minute = now.minute 37 | path_DataSink0 = "s3://" + args['DESTINATION_BUCKET'] + "/parquet" + "/ingest_year=" + "{:0>4}".format(str(year)) + "/ingest_month=" + "{:0>2}".format(str(month)) + "/ingest_day=" + "{:0>2}".format(str(day)) + "/ingest_hour=" + "{:0>2}".format(str(hour)) + "/" 38 | DataSink0 = glueContext.getSink(path = path_DataSink0, connection_type = "s3", updateBehavior = "UPDATE_IN_DATABASE", stream_batch_time = "100 seconds", format = "glueparquet", enableUpdateCatalog = True, transformation_ctx = "DataSink0") 39 | DataSink0.setCatalogInfo(catalogDatabase = args['DESTINATION_DATABASE'],catalogTableName = args['DESTINATION_TABLE']) 40 | DataSink0.setFormat("glueparquet") 41 | DataSink0.writeFrame(DataSource0) 42 | 43 | glueContext.forEachBatch(frame = data_frame_DataSource0, batch_function = processBatch, options = {"windowSize": args['STREAM_BATCH_TIME_SECS'], "checkpointLocation": args["TempDir"] + "/checkpoint/"}) 44 | job.commit() -------------------------------------------------------------------------------- /.projenrc.js: -------------------------------------------------------------------------------- 1 | const { awscdk } = require('projen'); 2 | 3 | const project = new awscdk.AwsCdkConstructLibrary({ 4 | author: 'Randy Ridgley', 5 | authorAddress: 'randy.ridgley@gmail.com', 6 | description: 'AWS CDK Constructs that can be used to create datalakes/meshes and more', 7 | cdkVersion: '2.13.0', 8 | defaultReleaseBranch: 'main', 9 | name: '@randyridgley/cdk-datalake-constructs', 10 | repositoryUrl: 'https://github.com/randyridgley/cdk-datalake-constructs.git', 11 | jsiiFqn: 'projen.AwsCdkConstructLibrary', 12 | licensed: true, 13 | license: 'MIT', 14 | devenv: true, 15 | deps: [ 16 | 'aws-cdk-lib', 17 | '@aws-cdk/aws-glue-alpha', 18 | '@aws-cdk/aws-lambda-python-alpha', 19 | ], 20 | devDeps: [ 21 | 
'cdk-nag', 22 | ], 23 | peerDeps: [ 24 | '@aws-cdk/aws-glue-alpha', 25 | ], 26 | gitignore: [ 27 | 'src/emr-studio.ts', 28 | 'src/emr-cluster-sc.ts', 29 | 'src/etl/kda-studio.ts', 30 | 'src/etl/glue-notebook.ts', 31 | 'workflows/*', 32 | '*.DS_Store', 33 | '*cdk.context.json', 34 | ], 35 | releaseEveryCommit: true, 36 | release: true, 37 | releaseWorkflowName: 'release', 38 | autoApproveOptions: { 39 | secret: 'GITHUB_TOKEN', 40 | allowedUsernames: ['randyridgley'], 41 | }, 42 | depsUpgrade: true, 43 | context: { 44 | '@aws-cdk/core:newStyleStackSynthesis': 'true', 45 | }, 46 | autoApproveUpgrades: true, 47 | eslint: true, 48 | mergify: true, 49 | antitamper: true, 50 | buildWorkflow: true, 51 | npmTokenSecret: 'NPM_TOKEN', 52 | releaseToNpm: true, 53 | publishToPypi: { 54 | distName: 'cdk-datalake-constructs', 55 | module: 'cdk_datalake_constructs', 56 | }, 57 | publishToMaven: { 58 | mavenEndpoint: 'https://s01.oss.sonatype.org', 59 | javaPackage: 'io.github.randyridgley.cdk.datalake.constructs', 60 | mavenGroupId: 'io.github.randyridgley', 61 | mavenArtifactId: 'cdk-datalake-constructs', 62 | }, 63 | // publishToGo: { 64 | // gitUserName: 'randyridgley', 65 | // gitUserEmail: 'randy.ridgley@gmail.com', 66 | // moduleName: 'github.com/randyridgley/cdk-datalake-constructs', 67 | // }, 68 | // publishToNuget: { 69 | // dotNetNamespace: 'Cdk.Datalake.Constructs', 70 | // packageId: 'Cdk.Datalake.Constructs', 71 | // }, 72 | catalog: { 73 | announce: true, 74 | twitter: 'randyridgley', 75 | }, 76 | keywords: ['aws', 77 | 'aws-cdk', 78 | 'cdk-construct', 79 | 'cdk', 80 | 'datalake', 81 | 'datamesh', 82 | 'lakeformation', 83 | 'glue'], 84 | tsconfig: { 85 | }, 86 | }); 87 | 88 | // project.tasks.tryFind('package').prependExec('go env -w GOSUMDB=off'); 89 | 90 | const common_exclude = [ 91 | 'cdk.out', 'cdk.context.json', 'yarn-error.log', '.DS_Store', 'coverage', '.metals', 92 | ]; 93 | project.npmignore.exclude(...common_exclude, 'maven_release*', 'examples*'); 94 | 
project.gitignore.exclude(...common_exclude); 95 | 96 | const openCoverage = project.addTask('coverage'); 97 | openCoverage.exec('npx projen test && open coverage/lcov-report/index.html'); 98 | 99 | project.synth(); -------------------------------------------------------------------------------- /src/workflows/scheduled-job-workflow.ts: -------------------------------------------------------------------------------- 1 | import { Duration } from 'aws-cdk-lib'; 2 | import * as events from 'aws-cdk-lib/aws-events'; 3 | import * as targets from 'aws-cdk-lib/aws-events-targets'; 4 | import * as iam from 'aws-cdk-lib/aws-iam'; 5 | import * as logs from 'aws-cdk-lib/aws-logs'; 6 | import * as sfn from 'aws-cdk-lib/aws-stepfunctions'; 7 | import * as tasks from 'aws-cdk-lib/aws-stepfunctions-tasks';import { Construct } from 'constructs'; 8 | import { Stage } from '../global/enums'; 9 | import { buildEventRuleName, buildRoleName } from '../utils'; 10 | 11 | export interface ScheduledJobWorkflowProps { 12 | readonly schedule: events.Schedule; 13 | readonly name: string; 14 | readonly stageName: Stage; 15 | readonly jobName: string; 16 | readonly jobArguments: {[key: string]: any}; 17 | readonly jobTimeout: Duration; 18 | } 19 | 20 | export class ScheduledJobWorkflow extends Construct { 21 | public readonly rule:events.Rule; 22 | public readonly stateMachine: sfn.StateMachine; 23 | 24 | constructor(scope: Construct, id: string, props: ScheduledJobWorkflowProps) { 25 | super(scope, id); 26 | 27 | const stateMachineRole = new iam.Role(scope, 'StateMachineJobExecutionRole', { 28 | roleName: buildRoleName({ 29 | name: props.name, 30 | resourceUse: 'datalake', 31 | stage: props.stageName, 32 | }), 33 | assumedBy: new iam.ServicePrincipal('states'), 34 | managedPolicies: [ 35 | iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSGlueServiceRole'), 36 | iam.ManagedPolicy.fromAwsManagedPolicyName('AmazonS3FullAccess'), 37 | ], 38 | }); 39 | 40 | const jobState = new 
tasks.GlueStartJobRun(this, 'GlueExecutionStep', { 41 | glueJobName: props.jobName, 42 | integrationPattern: sfn.IntegrationPattern.RUN_JOB, 43 | arguments: sfn.TaskInput.fromObject(props.jobArguments), 44 | timeout: props.jobTimeout, 45 | resultPath: '$.jobOutput', 46 | }); 47 | 48 | const stateMachineDefinition = sfn.Chain.start(jobState).toSingleState('Run Job pipeline', { 49 | comment: 'Container for glue job states', 50 | }).addCatch(this.getStateMachineFailureHandlerState(), {}); 51 | 52 | this.stateMachine = new sfn.StateMachine(this, 'GlueStateMachine', { 53 | definition: stateMachineDefinition, 54 | logs: { 55 | destination: new logs.LogGroup(this, `DataLakeWorkflow-${props.name}`, { 56 | retention: logs.RetentionDays.SIX_MONTHS, 57 | }), 58 | includeExecutionData: true, 59 | level: sfn.LogLevel.ERROR, 60 | }, 61 | tracingEnabled: true, 62 | role: stateMachineRole, 63 | }); 64 | 65 | this.rule = new events.Rule(this, 'Rule', { 66 | schedule: props.schedule, 67 | ruleName: buildEventRuleName({ 68 | name: props.name, 69 | resourceUse: 'datalake', 70 | stage: props.stageName, 71 | }), 72 | }); 73 | this.rule.addTarget(new targets.SfnStateMachine(this.stateMachine)); 74 | } 75 | 76 | private getStateMachineFailureHandlerState(): sfn.Fail { 77 | return new sfn.Fail(this, 'Handle failures', { 78 | comment: 'Handle failures for entire state machine', 79 | }); 80 | } 81 | } -------------------------------------------------------------------------------- /.github/workflows/upgrade-main.yml: -------------------------------------------------------------------------------- 1 | # ~~ Generated by projen. To modify, edit .projenrc.js and run "npx projen". 
2 | 3 | name: upgrade-main 4 | on: 5 | workflow_dispatch: {} 6 | schedule: 7 | - cron: 0 0 * * * 8 | jobs: 9 | upgrade: 10 | name: Upgrade 11 | runs-on: ubuntu-latest 12 | permissions: 13 | contents: read 14 | outputs: 15 | patch_created: ${{ steps.create_patch.outputs.patch_created }} 16 | steps: 17 | - name: Checkout 18 | uses: actions/checkout@v2 19 | with: 20 | ref: main 21 | - name: Install dependencies 22 | run: yarn install --check-files --frozen-lockfile 23 | - name: Upgrade dependencies 24 | run: npx projen upgrade 25 | - id: create_patch 26 | name: Find mutations 27 | run: |- 28 | git add . 29 | git diff --staged --patch --exit-code > .repo.patch || echo "::set-output name=patch_created::true" 30 | - if: steps.create_patch.outputs.patch_created 31 | name: Upload patch 32 | uses: actions/upload-artifact@v2 33 | with: 34 | name: .repo.patch 35 | path: .repo.patch 36 | container: 37 | image: jsii/superchain:1-buster-slim-node14 38 | pr: 39 | name: Create Pull Request 40 | needs: upgrade 41 | runs-on: ubuntu-latest 42 | permissions: 43 | contents: write 44 | pull-requests: write 45 | if: ${{ needs.upgrade.outputs.patch_created }} 46 | steps: 47 | - name: Checkout 48 | uses: actions/checkout@v2 49 | with: 50 | token: ${{ secrets.PROJEN_GITHUB_TOKEN }} 51 | ref: main 52 | - name: Download patch 53 | uses: actions/download-artifact@v2 54 | with: 55 | name: .repo.patch 56 | path: ${{ runner.temp }} 57 | - name: Apply patch 58 | run: '[ -s ${{ runner.temp }}/.repo.patch ] && git apply ${{ runner.temp }}/.repo.patch || echo "Empty patch. Skipping."' 59 | - name: Set git identity 60 | run: |- 61 | git config user.name "github-actions" 62 | git config user.email "github-actions@github.com" 63 | - name: Create Pull Request 64 | id: create-pr 65 | uses: peter-evans/create-pull-request@v3 66 | with: 67 | token: ${{ secrets.PROJEN_GITHUB_TOKEN }} 68 | commit-message: |- 69 | chore(deps): upgrade dependencies 70 | 71 | Upgrades project dependencies. 
See details in [workflow run]. 72 | 73 | [Workflow Run]: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} 74 | 75 | ------ 76 | 77 | *Automatically created by projen via the "upgrade-main" workflow* 78 | branch: github-actions/upgrade-main 79 | title: "chore(deps): upgrade dependencies" 80 | labels: auto-approve 81 | body: |- 82 | Upgrades project dependencies. See details in [workflow run]. 83 | 84 | [Workflow Run]: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} 85 | 86 | ------ 87 | 88 | *Automatically created by projen via the "upgrade-main" workflow* 89 | author: github-actions 90 | committer: github-actions 91 | signoff: true 92 | -------------------------------------------------------------------------------- /src/utils.ts: -------------------------------------------------------------------------------- 1 | import { Asset } from 'aws-cdk-lib/aws-s3-assets'; 2 | import { Construct } from 'constructs'; 3 | 4 | export function notUndefined(obj: T | undefined): obj is T { 5 | return obj !== undefined; 6 | } 7 | 8 | export interface NameBuilderParameters { 9 | readonly name: string; 10 | readonly resourceUse?: string; 11 | readonly stage?: string; 12 | readonly region?: string; 13 | readonly accountId?: string; 14 | } 15 | 16 | export function buildUniqueName(props: NameBuilderParameters, maxLength: number): string { 17 | const baseString = [props.name, props.resourceUse, props.stage, props.region, props.accountId] 18 | .filter(r => r != undefined) 19 | .join('-') 20 | .substring(0, maxLength); 21 | 22 | return baseString.toLowerCase(); 23 | } 24 | 25 | export function buildS3BucketName(props: NameBuilderParameters): string { 26 | return buildUniqueName(props, 63).replace(/[^a-z0-9\-.]/g, '-'); 27 | } 28 | 29 | export function buildKinesisApplicationName(props: NameBuilderParameters): string { 30 | return buildUniqueName(props, 128); 31 | } 32 | 33 | export function buildKinesisStreamName(props: 
NameBuilderParameters): string { 34 | return buildUniqueName(props, 128); 35 | } 36 | 37 | export function buildQueueName(props: NameBuilderParameters): string { 38 | return buildUniqueName(props, 80); 39 | } 40 | 41 | export function buildLambdaFunctionName(props: NameBuilderParameters): string { 42 | return buildUniqueName(props, 64); 43 | } 44 | 45 | export function buildGlueJobName(props: NameBuilderParameters): string { 46 | return buildUniqueName(props, 60); 47 | } 48 | 49 | export function buildGlueCrawlerName(props: NameBuilderParameters): string { 50 | return buildUniqueName(props, 60); 51 | } 52 | 53 | export function buildGlueEndpointName(props: NameBuilderParameters): string { 54 | return buildUniqueName(props, 60); 55 | } 56 | 57 | export function buildStateMachineName(props: NameBuilderParameters): string { 58 | return buildUniqueName(props, 80); 59 | } 60 | 61 | export function buildEventRuleName(props: NameBuilderParameters): string { 62 | return buildUniqueName(props, 64); 63 | } 64 | 65 | export function buildDynamoDBTableName(props: NameBuilderParameters): string { 66 | return buildUniqueName(props, 255); 67 | } 68 | 69 | export function buildS3BucketArn(props: NameBuilderParameters): string { 70 | const bucketName = buildS3BucketName(props); 71 | return `arn:aws:s3:::${bucketName}`; 72 | } 73 | 74 | export function buildRoleName(props: NameBuilderParameters): string { 75 | return buildUniqueName(props, 64); 76 | } 77 | 78 | export function buildRoleArn(props: NameBuilderParameters): string { 79 | const roleName = buildRoleName(props); 80 | return `arn:aws:iam::${props.accountId}:role/${roleName}`; 81 | } 82 | 83 | export function sanitizeStackName(name: string) { 84 | return name.replace(/[^a-zA-Z0-9]/g, '-'); 85 | } 86 | 87 | export function buildPolicyStatementId(name: string, service: string, accessType: string): string { 88 | return `${name}-${service}-${accessType}-access`; 89 | } 90 | 91 | export function toS3Path(asset: Asset): string { 
92 | return `s3://${asset.s3BucketName}/${asset.s3ObjectKey}`; 93 | } 94 | 95 | export function packageAsset (scope: Construct, id: string, projectRelativePath: string): Asset { 96 | return new Asset(scope, id, { path: projectRelativePath }); 97 | }; -------------------------------------------------------------------------------- /lambda/enable-hybrid-catalog/index.py: -------------------------------------------------------------------------------- 1 | import boto3, json 2 | from botocore.exceptions import ClientError 3 | import json 4 | 5 | def handler(event, context): 6 | print(event) 7 | request_type = event["RequestType"] 8 | if request_type == "Create": 9 | return on_create(event) 10 | if request_type == "Update": 11 | return on_update(event) 12 | if request_type == "Delete": 13 | return on_delete(event) 14 | raise Exception("Invalid request type: %s" % request_type) 15 | 16 | def on_create(event): 17 | props = event["ResourceProperties"] 18 | print("create new resource with props %s" % props) 19 | consumerIds = props["consumerAccountIds"] 20 | producerId = props["producerAccountId"] 21 | region = props["regionName"] 22 | 23 | glue = boto3.client('glue') 24 | 25 | policy = { 26 | "Version": "2012-10-17", 27 | "Statement": [ 28 | { 29 | "Sid": "AllowConsumerTagCatalogAccess", 30 | "Effect": "Allow", 31 | "Action": [ 32 | "glue:*" 33 | ], 34 | "Principal": { 35 | "AWS": consumerIds 36 | }, 37 | "Resource": [ 38 | f"arn:aws:glue:{region}:{producerId}:catalog", 39 | f"arn:aws:glue:{region}:{producerId}:database/*", 40 | f"arn:aws:glue:{region}:{producerId}:table/*/*" 41 | ], 42 | "Condition" : { 43 | "Bool" : { 44 | "glue:EvaluatedByLakeFormationTags" : "true" 45 | } 46 | } 47 | }, 48 | { 49 | "Sid": "AllowRamAccess", 50 | "Effect": "Allow", 51 | "Action": [ 52 | "glue:ShareResource" 53 | ], 54 | "Principal": {"Service": [ 55 | "ram.amazonaws.com" 56 | ]}, 57 | "Resource": [ 58 | f"arn:aws:glue:{region}:{producerId}:table/*/*", 59 | 
f"arn:aws:glue:{region}:{producerId}:database/*", 60 | f"arn:aws:glue:{region}:{producerId}:catalog" 61 | ] 62 | } 63 | ] 64 | } 65 | 66 | policy_str = json.dumps(policy) 67 | glue.put_resource_policy(PolicyInJson=policy_str, EnableHybrid='TRUE') # Hybrid needed if using policy alongside cross-account Lake Formation. 68 | 69 | stack_name = props["stackName"] 70 | output = {} 71 | print(output) 72 | 73 | # add your create code here... 74 | physical_id = stack_name 75 | return {"PhysicalResourceId": physical_id, "Data": output} 76 | 77 | def on_update(event): 78 | physical_id = event["PhysicalResourceId"] 79 | props = event["ResourceProperties"] 80 | print("update resource %s with props %s" % (physical_id, props)) 81 | stack_name = props["stackName"] 82 | region_name = props["regionName"] 83 | print("on_update describing %s from %s", stack_name, region_name) 84 | output = {} 85 | print(output) 86 | 87 | # add your create code here... 88 | physical_id = stack_name 89 | return {"PhysicalResourceId": physical_id, "Data": output} 90 | 91 | def on_delete(event): 92 | physical_id = event["PhysicalResourceId"] 93 | print("delete resource %s" % physical_id) 94 | # ... 
-------------------------------------------------------------------------------- /lambda/create-tags-handler/index.py: -------------------------------------------------------------------------------- 1 | import boto3, json 2 | from botocore.exceptions import ClientError 3 | 4 | def handler(event, context): 5 | print(event) 6 | request_type = event["RequestType"] 7 | if request_type == "Create": 8 | return on_create(event) 9 | if request_type == "Update": 10 | return on_update(event) 11 | if request_type == "Delete": 12 | return on_delete(event) 13 | raise Exception("Invalid request type: %s" % request_type) 14 | 15 | def check_tag_exists(tag, catalog_id): 16 | session = boto3.session.Session() 17 | lf_client = session.client('lakeformation') 18 | try: 19 | response = lf_client.get_lf_tag(TagKey=tag, CatalogId=catalog_id) 20 | return response 21 | except ClientError as e: 22 | raise Exception( "boto3 client error in check_tag_exists: " + e.__str__()) 23 | except Exception as e: 24 | raise Exception( "Unexpected error in check_tag_exists: " + e.__str__()) 25 | 26 | def create_tag(tag, values, catalog_id): 27 | session = boto3.session.Session() 28 | lf_client = session.client('lakeformation') 29 | try: 30 | response = lf_client.create_lf_tag(TagKey=tag, TagValues=values, CatalogId=catalog_id) 31 | return response 32 | except ClientError as e: 33 | raise Exception( "boto3 client error in create_tag: " + e.__str__()) 34 | except Exception as e: 35 | raise Exception( "Unexpected error in create_tag: " + e.__str__()) 36 | 37 | def delete_tag(tag, catalog_id): 38 | session = boto3.session.Session() 39 | lf_client = session.client('lakeformation') 40 | try: 41 | response = lf_client.delete_lf_tag(TagKey=tag, CatalogId=catalog_id) 42 | return response 43 | except ClientError as e: 44 | raise Exception( "boto3 client error in delete_tag: " + e.__str__()) 45 | except Exception as e: 46 | raise Exception( "Unexpected error in delete_tag: " + e.__str__()) 47 | 48 | def 
on_create(event): 49 | props = event["ResourceProperties"] 50 | print("create new resource with props %s" % props) 51 | tags = props["policyTags"] 52 | catalogId = props["catalogId"] 53 | 54 | for key in tags: 55 | # if not check_tag_exists(key, catalogId): 56 | print(key, '->', tags[key]) 57 | values = tags[key].split (",") 58 | create_tag(key, values, catalogId) 59 | 60 | stack_name = props["stackName"] 61 | output = {} 62 | print(output) 63 | 64 | # add your create code here... 65 | physical_id = stack_name 66 | return {"PhysicalResourceId": physical_id, "Data": output} 67 | 68 | def on_update(event): 69 | physical_id = event["PhysicalResourceId"] 70 | props = event["ResourceProperties"] 71 | print("update resource %s with props %s" % (physical_id, props)) 72 | stack_name = props["stackName"] 73 | region_name = props["regionName"] 74 | print("on_update describing %s from %s", stack_name, region_name) 75 | output = {} 76 | print(output) 77 | 78 | # add your create code here... 79 | physical_id = stack_name 80 | return {"PhysicalResourceId": physical_id, "Data": output} 81 | 82 | def on_delete(event): 83 | physical_id = event["PhysicalResourceId"] 84 | props = event["ResourceProperties"] 85 | print("delete resource %s" % physical_id) 86 | tags = props["policyTags"] 87 | catalogId = props["catalogId"] 88 | 89 | for key in tags: 90 | # if not check_tag_exists(key, catalogId): 91 | print(key, '->', tags[key]) 92 | delete_tag(key, catalogId) 93 | # ... 
-------------------------------------------------------------------------------- /test/pipelines/iot-data.ts: -------------------------------------------------------------------------------- 1 | import * as path from 'path'; 2 | import * as events from 'aws-cdk-lib/aws-events'; 3 | import * as lambda from 'aws-cdk-lib/aws-lambda'; 4 | import { Aws, Duration } from 'aws-cdk-lib/core'; 5 | import { GlueJobType, GlueVersion, GlueWorkerType } from '../../src/etl/glue-job'; 6 | import { DataPipelineType, DataTier } from '../../src/global/enums'; 7 | import { Pipeline } from '../../src/pipeline'; 8 | import { buildEventRuleName, buildGlueJobName, buildKinesisStreamName, buildLambdaFunctionName, buildRoleName } from '../../src/utils'; 9 | 10 | export function IoTDataPipeline(stage: string) { 11 | const databaseName: string = 'source-lake'; 12 | const streamName: string = buildKinesisStreamName({ 13 | name: 'iot-data', 14 | resourceUse: 'stream', 15 | stage: stage, 16 | }); 17 | 18 | const code = lambda.Code.fromAsset(path.join(__dirname, '../lambda/iot-data-generator')); 19 | 20 | return new Pipeline({ 21 | type: DataPipelineType.STREAM, 22 | name: 'iot-data', 23 | destinationPrefix: 'iot-data/', 24 | dataDropTier: DataTier.RAW, 25 | streamProperties: { 26 | streamName: streamName, 27 | lambdaDataGenerator: { 28 | code: code, 29 | handler: 'index.handler', 30 | timeout: Duration.seconds(300), 31 | runtime: lambda.Runtime.PYTHON_3_7, 32 | functionName: buildLambdaFunctionName({ 33 | name: 'iot-data-generator', 34 | resourceUse: 'datalake', 35 | stage: stage, 36 | }), 37 | schedule: events.Schedule.expression('rate(1 minute)'), 38 | ruleName: buildEventRuleName({ 39 | name: 'iot-generator', 40 | resourceUse: 'datalake', 41 | stage: stage, 42 | }), 43 | }, 44 | }, 45 | job: { 46 | jobScript: './test/code/iot_data/streaming_convert_to_parquet.py', 47 | jobType: GlueJobType.GLUE_STREAMING, 48 | name: buildGlueJobName({ 49 | name: 'iot_data_streaming', 50 | resourceUse: 
'datalake', 51 | stage: stage, 52 | }), 53 | workerType: GlueWorkerType.G1_X, 54 | description: 'Glue ETL Streaming job to convert JSON to Parquet', 55 | glueVersion: GlueVersion.V_2, 56 | jobArgs: { 57 | '--class': 'GlueApp', 58 | '--job-bookmark-option': 'job-bookmark-disable', 59 | '--SOURCE_DATABASE': databaseName, 60 | '--SOURCE_TABLE': 'r_iot_data', 61 | '--STREAM_BATCH_TIME_SECS': '100 seconds', 62 | '--DESTINATION_DATABASE': databaseName, 63 | '--DESTINATION_TABLE': 'p_iot_data', 64 | }, 65 | destinationLocation: DataTier.RAW, 66 | maxCapacity: 2, 67 | maxConcurrentRuns: 1, 68 | maxRetries: 3, 69 | numberOfWorkers: 2, 70 | roleName: buildRoleName({ 71 | name: 'glue-streaming', 72 | resourceUse: 'datalake', 73 | stage: stage, 74 | }), 75 | timeout: 2880, 76 | }, 77 | table: { 78 | catalogId: Aws.ACCOUNT_ID, 79 | columns: [ 80 | { 81 | name: 'sensor_id', 82 | type: 'int', 83 | }, 84 | { 85 | name: 'current_temperature', 86 | type: 'double', 87 | }, 88 | { 89 | name: 'status', 90 | type: 'string', 91 | }, 92 | { 93 | name: 'event_time', 94 | type: 'string', 95 | }, 96 | ], 97 | inputFormat: 'org.apache.hadoop.mapred.TextInputFormat', 98 | outputFormat: 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', 99 | description: 'Raw IOT Sensor data', 100 | parameters: { 101 | streamARN: `arn:aws:kinesis:${Aws.REGION}:${Aws.ACCOUNT_ID}:stream/${streamName}`, 102 | typeOfData: 'kinesis', 103 | classification: 'json', 104 | }, 105 | serdeParameters: { 106 | paths: '', 107 | }, 108 | serializationLibrary: 'org.openx.data.jsonserde.JsonSerDe', 109 | tableName: 'r_iot_data', 110 | partitionKeys: [], 111 | }, 112 | }); 113 | } 114 | -------------------------------------------------------------------------------- /src/personas/data-lake-admin.ts: -------------------------------------------------------------------------------- 1 | import { Stack } from 'aws-cdk-lib'; 2 | import * as iam from 'aws-cdk-lib/aws-iam'; 3 | import { Construct } from 'constructs'; 4 | 5 | 
export interface DataLakeAdministratorProps { 6 | readonly name: string; 7 | } 8 | 9 | export class DataLakeAdministrator extends Construct { 10 | public readonly role: iam.IRole; 11 | 12 | constructor(scope: Construct, id: string, props: DataLakeAdministratorProps) { 13 | super(scope, id); 14 | 15 | const accountId = Stack.of(this).account; 16 | 17 | this.role = new iam.Role(this, 'datalake-administrator-role', { 18 | roleName: props.name, 19 | assumedBy: new iam.CompositePrincipal( 20 | new iam.ServicePrincipal('lakeformation.amazonaws.com'), 21 | new iam.ServicePrincipal('lambda.amazonaws.com'), 22 | new iam.ServicePrincipal('sagemaker.amazonaws.com'), 23 | ), 24 | managedPolicies: [ 25 | iam.ManagedPolicy.fromAwsManagedPolicyName('AWSLakeFormationDataAdmin'), 26 | iam.ManagedPolicy.fromAwsManagedPolicyName('AWSGlueConsoleFullAccess'), 27 | iam.ManagedPolicy.fromAwsManagedPolicyName('CloudWatchLogsReadOnlyAccess'), 28 | iam.ManagedPolicy.fromAwsManagedPolicyName('AWSLakeFormationCrossAccountManager'), 29 | iam.ManagedPolicy.fromAwsManagedPolicyName('AmazonAthenaFullAccess'), 30 | iam.ManagedPolicy.fromAwsManagedPolicyName('AmazonSageMakerFullAccess'), 31 | ], 32 | }); 33 | 34 | this.role.attachInlinePolicy(new iam.Policy(this, 'datalake-administrator-basic', { 35 | statements: [ 36 | new iam.PolicyStatement({ 37 | effect: iam.Effect.ALLOW, 38 | actions: [ 39 | 'iam:CreateServiceLinkedRole', 40 | ], 41 | resources: ['*'], 42 | conditions: { 43 | StringEquals: { 44 | 'iam:AWSServiceName': 'lakeformation.amazonaws.com', 45 | }, 46 | }, 47 | }), 48 | new iam.PolicyStatement({ 49 | effect: iam.Effect.ALLOW, 50 | actions: [ 51 | 'iam:PutRolePolicy', 52 | ], 53 | resources: [`arn:aws:iam::${accountId}:role/aws-service-role/lakeformation.amazonaws.com/AWSServiceRoleForLakeFormationDataAccess`], 54 | }), 55 | ], 56 | })); 57 | 58 | this.role.attachInlinePolicy(new iam.Policy(this, 'datalake-administrator-lambda-writeCW-logs', { 59 | statements: [ 60 | new 
iam.PolicyStatement({ 61 | resources: ['*'], 62 | actions: [ 63 | 'logs:CreateLogGroup', 64 | 'logs:CreateLogStream', 65 | 'logs:PutLogEvents', 66 | ], 67 | effect: iam.Effect.ALLOW, 68 | sid: 'AllowLogging', 69 | }), 70 | ], 71 | })); 72 | 73 | this.role.attachInlinePolicy(new iam.Policy(this, 'datalake-administrator-TBAC', { 74 | statements: [ 75 | new iam.PolicyStatement({ 76 | effect: iam.Effect.ALLOW, 77 | actions: [ 78 | 'lakeformation:AddLFTagsToResource', 79 | 'lakeformation:RemoveLFTagsFromResource', 80 | 'lakeformation:GetResourceLFTags', 81 | 'lakeformation:ListLFTags', 82 | 'lakeformation:CreateLFTag', 83 | 'lakeformation:GetLFTag', 84 | 'lakeformation:UpdateLFTag', 85 | 'lakeformation:DeleteLFTag', 86 | 'lakeformation:SearchTablesByLFTags', 87 | 'lakeformation:SearchDatabasesByLFTags', 88 | ], 89 | resources: ['*'], 90 | }), 91 | ], 92 | })); 93 | 94 | this.role.attachInlinePolicy(new iam.Policy(this, 'datalake-administrator-cross-account', { 95 | statements: [ 96 | new iam.PolicyStatement({ 97 | effect: iam.Effect.ALLOW, 98 | actions: [ 99 | 'ram:AcceptResourceShareInvitation', 100 | 'ram:RejectResourceShareInvitation', 101 | 'ec2:DescribeAvailabilityZones', 102 | 'ram:EnableSharingWithAwsOrganization', 103 | ], 104 | resources: ['*'], 105 | }), 106 | ], 107 | })); 108 | } 109 | } -------------------------------------------------------------------------------- /src/pipeline.ts: -------------------------------------------------------------------------------- 1 | import * as events from 'aws-cdk-lib/aws-events'; 2 | import * as glue from 'aws-cdk-lib/aws-glue'; 3 | import * as lambda from 'aws-cdk-lib/aws-lambda'; 4 | import * as s3 from 'aws-cdk-lib/aws-s3'; 5 | import * as cdk from 'aws-cdk-lib/core'; 6 | 7 | import { GlueJobType, GlueVersion, GlueWorkerType } from './etl/glue-job'; 8 | import { DataPipelineType, DataTier } from './global/enums'; 9 | 10 | export interface JDBCProperties { 11 | readonly jdbc: string; 12 | readonly username: string; 13 
| readonly password: string; 14 | } 15 | 16 | export interface StreamProperties { 17 | readonly streamName: string; 18 | readonly lambdaDataGenerator?: LambdaDataGeneratorProperties; 19 | } 20 | 21 | export interface S3Properties { 22 | readonly sourceBucketName: string; 23 | readonly sourceKeys: string[]; 24 | } 25 | 26 | export interface TableProps { 27 | readonly tableName: string; 28 | readonly description: string; 29 | readonly partitionKeys: Array | cdk.IResolvable; 30 | readonly columns: Array | cdk.IResolvable; 31 | readonly parameters: {[param: string]: any}; 32 | readonly serializationLibrary: string; 33 | readonly serdeParameters: {[param: string]: any}; 34 | readonly inputFormat: string; 35 | readonly outputFormat: string; 36 | readonly catalogId: string; 37 | } 38 | 39 | export interface JobProperties { 40 | readonly name: string; 41 | readonly roleName?: string; 42 | readonly description?: string; 43 | readonly readAccessBuckets?: s3.IBucket[]; 44 | readonly writeAccessBuckets?: s3.IBucket[]; 45 | readonly glueVersion?: GlueVersion; 46 | readonly workerType: GlueWorkerType; 47 | readonly numberOfWorkers?: number; 48 | readonly maxCapacity?: number; 49 | readonly maxRetries?: number; 50 | readonly maxConcurrentRuns?: number; 51 | readonly jobScript: string; 52 | readonly jobArgs?: { [key: string]: string }; 53 | readonly timeout?: number; 54 | readonly jobType: GlueJobType; 55 | readonly destinationLocation?: DataTier; 56 | } 57 | 58 | export interface DataStreamProperties { 59 | readonly name: string; 60 | readonly destinationBucketName: string; 61 | readonly destinationPrefix: string; 62 | readonly dataCatalogOwner: DataCatalogOwner; 63 | readonly streamName: string; 64 | readonly lambdaDataGenerator: LambdaDataGeneratorProperties; 65 | } 66 | 67 | export interface LambdaDataGeneratorProperties { 68 | readonly code: lambda.Code; 69 | readonly handler: string; 70 | readonly timeout: cdk.Duration; 71 | readonly runtime: lambda.Runtime; 72 | readonly 
functionName: string; 73 | readonly schedule: events.Schedule; 74 | readonly ruleName: string; 75 | } 76 | 77 | export interface PipelineProperties { 78 | readonly type: DataPipelineType; 79 | readonly name: string; 80 | readonly destinationPrefix: string; 81 | readonly dataDropTier: DataTier; 82 | readonly s3Properties?: S3Properties; 83 | readonly streamProperties?: StreamProperties; 84 | readonly jdbcProperties?: JDBCProperties; 85 | readonly table? : TableProps; 86 | readonly job?: JobProperties; 87 | readonly tiers?: DataTier[]; 88 | } 89 | 90 | export interface DataCatalogOwner { 91 | readonly accountId: string; 92 | } 93 | 94 | export class Pipeline { 95 | public readonly type: DataPipelineType; 96 | public readonly name: string; 97 | public readonly destinationPrefix: string; 98 | public readonly dataSetDropTier: DataTier; 99 | public readonly s3Properties?: S3Properties; 100 | public readonly streamProperties?: StreamProperties; 101 | public readonly jdbcProperties?: JDBCProperties; 102 | public readonly table? : TableProps; 103 | public readonly job?: JobProperties; 104 | public readonly tiers: DataTier[]; 105 | 106 | constructor(props: PipelineProperties) { 107 | this.type = props.type; 108 | this.name = props.name; 109 | this.dataSetDropTier = props.dataDropTier; 110 | this.destinationPrefix = props.destinationPrefix; 111 | this.jdbcProperties = props.jdbcProperties ? props.jdbcProperties : undefined; 112 | this.job = props.job ? props.job : undefined; 113 | this.s3Properties = props.s3Properties ? props.s3Properties : undefined; 114 | this.streamProperties = props.streamProperties ? props.streamProperties : undefined; 115 | this.table = props.table ? props.table : undefined; 116 | this.tiers = props.tiers ? 
props.tiers : [DataTier.RAW]; 117 | } 118 | } -------------------------------------------------------------------------------- /src/data-streams/kinesis-stream.ts: -------------------------------------------------------------------------------- 1 | import { Resource } from 'aws-cdk-lib'; 2 | import * as cloudwatch from 'aws-cdk-lib/aws-cloudwatch'; 3 | import * as kinesis from 'aws-cdk-lib/aws-kinesis'; 4 | import { Construct } from 'constructs'; 5 | 6 | export class KinesisStream extends Resource { 7 | public readonly stream: kinesis.Stream; 8 | 9 | constructor(parent: Construct, name: string, props: kinesis.StreamProps) { 10 | super(parent, name); 11 | this.stream = new kinesis.Stream(this, 'kinesis-stream', props); 12 | } 13 | 14 | public metric(metricName: string, props?: cloudwatch.MetricOptions): cloudwatch.Metric { 15 | return new cloudwatch.Metric({ 16 | namespace: 'AWS/Kinesis', 17 | metricName, 18 | dimensionsMap: { 19 | StreamName: this.stream.streamName, 20 | }, 21 | ...props, 22 | }); 23 | } 24 | 25 | public metricGetRecordsBytes(props?: cloudwatch.MetricOptions): cloudwatch.Metric { 26 | return this.metric('GetRecords.Bytes', props); 27 | } 28 | 29 | public metricGetRecordsIteratorAgeMilliseconds(props?: cloudwatch.MetricOptions): cloudwatch.Metric { 30 | return this.metric('GetRecords.IteratorAgeMilliseconds', props); 31 | } 32 | 33 | public metricGetRecordsLatency(props?: cloudwatch.MetricOptions): cloudwatch.Metric { 34 | return this.metric('GetRecords.Latency', props); 35 | } 36 | 37 | public metricGetRecordsRecords(props?: cloudwatch.MetricOptions): cloudwatch.Metric { 38 | return this.metric('GetRecords.Records', props); 39 | } 40 | 41 | public metricGetRecordsSuccess(props?: cloudwatch.MetricOptions): cloudwatch.Metric { 42 | return this.metric('GetRecords.Success', props); 43 | } 44 | 45 | public metricIncomingBytes(props?: cloudwatch.MetricOptions): cloudwatch.Metric { 46 | return this.metric('IncomingBytes', props); 47 | } 48 | 49 | public 
metricIncomingRecords(props?: cloudwatch.MetricOptions): cloudwatch.Metric { 50 | return this.metric('IncomingRecords', props); 51 | } 52 | 53 | public metricPutRecordBytes(props?: cloudwatch.MetricOptions): cloudwatch.Metric { 54 | return this.metric('PutRecord.Bytes', props); 55 | } 56 | 57 | public metricPutRecordLatency(props?: cloudwatch.MetricOptions): cloudwatch.Metric { 58 | return this.metric('PutRecord.Latency', props); 59 | } 60 | 61 | public metricPutRecordSuccess(props?: cloudwatch.MetricOptions): cloudwatch.Metric { 62 | return this.metric('PutRecord.Success', props); 63 | } 64 | 65 | public metricPutRecordsBytes(props?: cloudwatch.MetricOptions): cloudwatch.Metric { 66 | return this.metric('PutRecords.Bytes', props); 67 | } 68 | 69 | public metricPutRecordsLatency(props?: cloudwatch.MetricOptions): cloudwatch.Metric { 70 | return this.metric('PutRecords.Latency', props); 71 | } 72 | 73 | public metricPutRecordsRecords(props?: cloudwatch.MetricOptions): cloudwatch.Metric { 74 | return this.metric('PutRecords.Records', props); 75 | } 76 | 77 | public metricPutRecordsSuccess(props?: cloudwatch.MetricOptions): cloudwatch.Metric { 78 | return this.metric('PutRecords.Success', props); 79 | } 80 | 81 | public metricReadProvisionedThroughputExceeded(props?: cloudwatch.MetricOptions): cloudwatch.Metric { 82 | return this.metric('ReadProvisionedThroughputExceeded', props); 83 | } 84 | 85 | public metricWriteProvisionedThroughputExceeded(props?: cloudwatch.MetricOptions): cloudwatch.Metric { 86 | return this.metric('WriteProvisionedThroughputExceeded', props); 87 | } 88 | 89 | public metricSubscribeToShardRateExceeded(props?: cloudwatch.MetricOptions): cloudwatch.Metric { 90 | return this.metric('SubscribeToShard.RateExceeded', props); 91 | } 92 | 93 | public metricSubscribeToShardSuccess(props?: cloudwatch.MetricOptions): cloudwatch.Metric { 94 | return this.metric('SubscribeToShard.Success', props); 95 | } 96 | 97 | public 
metricSubscribeToShardEventBytes(props?: cloudwatch.MetricOptions): cloudwatch.Metric { 98 | return this.metric('SubscribeToShardEvent.Bytes', props); 99 | } 100 | 101 | public metricSubscribeToShardEventMillisBehindLatest(props?: cloudwatch.MetricOptions): cloudwatch.Metric { 102 | return this.metric('SubscribeToShardEvent.MillisBehindLatest', props); 103 | } 104 | 105 | public metricSubscribeToShardEventRecords(props?: cloudwatch.MetricOptions): cloudwatch.Metric { 106 | return this.metric('SubscribeToShardEvent.Records', props); 107 | } 108 | 109 | public metricSubscribeToShardEventSuccess(props?: cloudwatch.MetricOptions): cloudwatch.Metric { 110 | return this.metric('SubscribeToShardEvent.Success', props); 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /src/etl/glue-crawler.ts: -------------------------------------------------------------------------------- 1 | import { CfnResource } from 'aws-cdk-lib'; 2 | import * as cloudwatch from 'aws-cdk-lib/aws-cloudwatch'; 3 | import * as events from 'aws-cdk-lib/aws-events'; 4 | import * as glue from 'aws-cdk-lib/aws-glue'; 5 | import * as iam from 'aws-cdk-lib/aws-iam'; 6 | import * as lf from 'aws-cdk-lib/aws-lakeformation'; 7 | import { Construct } from 'constructs'; 8 | import { Permissions } from '../global/enums'; 9 | 10 | export interface IGlueCrawlerProperties { 11 | name: string; 12 | databaseName: string; 13 | roleName?: string; 14 | trigger?: glue.CfnTrigger; 15 | bucketName: string; 16 | bucketPrefix?: string; 17 | lfS3Resource: CfnResource; 18 | } 19 | 20 | export class GlueCrawler extends Construct { 21 | public readonly crawler: glue.CfnCrawler; 22 | public readonly role: iam.IRole; 23 | public readonly metricSuccessRule: events.Rule; 24 | public readonly metricFailureRule: events.Rule; 25 | 26 | constructor(scope: Construct, id: string, props: IGlueCrawlerProperties) { 27 | super(scope, id); 28 | 29 | this.role = this.createGlueCrawlerRole(props); 
30 | this.metricSuccessRule = this.crawlerRule('SuccessRule', props.name, 'Succeeded'); 31 | this.metricFailureRule = this.crawlerRule('FailureRule', props.name, 'Failed'); 32 | let s3TargetPaths = new Array(); 33 | 34 | s3TargetPaths.push({ 35 | path: `s3://${props.bucketName}/${props.bucketPrefix}`, 36 | }); 37 | 38 | this.crawler = new glue.CfnCrawler(this, `data-lake-crawler-${props.name}-`, { 39 | name: props.name, 40 | role: this.role.roleArn, 41 | databaseName: props.databaseName, 42 | targets: { 43 | s3Targets: s3TargetPaths, 44 | }, 45 | }); 46 | 47 | const dbPerms = new lf.CfnPermissions(this, 'glue-role-database-permission', { 48 | dataLakePrincipal: { 49 | dataLakePrincipalIdentifier: this.role.roleArn, 50 | }, 51 | resource: { 52 | databaseResource: { 53 | name: props.databaseName, 54 | }, 55 | }, 56 | permissions: [ 57 | 'CREATE_TABLE', 58 | 'DESCRIBE', 59 | ], 60 | }); 61 | 62 | const s3perms = new lf.CfnPermissions(this, 'datalake-creator-permission', { 63 | dataLakePrincipal: { 64 | dataLakePrincipalIdentifier: this.role.roleArn, 65 | }, 66 | resource: { 67 | dataLocationResource: { 68 | s3Resource: `arn:aws:s3:::${props.bucketName}`, 69 | }, 70 | }, 71 | permissions: [ 72 | Permissions.DATA_LOCATION_ACCESS, 73 | ], 74 | }); 75 | 76 | s3perms.addDependsOn(props.lfS3Resource); 77 | this.crawler.addDependsOn(dbPerms); 78 | this.crawler.addDependsOn(s3perms); 79 | } 80 | 81 | metricSuccess(props?: cloudwatch.MetricOptions): cloudwatch.Metric { 82 | return this.ruleMetric(this.metricSuccessRule, props); 83 | } 84 | 85 | metricFailure(props?: cloudwatch.MetricOptions): cloudwatch.Metric { 86 | return this.ruleMetric(this.metricFailureRule, props); 87 | } 88 | 89 | private ruleMetric({ ruleName }: events.Rule, props?: cloudwatch.MetricOptions): cloudwatch.Metric { 90 | return new cloudwatch.Metric({ 91 | namespace: 'AWS/Events', 92 | metricName: 'TriggeredRules', 93 | dimensionsMap: { RuleName: ruleName }, 94 | statistic: cloudwatch.Statistic.SUM, 95 | 
...props, 96 | }).attachTo(this); 97 | } 98 | 99 | private crawlerRule(id: string, crawlerName: string, ...states: string[]): events.Rule { 100 | return new events.Rule(this, id, { 101 | ruleName: crawlerName + states.join(''), 102 | description: `Event triggered when Glue Crawler ${crawlerName} is in ${states.join(' or ')} state(s)`, 103 | eventPattern: { 104 | source: ['aws.glue'], 105 | detailType: ['Glue Crawler State Change'], 106 | detail: { 107 | state: states, 108 | crawlerName: [crawlerName], 109 | }, 110 | }, 111 | }); 112 | } 113 | 114 | private createGlueCrawlerRole(props: IGlueCrawlerProperties): iam.Role { 115 | const role = new iam.Role(this, 'Role', { 116 | roleName: props.roleName || props.name + 'Role', 117 | assumedBy: new iam.ServicePrincipal('glue.amazonaws.com'), 118 | managedPolicies: [ 119 | iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSGlueServiceRole'), 120 | iam.ManagedPolicy.fromAwsManagedPolicyName('AmazonS3FullAccess'), // slim this down if possible 121 | ], 122 | }); 123 | role.addToPolicy(new iam.PolicyStatement({ actions: ['lakeformation:GetDataAccess'], resources: ['*'] })); 124 | return role; 125 | } 126 | } -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@randyridgley/cdk-datalake-constructs", 3 | "description": "AWS CDK Constructs that can be used to create datalakes/meshes and more", 4 | "repository": { 5 | "type": "git", 6 | "url": "https://github.com/randyridgley/cdk-datalake-constructs.git" 7 | }, 8 | "scripts": { 9 | "build": "npx projen build", 10 | "bump": "npx projen bump", 11 | "clobber": "npx projen clobber", 12 | "compat": "npx projen compat", 13 | "compile": "npx projen compile", 14 | "coverage": "npx projen coverage", 15 | "default": "npx projen default", 16 | "docgen": "npx projen docgen", 17 | "eject": "npx projen eject", 18 | "eslint": "npx projen 
eslint", 19 | "package": "npx projen package", 20 | "package-all": "npx projen package-all", 21 | "package:java": "npx projen package:java", 22 | "package:js": "npx projen package:js", 23 | "package:python": "npx projen package:python", 24 | "post-compile": "npx projen post-compile", 25 | "post-upgrade": "npx projen post-upgrade", 26 | "pre-compile": "npx projen pre-compile", 27 | "release": "npx projen release", 28 | "test": "npx projen test", 29 | "test:update": "npx projen test:update", 30 | "test:watch": "npx projen test:watch", 31 | "unbump": "npx projen unbump", 32 | "upgrade": "npx projen upgrade", 33 | "upgrade-projen": "npx projen upgrade-projen", 34 | "watch": "npx projen watch", 35 | "projen": "npx projen" 36 | }, 37 | "author": { 38 | "name": "Randy Ridgley", 39 | "email": "randy.ridgley@gmail.com", 40 | "organization": false 41 | }, 42 | "devDependencies": { 43 | "@types/jest": "^26.0.24", 44 | "@types/node": "^12", 45 | "@typescript-eslint/eslint-plugin": "^5", 46 | "@typescript-eslint/parser": "^5", 47 | "cdk-nag": "^2.18.44", 48 | "constructs": "10.0.5", 49 | "eslint": "^8", 50 | "eslint-import-resolver-node": "^0.3.6", 51 | "eslint-import-resolver-typescript": "^2.7.1", 52 | "eslint-plugin-import": "^2.26.0", 53 | "jest": "^27.5.1", 54 | "jest-junit": "^13", 55 | "jsii": "^1.70.0", 56 | "jsii-diff": "^1.70.0", 57 | "jsii-docgen": "^3.8.31", 58 | "json-schema": "^0.4.0", 59 | "npm-check-updates": "^12", 60 | "projen": "^0.52.44", 61 | "standard-version": "^9", 62 | "ts-jest": "^27.1.5", 63 | "typescript": "^4.8.4" 64 | }, 65 | "peerDependencies": { 66 | "@aws-cdk/aws-glue-alpha": "^2.47.0-alpha.0", 67 | "aws-cdk-lib": "^2.13.0", 68 | "constructs": "^10.0.5" 69 | }, 70 | "dependencies": { 71 | "@aws-cdk/aws-glue-alpha": "^2.47.0-alpha.0", 72 | "@aws-cdk/aws-lambda-python-alpha": "^2.47.0-alpha.0", 73 | "aws-cdk-lib": "^2.47.0" 74 | }, 75 | "keywords": [ 76 | "aws", 77 | "aws-cdk", 78 | "cdk", 79 | "cdk-construct", 80 | "datalake", 81 | "datamesh", 82 
| "glue", 83 | "lakeformation" 84 | ], 85 | "main": "lib/index.js", 86 | "license": "MIT", 87 | "version": "0.0.0", 88 | "jest": { 89 | "testMatch": [ 90 | "/src/**/__tests__/**/*.ts?(x)", 91 | "/(test|src)/**/?(*.)+(spec|test).ts?(x)" 92 | ], 93 | "clearMocks": true, 94 | "collectCoverage": true, 95 | "coverageReporters": [ 96 | "json", 97 | "lcov", 98 | "clover", 99 | "cobertura", 100 | "text" 101 | ], 102 | "coverageDirectory": "coverage", 103 | "coveragePathIgnorePatterns": [ 104 | "/node_modules/" 105 | ], 106 | "testPathIgnorePatterns": [ 107 | "/node_modules/" 108 | ], 109 | "watchPathIgnorePatterns": [ 110 | "/node_modules/" 111 | ], 112 | "reporters": [ 113 | "default", 114 | [ 115 | "jest-junit", 116 | { 117 | "outputDirectory": "test-reports" 118 | } 119 | ] 120 | ], 121 | "preset": "ts-jest", 122 | "globals": { 123 | "ts-jest": { 124 | "tsconfig": "tsconfig.dev.json" 125 | } 126 | } 127 | }, 128 | "types": "lib/index.d.ts", 129 | "stability": "stable", 130 | "jsii": { 131 | "outdir": "dist", 132 | "targets": { 133 | "java": { 134 | "package": "io.github.randyridgley.cdk.datalake.constructs", 135 | "maven": { 136 | "groupId": "io.github.randyridgley", 137 | "artifactId": "cdk-datalake-constructs" 138 | } 139 | }, 140 | "python": { 141 | "distName": "cdk-datalake-constructs", 142 | "module": "cdk_datalake_constructs" 143 | } 144 | }, 145 | "tsc": { 146 | "outDir": "lib", 147 | "rootDir": "src" 148 | } 149 | }, 150 | "awscdkio": { 151 | "twitter": "randyridgley", 152 | "announce": true 153 | }, 154 | "//": "~~ Generated by projen. To modify, edit .projenrc.js and run \"npx projen\"." 
155 | } -------------------------------------------------------------------------------- /.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": { 3 | "jest": true, 4 | "node": true 5 | }, 6 | "root": true, 7 | "plugins": [ 8 | "@typescript-eslint", 9 | "import" 10 | ], 11 | "parser": "@typescript-eslint/parser", 12 | "parserOptions": { 13 | "ecmaVersion": 2018, 14 | "sourceType": "module", 15 | "project": "./tsconfig.dev.json" 16 | }, 17 | "extends": [ 18 | "plugin:import/typescript" 19 | ], 20 | "settings": { 21 | "import/parsers": { 22 | "@typescript-eslint/parser": [ 23 | ".ts", 24 | ".tsx" 25 | ] 26 | }, 27 | "import/resolver": { 28 | "node": {}, 29 | "typescript": { 30 | "project": "./tsconfig.dev.json", 31 | "alwaysTryTypes": true 32 | } 33 | } 34 | }, 35 | "ignorePatterns": [ 36 | "*.js", 37 | "!.projenrc.js", 38 | "*.d.ts", 39 | "node_modules/", 40 | "*.generated.ts", 41 | "coverage" 42 | ], 43 | "rules": { 44 | "indent": [ 45 | "off" 46 | ], 47 | "@typescript-eslint/indent": [ 48 | "error", 49 | 2 50 | ], 51 | "quotes": [ 52 | "error", 53 | "single", 54 | { 55 | "avoidEscape": true 56 | } 57 | ], 58 | "comma-dangle": [ 59 | "error", 60 | "always-multiline" 61 | ], 62 | "comma-spacing": [ 63 | "error", 64 | { 65 | "before": false, 66 | "after": true 67 | } 68 | ], 69 | "no-multi-spaces": [ 70 | "error", 71 | { 72 | "ignoreEOLComments": false 73 | } 74 | ], 75 | "array-bracket-spacing": [ 76 | "error", 77 | "never" 78 | ], 79 | "array-bracket-newline": [ 80 | "error", 81 | "consistent" 82 | ], 83 | "object-curly-spacing": [ 84 | "error", 85 | "always" 86 | ], 87 | "object-curly-newline": [ 88 | "error", 89 | { 90 | "multiline": true, 91 | "consistent": true 92 | } 93 | ], 94 | "object-property-newline": [ 95 | "error", 96 | { 97 | "allowAllPropertiesOnSameLine": true 98 | } 99 | ], 100 | "keyword-spacing": [ 101 | "error" 102 | ], 103 | "brace-style": [ 104 | "error", 105 | "1tbs", 106 | { 107 | 
"allowSingleLine": true 108 | } 109 | ], 110 | "space-before-blocks": [ 111 | "error" 112 | ], 113 | "curly": [ 114 | "error", 115 | "multi-line", 116 | "consistent" 117 | ], 118 | "@typescript-eslint/member-delimiter-style": [ 119 | "error" 120 | ], 121 | "semi": [ 122 | "error", 123 | "always" 124 | ], 125 | "max-len": [ 126 | "error", 127 | { 128 | "code": 150, 129 | "ignoreUrls": true, 130 | "ignoreStrings": true, 131 | "ignoreTemplateLiterals": true, 132 | "ignoreComments": true, 133 | "ignoreRegExpLiterals": true 134 | } 135 | ], 136 | "quote-props": [ 137 | "error", 138 | "consistent-as-needed" 139 | ], 140 | "@typescript-eslint/no-require-imports": [ 141 | "error" 142 | ], 143 | "import/no-extraneous-dependencies": [ 144 | "error", 145 | { 146 | "devDependencies": [ 147 | "**/test/**", 148 | "**/build-tools/**" 149 | ], 150 | "optionalDependencies": false, 151 | "peerDependencies": true 152 | } 153 | ], 154 | "import/no-unresolved": [ 155 | "error" 156 | ], 157 | "import/order": [ 158 | "warn", 159 | { 160 | "groups": [ 161 | "builtin", 162 | "external" 163 | ], 164 | "alphabetize": { 165 | "order": "asc", 166 | "caseInsensitive": true 167 | } 168 | } 169 | ], 170 | "no-duplicate-imports": [ 171 | "error" 172 | ], 173 | "no-shadow": [ 174 | "off" 175 | ], 176 | "@typescript-eslint/no-shadow": [ 177 | "error" 178 | ], 179 | "key-spacing": [ 180 | "error" 181 | ], 182 | "no-multiple-empty-lines": [ 183 | "error" 184 | ], 185 | "@typescript-eslint/no-floating-promises": [ 186 | "error" 187 | ], 188 | "no-return-await": [ 189 | "off" 190 | ], 191 | "@typescript-eslint/return-await": [ 192 | "error" 193 | ], 194 | "no-trailing-spaces": [ 195 | "error" 196 | ], 197 | "dot-notation": [ 198 | "error" 199 | ], 200 | "no-bitwise": [ 201 | "error" 202 | ], 203 | "@typescript-eslint/member-ordering": [ 204 | "error", 205 | { 206 | "default": [ 207 | "public-static-field", 208 | "public-static-method", 209 | "protected-static-field", 210 | "protected-static-method", 211 
| "private-static-field", 212 | "private-static-method", 213 | "field", 214 | "constructor", 215 | "method" 216 | ] 217 | } 218 | ] 219 | }, 220 | "overrides": [ 221 | { 222 | "files": [ 223 | ".projenrc.js" 224 | ], 225 | "rules": { 226 | "@typescript-eslint/no-require-imports": "off", 227 | "import/no-extraneous-dependencies": "off" 228 | } 229 | } 230 | ] 231 | } 232 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | # ~~ Generated by projen. To modify, edit .projenrc.js and run "npx projen". 2 | 3 | name: build 4 | on: 5 | pull_request: {} 6 | workflow_dispatch: {} 7 | jobs: 8 | build: 9 | runs-on: ubuntu-latest 10 | permissions: 11 | contents: write 12 | outputs: 13 | self_mutation_happened: ${{ steps.self_mutation.outputs.self_mutation_happened }} 14 | env: 15 | CI: "true" 16 | steps: 17 | - name: Checkout 18 | uses: actions/checkout@v2 19 | with: 20 | ref: ${{ github.event.pull_request.head.ref }} 21 | repository: ${{ github.event.pull_request.head.repo.full_name }} 22 | - name: Install dependencies 23 | run: yarn install --check-files 24 | - name: build 25 | run: npx projen build 26 | - id: self_mutation 27 | name: Find mutations 28 | run: |- 29 | git add . 30 | git diff --staged --patch --exit-code > .repo.patch || echo "::set-output name=self_mutation_happened::true" 31 | - if: steps.self_mutation.outputs.self_mutation_happened 32 | name: Upload patch 33 | uses: actions/upload-artifact@v2 34 | with: 35 | name: .repo.patch 36 | path: .repo.patch 37 | - name: Fail build on mutation 38 | if: steps.self_mutation.outputs.self_mutation_happened 39 | run: |- 40 | echo "::error::Files were changed during build (see build log). If this was triggered from a fork, you will need to update your branch." 
41 | cat .repo.patch 42 | exit 1 43 | - name: Upload artifact 44 | uses: actions/upload-artifact@v2.1.1 45 | with: 46 | name: build-artifact 47 | path: dist 48 | container: 49 | image: jsii/superchain:1-buster-slim-node14 50 | self-mutation: 51 | needs: build 52 | runs-on: ubuntu-latest 53 | permissions: 54 | contents: write 55 | if: always() && needs.build.outputs.self_mutation_happened && !(github.event.pull_request.head.repo.full_name != github.repository) 56 | steps: 57 | - name: Checkout 58 | uses: actions/checkout@v2 59 | with: 60 | token: ${{ secrets.PROJEN_GITHUB_TOKEN }} 61 | ref: ${{ github.event.pull_request.head.ref }} 62 | repository: ${{ github.event.pull_request.head.repo.full_name }} 63 | - name: Download patch 64 | uses: actions/download-artifact@v2 65 | with: 66 | name: .repo.patch 67 | path: ${{ runner.temp }} 68 | - name: Apply patch 69 | run: '[ -s ${{ runner.temp }}/.repo.patch ] && git apply ${{ runner.temp }}/.repo.patch || echo "Empty patch. Skipping."' 70 | - name: Set git identity 71 | run: |- 72 | git config user.name "github-actions" 73 | git config user.email "github-actions@github.com" 74 | - name: Push changes 75 | run: |2- 76 | git add . 77 | git commit -s -m "chore: self mutation" 78 | git push origin HEAD:${{ github.event.pull_request.head.ref }} 79 | package-js: 80 | needs: build 81 | runs-on: ubuntu-latest 82 | permissions: {} 83 | if: "! 
needs.build.outputs.self_mutation_happened" 84 | steps: 85 | - uses: actions/setup-node@v2 86 | with: 87 | node-version: 14.x 88 | - name: Download build artifacts 89 | uses: actions/download-artifact@v2 90 | with: 91 | name: build-artifact 92 | path: dist 93 | - name: Prepare Repository 94 | run: mv dist .repo 95 | - name: Install Dependencies 96 | run: cd .repo && yarn install --check-files --frozen-lockfile 97 | - name: Create js artifact 98 | run: cd .repo && npx projen package:js 99 | - name: Collect js Artifact 100 | run: mv .repo/dist dist 101 | package-java: 102 | needs: build 103 | runs-on: ubuntu-latest 104 | permissions: {} 105 | if: "! needs.build.outputs.self_mutation_happened" 106 | steps: 107 | - uses: actions/setup-java@v2 108 | with: 109 | distribution: temurin 110 | java-version: 11.x 111 | - uses: actions/setup-node@v2 112 | with: 113 | node-version: 14.x 114 | - name: Download build artifacts 115 | uses: actions/download-artifact@v2 116 | with: 117 | name: build-artifact 118 | path: dist 119 | - name: Prepare Repository 120 | run: mv dist .repo 121 | - name: Install Dependencies 122 | run: cd .repo && yarn install --check-files --frozen-lockfile 123 | - name: Create java artifact 124 | run: cd .repo && npx projen package:java 125 | - name: Collect java Artifact 126 | run: mv .repo/dist dist 127 | package-python: 128 | needs: build 129 | runs-on: ubuntu-latest 130 | permissions: {} 131 | if: "! 
needs.build.outputs.self_mutation_happened" 132 | steps: 133 | - uses: actions/setup-node@v2 134 | with: 135 | node-version: 14.x 136 | - uses: actions/setup-python@v2 137 | with: 138 | python-version: 3.x 139 | - name: Download build artifacts 140 | uses: actions/download-artifact@v2 141 | with: 142 | name: build-artifact 143 | path: dist 144 | - name: Prepare Repository 145 | run: mv dist .repo 146 | - name: Install Dependencies 147 | run: cd .repo && yarn install --check-files --frozen-lockfile 148 | - name: Create python artifact 149 | run: cd .repo && npx projen package:python 150 | - name: Collect python Artifact 151 | run: mv .repo/dist dist 152 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # cdk-datalake-constructs 2 | 3 | ***Very experimental until version 1.0.*** 4 | This is my attempt at simplifying deploying various datalake strategies in AWS with the CDK. 
5 | 6 | [![License](https://img.shields.io/badge/License-MIT-green)](https://opensource.org/licenses/MIT) 7 | [![Build](https://github.com/randyridgley/cdk-datalake-constructs/workflows/build/badge.svg)](https://github.com/randyridgley/cdk-datalake-constructs/actions/workflows/build.yml) 8 | [![Release](https://github.com/randyridgley/cdk-datalake-constructs/workflows/release/badge.svg)](https://github.com/randyridgley/cdk-datalake-constructs/actions/workflows/release.yml) 9 | [![Python](https://img.shields.io/pypi/pyversions/cdk-datalake-constructs)](https://pypi.org/project/cdk-datalake-constructs/) [![pip](https://img.shields.io/badge/pip%20install-cdk--datalake--constructs-blue)](https://pypi.org/project/cdk-datalake-constructs/) 10 | [![npm version](https://img.shields.io/npm/v/cdk-datalake-constructs)](https://www.npmjs.com/package/@randyridgley/cdk-datalake-constructs) [![pypi version](https://img.shields.io/pypi/v/cdk-datalake-constructs)](https://pypi.org/project/cdk-datalake-constructs/) [![Maven](https://img.shields.io/maven-central/v/io.github.randyridgley/cdk-datalake-constructs)](https://search.maven.org/search?q=a:cdk-datalake-constructs) [![nuget](https://img.shields.io/nuget/v/Cdk.Datalake.Constructs)](https://www.nuget.org/packages/Cdk.Datalake.Constructs/) 11 | 12 | **Table of Contents** 13 | 14 | - [Features](#features) 15 | - [Installation](#installation) 16 | - [Usage](#usage) 17 | - [Basic](#basic) 18 | - [Data Mesh](#data-mesh) 19 | - [Documentation](#documentation) 20 | - [Construct API Reference](#construct-api-reference) 21 | - [Supporting this project](#supporting-this-project) 22 | - [License](#license) 23 | 24 | ## Features 25 | 26 | - Easy to Start - Create a Datalake in a few lines. 27 | - Easy to Expand - Expand into multiple accounts and into a data mesh. 28 | - Easy to Admin - Initial governance created on deploy.
29 | 30 | ## Installation 31 | 32 | TypeScript/JavaScript 33 | 34 | ```sh 35 | $ npm install @randyridgley/cdk-datalake-constructs 36 | ``` 37 | 38 | Python 39 | 40 | ```sh 41 | $ pip install cdk-datalake-constructs 42 | ``` 43 | 44 | .Net 45 | 46 | ```sh 47 | $ nuget install CDK.Datalake.Constructs 48 | 49 | # See more: https://www.nuget.org/packages/CDK.Datalake.Constructs/ 50 | ``` 51 | 52 | ## Usage 53 | 54 | ### Basic 55 | 56 | ```typescript 57 | import { DataLake } from '@randyridgley/cdk-datalake-constructs'; 58 | 59 | const taxiPipes: Array = [ 60 | pipelines.YellowPipeline(), 61 | pipelines.GreenPipeline(), 62 | ] 63 | 64 | const dataProducts: Array = [{ 65 | pipelines: taxiPipes, 66 | accountId: lakeAccountId, 67 | dataCatalogAccountId: '123456789012', 68 | databaseName: 'taxi-product' 69 | }] 70 | 71 | // deploy to local account 72 | new dl.DataLake(this, 'LocalDataLake', { 73 | name: 'data-lake', 74 | accountId: centralAccountId, 75 | region: 'us-east-1', 76 | policyTags: { 77 | "classification": "public,confidential,highlyconfidential,restricted,critical", 78 | "owner": "product,central,consumer" 79 | }, 80 | stageName: Stage.PROD, 81 | dataProducts: dataProducts, 82 | createDefaultDatabase: false 83 | }); 84 | ``` 85 | 86 | ### Data Mesh 87 | You can setup cross account access and pre-created policy tags for TBAC access in Lake Formation 88 | 89 | ```typescript 90 | const lakeAccountId = app.node.tryGetContext('lakeAccountId') 91 | const centralAccountId = app.node.tryGetContext('centralAccountId') 92 | const consumerAccountId = app.node.tryGetContext('consumerAccountId') 93 | 94 | const taxiPipes: Array = [ 95 | pipelines.YellowPipeline(), 96 | pipelines.GreenPipeline(), 97 | ] 98 | 99 | const dataProducts: Array = [{ 100 | pipelines: taxiPipes, 101 | accountId: lakeAccountId, 102 | dataCatalogAccountId: centralAccountId, 103 | databaseName: 'taxi-product' 104 | }] 105 | 106 | // deploy to the central account 107 | new dl.DataLake(this, 
'CentralDataLake', { 108 | name: 'central-lake', 109 | accountId: centralAccountId, 110 | region: 'us-east-1', 111 | policyTags: { 112 | "classification": "public,confidential,highlyconfidential,restricted,critical", 113 | "owner": "product,central,consumer" 114 | }, 115 | stageName: Stage.PROD, 116 | crossAccount: { 117 | consumerAccountIds: [consumerAccountId, lakeAccountId], 118 | dataCatalogOwnerAccountId: centralAccountId, 119 | region: 'us-east-1', // this is still only single region today 120 | }, 121 | dataProducts: dataProducts, 122 | createDefaultDatabase: true 123 | }); 124 | 125 | // deploy to the data product account 126 | const datalake = new dl.DataLake(this, 'LocalDataLake', { 127 | name: 'local-lake', 128 | accountId: lakeAccountId, 129 | region: 'us-east-1', 130 | stageName: Stage.PROD, 131 | dataProducts: dataProducts, 132 | createDefaultDatabase: true 133 | }); 134 | 135 | // Optionally add custom resource to download public data set products 136 | datalake.createDownloaderCustomResource(accountId, region, props.stageName) 137 | 138 | // deploy to consumer account 139 | const datalake = new dl.DataLake(this, 'ConsumerDataLake', { 140 | name: 'consumer-lake', 141 | accountId: consumerAccountId, 142 | region: 'us-east-1', 143 | stageName: Stage.PROD, 144 | policyTags: { 145 | "access": "analyst,engineer,marketing" 146 | }, 147 | createDefaultDatabase: true 148 | }); 149 | ``` 150 | 151 | ## Documentation 152 | 153 | ### Construct API Reference 154 | 155 | See [API.md](./API.md). 156 | 157 | 158 | ## Supporting this project 159 | 160 | I'm working on this project in my free time, if you like my project, or found it helpful and would like to support me any contributions are much appreciated! ❤️ 161 | 162 | ## License 163 | 164 | This project is distributed under the [MIT](./LICENSE). 
165 | 166 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | # ~~ Generated by projen. To modify, edit .projenrc.js and run "npx projen". 2 | 3 | name: release 4 | on: 5 | push: 6 | branches: 7 | - main 8 | workflow_dispatch: {} 9 | jobs: 10 | release: 11 | runs-on: ubuntu-latest 12 | permissions: 13 | contents: write 14 | outputs: 15 | latest_commit: ${{ steps.git_remote.outputs.latest_commit }} 16 | env: 17 | CI: "true" 18 | steps: 19 | - name: Checkout 20 | uses: actions/checkout@v2 21 | with: 22 | fetch-depth: 0 23 | - name: Set git identity 24 | run: |- 25 | git config user.name "github-actions" 26 | git config user.email "github-actions@github.com" 27 | - name: Install dependencies 28 | run: yarn install --check-files --frozen-lockfile 29 | - name: release 30 | run: npx projen release 31 | - name: Check for new commits 32 | id: git_remote 33 | run: echo ::set-output name=latest_commit::"$(git ls-remote origin -h ${{ github.ref }} | cut -f1)" 34 | - name: Upload artifact 35 | if: ${{ steps.git_remote.outputs.latest_commit == github.sha }} 36 | uses: actions/upload-artifact@v2.1.1 37 | with: 38 | name: build-artifact 39 | path: dist 40 | container: 41 | image: jsii/superchain:1-buster-slim-node14 42 | release_github: 43 | name: Publish to GitHub Releases 44 | needs: release 45 | runs-on: ubuntu-latest 46 | permissions: 47 | contents: write 48 | if: needs.release.outputs.latest_commit == github.sha 49 | steps: 50 | - uses: actions/setup-node@v2 51 | with: 52 | node-version: 14.x 53 | - name: Download build artifacts 54 | uses: actions/download-artifact@v2 55 | with: 56 | name: build-artifact 57 | path: dist 58 | - name: Prepare Repository 59 | run: mv dist .repo 60 | - name: Collect GitHub Metadata 61 | run: mv .repo/dist dist 62 | - name: Release 63 | run: errout=$(mktemp); gh release create $(cat 
dist/releasetag.txt) -R $GITHUB_REPOSITORY -F dist/changelog.md -t $(cat dist/releasetag.txt) --target $GITHUB_REF 2> $errout && true; exitcode=$?; if [ $exitcode -ne 0 ] && ! grep -q "Release.tag_name already exists" $errout; then cat $errout; exit $exitcode; fi 64 | env: 65 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 66 | GITHUB_REPOSITORY: ${{ github.repository }} 67 | GITHUB_REF: ${{ github.ref }} 68 | release_npm: 69 | name: Publish to npm 70 | needs: release 71 | runs-on: ubuntu-latest 72 | permissions: 73 | contents: read 74 | if: needs.release.outputs.latest_commit == github.sha 75 | steps: 76 | - uses: actions/setup-node@v2 77 | with: 78 | node-version: 14.x 79 | - name: Download build artifacts 80 | uses: actions/download-artifact@v2 81 | with: 82 | name: build-artifact 83 | path: dist 84 | - name: Prepare Repository 85 | run: mv dist .repo 86 | - name: Install Dependencies 87 | run: cd .repo && yarn install --check-files --frozen-lockfile 88 | - name: Create js artifact 89 | run: cd .repo && npx projen package:js 90 | - name: Collect js Artifact 91 | run: mv .repo/dist dist 92 | - name: Release 93 | run: npx -p publib@latest publib-npm 94 | env: 95 | NPM_DIST_TAG: latest 96 | NPM_REGISTRY: registry.npmjs.org 97 | NPM_TOKEN: ${{ secrets.NPM_TOKEN }} 98 | release_maven: 99 | name: Publish to Maven Central 100 | needs: release 101 | runs-on: ubuntu-latest 102 | permissions: 103 | contents: read 104 | if: needs.release.outputs.latest_commit == github.sha 105 | steps: 106 | - uses: actions/setup-java@v2 107 | with: 108 | distribution: temurin 109 | java-version: 11.x 110 | - uses: actions/setup-node@v2 111 | with: 112 | node-version: 14.x 113 | - name: Download build artifacts 114 | uses: actions/download-artifact@v2 115 | with: 116 | name: build-artifact 117 | path: dist 118 | - name: Prepare Repository 119 | run: mv dist .repo 120 | - name: Install Dependencies 121 | run: cd .repo && yarn install --check-files --frozen-lockfile 122 | - name: Create java 
artifact 123 | run: cd .repo && npx projen package:java 124 | - name: Collect java Artifact 125 | run: mv .repo/dist dist 126 | - name: Release 127 | run: npx -p publib@latest publib-maven 128 | env: 129 | MAVEN_ENDPOINT: https://s01.oss.sonatype.org 130 | MAVEN_GPG_PRIVATE_KEY: ${{ secrets.MAVEN_GPG_PRIVATE_KEY }} 131 | MAVEN_GPG_PRIVATE_KEY_PASSPHRASE: ${{ secrets.MAVEN_GPG_PRIVATE_KEY_PASSPHRASE }} 132 | MAVEN_PASSWORD: ${{ secrets.MAVEN_PASSWORD }} 133 | MAVEN_USERNAME: ${{ secrets.MAVEN_USERNAME }} 134 | MAVEN_STAGING_PROFILE_ID: ${{ secrets.MAVEN_STAGING_PROFILE_ID }} 135 | release_pypi: 136 | name: Publish to PyPI 137 | needs: release 138 | runs-on: ubuntu-latest 139 | permissions: 140 | contents: read 141 | if: needs.release.outputs.latest_commit == github.sha 142 | steps: 143 | - uses: actions/setup-node@v2 144 | with: 145 | node-version: 14.x 146 | - uses: actions/setup-python@v2 147 | with: 148 | python-version: 3.x 149 | - name: Download build artifacts 150 | uses: actions/download-artifact@v2 151 | with: 152 | name: build-artifact 153 | path: dist 154 | - name: Prepare Repository 155 | run: mv dist .repo 156 | - name: Install Dependencies 157 | run: cd .repo && yarn install --check-files --frozen-lockfile 158 | - name: Create python artifact 159 | run: cd .repo && npx projen package:python 160 | - name: Collect python Artifact 161 | run: mv .repo/dist dist 162 | - name: Release 163 | run: npx -p publib@latest publib-pypi 164 | env: 165 | TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }} 166 | TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }} 167 | -------------------------------------------------------------------------------- /src/data-streams/s3-delivery-stream.ts: -------------------------------------------------------------------------------- 1 | import { Resource } from 'aws-cdk-lib'; 2 | import * as cloudwatch from 'aws-cdk-lib/aws-cloudwatch'; 3 | import * as iam from 'aws-cdk-lib/aws-iam'; 4 | import * as kinesis from 'aws-cdk-lib/aws-kinesis'; 5 | 
import * as firehose from 'aws-cdk-lib/aws-kinesisfirehose'; 6 | import * as lambda from 'aws-cdk-lib/aws-lambda'; 7 | import * as s3 from 'aws-cdk-lib/aws-s3'; 8 | import { Construct } from 'constructs'; 9 | 10 | export enum DeliveryStreamType { 11 | DIRECT_PUT = 'DirectPut', 12 | KINESIS_STREAM_AS_SOURCE = 'KinesisStreamAsSource' 13 | } 14 | 15 | export enum ProcessorType { 16 | LAMBDA = 'Lambda' 17 | } 18 | 19 | export enum CompressionType { 20 | UNCOMPRESSED = 'UNCOMPRESSED', 21 | GZIP = 'GZIP', 22 | ZIP = 'ZIP', 23 | SNAPPY = 'Snappy' 24 | } 25 | 26 | export interface DeliveryStreamProperties { 27 | readonly kinesisStream: kinesis.Stream; 28 | readonly s3Bucket: s3.IBucket; 29 | readonly s3Prefix?: string; 30 | readonly compression?: CompressionType; 31 | readonly transformFunction?: lambda.Function; 32 | } 33 | 34 | export class S3DeliveryStream extends Resource { 35 | public s3Bucket: s3.IBucket; 36 | protected cloudWatchLogsRole?: iam.Role; 37 | public readonly deliveryStreamArn: string; 38 | public readonly deliveryStreamName: string; 39 | private readonly role: iam.Role; 40 | private readonly deliveryStreamResource: firehose.CfnDeliveryStream; 41 | 42 | constructor(parent: Construct, name: string, props: DeliveryStreamProperties) { 43 | super(parent, name); 44 | this.role = new iam.Role(this, 'kinesis-role', { 45 | assumedBy: new iam.ServicePrincipal('firehose.amazonaws.com'), 46 | }); 47 | 48 | this.s3Bucket = props.s3Bucket; 49 | this.deliveryStreamResource = new firehose.CfnDeliveryStream(this, 'delivery-stream', { 50 | deliveryStreamType: DeliveryStreamType.KINESIS_STREAM_AS_SOURCE, 51 | kinesisStreamSourceConfiguration: this.makeKinesisSourceConfig(props), 52 | extendedS3DestinationConfiguration: this.makeS3Config(props), 53 | }); 54 | this.deliveryStreamResource.node.addDependency(this.role); 55 | 56 | this.deliveryStreamArn = this.deliveryStreamResource.getAtt('Arn').toString(); 57 | this.deliveryStreamName = this.deliveryStreamResource.ref; 58 | } 
59 | 60 | public metric(metricName: string, props?: cloudwatch.MetricOptions): cloudwatch.Metric { 61 | return new cloudwatch.Metric({ 62 | namespace: 'AWS/Firehose', 63 | metricName, 64 | dimensionsMap: { 65 | DeliveryStreamName: this.deliveryStreamName, 66 | }, 67 | ...props, 68 | }); 69 | } 70 | 71 | public metricBackupToS3Bytes(props?: cloudwatch.MetricOptions): cloudwatch.Metric { 72 | return this.metric('BackupToS3.Bytes', props); 73 | } 74 | 75 | public metricBackupToS3DataFreshness(props?: cloudwatch.MetricOptions): cloudwatch.Metric { 76 | return this.metric('BackupToS3.DataFreshness', props); 77 | } 78 | 79 | public metricBackupToS3Records(props?: cloudwatch.MetricOptions): cloudwatch.Metric { 80 | return this.metric('BackupToS3.Records', props); 81 | } 82 | 83 | public metricBackupToS3Success(props?: cloudwatch.MetricOptions): cloudwatch.Metric { 84 | return this.metric('BackupToS3.Success', props); 85 | } 86 | 87 | public metricDataReadFromKinesisStreamBytes(props?: cloudwatch.MetricOptions): cloudwatch.Metric { 88 | return this.metric('DataReadFromKinesisStream.Bytes', props); 89 | } 90 | 91 | public metricDataReadFromKinesisStreamRecords(props?: cloudwatch.MetricOptions): cloudwatch.Metric { 92 | return this.metric('DataReadFromKinesisStream.Records', props); 93 | } 94 | 95 | public metricDeliveryToS3Bytes(props?: cloudwatch.MetricOptions): cloudwatch.Metric { 96 | return this.metric('DeliveryToS3.Bytes', props); 97 | } 98 | 99 | public metricDeliveryToS3DataFreshness(props?: cloudwatch.MetricOptions): cloudwatch.Metric { 100 | return this.metric('DeliveryToS3.DataFreshness', props); 101 | } 102 | 103 | public metricDeliveryToS3Records(props?: cloudwatch.MetricOptions): cloudwatch.Metric { 104 | return this.metric('DeliveryToS3.Records', props); 105 | } 106 | 107 | public metricDeliveryToS3Success(props?: cloudwatch.MetricOptions): cloudwatch.Metric { 108 | return this.metric('DeliveryToS3.Success', props); 109 | } 110 | 111 | public 
metricIncomingBytes(props?: cloudwatch.MetricOptions): cloudwatch.Metric { 112 | return this.metric('IncomingBytes', props); 113 | } 114 | 115 | public metricIncomingRecords(props?: cloudwatch.MetricOptions): cloudwatch.Metric { 116 | return this.metric('IncomingRecords', props); 117 | } 118 | 119 | private makeKinesisSourceConfig(props: DeliveryStreamProperties): firehose.CfnDeliveryStream.KinesisStreamSourceConfigurationProperty | undefined { 120 | if (props.kinesisStream) { 121 | props.kinesisStream.grantRead(this.role); 122 | props.kinesisStream.grant(this.role, 'kinesis:DescribeStream'); 123 | return { 124 | kinesisStreamArn: props.kinesisStream.streamArn, 125 | roleArn: this.role.roleArn, 126 | }; 127 | } else { 128 | throw new Error("must provide a Kinesis stream if type is 'KinesisStreamAsSource'"); 129 | } 130 | } 131 | 132 | private makeS3Config(props: DeliveryStreamProperties): firehose.CfnDeliveryStream.ExtendedS3DestinationConfigurationProperty | undefined { 133 | this.s3Bucket.grantReadWrite(this.role); 134 | if (props.kinesisStream) { 135 | props.kinesisStream.grantRead(this.role); 136 | } 137 | 138 | return { 139 | bucketArn: this.s3Bucket.bucketArn, 140 | bufferingHints: { 141 | intervalInSeconds: 60, 142 | sizeInMBs: 64, 143 | }, 144 | compressionFormat: props.compression || CompressionType.UNCOMPRESSED, 145 | prefix: props.s3Prefix || '', 146 | roleArn: this.role.roleArn, 147 | processingConfiguration: this.makeProcessorConfig(props), 148 | }; 149 | } 150 | 151 | private makeProcessorConfig(props: DeliveryStreamProperties): firehose.CfnDeliveryStream.ProcessingConfigurationProperty | undefined { 152 | if (props.transformFunction) { 153 | this.role.addToPolicy( 154 | new iam.PolicyStatement({ 155 | actions: ['lambda:InvokeFunction'], 156 | resources: [props.transformFunction.functionArn, `${props.transformFunction.functionArn}:*`], 157 | }), 158 | ); 159 | 160 | return { 161 | enabled: true, 162 | processors: [ 163 | { 164 | type: 
ProcessorType.LAMBDA, 165 | parameters: [ 166 | { 167 | parameterName: 'LambdaArn', 168 | parameterValue: props.transformFunction.functionArn, 169 | }, 170 | { 171 | parameterName: 'NumberOfRetries', 172 | parameterValue: '3', 173 | }, 174 | ], 175 | }, 176 | ], 177 | }; 178 | } else { 179 | return undefined; 180 | } 181 | } 182 | } -------------------------------------------------------------------------------- /src/etl/glue-job-ops.ts: -------------------------------------------------------------------------------- 1 | import { Duration } from 'aws-cdk-lib'; 2 | import * as cloudwatch from 'aws-cdk-lib/aws-cloudwatch'; 3 | import { Construct } from 'constructs'; 4 | 5 | import { GlueJob } from './glue-job'; 6 | 7 | export interface IGlueOpsProperties { 8 | job: GlueJob; 9 | jvmHeapSizeExceeding80percent?: cloudwatch.CreateAlarmOptions; 10 | jvmHeapSizeExceeding90percent?: cloudwatch.CreateAlarmOptions; 11 | metricExecutionFailure?: cloudwatch.CreateAlarmOptions; 12 | metricAllExecutionAttemptsFailed?: cloudwatch.CreateAlarmOptions; 13 | } 14 | 15 | export class GlueJobOps extends Construct { 16 | public readonly job: GlueJob; 17 | public dashboard: cloudwatch.Dashboard; 18 | public readonly jvmHeapSizeExceeding80PercentAlarm: cloudwatch.Alarm; 19 | public readonly jvmHeapSizeExceeding90PercentAlarm: cloudwatch.Alarm; 20 | public readonly metricExecutionFailureAlarm: cloudwatch.Alarm; 21 | public readonly metricAllExecutionAttemptsFailedAlarm: cloudwatch.Alarm; 22 | 23 | public readonly alarmsSev2: cloudwatch.Alarm[]; 24 | public readonly alarmsSev3: cloudwatch.Alarm[]; 25 | 26 | constructor(scope: Construct, id: string, props: IGlueOpsProperties) { 27 | super(scope, id); 28 | 29 | this.job = props.job; 30 | 31 | this.dashboard = new cloudwatch.Dashboard(this, 'dashboard', { 32 | dashboardName: `ETL_${this.job.name}`, 33 | }); 34 | 35 | this.jvmHeapSizeExceeding80PercentAlarm = new cloudwatch.Alarm(this, 'jvm-heapSize-exceeding80percent-alarm', { 36 | alarmName: 
`${this.job.name} JvmHeapSizeExceeding80`, 37 | alarmDescription: `Jvm Heap Size exceeding 80% glue job (${this.job.name})`, 38 | metric: this.job.jvmHeapUsageMetric({ 39 | period: Duration.days(1), 40 | statistic: cloudwatch.Statistic.MAXIMUM, 41 | }), 42 | threshold: 0.8, 43 | evaluationPeriods: 1, 44 | datapointsToAlarm: 1, 45 | treatMissingData: cloudwatch.TreatMissingData.NOT_BREACHING, 46 | comparisonOperator: cloudwatch.ComparisonOperator.GREATER_THAN_OR_EQUAL_TO_THRESHOLD, 47 | ...(props.jvmHeapSizeExceeding80percent || {}), 48 | }); 49 | 50 | this.jvmHeapSizeExceeding90PercentAlarm = new cloudwatch.Alarm(this, 'jvm-heapSize-exceeding90Percent-alarm', { 51 | alarmName: `${this.job.name} JvmHeapSizeExceeding90`, 52 | alarmDescription: `Jvm Heap Size exceeding 90% glue job (${this.job.name})`, 53 | metric: this.job.jvmHeapUsageMetric({ 54 | period: Duration.days(1), 55 | statistic: cloudwatch.Statistic.MAXIMUM, 56 | }), 57 | threshold: 0.9, 58 | evaluationPeriods: 1, 59 | datapointsToAlarm: 1, 60 | treatMissingData: cloudwatch.TreatMissingData.NOT_BREACHING, 61 | comparisonOperator: cloudwatch.ComparisonOperator.GREATER_THAN_OR_EQUAL_TO_THRESHOLD, 62 | ...(props.jvmHeapSizeExceeding90percent || {}), 63 | }); 64 | 65 | this.metricExecutionFailureAlarm = new cloudwatch.Alarm(this, 'metric-execution-failure-alarm', { 66 | alarmName: `${this.job.name} ExecutionFailure`, 67 | alarmDescription: `Error while running the Glue job ${this.job.name} on the current attempt. 
There might be job retries after this error.`, 68 | metric: this.job.metricFailure(), 69 | comparisonOperator: cloudwatch.ComparisonOperator.GREATER_THAN_OR_EQUAL_TO_THRESHOLD, 70 | threshold: 1, 71 | evaluationPeriods: 1, 72 | treatMissingData: cloudwatch.TreatMissingData.NOT_BREACHING, 73 | ...(props.metricExecutionFailure || {}), 74 | }); 75 | 76 | this.metricAllExecutionAttemptsFailedAlarm = new cloudwatch.Alarm(this, 'metric-all-execution-attempts-failed-alarm', { 77 | alarmName: `${this.job.name} AllExecutionAttemptsFailed`, 78 | alarmDescription: `Error while running the Glue job ${this.job.name} on the last attempt. There will be no retries of the job after this error.`, 79 | metric: this.job.metricAllExecutionAttemptsFailed(), 80 | comparisonOperator: cloudwatch.ComparisonOperator.GREATER_THAN_OR_EQUAL_TO_THRESHOLD, 81 | threshold: 1, 82 | evaluationPeriods: 1, 83 | treatMissingData: cloudwatch.TreatMissingData.NOT_BREACHING, 84 | ...(props.metricAllExecutionAttemptsFailed || {}), 85 | }); 86 | 87 | this.alarmsSev2 = [ 88 | this.jvmHeapSizeExceeding90PercentAlarm, 89 | this.metricAllExecutionAttemptsFailedAlarm, 90 | ]; 91 | 92 | this.alarmsSev3 = [ 93 | this.jvmHeapSizeExceeding80PercentAlarm, 94 | this.metricExecutionFailureAlarm, 95 | ]; 96 | 97 | this.setupDashboard(); 98 | } 99 | 100 | private addWidgets(widgets: cloudwatch.IWidget[]) { 101 | for (let i = 0; i < widgets.length; i += 4) { 102 | this.dashboard.addWidgets(...widgets.slice(i, i + 4)); 103 | } 104 | } 105 | 106 | private setupDashboard() { 107 | this.dashboard.addWidgets( 108 | new cloudwatch.TextWidget({ 109 | markdown: `# ${this.job.name} Job Result`, 110 | height: 1, 111 | width: 24, 112 | }), 113 | ); 114 | 115 | const jobResultWidgets = [ 116 | new cloudwatch.GraphWidget({ 117 | left: [ 118 | new cloudwatch.MathExpression({ 119 | expression: `SEARCH('MetricName="TriggeredRules" RuleName="${this.job.metricSuccessRule.ruleName}"', 'Sum', 900)`, 120 | usingMetrics: {}, 121 | label: 
'Success Count', 122 | }), 123 | ], 124 | title: 'Success Count', 125 | height: 6, 126 | width: 6, 127 | }), 128 | new cloudwatch.GraphWidget({ 129 | left: [ 130 | new cloudwatch.MathExpression({ 131 | expression: `SEARCH('MetricName="TriggeredRules" RuleName="${this.job.metricFailureRule.ruleName}"', 'Sum', 900)`, 132 | usingMetrics: {}, 133 | label: 'Failure Count', 134 | }), 135 | ], 136 | title: 'Failure Count', 137 | height: 6, 138 | width: 6, 139 | }), 140 | new cloudwatch.GraphWidget({ 141 | left: [ 142 | new cloudwatch.MathExpression({ 143 | expression: `SEARCH('MetricName="TriggeredRules" RuleName="${this.job.metricTimeoutRule.ruleName}"', 'Sum', 900)`, 144 | usingMetrics: {}, 145 | label: 'Timeout Count', 146 | }), 147 | ], 148 | title: 'Timeout Count', 149 | height: 6, 150 | width: 6, 151 | }), 152 | ]; 153 | 154 | this.dashboard.addWidgets(...jobResultWidgets); 155 | 156 | this.dashboard.addWidgets( 157 | new cloudwatch.TextWidget({ 158 | markdown: `# ${this.job.name} JVM Glue Driver Stats Alarms`, 159 | height: 1, 160 | width: 24, 161 | }), 162 | ); 163 | 164 | this.dashboard.addWidgets( 165 | ...[ 166 | new cloudwatch.GraphWidget({ 167 | left: [this.job.diskSpaceUsedMbMetric()], 168 | title: `${this.job.diskSpaceUsedMbMetric().metricName} (${this.job.diskSpaceUsedMbMetric().statistic})`, 169 | height: 6, 170 | width: 6, 171 | }), 172 | new cloudwatch.GraphWidget({ 173 | left: [this.job.elapsedTimeMetric()], 174 | title: `${this.job.elapsedTimeMetric().metricName} (${this.job.elapsedTimeMetric().statistic})`, 175 | height: 6, 176 | width: 6, 177 | }), 178 | ], 179 | ); 180 | 181 | const sev2AlarmWidgets = this.alarmsSev2.map(this.alarmWidget); 182 | const sev3AlarmWidgets = this.alarmsSev3.map(this.alarmWidget); 183 | this.addWidgets(sev2AlarmWidgets); 184 | this.addWidgets(sev3AlarmWidgets); 185 | } 186 | 187 | private alarmWidget(alarm: cloudwatch.Alarm): cloudwatch.AlarmWidget { 188 | return new cloudwatch.AlarmWidget({ 189 | alarm: alarm, 190 | 
title: `${alarm.alarmName}`, 191 | height: 6, 192 | width: 6, 193 | }); 194 | } 195 | } 196 | -------------------------------------------------------------------------------- /src/data-streams/kinesis-ops.ts: -------------------------------------------------------------------------------- 1 | import { Duration } from 'aws-cdk-lib'; 2 | import * as cloudwatch from 'aws-cdk-lib/aws-cloudwatch'; 3 | import { Construct } from 'constructs'; 4 | 5 | import { KinesisStream } from './kinesis-stream'; 6 | import { S3DeliveryStream } from './s3-delivery-stream'; 7 | 8 | export interface IKinesisOpsProperties { 9 | stream: KinesisStream; 10 | deliveryStream: S3DeliveryStream; 11 | 12 | inputStreamIteratorAgeCritical?: cloudwatch.CreateAlarmOptions; 13 | inputStreamIteratorAgeWarning?: cloudwatch.CreateAlarmOptions; 14 | inputStreamReadThroughputWarning?: cloudwatch.CreateAlarmOptions; 15 | inputStreamWriteThroughputWarning?: cloudwatch.CreateAlarmOptions; 16 | inputStreamGetRecordsWarning?: cloudwatch.CreateAlarmOptions; 17 | inputStreamPutRecordsWarning?: cloudwatch.CreateAlarmOptions; 18 | 19 | firehoseDeliveryToS3Critical?: cloudwatch.CreateAlarmOptions; 20 | firehoseDeliveryToS3Warning?: cloudwatch.CreateAlarmOptions; 21 | } 22 | 23 | export class KinesisOps extends Construct { 24 | 25 | public dashboard: cloudwatch.Dashboard; 26 | public readonly stream: KinesisStream; 27 | public readonly deliveryStream: S3DeliveryStream; 28 | public readonly streamName: string; 29 | 30 | public readonly inputStreamIteratorAgeCriticalAlarm: cloudwatch.Alarm; 31 | public readonly inputStreamIteratorAgeWarningAlarm: cloudwatch.Alarm; 32 | public readonly inputStreamReadThroughputWarningAlarm: cloudwatch.Alarm; 33 | public readonly inputStreamWriteThroughputWarningAlarm: cloudwatch.Alarm; 34 | public readonly inputStreamGetRecordsWarningAlarm: cloudwatch.Alarm; 35 | public readonly inputStreamPutRecordsWarningAlarm: cloudwatch.Alarm; 36 | 37 | public readonly 
firehoseDeliveryToS3WarningAlarm: cloudwatch.Alarm; 38 | public readonly firehoseDeliveryToS3CriticalAlarm: cloudwatch.Alarm; 39 | 40 | public readonly alarmsSev2: cloudwatch.Alarm[]; 41 | public readonly alarmsSev3: cloudwatch.Alarm[]; 42 | 43 | constructor(scope: Construct, id: string, props: IKinesisOpsProperties) { 44 | super(scope, id); 45 | 46 | this.stream = props.stream; 47 | this.streamName = props.stream.stream.streamName; 48 | this.deliveryStream = props.deliveryStream; 49 | 50 | this.dashboard = new cloudwatch.Dashboard(this, 'dashboard', { 51 | dashboardName: `Kinesis_${this.streamName}`, 52 | }); 53 | 54 | this.inputStreamIteratorAgeCriticalAlarm = new cloudwatch.Alarm(this, 'inputStream-iterator-age-critical-alarm', { 55 | alarmName: `${this.streamName} inputStream IteratorAge Long`, 56 | alarmDescription: 'Alarms if maximum iterator age of inputStream is more than 10 minute', 57 | metric: this.stream.metricGetRecordsIteratorAgeMilliseconds({ 58 | period: Duration.minutes(5), 59 | }), 60 | threshold: 600000, 61 | comparisonOperator: cloudwatch.ComparisonOperator.GREATER_THAN_OR_EQUAL_TO_THRESHOLD, 62 | evaluationPeriods: 12, 63 | ...(props.inputStreamIteratorAgeCritical || {}), 64 | }); 65 | 66 | this.inputStreamIteratorAgeWarningAlarm = new cloudwatch.Alarm(this, 'inputStream-iterator-age-warning-alarm', { 67 | alarmName: `${this.streamName} inputStream IteratorAge Long Warning`, 68 | alarmDescription: 'Alarms if maximum iterator age of inputStream is more than 5 minute', 69 | metric: this.stream.metricGetRecordsIteratorAgeMilliseconds({ 70 | period: Duration.minutes(5), 71 | }), 72 | threshold: 30000, 73 | comparisonOperator: cloudwatch.ComparisonOperator.GREATER_THAN_OR_EQUAL_TO_THRESHOLD, 74 | evaluationPeriods: 12, 75 | ...(props.inputStreamIteratorAgeWarning || {}), 76 | }); 77 | 78 | this.inputStreamReadThroughputWarningAlarm = new cloudwatch.Alarm(this, 'inputStream-read-throughput-warning-alarm', { 79 | alarmName: `${this.streamName} 
inputStream ReadThroughput Exceed Warning`, 80 | alarmDescription: 'Alarms if read provisioned throughput of inputStream is exceeded for least 2 hours', 81 | metric: this.stream.metricReadProvisionedThroughputExceeded({ 82 | period: Duration.minutes(10), 83 | }), 84 | threshold: 0.15, 85 | comparisonOperator: cloudwatch.ComparisonOperator.GREATER_THAN_THRESHOLD, 86 | evaluationPeriods: 12, 87 | ...(props.inputStreamReadThroughputWarning || {}), 88 | }); 89 | 90 | this.inputStreamWriteThroughputWarningAlarm = new cloudwatch.Alarm(this, 'inputStream-write-throughput-warning-alarm', { 91 | alarmName: `${this.streamName} inputStream WriteThroughput Exceed Warning`, 92 | alarmDescription: 'Alarms if write provisioned throughput of inputStream is exceeded for least 12 hours', 93 | metric: this.stream.metricWriteProvisionedThroughputExceeded({ 94 | period: Duration.minutes(60), 95 | }), 96 | threshold: 0.15, 97 | comparisonOperator: cloudwatch.ComparisonOperator.GREATER_THAN_THRESHOLD, 98 | evaluationPeriods: 12, 99 | ...(props.inputStreamWriteThroughputWarning || {}), 100 | }); 101 | 102 | this.inputStreamGetRecordsWarningAlarm = new cloudwatch.Alarm(this, 'inputStream-get-records-warning-alarm', { 103 | alarmName: `${this.streamName} inputStream GetRecords Success Low Warning`, 104 | alarmDescription: 'Alarms if GetRecords of inputStream not very successful for least 30 minutes', 105 | metric: this.stream.metricGetRecordsSuccess({ 106 | period: Duration.minutes(5), 107 | }), 108 | threshold: 0.9, 109 | comparisonOperator: cloudwatch.ComparisonOperator.LESS_THAN_THRESHOLD, 110 | evaluationPeriods: 6, 111 | ...(props.inputStreamGetRecordsWarning || {}), 112 | }); 113 | 114 | this.inputStreamPutRecordsWarningAlarm = new cloudwatch.Alarm(this, 'inputStream-put-records-warning-alarm', { 115 | alarmName: `${this.streamName} inputStream PutRecords Success Low Warning`, 116 | alarmDescription: 'Alarms if PutRecords of inputStream not very successful for least 12 hours', 117 | 
metric: this.stream.metricPutRecordsSuccess({ 118 | period: Duration.minutes(60), 119 | }), 120 | threshold: 0.9, 121 | comparisonOperator: cloudwatch.ComparisonOperator.LESS_THAN_THRESHOLD, 122 | evaluationPeriods: 12, 123 | ...(props.inputStreamPutRecordsWarning || {}), 124 | }); 125 | 126 | this.firehoseDeliveryToS3WarningAlarm = new cloudwatch.Alarm(this, 'deliveryStream-delivery-to-s3-warning-alarm', { 127 | alarmName: `${this.streamName} Firehose DeliveryToS3 Failure Warning`, 128 | alarmDescription: 'Alarms if firehose DeliveryToS3 failed for atleast 60 minutes', 129 | metric: this.deliveryStream.metricDeliveryToS3Success({ 130 | statistic: cloudwatch.Statistic.AVERAGE, 131 | period: Duration.minutes(5), 132 | }), 133 | threshold: 1, 134 | comparisonOperator: cloudwatch.ComparisonOperator.LESS_THAN_THRESHOLD, 135 | evaluationPeriods: 12, 136 | treatMissingData: cloudwatch.TreatMissingData.NOT_BREACHING, 137 | ...(props.firehoseDeliveryToS3Warning || {}), 138 | }); 139 | 140 | this.firehoseDeliveryToS3CriticalAlarm = new cloudwatch.Alarm(this, 'deliveryStream-delivery-to-s3-critical-alarm', { 141 | alarmName: `${this.streamName} Firehose DeliveryToS3 Failure Critical`, 142 | alarmDescription: 'Alarms if firehose DeliveryToS3 failed for atleast 24 hours', 143 | metric: this.deliveryStream.metricDeliveryToS3Success({ 144 | statistic: cloudwatch.Statistic.AVERAGE, 145 | period: Duration.hours(1), 146 | }), 147 | threshold: 1, 148 | comparisonOperator: cloudwatch.ComparisonOperator.LESS_THAN_THRESHOLD, 149 | evaluationPeriods: 24, 150 | treatMissingData: cloudwatch.TreatMissingData.NOT_BREACHING, 151 | ...(props.firehoseDeliveryToS3Critical || {}), 152 | }); 153 | 154 | this.alarmsSev2 = [ 155 | this.inputStreamIteratorAgeCriticalAlarm, 156 | this.firehoseDeliveryToS3CriticalAlarm, 157 | ]; 158 | 159 | this.alarmsSev3 = [ 160 | this.inputStreamIteratorAgeWarningAlarm, 161 | this.inputStreamReadThroughputWarningAlarm, 162 | 
this.inputStreamWriteThroughputWarningAlarm, 163 | this.inputStreamGetRecordsWarningAlarm, 164 | this.inputStreamPutRecordsWarningAlarm, 165 | this.firehoseDeliveryToS3WarningAlarm, 166 | ]; 167 | 168 | this.setupDashboard(); 169 | } 170 | 171 | private alarmWidgets(alarms: cloudwatch.Alarm[], severity: number) { 172 | return alarms.map(alarm => new cloudwatch.AlarmWidget({ 173 | title: `${alarm.alarmName} - Sev ${severity}`, 174 | alarm, 175 | })); 176 | } 177 | 178 | private addWidgets(widgets: cloudwatch.IWidget[]) { 179 | for (let i = 0; i < widgets.length; i += 4) { 180 | this.dashboard.addWidgets(...widgets.slice(i, i + 4)); 181 | } 182 | } 183 | 184 | private setupDashboard() { 185 | 186 | const widgets: cloudwatch.IWidget[] = [ 187 | 188 | new cloudwatch.GraphWidget({ 189 | title: 'Kinesis Stream (Ingress)', 190 | left: [ 191 | this.stream.metricIncomingRecords({ 192 | label: 'Incoming Records', 193 | statistic: 'sum', 194 | }), 195 | ], 196 | right: [ 197 | this.stream.metricIncomingBytes({ 198 | label: 'Incoming Bytes', 199 | statistic: 'sum', 200 | }), 201 | ], 202 | }), 203 | 204 | new cloudwatch.GraphWidget({ 205 | title: 'Kinesis Stream (Throttling)', 206 | left: [ 207 | this.stream.metricGetRecordsSuccess(), 208 | this.stream.metricPutRecordsSuccess(), 209 | ], 210 | right: [ 211 | this.stream.metricReadProvisionedThroughputExceeded({ 212 | label: 'Throttled Reads', 213 | statistic: 'sum', 214 | }), 215 | this.stream.metricWriteProvisionedThroughputExceeded({ 216 | label: 'Throttled Writes', 217 | statistic: 'sum', 218 | }), 219 | ], 220 | }), 221 | 222 | new cloudwatch.GraphWidget({ 223 | title: 'Kinesis Stream (Delay)', 224 | left: [ 225 | this.stream.metricGetRecordsIteratorAgeMilliseconds({ 226 | label: 'Time-lag behind Kinesis Stream', 227 | statistic: 'max', 228 | }), 229 | ], 230 | }), 231 | 232 | new cloudwatch.GraphWidget({ 233 | title: 'Firehose Delivery Stream (Ingress/Egress)', 234 | left: [ 235 | 
this.deliveryStream.metricIncomingRecords({ 236 | label: 'Incoming Records', 237 | statistic: 'sum', 238 | }), 239 | this.deliveryStream.metricDeliveryToS3Records({ 240 | label: 'Outgoing Records', 241 | statistic: 'sum', 242 | }), 243 | ], 244 | right: [ 245 | this.deliveryStream.metricDeliveryToS3Success(), 246 | ], 247 | }), 248 | 249 | new cloudwatch.GraphWidget({ 250 | title: 'Firehose Data Freshness', 251 | left: [ 252 | this.deliveryStream.metricDeliveryToS3DataFreshness({ 253 | label: 'Freshness', 254 | statistic: 'max', 255 | period: Duration.minutes(5), 256 | }), 257 | ], 258 | }), 259 | ]; 260 | 261 | widgets.push(...this.alarmWidgets(this.alarmsSev2, 2)); 262 | widgets.push(...this.alarmWidgets(this.alarmsSev3, 3)); 263 | 264 | this.addWidgets(widgets); 265 | } 266 | } -------------------------------------------------------------------------------- /.projen/tasks.json: -------------------------------------------------------------------------------- 1 | { 2 | "tasks": { 3 | "build": { 4 | "name": "build", 5 | "description": "Full release build", 6 | "steps": [ 7 | { 8 | "spawn": "default" 9 | }, 10 | { 11 | "spawn": "pre-compile" 12 | }, 13 | { 14 | "spawn": "compile" 15 | }, 16 | { 17 | "spawn": "post-compile" 18 | }, 19 | { 20 | "spawn": "test" 21 | }, 22 | { 23 | "spawn": "package" 24 | } 25 | ] 26 | }, 27 | "bump": { 28 | "name": "bump", 29 | "description": "Bumps version based on latest git tag and generates a changelog entry", 30 | "env": { 31 | "OUTFILE": "package.json", 32 | "CHANGELOG": "dist/changelog.md", 33 | "BUMPFILE": "dist/version.txt", 34 | "RELEASETAG": "dist/releasetag.txt" 35 | }, 36 | "steps": [ 37 | { 38 | "builtin": "release/bump-version" 39 | } 40 | ], 41 | "condition": "! 
git log --oneline -1 | grep -q \"chore(release):\"" 42 | }, 43 | "clobber": { 44 | "name": "clobber", 45 | "description": "hard resets to HEAD of origin and cleans the local repo", 46 | "env": { 47 | "BRANCH": "$(git branch --show-current)" 48 | }, 49 | "steps": [ 50 | { 51 | "exec": "git checkout -b scratch", 52 | "name": "save current HEAD in \"scratch\" branch" 53 | }, 54 | { 55 | "exec": "git checkout $BRANCH" 56 | }, 57 | { 58 | "exec": "git fetch origin", 59 | "name": "fetch latest changes from origin" 60 | }, 61 | { 62 | "exec": "git reset --hard origin/$BRANCH", 63 | "name": "hard reset to origin commit" 64 | }, 65 | { 66 | "exec": "git clean -fdx", 67 | "name": "clean all untracked files" 68 | }, 69 | { 70 | "say": "ready to rock! (unpushed commits are under the \"scratch\" branch)" 71 | } 72 | ], 73 | "condition": "git diff --exit-code > /dev/null" 74 | }, 75 | "compat": { 76 | "name": "compat", 77 | "description": "Perform API compatibility check against latest version", 78 | "steps": [ 79 | { 80 | "exec": "jsii-diff npm:$(node -p \"require('./package.json').name\") -k --ignore-file .compatignore || (echo \"\nUNEXPECTED BREAKING CHANGES: add keys such as 'removed:constructs.Node.of' to .compatignore to skip.\n\" && exit 1)" 81 | } 82 | ] 83 | }, 84 | "compile": { 85 | "name": "compile", 86 | "description": "Only compile", 87 | "steps": [ 88 | { 89 | "exec": "jsii --silence-warnings=reserved-word --no-fix-peer-dependencies" 90 | } 91 | ] 92 | }, 93 | "coverage": { 94 | "name": "coverage", 95 | "steps": [ 96 | { 97 | "exec": "npx projen test && open coverage/lcov-report/index.html" 98 | } 99 | ] 100 | }, 101 | "default": { 102 | "name": "default", 103 | "description": "Synthesize project files", 104 | "steps": [ 105 | { 106 | "exec": "node .projenrc.js" 107 | } 108 | ] 109 | }, 110 | "docgen": { 111 | "name": "docgen", 112 | "description": "Generate API.md from .jsii manifest", 113 | "steps": [ 114 | { 115 | "exec": "jsii-docgen -o API.md" 116 | } 117 | ] 
118 | }, 119 | "eject": { 120 | "name": "eject", 121 | "description": "Remove projen from the project", 122 | "env": { 123 | "PROJEN_EJECTING": "true" 124 | }, 125 | "steps": [ 126 | { 127 | "spawn": "default" 128 | } 129 | ] 130 | }, 131 | "eslint": { 132 | "name": "eslint", 133 | "description": "Runs eslint against the codebase", 134 | "steps": [ 135 | { 136 | "exec": "eslint --ext .ts,.tsx --fix --no-error-on-unmatched-pattern src test build-tools .projenrc.js" 137 | } 138 | ] 139 | }, 140 | "package": { 141 | "name": "package", 142 | "description": "Creates the distribution package", 143 | "steps": [ 144 | { 145 | "exec": "if [ ! -z ${CI} ]; then mkdir -p dist && rsync -a . dist --exclude .git --exclude node_modules; else npx projen package-all; fi" 146 | } 147 | ] 148 | }, 149 | "package-all": { 150 | "name": "package-all", 151 | "description": "Packages artifacts for all target languages", 152 | "steps": [ 153 | { 154 | "spawn": "package:js" 155 | }, 156 | { 157 | "spawn": "package:java" 158 | }, 159 | { 160 | "spawn": "package:python" 161 | } 162 | ] 163 | }, 164 | "package:java": { 165 | "name": "package:java", 166 | "description": "Create java language bindings", 167 | "steps": [ 168 | { 169 | "exec": "jsii_version=$(node -p \"JSON.parse(fs.readFileSync('.jsii')).jsiiVersion.split(' ')[0]\")" 170 | }, 171 | { 172 | "exec": "npx jsii-pacmak@$jsii_version -v --target java" 173 | } 174 | ] 175 | }, 176 | "package:js": { 177 | "name": "package:js", 178 | "description": "Create js language bindings", 179 | "steps": [ 180 | { 181 | "exec": "jsii_version=$(node -p \"JSON.parse(fs.readFileSync('.jsii')).jsiiVersion.split(' ')[0]\")" 182 | }, 183 | { 184 | "exec": "npx jsii-pacmak@$jsii_version -v --target js" 185 | } 186 | ] 187 | }, 188 | "package:python": { 189 | "name": "package:python", 190 | "description": "Create python language bindings", 191 | "steps": [ 192 | { 193 | "exec": "jsii_version=$(node -p 
\"JSON.parse(fs.readFileSync('.jsii')).jsiiVersion.split(' ')[0]\")" 194 | }, 195 | { 196 | "exec": "npx jsii-pacmak@$jsii_version -v --target python" 197 | } 198 | ] 199 | }, 200 | "post-compile": { 201 | "name": "post-compile", 202 | "description": "Runs after successful compilation", 203 | "steps": [ 204 | { 205 | "spawn": "docgen" 206 | } 207 | ] 208 | }, 209 | "post-upgrade": { 210 | "name": "post-upgrade", 211 | "description": "Runs after upgrading dependencies" 212 | }, 213 | "pre-compile": { 214 | "name": "pre-compile", 215 | "description": "Prepare the project for compilation" 216 | }, 217 | "release": { 218 | "name": "release", 219 | "description": "Prepare a release from \"main\" branch", 220 | "env": { 221 | "RELEASE": "true" 222 | }, 223 | "steps": [ 224 | { 225 | "exec": "rm -fr dist" 226 | }, 227 | { 228 | "spawn": "bump" 229 | }, 230 | { 231 | "spawn": "build" 232 | }, 233 | { 234 | "spawn": "unbump" 235 | }, 236 | { 237 | "exec": "git diff --ignore-space-at-eol --exit-code" 238 | } 239 | ] 240 | }, 241 | "test": { 242 | "name": "test", 243 | "description": "Run tests", 244 | "steps": [ 245 | { 246 | "exec": "jest --passWithNoTests --all --updateSnapshot" 247 | }, 248 | { 249 | "spawn": "eslint" 250 | } 251 | ] 252 | }, 253 | "test:update": { 254 | "name": "test:update", 255 | "description": "Update jest snapshots", 256 | "steps": [ 257 | { 258 | "exec": "jest --updateSnapshot" 259 | } 260 | ] 261 | }, 262 | "test:watch": { 263 | "name": "test:watch", 264 | "description": "Run jest in watch mode", 265 | "steps": [ 266 | { 267 | "exec": "jest --watch" 268 | } 269 | ] 270 | }, 271 | "unbump": { 272 | "name": "unbump", 273 | "description": "Restores version to 0.0.0", 274 | "env": { 275 | "OUTFILE": "package.json", 276 | "CHANGELOG": "dist/changelog.md", 277 | "BUMPFILE": "dist/version.txt", 278 | "RELEASETAG": "dist/releasetag.txt" 279 | }, 280 | "steps": [ 281 | { 282 | "builtin": "release/reset-version" 283 | } 284 | ] 285 | }, 286 | "upgrade": { 
287 | "name": "upgrade", 288 | "description": "upgrade dependencies", 289 | "env": { 290 | "CI": "0" 291 | }, 292 | "steps": [ 293 | { 294 | "exec": "npm-check-updates --dep dev --upgrade --target=minor --reject='projen'" 295 | }, 296 | { 297 | "exec": "npm-check-updates --dep optional --upgrade --target=minor --reject='projen'" 298 | }, 299 | { 300 | "exec": "npm-check-updates --dep peer --upgrade --target=minor --reject='projen'" 301 | }, 302 | { 303 | "exec": "npm-check-updates --dep prod --upgrade --target=minor --reject='projen'" 304 | }, 305 | { 306 | "exec": "npm-check-updates --dep bundle --upgrade --target=minor --reject='projen'" 307 | }, 308 | { 309 | "exec": "yarn install --check-files" 310 | }, 311 | { 312 | "exec": "yarn upgrade @types/jest @types/node @typescript-eslint/eslint-plugin @typescript-eslint/parser cdk-nag constructs eslint-import-resolver-node eslint-import-resolver-typescript eslint-plugin-import eslint jest jest-junit jsii jsii-diff jsii-docgen json-schema npm-check-updates standard-version ts-jest typescript @aws-cdk/aws-glue-alpha aws-cdk-lib constructs @aws-cdk/aws-glue-alpha @aws-cdk/aws-lambda-python-alpha aws-cdk-lib" 313 | }, 314 | { 315 | "exec": "npx projen" 316 | }, 317 | { 318 | "spawn": "post-upgrade" 319 | } 320 | ] 321 | }, 322 | "upgrade-projen": { 323 | "name": "upgrade-projen", 324 | "description": "upgrade projen", 325 | "env": { 326 | "CI": "0" 327 | }, 328 | "steps": [ 329 | { 330 | "exec": "npm-check-updates --dep dev --upgrade --target=minor --filter='projen'" 331 | }, 332 | { 333 | "exec": "npm-check-updates --dep optional --upgrade --target=minor --filter='projen'" 334 | }, 335 | { 336 | "exec": "npm-check-updates --dep peer --upgrade --target=minor --filter='projen'" 337 | }, 338 | { 339 | "exec": "npm-check-updates --dep prod --upgrade --target=minor --filter='projen'" 340 | }, 341 | { 342 | "exec": "npm-check-updates --dep bundle --upgrade --target=minor --filter='projen'" 343 | }, 344 | { 345 | "exec": "yarn 
install --check-files" 346 | }, 347 | { 348 | "exec": "yarn upgrade projen" 349 | }, 350 | { 351 | "exec": "npx projen" 352 | }, 353 | { 354 | "spawn": "post-upgrade" 355 | } 356 | ] 357 | }, 358 | "watch": { 359 | "name": "watch", 360 | "description": "Watch & compile in the background", 361 | "steps": [ 362 | { 363 | "exec": "jsii -w --silence-warnings=reserved-word --no-fix-peer-dependencies" 364 | } 365 | ] 366 | } 367 | }, 368 | "env": { 369 | "PATH": "$(npx -c \"node -e \\\"console.log(process.env.PATH)\\\"\")" 370 | }, 371 | "//": "~~ Generated by projen. To modify, edit .projenrc.js and run \"npx projen\"." 372 | } 373 | -------------------------------------------------------------------------------- /src/etl/glue-job.ts: -------------------------------------------------------------------------------- 1 | import { Duration, Stack } from 'aws-cdk-lib'; 2 | import * as cloudwatch from 'aws-cdk-lib/aws-cloudwatch'; 3 | import * as events from 'aws-cdk-lib/aws-events'; 4 | import * as eventtargets from 'aws-cdk-lib/aws-events-targets'; 5 | import * as glue from 'aws-cdk-lib/aws-glue'; 6 | import * as iam from 'aws-cdk-lib/aws-iam'; 7 | import * as lambda from 'aws-cdk-lib/aws-lambda'; 8 | import * as s3 from 'aws-cdk-lib/aws-s3'; 9 | import { Construct } from 'constructs'; 10 | 11 | export enum GlueWorkerType { 12 | STANDARD = 'Standard', 13 | G1_X = 'G.1X', 14 | G2_X = 'G.2X' 15 | } 16 | 17 | export enum GlueVersion { 18 | V_0 = '0.9', 19 | V_1 = '1.0', 20 | V_2 = '2.0', 21 | V_3 = '3.0' 22 | } 23 | 24 | export enum GlueJobType { 25 | GLUE_ETL = 'glueetl', 26 | GLUE_STREAMING = 'gluestreaming' 27 | } 28 | 29 | export interface GlueJobProperties { 30 | readonly name: string; 31 | readonly roleName?: string; 32 | readonly description?: string; 33 | readonly deploymentBucket: s3.IBucket; 34 | readonly readAccessBuckets?: s3.IBucket[]; 35 | readonly writeAccessBuckets?: s3.IBucket[]; 36 | readonly glueVersion?: GlueVersion; 37 | readonly workerType: 
GlueWorkerType; 38 | readonly numberOfWorkers?: number; 39 | readonly maxCapacity?: number; 40 | readonly maxRetries?: number; 41 | readonly maxConcurrentRuns?: number; 42 | readonly jobScript: string; 43 | readonly jobArgs?: { [key: string]: string }; 44 | readonly timeout?: number; 45 | readonly jobType: GlueJobType; 46 | } 47 | 48 | export class GlueJob extends Construct { 49 | private static readonly DAY_IN_MINUTES = 1440; 50 | 51 | public readonly job: glue.CfnJob; 52 | public readonly role: iam.IRole; 53 | public readonly name: string; 54 | public readonly metricSuccessRule: events.Rule; 55 | public readonly metricTimeoutRule: events.Rule; 56 | public readonly metricFailureRule: events.Rule; 57 | public readonly allExecutionAttemptsFailedEventSource = 'custom.aws.glue.allExecutionAttemptsFailed'; 58 | public readonly allExecutionAttemptsFailedEventDetailType = 'All Execution Attempts Failed'; 59 | public readonly executionFailureRule: events.Rule; 60 | public readonly lambdaFunction: lambda.SingletonFunction; 61 | 62 | private allExecutionAttemptsFailedRule: events.Rule; 63 | 64 | constructor(scope: Construct, id: string, props: GlueJobProperties) { 65 | super(scope, id); 66 | 67 | this.role = this.createGlueJobRole(props); 68 | 69 | this.job = new glue.CfnJob(this, `${props.name}-glue-job`, { 70 | name: props.name, 71 | description: props.description, 72 | workerType: props.workerType, 73 | numberOfWorkers: props.numberOfWorkers, 74 | role: this.role.roleName, 75 | maxRetries: props.maxRetries || 0, 76 | executionProperty: { 77 | maxConcurrentRuns: props.maxConcurrentRuns || 3, 78 | }, 79 | glueVersion: props.glueVersion || GlueVersion.V_1, 80 | command: { 81 | pythonVersion: '3', 82 | scriptLocation: props.jobScript, 83 | name: props.jobType, 84 | }, 85 | timeout: props.timeout || GlueJob.DAY_IN_MINUTES, 86 | defaultArguments: { 87 | '--job-language': 'python', 88 | '--enable-metrics': true, 89 | '--enable-continuous-cloudwatch-log': true, 90 | '--region': 
Stack.of(this).region, 91 | '--enable-glue-datacatalog': true, 92 | '--enable-continuous-log-filter': true, 93 | '--enable-spark-ui': true, 94 | ...props.jobArgs, 95 | }, 96 | }); 97 | 98 | this.name = props.name; 99 | 100 | this.metricSuccessRule = this.jobRule('SuccessRule', this.name, 'SUCCEEDED'); 101 | this.metricFailureRule = this.jobRule('FailureRule', this.name, 'FAILED'); 102 | this.metricTimeoutRule = this.jobRule('TimeoutRule', this.name, 'TIMEOUT'); 103 | 104 | this.executionFailureRule = new events.Rule(scope, `${this.name}-execution-failure-rule`, { 105 | description: `Glue job ${this.name} failed or timed out on an attempt. There might be job retries after this error.`, 106 | eventPattern: { 107 | source: ['aws.glue'], 108 | detailType: ['Glue Job State Change'], 109 | detail: { 110 | state: ['FAILED', 'TIMEOUT'], 111 | jobName: [this.name], 112 | }, 113 | }, 114 | }); 115 | 116 | this.lambdaFunction = this.createLambdaFunction(); 117 | this.executionFailureRule.addTarget(new eventtargets.LambdaFunction(this.lambdaFunction)); 118 | 119 | this.allExecutionAttemptsFailedRule = new events.Rule(this, `${this.name}-all-execution-attempts-failed-rule`, { 120 | description: `Glue job ${this.name} failed or timed out on the last attempt. 
There will be no retries of the job after this error.`, 121 | eventPattern: { 122 | source: [this.allExecutionAttemptsFailedEventSource], 123 | detailType: [this.allExecutionAttemptsFailedEventDetailType], 124 | detail: { 125 | jobName: [this.name], 126 | }, 127 | }, 128 | }); 129 | } 130 | 131 | /* Creates the IAM execution role for the Glue job: the AWSGlueServiceRole managed policy, Lake Formation GetDataAccess, read on the script deployment bucket, and read/write grants on the optional bucket lists. NOTE(review): AWSGlueConsoleFullAccess looks broad for a job execution role — confirm it is actually needed. */ private createGlueJobRole(props: GlueJobProperties): iam.Role { 132 | const role = new iam.Role(this, 'Role', { 133 | roleName: props.roleName || props.name + 'Role', 134 | assumedBy: new iam.ServicePrincipal('glue.amazonaws.com'), 135 | managedPolicies: [ 136 | iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSGlueServiceRole'), 137 | iam.ManagedPolicy.fromAwsManagedPolicyName('AWSGlueConsoleFullAccess'), 138 | ], 139 | }); 140 | role.addToPolicy(new iam.PolicyStatement({ actions: ['lakeformation:GetDataAccess'], resources: ['*'] })); 141 | 142 | props.deploymentBucket.grantRead(role); 143 | 144 | if (props.readAccessBuckets) { 145 | props.readAccessBuckets.forEach(bucket => { 146 | bucket.grantRead(role); 147 | }); 148 | } 149 | 150 | if (props.writeAccessBuckets) { 151 | props.writeAccessBuckets.forEach(bucket => { 152 | bucket.grantWrite(role); 153 | }); 154 | } 155 | return role; 156 | } 157 | 158 | /* Helper: an EventBridge rule matching 'Glue Job State Change' events for this job in any of the given states. */ private jobRule(id: string, jobName: string, ...states: string[]): events.Rule { 159 | return new events.Rule(this, id, { 160 | ruleName: jobName + states.join(''), 161 | description: `Event triggered when Glue job ${jobName} is in ${states.join(' or ')} state(s)`, 162 | eventPattern: { 163 | source: ['aws.glue'], 164 | detailType: ['Glue Job State Change'], 165 | detail: { 166 | state: states, 167 | jobName: [jobName], 168 | }, 169 | }, 170 | }); 171 | } 172 | 173 | /* Count of SUCCEEDED state changes, read from the success rule's TriggeredRules metric. */ metricSuccess(props?: cloudwatch.MetricOptions): cloudwatch.Metric { 174 | return this.ruleMetric(this.metricSuccessRule, props); 175 | } 176 | 177 | /* Count of FAILED state changes, read from the failure rule's TriggeredRules metric. */ metricFailure(props?: cloudwatch.MetricOptions): cloudwatch.Metric { 178 | return this.ruleMetric(this.metricFailureRule, props); 179 | } 180 | 181 |
metricAllExecutionAttemptsFailed(props?: cloudwatch.MetricOptions): cloudwatch.Metric { /* TriggeredRules count of the custom all-attempts-failed rule, i.e. failures with no retries remaining. */ 182 | return new cloudwatch.Metric({ 183 | metricName: 'TriggeredRules', 184 | namespace: 'AWS/Events', 185 | dimensionsMap: { 186 | RuleName: this.allExecutionAttemptsFailedRule.ruleName, 187 | }, 188 | statistic: 'Sum', 189 | period: Duration.minutes(1), 190 | ...props, 191 | }); 192 | } 193 | 194 | /* Count of TIMEOUT state changes, read from the timeout rule's TriggeredRules metric. */ metricTimeout(props?: cloudwatch.MetricOptions): cloudwatch.Metric { 195 | return this.ruleMetric(this.metricTimeoutRule, props); 196 | } 197 | 198 | /* Shared helper: AWS/Events TriggeredRules metric for the given rule, summed and attached to this construct. */ private ruleMetric({ ruleName }: events.Rule, props?: cloudwatch.MetricOptions): cloudwatch.Metric { 199 | return new cloudwatch.Metric({ 200 | namespace: 'AWS/Events', 201 | metricName: 'TriggeredRules', 202 | dimensionsMap: { RuleName: ruleName }, 203 | statistic: cloudwatch.Statistic.SUM, 204 | ...props, 205 | }).attachTo(this); 206 | } 207 | 208 | /* Generic AWS/Glue metric for this job across all runs (JobRunId=ALL); dimensionType is the Glue 'Type' dimension (e.g. 'gauge' or 'count'). */ public metric(metricName: string, dimensionType: string, props?: cloudwatch.MetricOptions): cloudwatch.Metric { 209 | return new cloudwatch.Metric({ 210 | namespace: 'AWS/Glue', 211 | metricName, 212 | dimensionsMap: { 213 | JobName: this.name, 214 | JobRunId: 'ALL', 215 | Type: dimensionType, 216 | }, 217 | ...props, 218 | }); 219 | } 220 | 221 | /* Driver/executor JVM heap usage gauge. */ public jvmHeapUsageMetric(props?: cloudwatch.MetricOptions): cloudwatch.Metric { 222 | return this.metric('glue.ALL.jvm.heap.usage', 'gauge', props); 223 | } 224 | 225 | /* Aggregate elapsed job time, as a 'count'-typed metric. */ public elapsedTimeMetric(props?: cloudwatch.MetricOptions): cloudwatch.Metric { 226 | return this.metric('glue.driver.aggregate.elapsedTime', 'count', props); 227 | } 228 | 229 | /* Driver BlockManager disk space used, in MB. */ public diskSpaceUsedMbMetric(props?: cloudwatch.MetricOptions): cloudwatch.Metric { 230 | return this.metric('glue.driver.BlockManager.disk.diskSpaceUsed_MB', 'gauge', props); 231 | } 232 | 233 | /* NOTE(review): name is misspelled ("Miliseconds") but is public API, so it is kept; it reads the same metric name as elapsedTimeMetric with Type 'gauge' instead of 'count' — confirm which Type dimension is intended. */ public runTimeInMiliseconds(props?: cloudwatch.MetricOptions): cloudwatch.Metric { 234 | return this.metric('glue.driver.aggregate.elapsedTime', 'gauge', props); 235 | } 236 | 237 | private
createLambdaFunction(): lambda.SingletonFunction { 238 | const lambdaFunction = new lambda.SingletonFunction( 239 | this, 240 | `GlueExecutionFailListenerLambdaSingleton${this.name}`, 241 | { 242 | description: 'Checks if an error of a Glue job was on the last attempt (no more retries) in which case the function sends out an event.', 243 | environment: { 244 | eventToSendSource: this.allExecutionAttemptsFailedEventSource, 245 | eventToSendDetailType: this.allExecutionAttemptsFailedEventDetailType, 246 | }, 247 | uuid: 'GlueExecutionFailListenerLambda', 248 | runtime: lambda.Runtime.PYTHON_3_7, 249 | handler: 'index.handler', 250 | timeout: Duration.minutes(1), 251 | code: lambda.Code.fromInline(` 252 | import boto3 253 | import json 254 | import os 255 | import re 256 | 257 | def handler(event, context): 258 | try: 259 | jobRunId = event['detail']['jobRunId'] 260 | jobName = event['detail']['jobName'] 261 | except: 262 | raise Exception(f'Received an malformed event. ({event})') 263 | 264 | # get the current execution attempt, we parse it from the jobRunId which has a _attempt_# suffix on retries 265 | try: 266 | curExecutionAttempt = int(re.findall('_attempt_(\\d*)$', jobRunId)[0]) 267 | except IndexError: 268 | curExecutionAttempt = 0 269 | 270 | # get the number of MaxRetries for this glue job 271 | try: 272 | glue_client = boto3.client('glue') 273 | maxRetries = glue_client.get_job(JobName=jobName)['Job']['MaxRetries'] 274 | except Exception as e: 275 | raise Exception(f'Failed to access the Glue API to get the MaxRetries parameter. ({e})') 276 | 277 | # is this the last execution? 
if yes we send out the event 278 | isLastExecutionAttempt = curExecutionAttempt == maxRetries 279 | print(f'Job name: {jobName}, is last execution attempt: {isLastExecutionAttempt}, current attempt: {curExecutionAttempt}, max retry attempts: {maxRetries}') 280 | if isLastExecutionAttempt: 281 | event_client = boto3.client('events') 282 | event_client.put_events(Entries=[{ 283 | 'Source': os.environ['eventToSendSource'], 284 | 'Detail': json.dumps(event['detail']), 285 | 'DetailType': os.environ['eventToSendDetailType'] 286 | }]) 287 | `), 288 | }, 289 | ); 290 | 291 | const region = Stack.of(this).region; 292 | const accountId = Stack.of(this).account; 293 | 294 | lambdaFunction.addToRolePolicy( 295 | new iam.PolicyStatement({ 296 | actions: ['events:PutEvents'], 297 | resources: [`arn:aws:events:${region}:${accountId}:event-bus/default`], 298 | }), 299 | ); 300 | 301 | lambdaFunction.addToRolePolicy( 302 | new iam.PolicyStatement({ 303 | actions: ['glue:GetJob'], 304 | resources: [`arn:aws:glue:${region}:${accountId}:job/${this.name}`], 305 | }), 306 | ); 307 | 308 | return lambdaFunction; 309 | } 310 | } -------------------------------------------------------------------------------- /src/data-lake.ts: -------------------------------------------------------------------------------- 1 | import * as path from 'path'; 2 | import * as glue from '@aws-cdk/aws-glue-alpha'; 3 | import { PythonFunction } from '@aws-cdk/aws-lambda-python-alpha'; 4 | import { Aws, CfnOutput, CustomResource, Duration, RemovalPolicy, Stack } from 'aws-cdk-lib'; 5 | import * as athena from 'aws-cdk-lib/aws-athena'; 6 | import * as ec2 from 'aws-cdk-lib/aws-ec2'; 7 | import * as iam from 'aws-cdk-lib/aws-iam'; 8 | import { IRole } from 'aws-cdk-lib/aws-iam'; 9 | import * as lf from 'aws-cdk-lib/aws-lakeformation'; 10 | import * as lambda from 'aws-cdk-lib/aws-lambda'; 11 | import * as logs from 'aws-cdk-lib/aws-logs'; 12 | import * as s3 from 'aws-cdk-lib/aws-s3'; 13 | import { Bucket } from 
'aws-cdk-lib/aws-s3'; 14 | import * as cr from 'aws-cdk-lib/custom-resources'; 15 | 16 | import { Construct } from 'constructs'; 17 | import { LakeImplStrategy, LakeStrategyFactory } from './data-lake-strategy'; 18 | import { DataProduct } from './data-product'; 19 | import { DataTier, LakeKind, Permissions, Stage } from './global/enums'; 20 | import { DataLakeAdministrator } from './personas/data-lake-admin'; 21 | import { DataLakeCreator } from './personas/data-lake-creator'; 22 | import { Pipeline } from './pipeline'; 23 | import { buildLambdaFunctionName, buildS3BucketName, buildUniqueName } from './utils'; 24 | 25 | export interface CrossAccountProperties { 26 | readonly consumerAccountIds: string[]; 27 | readonly dataCatalogOwnerAccountId: string; 28 | } 29 | 30 | export interface DataLakeProperties { 31 | /** 32 | * The name of the DataLake. 33 | * 34 | */ 35 | readonly name: string; 36 | /** 37 | * The Stage the DataLake will be deployed. 38 | * 39 | */ 40 | readonly stageName: Stage; 41 | /** 42 | * The List of DataProducts for this account 43 | * 44 | * @default - No data products 45 | */ 46 | readonly dataProducts?: DataProduct[]; 47 | /** 48 | * The Type of DataLake this instance is. This can be a DATA_PRODUCT only, CENTRAL_CATALOG, CONSUMER, or DATA_PRODUCT_AND_CATALOG type. 49 | */ 50 | readonly lakeKind: LakeKind; 51 | /** 52 | * VPC for Glue jobs 53 | * 54 | * @default - No vpc 55 | * @description - The VPC that will be used if the Glue job needs access to resources within the account or internet access 56 | */ 57 | readonly vpc?: ec2.Vpc; 58 | /** 59 | * List of Lake Formation TBAC policy tags. 
60 | * 61 | * @default - No tags 62 | * @description - Define the tag taxonomy needed for the DataLake 63 | * @see https://docs.aws.amazon.com/lake-formation/latest/dg/TBAC-section.html 64 | */ 65 | readonly policyTags?: { [name: string]: string }; 66 | /** 67 | * Cross account AWS account IDs 68 | * 69 | * @default - No cross account ids 70 | * @description - The cross account ids needed for setting up the Glue resource policy 71 | * @see https://aws.amazon.com/premiumsupport/knowledge-center/glue-data-catalog-cross-account-access/ 72 | */ 73 | readonly crossAccountAccess?: CrossAccountProperties; 74 | /** 75 | * Security group to attach to Glue jobs 76 | * 77 | * @default - No security group 78 | * @description - Security Group that will be used to allow port access in the VPC 79 | * @see https://docs.aws.amazon.com/glue/latest/dg/setup-vpc-for-glue-access.html 80 | */ 81 | readonly glueSecurityGroup?: ec2.SecurityGroup; 82 | /** 83 | * Data Lake Admin role 84 | * 85 | * @default - Admin role created based on best practices 86 | * @description - IAM Role for DataLake admin access 87 | * @see https://docs.aws.amazon.com/lake-formation/latest/dg/permissions-reference.html 88 | */ 89 | readonly datalakeAdminRole?: iam.Role; 90 | /** 91 | * Data Lake Database Creator role 92 | * 93 | * @default - Database creator role created based on best practices 94 | * @description - IAM Role for DataLake database creator access 95 | * @see https://docs.aws.amazon.com/lake-formation/latest/dg/permissions-reference.html 96 | */ 97 | readonly datalakeCreatorRole?: iam.Role; 98 | 99 | /* Default S3 Bucket Properties for Log Bucket 100 | * 101 | * @default - lifecycleRules: [ 102 | { 103 | expiration: cdk.Duration.days(30), 104 | }, 105 | ], 106 | removalPolicy: cdk.RemovalPolicy.DESTROY, 107 | autoDeleteObjects: true, 108 | */ 109 | readonly logBucketProps?: s3.BucketProps; 110 | /** 111 | * Create default Athena workgroup for querying data lake resources 112 | * 113 | * @default - 
false 114 | */ 115 | readonly createAthenaWorkgroup?: boolean; 116 | /** 117 | * Create default glue database for the data lake 118 | * 119 | * @default false 120 | */ 121 | readonly createDefaultDatabase?: boolean; 122 | 123 | } 124 | 125 | export interface DataTierBucketProps { 126 | readonly lakeType: LakeKind; 127 | readonly pipelineName: string; 128 | readonly bucketName: string | undefined; 129 | readonly dataCatalogAccountId: string; 130 | readonly logBucket: Bucket; 131 | readonly crossAccount:boolean; 132 | readonly s3BucketProps: s3.BucketProps | undefined; 133 | readonly datalakeAdminRole: IRole; 134 | readonly datalakeDbCreatorRole: IRole; 135 | readonly tier: DataTier; 136 | } 137 | 138 | /** 139 | * A CDK construct to create a DataLake. 140 | */ 141 | export class DataLake extends Construct { 142 | public readonly databases: { [name: string]: glue.Database } = {}; 143 | public readonly datalakeAdminRole: iam.IRole; 144 | public readonly datalakeDbCreatorRole: iam.IRole; 145 | public readonly logBucket: s3.Bucket; 146 | public readonly stageName: Stage; 147 | public readonly vpc?: ec2.Vpc; 148 | public readonly athenaWorkgroup?: athena.CfnWorkGroup; 149 | public readonly lakeKind: LakeKind; 150 | 151 | private readonly glueSecurityGroup?: ec2.SecurityGroup; 152 | private readonly crossAccountAccess?: CrossAccountProperties; 153 | private readonly logBucketProps: s3.BucketProps; 154 | private readonly dataLakeStrategy: LakeImplStrategy; 155 | 156 | constructor(scope: Construct, id: string, props: DataLakeProperties) { 157 | super(scope, id); 158 | this.stageName = props.stageName; 159 | this.crossAccountAccess = props.crossAccountAccess ? props.crossAccountAccess : undefined; 160 | this.vpc = props.vpc ? 
props.vpc : undefined; 161 | this.lakeKind = props.lakeKind; 162 | 163 | if (props.logBucketProps) { 164 | this.logBucketProps = props.logBucketProps; 165 | } else { 166 | this.logBucketProps = { 167 | lifecycleRules: [ 168 | { 169 | expiration: Duration.days(7), 170 | }, 171 | ], 172 | removalPolicy: RemovalPolicy.DESTROY, 173 | autoDeleteObjects: true, 174 | }; 175 | } 176 | 177 | if (this.vpc) { 178 | const securityGroupName = buildUniqueName({ 179 | name: 'glue', 180 | resourceUse: 'datalake', 181 | stage: this.stageName, 182 | }, 80); 183 | this.glueSecurityGroup = new ec2.SecurityGroup(this, 'glue-sg', { 184 | description: 'Glue self referential allow in out', 185 | vpc: this.vpc, 186 | securityGroupName: securityGroupName, 187 | }); 188 | this.glueSecurityGroup.connections.allowFrom(this.glueSecurityGroup, ec2.Port.allTcp()); 189 | this.glueSecurityGroup.connections.allowTo(this.glueSecurityGroup, ec2.Port.allTcp()); 190 | new CfnOutput(this, 'GlueSecurityGroupName', { value: securityGroupName }); 191 | } 192 | 193 | this.logBucket = new s3.Bucket(this, 'datalake-log-bucket', { 194 | bucketName: buildS3BucketName({ 195 | stage: props.stageName, 196 | resourceUse: 'log-bucket', 197 | name: props.name, 198 | }), 199 | ...this.logBucketProps, 200 | }); 201 | new CfnOutput(this, 'DataLakeLogBucket', { value: this.logBucket.bucketName }); 202 | 203 | if (props.datalakeAdminRole) { 204 | this.datalakeAdminRole = props.datalakeAdminRole; 205 | } else { 206 | this.datalakeAdminRole = new DataLakeAdministrator(this, `${props.name}-datalake-admin-role`, { 207 | name: buildUniqueName({ 208 | name: props.name, 209 | resourceUse: 'datalake-admin', 210 | stage: this.stageName, 211 | }, 60), 212 | }).role; 213 | } 214 | 215 | if (props.datalakeCreatorRole) { 216 | this.datalakeDbCreatorRole = props.datalakeCreatorRole; 217 | } else { 218 | this.datalakeDbCreatorRole = new DataLakeCreator(this, `${props.name}-datalake-creator-role`, { 219 | name: buildUniqueName({ 220 | 
name: props.name, 221 | resourceUse: 'datalake-creator', 222 | stage: this.stageName, 223 | }, 60), 224 | }).role; 225 | } 226 | 227 | const lfAdminRole = new lf.CfnDataLakeSettings(this, 'lf-datalake-role-admin-settings', { 228 | admins: [{ 229 | dataLakePrincipalIdentifier: this.datalakeAdminRole.roleArn, 230 | }], 231 | }); 232 | lfAdminRole.node.addDependency(this.datalakeAdminRole); 233 | new CfnOutput(this, 'DataLakeAdminRole', { value: this.datalakeAdminRole.roleName }); 234 | 235 | if (this.crossAccountAccess) { 236 | this.createCrossAccountGlueCatalogResourcePolicy( 237 | this.crossAccountAccess.consumerAccountIds, this.crossAccountAccess.dataCatalogOwnerAccountId); 238 | } 239 | 240 | if (props.createAthenaWorkgroup) { 241 | this.athenaWorkgroup = new athena.CfnWorkGroup(this, 'workgroup', { 242 | name: buildUniqueName({ 243 | name: props.name, 244 | resourceUse: 'workgroup', 245 | stage: this.stageName, 246 | }, 60), 247 | description: 'Default Data Lake Workgroup', 248 | state: 'ENABLED', 249 | recursiveDeleteOption: true, 250 | workGroupConfiguration: { 251 | enforceWorkGroupConfiguration: true, 252 | resultConfiguration: { 253 | outputLocation: `s3://${this.logBucket.bucketName}/results/`, 254 | }, 255 | engineVersion: { 256 | selectedEngineVersion: 'Athena engine version 2', 257 | effectiveEngineVersion: 'Athena engine version 2', 258 | }, 259 | }, 260 | }); 261 | new CfnOutput(this, 'DataLakeAthenaWorkgroup', { value: this.athenaWorkgroup.name }); 262 | } 263 | 264 | // if there are custom tags passed into the datya lake create them here with a custom resource 265 | // TODO: once Tags are included as part of CFN remove the custom resource. 
266 | if (props.policyTags) { 267 | this.createPolicyTagsCustomResource(props.policyTags); 268 | } 269 | 270 | if (props.createDefaultDatabase) { 271 | this.createDatabase(`${props.name}-${props.stageName}`); 272 | } 273 | 274 | this.dataLakeStrategy = LakeStrategyFactory.getLakeStrategy(props.lakeKind); 275 | 276 | if (props.dataProducts && props.dataProducts.length > 0) { 277 | props.dataProducts.forEach((product: DataProduct) => { 278 | if (this.databases[product.databaseName] == undefined) { 279 | this.databases[product.databaseName] = this.createDatabase(product.databaseName); 280 | } 281 | 282 | product.pipelines.forEach((pipe: Pipeline) => { 283 | this.dataLakeStrategy.createDataProduct({ 284 | stack: Stack.of(this), 285 | pipe: pipe, 286 | product: product, 287 | database: this.databases[product.databaseName], 288 | logBucket: this.logBucket, 289 | stage: this.stageName, 290 | datalakeAdminRoleArn: this.datalakeAdminRole.roleArn, 291 | datalakeDbCreatorRoleArn: this.datalakeDbCreatorRole.roleArn, 292 | }); 293 | }); 294 | }); 295 | } 296 | } 297 | 298 | public createDownloaderCustomResource(stageName: string) { 299 | // download the data sets with the custom resource after successfull creation of resource 300 | const onEvent = new PythonFunction(this, 'DataloaderHandler', { 301 | runtime: lambda.Runtime.PYTHON_3_7, 302 | entry: path.join(__dirname, '../lambda/download-data'), 303 | timeout: Duration.minutes(15), 304 | functionName: buildLambdaFunctionName({ 305 | name: 'load-data', 306 | resourceUse: 'cr', 307 | stage: stageName, 308 | }), 309 | }); 310 | 311 | // create readable and writable buckets for the datasets and set the appropriate S3 access 312 | onEvent.addToRolePolicy( 313 | new iam.PolicyStatement({ 314 | actions: ['s3:*'], 315 | resources: ['*'], // trim down to only the S3 buckets needed 316 | }), 317 | ); 318 | 319 | const dataLoadProvider = new cr.Provider(this, 'DataloaderProvider', { 320 | onEventHandler: onEvent, 321 | logRetention: 
logs.RetentionDays.ONE_DAY, 322 | }); 323 | 324 | // CR to download the static datasets form the dataSets var passed in. 325 | new CustomResource(this, 'LoadDatalakeCustomResource', { 326 | serviceToken: dataLoadProvider.serviceToken, 327 | properties: { 328 | dataSets: this.dataLakeStrategy.downloadLocations, 329 | stackName: Stack.name, 330 | regionName: Aws.REGION, 331 | }, 332 | }); 333 | } 334 | 335 | private createDatabase(databaseName: string) : glue.Database { 336 | const db = new glue.Database(this, `${databaseName}-database`, { 337 | databaseName: `${databaseName}`, 338 | }); 339 | 340 | const dbPerm = new lf.CfnPermissions(this, `${databaseName}-lf-db-creator-permission`, { 341 | dataLakePrincipal: { 342 | dataLakePrincipalIdentifier: this.datalakeDbCreatorRole.roleArn, 343 | }, 344 | resource: { 345 | databaseResource: { 346 | name: databaseName, 347 | }, 348 | }, 349 | permissions: [ 350 | Permissions.ALTER, 351 | Permissions.CREATE_TABLE, 352 | Permissions.DROP, 353 | ], 354 | }); 355 | dbPerm.node.addDependency(db); 356 | return db; 357 | } 358 | 359 | private createPolicyTagsCustomResource(policyTags: { [name: string]: string }) { 360 | const onEvent = new PythonFunction(this, 'create-policy-tags-handler', { 361 | runtime: lambda.Runtime.PYTHON_3_7, 362 | entry: path.join(__dirname, '../lambda/create-tags-handler'), 363 | role: this.datalakeAdminRole, 364 | functionName: buildLambdaFunctionName({ 365 | name: 'create-tags', 366 | resourceUse: 'cr', 367 | stage: this.stageName, 368 | }), 369 | timeout: Duration.minutes(15), 370 | }); 371 | onEvent.node.addDependency(this.datalakeAdminRole); 372 | 373 | const myProvider = new cr.Provider(this, 'policy-tags-provider', { 374 | onEventHandler: onEvent, 375 | logRetention: logs.RetentionDays.ONE_DAY, 376 | }); 377 | 378 | const outputs = new CustomResource(this, 'tag-creation-custom-resource', { 379 | serviceToken: myProvider.serviceToken, 380 | properties: { 381 | policyTags: policyTags, 382 | stackName: 
Stack.name, 383 | regionName: Aws.REGION, 384 | catalogId: Aws.ACCOUNT_ID, 385 | }, 386 | }); 387 | outputs.node.addDependency(this.datalakeAdminRole); 388 | } 389 | 390 | protected createCrossAccountGlueCatalogResourcePolicy(consumerAccountIds: string[], dataCatalogOwnerAccountId: string) { 391 | const onCatalogEvent = new PythonFunction(this, 'enable-hybrid-catalog-handler', { 392 | runtime: lambda.Runtime.PYTHON_3_7, 393 | entry: path.join(__dirname, '../lambda/enable-hybrid-catalog'), 394 | role: this.datalakeAdminRole, 395 | timeout: Duration.minutes(1), 396 | functionName: buildLambdaFunctionName({ 397 | name: 'create-catalog', 398 | resourceUse: 'cr', 399 | stage: this.stageName, 400 | }), 401 | }); 402 | 403 | const catalogProvider = new cr.Provider(this, 'hybrid-catalog-provider', { 404 | onEventHandler: onCatalogEvent, 405 | logRetention: logs.RetentionDays.ONE_DAY, 406 | }); 407 | 408 | new CustomResource(this, 'hybrid-catalog-custom-resource', { 409 | serviceToken: catalogProvider.serviceToken, 410 | properties: { 411 | stackName: Stack.name, 412 | regionName: Aws.REGION, 413 | consumerAccountIds: consumerAccountIds, 414 | producerAccountId: dataCatalogOwnerAccountId, 415 | }, 416 | }); 417 | } 418 | } 419 | 420 | 421 | -------------------------------------------------------------------------------- /src/data-lake-strategy.ts: -------------------------------------------------------------------------------- 1 | import { Connection, ConnectionType, Database } from '@aws-cdk/aws-glue-alpha'; 2 | import { Aws, NestedStack, Stack } from 'aws-cdk-lib'; 3 | import { SecurityGroup, Vpc } from 'aws-cdk-lib/aws-ec2'; 4 | import { Rule } from 'aws-cdk-lib/aws-events'; 5 | import { LambdaFunction } from 'aws-cdk-lib/aws-events-targets'; 6 | import { CfnPermissions, CfnResource } from 'aws-cdk-lib/aws-lakeformation'; 7 | import { Function } from 'aws-cdk-lib/aws-lambda'; 8 | import { Bucket } from 'aws-cdk-lib/aws-s3'; 9 | import { IDependable } from 'constructs'; 
10 | import { DataLakeBucket } from './data-lake-bucket'; 11 | import { DataProduct } from './data-product'; 12 | import { KinesisOps } from './data-streams/kinesis-ops'; 13 | import { KinesisStream } from './data-streams/kinesis-stream'; 14 | import { CompressionType, S3DeliveryStream } from './data-streams/s3-delivery-stream'; 15 | import { GlueJob } from './etl/glue-job'; 16 | import { GlueJobOps } from './etl/glue-job-ops'; 17 | import { GlueTable } from './etl/glue-table'; 18 | import { DataPipelineType, DataTier, LakeKind, Permissions, Stage } from './global/enums'; 19 | import { DataSetResult } from './global/interfaces'; 20 | import { Pipeline } from './pipeline'; 21 | import { buildS3BucketName, packageAsset, toS3Path } from './utils'; 22 | 23 | export interface DataStrategyProps { 24 | readonly stack: Stack; 25 | readonly pipe: Pipeline; 26 | readonly product: DataProduct; 27 | readonly database: Database; 28 | readonly logBucket: Bucket; 29 | readonly stage: Stage; 30 | readonly vpc?: Vpc; 31 | readonly securityGroup?: SecurityGroup; 32 | readonly datalakeAdminRoleArn?: string; 33 | readonly datalakeDbCreatorRoleArn?: string; 34 | } 35 | 36 | export abstract class LakeImplStrategy { 37 | public locationRegistry: CfnResource[] = []; 38 | public stageName: Stage = Stage.ALPHA; 39 | public downloadLocations: { [schema: string]: DataSetResult } = {}; //used for the Custom Resource to allow downloading of existing datasets into datalake 40 | public dataStreams: { [schemaName: string]: KinesisStream } = {}; 41 | 42 | protected logBucket?: Bucket; 43 | protected vpc?: Vpc; 44 | protected securityGroup?: SecurityGroup; 45 | protected datalakeAdminRoleArn?: string; 46 | protected datalakeDbCreatorRoleArn?: string; 47 | 48 | abstract addPipeline(stack: Stack, pipeline: Pipeline, dataProduct: DataProduct, bucketName: string): void; 49 | abstract lakeKind(): LakeKind 50 | 51 | getDataSetBucketName(pipe: Pipeline, dataTier: DataTier) : string | undefined { 52 | 
return dataTier == DataTier.RAW ? this.downloadLocations[pipe.name].rawBucketName : 53 | dataTier == DataTier.REFINED ? this.downloadLocations[pipe.name].refinedBucketName : 54 | dataTier == DataTier.TRUSTED ? this.downloadLocations[pipe.name].trustedBucketName : this.downloadLocations[pipe.name].rawBucketName; 55 | } 56 | 57 | createDataProduct(props: DataStrategyProps): void { 58 | // create a nested stack per data product to allow for independent updates 59 | const pipelineStack = new NestedStack(props.stack, `${props.pipe.name}-dataset-stack`); 60 | this.logBucket = props.logBucket; 61 | this.stageName = props.stage; 62 | this.securityGroup = props.securityGroup; 63 | this.vpc = props.vpc; 64 | this.datalakeAdminRoleArn = props.datalakeAdminRoleArn; 65 | this.datalakeDbCreatorRoleArn = props.datalakeDbCreatorRoleArn; 66 | 67 | // create list of data drop locations to use later in the custom resource to download the data 68 | this.downloadLocations[props.pipe.name] = { 69 | destinationPrefix: props.pipe.destinationPrefix, 70 | sourceBucketName: props.pipe.s3Properties? props.pipe.s3Properties.sourceBucketName! : undefined, 71 | sourceKeys: props.pipe.s3Properties ? props.pipe.s3Properties.sourceKeys! : undefined, 72 | rawBucketName: buildS3BucketName({ 73 | name: props.pipe.name, 74 | accountId: props.product.accountId, 75 | resourceUse: 'raw', 76 | stage: this.stageName, 77 | }), 78 | refinedBucketName: buildS3BucketName({ 79 | name: props.pipe.name, 80 | accountId: props.product.accountId, 81 | resourceUse: 'refined', 82 | stage: this.stageName, 83 | }), 84 | trustedBucketName: buildS3BucketName({ 85 | name: props.pipe.name, 86 | accountId: props.product.accountId, 87 | resourceUse: 'trusted', 88 | stage: this.stageName, 89 | }), 90 | destinationBucketName: buildS3BucketName({ 91 | name: props.pipe.name, 92 | accountId: props.product.accountId, 93 | resourceUse: props.pipe.dataSetDropTier == DataTier.RAW ? 
'raw' : props.pipe.dataSetDropTier == DataTier.REFINED ? 'refined' : 'trusted', 94 | stage: this.stageName, 95 | }), 96 | }; 97 | 98 | this.createTierBucketsAndPermissions(pipelineStack, props.pipe, props.product); 99 | 100 | const dataDropBucketName = this.getDataSetBucketName(props.pipe, props.pipe.dataSetDropTier)!; 101 | this.addPipeline(pipelineStack, props.pipe, props.product, dataDropBucketName); 102 | } 103 | 104 | protected createGlueTable(stack: Stack, pipeline: Pipeline, product: DataProduct, bucketName: string): void { 105 | if (!pipeline.table) return; 106 | 107 | new GlueTable(stack, `${pipeline.name}-table`, { 108 | catalogId: pipeline.table.catalogId, 109 | columns: pipeline.table.columns, 110 | databaseName: product.databaseName, 111 | description: pipeline.table.description, 112 | inputFormat: pipeline.table.inputFormat, 113 | outputFormat: pipeline.table.outputFormat, 114 | parameters: pipeline.table.parameters, 115 | partitionKeys: pipeline.table.partitionKeys, 116 | s3Location: `s3://${bucketName}/${pipeline.destinationPrefix}`, 117 | serdeParameters: pipeline.table.serdeParameters, 118 | serializationLibrary: pipeline.table.serializationLibrary, 119 | tableName: pipeline.table.tableName, 120 | }); 121 | } 122 | 123 | // this is a jumbled mess clean up once refecto 124 | protected createPipelineResources(stack: Stack, pipeline: Pipeline, dataProduct: DataProduct, bucketName: string) { 125 | switch (pipeline.type) { 126 | case DataPipelineType.S3: { 127 | break; 128 | } 129 | case DataPipelineType.STREAM: { 130 | this.addDataStream(stack, pipeline, bucketName); 131 | break; 132 | } 133 | case DataPipelineType.JDBC: { 134 | this.createJDBCConnection(stack, pipeline); 135 | break; 136 | } 137 | } 138 | 139 | // rethink this whole section 140 | if (pipeline.job) { 141 | const jobScript = packageAsset(stack, `${pipeline.name}Script`, pipeline.job.jobScript); 142 | 143 | pipeline.job.jobArgs!['--TempDir'] = `s3://${this.logBucket!.bucketName}/temp/`; 
144 | pipeline.job.jobArgs!['--spark-event-logs-path'] = `s3://${this.logBucket!.bucketName}/logs/`; 145 | let s3Location = this.getDataSetBucketName(pipeline, pipeline.job.destinationLocation!); 146 | 147 | if (pipeline.job.destinationLocation && s3Location) { 148 | pipeline.job.jobArgs!['--DESTINATION_BUCKET'] = s3Location; 149 | 150 | const job = new GlueJob(stack, `${pipeline.name}-etl-job`, { 151 | deploymentBucket: jobScript.bucket, 152 | jobScript: toS3Path(jobScript), 153 | name: pipeline.job.name, 154 | workerType: pipeline.job.workerType, 155 | description: pipeline.job.description, 156 | glueVersion: pipeline.job.glueVersion, 157 | jobArgs: pipeline.job.jobArgs, 158 | maxCapacity: pipeline.job.maxCapacity, 159 | maxConcurrentRuns: pipeline.job.maxConcurrentRuns, 160 | maxRetries: pipeline.job.maxRetries, 161 | numberOfWorkers: pipeline.job.numberOfWorkers, 162 | roleName: pipeline.job.roleName, 163 | timeout: pipeline.job.timeout, 164 | jobType: pipeline.job.jobType, 165 | readAccessBuckets: [ 166 | this.logBucket!, 167 | ], 168 | writeAccessBuckets: [ 169 | this.logBucket!, 170 | Bucket.fromBucketName(stack, 'raw-bucket-role', s3Location), 171 | ], 172 | }); 173 | 174 | new GlueJobOps(stack, `${pipeline.name}-etl-job-ops`, { 175 | job: job, 176 | }); 177 | 178 | if (pipeline.streamProperties) { 179 | this.dataStreams[pipeline.name].stream.grantRead(job.role); 180 | } 181 | 182 | new CfnPermissions(stack, `${pipeline.name}-create-table-perm`, { 183 | dataLakePrincipal: { 184 | dataLakePrincipalIdentifier: job.role.roleArn, 185 | }, 186 | resource: { 187 | databaseResource: { 188 | name: dataProduct.databaseName, 189 | }, 190 | }, 191 | permissions: [ 192 | Permissions.ALTER, 193 | Permissions.CREATE_TABLE, 194 | Permissions.DESCRIBE, 195 | ], 196 | }); 197 | 198 | if (pipeline.table) { 199 | new CfnPermissions(stack, `${pipeline.name}-access-table-perm`, { 200 | dataLakePrincipal: { 201 | dataLakePrincipalIdentifier: job.role.roleArn, 202 | }, 203 | 
resource: { 204 | tableResource: { 205 | databaseName: dataProduct.databaseName, 206 | name: pipeline.table.tableName, 207 | }, 208 | }, 209 | permissions: [ 210 | Permissions.SELECT, 211 | Permissions.DESCRIBE, 212 | ], 213 | }); 214 | } 215 | } 216 | } 217 | } 218 | 219 | private addDataStream(stack: Stack, pipeline: Pipeline, bucketName: string) : KinesisStream { 220 | const schemaName = pipeline.name; 221 | const dataStreamStack = new NestedStack(stack, `${schemaName}-datastream-stack`); 222 | 223 | if (!pipeline.streamProperties) { 224 | throw Error("Cannot create a stream pipeline without 'streamProperties'"); 225 | } 226 | 227 | this.dataStreams[pipeline.name] = new KinesisStream(dataStreamStack, 'DataStream', { 228 | shardCount: 1, 229 | streamName: pipeline.streamProperties.streamName, 230 | }); 231 | 232 | const deliveryStream = new S3DeliveryStream(dataStreamStack, 'deliveryStream', { 233 | compression: CompressionType.UNCOMPRESSED, 234 | kinesisStream: this.dataStreams[pipeline.name].stream, 235 | s3Bucket: Bucket.fromBucketName(stack, 'get-bucket-for-kinesis', bucketName), 236 | s3Prefix: pipeline.destinationPrefix, 237 | }); 238 | 239 | new KinesisOps(dataStreamStack, 'kinesis-ops', { 240 | stream: this.dataStreams[pipeline.name], 241 | deliveryStream: deliveryStream, 242 | }); 243 | 244 | if (pipeline.streamProperties.lambdaDataGenerator) { 245 | const dataGeneratorFunction = new Function(dataStreamStack, 'data-generator-function', { 246 | code: pipeline.streamProperties.lambdaDataGenerator.code, 247 | handler: pipeline.streamProperties.lambdaDataGenerator.handler, 248 | timeout: pipeline.streamProperties.lambdaDataGenerator.timeout, 249 | runtime: pipeline.streamProperties.lambdaDataGenerator.runtime, 250 | functionName: pipeline.streamProperties.lambdaDataGenerator.functionName, 251 | environment: { 252 | KINESIS_STREAM: this.dataStreams[pipeline.name].stream.streamName, 253 | }, 254 | }); 255 | 256 | 
this.dataStreams[pipeline.name].stream.grantWrite(dataGeneratorFunction); 257 | const rule = new Rule(stack, 'Rule', { 258 | schedule: pipeline.streamProperties.lambdaDataGenerator.schedule, 259 | ruleName: pipeline.streamProperties.lambdaDataGenerator.ruleName, 260 | }); 261 | rule.addTarget(new LambdaFunction(dataGeneratorFunction)); 262 | } 263 | return this.dataStreams[pipeline.name]; 264 | } 265 | 266 | public createJDBCConnection(stack: Stack, pipeline:Pipeline) { 267 | if (this.vpc && this.securityGroup) { 268 | new Connection(stack, `${pipeline.name}-glue-connection`, { 269 | type: ConnectionType.JDBC, 270 | connectionName: `${pipeline.name}-jdbc`, 271 | description: `JDBC connection for glue to use on pipeline ${pipeline.name}`, 272 | subnet: this.vpc.isolatedSubnets[0], 273 | securityGroups: [this.securityGroup], 274 | properties: { 275 | JDBC_CONNECTION_URL: 276 | pipeline.jdbcProperties!.jdbc!, 277 | USERNAME: pipeline.jdbcProperties!.username!, //figure this out 278 | PASSWORD: pipeline.jdbcProperties!.password!, 279 | }, 280 | }); 281 | } else { 282 | throw new Error( 283 | 'VPC required to create a JDBC pipeline.', 284 | ); 285 | } 286 | } 287 | 288 | private createTierBucketsAndPermissions(stack: Stack, pipe: Pipeline, product: DataProduct): void { 289 | /// This is confusing. Find a way to simplify 290 | const dataCatalogAccountId = product.dataCatalogAccountId ? 291 | product.dataCatalogAccountId : product.accountId; 292 | const crossAccount = product.dataCatalogAccountId ? 293 | product.dataCatalogAccountId != product.accountId ? 
true : false : false; 294 | 295 | // for each data tier create the appropriate buckets 296 | pipe.tiers.forEach(r => { 297 | const bucketName = this.getDataSetBucketName(pipe, r)!; 298 | 299 | if (this.lakeKind() === LakeKind.DATA_PRODUCT || this.lakeKind() === LakeKind.DATA_PRODUCT_AND_CATALOG) { 300 | new DataLakeBucket(stack, `s3-${r}-bucket-${pipe.name}`, { 301 | bucketName: bucketName, 302 | dataCatalogAccountId: dataCatalogAccountId, 303 | logBucket: this.logBucket!, 304 | crossAccount: crossAccount, 305 | s3Properties: product.s3BucketProps, 306 | }); 307 | } 308 | 309 | if (this.lakeKind() === LakeKind.CENTRAL_CATALOG || this.lakeKind() === LakeKind.DATA_PRODUCT_AND_CATALOG) { 310 | if (this.datalakeDbCreatorRoleArn == undefined) throw new Error('Cannot have datalake without Data Lake DB Creator role defined.'); 311 | 312 | const name = bucketName.replace(/\W/g, ''); 313 | const lfResource = this.registerDataLakeLocation(stack, this.datalakeDbCreatorRoleArn, bucketName, name); 314 | 315 | this.locationRegistry.push(lfResource); 316 | 317 | if (this.datalakeAdminRoleArn) { 318 | this.createDataLocationAccessPermission(stack, `${name}-admin`, this.datalakeAdminRoleArn, bucketName, lfResource); 319 | } 320 | 321 | if (product.dataCatalogAccountId != product.accountId) { 322 | this.createDataLocationCrossAccountOwner(stack, `${name}-xa-owner`, product.accountId, product.dataCatalogAccountId!, bucketName, lfResource); 323 | } 324 | } 325 | }); 326 | } 327 | 328 | private registerDataLakeLocation(stack: Stack, datalakeDbCreatorRoleArn: string, bucketName: string, name: string) : CfnResource { 329 | const dlResource = new CfnResource(stack, `lf-resource-${name}`, { 330 | resourceArn: `arn:aws:s3:::${bucketName}`, 331 | useServiceLinkedRole: false, 332 | roleArn: datalakeDbCreatorRoleArn, 333 | }); 334 | this.createDataLocationAccessPermission(stack, `${name}-creator`, datalakeDbCreatorRoleArn, bucketName, dlResource); 335 | return dlResource; 336 | } 337 | 338 | 
private createDataLocationAccessPermission(stack: Stack, name: string, roleArn: string, bucketName: string, resource: IDependable): CfnPermissions { 339 | const perm = new CfnPermissions(stack, `datalake-creator-perm-${name}`, { 340 | dataLakePrincipal: { 341 | dataLakePrincipalIdentifier: roleArn, 342 | }, 343 | resource: { 344 | dataLocationResource: { 345 | s3Resource: `arn:aws:s3:::${bucketName}`, 346 | }, 347 | }, 348 | permissions: [ 349 | Permissions.DATA_LOCATION_ACCESS, 350 | ], 351 | }); 352 | perm.node.addDependency(resource); 353 | return perm; 354 | } 355 | 356 | private createDataLocationCrossAccountOwner(stack: Stack, name: string, ownerAccountId: string, 357 | catalogAccountId: string, bucketName: string, resource: IDependable): CfnPermissions { 358 | const perm = new CfnPermissions(stack, `datalake-ca-owner-perm-${name}`, { 359 | dataLakePrincipal: { 360 | dataLakePrincipalIdentifier: ownerAccountId, 361 | }, 362 | resource: { 363 | dataLocationResource: { 364 | catalogId: catalogAccountId, 365 | s3Resource: `arn:aws:s3:::${bucketName}`, 366 | }, 367 | }, 368 | permissions: [ 369 | Permissions.CREATE_TABLE_READ_WRITE, 370 | ], 371 | permissionsWithGrantOption: [ 372 | Permissions.CREATE_TABLE_READ_WRITE, 373 | ], 374 | }); 375 | perm.node.addDependency(resource); 376 | return perm; 377 | } 378 | } 379 | 380 | class DataProductStrategy extends LakeImplStrategy { 381 | lakeKind(): LakeKind { 382 | return LakeKind.DATA_PRODUCT; 383 | } 384 | 385 | addPipeline(stack: Stack, pipeline: Pipeline, dataProduct: DataProduct, bucketName: string): void { 386 | if (pipeline.table) { 387 | this.createGlueTable(stack, pipeline, dataProduct, bucketName); 388 | } 389 | 390 | if (dataProduct.dataCatalogAccountId && dataProduct.dataCatalogAccountId != Aws.ACCOUNT_ID) { 391 | // Create the ram share cross account if the product has a cross account GDC 392 | //TODO: make this an optional field to create the cross account share in the pipeline or data product?? 
393 | new CfnPermissions(stack, `datalake-ca-owner-perm-${pipeline.name}`, { 394 | dataLakePrincipal: { 395 | dataLakePrincipalIdentifier: dataProduct.dataCatalogAccountId, 396 | }, 397 | resource: { 398 | tableResource: { 399 | catalogId: Aws.ACCOUNT_ID, 400 | databaseName: dataProduct.databaseName, 401 | name: pipeline.name, 402 | }, 403 | }, 404 | permissions: [ 405 | Permissions.SELECT, 406 | Permissions.DESCRIBE, 407 | ], 408 | permissionsWithGrantOption: [ 409 | Permissions.SELECT, 410 | Permissions.DESCRIBE, 411 | ], 412 | }); 413 | } 414 | this.createPipelineResources(stack, pipeline, dataProduct, bucketName); 415 | } 416 | } 417 | 418 | class CentralCatalogStrategy extends LakeImplStrategy { 419 | lakeKind(): LakeKind { 420 | return LakeKind.CENTRAL_CATALOG; 421 | } 422 | 423 | // eslint-disable-next-line @typescript-eslint/ban-ts-comment 424 | // @ts-ignore 425 | addPipeline(stack: Stack, pipeline: Pipeline, dataProduct: DataProduct, bucketName: string): void { 426 | // 427 | } 428 | } 429 | 430 | class ConsumerStrategy extends LakeImplStrategy { 431 | lakeKind(): LakeKind { 432 | return LakeKind.CONSUMER; 433 | } 434 | 435 | // eslint-disable-next-line @typescript-eslint/ban-ts-comment 436 | // @ts-ignore 437 | addPipeline(stack: Stack, pipeline: Pipeline, dataProduct: DataProduct, bucketName: string): void { 438 | return; 439 | } 440 | } 441 | 442 | class DataProductAndCatalogStrategy extends LakeImplStrategy { 443 | lakeKind(): LakeKind { 444 | return LakeKind.DATA_PRODUCT_AND_CATALOG; 445 | } 446 | 447 | addPipeline(stack: Stack, pipeline: Pipeline, dataProduct: DataProduct, bucketName: string): void { 448 | if (pipeline.table) { 449 | this.createGlueTable(stack, pipeline, dataProduct, bucketName); 450 | } 451 | this.createPipelineResources(stack, pipeline, dataProduct, bucketName); 452 | } 453 | } 454 | 455 | export class LakeStrategyFactory { 456 | public static getLakeStrategy(lakeKind: LakeKind): LakeImplStrategy { 457 | return 
LakeStrategyFactory.strategies[lakeKind]; 458 | } 459 | 460 | private static strategies = { 461 | [LakeKind.DATA_PRODUCT]: new DataProductStrategy(), 462 | [LakeKind.CENTRAL_CATALOG]: new CentralCatalogStrategy(), 463 | [LakeKind.CONSUMER]: new ConsumerStrategy(), 464 | [LakeKind.DATA_PRODUCT_AND_CATALOG]: new DataProductAndCatalogStrategy(), 465 | }; 466 | } 467 | -------------------------------------------------------------------------------- /test/__snapshots__/datalake.test.ts.snap: -------------------------------------------------------------------------------- 1 | // Jest Snapshot v1, https://goo.gl/fbAQLP 2 | 3 | exports[`cdk-nag AwsSolutions Pack Should match snapshot 1`] = ` 4 | Object { 5 | "Outputs": Object { 6 | "datalakeDataLakeAdminRole3EA5FF19": Object { 7 | "Value": Object { 8 | "Ref": "datalaketestlakedatalakeadminroledatalakeadministratorrole0CE6C3E6", 9 | }, 10 | }, 11 | "datalakeDataLakeAthenaWorkgroup48F354CE": Object { 12 | "Value": "test-lake-workgroup-alpha", 13 | }, 14 | "datalakeDataLakeLogBucket7E4B6A14": Object { 15 | "Value": Object { 16 | "Ref": "datalakedatalakelogbucket0A814944", 17 | }, 18 | }, 19 | "datalaketestlakedatalakecreatorroleDataLakeDatabaseCreatorRoleC012854C": Object { 20 | "Value": Object { 21 | "Ref": "datalaketestlakedatalakecreatorroleAWSDBCreatorServiceRoletestlakedatalakecreatoralphaF5D800D9", 22 | }, 23 | }, 24 | }, 25 | "Parameters": Object { 26 | "BootstrapVersion": Object { 27 | "Default": "/cdk-bootstrap/hnb659fds/version", 28 | "Description": "Version of the CDK Bootstrap resources in this environment, automatically retrieved from SSM Parameter Store. 
[cdk:skip]", 29 | "Type": "AWS::SSM::Parameter::Value", 30 | }, 31 | }, 32 | "Resources": Object { 33 | "CustomS3AutoDeleteObjectsCustomResourceProviderHandler9D90184F": Object { 34 | "DependsOn": Array [ 35 | "CustomS3AutoDeleteObjectsCustomResourceProviderRole3B1BD092", 36 | ], 37 | "Properties": Object { 38 | "Code": Object { 39 | "S3Bucket": Object { 40 | "Fn::Sub": "cdk-hnb659fds-assets-\${AWS::AccountId}-\${AWS::Region}", 41 | }, 42 | "S3Key": "6babbac1f25446ab4660ead0ad5972e3a7742f50c6d8326af98a8bcd5d485335.zip", 43 | }, 44 | "Description": Object { 45 | "Fn::Join": Array [ 46 | "", 47 | Array [ 48 | "Lambda function for auto-deleting objects in ", 49 | Object { 50 | "Ref": "datalakedatalakelogbucket0A814944", 51 | }, 52 | " S3 bucket.", 53 | ], 54 | ], 55 | }, 56 | "Handler": "__entrypoint__.handler", 57 | "MemorySize": 128, 58 | "Role": Object { 59 | "Fn::GetAtt": Array [ 60 | "CustomS3AutoDeleteObjectsCustomResourceProviderRole3B1BD092", 61 | "Arn", 62 | ], 63 | }, 64 | "Runtime": "nodejs14.x", 65 | "Timeout": 900, 66 | }, 67 | "Type": "AWS::Lambda::Function", 68 | }, 69 | "CustomS3AutoDeleteObjectsCustomResourceProviderRole3B1BD092": Object { 70 | "Properties": Object { 71 | "AssumeRolePolicyDocument": Object { 72 | "Statement": Array [ 73 | Object { 74 | "Action": "sts:AssumeRole", 75 | "Effect": "Allow", 76 | "Principal": Object { 77 | "Service": "lambda.amazonaws.com", 78 | }, 79 | }, 80 | ], 81 | "Version": "2012-10-17", 82 | }, 83 | "ManagedPolicyArns": Array [ 84 | Object { 85 | "Fn::Sub": "arn:\${AWS::Partition}:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole", 86 | }, 87 | ], 88 | }, 89 | "Type": "AWS::IAM::Role", 90 | }, 91 | "datalakedatalakelogbucket0A814944": Object { 92 | "DeletionPolicy": "Delete", 93 | "Properties": Object { 94 | "AccessControl": "LogDeliveryWrite", 95 | "BucketName": "test-lake-log-bucket-alpha", 96 | "LifecycleConfiguration": Object { 97 | "Rules": Array [ 98 | Object { 99 | "ExpirationInDays": 7, 100 | 
"Status": "Enabled", 101 | }, 102 | ], 103 | }, 104 | "Tags": Array [ 105 | Object { 106 | "Key": "aws-cdk:auto-delete-objects", 107 | "Value": "true", 108 | }, 109 | ], 110 | }, 111 | "Type": "AWS::S3::Bucket", 112 | "UpdateReplacePolicy": "Delete", 113 | }, 114 | "datalakedatalakelogbucketAutoDeleteObjectsCustomResource3DCF9F10": Object { 115 | "DeletionPolicy": "Delete", 116 | "DependsOn": Array [ 117 | "datalakedatalakelogbucketPolicyD1691576", 118 | ], 119 | "Properties": Object { 120 | "BucketName": Object { 121 | "Ref": "datalakedatalakelogbucket0A814944", 122 | }, 123 | "ServiceToken": Object { 124 | "Fn::GetAtt": Array [ 125 | "CustomS3AutoDeleteObjectsCustomResourceProviderHandler9D90184F", 126 | "Arn", 127 | ], 128 | }, 129 | }, 130 | "Type": "Custom::S3AutoDeleteObjects", 131 | "UpdateReplacePolicy": "Delete", 132 | }, 133 | "datalakedatalakelogbucketPolicyD1691576": Object { 134 | "Properties": Object { 135 | "Bucket": Object { 136 | "Ref": "datalakedatalakelogbucket0A814944", 137 | }, 138 | "PolicyDocument": Object { 139 | "Statement": Array [ 140 | Object { 141 | "Action": Array [ 142 | "s3:GetBucket*", 143 | "s3:List*", 144 | "s3:DeleteObject*", 145 | ], 146 | "Effect": "Allow", 147 | "Principal": Object { 148 | "AWS": Object { 149 | "Fn::GetAtt": Array [ 150 | "CustomS3AutoDeleteObjectsCustomResourceProviderRole3B1BD092", 151 | "Arn", 152 | ], 153 | }, 154 | }, 155 | "Resource": Array [ 156 | Object { 157 | "Fn::GetAtt": Array [ 158 | "datalakedatalakelogbucket0A814944", 159 | "Arn", 160 | ], 161 | }, 162 | Object { 163 | "Fn::Join": Array [ 164 | "", 165 | Array [ 166 | Object { 167 | "Fn::GetAtt": Array [ 168 | "datalakedatalakelogbucket0A814944", 169 | "Arn", 170 | ], 171 | }, 172 | "/*", 173 | ], 174 | ], 175 | }, 176 | ], 177 | }, 178 | ], 179 | "Version": "2012-10-17", 180 | }, 181 | }, 182 | "Type": "AWS::S3::BucketPolicy", 183 | }, 184 | "datalakelfdatalakeroleadminsettings2C1ACF95": Object { 185 | "DependsOn": Array [ 186 | 
"datalaketestlakedatalakeadminroledatalakeadministratorrole0CE6C3E6", 187 | ], 188 | "Properties": Object { 189 | "Admins": Array [ 190 | Object { 191 | "DataLakePrincipalIdentifier": Object { 192 | "Fn::GetAtt": Array [ 193 | "datalaketestlakedatalakeadminroledatalakeadministratorrole0CE6C3E6", 194 | "Arn", 195 | ], 196 | }, 197 | }, 198 | ], 199 | }, 200 | "Type": "AWS::LakeFormation::DataLakeSettings", 201 | }, 202 | "datalakereviewsproductdatabaseC2ED5A77": Object { 203 | "Properties": Object { 204 | "CatalogId": Object { 205 | "Ref": "AWS::AccountId", 206 | }, 207 | "DatabaseInput": Object { 208 | "Name": "reviews-product", 209 | }, 210 | }, 211 | "Type": "AWS::Glue::Database", 212 | }, 213 | "datalakereviewsproductlfdbcreatorpermission342BCB9F": Object { 214 | "DependsOn": Array [ 215 | "datalakereviewsproductdatabaseC2ED5A77", 216 | ], 217 | "Properties": Object { 218 | "DataLakePrincipal": Object { 219 | "DataLakePrincipalIdentifier": Object { 220 | "Fn::GetAtt": Array [ 221 | "datalaketestlakedatalakecreatorroleAWSDBCreatorServiceRoletestlakedatalakecreatoralphaF5D800D9", 222 | "Arn", 223 | ], 224 | }, 225 | }, 226 | "Permissions": Array [ 227 | "ALTER", 228 | "CREATE_TABLE", 229 | "DROP", 230 | ], 231 | "Resource": Object { 232 | "DatabaseResource": Object { 233 | "Name": "reviews-product", 234 | }, 235 | }, 236 | }, 237 | "Type": "AWS::LakeFormation::Permissions", 238 | }, 239 | "datalaketaxiproductdatabase4B8A23BF": Object { 240 | "Properties": Object { 241 | "CatalogId": Object { 242 | "Ref": "AWS::AccountId", 243 | }, 244 | "DatabaseInput": Object { 245 | "Name": "taxi-product", 246 | }, 247 | }, 248 | "Type": "AWS::Glue::Database", 249 | }, 250 | "datalaketaxiproductlfdbcreatorpermission5C31145A": Object { 251 | "DependsOn": Array [ 252 | "datalaketaxiproductdatabase4B8A23BF", 253 | ], 254 | "Properties": Object { 255 | "DataLakePrincipal": Object { 256 | "DataLakePrincipalIdentifier": Object { 257 | "Fn::GetAtt": Array [ 258 | 
"datalaketestlakedatalakecreatorroleAWSDBCreatorServiceRoletestlakedatalakecreatoralphaF5D800D9", 259 | "Arn", 260 | ], 261 | }, 262 | }, 263 | "Permissions": Array [ 264 | "ALTER", 265 | "CREATE_TABLE", 266 | "DROP", 267 | ], 268 | "Resource": Object { 269 | "DatabaseResource": Object { 270 | "Name": "taxi-product", 271 | }, 272 | }, 273 | }, 274 | "Type": "AWS::LakeFormation::Permissions", 275 | }, 276 | "datalaketestlakedatalakeadminroledatalakeadministratorTBACC6D7714B": Object { 277 | "Properties": Object { 278 | "PolicyDocument": Object { 279 | "Statement": Array [ 280 | Object { 281 | "Action": Array [ 282 | "lakeformation:AddLFTagsToResource", 283 | "lakeformation:RemoveLFTagsFromResource", 284 | "lakeformation:GetResourceLFTags", 285 | "lakeformation:ListLFTags", 286 | "lakeformation:CreateLFTag", 287 | "lakeformation:GetLFTag", 288 | "lakeformation:UpdateLFTag", 289 | "lakeformation:DeleteLFTag", 290 | "lakeformation:SearchTablesByLFTags", 291 | "lakeformation:SearchDatabasesByLFTags", 292 | ], 293 | "Effect": "Allow", 294 | "Resource": "*", 295 | }, 296 | ], 297 | "Version": "2012-10-17", 298 | }, 299 | "PolicyName": "datalaketestlakedatalakeadminroledatalakeadministratorTBACC6D7714B", 300 | "Roles": Array [ 301 | Object { 302 | "Ref": "datalaketestlakedatalakeadminroledatalakeadministratorrole0CE6C3E6", 303 | }, 304 | ], 305 | }, 306 | "Type": "AWS::IAM::Policy", 307 | }, 308 | "datalaketestlakedatalakeadminroledatalakeadministratorbasic42E4DDDE": Object { 309 | "Properties": Object { 310 | "PolicyDocument": Object { 311 | "Statement": Array [ 312 | Object { 313 | "Action": "iam:CreateServiceLinkedRole", 314 | "Condition": Object { 315 | "StringEquals": Object { 316 | "iam:AWSServiceName": "lakeformation.amazonaws.com", 317 | }, 318 | }, 319 | "Effect": "Allow", 320 | "Resource": "*", 321 | }, 322 | Object { 323 | "Action": "iam:PutRolePolicy", 324 | "Effect": "Allow", 325 | "Resource": Object { 326 | "Fn::Join": Array [ 327 | "", 328 | Array [ 329 | 
"arn:aws:iam::", 330 | Object { 331 | "Ref": "AWS::AccountId", 332 | }, 333 | ":role/aws-service-role/lakeformation.amazonaws.com/AWSServiceRoleForLakeFormationDataAccess", 334 | ], 335 | ], 336 | }, 337 | }, 338 | ], 339 | "Version": "2012-10-17", 340 | }, 341 | "PolicyName": "datalaketestlakedatalakeadminroledatalakeadministratorbasic42E4DDDE", 342 | "Roles": Array [ 343 | Object { 344 | "Ref": "datalaketestlakedatalakeadminroledatalakeadministratorrole0CE6C3E6", 345 | }, 346 | ], 347 | }, 348 | "Type": "AWS::IAM::Policy", 349 | }, 350 | "datalaketestlakedatalakeadminroledatalakeadministratorcrossaccountCA5F9CEB": Object { 351 | "Properties": Object { 352 | "PolicyDocument": Object { 353 | "Statement": Array [ 354 | Object { 355 | "Action": Array [ 356 | "ram:AcceptResourceShareInvitation", 357 | "ram:RejectResourceShareInvitation", 358 | "ec2:DescribeAvailabilityZones", 359 | "ram:EnableSharingWithAwsOrganization", 360 | ], 361 | "Effect": "Allow", 362 | "Resource": "*", 363 | }, 364 | ], 365 | "Version": "2012-10-17", 366 | }, 367 | "PolicyName": "datalaketestlakedatalakeadminroledatalakeadministratorcrossaccountCA5F9CEB", 368 | "Roles": Array [ 369 | Object { 370 | "Ref": "datalaketestlakedatalakeadminroledatalakeadministratorrole0CE6C3E6", 371 | }, 372 | ], 373 | }, 374 | "Type": "AWS::IAM::Policy", 375 | }, 376 | "datalaketestlakedatalakeadminroledatalakeadministratorlambdawriteCWlogs84641127": Object { 377 | "Properties": Object { 378 | "PolicyDocument": Object { 379 | "Statement": Array [ 380 | Object { 381 | "Action": Array [ 382 | "logs:CreateLogGroup", 383 | "logs:CreateLogStream", 384 | "logs:PutLogEvents", 385 | ], 386 | "Effect": "Allow", 387 | "Resource": "*", 388 | "Sid": "AllowLogging", 389 | }, 390 | ], 391 | "Version": "2012-10-17", 392 | }, 393 | "PolicyName": "datalaketestlakedatalakeadminroledatalakeadministratorlambdawriteCWlogs84641127", 394 | "Roles": Array [ 395 | Object { 396 | "Ref": 
"datalaketestlakedatalakeadminroledatalakeadministratorrole0CE6C3E6", 397 | }, 398 | ], 399 | }, 400 | "Type": "AWS::IAM::Policy", 401 | }, 402 | "datalaketestlakedatalakeadminroledatalakeadministratorrole0CE6C3E6": Object { 403 | "Properties": Object { 404 | "AssumeRolePolicyDocument": Object { 405 | "Statement": Array [ 406 | Object { 407 | "Action": "sts:AssumeRole", 408 | "Effect": "Allow", 409 | "Principal": Object { 410 | "Service": "lakeformation.amazonaws.com", 411 | }, 412 | }, 413 | Object { 414 | "Action": "sts:AssumeRole", 415 | "Effect": "Allow", 416 | "Principal": Object { 417 | "Service": "lambda.amazonaws.com", 418 | }, 419 | }, 420 | Object { 421 | "Action": "sts:AssumeRole", 422 | "Effect": "Allow", 423 | "Principal": Object { 424 | "Service": "sagemaker.amazonaws.com", 425 | }, 426 | }, 427 | ], 428 | "Version": "2012-10-17", 429 | }, 430 | "ManagedPolicyArns": Array [ 431 | Object { 432 | "Fn::Join": Array [ 433 | "", 434 | Array [ 435 | "arn:", 436 | Object { 437 | "Ref": "AWS::Partition", 438 | }, 439 | ":iam::aws:policy/AWSLakeFormationDataAdmin", 440 | ], 441 | ], 442 | }, 443 | Object { 444 | "Fn::Join": Array [ 445 | "", 446 | Array [ 447 | "arn:", 448 | Object { 449 | "Ref": "AWS::Partition", 450 | }, 451 | ":iam::aws:policy/AWSGlueConsoleFullAccess", 452 | ], 453 | ], 454 | }, 455 | Object { 456 | "Fn::Join": Array [ 457 | "", 458 | Array [ 459 | "arn:", 460 | Object { 461 | "Ref": "AWS::Partition", 462 | }, 463 | ":iam::aws:policy/CloudWatchLogsReadOnlyAccess", 464 | ], 465 | ], 466 | }, 467 | Object { 468 | "Fn::Join": Array [ 469 | "", 470 | Array [ 471 | "arn:", 472 | Object { 473 | "Ref": "AWS::Partition", 474 | }, 475 | ":iam::aws:policy/AWSLakeFormationCrossAccountManager", 476 | ], 477 | ], 478 | }, 479 | Object { 480 | "Fn::Join": Array [ 481 | "", 482 | Array [ 483 | "arn:", 484 | Object { 485 | "Ref": "AWS::Partition", 486 | }, 487 | ":iam::aws:policy/AmazonAthenaFullAccess", 488 | ], 489 | ], 490 | }, 491 | Object { 492 | 
"Fn::Join": Array [ 493 | "", 494 | Array [ 495 | "arn:", 496 | Object { 497 | "Ref": "AWS::Partition", 498 | }, 499 | ":iam::aws:policy/AmazonSageMakerFullAccess", 500 | ], 501 | ], 502 | }, 503 | ], 504 | "RoleName": "test-lake-datalake-admin-alpha", 505 | }, 506 | "Type": "AWS::IAM::Role", 507 | }, 508 | "datalaketestlakedatalakecreatorroleAWSDBCreatorServiceRoletestlakedatalakecreatoralphaDefaultPolicy03434DE7": Object { 509 | "Properties": Object { 510 | "PolicyDocument": Object { 511 | "Statement": Array [ 512 | Object { 513 | "Action": "lakeformation:GetDataAccess", 514 | "Effect": "Allow", 515 | "Resource": "*", 516 | }, 517 | ], 518 | "Version": "2012-10-17", 519 | }, 520 | "PolicyName": "datalaketestlakedatalakecreatorroleAWSDBCreatorServiceRoletestlakedatalakecreatoralphaDefaultPolicy03434DE7", 521 | "Roles": Array [ 522 | Object { 523 | "Ref": "datalaketestlakedatalakecreatorroleAWSDBCreatorServiceRoletestlakedatalakecreatoralphaF5D800D9", 524 | }, 525 | ], 526 | }, 527 | "Type": "AWS::IAM::Policy", 528 | }, 529 | "datalaketestlakedatalakecreatorroleAWSDBCreatorServiceRoletestlakedatalakecreatoralphaF5D800D9": Object { 530 | "Properties": Object { 531 | "AssumeRolePolicyDocument": Object { 532 | "Statement": Array [ 533 | Object { 534 | "Action": "sts:AssumeRole", 535 | "Effect": "Allow", 536 | "Principal": Object { 537 | "Service": "glue.amazonaws.com", 538 | }, 539 | }, 540 | ], 541 | "Version": "2012-10-17", 542 | }, 543 | "ManagedPolicyArns": Array [ 544 | Object { 545 | "Fn::Join": Array [ 546 | "", 547 | Array [ 548 | "arn:", 549 | Object { 550 | "Ref": "AWS::Partition", 551 | }, 552 | ":iam::aws:policy/service-role/AWSGlueServiceRole", 553 | ], 554 | ], 555 | }, 556 | Object { 557 | "Fn::Join": Array [ 558 | "", 559 | Array [ 560 | "arn:", 561 | Object { 562 | "Ref": "AWS::Partition", 563 | }, 564 | ":iam::aws:policy/AWSLakeFormationDataAdmin", 565 | ], 566 | ], 567 | }, 568 | Object { 569 | "Fn::Join": Array [ 570 | "", 571 | Array [ 572 | 
"arn:", 573 | Object { 574 | "Ref": "AWS::Partition", 575 | }, 576 | ":iam::aws:policy/AmazonS3FullAccess", 577 | ], 578 | ], 579 | }, 580 | ], 581 | "Path": "/service-role/", 582 | "RoleName": "test-lake-datalake-creator-alpha", 583 | }, 584 | "Type": "AWS::IAM::Role", 585 | }, 586 | "datalakeworkgroup682714D0": Object { 587 | "Properties": Object { 588 | "Description": "Default Data Lake Workgroup", 589 | "Name": "test-lake-workgroup-alpha", 590 | "RecursiveDeleteOption": true, 591 | "State": "ENABLED", 592 | "WorkGroupConfiguration": Object { 593 | "EnforceWorkGroupConfiguration": true, 594 | "EngineVersion": Object { 595 | "EffectiveEngineVersion": "Athena engine version 2", 596 | "SelectedEngineVersion": "Athena engine version 2", 597 | }, 598 | "ResultConfiguration": Object { 599 | "OutputLocation": Object { 600 | "Fn::Join": Array [ 601 | "", 602 | Array [ 603 | "s3://", 604 | Object { 605 | "Ref": "datalakedatalakelogbucket0A814944", 606 | }, 607 | "/results/", 608 | ], 609 | ], 610 | }, 611 | }, 612 | }, 613 | }, 614 | "Type": "AWS::Athena::WorkGroup", 615 | }, 616 | "reviewsdatasetstackNestedStackreviewsdatasetstackNestedStackResource3B660F85": Object { 617 | "DeletionPolicy": "Delete", 618 | "Properties": Object { 619 | "Parameters": Object { 620 | "referencetotestdatalakedatalakelogbucket7EE4523BRef": Object { 621 | "Ref": "datalakedatalakelogbucket0A814944", 622 | }, 623 | "referencetotestdatalaketestlakedatalakeadminroledatalakeadministratorrole4029157BArn": Object { 624 | "Fn::GetAtt": Array [ 625 | "datalaketestlakedatalakeadminroledatalakeadministratorrole0CE6C3E6", 626 | "Arn", 627 | ], 628 | }, 629 | "referencetotestdatalaketestlakedatalakecreatorroleAWSDBCreatorServiceRoletestlakedatalakecreatoralpha2AEEF09DArn": Object { 630 | "Fn::GetAtt": Array [ 631 | "datalaketestlakedatalakecreatorroleAWSDBCreatorServiceRoletestlakedatalakecreatoralphaF5D800D9", 632 | "Arn", 633 | ], 634 | }, 635 | }, 636 | "TemplateURL": Object { 637 | "Fn::Join": Array [ 
638 | "", 639 | Array [ 640 | "https://s3.", 641 | Object { 642 | "Ref": "AWS::Region", 643 | }, 644 | ".", 645 | Object { 646 | "Ref": "AWS::URLSuffix", 647 | }, 648 | "/", 649 | Object { 650 | "Fn::Sub": "cdk-hnb659fds-assets-\${AWS::AccountId}-\${AWS::Region}", 651 | }, 652 | "/16329d110accc6fc933f0f598753c61fbe14b9d76cf40fe11bd91cb045acf693.json", 653 | ], 654 | ], 655 | }, 656 | }, 657 | "Type": "AWS::CloudFormation::Stack", 658 | "UpdateReplacePolicy": "Delete", 659 | }, 660 | "taxigreendatasetstackNestedStacktaxigreendatasetstackNestedStackResource56C75800": Object { 661 | "DeletionPolicy": "Delete", 662 | "Properties": Object { 663 | "Parameters": Object { 664 | "referencetotestdatalakedatalakelogbucket7EE4523BRef": Object { 665 | "Ref": "datalakedatalakelogbucket0A814944", 666 | }, 667 | "referencetotestdatalaketestlakedatalakeadminroledatalakeadministratorrole4029157BArn": Object { 668 | "Fn::GetAtt": Array [ 669 | "datalaketestlakedatalakeadminroledatalakeadministratorrole0CE6C3E6", 670 | "Arn", 671 | ], 672 | }, 673 | "referencetotestdatalaketestlakedatalakecreatorroleAWSDBCreatorServiceRoletestlakedatalakecreatoralpha2AEEF09DArn": Object { 674 | "Fn::GetAtt": Array [ 675 | "datalaketestlakedatalakecreatorroleAWSDBCreatorServiceRoletestlakedatalakecreatoralphaF5D800D9", 676 | "Arn", 677 | ], 678 | }, 679 | }, 680 | "TemplateURL": Object { 681 | "Fn::Join": Array [ 682 | "", 683 | Array [ 684 | "https://s3.", 685 | Object { 686 | "Ref": "AWS::Region", 687 | }, 688 | ".", 689 | Object { 690 | "Ref": "AWS::URLSuffix", 691 | }, 692 | "/", 693 | Object { 694 | "Fn::Sub": "cdk-hnb659fds-assets-\${AWS::AccountId}-\${AWS::Region}", 695 | }, 696 | "/3bd9052ea75414d15d886bac69f40cba9ebb7a00dd4d38d64351ce8c5885e842.json", 697 | ], 698 | ], 699 | }, 700 | }, 701 | "Type": "AWS::CloudFormation::Stack", 702 | "UpdateReplacePolicy": "Delete", 703 | }, 704 | "taxiyellowdatasetstackNestedStacktaxiyellowdatasetstackNestedStackResource56DD6B4B": Object { 705 | 
"DeletionPolicy": "Delete", 706 | "Properties": Object { 707 | "Parameters": Object { 708 | "referencetotestdatalakedatalakelogbucket7EE4523BRef": Object { 709 | "Ref": "datalakedatalakelogbucket0A814944", 710 | }, 711 | "referencetotestdatalaketestlakedatalakeadminroledatalakeadministratorrole4029157BArn": Object { 712 | "Fn::GetAtt": Array [ 713 | "datalaketestlakedatalakeadminroledatalakeadministratorrole0CE6C3E6", 714 | "Arn", 715 | ], 716 | }, 717 | "referencetotestdatalaketestlakedatalakecreatorroleAWSDBCreatorServiceRoletestlakedatalakecreatoralpha2AEEF09DArn": Object { 718 | "Fn::GetAtt": Array [ 719 | "datalaketestlakedatalakecreatorroleAWSDBCreatorServiceRoletestlakedatalakecreatoralphaF5D800D9", 720 | "Arn", 721 | ], 722 | }, 723 | }, 724 | "TemplateURL": Object { 725 | "Fn::Join": Array [ 726 | "", 727 | Array [ 728 | "https://s3.", 729 | Object { 730 | "Ref": "AWS::Region", 731 | }, 732 | ".", 733 | Object { 734 | "Ref": "AWS::URLSuffix", 735 | }, 736 | "/", 737 | Object { 738 | "Fn::Sub": "cdk-hnb659fds-assets-\${AWS::AccountId}-\${AWS::Region}", 739 | }, 740 | "/47773d6a5cd15609edd23b3c773b54bcfaa6c607b5008692a7450eabf3ae6a5b.json", 741 | ], 742 | ], 743 | }, 744 | }, 745 | "Type": "AWS::CloudFormation::Stack", 746 | "UpdateReplacePolicy": "Delete", 747 | }, 748 | }, 749 | "Rules": Object { 750 | "CheckBootstrapVersion": Object { 751 | "Assertions": Array [ 752 | Object { 753 | "Assert": Object { 754 | "Fn::Not": Array [ 755 | Object { 756 | "Fn::Contains": Array [ 757 | Array [ 758 | "1", 759 | "2", 760 | "3", 761 | "4", 762 | "5", 763 | ], 764 | Object { 765 | "Ref": "BootstrapVersion", 766 | }, 767 | ], 768 | }, 769 | ], 770 | }, 771 | "AssertDescription": "CDK bootstrap stack version 6 required. Please run 'cdk bootstrap' with a recent version of the CDK CLI.", 772 | }, 773 | ], 774 | }, 775 | }, 776 | } 777 | `; 778 | --------------------------------------------------------------------------------