├── .gitignore
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── sagemaker_gen_ai_architecture.png
└── source
    ├── bin
        └── source.ts
    ├── cdk.json
    ├── lib
        └── sagemaker_domain.ts
    ├── notebooks
        ├── generative_ai_security_lake.ipynb
        ├── notebooks.zip
        └── requirements.txt
    ├── package.json
    └── tsconfig.json


/.gitignore:
--------------------------------------------------------------------------------
  1 | node_modules
  2 | dist/
  3 | build/
  4 | **/.vscode/*
  5 | reports/
  6 | coverage/
  7 | .aws-sam/
  8 | output/
  9 | .DS_Store
 10 | .lcov
 11 | 
 12 | *.d.ts
 13 | 
 14 | # CDK asset staging directory
 15 | .cdk.staging
 16 | cdk.out
 17 | 
 18 | cdk.context.json
 19 | .scannerwork
 20 | 
 21 | 
 22 | ### Python ###
 23 | # Byte-compiled / optimized / DLL files
 24 | __pycache__/
 25 | *.py[cod]
 26 | *$py.class
 27 | 
 28 | # C extensions
 29 | *.so
 30 | 
 31 | # Distribution / packaging
 32 | .Python
 33 | build/
 34 | develop-eggs/
 35 | dist/
 36 | downloads/
 37 | eggs/
 38 | .eggs/
 39 | lib64/
 40 | parts/
 41 | sdist/
 42 | var/
 43 | wheels/
 44 | *.egg-info/
 45 | .installed.cfg
 46 | *.egg
 47 | 
 48 | # PyInstaller
 49 | #  Usually these files are written by a python script from a template
 50 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 51 | *.manifest
 52 | *.spec
 53 | 
 54 | # Installer logs
 55 | pip-log.txt
 56 | pip-delete-this-directory.txt
 57 | 
 58 | # Unit test / coverage reports
 59 | htmlcov/
 60 | .tox/
 61 | .coverage
 62 | .coverage.*
 63 | .cache
 64 | .pytest_cache/
 65 | nosetests.xml
 66 | coverage.xml
 67 | *.cover
 68 | .hypothesis/
 69 | 
 70 | # Translations
 71 | *.mo
 72 | *.pot
 73 | 
 74 | # Flask stuff:
 75 | instance/
 76 | .webassets-cache
 77 | 
 78 | # Scrapy stuff:
 79 | .scrapy
 80 | 
 81 | # Sphinx documentation
 82 | docs/_build/
 83 | 
 84 | # PyBuilder
 85 | target/
 86 | 
 87 | # Jupyter Notebook
 88 | .ipynb_checkpoints
 89 | 
 90 | # pyenv
 91 | # need this to ensure python version
 92 | .python-version
 93 | 
 94 | # celery beat schedule file
 95 | celerybeat-schedule.*
 96 | 
 97 | # SageMath parsed files
 98 | *.sage.py
 99 | 
100 | # Environments
101 | .env
102 | .venv
103 | env/
104 | venv/
105 | ENV/
106 | env.bak/
107 | venv.bak/
108 | .build/
109 | .idea/


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | ## Code of Conduct
2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
4 | opensource-codeofconduct@amazon.com with any additional questions or comments.
5 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing Guidelines
 2 | 
 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional
 4 | documentation, we greatly value feedback and contributions from our community.
 5 | 
 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary
 7 | information to effectively respond to your bug report or contribution.
 8 | 
 9 | 
10 | ## Reporting Bugs/Feature Requests
11 | 
12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features.
13 | 
14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already
15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful:
16 | 
17 | * A reproducible test case or series of steps
18 | * The version of our code being used
19 | * Any modifications you've made relevant to the bug
20 | * Anything unusual about your environment or deployment
21 | 
22 | 
23 | ## Contributing via Pull Requests
24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that:
25 | 
26 | 1. You are working against the latest source on the *main* branch.
27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already.
28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted.
29 | 
30 | To send us a pull request, please:
31 | 
32 | 1. Fork the repository.
33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change.
34 | 3. Ensure local tests pass.
35 | 4. Commit to your fork using clear commit messages.
36 | 5. Send us a pull request, answering any default questions in the pull request interface.
37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation.
38 | 
39 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and
40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/).
41 | 
42 | 
43 | ## Finding contributions to work on
44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start.
45 | 
46 | 
47 | ## Code of Conduct
48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
50 | opensource-codeofconduct@amazon.com with any additional questions or comments.
51 | 
52 | 
53 | ## Security issue notifications
54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue.
55 | 
56 | 
57 | ## Licensing
58 | 
59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution.
60 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT No Attribution
 2 | 
 3 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of
 6 | this software and associated documentation files (the "Software"), to deal in
 7 | the Software without restriction, including without limitation the rights to
 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
 9 | the Software, and to permit persons to whom the Software is furnished to do so.
10 | 
11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
13 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
14 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
15 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
16 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
17 | 
18 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # **Amazon Security Lake Generative AI**
  2 | 
  3 | The project deploys an [Amazon SageMaker Studio](https://aws.amazon.com/sagemaker/studio/) domain and foundational infrastructure to query and load [Amazon Security Lake](https://aws.amazon.com/security-lake/). Once deployed, you can use SageMaker notebooks to use [Amazon Bedrock](https://aws.amazon.com/bedrock/) generative artificial intelligence for threat hunting and analysis with Amazon Security Lake.
  4 | 
  5 | By utilizing Bedrock's generative artificial intelligence capabilities to generate code and queries from natural language input, you will be able to quickly utilize SageMaker's capabilities to explore and derive machine learning insights from your Security Lake data. By using all these AWS services together, you can identify different areas of interest to focus on and increase your overall security posture.
  6 | <br>
  7 | 
  8 | ## **Prerequisites**
  9 | 
 10 | 1. [Enable Amazon Security Lake](https://docs.aws.amazon.com/security-lake/latest/userguide/getting-started.html). For multiple AWS accounts, it is recommended to manage [Security Lake for AWS Organizations](https://docs.aws.amazon.com/security-lake/latest/userguide/multi-account-management.html). To help automate and streamline the management of multiple accounts, we strongly recommend that you integrate Security Lake with AWS Organizations.
 11 | 2. [Amazon Bedrock](https://docs.aws.amazon.com/bedrock/latest/userguide/setting-up.html) is available for use in the AWS account. Additionally, you need to [add model access for Claude v2](https://docs.aws.amazon.com/bedrock/latest/userguide/model-access.html#add-model-access). You will get an error message if you try to use a model before enabling it within your AWS account.
 12 | 3. [Subscriber Query Access](https://docs.aws.amazon.com/security-lake/latest/userguide/subscriber-query-access.html): Subscribers with query access can query data that Security Lake collects. These subscribers directly query AWS Lake Formation tables in your S3 bucket with services like Amazon Athena.
 13 | 4. Resource Linking: Create a Lake Formation database in the Subscriber AWS account using resource linking
 14 |     - Go to Lake Formation in the Subscriber AWS account
 15 |     - Create a new database using resource linking
 16 |     - Enter Resource Link name
 17 |     - Enter Shared database name and shared database Owner ID and click create
 18 | <br><br>
 19 | 
 20 | ## **Solution Architecture**
 21 | ![Solution Architecture](/sagemaker_gen_ai_architecture.png)
 22 | 
 23 | 1. (Prerequisite) Security Lake is set up in a separate AWS account with the appropriate sources (i.e. Amazon Virtual Private Cloud (VPC) Flow Logs, AWS Security Hub, AWS CloudTrail, Amazon Route53) configured.
 24 | 2. (Prerequisite) Create subscriber query access from source Security Lake AWS account to Subscriber AWS account.
 25 | 3. (Prerequisite) Accepted resource share request in the Subscriber AWS account where this solution is deployed.
 26 | 4. (Prerequisite) Create a database link in Lake Formation in the Subscriber AWS account and grant access for the Athena tables in the Security Lake AWS account.
 27 | 5. (Prerequisite) Granted model access for Amazon Bedrock Large Language Model (LLM) Claude v2 in the AWS Subscriber account where the solution will be deployed.
 28 | 6. A VPC will be provisioned for SageMaker with an IGW, NAT GW, and VPC endpoints for all AWS services within the solution. IGW/NAT is required to install external open-source packages.
 29 | 7. A SageMaker Studio domain is created in VPCOnly mode with a single SageMaker user-profile that is tied to an IAM role. As part of the SageMaker deployment, an EFS also gets provisioned for the SageMaker Domain.
 30 | 8. A dedicated IAM role is created to restrict access to create/access SageMaker Domain’s presigned URL from a specific CIDR for accessing the SageMaker notebook.
 31 | 9. CodeCommit repository containing python notebooks utilized for the AI/ML workflow by the SageMaker user-profile.
 32 | 10. Athena workgroup is created for Security Lake queries with a S3 bucket for output location (Access logging configured for the output bucket).
 33 | <br><br>
 34 | 
 35 | ## **Deploy SageMaker Studio using CDK**
 36 | 
 37 | **Build**
 38 | 
 39 | To build this app, you need to be in the cdk project root folder [`source`](/source/). Then run the following:
 40 | 
 41 |     $ npm install -g aws-cdk
 42 |     <installs AWS CDK>
 43 | 
 44 |     $ npm install
 45 |     <installs appropriate packages>
 46 | 
 47 |     $ npm run build
 48 |     <build TypeScript files>
 49 | 
 50 | **Deploy**
 51 | 
 52 |     $ cdk bootstrap aws://<INSERT_AWS_ACCOUNT>/<INSERT_REGION>
 53 |     <build S3 bucket to store files to perform deployment>
 54 | 
 55 |     $ cdk deploy SageMakerDomainStack
 56 |     <deploys the cdk project into the authenticated AWS account>
 57 | 
 58 | As part of the CDK deployment, there is an Output value for the CodeCommit repo URL (sagemakernotebookgenairepositoryURL). You will need this value later on to get the python notebooks into your SageMaker app.
 59 | 
 60 | ## **Post Deployment Steps**
 61 | 
 62 | **Access to Security Lake**
 63 | 
 64 | Now that you have deployed the SageMaker solution, you will need to grant SageMaker's user-profile in your AWS account access to query Security Lake from the AWS account it was enabled in. We will use the "Grant" permission to allow the SageMaker user profile ARN to access the Security Lake database in Lake Formation within the Subscriber AWS account.
 65 | 
 66 | **Grant permission to Security Lake Database**
 67 | 1. Copy the SageMaker user-profile ARN “arn:aws:iam::********************:role/sagemaker-user-profile-for-security-lake”
 68 | 2. Go to Lake Formation in console
 69 | 3. Select the amazon_security_lake_glue_db_<YOUR-REGION>  database.
 70 |     1. For example, if your Security Lake is in us-east-1 the value would be amazon_security_lake_glue_db_us_east_1
 71 | 4. From the Actions  Dropdown, select Grant.
 72 | 5. In Grant Data  Permissions, select SAML Users and Groups.
 73 | 6. Paste the SageMaker user  profile ARN from Step 1.
 74 | 7. In Database  Permissions, select Describe and then Grant.
 75 | <br><br> 
 76 | 
 77 | **Grant permission to Security Lake table(s)**
 78 | 1. Copy the SageMaker user-profile ARN “arn:aws:iam::********************:role/sagemaker-user-profile-for-security-lake” 
 79 | 2. Go to Lake Formation in console
 80 | 3. Select the amazon_security_lake_glue_db_<YOUR-REGION>  database.
 81 |     1. For example, if your Security Lake is in us-east-1 the value would be amazon_security_lake_glue_db_us_east_1
 82 | 4. Choose View Tables.
 83 | 5. Select the amazon_security_lake_table_<YOUR-REGION>_sh_findings_1_0  table.
 84 |     1. For example, if your Security Lake is in us-east-1 the value would be amazon_security_lake_table_us_east_1_sh_findings_1_0
 85 |     2. Note: Each table must be granted access individually. Selecting “All Tables“ will not grant the appropriate access needed to query Security Lake.
 86 | 6. From Actions Dropdown, select Grant.
 87 | 7. In Grant Data  Permissions, select SAML Users and Groups.
 88 | 8. Paste the SageMaker user-profile ARN from Step 1.
 89 | 9. In Table Permissions, select Describe and then Grant.
 90 | <br>
 91 | 
 92 | **CodeCommit**
 93 | - Note: The Output (sagemakernotebookgenairepositoryURL) from the CDK deployment will have the CodeCommit repo URL.
 94 | 
 95 | ##### Option 1: 
 96 | 1. Open your SageMaker Studio app 
 97 | 2. In Studio, in the left sidebar, choose the Git icon (identified by a diamond with two branches), then choose Clone a Repository.
 98 | 3. For the URI, enter the HTTPS URL (Output value for SageMakerDomainStack.sagemakernotebookgenairepositoryURL) of the CodeCommit repository, then choose Clone.
 99 | 4. In the left sidebar, choose the file browser icon. You will see a folder with the notebook repository
100 | 
101 | ##### Option 2:
102 | 1. Open your SageMaker Studio app 
103 | 2. In the top navigation bar, choose File >> New >> Terminal
104 | 3. Type in the following command: 
105 | 
106 |     `$ git clone <'Output value for SageMakerDomainStack.sagemakernotebookgenairepositoryURL'>`
107 |     <clones notebook repository>
108 | 
109 | <br>
110 | 
111 | ## **Using Generative AI and SageMaker Studio**
112 | Now that you have completed the post-deployment steps, you are ready to start using generative AI to assist with threat hunting and analysis. The python notebooks which are deployed as part of the solution provide a starting point for how you can conduct AI/ML analysis using data within Security Lake. These can be expanded to any native or custom data sources configured on Security Lake.
113 | <br>
114 | 
115 | ## Security
116 | See [CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications) for more information.
117 | 
118 | ## License
119 | This library is licensed under the MIT-0 License. See the LICENSE file.
120 | 


--------------------------------------------------------------------------------
/sagemaker_gen_ai_architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/sample-generative-ai-analysis-amazon-security-lake/e96047b6876806c18ddc1e54241367a2ce5e8c1c/sagemaker_gen_ai_architecture.png


--------------------------------------------------------------------------------
/source/bin/source.ts:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env node
 2 | import 'source-map-support/register';
 3 | import * as cdk from 'aws-cdk-lib';
 4 | import { SageMakerDomainStack } from '../lib/sagemaker_domain';
 5 | import { AwsSolutionsChecks } from 'cdk-nag';
 6 | 
 7 | const app = new cdk.App(); // CDK app that owns the single stack instantiated below.
 8 | 
 9 | new SageMakerDomainStack(app, 'SageMakerDomainStack', {
10 |   /* If you don't specify 'env', this stack will be environment-agnostic.
11 |    * Account/Region-dependent features and context lookups will not work,
12 |    * but a single synthesized template can be deployed anywhere. */
13 | 
14 |   /* Uncomment the next line to specialize this stack for the AWS Account
15 |    * and Region that are implied by the current CLI configuration. */
16 |   // env: { account: process.env.CDK_DEFAULT_ACCOUNT, region: process.env.CDK_DEFAULT_REGION },
17 | 
18 |   /* Uncomment the next line if you know exactly what Account and Region you
19 |    * want to deploy the stack to. */
20 |   // env: { account: '<INSERT ACCOUNT#>', region: '<INSERT REGION>' },
21 | 
22 |   /* For more information, see https://docs.aws.amazon.com/cdk/latest/guide/environments.html */
23 | });
24 | 
25 | cdk.Aspects.of(app).add(new AwsSolutionsChecks({ verbose: true})); // Attach the cdk-nag AwsSolutionsChecks aspect at app scope, with verbose output enabled.
26 | app.synth() // Synthesize the app; must come after the stack and aspect are registered above.


--------------------------------------------------------------------------------
/source/cdk.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "app": "npx ts-node --prefer-ts-exts bin/source.ts",
 3 |   "watch": {
 4 |     "include": [
 5 |       "**"
 6 |     ],
 7 |     "exclude": [
 8 |       "README.md",
 9 |       "cdk*.json",
10 |       "**/*.d.ts",
11 |       "**/*.js",
12 |       "tsconfig.json",
13 |       "package*.json",
14 |       "yarn.lock",
15 |       "node_modules",
16 |       "test"
17 |     ]
18 |   },
19 |   "context": {
20 |     "@aws-cdk/aws-apigateway:usagePlanKeyOrderInsensitiveId": true,
21 |     "@aws-cdk/core:stackRelativeExports": true,
22 |     "@aws-cdk/aws-rds:lowercaseDbIdentifier": true,
23 |     "@aws-cdk/aws-lambda:recognizeVersionProps": true,
24 |     "@aws-cdk/aws-lambda:recognizeLayerVersion": true,
25 |     "@aws-cdk/aws-cloudfront:defaultSecurityPolicyTLSv1.2_2021": true,
26 |     "@aws-cdk-containers/ecs-service-extensions:enableDefaultLogDriver": true,
27 |     "@aws-cdk/aws-ec2:uniqueImdsv2TemplateName": true,
28 |     "@aws-cdk/core:checkSecretUsage": true,
29 |     "@aws-cdk/aws-iam:minimizePolicies": true,
30 |     "@aws-cdk/core:validateSnapshotRemovalPolicy": true,
31 |     "@aws-cdk/core:target-partitions": [
32 |       "aws",
33 |       "aws-cn"
34 |     ]
35 |   }
36 | }
37 | 


--------------------------------------------------------------------------------
/source/lib/sagemaker_domain.ts:
--------------------------------------------------------------------------------
  1 | import { CfnParameter, Stack, StackProps, Duration, RemovalPolicy, CfnOutput } from "aws-cdk-lib";
  2 | import * as iam from "aws-cdk-lib/aws-iam";
  3 | import { Construct } from "constructs";
  4 | import { Key } from "aws-cdk-lib/aws-kms";
  5 | import { CfnApp, CfnDomain, CfnUserProfile } from "aws-cdk-lib/aws-sagemaker";
  6 | import { join } from 'path';
  7 | import { FlowLogDestination, FlowLogTrafficType, Vpc, SubnetType, SecurityGroup, Peer, Port, InterfaceVpcEndpointAwsService, InterfaceVpcEndpointService } from "aws-cdk-lib/aws-ec2";
  8 | import { LogGroup, RetentionDays } from "aws-cdk-lib/aws-logs";
  9 | import * as codecommit from 'aws-cdk-lib/aws-codecommit';
 10 | import * as athena from 'aws-cdk-lib/aws-athena';
 11 | import { BlockPublicAccess, Bucket, BucketEncryption, ObjectOwnership, StorageClass } from "aws-cdk-lib/aws-s3";
 12 | import { EncryptionOption } from "aws-cdk-lib/aws-stepfunctions-tasks";
 13 | import { NagSuppressions } from 'cdk-nag';
 14 | 
 15 | export class SageMakerDomainStack extends Stack {
 16 |   constructor(scope: Construct, id: string, props?: StackProps) {
 17 |     super(scope, id, props);
 18 | 
 19 |     // Parameters to set
 20 |     const sagemaker_restrict_cidr_presigned_url = new CfnParameter(this, "sagemaker_restrict_cidr_presigned_url", {
 21 |       description: "The IP address to limit accessing SageMaker Studio presigned URL.",
 22 |       default: "0.0.0.0/0",
 23 |       type: "String"
 24 |     }); 
 25 | 
 26 |     const IAM_role_assumption_for_sagemaker_presigned_url = new CfnParameter(this, "IAM_role_assumption_for_sagemaker_presigned_url", {
 27 |       description: "IAM role to update the trust relationship to allow access to create and access SageMaker Studio's presigned URL.",
 28 | //      default: "arn:aws:iam::1234:role/Admin",
 29 |       type: "String"
 30 |     });
 31 | 
 32 |     const security_lake_aws_account = new CfnParameter(this, "security_lake_aws_account", {
 33 |       description: "AWS Account where Security Lake has been initially deployed and shared from.",
 34 | //      default: "1234",
 35 |       type: "String"
 36 |     });
 37 | 
 38 |     // CodeCommit repository
 39 |     const sagemaker_notebook_gen_ai_repository = new codecommit.Repository(this, 'sagemaker_notebook_gen_ai_repository', {
 40 |       repositoryName: 'sagemaker_gen_ai_repo',
 41 |       description: 'Repository for SageMaker notebooks to run analytics for Security Lake.',
 42 |       code: codecommit.Code.fromZipFile(join(__dirname, "../notebooks/notebooks.zip"), "main")
 43 |     });
 44 | 
 45 |     new CfnOutput(this,'sagemaker-notebook-gen-ai-repository-URL', {
 46 |       description:'The CodeCommit repository URL to clone within your SageMaker user-profile notebook.',
 47 |       value: sagemaker_notebook_gen_ai_repository.repositoryCloneUrlHttp
 48 |     })
 49 | 
 50 | 
 51 |     // KMS Key for S3 bucket
 52 |     const athena_s3_output_kms_key = new Key(this, "athena_s3_output_kms_key", {
 53 |       removalPolicy: RemovalPolicy.DESTROY,
 54 |       pendingWindow: Duration.days(7),
 55 |       description: "KMS key for S3 bucket to store athena workgroup output.",
 56 |       enableKeyRotation: true,
 57 |       alias: "athena_s3_output_kms_key"
 58 |     });
 59 | 
 60 |     // KMS Key for SageMaker Domain
 61 |     const sagemaker_kms_key = new Key(this, "sagemaker_kms_key", {
 62 |       removalPolicy: RemovalPolicy.DESTROY,
 63 |       pendingWindow: Duration.days(7),
 64 |       description: "KMS key for SageMaker Domain resources.",
 65 |       enableKeyRotation: true,
 66 |       alias: "sagemaker_domain_kms_key"
 67 |     });
 68 | 
 69 |     // Create new VPC with flow logs and Pub/Priv Subnets
 70 |     const cw_vpc_flow_logs_parameter = new CfnParameter(this, "cw_flow_logs_parameter", {
 71 |       type: "String",
 72 |       description: "The cloudwatch log group name for VPC flow logs.",
 73 |       default: "/aws/vpc/flowlogs/SageMakerDomainStack",
 74 |     });
 75 | 
 76 |     const cw_flow_logs = new LogGroup(this, "cw_flow_logs", {
 77 |       logGroupName: cw_vpc_flow_logs_parameter.valueAsString,
 78 |       removalPolicy: RemovalPolicy.DESTROY,
 79 |       retention: RetentionDays.ONE_YEAR,
 80 |       encryptionKey: sagemaker_kms_key
 81 |       });
 82 |     
 83 |     sagemaker_kms_key.addToResourcePolicy(new iam.PolicyStatement({
 84 |       actions: [
 85 |         "kms:Encrypt*",
 86 |         "kms:Decrypt*",
 87 |         "kms:ReEncrypt*",
 88 |         "kms:GenerateDataKey*",
 89 |         "kms:Describe*"
 90 |       ],
 91 |       resources: [
 92 |         "*"
 93 |       ],
 94 |       principals: [
 95 |         new iam.ServicePrincipal("logs." + this.region + ".amazonaws.com")
 96 |       ],
 97 |       conditions:{
 98 |         ArnEquals:{
 99 |           "kms:EncryptionContext:aws:logs:arn": [
100 |             "arn:aws:logs:" + this.region + ":" + this.account+ ":log-group:" + cw_vpc_flow_logs_parameter.valueAsString
101 |           ]
102 |         }} 
103 |     }));
104 | 
105 |     // Create SageMaker VPC
106 |     const sagemaker_vpc = new Vpc(this, "sagemaker_vpc", {
107 |       maxAzs: 2,
108 |       subnetConfiguration: [
109 |         {
110 |           cidrMask: 24,
111 |           name: "public_subnet_for_nat_gw",
112 |           subnetType: SubnetType.PUBLIC,
113 |           mapPublicIpOnLaunch: false
114 |         },
115 |         {
116 |           cidrMask: 24,
117 |           name: "workload_subnet_with_nat",
118 |           subnetType: SubnetType.PRIVATE_WITH_EGRESS,
119 |         },
120 |       ],
121 |       flowLogs: {
122 |         "s3": {
123 |           destination: FlowLogDestination.toCloudWatchLogs(cw_flow_logs),
124 |           trafficType: FlowLogTrafficType.ALL,
125 |       }}
126 |     });
127 | 
128 |     const sagemaker_workload_sg = new SecurityGroup(this, "sagemaker_workload_sg", {
129 |       vpc: sagemaker_vpc,
130 |       description: "SageMaker Workload SG",
131 |       allowAllOutbound: false,
132 |       securityGroupName: "sagemaker_workload_sg"
133 |     });
134 | 
135 |     sagemaker_workload_sg.connections.allowTo(sagemaker_workload_sg, Port.tcpRange(8192,65535), "Communication required with SageMaker service-owned VPC")
136 |     sagemaker_workload_sg.connections.allowTo(sagemaker_workload_sg, Port.udp(500), "Communication required with SageMaker service-owned VPC")
137 |     sagemaker_workload_sg.connections.allowTo(sagemaker_workload_sg, Port.esp(), "Communication required with SageMaker service-owned VPC")
138 |     sagemaker_workload_sg.connections.allowTo(Peer.anyIpv4(), Port.tcp(443), "Allow HTTPS Outbound for egress-only internet access")
139 |     sagemaker_workload_sg.connections.allowTo(Peer.anyIpv4(), Port.tcp(80), "Allow HTTP Outbound for egress-only internet access")
140 | 
141 |     sagemaker_workload_sg.connections.allowFrom(sagemaker_workload_sg, Port.tcpRange(8192,65535), "Communication required with SageMaker service-owned VPC")
142 |     sagemaker_workload_sg.connections.allowFrom(sagemaker_workload_sg, Port.udp(500), "Communication required with SageMaker service-owned VPC")
143 |     sagemaker_workload_sg.connections.allowFrom(sagemaker_workload_sg, Port.esp(), "Communication required with SageMaker service-owned VPC")
144 |     sagemaker_workload_sg.connections.allowFrom(sagemaker_workload_sg, Port.tcp(443), "Allow HTTPS Inbound for VPC interface endpoint")
145 | 
146 |     sagemaker_vpc.addInterfaceEndpoint("kms_endpoint",{
147 |       service: InterfaceVpcEndpointAwsService.KMS,
148 |       privateDnsEnabled: true,
149 |       subnets: {
150 |          subnets: [
151 |           sagemaker_vpc.selectSubnets({subnetGroupName: "workload_subnet_with_nat"}).subnets[0]
152 |          ]
153 |       },
154 |       securityGroups: (
155 |         [sagemaker_workload_sg]
156 |       )
157 |     });
158 | 
159 |     sagemaker_vpc.addInterfaceEndpoint("sagemaker_api_endpoint",{
160 |       service: InterfaceVpcEndpointAwsService.SAGEMAKER_API,
161 |       privateDnsEnabled: true,
162 |       subnets: {
163 |          subnets: [
164 |           sagemaker_vpc.selectSubnets({subnetGroupName: "workload_subnet_with_nat"}).subnets[0]
165 |          ]
166 |       },
167 |       securityGroups: (
168 |         [sagemaker_workload_sg]
169 |       )
170 |     });
171 | 
172 | 
173 |     sagemaker_vpc.addInterfaceEndpoint("sagemaker_runtime_endpoint",{
174 |       service: InterfaceVpcEndpointAwsService.SAGEMAKER_RUNTIME,
175 |       privateDnsEnabled: true,
176 |       subnets: {
177 |          subnets: [
178 |           sagemaker_vpc.selectSubnets({subnetGroupName: "workload_subnet_with_nat"}).subnets[0]
179 |          ]
180 |       },
181 |       securityGroups: (
182 |         [sagemaker_workload_sg]
183 |       )
184 |     });
185 | 
186 |     sagemaker_vpc.addInterfaceEndpoint("sagemaker_studio_endpoint",{
187 |       service: new InterfaceVpcEndpointService("aws.sagemaker." + this.region + ".studio", 443),
188 |       privateDnsEnabled: true,
189 |       subnets: {
190 |          subnets: [
191 |           sagemaker_vpc.selectSubnets({subnetGroupName: "workload_subnet_with_nat"}).subnets[0]
192 |          ]
193 |       },
194 |       securityGroups: (
195 |         [sagemaker_workload_sg]
196 |       )
197 |     });
198 | 
199 |     sagemaker_vpc.addInterfaceEndpoint("athena_endpoint",{
200 |       service: InterfaceVpcEndpointAwsService.ATHENA,
201 |       privateDnsEnabled: true,
202 |       subnets: {
203 |          subnets: [
204 |           sagemaker_vpc.selectSubnets({subnetGroupName: "workload_subnet_with_nat"}).subnets[0]
205 |          ]
206 |       },
207 |       securityGroups: (
208 |         [sagemaker_workload_sg]
209 |       )
210 |     });
211 | 
212 |     sagemaker_vpc.addInterfaceEndpoint("s3_endpoint",{
213 |       service: new InterfaceVpcEndpointService("com.amazonaws." + this.region + ".s3", 443),
214 |       subnets: {
215 |          subnets: [
216 |           sagemaker_vpc.selectSubnets({subnetGroupName: "workload_subnet_with_nat"}).subnets[0]
217 |          ]
218 |       },
219 |       securityGroups: (
220 |         [sagemaker_workload_sg]
221 |       )
222 |     });
223 | 
224 |     sagemaker_vpc.addInterfaceEndpoint("codecommit_endpoint",{
225 |       service: InterfaceVpcEndpointAwsService.CODECOMMIT,
226 |       subnets: {
227 |          subnets: [
228 |           sagemaker_vpc.selectSubnets({subnetGroupName: "workload_subnet_with_nat"}).subnets[0]
229 |          ]
230 |       },
231 |       securityGroups: (
232 |         [sagemaker_workload_sg]
233 |       )
234 |     });
235 | 
236 |     sagemaker_vpc.addInterfaceEndpoint("codecommit_git_endpoint",{
237 |       service: InterfaceVpcEndpointAwsService.CODECOMMIT_GIT,
238 |       subnets: {
239 |          subnets: [
240 |           sagemaker_vpc.selectSubnets({subnetGroupName: "workload_subnet_with_nat"}).subnets[0]
241 |          ]
242 |       },
243 |       securityGroups: (
244 |         [sagemaker_workload_sg]
245 |       )
246 |     });
247 | 
248 |     // S3 bucket that receives the server access logs of the Athena results bucket
249 |     const s3_access_logs = new Bucket(this, 's3_access_logs', {
250 |       bucketName: 'athena-gen-ai-s3-access-logs-' + this.account,
251 |       removalPolicy: RemovalPolicy.DESTROY,
252 |       // SSE-S3 on purpose: S3 server access log delivery is not supported when the destination bucket defaults to SSE-KMS.
253 |       encryption: BucketEncryption.S3_MANAGED,
254 |       enforceSSL: true,
255 |       versioned: true,
256 |       blockPublicAccess: BlockPublicAccess.BLOCK_ALL,
257 |       objectOwnership: ObjectOwnership.BUCKET_OWNER_PREFERRED,
258 |       publicReadAccess: false,
259 |       lifecycleRules: [{
260 |         expiration: Duration.days(365),
261 |         transitions: [{
262 |             storageClass: StorageClass.INTELLIGENT_TIERING,
263 |             transitionAfter: Duration.days(31)
264 |         }]
265 |     }]
266 |     });
267 |     // Athena query results bucket: encrypted with athena_s3_output_kms_key, versioned, TLS-only, access-logged to s3_access_logs.
268 |     const athena_output_s3_bucket = new Bucket(this, 'athena_output_s3_bucket', {
269 |       bucketName: 'athena-gen-ai-bucket-results-' + this.account,
270 |       serverAccessLogsBucket: s3_access_logs,
271 |       removalPolicy: RemovalPolicy.DESTROY,
272 |       bucketKeyEnabled: true,
273 |       encryption: BucketEncryption.KMS,
274 |       encryptionKey: athena_s3_output_kms_key,
275 |       enforceSSL: true,
276 |       versioned: true,
277 |       blockPublicAccess: BlockPublicAccess.BLOCK_ALL,
278 |       objectOwnership: ObjectOwnership.BUCKET_OWNER_PREFERRED,
279 |       publicReadAccess: false,
280 |       lifecycleRules: [{
281 |         expiration: Duration.days(365),
282 |         transitions: [{
283 |             storageClass: StorageClass.INTELLIGENT_TIERING,
284 |             transitionAfter: Duration.days(31)
285 |         }]
286 |     }]
287 |     });
288 | 
289 |     // Execution role for SageMaker user profiles; trusted by both the SageMaker and Bedrock service principals
290 |     const sagemaker_user_profile_role = new iam.Role(this, "sagemaker_user_profile_role", {
291 |       assumedBy: new iam.CompositePrincipal( // assumedBy takes a single principal, so the two services are combined
292 |         new iam.ServicePrincipal("sagemaker.amazonaws.com"),
293 |         new iam.ServicePrincipal("bedrock.amazonaws.com"),
294 |       ),
295 |       roleName: "sagemaker-user-profile-for-security-lake",
296 |       managedPolicies: [
297 |       ]
298 |     });
299 |     // Key policy statement: lets the user profile role describe, decrypt with, generate data keys from, and create grants on sagemaker_kms_key.
300 |     sagemaker_kms_key.addToResourcePolicy(new iam.PolicyStatement({
301 |       actions: [
302 |         "kms:DescribeKey",
303 |         "kms:Decrypt",
304 |         "kms:GenerateDataKey",
305 |         "kms:CreateGrant"
306 |       ],
307 |       resources: [
308 |         "*"
309 |       ],
310 |       principals: [
311 |         new iam.ArnPrincipal(sagemaker_user_profile_role.roleArn)
312 |       ]
313 |     }));
314 |     // Identity policy document for the user profile role; it is attached through the SageMakerStudioUserProfileManagedPolicy managed policy.
315 |     const sagemaker_user_profile_policy = new iam.PolicyDocument({
316 |       statements: [
317 |         new iam.PolicyStatement({
318 |           sid: "CloudWatchLogGroupAllow",
319 |           effect: iam.Effect.ALLOW,
320 |           actions: [
321 |             "logs:CreateLogGroup",
322 |             "logs:CreateLogStream",
323 |             "logs:PutLogEvents"
324 |           ],
325 |           resources: [
326 |             "arn:aws:logs:" + this.region +":" + this.account + ":log-group:/aws/sagemaker/studio:*"
327 |           ]   
328 |         }),
329 |         new iam.PolicyStatement({
330 |           sid: "S3Read",
331 |           effect: iam.Effect.ALLOW,
332 |           actions: [
333 |             "s3:ListBucket",
334 |             "s3:GetObject",
335 |             "s3:GetBucketAcl",
336 |             "s3:GetBucketLocation"
337 |           ],
338 |           resources: [
339 |             "*"
340 |           ]   
341 |         }),
342 |         new iam.PolicyStatement({
343 |           sid: "S3WriteAllow",
344 |           effect: iam.Effect.ALLOW,
345 |           actions: [
346 |             "s3:AbortMultipartUpload",
347 |             "s3:DeleteObject",
348 |             "s3:PutObject",
349 |             "s3:PutObjectAcl"
350 |           ],
351 |           resources: [
352 |             athena_output_s3_bucket.bucketArn,
353 |             athena_output_s3_bucket.bucketArn + "/*"
354 |           ]   
355 |         }),
356 |         new iam.PolicyStatement({
357 |           sid: "AthenaReadAllow",
358 |           effect: iam.Effect.ALLOW,
359 |           actions: [
360 |             "athena:Get*",
361 |             "athena:List*",
362 |             "athena:StartQueryExecution",
363 |             "athena:StartSession",
364 |             "athena:StopQueryExecution",
365 |           ],
366 |           resources: [
367 |             "arn:aws:athena:" + this.region + ":" + this.account +":datacatalog/*",
368 |             "arn:aws:athena:" + this.region + ":" + this.account +":workgroup/*"
369 |           ]   
370 |         }),
371 |         new iam.PolicyStatement({
372 |           sid: "GlueWriteAllow",
373 |           effect: iam.Effect.ALLOW,
374 |           actions: [
375 |             "glue:CreateDatabase",
376 |             "glue:GetDatabase",
377 |             "glue:GetDatabases",
378 |             "glue:GetTable",
379 |             "glue:GetTables",
380 |             "glue:GetPartition",
381 |             "glue:GetPartitions",
382 |             "glue:BatchGetPartition"
383 |           ],
384 |           resources: [
385 |             "arn:aws:glue:" + this.region + ":" + this.account +":database/*",
386 |             "arn:aws:glue:" + this.region + ":" + this.account +":table/*",
387 |             "arn:aws:glue:" + this.region + ":" + this.account +":catalog",
388 |             "arn:aws:glue:" + this.region + ":" + security_lake_aws_account.valueAsString +":database/*",
389 |             "arn:aws:glue:" + this.region + ":" + security_lake_aws_account.valueAsString +":table/*",
390 |             "arn:aws:glue:" + this.region + ":" + security_lake_aws_account.valueAsString +":catalog",
391 |           ]   
392 |         }),
393 |         new iam.PolicyStatement({
394 |           sid: "LakeFormationAllow",
395 |           effect: iam.Effect.ALLOW,
396 |           actions: [
397 |             "lakeformation:GetDataAccess"
398 |           ],
399 |           resources: [
400 |             "*"
401 |           ]   
402 |         }),
403 |         new iam.PolicyStatement({
404 |           sid: "CodeCommitWriteAllow",
405 |           effect: iam.Effect.ALLOW,
406 |           actions: [
407 |             "codecommit:BatchGet*",
408 |             "codecommit:Describe*",
409 |             "codecommit:Get*",
410 |             "codecommit:List*",
411 |             "codecommit:GitPull",
412 |             "codecommit:GitPush",
413 |             "codecommit:CreateBranch",
414 |             "codecommit:DeleteBranch",
415 |             "codecommit:MergeBranchesBy*",
416 |             "codecommit:UpdateDefaultBranch",
417 |             "codecommit:BatchDescribeMergeConflicts",
418 |             "codecommit:CreateUnreferencedMergeCommit",
419 |             "codecommit:CreateCommit",
420 |             "codecommit:CreatePullRequest",
421 |             "codecommit:CreatePullRequestApprovalRule",
422 |             "codecommit:DeletePullRequestApprovalRule",
423 |             "codecommit:EvaluatePullRequestApprovalRules",
424 |             "codecommit:MergePullRequestBy*",
425 |             "codecommit:PostCommentForPullRequest",
426 |             "codecommit:UpdatePullRequest*",
427 |             "codecommit:PutFile"
428 |           ],
429 |           resources: [
430 |             sagemaker_notebook_gen_ai_repository.repositoryArn
431 |           ]   
432 |         }),
433 |         new iam.PolicyStatement({
434 |           sid: "SageMakerNotResourceAllow",
435 |           effect: iam.Effect.ALLOW,
436 |           actions: [
437 |             "sagemaker:*"
438 |           ],
439 |           notResources: [ // all SageMaker actions are allowed on every resource except the four ARN patterns listed here
440 |             "arn:aws:sagemaker:*:*:domain/*",
441 |             "arn:aws:sagemaker:*:*:user-profile/*",
442 |             "arn:aws:sagemaker:*:*:app/*",
443 |             "arn:aws:sagemaker:*:*:flow-definition/*"
444 |           ]
445 |         }),
446 |         new iam.PolicyStatement({
447 |           sid: "SageMakerDomainAllow",
448 |           effect: iam.Effect.ALLOW,
449 |           actions: [
450 |             "sagemaker:CreatePresignedDomainUrl",
451 |             "sagemaker:DescribeDomain",
452 |             "sagemaker:ListDomains",
453 |             "sagemaker:DescribeUserProfile",
454 |             "sagemaker:ListUserProfiles",
455 |             "sagemaker:*App",
456 |             "sagemaker:ListApps"
457 |           ],
458 |           resources: [
459 |             "arn:aws:sagemaker:*:*:domain/*",
460 |             "arn:aws:sagemaker:*:*:user-profile/*",
461 |             "arn:aws:sagemaker:*:*:app/*",
462 |             "arn:aws:sagemaker:*:*:flow-definition/*"
463 |           ]
464 |         }),
465 |         new iam.PolicyStatement({
466 |           sid: "SageMakerWorkstream",
467 |           effect: iam.Effect.ALLOW,
468 |           actions: [
469 |             "iam:PassRole" // NOTE(review): iam:PassRole is scoped here to a SageMaker flow-definition ARN, not an IAM role ARN, so this statement likely never matches; confirm whether a sagemaker:* action was intended
470 |           ],
471 |           resources: [
472 |             "arn:aws:sagemaker:" + this.region + ":" + this.account +":flow-definition/*",
473 |           ],
474 |           conditions: {
475 |             StringEqualsIfExists:{
476 |               "sagemaker:WorkteamType": [
477 |                 "private-crowd",
478 |                 "vendor-crowd"
479 |               ]
480 |             }} 
481 |         }),
482 |         new iam.PolicyStatement({
483 |           sid: "IAMPassRoletoService",
484 |           effect: iam.Effect.ALLOW,
485 |           actions: [
486 |             "iam:PassRole"
487 |           ],
488 |           resources: [
489 |             sagemaker_user_profile_role.roleArn
490 |           ],
491 |           conditions: {
492 |             StringLike:{
493 |               "iam:PassedToService": [
494 |                 "glue.amazonaws.com",
495 |                 "robomaker.amazonaws.com",
496 |                 "states.amazonaws.com",
497 |                 "sagemaker.amazonaws.com"
498 |               ]
499 |             }} 
500 |         }),
501 |         new iam.PolicyStatement({
502 |           sid: "KMSUsePermissions",
503 |           effect: iam.Effect.ALLOW,
504 |           actions: [
505 |             "kms:CreateGrant",
506 |             "kms:DescribeKey",
507 |             "kms:Decrypt",
508 |             "kms:Encrypt",
509 |             "kms:GenerateDataKey",
510 |             "kms:ReEncrypt*"
511 |           ],
512 |           resources: [
513 |             sagemaker_kms_key.keyArn,
514 |             athena_s3_output_kms_key.keyArn
515 |           ]   
516 |         }),
517 |         new iam.PolicyStatement({
518 |           sid: "SageMakerWritePermissions",
519 |           effect: iam.Effect.ALLOW,
520 |           actions: [
521 |             "sagemaker:CreateApp"
522 |           ],
523 |           resources: [
524 |             "arn:aws:sagemaker:" + this.region + ":" + this.account +":app/*",
525 |           ]   
526 |         }),
527 |         new iam.PolicyStatement({
528 |           sid: "BedrockReadPermissions",
529 |           effect: iam.Effect.ALLOW,
530 |           actions: [
531 |             "bedrock:ListFoundationModels",
532 |             "bedrock:GetFoundationModel",
533 |             "bedrock:GetModelCustomizationJob", 
534 |             "bedrock:GetFoundationModelAvailability",
535 |             "bedrock:ListModelCustomizationJobs", 
536 |             "bedrock:GetCustomModel", 
537 |             "bedrock:ListCustomModels", 
538 |             "bedrock:GetProvisionedModelThroughput", 
539 |             "bedrock:ListProvisionedModelThroughputs", 
540 |             "bedrock:ListTagsForResource", 
541 |             "bedrock:GetModelInvocationLoggingConfiguration",
542 |             "bedrock:ListFoundationModelAgreementOffers",
543 |             "bedrock:GetUseCaseForModelAccess",
544 |           ],
545 |           resources: [
546 |             "*"
547 |           ]   
548 |         }),new iam.PolicyStatement({
549 |           sid: "BedrockWritePermissions",
550 |           effect: iam.Effect.ALLOW,
551 |           actions: [
552 |             "bedrock:InvokeModel", 
553 |             "bedrock:InvokeModelWithResponseStream", 
554 |             "bedrock:CreateModelCustomizationJob", 
555 |             "bedrock:StopModelCustomizationJob", 
556 |             "bedrock:DeleteCustomModel",
557 |             "bedrock:CreateProvisionedModelThroughput", 
558 |             "bedrock:UpdateProvisionedModelThroughput", 
559 |             "bedrock:DeleteProvisionedModelThroughput", 
560 |             "bedrock:UntagResource", 
561 |             "bedrock:TagResource", 
562 |             "bedrock:PutFoundationModelEntitlement",
563 |             "bedrock:PutModelInvocationLoggingConfiguration",
564 |             "bedrock:CreateFoundationModelAgreement",
565 |             "bedrock:DeleteFoundationModelAgreement",
566 |             "bedrock:PutUseCaseForModelAccess"
567 |           ],
568 |           resources: [
569 |             "arn:aws:bedrock:" + this.region + "::foundation-model/*",
570 |             "arn:aws:bedrock:" + this.region + ":" + this.account +":custom-model/*",
571 |             "arn:aws:bedrock:" + this.region + ":" + this.account +":provisioned-model/*",
572 |             "arn:aws:bedrock:" + this.region + ":" + this.account +":model-customization-job/*",
573 |             "arn:aws:bedrock:" + this.region + ":" + this.account +":agent/*",
574 |             "arn:aws:bedrock:" + this.region + ":" + this.account +":agent-alias/*",
575 |             "arn:aws:bedrock:" + this.region + ":" + this.account +":knowledge-base/*",
576 |           ]   
577 |         }),
578 |       ],
579 |     });
580 |     // Bucket policy statement: lets the user profile role write and delete objects in, and read the location of, the Athena results bucket.
581 |     athena_output_s3_bucket.addToResourcePolicy(new iam.PolicyStatement({
582 |       actions: [
583 |         's3:PutObject',
584 |         's3:PutObjectAcl',
585 |         's3:DeleteObject',
586 |         's3:GetBucketLocation'
587 |       ],
588 |       resources: [
589 |         athena_output_s3_bucket.bucketArn,
590 |         athena_output_s3_bucket.bucketArn + '/*'
591 |       ],
592 |       principals: [
593 |         new iam.ArnPrincipal(sagemaker_user_profile_role.roleArn)],
594 |     }));
595 |     // Wraps sagemaker_user_profile_policy in a named managed policy and attaches it to the user profile role.
596 |     new iam.ManagedPolicy(this, "SageMakerStudioUserProfileManagedPolicy", {
597 |       description: "Managed policy associated to the SageMaker Studios user profile.",
598 |       document:sagemaker_user_profile_policy,
599 |       managedPolicyName: "sagemaker-studio-user-security-lake-policy",
600 |       roles: [sagemaker_user_profile_role]
601 |     });
602 |     // SageMaker domain: IAM auth, VpcOnly networking in the first "workload_subnet_with_nat" subnet, encrypted with sagemaker_kms_key.
603 |     const sagemaker_domain = new CfnDomain(this, "sagemaker_domain", {
604 |       authMode: "IAM",
605 |       defaultUserSettings: {
606 |         executionRole: sagemaker_user_profile_role.roleArn,
607 |         jupyterServerAppSettings: {
608 |           defaultResourceSpec: {
609 |             instanceType: "system",
610 |             // lifecycleConfigArn: "lifecycleConfigArn",
611 |             // sageMakerImageArn: "sageMakerImageArn",
612 |             // sageMakerImageVersionArn: "sageMakerImageVersionArn",
613 |           },
614 |         },
615 |         kernelGatewayAppSettings: {
616 |           // customImages: [{
617 |           //   appImageConfigName: "appImageConfigName",
618 |           //   imageName: "imageName",
619 |     
620 |           //   // the properties below are optional
621 |           //   imageVersionNumber: 123,
622 |           // }],
623 |           defaultResourceSpec: {
624 |             instanceType: "ml.t3.medium",
625 |             // lifecycleConfigArn: "lifecycleConfigArn",
626 |             sageMakerImageArn: "arn:aws:sagemaker:" + this.region + ":081325390199:image/datascience-1.0", // NOTE(review): image owner account id is hard-coded while the region is dynamic; confirm it is valid for every target region
627 |           },
628 |         },
629 |         // rSessionAppSettings: {
630 |         //   customImages: [{
631 |         //     appImageConfigName: "appImageConfigName",
632 |         //     imageName: "imageName",
633 |     
634 |         //     // the properties below are optional
635 |         //     imageVersionNumber: 123,
636 |         //   }],
637 |         //   defaultResourceSpec: {
638 |         //     instanceType: "instanceType",
639 |         //     lifecycleConfigArn: "lifecycleConfigArn",
640 |         //     sageMakerImageArn: "sageMakerImageArn",
641 |         //     sageMakerImageVersionArn: "sageMakerImageVersionArn",
642 |         //   },
643 |         // },
644 |         // rStudioServerProAppSettings: {
645 |         //   accessStatus: "accessStatus",
646 |         //   userGroup: "userGroup",
647 |         // },
648 |         securityGroups: [sagemaker_workload_sg.securityGroupId],
649 |         // sharingSettings: {
650 |         //   notebookOutputOption: "notebookOutputOption",
651 |         //   s3KmsKeyId: "s3KmsKeyId",
652 |         //   s3OutputPath: "s3OutputPath",
653 |         // },
654 |       },
655 |       domainName: "security-lake-gen-ai-" + this.account,
656 |       subnetIds: [sagemaker_vpc.selectSubnets({subnetGroupName: "workload_subnet_with_nat"}).subnets[0].subnetId],
657 |       vpcId: sagemaker_vpc.vpcId,
658 |       // the properties below are optional
659 |       appNetworkAccessType: "VpcOnly",
660 |       // appSecurityGroupManagement: "appSecurityGroupManagement",
661 |       // domainSettings: {
662 |       //   rStudioServerProDomainSettings: {
663 |       //     domainExecutionRoleArn: "domainExecutionRoleArn",
664 |     
665 |       //     // the properties below are optional
666 |       //     defaultResourceSpec: {
667 |       //       instanceType: "instanceType",
668 |       //       lifecycleConfigArn: "lifecycleConfigArn",
669 |       //       sageMakerImageArn: "sageMakerImageArn",
670 |       //       sageMakerImageVersionArn: "sageMakerImageVersionArn",
671 |       //     },
672 |       //     rStudioConnectUrl: "rStudioConnectUrl",
673 |       //     rStudioPackageManagerUrl: "rStudioPackageManagerUrl",
674 |       //   },
675 |       //   securityGroupIds: ["securityGroupIds"],
676 |       // },
677 |       kmsKeyId: sagemaker_kms_key.keyId,
678 |       tags: [{
679 |         key: "project",
680 |         value: "security-lake-gen-ai",
681 |       }],
682 |     });
683 |     // Remove the domain when the stack is deleted.
684 |     sagemaker_domain.applyRemovalPolicy(RemovalPolicy.DESTROY)
685 |     // User profile in the domain, named after the execution role's roleName and running with that same role.
686 |     const sagemaker_user_profile = new CfnUserProfile(this, 'sagemaker_user_profile', {
687 |       domainId: sagemaker_domain.attrDomainId,
688 |       userProfileName: sagemaker_user_profile_role.roleName,
689 |     
690 |       // the properties below are optional
691 |       // singleSignOnUserIdentifier: 'singleSignOnUserIdentifier',
692 |       // singleSignOnUserValue: 'singleSignOnUserValue',
693 |       tags: [{
694 |         key: 'project',
695 |         value: 'security-lake-gen-ai',
696 |       }],
697 |       userSettings: {
698 |         executionRole: sagemaker_user_profile_role.roleArn,
699 |         // jupyterServerAppSettings: {
700 |         //   defaultResourceSpec: {
701 |         //     instanceType: 'instanceType',
702 |         //     sageMakerImageArn: 'sageMakerImageArn',
703 |         //     sageMakerImageVersionArn: 'sageMakerImageVersionArn',
704 |         //   },
705 |         // },
706 |         // kernelGatewayAppSettings: {
707 |         //   customImages: [{
708 |         //     appImageConfigName: 'appImageConfigName',
709 |         //     imageName: 'imageName',
710 |     
711 |         //     // the properties below are optional
712 |         //     imageVersionNumber: 123,
713 |         //   }],
714 |         //   defaultResourceSpec: {
715 |         //     instanceType: 'instanceType',
716 |         //     sageMakerImageArn: 'sageMakerImageArn',
717 |         //     sageMakerImageVersionArn: 'sageMakerImageVersionArn',
718 |         //   },
719 |         // },
720 |         // rStudioServerProAppSettings: {
721 |         //   accessStatus: 'accessStatus',
722 |         //   userGroup: 'userGroup',
723 |         // },
724 |         //securityGroups: ['securityGroups'],
725 |         // sharingSettings: {
726 |         //   notebookOutputOption: 'notebookOutputOption',
727 |         //   s3KmsKeyId: 's3KmsKeyId',
728 |         //   s3OutputPath: 's3OutputPath',
729 |         // },
730 |       },
731 |     });
732 |     // Explicit ordering: the profile is created after the domain; it is removed with the stack.
733 |     sagemaker_user_profile.addDependency(sagemaker_domain)
734 |     sagemaker_user_profile.applyRemovalPolicy(RemovalPolicy.DESTROY)
735 |     // JupyterServer app named 'default' for the user profile, on the 'system' instance type.
736 |     const sagemaker_app = new CfnApp(this, 'sagemaker_app', {
737 |       appName: 'default',
738 |       appType: 'JupyterServer',
739 |       domainId: sagemaker_domain.attrDomainId,
740 |       userProfileName: sagemaker_user_profile.userProfileName,
741 |     
742 |       // the properties below are optional
743 |       resourceSpec: {
744 |         instanceType: 'system'
745 |       },
746 |       tags: [{
747 |         key: 'project',
748 |         value: 'security-lake-gen-ai',
749 |       }],
750 |     });
751 |     // Explicit ordering: the app is created after the user profile; it is removed with the stack.
752 |     sagemaker_app.addDependency(sagemaker_user_profile)
753 |     sagemaker_app.applyRemovalPolicy(RemovalPolicy.DESTROY)
754 |     
755 |     // IAM role for console / presigned-URL access to SageMaker Studio; assumable by the ARN passed in IAM_role_assumption_for_sagemaker_presigned_url
756 |     const sagemaker_console_presigned_url_role = new iam.Role(this, "sagemaker_console_presigned_url_role", {
757 |       assumedBy: new iam.CompositePrincipal(
758 |         new iam.ArnPrincipal(IAM_role_assumption_for_sagemaker_presigned_url.valueAsString),
759 |       ),
760 |       roleName: "sagemaker-console-presigned-url-role",
761 |       // managedPolicies: [
762 |       // ]
763 |     });
764 |     // Policy document for the console role: presigned-URL creation limited to the user profile and a source-IP range, plus read-only lookups.
765 |     const sagemaker_presigned_url_policy = new iam.PolicyDocument({
766 |       statements: [
767 |         new iam.PolicyStatement({
768 |           sid: "SMStudioCreatePresignedURLAllow",
769 |           effect: iam.Effect.ALLOW,
770 |           actions: [
771 |             "sagemaker:CreatePresignedDomainUrl"
772 |           ],
773 |           resources: [
774 |             sagemaker_user_profile.attrUserProfileArn
775 |           ],
776 |           conditions: {
777 |             IpAddress:{
778 |               "aws:SourceIp": [
779 |                 sagemaker_restrict_cidr_presigned_url.valueAsString
780 |               ]
781 |             }}   
782 |         }),
783 |         new iam.PolicyStatement({
784 |           sid: "SMStudioConsoleReadAllow",
785 |           effect: iam.Effect.ALLOW,
786 |           actions: [
787 |             "sagemaker:DescribeDomain",
788 |             "sagemaker:DescribeUserProfile",
789 |             "sagemaker:ListApps",
790 |             "sagemaker:ListDomains",
791 |             "sagemaker:ListUserProfiles",
792 |           ],
793 |           resources: [
794 |             "arn:" + this.partition + ":sagemaker:" + this.region + ":" + this.account + ":domain/*",
795 |             "arn:" + this.partition + ":sagemaker:" + this.region + ":" + this.account + ":user-profile/" + sagemaker_domain.attrDomainId + "/*",
796 |             "arn:" + this.partition + ":sagemaker:" + this.region + ":" + this.account + ":app/" + sagemaker_domain.attrDomainId + "/*"
797 |           ]   
798 |         }),
799 |         new iam.PolicyStatement({
800 |           sid: "SMStudioServiceCatalogReadAllow",
801 |           effect: iam.Effect.ALLOW,
802 |           actions: [
803 |             "license-manager:ListReceivedLicenses",
804 |             "sagemaker:GetSagemakerServicecatalogPortfolioStatus",
805 |             "servicecatalog:ListAcceptedPortfolioShares",
806 |             "servicecatalog:ListPrincipalsForPortfolio"
807 |           ],
808 |           resources: [
809 |             "*"
810 |           ]   
811 |         })
812 |       ],
813 |     });
814 |     // Wraps sagemaker_presigned_url_policy in a named managed policy and attaches it to the console presigned-URL role.
815 |     new iam.ManagedPolicy(this, "SageMakerStudioConsoleManagedPolicy", {
816 |       description: "Managed policy associated to the AWS console role to access SageMaker Studio Domain presigned URL.",
817 |       document:sagemaker_presigned_url_policy,
818 |       managedPolicyName: "sagemaker-studio-console-access-policy",
819 |       roles: [sagemaker_console_presigned_url_role]
820 |     });
821 |     // Key policy statement: lets the user profile role describe, encrypt with, and generate data keys from the Athena output key.
822 |     athena_s3_output_kms_key.addToResourcePolicy(new iam.PolicyStatement({
823 |       actions: [
824 |         'kms:DescribeKey',
825 |         'kms:Encrypt',
826 |         'kms:GenerateDataKey*'
827 |       ],
828 |       resources: [
829 |         '*'
830 |       ],
831 |       principals: [
832 |         new iam.ArnPrincipal(sagemaker_user_profile_role.roleArn)]
833 |     }));
834 |     // Athena workgroup whose enforced configuration writes KMS-encrypted query results to the root of the Athena results bucket.
835 |     const gen_ai_workgroup = new athena.CfnWorkGroup(this, 'gen_ai_workgroup', {
836 |       name: 'security_lake_gen_ai',
837 |       // the properties below are optional
838 |       description: 'Workgroup for Security Lake ML and Gen AI.',
839 |       recursiveDeleteOption: true,
840 |       state: 'ENABLED',
841 |       // tags: [{
842 |       //   key: 'key',
843 |       //   value: 'value',
844 |       // }],
845 |       workGroupConfiguration: {
846 |         // bytesScannedCutoffPerQuery: 10000000,
847 |         enforceWorkGroupConfiguration: true,
848 |         // engineVersion: {
849 |         //   effectiveEngineVersion: 'effectiveEngineVersion',
850 |         //   selectedEngineVersion: 'selectedEngineVersion',
851 |         // },
852 |         publishCloudWatchMetricsEnabled: false,
853 |         requesterPaysEnabled: false,
854 |         resultConfiguration: {
855 |           encryptionConfiguration: {
856 |             encryptionOption: EncryptionOption.KMS,
857 |             kmsKey: athena_s3_output_kms_key.keyArn,
858 |           },
859 |           outputLocation: 's3://' + athena_output_s3_bucket.bucketName + '/',
860 |         },
861 |       },
862 |     });
863 | 
864 |     // Suppress the AwsSolutions-IAM5 finding on the console managed policy; the resource is looked up by a path that hard-codes the stack id "SageMakerDomainStack".
865 |     NagSuppressions.addResourceSuppressionsByPath(this,'/SageMakerDomainStack/SageMakerStudioConsoleManagedPolicy/Resource',
866 |       [
867 |         {
868 |           id: 'AwsSolutions-IAM5',
869 |           reason: 'The specific actions in the SMStudioServiceCatalogReadAllow SID require * resource. The actions are all read-only.',
870 |         },
871 |       ]
872 |     );
873 |     // Suppress the AwsSolutions-IAM5 finding on the user profile managed policy; the path again hard-codes the stack id "SageMakerDomainStack".
874 |     NagSuppressions.addResourceSuppressionsByPath(this,'/SageMakerDomainStack/SageMakerStudioUserProfileManagedPolicy/Resource',
875 |       [
876 |         {
877 |           id: 'AwsSolutions-IAM5',
878 |           reason: 'The specific actions in the S3Read and LakeFormationAllow SID require * resource. The actions are all read-only.',
879 |         },
880 |       ]
881 |     );
882 | 
883 |   }
884 | }
885 | 


--------------------------------------------------------------------------------
/source/notebooks/generative_ai_security_lake.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |  "cells": [
   3 |   {
   4 |    "cell_type": "markdown",
   5 |    "id": "8ca60fbc-28c0-4853-bd28-c4570de71bca",
   6 |    "metadata": {
   7 |     "tags": []
   8 |    },
   9 |    "source": [
  10 |     "## Unlock insights for Amazon Security Lake data using Generative AI leveraging Amazon Bedrock"
  11 |    ]
  12 |   },
  13 |   {
  14 |    "cell_type": "markdown",
  15 |    "id": "4c8fe2a5-70c7-4700-9929-ac45e0b09d5f",
  16 |    "metadata": {},
  17 |    "source": [
  18 |     "This Jupyter Notebook demonstrates how to generate SQL queries from user-provided natural language input with the assistance of the LangChain framework. It shows how you can use Agents and Tools to work with Amazon Security Lake data.\n",
  19 |     "\n",
  20 |     "LangChain is a flexible framework that can integrate with a variety of LLMs. This notebook was written with LangChain version 0.0.345 (and langchain_experimental version 0.0.43) using the \"anthropic.claude-v2\" model from Amazon Bedrock.\n",
  21 |     "\n",
  22 |     "Also, be sure to install the requirements below:"
  23 |    ]
  24 |   },
  25 |   {
  26 |    "cell_type": "code",
  27 |    "execution_count": null,
  28 |    "id": "8716deb3-b5d5-4933-8f03-8425c90ac438",
  29 |    "metadata": {
  30 |     "scrolled": true,
  31 |     "tags": []
  32 |    },
  33 |    "outputs": [],
  34 |    "source": [
  35 |     "!pip install -r requirements.txt --quiet\n",
  36 |     "\n",
  37 |     "#Restart Kernel to use packages\n",
  38 |     "import os\n",
  39 |     "os._exit(00)"
  40 |    ]
  41 |   },
  42 |   {
  43 |    "cell_type": "code",
  44 |    "execution_count": null,
  45 |    "id": "7f1ecc8c-60e6-438a-81ea-50c07b9f872f",
  46 |    "metadata": {
  47 |     "tags": []
  48 |    },
  49 |    "outputs": [],
  50 |    "source": [
  51 |     "import langchain_experimental, langchain\n",
  52 |     "import matplotlib, pandas\n",
  53 |     "\n",
  54 |     "print(\"langchain.__version__: \", langchain.__version__)\n",
  55 |     "print(\"langchain_experimental.__version__: \", langchain_experimental.__version__)"
  56 |    ]
  57 |   },
  58 |   {
  59 |    "cell_type": "markdown",
  60 |    "id": "4322a28d-9b65-4a5f-a248-608f8595f58c",
  61 |    "metadata": {
  62 |     "tags": []
  63 |    },
  64 |    "source": [
  65 |     "## Connect to Security Lake database using SQLAlchemy"
  66 |    ]
  67 |   },
  68 |   {
  69 |    "cell_type": "code",
  70 |    "execution_count": null,
  71 |    "id": "049702d3-019b-471c-8755-63c6db137b28",
  72 |    "metadata": {
  73 |     "tags": []
  74 |    },
  75 |    "outputs": [],
  76 |    "source": [
  77 |     "import os\n",
  78 |     "ACCOUNT_ID = os.environ[\"AWS_ACCOUNT_ID\"]\n",
  79 |     "REGION_NAME = os.environ.get('REGION_NAME', 'us-east-1')\n",
  80 |     "REGION_FMT = REGION_NAME.replace(\"-\",\"_\")"
  81 |    ]
  82 |   },
  83 |   {
  84 |    "cell_type": "code",
  85 |    "execution_count": null,
  86 |    "id": "baffc725-10ca-4cf7-a062-aeb5d1c34b8e",
  87 |    "metadata": {
  88 |     "tags": []
  89 |    },
  90 |    "outputs": [],
  91 |    "source": [
  92 |     "from langchain import SQLDatabase\n",
  93 |     "from sqlalchemy import create_engine\n",
  94 |     "\n",
  95 |     "#Amazon Security Lake Database\n",
  96 |     "SCHEMA_NAME = f\"amazon_security_lake_glue_db_{REGION_FMT}\"\n",
  97 |     "\n",
  98 |     "#S3 staging location for Athena query results; the bucket is created by deploying the CloudFormation stack\n",
  99 |     "S3_STAGING_DIR = f's3://athena-gen-ai-bucket-results-{ACCOUNT_ID}/output/'\n",
 100 |     "\n",
 101 |     "#AWS region where the Amazon Security lake database is created\n",
 102 |     "\n",
 103 |     "\n",
 104 |     "engine_athena = create_engine(\n",
 105 |     "    \"awsathena+rest://@athena.{}.amazonaws.com:443/{}?s3_staging_dir={}\".\n",
 106 |     "    format(REGION_NAME, SCHEMA_NAME, S3_STAGING_DIR)\n",
 107 |     ")\n",
 108 |     "\n",
 109 |     "athena_db = SQLDatabase(engine_athena)\n",
 110 |     "db = athena_db"
 111 |    ]
 112 |   },
 113 |   {
 114 |    "cell_type": "markdown",
 115 |    "id": "82166bc8-3788-47a8-8fa4-ad2ac4d4e86b",
 116 |    "metadata": {
 117 |     "tags": []
 118 |    },
 119 |    "source": [
 120 |     "## Define the LLM and the endpoint URL used to invoke it: claude-v2 from Anthropic, available within Amazon Bedrock"
 121 |    ]
 122 |   },
 123 |   {
 124 |    "cell_type": "markdown",
 125 |    "id": "898a43eb-bf6a-4930-b503-fc70b10785d8",
 126 |    "metadata": {},
 127 |    "source": [
 128 |     "Claude v2 is Anthropic's most powerful model, which excels at a wide range of tasks from sophisticated dialogue and creative content generation to detailed instruction following.\n",
 129 |     "There is also another faster and cheaper model available from Anthropic which is Claude Instant v1.2."
 130 |    ]
 131 |   },
 132 |   {
 133 |    "cell_type": "code",
 134 |    "execution_count": null,
 135 |    "id": "df2bed95-6f02-4bfe-909d-75ca2e464b71",
 136 |    "metadata": {
 137 |     "tags": []
 138 |    },
 139 |    "outputs": [],
 140 |    "source": [
 141 |     "from langchain.llms.bedrock import Bedrock\n",
 142 |     "import os\n",
 143 |     "\n",
 144 |     "model_id= \"anthropic.claude-v2\"\n",
 145 |     "\n",
 146 |     "llm = Bedrock(\n",
 147 |     "    model_id=model_id,\n",
 148 |     "    # Do not need to provide - defaults to this notebook's region -- https://api.python.langchain.com/en/latest/llms/langchain.llms.bedrock.Bedrock.html#langchain.llms.bedrock.Bedrock.region_name\n",
 149 |     "    # region_name=region_name,\n",
 150 |     "    endpoint_url=f\"https://bedrock-runtime.{REGION_NAME}.amazonaws.com\",\n",
 151 |     ")\n",
 152 |     "\n",
 153 |     "llm.model_kwargs = {'temperature':0.0,\n",
 154 |     "                    'top_k':0,\n",
 155 |     "                    'max_tokens_to_sample': 4096}"
 156 |    ]
 157 |   },
 158 |   {
 159 |    "cell_type": "markdown",
 160 |    "id": "0cfd682c-637e-4169-b1a1-f84babe41d02",
 161 |    "metadata": {
 162 |     "tags": []
 163 |    },
 164 |    "source": []
 165 |   },
 166 |   {
 167 |    "cell_type": "code",
 168 |    "execution_count": null,
 169 |    "id": "b5cd0b94-643a-45d7-81f6-92ec21bd34cb",
 170 |    "metadata": {},
 171 |    "outputs": [],
 172 |    "source": []
 173 |   },
 174 |   {
 175 |    "cell_type": "markdown",
 176 |    "id": "0caf523f-7d72-43a2-b1a1-37503d0b4874",
 177 |    "metadata": {
 178 |     "tags": []
 179 |    },
 180 |    "source": [
 181 |     "## Provide list of tools for Agent"
 182 |    ]
 183 |   },
 184 |   {
 185 |    "cell_type": "markdown",
 186 |    "id": "10f0fd75-4bcb-4a48-9254-07e8700a3984",
 187 |    "metadata": {
 188 |     "tags": []
 189 |    },
 190 |    "source": [
 191 |     "### Create Custom tools"
 192 |    ]
 193 |   },
 194 |   {
 195 |    "cell_type": "markdown",
 196 |    "id": "7fc434c7-b08b-47a9-91c5-dfc2657cce4c",
 197 |    "metadata": {},
 198 |    "source": [
 199 |     "Tools are interfaces that an agent can use to interact with the outside world. Here we will be using SQL and Python tools to help the agent determine the right action."
 200 |    ]
 201 |   },
 202 |   {
 203 |    "cell_type": "code",
 204 |    "execution_count": null,
 205 |    "id": "571b9841-dd17-4d67-9ce6-cbba40c529fd",
 206 |    "metadata": {
 207 |     "tags": []
 208 |    },
 209 |    "outputs": [],
 210 |    "source": [
 211 |     "from langchain.tools.sql_database.tool import InfoSQLDatabaseTool, QuerySQLDataBaseTool\n",
 212 |     "\n",
 213 |     "class InfoSQLDatabaseTool_custom(InfoSQLDatabaseTool):\n",
 214 |     "    name= \"sql_db_schema_and_sample_rows\"\n",
 215 |     "    description= '\\n    Input to this tool is a comma-separated list of tables, output is the schema and sample rows for those tables.    \\n\\n    Example Input: \"table1, table2, table3\"\\n    '\n",
 216 |     "    \n",
 217 |     "    def _run(self, tables: list[str]) -> str:\n",
 218 |     "        list_tables= tables.replace(' ', '').split(',')\n",
 219 |     "        \n",
 220 |     "        schema_str= ''\n",
 221 |     "        for table in list_tables:\n",
 222 |     "            schema_rows_str= super()._run(table)\n",
 223 |     "            row_str= schema_rows_str[schema_rows_str.find('/*'):]   \n",
 224 |     "            \n",
 225 |     "            schema_rows= QuerySQLDataBaseTool(db=db)._run(f\"SHOW CREATE TABLE `{table}`\")[3:-4]\n",
 226 |     "            schema_rows_formatted=schema_rows.replace(\"',), ('  \", '\\n').replace(\"',), ('\", '\\n').replace(\"',), (\\\"  \", '\\n').replace(', \",), (\"  ', '\\n')\n",
 227 |     "            \n",
 228 |     "            schema_str+= row_str + '\\n' + schema_rows_formatted + '\\n\\n'\n",
 229 |     "        \n",
 230 |     "        return schema_str.strip()"
 231 |    ]
 232 |   },
 233 |   {
 234 |    "cell_type": "code",
 235 |    "execution_count": null,
 236 |    "id": "9fcd4bda-138c-4638-9c08-2e7bd02367a9",
 237 |    "metadata": {
 238 |     "tags": []
 239 |    },
 240 |    "outputs": [],
 241 |    "source": [
 242 |     "from langchain.tools.sql_database.tool import QuerySQLDataBaseTool\n",
 243 |     "QuerySQLDataBaseTool_desc= '\\n    Input to this tool is a detailed and correct SQL query, output is a result from the database.\\n    This tool gives access to a real databse.\\n    If the query does not return anything or return blank results, it means the query is correct and returned 0 rows.\\n    If the query is not correct, an error message will be returned.\\n    If an error is returned, re-examine the database using the `sql_db_schema_and_sample_rows` tool, rewrite the query, check the query, and try again.\\n    '\n",
 244 |     "import time\n",
 245 |     "\n",
 246 |     "class QuerySQLDatabaseTool_custom(QuerySQLDataBaseTool):\n",
 247 |     "    name= \"sql_db_query\"\n",
 248 |     "    description= QuerySQLDataBaseTool_desc\n",
 249 |     "        \n",
 250 |     "    def _run(self, query: str) -> str:\n",
 251 |     "        print()\n",
 252 |     "        print('*'*10)            \n",
 253 |     "        print(\"Query passed to sql_db_query tool by llm: \\n\", query)\n",
 254 |     "        print('*'*10)\n",
 255 |     "        print()\n",
 256 |     "        \n",
 257 |     "        return super()._run(query.strip())"
 258 |    ]
 259 |   },
 260 |   {
 261 |    "cell_type": "code",
 262 |    "execution_count": null,
 263 |    "id": "3b3e2769-4bb9-4848-b901-4e1bb1896d56",
 264 |    "metadata": {},
 265 |    "outputs": [],
 266 |    "source": []
 267 |   },
 268 |   {
 269 |    "cell_type": "markdown",
 270 |    "id": "d0d0f303-ef43-4ca1-b1eb-bc71a6a828da",
 271 |    "metadata": {
 272 |     "tags": []
 273 |    },
 274 |    "source": [
 275 |     "### Initialize tools and create a list"
 276 |    ]
 277 |   },
 278 |   {
 279 |    "cell_type": "code",
 280 |    "execution_count": null,
 281 |    "id": "01dfc3a0-49ab-4b3b-a09a-e8309d05c62d",
 282 |    "metadata": {
 283 |     "tags": []
 284 |    },
 285 |    "outputs": [],
 286 |    "source": [
 287 |     "from langchain.tools.sql_database.tool import ListSQLDatabaseTool, QuerySQLCheckerTool\n",
 288 |     "from langchain_experimental.tools import PythonREPLTool\n",
 289 |     "\n",
 290 |     "tools = [\n",
 291 |     "QuerySQLDatabaseTool_custom(db=db, description= QuerySQLDataBaseTool_desc),\n",
 292 |     "ListSQLDatabaseTool(db=db),\n",
 293 |     "PythonREPLTool(),\n",
 294 |     "InfoSQLDatabaseTool_custom(db=db),\n",
 295 |     "]\n"
 296 |    ]
 297 |   },
 298 |   {
 299 |    "cell_type": "code",
 300 |    "execution_count": null,
 301 |    "id": "fa97c0eb-757c-417f-83ff-a0c90357255f",
 302 |    "metadata": {},
 303 |    "outputs": [],
 304 |    "source": []
 305 |   },
 306 |   {
 307 |    "cell_type": "markdown",
 308 |    "id": "18505687-bcfa-4cb3-b99c-b9e9f02483c6",
 309 |    "metadata": {
 310 |     "tags": []
 311 |    },
 312 |    "source": [
 313 |     "## Custom output parser"
 314 |    ]
 315 |   },
 316 |   {
 317 |    "cell_type": "markdown",
 318 |    "id": "9c2b45d1-4029-4c9c-8254-c4c4db182f58",
 319 |    "metadata": {},
 320 |    "source": [
 321 |     "Use this to ensure that Claude's replies (via Bedrock) follow a format that is consistent with what the agent expects"
 322 |    ]
 323 |   },
 324 |   {
 325 |    "cell_type": "code",
 326 |    "execution_count": null,
 327 |    "id": "b297f22d-a0e1-409c-87c2-88bd5f9ab609",
 328 |    "metadata": {
 329 |     "tags": []
 330 |    },
 331 |    "outputs": [],
 332 |    "source": [
 333 |     "claude_instructions_for_agent = \"\"\"To use a tool, please use the following format:\\n\\nThought: Do I need to use a tool? Yes\\nAction: the action to take, should be one of [{tool_names}]\\nAction Input: the input to the action\\nObservation: the result of the action\\n\\nWhen you have a response to say to the Human, or if you do not need to use a tool, you MUST use the format:\\n\\n{ai_prefix}:[your response here]\"\"\"\n",
 334 |     "print(claude_instructions_for_agent)"
 335 |    ]
 336 |   },
 337 |   {
 338 |    "cell_type": "code",
 339 |    "execution_count": null,
 340 |    "id": "803cfcfc-d157-414d-a85f-98f70f1af0cc",
 341 |    "metadata": {
 342 |     "tags": []
 343 |    },
 344 |    "outputs": [],
 345 |    "source": [
 346 |     "from langchain.memory import ConversationBufferMemory\n",
 347 |     "from langchain.agents import initialize_agent,AgentType,AgentOutputParser\n",
 348 |     "from langchain.schema import AgentAction,AgentFinish\n",
 349 |     "from langchain.memory import ConversationBufferMemory\n",
 350 |     "from typing import Union\n",
 351 |     "import re\n",
 352 |     "\n",
 353 |     "class CustomConvoOutputParser(AgentOutputParser):\n",
 354 |     "    \"\"\"Output parser for the conversational agent.\"\"\"\n",
 355 |     "\n",
 356 |     "    ai_prefix: str = \"AI\"\n",
 357 |     "    \"\"\"Prefix to use before AI output.\"\"\"\n",
 358 |     "\n",
 359 |     "    def get_format_instructions(self) -> str:\n",
 360 |     "        return claude_instructions_for_agent\n",
 361 |     "\n",
 362 |     "    def parse(self, text: str) -> Union[AgentAction, AgentFinish]:\n",
 363 |     "        regex = r\"Action: (.*?)[\\n]*Action Input:[\\s+]*([\\S\\s]*)\"\n",
 364 |     "        match = re.search(regex, text)\n",
 365 |     "        if not match:\n",
 366 |     "            return AgentFinish(\n",
 367 |     "                {\"output\": text.split(f\"{self.ai_prefix}:\")[-1].strip()}, text\n",
 368 |     "            )\n",
 369 |     "        action = match.group(1)\n",
 370 |     "        action_input = match.group(2)\n",
 371 |     "        return AgentAction(action.strip().replace('\\n', ' '), action_input.replace('\\n', ' ').strip(\" \").strip('\"'), text)\n",
 372 |     "\n",
 373 |     "    @property\n",
 374 |     "    def _type(self) -> str:\n",
 375 |     "        return \"conversational\"\n"
 376 |    ]
 377 |   },
 378 |   {
 379 |    "cell_type": "code",
 380 |    "execution_count": null,
 381 |    "id": "b895240b-c81c-4886-bfbb-fb48a5832d38",
 382 |    "metadata": {},
 383 |    "outputs": [],
 384 |    "source": []
 385 |   },
 386 |   {
 387 |    "cell_type": "markdown",
 388 |    "id": "09c9b3b6-ef55-450e-90f1-15728348d7de",
 389 |    "metadata": {
 390 |     "tags": []
 391 |    },
 392 |    "source": [
 393 |     "## Adding Conversation Buffer Memory"
 394 |    ]
 395 |   },
 396 |   {
 397 |    "cell_type": "markdown",
 398 |    "id": "1bcb2d09-5dec-4a99-86a0-488dfd5ad374",
 399 |    "metadata": {},
 400 |    "source": [
 401 |     "You can also load messages into a BufferMemory instance by creating and passing in a ChatHistory object. This lets you easily pick up state from past conversations."
 402 |    ]
 403 |   },
 404 |   {
 405 |    "cell_type": "code",
 406 |    "execution_count": null,
 407 |    "id": "23d2fa83-1b41-4fde-b1c2-26fe30f4f070",
 408 |    "metadata": {
 409 |     "tags": []
 410 |    },
 411 |    "outputs": [],
 412 |    "source": [
 413 |     "from langchain.memory import ConversationBufferMemory\n",
 414 |     "\n",
 415 |     "memory = ConversationBufferMemory(memory_key=\"chat_history\")"
 416 |    ]
 417 |   },
 418 |   {
 419 |    "cell_type": "markdown",
 420 |    "id": "91a62b44-1200-4b04-be0f-abc2cda37699",
 421 |    "metadata": {
 422 |     "tags": []
 423 |    },
 424 |    "source": [
 425 |     "## Initialize the Agent"
 426 |    ]
 427 |   },
 428 |   {
 429 |    "cell_type": "markdown",
 430 |    "id": "b56cfda8-7fd1-49fc-a1c9-8965761cd954",
 431 |    "metadata": {},
 432 |    "source": [
 433 |     "Agents use an LLM to determine which actions to take and in what order. An action can either be using a tool and observing its output, or returning to the user."
 434 |    ]
 435 |   },
 436 |   {
 437 |    "cell_type": "code",
 438 |    "execution_count": null,
 439 |    "id": "46eb697d-2c45-4670-b146-5565c8954579",
 440 |    "metadata": {
 441 |     "tags": []
 442 |    },
 443 |    "outputs": [],
 444 |    "source": [
 445 |     "from langchain.agents import initialize_agent\n",
 446 |     "\n",
 447 |     "conversational_agent = initialize_agent(\n",
 448 |     "    agent=\"conversational-react-description\",\n",
 449 |     "    tools=tools,\n",
 450 |     "    llm= llm,\n",
 451 |     "    verbose=True,  # Show its work. Set this to False if you're only interested in the final output\n",
 452 |     "    # return_direct=True,  # Return the results without sending back to the LLM. False by default\n",
 453 |     "    max_iterations=None,\n",
 454 |     "    memory=memory,\n",
 455 |     "    handle_parsing_errors=False,\n",
 456 |     "    agent_kwargs={'format_instructions':claude_instructions_for_agent,'output_parser':CustomConvoOutputParser()}\n",
 457 |     ")"
 458 |    ]
 459 |   },
 460 |   {
 461 |    "cell_type": "markdown",
 462 |    "id": "388e699a-5812-4b83-bdcb-628e547ec6c5",
 463 |    "metadata": {
 464 |     "tags": []
 465 |    },
 466 |    "source": [
 467 |     "## Provide instructions to the Agent on how to use Tools."
 468 |    ]
 469 |   },
 470 |   {
 471 |    "cell_type": "code",
 472 |    "execution_count": null,
 473 |    "id": "a93b0e50-76be-4686-a17d-dfa31ecef241",
 474 |    "metadata": {
 475 |     "tags": []
 476 |    },
 477 |    "outputs": [],
 478 |    "source": [
 479 |     "conversational_agent.agent.llm_chain.prompt.template= conversational_agent.agent.llm_chain.prompt.template[conversational_agent.agent.llm_chain.prompt.template.find(\"TOOLS:\\n------\"):]"
 480 |    ]
 481 |   },
 482 |   {
 483 |    "cell_type": "code",
 484 |    "execution_count": null,
 485 |    "id": "afc38ffd-b555-42b5-828b-069eb8987867",
 486 |    "metadata": {
 487 |     "tags": []
 488 |    },
 489 |    "outputs": [],
 490 |    "source": [
 491 |     "phrase= \"To use a tool\"\n",
 492 |     "\n",
 493 |     "index= conversational_agent.agent.llm_chain.prompt.template.find(phrase)\n",
 494 |     "\n",
 495 |     "primer= \"\\nINSTRUCTIONS:\\n-------------\\n\\n\"\n",
 496 |     "\n",
 497 |     "conversational_agent.agent.llm_chain.prompt.template= conversational_agent.agent.llm_chain.prompt.template[:index] + primer + conversational_agent.agent.llm_chain.prompt.template[index:]\n"
 498 |    ]
 499 |   },
 500 |   {
 501 |    "cell_type": "code",
 502 |    "execution_count": null,
 503 |    "id": "386831ee-3328-4721-8727-f45fb5fae5c7",
 504 |    "metadata": {
 505 |     "tags": []
 506 |    },
 507 |    "outputs": [],
 508 |    "source": [
 509 |     "phrase= \"Begin!\"\n",
 510 |     "\n",
 511 |     "primer= '''\\n\\nFor the questions being asked, ALWAYS use all the tools in a sequence defined in the <sequence> tags without skipping tools to generate an answer\\n\\n<sequence> sql_db_list_tables -> sql_db_schema_and_sample_rows -> sql_db_query </sequence>\\n\\n-ALWAYS generate a SQL Query after examining the database using the `sql_db_schema_and_sample_rows` tool.\\n-Execute the SQL Query using the `sql_db_query` tool.\\n-NEVER generate results without querying the database using the `sql_db_query` tool. Execute all steps using tools, without pausing for input from user.\\n-ALWAYS generate an answer after examining `Observation` from tool's response.\\n\\nPay attention to SQL Queries generated.\\n- Do not use colon `:` in the SQL Query. It causes this error \"Error: (sqlalchemy.exc.InvalidRequestError) A value is required for bind parameter\".\\n- Avoid using aliases(`as` clause) in the SQL Query.\\n- When querying column of `string` type, use single quotes ' in SQL Query for casting to string.\\n- ALWAYS use the GROUP BY clause for columns you want to query.\\n- Don't use table JOIN, unless you absolutely have to.\\n- Do not use backtick ` in the SQL Query.\\n\\nONLY when asked to generate figure/charts/plots for results, generate code to show the figure/charts/plots. Then execute the generated code using the Python_REPL tool to make sure the code successfully generates the figure.\\n\\n\\n'''\n",
 512 |     "\n",
 513 |     "index= conversational_agent.agent.llm_chain.prompt.template.find(phrase)\n",
 514 |     "\n",
 515 |     "conversational_agent.agent.llm_chain.prompt.template= conversational_agent.agent.llm_chain.prompt.template[:index] + primer + conversational_agent.agent.llm_chain.prompt.template[index:]\n",
 516 |     "\n"
 517 |    ]
 518 |   },
 519 |   {
 520 |    "cell_type": "code",
 521 |    "execution_count": null,
 522 |    "id": "21f4fe8d-2ab1-467e-8411-a8dfd35da2b8",
 523 |    "metadata": {
 524 |     "tags": []
 525 |    },
 526 |    "outputs": [],
 527 |    "source": [
 528 |     "print(conversational_agent.agent.llm_chain.prompt.template)"
 529 |    ]
 530 |   },
 531 |   {
 532 |    "cell_type": "code",
 533 |    "execution_count": null,
 534 |    "id": "7a938670-8b3e-47c0-99b6-b2fbd617d93a",
 535 |    "metadata": {
 536 |     "tags": []
 537 |    },
 538 |    "outputs": [],
 539 |    "source": [
 540 |     "# Ignore the warnings raised for Claude about alternating between Human and Assistant turns\n",
 541 |     "import warnings\n",
 542 |     "warnings.filterwarnings('ignore')"
 543 |    ]
 544 |   },
 545 |   {
 546 |    "cell_type": "code",
 547 |    "execution_count": null,
 548 |    "id": "c8ce3b1a-4f80-41d9-a40d-9e214f89964c",
 549 |    "metadata": {},
 550 |    "outputs": [],
 551 |    "source": []
 552 |   },
 553 |   {
 554 |    "cell_type": "markdown",
 555 |    "id": "7ac44244-a035-40dd-ae50-5de00e81d86d",
 556 |    "metadata": {
 557 |     "tags": []
 558 |    },
 559 |    "source": [
 560 |     "## A Security Threat Hunter's conversation with the agent"
 561 |    ]
 562 |   },
 563 |   {
 564 |    "cell_type": "markdown",
 565 |    "id": "721145ec",
 566 |    "metadata": {},
 567 |    "source": [
 568 |     "Type your question in the input dialog box and hit enter. To break out of the loop, type exit and hit enter. Use the up and down arrows on your keyboard to view previous questions."
 569 |    ]
 570 |   },
 571 |   {
 572 |    "cell_type": "code",
 573 |    "execution_count": null,
 574 |    "id": "2d2487ec-5469-4e0d-a72a-356ba4383c35",
 575 |    "metadata": {
 576 |     "tags": []
 577 |    },
 578 |    "outputs": [],
 579 |    "source": [
 580 |     "while True:\n",
 581 |     "    user_input = input(\"\")\n",
 582 |     "    if user_input=='exit': break\n",
 583 |     "    print(conversational_agent.run(user_input))\n",
 584 |     "    print()\n",
 585 |     "    print('-'*100)"
 586 |    ]
 587 |   },
 588 |   {
 589 |    "cell_type": "code",
 590 |    "execution_count": null,
 591 |    "id": "740b9de8-c33c-41c3-ba16-8113bd82af59",
 592 |    "metadata": {},
 593 |    "outputs": [],
 594 |    "source": []
 595 |   },
 596 |   {
 597 |    "cell_type": "code",
 598 |    "execution_count": null,
 599 |    "id": "34a7d94b-2352-40d8-bcc1-a6e2e116fdd9",
 600 |    "metadata": {},
 601 |    "outputs": [],
 602 |    "source": []
 603 |   },
 604 |   {
 605 |    "cell_type": "markdown",
 606 |    "id": "bb42c26e-eabc-4b1d-99b2-cf8b8f0765b4",
 607 |    "metadata": {
 608 |     "tags": []
 609 |    },
 610 |    "source": [
 611 |     "## Concluding thoughts"
 612 |    ]
 613 |   },
 614 |   {
 615 |    "cell_type": "markdown",
 616 |    "id": "1218cb3a-24fd-4f13-8089-90a6d60b0f56",
 617 |    "metadata": {},
 618 |    "source": [
 619 |     "The example use case and run in this Notebook are a product of several prompt instruction trials to combat errors we encountered in using this tool.\n",
 620 |     "\n",
 621 |     "The agent can sporadically generate fabricated answers without using tools. If this happens, prompt the agent to use tools (e.g., \"use tools to answer the questions\"). Clearing memory might help further (cell: Adding Conversation Buffer Memory)\n",
 622 |     "\n",
 623 |     "The PythonREPL tool is currently being used to generate code only; the generated code is then run separately in a cell to show/save plots.\n",
 624 |     "\n",
 625 |     "Try different models hosted in Amazon Bedrock!"
 626 |    ]
 627 |   },
 628 |   {
 629 |    "cell_type": "code",
 630 |    "execution_count": null,
 631 |    "id": "f982181b-b431-4d7b-b196-7294975258f7",
 632 |    "metadata": {},
 633 |    "outputs": [],
 634 |    "source": []
 635 |   },
 636 |   {
 637 |    "cell_type": "code",
 638 |    "execution_count": null,
 639 |    "id": "4c4915eb-108e-4c97-bdf8-ff26d983846b",
 640 |    "metadata": {},
 641 |    "outputs": [],
 642 |    "source": []
 643 |   },
 644 |   {
 645 |    "cell_type": "code",
 646 |    "execution_count": null,
 647 |    "id": "1f126b9b-839a-4686-8bd2-aea6b4a4822c",
 648 |    "metadata": {},
 649 |    "outputs": [],
 650 |    "source": []
 651 |   },
 652 |   {
 653 |    "cell_type": "code",
 654 |    "execution_count": null,
 655 |    "id": "75ac998f-2901-4b1e-9b50-63f7d1d2fa9a",
 656 |    "metadata": {},
 657 |    "outputs": [],
 658 |    "source": []
 659 |   },
 660 |   {
 661 |    "cell_type": "code",
 662 |    "execution_count": null,
 663 |    "id": "9fd4f811-ee0d-476e-9487-dd0b1bb578d5",
 664 |    "metadata": {},
 665 |    "outputs": [],
 666 |    "source": []
 667 |   }
 668 |  ],
 669 |  "metadata": {
 670 |   "availableInstances": [
 671 |    {
 672 |     "_defaultOrder": 0,
 673 |     "_isFastLaunch": true,
 674 |     "category": "General purpose",
 675 |     "gpuNum": 0,
 676 |     "hideHardwareSpecs": false,
 677 |     "memoryGiB": 4,
 678 |     "name": "ml.t3.medium",
 679 |     "vcpuNum": 2
 680 |    },
 681 |    {
 682 |     "_defaultOrder": 1,
 683 |     "_isFastLaunch": false,
 684 |     "category": "General purpose",
 685 |     "gpuNum": 0,
 686 |     "hideHardwareSpecs": false,
 687 |     "memoryGiB": 8,
 688 |     "name": "ml.t3.large",
 689 |     "vcpuNum": 2
 690 |    },
 691 |    {
 692 |     "_defaultOrder": 2,
 693 |     "_isFastLaunch": false,
 694 |     "category": "General purpose",
 695 |     "gpuNum": 0,
 696 |     "hideHardwareSpecs": false,
 697 |     "memoryGiB": 16,
 698 |     "name": "ml.t3.xlarge",
 699 |     "vcpuNum": 4
 700 |    },
 701 |    {
 702 |     "_defaultOrder": 3,
 703 |     "_isFastLaunch": false,
 704 |     "category": "General purpose",
 705 |     "gpuNum": 0,
 706 |     "hideHardwareSpecs": false,
 707 |     "memoryGiB": 32,
 708 |     "name": "ml.t3.2xlarge",
 709 |     "vcpuNum": 8
 710 |    },
 711 |    {
 712 |     "_defaultOrder": 4,
 713 |     "_isFastLaunch": true,
 714 |     "category": "General purpose",
 715 |     "gpuNum": 0,
 716 |     "hideHardwareSpecs": false,
 717 |     "memoryGiB": 8,
 718 |     "name": "ml.m5.large",
 719 |     "vcpuNum": 2
 720 |    },
 721 |    {
 722 |     "_defaultOrder": 5,
 723 |     "_isFastLaunch": false,
 724 |     "category": "General purpose",
 725 |     "gpuNum": 0,
 726 |     "hideHardwareSpecs": false,
 727 |     "memoryGiB": 16,
 728 |     "name": "ml.m5.xlarge",
 729 |     "vcpuNum": 4
 730 |    },
 731 |    {
 732 |     "_defaultOrder": 6,
 733 |     "_isFastLaunch": false,
 734 |     "category": "General purpose",
 735 |     "gpuNum": 0,
 736 |     "hideHardwareSpecs": false,
 737 |     "memoryGiB": 32,
 738 |     "name": "ml.m5.2xlarge",
 739 |     "vcpuNum": 8
 740 |    },
 741 |    {
 742 |     "_defaultOrder": 7,
 743 |     "_isFastLaunch": false,
 744 |     "category": "General purpose",
 745 |     "gpuNum": 0,
 746 |     "hideHardwareSpecs": false,
 747 |     "memoryGiB": 64,
 748 |     "name": "ml.m5.4xlarge",
 749 |     "vcpuNum": 16
 750 |    },
 751 |    {
 752 |     "_defaultOrder": 8,
 753 |     "_isFastLaunch": false,
 754 |     "category": "General purpose",
 755 |     "gpuNum": 0,
 756 |     "hideHardwareSpecs": false,
 757 |     "memoryGiB": 128,
 758 |     "name": "ml.m5.8xlarge",
 759 |     "vcpuNum": 32
 760 |    },
 761 |    {
 762 |     "_defaultOrder": 9,
 763 |     "_isFastLaunch": false,
 764 |     "category": "General purpose",
 765 |     "gpuNum": 0,
 766 |     "hideHardwareSpecs": false,
 767 |     "memoryGiB": 192,
 768 |     "name": "ml.m5.12xlarge",
 769 |     "vcpuNum": 48
 770 |    },
 771 |    {
 772 |     "_defaultOrder": 10,
 773 |     "_isFastLaunch": false,
 774 |     "category": "General purpose",
 775 |     "gpuNum": 0,
 776 |     "hideHardwareSpecs": false,
 777 |     "memoryGiB": 256,
 778 |     "name": "ml.m5.16xlarge",
 779 |     "vcpuNum": 64
 780 |    },
 781 |    {
 782 |     "_defaultOrder": 11,
 783 |     "_isFastLaunch": false,
 784 |     "category": "General purpose",
 785 |     "gpuNum": 0,
 786 |     "hideHardwareSpecs": false,
 787 |     "memoryGiB": 384,
 788 |     "name": "ml.m5.24xlarge",
 789 |     "vcpuNum": 96
 790 |    },
 791 |    {
 792 |     "_defaultOrder": 12,
 793 |     "_isFastLaunch": false,
 794 |     "category": "General purpose",
 795 |     "gpuNum": 0,
 796 |     "hideHardwareSpecs": false,
 797 |     "memoryGiB": 8,
 798 |     "name": "ml.m5d.large",
 799 |     "vcpuNum": 2
 800 |    },
 801 |    {
 802 |     "_defaultOrder": 13,
 803 |     "_isFastLaunch": false,
 804 |     "category": "General purpose",
 805 |     "gpuNum": 0,
 806 |     "hideHardwareSpecs": false,
 807 |     "memoryGiB": 16,
 808 |     "name": "ml.m5d.xlarge",
 809 |     "vcpuNum": 4
 810 |    },
 811 |    {
 812 |     "_defaultOrder": 14,
 813 |     "_isFastLaunch": false,
 814 |     "category": "General purpose",
 815 |     "gpuNum": 0,
 816 |     "hideHardwareSpecs": false,
 817 |     "memoryGiB": 32,
 818 |     "name": "ml.m5d.2xlarge",
 819 |     "vcpuNum": 8
 820 |    },
 821 |    {
 822 |     "_defaultOrder": 15,
 823 |     "_isFastLaunch": false,
 824 |     "category": "General purpose",
 825 |     "gpuNum": 0,
 826 |     "hideHardwareSpecs": false,
 827 |     "memoryGiB": 64,
 828 |     "name": "ml.m5d.4xlarge",
 829 |     "vcpuNum": 16
 830 |    },
 831 |    {
 832 |     "_defaultOrder": 16,
 833 |     "_isFastLaunch": false,
 834 |     "category": "General purpose",
 835 |     "gpuNum": 0,
 836 |     "hideHardwareSpecs": false,
 837 |     "memoryGiB": 128,
 838 |     "name": "ml.m5d.8xlarge",
 839 |     "vcpuNum": 32
 840 |    },
 841 |    {
 842 |     "_defaultOrder": 17,
 843 |     "_isFastLaunch": false,
 844 |     "category": "General purpose",
 845 |     "gpuNum": 0,
 846 |     "hideHardwareSpecs": false,
 847 |     "memoryGiB": 192,
 848 |     "name": "ml.m5d.12xlarge",
 849 |     "vcpuNum": 48
 850 |    },
 851 |    {
 852 |     "_defaultOrder": 18,
 853 |     "_isFastLaunch": false,
 854 |     "category": "General purpose",
 855 |     "gpuNum": 0,
 856 |     "hideHardwareSpecs": false,
 857 |     "memoryGiB": 256,
 858 |     "name": "ml.m5d.16xlarge",
 859 |     "vcpuNum": 64
 860 |    },
 861 |    {
 862 |     "_defaultOrder": 19,
 863 |     "_isFastLaunch": false,
 864 |     "category": "General purpose",
 865 |     "gpuNum": 0,
 866 |     "hideHardwareSpecs": false,
 867 |     "memoryGiB": 384,
 868 |     "name": "ml.m5d.24xlarge",
 869 |     "vcpuNum": 96
 870 |    },
 871 |    {
 872 |     "_defaultOrder": 20,
 873 |     "_isFastLaunch": false,
 874 |     "category": "General purpose",
 875 |     "gpuNum": 0,
 876 |     "hideHardwareSpecs": true,
 877 |     "memoryGiB": 0,
 878 |     "name": "ml.geospatial.interactive",
 879 |     "supportedImageNames": [
 880 |      "sagemaker-geospatial-v1-0"
 881 |     ],
 882 |     "vcpuNum": 0
 883 |    },
 884 |    {
 885 |     "_defaultOrder": 21,
 886 |     "_isFastLaunch": true,
 887 |     "category": "Compute optimized",
 888 |     "gpuNum": 0,
 889 |     "hideHardwareSpecs": false,
 890 |     "memoryGiB": 4,
 891 |     "name": "ml.c5.large",
 892 |     "vcpuNum": 2
 893 |    },
 894 |    {
 895 |     "_defaultOrder": 22,
 896 |     "_isFastLaunch": false,
 897 |     "category": "Compute optimized",
 898 |     "gpuNum": 0,
 899 |     "hideHardwareSpecs": false,
 900 |     "memoryGiB": 8,
 901 |     "name": "ml.c5.xlarge",
 902 |     "vcpuNum": 4
 903 |    },
 904 |    {
 905 |     "_defaultOrder": 23,
 906 |     "_isFastLaunch": false,
 907 |     "category": "Compute optimized",
 908 |     "gpuNum": 0,
 909 |     "hideHardwareSpecs": false,
 910 |     "memoryGiB": 16,
 911 |     "name": "ml.c5.2xlarge",
 912 |     "vcpuNum": 8
 913 |    },
 914 |    {
 915 |     "_defaultOrder": 24,
 916 |     "_isFastLaunch": false,
 917 |     "category": "Compute optimized",
 918 |     "gpuNum": 0,
 919 |     "hideHardwareSpecs": false,
 920 |     "memoryGiB": 32,
 921 |     "name": "ml.c5.4xlarge",
 922 |     "vcpuNum": 16
 923 |    },
 924 |    {
 925 |     "_defaultOrder": 25,
 926 |     "_isFastLaunch": false,
 927 |     "category": "Compute optimized",
 928 |     "gpuNum": 0,
 929 |     "hideHardwareSpecs": false,
 930 |     "memoryGiB": 72,
 931 |     "name": "ml.c5.9xlarge",
 932 |     "vcpuNum": 36
 933 |    },
 934 |    {
 935 |     "_defaultOrder": 26,
 936 |     "_isFastLaunch": false,
 937 |     "category": "Compute optimized",
 938 |     "gpuNum": 0,
 939 |     "hideHardwareSpecs": false,
 940 |     "memoryGiB": 96,
 941 |     "name": "ml.c5.12xlarge",
 942 |     "vcpuNum": 48
 943 |    },
 944 |    {
 945 |     "_defaultOrder": 27,
 946 |     "_isFastLaunch": false,
 947 |     "category": "Compute optimized",
 948 |     "gpuNum": 0,
 949 |     "hideHardwareSpecs": false,
 950 |     "memoryGiB": 144,
 951 |     "name": "ml.c5.18xlarge",
 952 |     "vcpuNum": 72
 953 |    },
 954 |    {
 955 |     "_defaultOrder": 28,
 956 |     "_isFastLaunch": false,
 957 |     "category": "Compute optimized",
 958 |     "gpuNum": 0,
 959 |     "hideHardwareSpecs": false,
 960 |     "memoryGiB": 192,
 961 |     "name": "ml.c5.24xlarge",
 962 |     "vcpuNum": 96
 963 |    },
 964 |    {
 965 |     "_defaultOrder": 29,
 966 |     "_isFastLaunch": true,
 967 |     "category": "Accelerated computing",
 968 |     "gpuNum": 1,
 969 |     "hideHardwareSpecs": false,
 970 |     "memoryGiB": 16,
 971 |     "name": "ml.g4dn.xlarge",
 972 |     "vcpuNum": 4
 973 |    },
 974 |    {
 975 |     "_defaultOrder": 30,
 976 |     "_isFastLaunch": false,
 977 |     "category": "Accelerated computing",
 978 |     "gpuNum": 1,
 979 |     "hideHardwareSpecs": false,
 980 |     "memoryGiB": 32,
 981 |     "name": "ml.g4dn.2xlarge",
 982 |     "vcpuNum": 8
 983 |    },
 984 |    {
 985 |     "_defaultOrder": 31,
 986 |     "_isFastLaunch": false,
 987 |     "category": "Accelerated computing",
 988 |     "gpuNum": 1,
 989 |     "hideHardwareSpecs": false,
 990 |     "memoryGiB": 64,
 991 |     "name": "ml.g4dn.4xlarge",
 992 |     "vcpuNum": 16
 993 |    },
 994 |    {
 995 |     "_defaultOrder": 32,
 996 |     "_isFastLaunch": false,
 997 |     "category": "Accelerated computing",
 998 |     "gpuNum": 1,
 999 |     "hideHardwareSpecs": false,
1000 |     "memoryGiB": 128,
1001 |     "name": "ml.g4dn.8xlarge",
1002 |     "vcpuNum": 32
1003 |    },
1004 |    {
1005 |     "_defaultOrder": 33,
1006 |     "_isFastLaunch": false,
1007 |     "category": "Accelerated computing",
1008 |     "gpuNum": 4,
1009 |     "hideHardwareSpecs": false,
1010 |     "memoryGiB": 192,
1011 |     "name": "ml.g4dn.12xlarge",
1012 |     "vcpuNum": 48
1013 |    },
1014 |    {
1015 |     "_defaultOrder": 34,
1016 |     "_isFastLaunch": false,
1017 |     "category": "Accelerated computing",
1018 |     "gpuNum": 1,
1019 |     "hideHardwareSpecs": false,
1020 |     "memoryGiB": 256,
1021 |     "name": "ml.g4dn.16xlarge",
1022 |     "vcpuNum": 64
1023 |    },
1024 |    {
1025 |     "_defaultOrder": 35,
1026 |     "_isFastLaunch": false,
1027 |     "category": "Accelerated computing",
1028 |     "gpuNum": 1,
1029 |     "hideHardwareSpecs": false,
1030 |     "memoryGiB": 61,
1031 |     "name": "ml.p3.2xlarge",
1032 |     "vcpuNum": 8
1033 |    },
1034 |    {
1035 |     "_defaultOrder": 36,
1036 |     "_isFastLaunch": false,
1037 |     "category": "Accelerated computing",
1038 |     "gpuNum": 4,
1039 |     "hideHardwareSpecs": false,
1040 |     "memoryGiB": 244,
1041 |     "name": "ml.p3.8xlarge",
1042 |     "vcpuNum": 32
1043 |    },
1044 |    {
1045 |     "_defaultOrder": 37,
1046 |     "_isFastLaunch": false,
1047 |     "category": "Accelerated computing",
1048 |     "gpuNum": 8,
1049 |     "hideHardwareSpecs": false,
1050 |     "memoryGiB": 488,
1051 |     "name": "ml.p3.16xlarge",
1052 |     "vcpuNum": 64
1053 |    },
1054 |    {
1055 |     "_defaultOrder": 38,
1056 |     "_isFastLaunch": false,
1057 |     "category": "Accelerated computing",
1058 |     "gpuNum": 8,
1059 |     "hideHardwareSpecs": false,
1060 |     "memoryGiB": 768,
1061 |     "name": "ml.p3dn.24xlarge",
1062 |     "vcpuNum": 96
1063 |    },
1064 |    {
1065 |     "_defaultOrder": 39,
1066 |     "_isFastLaunch": false,
1067 |     "category": "Memory Optimized",
1068 |     "gpuNum": 0,
1069 |     "hideHardwareSpecs": false,
1070 |     "memoryGiB": 16,
1071 |     "name": "ml.r5.large",
1072 |     "vcpuNum": 2
1073 |    },
1074 |    {
1075 |     "_defaultOrder": 40,
1076 |     "_isFastLaunch": false,
1077 |     "category": "Memory Optimized",
1078 |     "gpuNum": 0,
1079 |     "hideHardwareSpecs": false,
1080 |     "memoryGiB": 32,
1081 |     "name": "ml.r5.xlarge",
1082 |     "vcpuNum": 4
1083 |    },
1084 |    {
1085 |     "_defaultOrder": 41,
1086 |     "_isFastLaunch": false,
1087 |     "category": "Memory Optimized",
1088 |     "gpuNum": 0,
1089 |     "hideHardwareSpecs": false,
1090 |     "memoryGiB": 64,
1091 |     "name": "ml.r5.2xlarge",
1092 |     "vcpuNum": 8
1093 |    },
1094 |    {
1095 |     "_defaultOrder": 42,
1096 |     "_isFastLaunch": false,
1097 |     "category": "Memory Optimized",
1098 |     "gpuNum": 0,
1099 |     "hideHardwareSpecs": false,
1100 |     "memoryGiB": 128,
1101 |     "name": "ml.r5.4xlarge",
1102 |     "vcpuNum": 16
1103 |    },
1104 |    {
1105 |     "_defaultOrder": 43,
1106 |     "_isFastLaunch": false,
1107 |     "category": "Memory Optimized",
1108 |     "gpuNum": 0,
1109 |     "hideHardwareSpecs": false,
1110 |     "memoryGiB": 256,
1111 |     "name": "ml.r5.8xlarge",
1112 |     "vcpuNum": 32
1113 |    },
1114 |    {
1115 |     "_defaultOrder": 44,
1116 |     "_isFastLaunch": false,
1117 |     "category": "Memory Optimized",
1118 |     "gpuNum": 0,
1119 |     "hideHardwareSpecs": false,
1120 |     "memoryGiB": 384,
1121 |     "name": "ml.r5.12xlarge",
1122 |     "vcpuNum": 48
1123 |    },
1124 |    {
1125 |     "_defaultOrder": 45,
1126 |     "_isFastLaunch": false,
1127 |     "category": "Memory Optimized",
1128 |     "gpuNum": 0,
1129 |     "hideHardwareSpecs": false,
1130 |     "memoryGiB": 512,
1131 |     "name": "ml.r5.16xlarge",
1132 |     "vcpuNum": 64
1133 |    },
1134 |    {
1135 |     "_defaultOrder": 46,
1136 |     "_isFastLaunch": false,
1137 |     "category": "Memory Optimized",
1138 |     "gpuNum": 0,
1139 |     "hideHardwareSpecs": false,
1140 |     "memoryGiB": 768,
1141 |     "name": "ml.r5.24xlarge",
1142 |     "vcpuNum": 96
1143 |    },
1144 |    {
1145 |     "_defaultOrder": 47,
1146 |     "_isFastLaunch": false,
1147 |     "category": "Accelerated computing",
1148 |     "gpuNum": 1,
1149 |     "hideHardwareSpecs": false,
1150 |     "memoryGiB": 16,
1151 |     "name": "ml.g5.xlarge",
1152 |     "vcpuNum": 4
1153 |    },
1154 |    {
1155 |     "_defaultOrder": 48,
1156 |     "_isFastLaunch": false,
1157 |     "category": "Accelerated computing",
1158 |     "gpuNum": 1,
1159 |     "hideHardwareSpecs": false,
1160 |     "memoryGiB": 32,
1161 |     "name": "ml.g5.2xlarge",
1162 |     "vcpuNum": 8
1163 |    },
1164 |    {
1165 |     "_defaultOrder": 49,
1166 |     "_isFastLaunch": false,
1167 |     "category": "Accelerated computing",
1168 |     "gpuNum": 1,
1169 |     "hideHardwareSpecs": false,
1170 |     "memoryGiB": 64,
1171 |     "name": "ml.g5.4xlarge",
1172 |     "vcpuNum": 16
1173 |    },
1174 |    {
1175 |     "_defaultOrder": 50,
1176 |     "_isFastLaunch": false,
1177 |     "category": "Accelerated computing",
1178 |     "gpuNum": 1,
1179 |     "hideHardwareSpecs": false,
1180 |     "memoryGiB": 128,
1181 |     "name": "ml.g5.8xlarge",
1182 |     "vcpuNum": 32
1183 |    },
1184 |    {
1185 |     "_defaultOrder": 51,
1186 |     "_isFastLaunch": false,
1187 |     "category": "Accelerated computing",
1188 |     "gpuNum": 1,
1189 |     "hideHardwareSpecs": false,
1190 |     "memoryGiB": 256,
1191 |     "name": "ml.g5.16xlarge",
1192 |     "vcpuNum": 64
1193 |    },
1194 |    {
1195 |     "_defaultOrder": 52,
1196 |     "_isFastLaunch": false,
1197 |     "category": "Accelerated computing",
1198 |     "gpuNum": 4,
1199 |     "hideHardwareSpecs": false,
1200 |     "memoryGiB": 192,
1201 |     "name": "ml.g5.12xlarge",
1202 |     "vcpuNum": 48
1203 |    },
1204 |    {
1205 |     "_defaultOrder": 53,
1206 |     "_isFastLaunch": false,
1207 |     "category": "Accelerated computing",
1208 |     "gpuNum": 4,
1209 |     "hideHardwareSpecs": false,
1210 |     "memoryGiB": 384,
1211 |     "name": "ml.g5.24xlarge",
1212 |     "vcpuNum": 96
1213 |    },
1214 |    {
1215 |     "_defaultOrder": 54,
1216 |     "_isFastLaunch": false,
1217 |     "category": "Accelerated computing",
1218 |     "gpuNum": 8,
1219 |     "hideHardwareSpecs": false,
1220 |     "memoryGiB": 768,
1221 |     "name": "ml.g5.48xlarge",
1222 |     "vcpuNum": 192
1223 |    },
1224 |    {
1225 |     "_defaultOrder": 55,
1226 |     "_isFastLaunch": false,
1227 |     "category": "Accelerated computing",
1228 |     "gpuNum": 8,
1229 |     "hideHardwareSpecs": false,
1230 |     "memoryGiB": 1152,
1231 |     "name": "ml.p4d.24xlarge",
1232 |     "vcpuNum": 96
1233 |    },
1234 |    {
1235 |     "_defaultOrder": 56,
1236 |     "_isFastLaunch": false,
1237 |     "category": "Accelerated computing",
1238 |     "gpuNum": 8,
1239 |     "hideHardwareSpecs": false,
1240 |     "memoryGiB": 1152,
1241 |     "name": "ml.p4de.24xlarge",
1242 |     "vcpuNum": 96
1243 |    },
1244 |    {
1245 |     "_defaultOrder": 57,
1246 |     "_isFastLaunch": false,
1247 |     "category": "Accelerated computing",
1248 |     "gpuNum": 0,
1249 |     "hideHardwareSpecs": false,
1250 |     "memoryGiB": 32,
1251 |     "name": "ml.trn1.2xlarge",
1252 |     "vcpuNum": 8
1253 |    },
1254 |    {
1255 |     "_defaultOrder": 58,
1256 |     "_isFastLaunch": false,
1257 |     "category": "Accelerated computing",
1258 |     "gpuNum": 0,
1259 |     "hideHardwareSpecs": false,
1260 |     "memoryGiB": 512,
1261 |     "name": "ml.trn1.32xlarge",
1262 |     "vcpuNum": 128
1263 |    },
1264 |    {
1265 |     "_defaultOrder": 59,
1266 |     "_isFastLaunch": false,
1267 |     "category": "Accelerated computing",
1268 |     "gpuNum": 0,
1269 |     "hideHardwareSpecs": false,
1270 |     "memoryGiB": 512,
1271 |     "name": "ml.trn1n.32xlarge",
1272 |     "vcpuNum": 128
1273 |    }
1274 |   ],
1275 |   "instance_type": "ml.t3.medium",
1276 |   "kernelspec": {
1277 |    "display_name": "Python 3 (ipykernel) (arn:aws:sagemaker:us-east-1:081325390199:image/sagemaker-data-science-310-v1)",
1278 |    "language": "python",
1279 |    "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-1:081325390199:image/sagemaker-data-science-310-v1"
1280 |   },
1281 |   "language_info": {
1282 |    "codemirror_mode": {
1283 |     "name": "ipython",
1284 |     "version": 3
1285 |    },
1286 |    "file_extension": ".py",
1287 |    "mimetype": "text/x-python",
1288 |    "name": "python",
1289 |    "nbconvert_exporter": "python",
1290 |    "pygments_lexer": "ipython3",
1291 |    "version": "3.10.6"
1292 |   }
1293 |  },
1294 |  "nbformat": 4,
1295 |  "nbformat_minor": 5
1296 | }
1297 | 


--------------------------------------------------------------------------------
/source/notebooks/notebooks.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/sample-generative-ai-analysis-amazon-security-lake/e96047b6876806c18ddc1e54241367a2ce5e8c1c/source/notebooks/notebooks.zip


--------------------------------------------------------------------------------
/source/notebooks/requirements.txt:
--------------------------------------------------------------------------------
1 | langchain==0.1.11
2 | matplotlib==3.8.0
3 | pandas==1.4.4
4 | langchain-experimental==0.0.52
5 | sqlalchemy==1.4.47
6 | PyAthena[SQLAlchemy]==2.25.2


--------------------------------------------------------------------------------
/source/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "source",
 3 |   "version": "0.1.0",
 4 |   "bin": {
 5 |     "source": "bin/source.js"
 6 |   },
 7 |   "scripts": {
 8 |     "build": "tsc",
 9 |     "watch": "tsc -w",
10 |     "test": "jest",
11 |     "cdk": "cdk"
12 |   },
13 |   "devDependencies": {
14 |     "@types/jest": "^29.5.8",
15 |     "@types/node": "20.9.0",
16 |     "aws-cdk": "2.106.0",
17 |     "jest": "^29.7.0",
18 |     "ts-jest": "^29.1.1",
19 |     "ts-node": "^10.9.1",
20 |     "typescript": "~5.2.2"
21 |   },
22 |   "dependencies": {
23 |     "aws-cdk-lib": "2.106.0",
24 |     "cdk-nag": "^2.27.230",
25 |     "constructs": "^10.0.0",
26 |     "source-map-support": "^0.5.21"
27 |   }
28 | }
29 | 


--------------------------------------------------------------------------------
/source/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "compilerOptions": {
 3 |     "target": "ES2018",
 4 |     "module": "commonjs",
 5 |     "lib": [
 6 |       "es2018"
 7 |     ],
 8 |     "declaration": true,
 9 |     "strict": true,
10 |     "noImplicitAny": true,
11 |     "strictNullChecks": true,
12 |     "noImplicitThis": true,
13 |     "alwaysStrict": true,
14 |     "noUnusedLocals": false,
15 |     "noUnusedParameters": false,
16 |     "noImplicitReturns": true,
17 |     "noFallthroughCasesInSwitch": false,
18 |     "inlineSourceMap": true,
19 |     "inlineSources": true,
20 |     "experimentalDecorators": true,
21 |     "strictPropertyInitialization": false,
22 |     "typeRoots": [
23 |       "./node_modules/@types"
24 |     ]
25 |   },
26 |   "exclude": [
27 |     "node_modules",
28 |     "cdk.out"
29 |   ]
30 | }
31 | 


--------------------------------------------------------------------------------