├── cdk ├── container │ └── front-end-app │ │ ├── utils │ │ ├── __init__.py │ │ ├── feedback.py │ │ └── generate.py │ │ ├── Dockerfile │ │ ├── requirements.txt │ │ └── app.py ├── .npmignore ├── .gitignore ├── bin │ └── cdk.ts ├── package.json ├── tsconfig.json ├── cdk.json ├── lambda │ ├── trace-analyzer │ │ └── lambda_function.py │ └── trace-extractor │ │ └── lambda_function.py └── lib │ └── cdk-stack.ts ├── images ├── Solution Overview.png ├── completed_prompt_screenshot.png └── completed_analysis_screenshot.png ├── CODE_OF_CONDUCT.md ├── LICENSE ├── CONTRIBUTING.md └── README.md /cdk/container/front-end-app/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /images/Solution Overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/genai-llm-application-monitoring-on-aws/HEAD/images/Solution Overview.png -------------------------------------------------------------------------------- /images/completed_prompt_screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/genai-llm-application-monitoring-on-aws/HEAD/images/completed_prompt_screenshot.png -------------------------------------------------------------------------------- /images/completed_analysis_screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/genai-llm-application-monitoring-on-aws/HEAD/images/completed_analysis_screenshot.png -------------------------------------------------------------------------------- /cdk/.npmignore: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | 4 | *.ts 5 | !*.d.ts 6 | 7 | # CDK asset staging directory 8 | .cdk.staging 9 | cdk.out 10 | -------------------------------------------------------------------------------- /cdk/container/front-end-app/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11 2 | EXPOSE 8501 3 | WORKDIR /app 4 | COPY requirements.txt ./requirements.txt 5 | RUN pip3 install --upgrade pip && pip3 install -r requirements.txt 6 | COPY . . 7 | 8 | CMD streamlit run app.py 9 | -------------------------------------------------------------------------------- /cdk/.gitignore: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | 4 | *.js 5 | !jest.config.js 6 | *.d.ts 7 | node_modules 8 | 9 | # CDK asset staging directory 10 | .cdk.staging 11 | cdk.out 12 | 13 | #Development/local files 14 | .DS_Store 15 | *dev* -------------------------------------------------------------------------------- /cdk/container/front-end-app/requirements.txt: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | streamlit==1.29.0 4 | boto3==1.33.11 5 | streamlit-cognito-auth==1.2.0 6 | langchain==0.1.7 7 | traceloop-sdk==0.11.1 8 | opentelemetry-instrumentation-bedrock 9 | boto3 -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /cdk/bin/cdk.ts: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | // SPDX-License-Identifier: MIT-0 5 | import 'source-map-support/register'; 6 | import * as cdk from 'aws-cdk-lib'; 7 | import { CdkStack } from '../lib/cdk-stack'; 8 | 9 | const app = new cdk.App(); 10 | new CdkStack(app, 'ObserveLLMStack', { 11 | env: { region: 'us-east-1' } 12 | }); -------------------------------------------------------------------------------- /cdk/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "cdk", 3 | "version": "0.1.0", 4 | "bin": { 5 | "cdk": "bin/cdk.js" 6 | }, 7 | "scripts": { 8 | "build": "tsc", 9 | "watch": "tsc -w", 10 | "cdk": "cdk" 11 | }, 12 | "devDependencies": { 13 | "@types/node": "20.11.5", 14 | "aws-cdk": "2.122.0", 15 | "ts-node": "^10.9.2", 16 | "typescript": "~5.3.3" 17 | }, 18 | "dependencies": { 19 | "aws-cdk-lib": "2.122.0", 20 | "constructs": "^10.0.0", 21 | "source-map-support": "^0.5.21" 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /cdk/container/front-end-app/utils/feedback.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | 4 | import streamlit as st 5 | from traceloop.sdk.decorators import task 6 | 7 | 8 | @task('cfn_user_feedback') 9 | def trace_feedback(feedback): 10 | if feedback == 'helpful': 11 | st.session_state.user_feedback = 1 12 | elif feedback == 'not helpful': 13 | st.session_state.user_feedback = -1 14 | output = { 15 | 'model_id': st.session_state.model_id, 16 | 'messages': st.session_state.messages, 17 | 'user_feedback': st.session_state.user_feedback, 18 | } 19 | return output 20 | -------------------------------------------------------------------------------- /cdk/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2020", 4 | "module": "commonjs", 5 | "lib": [ 6 | "es2020", 7 | "dom" 8 | ], 9 | "declaration": true, 10 | "strict": true, 11 | "noImplicitAny": true, 12 | "strictNullChecks": true, 13 | "noImplicitThis": true, 14 | "alwaysStrict": true, 15 | "noUnusedLocals": false, 16 | "noUnusedParameters": false, 17 | "noImplicitReturns": true, 18 | "noFallthroughCasesInSwitch": false, 19 | "inlineSourceMap": true, 20 | "inlineSources": true, 21 | "experimentalDecorators": true, 22 | "strictPropertyInitialization": false, 23 | "typeRoots": [ 24 | "./node_modules/@types" 25 | ] 26 | }, 27 | "exclude": [ 28 | "node_modules", 29 | "cdk.out" 30 | ] 31 | } 32 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT No Attribution 2 | 3 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so. 10 | 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 13 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 14 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 15 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 16 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 17 | 18 | -------------------------------------------------------------------------------- /cdk/container/front-end-app/utils/generate.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | 4 | from langchain_community.llms import Bedrock 5 | from langchain.prompts.prompt import PromptTemplate 6 | from traceloop.sdk.decorators import workflow, task 7 | import boto3 8 | 9 | # set up client for checking toxicity 10 | comprehend = boto3.client('comprehend') 11 | 12 | # prompt template 13 | template = """Human: You are an expert cloud engineer with a special focus on infrastructure as code with AWS. 14 | Your jobs is to write a cloud formation template to accomplish whatever the human asks of you and to respond to requests from the human for updates. 15 | 16 | Provide the output CloudFormation template in YAML format which is viewable as code in markdown. 17 | ALWAYS use the yaml formatting below when making a template for the human. 18 | ```yaml 19 | [CloudFormation Template here] 20 | ``` 21 | 22 | If the user response is not able to be accomplished in cloud formation, say "Sorry this is not supported by AWS CloudFormation" and given an explanation of why if you know exactly why it is not supported. 23 | 24 | Assistant: 25 | I can absolutely help with this. What would you like me to build for you? 26 | {chat_history} 27 | Assistant:""" 28 | prompt_template = PromptTemplate( 29 | input_variables=["user_input"], template=template 30 | ) 31 | 32 | def format_history(chat_history): 33 | history = '' 34 | for message in chat_history: 35 | if message['role'] == 'assistant': 36 | history += f"Assistant:\n{message['content']}\n\n" 37 | else: 38 | history += f"Human:\n{message['content']}\n\n" 39 | return history 40 | 41 | # generation function 42 | @task(name="check_toxicity") 43 | def check_toxicity(prompt): 44 | response = comprehend.detect_toxic_content( 45 | TextSegments=[ 46 | {'Text': prompt}, 47 | ], 48 | LanguageCode='en' 49 | ) 50 | labels = response['ResultList'][0]['Labels'] 51 | toxic = any(label['Score'] > 0.7 for label in labels) 52 | return toxic, labels 53 | 54 | # generation function 55 | @task(name="llm_call") 56 | def llm_call(prompt, model_id): 57 | llm = Bedrock(model_id=model_id, model_kwargs={'max_tokens_to_sample':2000}) 58 | out = llm.invoke(prompt) 59 | return out 60 | 61 | # function to generate cloud formation templates 62 | @workflow(name="generate_cfn") 63 | def generate_cfn(chat_history, model_id): 64 | 65 | # format prompt 66 | prompt = prompt_template.format_prompt(chat_history=format_history(chat_history)) 67 | 68 | # check toxicity 69 | toxic, labels = check_toxicity(chat_history[-1]['content']) 70 | if toxic: 71 | return "Sorry I will not respond when toxic inputs are detected." 72 | 73 | # generation call 74 | out = llm_call(prompt.text, model_id) 75 | return out 76 | -------------------------------------------------------------------------------- /cdk/cdk.json: -------------------------------------------------------------------------------- 1 | { 2 | "app": "npx ts-node --prefer-ts-exts bin/cdk.ts", 3 | "watch": { 4 | "include": [ 5 | "**" 6 | ], 7 | "exclude": [ 8 | "README.md", 9 | "cdk*.json", 10 | "**/*.d.ts", 11 | "**/*.js", 12 | "tsconfig.json", 13 | "package*.json", 14 | "yarn.lock", 15 | "node_modules", 16 | "test" 17 | ] 18 | }, 19 | "context": { 20 | "@aws-cdk/aws-lambda:recognizeLayerVersion": true, 21 | "@aws-cdk/core:checkSecretUsage": true, 22 | "@aws-cdk/core:target-partitions": [ 23 | "aws", 24 | "aws-cn" 25 | ], 26 | "@aws-cdk-containers/ecs-service-extensions:enableDefaultLogDriver": true, 27 | "@aws-cdk/aws-ec2:uniqueImdsv2TemplateName": true, 28 | "@aws-cdk/aws-ecs:arnFormatIncludesClusterName": true, 29 | "@aws-cdk/aws-iam:minimizePolicies": true, 30 | "@aws-cdk/core:validateSnapshotRemovalPolicy": true, 31 | "@aws-cdk/aws-codepipeline:crossAccountKeyAliasStackSafeResourceName": true, 32 | "@aws-cdk/aws-s3:createDefaultLoggingPolicy": true, 33 | "@aws-cdk/aws-sns-subscriptions:restrictSqsDescryption": true, 34 | "@aws-cdk/aws-apigateway:disableCloudWatchRole": true, 35 | "@aws-cdk/core:enablePartitionLiterals": true, 36 | "@aws-cdk/aws-events:eventsTargetQueueSameAccount": true, 37 | "@aws-cdk/aws-iam:standardizedServicePrincipals": true, 38 | "@aws-cdk/aws-ecs:disableExplicitDeploymentControllerForCircuitBreaker": true, 39 | "@aws-cdk/aws-iam:importedRoleStackSafeDefaultPolicyName": true, 40 | "@aws-cdk/aws-s3:serverAccessLogsUseBucketPolicy": true, 41 | "@aws-cdk/aws-route53-patters:useCertificate": true, 42 | "@aws-cdk/customresources:installLatestAwsSdkDefault": false, 43 | "@aws-cdk/aws-rds:databaseProxyUniqueResourceName": true, 44 | "@aws-cdk/aws-codedeploy:removeAlarmsFromDeploymentGroup": true, 45 | "@aws-cdk/aws-apigateway:authorizerChangeDeploymentLogicalId": true, 46 | "@aws-cdk/aws-ec2:launchTemplateDefaultUserData": true, 47 | "@aws-cdk/aws-secretsmanager:useAttachedSecretResourcePolicyForSecretTargetAttachments": true, 48 | "@aws-cdk/aws-redshift:columnId": true, 49 | "@aws-cdk/aws-stepfunctions-tasks:enableEmrServicePolicyV2": true, 50 | "@aws-cdk/aws-ec2:restrictDefaultSecurityGroup": true, 51 | "@aws-cdk/aws-apigateway:requestValidatorUniqueId": true, 52 | "@aws-cdk/aws-kms:aliasNameRef": true, 53 | "@aws-cdk/aws-autoscaling:generateLaunchTemplateInsteadOfLaunchConfig": true, 54 | "@aws-cdk/core:includePrefixInUniqueNameGeneration": true, 55 | "@aws-cdk/aws-efs:denyAnonymousAccess": true, 56 | "@aws-cdk/aws-opensearchservice:enableOpensearchMultiAzWithStandby": true, 57 | "@aws-cdk/aws-lambda-nodejs:useLatestRuntimeVersion": true, 58 | "@aws-cdk/aws-efs:mountTargetOrderInsensitiveLogicalId": true, 59 | "@aws-cdk/aws-rds:auroraClusterChangeScopeOfInstanceParameterGroupWithEachParameters": true, 60 | "@aws-cdk/aws-appsync:useArnForSourceApiAssociationIdentifier": true, 61 | "@aws-cdk/aws-rds:preventRenderingDeprecatedCredentials": true, 62 | "@aws-cdk/aws-codepipeline-actions:useNewDefaultBranchForCodeCommitSource": true 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 60 | -------------------------------------------------------------------------------- /cdk/container/front-end-app/app.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | 4 | import streamlit as st 5 | from utils.generate import generate_cfn 6 | from utils.feedback import trace_feedback 7 | from traceloop.sdk import Traceloop 8 | import boto3 9 | import os 10 | import json 11 | 12 | # set up telemetry tracking 13 | TRACELOOP_LOCAL_TESTING_URL="http://127.0.0.1:4318" 14 | TRACELOOP_BASE_URL = os.getenv('TRACELOOP_BASE_URL', TRACELOOP_LOCAL_TESTING_URL) 15 | os.environ['TRACELOOP_BASE_URL'] = TRACELOOP_BASE_URL 16 | Traceloop.init(app_name="llm-app-2") 17 | 18 | # add title and rest of application 19 | st.title("Text-to-DSL Observable Application") 20 | 21 | # subtitle 22 | st.markdown(""" 23 | The goal of this application is to generate an AWS CloudFormation Template 24 | which can be chatted with and changed as you go. Ask the assistant to generate something 25 | which is useful in AWS! 26 | """ 27 | ) 28 | 29 | # set up session variables 30 | if "messages" not in st.session_state: 31 | st.session_state.messages = [] 32 | st.session_state.user_feedback = 0 33 | st.session_state.model_id = "anthropic.claude-instant-v1" 34 | 35 | # Display chat messages from history on app rerun 36 | for message in st.session_state.messages: 37 | with st.chat_message(message["role"]): 38 | st.markdown(message["content"]) 39 | 40 | # Accept user input 41 | if prompt := st.chat_input("What do you want your AWS CloudFormation template to do?"): 42 | 43 | # assume user feedback to be neutral to start 44 | st.session_state.user_feedback = 0 45 | 46 | # Add user message to chat history 47 | st.session_state.messages.append({"role": "user", "content": prompt}) 48 | 49 | # Display user message in chat message container 50 | with st.chat_message("user"): 51 | st.markdown(prompt) 52 | 53 | # Display assistant response in chat message container 54 | with st.chat_message("assistant"): 55 | response = generate_cfn(st.session_state.messages, st.session_state.model_id) 56 | st.markdown(response) 57 | 58 | # save the session state message for the next prompt 59 | if response == "Sorry I will not respond when toxic inputs are detected.": 60 | st.session_state.messages = [] 61 | st.session_state.messages.append({"role": "assistant", "content": response}) 62 | st.session_state.messages.append({"role": "user", "content": "----- REDACTED -----"}) 63 | else: 64 | st.session_state.messages.append({"role": "assistant", "content": response}) 65 | 66 | # add the user feedback sidebar 67 | with st.sidebar: 68 | 69 | # display like buttons 70 | st.markdown('## Your feedback is always appreciated!') 71 | if len(st.session_state.messages) == 0: 72 | st.write('Once you start generating CloudFormation Templates you will be able to provide feedback.') 73 | else: 74 | st.button('This response was helpful', on_click=trace_feedback, args=['helpful']) 75 | st.button('This assistant was NOT helpful', on_click=trace_feedback, args=['not helpful']) 76 | 77 | # display confirmation message 78 | if st.session_state.user_feedback == 1: 79 | st.write('Thank you for your feedback - we are glad this app is helping!') 80 | elif st.session_state.user_feedback == -1: 81 | st.write('Thank you for your feedback - we will continuously work to improve our service for you.') 82 | -------------------------------------------------------------------------------- /cdk/lambda/trace-analyzer/lambda_function.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | 4 | import json 5 | import boto3 6 | from pprint import pprint 7 | import os 8 | 9 | # boto3 configurations to cloudformation firehose and bedrock runtime 10 | cfn_client = boto3.client('cloudformation') 11 | bedrock = boto3.client('bedrock-runtime', region_name='us-west-2') 12 | fh_client = boto3.client('firehose') 13 | fh_stream_name = os.environ['FIREHOSE_STREAM_NAME'] 14 | 15 | # evaluation template for llm self evaluation 16 | cfn_evaluation_template ='''Human: You are an expert AWS cloud engineer who knows everything about AWS and infrastructure as code. 17 | Your job is to evaluate the following cloud formation template which was provided to a human (H) by an assistant (H) based on the conversational context below. 18 | Here is the original conversation right before the template was provided. 19 | 20 | 21 | {conversation} 22 | 23 | 24 | 25 | {template} 26 | 27 | 28 | Answer the following questions in a markdown numbered list where each answer contains only one word "yes" or "no". 29 | Preserve the order of the questions in your answered list. 30 | 31 | 1. Are there any glaring security issues in the template? 32 | 2. Does the template accomplish what the human was asking for? 33 | 34 | Assistant: 35 | Here are the answers to your questions. 36 | ''' 37 | 38 | nullable_keys = ['valid_template', 'llm_security_issue_found', 'llm_answered_question'] 39 | 40 | def validate_cloudformation_template(template_body): 41 | try: 42 | response = cfn_client.validate_template(TemplateBody=template_body) 43 | return True 44 | except Exception as e: 45 | return False 46 | 47 | def parse_llm_eval(completion): 48 | try: 49 | first_split = completion.split('1. ')[1] 50 | second_split = first_split.split('2. ')[1] 51 | first_question = first_split.split('\n')[0].strip().lower() 52 | second_question = second_split.split('\n')[0].strip().lower() 53 | first_question = 'yes' if 'yes' in first_question else 'no' 54 | second_question = 'yes' if 'yes' in second_question else 'no' 55 | return [first_question, second_question] 56 | except Exception as e: 57 | print(e) 58 | return '', '' 59 | 60 | def analyze_llm(trace): 61 | analysis = {} 62 | 63 | # basic dialogue metrics 64 | analysis['dialogue_turns'] = len(trace['full_prompt'].split('Human:')) - 2 65 | 66 | # extraction and validation of cloud formation template 67 | if '```yaml' in trace['completion']: 68 | cfn_yaml = trace['completion'].split('```yaml')[-1].split('```')[0] 69 | 70 | conversation = trace['full_prompt'].replace('Human:', 'H:').replace('Assistant:', 'A:') 71 | 72 | # validate the template 73 | analysis['valid_template'] = validate_cloudformation_template(cfn_yaml) 74 | 75 | # advanced llm self evaluations 76 | eval_prompt = cfn_evaluation_template.replace('{template}', cfn_yaml).replace('{conversation}',conversation) 77 | body = json.dumps({ 78 | "prompt": eval_prompt, 79 | "max_tokens_to_sample": 100, 80 | "temperature": 0.9 81 | }) 82 | response = bedrock.invoke_model( 83 | body=body, 84 | modelId="anthropic.claude-instant-v1", 85 | accept='application/json', 86 | contentType='application/json' 87 | ) 88 | response_body = json.loads(response['body'].read()) 89 | completion = response_body['completion'] 90 | answers = parse_llm_eval(completion) 91 | analysis['llm_security_issue_found'] = answers[0] 92 | analysis['llm_answered_question'] = answers[1] 93 | 94 | else: 95 | analysis['valid_template'] = None 96 | 97 | return analysis 98 | 99 | def analyze_toxicity(trace): 100 | return {} 101 | 102 | def analyze_feedback(trace): 103 | return {} 104 | 105 | def lambda_handler(event, context): 106 | 107 | print(event) 108 | 109 | # load the trace and ensure it is is valid 110 | traces = json.loads(event['body']) 111 | for trace in traces: 112 | 113 | # analyze important information 114 | if trace['task'] == 'llm_call': 115 | output = analyze_llm(trace) 116 | if 'toxicity' in trace['task']: 117 | output = analyze_toxicity(trace) 118 | elif 'feedback' in trace['task']: 119 | output = analyze_feedback(trace) 120 | trace.update(output) 121 | for key in nullable_keys: 122 | if key not in trace.keys(): 123 | trace[key] = None 124 | 125 | print(trace) 126 | 127 | # prepare records for firehose 128 | fh_stream_records = [] 129 | fh_stream_records.append({'Data': (json.dumps(trace) + "\n").encode('utf-8')}) 130 | 131 | # send the trace to firehose 132 | fh_client.put_record_batch( DeliveryStreamName=fh_stream_name, Records=fh_stream_records) 133 | 134 | # return the analyzed trace 135 | return { 136 | 'statusCode': 200, 137 | 'body': 'complete' 138 | } 139 | -------------------------------------------------------------------------------- /cdk/lambda/trace-extractor/lambda_function.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | 4 | import json 5 | import boto3 6 | from pprint import pprint 7 | import os 8 | from urllib.parse import unquote 9 | 10 | def format_history(chat_history): 11 | history = '' 12 | for message in chat_history: 13 | if message['role'] == 'assistant': 14 | history += f"Assistant:\n{message['content']}\n\n" 15 | else: 16 | history += f"Human:\n{message['content']}\n\n" 17 | return history 18 | 19 | def extract_info_from_trace(trace): 20 | 21 | # start key values 22 | app = trace['resource']['attributes'][0]['value']['stringValue'] 23 | records = [] 24 | nullable_keys = [ 25 | 'user_input', 'toxicity_detected', # toxicity 26 | 'full_prompt', 'model', 'completion', # llm generation task 27 | 'feedback', 'model', 'conversation', # user feedback 28 | ] 29 | 30 | # drill down to the traceloop task values only 31 | for scope in trace['scopeSpans']: 32 | if scope['scope']['name'] == 'traceloop.tracer': 33 | for span in scope['spans']: 34 | if 'task' in span['name']: 35 | event_data = {} 36 | 37 | # pull out base data and store it 38 | start_time = span['startTimeUnixNano'] 39 | end_time = span['endTimeUnixNano'] 40 | trace_id = span['traceId'] 41 | 42 | event_data['start_time'] = start_time 43 | event_data['end_time'] = end_time 44 | event_data['trace_id'] = trace_id 45 | 46 | # always get some general information from each task 47 | for att in span['attributes']: 48 | if att['key'] == 'traceloop.workflow.name': 49 | event_data['workflow'] = att['value']['stringValue'] 50 | if att['key'] == 'traceloop.entity.name': 51 | event_data['task'] = att['value']['stringValue'] 52 | 53 | 54 | 55 | # now go back for specific information to each type of task 56 | for att in span['attributes']: 57 | 58 | # user feedback specific values 59 | if 'feedback' in event_data['task']: 60 | 61 | if att['key'] == 'traceloop.entity.input': 62 | event_data['feedback'] = json.loads(att['value']['stringValue'])['args'][0] 63 | 64 | if att['key'] == 'traceloop.entity.output': 65 | event_data['model'] = json.loads(att['value']['stringValue'])['model_id'] 66 | event_data['conversation'] = format_history(json.loads(att['value']['stringValue'])['messages']) 67 | 68 | # specific toxicity measures 69 | elif 'toxicity' in event_data['task']: 70 | if att['key'] == 'traceloop.entity.input': 71 | event_data['user_input'] = json.loads(att['value']['stringValue'])['args'][0] 72 | if att['key'] == 'traceloop.entity.output': 73 | event_data['toxicity_detected'] = json.loads(att['value']['stringValue'])[0] 74 | 75 | # generation outputs 76 | elif 'llm_call' in event_data['task']: 77 | if att['key'] == 'traceloop.entity.input': 78 | event_data['full_prompt'] = json.loads(att['value']['stringValue'])['args'][0] 79 | event_data['model'] = json.loads(att['value']['stringValue'])['args'][1] 80 | if att['key'] == 'traceloop.entity.output': 81 | event_data['completion'] = json.loads(att['value']['stringValue']) 82 | 83 | # ensure you have all the keys in the json file 84 | for key in nullable_keys: 85 | if key not in event_data.keys(): 86 | event_data[key] = None 87 | 88 | # store the information which was extracted 89 | records.append(event_data) 90 | return records 91 | 92 | def lambda_handler(event, context): 93 | 94 | print(event) 95 | 96 | # Get the S3 bucket and key from the event 97 | bucket = event['Records'][0]['s3']['bucket']['name'] 98 | key = unquote(event['Records'][0]['s3']['object']['key']) 99 | print(bucket, key) 100 | 101 | # Download the JSON file from S3 102 | s3_client = boto3.client('s3') 103 | response = s3_client.get_object(Bucket=bucket, Key=key) 104 | json_content = response['Body'].read().decode('utf-8') 105 | 106 | # Process the JSON data 107 | data = json.loads(json_content)['resourceSpans'][0] 108 | 109 | # perform extraction 110 | output = extract_info_from_trace(data) 111 | 112 | return { 113 | 'statusCode': 200, 114 | 'body': json.dumps(output) 115 | } 116 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Observing and Monitoring LLM Applications on AWS 2 | 3 | ## Introduction 4 | In this repository, we'll outline and provide a sample framework for monitoring your Generative AI based applications on AWS. This includes how and what to monitor for your application, users, and LLM(s). 5 | 6 | The use case we will be using throughout this repository is creating a simple conversational interface for generating [AWS CloudFormation](https://aws.amazon.com/cloudformation/) templates through natural language instructions. 7 | 8 | **NOTE) The architecture in this repository is for development purposes only and will incur costs.** 9 | 10 | ## Walkthrough 11 | For the demonstration in this repository, you will send your prompts from a [Streamlit](https://streamlit.io/), hosted on [Amazon Elastic Container Service (ECS)](https://aws.amazon.com/ecs/), front-end application to [Amazon Bedrock](https://aws.amazon.com/bedrock/). All prompts, responses, metrics and analysis will be stored in [Amazon Simple Storage Service (S3)](https://aws.amazon.com/s3/). The following is a detailed outline of the architecture in this repository: 12 | 1. A user logs in to the front-end application using an [Amazon Cognito](https://aws.amazon.com/cognito/) identity 13 | 2. User prompts are submitted from the front-end application to a large language model (LLM) on Amazon Bedrock. These prompt input and outputs are tracked using [OpenLLMetry](https://github.com/traceloop/openllmetry) which is an open source telemetry tool designed specifically for monitoring LLM applications 14 | 3. The LLM response is returned to the user and the conversational memory is preserved in the front-end application 15 | 4. Using the open source [OpenTelemetry collector container](https://opentelemetry.io/docs/collector/quick-start/), ECS exports OpenLLMetry traces to S3 and [Amazon CloudWatch](https://aws.amazon.com/pm/cloudwatch/) 16 | 5. These traces are then post-processed by an [Amazon Lambda function](https://aws.amazon.com/pm/lambda) which extracts relevant information from the trace JSON packets 17 | 6. The post-processed information is then sent to various APIs for evaluation of the model responses. In this example, we used LLMs to evaluate the outputs of the original LLM through Amazon Bedrock. See more about this LLM-as-a-judge pattern [here](https://huggingface.co/learn/cookbook/en/llm_judge). Furthermore, we also validate any CloudFormation templates created via the [ValidateTemplate API from CloudFormation](https://docs.aws.amazon.com/AWSCloudFormation/latest/APIReference/API_ValidateTemplate.html) 18 | 7. The results of this evaluation are then sent to [Amazon Data Firehose](https://aws.amazon.com/firehose/) to be written to Amazon S3 for consumption in downstream systems including, but not limited to, reporting stacks, analytics ecosystems, or LLM feedback mechanisms 19 | 20 | The illustration below details what this solution will look like once fully implemented. 21 | 22 | 23 | 24 |
25 | 26 | ### Prerequisites 27 | To follow through this repository, you will need an AWS account, an Amazon Bedrock supported region, permissions to create AWS Identity and Access Management (IAM) roles and policies, create AWS Lambda Functions, create Amazon ECS Cluster, create Amazon Elastic Container Registry (ECR) , create Amazon S3 buckets, create Amazon Data Firehose streams, access to [Amazon Bedrock](https://docs.aws.amazon.com/bedrock/latest/userguide/setting-up.html) and access to the AWS CLI. In addition, you will need an exiting hosted zone in [Amazon Route53](https://aws.amazon.com/route53/) and existing wildcard certificate in [AWS Certificate Manager (ACM)](https://aws.amazon.com/certificate-manager/). Finally, you will need [Docker](https://www.docker.com/), [Node.js](https://nodejs.org/en) and the [AWS Cloud Development Kit (CDK)](https://docs.aws.amazon.com/cdk/v2/guide/getting_started.html) installed locally. We also assume you have familiar with the basics of Linux bash commands. 28 | 29 | ### Step 1: Create the AWS CDK stack in your AWS account (AWS CDK) 30 | 31 | 1. Create two subdomains under your Amazon Route53 hosted zone. One will be for the Application Load Balancer and the other will be for the front-end application URL. For example, if your hosted zone is `foo.com`, you could pick: 32 | 33 | * `alb1.foo.com` 34 | * `app1.foo.com` 35 | 36 | 2. Run the AWS CDK commands below to deploy this application. 37 | 1. NOTE) If this is your first time deploying this application, make sure you install all Node.js packages by running ```npm install``` below from the `cdk` folder 38 | 39 | ```bash 40 | #Ensure you update all placeholder values in the commands below 41 | 42 | #Change directories to the cdk directory 43 | cd cdk 44 | 45 | #Bootstrap your environment 46 | cdk bootstrap \ 47 | -c domainPrefix= \ 48 | -c appCustomDomainName= \ 49 | -c loadBalancerOriginCustomDomainName= \ 50 | -c customDomainRoute53HostedZoneID= \ 51 | -c customDomainRoute53HostedZoneName= \ 52 | -c customDomainCertificateArn= 53 | 54 | #Synthesize your environment 55 | cdk synth \ 56 | -c domainPrefix= \ 57 | -c appCustomDomainName= \ 58 | -c loadBalancerOriginCustomDomainName= \ 59 | -c customDomainRoute53HostedZoneID= \ 60 | -c customDomainRoute53HostedZoneName= \ 61 | -c customDomainCertificateArn= 62 | 63 | #Deploy your stack 64 | cdk deploy --all \ 65 | --stack-name ObserveLLMStack \ 66 | -c domainPrefix= \ 67 | -c appCustomDomainName= \ 68 | -c loadBalancerOriginCustomDomainName= \ 69 | -c customDomainRoute53HostedZoneID= \ 70 | -c customDomainRoute53HostedZoneName= \ 71 | -c customDomainCertificateArn= 72 | ``` 73 | 74 | ### Step 2: Grant model access for Anthropic Claude Instant in Amazon Bedrock (AWS Console) 75 | 1. Ensure you have [granted model access](https://docs.aws.amazon.com/bedrock/latest/userguide/model-access.html#model-access-add) to the ```Anthropic Claude Instant``` model in Amazon Bedrock. 76 | 77 | ### Step 3: Create Amazon Cognito identity (AWS CLI) 78 | 79 | 1. Create a Amazon Cognito user in the user pool created in the last step. 80 | ```aws cognito-idp admin-create-user --user-pool-id --username --temporary-password ``` 81 | 82 | ### Step 4: Open the front-end application (Web browser) 83 | 84 | 1. Open the front-end application using the URL given in the CDK output `AppURL`. You will need to login with the Amazon Cognito identity created in step 2. You will be prompted to update the password. 85 | 86 | ### Step 5: Submit a prompt to create an AWS CloudFormation template (Web browser) 87 | 1. Submit a prompt such as ```Create an ECS cluster``` into the front-end application 88 | 2. From here, you should get a response similar to the image below. 89 | 90 | 91 | ### Step 6: Review completed analysis in Amazon S3 analysis bucket (AWS Console) 92 | 1. Navigate to your Amazon S3 bucket and open the folder ```otel-trace-analysis``` 93 | 2. Navigate the folder structure using the latest year, month and day folders 94 | 3. Review the latest analysis file to see the prompts, responses, metrics and analysis collected. Your analysis file should look similar to the image below. 95 | 96 | 97 | ## Cleaning up 98 | Be sure to remove the resources created in this repository to avoid continued charges. Run the following commands to delete these resources: 99 | 1. ```cdk destroy --all --stack-name ObserveLLMStack ``` 100 | 2. ```aws admin-delete-user --user-pool-id --username ``` 101 | 102 | ## Security 103 | 104 | See [CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications) for more information. 105 | 106 | ## License 107 | 108 | This library is licensed under the MIT-0 License. See the LICENSE file. -------------------------------------------------------------------------------- /cdk/lib/cdk-stack.ts: -------------------------------------------------------------------------------- 1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | // SPDX-License-Identifier: MIT-0 3 | 4 | import * as cdk from 'aws-cdk-lib'; 5 | import { ListenerAction, ApplicationProtocol, ListenerCondition } from 'aws-cdk-lib/aws-elasticloadbalancingv2'; 6 | import * as acm from 'aws-cdk-lib/aws-certificatemanager'; 7 | import * as ssm from 'aws-cdk-lib/aws-ssm'; 8 | import * as route53 from 'aws-cdk-lib/aws-route53'; 9 | import { Construct } from 'constructs'; 10 | import * as s3 from 'aws-cdk-lib/aws-s3'; 11 | import * as s3Notifications from 'aws-cdk-lib/aws-s3-notifications'; 12 | import * as lambdaDestinations from 'aws-cdk-lib/aws-lambda-destinations'; 13 | import * as ec2 from 'aws-cdk-lib/aws-ec2'; 14 | import * as ecs from 'aws-cdk-lib/aws-ecs'; 15 | import * as iam from 'aws-cdk-lib/aws-iam'; 16 | import * as lambda from 'aws-cdk-lib/aws-lambda'; 17 | import * as ecsPatterns from 'aws-cdk-lib/aws-ecs-patterns'; 18 | import { DockerImageAsset, Platform } from 'aws-cdk-lib/aws-ecr-assets'; 19 | import * as logs from 'aws-cdk-lib/aws-logs'; 20 | import * as kinesisfirehose from 'aws-cdk-lib/aws-kinesisfirehose'; 21 | import { HttpOrigin } from 'aws-cdk-lib/aws-cloudfront-origins'; 22 | import { 23 | Distribution, ViewerProtocolPolicy, OriginProtocolPolicy, AllowedMethods, CachePolicy, 24 | OriginRequestPolicy, OriginRequestCookieBehavior, OriginRequestHeaderBehavior, OriginRequestQueryStringBehavior 25 | } from 'aws-cdk-lib/aws-cloudfront'; 26 | import { CloudFrontTarget } from 'aws-cdk-lib/aws-route53-targets'; 27 | import { UserPool, UserPoolClientIdentityProvider, OAuthScope } from 'aws-cdk-lib/aws-cognito'; 28 | import { AuthenticateCognitoAction } from 'aws-cdk-lib/aws-elasticloadbalancingv2-actions'; 29 | 30 | export class CdkStack extends cdk.Stack { 31 | constructor(scope: Construct, id: string, props?: cdk.StackProps) { 32 | super(scope, id, props); 33 | 34 | // Amazon S3 Bucket 35 | const contentBucket = new s3.Bucket(this, 'DocumentsBucket', { 36 | blockPublicAccess: s3.BlockPublicAccess.BLOCK_ALL, 37 | versioned: false, 38 | encryption: s3.BucketEncryption.S3_MANAGED, 39 | serverAccessLogsPrefix: 'accesslogs/', 40 | enforceSSL: true, 41 | objectOwnership: s3.ObjectOwnership.BUCKET_OWNER_PREFERRED, 42 | eventBridgeEnabled: true, 43 | }); 44 | 45 | // Amazon Virtual Private Cloud (Amazon VPC) 46 | const vpc = new ec2.Vpc(this, 'VPC', { 47 | natGateways: 1, 48 | gatewayEndpoints: { 49 | S3: { 50 | service: ec2.GatewayVpcEndpointAwsService.S3, 51 | }, 52 | }, 53 | }); 54 | vpc.addFlowLog('FlowLogS3', { 55 | destination: ec2.FlowLogDestination.toS3(contentBucket, 'flowlogs/') 56 | }); 57 | vpc.addInterfaceEndpoint('EcrDockerEndpoint', { 58 | service: ec2.InterfaceVpcEndpointAwsService.ECR_DOCKER, 59 | }); 60 | vpc.addInterfaceEndpoint('KmsEndpoint', { 61 | service: ec2.InterfaceVpcEndpointAwsService.KMS, 62 | }); 63 | 64 | const appRole = new iam.Role(this, 'AppRole', { 65 | assumedBy: new iam.ServicePrincipal('ecs-tasks.amazonaws.com') 66 | }); 67 | 68 | 69 | // Amazon Elastic Container Service (Amazon ECS) 70 | const cluster = new ecs.Cluster(this, 'Cluster', { 71 | vpc, 72 | enableFargateCapacityProviders: true, 73 | containerInsights: true 74 | }); 75 | 76 | 77 | // Amazon Data Firehose 78 | const fhLogGroup = new logs.LogGroup(this, 'FhLogGroup', { 79 | retention: logs.RetentionDays.ONE_WEEK, 80 | }); 81 | const fhLogStream = new logs.LogStream(this, 'FhLogStream', { 82 | logGroup: fhLogGroup, 83 | removalPolicy: cdk.RemovalPolicy.DESTROY, 84 | }); 85 | const fhLogStreamPrompts = new logs.LogStream(this, 'FhLogStreamPrompts', { 86 | logGroup: fhLogGroup, 87 | removalPolicy: cdk.RemovalPolicy.DESTROY, 88 | }); 89 | const fhLogStreamGT = new logs.LogStream(this, 'App1FhLogStreamGT', { 90 | logGroup: fhLogGroup, 91 | removalPolicy: cdk.RemovalPolicy.DESTROY, 92 | }); 93 | const fhLogStreamOtelAnalysis = new logs.LogStream(this, 'FhLogStreamOtelAnalysis', { 94 | logGroup: fhLogGroup, 95 | removalPolicy: cdk.RemovalPolicy.DESTROY, 96 | }); 97 | const fhRole = new iam.Role(this, 'FhRole', { 98 | assumedBy: new iam.ServicePrincipal('firehose.amazonaws.com'), 99 | }); 100 | contentBucket.grantReadWrite(fhRole); 101 | fhRole.addToPolicy( 102 | new iam.PolicyStatement({ 103 | effect: iam.Effect.ALLOW, 104 | actions: [ 105 | 'logs:PutLogEvents', 106 | ], 107 | resources: ['*'] 108 | }) 109 | ); 110 | const s3DestinationConfigurationProperty: kinesisfirehose.CfnDeliveryStream.S3DestinationConfigurationProperty = { 111 | bucketArn: contentBucket.bucketArn, 112 | roleArn: fhRole.roleArn, 113 | bufferingHints: { 114 | intervalInSeconds: 60, 115 | sizeInMBs: 5, 116 | }, 117 | cloudWatchLoggingOptions: { 118 | enabled: true, 119 | logGroupName: fhLogGroup.logGroupName, 120 | logStreamName: fhLogStream.logStreamName 121 | }, 122 | compressionFormat: 'GZIP', 123 | prefix: 'embeddingarchive/app1/', 124 | }; 125 | const s3DestinationConfigurationPropertyPrompts: kinesisfirehose.CfnDeliveryStream.S3DestinationConfigurationProperty = { 126 | bucketArn: contentBucket.bucketArn, 127 | roleArn: fhRole.roleArn, 128 | bufferingHints: { 129 | intervalInSeconds: 60, 130 | sizeInMBs: 5, 131 | }, 132 | cloudWatchLoggingOptions: { 133 | enabled: true, 134 | logGroupName: fhLogGroup.logGroupName, 135 | logStreamName: fhLogStreamPrompts.logStreamName 136 | }, 137 | compressionFormat: 'GZIP', 138 | prefix: 'promptarchive/app1/', 139 | }; 140 | const s3DestinationConfigurationPropertyGT: kinesisfirehose.CfnDeliveryStream.S3DestinationConfigurationProperty = { 141 | bucketArn: contentBucket.bucketArn, 142 | roleArn: fhRole.roleArn, 143 | bufferingHints: { 144 | intervalInSeconds: 60, 145 | sizeInMBs: 5, 146 | }, 147 | cloudWatchLoggingOptions: { 148 | enabled: true, 149 | logGroupName: fhLogGroup.logGroupName, 150 | logStreamName: fhLogStreamGT.logStreamName 151 | }, 152 | compressionFormat: 'GZIP', 153 | prefix: 'gtarchive/app1/', 154 | }; 155 | const s3DestinationConfigurationPropertyOtelAnalysis: kinesisfirehose.CfnDeliveryStream.S3DestinationConfigurationProperty = { 156 | bucketArn: contentBucket.bucketArn, 157 | roleArn: fhRole.roleArn, 158 | bufferingHints: { 159 | intervalInSeconds: 60, 160 | sizeInMBs: 5, 161 | }, 162 | cloudWatchLoggingOptions: { 163 | enabled: true, 164 | logGroupName: fhLogGroup.logGroupName, 165 | logStreamName: fhLogStreamOtelAnalysis.logStreamName 166 | }, 167 | compressionFormat: 'UNCOMPRESSED', 168 | prefix: 'otel-trace-analysis/', 169 | }; 170 | const fh_embed = new kinesisfirehose.CfnDeliveryStream(this, "Firehose", { 171 | deliveryStreamType: "DirectPut", 172 | s3DestinationConfiguration: s3DestinationConfigurationProperty 173 | }); 174 | const fh_prompts = new kinesisfirehose.CfnDeliveryStream(this, "FirehosePrompts", { 175 | deliveryStreamType: "DirectPut", 176 | s3DestinationConfiguration: s3DestinationConfigurationPropertyPrompts 177 | }); 178 | const fh_gt = new kinesisfirehose.CfnDeliveryStream(this, "FirehoseGTApp1", { 179 | deliveryStreamType: "DirectPut", 180 | s3DestinationConfiguration: s3DestinationConfigurationPropertyGT 181 | }); 182 | const fh_otel_analysis = new kinesisfirehose.CfnDeliveryStream(this, "FirehoseOtelAnalysis", { 183 | deliveryStreamType: "DirectPut", 184 | s3DestinationConfiguration: s3DestinationConfigurationPropertyOtelAnalysis 185 | }); 186 | 187 | // Open Telemetry Collection Amazon ECS task 188 | const adotRole = new iam.Role(this, "adottaskrole", { 189 | assumedBy: new iam.ServicePrincipal("ecs-tasks.amazonaws.com") 190 | }); 191 | adotRole.addManagedPolicy( 192 | iam.ManagedPolicy.fromAwsManagedPolicyName("AmazonS3FullAccess") 193 | ); 194 | adotRole.addManagedPolicy( 195 | iam.ManagedPolicy.fromAwsManagedPolicyName("AWSXRayDaemonWriteAccess") 196 | ); 197 | adotRole.addManagedPolicy( 198 | iam.ManagedPolicy.fromAwsManagedPolicyName("CloudWatchLogsFullAccess") 199 | ); 200 | const adotTaskDefinition = new ecs.FargateTaskDefinition(this, "ADOT", { 201 | taskRole: adotRole, 202 | cpu: 512, 203 | memoryLimitMiB: 2048 204 | }); 205 | const adotConfig = new ssm.StringParameter(this, "adotconfig", { 206 | parameterName: 'otel-collector-config', 207 | stringValue: ` 208 | receivers: 209 | otlp: 210 | protocols: 211 | grpc: 212 | endpoint: 0.0.0.0:4317 213 | http: 214 | endpoint: 0.0.0.0:4318 215 | 216 | processors: 217 | batch: 218 | 219 | exporters: 220 | awsxray: 221 | region: ${this.region} 222 | logging: 223 | loglevel: debug 224 | awss3: 225 | s3uploader: 226 | region: '${this.region}' 227 | s3_bucket: '${contentBucket.bucketName}' 228 | s3_prefix: 'otel-traces' 229 | s3_partition: 'minute' 230 | 231 | extensions: 232 | sigv4auth: 233 | region: ${this.region} 234 | 235 | service: 236 | extensions: [sigv4auth] 237 | pipelines: 238 | traces: 239 | receivers: [otlp] 240 | processors: [batch] 241 | exporters: [awsxray, awss3] 242 | ` 243 | }) 244 | const adotContainer = adotTaskDefinition.addContainer("AdotContainer", { 245 | image: ecs.ContainerImage.fromRegistry("otel/opentelemetry-collector-contrib:0.95.0"), 246 | command: ["--config=env:OTEL_CONFIG"], 247 | secrets: { 248 | OTEL_CONFIG: ecs.Secret.fromSsmParameter(adotConfig) 249 | }, 250 | logging: ecs.LogDriver.awsLogs({ streamPrefix: "adot" }) 251 | }); 252 | adotContainer.addPortMappings({ 253 | containerPort: 4318, 254 | hostPort: 4318, 255 | protocol: ecs.Protocol.TCP, 256 | appProtocol: ecs.AppProtocol.http2, 257 | name: "adot-4318-tcp" 258 | }); 259 | const adotService = new ecsPatterns.NetworkLoadBalancedFargateService(this, "ADOTService", { 260 | serviceName: "adsotsvc", 261 | cluster, 262 | taskDefinition: adotTaskDefinition, 263 | publicLoadBalancer: false 264 | }); 265 | adotService.service.connections.securityGroups[0].addIngressRule( 266 | ec2.Peer.ipv4(vpc.vpcCidrBlock), 267 | ec2.Port.allTcp(), 268 | "Allow inbound from VPC for ADOT" 269 | ); 270 | adotService.service.autoScaleTaskCount({ maxCapacity: 2 }) 271 | .scaleOnCpuUtilization("AUTOSCALING", { 272 | targetUtilizationPercent: 70, 273 | scaleInCooldown: cdk.Duration.seconds(60), 274 | scaleOutCooldown: cdk.Duration.seconds(60) 275 | }); 276 | 277 | // AWS Lambda for processing of open telemetry traces 278 | const lambdaTraceAnalyzer = new lambda.Function(this, 'lambdaTraceAnalyzer', { 279 | runtime: lambda.Runtime.PYTHON_3_12, 280 | handler: 'lambda_function.lambda_handler', 281 | code: lambda.Code.fromAsset('lambda/trace-analyzer'), 282 | timeout: cdk.Duration.seconds(30), 283 | environment: { 284 | 'FIREHOSE_STREAM_NAME': fh_otel_analysis.ref, 285 | }, 286 | }); 287 | lambdaTraceAnalyzer.addToRolePolicy(new iam.PolicyStatement({ 288 | effect: iam.Effect.ALLOW, 289 | actions: [ 290 | 's3:*', 291 | 'bedrock:*', 292 | 'cloudformation:*', 293 | 'firehose:*', 294 | ], 295 | resources: ['*'] 296 | })) 297 | const lambdaTraceExtraction = new lambda.Function(this, 'lambdaTraceExtractor', { 298 | runtime: lambda.Runtime.PYTHON_3_12, 299 | handler: 'lambda_function.lambda_handler', 300 | code: lambda.Code.fromAsset('lambda/trace-extractor'), 301 | timeout: cdk.Duration.seconds(30), 302 | onSuccess: new lambdaDestinations.LambdaDestination(lambdaTraceAnalyzer, { 303 | responseOnly: true, 304 | }), 305 | }); 306 | lambdaTraceExtraction.addToRolePolicy(new iam.PolicyStatement({ 307 | effect: iam.Effect.ALLOW, 308 | actions: [ 309 | 's3:*', 310 | 'lambda:InvokeFunction', 311 | ], 312 | resources: ['*'] 313 | })); 314 | contentBucket.addObjectCreatedNotification( 315 | new s3Notifications.LambdaDestination(lambdaTraceExtraction), 316 | {prefix: 'otel-traces'} 317 | ); 318 | 319 | // Amazon ECS Front-end Streamlit Application task 320 | const appImage = new DockerImageAsset(this, 'AppImage', { 321 | directory: 'container/front-end-app', 322 | platform: Platform.LINUX_AMD64 323 | }); 324 | const appTaskDefinition = new ecs.FargateTaskDefinition(this, 'AppTaskDef', { 325 | cpu: 512, 326 | memoryLimitMiB: 2048, 327 | taskRole: appRole 328 | }); 329 | const appContainer = appTaskDefinition.addContainer('StreamlitContainer', { 330 | image: ecs.ContainerImage.fromDockerImageAsset(appImage), 331 | cpu: 512, 332 | memoryLimitMiB: 2048, 333 | logging: ecs.LogDrivers.awsLogs({ streamPrefix: 'streamlit-log-group', logRetention: 30 }), 334 | environment: { 335 | 'TRACELOOP_BASE_URL': `http://${adotService.loadBalancer.loadBalancerDnsName}:80` 336 | } 337 | }); 338 | appRole.addToPolicy(new iam.PolicyStatement({ 339 | effect: iam.Effect.ALLOW, 340 | actions: [ 341 | 'bedrock:*', 342 | 'comprehend:*' 343 | ], 344 | resources: ['*'] 345 | })) 346 | appRole.addManagedPolicy( 347 | iam.ManagedPolicy.fromAwsManagedPolicyName("AmazonS3FullAccess") 348 | ); 349 | appContainer.addPortMappings({ containerPort: 8501, protocol: ecs.Protocol.TCP }); 350 | 351 | // Amazon Route 53 352 | const appCustomDomainName = this.node.tryGetContext('appCustomDomainName'); 353 | const loadBalancerOriginCustomDomainName = this.node.tryGetContext('loadBalancerOriginCustomDomainName'); 354 | const customDomainRoute53HostedZoneID = this.node.tryGetContext('customDomainRoute53HostedZoneID'); 355 | const customDomainRoute53HostedZoneName = this.node.tryGetContext('customDomainRoute53HostedZoneName'); 356 | const customDomainCertificateArn = this.node.tryGetContext('customDomainCertificateArn'); 357 | const hosted_zone = route53.HostedZone.fromHostedZoneAttributes(this, 'HostedZone', { 358 | hostedZoneId: customDomainRoute53HostedZoneID, 359 | zoneName: customDomainRoute53HostedZoneName 360 | }); 361 | const certificate = acm.Certificate.fromCertificateArn(this, 'ACMCertificate', `${customDomainCertificateArn}`); 362 | 363 | // Front-end service and distribution 364 | const feService = new ecsPatterns.ApplicationLoadBalancedFargateService(this, 'FeService', { 365 | cluster: cluster, 366 | taskDefinition: appTaskDefinition, 367 | protocol: ApplicationProtocol.HTTPS, 368 | certificate: certificate, 369 | domainName: loadBalancerOriginCustomDomainName, 370 | domainZone: hosted_zone 371 | }); 372 | feService.loadBalancer.logAccessLogs(contentBucket, 'alblog') 373 | const alb_sg2 = feService.loadBalancer.connections.securityGroups[0]; 374 | alb_sg2.addEgressRule(ec2.Peer.anyIpv4(), ec2.Port.tcp(443), 'Allow HTTPS'); 375 | const customHeaderValue2 = '8p008a1738' 376 | const origin2 = new HttpOrigin(`${loadBalancerOriginCustomDomainName}`, { 377 | protocolPolicy: OriginProtocolPolicy.HTTPS_ONLY, 378 | customHeaders: { 379 | "X-Custom-Header": customHeaderValue2 380 | } 381 | }); 382 | // Origin request policy 383 | const originRequestPolicy = new OriginRequestPolicy(this, 'OriginRequestPolicy', { 384 | originRequestPolicyName: 'ALBPolicy2', 385 | cookieBehavior: OriginRequestCookieBehavior.all(), 386 | headerBehavior: OriginRequestHeaderBehavior.all(), 387 | queryStringBehavior: OriginRequestQueryStringBehavior.all(), 388 | }); 389 | const distribution = new Distribution(this, 'Distribution', { 390 | certificate: certificate, 391 | domainNames: [appCustomDomainName], 392 | defaultBehavior: { 393 | origin: origin2, 394 | viewerProtocolPolicy: ViewerProtocolPolicy.REDIRECT_TO_HTTPS, 395 | originRequestPolicy: originRequestPolicy, 396 | allowedMethods: AllowedMethods.ALLOW_ALL, 397 | cachePolicy: CachePolicy.CACHING_DISABLED, 398 | } 399 | }); 400 | 401 | const cloudFrontDNS = new route53.ARecord(this, 'CloudFrontARecord', { 402 | zone: hosted_zone, 403 | target: route53.RecordTarget.fromAlias(new CloudFrontTarget(distribution)), 404 | recordName: appCustomDomainName 405 | }); 406 | 407 | 408 | 409 | // Amazon Cognito 410 | const userPool = new UserPool(this, 'UserPool', { 411 | selfSignUpEnabled: true, 412 | signInAliases: { email: true }, 413 | }); 414 | const userPoolClient = userPool.addClient('UserPoolClient', { 415 | userPoolClientName: "alb-auth-client", 416 | generateSecret: true, 417 | oAuth: { 418 | flows: { 419 | authorizationCodeGrant: true, 420 | }, 421 | scopes: [OAuthScope.OPENID], 422 | callbackUrls: [`https://${distribution.distributionDomainName}/oauth2/idpresponse`, 423 | `https://${distribution.distributionDomainName}`, 424 | `https://${appCustomDomainName}/oauth2/idpresponse`, 425 | `https://${appCustomDomainName}` 426 | ], 427 | logoutUrls: [`https://${distribution.distributionDomainName}`, 428 | `https://${appCustomDomainName}`, 429 | ] 430 | }, 431 | supportedIdentityProviders: [ 432 | UserPoolClientIdentityProvider.COGNITO 433 | ] 434 | }); 435 | 436 | const domain_prefix = this.node.tryGetContext('domainPrefix'); 437 | const userPoolDomain = userPool.addDomain('UserPoolDomain', { 438 | cognitoDomain: { 439 | domainPrefix: domain_prefix 440 | } 441 | }); 442 | 443 | feService.listener.addAction( 444 | 'cognito-auth', { 445 | priority: 1, 446 | conditions: [ListenerCondition.httpHeader("X-Custom-Header", [customHeaderValue2])], 447 | action: new AuthenticateCognitoAction({ 448 | userPool, 449 | userPoolClient, 450 | userPoolDomain, 451 | next: ListenerAction.forward([feService.targetGroup]) 452 | }) 453 | } 454 | ); 455 | feService.listener.addAction( 456 | 'Default', { 457 | action: ListenerAction.fixedResponse(403, { 458 | contentType: 'text/plain', 459 | messageBody: 'Forbidden' 460 | }) 461 | } 462 | ); 463 | 464 | 465 | // CDK outputs 466 | new cdk.CfnOutput(this, 'CloudFrontDomain', { 467 | value: `https://${distribution.distributionDomainName}` 468 | }); 469 | new cdk.CfnOutput(this, 'AppURL2', { 470 | value: `https://${appCustomDomainName}` 471 | }); 472 | new cdk.CfnOutput(this, 'BucketName', { 473 | value: contentBucket.bucketName 474 | }); 475 | new cdk.CfnOutput(this, 'BucketIngestPath', { 476 | value: `${contentBucket.bucketName}/ingest` 477 | }); 478 | } 479 | } 480 | --------------------------------------------------------------------------------