├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── bedrock-demo-arch.png ├── bedrock_demo_mov.gif ├── cdk.json ├── infra ├── app.py ├── lib │ ├── backend.py │ └── frontend.py └── tests │ ├── __init__.py │ └── unit │ ├── __init__.py │ └── test_infra_stack.py ├── requirements-dev.txt ├── requirements.txt └── src ├── backend ├── .flaskenv ├── Dockerfile ├── app.py ├── new_crawl.py ├── output │ └── .gitkeep └── requirements.txt └── frontend ├── package.json ├── public ├── Amazon-Ember-Medium.ttf ├── android-chrome-192x192.png ├── android-chrome-512x512.png ├── apple-touch-icon.png ├── bedrock.png ├── favicon-16x16.png ├── favicon-32x32.png ├── favicon.ico ├── index.html ├── manifest.json ├── robots.txt └── site.webmanifest ├── src ├── AI21.js ├── Amazon.js ├── Anthropic.js ├── App.css ├── App.js ├── App.test.js ├── StableDiffusion.js ├── bedrock_icon.png ├── index.css ├── index.js ├── logo.svg ├── proxy.js ├── reportWebVitals.js ├── routing.js └── setupTests.js └── webpack.config.js /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | .DS_Store 3 | 4 | cdk.context.json 5 | *.swp 6 | package-lock.json 7 | __pycache__ 8 | .pytest_cache 9 | .venv 10 | *.egg-info 11 | 12 | # CDK asset staging directory 13 | .cdk.staging 14 | cdk.out 15 | 16 | #amplify-do-not-edit-begin 17 | amplify/\#current-cloud-backend 18 | amplify/.config/local-* 19 | amplify/logs 20 | amplify/mock-data 21 | amplify/mock-api-resources 22 | amplify/backend/amplify-meta.json 23 | amplify/backend/.temp 24 | build/ 25 | dist/ 26 | node_modules/ 27 | aws-exports.js 28 | awsconfiguration.json 29 | amplifyconfiguration.json 30 | amplifyconfiguration.dart 31 | amplify-build-config.json 32 | amplify-gradle-config.json 33 | amplifytools.xcconfig 34 | .secret-* 35 | **.sample 36 | #amplify-do-not-edit-end 37 | 38 | src/frontend/src/proxy.js 
-------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | 3 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 4 | 5 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact opensource-codeofconduct@amazon.com with any additional questions or comments. 6 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional documentation, we greatly value feedback and contributions from our community. 4 | 5 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary information to effectively respond to your bug report or contribution. 6 | 7 | 8 | ## Reporting Bugs/Feature Requests 9 | 10 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 11 | 12 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 13 | 14 | * A reproducible test case or series of steps 15 | * The version of our code being used 16 | * Any modifications you've made relevant to the bug 17 | * Anything unusual about your environment or deployment 18 | 19 | 20 | ## Contributing via Pull Requests 21 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 22 | 23 | 1. You are working against the latest source on the *main* branch. 24 | 2. 
You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 25 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 26 | 27 | To send us a pull request, please: 28 | 29 | 1. Fork the repository. 30 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 31 | 3. Ensure local tests pass. 32 | 4. Commit to your fork using clear commit messages. 33 | 5. Send us a pull request, answering any default questions in the pull request interface. 34 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 35 | 36 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 37 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 38 | 39 | 40 | ## Finding contributions to work on 41 | Looking at the existing issues is a great way to find something to contribute to. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 42 | 43 | 44 | ## Code of Conduct 45 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 46 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 47 | opensource-codeofconduct@amazon.com with any additional questions or comments. 48 | 49 | 50 | ## Security issue notifications 51 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue. 
52 | 53 | 54 | ## Licensing 55 | 56 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 57 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this 4 | software and associated documentation files (the "Software"), to deal in the Software 5 | without restriction, including without limitation the rights to use, copy, modify, 6 | merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 7 | permit persons to whom the Software is furnished to do so. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 10 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 11 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 12 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 13 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 14 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RAG with AWS Bedrock and React 2 | 3 | Use this solution to quickly and inexpensively begin prototyping and vetting business use cases for GenAI using a custom corpus of knowledge with Retrieval Augmented Generation (RAG) in a low-code ReactJS application. 
4 | 5 | This solution contains a backend Flask application which uses LangChain to provide PDF data as embeddings to your choice of text-gen foundation model via Amazon Bedrock, the new managed LLM-provider service from Amazon Web Services (AWS), with your choice of vector database: FAISS or a Kendra index. 6 | 7 | ## What You'll Build 8 | 9 | ![Bedrock Demo Architecture](bedrock-demo-arch.png) 10 | 11 | ## Screenshots 12 | 13 | ![Bedrock Demo FrontEnd](bedrock_demo_mov.gif) 14 | 15 | ## Prerequisites 16 | 17 | 1. [AWS CDK](https://docs.aws.amazon.com/cdk/latest/guide/getting_started.html) 18 | 2. [Node.js & npm](https://docs.npmjs.com/downloading-and-installing-node-js-and-npm) 19 | 3. [Python 3.8 or higher](https://www.python.org/downloads/macos/) 20 | 4. AWS CLI configured with appropriate permissions 21 | 22 | ## How to Deploy 23 | 24 | 1. Clone the repository and navigate to the project directory. 25 | 26 | 2. Install the Python dependencies for the CDK Deployment: 27 | 28 | ``` 29 | pip install -r requirements.txt 30 | ``` 31 | 32 | 3. Bootstrap CDK (if not already done): 33 | If this is your first time using CDK in this AWS account and region, you need to bootstrap CDK. This command deploys a CDK toolkit stack to your account that helps with asset management: 34 | ``` 35 | cdk bootstrap aws://YOUR_ACCOUNT_NUMBER/YOUR_REGION 36 | ``` 37 | Replace `YOUR_ACCOUNT_NUMBER` with your AWS account number and `YOUR_REGION` with your desired AWS region. 38 | 39 | 40 | 4. Deploy the Backend CDK stack. 41 | 42 | ``` 43 | cdk deploy BedrockDemo-BackendStack 44 | ``` 45 | 46 | 5. Redeploy the frontend stack to update the proxy URL: 47 | 48 | ``` 49 | cdk deploy BedrockDemo-FrontendStack 50 | ``` 51 | 52 | Your application should now be accessible at the frontend URL provided by the CDK output. 53 | 54 | ## How to Use 55 | 56 | Once you confirm that the app(s) are running, you can begin prototyping. 
57 | 58 | 59 | ### Add Your Own Corpus for RAG Embeddings 60 | 61 | PDF data is read from `./backend/flask/output` and stored in an in-memory vector database using FAISS when the Flask app is started. If you add or remove PDF data from the `./backend/flask/output` directory, you'll need to restart the Flask application for the changes to take effect. 62 | 63 | Alternatively, you can use the database button in the lower right corner of the application to add or remove PDF documents manually or from S3 and subsequently reinstantiate the in-memory vector database, or instantiate a connection to a Kendra index. 64 | 65 | 66 | ### Prompt Engineering with LangChain 67 | 68 | Use the button just above the database button to update the Prompt Template: explicit instructions for how a text-gen model should respond. 69 | 70 | Foundational Models are trained in specific ways to interact with prompts. Check out the [Claude Documentation Page](https://docs.anthropic.com/claude/docs) to learn best practices and find examples of pre-engineered prompts. 71 | 72 | 73 | ## License 74 | 75 | This library is licensed under the MIT-0 License. See the LICENSE file. 
76 | -------------------------------------------------------------------------------- /bedrock-demo-arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-bedrock-with-rag-and-react/2ed673990a335464141a56291daedd5fd3f00626/bedrock-demo-arch.png -------------------------------------------------------------------------------- /bedrock_demo_mov.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-bedrock-with-rag-and-react/2ed673990a335464141a56291daedd5fd3f00626/bedrock_demo_mov.gif -------------------------------------------------------------------------------- /cdk.json: -------------------------------------------------------------------------------- 1 | { 2 | "app": "python3 infra/app.py", 3 | "watch": { 4 | "include": [ 5 | "**" 6 | ], 7 | "exclude": [ 8 | "README.md", 9 | "cdk*.json", 10 | "requirements*.txt", 11 | "source.bat", 12 | "**/__init__.py", 13 | "**/__pycache__", 14 | "tests" 15 | ] 16 | }, 17 | "context": { 18 | "@aws-cdk/aws-lambda:recognizeLayerVersion": true, 19 | "@aws-cdk/core:checkSecretUsage": true, 20 | "@aws-cdk/core:target-partitions": [ 21 | "aws", 22 | "aws-cn" 23 | ], 24 | "@aws-cdk-containers/ecs-service-extensions:enableDefaultLogDriver": true, 25 | "@aws-cdk/aws-ec2:uniqueImdsv2TemplateName": true, 26 | "@aws-cdk/aws-ecs:arnFormatIncludesClusterName": true, 27 | "@aws-cdk/aws-iam:minimizePolicies": true, 28 | "@aws-cdk/core:validateSnapshotRemovalPolicy": true, 29 | "@aws-cdk/aws-codepipeline:crossAccountKeyAliasStackSafeResourceName": true, 30 | "@aws-cdk/aws-s3:createDefaultLoggingPolicy": true, 31 | "@aws-cdk/aws-sns-subscriptions:restrictSqsDescryption": true, 32 | "@aws-cdk/aws-apigateway:disableCloudWatchRole": true, 33 | "@aws-cdk/core:enablePartitionLiterals": true, 34 | "@aws-cdk/aws-events:eventsTargetQueueSameAccount": true, 35 | 
"@aws-cdk/aws-iam:standardizedServicePrincipals": true, 36 | "@aws-cdk/aws-ecs:disableExplicitDeploymentControllerForCircuitBreaker": true, 37 | "@aws-cdk/aws-iam:importedRoleStackSafeDefaultPolicyName": true, 38 | "@aws-cdk/aws-s3:serverAccessLogsUseBucketPolicy": true, 39 | "@aws-cdk/aws-route53-patters:useCertificate": true, 40 | "@aws-cdk/customresources:installLatestAwsSdkDefault": false, 41 | "@aws-cdk/aws-rds:databaseProxyUniqueResourceName": true, 42 | "@aws-cdk/aws-codedeploy:removeAlarmsFromDeploymentGroup": true, 43 | "@aws-cdk/aws-apigateway:authorizerChangeDeploymentLogicalId": true, 44 | "@aws-cdk/aws-ec2:launchTemplateDefaultUserData": true, 45 | "@aws-cdk/aws-secretsmanager:useAttachedSecretResourcePolicyForSecretTargetAttachments": true, 46 | "@aws-cdk/aws-redshift:columnId": true, 47 | "@aws-cdk/aws-stepfunctions-tasks:enableEmrServicePolicyV2": true, 48 | "@aws-cdk/aws-ec2:restrictDefaultSecurityGroup": true, 49 | "@aws-cdk/aws-apigateway:requestValidatorUniqueId": true, 50 | "@aws-cdk/aws-kms:aliasNameRef": true, 51 | "@aws-cdk/aws-autoscaling:generateLaunchTemplateInsteadOfLaunchConfig": true, 52 | "@aws-cdk/core:includePrefixInUniqueNameGeneration": true, 53 | "@aws-cdk/aws-efs:denyAnonymousAccess": true, 54 | "@aws-cdk/aws-opensearchservice:enableOpensearchMultiAzWithStandby": true, 55 | "@aws-cdk/aws-lambda-nodejs:useLatestRuntimeVersion": true, 56 | "@aws-cdk/aws-efs:mountTargetOrderInsensitiveLogicalId": true, 57 | "@aws-cdk/aws-rds:auroraClusterChangeScopeOfInstanceParameterGroupWithEachParameters": true, 58 | "@aws-cdk/aws-appsync:useArnForSourceApiAssociationIdentifier": true, 59 | "@aws-cdk/aws-rds:preventRenderingDeprecatedCredentials": true, 60 | "@aws-cdk/aws-codepipeline-actions:useNewDefaultBranchForCodeCommitSource": true, 61 | "@aws-cdk/aws-cloudwatch-actions:changeLambdaPermissionLogicalIdForLambdaAction": true, 62 | "@aws-cdk/aws-codepipeline:crossAccountKeysDefaultValueToFalse": true, 63 | 
"@aws-cdk/aws-codepipeline:defaultPipelineTypeToV2": true, 64 | "@aws-cdk/aws-kms:reduceCrossAccountRegionPolicyScope": true, 65 | "@aws-cdk/aws-eks:nodegroupNameAttribute": true, 66 | "@aws-cdk/aws-ec2:ebsDefaultGp3Volume": true, 67 | "@aws-cdk/aws-ecs:removeDefaultDeploymentAlarm": true, 68 | "@aws-cdk/custom-resources:logApiResponseDataPropertyTrueDefault": false 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /infra/app.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | 4 | import os 5 | import boto3 6 | 7 | from botocore.exceptions import ClientError 8 | from aws_cdk import App, Environment 9 | from aws_cdk import aws_ec2 as ec2 10 | 11 | from lib.backend import BackendStack 12 | from lib.frontend import FrontendStack 13 | 14 | # Get path of current script's parent directory 15 | current_dir = os.path.dirname(__file__) 16 | # Parent Directory 17 | parent_dir = os.path.dirname(current_dir) 18 | # Source Directory 19 | src_dir = os.path.join(parent_dir, "src") 20 | # Frontend Directory 21 | frontend_dir = os.path.join(src_dir, "frontend") 22 | # Backend Directory 23 | backend_dir = os.path.join(src_dir, "backend") 24 | 25 | 26 | def get_backend_url(stack_name): 27 | cfn_client = boto3.client('cloudformation') 28 | try: 29 | response = cfn_client.describe_stacks(StackName=stack_name) 30 | outputs = response['Stacks'][0]['Outputs'] 31 | for output in outputs: 32 | if output['OutputKey'] == 'BackendURL': 33 | return output['OutputValue'] 34 | except ClientError as e: 35 | if e.response['Error']['Code'] == 'ValidationError': 36 | print("Backend stack not found. 
Running synthesis with assumed defaults..") 37 | else: 38 | print(f"An error occurred: {e}") 39 | return None 40 | 41 | 42 | app = App() 43 | env = Environment(account=os.environ.get("CDK_DEFAULT_ACCOUNT", None), 44 | region=os.environ.get("CDK_DEFAULT_REGION", "us-east-1")) 45 | 46 | # Deploy backend stack 47 | backend = BackendStack(app, "BedrockDemo-BackendStack", 48 | backend_dir=backend_dir, 49 | env=env 50 | ) 51 | 52 | backend_url = get_backend_url("BedrockDemo-BackendStack") 53 | if not backend_url: 54 | backend_url = "http://localhost:5000" 55 | 56 | # Deploy frontend stack 57 | frontend = FrontendStack(app, "BedrockDemo-FrontendStack", 58 | frontend_path=frontend_dir, 59 | proxy_url=backend_url, 60 | env=env 61 | ) 62 | 63 | app.synth() 64 | -------------------------------------------------------------------------------- /infra/lib/backend.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | # SPDX-License-Identifier: MIT-0 3 | 4 | from aws_cdk import ( 5 | Stack, 6 | aws_ecr_assets as ecr_assets, 7 | aws_iam as iam, 8 | CfnOutput, 9 | ) 10 | from constructs import Construct 11 | import aws_cdk.aws_apprunner_alpha as apprunner_alpha 12 | 13 | class BackendStack(Stack): 14 | def __init__(self, scope: Construct, construct_id: str, backend_dir: str, **kwargs): 15 | super().__init__(scope, construct_id, **kwargs) 16 | 17 | # Backend Docker image 18 | backend_image = ecr_assets.DockerImageAsset( 19 | self, "BackendDockerImage", 20 | directory=backend_dir, 21 | platform=ecr_assets.Platform.LINUX_AMD64, 22 | ) 23 | 24 | # App Runner service using the alpha module 25 | app_runner_service = apprunner_alpha.Service( 26 | self, "CarPartsAssistantService", 27 | source=apprunner_alpha.Source.from_asset( 28 | asset=backend_image, 29 | image_configuration=apprunner_alpha.ImageConfiguration( 30 | port=5000 # Assuming your backend still listens on port 5000 31 | ) 32 | ), 33 | cpu=apprunner_alpha.Cpu.FOUR_VCPU, 34 | memory=apprunner_alpha.Memory.TWELVE_GB 35 | ) 36 | 37 | app_runner_service.add_to_role_policy( 38 | iam.PolicyStatement( 39 | actions=["bedrock:InvokeModel"], 40 | resources=["*"], 41 | ) 42 | ) 43 | 44 | # Outputs 45 | self.backend_url = CfnOutput( 46 | self, 47 | "BackendURL", 48 | value="https://"+app_runner_service.service_url, 49 | description="URL of the Bedrock React Demo Backend", 50 | ) 51 | -------------------------------------------------------------------------------- /infra/lib/frontend.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | # SPDX-License-Identifier: MIT-0 3 | 4 | from aws_cdk import ( 5 | Stack, 6 | aws_s3 as s3, 7 | aws_s3_deployment as s3deploy, 8 | aws_cloudfront as cloudfront, 9 | aws_cloudfront_origins as origins, 10 | aws_iam as iam, 11 | CfnOutput, 12 | RemovalPolicy, 13 | BundlingOptions, 14 | DockerImage, 15 | ) 16 | from constructs import Construct 17 | 18 | class FrontendStack(Stack): 19 | def __init__(self, scope: Construct, construct_id: str, *, 20 | frontend_path: str, proxy_url: str, **kwargs): 21 | super().__init__(scope, construct_id, **kwargs) 22 | 23 | # Create a private S3 bucket to host the React app 24 | hosting_bucket = s3.Bucket( 25 | self, "ReactAppHostingBucket", 26 | public_read_access=False, 27 | block_public_access=s3.BlockPublicAccess.BLOCK_ALL, 28 | encryption=s3.BucketEncryption.S3_MANAGED, 29 | removal_policy=RemovalPolicy.DESTROY, # Use with caution in production 30 | auto_delete_objects=True, # Use with caution in production 31 | ) 32 | 33 | # Update proxy.js with the provided proxy_url or a placeholder 34 | proxy_file_content = f""" 35 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
36 | // SPDX-License-Identifier: MIT-0 37 | 38 | const proxy_url = "{proxy_url}"; 39 | 40 | export default proxy_url; 41 | """ 42 | 43 | # Deploy the React app build to S3 44 | s3deploy.BucketDeployment( 45 | self, "DeployReactApp", 46 | sources=[s3deploy.Source.asset( 47 | frontend_path, 48 | bundling=BundlingOptions( 49 | image=DockerImage.from_registry("public.ecr.aws/docker/library/node:lts-slim"), 50 | command=[ 51 | "bash", "-c", 52 | f''' 53 | npm install 54 | echo '{proxy_file_content}' > src/proxy.js 55 | npm run build 56 | cp -r build/* /asset-output/ 57 | ''' 58 | ], 59 | user="root" 60 | ) 61 | )], 62 | destination_bucket=hosting_bucket, 63 | prune=False, 64 | ) 65 | 66 | # CloudFront Origin Access Identity 67 | origin_access_identity = cloudfront.OriginAccessIdentity( 68 | self, "OriginAccessIdentity", 69 | comment=f"OAI for {construct_id}" 70 | ) 71 | 72 | # Grant read permissions to CloudFront 73 | hosting_bucket.add_to_resource_policy(iam.PolicyStatement( 74 | actions=["s3:GetObject"], 75 | resources=[hosting_bucket.arn_for_objects("*")], 76 | principals=[iam.CanonicalUserPrincipal( 77 | origin_access_identity.cloud_front_origin_access_identity_s3_canonical_user_id 78 | )] 79 | )) 80 | 81 | # Create a CloudFront distribution 82 | self.distribution = cloudfront.Distribution( 83 | self, "ReactAppDistribution", 84 | default_behavior=cloudfront.BehaviorOptions( 85 | origin=origins.S3Origin( 86 | hosting_bucket, 87 | origin_access_identity=origin_access_identity 88 | ), 89 | viewer_protocol_policy=cloudfront.ViewerProtocolPolicy.REDIRECT_TO_HTTPS, 90 | ), 91 | default_root_object="index.html", 92 | error_responses=[ 93 | cloudfront.ErrorResponse( 94 | http_status=404, 95 | response_http_status=200, 96 | response_page_path="/index.html", 97 | ), 98 | ], 99 | ) 100 | 101 | # Output the CloudFront distribution URL 102 | CfnOutput( 103 | self, "FrontendURL", 104 | value=f"https://{self.distribution.distribution_domain_name}", 105 | description="Frontend 
Application URL", 106 | ) -------------------------------------------------------------------------------- /infra/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 -------------------------------------------------------------------------------- /infra/tests/unit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-bedrock-with-rag-and-react/2ed673990a335464141a56291daedd5fd3f00626/infra/tests/unit/__init__.py -------------------------------------------------------------------------------- /infra/tests/unit/test_infra_stack.py: -------------------------------------------------------------------------------- 1 | import aws_cdk as core 2 | import aws_cdk.assertions as assertions 3 | 4 | from infra.infra_stack import InfraStack 5 | 6 | # example tests. 
To run these tests, uncomment this file along with the example 7 | # resource in infra/infra_stack.py 8 | def test_sqs_queue_created(): 9 | app = core.App() 10 | stack = InfraStack(app, "infra") 11 | template = assertions.Template.from_stack(stack) 12 | 13 | # template.has_resource_properties("AWS::SQS::Queue", { 14 | # "VisibilityTimeout": 300 15 | # }) 16 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | pytest==6.2.5 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | aws-cdk-lib==2.147.2 2 | aws_cdk.aws_apprunner_alpha 3 | constructs>=10.0.0,<11.0.0 4 | boto3 -------------------------------------------------------------------------------- /src/backend/.flaskenv: -------------------------------------------------------------------------------- 1 | FLASK_APP=./flask/app.py 2 | FLASK_ENV=development -------------------------------------------------------------------------------- /src/backend/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9-slim 2 | 3 | WORKDIR /app 4 | 5 | COPY requirements.txt /app 6 | 7 | RUN pip install --no-cache-dir -r requirements.txt 8 | 9 | COPY *.py /app/ 10 | 11 | COPY output/ /app 12 | 13 | EXPOSE 5000 14 | 15 | ENV FLASK_APP=app.py 16 | 17 | CMD ["flask", "run", "--host=0.0.0.0", "--port=5000"] 18 | -------------------------------------------------------------------------------- /src/backend/new_crawl.py: -------------------------------------------------------------------------------- 1 | # USAGE: 2 | # python3 webscraper.py -d 1 3 | # python3 webscraper.py -d6 -e node-title -p 4 | import os 5 | from selenium import webdriver 6 | from selenium.webdriver.support.ui import WebDriverWait 7 | from 
selenium.webdriver.support import expected_conditions as EC 8 | from selenium.webdriver.common.by import By 9 | from selenium.common.exceptions import TimeoutException 10 | 11 | from bs4 import BeautifulSoup 12 | from urllib.parse import urljoin 13 | from urllib.parse import urlparse 14 | from urllib.parse import urlunparse 15 | import concurrent.futures 16 | import argparse, base64, json, re, time 17 | from argparse import ArgumentTypeError 18 | from datetime import datetime 19 | 20 | OUTPUT_DATA_DIR = './output' 21 | MAX_WORKERS = 3 22 | PAGE_LOAD_TIMEOUT = 10 # seconds 23 | SLEEP_DELAY = 10 # seconds to avoid rate limiting in certain sites 24 | crawled_pg_cnt = 0 25 | 26 | # Create the "crawledData" folder in the current directory if it doesn't exist 27 | if not os.path.exists(OUTPUT_DATA_DIR): 28 | os.makedirs(OUTPUT_DATA_DIR) 29 | 30 | # Parse URL to validate it 31 | def parse_url_from_str(arg): 32 | url = urlparse(arg) 33 | if all((url.scheme, url.netloc)): # possibly other sections? 
34 | return arg # return url in case you need the parsed object 35 | raise ArgumentTypeError('Invalid URL') 36 | 37 | def remove_unwanted_content(driver, pdf_data): 38 | # Execute JavaScript to hide specific elements before saving as PDF 39 | # You can add more JavaScript code here to hide different elements as needed 40 | hide_elements_js = """ 41 | // Hide elements with specific tags 42 | var tagsToHide = ['img', 'script', 'style', 'video', 'svg', 'iframe', 'code']; 43 | tagsToHide.forEach(function(tag) { 44 | var elements = document.getElementsByTagName(tag); 45 | for (var i = 0; i < elements.length; i++) { 46 | elements[i].style.display = 'none'; 47 | } 48 | }); 49 | """ 50 | 51 | ex = ''' // Hide elements with specific class names 52 | var classNamesToHide = ['advertisement', 'sidebar', 'header']; 53 | classNamesToHide.forEach(function(className) { 54 | var elements = document.getElementsByClassName(className); 55 | for (var i = 0; i < elements.length; i++) { 56 | elements[i].style.display = 'none'; 57 | } 58 | }); 59 | ''' 60 | 61 | # Execute the JavaScript code in the context of the current page 62 | driver.execute_script(hide_elements_js) 63 | 64 | # Generate the updated PDF data 65 | result = send_devtools(driver, "Page.printToPDF", {}) 66 | if (result is not None): 67 | return base64.b64decode(result['data']) 68 | else: 69 | return pdf_data 70 | 71 | 72 | def send_devtools(driver, cmd, params={}): 73 | resource = "/session/%s/chromium/send_command_and_get_result" % driver.session_id 74 | url = driver.command_executor._url + resource 75 | body = json.dumps({'cmd': cmd, 'params': params}) 76 | response = driver.command_executor._request('POST', url, body) 77 | if (response.get('value') is not None): 78 | return response.get('value') 79 | else: 80 | return None 81 | 82 | def save_as_pdf(driver, path, options={}): 83 | result = send_devtools(driver, "Page.printToPDF", options) 84 | if (result is not None): 85 | with open(path, 'wb') as file: 86 | # Decode the 
PDF data 87 | pdf_data = base64.b64decode(result['data']) 88 | 89 | # Remove unwanted content from the PDF data using JavaScript 90 | pdf_data = remove_unwanted_content(driver, pdf_data) 91 | 92 | file.write(pdf_data) 93 | return True 94 | else: 95 | return False 96 | 97 | def delete_files_in_directory(directory_path): 98 | try: 99 | # Get a list of all files in the directory 100 | files = os.listdir(directory_path) 101 | 102 | # Loop through the files and delete each one 103 | for file_name in files: 104 | file_path = os.path.join(directory_path, file_name) 105 | if os.path.isfile(file_path): 106 | os.remove(file_path) 107 | 108 | print("All files in the PDF output directory have been deleted.") 109 | except Exception as e: 110 | print(f"Error: {e}") 111 | 112 | # Helper function for crawling 113 | def crawl(url, max_depth=3, current_depth=1, current_pg_cnt=0, element_id='', prefix=''): 114 | if url in crawled or current_depth > max_depth: 115 | return 116 | crawled.add(url) 117 | time.sleep(SLEEP_DELAY) 118 | try: 119 | chrome_options = webdriver.ChromeOptions() 120 | chrome_options.headless = True 121 | 122 | driver = webdriver.Chrome(options=chrome_options) 123 | driver.implicitly_wait(PAGE_LOAD_TIMEOUT) 124 | driver.get(url) 125 | 126 | # If an element_id was specified as a cmd line argument, wait for it to appear in the DOM before proceeding 127 | # else, just look for the HTML tag 128 | try: 129 | if element_id is None or (not isinstance(element_id, str)) or element_id.strip() == "": 130 | element_present = EC.presence_of_element_located((By.TAG_NAME, 'body')) 131 | else: 132 | element_present = EC.presence_of_element_located((By.ID, element_id)) 133 | 134 | WebDriverWait(driver, PAGE_LOAD_TIMEOUT).until(element_present) 135 | except TimeoutException: 136 | print(f"Timed out waiting for page to load - {url}") 137 | return 138 | 139 | current_pg_cnt += 1 140 | page_source = driver.page_source 141 | page_title = get_title_from_page(page_source) 142 | if 
page_title is None: 143 | page_title = '' 144 | page_timestamp = datetime.utcnow().strftime('%Y%m%d_%H%M%S.%f')[:-3] # millisecond precision 145 | page_filename = str(current_pg_cnt) + "-" + page_title.strip().replace(" ", "_") + '.' + page_timestamp + '.pdf' 146 | page_filepath = os.path.join(OUTPUT_DATA_DIR, page_filename) 147 | 148 | save_as_pdf(driver, page_filepath, { 'landscape': False, 'displayHeaderFooter': True }) 149 | 150 | if current_depth < max_depth: 151 | links = collect_links_from_page(url, page_source, prefix) 152 | 153 | with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor: 154 | future_to_url = {executor.submit(crawl, link, max_depth, current_depth + 1, current_pg_cnt, element_id, prefix): link for link in links} 155 | for future in concurrent.futures.as_completed(future_to_url): 156 | sublink = future_to_url[future] 157 | # Do nothing, the crawling is already handled in the function. 158 | print(f'Crawling page {url} -- COMPLETED') 159 | driver.quit() 160 | except Exception as e: 161 | print(f'Error crawling {url} - {e}') 162 | return 163 | 164 | def get_title_from_page(page_content): 165 | soup = BeautifulSoup(page_content, 'html.parser') 166 | return soup.title.string 167 | 168 | def collect_links_from_page(url, page_content, prefix): 169 | soup = BeautifulSoup(page_content, 'html.parser') 170 | links_set = set() 171 | for a in soup.find_all('a', href=True): 172 | if 'href' in a.attrs: 173 | link = a['href'] 174 | norm_link = url_normalizer(url, link) 175 | if norm_link is not None: 176 | if 'login' not in norm_link: 177 | print(f"Found link: {norm_link}") 178 | if prefix.strip() == "" or ((len(prefix.strip()) > 0) and norm_link.startswith(prefix.strip())): 179 | links_set.add(norm_link) 180 | 181 | return links_set 182 | 183 | 184 | # Convert relative links to absolute urls 185 | def url_normalizer(parent_url, link): 186 | # comparator = 187 | 188 | if link.startswith('#') or link.startswith('../'): 189 | link = 
urljoin(parent_url, link) 190 | elif link.startswith('/'): 191 | ## TODO: clean up the hack below. It is incorrect. 192 | link = urljoin(parent_url, link[1:]) 193 | print("new_link: " + link ) 194 | elif link.startswith('./'): 195 | link = urljoin(parent_url, link[2:]) 196 | else: 197 | try: 198 | parse_url_from_str(link) 199 | except Exception as e: 200 | link = urljoin(parent_url, link) 201 | 202 | # Validate that link is a valid URL 203 | try: 204 | parse_url_from_str(link) 205 | except Exception as e: 206 | print(f'Error normalizing {link} - {e}') 207 | return 208 | return link 209 | 210 | 211 | parser = argparse.ArgumentParser(description='Crawl a webpage and saving pages as PDF files.') 212 | parser.add_argument('-d', '--depth', default=2, type=int, help="max depth to be crawled when following links") 213 | parser.add_argument('-e', '--element_id', default='', type=str, help="webdriver waits until an element containing this ID can be found in the DOM") 214 | parser.add_argument('-p', '--prefix', default='', type=str, help="restrict crawling to URLs matching this prefix") 215 | parser.add_argument('url', type=parse_url_from_str, help="a fully-qualified URL to be crawled; formatted as per RFC 1808") 216 | args = parser.parse_args() 217 | 218 | # Ensure URL is terminated by a slash (/) 219 | url=args.url 220 | if re.search("\/$", args.url): 221 | url=args.url 222 | else: 223 | url=args.url + "/" 224 | 225 | prefix=args.prefix 226 | try: 227 | if prefix is None or (not isinstance(prefix, str)) or prefix.strip() == "": 228 | prefix='' 229 | elif prefix.strip() == '/': 230 | prefix = url 231 | else: 232 | prefix = parse_url_from_str(prefix.strip()) 233 | except Exception as e: 234 | print(f"Error: - Invalid Prefix - {e}") 235 | parser.print_help() 236 | exit(-1) 237 | 238 | print('PARAMETERS:') 239 | print(f'url: {url}') 240 | print(f'max depth: {args.depth}') 241 | print(f'element_id: {args.element_id}') 242 | print(f'prefix: {prefix}') 243 | 
delete_files_in_directory(OUTPUT_DATA_DIR) 244 | crawled = set() 245 | crawl(args.url, max_depth=args.depth, element_id=args.element_id, prefix=args.prefix) 246 | 247 | print('\n\n\nCRAWLING SUMMARY:') 248 | print(f'URLs crawled: {len(crawled)}') 249 | for page in crawled: 250 | print(page) 251 | 252 | -------------------------------------------------------------------------------- /src/backend/output/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-bedrock-with-rag-and-react/2ed673990a335464141a56291daedd5fd3f00626/src/backend/output/.gitkeep -------------------------------------------------------------------------------- /src/backend/requirements.txt: -------------------------------------------------------------------------------- 1 | asgiref==3.7.2 2 | blinker==1.6.2 3 | certifi==2024.7.4 4 | charset-normalizer==3.1.0 5 | click==8.1.3 6 | Django==4.2.18 7 | Flask==2.3.2 8 | idna==3.7 9 | image==1.5.33 10 | itsdangerous==2.1.2 11 | Jinja2==3.1.5 12 | jmespath==1.0.1 13 | MarkupSafe==2.1.2 14 | python-dateutil==2.8.2 15 | requests==2.32.2 16 | s3transfer==0.6.1 17 | six==1.16.0 18 | sqlparse==0.5.0 19 | urllib3==1.26.19 20 | Werkzeug==3.0.6 21 | faiss-cpu==1.7.4 22 | pypdf==3.17.0 23 | pypdf2==3.0.1 24 | langchain==0.3.0 25 | anthropic==0.2.10 26 | tika==2.6.0 27 | bs4==0.0.1 28 | fpdf==1.7.2 29 | selenium==4.11.2 30 | flask-cors==5.0.0 31 | opensearch-py==2.3.0 32 | boto3 33 | langchain-community -------------------------------------------------------------------------------- /src/frontend/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "react-bedrockdemo", 3 | "version": "0.1.0", 4 | "private": true, 5 | "dependencies": { 6 | "@aws-amplify/ui-react": "^4.6.4", 7 | "@testing-library/jest-dom": "^5.16.5", 8 | "@testing-library/react": "^13.4.0", 9 | "@testing-library/user-event": "^13.5.0", 10 | "aws-amplify": 
"^5.3.20", 11 | "aws-sdk": "^2.1414.0", 12 | "axios": "^1.6.0", 13 | "buffer": "^6.0.3", 14 | "graphql": "^16.6.0", 15 | "http-proxy-middleware": "^2.0.6", 16 | "jssha": "^3.3.0", 17 | "lodash": "^4.17.21", 18 | "ra-data-graphql": "^4.8.0", 19 | "react": "^18.2.0", 20 | "react-dom": "^18.2.0", 21 | "react-router-dom": "^6.8.1", 22 | "web-vitals": "^2.1.4" 23 | }, 24 | "devDependencies": { 25 | "react-scripts": "^5.0.1" 26 | }, 27 | "scripts": { 28 | "start": "react-scripts start", 29 | "build": "react-scripts build", 30 | "test": "react-scripts test", 31 | "eject": "react-scripts eject" 32 | }, 33 | "eslintConfig": { 34 | "extends": [ 35 | "react-app", 36 | "react-app/jest" 37 | ] 38 | }, 39 | "browserslist": { 40 | "production": [ 41 | ">0.2%", 42 | "not dead", 43 | "not op_mini all" 44 | ], 45 | "development": [ 46 | "last 1 chrome version", 47 | "last 1 firefox version", 48 | "last 1 safari version" 49 | ] 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/frontend/public/Amazon-Ember-Medium.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-bedrock-with-rag-and-react/2ed673990a335464141a56291daedd5fd3f00626/src/frontend/public/Amazon-Ember-Medium.ttf -------------------------------------------------------------------------------- /src/frontend/public/android-chrome-192x192.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-bedrock-with-rag-and-react/2ed673990a335464141a56291daedd5fd3f00626/src/frontend/public/android-chrome-192x192.png -------------------------------------------------------------------------------- /src/frontend/public/android-chrome-512x512.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/aws-bedrock-with-rag-and-react/2ed673990a335464141a56291daedd5fd3f00626/src/frontend/public/android-chrome-512x512.png -------------------------------------------------------------------------------- /src/frontend/public/apple-touch-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-bedrock-with-rag-and-react/2ed673990a335464141a56291daedd5fd3f00626/src/frontend/public/apple-touch-icon.png -------------------------------------------------------------------------------- /src/frontend/public/bedrock.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-bedrock-with-rag-and-react/2ed673990a335464141a56291daedd5fd3f00626/src/frontend/public/bedrock.png -------------------------------------------------------------------------------- /src/frontend/public/favicon-16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-bedrock-with-rag-and-react/2ed673990a335464141a56291daedd5fd3f00626/src/frontend/public/favicon-16x16.png -------------------------------------------------------------------------------- /src/frontend/public/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-bedrock-with-rag-and-react/2ed673990a335464141a56291daedd5fd3f00626/src/frontend/public/favicon-32x32.png -------------------------------------------------------------------------------- /src/frontend/public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-bedrock-with-rag-and-react/2ed673990a335464141a56291daedd5fd3f00626/src/frontend/public/favicon.ico 
-------------------------------------------------------------------------------- /src/frontend/public/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 12 | 13 | 17 | 18 | 27 | Amazon Bedrock App 28 | 29 | 30 | 31 |
32 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /src/frontend/public/manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "short_name": "React App", 3 | "name": "Create React App Sample", 4 | "icons": [ 5 | { 6 | "src": "favicon.ico", 7 | "sizes": "64x64 32x32 24x24 16x16", 8 | "type": "image/x-icon" 9 | }, 10 | { 11 | "src": "logo512.png", 12 | "type": "image/png", 13 | "sizes": "512x512" 14 | } 15 | ], 16 | "start_url": ".", 17 | "display": "standalone", 18 | "theme_color": "#000000", 19 | "background_color": "#ffffff" 20 | } 21 | -------------------------------------------------------------------------------- /src/frontend/public/robots.txt: -------------------------------------------------------------------------------- 1 | # https://www.robotstxt.org/robotstxt.html 2 | User-agent: * 3 | Disallow: 4 | -------------------------------------------------------------------------------- /src/frontend/public/site.webmanifest: -------------------------------------------------------------------------------- 1 | {"name":"","short_name":"","icons":[{"src":"/android-chrome-192x192.png","sizes":"192x192","type":"image/png"},{"src":"/android-chrome-512x512.png","sizes":"512x512","type":"image/png"}],"theme_color":"#ffffff","background_color":"#ffffff","display":"standalone"} -------------------------------------------------------------------------------- /src/frontend/src/AI21.js: -------------------------------------------------------------------------------- 1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | // SPDX-License-Identifier: MIT. 
3 | 4 | import React, { useState, useEffect } from 'react'; 5 | import { Button, TextField } from '@aws-amplify/ui-react'; 6 | import proxy_url from './proxy' 7 | 8 | const api_root_url = proxy_url 9 | 10 | const ChatbotApp = (props) => { 11 | const [chatMessages, setChatMessages] = useState([]); 12 | const [userInput, setUserInput] = useState(''); 13 | const [isBuffering, setIsBuffering] = useState(false) 14 | const [apiMethod, setApiMethod] = useState('/api/conversation/predict-ai21') 15 | const [selectedModel, setSelectedModel] = useState('') 16 | 17 | const [crawlMethod, setCrawlMethod] = useState('/api/crawl') 18 | const [reindexMethod, setReindexMethod] = useState('/api/build-vector') 19 | const [crawlModal, setCrawlModal] = useState(false) 20 | const [crawlPrompt, setCrawlPrompt] = useState(''); 21 | const [crawlResponse, setCrawlResponse] = useState(''); 22 | const [crawlResponseRaw, setCrawlResponseRaw] = useState(''); 23 | const [reindexResponse, setReindexResponse] = useState(''); 24 | const [uploadSuccessMessage, setUploadSuccessMessage] = useState(''); 25 | const [selectedFiles, setSelectedFiles] = useState([]); 26 | const [vectorSelection, setVectorSelection] = useState(null) 27 | const [userInputProfile, setUserInputProfile] = useState('') 28 | const [userInputS3Path, setUserInputS3Path] = useState('') 29 | const [userInputKendraId, setUserInputKendraId] = useState('') 30 | const [currentVector, setCurrentVector] = useState('faiss') 31 | const [kendraInstantiated, setKendraInstantiated] = useState(false) 32 | const [opensearchInstantiated, setOpensearchInstantiated] = useState(false) 33 | const [vectorInitialized, setVectorInitialized] = useState('false') 34 | const [dataDeleted, setDataDeleted] = useState('') 35 | const [promptModal, setPromptModal] = useState(false) 36 | const [promptTemplate, setPromptTemplate] = useState("Use the context to answer the question at the end. 
If you don't know the answer from the context, do not answer from your knowledge and be precise. Don't fake the answer.") 37 | const [promptTemplateResponse, setPromptTemplateResponse] = useState('') 38 | const [isBufferingReindex, setIsBufferingReindex] = useState(false) 39 | const [userInputCrawl, setUserInputCrawl] = useState(''); 40 | const [isBufferingCrawl, setIsBufferingCrawl] = useState(false) 41 | 42 | 43 | 44 | useEffect(() => { 45 | // Code to run after component has mounted 46 | if (props.ai21Messages.length === 0) { 47 | setChatMessages([ 48 | { author: `${props.modelSelected.split(':')[1]} Bot`, message: `Welcome to the ${props.modelSelected} Chatbot!` } 49 | ]) 50 | } else { 51 | setChatMessages(props.ai21Messages) 52 | } 53 | setSelectedModel(props.modelSelected) 54 | checkVector(); 55 | 56 | // Set up an interval to call the API every minute 57 | const interval = setInterval(checkVector, 10000); // 60000 milliseconds = 1 minute 58 | 59 | setCurrentVector(props.currentVector) 60 | setKendraInstantiated(props.kendraInstantiated) 61 | setPromptTemplate(props.promptTemplate) 62 | 63 | // Clean up the interval when the component unmounts 64 | return () => clearInterval(interval); 65 | 66 | }, [props.modelSelected, props.ai21Messages, props.setAi21Messages, 67 | props.currentVector, 68 | props.setCurrentVector, 69 | props.kendraInstantiated, 70 | props.setKendraInstantiated, 71 | props.promptTemplate, 72 | props.setPromptTemplate]); 73 | 74 | const checkVector = () => { 75 | 76 | fetch(api_root_url + '/api/check-vector', { 77 | method: 'GET', 78 | headers: { 79 | 'Content-Type': 'application/json' 80 | } 81 | }).then(response => response.json()) 82 | .then(response => { 83 | console.log(response.vector_initialized.toLowerCase()) 84 | setVectorInitialized(response.vector_initialized.toLowerCase()) 85 | }) 86 | .catch(error => { 87 | console.error('Error:', error); 88 | }); 89 | }; 90 | 91 | const deleteFiles = async () => { 92 | try { 93 | const response 
= await fetch(api_root_url + '/api/deletefiles'); 94 | const jsonData = await response.json(); 95 | // console.log(jsonData.vector_initialized.toLowerCase()) 96 | console.log(jsonData.response_text) 97 | setDataDeleted(jsonData.response_text) 98 | checkVector() 99 | setTimeout(function () { 100 | setDataDeleted('') 101 | }, 5000) 102 | } catch (error) { 103 | console.error('Error fetching data:', error); 104 | } 105 | }; 106 | 107 | const sendMessage = () => { 108 | setIsBuffering(true); 109 | const messageElement = { 110 | author: 'You', 111 | message: userInput 112 | }; 113 | 114 | setChatMessages(prevChatMessages => [...prevChatMessages, messageElement]); 115 | props.setAi21Messages(prevChatMessages => [...prevChatMessages, messageElement]); 116 | 117 | let payload = { 118 | prompt: userInput, 119 | maxTokens: 200, 120 | temperature: 0.5, 121 | topP: 0.5, 122 | stopSequences: [], 123 | countPenalty: { scale: 0 }, 124 | presencePenalty: { scale: 0 }, 125 | frequencyPenalty: { scale: 0 } 126 | }; 127 | 128 | setUserInput(''); 129 | fetch(api_root_url + apiMethod, { 130 | method: 'POST', 131 | headers: { 132 | 'Content-Type': 'application/json' 133 | }, 134 | body: JSON.stringify(payload) 135 | }) 136 | .then(response => response.json()) 137 | .then(response => { 138 | console.log(response) 139 | const botResponse = response.output_text; 140 | const botMessageElement = { 141 | author: `${selectedModel} Bot`, 142 | message: response 143 | }; 144 | setChatMessages(prevChatMessages => [...prevChatMessages, botMessageElement]); 145 | props.setAi21Messages(prevChatMessages => [...prevChatMessages, botMessageElement]); 146 | }) 147 | .catch(error => { 148 | console.error('Error:', error); 149 | }).finally(() => { 150 | setIsBuffering(false) 151 | }); 152 | 153 | }; 154 | 155 | 156 | const formatBotResponse = (response) => { 157 | const regexNumberedList = /^\d+\.\s/; 158 | const regexBulletedList = /^[\-\*\+\•]/; 159 | const regexURL = /\b(?:https?:\/\/|www\.)\S+\b/gi; 
// Improved URL regex 160 | const lines = response.trim().split('\n'); 161 | 162 | let listType = ''; 163 | let result = ''; 164 | 165 | lines.forEach(line => { 166 | if (regexNumberedList.test(line)) { 167 | if (!listType) { 168 | listType = 'ol'; 169 | result += `
    `; 170 | } 171 | line = `
  1. ${line.replace(regexNumberedList, '')}
  2. `; 172 | } else if (regexBulletedList.test(line)) { 173 | if (!listType) { 174 | listType = 'ul'; 175 | result += `
      `; 176 | } 177 | line = `
    • ${line.replace(regexBulletedList, '')}
    • `; 178 | } else { 179 | if (listType) { 180 | result += ``; 181 | listType = ''; 182 | } 183 | } 184 | 185 | line = line.replace(regexURL, '$&'); 186 | result += `${line}\n`; 187 | }); 188 | 189 | if (listType) { 190 | result += ``; 191 | } 192 | 193 | return result; 194 | }; 195 | 196 | const clearChatHistory = () => { 197 | setChatMessages([ 198 | { author: `${selectedModel.split(':')[1]} Bot`, message: `Welcome to the ${selectedModel} Chatbot!` } 199 | ]) 200 | props.setAnthropicMessages([ 201 | { author: `${selectedModel.split(':')[1]} Bot`, message: `Welcome to the ${selectedModel} Chatbot!` } 202 | ]) 203 | } 204 | 205 | const clearChatHistoryCrawl = () => { 206 | setCrawlPrompt('') 207 | setCrawlResponse('') 208 | } 209 | 210 | const crawlReindex = async () => { 211 | try { 212 | setIsBufferingReindex(true); 213 | setCrawlPrompt(''); 214 | setCrawlResponse(''); 215 | setReindexResponse('Crawling data sources and Reindexing the Vector Database... This may take a moment...'); 216 | 217 | let payload = { 218 | prompt: JSON.stringify({ 219 | prompt: userInputCrawl 220 | }) 221 | }; 222 | 223 | // Perform first API call 224 | const response1 = await fetch(api_root_url + crawlMethod, { 225 | method: 'POST', 226 | headers: { 227 | 'Content-Type': 'application/json' 228 | }, 229 | body: JSON.stringify(payload) 230 | }); 231 | 232 | const json1 = await response1.json(); 233 | console.log(json1); 234 | setReindexResponse(json1.response_text); 235 | 236 | // Perform second API call 237 | const response2 = await fetch(api_root_url + reindexMethod, { 238 | method: 'POST', 239 | headers: { 240 | 'Content-Type': 'application/json' 241 | }, 242 | body: JSON.stringify(payload) 243 | }); 244 | 245 | const json2 = await response2.json(); 246 | console.log(json2); 247 | setReindexResponse(json2.response_text); 248 | } catch (error) { 249 | console.error('Error:', error); 250 | } finally { 251 | setIsBufferingReindex(false); 252 | checkVector() 253 | 
console.log(reindexResponse); 254 | } 255 | } 256 | 257 | const SelectedFilesMessage = ({ selectedFiles }) => ( 258 |

      {selectedFiles.length} files selected

      259 | ); 260 | 261 | const NoFileMessage = ({ selectedFiles }) => ( 262 |

      {selectedFiles.length === 0 ? 'No file chosen' : ''}

      263 | ); 264 | 265 | const handleSetVector = (vector) => { 266 | console.log(vector) 267 | setCurrentVector(vector) 268 | syncVectorLocal(vector) 269 | } 270 | 271 | const syncVectorLocal = async (vector) => { 272 | props.setCurrentVector(vector) 273 | }; 274 | 275 | const handleFileUpload = (event) => { 276 | const files = event.target.files; 277 | 278 | let payload = { 279 | prompt: JSON.stringify({ 280 | prompt: 'ignore, dummy data' 281 | }) 282 | } 283 | 284 | if (files.length > 0) { 285 | setIsBufferingReindex(true); 286 | setSelectedFiles(Array.from(files)); // Update selectedFiles state 287 | 288 | const formData = new FormData(); 289 | for (let i = 0; i < files.length; i++) { 290 | formData.append('pdfFiles', files[i]); 291 | } 292 | 293 | fetch(api_root_url + '/api/upload-pdfs', { 294 | method: 'POST', 295 | body: formData, 296 | }) 297 | .then(response => response.json()) 298 | .then(response => { 299 | // Handle the response, e.g., display a success message 300 | console.log(response); 301 | if (response.response_text) { 302 | setReindexResponse(response.response_text); 303 | 304 | // Clear the success message after 3 seconds 305 | setTimeout(() => { 306 | setUploadSuccessMessage(''); 307 | }, 3000); 308 | setSelectedFiles([]); 309 | } 310 | }) 311 | .catch(error => { 312 | console.error('Error:', error); 313 | }) 314 | .finally(() => { 315 | fetch(api_root_url + reindexMethod, { 316 | method: 'POST', 317 | headers: { 318 | 'Content-Type': 'application/json' 319 | }, 320 | body: JSON.stringify(payload) 321 | }) 322 | .then(response => response.json()) 323 | .then(response => { 324 | console.log(response) 325 | setReindexResponse(response.response_text) 326 | }) 327 | .catch(error => { 328 | console.error('Error:', error); 329 | }).finally(() => { 330 | setIsBufferingReindex(false) 331 | checkVector() 332 | console.log(reindexResponse) 333 | }); 334 | }); 335 | } 336 | }; 337 | 338 | const downloadS3 = (event) => { 339 | let payload = { 340 | 
profile_name: userInputProfile, 341 | location: userInputS3Path 342 | } 343 | 344 | setIsBufferingReindex(true); 345 | setReindexResponse('Downloading files from the S3 location. Depending on how many files are present, it may take a while...') 346 | 347 | fetch(api_root_url + '/api/download-s3', { 348 | method: 'POST', 349 | headers: { 350 | 'Content-Type': 'application/json' 351 | }, 352 | body: JSON.stringify(payload) 353 | }) 354 | .then(response => response.json()) 355 | .then(response => { 356 | // Handle the response, e.g., display a success message 357 | console.log(response); 358 | if (response.response_text) { 359 | setReindexResponse(response.response_text); 360 | } 361 | }) 362 | .catch(error => { 363 | console.error('Error:', error); 364 | }) 365 | .finally(() => { 366 | fetch(api_root_url + reindexMethod, { 367 | method: 'POST', 368 | headers: { 369 | 'Content-Type': 'application/json' 370 | }, 371 | body: JSON.stringify(payload) 372 | }) 373 | .then(response => response.json()) 374 | .then(response => { 375 | console.log(response) 376 | setReindexResponse(response.response_text) 377 | }) 378 | .catch(error => { 379 | console.error('Error:', error); 380 | }).finally(() => { 381 | setIsBufferingReindex(false) 382 | checkVector() 383 | console.log(reindexResponse) 384 | }); 385 | }); 386 | } 387 | 388 | const handleVectorModalBack = (event) => { 389 | setVectorSelection(null) 390 | setReindexResponse('') 391 | } 392 | 393 | const instantiateKendra = (event) => { 394 | console.log('connecting to kendra') 395 | setIsBufferingReindex(true) 396 | setCrawlPrompt('') 397 | setCrawlResponse('') 398 | setReindexResponse('Connecting to Kendra Index...') 399 | 400 | let payload = { 401 | profile_name: userInputProfile, 402 | index_id: userInputKendraId 403 | } 404 | 405 | fetch(api_root_url + '/api/instantiate-kendra', { 406 | method: 'POST', 407 | headers: { 408 | 'Content-Type': 'application/json' 409 | }, 410 | body: JSON.stringify(payload) 411 | }) 412 | 
.then(response => response.json()) 413 | .then(response => { 414 | console.log(response) 415 | setReindexResponse(response.response_text) 416 | }) 417 | .catch(error => { 418 | console.error('Error:', error); 419 | }).finally(() => { 420 | console.log(reindexResponse) 421 | setIsBufferingReindex(false) 422 | checkVector() 423 | setKendraInstantiated(true) 424 | props.setKendraInstantiated(true) 425 | }); 426 | } 427 | 428 | const updatePromptTemplate = () => { 429 | setIsBuffering(true); 430 | 431 | let payload = { 432 | prompt_template: promptTemplate 433 | }; 434 | props.setPromptTemplate(promptTemplate) 435 | 436 | fetch(api_root_url + '/api/update-prompt-template', { 437 | method: 'POST', 438 | headers: { 439 | 'Content-Type': 'application/json' 440 | }, 441 | body: JSON.stringify(payload) 442 | }) 443 | .then(response => response.json()) 444 | .then(response => { 445 | setPromptTemplateResponse(response.response_text); 446 | 447 | }) 448 | .catch(error => { 449 | console.error('Error:', error); 450 | }).finally(() => { 451 | setIsBuffering(false) 452 | 453 | }); 454 | }; 455 | 456 | return ( 457 |
      458 |
      459 | {chatMessages.map((message, index) => ( 460 |

      461 | 462 |

      463 | ))} 464 |
      465 | {isBuffering && 466 |
      467 |
      468 |
      469 |
      470 |
      471 | } 472 |
      473 | 474 |