├── .checkov.yaml ├── .flake8 ├── .gitignore ├── .pre-commit-config.yaml ├── .pydocstlye ├── AUTHORS ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── THIRD_PARTY_LICENSES ├── media └── bedrock-rag-template.drawio.svg ├── poetry.lock ├── pyproject.toml ├── python ├── src │ └── handlers │ │ └── data_ingestion_processor │ │ ├── Dockerfile │ │ ├── handler.py │ │ └── requirements.txt └── tests │ ├── events │ └── bucket_notification.json │ ├── handlers │ └── test_data_ingestion_processor.py │ └── test_data_ingestion.py ├── rag_demo.ipynb └── terraform ├── commons.tfvars ├── data.tf ├── locals.tf ├── main.tf ├── policies ├── data_ingestion_processor.json └── kms.json └── variables.tf /.checkov.yaml: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | compact: true 4 | download-external-modules: true 5 | evaluate-variables: true 6 | framework: 7 | - all 8 | quiet: true 9 | skip-check: [] 10 | #skip-fixes: true 11 | skip-framework: 12 | - dockerfile 13 | - openapi 14 | skip-download: true 15 | soft-fail: false 16 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | # SPDX-License-Identifier: MIT-0 3 | [flake8] 4 | ignore = E203, E501, W503, E731, BLK100, F811, W605, C901 5 | max-line-length = 120 6 | max-complexity = 17 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Local .terraform directories 10 | **/.terraform/* 11 | 12 | # .tfstate files 13 | *.tfstate 14 | *.tfstate.* 15 | 16 | 17 | #ignore terraform/providers folder 18 | .plugin-cache 19 | 20 | **/.terraform.lock.hcl 21 | **/plan.out 22 | **terraform/builds/ 23 | 24 | *.tfvars 25 | !terraform/commons.tfvars 26 | 27 | # Distribution / packaging 28 | .Python 29 | build/ 30 | develop-eggs/ 31 | dist/ 32 | downloads/ 33 | eggs/ 34 | .eggs/ 35 | lib/ 36 | lib64/ 37 | parts/ 38 | sdist/ 39 | var/ 40 | wheels/ 41 | share/python-wheels/ 42 | *.egg-info/ 43 | .installed.cfg 44 | *.egg 45 | MANIFEST 46 | 47 | # PyInstaller 48 | # Usually these files are written by a python script from a template 49 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
50 | *.manifest 51 | *.spec 52 | 53 | # Installer logs 54 | pip-log.txt 55 | pip-delete-this-directory.txt 56 | 57 | # Unit test / coverage reports 58 | htmlcov/ 59 | .tox/ 60 | .nox/ 61 | .coverage 62 | .coverage.* 63 | .cache 64 | nosetests.xml 65 | coverage.xml 66 | *.cover 67 | *.py,cover 68 | .hypothesis/ 69 | .pytest_cache/ 70 | cover/ 71 | 72 | # Translations 73 | *.mo 74 | *.pot 75 | 76 | # Django stuff: 77 | *.log 78 | local_settings.py 79 | db.sqlite3 80 | db.sqlite3-journal 81 | 82 | # Flask stuff: 83 | instance/ 84 | .webassets-cache 85 | 86 | # Scrapy stuff: 87 | .scrapy 88 | 89 | # Sphinx documentation 90 | docs/_build/ 91 | 92 | # PyBuilder 93 | .pybuilder/ 94 | target/ 95 | 96 | # Jupyter Notebook 97 | .ipynb_checkpoints 98 | 99 | # IPython 100 | profile_default/ 101 | ipython_config.py 102 | 103 | .pdm.toml 104 | 105 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 106 | __pypackages__/ 107 | 108 | # Celery stuff 109 | celerybeat-schedule 110 | celerybeat.pid 111 | 112 | # SageMath parsed files 113 | *.sage.py 114 | 115 | # Environments 116 | .env 117 | .venv 118 | env/ 119 | venv/ 120 | ENV/ 121 | env.bak/ 122 | venv.bak/ 123 | 124 | # Spyder project settings 125 | .spyderproject 126 | .spyproject 127 | 128 | # Rope project settings 129 | .ropeproject 130 | 131 | # mkdocs documentation 132 | /site 133 | 134 | # mypy 135 | .mypy_cache/ 136 | .dmypy.json 137 | dmypy.json 138 | 139 | # Pyre type checker 140 | .pyre/ 141 | 142 | # pytype static type analyzer 143 | .pytype/ 144 | 145 | # Cython debug symbols 146 | cython_debug/ 147 | 148 | .DS_Store 149 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | # SPDX-License-Identifier: MIT-0 3 | repos: 4 | - repo: https://github.com/pre-commit/pre-commit-hooks 5 | rev: v4.6.0 6 | hooks: 7 | # Git style 8 | - id: check-added-large-files 9 | - id: check-merge-conflict 10 | - id: check-vcs-permalinks 11 | - id: forbid-new-submodules 12 | - id: no-commit-to-branch 13 | - id: check-ast 14 | - id: check-builtin-literals 15 | - id: check-case-conflict 16 | 17 | # Common errors 18 | - id: end-of-file-fixer 19 | exclude: ^(Documentation/.*|.*.drawio)$ 20 | - id: trailing-whitespace 21 | args: [--markdown-linebreak-ext=md] 22 | exclude: ^(Documentation/.*|.*.drawio)$ 23 | - id: check-yaml 24 | args: [--allow-multiple-documents] 25 | exclude: ^(bootstrap/.*.yaml|bootstrap/.*.yml)$ 26 | - id: check-merge-conflict 27 | - id: check-executables-have-shebangs 28 | - id: detect-private-key 29 | # Cross platform 30 | - id: check-case-conflict 31 | - id: mixed-line-ending 32 | args: [--fix=lf] 33 | 34 | # Security 35 | - id: detect-aws-credentials 36 | args: ["--allow-missing-credentials"] 37 | - id: detect-private-key 38 | 39 | 40 | - repo: https://github.com/jorisroovers/gitlint 41 | rev: v0.19.1 42 | hooks: 43 | - id: gitlint 44 | stages: [commit-msg] 45 | - repo: https://github.com/asottile/add-trailing-comma 46 | rev: v3.1.0 47 | hooks: 48 | - id: add-trailing-comma 49 | 50 | - repo: https://github.com/detailyang/pre-commit-shell 51 | rev: v1.0.6 52 | hooks: 53 | - id: shell-lint 54 | 55 | # ---- Terraform ---- 56 | - repo: https://github.com/antonbabenko/pre-commit-terraform 57 | rev: v1.89.1 58 | hooks: 59 | - id: terraform_fmt 60 | - id: terraform_validate 61 | exclude: modules/.*/[^/]+$ 62 | - repo: https://github.com/bridgecrewio/checkov 63 | rev: 3.2.98 64 | hooks: 65 | - id: checkov 66 | name: Checkov 67 | description: This hook runs checkov. 68 | entry: checkov -d . 
--quiet --config-file .checkov.yaml 69 | language: python 70 | pass_filenames: false 71 | always_run: false 72 | files: \.tf$ 73 | exclude: \.+.terraform\/.*$ 74 | require_serial: true 75 | # ---- Python ---- 76 | - repo: https://github.com/pycqa/flake8 77 | rev: "6.0.0" 78 | hooks: 79 | - id: flake8 80 | additional_dependencies: 81 | - flake8-black>=0.1.1 82 | - repo: https://github.com/psf/black 83 | rev: 23.1.0 84 | hooks: 85 | - id: black 86 | language_version: python3.12 87 | exclude: .terragrunt-cache 88 | 89 | - repo: https://github.com/pycqa/pydocstyle 90 | rev: 6.3.0 91 | hooks: 92 | - id: pydocstyle 93 | args: 94 | - "--ignore=D100, D213, D400, D415, D212, D200, D104, D407, D413, D406, D203" 95 | - repo: https://github.com/PyCQA/isort 96 | rev: 5.13.2 97 | hooks: 98 | - id: isort 99 | name: isort 100 | args: 101 | - --profile 102 | - black 103 | - --filter-files 104 | default_language_version: 105 | python: python3.12 106 | default_stages: [pre-commit, pre-push] 107 | -------------------------------------------------------------------------------- /.pydocstlye: -------------------------------------------------------------------------------- 1 | [pydocstyle] 2 | convention = google 3 | add-ignore = D100,D212,D400,D415,D205 4 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | // SPDX-License-Identifier: MIT-0 3 | 4 | Alice Morano 5 | Julian Grüber 6 | Martin Maritsch 7 | Nicolas Jacob Baer 8 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 
3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. 
Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 
60 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT No Attribution 2 | 3 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so. 10 | 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 13 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 14 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 15 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 16 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 17 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Terraform RAG template with Amazon Bedrock 2 | 3 | This repository contains a Terraform implementation of a simple Retrieval-Augmented Generation (RAG) use case using [Amazon Titan V2](https://docs.aws.amazon.com/bedrock/latest/userguide/titan-embedding-models.html) as the embedding model and [Claude 3](https://aws.amazon.com/de/bedrock/claude/) as the text generation model, both on [Amazon Bedrock](https://docs.aws.amazon.com/bedrock/latest/userguide/what-is-bedrock.html). This sample follows the user journey described below: 4 | 5 | 1. 
The user manually uploads a file to Amazon S3, such as a Microsoft Excel or PDF document. The supported file types can be found here. 6 | 2. The content of the file is extracted and embedded into a knowledge database based on a serverless [Amazon Aurora with PostgreSQL](https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/Aurora.AuroraPostgreSQL.html). 7 | 3. When the user engages with the text generation model, it utilizes previously uploaded files to enhance the interaction through retrieval augmentation. 8 | 9 | 10 | ## Architecture 11 | 12 | 13 | ![](/media/bedrock-rag-template.drawio.svg) 14 | 15 | 16 | 1. Whenever an object is created in the [Amazon S3 bucket](https://docs.aws.amazon.com/AmazonS3/latest/userguide/Welcome.html) `bedrock-rag-template-`, an [Amazon S3 notification](https://docs.aws.amazon.com/AmazonS3/latest/userguide/EventNotifications.html) invokes the [Amazon Lambda function](https://docs.aws.amazon.com/lambda/latest/dg/welcome.html) `data-ingestion-processor`. 17 | 18 | 2. The Amazon Lambda function `data-ingestion-processor` is based on a Docker image stored in the [Amazon ECR repository](https://docs.aws.amazon.com/AmazonECR/latest/userguide/what-is-ecr.html) `bedrock-rag-template`. The function uses the [LangChain S3FileLoader](https://python.langchain.com/v0.1/docs/integrations/document_loaders/aws_s3_file/) to read the file as a [LangChain Document](https://api.python.langchain.com/en/v0.0.339/schema/langchain.schema.document.Document.html). Then, the [LangChain RecursiveTextSplitter](https://python.langchain.com/v0.1/docs/modules/data_connection/document_transformers/recursive_text_splitter/) chunks each document, given a `CHUNK_SIZE` and a `CHUNK_OVERLAP` which depend on the max token size of the embedding model, the Amazon Titan Text Embedding V2. 
Next, the Lambda function invokes the embedding model on [Amazon Bedrock](https://docs.aws.amazon.com/bedrock/latest/userguide/what-is-bedrock.html) to embed the chunks into numerical vector representations. Lastly, these vectors are stored in the Amazon Aurora PostgreSQL database. To access the Amazon Aurora database, the Lambda function first retrieves the username and password from Amazon Secrets Manager. 19 | 20 | 3. On the [Amazon SageMaker notebook instance](https://docs.aws.amazon.com/sagemaker/latest/dg/nbi.html) `aws-sample-bedrock-rag-template`, the user can write a question prompt. The code invokes Claude 3 on Amazon Bedrock and provides the knowledge base information to the context of the prompt. As a result, Claude 3 answers using the information in the documents. 21 | 22 | 23 | ### Networking & Security 24 | 25 | The Amazon Lambda function `data-ingestion-processor` resides in a private subnet within the VPC and it is not allowed to send traffic to the public internet due to its security group. As a result, the traffic to Amazon S3 and Amazon Bedrock is routed through the VPC endpoints only. Consequently, the traffic does not traverse the public internet, which reduces latency and adds an additional layer of security at the networking level. 26 | 27 | All the resources and data are encrypted whenever applicable using the Amazon KMS Key with the alias `aws-sample/bedrock-rag-template`. 28 | 29 | While this sample can be deployed into any AWS Region, we recommend using `us-east-1` or `us-west-1` due to the availability of foundation and embedding models in Amazon Bedrock at the time of publishing (see [Model support by AWS Region](https://docs.aws.amazon.com/bedrock/latest/userguide/models-regions.html) for an updated list of Amazon Bedrock foundation model support in AWS Regions). See the section [Next steps](#next-steps) which provides pointers on how to use this solution with other AWS Regions. 
30 | 31 | 32 | ## Prerequisites 33 | 34 | ### Amazon Web Services 35 | 36 | To run this sample, make sure that you have an active AWS account and that you have access to a sufficiently strong IAM role in the Management console and in the CLI. 37 | 38 | [Enable model access](https://docs.aws.amazon.com/bedrock/latest/userguide/model-access.html) for the required LLMs in the Amazon Bedrock Console of your AWS account. 39 | The following models are needed for this example: 40 | 41 | * `amazon.titan-embed-text-v2:0` 42 | * `anthropic.claude-3-sonnet-20240229-v1:0` 43 | 44 | ### Required software 45 | 46 | The following software tools are required in order to deploy this repository: 47 | 48 | * [Terraform](https://www.terraform.io/): 49 | 50 | ```shell 51 | ❯ terraform --version 52 | Terraform v1.8.4 53 | on linux_amd64 54 | + provider registry.terraform.io/hashicorp/aws v5.50.0 55 | + provider registry.terraform.io/hashicorp/external v2.3.3 56 | + provider registry.terraform.io/hashicorp/local v2.5.1 57 | + provider registry.terraform.io/hashicorp/null v3.2.2 58 | ``` 59 | 60 | * [Docker](https://docs.docker.com/manuals/) 61 | 62 | ```shell 63 | ❯ docker --version 64 | Docker version 26.0.0, build 2ae903e86c 65 | ``` 66 | 67 | * [Poetry](https://python-poetry.org/) 68 | 69 | ```shell 70 | ❯ poetry --version 71 | Poetry (version 1.7.1) 72 | ``` 73 | 74 | * [Python3.10](https://www.python.org/downloads/release/python-3100/) 75 | 76 | * [AWS CLI](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html) 77 | 78 | 79 | ## Deployment 80 | 81 | This section explains how to deploy the infrastructure and how to run the demo in a Jupyter notebook. 82 | > **Warning:** The following actions are going to cause costs in the deployed AWS Account. 
83 | 84 | 85 | ### Credentials 86 | 87 | To deploy this sample, [put the credentials as environment variables](https://docs.aws.amazon.com/cli/v1/userguide/cli-configure-envvars.html#envvars-set) or configure the CLI directly. 88 | To test whether the credentials were set successfully, run `aws sts get-caller-identity`. The output should contain the ARN of the user or role that you are signed in as. 89 | 90 | ### Infrastructure 91 | 92 | To deploy the entire infrastructure, run the following commands: 93 | 94 | ```shell 95 | cd terraform 96 | terraform init 97 | terraform plan -var-file=commons.tfvars 98 | terraform apply -var-file=commons.tfvars 99 | ``` 100 | 101 | 102 | ### Demo in the Jupyter notebook 103 | 104 | The end to end demo is presented inside the Jupyter notebook. Follow the steps below to run the demo by yourself. 105 | 106 | #### Preparation 107 | 108 | The infrastructure deployment provisions an Amazon SageMaker notebook instance inside the VPC and with the permissions to access the PostgreSQL Aurora database. Once the previous infrastructure deployment has succeeded, follow the subsequent steps to run the demo in a Jupyter notebook: 109 | 110 | 1. Log into the AWS management console of the account where the infrastructure is deployed. 111 | 2. Open the SageMaker notebook instance `aws-sample-bedrock-rag-template`. 112 | 3. Move the [rag_demo.ipynb](/rag_demo.ipynb) Jupyter notebook onto the SageMaker notebook instance via drag & drop. 113 | 4. Open the [rag_demo.ipynb](/rag_demo.ipynb) on the SageMaker notebook instance and choose the `conda_python3` kernel. 114 | 5. Run the cells of the notebook to run the demo. 
115 | 116 | #### Running the demo 117 | 118 | The Jupyter notebook guides the reader through the following process: 119 | 120 | - Installing requirements 121 | - Embedding definition 122 | - Database connection 123 | - Data ingestion 124 | - Retrieval augmented text generation 125 | - Relevant document queries 126 | 127 | 128 | ### Clean up 129 | 130 | To destroy the infrastructure run `terraform destroy -var-file=commons.tfvars`. 131 | 132 | 133 | ## Testing 134 | 135 | ### Prerequisites - virtual Python environment 136 | 137 | Make sure that the dependencies in the [pyproject.toml](/pyproject.toml) are aligned with the [requirements](/python/src/handlers/data_ingestion_processor/requirements.txt) of the Amazon Lambda `data-ingestion-processor`. 138 | 139 | Install the dependencies and activate the virtual environment: 140 | 141 | ```shell 142 | poetry lock 143 | poetry install 144 | poetry shell 145 | 146 | ``` 147 | 148 | ### Run the test 149 | 150 | 151 | ```shell 152 | python -m pytest . 153 | ``` 154 | 155 | 156 | 157 | ## Next steps 158 | 159 | 160 | ### Deployment to other AWS Regions 161 | 162 | There are two possible ways to deploy this stack to AWS Regions other than `us-east-1` and `us-west-1`. You can configure the deployment AWS Region in the [`commons.tfvars`](/terraform/commons.tfvars) file. For cross-region foundation model access, consider the following options: 163 | 164 | 1. **Traversing the public internet**: if the traffic can traverse the public internet, add internet gateways to the VPC and adjust the security group assigned to the Amazon Lambda function `data-ingestion-processor` and the SageMaker notebook instance to allow outbound traffic to the public internet. 165 | 2. **NOT traversing the public internet**: deploy this sample to any AWS Region different from `us-east-1` or `us-west-1`. In `us-east-1` or `us-west-1`, create an additional VPC including a VPC endpoint for `bedrock-runtime`. 
Then, peer this VPC with the application VPC using VPC peering or a transit gateway. Lastly, when configuring the `bedrock-runtime` boto3 client in any AWS Lambda function outside of `us-east-1` or `us-west-1`, pass the private DNS name of the VPC endpoint for `bedrock-runtime` in `us-east-1` or `us-west-1` as `endpoint_url` to the boto3 client. For the VPC peering solution, one can leverage the module [Terraform AWS VPC Peering](https://github.com/grem11n/terraform-aws-vpc-peering). 166 | 167 | ## Dependencies and Licenses 168 | 169 | This project is licensed under the MIT-0 License - see the `LICENSE` file for details. 170 | 171 | ### Dependencies 172 | 173 | * [AWS Lambda Terraform module](https://registry.terraform.io/modules/terraform-aws-modules/lambda/aws/latest) 174 | * [Terraform AWS Provider](https://registry.terraform.io/providers/hashicorp/aws/latest) 175 | * [Terraform](https://developer.hashicorp.com/terraform) 176 | * [Docker Engine](https://docs.docker.com/engine/) 177 | 178 | 179 | ## Security 180 | 181 | See [CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications) for more information. 182 | 183 | ## License 184 | 185 | This library is licensed under the MIT-0 License. See the LICENSE file. 186 | -------------------------------------------------------------------------------- /THIRD_PARTY_LICENSES: -------------------------------------------------------------------------------- 1 | 2 | ------- AWS Lambda Terraform module ------- 3 | Apache License 4 | Version 2.0, January 2004 5 | http://www.apache.org/licenses/ 6 | 7 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 8 | 9 | 1. Definitions. 10 | 11 | "License" shall mean the terms and conditions for use, reproduction, 12 | and distribution as defined by Sections 1 through 9 of this document. 13 | 14 | "Licensor" shall mean the copyright owner or entity authorized by 15 | the copyright owner that is granting the License. 
16 | 17 | "Legal Entity" shall mean the union of the acting entity and all 18 | other entities that control, are controlled by, or are under common 19 | control with that entity. For the purposes of this definition, 20 | "control" means (i) the power, direct or indirect, to cause the 21 | direction or management of such entity, whether by contract or 22 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 23 | outstanding shares, or (iii) beneficial ownership of such entity. 24 | 25 | "You" (or "Your") shall mean an individual or Legal Entity 26 | exercising permissions granted by this License. 27 | 28 | "Source" form shall mean the preferred form for making modifications, 29 | including but not limited to software source code, documentation 30 | source, and configuration files. 31 | 32 | "Object" form shall mean any form resulting from mechanical 33 | transformation or translation of a Source form, including but 34 | not limited to compiled object code, generated documentation, 35 | and conversions to other media types. 36 | 37 | "Work" shall mean the work of authorship, whether in Source or 38 | Object form, made available under the License, as indicated by a 39 | copyright notice that is included in or attached to the work 40 | (an example is provided in the Appendix below). 41 | 42 | "Derivative Works" shall mean any work, whether in Source or Object 43 | form, that is based on (or derived from) the Work and for which the 44 | editorial revisions, annotations, elaborations, or other modifications 45 | represent, as a whole, an original work of authorship. For the purposes 46 | of this License, Derivative Works shall not include works that remain 47 | separable from, or merely link (or bind by name) to the interfaces of, 48 | the Work and Derivative Works thereof. 
49 | 50 | "Contribution" shall mean any work of authorship, including 51 | the original version of the Work and any modifications or additions 52 | to that Work or Derivative Works thereof, that is intentionally 53 | submitted to Licensor for inclusion in the Work by the copyright owner 54 | or by an individual or Legal Entity authorized to submit on behalf of 55 | the copyright owner. For the purposes of this definition, "submitted" 56 | means any form of electronic, verbal, or written communication sent 57 | to the Licensor or its representatives, including but not limited to 58 | communication on electronic mailing lists, source code control systems, 59 | and issue tracking systems that are managed by, or on behalf of, the 60 | Licensor for the purpose of discussing and improving the Work, but 61 | excluding communication that is conspicuously marked or otherwise 62 | designated in writing by the copyright owner as "Not a Contribution." 63 | 64 | "Contributor" shall mean Licensor and any individual or Legal Entity 65 | on behalf of whom a Contribution has been received by Licensor and 66 | subsequently incorporated within the Work. 67 | 68 | 2. Grant of Copyright License. Subject to the terms and conditions of 69 | this License, each Contributor hereby grants to You a perpetual, 70 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 71 | copyright license to reproduce, prepare Derivative Works of, 72 | publicly display, publicly perform, sublicense, and distribute the 73 | Work and such Derivative Works in Source or Object form. 74 | 75 | 3. Grant of Patent License. 
Subject to the terms and conditions of 76 | this License, each Contributor hereby grants to You a perpetual, 77 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 78 | (except as stated in this section) patent license to make, have made, 79 | use, offer to sell, sell, import, and otherwise transfer the Work, 80 | where such license applies only to those patent claims licensable 81 | by such Contributor that are necessarily infringed by their 82 | Contribution(s) alone or by combination of their Contribution(s) 83 | with the Work to which such Contribution(s) was submitted. If You 84 | institute patent litigation against any entity (including a 85 | cross-claim or counterclaim in a lawsuit) alleging that the Work 86 | or a Contribution incorporated within the Work constitutes direct 87 | or contributory patent infringement, then any patent licenses 88 | granted to You under this License for that Work shall terminate 89 | as of the date such litigation is filed. 90 | 91 | 4. Redistribution. 
You may reproduce and distribute copies of the 92 | Work or Derivative Works thereof in any medium, with or without 93 | modifications, and in Source or Object form, provided that You 94 | meet the following conditions: 95 | 96 | (a) You must give any other recipients of the Work or 97 | Derivative Works a copy of this License; and 98 | 99 | (b) You must cause any modified files to carry prominent notices 100 | stating that You changed the files; and 101 | 102 | (c) You must retain, in the Source form of any Derivative Works 103 | that You distribute, all copyright, patent, trademark, and 104 | attribution notices from the Source form of the Work, 105 | excluding those notices that do not pertain to any part of 106 | the Derivative Works; and 107 | 108 | (d) If the Work includes a "NOTICE" text file as part of its 109 | distribution, then any Derivative Works that You distribute must 110 | include a readable copy of the attribution notices contained 111 | within such NOTICE file, excluding those notices that do not 112 | pertain to any part of the Derivative Works, in at least one 113 | of the following places: within a NOTICE text file distributed 114 | as part of the Derivative Works; within the Source form or 115 | documentation, if provided along with the Derivative Works; or, 116 | within a display generated by the Derivative Works, if and 117 | wherever such third-party notices normally appear. The contents 118 | of the NOTICE file are for informational purposes only and 119 | do not modify the License. You may add Your own attribution 120 | notices within Derivative Works that You distribute, alongside 121 | or as an addendum to the NOTICE text from the Work, provided 122 | that such additional attribution notices cannot be construed 123 | as modifying the License. 
124 | 125 | You may add Your own copyright statement to Your modifications and 126 | may provide additional or different license terms and conditions 127 | for use, reproduction, or distribution of Your modifications, or 128 | for any such Derivative Works as a whole, provided Your use, 129 | reproduction, and distribution of the Work otherwise complies with 130 | the conditions stated in this License. 131 | 132 | 5. Submission of Contributions. Unless You explicitly state otherwise, 133 | any Contribution intentionally submitted for inclusion in the Work 134 | by You to the Licensor shall be under the terms and conditions of 135 | this License, without any additional terms or conditions. 136 | Notwithstanding the above, nothing herein shall supersede or modify 137 | the terms of any separate license agreement you may have executed 138 | with Licensor regarding such Contributions. 139 | 140 | 6. Trademarks. This License does not grant permission to use the trade 141 | names, trademarks, service marks, or product names of the Licensor, 142 | except as required for reasonable and customary use in describing the 143 | origin of the Work and reproducing the content of the NOTICE file. 144 | 145 | 7. Disclaimer of Warranty. Unless required by applicable law or 146 | agreed to in writing, Licensor provides the Work (and each 147 | Contributor provides its Contributions) on an "AS IS" BASIS, 148 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 149 | implied, including, without limitation, any warranties or conditions 150 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 151 | PARTICULAR PURPOSE. You are solely responsible for determining the 152 | appropriateness of using or redistributing the Work and assume any 153 | risks associated with Your exercise of permissions under this License. 154 | 155 | 8. Limitation of Liability. 
In no event and under no legal theory, 156 | whether in tort (including negligence), contract, or otherwise, 157 | unless required by applicable law (such as deliberate and grossly 158 | negligent acts) or agreed to in writing, shall any Contributor be 159 | liable to You for damages, including any direct, indirect, special, 160 | incidental, or consequential damages of any character arising as a 161 | result of this License or out of the use or inability to use the 162 | Work (including but not limited to damages for loss of goodwill, 163 | work stoppage, computer failure or malfunction, or any and all 164 | other commercial damages or losses), even if such Contributor 165 | has been advised of the possibility of such damages. 166 | 167 | 9. Accepting Warranty or Additional Liability. While redistributing 168 | the Work or Derivative Works thereof, You may choose to offer, 169 | and charge a fee for, acceptance of support, warranty, indemnity, 170 | or other liability obligations and/or rights consistent with this 171 | License. However, in accepting such obligations, You may act only 172 | on Your own behalf and on Your sole responsibility, not on behalf 173 | of any other Contributor, and only if You agree to indemnify, 174 | defend, and hold each Contributor harmless for any liability 175 | incurred by, or claims asserted against, such Contributor by reason 176 | of your accepting any such warranty or additional liability. 177 | 178 | END OF TERMS AND CONDITIONS 179 | 180 | 181 | 182 | ------- Terraform AWS Provider ------- 183 | 184 | Copyright (c) 2017 HashiCorp, Inc. 185 | 186 | Mozilla Public License Version 2.0 187 | ================================== 188 | 189 | 1. Definitions 190 | -------------- 191 | 192 | 1.1. "Contributor" 193 | means each individual or legal entity that creates, contributes to 194 | the creation of, or owns Covered Software. 195 | 196 | 1.2. 
"Contributor Version" 197 | means the combination of the Contributions of others (if any) used 198 | by a Contributor and that particular Contributor's Contribution. 199 | 200 | 1.3. "Contribution" 201 | means Covered Software of a particular Contributor. 202 | 203 | 1.4. "Covered Software" 204 | means Source Code Form to which the initial Contributor has attached 205 | the notice in Exhibit A, the Executable Form of such Source Code 206 | Form, and Modifications of such Source Code Form, in each case 207 | including portions thereof. 208 | 209 | 1.5. "Incompatible With Secondary Licenses" 210 | means 211 | 212 | (a) that the initial Contributor has attached the notice described 213 | in Exhibit B to the Covered Software; or 214 | 215 | (b) that the Covered Software was made available under the terms of 216 | version 1.1 or earlier of the License, but not also under the 217 | terms of a Secondary License. 218 | 219 | 1.6. "Executable Form" 220 | means any form of the work other than Source Code Form. 221 | 222 | 1.7. "Larger Work" 223 | means a work that combines Covered Software with other material, in 224 | a separate file or files, that is not Covered Software. 225 | 226 | 1.8. "License" 227 | means this document. 228 | 229 | 1.9. "Licensable" 230 | means having the right to grant, to the maximum extent possible, 231 | whether at the time of the initial grant or subsequently, any and 232 | all of the rights conveyed by this License. 233 | 234 | 1.10. "Modifications" 235 | means any of the following: 236 | 237 | (a) any file in Source Code Form that results from an addition to, 238 | deletion from, or modification of the contents of Covered 239 | Software; or 240 | 241 | (b) any new file in Source Code Form that contains any Covered 242 | Software. 243 | 244 | 1.11. 
"Patent Claims" of a Contributor 245 | means any patent claim(s), including without limitation, method, 246 | process, and apparatus claims, in any patent Licensable by such 247 | Contributor that would be infringed, but for the grant of the 248 | License, by the making, using, selling, offering for sale, having 249 | made, import, or transfer of either its Contributions or its 250 | Contributor Version. 251 | 252 | 1.12. "Secondary License" 253 | means either the GNU General Public License, Version 2.0, the GNU 254 | Lesser General Public License, Version 2.1, the GNU Affero General 255 | Public License, Version 3.0, or any later versions of those 256 | licenses. 257 | 258 | 1.13. "Source Code Form" 259 | means the form of the work preferred for making modifications. 260 | 261 | 1.14. "You" (or "Your") 262 | means an individual or a legal entity exercising rights under this 263 | License. For legal entities, "You" includes any entity that 264 | controls, is controlled by, or is under common control with You. For 265 | purposes of this definition, "control" means (a) the power, direct 266 | or indirect, to cause the direction or management of such entity, 267 | whether by contract or otherwise, or (b) ownership of more than 268 | fifty percent (50%) of the outstanding shares or beneficial 269 | ownership of such entity. 270 | 271 | 2. License Grants and Conditions 272 | -------------------------------- 273 | 274 | 2.1. 
Grants 275 | 276 | Each Contributor hereby grants You a world-wide, royalty-free, 277 | non-exclusive license: 278 | 279 | (a) under intellectual property rights (other than patent or trademark) 280 | Licensable by such Contributor to use, reproduce, make available, 281 | modify, display, perform, distribute, and otherwise exploit its 282 | Contributions, either on an unmodified basis, with Modifications, or 283 | as part of a Larger Work; and 284 | 285 | (b) under Patent Claims of such Contributor to make, use, sell, offer 286 | for sale, have made, import, and otherwise transfer either its 287 | Contributions or its Contributor Version. 288 | 289 | 2.2. Effective Date 290 | 291 | The licenses granted in Section 2.1 with respect to any Contribution 292 | become effective for each Contribution on the date the Contributor first 293 | distributes such Contribution. 294 | 295 | 2.3. Limitations on Grant Scope 296 | 297 | The licenses granted in this Section 2 are the only rights granted under 298 | this License. No additional rights or licenses will be implied from the 299 | distribution or licensing of Covered Software under this License. 300 | Notwithstanding Section 2.1(b) above, no patent license is granted by a 301 | Contributor: 302 | 303 | (a) for any code that a Contributor has removed from Covered Software; 304 | or 305 | 306 | (b) for infringements caused by: (i) Your and any other third party's 307 | modifications of Covered Software, or (ii) the combination of its 308 | Contributions with other software (except as part of its Contributor 309 | Version); or 310 | 311 | (c) under Patent Claims infringed by Covered Software in the absence of 312 | its Contributions. 313 | 314 | This License does not grant any rights in the trademarks, service marks, 315 | or logos of any Contributor (except as may be necessary to comply with 316 | the notice requirements in Section 3.4). 317 | 318 | 2.4. 
Subsequent Licenses 319 | 320 | No Contributor makes additional grants as a result of Your choice to 321 | distribute the Covered Software under a subsequent version of this 322 | License (see Section 10.2) or under the terms of a Secondary License (if 323 | permitted under the terms of Section 3.3). 324 | 325 | 2.5. Representation 326 | 327 | Each Contributor represents that the Contributor believes its 328 | Contributions are its original creation(s) or it has sufficient rights 329 | to grant the rights to its Contributions conveyed by this License. 330 | 331 | 2.6. Fair Use 332 | 333 | This License is not intended to limit any rights You have under 334 | applicable copyright doctrines of fair use, fair dealing, or other 335 | equivalents. 336 | 337 | 2.7. Conditions 338 | 339 | Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted 340 | in Section 2.1. 341 | 342 | 3. Responsibilities 343 | ------------------- 344 | 345 | 3.1. Distribution of Source Form 346 | 347 | All distribution of Covered Software in Source Code Form, including any 348 | Modifications that You create or to which You contribute, must be under 349 | the terms of this License. You must inform recipients that the Source 350 | Code Form of the Covered Software is governed by the terms of this 351 | License, and how they can obtain a copy of this License. You may not 352 | attempt to alter or restrict the recipients' rights in the Source Code 353 | Form. 354 | 355 | 3.2. 
Distribution of Executable Form 356 | 357 | If You distribute Covered Software in Executable Form then: 358 | 359 | (a) such Covered Software must also be made available in Source Code 360 | Form, as described in Section 3.1, and You must inform recipients of 361 | the Executable Form how they can obtain a copy of such Source Code 362 | Form by reasonable means in a timely manner, at a charge no more 363 | than the cost of distribution to the recipient; and 364 | 365 | (b) You may distribute such Executable Form under the terms of this 366 | License, or sublicense it under different terms, provided that the 367 | license for the Executable Form does not attempt to limit or alter 368 | the recipients' rights in the Source Code Form under this License. 369 | 370 | 3.3. Distribution of a Larger Work 371 | 372 | You may create and distribute a Larger Work under terms of Your choice, 373 | provided that You also comply with the requirements of this License for 374 | the Covered Software. If the Larger Work is a combination of Covered 375 | Software with a work governed by one or more Secondary Licenses, and the 376 | Covered Software is not Incompatible With Secondary Licenses, this 377 | License permits You to additionally distribute such Covered Software 378 | under the terms of such Secondary License(s), so that the recipient of 379 | the Larger Work may, at their option, further distribute the Covered 380 | Software under the terms of either this License or such Secondary 381 | License(s). 382 | 383 | 3.4. Notices 384 | 385 | You may not remove or alter the substance of any license notices 386 | (including copyright notices, patent notices, disclaimers of warranty, 387 | or limitations of liability) contained within the Source Code Form of 388 | the Covered Software, except that You may alter any license notices to 389 | the extent required to remedy known factual inaccuracies. 390 | 391 | 3.5. 
Application of Additional Terms 392 | 393 | You may choose to offer, and to charge a fee for, warranty, support, 394 | indemnity or liability obligations to one or more recipients of Covered 395 | Software. However, You may do so only on Your own behalf, and not on 396 | behalf of any Contributor. You must make it absolutely clear that any 397 | such warranty, support, indemnity, or liability obligation is offered by 398 | You alone, and You hereby agree to indemnify every Contributor for any 399 | liability incurred by such Contributor as a result of warranty, support, 400 | indemnity or liability terms You offer. You may include additional 401 | disclaimers of warranty and limitations of liability specific to any 402 | jurisdiction. 403 | 404 | 4. Inability to Comply Due to Statute or Regulation 405 | --------------------------------------------------- 406 | 407 | If it is impossible for You to comply with any of the terms of this 408 | License with respect to some or all of the Covered Software due to 409 | statute, judicial order, or regulation then You must: (a) comply with 410 | the terms of this License to the maximum extent possible; and (b) 411 | describe the limitations and the code they affect. Such description must 412 | be placed in a text file included with all distributions of the Covered 413 | Software under this License. Except to the extent prohibited by statute 414 | or regulation, such description must be sufficiently detailed for a 415 | recipient of ordinary skill to be able to understand it. 416 | 417 | 5. Termination 418 | -------------- 419 | 420 | 5.1. The rights granted under this License will terminate automatically 421 | if You fail to comply with any of its terms. 
However, if You become 422 | compliant, then the rights granted under this License from a particular 423 | Contributor are reinstated (a) provisionally, unless and until such 424 | Contributor explicitly and finally terminates Your grants, and (b) on an 425 | ongoing basis, if such Contributor fails to notify You of the 426 | non-compliance by some reasonable means prior to 60 days after You have 427 | come back into compliance. Moreover, Your grants from a particular 428 | Contributor are reinstated on an ongoing basis if such Contributor 429 | notifies You of the non-compliance by some reasonable means, this is the 430 | first time You have received notice of non-compliance with this License 431 | from such Contributor, and You become compliant prior to 30 days after 432 | Your receipt of the notice. 433 | 434 | 5.2. If You initiate litigation against any entity by asserting a patent 435 | infringement claim (excluding declaratory judgment actions, 436 | counter-claims, and cross-claims) alleging that a Contributor Version 437 | directly or indirectly infringes any patent, then the rights granted to 438 | You by any and all Contributors for the Covered Software under Section 439 | 2.1 of this License shall terminate. 440 | 441 | 5.3. In the event of termination under Sections 5.1 or 5.2 above, all 442 | end user license agreements (excluding distributors and resellers) which 443 | have been validly granted by You or Your distributors under this License 444 | prior to termination shall survive termination. 445 | 446 | ************************************************************************ 447 | * * 448 | * 6. 
Disclaimer of Warranty * 449 | * ------------------------- * 450 | * * 451 | * Covered Software is provided under this License on an "as is" * 452 | * basis, without warranty of any kind, either expressed, implied, or * 453 | * statutory, including, without limitation, warranties that the * 454 | * Covered Software is free of defects, merchantable, fit for a * 455 | * particular purpose or non-infringing. The entire risk as to the * 456 | * quality and performance of the Covered Software is with You. * 457 | * Should any Covered Software prove defective in any respect, You * 458 | * (not any Contributor) assume the cost of any necessary servicing, * 459 | * repair, or correction. This disclaimer of warranty constitutes an * 460 | * essential part of this License. No use of any Covered Software is * 461 | * authorized under this License except under this disclaimer. * 462 | * * 463 | ************************************************************************ 464 | 465 | ************************************************************************ 466 | * * 467 | * 7. Limitation of Liability * 468 | * -------------------------- * 469 | * * 470 | * Under no circumstances and under no legal theory, whether tort * 471 | * (including negligence), contract, or otherwise, shall any * 472 | * Contributor, or anyone who distributes Covered Software as * 473 | * permitted above, be liable to You for any direct, indirect, * 474 | * special, incidental, or consequential damages of any character * 475 | * including, without limitation, damages for lost profits, loss of * 476 | * goodwill, work stoppage, computer failure or malfunction, or any * 477 | * and all other commercial damages or losses, even if such party * 478 | * shall have been informed of the possibility of such damages. 
This * 479 | * limitation of liability shall not apply to liability for death or * 480 | * personal injury resulting from such party's negligence to the * 481 | * extent applicable law prohibits such limitation. Some * 482 | * jurisdictions do not allow the exclusion or limitation of * 483 | * incidental or consequential damages, so this exclusion and * 484 | * limitation may not apply to You. * 485 | * * 486 | ************************************************************************ 487 | 488 | 8. Litigation 489 | ------------- 490 | 491 | Any litigation relating to this License may be brought only in the 492 | courts of a jurisdiction where the defendant maintains its principal 493 | place of business and such litigation shall be governed by laws of that 494 | jurisdiction, without reference to its conflict-of-law provisions. 495 | Nothing in this Section shall prevent a party's ability to bring 496 | cross-claims or counter-claims. 497 | 498 | 9. Miscellaneous 499 | ---------------- 500 | 501 | This License represents the complete agreement concerning the subject 502 | matter hereof. If any provision of this License is held to be 503 | unenforceable, such provision shall be reformed only to the extent 504 | necessary to make it enforceable. Any law or regulation which provides 505 | that the language of a contract shall be construed against the drafter 506 | shall not be used to construe this License against a Contributor. 507 | 508 | 10. Versions of the License 509 | --------------------------- 510 | 511 | 10.1. New Versions 512 | 513 | Mozilla Foundation is the license steward. Except as provided in Section 514 | 10.3, no one other than the license steward has the right to modify or 515 | publish new versions of this License. Each version will be given a 516 | distinguishing version number. 517 | 518 | 10.2. 
Effect of New Versions 519 | 520 | You may distribute the Covered Software under the terms of the version 521 | of the License under which You originally received the Covered Software, 522 | or under the terms of any subsequent version published by the license 523 | steward. 524 | 525 | 10.3. Modified Versions 526 | 527 | If you create software not governed by this License, and you want to 528 | create a new license for such software, you may create and use a 529 | modified version of this License if you rename the license and remove 530 | any references to the name of the license steward (except to note that 531 | such modified license differs from this License). 532 | 533 | 10.4. Distributing Source Code Form that is Incompatible With Secondary 534 | Licenses 535 | 536 | If You choose to distribute Source Code Form that is Incompatible With 537 | Secondary Licenses under the terms of this version of the License, the 538 | notice described in Exhibit B of this License must be attached. 539 | 540 | Exhibit A - Source Code Form License Notice 541 | ------------------------------------------- 542 | 543 | This Source Code Form is subject to the terms of the Mozilla Public 544 | License, v. 2.0. If a copy of the MPL was not distributed with this 545 | file, You can obtain one at http://mozilla.org/MPL/2.0/. 546 | 547 | If it is not possible or desirable to put the notice in a particular 548 | file, then You may include the notice in a location (such as a LICENSE 549 | file in a relevant directory) where a recipient would be likely to look 550 | for such a notice. 551 | 552 | You may add additional accurate notices of copyright ownership. 553 | 554 | Exhibit B - "Incompatible With Secondary Licenses" Notice 555 | --------------------------------------------------------- 556 | 557 | This Source Code Form is "Incompatible With Secondary Licenses", as 558 | defined by the Mozilla Public License, v. 2.0. 
559 | 560 | 561 | 562 | ----- Terraform ----- 563 | 564 | License text copyright (c) 2020 MariaDB Corporation Ab, All Rights Reserved. 565 | "Business Source License" is a trademark of MariaDB Corporation Ab. 566 | 567 | Parameters 568 | 569 | Licensor: HashiCorp, Inc. 570 | Licensed Work: Terraform Version 1.6.0 or later. The Licensed Work is (c) 2024 571 | HashiCorp, Inc. 572 | Additional Use Grant: You may make production use of the Licensed Work, provided 573 | Your use does not include offering the Licensed Work to third 574 | parties on a hosted or embedded basis in order to compete with 575 | HashiCorp's paid version(s) of the Licensed Work. For purposes 576 | of this license: 577 | 578 | A "competitive offering" is a Product that is offered to third 579 | parties on a paid basis, including through paid support 580 | arrangements, that significantly overlaps with the capabilities 581 | of HashiCorp's paid version(s) of the Licensed Work. If Your 582 | Product is not a competitive offering when You first make it 583 | generally available, it will not become a competitive offering 584 | later due to HashiCorp releasing a new version of the Licensed 585 | Work with additional capabilities. In addition, Products that 586 | are not provided on a paid basis are not competitive. 587 | 588 | "Product" means software that is offered to end users to manage 589 | in their own environments or offered as a service on a hosted 590 | basis. 591 | 592 | "Embedded" means including the source code or executable code 593 | from the Licensed Work in a competitive offering. "Embedded" 594 | also means packaging the competitive offering in such a way 595 | that the Licensed Work must be accessed or downloaded for the 596 | competitive offering to operate. 597 | 598 | Hosting or using the Licensed Work(s) for internal purposes 599 | within an organization is not considered a competitive 600 | offering. 
HashiCorp considers your organization to include all 601 | of your affiliates under common control. 602 | 603 | For binding interpretive guidance on using HashiCorp products 604 | under the Business Source License, please visit our FAQ. 605 | (https://www.hashicorp.com/license-faq) 606 | Change Date: Four years from the date the Licensed Work is published. 607 | Change License: MPL 2.0 608 | 609 | For information about alternative licensing arrangements for the Licensed Work, 610 | please contact licensing@hashicorp.com. 611 | 612 | Notice 613 | 614 | Business Source License 1.1 615 | 616 | Terms 617 | 618 | The Licensor hereby grants you the right to copy, modify, create derivative 619 | works, redistribute, and make non-production use of the Licensed Work. The 620 | Licensor may make an Additional Use Grant, above, permitting limited production use. 621 | 622 | Effective on the Change Date, or the fourth anniversary of the first publicly 623 | available distribution of a specific version of the Licensed Work under this 624 | License, whichever comes first, the Licensor hereby grants you rights under 625 | the terms of the Change License, and the rights granted in the paragraph 626 | above terminate. 627 | 628 | If your use of the Licensed Work does not comply with the requirements 629 | currently in effect as described in this License, you must purchase a 630 | commercial license from the Licensor, its affiliated entities, or authorized 631 | resellers, or you must refrain from using the Licensed Work. 632 | 633 | All copies of the original and modified Licensed Work, and derivative works 634 | of the Licensed Work, are subject to this License. This License applies 635 | separately for each version of the Licensed Work and the Change Date may vary 636 | for each version of the Licensed Work released by Licensor. 637 | 638 | You must conspicuously display this License on each original or modified copy 639 | of the Licensed Work. 
If you receive the Licensed Work in original or 640 | modified form from a third party, the terms and conditions set forth in this 641 | License apply to your use of that work. 642 | 643 | Any use of the Licensed Work in violation of this License will automatically 644 | terminate your rights under this License for the current and all other 645 | versions of the Licensed Work. 646 | 647 | This License does not grant you any right in any trademark or logo of 648 | Licensor or its affiliates (provided that you may use a trademark or logo of 649 | Licensor as expressly required by this License). 650 | 651 | TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON 652 | AN "AS IS" BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, 653 | EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF 654 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND 655 | TITLE. 656 | 657 | 658 | 659 | 660 | ----- Docker ----- 661 | 662 | 663 | Apache License 664 | Version 2.0, January 2004 665 | https://www.apache.org/licenses/ 666 | 667 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 668 | 669 | 1. Definitions. 670 | 671 | "License" shall mean the terms and conditions for use, reproduction, 672 | and distribution as defined by Sections 1 through 9 of this document. 673 | 674 | "Licensor" shall mean the copyright owner or entity authorized by 675 | the copyright owner that is granting the License. 676 | 677 | "Legal Entity" shall mean the union of the acting entity and all 678 | other entities that control, are controlled by, or are under common 679 | control with that entity. For the purposes of this definition, 680 | "control" means (i) the power, direct or indirect, to cause the 681 | direction or management of such entity, whether by contract or 682 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 683 | outstanding shares, or (iii) beneficial ownership of such entity. 
684 | 685 | "You" (or "Your") shall mean an individual or Legal Entity 686 | exercising permissions granted by this License. 687 | 688 | "Source" form shall mean the preferred form for making modifications, 689 | including but not limited to software source code, documentation 690 | source, and configuration files. 691 | 692 | "Object" form shall mean any form resulting from mechanical 693 | transformation or translation of a Source form, including but 694 | not limited to compiled object code, generated documentation, 695 | and conversions to other media types. 696 | 697 | "Work" shall mean the work of authorship, whether in Source or 698 | Object form, made available under the License, as indicated by a 699 | copyright notice that is included in or attached to the work 700 | (an example is provided in the Appendix below). 701 | 702 | "Derivative Works" shall mean any work, whether in Source or Object 703 | form, that is based on (or derived from) the Work and for which the 704 | editorial revisions, annotations, elaborations, or other modifications 705 | represent, as a whole, an original work of authorship. For the purposes 706 | of this License, Derivative Works shall not include works that remain 707 | separable from, or merely link (or bind by name) to the interfaces of, 708 | the Work and Derivative Works thereof. 709 | 710 | "Contribution" shall mean any work of authorship, including 711 | the original version of the Work and any modifications or additions 712 | to that Work or Derivative Works thereof, that is intentionally 713 | submitted to Licensor for inclusion in the Work by the copyright owner 714 | or by an individual or Legal Entity authorized to submit on behalf of 715 | the copyright owner. 
For the purposes of this definition, "submitted" 716 | means any form of electronic, verbal, or written communication sent 717 | to the Licensor or its representatives, including but not limited to 718 | communication on electronic mailing lists, source code control systems, 719 | and issue tracking systems that are managed by, or on behalf of, the 720 | Licensor for the purpose of discussing and improving the Work, but 721 | excluding communication that is conspicuously marked or otherwise 722 | designated in writing by the copyright owner as "Not a Contribution." 723 | 724 | "Contributor" shall mean Licensor and any individual or Legal Entity 725 | on behalf of whom a Contribution has been received by Licensor and 726 | subsequently incorporated within the Work. 727 | 728 | 2. Grant of Copyright License. Subject to the terms and conditions of 729 | this License, each Contributor hereby grants to You a perpetual, 730 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 731 | copyright license to reproduce, prepare Derivative Works of, 732 | publicly display, publicly perform, sublicense, and distribute the 733 | Work and such Derivative Works in Source or Object form. 734 | 735 | 3. Grant of Patent License. Subject to the terms and conditions of 736 | this License, each Contributor hereby grants to You a perpetual, 737 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 738 | (except as stated in this section) patent license to make, have made, 739 | use, offer to sell, sell, import, and otherwise transfer the Work, 740 | where such license applies only to those patent claims licensable 741 | by such Contributor that are necessarily infringed by their 742 | Contribution(s) alone or by combination of their Contribution(s) 743 | with the Work to which such Contribution(s) was submitted. 
If You 744 | institute patent litigation against any entity (including a 745 | cross-claim or counterclaim in a lawsuit) alleging that the Work 746 | or a Contribution incorporated within the Work constitutes direct 747 | or contributory patent infringement, then any patent licenses 748 | granted to You under this License for that Work shall terminate 749 | as of the date such litigation is filed. 750 | 751 | 4. Redistribution. You may reproduce and distribute copies of the 752 | Work or Derivative Works thereof in any medium, with or without 753 | modifications, and in Source or Object form, provided that You 754 | meet the following conditions: 755 | 756 | (a) You must give any other recipients of the Work or 757 | Derivative Works a copy of this License; and 758 | 759 | (b) You must cause any modified files to carry prominent notices 760 | stating that You changed the files; and 761 | 762 | (c) You must retain, in the Source form of any Derivative Works 763 | that You distribute, all copyright, patent, trademark, and 764 | attribution notices from the Source form of the Work, 765 | excluding those notices that do not pertain to any part of 766 | the Derivative Works; and 767 | 768 | (d) If the Work includes a "NOTICE" text file as part of its 769 | distribution, then any Derivative Works that You distribute must 770 | include a readable copy of the attribution notices contained 771 | within such NOTICE file, excluding those notices that do not 772 | pertain to any part of the Derivative Works, in at least one 773 | of the following places: within a NOTICE text file distributed 774 | as part of the Derivative Works; within the Source form or 775 | documentation, if provided along with the Derivative Works; or, 776 | within a display generated by the Derivative Works, if and 777 | wherever such third-party notices normally appear. The contents 778 | of the NOTICE file are for informational purposes only and 779 | do not modify the License. 
You may add Your own attribution 780 | notices within Derivative Works that You distribute, alongside 781 | or as an addendum to the NOTICE text from the Work, provided 782 | that such additional attribution notices cannot be construed 783 | as modifying the License. 784 | 785 | You may add Your own copyright statement to Your modifications and 786 | may provide additional or different license terms and conditions 787 | for use, reproduction, or distribution of Your modifications, or 788 | for any such Derivative Works as a whole, provided Your use, 789 | reproduction, and distribution of the Work otherwise complies with 790 | the conditions stated in this License. 791 | 792 | 5. Submission of Contributions. Unless You explicitly state otherwise, 793 | any Contribution intentionally submitted for inclusion in the Work 794 | by You to the Licensor shall be under the terms and conditions of 795 | this License, without any additional terms or conditions. 796 | Notwithstanding the above, nothing herein shall supersede or modify 797 | the terms of any separate license agreement you may have executed 798 | with Licensor regarding such Contributions. 799 | 800 | 6. Trademarks. This License does not grant permission to use the trade 801 | names, trademarks, service marks, or product names of the Licensor, 802 | except as required for reasonable and customary use in describing the 803 | origin of the Work and reproducing the content of the NOTICE file. 804 | 805 | 7. Disclaimer of Warranty. Unless required by applicable law or 806 | agreed to in writing, Licensor provides the Work (and each 807 | Contributor provides its Contributions) on an "AS IS" BASIS, 808 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 809 | implied, including, without limitation, any warranties or conditions 810 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 811 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 812 | appropriateness of using or redistributing the Work and assume any 813 | risks associated with Your exercise of permissions under this License. 814 | 815 | 8. Limitation of Liability. In no event and under no legal theory, 816 | whether in tort (including negligence), contract, or otherwise, 817 | unless required by applicable law (such as deliberate and grossly 818 | negligent acts) or agreed to in writing, shall any Contributor be 819 | liable to You for damages, including any direct, indirect, special, 820 | incidental, or consequential damages of any character arising as a 821 | result of this License or out of the use or inability to use the 822 | Work (including but not limited to damages for loss of goodwill, 823 | work stoppage, computer failure or malfunction, or any and all 824 | other commercial damages or losses), even if such Contributor 825 | has been advised of the possibility of such damages. 826 | 827 | 9. Accepting Warranty or Additional Liability. While redistributing 828 | the Work or Derivative Works thereof, You may choose to offer, 829 | and charge a fee for, acceptance of support, warranty, indemnity, 830 | or other liability obligations and/or rights consistent with this 831 | License. However, in accepting such obligations, You may act only 832 | on Your own behalf and on Your sole responsibility, not on behalf 833 | of any other Contributor, and only if You agree to indemnify, 834 | defend, and hold each Contributor harmless for any liability 835 | incurred by, or claims asserted against, such Contributor by reason 836 | of your accepting any such warranty or additional liability. 837 | 838 | END OF TERMS AND CONDITIONS 839 | 840 | Copyright 2013-2017 Docker, Inc. 841 | 842 | Licensed under the Apache License, Version 2.0 (the "License"); 843 | you may not use this file except in compliance with the License. 
844 | You may obtain a copy of the License at 845 | 846 | https://www.apache.org/licenses/LICENSE-2.0 847 | 848 | Unless required by applicable law or agreed to in writing, software 849 | distributed under the License is distributed on an "AS IS" BASIS, 850 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 851 | See the License for the specific language governing permissions and 852 | limitations under the License. 853 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "bedrock-rag-template" 3 | version = "0.1.0" 4 | description = "" 5 | authors = ["Julian Grueber ", "Martin Maritsch "] 6 | 7 | 8 | [tool.poetry.dependencies] 9 | python = "~3.12" 10 | aws-lambda-powertools = { version = "~3.11.0", extras = ["tracer"] } 11 | langchain-community = "~0.3.23" 12 | langchain-postgres = "~0.0.14" 13 | langchain-aws = "~0.2.22" 14 | psycopg = { version = "~3.2", extras = ["binary"] } 15 | nltk = "~3.8.1" 16 | unstructured = "~0.7.6" 17 | httpx = "~0.26.0" 18 | 19 | 20 | [tool.poetry.dev-dependencies] 21 | pytest = "^7.1.2" 22 | ipykernel = "^6.29.4" 23 | typing-extensions = "^4.8" 24 | 25 | [build-system] 26 | requires = ["poetry-core>=1.0.0"] 27 | build-backend = "poetry.core.masonry.api" 28 | -------------------------------------------------------------------------------- /python/src/handlers/data_ingestion_processor/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | # SPDX-License-Identifier: MIT-0 3 | FROM public.ecr.aws/lambda/python:3.12 4 | 5 | RUN dnf install -y libgomp &&\ 6 | pip3 install --no-cache-dir --upgrade pip &&\ 7 | pip3 install --no-cache-dir build &&\ 8 | dnf -y clean all &&\ 9 | rm -rf /var/cache 10 | 11 | COPY ./requirements.txt ./ 12 | 13 | RUN pip3 install --no-cache-dir -r requirements.txt -t . 14 | 15 | COPY . . 16 | 17 | # Needed for Excel loader 18 | RUN python -c "import nltk;nltk.download('punkt', download_dir='/usr/share/nltk_data');nltk.download('averaged_perceptron_tagger', download_dir='/usr/share/nltk_data')" 19 | 20 | CMD ["handler.lambda_handler"] 21 | -------------------------------------------------------------------------------- /python/src/handlers/data_ingestion_processor/handler.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | 4 | import asyncio 5 | import os 6 | from functools import cache 7 | from urllib.parse import unquote_plus 8 | 9 | from aws_lambda_powertools import Logger, Tracer 10 | from aws_lambda_powertools.utilities.data_classes import S3Event, event_source 11 | from aws_lambda_powertools.utilities.parameters import get_secret 12 | from aws_lambda_powertools.utilities.typing import LambdaContext 13 | from langchain.text_splitter import RecursiveCharacterTextSplitter 14 | from langchain_aws.embeddings import BedrockEmbeddings 15 | from langchain_community.document_loaders import S3FileLoader 16 | from langchain_core.embeddings import Embeddings 17 | from langchain_core.vectorstores import VectorStore 18 | from langchain_postgres.vectorstores import DistanceStrategy, PGVector 19 | 20 | tracer = Tracer() 21 | logger = Logger() 22 | 23 | VECTOR_DB_INDEX = os.getenv("VECTOR_DB_INDEX") 24 | CHUNK_SIZE = int(os.getenv("CHUNK_SIZE")) 25 | CHUNK_OVERLAP = int(os.getenv("CHUNK_OVERLAP")) 26 | 27 | EMBEDDING_MODEL_ID = 
os.getenv("EMBEDDING_MODEL_ID") 28 | 29 | PG_VECTOR_DB_NAME = os.getenv("PG_VECTOR_DB_NAME") 30 | PG_VECTOR_SECRET_ARN = os.getenv("PG_VECTOR_SECRET_ARN") 31 | PG_VECTOR_DB_HOST = os.getenv("PG_VECTOR_DB_HOST") 32 | PG_VECTOR_PORT = os.getenv("PG_VECTOR_PORT", "5432") 33 | 34 | 35 | @logger.inject_lambda_context(log_event=True) 36 | @tracer.capture_method(capture_response=False) 37 | @event_source(data_class=S3Event) 38 | def lambda_handler(event: S3Event, _: LambdaContext): 39 | """Lambda handler example""" 40 | try: 41 | # Create a single DB connection for entire Lambda runtime 42 | 43 | for record in event.records: 44 | vector_store = get_vector_store() 45 | bucket = record.s3.bucket.name 46 | key = unquote_plus(record.s3.get_object.key) 47 | logger.info(f"Ingesting document. Bucket: {bucket}, Key: {key}") 48 | 49 | s3Loader = S3FileLoader(bucket=bucket, key=key) 50 | chunks = s3Loader.load_and_split(text_splitter=get_text_splitter()) 51 | # async add will run calls to embedding model in parallel 52 | asyncio.run(vector_store.aadd_documents(chunks)) 53 | logger.info(f"Successfully ingested {len(chunks)} chunks") 54 | 55 | return { 56 | "statusCode": 200, 57 | "body": "Success.", 58 | } 59 | except Exception as e: 60 | logger.exception("Unable to ingest documents.") 61 | raise e 62 | 63 | 64 | @cache 65 | def get_text_splitter() -> RecursiveCharacterTextSplitter: 66 | """Get default text splitter for files based on CHUNK_SIZE and CHUNK_OVERLAP""" 67 | return RecursiveCharacterTextSplitter( 68 | chunk_size=CHUNK_SIZE, 69 | chunk_overlap=CHUNK_OVERLAP, 70 | ) 71 | 72 | 73 | @tracer.capture_method(capture_response=False) 74 | @cache # vector store connection can be cached 75 | def get_vector_store() -> VectorStore: 76 | """Get vector data base connection""" 77 | logger.info(f"Retrieving secret with arn '{PG_VECTOR_SECRET_ARN}'") 78 | credentials = get_secret(PG_VECTOR_SECRET_ARN, transform="json") 79 | connection = PGVector.connection_string_from_db_params( 80 | 
driver="psycopg", 81 | host=PG_VECTOR_DB_HOST, 82 | port=PG_VECTOR_PORT, 83 | database=PG_VECTOR_DB_NAME, 84 | user=credentials.get("username"), 85 | password=credentials.get("password"), 86 | ) 87 | 88 | return PGVector( 89 | embeddings=get_embedding_model(), 90 | connection=connection, 91 | collection_name=VECTOR_DB_INDEX, 92 | distance_strategy=DistanceStrategy.COSINE, 93 | async_mode=True, 94 | ) 95 | 96 | 97 | @tracer.capture_method(capture_response=False) 98 | @cache # model can be cached 99 | def get_embedding_model( 100 | model_id=EMBEDDING_MODEL_ID, 101 | ) -> Embeddings: 102 | """Get embedding model""" 103 | logger.info(f"Using embedding model: {model_id}") 104 | return BedrockEmbeddings(model_id=model_id) 105 | -------------------------------------------------------------------------------- /python/src/handlers/data_ingestion_processor/requirements.txt: -------------------------------------------------------------------------------- 1 | aiohappyeyeballs==2.6.1 ; python_version >= "3.12" and python_version < "3.13" 2 | aiohttp==3.11.18 ; python_version >= "3.12" and python_version < "3.13" 3 | aiosignal==1.3.2 ; python_version >= "3.12" and python_version < "3.13" 4 | annotated-types==0.7.0 ; python_version >= "3.12" and python_version < "3.13" 5 | anyio==4.9.0 ; python_version >= "3.12" and python_version < "3.13" 6 | argilla==2.8.0 ; python_version >= "3.12" and python_version < "3.13" 7 | asyncpg==0.30.0 ; python_version >= "3.12" and python_version < "3.13" 8 | attrs==25.3.0 ; python_version >= "3.12" and python_version < "3.13" 9 | aws-lambda-powertools[tracer]==3.11.0 ; python_version >= "3.12" and python_version < "3.13" 10 | aws-xray-sdk==2.14.0 ; python_version >= "3.12" and python_version < "3.13" 11 | boto3==1.38.9 ; python_version >= "3.12" and python_version < "3.13" 12 | botocore==1.38.9 ; python_version >= "3.12" and python_version < "3.13" 13 | certifi==2025.4.26 ; python_version >= "3.12" and python_version < "3.13" 14 | cffi==1.17.1 ; 
python_version >= "3.12" and python_version < "3.13" 15 | chardet==5.2.0 ; python_version >= "3.12" and python_version < "3.13" 16 | charset-normalizer==3.4.2 ; python_version >= "3.12" and python_version < "3.13" 17 | click==8.1.8 ; python_version >= "3.12" and python_version < "3.13" 18 | colorama==0.4.6 ; python_version >= "3.12" and python_version < "3.13" and platform_system == "Windows" 19 | cryptography==44.0.3 ; python_version >= "3.12" and python_version < "3.13" 20 | dataclasses-json==0.6.7 ; python_version >= "3.12" and python_version < "3.13" 21 | datasets==3.5.1 ; python_version >= "3.12" and python_version < "3.13" 22 | deprecated==1.2.18 ; python_version >= "3.12" and python_version < "3.13" 23 | dill==0.3.8 ; python_version >= "3.12" and python_version < "3.13" 24 | et-xmlfile==2.0.0 ; python_version >= "3.12" and python_version < "3.13" 25 | filelock==3.18.0 ; python_version >= "3.12" and python_version < "3.13" 26 | filetype==1.2.0 ; python_version >= "3.12" and python_version < "3.13" 27 | frozenlist==1.6.0 ; python_version >= "3.12" and python_version < "3.13" 28 | fsspec==2025.3.0 ; python_version >= "3.12" and python_version < "3.13" 29 | fsspec[http]==2025.3.0 ; python_version >= "3.12" and python_version < "3.13" 30 | greenlet==3.2.1 ; python_version < "3.13" and (platform_machine == "aarch64" or platform_machine == "ppc64le" or platform_machine == "x86_64" or platform_machine == "amd64" or platform_machine == "AMD64" or platform_machine == "win32" or platform_machine == "WIN32") and python_version >= "3.12" 31 | h11==0.16.0 ; python_version >= "3.12" and python_version < "3.13" 32 | httpcore==1.0.9 ; python_version >= "3.12" and python_version < "3.13" 33 | httpx-sse==0.4.0 ; python_version >= "3.12" and python_version < "3.13" 34 | httpx==0.26.0 ; python_version >= "3.12" and python_version < "3.13" 35 | huggingface-hub==0.30.2 ; python_version >= "3.12" and python_version < "3.13" 36 | idna==3.10 ; python_version >= "3.12" and 
python_version < "3.13" 37 | jmespath==1.0.1 ; python_version >= "3.12" and python_version < "3.13" 38 | joblib==1.5.0 ; python_version >= "3.12" and python_version < "3.13" 39 | jsonpatch==1.33 ; python_version >= "3.12" and python_version < "3.13" 40 | jsonpointer==3.0.0 ; python_version >= "3.12" and python_version < "3.13" 41 | langchain-aws==0.2.22 ; python_version >= "3.12" and python_version < "3.13" 42 | langchain-community==0.3.23 ; python_version >= "3.12" and python_version < "3.13" 43 | langchain-core==0.3.58 ; python_version >= "3.12" and python_version < "3.13" 44 | langchain-postgres==0.0.14 ; python_version >= "3.12" and python_version < "3.13" 45 | langchain-text-splitters==0.3.8 ; python_version >= "3.12" and python_version < "3.13" 46 | langchain==0.3.25 ; python_version >= "3.12" and python_version < "3.13" 47 | langsmith==0.3.42 ; python_version >= "3.12" and python_version < "3.13" 48 | lxml==5.4.0 ; python_version >= "3.12" and python_version < "3.13" 49 | markdown-it-py==3.0.0 ; python_version >= "3.12" and python_version < "3.13" 50 | markdown==3.8 ; python_version >= "3.12" and python_version < "3.13" 51 | marshmallow==3.26.1 ; python_version >= "3.12" and python_version < "3.13" 52 | mdurl==0.1.2 ; python_version >= "3.12" and python_version < "3.13" 53 | msg-parser==1.2.0 ; python_version >= "3.12" and python_version < "3.13" 54 | multidict==6.4.3 ; python_version >= "3.12" and python_version < "3.13" 55 | multiprocess==0.70.16 ; python_version >= "3.12" and python_version < "3.13" 56 | mypy-extensions==1.1.0 ; python_version >= "3.12" and python_version < "3.13" 57 | nltk==3.8.1 ; python_version >= "3.12" and python_version < "3.13" 58 | numpy==1.26.4 ; python_version >= "3.12" and python_version < "3.13" 59 | olefile==0.47 ; python_version >= "3.12" and python_version < "3.13" 60 | openpyxl==3.1.5 ; python_version >= "3.12" and python_version < "3.13" 61 | orjson==3.10.18 ; python_version >= "3.12" and python_version < "3.13" and 
platform_python_implementation != "PyPy" 62 | packaging==24.2 ; python_version >= "3.12" and python_version < "3.13" 63 | pandas==2.2.3 ; python_version >= "3.12" and python_version < "3.13" 64 | pdf2image==1.17.0 ; python_version >= "3.12" and python_version < "3.13" 65 | pdfminer-six==20250416 ; python_version >= "3.12" and python_version < "3.13" 66 | pgvector==0.3.6 ; python_version >= "3.12" and python_version < "3.13" 67 | pillow==11.2.1 ; python_version >= "3.12" and python_version < "3.13" 68 | propcache==0.3.1 ; python_version >= "3.12" and python_version < "3.13" 69 | psycopg-binary==3.2.7 ; implementation_name != "pypy" and python_version >= "3.12" and python_version < "3.13" 70 | psycopg-pool==3.2.6 ; python_version >= "3.12" and python_version < "3.13" 71 | psycopg==3.2.7 ; python_version >= "3.12" and python_version < "3.13" 72 | psycopg[binary]==3.2.7 ; python_version >= "3.12" and python_version < "3.13" 73 | pyarrow==20.0.0 ; python_version >= "3.12" and python_version < "3.13" 74 | pycparser==2.22 ; python_version >= "3.12" and python_version < "3.13" 75 | pydantic-core==2.33.2 ; python_version >= "3.12" and python_version < "3.13" 76 | pydantic-settings==2.9.1 ; python_version >= "3.12" and python_version < "3.13" 77 | pydantic==2.11.4 ; python_version >= "3.12" and python_version < "3.13" 78 | pygments==2.19.1 ; python_version >= "3.12" and python_version < "3.13" 79 | pypandoc==1.15 ; python_version >= "3.12" and python_version < "3.13" 80 | python-dateutil==2.9.0.post0 ; python_version >= "3.12" and python_version < "3.13" 81 | python-docx==1.1.2 ; python_version >= "3.12" and python_version < "3.13" 82 | python-dotenv==1.1.0 ; python_version >= "3.12" and python_version < "3.13" 83 | python-magic==0.4.27 ; python_version >= "3.12" and python_version < "3.13" 84 | python-pptx==1.0.2 ; python_version >= "3.12" and python_version < "3.13" 85 | pytz==2025.2 ; python_version >= "3.12" and python_version < "3.13" 86 | pyyaml==6.0.2 ; python_version 
>= "3.12" and python_version < "3.13" 87 | regex==2024.11.6 ; python_version >= "3.12" and python_version < "3.13" 88 | requests-toolbelt==1.0.0 ; python_version >= "3.12" and python_version < "3.13" 89 | requests==2.32.3 ; python_version >= "3.12" and python_version < "3.13" 90 | rich==14.0.0 ; python_version >= "3.12" and python_version < "3.13" 91 | s3transfer==0.12.0 ; python_version >= "3.12" and python_version < "3.13" 92 | six==1.17.0 ; python_version >= "3.12" and python_version < "3.13" 93 | sniffio==1.3.1 ; python_version >= "3.12" and python_version < "3.13" 94 | sqlalchemy==2.0.40 ; python_version >= "3.12" and python_version < "3.13" 95 | standardwebhooks==1.0.0 ; python_version >= "3.12" and python_version < "3.13" 96 | tabulate==0.9.0 ; python_version >= "3.12" and python_version < "3.13" 97 | tenacity==9.1.2 ; python_version >= "3.12" and python_version < "3.13" 98 | tqdm==4.67.1 ; python_version >= "3.12" and python_version < "3.13" 99 | types-deprecated==1.2.15.20250304 ; python_version >= "3.12" and python_version < "3.13" 100 | types-python-dateutil==2.9.0.20241206 ; python_version >= "3.12" and python_version < "3.13" 101 | typing-extensions==4.13.2 ; python_version >= "3.12" and python_version < "3.13" 102 | typing-inspect==0.9.0 ; python_version >= "3.12" and python_version < "3.13" 103 | typing-inspection==0.4.0 ; python_version >= "3.12" and python_version < "3.13" 104 | tzdata==2025.2 ; python_version >= "3.12" and python_version < "3.13" 105 | unstructured==0.7.12 ; python_version >= "3.12" and python_version < "3.13" 106 | urllib3==2.4.0 ; python_version >= "3.12" and python_version < "3.13" 107 | wrapt==1.17.2 ; python_version >= "3.12" and python_version < "3.13" 108 | xlrd==2.0.1 ; python_version >= "3.12" and python_version < "3.13" 109 | xlsxwriter==3.2.3 ; python_version >= "3.12" and python_version < "3.13" 110 | xxhash==3.5.0 ; python_version >= "3.12" and python_version < "3.13" 111 | yarl==1.20.0 ; python_version >= "3.12" and 
python_version < "3.13" 112 | zstandard==0.23.0 ; python_version >= "3.12" and python_version < "3.13" 113 | -------------------------------------------------------------------------------- /python/tests/events/bucket_notification.json: -------------------------------------------------------------------------------- 1 | { 2 | "Records": [ 3 | { 4 | "eventVersion": "2.1", 5 | "eventSource": "aws:s3", 6 | "awsRegion": "us-west-2", 7 | "eventTime": "2023-05-27T15:59:00.000Z", 8 | "eventName": "ObjectCreated:Put", 9 | "userIdentity": { 10 | "principalId": "AWS:EXAMPLE" 11 | }, 12 | "requestParameters": { 13 | "sourceIPAddress": "127.0.0.1" 14 | }, 15 | "responseElements": { 16 | "x-amz-request-id": "C3D13FE58DE4C810", 17 | "x-amz-id-2": "FMyUVURI4aNFmQzM1U1T4..." 18 | }, 19 | "s3": { 20 | "s3SchemaVersion": "1.0", 21 | "configurationId": "testConfigRule", 22 | "bucket": { 23 | "name": "bedrock-rag-template-522846675156", 24 | "ownerIdentity": { 25 | "principalId": "EXAMPLE" 26 | }, 27 | "arn": "arn:aws:s3:::example-bucket" 28 | }, 29 | "object": { 30 | "key": "dummy.txt", 31 | "size": 1024, 32 | "eTag": "d41d8cd98f00b204e9800998ecf8427e", 33 | "sequencer": "0055AED6DCD90281E5" 34 | } 35 | } 36 | } 37 | ] 38 | } 39 | -------------------------------------------------------------------------------- /python/tests/handlers/test_data_ingestion_processor.py: -------------------------------------------------------------------------------- 1 | import os 2 | from unittest.mock import MagicMock, patch 3 | 4 | import pytest 5 | from aws_lambda_powertools.utilities.data_classes import S3Event 6 | from aws_lambda_powertools.utilities.typing import LambdaContext 7 | 8 | os.environ["CHUNK_SIZE"] = "1000" 9 | os.environ["CHUNK_OVERLAP"] = "100" 10 | 11 | from python.src.handlers.data_ingestion_processor.handler import ( # noqa: E402 12 | lambda_handler, 13 | ) 14 | 15 | 16 | def test_lambda_handler_exception_handling(): 17 | """ 18 | Test lambda_handler's exception handling. 
19 | This tests the scenario where an exception is raised during processing. 20 | """ 21 | mock_event = S3Event( 22 | { 23 | "Records": [ 24 | { 25 | "s3": { 26 | "bucket": {"name": "test-bucket"}, 27 | "object": {"key": "test-key"}, 28 | }, 29 | }, 30 | ], 31 | }, 32 | ) 33 | context = LambdaContext() 34 | 35 | # Simulate an exception during processing 36 | with pytest.raises(Exception): 37 | lambda_handler(mock_event, context) 38 | 39 | 40 | def test_lambda_handler_success(): 41 | """ 42 | Test that lambda_handler successfully processes S3 events and returns a 200 status code. 43 | 44 | This test verifies that: 45 | 1. The function correctly handles S3 events 46 | 2. It processes all records in the event 47 | 3. It returns the expected success response 48 | """ 49 | # Mock S3Event 50 | mock_event = S3Event( 51 | { 52 | "Records": [ 53 | { 54 | "s3": { 55 | "bucket": {"name": "test-bucket"}, 56 | "object": {"key": "test-key"}, 57 | }, 58 | }, 59 | ], 60 | }, 61 | ) 62 | 63 | # Mock LambdaContext 64 | mock_context = MagicMock(spec=LambdaContext) 65 | 66 | # Mock dependencies 67 | with patch( 68 | "python.src.handlers.data_ingestion_processor.handler.get_vector_store", 69 | ) as mock_get_vector_store, patch( 70 | "python.src.handlers.data_ingestion_processor.handler.S3FileLoader", 71 | ) as mock_s3_loader, patch( 72 | "python.src.handlers.data_ingestion_processor.handler.asyncio.run", 73 | ) as mock_asyncio_run: 74 | # Set up mock returns 75 | mock_vector_store = MagicMock() 76 | mock_get_vector_store.return_value = mock_vector_store 77 | 78 | mock_s3_loader_instance = MagicMock() 79 | mock_s3_loader.return_value = mock_s3_loader_instance 80 | mock_s3_loader_instance.load_and_split.return_value = [MagicMock(), MagicMock()] 81 | 82 | # Call the function 83 | result = lambda_handler(mock_event, mock_context) 84 | 85 | # Assertions 86 | assert result == { 87 | "statusCode": 200, 88 | "body": "Success.", 89 | } 90 | 91 | mock_get_vector_store.assert_called_once() 92 | 
mock_s3_loader.assert_called_once_with(bucket="test-bucket", key="test-key") 93 | mock_s3_loader_instance.load_and_split.assert_called_once() 94 | mock_vector_store.aadd_documents.assert_called_once_with( 95 | mock_s3_loader_instance.load_and_split.return_value, 96 | ) 97 | mock_asyncio_run.assert_called_once() 98 | -------------------------------------------------------------------------------- /python/tests/test_data_ingestion.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | # Write a unit test using pytest to test the function which reads files, such as dummy.txt, and parses them to text 4 | -------------------------------------------------------------------------------- /rag_demo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "e415b75c-caf8-425f-92e3-c59b926ef5d6", 6 | "metadata": {}, 7 | "source": [ 8 | "# Amazon Bedrock RAG Template Demo \n", 9 | "\n", 10 | "This Jupyter notebook gives a short demonstration of the Bedrock RAG use case template, where Amazon Bedrock invocations are augmented with embeddings retrieved from an Aurora vector database. 
\n", 11 | "\n", 12 | "## Agenda:\n", 13 | "\n", 14 | "- Installing requirements\n", 15 | "- Embedding definition\n", 16 | "- Database connection \n", 17 | "- Data ingestion\n", 18 | "- Retrieval augmented text generation\n", 19 | "- Relevant document queries\n", 20 | "\n", 21 | "\n", 22 | "## Installing requirements" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 23, 28 | "id": "94b45d74-2bf3-43bc-94fc-4f0d6236fa0a", 29 | "metadata": { 30 | "tags": [] 31 | }, 32 | "outputs": [ 33 | { 34 | "name": "stdout", 35 | "output_type": "stream", 36 | "text": [ 37 | "Requirement already satisfied: langchain-community~=0.3.23 in ./.venv/lib/python3.12/site-packages (0.3.23)\n", 38 | "Requirement already satisfied: langchain-core<1.0.0,>=0.3.56 in ./.venv/lib/python3.12/site-packages (from langchain-community~=0.3.23) (0.3.58)\n", 39 | "Requirement already satisfied: langchain<1.0.0,>=0.3.24 in ./.venv/lib/python3.12/site-packages (from langchain-community~=0.3.23) (0.3.25)\n", 40 | "Requirement already satisfied: SQLAlchemy<3,>=1.4 in ./.venv/lib/python3.12/site-packages (from langchain-community~=0.3.23) (2.0.40)\n", 41 | "Requirement already satisfied: requests<3,>=2 in ./.venv/lib/python3.12/site-packages (from langchain-community~=0.3.23) (2.32.3)\n", 42 | "Requirement already satisfied: PyYAML>=5.3 in ./.venv/lib/python3.12/site-packages (from langchain-community~=0.3.23) (6.0.2)\n", 43 | "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in ./.venv/lib/python3.12/site-packages (from langchain-community~=0.3.23) (3.11.18)\n", 44 | "Requirement already satisfied: tenacity!=8.4.0,<10,>=8.1.0 in ./.venv/lib/python3.12/site-packages (from langchain-community~=0.3.23) (9.1.2)\n", 45 | "Requirement already satisfied: dataclasses-json<0.7,>=0.5.7 in ./.venv/lib/python3.12/site-packages (from langchain-community~=0.3.23) (0.6.7)\n", 46 | "Requirement already satisfied: pydantic-settings<3.0.0,>=2.4.0 in ./.venv/lib/python3.12/site-packages (from 
langchain-community~=0.3.23) (2.9.1)\n", 47 | "Requirement already satisfied: langsmith<0.4,>=0.1.125 in ./.venv/lib/python3.12/site-packages (from langchain-community~=0.3.23) (0.3.42)\n", 48 | "Requirement already satisfied: httpx-sse<1.0.0,>=0.4.0 in ./.venv/lib/python3.12/site-packages (from langchain-community~=0.3.23) (0.4.0)\n", 49 | "Requirement already satisfied: numpy>=1.26.2 in ./.venv/lib/python3.12/site-packages (from langchain-community~=0.3.23) (1.26.4)\n", 50 | "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in ./.venv/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community~=0.3.23) (2.6.1)\n", 51 | "Requirement already satisfied: aiosignal>=1.1.2 in ./.venv/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community~=0.3.23) (1.3.2)\n", 52 | "Requirement already satisfied: attrs>=17.3.0 in ./.venv/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community~=0.3.23) (25.3.0)\n", 53 | "Requirement already satisfied: frozenlist>=1.1.1 in ./.venv/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community~=0.3.23) (1.6.0)\n", 54 | "Requirement already satisfied: multidict<7.0,>=4.5 in ./.venv/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community~=0.3.23) (6.4.3)\n", 55 | "Requirement already satisfied: propcache>=0.2.0 in ./.venv/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community~=0.3.23) (0.3.1)\n", 56 | "Requirement already satisfied: yarl<2.0,>=1.17.0 in ./.venv/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community~=0.3.23) (1.20.0)\n", 57 | "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in ./.venv/lib/python3.12/site-packages (from dataclasses-json<0.7,>=0.5.7->langchain-community~=0.3.23) (3.26.1)\n", 58 | "Requirement already satisfied: typing-inspect<1,>=0.4.0 in ./.venv/lib/python3.12/site-packages (from dataclasses-json<0.7,>=0.5.7->langchain-community~=0.3.23) (0.9.0)\n", 59 
| "Requirement already satisfied: langchain-text-splitters<1.0.0,>=0.3.8 in ./.venv/lib/python3.12/site-packages (from langchain<1.0.0,>=0.3.24->langchain-community~=0.3.23) (0.3.8)\n", 60 | "Requirement already satisfied: pydantic<3.0.0,>=2.7.4 in ./.venv/lib/python3.12/site-packages (from langchain<1.0.0,>=0.3.24->langchain-community~=0.3.23) (2.11.4)\n", 61 | "Requirement already satisfied: jsonpatch<2.0,>=1.33 in ./.venv/lib/python3.12/site-packages (from langchain-core<1.0.0,>=0.3.56->langchain-community~=0.3.23) (1.33)\n", 62 | "Requirement already satisfied: packaging<25,>=23.2 in ./.venv/lib/python3.12/site-packages (from langchain-core<1.0.0,>=0.3.56->langchain-community~=0.3.23) (24.2)\n", 63 | "Requirement already satisfied: typing-extensions>=4.7 in ./.venv/lib/python3.12/site-packages (from langchain-core<1.0.0,>=0.3.56->langchain-community~=0.3.23) (4.13.2)\n", 64 | "Requirement already satisfied: jsonpointer>=1.9 in ./.venv/lib/python3.12/site-packages (from jsonpatch<2.0,>=1.33->langchain-core<1.0.0,>=0.3.56->langchain-community~=0.3.23) (3.0.0)\n", 65 | "Requirement already satisfied: httpx<1,>=0.23.0 in ./.venv/lib/python3.12/site-packages (from langsmith<0.4,>=0.1.125->langchain-community~=0.3.23) (0.26.0)\n", 66 | "Requirement already satisfied: orjson<4.0.0,>=3.9.14 in ./.venv/lib/python3.12/site-packages (from langsmith<0.4,>=0.1.125->langchain-community~=0.3.23) (3.10.18)\n", 67 | "Requirement already satisfied: requests-toolbelt<2.0.0,>=1.0.0 in ./.venv/lib/python3.12/site-packages (from langsmith<0.4,>=0.1.125->langchain-community~=0.3.23) (1.0.0)\n", 68 | "Requirement already satisfied: zstandard<0.24.0,>=0.23.0 in ./.venv/lib/python3.12/site-packages (from langsmith<0.4,>=0.1.125->langchain-community~=0.3.23) (0.23.0)\n", 69 | "Requirement already satisfied: anyio in ./.venv/lib/python3.12/site-packages (from httpx<1,>=0.23.0->langsmith<0.4,>=0.1.125->langchain-community~=0.3.23) (4.9.0)\n", 70 | "Requirement already satisfied: certifi in 
./.venv/lib/python3.12/site-packages (from httpx<1,>=0.23.0->langsmith<0.4,>=0.1.125->langchain-community~=0.3.23) (2025.4.26)\n", 71 | "Requirement already satisfied: httpcore==1.* in ./.venv/lib/python3.12/site-packages (from httpx<1,>=0.23.0->langsmith<0.4,>=0.1.125->langchain-community~=0.3.23) (1.0.9)\n", 72 | "Requirement already satisfied: idna in ./.venv/lib/python3.12/site-packages (from httpx<1,>=0.23.0->langsmith<0.4,>=0.1.125->langchain-community~=0.3.23) (3.10)\n", 73 | "Requirement already satisfied: sniffio in ./.venv/lib/python3.12/site-packages (from httpx<1,>=0.23.0->langsmith<0.4,>=0.1.125->langchain-community~=0.3.23) (1.3.1)\n", 74 | "Requirement already satisfied: h11>=0.16 in ./.venv/lib/python3.12/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->langsmith<0.4,>=0.1.125->langchain-community~=0.3.23) (0.16.0)\n", 75 | "Requirement already satisfied: annotated-types>=0.6.0 in ./.venv/lib/python3.12/site-packages (from pydantic<3.0.0,>=2.7.4->langchain<1.0.0,>=0.3.24->langchain-community~=0.3.23) (0.7.0)\n", 76 | "Requirement already satisfied: pydantic-core==2.33.2 in ./.venv/lib/python3.12/site-packages (from pydantic<3.0.0,>=2.7.4->langchain<1.0.0,>=0.3.24->langchain-community~=0.3.23) (2.33.2)\n", 77 | "Requirement already satisfied: typing-inspection>=0.4.0 in ./.venv/lib/python3.12/site-packages (from pydantic<3.0.0,>=2.7.4->langchain<1.0.0,>=0.3.24->langchain-community~=0.3.23) (0.4.0)\n", 78 | "Requirement already satisfied: python-dotenv>=0.21.0 in ./.venv/lib/python3.12/site-packages (from pydantic-settings<3.0.0,>=2.4.0->langchain-community~=0.3.23) (1.1.0)\n", 79 | "Requirement already satisfied: charset-normalizer<4,>=2 in ./.venv/lib/python3.12/site-packages (from requests<3,>=2->langchain-community~=0.3.23) (3.4.2)\n", 80 | "Requirement already satisfied: urllib3<3,>=1.21.1 in ./.venv/lib/python3.12/site-packages (from requests<3,>=2->langchain-community~=0.3.23) (2.4.0)\n", 81 | "Requirement already satisfied: 
mypy-extensions>=0.3.0 in ./.venv/lib/python3.12/site-packages (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain-community~=0.3.23) (1.1.0)\n", 82 | "Note: you may need to restart the kernel to use updated packages.\n", 83 | "Requirement already satisfied: langchain-postgres~=0.0.14 in ./.venv/lib/python3.12/site-packages (0.0.14)\n", 84 | "Requirement already satisfied: asyncpg<0.31.0,>=0.30.0 in ./.venv/lib/python3.12/site-packages (from langchain-postgres~=0.0.14) (0.30.0)\n", 85 | "Requirement already satisfied: langchain-core<0.4.0,>=0.2.13 in ./.venv/lib/python3.12/site-packages (from langchain-postgres~=0.0.14) (0.3.58)\n", 86 | "Requirement already satisfied: numpy<2.0,>=1.21 in ./.venv/lib/python3.12/site-packages (from langchain-postgres~=0.0.14) (1.26.4)\n", 87 | "Requirement already satisfied: pgvector<0.4,>=0.2.5 in ./.venv/lib/python3.12/site-packages (from langchain-postgres~=0.0.14) (0.2.5)\n", 88 | "Requirement already satisfied: psycopg<4,>=3 in ./.venv/lib/python3.12/site-packages (from langchain-postgres~=0.0.14) (3.2.7)\n", 89 | "Requirement already satisfied: psycopg-pool<4.0.0,>=3.2.1 in ./.venv/lib/python3.12/site-packages (from langchain-postgres~=0.0.14) (3.2.6)\n", 90 | "Requirement already satisfied: sqlalchemy<3,>=2 in ./.venv/lib/python3.12/site-packages (from langchain-postgres~=0.0.14) (2.0.40)\n", 91 | "Requirement already satisfied: langsmith<0.4,>=0.1.125 in ./.venv/lib/python3.12/site-packages (from langchain-core<0.4.0,>=0.2.13->langchain-postgres~=0.0.14) (0.3.42)\n", 92 | "Requirement already satisfied: tenacity!=8.4.0,<10.0.0,>=8.1.0 in ./.venv/lib/python3.12/site-packages (from langchain-core<0.4.0,>=0.2.13->langchain-postgres~=0.0.14) (9.1.2)\n", 93 | "Requirement already satisfied: jsonpatch<2.0,>=1.33 in ./.venv/lib/python3.12/site-packages (from langchain-core<0.4.0,>=0.2.13->langchain-postgres~=0.0.14) (1.33)\n", 94 | "Requirement already satisfied: PyYAML>=5.3 in 
./.venv/lib/python3.12/site-packages (from langchain-core<0.4.0,>=0.2.13->langchain-postgres~=0.0.14) (6.0.2)\n", 95 | "Requirement already satisfied: packaging<25,>=23.2 in ./.venv/lib/python3.12/site-packages (from langchain-core<0.4.0,>=0.2.13->langchain-postgres~=0.0.14) (24.2)\n", 96 | "Requirement already satisfied: typing-extensions>=4.7 in ./.venv/lib/python3.12/site-packages (from langchain-core<0.4.0,>=0.2.13->langchain-postgres~=0.0.14) (4.13.2)\n", 97 | "Requirement already satisfied: pydantic<3.0.0,>=2.5.2 in ./.venv/lib/python3.12/site-packages (from langchain-core<0.4.0,>=0.2.13->langchain-postgres~=0.0.14) (2.11.4)\n", 98 | "Requirement already satisfied: jsonpointer>=1.9 in ./.venv/lib/python3.12/site-packages (from jsonpatch<2.0,>=1.33->langchain-core<0.4.0,>=0.2.13->langchain-postgres~=0.0.14) (3.0.0)\n", 99 | "Requirement already satisfied: httpx<1,>=0.23.0 in ./.venv/lib/python3.12/site-packages (from langsmith<0.4,>=0.1.125->langchain-core<0.4.0,>=0.2.13->langchain-postgres~=0.0.14) (0.26.0)\n", 100 | "Requirement already satisfied: orjson<4.0.0,>=3.9.14 in ./.venv/lib/python3.12/site-packages (from langsmith<0.4,>=0.1.125->langchain-core<0.4.0,>=0.2.13->langchain-postgres~=0.0.14) (3.10.18)\n", 101 | "Requirement already satisfied: requests<3,>=2 in ./.venv/lib/python3.12/site-packages (from langsmith<0.4,>=0.1.125->langchain-core<0.4.0,>=0.2.13->langchain-postgres~=0.0.14) (2.32.3)\n", 102 | "Requirement already satisfied: requests-toolbelt<2.0.0,>=1.0.0 in ./.venv/lib/python3.12/site-packages (from langsmith<0.4,>=0.1.125->langchain-core<0.4.0,>=0.2.13->langchain-postgres~=0.0.14) (1.0.0)\n", 103 | "Requirement already satisfied: zstandard<0.24.0,>=0.23.0 in ./.venv/lib/python3.12/site-packages (from langsmith<0.4,>=0.1.125->langchain-core<0.4.0,>=0.2.13->langchain-postgres~=0.0.14) (0.23.0)\n", 104 | "Requirement already satisfied: anyio in ./.venv/lib/python3.12/site-packages (from 
httpx<1,>=0.23.0->langsmith<0.4,>=0.1.125->langchain-core<0.4.0,>=0.2.13->langchain-postgres~=0.0.14) (4.9.0)\n", 105 | "Requirement already satisfied: certifi in ./.venv/lib/python3.12/site-packages (from httpx<1,>=0.23.0->langsmith<0.4,>=0.1.125->langchain-core<0.4.0,>=0.2.13->langchain-postgres~=0.0.14) (2025.4.26)\n", 106 | "Requirement already satisfied: httpcore==1.* in ./.venv/lib/python3.12/site-packages (from httpx<1,>=0.23.0->langsmith<0.4,>=0.1.125->langchain-core<0.4.0,>=0.2.13->langchain-postgres~=0.0.14) (1.0.9)\n", 107 | "Requirement already satisfied: idna in ./.venv/lib/python3.12/site-packages (from httpx<1,>=0.23.0->langsmith<0.4,>=0.1.125->langchain-core<0.4.0,>=0.2.13->langchain-postgres~=0.0.14) (3.10)\n", 108 | "Requirement already satisfied: sniffio in ./.venv/lib/python3.12/site-packages (from httpx<1,>=0.23.0->langsmith<0.4,>=0.1.125->langchain-core<0.4.0,>=0.2.13->langchain-postgres~=0.0.14) (1.3.1)\n", 109 | "Requirement already satisfied: h11>=0.16 in ./.venv/lib/python3.12/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->langsmith<0.4,>=0.1.125->langchain-core<0.4.0,>=0.2.13->langchain-postgres~=0.0.14) (0.16.0)\n", 110 | "Requirement already satisfied: annotated-types>=0.6.0 in ./.venv/lib/python3.12/site-packages (from pydantic<3.0.0,>=2.5.2->langchain-core<0.4.0,>=0.2.13->langchain-postgres~=0.0.14) (0.7.0)\n", 111 | "Requirement already satisfied: pydantic-core==2.33.2 in ./.venv/lib/python3.12/site-packages (from pydantic<3.0.0,>=2.5.2->langchain-core<0.4.0,>=0.2.13->langchain-postgres~=0.0.14) (2.33.2)\n", 112 | "Requirement already satisfied: typing-inspection>=0.4.0 in ./.venv/lib/python3.12/site-packages (from pydantic<3.0.0,>=2.5.2->langchain-core<0.4.0,>=0.2.13->langchain-postgres~=0.0.14) (0.4.0)\n", 113 | "Requirement already satisfied: charset-normalizer<4,>=2 in ./.venv/lib/python3.12/site-packages (from requests<3,>=2->langsmith<0.4,>=0.1.125->langchain-core<0.4.0,>=0.2.13->langchain-postgres~=0.0.14) (3.4.2)\n", 
114 | "Requirement already satisfied: urllib3<3,>=1.21.1 in ./.venv/lib/python3.12/site-packages (from requests<3,>=2->langsmith<0.4,>=0.1.125->langchain-core<0.4.0,>=0.2.13->langchain-postgres~=0.0.14) (2.4.0)\n", 115 | "Note: you may need to restart the kernel to use updated packages.\n", 116 | "Requirement already satisfied: langchain-aws~=0.2.22 in ./.venv/lib/python3.12/site-packages (0.2.22)\n", 117 | "Requirement already satisfied: boto3>=1.37.24 in ./.venv/lib/python3.12/site-packages (from langchain-aws~=0.2.22) (1.38.8)\n", 118 | "Requirement already satisfied: langchain-core<0.4.0,>=0.3.56 in ./.venv/lib/python3.12/site-packages (from langchain-aws~=0.2.22) (0.3.58)\n", 119 | "Requirement already satisfied: numpy<3,>=1.26.0 in ./.venv/lib/python3.12/site-packages (from langchain-aws~=0.2.22) (1.26.4)\n", 120 | "Requirement already satisfied: pydantic<3,>=2.10.0 in ./.venv/lib/python3.12/site-packages (from langchain-aws~=0.2.22) (2.11.4)\n", 121 | "Requirement already satisfied: langsmith<0.4,>=0.1.125 in ./.venv/lib/python3.12/site-packages (from langchain-core<0.4.0,>=0.3.56->langchain-aws~=0.2.22) (0.3.42)\n", 122 | "Requirement already satisfied: tenacity!=8.4.0,<10.0.0,>=8.1.0 in ./.venv/lib/python3.12/site-packages (from langchain-core<0.4.0,>=0.3.56->langchain-aws~=0.2.22) (9.1.2)\n", 123 | "Requirement already satisfied: jsonpatch<2.0,>=1.33 in ./.venv/lib/python3.12/site-packages (from langchain-core<0.4.0,>=0.3.56->langchain-aws~=0.2.22) (1.33)\n", 124 | "Requirement already satisfied: PyYAML>=5.3 in ./.venv/lib/python3.12/site-packages (from langchain-core<0.4.0,>=0.3.56->langchain-aws~=0.2.22) (6.0.2)\n", 125 | "Requirement already satisfied: packaging<25,>=23.2 in ./.venv/lib/python3.12/site-packages (from langchain-core<0.4.0,>=0.3.56->langchain-aws~=0.2.22) (24.2)\n", 126 | "Requirement already satisfied: typing-extensions>=4.7 in ./.venv/lib/python3.12/site-packages (from langchain-core<0.4.0,>=0.3.56->langchain-aws~=0.2.22) (4.13.2)\n", 
127 | "Requirement already satisfied: jsonpointer>=1.9 in ./.venv/lib/python3.12/site-packages (from jsonpatch<2.0,>=1.33->langchain-core<0.4.0,>=0.3.56->langchain-aws~=0.2.22) (3.0.0)\n", 128 | "Requirement already satisfied: httpx<1,>=0.23.0 in ./.venv/lib/python3.12/site-packages (from langsmith<0.4,>=0.1.125->langchain-core<0.4.0,>=0.3.56->langchain-aws~=0.2.22) (0.26.0)\n", 129 | "Requirement already satisfied: orjson<4.0.0,>=3.9.14 in ./.venv/lib/python3.12/site-packages (from langsmith<0.4,>=0.1.125->langchain-core<0.4.0,>=0.3.56->langchain-aws~=0.2.22) (3.10.18)\n", 130 | "Requirement already satisfied: requests<3,>=2 in ./.venv/lib/python3.12/site-packages (from langsmith<0.4,>=0.1.125->langchain-core<0.4.0,>=0.3.56->langchain-aws~=0.2.22) (2.32.3)\n", 131 | "Requirement already satisfied: requests-toolbelt<2.0.0,>=1.0.0 in ./.venv/lib/python3.12/site-packages (from langsmith<0.4,>=0.1.125->langchain-core<0.4.0,>=0.3.56->langchain-aws~=0.2.22) (1.0.0)\n", 132 | "Requirement already satisfied: zstandard<0.24.0,>=0.23.0 in ./.venv/lib/python3.12/site-packages (from langsmith<0.4,>=0.1.125->langchain-core<0.4.0,>=0.3.56->langchain-aws~=0.2.22) (0.23.0)\n", 133 | "Requirement already satisfied: anyio in ./.venv/lib/python3.12/site-packages (from httpx<1,>=0.23.0->langsmith<0.4,>=0.1.125->langchain-core<0.4.0,>=0.3.56->langchain-aws~=0.2.22) (4.9.0)\n", 134 | "Requirement already satisfied: certifi in ./.venv/lib/python3.12/site-packages (from httpx<1,>=0.23.0->langsmith<0.4,>=0.1.125->langchain-core<0.4.0,>=0.3.56->langchain-aws~=0.2.22) (2025.4.26)\n", 135 | "Requirement already satisfied: httpcore==1.* in ./.venv/lib/python3.12/site-packages (from httpx<1,>=0.23.0->langsmith<0.4,>=0.1.125->langchain-core<0.4.0,>=0.3.56->langchain-aws~=0.2.22) (1.0.9)\n", 136 | "Requirement already satisfied: idna in ./.venv/lib/python3.12/site-packages (from httpx<1,>=0.23.0->langsmith<0.4,>=0.1.125->langchain-core<0.4.0,>=0.3.56->langchain-aws~=0.2.22) (3.10)\n", 137 | 
"Requirement already satisfied: sniffio in ./.venv/lib/python3.12/site-packages (from httpx<1,>=0.23.0->langsmith<0.4,>=0.1.125->langchain-core<0.4.0,>=0.3.56->langchain-aws~=0.2.22) (1.3.1)\n", 138 | "Requirement already satisfied: h11>=0.16 in ./.venv/lib/python3.12/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->langsmith<0.4,>=0.1.125->langchain-core<0.4.0,>=0.3.56->langchain-aws~=0.2.22) (0.16.0)\n", 139 | "Requirement already satisfied: annotated-types>=0.6.0 in ./.venv/lib/python3.12/site-packages (from pydantic<3,>=2.10.0->langchain-aws~=0.2.22) (0.7.0)\n", 140 | "Requirement already satisfied: pydantic-core==2.33.2 in ./.venv/lib/python3.12/site-packages (from pydantic<3,>=2.10.0->langchain-aws~=0.2.22) (2.33.2)\n", 141 | "Requirement already satisfied: typing-inspection>=0.4.0 in ./.venv/lib/python3.12/site-packages (from pydantic<3,>=2.10.0->langchain-aws~=0.2.22) (0.4.0)\n", 142 | "Requirement already satisfied: charset-normalizer<4,>=2 in ./.venv/lib/python3.12/site-packages (from requests<3,>=2->langsmith<0.4,>=0.1.125->langchain-core<0.4.0,>=0.3.56->langchain-aws~=0.2.22) (3.4.2)\n", 143 | "Requirement already satisfied: urllib3<3,>=1.21.1 in ./.venv/lib/python3.12/site-packages (from requests<3,>=2->langsmith<0.4,>=0.1.125->langchain-core<0.4.0,>=0.3.56->langchain-aws~=0.2.22) (2.4.0)\n", 144 | "Requirement already satisfied: botocore<1.39.0,>=1.38.8 in ./.venv/lib/python3.12/site-packages (from boto3>=1.37.24->langchain-aws~=0.2.22) (1.38.8)\n", 145 | "Requirement already satisfied: jmespath<2.0.0,>=0.7.1 in ./.venv/lib/python3.12/site-packages (from boto3>=1.37.24->langchain-aws~=0.2.22) (1.0.1)\n", 146 | "Requirement already satisfied: s3transfer<0.13.0,>=0.12.0 in ./.venv/lib/python3.12/site-packages (from boto3>=1.37.24->langchain-aws~=0.2.22) (0.12.0)\n", 147 | "Requirement already satisfied: python-dateutil<3.0.0,>=2.1 in ./.venv/lib/python3.12/site-packages (from botocore<1.39.0,>=1.38.8->boto3>=1.37.24->langchain-aws~=0.2.22) 
(2.9.0.post0)\n", 148 | "Requirement already satisfied: six>=1.5 in ./.venv/lib/python3.12/site-packages (from python-dateutil<3.0.0,>=2.1->botocore<1.39.0,>=1.38.8->boto3>=1.37.24->langchain-aws~=0.2.22) (1.17.0)\n", 149 | "Note: you may need to restart the kernel to use updated packages.\n", 150 | "zsh:1: no matches found: psycopg[binary]~=3.2.0\n", 151 | "Note: you may need to restart the kernel to use updated packages.\n", 152 | "Requirement already satisfied: unstructured==0.7.6 in ./.venv/lib/python3.12/site-packages (0.7.6)\n", 153 | "Requirement already satisfied: argilla in ./.venv/lib/python3.12/site-packages (from unstructured==0.7.6) (2.8.0)\n", 154 | "Requirement already satisfied: chardet in ./.venv/lib/python3.12/site-packages (from unstructured==0.7.6) (5.2.0)\n", 155 | "Requirement already satisfied: filetype in ./.venv/lib/python3.12/site-packages (from unstructured==0.7.6) (1.2.0)\n", 156 | "Requirement already satisfied: lxml in ./.venv/lib/python3.12/site-packages (from unstructured==0.7.6) (5.4.0)\n", 157 | "Requirement already satisfied: msg-parser in ./.venv/lib/python3.12/site-packages (from unstructured==0.7.6) (1.2.0)\n", 158 | "Requirement already satisfied: nltk in ./.venv/lib/python3.12/site-packages (from unstructured==0.7.6) (3.8.1)\n", 159 | "Requirement already satisfied: openpyxl in ./.venv/lib/python3.12/site-packages (from unstructured==0.7.6) (3.1.5)\n", 160 | "Requirement already satisfied: pandas in ./.venv/lib/python3.12/site-packages (from unstructured==0.7.6) (2.2.3)\n", 161 | "Requirement already satisfied: pdf2image in ./.venv/lib/python3.12/site-packages (from unstructured==0.7.6) (1.17.0)\n", 162 | "Requirement already satisfied: pdfminer.six in ./.venv/lib/python3.12/site-packages (from unstructured==0.7.6) (20250327)\n", 163 | "Requirement already satisfied: pillow in ./.venv/lib/python3.12/site-packages (from unstructured==0.7.6) (11.2.1)\n", 164 | "Requirement already satisfied: pypandoc in 
./.venv/lib/python3.12/site-packages (from unstructured==0.7.6) (1.15)\n", 165 | "Requirement already satisfied: python-docx in ./.venv/lib/python3.12/site-packages (from unstructured==0.7.6) (1.1.2)\n", 166 | "Requirement already satisfied: python-pptx in ./.venv/lib/python3.12/site-packages (from unstructured==0.7.6) (0.6.23)\n", 167 | "Requirement already satisfied: python-magic in ./.venv/lib/python3.12/site-packages (from unstructured==0.7.6) (0.4.27)\n", 168 | "Requirement already satisfied: markdown in ./.venv/lib/python3.12/site-packages (from unstructured==0.7.6) (3.8)\n", 169 | "Requirement already satisfied: requests in ./.venv/lib/python3.12/site-packages (from unstructured==0.7.6) (2.32.3)\n", 170 | "Requirement already satisfied: tabulate in ./.venv/lib/python3.12/site-packages (from unstructured==0.7.6) (0.9.0)\n", 171 | "Requirement already satisfied: xlrd in ./.venv/lib/python3.12/site-packages (from unstructured==0.7.6) (2.0.1)\n", 172 | "Requirement already satisfied: httpx>=0.26.0 in ./.venv/lib/python3.12/site-packages (from argilla->unstructured==0.7.6) (0.26.0)\n", 173 | "Requirement already satisfied: pydantic<3.0.0,>=2.6.0 in ./.venv/lib/python3.12/site-packages (from argilla->unstructured==0.7.6) (2.11.4)\n", 174 | "Requirement already satisfied: huggingface_hub>=0.22.0 in ./.venv/lib/python3.12/site-packages (from argilla->unstructured==0.7.6) (0.30.2)\n", 175 | "Requirement already satisfied: tqdm>=4.60.0 in ./.venv/lib/python3.12/site-packages (from argilla->unstructured==0.7.6) (4.67.1)\n", 176 | "Requirement already satisfied: rich>=10.0.0 in ./.venv/lib/python3.12/site-packages (from argilla->unstructured==0.7.6) (14.0.0)\n", 177 | "Requirement already satisfied: datasets>=2.0.0 in ./.venv/lib/python3.12/site-packages (from argilla->unstructured==0.7.6) (3.5.1)\n", 178 | "Requirement already satisfied: standardwebhooks>=1.0.0 in ./.venv/lib/python3.12/site-packages (from argilla->unstructured==0.7.6) (1.0.0)\n", 179 | "Requirement 
already satisfied: annotated-types>=0.6.0 in ./.venv/lib/python3.12/site-packages (from pydantic<3.0.0,>=2.6.0->argilla->unstructured==0.7.6) (0.7.0)\n", 180 | "Requirement already satisfied: pydantic-core==2.33.2 in ./.venv/lib/python3.12/site-packages (from pydantic<3.0.0,>=2.6.0->argilla->unstructured==0.7.6) (2.33.2)\n", 181 | "Requirement already satisfied: typing-extensions>=4.12.2 in ./.venv/lib/python3.12/site-packages (from pydantic<3.0.0,>=2.6.0->argilla->unstructured==0.7.6) (4.13.2)\n", 182 | "Requirement already satisfied: typing-inspection>=0.4.0 in ./.venv/lib/python3.12/site-packages (from pydantic<3.0.0,>=2.6.0->argilla->unstructured==0.7.6) (0.4.0)\n", 183 | "Requirement already satisfied: filelock in ./.venv/lib/python3.12/site-packages (from datasets>=2.0.0->argilla->unstructured==0.7.6) (3.18.0)\n", 184 | "Requirement already satisfied: numpy>=1.17 in ./.venv/lib/python3.12/site-packages (from datasets>=2.0.0->argilla->unstructured==0.7.6) (1.26.4)\n", 185 | "Requirement already satisfied: pyarrow>=15.0.0 in ./.venv/lib/python3.12/site-packages (from datasets>=2.0.0->argilla->unstructured==0.7.6) (20.0.0)\n", 186 | "Requirement already satisfied: dill<0.3.9,>=0.3.0 in ./.venv/lib/python3.12/site-packages (from datasets>=2.0.0->argilla->unstructured==0.7.6) (0.3.8)\n", 187 | "Requirement already satisfied: xxhash in ./.venv/lib/python3.12/site-packages (from datasets>=2.0.0->argilla->unstructured==0.7.6) (3.5.0)\n", 188 | "Requirement already satisfied: multiprocess<0.70.17 in ./.venv/lib/python3.12/site-packages (from datasets>=2.0.0->argilla->unstructured==0.7.6) (0.70.16)\n", 189 | "Requirement already satisfied: fsspec<=2025.3.0,>=2023.1.0 in ./.venv/lib/python3.12/site-packages (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets>=2.0.0->argilla->unstructured==0.7.6) (2025.3.0)\n", 190 | "Requirement already satisfied: aiohttp in ./.venv/lib/python3.12/site-packages (from datasets>=2.0.0->argilla->unstructured==0.7.6) (3.11.18)\n", 191 | 
"Requirement already satisfied: packaging in ./.venv/lib/python3.12/site-packages (from datasets>=2.0.0->argilla->unstructured==0.7.6) (24.2)\n", 192 | "Requirement already satisfied: pyyaml>=5.1 in ./.venv/lib/python3.12/site-packages (from datasets>=2.0.0->argilla->unstructured==0.7.6) (6.0.2)\n", 193 | "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in ./.venv/lib/python3.12/site-packages (from aiohttp->datasets>=2.0.0->argilla->unstructured==0.7.6) (2.6.1)\n", 194 | "Requirement already satisfied: aiosignal>=1.1.2 in ./.venv/lib/python3.12/site-packages (from aiohttp->datasets>=2.0.0->argilla->unstructured==0.7.6) (1.3.2)\n", 195 | "Requirement already satisfied: attrs>=17.3.0 in ./.venv/lib/python3.12/site-packages (from aiohttp->datasets>=2.0.0->argilla->unstructured==0.7.6) (25.3.0)\n", 196 | "Requirement already satisfied: frozenlist>=1.1.1 in ./.venv/lib/python3.12/site-packages (from aiohttp->datasets>=2.0.0->argilla->unstructured==0.7.6) (1.6.0)\n", 197 | "Requirement already satisfied: multidict<7.0,>=4.5 in ./.venv/lib/python3.12/site-packages (from aiohttp->datasets>=2.0.0->argilla->unstructured==0.7.6) (6.4.3)\n", 198 | "Requirement already satisfied: propcache>=0.2.0 in ./.venv/lib/python3.12/site-packages (from aiohttp->datasets>=2.0.0->argilla->unstructured==0.7.6) (0.3.1)\n", 199 | "Requirement already satisfied: yarl<2.0,>=1.17.0 in ./.venv/lib/python3.12/site-packages (from aiohttp->datasets>=2.0.0->argilla->unstructured==0.7.6) (1.20.0)\n", 200 | "Requirement already satisfied: idna>=2.0 in ./.venv/lib/python3.12/site-packages (from yarl<2.0,>=1.17.0->aiohttp->datasets>=2.0.0->argilla->unstructured==0.7.6) (3.10)\n", 201 | "Requirement already satisfied: anyio in ./.venv/lib/python3.12/site-packages (from httpx>=0.26.0->argilla->unstructured==0.7.6) (4.9.0)\n", 202 | "Requirement already satisfied: certifi in ./.venv/lib/python3.12/site-packages (from httpx>=0.26.0->argilla->unstructured==0.7.6) (2025.4.26)\n", 203 | "Requirement 
already satisfied: httpcore==1.* in ./.venv/lib/python3.12/site-packages (from httpx>=0.26.0->argilla->unstructured==0.7.6) (1.0.9)\n", 204 | "Requirement already satisfied: sniffio in ./.venv/lib/python3.12/site-packages (from httpx>=0.26.0->argilla->unstructured==0.7.6) (1.3.1)\n", 205 | "Requirement already satisfied: h11>=0.16 in ./.venv/lib/python3.12/site-packages (from httpcore==1.*->httpx>=0.26.0->argilla->unstructured==0.7.6) (0.16.0)\n", 206 | "Requirement already satisfied: charset-normalizer<4,>=2 in ./.venv/lib/python3.12/site-packages (from requests->unstructured==0.7.6) (3.4.2)\n", 207 | "Requirement already satisfied: urllib3<3,>=1.21.1 in ./.venv/lib/python3.12/site-packages (from requests->unstructured==0.7.6) (2.4.0)\n", 208 | "Requirement already satisfied: markdown-it-py>=2.2.0 in ./.venv/lib/python3.12/site-packages (from rich>=10.0.0->argilla->unstructured==0.7.6) (3.0.0)\n", 209 | "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in ./.venv/lib/python3.12/site-packages (from rich>=10.0.0->argilla->unstructured==0.7.6) (2.19.1)\n", 210 | "Requirement already satisfied: mdurl~=0.1 in ./.venv/lib/python3.12/site-packages (from markdown-it-py>=2.2.0->rich>=10.0.0->argilla->unstructured==0.7.6) (0.1.2)\n", 211 | "Requirement already satisfied: python-dateutil in ./.venv/lib/python3.12/site-packages (from standardwebhooks>=1.0.0->argilla->unstructured==0.7.6) (2.9.0.post0)\n", 212 | "Requirement already satisfied: Deprecated in ./.venv/lib/python3.12/site-packages (from standardwebhooks>=1.0.0->argilla->unstructured==0.7.6) (1.2.18)\n", 213 | "Requirement already satisfied: types-python-dateutil in ./.venv/lib/python3.12/site-packages (from standardwebhooks>=1.0.0->argilla->unstructured==0.7.6) (2.9.0.20241206)\n", 214 | "Requirement already satisfied: types-Deprecated in ./.venv/lib/python3.12/site-packages (from standardwebhooks>=1.0.0->argilla->unstructured==0.7.6) (1.2.15.20250304)\n", 215 | "Requirement already satisfied: wrapt<2,>=1.10 
in ./.venv/lib/python3.12/site-packages (from Deprecated->standardwebhooks>=1.0.0->argilla->unstructured==0.7.6) (1.17.2)\n", 216 | "Requirement already satisfied: olefile>=0.46 in ./.venv/lib/python3.12/site-packages (from msg-parser->unstructured==0.7.6) (0.47)\n", 217 | "Requirement already satisfied: click in ./.venv/lib/python3.12/site-packages (from nltk->unstructured==0.7.6) (8.1.8)\n", 218 | "Requirement already satisfied: joblib in ./.venv/lib/python3.12/site-packages (from nltk->unstructured==0.7.6) (1.5.0)\n", 219 | "Requirement already satisfied: regex>=2021.8.3 in ./.venv/lib/python3.12/site-packages (from nltk->unstructured==0.7.6) (2024.11.6)\n", 220 | "Requirement already satisfied: et-xmlfile in ./.venv/lib/python3.12/site-packages (from openpyxl->unstructured==0.7.6) (2.0.0)\n", 221 | "Requirement already satisfied: pytz>=2020.1 in ./.venv/lib/python3.12/site-packages (from pandas->unstructured==0.7.6) (2025.2)\n", 222 | "Requirement already satisfied: tzdata>=2022.7 in ./.venv/lib/python3.12/site-packages (from pandas->unstructured==0.7.6) (2025.2)\n", 223 | "Requirement already satisfied: six>=1.5 in ./.venv/lib/python3.12/site-packages (from python-dateutil->standardwebhooks>=1.0.0->argilla->unstructured==0.7.6) (1.17.0)\n", 224 | "Requirement already satisfied: cryptography>=36.0.0 in ./.venv/lib/python3.12/site-packages (from pdfminer.six->unstructured==0.7.6) (44.0.3)\n", 225 | "Requirement already satisfied: cffi>=1.12 in ./.venv/lib/python3.12/site-packages (from cryptography>=36.0.0->pdfminer.six->unstructured==0.7.6) (1.17.1)\n", 226 | "Requirement already satisfied: pycparser in ./.venv/lib/python3.12/site-packages (from cffi>=1.12->cryptography>=36.0.0->pdfminer.six->unstructured==0.7.6) (2.22)\n", 227 | "Requirement already satisfied: XlsxWriter>=0.5.7 in ./.venv/lib/python3.12/site-packages (from python-pptx->unstructured==0.7.6) (3.2.3)\n", 228 | "Note: you may need to restart the kernel to use updated packages.\n" 229 | ] 230 | } 
231 | ], 232 | "source": [ 233 | "%pip install langchain-community~=0.3.23\n", 234 | "%pip install langchain-postgres~=0.0.14\n", 235 | "%pip install langchain-aws~=0.2.22\n", 236 | "%pip install psycopg[binary]~=3.2.0\n", 237 | "%pip install unstructured==0.7.6" 238 | ] 239 | }, 240 | { 241 | "cell_type": "markdown", 242 | "id": "64f083da-a34f-42d4-afd1-5c04604a8c88", 243 | "metadata": {}, 244 | "source": [ 245 | "## Initialization\n", 246 | "\n", 247 | "### Imports and the creation of the boto3 session" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": null, 253 | "id": "ac2b386f", 254 | "metadata": { 255 | "tags": [] 256 | }, 257 | "outputs": [], 258 | "source": [ 259 | "import boto3\n", 260 | "import json\n", 261 | "import logging\n", 262 | "import time\n", 263 | "import psycopg\n", 264 | "from langchain_postgres.vectorstores import DistanceStrategy, PGVector\n", 265 | "from langchain_aws.embeddings.bedrock import BedrockEmbeddings\n", 266 | "from langchain.chains import create_retrieval_chain\n", 267 | "from langchain.chains.combine_documents import create_stuff_documents_chain\n", 268 | "from langchain_aws import ChatBedrock\n", 269 | "from langchain_core.prompts import ChatPromptTemplate\n", 270 | "\n", 271 | "# Configure the logger\n", 272 | "logger = logging.getLogger(__name__)\n", 273 | "\n", 274 | "# Use the session to create a client\n", 275 | "session = boto3.Session()\n", 276 | "credentials = session.get_credentials()" 277 | ] 278 | }, 279 | { 280 | "cell_type": "markdown", 281 | "id": "be9c5e4d", 282 | "metadata": {}, 283 | "source": [ 284 | "### Retrieving environment variables from the SSM parameter store \n", 285 | "\n", 286 | "The Terraform deployment saves all essential environment variables to the AWS SSM parameter store. 
To retrieve those, we use the following helper function.\n" 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": null, 292 | "id": "cb916f48-df78-4d17-9f3c-27c1b7b6185a", 293 | "metadata": { 294 | "tags": [] 295 | }, 296 | "outputs": [], 297 | "source": [ 298 | "def get_ssm_parameter(session: boto3.Session, parameter_name: str, prefix:str = '/bedrock-rag-template/'):\n", 299 | " \"\"\"Retrieve a parameter's value from AWS SSM Parameter Store.\n", 300 | "\n", 301 | " Args:\n", 302 | " session (Session): the boto3 session to use to retrieve the parameters\n", 303 | " parameter_name (str): the name of the parameter\n", 304 | " prefix (str, optional): Parameter's prefix. Defaults to '/bedrock-rag-template/'.\n", 305 | "\n", 306 | " Returns:\n", 307 | " str: the value stored under prefix + parameter_name\n", 308 | " \"\"\"\n", 309 | " ssm = session.client('ssm')\n", 310 | " response = ssm.get_parameter(\n", 311 | " Name = prefix+parameter_name\n", 312 | " )\n", 313 | " return response['Parameter']['Value']\n", 314 | "\n", 315 | "\n", 316 | "# Setup env variables\n", 317 | "VECTOR_DB_INDEX = get_ssm_parameter(session, 'VECTOR_DB_INDEX')\n", 318 | "PG_VECTOR_DB_NAME = get_ssm_parameter(session, 'PG_VECTOR_DB_NAME')\n", 319 | "PG_VECTOR_PORT = get_ssm_parameter(session, 'PG_VECTOR_PORT')\n", 320 | "PG_VECTOR_SECRET_ARN = get_ssm_parameter(session, 'PG_VECTOR_SECRET_ARN')\n", 321 | "PG_VECTOR_DB_HOST = get_ssm_parameter(session, 'PG_VECTOR_DB_HOST')\n", 322 | "S3_BUCKET_NAME = get_ssm_parameter(session, 'S3_BUCKET_NAME')" 323 | ] 324 | }, 325 | { 326 | "cell_type": "markdown", 327 | "id": "3c2b8377", 328 | "metadata": {}, 329 | "source": [ 330 | "## Create the Amazon Bedrock Embedding\n", 331 | "\n", 332 | "\n", 333 | "**Prerequisite:** Ensure you have successfully requested access to the Amazon Bedrock models; for details, see [Model access](https://docs.aws.amazon.com/bedrock/latest/userguide/model-access.html).\n", 334 | "\n", 335 | "\n", 336 | "To create the LangChain 
vector store, we need to provide a LangChain embedding. The id of the embedding model id must be the same used to create the embeddings in the first place, in this case:" 337 | ] 338 | }, 339 | { 340 | "cell_type": "code", 341 | "execution_count": null, 342 | "id": "203f7adf", 343 | "metadata": { 344 | "tags": [] 345 | }, 346 | "outputs": [], 347 | "source": [ 348 | "embedding_model_id = \"amazon.titan-embed-text-v2:0\" \n", 349 | "\n", 350 | "br = session.client(\"bedrock-runtime\")\n", 351 | "bedrock_embedding = BedrockEmbeddings(client=br, model_id=embedding_model_id)\n", 352 | "\n", 353 | "\n", 354 | "try:\n", 355 | " br.invoke_model(**{\n", 356 | " \"modelId\": \"amazon.titan-embed-text-v2:0\",\n", 357 | " \"contentType\": \"application/json\",\n", 358 | " \"accept\": \"*/*\",\n", 359 | " \"body\": \"{\\\"inputText\\\":\\\"this is where you place your input text\\\", \\\"dimensions\\\": 512, \\\"normalize\\\": true}\"\n", 360 | " })\n", 361 | "except Exception as e:\n", 362 | " logger.error(f\"Please enable model access\")" 363 | ] 364 | }, 365 | { 366 | "cell_type": "markdown", 367 | "id": "703ea47a", 368 | "metadata": {}, 369 | "source": [ 370 | "## Establish a connection the Amazon Aurora and create the LangChain vector store\n", 371 | "To get the secret for the data base, we use the following helper function. 
" 372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "execution_count": null, 377 | "id": "e6cf90da-26ae-4e61-9193-df521635a3c2", 378 | "metadata": { 379 | "tags": [] 380 | }, 381 | "outputs": [], 382 | "source": [ 383 | "def get_db_secret_value(secret_arn: str) -> str:\n", 384 | " \"\"\"Get the secret value from the secret manager\n", 385 | "\n", 386 | " Args:\n", 387 | " secret_arn (str): ARN of the secret\n", 388 | "\n", 389 | " Returns:\n", 390 | " str: Value of the secret\n", 391 | " \"\"\"\n", 392 | " client = boto3.client('secretsmanager')\n", 393 | " get_secret_value_response = client.get_secret_value(SecretId=secret_arn)\n", 394 | " return json.loads(get_secret_value_response['SecretString'])\n", 395 | "\n", 396 | "\n", 397 | "logger.info(f\"Retrieve secret from {PG_VECTOR_SECRET_ARN}\")\n", 398 | "client = session.client(service_name='secretsmanager')\n", 399 | "credentials = get_db_secret_value(PG_VECTOR_SECRET_ARN)\n", 400 | "\n", 401 | "\n", 402 | "connection_string = PGVector.connection_string_from_db_params(\n", 403 | " driver=\"psycopg\",\n", 404 | " host=PG_VECTOR_DB_HOST,\n", 405 | " port=PG_VECTOR_PORT,\n", 406 | " database=PG_VECTOR_DB_NAME,\n", 407 | " user=credentials['username'],\n", 408 | " password=credentials['password']\n", 409 | ")\n", 410 | "\n", 411 | "\n", 412 | "vector_store = PGVector(\n", 413 | " connection=connection_string,\n", 414 | " embeddings=bedrock_embedding,\n", 415 | " collection_name=VECTOR_DB_INDEX,\n", 416 | " distance_strategy=DistanceStrategy.COSINE,\n", 417 | ")" 418 | ] 419 | }, 420 | { 421 | "cell_type": "markdown", 422 | "id": "f7c7801a-4285-47d5-9b4d-f44011f896da", 423 | "metadata": {}, 424 | "source": [ 425 | "## Add embeddings to the vector store for RAG \n", 426 | "\n", 427 | "To make use of ingestion pipeline triggered by Amazon S3 bucket notifications, we take the following file and put it to the Amazon S3 bucket to trigger the ingestion. 
To validate the ingestion, we look up the latest invocation of the AWS Lambda function to verify execution. \n" 428 | ] 429 | }, 430 | { 431 | "cell_type": "code", 432 | "execution_count": null, 433 | "id": "3162e39b-c08d-4acd-80d6-164e7629aa38", 434 | "metadata": { 435 | "tags": [] 436 | }, 437 | "outputs": [], 438 | "source": [ 439 | "file_content = \"\"\"\"\n", 440 | "### Company Overview: TechWorldNova Solutions\n", 441 | "**TechWorldNova Solutions** is an innovative technology firm specializing in artificial intelligence and cloud computing solutions. \n", 442 | "Since its founding in 2015, TechNova has been at the forefront of technological advancements, providing cutting-edge products and services to a diverse range of industries.\n", 443 | "\n", 444 | "### Growth and Revenue Highlights\n", 445 | "\n", 446 | "- **2018:**\n", 447 | " - **Revenue:** $15 million\n", 448 | " - **Growth:** 25%\n", 449 | "- **2019:**\n", 450 | " - **Revenue:** $20 million\n", 451 | " - **Growth:** 33%\n", 452 | "- **2020:**\n", 453 | " - **Revenue:** $30 million\n", 454 | " - **Growth:** 50%\n", 455 | "- **2021:**\n", 456 | " - **Revenue:** $45 million\n", 457 | " - **Growth:** 50%\n", 458 | "- **2022:**\n", 459 | " - **Revenue:** $60 million\n", 460 | " - **Growth:** 33%\n", 461 | "- **2023:**\n", 462 | " - **Revenue:** $80 million\n", 463 | " - **Growth:** 33%\n", 464 | "\n", 465 | "### Key Milestones\n", 466 | "- **2017:** Launched first AI-powered analytics platform.\n", 467 | "- **2019:** Expanded operations to Europe and Asia.\n", 468 | "- **2021:** Introduced cloud computing solutions, gaining significant market traction.\n", 469 | "- **2023:** Reached 500+ enterprise clients and crossed $80 million in revenue.\n", 470 | "### Future Outlook\n", 471 | "\n", 472 | "TechNova Solutions aims to continue its upward trajectory by investing in research and development, \n", 473 | "exploring new markets, and enhancing its product offerings. 
\n", 474 | "The company's vision is to be a global leader in AI and cloud computing, driving innovation and delivering exceptional value to its clients.\"\"\"" 475 | ] 476 | }, 477 | { 478 | "cell_type": "code", 479 | "execution_count": null, 480 | "id": "1de69ba4-ca18-4cf8-b9e2-faa964c2be88", 481 | "metadata": { 482 | "tags": [] 483 | }, 484 | "outputs": [], 485 | "source": [ 486 | "s3 = session.client(\"s3\")\n", 487 | "s3.put_object(\n", 488 | " Bucket=S3_BUCKET_NAME,\n", 489 | " Key=\"rag-template-file.txt\",\n", 490 | " Body=file_content.encode('utf-8')\n", 491 | ")\n", 492 | "\n", 493 | "# Wait until documents are in store\n", 494 | "i = 0\n", 495 | "while i < 10:\n", 496 | " i += 1\n", 497 | " \n", 498 | " ingested_docs = vector_store.similarity_search(\"TechWorldNova Solutions\")\n", 499 | " if len(ingested_docs) > 0:\n", 500 | " print(\"Relevant documents found\")\n", 501 | " break\n", 502 | " else:\n", 503 | " time.sleep(5)" 504 | ] 505 | }, 506 | { 507 | "cell_type": "markdown", 508 | "id": "f7367250-e207-4fd9-8d81-a9be5115e85f", 509 | "metadata": {}, 510 | "source": [ 511 | "### Verfify that embedding is present in vector store\n", 512 | "\n", 513 | "We check whether there is a document similar to the string \"TechWorldNova Solutions\" to verify presence of the embedding in the vector store." 
514 | ] 515 | }, 516 | { 517 | "cell_type": "code", 518 | "execution_count": null, 519 | "id": "25ee8dfe-c65c-44c6-90b1-a001ab08747b", 520 | "metadata": { 521 | "tags": [] 522 | }, 523 | "outputs": [], 524 | "source": [ 525 | "vector_store.similarity_search(\"TechWorldNova Solutions\")" 526 | ] 527 | }, 528 | { 529 | "cell_type": "markdown", 530 | "id": "3833ba52-afa3-4e0e-b7ca-a08e7e0459d6", 531 | "metadata": {}, 532 | "source": [ 533 | "## Retrieval augmented text generation using Bedrock Claude and the PGVector vector store\n", 534 | "\n", 535 | "\n", 536 | "Subsequently, we generate a system prompt to test the retrieval augmentation by storing information about a fictitious company called `TechWorldNova Solutions`. Thereby, we ensure that the foundation model has not been trained on the answer yet. We test the retrieval augmentation with Anthropic Claude 3. \n", 537 | "\n", 538 | "\n", 539 | "\n", 540 | "### Prepare the retriever and the system prompt" 541 | ] 542 | }, 543 | { 544 | "cell_type": "code", 545 | "execution_count": null, 546 | "id": "acbe0a26-b141-4c92-9a15-03f0f5d13534", 547 | "metadata": { 548 | "tags": [] 549 | }, 550 | "outputs": [], 551 | "source": [ 552 | "\n", 553 | "retriever=vector_store.as_retriever(search_type=\"similarity_score_threshold\",\n", 554 | " search_kwargs={'score_threshold': 0.8})\n", 555 | "\n", 556 | "system_prompt = (\n", 557 | " \"Use the given context to answer the question. \"\n", 558 | " \"If you don't know the answer, say you don't know. \"\n", 559 | " \"Use three sentence maximum and keep the answer concise. 
\"\n", 560 | " \"Context: {context}\"\n", 561 | ")\n", 562 | "prompt = ChatPromptTemplate.from_messages(\n", 563 | " [\n", 564 | " (\"system\", system_prompt),\n", 565 | " (\"human\", \"{input}\"),\n", 566 | " ]\n", 567 | ")\n", 568 | "\n", 569 | "query = \"What is the outlook for TechWorldNova Solutions?\"" 570 | ] 571 | }, 572 | { 573 | "cell_type": "markdown", 574 | "id": "82f3323e-b31d-41ac-838f-0e844fe7fd0d", 575 | "metadata": {}, 576 | "source": [ 577 | "### Claude 3" 578 | ] 579 | }, 580 | { 581 | "cell_type": "code", 582 | "execution_count": null, 583 | "id": "2c994b83-e065-4bf6-bded-0fba105e86c4", 584 | "metadata": { 585 | "tags": [] 586 | }, 587 | "outputs": [], 588 | "source": [ 589 | "model_id = \"anthropic.claude-3-sonnet-20240229-v1:0\"\n", 590 | "model_kwargs = { \n", 591 | " \"max_tokens\": 2048, \n", 592 | "}\n", 593 | "\n", 594 | "\n", 595 | "llm = ChatBedrock(\n", 596 | " model_id=model_id,\n", 597 | " model_kwargs=model_kwargs,\n", 598 | ")\n", 599 | "\n", 600 | "\n", 601 | "question_answer_chain = create_stuff_documents_chain(llm, prompt)\n", 602 | "chain = create_retrieval_chain(retriever, question_answer_chain)\n", 603 | "response = chain.invoke({\"input\": query})[\"answer\"]\n", 604 | "print(f\"CHATBOT ANSWER CLAUDE 3: {response}\")" 605 | ] 606 | }, 607 | { 608 | "cell_type": "markdown", 609 | "id": "9ef4a2d6-bf7f-460d-a19b-385c82ea2a57", 610 | "metadata": {}, 611 | "source": [ 612 | "## Retrieve relevant documents for the query (optional)\n", 613 | "Run the following cell if you want to get more details about the scores of the selected chunks, relevant for answering the query." 
614 | ] 615 | }, 616 | { 617 | "cell_type": "code", 618 | "execution_count": null, 619 | "id": "8e065c3c-efe0-406a-81d5-9569b960cdda", 620 | "metadata": { 621 | "tags": [] 622 | }, 623 | "outputs": [], 624 | "source": [ 625 | "doc_scores = vector_store.similarity_search_with_relevance_scores(query, k=20)\n", 626 | "\n", 627 | "docs = []\n", 628 | "for doc, score in doc_scores:\n", 629 | " doc.metadata[\"document_score\"] = score\n", 630 | " docs.append(doc)\n", 631 | "\n", 632 | "for item in docs:\n", 633 | " print(item)" 634 | ] 635 | }, 636 | { 637 | "cell_type": "markdown", 638 | "id": "d409d74d-110e-4045-b767-e2c88f7d48c6", 639 | "metadata": {}, 640 | "source": [ 641 | "## Retrieve the raw data from the vector store (optional)\n", 642 | "If you want to explore the raw vector store, you can use the query below, which fetches all records (only practical if just a few documents are present in the database)" 643 | ] 644 | }, 645 | { 646 | "cell_type": "code", 647 | "execution_count": null, 648 | "id": "b76ff67b-469c-4738-80fa-03b7bff5ead5", 649 | "metadata": { 650 | "tags": [] 651 | }, 652 | "outputs": [], 653 | "source": [ 654 | "conn = psycopg.connect(host=PG_VECTOR_DB_HOST,\n", 655 | " dbname=PG_VECTOR_DB_NAME,\n", 656 | " user=credentials['username'],\n", 657 | " password=credentials['password'])\n", 658 | "cur = conn.cursor()\n", 659 | "cur.execute(\"SELECT * FROM langchain_pg_embedding\")\n", 660 | "ids = cur.fetchall()\n", 661 | "\n", 662 | "# Print metadata:\n", 663 | "# i[0] - document IDs\n", 664 | "# i[1] - embeddings\n", 665 | "# i[2] - plain text documents\n", 666 | "# i[3] - document metadata\n", 667 | "\n", 668 | "print([i[2] for i in ids])" 669 | ] 670 | } 671 | ], 672 | "metadata": { 673 | "kernelspec": { 674 | "display_name": "conda_python3", 675 | "language": "python", 676 | "name": "conda_python3" 677 | }, 678 | "language_info": { 679 | "codemirror_mode": { 680 | "name": "ipython", 681 | "version": 3 682 | }, 683 | 
"file_extension": ".py", 684 
| "mimetype": "text/x-python", 685 | "name": "python", 686 | "nbconvert_exporter": "python", 687 | "pygments_lexer": "ipython3", 688 | "version": "3.10.17" 689 | } 690 | }, 691 | "nbformat": 4, 692 | "nbformat_minor": 5 693 | } 694 | -------------------------------------------------------------------------------- /terraform/commons.tfvars: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | vpc = { 4 | private_subnets = ["10.1.3.0/24", "10.1.4.0/24"] 5 | public_subnets = ["10.1.1.0/24", "10.1.2.0/24"] 6 | enable_nat_gateway = true 7 | cidr = "10.1.0.0/16" 8 | } 9 | embedding_model_id = "amazon.titan-embed-text-v2:0" 10 | text_generation_model_ids = [ 11 | "anthropic.claude-3-sonnet-20240229-v1:0" 12 | ] 13 | region = "us-east-1" 14 | -------------------------------------------------------------------------------- /terraform/data.tf: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | data "aws_caller_identity" "current" {} 4 | 5 | data "aws_region" "current" {} 6 | 7 | data "aws_availability_zones" "this" {} 8 | 9 | data "aws_ecr_authorization_token" "token" {} 10 | -------------------------------------------------------------------------------- /terraform/locals.tf: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | locals { 4 | source_path = "${path.root}/../python/src/handlers/data_ingestion_processor" 5 | path_include = ["**"] 6 | path_exclude = ["**/__pycache__/**"] 7 | files_include = setunion([for f in local.path_include : fileset(local.source_path, f)]...) 
8 | files_exclude = setunion([for f in local.path_exclude : fileset(local.source_path, f)]...) 9 | files = sort(setsubtract(local.files_include, local.files_exclude)) 10 | 11 | dir_sha = sha1(join("", [for f in local.files : filesha1("${local.source_path}/${f}")])) 12 | 13 | image_tag = "latest-${formatdate("YYYYMMDDhhmmss", timestamp())}" 14 | 15 | ssm_parameter_for_sagemaker = { 16 | PG_VECTOR_SECRET_ARN = module.aurora.cluster_master_user_secret[0].secret_arn 17 | PG_VECTOR_DB_NAME = module.aurora.cluster_database_name 18 | PG_VECTOR_DB_HOST = module.aurora.cluster_endpoint 19 | PG_VECTOR_PORT = 5432 20 | CHUNK_SIZE = 200 21 | CHUNK_OVERLAP = 20 22 | VECTOR_DB_INDEX = "sample-index" 23 | EMBEDDING_MODEL_ID = var.embedding_model_id 24 | S3_BUCKET_NAME = module.s3.s3_bucket_id 25 | } 26 | 27 | text_generation_model_arns = formatlist("arn:aws:bedrock:${data.aws_region.current.name}::foundation-model/%s", var.text_generation_model_ids) 28 | 29 | vpc_endpoints = { 30 | s3 = "Gateway", 31 | bedrock-runtime = "Interface", 32 | secretsmanager = "Interface", 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /terraform/main.tf: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | # SPDX-License-Identifier: MIT-0 3 | terraform { 4 | required_version = ">= 1.0" 5 | required_providers { 6 | aws = { 7 | source = "hashicorp/aws" 8 | version = ">= 5.32" 9 | } 10 | } 11 | } 12 | provider "aws" { 13 | region = var.region 14 | default_tags { 15 | tags = { 16 | Environment = "dev" 17 | Project = "aws-sample-bedrock-rag-template" 18 | } 19 | } 20 | } 21 | 22 | module "vpc" { 23 | #checkov:skip=CKV_AWS_130: "Ensure VPC subnets do not assign public IP by default" 24 | source = "git::https://github.com/terraform-aws-modules/terraform-aws-vpc?ref=4a2809c" 25 | name = "bedrock-rag-template" 26 | cidr = var.vpc.cidr 27 | private_subnets = var.vpc.private_subnets 28 | public_subnets = var.vpc.public_subnets 29 | enable_nat_gateway = false 30 | azs = slice(data.aws_availability_zones.this.names, 0, 3) 31 | enable_dns_hostnames = true 32 | enable_dns_support = true 33 | create_igw = false 34 | 35 | enable_flow_log = true 36 | create_flow_log_cloudwatch_iam_role = true 37 | create_flow_log_cloudwatch_log_group = true 38 | flow_log_cloudwatch_log_group_kms_key_id = module.kms.key_arn 39 | } 40 | 41 | 42 | resource "aws_security_group" "lambda_ingestion" { 43 | vpc_id = module.vpc.vpc_id 44 | name = "Lambda data ingestion" 45 | description = "SG to allow connections for Lamdba to RDS and VPC endpoints" 46 | 47 | egress { 48 | description = "Allow traffic to RDS Postgres" 49 | from_port = 5432 50 | to_port = 5432 51 | protocol = "TCP" 52 | cidr_blocks = module.vpc.private_subnets_cidr_blocks 53 | } 54 | 55 | egress { 56 | description = "Allow HTTPS to VPC endpoints" 57 | from_port = 443 58 | to_port = 443 59 | protocol = "TCP" 60 | security_groups = [aws_security_group.vpc_endpoints.id] 61 | prefix_list_ids = [aws_vpc_endpoint.vpce["s3"].prefix_list_id] 62 | } 63 | } 64 | 65 | 66 | resource "aws_ssm_parameter" "parameters" { 67 | #checkov:skip=CKV_AWS_337: "Ensure SSM parameters are using KMS CMK" 68 | #checkov:skip=CKV2_AWS_34: "AWS SSM Parameter should be 
Encrypted" 69 | for_each = local.ssm_parameter_for_sagemaker 70 | name = "/bedrock-rag-template/${each.key}" 71 | type = "String" 72 | value = each.value 73 | } 74 | 75 | 76 | module "lambda_ingestion" { 77 | source = "git::https://github.com/terraform-aws-modules/terraform-aws-lambda.git?ref=9acd3227087db56abac5f78d1a660b08ee159a9c" # TODO: update 78 | 79 | function_name = "data-ingestion-processor" 80 | timeout = 180 81 | tracing_mode = "Active" 82 | attach_tracing_policy = true 83 | memory_size = 2048 84 | kms_key_arn = module.kms.key_arn 85 | authorization_type = "AWS_IAM" 86 | publish = true 87 | 88 | 89 | environment_variables = { 90 | PG_VECTOR_SECRET_ARN = module.aurora.cluster_master_user_secret[0].secret_arn 91 | PG_VECTOR_DB_NAME = module.aurora.cluster_database_name 92 | PG_VECTOR_DB_HOST = module.aurora.cluster_endpoint 93 | PG_VECTOR_PORT = 5432 94 | POWERTOOLS_METRICS_NAMESPACE = "GENAI" 95 | POWERTOOLS_SERVICE_NAME = "data ingestion processor" 96 | POWERTOOLS_LOG_LEVEL = "INFO" 97 | CHUNK_SIZE = 1000 98 | CHUNK_OVERLAP = 100 99 | VECTOR_DB_INDEX = "sample-index" 100 | EMBEDDING_MODEL_ID = var.embedding_model_id 101 | } 102 | 103 | attach_network_policy = true 104 | vpc_subnet_ids = module.vpc.private_subnets 105 | vpc_security_group_ids = [aws_security_group.lambda_ingestion.id] 106 | 107 | image_uri = data.aws_ecr_image.lambda_document_ingestion.image_uri 108 | create_package = false 109 | package_type = "Image" 110 | 111 | allowed_triggers = { 112 | InvokeLambdaFromS3Bucket = { 113 | principal = "s3.amazonaws.com" 114 | source_arn = module.s3.s3_bucket_arn 115 | } 116 | } 117 | 118 | 119 | attach_policy_json = true 120 | policy_json = templatefile("${path.module}/policies/data_ingestion_processor.json", { 121 | aurora_secret_arn = module.aurora.cluster_master_user_secret[0].secret_arn 122 | s3_bucket_arn = module.s3.s3_bucket_arn 123 | kms_key_arn = module.kms.key_arn 124 | account_id = data.aws_caller_identity.current.id 125 | aws_region = 
data.aws_region.current.name 126 | prefix = "bedrock-rag-template" 127 | 128 | 129 | bedrock_model_ids = concat( 130 | ["arn:aws:bedrock:${data.aws_region.current.name}::foundation-model/${var.embedding_model_id}"], 131 | local.text_generation_model_arns 132 | ) 133 | }) 134 | 135 | depends_on = [null_resource.build_and_push_docker_image] 136 | 137 | assume_role_policy_statements = { 138 | sage_maker_notebook_demo = { 139 | effect = "Allow", 140 | actions = ["sts:AssumeRole"], 141 | principals = { 142 | service_principal = { 143 | type = "Service", 144 | identifiers = ["sagemaker.amazonaws.com"] 145 | } 146 | } 147 | } 148 | } 149 | } 150 | 151 | 152 | module "ecr" { 153 | source = "git::https://github.com/terraform-aws-modules/terraform-aws-ecr.git?ref=9daab07" 154 | 155 | repository_name = "bedrock-rag-template" 156 | repository_image_tag_mutability = "IMMUTABLE" 157 | repository_force_delete = true 158 | repository_lifecycle_policy = jsonencode({ 159 | rules = [ 160 | { 161 | rulePriority = 1, 162 | description = "Keep last 30 images", 163 | selection = { 164 | tagStatus = "any", 165 | # Images are pushed with the source-directory SHA-1 (local.dir_sha) as their only tag, so a "v" tag prefix never matches; select every image instead. 
166 | countType = "imageCountMoreThan", 167 | countNumber = 30 168 | }, 169 | action = { 170 | type = "expire" 171 | } 172 | } 173 | ] 174 | }) 175 | } 176 | 177 | 178 | resource "null_resource" "build_and_push_docker_image" { 179 | provisioner "local-exec" { 180 | command = <<-EOT 181 | aws ecr get-login-password --region ${data.aws_region.current.name} | docker login --username AWS --password-stdin ${module.ecr.repository_url} 182 | docker build \ 183 | --platform=linux/amd64 \ 184 | -t ${module.ecr.repository_url}:${local.dir_sha} \ 185 | ${local.source_path} 186 | docker push ${module.ecr.repository_url}:${local.dir_sha} 187 | EOT 188 | } 189 | 190 | triggers = { 191 | dir_sha = local.dir_sha 192 | } 193 | } 194 | 195 | data "aws_ecr_image" "lambda_document_ingestion" { 196 | repository_name = module.ecr.repository_name 197 | image_tag = local.dir_sha 
198 | depends_on = [ 199 | null_resource.build_and_push_docker_image 200 | ] 201 | } 202 | 203 | 204 | module "s3" { 205 | source = "git::https://github.com/terraform-aws-modules/terraform-aws-s3-bucket.git?ref=8a0b697" 206 | 207 | bucket = "bedrock-rag-template${data.aws_caller_identity.current.account_id}" 208 | 209 | force_destroy = true 210 | versioning = { 211 | enabled = true 212 | } 213 | 214 | server_side_encryption_configuration = { 215 | rule = { 216 | apply_server_side_encryption_by_default = { 217 | kms_master_key_id = module.kms.key_arn 218 | sse_algorithm = "aws:kms" 219 | } 220 | bucket_key_enabled = true 221 | } 222 | } 223 | attach_deny_insecure_transport_policy = true 224 | attach_require_latest_tls_policy = true 225 | allowed_kms_key_arn = module.kms.key_arn 226 | 227 | 228 | lifecycle_rule = [ 229 | { 230 | id = "transition-to-glacier" 231 | enabled = true 232 | 233 | # Transition to Glacier after 30 days 234 | transitions = [ 235 | { 236 | days = 30 237 | storage_class = "GLACIER" 238 | } 239 | ] 240 | 241 | # Expire after 365 days 242 | expiration = { 243 | days = 365 244 | } 245 | } 246 | ] 247 | 248 | 249 | 250 | } 251 | 252 | resource "aws_s3_bucket_notification" "this" { 253 | bucket = module.s3.s3_bucket_id 254 | lambda_function { 255 | lambda_function_arn = module.lambda_ingestion.lambda_function_arn 256 | events = ["s3:ObjectCreated:*"] 257 | } 258 | depends_on = [module.lambda_ingestion] # wait for the whole Lambda module, including the S3 invoke permission from allowed_triggers, before S3 validates the destination 259 | } 260 | 261 | module "kms" { 262 | source = "git::https://github.com/terraform-aws-modules/terraform-aws-kms.git?ref=866950f91b3bc4411fa14d1f5c2c304145540d7f" 263 | 264 | description = "Bedrock RAG Template Encrytion" 265 | key_usage = "ENCRYPT_DECRYPT" 266 | aliases = ["aws-sample/bedrock-rag-template"] 267 | 268 | key_statements = [ 269 | 
jsondecode(templatefile("${path.module}/policies/kms.json", { 270 | aws_region = data.aws_region.current.name 271 | aws_account_id = data.aws_caller_identity.current.account_id 272 | })) 273 | ] 274 | } 275 | 276 | 277 | module "aurora" { 278 | 
source = "git::https://github.com/terraform-aws-modules/terraform-aws-rds-aurora.git?ref=39146d5" 279 | 280 | name = "bedrock-rag-sample" 281 | 282 | engine = "aurora-postgresql" 283 | engine_version = "15.5" 284 | engine_mode = "provisioned" 285 | autoscaling_enabled = true 286 | serverlessv2_scaling_configuration = { 287 | min_capacity = 1 288 | max_capacity = 10 289 | } 290 | instance_class = "db.serverless" 291 | instances = { 292 | one = {} 293 | two = {} 294 | } 295 | 296 | backtrack_window = 259200 # 72 hours 297 | deletion_protection = true 298 | 299 | enable_http_endpoint = true 300 | 301 | 302 | master_username = "root" 303 | master_user_password_rotation_automatically_after_days = 7 304 | master_user_secret_kms_key_id = module.kms.key_id 305 | manage_master_user_password_rotation = true 306 | 307 | database_name = "bedrockRagSample" 308 | 309 | kms_key_id = module.kms.key_arn 310 | 311 | subnets = module.vpc.private_subnets 312 | skip_final_snapshot = false 313 | final_snapshot_identifier = "bedrock-rag-sample-final-snapshot" 314 | 315 | create_db_subnet_group = true 316 | 317 | vpc_id = module.vpc.vpc_id 318 | security_group_rules = { 319 | ingress = { 320 | description = "Manage ingress to PG Vector data base" 321 | cidr_blocks = [module.vpc.vpc_cidr_block] 322 | protocol = "tcp" 323 | } 324 | } 325 | 326 | storage_encrypted = true 327 | apply_immediately = true 328 | monitoring_interval = 10 329 | 330 | enabled_cloudwatch_logs_exports = ["postgresql"] 331 | cloudwatch_log_group_kms_key_id = module.kms.key_id 332 | 333 | performance_insights_enabled = true 334 | performance_insights_kms_key_id = module.kms.key_arn 335 | } 336 | 337 | 338 | resource "aws_vpc_endpoint" "vpce" { 339 | for_each = local.vpc_endpoints 340 | 341 | vpc_id = module.vpc.vpc_id 342 | service_name = "com.amazonaws.${data.aws_region.current.name}.${each.key}" 343 | vpc_endpoint_type = each.value 344 | 345 | route_table_ids = each.value == "Gateway" ? 
concat([module.vpc.default_route_table_id], module.vpc.private_route_table_ids) : [] 346 | private_dns_enabled = each.value == "Interface" # enable private DNS for interface endpoints 347 | security_group_ids = each.value == "Interface" ? [aws_security_group.vpc_endpoints.id] : [] 348 | subnet_ids = each.value == "Interface" ? module.vpc.private_subnets : [] 349 | 350 | policy = jsonencode({ 351 | Version = "2012-10-17" 352 | Statement = [ 353 | { 354 | Sid = "Allow-only-current-account" 355 | Effect = "Allow" 356 | Principal = "*" 357 | Action = "*" 358 | Resource = "*" 359 | Condition = { 360 | StringEquals = { 361 | "aws:PrincipalAccount" = data.aws_caller_identity.current.id 362 | } 363 | } 364 | } 365 | ] 366 | }) 367 | } 368 | 369 | 370 | resource "aws_security_group" "vpc_endpoints" { 371 | #checkov:skip=CKV2_AWS_5: "Security group attached to vpc endpoints" 372 | vpc_id = module.vpc.vpc_id 373 | name = "vpc-endpoint" 374 | description = "Allow traffic from within vpc" 375 | 376 | ingress { 377 | description = "Allow HTTPS connection towards vpc endpoint" 378 | from_port = 443 379 | to_port = 443 380 | protocol = "TCP" 381 | cidr_blocks = [module.vpc.vpc_cidr_block] 382 | } 383 | } 384 | 385 | 386 | resource "aws_sagemaker_notebook_instance" "demo" { 387 | #checkov:skip=CKV_AWS_122: "Demo instance only" 388 | #checkov:skip=CKV_AWS_307: "Use will need to install dependencies" 389 | name = "aws-sample-bedrock-rag-template" 390 | role_arn = module.lambda_ingestion.lambda_role_arn 391 | instance_type = "ml.t2.medium" 392 | kms_key_id = module.kms.key_arn 393 | 394 | subnet_id = module.vpc.private_subnets[0] 395 | security_groups = [aws_security_group.lambda_ingestion.id] 396 | direct_internet_access = "Enabled" # Required to download dependencies 397 | } 398 | -------------------------------------------------------------------------------- /terraform/policies/data_ingestion_processor.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Effect": "Allow", 6 | "Action": [ 7 | "secretsmanager:GetSecretValue", 8 | "secretsmanager:DescribeSecret", 9 | "secretsmanager:ListSecrets" 10 | ], 11 | "Resource": "${aurora_secret_arn}" 12 | }, 13 | { 14 | "Effect": "Allow", 15 | "Action": [ 16 | "s3:GetObject", 17 | "s3:HeadObject", 18 | "s3:putObject" 19 | ], 20 | "Resource": [ 21 | "${s3_bucket_arn}", 22 | "${s3_bucket_arn}/*" 23 | ] 24 | }, 25 | { 26 | "Effect": "Allow", 27 | "Action": [ 28 | "kms:Encrypt", 29 | "kms:Decrypt", 30 | "kms:ReEncrypt*", 31 | "kms:GenerateDataKey*", 32 | "kms:DescribeKey", 33 | "kms:CreateGrant", 34 | "kms:ListGrants", 35 | "kms:RevokeGrant", 36 | "kms:GetKeyPolicy", 37 | "kms:ListKeyPolicies" 38 | ], 39 | "Resource": "${kms_key_arn}" 40 | }, 41 | { 42 | "Effect": "Allow", 43 | "Action": [ 44 | "rds:DescribeDBInstances", 45 | "rds:DescribeDBClusters", 46 | "rds:ListTagsForResource", 47 | "rds:DescribeDBLogFiles", 48 | "rds-db:connect" 49 | ], 50 | "Resource": [ 51 | "arn:aws:rds:${aws_region}:${account_id}:db:*", 52 | "arn:aws:rds:${aws_region}:${account_id}:cluster:*", 53 | "arn:aws:rds:${aws_region}:${account_id}:cluster-snapshot:*", 54 | "arn:aws:rds:${aws_region}:${account_id}:db-snapshot:*", 55 | "arn:aws:rds:${aws_region}:${account_id}:subnet-group:*", 56 | "arn:aws:rds:${aws_region}:${account_id}:option-group:*", 57 | "arn:aws:rds:${aws_region}:${account_id}:parameter-group:*", 58 | "arn:aws:rds:${aws_region}:${account_id}:secgrp:*", 59 | "arn:aws:rds:${aws_region}:${account_id}:pg:*" 60 | ] 61 | }, 62 | { 63 | "Effect": "Allow", 64 | "Action": [ 65 | "ec2:DescribeSecurityGroups", 66 | "ec2:DescribeSubnets", 67 | "ec2:DescribeVpcs" 68 | ], 69 | "Resource": "*" 70 | }, 71 | { 72 | "Effect": "Allow", 73 | "Action": [ 74 | "bedrock:InvokeModel" 75 | ], 76 | "Resource": ${jsonencode(bedrock_model_ids)} 77 | }, 78 | 79 | { 80 | 
"Effect": "Allow", 81 | "Action": [ 82 | "ssm:GetParameter" 83 | ], 84 | "Resource": "arn:aws:ssm:${aws_region}:${account_id}:parameter/${prefix}/*" 85 | } 86 | ] 87 | } 88 | -------------------------------------------------------------------------------- /terraform/policies/kms.json: -------------------------------------------------------------------------------- 1 | { 2 | "sid": "GeneralUsageByCloudWatch", 3 | "effect": "Allow", 4 | "principals": [ 5 | { 6 | "type" :"Service", 7 | "identifiers": ["logs.${aws_region}.amazonaws.com"] 8 | } 9 | ], 10 | "actions": [ 11 | "kms:Encrypt*", 12 | "kms:Decrypt*", 13 | "kms:ReEncrypt*", 14 | "kms:GenerateDataKey*", 15 | "kms:Describe*" 16 | ], 17 | "resources": ["*"], 18 | 19 | "conditions" : [ 20 | { 21 | "test" : "ArnLike", 22 | "variable" : "kms:EncryptionContext:aws:logs:arn", 23 | "values" : [ 24 | "arn:aws:logs:${aws_region}:${aws_account_id}:log-group:*" 25 | ] 26 | } 27 | ] 28 | } 29 | -------------------------------------------------------------------------------- /terraform/variables.tf: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | variable "vpc" { 4 | type = object({ 5 | private_subnets = list(string) 6 | public_subnets = list(string) 7 | enable_nat_gateway = bool 8 | cidr = string 9 | }) 10 | } 11 | 12 | variable "region" { 13 | description = "AWS region to use for deployment" 14 | type = string 15 | } 16 | 17 | variable "embedding_model_id" { 18 | description = "Model id of the Amazon Bedrock embedding to use for data ingestion" 19 | type = string 20 | } 21 | 22 | variable "text_generation_model_ids" { 23 | description = "Model ids of the Amazon Bedrock text generation model to use in the retrieval chain" 24 | type = list(string) 25 | } 26 | --------------------------------------------------------------------------------