├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── app ├── README.md ├── app.py ├── env_vars.sh ├── images │ ├── ai-icon.png │ └── user-icon.png ├── pgvector_chat_flan_xl.py ├── pgvector_chat_llama2.py ├── qa-with-llm-and-rag.png └── requirements.txt ├── cdk_stacks ├── .gitignore ├── README.md ├── app.py ├── cdk.context.json ├── cdk.json ├── rag_with_pgvector │ ├── __init__.py │ ├── aurora_postgresql.py │ ├── sm_embedding_endpoint.py │ ├── sm_llm_endpoint.py │ ├── sm_studio.py │ └── vpc.py ├── rag_with_pgvector_arch.svg ├── requirements.txt └── source.bat └── data_ingestion_to_vectordb ├── container ├── Dockerfile ├── credentials.py ├── load_data_into_pgvector.py └── sm_helper.py ├── data_ingestion_to_pgvector.ipynb └── scripts └── get_data.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | Untitled*.ipynb 75 | 76 | # pyenv 77 | .python-version 78 | 79 | # celery beat schedule file 80 | celerybeat-schedule 81 | 82 | # SageMath parsed files 83 | *.sage.py 84 | 85 | # Environments 86 | .env 87 | .venv 88 | env/ 89 | venv/ 90 | ENV/ 91 | env.bak/ 92 | venv.bak/ 93 | 94 | # Spyder project settings 95 | .spyderproject 96 | .spyproject 97 | 98 | # Rope project settings 99 | .ropeproject 100 | 101 | # mkdocs documentation 102 | /site 103 | 104 | # mypy 105 | .mypy_cache/ 106 | 107 | .DS_Store 108 | .idea/ 109 | bin/ 110 | lib64 111 | pyvenv.cfg 112 | *.bak 113 | share/ 114 | cdk.out/ 115 | cdk.context.json* 116 | zap/ 117 | 118 | */.gitignore 119 | */setup.py 120 | */source.bat 121 | 122 | */*/.gitignore 123 | */*/setup.py 124 | */*/source.bat 125 | 126 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 
5 | 
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing Guidelines
 2 | 
 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional
 4 | documentation, we greatly value feedback and contributions from our community.
 5 | 
 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary
 7 | information to effectively respond to your bug report or contribution.
 8 | 
 9 | 
10 | ## Reporting Bugs/Feature Requests
11 | 
12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features.
13 | 
14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already
15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful:
16 | 
17 | * A reproducible test case or series of steps
18 | * The version of our code being used
19 | * Any modifications you've made relevant to the bug
20 | * Anything unusual about your environment or deployment
21 | 
22 | 
23 | ## Contributing via Pull Requests
24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that:
25 | 
26 | 1. You are working against the latest source on the *main* branch.
27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already.
28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted.
29 | 
30 | To send us a pull request, please:
31 | 
32 | 1. Fork the repository.
33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change.
34 | 3. Ensure local tests pass.
35 | 4. Commit to your fork using clear commit messages.
36 | 5. Send us a pull request, answering any default questions in the pull request interface.
37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation.
38 | 
39 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and
40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/).
41 | 
42 | 
43 | ## Finding contributions to work on
44 | Looking at the existing issues is a great way to find something to contribute to. Since our projects use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start.
45 | 
46 | 
47 | ## Code of Conduct
48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
50 | opensource-codeofconduct@amazon.com with any additional questions or comments.
51 | 
52 | 
53 | ## Security issue notifications
54 | If you discover a potential security issue in this project, we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue.
55 | 
56 | 
57 | ## Licensing
58 | 
59 | See the [LICENSE](LICENSE) file for our project's licensing.
We will ask you to confirm the licensing of your contribution.
60 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT No Attribution
 2 | 
 3 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of
 6 | this software and associated documentation files (the "Software"), to deal in
 7 | the Software without restriction, including without limitation the rights to
 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
 9 | the Software, and to permit persons to whom the Software is furnished to do so.
10 | 
11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
13 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
14 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
15 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
16 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
17 | 
18 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # QA with LLM and RAG (Retrieval Augmented Generation)
 2 | 
 3 | This project is a Question Answering application with Large Language Models (LLMs) and Amazon Aurora PostgreSQL using [pgvector](https://github.com/pgvector/pgvector). An application using the RAG (Retrieval Augmented Generation) approach retrieves the information most relevant to the user's request from the enterprise knowledge base or content, bundles it as context along with the user's request in a prompt, and then sends it to the LLM to get a GenAI response.
 4 | 
 5 | LLMs have limitations on the maximum word count of the input prompt; therefore, choosing the right passages among thousands or millions of documents in the enterprise has a direct impact on the LLM's accuracy.
 6 | 
 7 | In this project, Amazon Aurora PostgreSQL with pgvector is used as the knowledge base.
 8 | 
 9 | The overall architecture is as follows:
10 | 
11 | ![rag_with_pgvector_arch](./cdk_stacks/rag_with_pgvector_arch.svg)
12 | 
13 | ### Overall Workflow
14 | 
15 | 1. Deploy the CDK stacks (for more information, see [here](./cdk_stacks/README.md)). They provision:
16 |    - A SageMaker Studio domain in a private VPC.
17 |    - A SageMaker Endpoint for text generation.
18 |    - A SageMaker Endpoint for generating embeddings.
19 |    - An Amazon Aurora PostgreSQL cluster for storing embeddings.
20 |    - The Aurora PostgreSQL cluster's access credentials (username and password), stored in AWS Secrets Manager under a name such as `RAGPgVectorStackAuroraPostg-xxxxxxxxxxxx`.
21 | 2. Open JupyterLab in SageMaker Studio and then open a new terminal.
22 | 3. Run the following command in the terminal to clone the code repository for this project:
23 |    ```
24 |    git clone --depth=1 https://github.com/aws-samples/rag-with-amazon-postgresql-using-pgvector-and-sagemaker.git
25 |    ```
26 | 4. Open the `data_ingestion_to_pgvector.ipynb` notebook and run it. (For more information, see [here](./data_ingestion_to_vectordb/data_ingestion_to_pgvector.ipynb))
27 | 5. Run the Streamlit application.
(For more information, see [here](./app/README.md)) 28 | 29 | ### References 30 | 31 | * [Leverage pgvector and Amazon Aurora PostgreSQL for Natural Language Processing, Chatbots and Sentiment Analysis (2023-07-13)](https://aws.amazon.com/blogs/database/leverage-pgvector-and-amazon-aurora-postgresql-for-natural-language-processing-chatbots-and-sentiment-analysis/) 32 | * [Accelerate HNSW indexing and searching with pgvector on Amazon Aurora PostgreSQL-compatible edition and Amazon RDS for PostgreSQL (2023-11-06)](https://aws.amazon.com/blogs/database/accelerate-hnsw-indexing-and-searching-with-pgvector-on-amazon-aurora-postgresql-compatible-edition-and-amazon-rds-for-postgresql/) 33 | * [Optimize generative AI applications with pgvector indexing: A deep dive into IVFFlat and HNSW techniques (2024-03-15)](https://aws.amazon.com/blogs/database/optimize-generative-ai-applications-with-pgvector-indexing-a-deep-dive-into-ivfflat-and-hnsw-techniques/) 34 | * [Improve the performance of generative AI workloads on Amazon Aurora with Optimized Reads and pgvector (2024-02-09)](https://aws.amazon.com/blogs/database/accelerate-generative-ai-workloads-on-amazon-aurora-with-optimized-reads-and-pgvector/) 35 | * [Building AI-powered search in PostgreSQL using Amazon SageMaker and pgvector (2023-05-03)](https://aws.amazon.com/blogs/database/building-ai-powered-search-in-postgresql-using-amazon-sagemaker-and-pgvector/) 36 | * [Build Streamlit apps in Amazon SageMaker Studio (2023-04-11)](https://aws.amazon.com/blogs/machine-learning/build-streamlit-apps-in-amazon-sagemaker-studio/) 37 | * [Quickly build high-accuracy Generative AI applications on enterprise data using Amazon Kendra, LangChain, and large language models (2023-05-03)](https://aws.amazon.com/blogs/machine-learning/quickly-build-high-accuracy-generative-ai-applications-on-enterprise-data-using-amazon-kendra-langchain-and-large-language-models/) 38 | * [(github) Amazon Kendra Retriver Samples](https://github.com/aws-samples/amazon-kendra-langchain-extensions/tree/main/kendra_retriever_samples) 39 | * [Question answering using Retrieval Augmented Generation with foundation models in Amazon SageMaker JumpStart (2023-05-02)](https://aws.amazon.com/blogs/machine-learning/question-answering-using-retrieval-augmented-generation-with-foundation-models-in-amazon-sagemaker-jumpstart/) 40 | * [Use proprietary foundation models from Amazon SageMaker JumpStart in Amazon SageMaker Studio (2023-06-27)](https://aws.amazon.com/blogs/machine-learning/use-proprietary-foundation-models-from-amazon-sagemaker-jumpstart-in-amazon-sagemaker-studio/) 41 | * [LangChain](https://python.langchain.com/docs/get_started/introduction.html) - A framework for developing applications powered by language models. 
42 | * [Streamlit](https://streamlit.io/) - A faster way to build and share data apps 43 | * [rag-with-amazon-kendra-and-sagemaker](https://github.com/aws-samples/aws-kr-startup-samples/tree/main/gen-ai/rag-with-amazon-kendra-and-sagemaker) - Question Answering application with Large Language Models (LLMs) and Amazon Kendra 44 | * [rag-with-amazon-opensearch-and-sagemaker](https://github.com/aws-samples/rag-with-amazon-opensearch-and-sagemaker) - Question Answering application with Large Language Models (LLMs) and Amazon OpenSearch Service 45 | * [rag-with-amazon-opensearch-serverless](https://github.com/aws-samples/rag-with-amazon-opensearch-serverless) - Question Answering application with Large Language Models (LLMs) and Amazon OpenSearch Serverless Service 46 | * [Pgvector changelog - v0.4.0 (2023-01-11)](https://github.com/pgvector/pgvector/blob/master/CHANGELOG.md#040-2023-01-11) 47 | > Increased max dimensions for vector from `1024` to `16000`
48 | > Increased max dimensions for index from `1024` to `2000`
49 | 
50 | ## Security
51 | 
52 | See [CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications) for more information.
53 | 
54 | ## License
55 | 
56 | This library is licensed under the MIT-0 License. See the LICENSE file.
57 | 
--------------------------------------------------------------------------------
/app/README.md:
--------------------------------------------------------------------------------
 1 | ## Run the Streamlit application in Studio
 2 | 
 3 | Now we're ready to run the Streamlit web application for our question answering bot.
 4 | 
 5 | SageMaker Studio provides a convenient platform to host the Streamlit web application. The following steps describe how to run the Streamlit app on SageMaker Studio. Alternatively, you could follow the same procedure to run the app on an Amazon EC2 instance or AWS Cloud9 in your AWS account.
 6 | 
 7 | 1. Open JupyterLab and then open a new **Terminal**.
 8 | 2. Run the following commands in the terminal to clone the code repository for this project and install the Python packages needed by the application:
 9 |    ```
10 |    git clone --depth=1 https://github.com/aws-samples/rag-with-amazon-postgresql-using-pgvector-and-sagemaker.git
11 |    cd rag-with-amazon-postgresql-using-pgvector-and-sagemaker/app
12 |    python -m venv .env
13 |    source .env/bin/activate
14 |    pip install -r requirements.txt
15 |    ```
16 | 3. In the shell, set the following environment variables with the values that are available from the CloudFormation stack output.
17 |    ```
18 |    export AWS_REGION=us-east-1
19 |    export PGVECTOR_SECRET_ID="your-postgresql-secret-id"
20 |    export COLLECTION_NAME="llm_rag_embeddings"
21 |    export EMBEDDING_ENDPOINT_NAME="your-sagemaker-endpoint-for-embedding-model"
22 |    export TEXT2TEXT_ENDPOINT_NAME="your-sagemaker-endpoint-for-text-generation-model"
23 |    ```
24 |    :information_source: `COLLECTION_NAME` can be found in the [data ingestion to vectordb](../data_ingestion_to_vectordb/data_ingestion_to_pgvector.ipynb) step.
25 | 4. Run the application with `streamlit run app.py`. When the application runs successfully, you'll see output similar to the following (the IP addresses you see will be different from the ones shown in this example). Note the port number (typically `8501`) from the output; it is used as part of the URL for the app in the next step.
26 |    ```
27 |    sagemaker-user@studio$ streamlit run app.py
28 | 
29 |    Collecting usage statistics. To deactivate, set browser.gatherUsageStats to False.
30 | 
31 |    You can now view your Streamlit app in your browser.
32 | 
33 |    Network URL: http://169.255.255.2:8501
34 |    External URL: http://52.4.240.77:8501
35 |    ```
36 | 5. You can access the app in a new browser tab using a URL similar to your Studio domain URL. For example, if your Studio URL is `https://d-randomidentifier.studio.us-east-1.sagemaker.aws/jupyter/default/lab?` then the URL for your Streamlit app will be `https://d-randomidentifier.studio.us-east-1.sagemaker.aws/jupyter/default/proxy/8501/app` (notice that `lab` is replaced with `proxy/8501/app`). If the port number noted in the previous step is different from `8501`, then use it instead of `8501` in the URL for the Streamlit app.
37 | 
38 | The following screenshot shows the app with a couple of user questions.
(e.g., `What are the versions of XGBoost supported by Amazon SageMaker?`) 39 | 40 | ![qa-with-llm-and-rag](./qa-with-llm-and-rag.png) 41 | 42 | ## References 43 | 44 | * [Leverage pgvector and Amazon Aurora PostgreSQL for Natural Language Processing, Chatbots and Sentiment Analysis (2023-07-13)](https://aws.amazon.com/blogs/database/leverage-pgvector-and-amazon-aurora-postgresql-for-natural-language-processing-chatbots-and-sentiment-analysis/) 45 | * [Building AI-powered search in PostgreSQL using Amazon SageMaker and pgvector (2023-05-03)](https://aws.amazon.com/blogs/database/building-ai-powered-search-in-postgresql-using-amazon-sagemaker-and-pgvector/) 46 | * [Use proprietary foundation models from Amazon SageMaker JumpStart in Amazon SageMaker Studio (2023-06-27)](https://aws.amazon.com/blogs/machine-learning/use-proprietary-foundation-models-from-amazon-sagemaker-jumpstart-in-amazon-sagemaker-studio/) 47 | * [Build Streamlit apps in Amazon SageMaker Studio (2023-04-11)](https://aws.amazon.com/blogs/machine-learning/build-streamlit-apps-in-amazon-sagemaker-studio/) 48 | * [Quickly build high-accuracy Generative AI applications on enterprise data using Amazon Kendra, LangChain, and large language models (2023-05-02)](https://aws.amazon.com/blogs/machine-learning/quickly-build-high-accuracy-generative-ai-applications-on-enterprise-data-using-amazon-kendra-langchain-and-large-language-models/) 49 | * [sagemaker-huggingface-inference-toolkit](https://github.com/aws/sagemaker-huggingface-inference-toolkit) - SageMaker Hugging Face Inference Toolkit is an open-source library for serving 🤗 Transformers and Diffusers models on Amazon SageMaker. 50 | * [LangChain](https://python.langchain.com/docs/get_started/introduction.html) - A framework for developing applications powered by language models. 
51 | * [Streamlit](https://streamlit.io/) - A faster way to build and share data apps 52 | -------------------------------------------------------------------------------- /app/app.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- encoding: utf-8 -*- 3 | # vim: tabstop=4 shiftwidth=4 softtabstop=4 expandtab 4 | 5 | import os 6 | import streamlit as st 7 | import uuid 8 | 9 | import pgvector_chat_flan_xl as flanxl 10 | import pgvector_chat_llama2 as llama2 11 | 12 | PROVIDER_NAME = os.environ.get('PROVIDER_NAME', 'llama2') 13 | 14 | USER_ICON = "images/user-icon.png" 15 | AI_ICON = "images/ai-icon.png" 16 | MAX_HISTORY_LENGTH = 5 17 | PROVIDER_MAP = { 18 | 'flanxl': 'Flan XL', 19 | 'llama2': 'Llama2 7B', 20 | } 21 | 22 | # Check if the user ID is already stored in the session state 23 | if 'user_id' in st.session_state: 24 | user_id = st.session_state['user_id'] 25 | 26 | # If the user ID is not yet stored in the session state, generate a random UUID 27 | else: 28 | user_id = str(uuid.uuid4()) 29 | st.session_state['user_id'] = user_id 30 | 31 | 32 | if 'llm_chain' not in st.session_state: 33 | llm_app = llama2 if PROVIDER_NAME == 'llama2' else flanxl 34 | st.session_state['llm_app'] = llm_app 35 | st.session_state['llm_chain'] = llm_app.build_chain() 36 | 37 | if 'chat_history' not in st.session_state: 38 | st.session_state['chat_history'] = [] 39 | 40 | if "chats" not in st.session_state: 41 | st.session_state.chats = [ 42 | { 43 | 'id': 0, 44 | 'question': '', 45 | 'answer': '' 46 | } 47 | ] 48 | 49 | if "questions" not in st.session_state: 50 | st.session_state.questions = [] 51 | 52 | if "answers" not in st.session_state: 53 | st.session_state.answers = [] 54 | 55 | if "input" not in st.session_state: 56 | st.session_state.input = "" 57 | 58 | 59 | st.markdown(""" 60 | 75 | """, unsafe_allow_html=True) 76 | 77 | 78 | def write_logo(): 79 | col1, col2, col3 = st.columns([5, 1, 5]) 80 | with col2: 81 | st.image(AI_ICON, use_column_width='always') 82 | 83 | 84 | def write_top_bar(): 85 | col1, col2, col3 = st.columns([1,10,2]) 86 | with col1: 87 | st.image(AI_ICON, use_column_width='always') 88 | with col2: 89 | selected_provider = PROVIDER_NAME 90 | if selected_provider in PROVIDER_MAP: 91 | provider = PROVIDER_MAP[selected_provider] 92 | else: 93 | provider = selected_provider.capitalize() 94 | header = f"An AI App powered by Amazon Aurora Postgresql with pgvector and {provider}!" 95 | st.write(f"

<h3 style='display: inline-block;'>{header}</h3>
", unsafe_allow_html=True) 96 | with col3: 97 | clear = st.button("Clear Chat") 98 | return clear 99 | 100 | 101 | clear = write_top_bar() 102 | 103 | if clear: 104 | st.session_state.questions = [] 105 | st.session_state.answers = [] 106 | st.session_state.input = "" 107 | st.session_state["chat_history"] = [] 108 | 109 | 110 | def handle_input(): 111 | input = st.session_state.input 112 | question_with_id = { 113 | 'question': input, 114 | 'id': len(st.session_state.questions) 115 | } 116 | st.session_state.questions.append(question_with_id) 117 | 118 | chat_history = st.session_state["chat_history"] 119 | if len(chat_history) == MAX_HISTORY_LENGTH: 120 | chat_history = chat_history[:-1] 121 | 122 | llm_chain = st.session_state['llm_chain'] 123 | chain = st.session_state['llm_app'] 124 | result = chain.run_chain(llm_chain, input, chat_history) 125 | answer = result['answer'] 126 | chat_history.append((input, answer)) 127 | 128 | document_list = [] 129 | if 'source_documents' in result: 130 | for d in result['source_documents']: 131 | if not (d.metadata['source'] in document_list): 132 | document_list.append((d.metadata['source'])) 133 | 134 | st.session_state.answers.append({ 135 | 'answer': result, 136 | 'sources': document_list, 137 | 'id': len(st.session_state.questions) 138 | }) 139 | st.session_state.input = "" 140 | 141 | 142 | def write_user_message(md): 143 | col1, col2 = st.columns([1,12]) 144 | 145 | with col1: 146 | st.image(USER_ICON, use_column_width='always') 147 | with col2: 148 | st.warning(md['question']) 149 | 150 | 151 | def render_result(result): 152 | answer, sources = st.tabs(['Answer', 'Sources']) 153 | with answer: 154 | render_answer(result['answer']) 155 | with sources: 156 | if 'source_documents' in result: 157 | render_sources(result['source_documents']) 158 | else: 159 | render_sources([]) 160 | 161 | 162 | def render_answer(answer): 163 | col1, col2 = st.columns([1,12]) 164 | with col1: 165 | st.image(AI_ICON, use_column_width='always') 166 | with col2: 167 | st.info(answer['answer']) 168 | 169 | 170 | def render_sources(sources): 171 | col1, col2 = st.columns([1,12]) 172 | with col2: 173 | with st.expander("Sources"): 174 | for s in sources: 175 | st.write(s) 176 | 177 | 178 | # Each answer will have context of the question asked in order to associate the provided feedback with the respective question 179 | def write_chat_message(md, q): 180 | chat = st.container() 181 | with chat: 182 | render_answer(md['answer']) 183 | render_sources(md['sources']) 184 | 185 | 186 | with st.container(): 187 | for (q, a) in zip(st.session_state.questions, st.session_state.answers): 188 | write_user_message(q) 189 | write_chat_message(a, q) 190 | 191 | st.markdown('---') 192 | input = st.text_input("You are talking to an AI, ask any question.", key="input", on_change=handle_input) 193 | -------------------------------------------------------------------------------- /app/env_vars.sh: -------------------------------------------------------------------------------- 1 | export AWS_REGION="your-aws-region" 2 | export PGVECTOR_SECRET_ID="your-postgresql-secret" 3 | export COLLECTION_NAME="llm_rag_embeddings" 4 | export EMBEDDING_ENDPOINT_NAME="your-sagemaker-endpoint-for-embedding-model" 5 | export TEXT2TEXT_ENDPOINT_NAME="your-sagemaker-endpoint-for-text-generation-model" -------------------------------------------------------------------------------- /app/images/ai-icon.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/rag-with-amazon-postgresql-using-pgvector-and-sagemaker/1b5ca45eff14b162e8be28cb179338e1ad4d7bbd/app/images/ai-icon.png -------------------------------------------------------------------------------- /app/images/user-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/rag-with-amazon-postgresql-using-pgvector-and-sagemaker/1b5ca45eff14b162e8be28cb179338e1ad4d7bbd/app/images/user-icon.png -------------------------------------------------------------------------------- /app/pgvector_chat_flan_xl.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- encoding: utf-8 -*- 3 | # vim: tabstop=4 shiftwidth=4 softtabstop=4 expandtab 4 | 5 | import os 6 | import json 7 | import logging 8 | import sys 9 | from typing import List 10 | import urllib 11 | 12 | import boto3 13 | 14 | from langchain_postgres import PGVector 15 | from langchain_community.embeddings import SagemakerEndpointEmbeddings 16 | from langchain_community.embeddings.sagemaker_endpoint import EmbeddingsContentHandler 17 | 18 | from langchain_community.llms import SagemakerEndpoint 19 | from langchain_community.llms.sagemaker_endpoint import LLMContentHandler 20 | 21 | from langchain.prompts import PromptTemplate 22 | from langchain.chains import ConversationalRetrievalChain 23 | 24 | logger = logging.getLogger() 25 | logging.basicConfig(format='%(asctime)s,%(module)s,%(processName)s,%(levelname)s,%(message)s', level=logging.INFO, stream=sys.stderr) 26 | 27 | 28 | class bcolors: 29 | HEADER = '\033[95m' 30 | OKBLUE = '\033[94m' 31 | OKCYAN = '\033[96m' 32 | OKGREEN = '\033[92m' 33 | WARNING = '\033[93m' 34 | FAIL = '\033[91m' 35 | ENDC = '\033[0m' 36 | BOLD = '\033[1m' 37 | UNDERLINE = '\033[4m' 38 | 39 | 40 | MAX_HISTORY_LENGTH = 5 41 | 42 | 43 | def _create_sagemaker_embeddings(endpoint_name: str, region: str = "us-east-1") -> SagemakerEndpointEmbeddings: 44 | 45 | class ContentHandlerForEmbeddings(EmbeddingsContentHandler): 46 | """ 47 | encode input string as utf-8 bytes, read the embeddings 48 | from the output 49 | """ 50 | 51 | content_type = "application/json" 52 | accepts = "application/json" 53 | 54 | def transform_input(self, prompt: str, model_kwargs={}) -> bytes: 55 | input_str = json.dumps({"text_inputs": prompt, **model_kwargs}) 56 | return input_str.encode('utf-8') 57 | 58 | def transform_output(self, output: bytes) -> str: 59 | response_json = json.loads(output.read().decode("utf-8")) 60 | embeddings = response_json["embedding"] 61 | if len(embeddings) == 1: 62 | return [embeddings[0]] 63 | return embeddings 64 | 65 | # create a content handler object which knows how to serialize 66 | # and deserialize communication with the model endpoint 67 | content_handler = ContentHandlerForEmbeddings() 68 | 69 | # read to create the Sagemaker embeddings, we are providing 70 | # the Sagemaker endpoint that will be used for generating the 71 | # embeddings to the class 72 | # 73 | embeddings = SagemakerEndpointEmbeddings( 74 | endpoint_name=endpoint_name, 75 | region_name=region, 76 | content_handler=content_handler 77 | ) 78 | logger.info(f"embeddings type={type(embeddings)}") 79 | 80 | return embeddings 81 | 82 | 83 | def _get_credentials(secret_id: str, region_name: str = 'us-east-1') -> str: 84 | client = boto3.client('secretsmanager', region_name=region_name) 85 | response = client.get_secret_value(SecretId=secret_id) 86 | 
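    # The 'SecretString' of an RDS-managed secret is a JSON document; the keys
    # read in build_chain() below ('username', 'password', 'host', 'port')
    # follow the standard RDS secret layout, e.g. (values illustrative):
    #   {"username": "postgres", "password": "...", "host": "...", "port": 5432}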
secrets_value = json.loads(response['SecretString']) 87 | return secrets_value 88 | 89 | 90 | def build_chain(): 91 | region = os.environ["AWS_REGION"] 92 | embeddings_model_endpoint = os.environ["EMBEDDING_ENDPOINT_NAME"] 93 | text2text_model_endpoint = os.environ["TEXT2TEXT_ENDPOINT_NAME"] 94 | 95 | pgvector_secret_id = os.environ["PGVECTOR_SECRET_ID"] 96 | secret = _get_credentials(pgvector_secret_id, region) 97 | db_username = secret['username'] 98 | db_password = urllib.parse.quote_plus(secret['password']) 99 | db_port = secret['port'] 100 | db_host = secret['host'] 101 | 102 | CONNECTION_STRING = PGVector.connection_string_from_db_params( 103 | driver = 'psycopg', 104 | user = db_username, 105 | password = db_password, 106 | host = db_host, 107 | port = db_port, 108 | database = '' 109 | ) 110 | 111 | collection_name = os.environ["COLLECTION_NAME"] 112 | 113 | class ContentHandler(LLMContentHandler): 114 | content_type = "application/json" 115 | accepts = "application/json" 116 | 117 | def transform_input(self, prompt: str, model_kwargs: dict) -> bytes: 118 | input_str = json.dumps({"inputs": prompt, **model_kwargs}) 119 | return input_str.encode('utf-8') 120 | 121 | def transform_output(self, output: bytes) -> str: 122 | response_json = json.loads(output.read().decode("utf-8")) 123 | return response_json[0]["generated_text"] 124 | 125 | content_handler = ContentHandler() 126 | 127 | model_kwargs = { 128 | "max_length": 500, 129 | "num_return_sequences": 1, 130 | "top_k": 250, 131 | "top_p": 0.95, 132 | "do_sample": False, 133 | "temperature": 1 134 | } 135 | 136 | llm = SagemakerEndpoint( 137 | endpoint_name=text2text_model_endpoint, 138 | region_name=region, 139 | model_kwargs=model_kwargs, 140 | content_handler=content_handler 141 | ) 142 | 143 | vectorstore = PGVector( 144 | collection_name=collection_name, 145 | connection=CONNECTION_STRING, 146 | embeddings=_create_sagemaker_embeddings(embeddings_model_endpoint, region) 147 | ) 148 | retriever = vectorstore.as_retriever() 149 | 150 | prompt_template = """Answer based on context:\n\n{context}\n\n{question}""" 151 | 152 | PROMPT = PromptTemplate( 153 | template=prompt_template, input_variables=["context", "question"] 154 | ) 155 | 156 | condense_qa_template = """ 157 | Given the following conversation and a follow up question, rephrase the follow up question 158 | to be a standalone question. 159 | 160 | Chat History: 161 | {chat_history} 162 | Follow Up Input: {question} 163 | Standalone question:""" 164 | standalone_question_prompt = PromptTemplate.from_template(condense_qa_template) 165 | 166 | qa = ConversationalRetrievalChain.from_llm( 167 | llm=llm, 168 | retriever=retriever, 169 | condense_question_prompt=standalone_question_prompt, 170 | return_source_documents=True, 171 | combine_docs_chain_kwargs={"prompt":PROMPT} 172 | ) 173 | 174 | logger.info(f"\ntype('qa'): \"{type(qa)}\"\n") 175 | return qa 176 | 177 | 178 | def run_chain(chain, prompt: str, history=[]): 179 | return chain.invoke({"question": prompt, "chat_history": history}) 180 | 181 | 182 | if __name__ == "__main__": 183 | chat_history = [] 184 | qa = build_chain() 185 | print(bcolors.OKBLUE + "Hello! How can I help you?" + bcolors.ENDC) 186 | print(bcolors.OKCYAN + "Ask a question, start a New search: or CTRL-D to exit." 
+ bcolors.ENDC) 187 | print(">", end=" ", flush=True) 188 | for query in sys.stdin: 189 | if (query.strip().lower().startswith("new search:")): 190 | query = query.strip().lower().replace("new search:","") 191 | chat_history = [] 192 | elif (len(chat_history) == MAX_HISTORY_LENGTH): 193 | chat_history.pop(0) 194 | result = run_chain(qa, query, chat_history) 195 | chat_history.append((query, result["answer"])) 196 | print(bcolors.OKGREEN + result['answer'] + bcolors.ENDC) 197 | if 'source_documents' in result: 198 | print(bcolors.OKGREEN + 'Sources:') 199 | for d in result['source_documents']: 200 | print(d.metadata['source']) 201 | print(bcolors.ENDC) 202 | print(bcolors.OKCYAN + "Ask a question, start a New search: or CTRL-D to exit." + bcolors.ENDC) 203 | print(">", end=" ", flush=True) 204 | print(bcolors.OKBLUE + "Bye" + bcolors.ENDC) -------------------------------------------------------------------------------- /app/pgvector_chat_llama2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- encoding: utf-8 -*- 3 | # vim: tabstop=4 shiftwidth=4 softtabstop=4 expandtab 4 | 5 | import os 6 | import json 7 | import logging 8 | import sys 9 | from typing import List 10 | import urllib 11 | 12 | import boto3 13 | 14 | from langchain_postgres import PGVector 15 | from langchain_community.embeddings import SagemakerEndpointEmbeddings 16 | from langchain_community.embeddings.sagemaker_endpoint import EmbeddingsContentHandler 17 | 18 | from langchain_community.llms import SagemakerEndpoint 19 | from langchain_community.llms.sagemaker_endpoint import LLMContentHandler 20 | 21 | from langchain.prompts import PromptTemplate 22 | from langchain.chains import ConversationalRetrievalChain 23 | 24 | logger = logging.getLogger() 25 | logging.basicConfig(format='%(asctime)s,%(module)s,%(processName)s,%(levelname)s,%(message)s', level=logging.INFO, stream=sys.stderr) 26 | 27 | 28 | class bcolors: 29 | HEADER = '\033[95m' 30 | OKBLUE = '\033[94m' 31 | OKCYAN = '\033[96m' 32 | OKGREEN = '\033[92m' 33 | WARNING = '\033[93m' 34 | FAIL = '\033[91m' 35 | ENDC = '\033[0m' 36 | BOLD = '\033[1m' 37 | UNDERLINE = '\033[4m' 38 | 39 | 40 | MAX_HISTORY_LENGTH = 5 41 | 42 | 43 | class SagemakerEndpointEmbeddingsJumpStart(SagemakerEndpointEmbeddings): 44 | def embed_documents( 45 | self, texts: List[str], chunk_size: int = 5 46 | ) -> List[List[float]]: 47 | """Compute doc embeddings using a SageMaker Inference Endpoint. 48 | 49 | Args: 50 | texts: The list of texts to embed. 51 | chunk_size: The chunk size defines how many input texts will 52 | be grouped together as request. If None, will use the 53 | chunk size specified by the class. 54 | 55 | Returns: 56 | List of embeddings, one for each text. 
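        Example (illustrative):
            embs = embeddings.embed_documents(["What is Amazon SageMaker?"], chunk_size=5)
            # embs holds one embedding vector (a list of floats) per input text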
57 | """ 58 | results = [] 59 | 60 | _chunk_size = len(texts) if chunk_size > len(texts) else chunk_size 61 | for i in range(0, len(texts), _chunk_size): 62 | response = self._embedding_func(texts[i : i + _chunk_size]) 63 | results.extend(response) 64 | return results 65 | 66 | 67 | def _create_sagemaker_embeddings(endpoint_name: str, region: str = "us-east-1") -> SagemakerEndpointEmbeddingsJumpStart: 68 | 69 | class ContentHandlerForEmbeddings(EmbeddingsContentHandler): 70 | """ 71 | encode input string as utf-8 bytes, read the embeddings 72 | from the output 73 | """ 74 | content_type = "application/json" 75 | accepts = "application/json" 76 | def transform_input(self, prompt: str, model_kwargs = {}) -> bytes: 77 | input_str = json.dumps({"text_inputs": prompt, **model_kwargs}) 78 | return input_str.encode('utf-8') 79 | 80 | def transform_output(self, output: bytes) -> str: 81 | response_json = json.loads(output.read().decode("utf-8")) 82 | embeddings = response_json["embedding"] 83 | if len(embeddings) == 1: 84 | return [embeddings[0]] 85 | return embeddings 86 | 87 | # create a content handler object which knows how to serialize 88 | # and deserialize communication with the model endpoint 89 | content_handler = ContentHandlerForEmbeddings() 90 | 91 | # read to create the Sagemaker embeddings, we are providing 92 | # the Sagemaker endpoint that will be used for generating the 93 | # embeddings to the class 94 | embeddings = SagemakerEndpointEmbeddingsJumpStart( 95 | endpoint_name=endpoint_name, 96 | region_name=region, 97 | content_handler=content_handler 98 | ) 99 | logger.info(f"embeddings type={type(embeddings)}") 100 | 101 | return embeddings 102 | 103 | 104 | def _get_credentials(secret_id: str, region_name: str) -> str: 105 | client = boto3.client('secretsmanager', region_name=region_name) 106 | response = client.get_secret_value(SecretId=secret_id) 107 | secrets_value = json.loads(response['SecretString']) 108 | return secrets_value 109 | 110 | 111 | def build_chain(): 112 | region = os.environ["AWS_REGION"] 113 | embeddings_model_endpoint = os.environ["EMBEDDING_ENDPOINT_NAME"] 114 | text2text_model_endpoint = os.environ["TEXT2TEXT_ENDPOINT_NAME"] 115 | 116 | pgvector_secret_id = os.environ["PGVECTOR_SECRET_ID"] 117 | secret = _get_credentials(pgvector_secret_id, region) 118 | db_username = secret['username'] 119 | db_password = urllib.parse.quote_plus(secret['password']) 120 | db_port = secret['port'] 121 | db_host = secret['host'] 122 | 123 | CONNECTION_STRING = PGVector.connection_string_from_db_params( 124 | driver = 'psycopg', 125 | user = db_username, 126 | password = db_password, 127 | host = db_host, 128 | port = db_port, 129 | database = '' 130 | ) 131 | 132 | collection_name = os.environ["COLLECTION_NAME"] 133 | 134 | # https://github.com/aws/amazon-sagemaker-examples/blob/main/introduction_to_amazon_algorithms/jumpstart-foundation-models/llama-2-chat-completion.ipynb 135 | class ContentHandler(LLMContentHandler): 136 | content_type = "application/json" 137 | accepts = "application/json" 138 | 139 | def transform_input(self, prompt: str, model_kwargs: dict) -> bytes: 140 | system_prompt = "You are a helpful assistant. Always answer to questions as helpfully as possible." 
\ 141 | " If you don't know the answer to a question, say I don't know the answer" 142 | 143 | payload = { 144 | "inputs": [ 145 | [ 146 | {"role": "system", "content": system_prompt}, 147 | {"role": "user", "content": prompt}, 148 | ], 149 | ], 150 | "parameters": model_kwargs, 151 | } 152 | input_str = json.dumps(payload) 153 | return input_str.encode("utf-8") 154 | 155 | def transform_output(self, output: bytes) -> str: 156 | response_json = json.loads(output.read().decode("utf-8")) 157 | content = response_json[0]["generation"]["content"] 158 | return content 159 | 160 | content_handler = ContentHandler() 161 | 162 | # https://github.com/aws/amazon-sagemaker-examples/blob/main/introduction_to_amazon_algorithms/jumpstart-foundation-models/llama-2-text-completion.ipynb 163 | model_kwargs = { 164 | "max_new_tokens": 256, 165 | "top_p": 0.9, 166 | "temperature": 0.6, 167 | "return_full_text": False, 168 | } 169 | 170 | llm = SagemakerEndpoint( 171 | endpoint_name=text2text_model_endpoint, 172 | region_name=region, 173 | model_kwargs=model_kwargs, 174 | endpoint_kwargs={"CustomAttributes": "accept_eula=true"}, 175 | content_handler=content_handler 176 | ) 177 | 178 | vectorstore = PGVector( 179 | collection_name=collection_name, 180 | connection=CONNECTION_STRING, 181 | embeddings=_create_sagemaker_embeddings(embeddings_model_endpoint, region) 182 | ) 183 | retriever = vectorstore.as_retriever() 184 | 185 | prompt_template = """Answer based on context:\n\n{context}\n\n{question}""" 186 | 187 | PROMPT = PromptTemplate( 188 | template=prompt_template, input_variables=["context", "question"] 189 | ) 190 | 191 | condense_qa_template = """ 192 | Given the following conversation and a follow up question, rephrase the follow up question 193 | to be a standalone question. 194 | 195 | Chat History: 196 | {chat_history} 197 | Follow Up Input: {question} 198 | Standalone question:""" 199 | standalone_question_prompt = PromptTemplate.from_template(condense_qa_template) 200 | 201 | qa = ConversationalRetrievalChain.from_llm( 202 | llm=llm, 203 | retriever=retriever, 204 | condense_question_prompt=standalone_question_prompt, 205 | return_source_documents=True, 206 | combine_docs_chain_kwargs={"prompt":PROMPT}, 207 | verbose=False 208 | ) 209 | 210 | logger.info(f"\ntype('qa'): \"{type(qa)}\"\n") 211 | return qa 212 | 213 | 214 | def run_chain(chain, prompt: str, history=[]): 215 | return chain.invoke({"question": prompt, "chat_history": history}) 216 | 217 | 218 | if __name__ == "__main__": 219 | chat_history = [] 220 | qa = build_chain() 221 | print(bcolors.OKBLUE + "Hello! How can I help you?" + bcolors.ENDC) 222 | print(bcolors.OKCYAN + "Ask a question, start a New search: or CTRL-D to exit." + bcolors.ENDC) 223 | print(">", end=" ", flush=True) 224 | for query in sys.stdin: 225 | if (query.strip().lower().startswith("new search:")): 226 | query = query.strip().lower().replace("new search:","") 227 | chat_history = [] 228 | elif (len(chat_history) == MAX_HISTORY_LENGTH): 229 | chat_history.pop(0) 230 | result = run_chain(qa, query, chat_history) 231 | chat_history.append((query, result["answer"])) 232 | print(bcolors.OKGREEN + result['answer'] + bcolors.ENDC) 233 | if 'source_documents' in result: 234 | print(bcolors.OKGREEN + '\nSources:') 235 | for d in result['source_documents']: 236 | print(d.metadata['source']) 237 | print(bcolors.ENDC) 238 | print(bcolors.OKCYAN + "Ask a question, start a New search: or CTRL-D to exit." 
+ bcolors.ENDC)
239 |         print(">", end=" ", flush=True)
240 |     print(bcolors.OKBLUE + "Bye" + bcolors.ENDC)
--------------------------------------------------------------------------------
/app/qa-with-llm-and-rag.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/rag-with-amazon-postgresql-using-pgvector-and-sagemaker/1b5ca45eff14b162e8be28cb179338e1ad4d7bbd/app/qa-with-llm-and-rag.png
--------------------------------------------------------------------------------
/app/requirements.txt:
--------------------------------------------------------------------------------
1 | boto3>=1.26.159
2 | langchain>=0.3,<0.4
3 | langchain-community>=0.3,<0.4
4 | pgvector==0.2.5
5 | psycopg[binary]==3.1.19
6 | SQLAlchemy==2.0.28
7 | streamlit==1.37.0
--------------------------------------------------------------------------------
/cdk_stacks/.gitignore:
--------------------------------------------------------------------------------
 1 | *.swp
 2 | package-lock.json
 3 | __pycache__
 4 | .pytest_cache
 5 | .venv
 6 | *.egg-info
 7 | 
 8 | # CDK asset staging directory
 9 | .cdk.staging
10 | cdk.out
--------------------------------------------------------------------------------
/cdk_stacks/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | # RAG Application CDK Python project!
 3 | 
 4 | ![rag_with_pgvector_arch](./rag_with_pgvector_arch.svg)
 5 | 
 6 | This is a QA application with LLMs and RAG, set up as a CDK development project in Python.
 7 | 
 8 | The `cdk.json` file tells the CDK Toolkit how to execute your app.
 9 | 
10 | This project is set up like a standard Python project. The initialization
11 | process also creates a virtualenv within this project, stored under the `.venv`
12 | directory. To create the virtualenv it assumes that there is a `python3`
13 | (or `python` for Windows) executable in your path with access to the `venv`
14 | package. If for any reason the automatic creation of the virtualenv fails,
15 | you can create the virtualenv manually.
16 | 
17 | To manually create a virtualenv on MacOS and Linux:
18 | 
19 | ```
20 | $ python3 -m venv .venv
21 | ```
22 | 
23 | After the init process completes and the virtualenv is created, you can use the following
24 | step to activate your virtualenv.
25 | 
26 | ```
27 | $ source .venv/bin/activate
28 | ```
29 | 
30 | If you are on a Windows platform, you would activate the virtualenv like this:
31 | 
32 | ```
33 | % .venv\Scripts\activate.bat
34 | ```
35 | 
36 | Once the virtualenv is activated, you can install the required dependencies.
37 | 
38 | ```
39 | (.venv) $ pip install -r requirements.txt
40 | ```
41 | 
42 | To add additional dependencies, for example other CDK libraries, just add
43 | them to your `setup.py` file and rerun the `pip install -r requirements.txt`
44 | command.
45 | 
46 | Before synthesizing the CloudFormation template, you should properly set the CDK context configuration file, `cdk.context.json`.
47 | 
48 | For example:
49 | 
50 | ```
 51 | {
 52 |   "db_cluster_name": "postgresql-cluster-name",
 53 |   "jumpstart_model_info": {
 54 |     "model_id": "huggingface-text2text-flan-t5-xl",
 55 |     "version": "2.1.0"
 56 |   },
 57 |   "sagemaker_studio_domain_name": "sagemaker-studio-domain-name"
 58 | }
 59 | ```
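
These context values are read inside the CDK stacks through the construct tree's context API; for instance, `rag_with_pgvector/aurora_postgresql.py` calls `self.node.try_get_context('db_cluster_name')`. A minimal sketch of how a stack might consume the values shown above (the `jumpstart_model_info` lookup here is illustrative):

```
db_cluster_name = self.node.try_get_context('db_cluster_name')
model_info = self.node.try_get_context('jumpstart_model_info')
model_id, model_version = model_info['model_id'], model_info['version']
```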
60 | 
61 | :information_source: The `model_id` and `version` provided by SageMaker JumpStart can be found in the [**SageMaker Built-in Algorithms with pre-trained Model Table**](https://sagemaker.readthedocs.io/en/stable/doc_utils/pretrainedmodels.html).
62 | 
63 | > :warning: **Important**: Make sure the Docker daemon is running.
64 | > Otherwise you will encounter the following errors:
65 | 
66 | ```
67 | ERROR: Cannot connect to the Docker daemon at unix://$HOME/.docker/run/docker.sock. Is the docker daemon running?
68 | jsii.errors.JavaScriptError:
69 |   Error: docker exited with status 1
70 | ```
71 | 
72 | At this point you can synthesize the CloudFormation template for this code.
73 | 
74 | ```
75 | (.venv) $ export CDK_DEFAULT_ACCOUNT=$(aws sts get-caller-identity --query Account --output text)
76 | (.venv) $ export CDK_DEFAULT_REGION=$(aws configure get region)
77 | (.venv) $ cdk synth --all
78 | ```
79 | 
80 | Now we will be able to deploy all the CDK stacks at once like this:
81 | 
82 | ```
83 | (.venv) $ cdk deploy --require-approval never --all
84 | ```
85 | 
86 | Or, we can provision each CDK stack one at a time like this:
87 | 
88 | #### Step 1: List all CDK Stacks
89 | 
90 | ```
91 | (.venv) $ cdk list
92 | RAGVpcStack
93 | RAGSageMakerStudioStack
94 | RAGPgVectorStack
95 | EmbeddingEndpointStack
96 | LLMEndpointStack
97 | ```
98 | 
99 | #### Step 2: Create the Aurora PostgreSQL cluster
100 | 
101 | ```
102 | (.venv) $ cdk deploy --require-approval never RAGVpcStack RAGPgVectorStack
103 | ```
104 | 
105 | #### Step 3: Create SageMaker Studio
106 | 
107 | ```
108 | (.venv) $ cdk deploy --require-approval never RAGSageMakerStudioStack
109 | ```
110 | 
111 | #### Step 4: Deploy the LLM Embedding Endpoint
112 | 
113 | ```
114 | (.venv) $ cdk deploy --require-approval never EmbeddingEndpointStack
115 | ```
116 | 
117 | #### Step 5: Deploy the Text Generation LLM Endpoint
118 | 
119 | ```
120 | (.venv) $ cdk deploy --require-approval never LLMEndpointStack
121 | ```
122 | 
123 | **Once all CDK stacks have been successfully created, proceed with the remaining steps of the [overall workflow](../README.md#overall-workflow).**
124 | 
125 | 
126 | ## Clean Up
127 | 
128 | Delete the CloudFormation stacks by running the command below.
129 | 
130 | ```
131 | (.venv) $ cdk destroy --all
132 | ```
133 | 
134 | ## Useful commands
135 | 
136 |  * `cdk ls`          list all stacks in the app
137 |  * `cdk synth`       emits the synthesized CloudFormation template
138 |  * `cdk deploy`      deploy this stack to your default AWS account/region
139 |  * `cdk diff`        compare deployed stack with current state
140 |  * `cdk docs`        open CDK documentation
141 | 
142 | Enjoy!
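
Once the stacks are deployed, you can inspect a stack's outputs (for example, the Aurora cluster endpoint and the Secrets Manager secret name that the application expects as `PGVECTOR_SECRET_ID`). A sketch using the AWS CLI, with the stack name taken from the `cdk list` output above:

```
(.venv) $ aws cloudformation describe-stacks --stack-name RAGPgVectorStack \
            --query "Stacks[0].Outputs" --output table
```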
143 | 144 | ## References 145 | 146 | * [Leverage pgvector and Amazon Aurora PostgreSQL for Natural Language Processing, Chatbots and Sentiment Analysis (2023-07-13)](https://aws.amazon.com/blogs/database/leverage-pgvector-and-amazon-aurora-postgresql-for-natural-language-processing-chatbots-and-sentiment-analysis/) 147 | * [Building AI-powered search in PostgreSQL using Amazon SageMaker and pgvector (2023-05-02)](https://aws.amazon.com/blogs/database/building-ai-powered-search-in-postgresql-using-amazon-sagemaker-and-pgvector/) 148 | * [Use proprietary foundation models from Amazon SageMaker JumpStart in Amazon SageMaker Studio (2023-06-27)](https://aws.amazon.com/blogs/machine-learning/use-proprietary-foundation-models-from-amazon-sagemaker-jumpstart-in-amazon-sagemaker-studio/) 149 | * [SageMaker Built-in Algorithms with pre-trained Model Table](https://sagemaker.readthedocs.io/en/stable/doc_utils/pretrainedmodels.html) 150 | * [AWS Deep Learning Containers Images](https://docs.aws.amazon.com/deep-learning-containers/latest/devguide/deep-learning-containers-images.html) 151 | * [Securing Amazon SageMaker Studio connectivity using a private VPC (2020-10-22)](https://aws.amazon.com/blogs/machine-learning/securing-amazon-sagemaker-studio-connectivity-using-a-private-vpc/) 152 | * [Connect SageMaker Studio Notebooks in a VPC to External Resources](https://docs.aws.amazon.com/sagemaker/latest/dg/studio-notebooks-and-internet-access.html) 153 | * [Give SageMaker Processing Jobs Access to Resources in Your Amazon VPC](https://docs.aws.amazon.com/sagemaker/latest/dg/process-vpc.html) 154 | * **Configure the VPC Security Group** 155 | * In distributed processing, you must allow communication between the different containers in the same processing job. To do that, configure a rule for your security group that allows inbound connections between members of the same security group. 156 | * [Using the Amazon SageMaker Studio Image Build CLI to build container images from your Studio notebooks (2020-09-14)](https://aws.amazon.com/blogs/machine-learning/using-the-amazon-sagemaker-studio-image-build-cli-to-build-container-images-from-your-studio-notebooks/) 157 | * [How can I troubleshoot the InternalServerError response on Amazon SageMaker? 
- AWS re:Post](https://repost.aws/knowledge-center/sagemaker-http-500-internal-server-error) 158 | -------------------------------------------------------------------------------- /cdk_stacks/app.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- encoding: utf-8 -*- 3 | # vim: tabstop=2 shiftwidth=2 softtabstop=2 expandtab 4 | 5 | import os 6 | 7 | import aws_cdk as cdk 8 | 9 | from rag_with_pgvector import ( 10 | VpcStack, 11 | AuroraPostgresqlStack, 12 | SageMakerStudioStack, 13 | EmbeddingEndpointStack, 14 | LLMEndpointStack 15 | ) 16 | 17 | APP_ENV = cdk.Environment( 18 | account=os.environ["CDK_DEFAULT_ACCOUNT"], 19 | region=os.environ["CDK_DEFAULT_REGION"] 20 | ) 21 | 22 | app = cdk.App() 23 | 24 | vpc_stack = VpcStack(app, 'RAGVpcStack', 25 | env=APP_ENV) 26 | 27 | aurora_pgsql_stack = AuroraPostgresqlStack(app, 'RAGPgVectorStack', 28 | vpc_stack.vpc, 29 | env=APP_ENV 30 | ) 31 | aurora_pgsql_stack.add_dependency(vpc_stack) 32 | 33 | sm_studio_stack = SageMakerStudioStack(app, 'RAGSageMakerStudioStack', 34 | vpc_stack.vpc, 35 | aurora_pgsql_stack.sg_rds_client, 36 | env=APP_ENV 37 | ) 38 | sm_studio_stack.add_dependency(aurora_pgsql_stack) 39 | 40 | sm_embedding_endpoint = EmbeddingEndpointStack(app, 'EmbeddingEndpointStack', 41 | env=APP_ENV 42 | ) 43 | sm_embedding_endpoint.add_dependency(sm_studio_stack) 44 | 45 | sm_llm_endpoint = LLMEndpointStack(app, 'LLMEndpointStack', 46 | env=APP_ENV 47 | ) 48 | sm_llm_endpoint.add_dependency(sm_studio_stack) 49 | 50 | app.synth() 51 | -------------------------------------------------------------------------------- /cdk_stacks/cdk.context.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_cluster_name": "rag-pgvector-demo", 3 | "jumpstart_model_info": { 4 | "model_id": "meta-textgeneration-llama-2-7b-f", 5 | "version": "2.0.1" 6 | }, 7 | "sagemaker_studio_domain_name": "llm-app-rag-pgvector" 8 | } 9 | -------------------------------------------------------------------------------- /cdk_stacks/cdk.json: -------------------------------------------------------------------------------- 1 | { 2 | "app": "python3 app.py", 3 | "watch": { 4 | "include": [ 5 | "**" 6 | ], 7 | "exclude": [ 8 | "README.md", 9 | "cdk*.json", 10 | "requirements*.txt", 11 | "source.bat", 12 | "**/__init__.py", 13 | "python/__pycache__", 14 | "tests" 15 | ] 16 | }, 17 | "context": { 18 | "@aws-cdk/aws-lambda:recognizeLayerVersion": true, 19 | "@aws-cdk/core:checkSecretUsage": true, 20 | "@aws-cdk/core:target-partitions": [ 21 | "aws", 22 | "aws-cn" 23 | ], 24 | "@aws-cdk-containers/ecs-service-extensions:enableDefaultLogDriver": true, 25 | "@aws-cdk/aws-ec2:uniqueImdsv2TemplateName": true, 26 | "@aws-cdk/aws-ecs:arnFormatIncludesClusterName": true, 27 | "@aws-cdk/aws-iam:minimizePolicies": true, 28 | "@aws-cdk/core:validateSnapshotRemovalPolicy": true, 29 | "@aws-cdk/aws-codepipeline:crossAccountKeyAliasStackSafeResourceName": true, 30 | "@aws-cdk/aws-s3:createDefaultLoggingPolicy": true, 31 | "@aws-cdk/aws-sns-subscriptions:restrictSqsDescryption": true, 32 | "@aws-cdk/aws-apigateway:disableCloudWatchRole": true, 33 | "@aws-cdk/core:enablePartitionLiterals": true, 34 | "@aws-cdk/aws-events:eventsTargetQueueSameAccount": true, 35 | "@aws-cdk/aws-iam:standardizedServicePrincipals": true, 36 | "@aws-cdk/aws-ecs:disableExplicitDeploymentControllerForCircuitBreaker": true, 37 | "@aws-cdk/aws-iam:importedRoleStackSafeDefaultPolicyName": true, 38 | 
"@aws-cdk/aws-s3:serverAccessLogsUseBucketPolicy": true, 39 | "@aws-cdk/aws-route53-patters:useCertificate": true, 40 | "@aws-cdk/customresources:installLatestAwsSdkDefault": false, 41 | "@aws-cdk/aws-rds:databaseProxyUniqueResourceName": true, 42 | "@aws-cdk/aws-codedeploy:removeAlarmsFromDeploymentGroup": true, 43 | "@aws-cdk/aws-apigateway:authorizerChangeDeploymentLogicalId": true, 44 | "@aws-cdk/aws-ec2:launchTemplateDefaultUserData": true, 45 | "@aws-cdk/aws-secretsmanager:useAttachedSecretResourcePolicyForSecretTargetAttachments": true, 46 | "@aws-cdk/aws-redshift:columnId": true, 47 | "@aws-cdk/aws-stepfunctions-tasks:enableEmrServicePolicyV2": true, 48 | "@aws-cdk/aws-ec2:restrictDefaultSecurityGroup": true, 49 | "@aws-cdk/aws-apigateway:requestValidatorUniqueId": true, 50 | "@aws-cdk/aws-kms:aliasNameRef": true, 51 | "@aws-cdk/core:includePrefixInUniqueNameGeneration": true 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /cdk_stacks/rag_with_pgvector/__init__.py: -------------------------------------------------------------------------------- 1 | from .vpc import VpcStack 2 | from .aurora_postgresql import AuroraPostgresqlStack 3 | from .sm_studio import SageMakerStudioStack 4 | from .sm_embedding_endpoint import EmbeddingEndpointStack 5 | from .sm_llm_endpoint import LLMEndpointStack -------------------------------------------------------------------------------- /cdk_stacks/rag_with_pgvector/aurora_postgresql.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- encoding: utf-8 -*- 3 | # vim: tabstop=2 shiftwidth=2 softtabstop=2 expandtab 4 | 5 | import os 6 | 7 | import aws_cdk as cdk 8 | from aws_cdk import ( 9 | Stack, 10 | aws_ec2, 11 | aws_logs, 12 | aws_rds 13 | ) 14 | from constructs import Construct 15 | 16 | class AuroraPostgresqlStack(Stack): 17 | 18 | def __init__(self, scope: Construct, construct_id: str, vpc, **kwargs) -> None: 19 | super().__init__(scope, construct_id, **kwargs) 20 | 21 | sg_postgresql_client = aws_ec2.SecurityGroup(self, 'PostgreSQLClientSG', 22 | vpc=vpc, 23 | allow_all_outbound=True, 24 | description='security group for postgresql client', 25 | security_group_name='postgresql-client-sg' 26 | ) 27 | cdk.Tags.of(sg_postgresql_client).add('Name', 'postgresql-client-sg') 28 | 29 | sg_postgresql_server = aws_ec2.SecurityGroup(self, 'PostgreSQLServerSG', 30 | vpc=vpc, 31 | allow_all_outbound=True, 32 | description='security group for postgresql', 33 | security_group_name='postgresql-server-sg' 34 | ) 35 | sg_postgresql_server.add_ingress_rule(peer=sg_postgresql_server, connection=aws_ec2.Port.all_tcp(), 36 | description='postgresql-server-sg') 37 | sg_postgresql_server.add_ingress_rule(peer=sg_postgresql_client, connection=aws_ec2.Port.tcp(5432), 38 | description='postgresql-client-sg') 39 | cdk.Tags.of(sg_postgresql_server).add('Name', 'postgresql-server-sg') 40 | 41 | rds_subnet_group = aws_rds.SubnetGroup(self, 'PostgreSQLSubnetGroup', 42 | description='subnet group for postgresql', 43 | subnet_group_name=f'{self.stack_name}-aurora-postgresql', 44 | vpc_subnets=aws_ec2.SubnetSelection(subnet_type=aws_ec2.SubnetType.PRIVATE_WITH_EGRESS), 45 | vpc=vpc 46 | ) 47 | 48 | db_cluster_name = self.node.try_get_context('db_cluster_name') 49 | rds_credentials = aws_rds.Credentials.from_generated_secret("postgres") 50 | 51 | AURORA_POSTGRES_ENGINE_VERSION = aws_rds.AuroraPostgresEngineVersion.VER_15_3 52 | rds_engine = 
aws_rds.DatabaseClusterEngine.aurora_postgres(version=AURORA_POSTGRES_ENGINE_VERSION) 53 | 54 | #XXX: https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/AuroraPostgreSQL.Reference.ParameterGroups.html#AuroraPostgreSQL.Reference.Parameters.Cluster 55 | rds_cluster_param_group = aws_rds.ParameterGroup(self, 'AuroraPostgreSQLClusterParamGroup', 56 | engine=rds_engine, 57 | description=f'Custom cluster parameter group for aurora-postgresql{AURORA_POSTGRES_ENGINE_VERSION.aurora_postgres_major_version}', 58 | parameters={ 59 | 'log_min_duration_statement': '15000', # 15 sec 60 | 'default_transaction_isolation': 'read committed', 61 | 'client_encoding': 'UTF8', 62 | 'rds.allowed_extensions': '*', 63 | 'shared_preload_libraries': 'pg_stat_statements,pg_similarity' 64 | } 65 | ) 66 | 67 | #XXX: https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/AuroraPostgreSQL.Reference.ParameterGroups.html#AuroraPostgreSQL.Reference.Parameters.Instance 68 | rds_db_param_group = aws_rds.ParameterGroup(self, 'AuroraPostgreSQLDBParamGroup', 69 | engine=rds_engine, 70 | description=f'Custom parameter group for aurora-postgresql{AURORA_POSTGRES_ENGINE_VERSION.aurora_postgres_major_version}', 71 | parameters={ 72 | 'log_min_duration_statement': '15000', # 15 sec 73 | 'default_transaction_isolation': 'read committed', 74 | 'rds.allowed_extensions': '*', 75 | 'shared_preload_libraries': 'pg_stat_statements,pg_similarity' 76 | } 77 | ) 78 | 79 | db_cluster = aws_rds.DatabaseCluster(self, 'AuroraPostgresDBCluster', 80 | engine=rds_engine, 81 | credentials=rds_credentials, # A username of 'admin' (or 'postgres' for PostgreSQL) and SecretsManager-generated password 82 | writer=aws_rds.ClusterInstance.provisioned("Writer", 83 | instance_type=aws_ec2.InstanceType.of(aws_ec2.InstanceClass.MEMORY6_GRAVITON, aws_ec2.InstanceSize.LARGE), 84 | parameter_group=rds_db_param_group, 85 | auto_minor_version_upgrade=False, 86 | ), 87 | readers=[ 88 | aws_rds.ClusterInstance.provisioned("Reader", 89 | instance_type=aws_ec2.InstanceType.of(aws_ec2.InstanceClass.MEMORY6_GRAVITON, aws_ec2.InstanceSize.LARGE), 90 | parameter_group=rds_db_param_group, 91 | auto_minor_version_upgrade=False 92 | ) 93 | ], 94 | parameter_group=rds_cluster_param_group, 95 | cloudwatch_logs_retention=aws_logs.RetentionDays.THREE_DAYS, 96 | cluster_identifier=db_cluster_name, 97 | subnet_group=rds_subnet_group, 98 | backup=aws_rds.BackupProps( 99 | retention=cdk.Duration.days(3), 100 | preferred_window="03:00-04:00" 101 | ), 102 | security_groups=[sg_postgresql_server], 103 | vpc=vpc, 104 | vpc_subnets=aws_ec2.SubnetSelection(subnet_type=aws_ec2.SubnetType.PRIVATE_WITH_EGRESS) 105 | ) 106 | db_cluster.apply_removal_policy(cdk.RemovalPolicy.DESTROY) #XXX: For testing 107 | 108 | self.rds_credentials = db_cluster.secret 109 | self.sg_rds_client = sg_postgresql_client 110 | 111 | 112 | cdk.CfnOutput(self, 'DBClusterId', value=db_cluster.cluster_identifier, export_name='VectorDBClusterId') 113 | cdk.CfnOutput(self, 'DBClusterEndpoint', value=db_cluster.cluster_endpoint.socket_address, export_name='VectorDBClusterEndpoint') 114 | cdk.CfnOutput(self, 'DBClusterReadEndpoint', value=db_cluster.cluster_read_endpoint.socket_address, export_name='VectorDBClusterReadEndpoint') 115 | #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_secretsmanager/README.html 116 | # secret_arn="arn:aws:secretsmanager:::secret:-" 117 | cdk.CfnOutput(self, 'DBSecret', value=db_cluster.secret.secret_name, export_name='VectorDBSecret') 118 | cdk.CfnOutput(self, 
'DBClientSecurityGroupId', value=sg_postgresql_client.security_group_id, export_name='VectorDBClientSecurityGroupId') 119 | 120 | -------------------------------------------------------------------------------- /cdk_stacks/rag_with_pgvector/sm_embedding_endpoint.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- encoding: utf-8 -*- 3 | # vim: tabstop=2 shiftwidth=2 softtabstop=2 expandtab 4 | 5 | import random 6 | import string 7 | 8 | import aws_cdk as cdk 9 | 10 | from aws_cdk import ( 11 | Stack 12 | ) 13 | from constructs import Construct 14 | 15 | from cdklabs.generative_ai_cdk_constructs import ( 16 | CustomSageMakerEndpoint, 17 | DeepLearningContainerImage, 18 | SageMakerInstanceType, 19 | ) 20 | 21 | random.seed(47) 22 | 23 | 24 | class EmbeddingEndpointStack(Stack): 25 | 26 | def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None: 27 | super().__init__(scope, construct_id, **kwargs) 28 | 29 | bucket_name = f'jumpstart-cache-prod-{cdk.Aws.REGION}' 30 | key_name = 'huggingface-infer/prepack/v1.0.0/infer-prepack-huggingface-textembedding-gpt-j-6b-fp16.tar.gz' 31 | 32 | RANDOM_GUID = ''.join(random.sample(string.digits, k=7)) 33 | endpoint_name = f"gpt-j-6b-fp16-endpoint-{RANDOM_GUID}" 34 | 35 | #XXX: https://github.com/awslabs/generative-ai-cdk-constructs/blob/main/src/patterns/gen-ai/aws-model-deployment-sagemaker/README_custom_sagemaker_endpoint.md 36 | self.embedding_endpoint = CustomSageMakerEndpoint(self, 'EmbeddingEndpoint', 37 | model_id='gpt-j-6b-fp16', 38 | instance_type=SageMakerInstanceType.ML_G5_2_XLARGE, 39 | container=DeepLearningContainerImage.from_deep_learning_container_image( 40 | 'pytorch-inference', 41 | '1.12.0-gpu-py38' 42 | ), 43 | model_data_url=f's3://{bucket_name}/{key_name}', 44 | endpoint_name=endpoint_name, 45 | instance_count=1, 46 | # volume_size_in_gb=100 47 | ) 48 | 49 | cdk.CfnOutput(self, 'EmbeddingEndpointName', 50 | value=self.embedding_endpoint.cfn_endpoint.endpoint_name, 51 | export_name=f'{self.stack_name}-EmbeddingEndpointName') 52 | cdk.CfnOutput(self, 'EmbeddingEndpointArn', 53 | value=self.embedding_endpoint.endpoint_arn, 54 | export_name=f'{self.stack_name}-EmbeddingEndpointArn') 55 | -------------------------------------------------------------------------------- /cdk_stacks/rag_with_pgvector/sm_llm_endpoint.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- encoding: utf-8 -*- 3 | # vim: tabstop=2 shiftwidth=2 softtabstop=2 expandtab 4 | 5 | import random 6 | import string 7 | 8 | import aws_cdk as cdk 9 | 10 | from aws_cdk import ( 11 | Stack 12 | ) 13 | from constructs import Construct 14 | 15 | from cdklabs.generative_ai_cdk_constructs import ( 16 | JumpStartSageMakerEndpoint, 17 | JumpStartModel, 18 | SageMakerInstanceType 19 | ) 20 | 21 | random.seed(47) 22 | 23 | 24 | def name_from_base(base, max_length=63): 25 | unique = ''.join(random.sample(string.digits, k=7)) 26 | max_length = 63 27 | trimmed_base = base[: max_length - len(unique) - 1] 28 | return "{}-{}".format(trimmed_base, unique) 29 | 30 | 31 | class LLMEndpointStack(Stack): 32 | 33 | def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None: 34 | super().__init__(scope, construct_id, **kwargs) 35 | 36 | jumpstart_model = self.node.try_get_context('jumpstart_model_info') 37 | model_id, model_version = jumpstart_model.get('model_id', 'meta-textgeneration-llama-2-7b-f'), jumpstart_model.get('version', 
'2.0.1') 38 | model_name = f"{model_id.upper().replace('-', '_')}_{model_version.replace('.', '_')}" 39 | 40 | llm_endpoint_name = name_from_base(model_id.replace('/', '-').replace('.', '-')) 41 | 42 | #XXX: Available JumpStart Model List 43 | # https://github.com/awslabs/generative-ai-cdk-constructs/blob/main/src/patterns/gen-ai/aws-model-deployment-sagemaker/jumpstart-model.ts 44 | llm_endpoint = JumpStartSageMakerEndpoint(self, 'LLMEndpoint', 45 | model=JumpStartModel.of(model_name), 46 | accept_eula=True, 47 | instance_type=SageMakerInstanceType.ML_G5_2_XLARGE, 48 | endpoint_name=llm_endpoint_name 49 | ) 50 | 51 | cdk.CfnOutput(self, 'LLMEndpointName', 52 | value=llm_endpoint.cfn_endpoint.endpoint_name, 53 | export_name=f'{self.stack_name}-LLMEndpointName') 54 | cdk.CfnOutput(self, 'LLMEndpointArn', 55 | value=llm_endpoint.endpoint_arn, 56 | export_name=f'{self.stack_name}-LLMEndpointArn') 57 | -------------------------------------------------------------------------------- /cdk_stacks/rag_with_pgvector/sm_studio.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- encoding: utf-8 -*- 3 | # vim: tabstop=2 shiftwidth=2 softtabstop=2 expandtab 4 | 5 | import random 6 | import string 7 | 8 | import aws_cdk as cdk 9 | 10 | from aws_cdk import ( 11 | Stack, 12 | aws_ec2, 13 | aws_iam, 14 | aws_sagemaker 15 | ) 16 | from constructs import Construct 17 | 18 | random.seed(47) 19 | 20 | class SageMakerStudioStack(Stack): 21 | 22 | def __init__(self, scope: Construct, construct_id: str, vpc, sg_rds_client, **kwargs) -> None: 23 | super().__init__(scope, construct_id, **kwargs) 24 | 25 | sagemaker_execution_policy_doc = aws_iam.PolicyDocument() 26 | sagemaker_execution_policy_doc.add_statements(aws_iam.PolicyStatement(**{ 27 | "effect": aws_iam.Effect.ALLOW, 28 | "resources": ["arn:aws:s3:::*"], 29 | "actions": [ 30 | "s3:GetObject", 31 | "s3:PutObject", 32 | "s3:DeleteObject", 33 | "s3:ListBucket" 34 | ] 35 | })) 36 | 37 | sagemaker_custom_access_policy_doc = aws_iam.PolicyDocument() 38 | sagemaker_custom_access_policy_doc.add_statements(aws_iam.PolicyStatement(**{ 39 | "effect": aws_iam.Effect.ALLOW, 40 | "resources": [f"arn:aws:secretsmanager:{cdk.Aws.REGION}:{cdk.Aws.ACCOUNT_ID}:secret:*"], 41 | "actions": ["secretsmanager:GetSecretValue"] 42 | })) 43 | 44 | sagemaker_docker_build_policy_doc = aws_iam.PolicyDocument() 45 | sagemaker_docker_build_policy_doc.add_statements(aws_iam.PolicyStatement(**{ 46 | "effect": aws_iam.Effect.ALLOW, 47 | "resources": ["*"], 48 | "actions": ["ecr:GetAuthorizationToken"] 49 | })) 50 | 51 | sagemaker_docker_build_policy_doc.add_statements(aws_iam.PolicyStatement(**{ 52 | "effect": aws_iam.Effect.ALLOW, 53 | "resources": ["*"], 54 | "actions": [ 55 | "ecr:BatchGetImage", 56 | "ecr:BatchCheckLayerAvailability", 57 | "ecr:CompleteLayerUpload", 58 | "ecr:DescribeImages", 59 | "ecr:DescribeRepositories", 60 | "ecr:GetDownloadUrlForLayer", 61 | "ecr:InitiateLayerUpload", 62 | "ecr:ListImages", 63 | "ecr:PutImage", 64 | "ecr:UploadLayerPart", 65 | "ecr:CreateRepository", 66 | "ecr:GetAuthorizationToken", 67 | "ec2:DescribeAvailabilityZones" 68 | ] 69 | })) 70 | 71 | sagemaker_docker_build_policy_doc.add_statements(aws_iam.PolicyStatement(**{ 72 | "effect": aws_iam.Effect.ALLOW, 73 | "resources": ["arn:aws:codebuild:*:*:project/sagemaker-studio*"], 74 | "actions": [ 75 | "codebuild:DeleteProject", 76 | "codebuild:CreateProject", 77 | "codebuild:BatchGetBuilds", 78 | "codebuild:StartBuild" 79 | ] 80 | 
})) 81 | 82 | sagemaker_docker_build_policy_doc.add_statements(aws_iam.PolicyStatement(**{ 83 | "effect": aws_iam.Effect.ALLOW, 84 | "resources": ["arn:aws:logs:*:*:log-group:/aws/codebuild/sagemaker-studio*"], 85 | "actions": ["logs:CreateLogStream"], 86 | })) 87 | 88 | sagemaker_docker_build_policy_doc.add_statements(aws_iam.PolicyStatement(**{ 89 | "effect": aws_iam.Effect.ALLOW, 90 | "resources": ["arn:aws:logs:*:*:log-group:/aws/codebuild/sagemaker-studio*:log-stream:*"], 91 | "actions": [ 92 | "logs:GetLogEvents", 93 | "logs:PutLogEvents" 94 | ] 95 | })) 96 | 97 | sagemaker_docker_build_policy_doc.add_statements(aws_iam.PolicyStatement(**{ 98 | "effect": aws_iam.Effect.ALLOW, 99 | "resources": ["*"], 100 | "actions": ["logs:CreateLogGroup"] 101 | })) 102 | 103 | sagemaker_docker_build_policy_doc.add_statements(aws_iam.PolicyStatement(**{ 104 | "effect": aws_iam.Effect.ALLOW, 105 | "resources": ["arn:aws:s3:::sagemaker-*/*"], 106 | "actions": [ 107 | "s3:GetObject", 108 | "s3:DeleteObject", 109 | "s3:PutObject" 110 | ] 111 | })) 112 | 113 | sagemaker_docker_build_policy_doc.add_statements(aws_iam.PolicyStatement(**{ 114 | "effect": aws_iam.Effect.ALLOW, 115 | "resources": ["arn:aws:s3:::sagemaker*"], 116 | "actions": ["s3:CreateBucket"], 117 | })) 118 | 119 | sagemaker_docker_build_policy_doc.add_statements(aws_iam.PolicyStatement(**{ 120 | "effect": aws_iam.Effect.ALLOW, 121 | "resources": ["*"], 122 | "actions": [ 123 | "iam:GetRole", 124 | "iam:ListRoles" 125 | ] 126 | })) 127 | 128 | sagemaker_docker_build_policy_doc.add_statements(aws_iam.PolicyStatement(**{ 129 | "effect": aws_iam.Effect.ALLOW, 130 | "resources": ["arn:aws:iam::*:role/*"], 131 | "conditions": { 132 | "StringLikeIfExists": { 133 | "iam:PassedToService": [ 134 | "codebuild.amazonaws.com" 135 | ] 136 | } 137 | }, 138 | "actions": ["iam:PassRole"] 139 | })) 140 | 141 | sagemaker_execution_role = aws_iam.Role(self, 'SageMakerExecutionRole', 142 | role_name='AmazonSageMakerStudioExecutionRole-{suffix}'.format(suffix=''.join(random.choices((string.digits), k=5))), 143 | assumed_by=aws_iam.ServicePrincipal('sagemaker.amazonaws.com'), 144 | path='/', 145 | inline_policies={ 146 | 'sagemaker-execution-policy': sagemaker_execution_policy_doc, 147 | 'sagemaker-custom-access-policy': sagemaker_custom_access_policy_doc, 148 | 'sagemaker-docker-build-policy': sagemaker_docker_build_policy_doc, 149 | }, 150 | managed_policies=[ 151 | aws_iam.ManagedPolicy.from_aws_managed_policy_name('AmazonSageMakerFullAccess'), 152 | aws_iam.ManagedPolicy.from_aws_managed_policy_name('AmazonSageMakerCanvasFullAccess'), 153 | aws_iam.ManagedPolicy.from_aws_managed_policy_name('AWSCloudFormationReadOnlyAccess'), 154 | # aws_iam.ManagedPolicy.from_aws_managed_policy_name('AmazonVPCReadOnlyAccess'), 155 | aws_iam.ManagedPolicy.from_aws_managed_policy_name('AmazonRDSReadOnlyAccess'), 156 | ] 157 | ) 158 | 159 | #XXX: To use the sm-docker CLI, the Amazon SageMaker execution role used by the Studio notebook 160 | # environment should have a trust policy with CodeBuild 161 | sagemaker_execution_role.assume_role_policy.add_statements(aws_iam.PolicyStatement(**{ 162 | "effect": aws_iam.Effect.ALLOW, 163 | "principals": [aws_iam.ServicePrincipal('codebuild.amazonaws.com')], 164 | "actions": ["sts:AssumeRole"] 165 | })) 166 | 167 | sm_studio_user_settings = aws_sagemaker.CfnDomain.UserSettingsProperty( 168 | execution_role=sagemaker_execution_role.role_arn 169 | ) 170 | 171 | sg_sagemaker_domain = aws_ec2.SecurityGroup(self, 'SageMakerDomainSG', 172 | 
vpc=vpc, 173 | allow_all_outbound=True, 174 | description='security group for sagemaker studio domain', 175 | security_group_name='sagemaker-domain-sg' 176 | ) 177 | sg_sagemaker_domain.add_ingress_rule(peer=sg_sagemaker_domain, connection=aws_ec2.Port.all_tcp(), 178 | description='All traffic within the sagemaker domain security group') 179 | sg_sagemaker_domain.add_ingress_rule(peer=aws_ec2.Peer.ipv4("0.0.0.0/0"), connection=aws_ec2.Port.tcp(443), 180 | description='https') 181 | cdk.Tags.of(sg_sagemaker_domain).add('Name', 'sagemaker-domain-sg') 182 | 183 | sm_studio_domain_name = self.node.try_get_context('sagemaker_studio_domain_name') or 'llm-app-rag-pgvector' 184 | 185 | sagemaker_studio_domain = aws_sagemaker.CfnDomain(self, 'SageMakerStudioDomain', 186 | auth_mode='IAM', # [SSO | IAM] 187 | default_user_settings=sm_studio_user_settings, 188 | domain_name=sm_studio_domain_name, 189 | subnet_ids=vpc.select_subnets(subnet_type=aws_ec2.SubnetType.PRIVATE_WITH_EGRESS).subnet_ids, 190 | vpc_id=vpc.vpc_id, 191 | app_network_access_type='VpcOnly', # [PublicInternetOnly | VpcOnly] 192 | domain_settings=aws_sagemaker.CfnDomain.DomainSettingsProperty( 193 | security_group_ids=[sg_sagemaker_domain.security_group_id] 194 | ) 195 | ) 196 | 197 | #XXX: https://docs.aws.amazon.com/sagemaker/latest/dg/studio-jl.html#studio-jl-set 198 | sagemaker_jupyterlab_arn = self.node.try_get_context('sagmaker_jupyterlab_arn') 199 | 200 | default_user_settings = aws_sagemaker.CfnUserProfile.UserSettingsProperty( 201 | jupyter_server_app_settings=aws_sagemaker.CfnUserProfile.JupyterServerAppSettingsProperty( 202 | default_resource_spec=aws_sagemaker.CfnUserProfile.ResourceSpecProperty( 203 | #XXX: JupyterServer apps only support the system value. 204 | instance_type="system", 205 | sage_maker_image_arn=sagemaker_jupyterlab_arn 206 | ) 207 | ), 208 | security_groups=[ 209 | sg_sagemaker_domain.security_group_id, 210 | sg_rds_client.security_group_id 211 | ] 212 | ) 213 | 214 | sagemaker_user_profile = aws_sagemaker.CfnUserProfile(self, 'SageMakerStudioUserProfile', 215 | domain_id=sagemaker_studio_domain.attr_domain_id, 216 | user_profile_name='default-user', 217 | user_settings=default_user_settings 218 | ) 219 | 220 | 221 | cdk.CfnOutput(self, 'DomainUrl', value=sagemaker_studio_domain.attr_url, 222 | export_name=f'{self.stack_name}-DomainUrl') 223 | cdk.CfnOutput(self, 'DomainId', value=sagemaker_user_profile.domain_id, 224 | export_name=f'{self.stack_name}-DomainId') 225 | cdk.CfnOutput(self, 'UserProfileName', value=sagemaker_user_profile.user_profile_name, 226 | export_name=f'{self.stack_name}-UserProfileName') 227 | cdk.CfnOutput(self, 'DomainSecurityGroupId', value=sg_sagemaker_domain.security_group_id, 228 | export_name=f'{self.stack_name}-DomainSecurityGroupId') -------------------------------------------------------------------------------- /cdk_stacks/rag_with_pgvector/vpc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- encoding: utf-8 -*- 3 | # vim: tabstop=2 shiftwidth=2 softtabstop=2 expandtab 4 | 5 | import os 6 | import aws_cdk as cdk 7 | 8 | from aws_cdk import ( 9 | Stack, 10 | aws_ec2, 11 | ) 12 | from constructs import Construct 13 | 14 | 15 | class VpcStack(Stack): 16 | 17 | def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None: 18 | super().__init__(scope, construct_id, **kwargs) 19 | 20 | #XXX: To create the CDK stacks in an existing VPC, 21 | # uncomment the code below and 22 | # 
comment out the vpc = aws_ec2.Vpc(..) code, 23 | # then pass -c vpc_name=your-existing-vpc to the cdk command, 24 | # for example, 25 | # cdk -c vpc_name=your-existing-vpc synth 26 | # 27 | if str(os.environ.get('USE_DEFAULT_VPC', 'false')).lower() == 'true': 28 | vpc_name = self.node.try_get_context('vpc_name') or 'default' 29 | self.vpc = aws_ec2.Vpc.from_lookup(self, 'ExistingVPC', 30 | is_default=True, 31 | vpc_name=vpc_name 32 | ) 33 | else: 34 | #XXX: To use more than 2 AZs, be sure to specify the account and region on your stack. 35 | #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/Vpc.html 36 | self.vpc = aws_ec2.Vpc(self, 'RAGAppVPC', 37 | ip_addresses=aws_ec2.IpAddresses.cidr("10.0.0.0/16"), 38 | max_azs=3, 39 | 40 | # 'subnetConfiguration' specifies the "subnet groups" to create. 41 | # Every subnet group will have a subnet for each AZ, so this 42 | # configuration will create `2 groups × 3 AZs = 6` subnets. 43 | subnet_configuration=[ 44 | { 45 | "cidrMask": 20, 46 | "name": "Public", 47 | "subnetType": aws_ec2.SubnetType.PUBLIC, 48 | }, 49 | { 50 | "cidrMask": 20, 51 | "name": "Private", 52 | "subnetType": aws_ec2.SubnetType.PRIVATE_WITH_EGRESS 53 | } 54 | ], 55 | gateway_endpoints={ 56 | "S3": aws_ec2.GatewayVpcEndpointOptions( 57 | service=aws_ec2.GatewayVpcEndpointAwsService.S3 58 | ) 59 | } 60 | ) 61 | 62 | cdk.CfnOutput(self, 'VPCID', value=self.vpc.vpc_id, 63 | export_name=f'{self.stack_name}-VPCID') 64 | -------------------------------------------------------------------------------- /cdk_stacks/rag_with_pgvector_arch.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 |
[Architecture diagram: a Streamlit web application takes the user question and returns the LLM-generated response. Real-time flow on user query: the app calls an Amazon SageMaker endpoint (LLM for embeddings) and an Amazon SageMaker endpoint (LLM for text generation), and retrieves matching documents from Amazon Aurora Postgresql (vector database). Offline data ingestion: an Amazon SageMaker processing job converts the knowledge corpus in Amazon S3 into embeddings and loads them into the vector database. All components run in a single AWS account.]
-------------------------------------------------------------------------------- /cdk_stacks/requirements.txt: -------------------------------------------------------------------------------- 1 | aws-cdk-lib==2.171.1 2 | constructs>=10.0.0,<11.0.0 3 | cdklabs.generative-ai-cdk-constructs==0.1.286 -------------------------------------------------------------------------------- /cdk_stacks/source.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | 3 | rem The sole purpose of this script is to make the command 4 | rem 5 | rem source .venv/bin/activate 6 | rem 7 | rem (which activates a Python virtualenv on Linux or Mac OS X) work on Windows. 8 | rem On Windows, this command just runs this batch file (the argument is ignored). 9 | rem 10 | rem Now we don't need to document a Windows command for activating a virtualenv. 11 | 12 | echo Executing .venv\Scripts\activate.bat for you 13 | .venv\Scripts\activate.bat 14 | -------------------------------------------------------------------------------- /data_ingestion_to_vectordb/container/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10.13-slim 2 | 3 | # pip leaves the install caches populated which uses a 4 | # significant amount of space. These optimizations save a fair 5 | # amount of space in the image, which reduces start up time. 6 | RUN pip --no-cache-dir install -U pip 7 | RUN pip --no-cache-dir install boto3==1.33.9 \ 8 | langchain==0.2.5 \ 9 | langchain-community==0.2.4 \ 10 | langchain-postgres==0.0.7 \ 11 | SQLAlchemy==2.0.28 \ 12 | psycopg[binary]==3.1.19 \ 13 | pgvector==0.2.5 \ 14 | beautifulsoup4==4.12.3 15 | 16 | 17 | # Include python script for retrieving credentials 18 | # from AWS SecretsManager and Sagemaker helper classes 19 | ADD credentials.py /code/ 20 | ADD sm_helper.py /code/ 21 | 22 | # Set some environment variables. PYTHONUNBUFFERED keeps Python from buffering our standard 23 | # output stream, which means that logs can be delivered to the user quickly. PYTHONDONTWRITEBYTECODE 24 | # keeps Python from writing the .pyc files, which are unnecessary in this case. 25 | # (No PATH changes are needed here; the processing job invokes the script with python3 directly.)
26 | ENV PYTHONUNBUFFERED=TRUE 27 | ENV PYTHONDONTWRITEBYTECODE=TRUE -------------------------------------------------------------------------------- /data_ingestion_to_vectordb/container/credentials.py: -------------------------------------------------------------------------------- 1 | """ 2 | Retrieve the database credentials (username, password, host, port) stored in a given AWS Secrets Manager secret 3 | """ 4 | import json 5 | import boto3 6 | 7 | def get_credentials(secret_id: str, region_name: str) -> dict: 8 | 9 | client = boto3.client('secretsmanager', region_name=region_name) 10 | response = client.get_secret_value(SecretId=secret_id) 11 | secrets_value = json.loads(response['SecretString']) 12 | 13 | return secrets_value -------------------------------------------------------------------------------- /data_ingestion_to_vectordb/container/load_data_into_pgvector.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | # this is needed because credentials.py and sm_helper.py 5 | # are in the /code directory of the custom container we are going 6 | # to create for the Sagemaker Processing Job 7 | sys.path.insert(1, '/code') 8 | 9 | import glob 10 | import time 11 | import logging 12 | import argparse 13 | import multiprocessing as mp 14 | from functools import partial 15 | 16 | import urllib 17 | 18 | import numpy as np 19 | 20 | from langchain_community.document_loaders import ReadTheDocsLoader 21 | from langchain_postgres import PGVector 22 | from langchain.text_splitter import RecursiveCharacterTextSplitter 23 | 24 | from credentials import get_credentials 25 | from sm_helper import create_sagemaker_embeddings_from_js_model 26 | 27 | 28 | logger = logging.getLogger() 29 | logging.basicConfig(format='%(asctime)s,%(module)s,%(processName)s,%(levelname)s,%(message)s', level=logging.INFO, stream=sys.stderr) 30 | 31 | 32 | def process_shard(shard, embeddings_model_endpoint_name, aws_region, collection_name, connection_string) -> int: 33 | logger.info(f'Starting process_shard of {len(shard)} chunks.') 34 | st = time.time() 35 | 36 | embeddings = create_sagemaker_embeddings_from_js_model(embeddings_model_endpoint_name, aws_region) 37 | 38 | vectordb = PGVector.from_existing_index( 39 | embedding=embeddings, 40 | collection_name=collection_name, 41 | connection=connection_string) 42 | 43 | vectordb.add_documents(documents=shard) 44 | 45 | et = time.time() - st 46 | logger.info(f'Shard completed in {et} seconds.') 47 | return 0 48 | 49 | 50 | if __name__ == "__main__": 51 | parser = argparse.ArgumentParser() 52 | 53 | parser.add_argument("--pgvector-secretid", type=str, default=None) 54 | parser.add_argument("--pgvector-collection-name", type=str, default=None) 55 | 56 | parser.add_argument("--aws-region", type=str, default="us-east-1") 57 | parser.add_argument("--embeddings-model-endpoint-name", type=str, default=None) 58 | parser.add_argument("--chunk-size-for-doc-split", type=int, default=500) 59 | parser.add_argument("--chunk-overlap-for-doc-split", type=int, default=30) 60 | parser.add_argument("--input-data-dir", type=str, default="/opt/ml/processing/input_data") 61 | parser.add_argument("--max-docs-per-put", type=int, default=10) 62 | parser.add_argument("--process-count", type=int, default=1) 63 | parser.add_argument("--create-index-hint-file", type=str, default="_create_index_hint") 64 | 65 | args, _ = parser.parse_known_args() 66 | logger.info("Received arguments {}".format(args)) 67 | 68 | # list all the files 69 | files =
glob.glob(os.path.join(args.input_data_dir, "*.*")) 70 | logger.info(f"there are {len(files)} files to process in the {args.input_data_dir} folder") 71 | 72 | # retrieve secret to talk to Amazon Aurora Postgresql 73 | secret = get_credentials(args.pgvector_secretid, args.aws_region) 74 | db_username = secret['username'] 75 | db_password = urllib.parse.quote_plus(secret['password']) 76 | db_port = secret['port'] 77 | db_host = secret['host'] 78 | 79 | CONNECTION_STRING = PGVector.connection_string_from_db_params( 80 | driver = 'psycopg', 81 | user = db_username, 82 | password = db_password, 83 | host = db_host, 84 | port = db_port, 85 | database = '' 86 | ) 87 | 88 | logger.info(f'input-data-dir: {args.input_data_dir}') 89 | loader = ReadTheDocsLoader(args.input_data_dir) 90 | text_splitter = RecursiveCharacterTextSplitter( 91 | # Set a really small chunk size, just to show. 92 | chunk_size=args.chunk_size_for_doc_split, 93 | chunk_overlap=args.chunk_overlap_for_doc_split, 94 | length_function=len, 95 | ) 96 | 97 | # Stage one: read all the docs, split them into chunks. 98 | st = time.time() 99 | 100 | logger.info('Loading documents ...') 101 | docs = loader.load() 102 | logger.info(f'{len(docs)} documents have been loaded') 103 | 104 | # add a custom metadata field, such as timestamp 105 | for doc in docs: 106 | doc.metadata['timestamp'] = time.time() 107 | doc.metadata['embeddings_model'] = args.embeddings_model_endpoint_name 108 | chunks = text_splitter.create_documents([doc.page_content for doc in docs], metadatas=[doc.metadata for doc in docs]) 109 | 110 | et = time.time() - st 111 | logger.info(f'Time taken: {et} seconds. {len(chunks)} chunks generated') 112 | 113 | db_shards = (len(chunks) // args.max_docs_per_put) + 1 114 | print(f'Loading chunks into vector store ... 
using {db_shards} shards') 115 | 116 | st = time.time() 117 | shards = np.array_split(chunks, db_shards) 118 | 119 | path = os.path.join(args.input_data_dir, args.create_index_hint_file) 120 | if os.path.isfile(path) is True: 121 | logger.info(f"{path} file is present, " 122 | f"will try to create the {args.pgvector_collection_name} collection") 123 | 124 | embeddings = create_sagemaker_embeddings_from_js_model(args.embeddings_model_endpoint_name, args.aws_region) 125 | _ = PGVector(collection_name=args.pgvector_collection_name, 126 | connection=CONNECTION_STRING, 127 | embeddings=embeddings) 128 | else: 129 | logger.info(f"{path} file is not present, " 130 | f"will wait for some other node to create the {args.pgvector_collection_name} collection") 131 | time.sleep(5) 132 | 133 | with mp.Pool(processes = args.process_count) as pool: 134 | results = pool.map(partial(process_shard, 135 | embeddings_model_endpoint_name=args.embeddings_model_endpoint_name, 136 | aws_region=args.aws_region, 137 | collection_name=args.pgvector_collection_name, 138 | connection_string=CONNECTION_STRING), 139 | shards) 140 | 141 | et = time.time() - st 142 | logger.info(f'run time in seconds: {et:.2f}') 143 | logger.info("all done") 144 | -------------------------------------------------------------------------------- /data_ingestion_to_vectordb/container/sm_helper.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helper functions for using Sagemaker Endpoint via langchain 3 | """ 4 | import json 5 | import logging 6 | from typing import List 7 | 8 | from langchain_community.embeddings import SagemakerEndpointEmbeddings 9 | from langchain_community.embeddings.sagemaker_endpoint import EmbeddingsContentHandler 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | 14 | def create_sagemaker_embeddings_from_js_model(embeddings_model_endpoint_name: str, aws_region: str = 'us-east-1') -> SagemakerEndpointEmbeddings: 15 | 16 | # class for serializing/deserializing requests/responses to/from the embeddings model 17 | class ContentHandler(EmbeddingsContentHandler): 18 | content_type = "application/json" 19 | accepts = "application/json" 20 | 21 | def transform_input(self, prompt: str, model_kwargs={}) -> bytes: 22 | input_str = json.dumps({"text_inputs": prompt, **model_kwargs}) 23 | return input_str.encode('utf-8') 24 | 25 | def transform_output(self, output: bytes) -> List[List[float]]: 26 | response_json = json.loads(output.read().decode("utf-8")) 27 | embeddings = response_json["embedding"] 28 | if len(embeddings) == 1: 29 | return [embeddings[0]] 30 | return embeddings 31 | 32 | # all set to create the objects for the ContentHandler and 33 | # SagemakerEndpointEmbeddings classes 34 | content_handler = ContentHandler() 35 | 36 | # note the name of the embeddings model's Sagemaker endpoint, this is the model that we would 37 | # be using for generating the embeddings 38 | embeddings = SagemakerEndpointEmbeddings( 39 | endpoint_name=embeddings_model_endpoint_name, 40 | region_name=aws_region, 41 | content_handler=content_handler 42 | ) 43 | return embeddings -------------------------------------------------------------------------------- /data_ingestion_to_vectordb/data_ingestion_to_pgvector.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "93179240-9c5f-4ba6-a1c7-3a981624f794", 6 | "metadata": {}, 7 | "source": [ 8 | "# Ingest massive amounts of data to a Vector DB (Amazon Aurora Postgresql with
pgvector)\n", 9 | "**_Use of Amazon Aurora Postgresql as a vector database for storing embeddings_**\n", 10 | "\n", 11 | "This notebook works well on `ml.t3.medium` instance with `Python3` kernel from **JupyterLab** or `Data Science 2.0` kernel from **SageMaker Studio Classic**.\n", 12 | "\n", 13 | "Here is a list of packages that are used in this notebook.\n", 14 | "\n", 15 | "```\n", 16 | "!pip list | grep -E -w \"sagemaker_studio_image_build|ipython-sql|langchain|psycopg|pgvector|numpy|sh\"\n", 17 | "-----------------------------------------------------------------------------------------------------\n", 18 | "ipython-sql 0.5.0\n", 19 | "langchain 0.2.5\n", 20 | "langchain-community 0.2.4\n", 21 | "langchain-core 0.2.43\n", 22 | "langchain-postgres 0.0.7\n", 23 | "langchain-text-splitters 0.2.4\n", 24 | "numpy 1.26.4\n", 25 | "pgvector 0.2.5\n", 26 | "psycopg 3.1.19\n", 27 | "psycopg-binary 3.1.19\n", 28 | "psycopg-pool 3.2.4\n", 29 | "sagemaker_studio_image_build 0.6.0\n", 30 | "sh 2.0.4\n", 31 | "```" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "id": "79aae52c-cd7a-4637-a07d-9c0131dc7d0a", 37 | "metadata": {}, 38 | "source": [ 39 | "## Step 1: Setup\n", 40 | "Install the required packages." 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "id": "87e64f84-b7ac-427d-b5a8-cf98b430be9b", 47 | "metadata": { 48 | "tags": [] 49 | }, 50 | "outputs": [], 51 | "source": [ 52 | "%%capture --no-stderr\n", 53 | "\n", 54 | "!pip install -U langchain==0.2.5\n", 55 | "!pip install -U langchain-community==0.2.4\n", 56 | "!pip install -U langchain-postgres==0.0.7\n", 57 | "!pip install -U SQLAlchemy==2.0.28\n", 58 | "!pip install -U pgvector==0.2.5\n", 59 | "!pip install -U psycopg[binary]==3.1.19\n", 60 | "!pip install -U ipython-sql==0.5.0\n", 61 | "!pip install -U sh==2.0.4\n", 62 | "!pip install -U sagemaker-studio-image-build==0.6.0" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "id": "d88757ba-7ae1-4efb-9c02-ab17ec22e79a", 69 | "metadata": { 70 | "tags": [] 71 | }, 72 | "outputs": [], 73 | "source": [ 74 | "!pip list | grep -E -w \"sagemaker_studio_image_build|ipython-sql|langchain|psycopg|pgvector|numpy|sh\"" 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "id": "c017bc3f-e507-4f0c-b640-ea774c5ea9c8", 80 | "metadata": {}, 81 | "source": [ 82 | "## Step 2: Download the data from the web and upload to S3\n", 83 | "\n", 84 | "In this step we use `wget` to crawl a Python documentation style website data. All files other than `html`, `txt` and `md` are removed. **This data download would take a few minutes**." 
85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "id": "5c2b8c14-0ffc-4090-adf1-c2a8a1bdebaa", 91 | "metadata": { 92 | "tags": [] 93 | }, 94 | "outputs": [], 95 | "source": [ 96 | "WEBSITE = \"https://sagemaker.readthedocs.io/en/stable/\"\n", 97 | "DOMAIN = \"sagemaker.readthedocs.io\"\n", 98 | "DATA_DIR = \"docs\"" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "id": "0eb232ee-6b62-4718-9104-345fe7978703", 105 | "metadata": { 106 | "tags": [] 107 | }, 108 | "outputs": [], 109 | "source": [ 110 | "!python ./scripts/get_data.py --website {WEBSITE} --domain {DOMAIN} --output-dir {DATA_DIR}" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "id": "8ee1fbb8-583a-4c41-a831-715e4250ff3c", 117 | "metadata": { 118 | "tags": [] 119 | }, 120 | "outputs": [], 121 | "source": [ 122 | "import boto3\n", 123 | "import sagemaker\n", 124 | "\n", 125 | "sagemaker_session = sagemaker.session.Session()\n", 126 | "aws_region = boto3.Session().region_name\n", 127 | "bucket = sagemaker_session.default_bucket()" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "id": "6c127969-4abc-4a31-8829-c00bee321a95", 134 | "metadata": { 135 | "tags": [] 136 | }, 137 | "outputs": [], 138 | "source": [ 139 | "CREATE_OS_INDEX_HINT_FILE = \"_create_index_hint\"\n", 140 | "app_name = 'llm-app-rag'" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": null, 146 | "id": "25217f27-4995-4da5-8fc4-b1b9533185b5", 147 | "metadata": { 148 | "tags": [] 149 | }, 150 | "outputs": [], 151 | "source": [ 152 | "# create a dummy file called _create_index_hint to provide a hint for Postgresql index creation\n", 153 | "# this is needed for the Sagemaker Processing Job when there are multiple instance nodes\n", 154 | "# all running the same code for data ingestion but only one node needs to create the index\n", 155 | "!touch {DATA_DIR}/{CREATE_OS_INDEX_HINT_FILE}\n", 156 | "\n", 157 | "# upload this data to S3, to be used when we run the Sagemaker Processing Job\n", 158 | "!aws s3 cp --recursive {DATA_DIR}/ s3://{bucket}/{app_name}/{DOMAIN}" 159 | ] 160 | }, 161 | { 162 | "cell_type": "markdown", 163 | "id": "743e8296", 164 | "metadata": {}, 165 | "source": [ 166 | "## Step 3: Setup Aurora Postgresql with pgvector" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": null, 172 | "id": "28b236a7-b5d2-494e-a3e3-baec94adc3d4", 173 | "metadata": { 174 | "tags": [] 175 | }, 176 | "outputs": [], 177 | "source": [ 178 | "import sys\n", 179 | "import logging\n", 180 | "\n", 181 | "\n", 182 | "logger = logging.getLogger()\n", 183 | "logging.basicConfig(format='%(asctime)s,%(module)s,%(processName)s,%(levelname)s,%(message)s', level=logging.INFO, stream=sys.stderr)" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": null, 189 | "id": "0c8d9a38-ae89-44af-83db-657dd7e851d5", 190 | "metadata": { 191 | "tags": [] 192 | }, 193 | "outputs": [], 194 | "source": [ 195 | "import json\n", 196 | "from typing import List\n", 197 | "import boto3\n", 198 | "\n", 199 | "\n", 200 | "def get_cfn_outputs(stack_name: str, region_name: str = 'us-east-1') -> dict:\n", 201 | " cfn = boto3.client('cloudformation', region_name=region_name)\n", 202 | " outputs = {}\n", 203 | " for output in cfn.describe_stacks(StackName=stack_name)['Stacks'][0]['Outputs']:\n", 204 | " outputs[output['OutputKey']] = output['OutputValue']\n", 205 | " return outputs\n", 206 | "\n", 207
| "def get_secret_name(stack_name: str, region_name: str = 'us-east-1'):\n", 208 | " cf_client = boto3.client('cloudformation', region_name=region_name)\n", 209 | " response = cf_client.describe_stacks(StackName=stack_name)\n", 210 | " outputs = response[\"Stacks\"][0][\"Outputs\"]\n", 211 | "\n", 212 | " secrets = [e for e in outputs if e['ExportName'] == 'VectorDBSecret'][0]\n", 213 | " secret_name = secrets['OutputValue']\n", 214 | " return secret_name\n", 215 | "\n", 216 | "def get_secret(secret_name: str, region_name: str = 'us-east-1'):\n", 217 | " client = boto3.client('secretsmanager', region_name=region_name)\n", 218 | " get_secret_value_response = client.get_secret_value(SecretId=secret_name)\n", 219 | " secret = get_secret_value_response['SecretString']\n", 220 | "\n", 221 | " return json.loads(secret)\n", 222 | "\n", 223 | "def get_db_subnet_ids(stack_name: str, region_name: str = 'us-east-1'):\n", 224 | " cfn_outputs = get_cfn_outputs(stack_name, region_name)\n", 225 | " db_cluster_id = cfn_outputs['DBClusterId']\n", 226 | "\n", 227 | " rds_client = boto3.client('rds', region_name=region_name)\n", 228 | " db_cluster_info = rds_client.describe_db_clusters(DBClusterIdentifier=db_cluster_id)\n", 229 | " db_subnet_group_name = db_cluster_info['DBClusters'][0]['DBSubnetGroup']\n", 230 | " db_subnet_info = rds_client.describe_db_subnet_groups(DBSubnetGroupName=db_subnet_group_name)\n", 231 | " db_subnet_ids = [e['SubnetIdentifier'] for e in db_subnet_info['DBSubnetGroups'][0]['Subnets']]\n", 232 | "\n", 233 | " return db_subnet_ids" 234 | ] 235 | }, 236 | { 237 | "cell_type": "markdown", 238 | "id": "41d74214", 239 | "metadata": {}, 240 | "source": [ 241 | "##### Create the pgvector extension on your Aurora PostgreSQL database (DB) cluster\n", 242 | "\n", 243 | "[pgvector](https://github.com/pgvector/pgvector) is an open-source extension for PostgreSQL that adds the ability to store and search over ML-generated vector embeddings. pgvector provides different capabilities that let you identify both exact and approximate nearest neighbors. It’s designed to work seamlessly with other PostgreSQL features, including indexing and querying. Using ChatGPT and other LLM tooling often requires storing the output of these systems, i.e., vector embeddings, in a permanent storage system for retrieval at a later time." 
244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": null, 249 | "id": "40e73e00", 250 | "metadata": {}, 251 | "outputs": [], 252 | "source": [ 253 | "%config SqlMagic.style = '_DEPRECATED_DEFAULT' # Ensure that the SqlMagic style is compatible with the previous version" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": null, 259 | "id": "387c9ff5", 260 | "metadata": {}, 261 | "outputs": [], 262 | "source": [ 263 | "%load_ext sql" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": null, 269 | "id": "71514284", 270 | "metadata": {}, 271 | "outputs": [], 272 | "source": [ 273 | "import urllib\n", 274 | "\n", 275 | "CFN_STACK_NAME = \"RAGPgVectorStack\" # name of CloudFormation stack\n", 276 | "\n", 277 | "secret_name = get_secret_name(CFN_STACK_NAME)\n", 278 | "secret = get_secret(secret_name)\n", 279 | "\n", 280 | "db_username = secret['username']\n", 281 | "db_password = urllib.parse.quote_plus(secret['password'])\n", 282 | "db_port = secret['port']\n", 283 | "db_host = secret['host']\n", 284 | "\n", 285 | "driver = 'psycopg'\n", 286 | "\n", 287 | "connection_string = f\"postgresql+{driver}://{db_username}:{db_password}@{db_host}:{db_port}/\"\n", 288 | "connection_string" 289 | ] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "execution_count": null, 294 | "id": "ab4e9407", 295 | "metadata": {}, 296 | "outputs": [], 297 | "source": [ 298 | "%sql $connection_string" 299 | ] 300 | }, 301 | { 302 | "cell_type": "code", 303 | "execution_count": null, 304 | "id": "ed914bc2", 305 | "metadata": {}, 306 | "outputs": [], 307 | "source": [ 308 | "%%sql\n", 309 | "\n", 310 | "CREATE EXTENSION IF NOT EXISTS vector;" 311 | ] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "execution_count": null, 316 | "id": "ca3e0253", 317 | "metadata": {}, 318 | "outputs": [], 319 | "source": [ 320 | "%%sql\n", 321 | "\n", 322 | "SELECT typname\n", 323 | "FROM pg_type\n", 324 | "WHERE typname = 'vector';" 325 | ] 326 | }, 327 | { 328 | "cell_type": "markdown", 329 | "id": "b62a4e9f", 330 | "metadata": {}, 331 | "source": [ 332 | "## Step 4: Load data into Aurora Postgresql with pgvector\n", 333 | "\n", 334 | "- Option 1) Parallel loading data with SageMaker Processing Job\n", 335 | "- Option 2) Sequential loading data with Document Loader" 336 | ] 337 | }, 338 | { 339 | "cell_type": "markdown", 340 | "id": "2a04dfb8", 341 | "metadata": {}, 342 | "source": [ 343 | "### Option 1) Parallel loading data with SageMaker Processing Job\n", 344 | "\n", 345 | "We now have a working script that can ingest data into Aurora Postgresql. But for this to work for massive amounts of data we need to scale up the processing by running this code in a distributed fashion. We will do this using a Sagemaker Processing Job. This involves the following steps:\n", 346 | "\n", 347 | "1. Create a custom container in which we will install the `langchain`, `psycopg` and `pgvector` packages and then upload this container image to Amazon Elastic Container Registry (ECR).\n", 348 | "2. 
Use the Sagemaker `ScriptProcessor` class to create a Sagemaker Processing job that will run on multiple nodes.\n", 349 | " - The data files available in S3 are automatically distributed across the Sagemaker Processing Job instances by setting `s3_data_distribution_type='ShardedByS3Key'` as part of the `ProcessingInput` provided to the processing job.\n", 350 | " - Each node processes a subset of the files and this brings down the overall time required to ingest the data into Aurora Postgresql.\n", 351 | " - Each node also uses Python `multiprocessing` to parallelize the file processing internally. Thus, **there are two levels of parallelization happening, one at the cluster level where individual nodes are distributing the work (files) amongst themselves and another at the node level where the files in a node are also split between multiple processes running on the node**." 352 | ] 353 | }, 354 | { 355 | "cell_type": "markdown", 356 | "id": "5f48c660", 357 | "metadata": {}, 358 | "source": [ 359 | "### Create custom container\n", 360 | "\n", 361 | "We will now create a container locally and push the container image to ECR. **The container creation process takes about 1 minute**.\n", 362 | "\n", 363 | "1. The container includes all the Python packages we need, i.e. `langchain`, `psycopg`, `pgvector` and `beautifulsoup4`.\n", 364 | "2. The container also includes the `credentials.py` script for retrieving credentials from Secrets Manager and `sm_helper.py` for helping to create SageMaker endpoint classes that langchain uses." 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": null, 370 | "id": "372cd2a4", 371 | "metadata": {}, 372 | "outputs": [], 373 | "source": [ 374 | "DOCKER_IMAGE = \"load-data-pgvector-custom\"\n", 375 | "DOCKER_IMAGE_TAG = \"latest\"" 376 | ] 377 | }, 378 | { 379 | "cell_type": "code", 380 | "execution_count": null, 381 | "id": "c80dd828", 382 | "metadata": {}, 383 | "outputs": [], 384 | "source": [ 385 | "!cd ./container && sm-docker build . --repository {DOCKER_IMAGE}:{DOCKER_IMAGE_TAG}" 386 | ] 387 | }, 388 | { 389 | "cell_type": "markdown", 390 | "id": "eab03176", 391 | "metadata": {}, 392 | "source": [ 393 | "### Create and run the Sagemaker Processing Job\n", 394 | "\n", 395 | "Now we will run the Sagemaker Processing Job to ingest the data into Aurora Postgresql."
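,
"\n",
"\n",
"The two levels of parallelism described above can be summarized with this sketch (simplified from `container/load_data_into_pgvector.py`, which defines `process_shard` and the variables used here):\n",
"\n",
"```python\n",
"import multiprocessing as mp\n",
"from functools import partial\n",
"\n",
"import numpy as np\n",
"\n",
"# Level 1: S3 sharding -- each processing instance sees only its own subset of\n",
"# the input files, because ProcessingInput is configured with\n",
"# s3_data_distribution_type='ShardedByS3Key' (see the cell below).\n",
"\n",
"# Level 2: within one instance, split that node's chunks into shards and\n",
"# ingest them with a pool of worker processes.\n",
"shards = np.array_split(chunks, db_shards)\n",
"with mp.Pool(processes=process_count) as pool:\n",
"    pool.map(partial(process_shard,\n",
"                     embeddings_model_endpoint_name=embeddings_model_endpoint_name,\n",
"                     aws_region=aws_region,\n",
"                     collection_name=pgvector_collection_name,\n",
"                     connection_string=CONNECTION_STRING),\n",
"             shards)\n",
"```"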
396 | ] 397 | }, 398 | { 399 | "cell_type": "markdown", 400 | "id": "e52cfadf", 401 | "metadata": {}, 402 | "source": [ 403 | "##### Load the embeddings and LLM into Aurora PostgreSQL DB cluster" 404 | ] 405 | }, 406 | { 407 | "cell_type": "code", 408 | "execution_count": null, 409 | "id": "477c8f74-faa9-4c4a-a6b6-de1b5699f955", 410 | "metadata": { 411 | "tags": [] 412 | }, 413 | "outputs": [], 414 | "source": [ 415 | "CFN_STACK_NAME = 'EmbeddingEndpointStack'\n", 416 | "\n", 417 | "cfn_stack_outputs = get_cfn_outputs(CFN_STACK_NAME, aws_region)\n", 418 | "embeddings_model_endpoint_name = cfn_stack_outputs['EmbeddingEndpointName']" 419 | ] 420 | }, 421 | { 422 | "cell_type": "code", 423 | "execution_count": null, 424 | "id": "29861415-9ad9-44bc-b2f2-ca8bf61dd40c", 425 | "metadata": { 426 | "tags": [] 427 | }, 428 | "outputs": [], 429 | "source": [ 430 | "CFN_STACK_NAME = \"RAGPgVectorStack\"\n", 431 | "\n", 432 | "pgvector_secret_id = get_secret_name(CFN_STACK_NAME, aws_region)\n", 433 | "pgvector_collection_name = 'llm_rag_embeddings'" 434 | ] 435 | }, 436 | { 437 | "cell_type": "code", 438 | "execution_count": null, 439 | "id": "1749f0fa-2df2-4289-9033-f429aac6e2f6", 440 | "metadata": { 441 | "tags": [] 442 | }, 443 | "outputs": [], 444 | "source": [ 445 | "account_id = boto3.client(\"sts\").get_caller_identity()[\"Account\"]\n", 446 | "aws_role = sagemaker_session.get_caller_identity_arn()" 447 | ] 448 | }, 449 | { 450 | "cell_type": "code", 451 | "execution_count": null, 452 | "id": "15d12e82-239c-4844-9e2c-73bd25c49167", 453 | "metadata": { 454 | "tags": [] 455 | }, 456 | "outputs": [], 457 | "source": [ 458 | "CHUNK_SIZE_FOR_DOC_SPLIT = 500\n", 459 | "CHUNK_OVERLAP_FOR_DOC_SPLIT = 20" 460 | ] 461 | }, 462 | { 463 | "cell_type": "code", 464 | "execution_count": null, 465 | "id": "64826d30", 466 | "metadata": {}, 467 | "outputs": [], 468 | "source": [ 469 | "db_subnet_ids = get_db_subnet_ids('RAGPgVectorStack', aws_region)\n", 470 | "db_client_security_group_id = get_cfn_outputs('RAGPgVectorStack', aws_region)['DBClientSecurityGroupId']\n", 471 | "sagemaker_domain_security_group_id = get_cfn_outputs('RAGSageMakerStudioStack', aws_region)['DomainSecurityGroupId']" 472 | ] 473 | }, 474 | { 475 | "cell_type": "code", 476 | "execution_count": null, 477 | "id": "87843222", 478 | "metadata": {}, 479 | "outputs": [], 480 | "source": [ 481 | "from sagemaker.network import NetworkConfig\n", 482 | "\n", 483 | "\n", 484 | "# For more information, see https://docs.aws.amazon.com/sagemaker/latest/dg/process-vpc.html\n", 485 | "network_config = NetworkConfig(security_group_ids=[sagemaker_domain_security_group_id,\n", 486 | " db_client_security_group_id],\n", 487 | " subnets=db_subnet_ids)" 488 | ] 489 | }, 490 | { 491 | "cell_type": "code", 492 | "execution_count": null, 493 | "id": "afb7373c-e80f-4d1a-a8dc-0dc79fb28e8a", 494 | "metadata": {}, 495 | "outputs": [], 496 | "source": [ 497 | "import time\n", 498 | "\n", 499 | "from sagemaker.processing import (\n", 500 | " ProcessingInput,\n", 501 | " ScriptProcessor\n", 502 | ")\n", 503 | "\n", 504 | "# setup the parameters for the job\n", 505 | "base_job_name = f\"{app_name}-job\"\n", 506 | "tags = [{\"Key\": \"data\", \"Value\": \"embeddings-for-llm-apps\"}]\n", 507 | "\n", 508 | "# use the custom container we just created\n", 509 | "image_uri = f\"{account_id}.dkr.ecr.{aws_region}.amazonaws.com/{DOCKER_IMAGE}:{DOCKER_IMAGE_TAG}\"\n", 510 | "\n", 511 | "# instance type and count determined via trial and error: how much overall processing time\n", 512 
| "# and what compute cost works best for your use-case\n", 513 | "instance_type = \"ml.m5.xlarge\"\n", 514 | "instance_count = 3\n", 515 | "logger.info(f\"base_job_name={base_job_name}, tags={tags}, image_uri={image_uri}, instance_type={instance_type}, instance_count={instance_count}\")\n", 516 | "\n", 517 | "# setup the ScriptProcessor with the above parameters\n", 518 | "processor = ScriptProcessor(base_job_name=base_job_name,\n", 519 | " image_uri=image_uri,\n", 520 | " role=aws_role,\n", 521 | " instance_type=instance_type,\n", 522 | " instance_count=instance_count,\n", 523 | " command=[\"python3\"],\n", 524 | " tags=tags,\n", 525 | " network_config=network_config)\n", 526 | "\n", 527 | "# setup input from S3, note the ShardedByS3Key, this ensures that\n", 528 | "# each instance gets a random and equal subset of the files in S3.\n", 529 | "inputs = [ProcessingInput(source=f\"s3://{bucket}/{app_name}/{DOMAIN}\",\n", 530 | " destination='/opt/ml/processing/input_data',\n", 531 | " s3_data_distribution_type='ShardedByS3Key',\n", 532 | " s3_data_type='S3Prefix')]\n", 533 | "\n", 534 | "\n", 535 | "logger.info(f\"creating an pgvector collection with name={pgvector_collection_name}\")\n", 536 | "\n", 537 | "# ready to run the processing job\n", 538 | "st = time.time()\n", 539 | "processor.run(code=\"container/load_data_into_pgvector.py\",\n", 540 | " inputs=inputs,\n", 541 | " outputs=[],\n", 542 | " arguments=[\"--pgvector-secretid\", pgvector_secret_id,\n", 543 | " \"--pgvector-collection-name\", pgvector_collection_name,\n", 544 | " \"--aws-region\", aws_region,\n", 545 | " \"--embeddings-model-endpoint-name\", embeddings_model_endpoint_name,\n", 546 | " \"--chunk-size-for-doc-split\", str(CHUNK_SIZE_FOR_DOC_SPLIT),\n", 547 | " \"--chunk-overlap-for-doc-split\", str(CHUNK_OVERLAP_FOR_DOC_SPLIT),\n", 548 | " \"--input-data-dir\", \"/opt/ml/processing/input_data\",\n", 549 | " \"--create-index-hint-file\", CREATE_OS_INDEX_HINT_FILE,\n", 550 | " \"--process-count\", \"2\"])\n", 551 | "\n", 552 | "time_taken = time.time() - st\n", 553 | "logger.info(f\"processing job completed, total time taken={time_taken}s\")\n", 554 | "\n", 555 | "preprocessing_job_description = processor.jobs[-1].describe()\n", 556 | "logger.info(preprocessing_job_description)" 557 | ] 558 | }, 559 | { 560 | "cell_type": "markdown", 561 | "id": "319bb2e5", 562 | "metadata": {}, 563 | "source": [ 564 | "### Option 2) Sequential loading data with Document Loader" 565 | ] 566 | }, 567 | { 568 | "cell_type": "code", 569 | "execution_count": null, 570 | "id": "79edc1b8", 571 | "metadata": {}, 572 | "outputs": [], 573 | "source": [ 574 | "%%capture --no-stderr\n", 575 | "\n", 576 | "!pip install -U beautifulsoup4==4.12.3" 577 | ] 578 | }, 579 | { 580 | "cell_type": "code", 581 | "execution_count": null, 582 | "id": "fd657ba8", 583 | "metadata": {}, 584 | "outputs": [], 585 | "source": [ 586 | "from langchain_community.document_loaders import ReadTheDocsLoader\n", 587 | "from langchain.text_splitter import RecursiveCharacterTextSplitter\n", 588 | "\n", 589 | "\n", 590 | "loader = ReadTheDocsLoader(DATA_DIR)\n", 591 | "text_splitter = RecursiveCharacterTextSplitter(\n", 592 | " chunk_size=CHUNK_SIZE_FOR_DOC_SPLIT,\n", 593 | " chunk_overlap=CHUNK_OVERLAP_FOR_DOC_SPLIT,\n", 594 | " length_function=len,\n", 595 | ")\n", 596 | "\n", 597 | "docs = loader.load()\n", 598 | "\n", 599 | "# add a custom metadata field, such as timestamp\n", 600 | "for doc in docs:\n", 601 | " doc.metadata['timestamp'] = time.time()\n", 602 | " 
doc.metadata['embeddings_model'] = embeddings_model_endpoint_name" 604 | ] 605 | }, 606 | { 607 | "cell_type": "code", 608 | "execution_count": null, 609 | "id": "5a7281c8", 610 | "metadata": {}, 611 | "outputs": [], 612 | "source": [ 613 | "chunks = text_splitter.create_documents(\n", 614 | " [doc.page_content for doc in docs],\n", 615 | " metadatas=[doc.metadata for doc in docs]\n", 616 | ")" 617 | ] 618 | }, 619 | { 620 | "cell_type": "code", 621 | "execution_count": null, 622 | "id": "4478ff35", 623 | "metadata": {}, 624 | "outputs": [], 625 | "source": [ 626 | "import numpy as np\n", 627 | "\n", 628 | "\n", 629 | "MAX_DOCS_PER_PUT = 10\n", 630 | "\n", 631 | "db_shards = (len(chunks) // MAX_DOCS_PER_PUT) + 1\n", 632 | "shards = np.array_split(chunks, db_shards)\n", 633 | "print(f'Loading chunks into vector store ... using {db_shards} shards')" 634 | ] 635 | }, 636 | { 637 | "cell_type": "code", 638 | "execution_count": null, 639 | "id": "a8382f10", 640 | "metadata": {}, 641 | "outputs": [], 642 | "source": [ 643 | "import urllib\n", 644 | "from langchain_postgres import PGVector\n", 645 | "from container.credentials import get_credentials\n", 646 | "\n", 647 | "\n", 648 | "secret = get_credentials(pgvector_secret_id, aws_region)\n", 649 | "db_username = secret['username']\n", 650 | "db_password = urllib.parse.quote_plus(secret['password'])\n", 651 | "db_port = secret['port']\n", 652 | "db_host = secret['host']\n", 653 | "\n", 654 | "CONNECTION_STRING = PGVector.connection_string_from_db_params(\n", 655 | " driver='psycopg',\n", 656 | " user=db_username,\n", 657 | " password=db_password,\n", 658 | " host=db_host,\n", 659 | " port=db_port,\n", 660 | " database=''\n", 661 | ")" 662 | ] 663 | }, 664 | { 665 | "cell_type": "code", 666 | "execution_count": null, 667 | "id": "de7cf804", 668 | "metadata": {}, 669 | "outputs": [], 670 | "source": [ 671 | "from container.sm_helper import create_sagemaker_embeddings_from_js_model\n", 672 | "\n", 673 | "\n", 674 | "embeddings = create_sagemaker_embeddings_from_js_model(\n", 675 | " embeddings_model_endpoint_name,\n", 676 | " aws_region\n", 677 | ")\n", 678 | "\n", 679 | "vectordb = PGVector(\n", 680 | " collection_name=pgvector_collection_name,\n", 681 | " connection=CONNECTION_STRING,\n", 682 | " embeddings=embeddings\n", 683 | ")" 684 | ] 685 | }, 686 | { 687 | "cell_type": "code", 688 | "execution_count": null, 689 | "id": "33716979", 690 | "metadata": {}, 691 | "outputs": [], 692 | "source": [ 693 | "%%time\n", 694 | "import time\n", 695 | "\n", 696 | "\n", 697 | "for i, shard in enumerate(shards):\n", 698 | " vectordb.add_documents(documents=shard)\n", 699 | " print(f\"[{i}] shard is added.\")\n", 700 | " time.sleep(0.3)" 701 | ] 702 | }, 703 | { 704 | "cell_type": "markdown", 705 | "id": "1e444161-262e-44e5-ad31-e490a763be4e", 706 | "metadata": {}, 707 | "source": [ 708 | "## Step 5: Do a similarity search for user input against documents (embeddings) in Aurora Postgresql " 709 | ] 710 | }, 711 | { 712 | "cell_type": "code", 713 | "execution_count": null, 714 | "id": "294a7292-8bdb-4d11-a23d-130a4a039cd2", 715 | "metadata": { 716 | "tags": [] 717 | }, 718 | "outputs": [], 719 | "source": [ 720 | "import urllib\n", 721 | "\n", 722 | "from langchain_postgres import PGVector\n", 723 | "\n", 724 | "from container.credentials import get_credentials\n", 725 | "from container.sm_helper import create_sagemaker_embeddings_from_js_model\n", 726 | "\n", 727 | "\n", 728 | "secret = get_credentials(pgvector_secret_id, aws_region)\n", 729 | "\n", 730 |
"db_username = secret['username']\n", 730 | "db_password = urllib.parse.quote_plus(secret['password'])\n", 731 | "db_port = secret['port']\n", 732 | "db_host = secret['host']\n", 733 | "\n", 734 | "connection_string = PGVector.connection_string_from_db_params(\n", 735 | " driver='psycopg',\n", 736 | " user=db_username,\n", 737 | " password=db_password,\n", 738 | " host=db_host,\n", 739 | " port=db_port,\n", 740 | " database=''\n", 741 | ")\n", 742 | "\n", 743 | "docsearch = PGVector.from_existing_index(\n", 744 | " embedding=create_sagemaker_embeddings_from_js_model(embeddings_model_endpoint_name,\n", 745 | " aws_region),\n", 746 | " collection_name=pgvector_collection_name,\n", 747 | " connection=connection_string)\n", 748 | "\n", 749 | "q = \"Which XGBoost versions does SageMaker support?\"\n", 750 | "docs = docsearch.similarity_search(q, k=3)\n", 751 | "for doc in docs:\n", 752 | " logger.info(\"----------\")\n", 753 | " logger.info(f\"content=\\\"{doc.page_content}\\\",\\nmetadata=\\\"{doc.metadata}\\\"\")" 754 | ] 755 | }, 756 | { 757 | "cell_type": "markdown", 758 | "id": "6e29eae5-c463-4153-9167-e4628c74d13c", 759 | "metadata": { 760 | "tags": [] 761 | }, 762 | "source": [ 763 | "## Cleanup\n", 764 | "\n", 765 | "To avoid incurring future charges, delete the resources. You can do this by deleting the CloudFormation template used to create the IAM role and SageMaker notebook." 766 | ] 767 | }, 768 | { 769 | "cell_type": "markdown", 770 | "id": "59ce3fe8-bb71-4e22-a551-2475eb2d16b7", 771 | "metadata": {}, 772 | "source": [ 773 | "---\n", 774 | "\n", 775 | "## Conclusion\n", 776 | "In this notebook we were able to see how to use LLMs deployed on a SageMaker Endpoint to generate embeddings and then ingest those embeddings into Aurora Postgresql and finally do a similarity search for user input to the documents (embeddings) stored in Aurora Postgresql. We used langchain as an abstraction layer to talk to both the SageMaker Endpoint as well as Aurora Postgresql." 
777 | ] 778 | }, 779 | { 780 | "cell_type": "markdown", 781 | "id": "53386268-0cf9-4a37-b3d0-711fba1e5585", 782 | "metadata": {}, 783 | "source": [ 784 | "---\n", 785 | "\n", 786 | "## Appendix" 787 | ] 788 | }, 789 | { 790 | "cell_type": "code", 791 | "execution_count": null, 792 | "id": "7332274c-586d-4cf9-838f-ea7e0cbe6c0f", 793 | "metadata": { 794 | "tags": [] 795 | }, 796 | "outputs": [], 797 | "source": [ 798 | "from container.sm_helper import create_sagemaker_embeddings_from_js_model\n", 799 | "\n", 800 | "CFN_STACK_NAME = 'EmbeddingEndpointStack'\n", 801 | "cfn_stack_outputs = get_cfn_outputs(CFN_STACK_NAME, aws_region)\n", 802 | "embeddings_model_endpoint_name = cfn_stack_outputs['EmbeddingEndpointName']\n", 803 | "\n", 804 | "embeddings = create_sagemaker_embeddings_from_js_model(embeddings_model_endpoint_name, aws_region)\n", 805 | "\n", 806 | "text = \"This is a sample query.\"\n", 807 | "query_result = embeddings.embed_query(text)\n", 808 | "\n", 809 | "print(query_result)\n", 810 | "print(f\"length: {len(query_result)}\")" 811 | ] 812 | }, 813 | { 814 | "cell_type": "markdown", 815 | "id": "dd881bab", 816 | "metadata": {}, 817 | "source": [ 818 | "## References\n", 819 | "\n", 820 | " * [Leverage pgvector and Amazon Aurora PostgreSQL for Natural Language Processing, Chatbots and Sentiment Analysis](https://aws.amazon.com/blogs/database/leverage-pgvector-and-amazon-aurora-postgresql-for-natural-language-processing-chatbots-and-sentiment-analysis/)\n", 821 | " * [Building AI-powered search in PostgreSQL using Amazon SageMaker and pgvector](https://aws.amazon.com/blogs/database/building-ai-powered-search-in-postgresql-using-amazon-sagemaker-and-pgvector/)\n", 822 | " * [Using the Amazon SageMaker Studio Image Build CLI to build container images from your Studio notebooks](https://aws.amazon.com/blogs/machine-learning/using-the-amazon-sagemaker-studio-image-build-cli-to-build-container-images-from-your-studio-notebooks/)\n", 823 | " * [Give SageMaker Processing Jobs Access to Resources in Your Amazon VPC](https://docs.aws.amazon.com/sagemaker/latest/dg/process-vpc.html)\n", 824 | " * **Configure the VPC Security Group**\n", 825 | " * In distributed processing, you must allow communication between the different containers in the same processing job. To do that, configure a rule for your security group that allows inbound connections between members of the same security group.\n", 826 | " * [How can I troubleshoot the InternalServerError response on Amazon SageMaker? - AWS re:Post](https://repost.aws/knowledge-center/sagemaker-http-500-internal-server-error)\n", 827 | " * [LangChain](https://python.langchain.com/docs/get_started/introduction.html) - A framework for developing applications powered by language models." 
828 | ] 829 | } 830 | ], 831 | "metadata": { 832 | "availableInstances": [ 833 | { 834 | "_defaultOrder": 0, 835 | "_isFastLaunch": true, 836 | "category": "General purpose", 837 | "gpuNum": 0, 838 | "hideHardwareSpecs": false, 839 | "memoryGiB": 4, 840 | "name": "ml.t3.medium", 841 | "vcpuNum": 2 842 | }, 843 | { 844 | "_defaultOrder": 1, 845 | "_isFastLaunch": false, 846 | "category": "General purpose", 847 | "gpuNum": 0, 848 | "hideHardwareSpecs": false, 849 | "memoryGiB": 8, 850 | "name": "ml.t3.large", 851 | "vcpuNum": 2 852 | }, 853 | { 854 | "_defaultOrder": 2, 855 | "_isFastLaunch": false, 856 | "category": "General purpose", 857 | "gpuNum": 0, 858 | "hideHardwareSpecs": false, 859 | "memoryGiB": 16, 860 | "name": "ml.t3.xlarge", 861 | "vcpuNum": 4 862 | }, 863 | { 864 | "_defaultOrder": 3, 865 | "_isFastLaunch": false, 866 | "category": "General purpose", 867 | "gpuNum": 0, 868 | "hideHardwareSpecs": false, 869 | "memoryGiB": 32, 870 | "name": "ml.t3.2xlarge", 871 | "vcpuNum": 8 872 | }, 873 | { 874 | "_defaultOrder": 4, 875 | "_isFastLaunch": true, 876 | "category": "General purpose", 877 | "gpuNum": 0, 878 | "hideHardwareSpecs": false, 879 | "memoryGiB": 8, 880 | "name": "ml.m5.large", 881 | "vcpuNum": 2 882 | }, 883 | { 884 | "_defaultOrder": 5, 885 | "_isFastLaunch": false, 886 | "category": "General purpose", 887 | "gpuNum": 0, 888 | "hideHardwareSpecs": false, 889 | "memoryGiB": 16, 890 | "name": "ml.m5.xlarge", 891 | "vcpuNum": 4 892 | }, 893 | { 894 | "_defaultOrder": 6, 895 | "_isFastLaunch": false, 896 | "category": "General purpose", 897 | "gpuNum": 0, 898 | "hideHardwareSpecs": false, 899 | "memoryGiB": 32, 900 | "name": "ml.m5.2xlarge", 901 | "vcpuNum": 8 902 | }, 903 | { 904 | "_defaultOrder": 7, 905 | "_isFastLaunch": false, 906 | "category": "General purpose", 907 | "gpuNum": 0, 908 | "hideHardwareSpecs": false, 909 | "memoryGiB": 64, 910 | "name": "ml.m5.4xlarge", 911 | "vcpuNum": 16 912 | }, 913 | { 914 | "_defaultOrder": 8, 915 | "_isFastLaunch": false, 916 | "category": "General purpose", 917 | "gpuNum": 0, 918 | "hideHardwareSpecs": false, 919 | "memoryGiB": 128, 920 | "name": "ml.m5.8xlarge", 921 | "vcpuNum": 32 922 | }, 923 | { 924 | "_defaultOrder": 9, 925 | "_isFastLaunch": false, 926 | "category": "General purpose", 927 | "gpuNum": 0, 928 | "hideHardwareSpecs": false, 929 | "memoryGiB": 192, 930 | "name": "ml.m5.12xlarge", 931 | "vcpuNum": 48 932 | }, 933 | { 934 | "_defaultOrder": 10, 935 | "_isFastLaunch": false, 936 | "category": "General purpose", 937 | "gpuNum": 0, 938 | "hideHardwareSpecs": false, 939 | "memoryGiB": 256, 940 | "name": "ml.m5.16xlarge", 941 | "vcpuNum": 64 942 | }, 943 | { 944 | "_defaultOrder": 11, 945 | "_isFastLaunch": false, 946 | "category": "General purpose", 947 | "gpuNum": 0, 948 | "hideHardwareSpecs": false, 949 | "memoryGiB": 384, 950 | "name": "ml.m5.24xlarge", 951 | "vcpuNum": 96 952 | }, 953 | { 954 | "_defaultOrder": 12, 955 | "_isFastLaunch": false, 956 | "category": "General purpose", 957 | "gpuNum": 0, 958 | "hideHardwareSpecs": false, 959 | "memoryGiB": 8, 960 | "name": "ml.m5d.large", 961 | "vcpuNum": 2 962 | }, 963 | { 964 | "_defaultOrder": 13, 965 | "_isFastLaunch": false, 966 | "category": "General purpose", 967 | "gpuNum": 0, 968 | "hideHardwareSpecs": false, 969 | "memoryGiB": 16, 970 | "name": "ml.m5d.xlarge", 971 | "vcpuNum": 4 972 | }, 973 | { 974 | "_defaultOrder": 14, 975 | "_isFastLaunch": false, 976 | "category": "General purpose", 977 | "gpuNum": 0, 978 | "hideHardwareSpecs": false, 979 | "memoryGiB": 
32, 980 | "name": "ml.m5d.2xlarge", 981 | "vcpuNum": 8 982 | }, 983 | { 984 | "_defaultOrder": 15, 985 | "_isFastLaunch": false, 986 | "category": "General purpose", 987 | "gpuNum": 0, 988 | "hideHardwareSpecs": false, 989 | "memoryGiB": 64, 990 | "name": "ml.m5d.4xlarge", 991 | "vcpuNum": 16 992 | }, 993 | { 994 | "_defaultOrder": 16, 995 | "_isFastLaunch": false, 996 | "category": "General purpose", 997 | "gpuNum": 0, 998 | "hideHardwareSpecs": false, 999 | "memoryGiB": 128, 1000 | "name": "ml.m5d.8xlarge", 1001 | "vcpuNum": 32 1002 | }, 1003 | { 1004 | "_defaultOrder": 17, 1005 | "_isFastLaunch": false, 1006 | "category": "General purpose", 1007 | "gpuNum": 0, 1008 | "hideHardwareSpecs": false, 1009 | "memoryGiB": 192, 1010 | "name": "ml.m5d.12xlarge", 1011 | "vcpuNum": 48 1012 | }, 1013 | { 1014 | "_defaultOrder": 18, 1015 | "_isFastLaunch": false, 1016 | "category": "General purpose", 1017 | "gpuNum": 0, 1018 | "hideHardwareSpecs": false, 1019 | "memoryGiB": 256, 1020 | "name": "ml.m5d.16xlarge", 1021 | "vcpuNum": 64 1022 | }, 1023 | { 1024 | "_defaultOrder": 19, 1025 | "_isFastLaunch": false, 1026 | "category": "General purpose", 1027 | "gpuNum": 0, 1028 | "hideHardwareSpecs": false, 1029 | "memoryGiB": 384, 1030 | "name": "ml.m5d.24xlarge", 1031 | "vcpuNum": 96 1032 | }, 1033 | { 1034 | "_defaultOrder": 20, 1035 | "_isFastLaunch": false, 1036 | "category": "General purpose", 1037 | "gpuNum": 0, 1038 | "hideHardwareSpecs": true, 1039 | "memoryGiB": 0, 1040 | "name": "ml.geospatial.interactive", 1041 | "supportedImageNames": [ 1042 | "sagemaker-geospatial-v1-0" 1043 | ], 1044 | "vcpuNum": 0 1045 | }, 1046 | { 1047 | "_defaultOrder": 21, 1048 | "_isFastLaunch": true, 1049 | "category": "Compute optimized", 1050 | "gpuNum": 0, 1051 | "hideHardwareSpecs": false, 1052 | "memoryGiB": 4, 1053 | "name": "ml.c5.large", 1054 | "vcpuNum": 2 1055 | }, 1056 | { 1057 | "_defaultOrder": 22, 1058 | "_isFastLaunch": false, 1059 | "category": "Compute optimized", 1060 | "gpuNum": 0, 1061 | "hideHardwareSpecs": false, 1062 | "memoryGiB": 8, 1063 | "name": "ml.c5.xlarge", 1064 | "vcpuNum": 4 1065 | }, 1066 | { 1067 | "_defaultOrder": 23, 1068 | "_isFastLaunch": false, 1069 | "category": "Compute optimized", 1070 | "gpuNum": 0, 1071 | "hideHardwareSpecs": false, 1072 | "memoryGiB": 16, 1073 | "name": "ml.c5.2xlarge", 1074 | "vcpuNum": 8 1075 | }, 1076 | { 1077 | "_defaultOrder": 24, 1078 | "_isFastLaunch": false, 1079 | "category": "Compute optimized", 1080 | "gpuNum": 0, 1081 | "hideHardwareSpecs": false, 1082 | "memoryGiB": 32, 1083 | "name": "ml.c5.4xlarge", 1084 | "vcpuNum": 16 1085 | }, 1086 | { 1087 | "_defaultOrder": 25, 1088 | "_isFastLaunch": false, 1089 | "category": "Compute optimized", 1090 | "gpuNum": 0, 1091 | "hideHardwareSpecs": false, 1092 | "memoryGiB": 72, 1093 | "name": "ml.c5.9xlarge", 1094 | "vcpuNum": 36 1095 | }, 1096 | { 1097 | "_defaultOrder": 26, 1098 | "_isFastLaunch": false, 1099 | "category": "Compute optimized", 1100 | "gpuNum": 0, 1101 | "hideHardwareSpecs": false, 1102 | "memoryGiB": 96, 1103 | "name": "ml.c5.12xlarge", 1104 | "vcpuNum": 48 1105 | }, 1106 | { 1107 | "_defaultOrder": 27, 1108 | "_isFastLaunch": false, 1109 | "category": "Compute optimized", 1110 | "gpuNum": 0, 1111 | "hideHardwareSpecs": false, 1112 | "memoryGiB": 144, 1113 | "name": "ml.c5.18xlarge", 1114 | "vcpuNum": 72 1115 | }, 1116 | { 1117 | "_defaultOrder": 28, 1118 | "_isFastLaunch": false, 1119 | "category": "Compute optimized", 1120 | "gpuNum": 0, 1121 | "hideHardwareSpecs": false, 1122 | 
"memoryGiB": 192, 1123 | "name": "ml.c5.24xlarge", 1124 | "vcpuNum": 96 1125 | }, 1126 | { 1127 | "_defaultOrder": 29, 1128 | "_isFastLaunch": true, 1129 | "category": "Accelerated computing", 1130 | "gpuNum": 1, 1131 | "hideHardwareSpecs": false, 1132 | "memoryGiB": 16, 1133 | "name": "ml.g4dn.xlarge", 1134 | "vcpuNum": 4 1135 | }, 1136 | { 1137 | "_defaultOrder": 30, 1138 | "_isFastLaunch": false, 1139 | "category": "Accelerated computing", 1140 | "gpuNum": 1, 1141 | "hideHardwareSpecs": false, 1142 | "memoryGiB": 32, 1143 | "name": "ml.g4dn.2xlarge", 1144 | "vcpuNum": 8 1145 | }, 1146 | { 1147 | "_defaultOrder": 31, 1148 | "_isFastLaunch": false, 1149 | "category": "Accelerated computing", 1150 | "gpuNum": 1, 1151 | "hideHardwareSpecs": false, 1152 | "memoryGiB": 64, 1153 | "name": "ml.g4dn.4xlarge", 1154 | "vcpuNum": 16 1155 | }, 1156 | { 1157 | "_defaultOrder": 32, 1158 | "_isFastLaunch": false, 1159 | "category": "Accelerated computing", 1160 | "gpuNum": 1, 1161 | "hideHardwareSpecs": false, 1162 | "memoryGiB": 128, 1163 | "name": "ml.g4dn.8xlarge", 1164 | "vcpuNum": 32 1165 | }, 1166 | { 1167 | "_defaultOrder": 33, 1168 | "_isFastLaunch": false, 1169 | "category": "Accelerated computing", 1170 | "gpuNum": 4, 1171 | "hideHardwareSpecs": false, 1172 | "memoryGiB": 192, 1173 | "name": "ml.g4dn.12xlarge", 1174 | "vcpuNum": 48 1175 | }, 1176 | { 1177 | "_defaultOrder": 34, 1178 | "_isFastLaunch": false, 1179 | "category": "Accelerated computing", 1180 | "gpuNum": 1, 1181 | "hideHardwareSpecs": false, 1182 | "memoryGiB": 256, 1183 | "name": "ml.g4dn.16xlarge", 1184 | "vcpuNum": 64 1185 | }, 1186 | { 1187 | "_defaultOrder": 35, 1188 | "_isFastLaunch": false, 1189 | "category": "Accelerated computing", 1190 | "gpuNum": 1, 1191 | "hideHardwareSpecs": false, 1192 | "memoryGiB": 61, 1193 | "name": "ml.p3.2xlarge", 1194 | "vcpuNum": 8 1195 | }, 1196 | { 1197 | "_defaultOrder": 36, 1198 | "_isFastLaunch": false, 1199 | "category": "Accelerated computing", 1200 | "gpuNum": 4, 1201 | "hideHardwareSpecs": false, 1202 | "memoryGiB": 244, 1203 | "name": "ml.p3.8xlarge", 1204 | "vcpuNum": 32 1205 | }, 1206 | { 1207 | "_defaultOrder": 37, 1208 | "_isFastLaunch": false, 1209 | "category": "Accelerated computing", 1210 | "gpuNum": 8, 1211 | "hideHardwareSpecs": false, 1212 | "memoryGiB": 488, 1213 | "name": "ml.p3.16xlarge", 1214 | "vcpuNum": 64 1215 | }, 1216 | { 1217 | "_defaultOrder": 38, 1218 | "_isFastLaunch": false, 1219 | "category": "Accelerated computing", 1220 | "gpuNum": 8, 1221 | "hideHardwareSpecs": false, 1222 | "memoryGiB": 768, 1223 | "name": "ml.p3dn.24xlarge", 1224 | "vcpuNum": 96 1225 | }, 1226 | { 1227 | "_defaultOrder": 39, 1228 | "_isFastLaunch": false, 1229 | "category": "Memory Optimized", 1230 | "gpuNum": 0, 1231 | "hideHardwareSpecs": false, 1232 | "memoryGiB": 16, 1233 | "name": "ml.r5.large", 1234 | "vcpuNum": 2 1235 | }, 1236 | { 1237 | "_defaultOrder": 40, 1238 | "_isFastLaunch": false, 1239 | "category": "Memory Optimized", 1240 | "gpuNum": 0, 1241 | "hideHardwareSpecs": false, 1242 | "memoryGiB": 32, 1243 | "name": "ml.r5.xlarge", 1244 | "vcpuNum": 4 1245 | }, 1246 | { 1247 | "_defaultOrder": 41, 1248 | "_isFastLaunch": false, 1249 | "category": "Memory Optimized", 1250 | "gpuNum": 0, 1251 | "hideHardwareSpecs": false, 1252 | "memoryGiB": 64, 1253 | "name": "ml.r5.2xlarge", 1254 | "vcpuNum": 8 1255 | }, 1256 | { 1257 | "_defaultOrder": 42, 1258 | "_isFastLaunch": false, 1259 | "category": "Memory Optimized", 1260 | "gpuNum": 0, 1261 | "hideHardwareSpecs": false, 1262 | 
"memoryGiB": 128, 1263 | "name": "ml.r5.4xlarge", 1264 | "vcpuNum": 16 1265 | }, 1266 | { 1267 | "_defaultOrder": 43, 1268 | "_isFastLaunch": false, 1269 | "category": "Memory Optimized", 1270 | "gpuNum": 0, 1271 | "hideHardwareSpecs": false, 1272 | "memoryGiB": 256, 1273 | "name": "ml.r5.8xlarge", 1274 | "vcpuNum": 32 1275 | }, 1276 | { 1277 | "_defaultOrder": 44, 1278 | "_isFastLaunch": false, 1279 | "category": "Memory Optimized", 1280 | "gpuNum": 0, 1281 | "hideHardwareSpecs": false, 1282 | "memoryGiB": 384, 1283 | "name": "ml.r5.12xlarge", 1284 | "vcpuNum": 48 1285 | }, 1286 | { 1287 | "_defaultOrder": 45, 1288 | "_isFastLaunch": false, 1289 | "category": "Memory Optimized", 1290 | "gpuNum": 0, 1291 | "hideHardwareSpecs": false, 1292 | "memoryGiB": 512, 1293 | "name": "ml.r5.16xlarge", 1294 | "vcpuNum": 64 1295 | }, 1296 | { 1297 | "_defaultOrder": 46, 1298 | "_isFastLaunch": false, 1299 | "category": "Memory Optimized", 1300 | "gpuNum": 0, 1301 | "hideHardwareSpecs": false, 1302 | "memoryGiB": 768, 1303 | "name": "ml.r5.24xlarge", 1304 | "vcpuNum": 96 1305 | }, 1306 | { 1307 | "_defaultOrder": 47, 1308 | "_isFastLaunch": false, 1309 | "category": "Accelerated computing", 1310 | "gpuNum": 1, 1311 | "hideHardwareSpecs": false, 1312 | "memoryGiB": 16, 1313 | "name": "ml.g5.xlarge", 1314 | "vcpuNum": 4 1315 | }, 1316 | { 1317 | "_defaultOrder": 48, 1318 | "_isFastLaunch": false, 1319 | "category": "Accelerated computing", 1320 | "gpuNum": 1, 1321 | "hideHardwareSpecs": false, 1322 | "memoryGiB": 32, 1323 | "name": "ml.g5.2xlarge", 1324 | "vcpuNum": 8 1325 | }, 1326 | { 1327 | "_defaultOrder": 49, 1328 | "_isFastLaunch": false, 1329 | "category": "Accelerated computing", 1330 | "gpuNum": 1, 1331 | "hideHardwareSpecs": false, 1332 | "memoryGiB": 64, 1333 | "name": "ml.g5.4xlarge", 1334 | "vcpuNum": 16 1335 | }, 1336 | { 1337 | "_defaultOrder": 50, 1338 | "_isFastLaunch": false, 1339 | "category": "Accelerated computing", 1340 | "gpuNum": 1, 1341 | "hideHardwareSpecs": false, 1342 | "memoryGiB": 128, 1343 | "name": "ml.g5.8xlarge", 1344 | "vcpuNum": 32 1345 | }, 1346 | { 1347 | "_defaultOrder": 51, 1348 | "_isFastLaunch": false, 1349 | "category": "Accelerated computing", 1350 | "gpuNum": 1, 1351 | "hideHardwareSpecs": false, 1352 | "memoryGiB": 256, 1353 | "name": "ml.g5.16xlarge", 1354 | "vcpuNum": 64 1355 | }, 1356 | { 1357 | "_defaultOrder": 52, 1358 | "_isFastLaunch": false, 1359 | "category": "Accelerated computing", 1360 | "gpuNum": 4, 1361 | "hideHardwareSpecs": false, 1362 | "memoryGiB": 192, 1363 | "name": "ml.g5.12xlarge", 1364 | "vcpuNum": 48 1365 | }, 1366 | { 1367 | "_defaultOrder": 53, 1368 | "_isFastLaunch": false, 1369 | "category": "Accelerated computing", 1370 | "gpuNum": 4, 1371 | "hideHardwareSpecs": false, 1372 | "memoryGiB": 384, 1373 | "name": "ml.g5.24xlarge", 1374 | "vcpuNum": 96 1375 | }, 1376 | { 1377 | "_defaultOrder": 54, 1378 | "_isFastLaunch": false, 1379 | "category": "Accelerated computing", 1380 | "gpuNum": 8, 1381 | "hideHardwareSpecs": false, 1382 | "memoryGiB": 768, 1383 | "name": "ml.g5.48xlarge", 1384 | "vcpuNum": 192 1385 | }, 1386 | { 1387 | "_defaultOrder": 55, 1388 | "_isFastLaunch": false, 1389 | "category": "Accelerated computing", 1390 | "gpuNum": 8, 1391 | "hideHardwareSpecs": false, 1392 | "memoryGiB": 1152, 1393 | "name": "ml.p4d.24xlarge", 1394 | "vcpuNum": 96 1395 | }, 1396 | { 1397 | "_defaultOrder": 56, 1398 | "_isFastLaunch": false, 1399 | "category": "Accelerated computing", 1400 | "gpuNum": 8, 1401 | "hideHardwareSpecs": false, 1402 | 
"memoryGiB": 1152, 1403 | "name": "ml.p4de.24xlarge", 1404 | "vcpuNum": 96 1405 | } 1406 | ], 1407 | "instance_type": "ml.t3.medium", 1408 | "kernelspec": { 1409 | "display_name": "Python 3 (Data Science 2.0)", 1410 | "language": "python", 1411 | "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-1:123456789012:image/sagemaker-data-science-38" 1412 | }, 1413 | "language_info": { 1414 | "codemirror_mode": { 1415 | "name": "ipython", 1416 | "version": 3 1417 | }, 1418 | "file_extension": ".py", 1419 | "mimetype": "text/x-python", 1420 | "name": "python", 1421 | "nbconvert_exporter": "python", 1422 | "pygments_lexer": "ipython3", 1423 | "version": "3.8.13" 1424 | } 1425 | }, 1426 | "nbformat": 4, 1427 | "nbformat_minor": 5 1428 | } 1429 | -------------------------------------------------------------------------------- /data_ingestion_to_vectordb/scripts/get_data.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import argparse 3 | import traceback 4 | 5 | from sh import cp, find, mkdir, wget 6 | 7 | 8 | def main(): 9 | parser = argparse.ArgumentParser() 10 | 11 | parser.add_argument("--domain", type=str, default="sagemaker.readthedocs.io") 12 | parser.add_argument("--website", type=str, default="https://sagemaker.readthedocs.io/en/stable/") 13 | parser.add_argument("--output-dir", type=str, default="docs") 14 | parser.add_argument("--dryrun", action='store_true') 15 | args, _ = parser.parse_known_args() 16 | 17 | WEBSITE, DOMAIN, KB_DIR = (args.website, args.domain, args.output_dir) 18 | 19 | if args.dryrun: 20 | print(f"WEBSITE={WEBSITE}, DOMAIN={DOMAIN}, OUTPUT_DIR={KB_DIR}", file=sys.stderr) 21 | sys.exit(0) 22 | 23 | mkdir('-p', KB_DIR) 24 | 25 | try: 26 | WGET_ARGUMENTS = f"-e robots=off --recursive --no-clobber --page-requisites --html-extension --convert-links --restrict-file-names=windows --domains {DOMAIN} --no-parent {WEBSITE}" 27 | wget_argument_list = WGET_ARGUMENTS.split() 28 | wget(*wget_argument_list) 29 | except Exception as ex: 30 | traceback.print_exc() 31 | 32 | results = find(DOMAIN, '-name', '*.html') 33 | html_files = results.strip('\n').split('\n') 34 | for each in html_files: 35 | flat_i = each.replace('/', '-') 36 | cp(each, f"{KB_DIR}/{flat_i}") 37 | 38 | print(f"There are {len(html_files)} files in {KB_DIR} directory", file=sys.stderr) 39 | 40 | 41 | if __name__ == "__main__": 42 | main() --------------------------------------------------------------------------------