├── .gitignore ├── 01_ProductRecommendations ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── bedrock-similarity-search.ipynb ├── bedrock-text-search.ipynb ├── bedrock_requirements.txt ├── data │ ├── FEIDEGGER_release_1.2.json │ └── amazon.csv ├── genai-pgvector-similarity-search.ipynb ├── opensource-similarity-search.ipynb ├── opensource_requirements.txt ├── requirements.txt └── static │ └── architecture.png ├── 02_RetrievalAugmentedGeneration ├── 02_QuestionAnswering_Bedrock_LLMs │ ├── .gitignore │ ├── CODE_OF_CONDUCT.md │ ├── CONTRIBUTING.md │ ├── LICENSE │ ├── README.md │ ├── app.py │ ├── data │ │ ├── Amazon Aurora FAQs.pdf │ │ └── Amazon Bedrock FAQs.pdf │ ├── env.example │ ├── htmlTemplates.py │ ├── rag_app.py │ ├── requirements.txt │ └── static │ │ ├── Powered-By_logo-stack_RGB_REV.png │ │ └── RAG_APG.png ├── 02_QuestionAnswering_Open_Source_LLMs │ ├── .gitignore │ ├── CITATION.cff │ ├── CODE_OF_CONDUCT.md │ ├── CONTRIBUTING.md │ ├── LICENSE │ ├── README.md │ ├── app.py │ ├── data │ │ ├── amazon-aurora-design-considerations-for-high-throughput-cloud-native-relational-databases.pdf │ │ └── constitution.pdf │ ├── env.example │ ├── htmlTemplates.py │ ├── psql.sh │ ├── requirements.txt │ └── static │ │ └── APG-pgvector-streamlit.png └── 02_ResponseStreaming │ ├── CODE_OF_CONDUCT.md │ ├── CONTRIBUTING.md │ ├── LICENSE │ ├── README.md │ ├── app.py │ ├── data │ ├── AMZN-2020-Shareholder-Letter.pdf │ ├── AMZN-2021-Shareholder-Letter.pdf │ ├── AMZN-2022-Shareholder-Letter.pdf │ └── AMZN-2023-Shareholder-Letter.pdf │ ├── env.example │ ├── htmlTemplates.py │ ├── requirements.txt │ ├── static │ ├── Powered-By_logo-stack_RGB_REV.png │ └── Streaming_Responses_RAG.png │ └── streaming_app.py ├── 03_SimilaritySearchSentimentAnalysis ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── data │ └── fictitious_hotel_reviews_trimmed_500.csv ├── env.example ├── pgvector_with_langchain_auroraml.ipynb ├── requirements1.txt ├── requirements2.txt └── static │ └── APG-pgvector-sagemaker.png ├── 04_AuroraML_Bedrock_MovieRecommendations ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── app.py ├── data │ ├── functions.sql │ └── movies.sql.gz ├── env.example ├── requirements.txt └── static │ ├── ARCH.png │ ├── Powered-By_logo-stack_RGB_REV.png │ └── Preview_App.png ├── 05_AuroraML_Bedrock_Chatbot ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── chatbot-app.py ├── chatbot.py ├── env.example ├── requirements.txt └── static │ ├── architecture.png │ └── postgres_cli.png ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── DAT303 ├── 01_ProductRecommendations │ ├── data │ │ └── amazon.csv │ ├── images │ │ └── arch_product_recommendation.png │ └── product-recommendations.ipynb ├── 02_QuestionAndAnswering │ ├── .gitignore │ ├── CODE_OF_CONDUCT.md │ ├── CONTRIBUTING.md │ ├── LICENSE │ ├── README.md │ ├── app.py │ ├── data │ │ ├── Amazon Aurora FAQs.pdf │ │ └── Amazon Bedrock FAQs.pdf │ ├── env.example │ ├── htmlTemplates.py │ ├── rag_app.py │ ├── requirements.txt │ └── static │ │ ├── Powered-By_logo-stack_RGB_REV.png │ │ └── RAG_APG.png ├── 03_ResponseStreaming │ ├── CODE_OF_CONDUCT.md │ ├── CONTRIBUTING.md │ ├── LICENSE │ ├── README.md │ ├── app.py │ ├── data │ │ ├── AMZN-2019-Shareholder-Letter.pdf │ │ ├── AMZN-2020-Shareholder-Letter.pdf │ │ ├── AMZN-2021-Shareholder-Letter.pdf │ │ └── AMZN-2022-Shareholder-Letter.pdf │ ├── htmlTemplates.py │ ├── requirements.txt │ ├── static │ │ ├── Powered-By_logo-stack_RGB_REV.png │ │ └── 
Streaming_RAG_APG.png │ └── streaming_app.py ├── README.md └── scripts │ └── prereq.sh ├── DAT307 ├── data │ └── dump.sql.gz ├── knowledge-base │ ├── documents │ │ ├── Amazon-Aurora-DB-engines-for-Blue_Green-Deployments.pdf │ │ ├── Amazon-RDS-Blue_Green-Deployments.pdf │ │ ├── aurora-postgresql-relnotes.pdf │ │ └── rds-postgresql-relnotes.pdf │ └── runbooks │ │ ├── exercise │ │ └── rds_acu_capacity_remediation_steps.md │ │ ├── rds_cpu_remediation_steps.md │ │ └── rds_iops_remediation_steps.md ├── lambda │ ├── api-action-runbook-kb.py │ ├── api-get-incidents.py │ ├── api-list-runbook-kb.py │ ├── cw-ingest-to-dynamodb.py │ ├── idr-bedrock-agent-action-group-good.py │ ├── idr-bedrock-agent-action-group.py │ └── qa-bedrock-agent-action-group.py ├── loadtest │ └── stress_test.py ├── notebooks │ ├── .DS_Store │ ├── images │ │ └── gen_embeddings.png │ ├── module-1-part-1-tsne.ipynb │ └── module-1-part-2-embeddings-and-pgvector.ipynb ├── script │ ├── agent_action_group_for_acu.json │ ├── agent_action_group_for_acu.sh │ ├── cloudwatch_alarm_for_acu.sh │ ├── fix_cloud9.sh │ ├── lambda_update_for_acu.sh │ ├── misc │ │ ├── lambda │ │ │ ├── auto-remediate │ │ │ │ ├── alertToaction.py │ │ │ │ ├── idr-agent-action-group-fn.py │ │ │ │ └── idr-fn.py │ │ │ ├── idr_get_active_alerts │ │ │ │ └── idr_get_active_alerts.py │ │ │ ├── ingest-dynamo │ │ │ │ └── ingestdynamodb.py │ │ │ ├── list-runbook-steps-kb │ │ │ │ └── list-runbook-steps-kb.py │ │ │ ├── psycopg2_layer │ │ │ │ ├── Makefile │ │ │ │ ├── psycopg2.zip │ │ │ │ └── publish_lambda_layer.sh │ │ │ ├── publish_lambda.sh │ │ │ ├── runbook-steps-action │ │ │ │ └── runbook-steps-action.py │ │ │ ├── s3upload │ │ │ │ ├── Dockerfile │ │ │ │ ├── app.py │ │ │ │ └── requirements.txt │ │ │ └── stress.test │ │ │ │ └── stress.test.lambda.py │ │ └── validate_api.py │ └── prereq.sh └── ui │ ├── .streamlit │ └── config.toml.old │ ├── image │ ├── aws_logo.png │ ├── demo.png │ ├── idr_logo.png │ ├── incident_management.png │ └── powered_by_aws.png │ ├── index.py │ ├── page │ ├── all_incidents.py │ ├── app.orig.py │ ├── login_page.py │ ├── login_page.py.orig │ ├── pending_incidents.py │ └── signup_page.py │ ├── requirements.txt │ └── utils │ ├── apigw_handler.py │ ├── cognito_handler.py │ └── init_session.py ├── DAT326 ├── env_sample ├── requirements.txt ├── static │ └── AZFlights.jpg ├── travel_knowledge_base.csv └── valkey-chatbot.py ├── LICENSE ├── README.md ├── data ├── Amazon Aurora FAQs.pdf ├── Amazon Bedrock FAQs.pdf └── postgresql-16-US.pdf ├── scripts └── prereq.sh └── setup_cloud9.sh /.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints 2 | -------------------------------------------------------------------------------- /01_ProductRecommendations/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /01_ProductRecommendations/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. 
Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 
60 | -------------------------------------------------------------------------------- /01_ProductRecommendations/LICENSE: -------------------------------------------------------------------------------- 1 | MIT No Attribution 2 | 3 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so. 10 | 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 13 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 14 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 15 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 16 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 17 | 18 | -------------------------------------------------------------------------------- /01_ProductRecommendations/bedrock_requirements.txt: -------------------------------------------------------------------------------- 1 | pandarallel==1.6.5 2 | pgvector==0.3.6 3 | boto3==1.36.0 4 | psycopg==3.2.4 5 | numexpr==2.10.2 6 | awscli==1.37.0 7 | -------------------------------------------------------------------------------- /01_ProductRecommendations/opensource_requirements.txt: -------------------------------------------------------------------------------- 1 | psycopg2-binary==2.9.10 2 | pgvector==0.3.6 3 | tqdm==4.67.1 4 | boto3==1.36.0 5 | requests==2.32.3 6 | scikit-image==0.25.0 7 | -------------------------------------------------------------------------------- /01_ProductRecommendations/requirements.txt: -------------------------------------------------------------------------------- 1 | packaging==24.2 2 | setuptools==65.5.0 3 | -------------------------------------------------------------------------------- /01_ProductRecommendations/static/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aurora-postgresql-pgvector/81e197513381392765f190dbf0b25b90b2137e09/01_ProductRecommendations/static/architecture.png -------------------------------------------------------------------------------- /02_RetrievalAugmentedGeneration/02_QuestionAnswering_Bedrock_LLMs/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ 161 | -------------------------------------------------------------------------------- /02_RetrievalAugmentedGeneration/02_QuestionAnswering_Bedrock_LLMs/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. -------------------------------------------------------------------------------- /02_RetrievalAugmentedGeneration/02_QuestionAnswering_Bedrock_LLMs/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 
45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. -------------------------------------------------------------------------------- /02_RetrievalAugmentedGeneration/02_QuestionAnswering_Bedrock_LLMs/LICENSE: -------------------------------------------------------------------------------- 1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | // SPDX-License-Identifier: MIT-0 3 | 4 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of this 7 | software and associated documentation files (the "Software"), to deal in the Software 8 | without restriction, including without limitation the rights to use, copy, modify, 9 | merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 13 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 14 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 15 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 16 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 17 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /02_RetrievalAugmentedGeneration/02_QuestionAnswering_Bedrock_LLMs/README.md: -------------------------------------------------------------------------------- 1 | # 🤖 Enterprise RAG Question-Answering System 2 | 3 | Build an enterprise-ready Retrieval Augmented Generation (RAG) application leveraging Amazon Web Services and open-source technologies. This implementation creates an intelligent question-answering system that combines the power of vector databases with state-of-the-art language models. 4 | 5 | ## 🎯 Overview 6 | 7 | This solution demonstrates the implementation of a production-ready RAG system using: 8 | 9 | - 🚀 Amazon Bedrock for accessing foundation models 10 | - 📊 pgvector extension on Amazon Aurora PostgreSQL for vector operations 11 | - 🧠 Anthropic's Claude for advanced language understanding 12 | - 💫 Titan Text for generating high-quality text embeddings 13 | - 🔗 LangChain for orchestrating AI components 14 | - 🖥️ Streamlit for creating an intuitive user interface 15 | 16 | ## 🏗️ Architecture 17 | 18 | ![Architecture](static/RAG_APG.png) 19 | 20 | ## ⚙️ System Workflow 21 | 22 | Our RAG implementation follows a sophisticated pipeline to deliver accurate answers: 23 | 24 | 1. 
**Document Ingestion** 📄 25 | - Processes PDF documents through advanced text extraction 26 | - Maintains document structure and metadata 27 | 28 | 2. **Semantic Processing** 🔄 29 | - Implements intelligent text chunking algorithms 30 | - Preserves context across document segments 31 | 32 | 3. **Vector Embedding** 🎯 33 | - Utilizes Amazon Bedrock's Titan Text for generating embeddings 34 | - Creates high-dimensional vector representations of content 35 | 36 | 4. **Query Processing** 💭 37 | - Accepts natural language questions 38 | - Converts queries into compatible vector representations 39 | 40 | 5. **Context Retrieval** 🔍 41 | - Performs semantic similarity matching 42 | - Identifies relevant document segments 43 | 44 | 6. **Answer Generation** ✨ 45 | - Leverages Anthropic's Claude for response synthesis 46 | - Ensures responses are grounded in source documents 47 | 48 | ## 🚀 Getting Started 49 | 50 | ### Prerequisites 51 | 52 | - Python 3.9 or higher 53 | - AWS account with Bedrock access 54 | - Amazon Aurora PostgreSQL cluster 55 | - Git 56 | 57 | ### Installation 58 | 59 | 1. Clone the repository: 60 | ```bash 61 | git clone [repository-url] 62 | cd [repository-name] 63 | ``` 64 | 65 | 2. Create and activate a virtual environment: 66 | ```bash 67 | python3.9 -m venv env 68 | source env/bin/activate 69 | ``` 70 | 71 | 3. Configure environment variables: 72 | ```bash 73 | # Create .env file with the following structure 74 | PGVECTOR_DRIVER='psycopg2' 75 | PGVECTOR_USER='' 76 | PGVECTOR_PASSWORD='' 77 | PGVECTOR_HOST='' 78 | PGVECTOR_PORT=5432 79 | PGVECTOR_DATABASE='' 80 | ``` 81 | 82 | 4. Install dependencies: 83 | ```bash 84 | pip install -r requirements.txt 85 | ``` 86 | 87 | ### Database Setup 88 | 89 | 1. Connect to your Aurora PostgreSQL cluster 90 | 2. Enable the pgvector extension: 91 | ```sql 92 | CREATE EXTENSION vector; 93 | ``` 94 | 95 | ## 💻 Usage 96 | 97 | 1. Launch the application: 98 | ```bash 99 | streamlit run app.py 100 | ``` 101 | 102 | 2. Navigate to the web interface in your browser 103 | 3. Upload PDF documents through the provided interface 104 | 4. Start asking questions about your documents 105 | 106 | ## 🔒 Security Considerations 107 | 108 | - Implement appropriate IAM roles and permissions 109 | - Secure database connections using SSL/TLS 110 | - Follow AWS security best practices for Bedrock access 111 | - Properly handle sensitive information in environment variables 112 | 113 | ## 📈 Performance Optimization 114 | 115 | - Utilize connection pooling for database operations 116 | - Implement caching strategies where appropriate 117 | - Configure proper chunk sizes for optimal retrieval 118 | - Monitor and adjust embedding dimensions based on requirements 119 | 120 | ## 🤝 Contributing 121 | 122 | This project is maintained for educational purposes and demonstrates AWS best practices. While we don't accept direct contributions, we encourage: 123 | 124 | - Creating issues for bugs or suggested improvements 125 | - Forking the repository for personal customization 126 | - Sharing your experiences and optimizations 127 | 128 | ## 📝 License 129 | 130 | This project is licensed under the [MIT-0 License](https://spdx.org/licenses/MIT-0.html) - see the LICENSE file for details. 
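
To make the retrieval side of this pipeline concrete, here is a minimal sketch of steps 3-5 (embedding and context retrieval) against pgvector. It is an illustration rather than the application's own code: the app builds its vector store through LangChain's PGVector integration, so the `documents` table, the sample question, and the hard-coded region below are assumptions; the Titan model ID and the `PGVECTOR_*` variables come from the setup described above.

```python
import json
import os

import boto3
import psycopg2

# Region is an assumption; use the one where Bedrock access was granted.
bedrock = boto3.client("bedrock-runtime", region_name="us-east-1")

def embed(text: str) -> list[float]:
    """Generate a 1536-dimension embedding with Amazon Titan Text Embeddings v1."""
    response = bedrock.invoke_model(
        modelId="amazon.titan-embed-text-v1",
        body=json.dumps({"inputText": text}),
    )
    return json.loads(response["body"].read())["embedding"]

conn = psycopg2.connect(
    host=os.environ["PGVECTOR_HOST"],
    port=os.environ.get("PGVECTOR_PORT", "5432"),
    user=os.environ["PGVECTOR_USER"],
    password=os.environ["PGVECTOR_PASSWORD"],
    dbname=os.environ["PGVECTOR_DATABASE"],
)

with conn, conn.cursor() as cur:
    cur.execute("CREATE EXTENSION IF NOT EXISTS vector")
    # Hypothetical table standing in for the store that LangChain manages.
    cur.execute(
        "CREATE TABLE IF NOT EXISTS documents "
        "(id bigserial PRIMARY KEY, content text, embedding vector(1536))"
    )
    # Step 5: semantic similarity matching; <=> is pgvector's cosine distance.
    cur.execute(
        "SELECT content FROM documents ORDER BY embedding <=> %s::vector LIMIT 3",
        (json.dumps(embed("What is Amazon Aurora?")),),
    )
    context = [row[0] for row in cur.fetchall()]
```

The rows gathered in `context` are what step 6 stuffs into Claude's prompt; the real app delegates all of this plumbing to LangChain.
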
131 | -------------------------------------------------------------------------------- /02_RetrievalAugmentedGeneration/02_QuestionAnswering_Bedrock_LLMs/data/Amazon Aurora FAQs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aurora-postgresql-pgvector/81e197513381392765f190dbf0b25b90b2137e09/02_RetrievalAugmentedGeneration/02_QuestionAnswering_Bedrock_LLMs/data/Amazon Aurora FAQs.pdf -------------------------------------------------------------------------------- /02_RetrievalAugmentedGeneration/02_QuestionAnswering_Bedrock_LLMs/data/Amazon Bedrock FAQs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aurora-postgresql-pgvector/81e197513381392765f190dbf0b25b90b2137e09/02_RetrievalAugmentedGeneration/02_QuestionAnswering_Bedrock_LLMs/data/Amazon Bedrock FAQs.pdf -------------------------------------------------------------------------------- /02_RetrievalAugmentedGeneration/02_QuestionAnswering_Bedrock_LLMs/env.example: -------------------------------------------------------------------------------- 1 | PGVECTOR_USER='' 2 | PGVECTOR_PASSWORD='' 3 | PGVECTOR_HOST='' 4 | PGVECTOR_PORT=5432 5 | PGVECTOR_DATABASE='' 6 | -------------------------------------------------------------------------------- /02_RetrievalAugmentedGeneration/02_QuestionAnswering_Bedrock_LLMs/htmlTemplates.py: -------------------------------------------------------------------------------- 1 | css = ''' 2 |
-------------------------------------------------------------------------------- /DAT307/ui/page/all_incidents.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | from utils.apigw_handler import get_incidents 3 | from utils.init_session import reset_session 4 | import pandas as pd 5 | 6 | # NOTE: the top of this file was lost in extraction; lines 1-28 are a 7 | # reconstruction modeled on the identical get_kpi helper in page/app.orig.py. 8 | def get_kpi(iconname, metricname, metricvalue): 9 | wch_colour_box = (0,204,102) 10 | wch_colour_font = (0,0,0) 11 | fontsize = 32 12 | valign = "left" 13 | lnk = ''  # the original <link> tag to the Font Awesome stylesheet was stripped 14 | 15 | htmlstr = f"""<p style='background-color: rgb({wch_colour_box[0]}, {wch_colour_box[1]}, {wch_colour_box[2]}, 0.75); 16 | color: rgb({wch_colour_font[0]}, {wch_colour_font[1]}, {wch_colour_font[2]}); 17 | font-size: {fontsize}px; 18 | border-radius: 7px; 19 | padding-left: 12px; 20 | padding-top: 18px; 21 | padding-bottom: 18px; 22 | line-height: 25px; 23 | text-align: {valign};'> 24 | <!-- reconstructed markup: the original tags were stripped in extraction, 25 | so the exact styling is an approximation --> 26 | <i class='{iconname} fa-xs'></i>&nbsp;{metricvalue} 27 | <br><span style='font-size: 15px; margin-top: 0;'> 28 | {metricname}</span></p>

""" 29 | return lnk + htmlstr 30 | 31 | def all_incident_page(): 32 | incidents = get_incidents("all") 33 | if len(incidents) == 0 : 34 | dfall = pd.DataFrame(columns=["incidentActionTrace","incidentData","incidentStatus","incidentIdentifier","incidentRunbook","incidentTime","sk","incidentType","lastUpdateBy","pk","lastUpdate"]) 35 | else: 36 | dfall = pd.DataFrame(incidents) 37 | 38 | dfall = dfall.drop("incidentData" , axis=1) 39 | 40 | eventCount = len(incidents) 41 | instanceCount = str(dfall['incidentIdentifier'].nunique()) 42 | alertTypeCount = str(dfall['incidentType'].nunique()) 43 | 44 | st.set_page_config(page_title="DAT307-IDR: Amazon RDS Incidents", layout="wide") 45 | st.markdown(""" 46 | 54 | """, unsafe_allow_html=True) 55 | with st.sidebar: 56 | st.sidebar.image("image/idr_logo.png") 57 | st.subheader("DAT307 - Build a Generative AI incident detection and response system powered by Amazon Aurora") 58 | st.divider() 59 | 60 | if st.button("Pending incidents"): 61 | st.session_state['page'] = 'pending_incidents' 62 | st.rerun() 63 | if st.button("All incidents"): 64 | pass 65 | if st.button("Logout"): 66 | reset_session() 67 | st.rerun() 68 | 69 | st.sidebar.image("image/powered_by_aws.png",width=120) 70 | 71 | 72 | st.title("All incidents") 73 | st.subheader("Metric Summary", divider=True) 74 | col1, col2, col3 = st.columns(3) 75 | col1.markdown(get_kpi("fa-solid fa-circle-exclamation","Total incidents",eventCount), unsafe_allow_html=True) 76 | col2.markdown(get_kpi("fa-solid fa-server","Total Unique Instance",instanceCount), unsafe_allow_html=True) 77 | col3.markdown(get_kpi("fa-solid fa-bell","Total Unique Alert Type",alertTypeCount), unsafe_allow_html=True) 78 | 79 | col4, col5 = st.columns([10,1]) 80 | col4.markdown("#### Incident Summary") 81 | col4.write("Here are the list of active incidents") 82 | col4.write("Please select an incident to process by clicking the first column of the row") 83 | 84 | print("Display table output") 85 | print(dfall) 86 | event = col4.dataframe(dfall, 87 | on_select="rerun", 88 | selection_mode="single-row", 89 | hide_index=True, 90 | column_config={ 91 | "incidentType": "Incident Type", 92 | "pk": "Session ID", 93 | "incidentIdentifier": "Database Instance", 94 | "incidentStatus": "Incident Status", 95 | "incidentTime": "Incident Time" 96 | }, 97 | column_order=("pk","incidentIdentifier","incidentType","incidentStatus","incidentTime") 98 | ) 99 | col4.markdown("#### Event Details") 100 | col4.divider() 101 | rows = event['selection']['rows'] 102 | if len(rows) != 0: 103 | print(dfall) 104 | col4.write("Runbook information") 105 | print(dfall.iloc[rows[0]]['incidentRunbook']) 106 | if dfall.iloc[rows[0]]['incidentRunbook'] != "None": 107 | col4.json(dfall.iloc[rows[0]]['incidentRunbook']) 108 | col4.write("Action trace") 109 | if dfall.iloc[rows[0]]['incidentActionTrace'] != "None": 110 | col4.json(dfall.iloc[rows[0]]['incidentActionTrace']) 111 | -------------------------------------------------------------------------------- /DAT307/ui/page/app.orig.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | from utils.apigw_handler import get_incidents, get_runbook, incident_remediate 3 | from utils.init_session import reset_session 4 | import pandas as pd 5 | import json 6 | 7 | def get_kpi(iconname, metricname, metricvalue): 8 | wch_colour_box = (0,204,102) 9 | wch_colour_font = (0,0,0) 10 | fontsize = 32 11 | valign = "left" 12 | lnk = '' 13 | 14 | htmlstr = f"""

15 | <p style='background-color: rgb({wch_colour_box[0]}, {wch_colour_box[1]}, {wch_colour_box[2]}, 0.75); 16 | color: rgb({wch_colour_font[0]}, {wch_colour_font[1]}, {wch_colour_font[2]}); 17 | font-size: {fontsize}px; 18 | border-radius: 7px; 19 | padding-left: 12px; 20 | padding-top: 18px; 21 | padding-bottom: 18px; 22 | line-height: 25px; 23 | text-align: {valign};'> 24 | <!-- reconstructed markup: the original tags were stripped in extraction, 25 | so the exact styling is an approximation --> 26 | <i class='{iconname} fa-xs'></i>&nbsp;{metricvalue} 27 | <br><span style='font-size: 15px; margin-top: 0;'> 28 | {metricname}</span></p>

""" 29 | return lnk + htmlstr 30 | 31 | def app_page(): 32 | incidents = get_incidents("pending") 33 | if len(incidents) == 0 : 34 | dfall = pd.DataFrame(columns=["incidentActionTrace","incidentData","incidentStatus","incidentIdentifier","incidentRunbook","incidentTime","sk","incidentType","lastUpdateBy","pk","lastUpdate"]) 35 | else: 36 | dfall = pd.DataFrame(incidents) 37 | 38 | eventCount = len(incidents) 39 | instanceCount = str(dfall['incidentIdentifier'].nunique()) 40 | alertTypeCount = str(dfall['incidentType'].nunique()) 41 | 42 | st.set_page_config(page_title="DAT307-IDR: Amazon RDS Incidents", layout="wide") 43 | 44 | with st.sidebar: 45 | st.sidebar.image("image/idr_logo.png") 46 | st.subheader("DAT307 - Build a Generative AI incident detection and response system powered by Amazon Aurora") 47 | st.divider() 48 | 49 | if st.button("Logout"): 50 | reset_session() 51 | st.rerun() 52 | 53 | st.sidebar.image("image/powered_by_aws.png",width=120) 54 | 55 | 56 | st.title("Incidents") 57 | st.subheader("Metric Summary", divider=True) 58 | col1, col2, col3 = st.columns(3) 59 | #col1.metric(label="Total Pending Events", value=eventCount, delta_color="inverse") 60 | #col2.metric(label="Total Unique Instance", value=instanceCount) 61 | #col3.metric(label="Total Unique Alert Type", value=alertTypeCount) 62 | col1.markdown(get_kpi("fa-solid fa-circle-exclamation","Total Pending Events",eventCount), unsafe_allow_html=True) 63 | col2.markdown(get_kpi("fa-solid fa-server","Total Unique Instance",instanceCount), unsafe_allow_html=True) 64 | col3.markdown(get_kpi("fa-solid fa-bell","Total Unique Alert Type",alertTypeCount), unsafe_allow_html=True) 65 | 66 | col4, col5 = st.columns([3,1]) 67 | col4.markdown("#### Event Summary") 68 | col4.write("Here are the list of active incidents") 69 | col4.write("Please select an incident to process by clicking the first column of the row") 70 | 71 | print("Display table output") 72 | print(dfall) 73 | event = col4.dataframe(dfall, 74 | on_select="rerun", 75 | selection_mode="single-row", 76 | hide_index=True, 77 | column_config={ 78 | "incidentType": "Incident Type", 79 | "pk": "Session ID", 80 | "incidentIdentifier": "Database Instance", 81 | "incidentStatus": "Incident Status", 82 | "incidentTime": "Incident Time" 83 | }, 84 | column_order=("pk","incidentIdentifier","incidentType","incidentStatus","incidentTime") 85 | ) 86 | #col4.markdown("#### Event Details") 87 | #col4.divider() 88 | col5.markdown("#### User Action") 89 | col5.write("Here are the actions that requires manual user intervention") 90 | runbook_action = col5.button("Get Runbook") 91 | remediate_action = col5.button("Remediate Incident") 92 | col5.divider() 93 | rows = event['selection']['rows'] 94 | pk = None 95 | description = None 96 | if len(rows) != 0: 97 | print(dfall) 98 | pk = dfall.iloc[rows[0]]['pk'] 99 | description = json.loads(dfall.iloc[rows[0]]['incidentData'])['configuration']['description'] 100 | print(pk) 101 | #col4.json(dfall.iloc[rows[0]].to_json(orient='records')) 102 | 103 | if runbook_action: 104 | if pk is None: 105 | col4.error("Please select an incident to get the runbook for the incident") 106 | return 107 | with col4.status("Retrieving incident runbook..."): 108 | runbook = get_runbook(pk,description) 109 | col4.markdown("***Runbook Instructions for " + pk + "***") 110 | col4.text_area("Runbook Instructions", runbook['runbook'],height=200, label_visibility="hidden") 111 | 112 | if remediate_action: 113 | if pk is None: 114 | col4.error("Please select an incident 
to auto-remediate the incident") 115 | return 116 | with col4.status("Remediating incident..."): 117 | incident = incident_remediate(pk,description) 118 | col4.markdown("***Status of auto remediation for " + pk + "***") 119 | col4.json(incident['result']) 120 | 121 | -------------------------------------------------------------------------------- /DAT307/ui/page/login_page.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | from utils.cognito_handler import authenticate_user 3 | 4 | # Pages 5 | def login_page(): 6 | #st.set_page_config(page_title="DAT307-IDR: User Login", layout="wide") 7 | st.set_page_config(page_title="DAT307-IDR: User Login") 8 | st.markdown(""" 9 | 17 | """, unsafe_allow_html=True) 18 | st.image("image/aws_logo.png",width=120) 19 | st.header("DAT307 - Build a Generative AI incident detection and response system powered by Amazon Aurora") 20 | with st.empty().container(border=True): 21 | col1, _, col2 = st.columns([10,1,10]) 22 | 23 | with col1: 24 | st.write("") 25 | st.write("") 26 | st.image("image/incident_management.png") 27 | 28 | with col2: 29 | #st.title("Login Page") 30 | #email = st.text_input("E-mail",value="test1@test.com") 31 | #password = st.text_input("Password", type="password",value="IDR@dat307") 32 | 33 | st.write("") 34 | st.write("") 35 | st.write("") 36 | st.write("") 37 | st.write("") 38 | st.write("") 39 | if st.button("**Login as Demo user**"): 40 | auth, token, message = authenticate_user() 41 | if auth: 42 | st.session_state['authenticated'] = True 43 | st.session_state['token'] = token 44 | st.session_state['page'] = 'pending_incidents' 45 | st.rerun() 46 | else: 47 | st.error(message) 48 | -------------------------------------------------------------------------------- /DAT307/ui/page/login_page.py.orig: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | from utils.cognito_handler import authenticate_user 3 | 4 | # Pages 5 | def login_page(): 6 | #st.set_page_config(page_title="DAT307-IDR: User Login", layout="wide") 7 | st.set_page_config(page_title="DAT307-IDR: User Login") 8 | st.markdown(""" 9 | 17 | """, unsafe_allow_html=True) 18 | st.image("image/aws_logo.png",width=120) 19 | st.header("DAT307 - Build a Generative AI incident detection and response system powered by Amazon Aurora") 20 | with st.empty().container(border=True): 21 | col1, _, col2 = st.columns([10,1,10]) 22 | 23 | with col1: 24 | st.write("") 25 | st.write("") 26 | st.image("image/incident_management.png") 27 | 28 | with col2: 29 | st.title("Login Page") 30 | email = st.text_input("E-mail",value="test1@test.com") 31 | password = st.text_input("Password", type="password",value="IDR@dat307") 32 | 33 | if st.button("Login"): 34 | if not (email and password): 35 | st.error("Please provide email and password") 36 | else: 37 | auth, token, message = authenticate_user(email, password) 38 | if auth: 39 | st.session_state['authenticated'] = True 40 | st.session_state['token'] = token 41 | st.session_state['page'] = 'pending_incidents' 42 | st.rerun() 43 | else: 44 | st.error(message) 45 | if st.button("Sign Up"): 46 | st.session_state['page'] = 'signup' 47 | st.rerun() 48 | -------------------------------------------------------------------------------- /DAT307/ui/page/signup_page.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | import re 3 | from utils.cognito_handler import sign_up_user 4 | 
from utils.init_session import reset_session 5 | 6 | def is_valid_email(email): 7 | """Check if the provided email is valid using regex.""" 8 | email_regex = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$' 9 | return re.match(email_regex, email) is not None 10 | 11 | def input_field(input_param, type): 12 | """Render an input field based on the type and store the value in session state.""" 13 | if type == 'text': 14 | st.session_state[input_param] = st.text_input(input_param) 15 | elif type == 'number': 16 | st.session_state[input_param] = st.number_input(input_param, step=1) 17 | 18 | def signup_page(): 19 | st.set_page_config(page_title="DAT307-IDR: User Registration", layout="wide") 20 | st.markdown(""" 21 | 29 | """, unsafe_allow_html=True) 30 | 31 | """Render the signup page with optional extra input parameters and password confirmation.""" 32 | if st.session_state['verifying']: 33 | auth,message = sign_up_user(st.session_state['email'],st.session_state['password']) 34 | 35 | if auth : 36 | #with st.empty().container(border=True): 37 | #st.title(f"User {st.session_state['email']} created successfully. Please login") 38 | st.success(f"User {st.session_state['email']} created successfully. Please login", icon="✅") 39 | st.session_state['verifying'] = False 40 | else: 41 | print("I am here - showing exception: " + str(st.session_state['verifying'])) 42 | st.session_state["signup_error"] = message 43 | st.session_state['verifying'] = False 44 | st.error(st.session_state['signup_error']) 45 | st.rerun() 46 | 47 | else: 48 | with st.empty().container(border=True): 49 | st.title("Sign Up Page") 50 | 51 | # Email input with validation 52 | st.session_state['email'] = st.text_input("Email") 53 | if st.session_state['email'] and not is_valid_email(st.session_state['email']): 54 | st.error("Please enter a valid email address") 55 | 56 | # Password input 57 | st.session_state['password'] = st.text_input("Password", type='password') 58 | 59 | # Confirm password if required 60 | confirm_password = st.text_input("Confirm Password", type='password') 61 | print("Error state") 62 | print(st.session_state['signup_error']) 63 | if st.session_state['signup_error']: 64 | st.error(st.session_state['signup_error']) 65 | 66 | # Validate all required fields before proceeding 67 | if st.session_state['email'] and st.session_state['password'] and confirm_password \ 68 | and (st.session_state['password'] == confirm_password): 69 | 70 | if st.button("Register"): 71 | st.session_state['verifying'] = True 72 | st.rerun() 73 | else: 74 | if st.session_state['password'] != confirm_password: 75 | st.error("Passwords do not match") 76 | elif st.button("Register"): 77 | st.error("Please fill in all required fields") 78 | print("I am here - empty container") 79 | 80 | print("show another login button, state page is " + st.session_state['page']) 81 | with st.sidebar: 82 | st.sidebar.image("image/idr_logo.png") 83 | st.subheader("DAT307 - Build a Generative AI incident detection and response system powered by Amazon Aurora") 84 | st.divider() 85 | 86 | if st.button("Back to Login"): 87 | print("I am here - session state verifying") 88 | print(st.session_state['page']) 89 | st.session_state['verifying'] = False 90 | st.session_state['page'] = 'login' 91 | st.rerun() 92 | 93 | st.sidebar.image("image/powered_by_aws.png",width=120) -------------------------------------------------------------------------------- /DAT307/ui/requirements.txt: -------------------------------------------------------------------------------- 1 | 
boto3==1.34.107 2 | botocore==1.34.107 3 | python-dotenv==1.0.1 4 | Requests==2.32.3 5 | streamlit==1.37.1 6 | -------------------------------------------------------------------------------- /DAT307/ui/utils/apigw_handler.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | from dotenv import load_dotenv 3 | import os 4 | import requests 5 | import sys 6 | 7 | load_dotenv() 8 | APIGWURL=os.getenv('APIGWURL') 9 | APIGWSTAGE = os.getenv('APIGWSTAGE') 10 | AWS_REGION=os.getenv('AWS_REGION') 11 | 12 | 13 | def get_incidents(incidentStatus): 14 | headers = {'Authorization': f"{st.session_state['token']}", 'Content-Type': 'application/json'} 15 | url = f'{APIGWURL}{APIGWSTAGE}/get-incidents' 16 | try: 17 | response = requests.get(url,params={"incidentStatus": incidentStatus}, headers = headers) 18 | response.raise_for_status() 19 | items = response.json()['Items'] 20 | if len(items) == 0 : 21 | print("No incidents available") 22 | 23 | return response.json()['Items'] 24 | except requests.exceptions.RequestException as e: 25 | print (f"Error in calling /get-incidents API: {e}") 26 | return None 27 | 28 | 29 | def get_runbook(id, description): 30 | headers = {'Authorization': f"{st.session_state['token']}", 'Content-Type': 'application/json'} 31 | url = f'{APIGWURL}{APIGWSTAGE}/get-incident-runbook' 32 | try: 33 | response = requests.get(url, params={"query": description, "id": id}, headers = headers) 34 | response.raise_for_status() 35 | return response.json() 36 | except requests.exceptions.RequestException as e: 37 | print (f"Error in calling /get-incident-runbook API: {e}") 38 | print(sys.exc_info()) 39 | return { "runbook" : 40 | {"ERROR" : "Error getting the runbook. Please make sure the required FM's are enable"} 41 | } 42 | 43 | def incident_remediate(id, description): 44 | print("Calling the agent to take action") 45 | headers = {'Authorization': f"{st.session_state['token']}", 'Content-Type': 'application/json'} 46 | url = f'{APIGWURL}{APIGWSTAGE}/post-incident-action' 47 | try: 48 | data = {'action':description, 'id': id} 49 | response = requests.post(url, headers = headers, json=data) 50 | response.raise_for_status() 51 | return response.json() 52 | except requests.exceptions.RequestException as e: 53 | print (f"Error in calling /post-incident-action API: {e}") 54 | print(sys.exc_info()) 55 | return {"result": 56 | {"ERROR" : "Error calling the remediate action. 
Please try preparing the agent"} 57 | } 58 | -------------------------------------------------------------------------------- /DAT307/ui/utils/cognito_handler.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | from dotenv import load_dotenv 3 | import os 4 | import boto3 5 | from botocore.exceptions import ClientError 6 | 7 | load_dotenv() 8 | APP_CLIENT_ID=os.getenv('APP_CLIENT_ID') 9 | USER_POOL_ID = os.getenv('USER_POOL_ID') 10 | AWS_REGION=os.getenv('AWS_REGION') 11 | 12 | cognito_idp_client = boto3.client('cognito-idp', region_name=AWS_REGION) 13 | 14 | def authenticate_user(username="demo@dat307.com", password="Welcome@reInvent2024"): 15 | try: 16 | response = cognito_idp_client.initiate_auth( 17 | ClientId=APP_CLIENT_ID, 18 | AuthFlow='USER_PASSWORD_AUTH', 19 | AuthParameters={ 'USERNAME': username,'PASSWORD': password } 20 | ) 21 | print (response) 22 | return True, response['AuthenticationResult']['IdToken'], None 23 | except ClientError as err: 24 | print(f"Couldn't login {username} due to {err.response['Error']['Message']}") 25 | return False, None, err.response["Error"]["Message"] 26 | 27 | def sign_up_user(username, password): 28 | try: 29 | kwargs = { 30 | "ClientId": APP_CLIENT_ID, 31 | "Username": username, 32 | "Password": password, 33 | "UserAttributes": [{"Name": "email", "Value": username}], 34 | } 35 | response = cognito_idp_client.sign_up(**kwargs) 36 | print(response) 37 | confirmed = response["UserConfirmed"] 38 | print(f"Created the user {username} successfully") 39 | return True, None 40 | 41 | except ClientError as err: 42 | if err.response["Error"]["Code"] == "UsernameExistsException": 43 | print(f"Couldn't sign up {username}. {err.response['Error']['Message']}") 44 | return False, err.response["Error"]["Message"] 45 | 46 | print(f"Couldn't sign up {username}. 
{err.response['Error']['Message']}") 47 | return False, err.response["Error"]["Message"] 48 | -------------------------------------------------------------------------------- /DAT307/ui/utils/init_session.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | 3 | def init_session(): 4 | if 'authenticated' not in st.session_state: 5 | st.session_state['authenticated'] = False 6 | if 'token' not in st.session_state: 7 | st.session_state['token'] = "" 8 | if 'page' not in st.session_state: 9 | st.session_state['page'] = 'login' 10 | if 'guest_mode' not in st.session_state: 11 | st.session_state['guest_mode'] = False 12 | if 'verifying' not in st.session_state: 13 | st.session_state['verifying'] = False 14 | if 'email' not in st.session_state: 15 | st.session_state['email'] = "" 16 | if 'password' not in st.session_state: 17 | st.session_state['password'] = "" 18 | if 'extra_input_params' not in st.session_state: 19 | st.session_state['extra_input_params'] = {} 20 | 21 | def reset_session(): 22 | st.session_state['authenticated'] = False 23 | st.session_state['page'] = 'login' 24 | st.session_state['guest_mode'] = False 25 | st.session_state['verifying'] = False 26 | st.session_state['otp'] = "" 27 | st.session_state['email'] = "" 28 | st.session_state['password'] = "" 29 | st.session_state['signup_error'] = "" 30 | 31 | -------------------------------------------------------------------------------- /DAT326/env_sample: -------------------------------------------------------------------------------- 1 | ELASTICACHE_HOST='<>' 2 | ELASTICACHE_PORT=6379 3 | DB_HOST='<>' 4 | DB_NAME='<>' 5 | DB_USER='<>' 6 | DB_PASSWORD='<>' 7 | AWS_REGION='us-east-1' -------------------------------------------------------------------------------- /DAT326/requirements.txt: -------------------------------------------------------------------------------- 1 | streamlit 2 | boto3 3 | redis 4 | psycopg 5 | psycopg-binary 6 | uuid 7 | python-dotenv -------------------------------------------------------------------------------- /DAT326/static/AZFlights.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aurora-postgresql-pgvector/81e197513381392765f190dbf0b25b90b2137e09/DAT326/static/AZFlights.jpg -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | // SPDX-License-Identifier: MIT-0 3 | 4 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of this 7 | software and associated documentation files (the "Software"), to deal in the Software 8 | without restriction, including without limitation the rights to use, copy, modify, 9 | merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 13 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 14 | PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 15 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 16 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 17 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Generative AI Use Cases with pgvector, Aurora PostgreSQL and Amazon Bedrock 2 | 3 | [![Python 3.9+](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/) 4 | [![GitHub stars](https://img.shields.io/github/stars/aws-samples/aurora-postgresql-pgvector.svg)](https://github.com/aws-samples/aurora-postgresql-pgvector/stargazers) 5 | [![GitHub forks](https://img.shields.io/github/forks/aws-samples/aurora-postgresql-pgvector.svg)](https://github.com/aws-samples/aurora-postgresql-pgvector/network) 6 | [![GitHub issues](https://img.shields.io/github/issues/aws-samples/aurora-postgresql-pgvector.svg)](https://github.com/aws-samples/aurora-postgresql-pgvector/issues) 7 | [![GitHub pull requests](https://img.shields.io/github/issues-pr/aws-samples/aurora-postgresql-pgvector.svg)](https://github.com/aws-samples/aurora-postgresql-pgvector/pulls) 8 | [![License: MIT-0](https://img.shields.io/badge/License-MIT--0-yellow.svg)](https://spdx.org/licenses/MIT-0.html) 9 | 10 | > Explore powerful Generative AI applications using pgvector on Amazon Aurora PostgreSQL with Amazon Bedrock 11 | 12 | ## 🌟 Overview 13 | 14 | This repository demonstrates production-ready implementations using [**pgvector**](https://github.com/pgvector/pgvector), a powerful open-source PostgreSQL extension for vector similarity search. pgvector seamlessly integrates with PostgreSQL's native features, enabling sophisticated vector operations, indexing, and querying capabilities. 15 | 16 | ## 📚 Resources 17 | 18 | - 📖 [AWS Blog Post: Leverage pgvector and Amazon Aurora PostgreSQL for NLP, Chatbots and Sentiment Analysis](https://aws.amazon.com/blogs/database/leverage-pgvector-and-amazon-aurora-postgresql-for-natural-language-processing-chatbots-and-sentiment-analysis/) 19 | - 🎓 [AWS Workshop: Generative AI Use Cases with Aurora PostgreSQL and pgvector](https://catalog.workshops.aws/pgvector/en-US) 20 | 21 | ## 🚀 Use Cases 22 | 23 | This repository showcases the following production-ready implementations: 24 | 25 | 1. **Product Recommendations** 🛒 26 | - Implement intelligent product recommendation systems 27 | - Leverage vector similarity for personalized suggestions 28 | 29 | 2. **Retrieval Augmented Generation (RAG)** 🔄 30 | - Enhance LLM responses with relevant context 31 | - Implement efficient vector-based information retrieval 32 | 33 | 3. **Semantic Search and Sentiment Analysis** 🧠 34 | - Deploy sophisticated natural language search capabilities 35 | - Perform nuanced sentiment analysis on text data 36 | 37 | 4. **Knowledge Bases for Amazon Bedrock** 📚 38 | - Build scalable knowledge management systems 39 | - Integrate with Amazon Bedrock for enhanced AI capabilities 40 | 41 | 5. **Movie Recommendations** 🎬 42 | - Implement ML-based movie recommendation systems 43 | - Combine Aurora ML with Amazon Bedrock for sophisticated predictions 44 | 45 | 6. 
**Democratizing Data Insights with Amazon Q Business** 💼 46 | - Connect Amazon Q Business with Aurora PostgreSQL for enterprise-wide data access 47 | - Implement secure data exploration through user management and access control lists (ACLs) 48 | 49 | ## 🛠️ Getting Started 50 | 51 | 1. Clone the repository: 52 | ```bash 53 | git clone https://github.com/aws-samples/aurora-postgresql-pgvector.git 54 | cd aurora-postgresql-pgvector 55 | ``` 56 | 57 | 2. Follow the setup instructions in each use case directory for specific implementation details. 58 | 59 | ## 🤝 Contributing 60 | 61 | This repository is maintained for educational purposes and does not accept external contributions. However, you are encouraged to: 62 | - Fork the repository 63 | - Adapt the code for your specific needs 64 | - Share your learnings with the community 65 | 66 | ## 📄 License 67 | 68 | This project is licensed under the [MIT-0 License](https://spdx.org/licenses/MIT-0.html) - see the [LICENSE](LICENSE) file for details. 69 | 70 | ## 🔗 Related Projects 71 | 72 | - [pgvector](https://github.com/pgvector/pgvector) 73 | - [Amazon Aurora](https://aws.amazon.com/rds/aurora/) 74 | - [Amazon Bedrock](https://aws.amazon.com/bedrock/) 75 | 76 | --- 77 | 78 | **Note**: This repository is provided as-is and is intended for educational and demonstration purposes. 79 | -------------------------------------------------------------------------------- /data/Amazon Aurora FAQs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aurora-postgresql-pgvector/81e197513381392765f190dbf0b25b90b2137e09/data/Amazon Aurora FAQs.pdf -------------------------------------------------------------------------------- /data/Amazon Bedrock FAQs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aurora-postgresql-pgvector/81e197513381392765f190dbf0b25b90b2137e09/data/Amazon Bedrock FAQs.pdf -------------------------------------------------------------------------------- /data/postgresql-16-US.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aurora-postgresql-pgvector/81e197513381392765f190dbf0b25b90b2137e09/data/postgresql-16-US.pdf -------------------------------------------------------------------------------- /setup_cloud9.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Install necessary packages 3 | sudo yum update -y 4 | sudo yum install -y git python3 python3-pip postgresql jq 5 | # Set up environment variables 6 | echo "export DB_CLUSTER_IDENTIFIER=${DB_CLUSTER_IDENTIFIER}" >> ~/.bashrc 7 | echo "export DEFAULT_CODE_REPO=${DEFAULT_CODE_REPO}" >> ~/.bashrc 8 | echo "export SECRETARN=${SECRETARN}" >> ~/.bashrc 9 | echo "export AWSREGION=${AWSREGION}" >> ~/.bashrc 10 | # Get DB Endpoint 11 | DBENDP=$(aws rds describe-db-clusters --db-cluster-identifier $DB_CLUSTER_IDENTIFIER --region $AWSREGION --query 'DBClusters[*].Endpoint' | jq -r '.[0]') 12 | echo "export DBENDP=$DBENDP" >> ~/.bashrc 13 | # Get database credentials 14 | CREDS=$(aws secretsmanager get-secret-value --secret-id $SECRETARN --region $AWSREGION | jq -r '.SecretString') 15 | DBUSER=$(echo $CREDS | jq -r '.username') 16 | DBPASS=$(echo $CREDS | jq -r '.password') 17 | echo "export PGHOST=$DBENDP" >> ~/.bashrc 18 | echo "export PGUSER=$DBUSER" >> ~/.bashrc 19 | echo "export PGPASSWORD=$DBPASS" >>
~/.bashrc 20 | echo "export PGDATABASE=postgres" >> ~/.bashrc 21 | # Create a .pgpass file for passwordless login 22 | echo "$DBENDP:5432:postgres:$DBUSER:$DBPASS" > ~/.pgpass 23 | chmod 600 ~/.pgpass 24 | # Source the updated .bashrc 25 | source ~/.bashrc 26 | # Print completion message 27 | echo "Cloud9 environment setup completed successfully." 28 | --------------------------------------------------------------------------------
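
A closing note on what `setup_cloud9.sh` achieves: because it exports libpq's standard `PGHOST`/`PGUSER`/`PGPASSWORD`/`PGDATABASE` variables and writes `~/.pgpass`, both `psql` and the libpq-based Python drivers used throughout this repo can then connect without spelling credentials out. A minimal sketch of that effect (the `SELECT version()` probe is just an example, not part of the setup):

```python
import psycopg2

# Empty conninfo string: libpq falls back to the PGHOST/PGUSER/PGPASSWORD/
# PGDATABASE variables that setup_cloud9.sh appended to ~/.bashrc.
conn = psycopg2.connect("")
with conn, conn.cursor() as cur:
    cur.execute("SELECT version()")
    print(cur.fetchone()[0])
```
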