├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md └── workshops ├── building-rag-workflows-with-sagemaker-and-bedrock ├── 00-00_prerequisites │ └── prerequisites.ipynb ├── 01-01_local-experimentation │ ├── building-an-experimental-rag-app.ipynb │ └── utils │ │ ├── TokenCounterHandler.py │ │ └── __init__.py ├── 02-01_sagemaker-opensearch-rag │ ├── SageMak-Embedding-Model-OpenSearch.ipynb │ ├── extracted_context.json │ ├── images │ │ └── LangfuseTraces.png │ └── ori_pqal_10_records.csv ├── 03-02_fine-tuning-embedding │ ├── 01-ft_embedding_with_sagemaker_eval.ipynb │ ├── 02-embeddings-eval.ipynb │ ├── images │ │ ├── eval2-31k_context-5k_sample.png │ │ ├── training-9000_test-20000.png │ │ └── training-9000_test-50000.png │ ├── requirements.txt │ └── scripts │ │ └── trainer.py ├── 03-03_raft-customization │ ├── 01-build_raft_dataset.ipynb │ ├── 02-raft_finetune.ipynb │ ├── 03-deploy_and_evaluate_models.ipynb │ ├── 04-evaluate.ipynb │ ├── 05-deploy-to-BR.ipynb │ ├── eval.json │ ├── full_eval.json │ ├── images │ │ ├── x1.png │ │ ├── x5.png │ │ └── x6.png │ └── scripts │ │ ├── launch_fsdp_qlora.py │ │ ├── merge_model_adapter.py │ │ └── requirements.txt ├── 04-01_guardrails │ └── sagemaker-inference-bedrock-guardrails-medical-theme.ipynb ├── README.md ├── SageMaker Training Hosting and Custom Model Import_V3.pdf └── images │ └── rag-mind-map.png ├── distributed-training-deployment-on-sagemaker-ai ├── README.md ├── solution-1-sagemaker-jumpstart │ ├── jumpstart-llama3.1-8b-instruct-ft.ipynb │ └── telco_promotions.json ├── solution-2-sagemaker-training │ ├── option-1-continued-pre-training │ │ ├── model-trainer-fsdp-qlora.ipynb │ │ └── scripts │ │ │ ├── requirements.txt │ │ │ └── train.py │ └── option-2-supervised-fine-tuning │ │ ├── model-trainer-fsdp-qlora.ipynb │ │ └── scripts │ │ ├── requirements.txt │ │ ├── rouge │ │ └── rouge.py │ │ ├── rouge_evaluation.py │ │ └── train.py └── solution-3-sagemaker-hyperpod-k8 │ ├── README.md │ ├── args.yaml │ ├── download_model.py │ ├── pod-finetuning.yaml │ ├── requirements.txt │ └── scripts │ ├── dataprep.py │ └── train.py ├── diy-agents-with-sagemaker-and-bedrock ├── 0-setup │ └── setup-sagemaker-endpoint.ipynb ├── 1-inference │ ├── 1-inference-bedrock.ipynb │ ├── 2-inference-sagemaker.ipynb │ └── README.md ├── 2-tool-calling │ ├── 1-tool-calling-bedrock.ipynb │ ├── 2-tool-calling-sagemaker.ipynb │ └── README.md ├── 3-agent-patterns │ ├── README.md │ ├── autonomous_agent.ipynb │ ├── basic_workflows.ipynb │ ├── evaluator_optimizer.ipynb │ └── orchestrator_workers.ipynb ├── 4-frameworks │ ├── README.md │ ├── agno-ai │ │ └── agno-ai-logistics.ipynb │ ├── crewai │ │ ├── crewAI-langfuse-observability.ipynb │ │ ├── crewAI-travel-agent-hierarchical.ipynb │ │ ├── crewai-requirements.txt │ │ ├── crewai-travel-agent-sequential.ipynb │ │ └── crewai-travel-flows.ipynb │ ├── langgraph │ │ ├── langgraph-hierarchical-agent-teams.ipynb │ │ ├── langgraph-requirements.txt │ │ └── langgraph-sequential-agent-teams.ipynb │ ├── openai-agents-sdk │ │ └── openai_agents_sdk_tutorial.ipynb │ ├── smolagents │ │ └── smolagents-example.ipynb │ └── strands-agents │ │ ├── strands-agents-bedrock.ipynb │ │ ├── strands-agents-sagemaker.ipynb │ │ └── strands_sagemaker.py ├── 5-observability │ ├── 1-langfuse │ │ ├── crewAI-langfuse-observability.ipynb │ │ └── litellm-langfuse-observability.ipynb │ ├── 2-mlflow │ │ ├── crewai-requirements.txt │ │ ├── mlflow-crewAI-observability.ipynb │ │ └── mlflow-langgraph-observability.ipynb │ └── README.md ├── 99-use-cases 
│ ├── mcp │ │ ├── mcp-exploration.ipynb │ │ └── server.py │ ├── sagemaker-endpoint-as-tool │ │ ├── README.md │ │ ├── demand_forecasting.ipynb │ │ ├── endpoint-as-tool.png │ │ ├── script.py │ │ ├── server.py │ │ └── strands-agents-sagemaker-as-tool.ipynb │ ├── strands │ │ └── Strands_Agents.ipynb │ ├── support-ticket-triage │ │ ├── langgraph-requirements.txt │ │ └── support-system.ipynb │ ├── text2dsl-mcp │ │ ├── README.md │ │ ├── cfn-oss-collection.yaml │ │ ├── guardduty-index-schema.json │ │ ├── mcp_dsl_server.py │ │ ├── requirements.txt │ │ ├── text2dsl-mcp.ipynb │ │ └── utils.py │ └── text2sql │ │ ├── 1-create-db-tables.ipynb │ │ ├── 2-text2sql-langchain.ipynb │ │ ├── README.md │ │ └── requirements.txt └── README.md └── fine-tuning-with-sagemakerai-and-bedrock ├── archive ├── README.md ├── comet │ ├── comet-intro.ipynb │ └── comet-opik.ipynb ├── deepchecks │ └── deepchecks.ipynb ├── fiddler │ ├── .gitignore │ ├── README.md │ ├── assets │ │ ├── charts_llm.yaml │ │ ├── charts_ml.yaml │ │ ├── llm_events.parquet │ │ ├── search_ranking_prod.csv │ │ └── search_ranking_sample.csv │ ├── client │ │ ├── fiddler_client-3.7.0.dev4-py3-none-any.whl │ │ └── sagemaker-2.227.1.dev0-py3-none-any.whl │ ├── fiddler.ipynb │ ├── imgs │ │ ├── create_dashboard.png │ │ ├── credentials_tab.png │ │ ├── login_view.png │ │ ├── rca_drift.png │ │ ├── rca_events.png │ │ ├── rca_init.png │ │ ├── root_cause_analysis.png │ │ ├── settings_view.png │ │ └── view_dashboard.gif │ └── modules │ │ ├── __init__.py │ │ ├── chart.py │ │ ├── config.py │ │ ├── llm_onboard.py │ │ └── ml_onboard.py └── lakera │ └── lakera.ipynb ├── media └── smbanner.png ├── task_01_foundation_model_playground ├── 01.01_search_and_deploy_huggingface_llm.ipynb └── scripts │ └── requirements.txt ├── task_02_customize_foundation_model ├── 02.01_finetune_deepseekr1.ipynb └── scripts │ ├── requirements.txt │ └── train.py ├── task_03_foundation_model_evaluation ├── 03.01_foundation_model_evaluation_lighteval.ipynb └── images │ ├── sft_1000_train_50_test_bars.png │ ├── sft_1000_train_50_test_compare.png │ ├── sft_1000_train_50_test_scores.png │ ├── sft_5000_train_100_test_bars.png │ ├── sft_5000_train_100_test_compare.png │ └── sft_5000_train_100_test_scores.png ├── task_04_responsible_ai ├── 04.01_bedrock_guardrails_apply_guardrail_api.ipynb └── images │ └── applyguardrail.png ├── task_05_fmops ├── 05.01_fine-tuning-pipeline.ipynb ├── config.yaml ├── eval │ └── requirements.txt ├── ml-16670-arch-with-mlflow.png ├── scripts │ ├── requirements.txt │ └── train.py └── steps │ ├── evaluation_mlflow.py │ ├── finetune_llama3b_hf.py │ ├── preprocess_llama3.py │ └── utils.py └── utilities ├── __init__.py └── helpers.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | *.pylint.d/ 52 | pylint-report.txt 53 | pylint-global.rc 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | target/ 75 | 76 | # PyBuilder 77 | target/ 78 | 79 | # IPython 80 | profile_default/ 81 | ipython_config.py 82 | 83 | # PyCharm 84 | .idea/ 85 | .idea/* 86 | .idea/workspace.xml 87 | .idea/tasks.xml 88 | .idea/dictionaries 89 | .idea/usage.statistics.xml 90 | .idea/contentModel.xml 91 | .idea/dataSources/ 92 | .idea/vcs.xml 93 | .idea/jsLibraryMappings.xml 94 | .idea/modules.xml 95 | .idea/docker.xml 96 | .idea/gradle.xml 97 | .idea/misc.xml 98 | .idea/modules.xml 99 | .idea/scopes/ 100 | .idea/runConfigurations/ 101 | 102 | # VS Code 103 | .vscode/* 104 | .history/ 105 | 106 | # Environments 107 | .env 108 | .venv 109 | env/ 110 | venv/ 111 | ENV/ 112 | env.bak/ 113 | venv.bak/ 114 | env.bak/ 115 | venv.bak/ 116 | 117 | # Spyder project settings 118 | .spyderproject 119 | .spyproject 120 | 121 | # Rope project settings 122 | .ropeproject 123 | 124 | # mkdocs documentation 125 | /site 126 | 127 | # mypy 128 | .mypy_cache/ 129 | .dmypy.json 130 | dmypy.json 131 | 132 | # Pyre type checker 133 | .pyre/ 134 | 135 | # Celery stuff 136 | celerybeat-schedule 137 | celerybeat.pid 138 | 139 | # SageMath parsed files 140 | *.sage.py 141 | 142 | # Environments for pyenv, pipenv, poetry 143 | .python-version 144 | Pipfile.lock 145 | poetry.lock 146 | 147 | # dotenv 148 | .env 149 | 150 | # VS Code 151 | .vscode/ 152 | 153 | # MacOS files 154 | .DS_Store 155 | 156 | # Backup files 157 | *~ 158 | 159 | # Files that might appear anywhere in the directory 160 | .DS_Store 161 | **/.DS_Store 162 | .ipynb_checkpoints 163 | repl_state -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 
13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute to. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project, we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 60 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT No Attribution 2 | 3 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so.
10 | 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 13 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 14 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 15 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 16 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 17 | 18 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Generative AI Fine-tuning with Amazon SageMaker AI and Amazon Bedrock 2 | 3 | ## Welcome! 4 | 5 | Welcome to the home of Generative AI Fine-tuning with Amazon SageMaker AI and Amazon Bedrock. This repository is a growing collection of generative AI samples covering workflows such as: 6 | 1. Setting up a Foundation Model Playground on Amazon SageMaker AI 7 | 2. Customizing Foundation Models on Amazon SageMaker AI 8 | 3. Deploying, Evaluating and Monitoring Foundation Models on Amazon SageMaker AI 9 | 4. Creating Bedrock Guardrails with a SageMaker Endpoint 10 | 5. Developing FMOps fine-tuning workflows with SageMaker Pipelines 11 | 12 | ## Security 13 | 14 | See [CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications) for more information. 15 | 16 | ## License 17 | 18 | This library is licensed under the MIT-0 License. See the LICENSE file. 19 | 20 | -------------------------------------------------------------------------------- /workshops/building-rag-workflows-with-sagemaker-and-bedrock/01-01_local-experimentation/utils/TokenCounterHandler.py: -------------------------------------------------------------------------------- 1 | from langchain.callbacks.base import BaseCallbackHandler 2 | from langchain.schema import LLMResult 3 | from typing import List, Dict, Any 4 | import tiktoken 5 | 6 | 7 | class TokenCounterHandler(BaseCallbackHandler): 8 | 9 | # tiktoken's gpt-3.5-turbo encoding is used as an approximation; token 10 | # counts for other model families will be close but not exact. 11 | MODEL_ENCODING = "gpt-3.5-turbo" 12 | ENCODING = tiktoken.encoding_for_model(MODEL_ENCODING) 13 | 14 | def __init__(self, clear_report_on_chain_start=True): 15 | self.clear_report_on_chain_start = clear_report_on_chain_start 16 | self.tokens = 0 17 | self.embedding_tokens = 0 18 | self.prompt_tokens = 0 19 | self.generation_tokens = 0 20 | 21 | def on_chain_start(self, serialized, inputs: Dict[str, Any], **kwargs): 22 | if self.clear_report_on_chain_start: 23 | self.clear_report() 24 | 25 | def on_retriever_start(self, query: str, **kwargs): 26 | numtokens = len(self.ENCODING.encode(query)) 27 | self.tokens += numtokens 28 | self.embedding_tokens += numtokens 29 | 30 | def on_llm_start(self, serialized, prompts: List[str], **kwargs): 31 | for prompt in prompts: 32 | numtokens = len(self.ENCODING.encode(prompt)) 33 | self.tokens += numtokens 34 | self.prompt_tokens += numtokens 35 | 36 | def on_llm_end(self, response: LLMResult, **kwargs): 37 | for generation in response.generations: 38 | numtokens = len(self.ENCODING.encode(generation[0].text)) 39 | self.tokens += numtokens 40 | self.generation_tokens += numtokens 41 | 42 | def on_chain_end(self, outputs: Dict[str, Any], **kwargs): 43 | self.report() 44 | 45 | def clear_report(self): 46 | self.tokens = 0 47 | self.embedding_tokens = 0 48 | self.prompt_tokens = 0 49 | self.generation_tokens = 0 50 | 51 | def report(self): 52 | print(f"\nToken Counts:\nTotal: {self.tokens}\nEmbedding: {self.embedding_tokens}\nPrompt: {self.prompt_tokens}\nGeneration: {self.generation_tokens}\n") --------------------------------------------------------------------------------
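The handler above is a LangChain callback, so it has to be attached to whatever runnable it should observe. As a quick illustration that is not part of the workshop code, the sketch below exercises `TokenCounterHandler` with LangChain's `FakeListLLM` test stub, so it runs without any SageMaker or Bedrock endpoint. The import paths assume a recent `langchain`/`langchain_community`; older releases expose `FakeListLLM` under `langchain.llms.fake`, and the `utils` import assumes you run from the `01-01_local-experimentation` directory.

```python
# Minimal sketch (assumed setup): tally tokens with TokenCounterHandler
# using a canned LLM, so no model endpoint is needed.
from langchain_community.llms.fake import FakeListLLM

from utils.TokenCounterHandler import TokenCounterHandler

token_counter = TokenCounterHandler()
llm = FakeListLLM(responses=["Paris is the capital of France."])

# Passing the handler via the run config fires on_llm_start/on_llm_end,
# which count prompt and generation tokens respectively.
llm.invoke("What is the capital of France?", config={"callbacks": [token_counter]})
token_counter.report()
```

In the workshop notebook, the same handler would instead be passed to the RAG chain's callbacks, so retriever queries contribute to the embedding-token count as well.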
/workshops/building-rag-workflows-with-sagemaker-and-bedrock/01-01_local-experimentation/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | """General helper utilities for the workshop notebooks""" 4 | # Python Built-Ins: 5 | from io import StringIO 6 | import sys 7 | import textwrap 8 | 9 | 10 | def print_ww(*args, width: int = 100, **kwargs): 11 | """Like print(), but wraps output to `width` characters (default 100)""" 12 | buffer = StringIO() 13 | try: 14 | _stdout = sys.stdout 15 | sys.stdout = buffer 16 | print(*args, **kwargs) 17 | output = buffer.getvalue() 18 | finally: 19 | sys.stdout = _stdout 20 | for line in output.splitlines(): 21 | print("\n".join(textwrap.wrap(line, width=width))) 22 | -------------------------------------------------------------------------------- /workshops/building-rag-workflows-with-sagemaker-and-bedrock/02-01_sagemaker-opensearch-rag/images/LangfuseTraces.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/building-rag-workflows-with-sagemaker-and-bedrock/02-01_sagemaker-opensearch-rag/images/LangfuseTraces.png -------------------------------------------------------------------------------- /workshops/building-rag-workflows-with-sagemaker-and-bedrock/03-02_fine-tuning-embedding/images/eval2-31k_context-5k_sample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/building-rag-workflows-with-sagemaker-and-bedrock/03-02_fine-tuning-embedding/images/eval2-31k_context-5k_sample.png -------------------------------------------------------------------------------- /workshops/building-rag-workflows-with-sagemaker-and-bedrock/03-02_fine-tuning-embedding/images/training-9000_test-20000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/building-rag-workflows-with-sagemaker-and-bedrock/03-02_fine-tuning-embedding/images/training-9000_test-20000.png -------------------------------------------------------------------------------- /workshops/building-rag-workflows-with-sagemaker-and-bedrock/03-02_fine-tuning-embedding/images/training-9000_test-50000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/building-rag-workflows-with-sagemaker-and-bedrock/03-02_fine-tuning-embedding/images/training-9000_test-50000.png -------------------------------------------------------------------------------- /workshops/building-rag-workflows-with-sagemaker-and-bedrock/03-02_fine-tuning-embedding/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers==4.40.2 2 | sentence-transformers==3.1.1 3 | datasets==2.19.2 4 | accelerate==1.1.0 5 | --------------------------------------------------------------------------------
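The `trainer.py` script that follows fine-tunes an embedding model with a Matryoshka objective, which trains nested sub-dimensions of the same embedding so that one checkpoint can serve several vector sizes. A sketch of consuming such a checkpoint is below; the local path is hypothetical (point it at wherever the SageMaker model artifact was unpacked), and `truncate_dim` is supported by the pinned sentence-transformers 3.1.1.

```python
# Minimal sketch: load the fine-tuned Matryoshka checkpoint at a reduced
# dimension and score a query/passage pair. "./model" is a placeholder path.
from sentence_transformers import SentenceTransformer
from sentence_transformers.util import cos_sim

model = SentenceTransformer("./model", truncate_dim=256)

query_emb = model.encode("What is the effect of the treatment on blood pressure?")
doc_emb = model.encode("The study found a significant reduction in systolic blood pressure.")

# Similarity is computed on the 256-dim prefix of the full embedding space.
print(cos_sim(query_emb, doc_emb))
```

Because the Matryoshka loss optimizes each prefix length explicitly (the script uses dimensions up to 768), retrieval quality degrades gracefully as `truncate_dim` shrinks, trading accuracy for index size and latency.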
/workshops/building-rag-workflows-with-sagemaker-and-bedrock/03-02_fine-tuning-embedding/scripts/trainer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import torch 4 | import argparse 5 | from datasets import load_dataset, concatenate_datasets 6 | from sentence_transformers import SentenceTransformer 7 | from sentence_transformers.losses import MatryoshkaLoss, MultipleNegativesRankingLoss 8 | from sentence_transformers.evaluation import InformationRetrievalEvaluator, SequentialEvaluator 9 | from sentence_transformers.util import cos_sim 10 | from sentence_transformers import SentenceTransformerTrainer 11 | from sentence_transformers.training_args import SentenceTransformerTrainingArguments 12 | from sentence_transformers.training_args import BatchSamplers 13 | 14 | def load_data(train_file, test_file): 15 | train_dataset = load_dataset("json", data_dir=train_file, split="train") 16 | test_dataset = load_dataset("json", data_dir=test_file, split="test") 17 | corpus_dataset = concatenate_datasets([train_dataset, test_dataset]) 18 | return train_dataset, test_dataset, corpus_dataset 19 | 20 | def prepare_ir_evaluator(test_dataset, corpus_dataset, matryoshka_dimensions): 21 | corpus = dict(zip(corpus_dataset["id"], corpus_dataset["context"])) 22 | queries = dict(zip(test_dataset["id"], test_dataset["question"])) 23 | relevant_docs = {q_id: [q_id] for q_id in queries} 24 | 25 | matryoshka_evaluators = [] 26 | for dim in matryoshka_dimensions: 27 | evaluator = InformationRetrievalEvaluator( 28 | queries=queries, 29 | corpus=corpus, 30 | relevant_docs=relevant_docs, 31 | name=f"dim_{dim}", 32 | truncate_dim=dim, 33 | score_functions={"cosine": cos_sim}, 34 | ) 35 | matryoshka_evaluators.append(evaluator) 36 | 37 | return SequentialEvaluator(matryoshka_evaluators) 38 | 39 | def main(args): 40 | print("Loading datasets...") 41 | train_dataset, test_dataset, corpus_dataset = load_data(args.train_data, args.validation_data) 42 | 43 | base_model_id_safe = args.model_name.replace("/", "_") 44 | output_dir = f"{args.model_output}/{base_model_id_safe}_ds={len(train_dataset)}_bs={args.batch_size}_e={args.epochs}" 45 | 46 | print("Loading model...") 47 | model = SentenceTransformer( 48 | args.model_name, 49 | model_kwargs={"attn_implementation": "eager"}, 50 | trust_remote_code=True 51 | ) 52 | 53 | print("Preparing loss function...") 54 | model_dim = model.get_sentence_embedding_dimension() 55 | matryoshka_dimensions = [dim for dim in [768, 512, 384, 256, 128, 64] if dim <= model_dim] 56 | # matryoshka_dimensions = [768, 512, 256, 128, 64] 57 | inner_train_loss = MultipleNegativesRankingLoss(model) 58 | train_loss = MatryoshkaLoss(model, inner_train_loss, matryoshka_dims=matryoshka_dimensions) 59 | 60 | print("Configuring evaluator...") 61 | evaluator = prepare_ir_evaluator(test_dataset, corpus_dataset, matryoshka_dimensions) 62 | 63 | print("Setting training arguments...") 64 | training_args = SentenceTransformerTrainingArguments( 65 | output_dir=output_dir, 66 | num_train_epochs=args.epochs, 67 | per_device_train_batch_size=args.batch_size, 68 | gradient_accumulation_steps=16, 69 | per_device_eval_batch_size=args.batch_size, 70 | warmup_ratio=0.1, 71 | learning_rate=2e-5, 72 | lr_scheduler_type="cosine", 73 | optim="adamw_torch_fused", 74 | tf32=True, 75 | bf16=True, 76 | batch_sampler=BatchSamplers.NO_DUPLICATES, 77 | evaluation_strategy="epoch", 78 | save_strategy="epoch", 79 | logging_steps=10, 80 | save_total_limit=3, 81 | 
load_best_model_at_end=True, 82 | metric_for_best_model="eval_dim_128_cosine_ndcg@10", 83 | ) 84 | 85 | print("Starting training...") 86 | trainer = SentenceTransformerTrainer( 87 | model=model, 88 | args=training_args, 89 | train_dataset=train_dataset.select_columns(["question", "context"]), 90 | loss=train_loss, 91 | evaluator=evaluator, 92 | ) 93 | 94 | trainer.train() 95 | trainer.save_model() 96 | 97 | if __name__ == "__main__": 98 | parser = argparse.ArgumentParser() 99 | parser.add_argument("--train_data", type=str, default="/opt/ml/input/data/train") 100 | parser.add_argument("--validation_data", type=str, default="/opt/ml/input/data/validation") 101 | parser.add_argument("--model_name", type=str, default="") 102 | parser.add_argument("--epochs", type=int, default=4) 103 | parser.add_argument("--batch_size", type=int, default=16) 104 | parser.add_argument("--model_output", type=str, default="/opt/ml/model") 105 | args = parser.parse_args() 106 | 107 | main(args) 108 | -------------------------------------------------------------------------------- /workshops/building-rag-workflows-with-sagemaker-and-bedrock/03-03_raft-customization/images/x1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/building-rag-workflows-with-sagemaker-and-bedrock/03-03_raft-customization/images/x1.png -------------------------------------------------------------------------------- /workshops/building-rag-workflows-with-sagemaker-and-bedrock/03-03_raft-customization/images/x5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/building-rag-workflows-with-sagemaker-and-bedrock/03-03_raft-customization/images/x5.png -------------------------------------------------------------------------------- /workshops/building-rag-workflows-with-sagemaker-and-bedrock/03-03_raft-customization/images/x6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/building-rag-workflows-with-sagemaker-and-bedrock/03-03_raft-customization/images/x6.png -------------------------------------------------------------------------------- /workshops/building-rag-workflows-with-sagemaker-and-bedrock/03-03_raft-customization/scripts/merge_model_adapter.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import random 4 | import torch 5 | from transformers import AutoTokenizer, TrainingArguments 6 | from transformers import ( 7 | AutoModelForCausalLM, 8 | AutoTokenizer, 9 | BitsAndBytesConfig, 10 | set_seed, 11 | ) 12 | from typing import Dict, Optional, Tuple 13 | import argparse 14 | from datasets import load_dataset 15 | from pprint import pprint 16 | 17 | import subprocess as sb 18 | 19 | def set_custom_env(env_vars: Dict[str, str]) -> None: 20 | """ 21 | Set custom environment variables. 22 | 23 | Args: 24 | env_vars (Dict[str, str]): A dictionary of environment variables to set. 25 | Keys are variable names, values are their corresponding values. 26 | 27 | Returns: 28 | None 29 | 30 | Raises: 31 | TypeError: If env_vars is not a dictionary. 
32 | ValueError: If any key or value in env_vars is not a string. 33 | """ 34 | if not isinstance(env_vars, dict): 35 | raise TypeError("env_vars must be a dictionary") 36 | 37 | for key, value in env_vars.items(): 38 | if not isinstance(key, str) or not isinstance(value, str): 39 | raise ValueError("All keys and values in env_vars must be strings") 40 | 41 | os.environ.update(env_vars) 42 | 43 | # Optionally, print the updated environment variables 44 | print("Updated environment variables:") 45 | for key, value in env_vars.items(): 46 | print(f" {key}: {value}") 47 | 48 | def create_test_prompt(): 49 | dataset = load_dataset( 50 | "json", 51 | #data_files=os.path.join(args.testdata, "dataset.json"), 52 | data_dir=args.testdata, 53 | split="test" 54 | ) 55 | 56 | # Shuffle the dataset and select the first row 57 | random_row = dataset.shuffle().select(range(1))[0] 58 | 59 | return random_row 60 | 61 | # Generate in-memory inference 62 | def generate_text(model, prompt, max_length=2048, num_return_sequences=1): 63 | # Encode the input prompt 64 | 65 | device = "cuda" if torch.cuda.is_available() else "cpu" 66 | 67 | #model = model.to(device) 68 | 69 | tokenizer = AutoTokenizer.from_pretrained( 70 | args.basemodel if args.use_local else args.model_id, 71 | use_fast=True 72 | ) 73 | 74 | tokenizer.pad_token = tokenizer.eos_token 75 | 76 | tokenizer.save_pretrained("/opt/ml/model/merged/") 77 | 78 | prompt_input=prompt['prompt'].split("### Summary")[0] 79 | 80 | input_ids = tokenizer.encode(prompt_input, return_tensors="pt")#.to(device) 81 | 82 | # Generate text 83 | with torch.no_grad(): 84 | output = model.generate( 85 | input_ids, 86 | max_length=max_length, 87 | num_return_sequences=num_return_sequences, 88 | no_repeat_ngram_size=2, 89 | top_k=50, 90 | top_p=0.95, 91 | temperature=0.7 92 | ) 93 | 94 | # Decode and return the generated text 95 | generated_texts = [tokenizer.decode(seq, skip_special_tokens=True) for seq in output] 96 | 97 | return generated_texts 98 | 99 | # Merge the trained adapter with the base model and test it 100 | def merge_and_save_model(model_id, adapter_dir, output_dir): 101 | from peft import PeftModel 102 | 103 | ################## 104 | # Load Base Model 105 | ################## 106 | print("Trying to load a Peft model. 
It might take a while without feedback") 107 | base_model = AutoModelForCausalLM.from_pretrained( 108 | args.basemodel if args.use_local else model_id, 109 | low_cpu_mem_usage=True, 110 | torch_dtype=torch.float32, 111 | device_map="auto", 112 | # offload_folder="/opt/ml/model/" 113 | ) 114 | 115 | print("Loaded base model") 116 | 117 | ############################# 118 | # Run Inference - Base Model 119 | ############################# 120 | prompt=create_test_prompt() 121 | 122 | #pprint(f"*** Generating Inference on Base Model: {generate_text(base_model,prompt)}") 123 | 124 | base_model.config.use_cache = False 125 | 126 | ################ 127 | # Load Adapter 128 | ################ 129 | # Load the adapter 130 | peft_model = PeftModel.from_pretrained( 131 | base_model, 132 | adapter_dir, 133 | torch_dtype=torch.float32, # Load adapter weights in float32 to match the base model 134 | # offload_folder="/opt/ml/model/" 135 | ) 136 | 137 | ############################### 138 | # Merge Adapter and Base Model 139 | ############################### 140 | print("Loaded peft model") 141 | model = peft_model.merge_and_unload() 142 | print("Merge done") 143 | 144 | model.eval() 145 | model.active_adapters = "default" 146 | ############################# 147 | # Run Inference - Trained Model 148 | ############################# 149 | pprint(f"*** Generating Inference on Trained Model: {generate_text(model,prompt)}") 150 | 151 | os.makedirs(output_dir, exist_ok=True) 152 | 153 | ################################## 154 | # Save Merged Model and Tokenizer 155 | ################################## 156 | print(f"Saving the newly created merged model to {output_dir}") 157 | model.save_pretrained(output_dir, safe_serialization=True) 158 | base_model.config.save_pretrained(output_dir) 159 | 160 | # Parse CLI arguments passed by SageMaker Jobs 161 | def parse_args(): 162 | 163 | parser = argparse.ArgumentParser() 164 | 165 | # infra configuration 166 | parser.add_argument("--adapterdir", type=str, default=os.environ["SM_CHANNEL_ADAPTER"]) 167 | parser.add_argument("--testdata", type=str, default=os.environ["SM_CHANNEL_TESTDATA"]) 168 | 169 | parser.add_argument("--basemodel", type=str, default=os.environ.get("SM_CHANNEL_BASEMODEL","")) 170 | parser.add_argument('--use_local', type=lambda x: str(x).lower() in ['true', '1', 't', 'y', 'yes'], help="A boolean flag") 171 | 172 | parser.add_argument("--model_id", type=str, default="meta-llama/Meta-Llama-3.1-8B") 173 | parser.add_argument("--hf_token", type=str, default="") 174 | parser.add_argument("--dataset_name", type=str, default="") 175 | 176 | args = parser.parse_known_args() 177 | 178 | return args 179 | 180 | if __name__ == "__main__": 181 | 182 | args, _ = parse_args() 183 | 184 | custom_env: Dict[str, str] = {"HF_DATASETS_TRUST_REMOTE_CODE": "TRUE", 185 | "HF_TOKEN": args.hf_token 186 | } 187 | set_custom_env(custom_env) 188 | 189 | print("***** Printing adapter artifacts") 190 | 191 | # Run the command to list the adapter artifacts 192 | sb.run(["ls", "-ltr", args.adapterdir]) 193 | 194 | # Merge the trained adapters with the base model and save the result 195 | merge_and_save_model(args.model_id, args.adapterdir,"/opt/ml/model/merged/") 196 | -------------------------------------------------------------------------------- /workshops/building-rag-workflows-with-sagemaker-and-bedrock/03-03_raft-customization/scripts/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers==4.44.2 2 | datasets==2.18.0 3 | accelerate==0.33.0 4 | evaluate==0.4.1 5 |
bitsandbytes==0.43.3 6 | huggingface_hub==0.23.2 7 | trl==0.9.6 8 | peft==0.12.0 9 | wandb 10 | py7zr 11 | mlflow==2.16.0 12 | sagemaker-mlflow==0.1.0 -------------------------------------------------------------------------------- /workshops/building-rag-workflows-with-sagemaker-and-bedrock/README.md: -------------------------------------------------------------------------------- 1 | # Building RAG workflows with Amazon SageMaker AI and Amazon Bedrock 2 | 3 | Welcome to **Building Retrieval Augmented Generation (RAG) Workflows with Amazon SageMaker and Amazon Bedrock**! 4 | 5 | In this workshop, you will journey through all the steps of building the GenAI components of a RAG application, starting with experimentation and working your way through to the repeatable, scalable components that will become part of production applications. 6 | 7 | ![](images/rag-mind-map.png) 8 | 9 | You'll start small, working in a SageMaker Studio environment with a basic dataset to cover the fundamentals of embedding models, vector databases, and RAG orchestration. Once you have a basic application, your focus will shift to understanding and measuring the objective quality of its outputs, using different techniques for vector search and model output evaluation. Afterwards, you will build more scalable external components, which are critical for reaching the PoC/Pilot phases. Once those components are in place, you'll learn techniques to optimize the performance and quality of the system by adjusting parameters, and see where fine-tuning your models can be beneficial. Upon meeting performance and quality KPIs, you'll focus on real-world readiness by implementing safety and security measures, such as guardrails to protect inputs and outputs. 10 | -------------------------------------------------------------------------------- /workshops/building-rag-workflows-with-sagemaker-and-bedrock/SageMaker Training Hosting and Custom Model Import_V3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/building-rag-workflows-with-sagemaker-and-bedrock/SageMaker Training Hosting and Custom Model Import_V3.pdf -------------------------------------------------------------------------------- /workshops/building-rag-workflows-with-sagemaker-and-bedrock/images/rag-mind-map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/building-rag-workflows-with-sagemaker-and-bedrock/images/rag-mind-map.png -------------------------------------------------------------------------------- /workshops/distributed-training-deployment-on-sagemaker-ai/README.md: -------------------------------------------------------------------------------- 1 | # Distributed Training and Deployment on SageMaker AI 2 | 3 | > This content is available in [Distributed Training and Deployment on SageMaker AI]. The following is a synopsis of the content you will find by following the provided link. 4 | > Welcome to the "Distributed Training and Deployment on SageMaker AI" workshop! This publicly available, hands-on experience is designed for data scientists who are ready to harness the power of Large Language Models (LLMs) and experiment with different customization techniques on AWS.
5 | 6 | In this workshop, you'll dive into different fine-tuning techniques, deployment options, and evaluation by leveraging SageMaker AI capabilities! 7 | 8 | By the end of this workshop, you'll be able to: 9 | 10 | - Understand how to prepare datasets for different types of model customization techniques 11 | - Run fine-tuning workloads by leveraging SageMaker AI capabilities 12 | - Deploy and test your fine-tuned model 13 | 14 | ## Workshop Content 15 | 16 | 1. Solution 1: Large scale distributed training for Data/ML engineers using Amazon SageMaker JumpStart 17 | 2. Solution 2: Large scale distributed training for Resident Data Scientists using Amazon SageMaker Training 18 | 1. Option 1: Continued pre-training of LLMs using Amazon SageMaker Training 19 | 2. Option 2: Supervised fine-tuning of LLMs using Amazon SageMaker Training 20 | 3. Solution 3: Large scale distributed training for Researchers using Amazon SageMaker Hyperpod with EKS integration 21 | 22 | ## How to run the workshop 23 | 24 | This workshop follows a hands-on, self-paced format: each module contains Jupyter notebooks and code that you'll run in your own JupyterLab or Code Editor environment, and provides: 25 | 26 | - Step-by-step instructions and explanations 27 | - Code samples that you can run and modify 28 | - Links to additional resources 29 | 30 | **⚠️ Important**: Solution 3 requires an Amazon SageMaker Hyperpod cluster with EKS up and running in your AWS account. 31 | -------------------------------------------------------------------------------- /workshops/distributed-training-deployment-on-sagemaker-ai/solution-2-sagemaker-training/option-1-continued-pre-training/scripts/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers==4.50.2 2 | peft==0.14.0 3 | accelerate==1.3.0 4 | bitsandbytes==0.45.1 5 | datasets==3.2.0 6 | evaluate==0.4.3 7 | huggingface_hub[hf_transfer] 8 | mlflow 9 | safetensors>=0.5.2 10 | sagemaker==2.244.0 11 | sagemaker-mlflow==0.1.0 12 | sentencepiece==0.2.0 13 | scikit-learn==1.6.1 14 | tokenizers>=0.21.0 15 | trl==0.9.6 16 | psutil 17 | py7zr 18 | pynvml 19 | -------------------------------------------------------------------------------- /workshops/distributed-training-deployment-on-sagemaker-ai/solution-2-sagemaker-training/option-2-supervised-fine-tuning/scripts/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers==4.50.2 2 | peft==0.14.0 3 | accelerate==1.3.0 4 | bitsandbytes==0.45.1 5 | datasets==3.2.0 6 | evaluate==0.4.3 7 | huggingface_hub[hf_transfer] 8 | mlflow 9 | safetensors>=0.5.2 10 | sagemaker==2.244.0 11 | sagemaker-mlflow==0.1.0 12 | sentencepiece==0.2.0 13 | scikit-learn==1.6.1 14 | tokenizers>=0.21.0 15 | trl==0.9.6 16 | psutil 17 | py7zr 18 | pynvml 19 | xtarfile 20 | rouge-score -------------------------------------------------------------------------------- /workshops/distributed-training-deployment-on-sagemaker-ai/solution-2-sagemaker-training/option-2-supervised-fine-tuning/scripts/rouge/rouge.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Evaluate Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ ROUGE metric from Google Research github repo. """ 15 | 16 | # The dependencies in https://github.com/google-research/google-research/blob/master/rouge/requirements.txt 17 | import absl # Here to have a nice missing dependency error message early on 18 | import datasets 19 | import nltk # Here to have a nice missing dependency error message early on 20 | import numpy # Here to have a nice missing dependency error message early on 21 | import six # Here to have a nice missing dependency error message early on 22 | from rouge_score import rouge_scorer, scoring 23 | 24 | import evaluate 25 | 26 | 27 | _CITATION = """\ 28 | @inproceedings{lin-2004-rouge, 29 | title = "{ROUGE}: A Package for Automatic Evaluation of Summaries", 30 | author = "Lin, Chin-Yew", 31 | booktitle = "Text Summarization Branches Out", 32 | month = jul, 33 | year = "2004", 34 | address = "Barcelona, Spain", 35 | publisher = "Association for Computational Linguistics", 36 | url = "https://www.aclweb.org/anthology/W04-1013", 37 | pages = "74--81", 38 | } 39 | """ 40 | 41 | _DESCRIPTION = """\ 42 | ROUGE, or Recall-Oriented Understudy for Gisting Evaluation, is a set of metrics and a software package used for 43 | evaluating automatic summarization and machine translation software in natural language processing. 44 | The metrics compare an automatically produced summary or translation against a reference or a set of references (human-produced) summary or translation. 45 | 46 | Note that ROUGE is case insensitive, meaning that upper case letters are treated the same way as lower case letters. 47 | 48 | This metrics is a wrapper around Google Research reimplementation of ROUGE: 49 | https://github.com/google-research/google-research/tree/master/rouge 50 | """ 51 | 52 | _KWARGS_DESCRIPTION = """ 53 | Calculates average rouge scores for a list of hypotheses and references 54 | Args: 55 | predictions: list of predictions to score. Each prediction 56 | should be a string with tokens separated by spaces. 57 | references: list of reference for each prediction. Each 58 | reference should be a string with tokens separated by spaces. 59 | rouge_types: A list of rouge types to calculate. 60 | Valid names: 61 | `"rouge{n}"` (e.g. `"rouge1"`, `"rouge2"`) where: {n} is the n-gram based scoring, 62 | `"rougeL"`: Longest common subsequence based scoring. 63 | `"rougeLsum"`: rougeLsum splits text using `"\n"`. 64 | See details in https://github.com/huggingface/datasets/issues/617 65 | use_stemmer: Bool indicating whether Porter stemmer should be used to strip word suffixes. 
66 | use_aggregator: Return aggregates if this is set to True 67 | Returns: 68 | rouge1: rouge_1 (f1), 69 | rouge2: rouge_2 (f1), 70 | rougeL: rouge_l (f1), 71 | rougeLsum: rouge_lsum (f1) 72 | Examples: 73 | 74 | >>> rouge = evaluate.load('rouge') 75 | >>> predictions = ["hello there", "general kenobi"] 76 | >>> references = ["hello there", "general kenobi"] 77 | >>> results = rouge.compute(predictions=predictions, references=references) 78 | >>> print(results) 79 | {'rouge1': 1.0, 'rouge2': 1.0, 'rougeL': 1.0, 'rougeLsum': 1.0} 80 | """ 81 | 82 | 83 | class Tokenizer: 84 | """Helper class to wrap a callable into a class with a `tokenize` method as used by rouge-score.""" 85 | 86 | def __init__(self, tokenizer_func): 87 | self.tokenizer_func = tokenizer_func 88 | 89 | def tokenize(self, text): 90 | return self.tokenizer_func(text) 91 | 92 | 93 | @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION) 94 | class Rouge(evaluate.Metric): 95 | def _info(self): 96 | return evaluate.MetricInfo( 97 | description=_DESCRIPTION, 98 | citation=_CITATION, 99 | inputs_description=_KWARGS_DESCRIPTION, 100 | features=[ 101 | datasets.Features( 102 | { 103 | "predictions": datasets.Value("string", id="sequence"), 104 | "references": datasets.Sequence(datasets.Value("string", id="sequence")), 105 | } 106 | ), 107 | datasets.Features( 108 | { 109 | "predictions": datasets.Value("string", id="sequence"), 110 | "references": datasets.Value("string", id="sequence"), 111 | } 112 | ), 113 | ], 114 | codebase_urls=["https://github.com/google-research/google-research/tree/master/rouge"], 115 | reference_urls=[ 116 | "https://en.wikipedia.org/wiki/ROUGE_(metric)", 117 | "https://github.com/google-research/google-research/tree/master/rouge", 118 | ], 119 | ) 120 | 121 | def _compute( 122 | self, predictions, references, rouge_types=None, use_aggregator=True, use_stemmer=False, tokenizer=None 123 | ): 124 | if rouge_types is None: 125 | rouge_types = ["rouge1", "rouge2", "rougeL", "rougeLsum"] 126 | 127 | multi_ref = isinstance(references[0], list) 128 | 129 | if tokenizer is not None: 130 | tokenizer = Tokenizer(tokenizer) 131 | 132 | scorer = rouge_scorer.RougeScorer(rouge_types=rouge_types, use_stemmer=use_stemmer, tokenizer=tokenizer) 133 | if use_aggregator: 134 | aggregator = scoring.BootstrapAggregator() 135 | else: 136 | scores = [] 137 | 138 | for ref, pred in zip(references, predictions): 139 | if multi_ref: 140 | score = scorer.score_multi(ref, pred) 141 | else: 142 | score = scorer.score(ref, pred) 143 | if use_aggregator: 144 | aggregator.add_scores(score) 145 | else: 146 | scores.append(score) 147 | 148 | if use_aggregator: 149 | result = aggregator.aggregate() 150 | for key in result: 151 | result[key] = result[key].mid.fmeasure 152 | 153 | else: 154 | result = {} 155 | for key in scores[0]: 156 | result[key] = list(score[key].fmeasure for score in scores) 157 | 158 | return result -------------------------------------------------------------------------------- /workshops/distributed-training-deployment-on-sagemaker-ai/solution-3-sagemaker-hyperpod-k8/README.md: -------------------------------------------------------------------------------- 1 | ## Prerequisites 2 | 3 | - Amazon SageMaker Studio domain with a user profile 4 | - Access to Amazon SageMaker Hyperpod with EKS (Elastic Kubernetes Service) 5 | 6 | Please follow [SageMaker Studio + Hyperpod Integration](https://catalog.workshops.aws/sagemaker-hyperpod-eks/en-US/11-tips/08-studio-integration) 7 | 8 | ## Required 
Changes Before Running 9 | 10 | Before running the scripts, you need to make the following changes: 11 | 12 | ### 1. Copy the `solution-3-sagemaker-hyperpod-k8` folder into the FSx for Lustre volume 13 | 14 | To leverage the integration through the shared FSx for Lustre volume between Amazon SageMaker Studio and SageMaker Hyperpod, copy the `solution-3-sagemaker-hyperpod-k8` folder and its contents into the FSx for Lustre volume mounted on both SageMaker Studio and the Hyperpod cluster. 15 | 16 | ### 2. Update `args.yaml` 17 | 18 | Replace all instances of `<user-profile>` with your SageMaker Studio user profile name: 19 | 20 | ```yaml 21 | model_id: "/data/<user-profile>/solution-3-sagemaker-hyperpod-k8/DeepSeek-R1-Distill-Qwen-7B" 22 | output_dir: "/data/<user-profile>/solution-3-sagemaker-hyperpod-k8/model/" 23 | train_dataset_path: "/data/<user-profile>/solution-3-sagemaker-hyperpod-k8/data/train/" 24 | test_dataset_path: "/data/<user-profile>/solution-3-sagemaker-hyperpod-k8/data/test/" 25 | ``` 26 | 27 | ### 3. Update `pod-finetuning.yaml` 28 | 29 | Replace all instances of `<user-profile>` with your SageMaker Studio user profile name: 30 | 31 | ```yaml 32 | command: 33 | - /bin/bash 34 | - -c 35 | - | 36 | pip install -r /data/<user-profile>/solution-3-sagemaker-hyperpod-k8/requirements.txt && \ 37 | torchrun \ 38 | --nnodes=2 \ 39 | --nproc_per_node=4 \ 40 | /data/<user-profile>/solution-3-sagemaker-hyperpod-k8/scripts/train.py \ 41 | --config /data/<user-profile>/solution-3-sagemaker-hyperpod-k8/args.yaml 42 | ``` 43 | -------------------------------------------------------------------------------- /workshops/distributed-training-deployment-on-sagemaker-ai/solution-3-sagemaker-hyperpod-k8/args.yaml: -------------------------------------------------------------------------------- 1 | model_id: "/data/<user-profile>/solution-3-sagemaker-hyperpod-k8/DeepSeek-R1-Distill-Qwen-7B" # Hugging Face model id 2 | mlflow_uri: "" 3 | mlflow_experiment_name: "deepseek-r1-distill-qwen-7b-sft" 4 | # sagemaker specific parameters 5 | output_dir: "/data/<user-profile>/solution-3-sagemaker-hyperpod-k8/model/" # path to where SageMaker will upload the model 6 | train_dataset_path: "/data/<user-profile>/solution-3-sagemaker-hyperpod-k8/data/train/" # path to where FSx saves train dataset 7 | test_dataset_path: "/data/<user-profile>/solution-3-sagemaker-hyperpod-k8/data/test/" # path to where FSx saves test dataset 8 | # training parameters 9 | lora_r: 8 10 | lora_alpha: 16 11 | lora_dropout: 0.1 12 | learning_rate: 2e-4 # learning rate 13 | num_train_epochs: 2 # number of training epochs 14 | per_device_train_batch_size: 2 # batch size per device during training 15 | per_device_eval_batch_size: 2 # batch size for evaluation 16 | gradient_accumulation_steps: 2 # number of steps before performing a backward/update pass 17 | gradient_checkpointing: true # use gradient checkpointing 18 | bf16: true # use bfloat16 precision 19 | tf32: false # use tf32 precision 20 | fsdp: "full_shard auto_wrap offload" 21 | fsdp_config: 22 | backward_prefetch: "backward_pre" 23 | cpu_ram_efficient_loading: true 24 | offload_params: true 25 | forward_prefetch: false 26 | use_orig_params: true 27 | merge_weights: true # merge weights in the base model 28 | -------------------------------------------------------------------------------- /workshops/distributed-training-deployment-on-sagemaker-ai/solution-3-sagemaker-hyperpod-k8/download_model.py: -------------------------------------------------------------------------------- 1 | from huggingface_hub import snapshot_download 2 | import os 3 | 4 | 5 | MODEL_ID = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B" 6 | 7 | 8 | def download_model(path, model_name): 9 | print("Downloading model ", model_name) 10 | 11 | os.makedirs(path, exist_ok=True) 12 | 13 |
snapshot_download(repo_id=model_name, local_dir=path) 14 | 15 | print(f"Model {model_name} downloaded under {path}") 16 | 17 | 18 | if __name__ == "__main__": 19 | script_path = os.path.abspath(__file__) 20 | script_dir = os.path.dirname(script_path) 21 | script_dir = f"/mnt/custom-file-systems/{'/'.join(script_dir.split('/')[4:])}" 22 | 23 | download_model(f"{script_dir}/{MODEL_ID.split('/')[-1]}", MODEL_ID) 24 | -------------------------------------------------------------------------------- /workshops/distributed-training-deployment-on-sagemaker-ai/solution-3-sagemaker-hyperpod-k8/pod-finetuning.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: etcd 5 | spec: 6 | ports: 7 | - name: etcd-client-port 8 | port: 2379 9 | protocol: TCP 10 | targetPort: 2379 11 | selector: 12 | app: etcd 13 | 14 | --- 15 | apiVersion: apps/v1 16 | kind: Deployment 17 | metadata: 18 | labels: 19 | app: etcd 20 | name: etcd 21 | spec: 22 | replicas: 1 23 | selector: 24 | matchLabels: 25 | app: etcd 26 | template: 27 | metadata: 28 | labels: 29 | app: etcd 30 | spec: 31 | containers: 32 | - name: etcd 33 | command: ["/usr/local/bin/etcd"] 34 | args: 35 | - "--data-dir" 36 | - "/var/lib/etcd" 37 | - "--enable-v2" 38 | - "--listen-client-urls" 39 | - "http://0.0.0.0:2379" 40 | - "--advertise-client-urls" 41 | - "http://0.0.0.0:2379" 42 | - "--initial-cluster-state" 43 | - "new" 44 | image: quay.io/coreos/etcd:v3.5.19 45 | ports: 46 | - containerPort: 2379 47 | name: client 48 | protocol: TCP 49 | - containerPort: 2380 50 | name: server 51 | protocol: TCP 52 | restartPolicy: Always 53 | --- 54 | apiVersion: "kubeflow.org/v1" 55 | kind: PyTorchJob 56 | metadata: 57 | name: deepseek-r1-distill-qwen-7b-fine-tuning 58 | spec: 59 | elasticPolicy: 60 | rdzvBackend: etcd 61 | rdzvHost: etcd 62 | rdzvPort: 2379 63 | minReplicas: 1 64 | maxReplicas: 64 65 | maxRestarts: 100 66 | metrics: 67 | - type: Resource 68 | resource: 69 | name: cpu 70 | target: 71 | type: Utilization 72 | averageUtilization: 90 73 | pytorchReplicaSpecs: 74 | Worker: 75 | replicas: 2 76 | restartPolicy: OnFailure 77 | template: 78 | metadata: 79 | labels: 80 | app: deepseek-r1-distill-qwen-7b-fine-tuning 81 | spec: 82 | volumes: 83 | - name: shmem 84 | hostPath: 85 | path: /dev/shm 86 | - name: local 87 | hostPath: 88 | path: /mnt/k8s-disks/0 89 | - name: fsx-volume 90 | persistentVolumeClaim: 91 | claimName: fsx-claim 92 | serviceAccountName: eks-hyperpod-sa # Must match association 93 | containers: 94 | - name: pytorch 95 | image: 763104351884.dkr.ecr.us-east-1.amazonaws.com/pytorch-training:2.5.1-gpu-py311-cu124-ubuntu22.04-ec2 96 | imagePullPolicy: Always 97 | resources: 98 | requests: 99 | nvidia.com/gpu: 1 100 | vpc.amazonaws.com/efa: 1 101 | limits: 102 | nvidia.com/gpu: 1 103 | vpc.amazonaws.com/efa: 1 104 | env: 105 | - name: LOGLEVEL 106 | value: "DEBUG" 107 | - name: TORCH_DISTRIBUTED_DEBUG 108 | value: "DETAIL" 109 | - name: TORCH_NCCL_ENABLE_MONITORING 110 | value: "1" 111 | - name: TORCH_NCCL_TRACE_BUFFER_SIZE 112 | value: "20000" 113 | - name: TORCH_NCCL_DUMP_ON_TIMEOUT 114 | value: "1" 115 | - name: TORCH_NCCL_DEBUG_INFO_TEMP_FILE 116 | value: "/local/nccl_trace_rank_" 117 | - name: PYTORCH_CUDA_ALLOC_CONF 118 | value: "expandable_segments:True" 119 | - name: NCCL_DEBUG 120 | value: "INFO" 121 | - name: NCCL_SOCKET_IFNAME 122 | value: "^lo" 123 | - name: TORCH_NCCL_ASYNC_ERROR_HANDLING 124 | value: "1" 125 | command: 126 | - /bin/bash 127 
| - -c 128 | - | 129 | pip install -r /data/<user-profile>/solution-3-sagemaker-hyperpod-k8/requirements.txt && \ 130 | torchrun \ 131 | --nnodes=2 \ 132 | --nproc_per_node=4 \ 133 | /data/<user-profile>/solution-3-sagemaker-hyperpod-k8/scripts/train.py \ 134 | --config /data/<user-profile>/solution-3-sagemaker-hyperpod-k8/args.yaml 135 | volumeMounts: 136 | - name: shmem 137 | mountPath: /dev/shm 138 | - name: local 139 | mountPath: /local 140 | - name: fsx-volume 141 | mountPath: /data 142 | -------------------------------------------------------------------------------- /workshops/distributed-training-deployment-on-sagemaker-ai/solution-3-sagemaker-hyperpod-k8/requirements.txt: -------------------------------------------------------------------------------- 1 | python-etcd 2 | transformers==4.48.2 3 | peft==0.14.0 4 | accelerate==1.3.0 5 | bitsandbytes==0.45.1 6 | datasets==3.2.0 7 | evaluate==0.4.3 8 | huggingface_hub[hf_transfer] 9 | mlflow 10 | safetensors>=0.4.5 11 | sagemaker==2.243.0 12 | sagemaker-mlflow==0.1.0 13 | sentencepiece==0.2.0 14 | scikit-learn==1.6.1 15 | tokenizers>=0.21.0 16 | trl==0.9.6 17 | psutil 18 | py7zr 19 | pynvml 20 | wandb 21 | -------------------------------------------------------------------------------- /workshops/distributed-training-deployment-on-sagemaker-ai/solution-3-sagemaker-hyperpod-k8/scripts/dataprep.py: -------------------------------------------------------------------------------- 1 | from datasets import load_dataset, Dataset, DatasetDict 2 | import os 3 | import pandas as pd 4 | from random import randint 5 | from sklearn.model_selection import train_test_split 6 | from transformers import AutoTokenizer 7 | 8 | DATASET_NAME = "NousResearch/hermes-function-calling-v1" 9 | MODEL_ID = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B" 10 | HF_TOKEN = "" 11 | 12 | 13 | def read_dataset(dataset_name): 14 | dataset = load_dataset( 15 | dataset_name, data_files={"train": ["json-mode-agentic.json"]} 16 | ) 17 | 18 | df = pd.DataFrame(dataset["train"]) 19 | 20 | train, test = train_test_split(df, test_size=0.1, random_state=42) 21 | 22 | return train, test 23 | 24 | 25 | def prompt_format(df, tokenizer): 26 | """Apply the model's chat template to each conversation in ``df``. 27 | 28 | Returns a ``Dataset`` with a single ``text`` column containing the 29 | fully templated conversations. Mirrors the formatting applied in the 30 | ``__main__`` block below, packaged as a reusable helper. 31 | """ 32 | texts = df["conversations"].apply( 33 | lambda conv: tokenizer.apply_chat_template(transform_conversation(conv), tokenize=False) 34 | ) 35 | 36 | # Build a Hugging Face Dataset holding only the templated text column. 37 | dataset = Dataset.from_pandas( 38 | pd.DataFrame({"text": texts}).reset_index(drop=True) 39 | ) 40 | 41 | return dataset 42 | 43 | 44 | 45 | def transform_conversation(conversation): 46 | transformed = [] 47 | for msg in conversation: 48 | # Create a new dictionary with the renamed keys 49 | new_msg = { 50 | "role": ( 51 | "user" 52 | if msg["from"] == "human" 53 | else "assistant" if msg["from"] == "gpt" else "system" 54 | ), 55 | "content": msg["value"], 56 | } 57 | transformed.append(new_msg) 58 | return transformed 59 | 60 | 61 | if __name__ == "__main__": 62 | if HF_TOKEN != "": 63 | os.environ.update({"HF_TOKEN": HF_TOKEN}) 64 | 65 | script_path = os.path.abspath(__file__) 66 | script_dir = os.path.dirname(script_path) 67 | parent_dir = os.path.dirname(script_dir) 68 | parent_dir = f"/mnt/custom-file-systems/{'/'.join(parent_dir.split('/')[4:])}" 69 | 70 | tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) 71 | 72 | train, test =
read_dataset(DATASET_NAME) 73 | 74 | train["conversations"] = train["conversations"].apply(transform_conversation) 75 | test["conversations"] = test["conversations"].apply(transform_conversation) 76 | 77 | train["text"] = train["conversations"].apply( 78 | lambda x: tokenizer.apply_chat_template(x, tokenize=False) 79 | ) 80 | test["text"] = test["conversations"].apply( 81 | lambda x: tokenizer.apply_chat_template(x, tokenize=False) 82 | ) 83 | 84 | train = train[["text"]] 85 | test = test[["text"]] 86 | 87 | train_dataset = Dataset.from_pandas(train) 88 | test_dataset = Dataset.from_pandas(test) 89 | 90 | train_dataset.to_json(f"{parent_dir}/data/train/dataset.json", orient="records") 91 | test_dataset.to_json(f"{parent_dir}/data/test/dataset.json", orient="records") 92 | -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/0-setup/setup-sagemaker-endpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "09c6bf13", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "%pip install sagemaker boto3 litellm aiohttp -qU" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "id": "5eb5e6d3", 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "from IPython import get_ipython\n", 21 | "get_ipython().kernel.do_shutdown(True)" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "id": "bb51e51a", 27 | "metadata": {}, 28 | "source": [ 29 | "## Deploy the model from SageMaker JumpStart on a SageMaker Inference endpoint\n", 30 | "\n", 31 | "> Note: skip the cell below if you have already deployed your model." 
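If you're not sure whether an endpoint from a previous run is still live, a quick check with `boto3` can tell you whether it's safe to skip the deployment cell. This helper is a sketch, not part of the original notebook; `describe_endpoint` raises a `ClientError` when the endpoint does not exist.

```python
# Sketch: check whether a SageMaker endpoint is already in service before redeploying.
# Not part of the workshop notebook; pass the endpoint name you used previously.
import boto3
from botocore.exceptions import ClientError

def endpoint_in_service(endpoint_name: str) -> bool:
    sm_client = boto3.client("sagemaker")
    try:
        status = sm_client.describe_endpoint(EndpointName=endpoint_name)["EndpointStatus"]
        return status == "InService"
    except ClientError:
        # describe_endpoint raises ValidationException when the endpoint doesn't exist
        return False
```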
32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "id": "fd08268e", 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "from sagemaker.jumpstart.model import JumpStartModel\n", 42 | "from sagemaker.serializers import JSONSerializer\n", 43 | "from sagemaker.deserializers import JSONDeserializer\n", 44 | "from sagemaker.enums import EndpointType\n", 45 | "from sagemaker.compute_resource_requirements.resource_requirements import ResourceRequirements\n", 46 | "\n", 47 | "\n", 48 | "resources = ResourceRequirements(\n", 49 | " requests = {\n", 50 | " \"num_accelerators\": 4, # Number of accelerators required\n", 51 | " \"memory\": 96*1024, # Minimum memory required in Mb (required)\n", 52 | " \"copies\": 1,\n", 53 | " }\n", 54 | ")\n", 55 | "\n", 56 | "model = JumpStartModel(\n", 57 | " model_id=\"huggingface-llm-mistral-small-24B-Instruct-2501\", model_version=\"2.0.1\",\n", 58 | " instance_type=\"ml.g5.12xlarge\"\n", 59 | ")\n", 60 | "predictor = model.deploy(\n", 61 | " accept_eula=True,\n", 62 | " initial_instance_count=1,\n", 63 | " instance_type=\"ml.g5.12xlarge\",\n", 64 | " serializer=JSONSerializer(), deserializer=JSONDeserializer(),\n", 65 | " endpoint_type=EndpointType.INFERENCE_COMPONENT_BASED,\n", 66 | " resources=resources,\n", 67 | " managed_instance_scaling={\n", 68 | " \"MinInstanceCount\": 0,\n", 69 | " \"MaxInstanceCount\": 1\n", 70 | " }\n", 71 | ")" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "id": "0e15b39c", 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "endpoint_name = predictor.endpoint_name\n", 82 | "component_name = predictor.component_name\n", 83 | "print(f\"Endpoint name: {endpoint_name}\")\n", 84 | "print(f\"Inference component name: {component_name}\")" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "id": "ac12f8be", 90 | "metadata": {}, 91 | "source": [ 92 | "
\n", 93 | "⚠️ Note: deployment will take 5~7 minutes. Take note of the endpoint name and the inference component names, as they will be needed later.\n", 94 | "
" 95 | ] 96 | } 97 | ], 98 | "metadata": { 99 | "language_info": { 100 | "name": "python" 101 | } 102 | }, 103 | "nbformat": 4, 104 | "nbformat_minor": 5 105 | } 106 | -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/1-inference/2-inference-sagemaker.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "8038eb9f-d925-437b-8f2e-e9b4e78c8976", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "%pip install sagemaker boto3 litellm aiohttp -qU" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "id": "681fe3e4", 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "from IPython import get_ipython\n", 21 | "get_ipython().kernel.do_shutdown(True)" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "id": "18881de7-0fc6-4490-9992-a12fd05da7eb", 27 | "metadata": {}, 28 | "source": [ 29 | "## Inference with Amazon SageMaker AI" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "id": "67e0d089-1492-4615-b952-d12e96278dcb", 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "import boto3\n", 40 | "from sagemaker.session import Session\n", 41 | "from sagemaker.predictor import Predictor\n", 42 | "from sagemaker.serializers import JSONSerializer\n", 43 | "from sagemaker.deserializers import JSONDeserializer\n", 44 | "\n", 45 | "endpoint_name = \"YOUR-ENDPOINT-NAME-HERE\"\n", 46 | "component_name = \"YOUR-INFERENCE-COMPONENT-NAME-HERE\"\n", 47 | "\n", 48 | "boto_session = boto3.session.Session(region_name=boto3.Session().region_name)\n", 49 | "session = Session(boto_session=boto_session)\n", 50 | "\n", 51 | "predictor = Predictor(\n", 52 | " sagemaker_session=session,\n", 53 | " endpoint_name=endpoint_name, component_name=component_name,\n", 54 | " serializer=JSONSerializer(), deserializer=JSONDeserializer()\n", 55 | ")" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "id": "6a99d49a-6b06-49cb-b32c-412e4a0a6e44", 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "%%time\n", 66 | "prompt = \"What is the town of Bari, Italy, known for?\"\n", 67 | "payload = {\n", 68 | " \"messages\": [\n", 69 | " {\n", 70 | " \"role\": \"user\",\n", 71 | " \"content\": prompt\n", 72 | " }\n", 73 | " ],\n", 74 | " \"max_tokens\": 4*1024,\n", 75 | " \"temperature\": 0.1,\n", 76 | " \"top_p\": 0.9,\n", 77 | "}\n", 78 | "\n", 79 | "response = predictor.predict(payload)\n", 80 | "print(response['choices'][0]['message']['content'])" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "id": "9712cd57-f8d8-4f60-8813-91e2951092cb", 86 | "metadata": {}, 87 | "source": [ 88 | "### Using Boto3" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "id": "a5e86c2f-b0cf-428e-b06b-4ad74637c1a6", 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "%%time\n", 99 | "import boto3\n", 100 | "import json\n", 101 | "\n", 102 | "payload = {\n", 103 | " \"inputs\": \"What is the town of Bari, Italy, known for? 
Provide a short answer.\",\n", 104 | " \"parameters\": {\n", 105 | " \"max_new_tokens\": 4*1024,\n", 106 | " \"top_p\": 0.9,\n", 107 | " \"temperature\": 0.2,\n", 108 | " }\n", 109 | "}\n", 110 | "\n", 111 | "runtime = boto3.client('sagemaker-runtime', region_name=boto3.Session().region_name)\n", 112 | "response = runtime.invoke_endpoint(\n", 113 | " EndpointName=endpoint_name,\n", 114 | " InferenceComponentName=component_name or None,\n", 115 | " ContentType='application/json',\n", 116 | " Body=json.dumps(payload)\n", 117 | ")\n", 118 | "\n", 119 | "result = json.loads(response['Body'].read().decode())\n", 120 | "print(result['generated_text'])" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "id": "9fe7fd8e-de2c-4353-8ea9-18455efc7db7", 126 | "metadata": {}, 127 | "source": [ 128 | "### Using Boto3 and the Messages API (for compatible models only)" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "id": "fcff55c3-5610-4298-9880-a83668c34a63", 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "%%time\n", 139 | "payload = {\n", 140 | " \"messages\": [\n", 141 | " {\"role\": \"system\", \"content\": \"You are a helpful and honest assistant.\"},\n", 142 | " {\"role\": \"user\", \"content\": \"What is the town of Bari, Italy, known for? Provide a short answer.\"}\n", 143 | " ],\n", 144 | " \"max_tokens\": 4*1024,\n", 145 | " \"top_p\": 0.9,\n", 146 | " \"temperature\": 0.6,\n", 147 | "}\n", 148 | "\n", 149 | "response = runtime.invoke_endpoint(\n", 150 | " EndpointName=endpoint_name,\n", 151 | " InferenceComponentName=component_name,\n", 152 | " ContentType='application/json',\n", 153 | " Body=json.dumps(payload)\n", 154 | ")\n", 155 | "\n", 156 | "result = json.loads(response['Body'].read().decode())\n", 157 | "print(result['choices'][0]['message'])" 158 | ] 159 | }, 160 | { 161 | "cell_type": "markdown", 162 | "id": "0ee0c9e7-c4b4-4401-a666-0f561bb8f24e", 163 | "metadata": {}, 164 | "source": [ 165 | "## Using LiteLLM" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "id": "0c3f2f8e-e740-46b8-b136-666de9613c13", 172 | "metadata": {}, 173 | "outputs": [], 174 | "source": [ 175 | "from litellm import completion\n", 176 | "\n", 177 | "\n", 178 | "response = completion(\n", 179 | " model=f\"sagemaker/{endpoint_name}\", \n", 180 | " model_id=component_name,\n", 181 | " messages=[\n", 182 | " {\"role\": \"system\", \"content\": \"You are a helpful and honest assistant.\"},\n", 183 | " {\"role\": \"user\", \"content\": \"What is the town of Bari, Italy, known for? Provide a short answer.\"}\n", 184 | " ],\n", 185 | " temperature=0.2,\n", 186 | " max_tokens=1024\n", 187 | ")\n", 188 | "response.choices[0].message.content" 189 | ] 190 | }, 191 | { 192 | "cell_type": "markdown", 193 | "id": "361da444", 194 | "metadata": {}, 195 | "source": [ 196 | "
\n", 197 | "⚠️ Important: as of LiteLLM v1.67.2, `sagemaker_chat` provider does not not correctly pass the inference component name, causing `HTTPStatusError: Client error '400 Bad Request'`. Please use `sagemaker` provider instead.\n", 198 | "
" 199 | ] 200 | } 201 | ], 202 | "metadata": { 203 | "kernelspec": { 204 | "display_name": ".venv", 205 | "language": "python", 206 | "name": "python3" 207 | }, 208 | "language_info": { 209 | "codemirror_mode": { 210 | "name": "ipython", 211 | "version": 3 212 | }, 213 | "file_extension": ".py", 214 | "mimetype": "text/x-python", 215 | "name": "python", 216 | "nbconvert_exporter": "python", 217 | "pygments_lexer": "ipython3", 218 | "version": "3.12.9" 219 | } 220 | }, 221 | "nbformat": 4, 222 | "nbformat_minor": 5 223 | } 224 | -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/1-inference/README.md: -------------------------------------------------------------------------------- 1 | # Inference with Amazon Bedrock and Amazon SageMaker AI 2 | 3 | By running the notebooks in this folder, you will learn: 4 | 5 | - how to invoke an Amazon Bedrock model using AWS SDK for Python (`boto3`) 6 | - how to invoke an Amazon Bedrock model using LiteLLM 7 | - how to invoke a model hosted on Amazon SageMaker AI inference endpoints using AWS SDK for Python (`boto3`) 8 | - how to invoke a model hosted on Amazon SageMaker AI inference endpoints using the Amazon SageMaker Python SDK 9 | - how to invoke a model hosted on Amazon SageMaker AI inference endpoints using LiteLLM 10 | -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/2-tool-calling/2-tool-calling-sagemaker.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "425e8538", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "%pip install sagemaker boto3 sagemaker litellm aiohttp -qU" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "id": "d2c66240", 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "from IPython import get_ipython\n", 21 | "get_ipython().kernel.do_shutdown(True)" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "id": "5fbb3aca", 27 | "metadata": {}, 28 | "source": [ 29 | "# Tool calling with Amazon SageMaker AI\n", 30 | "\n", 31 | "
\n", 32 | "
Make sure you've deployed the model as described in the previous lab before proceeding.
\n", 33 | "
\n", 34 | "\n", 35 | "Amazon SageMaker AI APIs do not natively support tool calling. To achieve this, we have to embed the tool definition in the prompt we send to the model. We recommend using models that have been fine-tuned for function calling in order to make sure tool calling works as expected." 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "id": "d9e893fb", 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "try: \n", 46 | " predictor\n", 47 | "except:\n", 48 | " import boto3\n", 49 | " from sagemaker.session import Session\n", 50 | " from sagemaker.predictor import Predictor\n", 51 | " from sagemaker.serializers import JSONSerializer\n", 52 | " from sagemaker.deserializers import JSONDeserializer\n", 53 | " \n", 54 | " endpoint_name = input(\"> Enter your endpoint name: \")\n", 55 | " component_name = input(\"> Enter your inference component name (leave empty if not using a component): \") or None\n", 56 | "\n", 57 | " boto_session = boto3.session.Session(region_name=boto3.Session().region_name)\n", 58 | " session = Session(boto_session=boto_session)\n", 59 | " \n", 60 | " predictor = Predictor(\n", 61 | " sagemaker_session=session,\n", 62 | " endpoint_name=endpoint_name, component_name=component_name,\n", 63 | " serializer=JSONSerializer(), deserializer=JSONDeserializer()\n", 64 | " )" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "id": "2a5a2ad1-b58a-42e3-a5b4-6e44fd2b2cce", 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "def get_top_song(sign):\n", 75 | " \"\"\"Returns the most popular song for the requested station.\n", 76 | " Args:\n", 77 | " call_sign (str): The call sign for the station for which you want\n", 78 | " the most popular song.\n", 79 | "\n", 80 | " Returns:\n", 81 | " response (json): The most popular song and artist.\n", 82 | " \"\"\"\n", 83 | "\n", 84 | " song = \"\"\n", 85 | " artist = \"\"\n", 86 | " if sign == 'WZPZ':\n", 87 | " song = \"Elemental Hotel\"\n", 88 | " artist = \"8 Storey Hike\"\n", 89 | "\n", 90 | " else:\n", 91 | " raise Exception(f\"Station {sign} not found.\")\n", 92 | "\n", 93 | " return {\n", 94 | " \"song\": song,\n", 95 | " \"artist\": artist\n", 96 | " }" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "id": "61008585-1216-4b43-9cf6-cbeef6133a9c", 102 | "metadata": {}, 103 | "source": [ 104 | "In order for the LLM to know that it can use this tool, we have to pass the tool definition to the LLM." 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "id": "8dfe3413-cb63-47b1-8050-88170e6c6fbf", 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [ 114 | "tools = [\n", 115 | " {\n", 116 | " \"type\": \"function\",\n", 117 | " \"function\": {\n", 118 | " \"name\": \"get_top_song\",\n", 119 | " \"description\": \"Get the most popular song played on a radio station.\",\n", 120 | " \"parameters\": {\n", 121 | " \"type\": \"object\",\n", 122 | " \"properties\": {\n", 123 | " \"sign\": {\n", 124 | " \"type\": \"string\",\n", 125 | " \"description\": \"The call sign for the radio station for which you want the most popular song. Example calls signs are WZPZ and WKRP.\"\n", 126 | " }\n", 127 | " },\n", 128 | " \"required\": [\"sign\"],\n", 129 | " },\n", 130 | " },\n", 131 | " }\n", 132 | "]" 133 | ] 134 | }, 135 | { 136 | "cell_type": "markdown", 137 | "id": "087c42f5-f61a-49f9-8225-49c15ffca562", 138 | "metadata": {}, 139 | "source": [ 140 | "Now we can start conversing with the model." 
141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": null, 146 | "id": "061cd5f3-4b56-4670-ab13-56f7ea0be237", 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [ 150 | "input_text = \"What is the most popular song on WZPZ?\"" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "id": "b72afc20-d847-48e3-96e1-1abc9e4252a1", 157 | "metadata": {}, 158 | "outputs": [], 159 | "source": [ 160 | "from datetime import datetime\n", 161 | "\n", 162 | "system_prompt = \"\"\"\\\n", 163 | "You are an AI assistant, created by AWS and powered by Amazon SageMaker AI.\n", 164 | "Your goal is to help the user by answering their questions honestly, helpfully and truthfully.\n", 165 | "The current date is {currentDateTime} .\n", 166 | "\n", 167 | "Follow these principles when responding to queries:\n", 168 | "1. Avoid tool calls if not needed\n", 169 | "2. If uncertain, answer normally and offer to use tools\n", 170 | "3. Always use the best tools for the query\n", 171 | "\"\"\"\n", 172 | "messages = [\n", 173 | " {'role':'system', 'content':system_prompt.format(currentDateTime=datetime.now())},\n", 174 | " {'role':'user', 'content':input_text}\n", 175 | "]\n", 176 | "payload = {'messages': messages, 'max_tokens': 4*1024, 'tools':tools, 'tool_choice':'auto'}" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": null, 182 | "id": "257853dd-365e-4843-bf10-394807aead0d", 183 | "metadata": { 184 | "scrolled": true 185 | }, 186 | "outputs": [], 187 | "source": [ 188 | "import boto3, json\n", 189 | "\n", 190 | "sagemaker_runtime = boto3.client(\"sagemaker-runtime\", region_name=boto3.Session().region_name)\n", 191 | "response = sagemaker_runtime.invoke_endpoint(\n", 192 | " EndpointName=endpoint_name,\n", 193 | " InferenceComponentName=component_name or None,\n", 194 | " ContentType=\"application/json\",\n", 195 | " Body=json.dumps(payload)\n", 196 | ")\n", 197 | "output = json.loads(response['Body'].read().decode())\n", 198 | "output" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "id": "f3903050-6e51-43d4-a366-7cc6955d29fc", 205 | "metadata": {}, 206 | "outputs": [], 207 | "source": [ 208 | "clean_message = {k: v or \"Thinking ...\" for k, v in output['choices'][0]['message'].items() if k in ['role', 'content']}\n", 209 | "messages.append(clean_message)\n", 210 | "messages" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "id": "78effb0c-13f8-497a-a14f-733702423161", 217 | "metadata": {}, 218 | "outputs": [], 219 | "source": [ 220 | "# If stop_reason == \"tool_calls\", then you need to perform tool calling!\n", 221 | "stop_reason = output['choices'][0]['finish_reason']\n", 222 | "tool_calls = output['choices'][0]['message']['tool_calls']\n", 223 | "stop_reason, tool_calls" 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": null, 229 | "id": "43c11d16-7a35-4cad-8652-bda29d9b6df1", 230 | "metadata": {}, 231 | "outputs": [], 232 | "source": [ 233 | "import sys\n", 234 | "if stop_reason == \"tool_calls\":\n", 235 | " tool_calls = output['choices'][0]['message']['tool_calls']\n", 236 | " for tool_call in tool_calls:\n", 237 | " if tool_call['type'] == 'function':\n", 238 | " name = tool_call['function']['name']\n", 239 | " args = json.loads(tool_call['function']['arguments'])\n", 240 | " # Execute the function with name from tool_call['function']['name']\n", 241 | " tool_foo = getattr(sys.modules[__name__], name)\n", 
242 | " output = tool_foo(**args)\n", 243 | " output" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": null, 249 | "id": "67923d59-5040-49d5-a231-cc7e3ffd21ff", 250 | "metadata": {}, 251 | "outputs": [], 252 | "source": [ 253 | "tool_result_message = {\n", 254 | " \"role\": \"user\", \"content\": json.dumps(output)\n", 255 | "}\n", 256 | "messages.append(tool_result_message)" 257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": null, 262 | "id": "3536c762", 263 | "metadata": {}, 264 | "outputs": [], 265 | "source": [ 266 | "messages" 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": null, 272 | "id": "deeab939-6c51-4698-872d-70a29faca6f4", 273 | "metadata": {}, 274 | "outputs": [], 275 | "source": [ 276 | "payload = {'messages': messages, 'max_tokens': 4*1024}\n", 277 | "response = sagemaker_runtime.invoke_endpoint(\n", 278 | " EndpointName=endpoint_name,\n", 279 | " InferenceComponentName=component_name or None,\n", 280 | " ContentType=\"application/json\",\n", 281 | " Body=json.dumps(payload)\n", 282 | ")\n", 283 | "output = json.loads(response['Body'].read().decode())\n", 284 | "output" 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": null, 290 | "id": "a4e4f552", 291 | "metadata": {}, 292 | "outputs": [], 293 | "source": [] 294 | } 295 | ], 296 | "metadata": { 297 | "kernelspec": { 298 | "display_name": ".venv", 299 | "language": "python", 300 | "name": "python3" 301 | }, 302 | "language_info": { 303 | "codemirror_mode": { 304 | "name": "ipython", 305 | "version": 3 306 | }, 307 | "file_extension": ".py", 308 | "mimetype": "text/x-python", 309 | "name": "python", 310 | "nbconvert_exporter": "python", 311 | "pygments_lexer": "ipython3", 312 | "version": "3.12.9" 313 | } 314 | }, 315 | "nbformat": 4, 316 | "nbformat_minor": 5 317 | } 318 | -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/2-tool-calling/README.md: -------------------------------------------------------------------------------- 1 | # Tool Calling with Amazon Bedrock and Amazon SageMaker AI 2 | 3 | By running the notebooks in this folder, you will learn: 4 | 5 | - how to perform tool calling with an Amazon Bedrock model using AWS SDK for Python (`boto3`) 6 | - how to perform tool calling with an Amazon Bedrock model using LiteLLM 7 | - how to perform tool calling with a model hosted on Amazon SageMaker AI inference endpoints using AWS SDK for Python (`boto3`) 8 | - how to invoke a model hosted on Amazon SageMaker AI inference endpoints using the Amazon SageMaker Python SDK 9 | -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/3-agent-patterns/README.md: -------------------------------------------------------------------------------- 1 | # Agentic Workflow Patterns 2 | 3 | Agentic workflows represent a sophisticated approach to task automation that combines the power of LLMs with structured processes and tool integration. This section explores the fundamental characteristics and patterns that make agentic workflows effective. 
4 | -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/4-frameworks/README.md: -------------------------------------------------------------------------------- 1 | # Using open-source frameworks 2 | 3 | In this lab, you will learn how to use open-source frameworks with Amazon Bedrock and Amazon SageMaker AI to build autonomous agents. 4 | -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/4-frameworks/agno-ai/agno-ai-logistics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Getting Started with Agno AI\n", 8 | "\n", 9 | "[Agno](https://www.agno.com/) is a lightweight library for building Agents with memory, knowledge, tools and reasoning.\n", 10 | "\n", 11 | "Developers use Agno to build Reasoning Agents, Multimodal Agents, Teams of Agents and Agentic Workflows. Agno also provides a beautiful UI to chat with your Agents, pre-built FastAPI routes to serve your Agents and tools to monitor and evaluate their performance.\n", 12 | "\n", 13 | "This notebook will guide you through the basics of using the Agno AI library to create an agent that can handle logistics queries.\n", 14 | "\n", 15 | "## Table of Contents\n", 16 | "1. [Importing Libraries](#importing-libraries)\n", 17 | "2. [Defining Sample Data](#defining-sample-data)\n", 18 | "3. [Creating Tools](#creating-tools)\n", 19 | "4. [Creating the Agent](#creating-the-agent)\n", 20 | "5. [Testing the Agent](#testing-the-agent)" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "### Importing Libraries\n", 28 | "First, we need to import the necessary libraries." 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "%pip install agno boto3 --quiet --upgrade" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 1, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "import re\n", 47 | "from itertools import permutations\n", 48 | "from agno.agent import Agent\n", 49 | "from agno.models.aws import AwsBedrock\n", 50 | "import boto3" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "### Defining Sample Data\n", 58 | "Next, we define some sample data for shipments and distances." 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 2, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "tracking_data = {\n", 68 | " \"TRK12345\": \"In transit at Toronto distribution center\",\n", 69 | " \"TRK98765\": \"Delivered on 2025-03-09 10:24\",\n", 70 | " \"TRK55555\": \"Out for delivery - last scanned at Vancouver hub\"\n", 71 | "}\n", 72 | "\n", 73 | "distance_matrix = {\n", 74 | " \"Warehouse\": {\"A\": 10, \"B\": 15, \"C\": 20},\n", 75 | " \"A\": {\"Warehouse\": 10, \"B\": 12, \"C\": 5},\n", 76 | " \"B\": {\"Warehouse\": 15, \"A\": 12, \"C\": 8},\n", 77 | " \"C\": {\"Warehouse\": 20, \"A\": 5, \"B\": 8}\n", 78 | "}" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "### Creating Tools\n", 86 | "We create two tools: `TrackingTool` and `RouteTool`." 
87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 3, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "class TrackingTool:\n", 96 | " def __init__(self):\n", 97 | " self.name = \"TrackingTool\"\n", 98 | " self.description = \"Provides shipment status updates given a tracking ID.\"\n", 99 | "\n", 100 | " def run(self, query: str) -> str:\n", 101 | " match = re.search(r\"\\bTRK\\d+\\b\", query.upper())\n", 102 | " if not match:\n", 103 | " return \"Please provide a valid tracking ID.\"\n", 104 | " tid = match.group(0)\n", 105 | " status = tracking_data.get(tid)\n", 106 | " return f\"Status for {tid}: {status}\" if status else f\"No information for {tid}.\"\n", 107 | "\n", 108 | "class RouteTool:\n", 109 | " def __init__(self):\n", 110 | " self.name = \"RouteTool\"\n", 111 | " self.description = \"Computes the best delivery route given a start and destinations.\"\n", 112 | "\n", 113 | " def run(self, query: str) -> str:\n", 114 | " m = re.search(r\"from\\s+([\\w\\s]+)\\s+to\\s+(.+)\", query, re.IGNORECASE)\n", 115 | " if not m:\n", 116 | " return \"Specify route as 'from to , , ...'.\"\n", 117 | " origin = m.group(1).strip()\n", 118 | " dests = [d.strip() for d in re.split(r\",| and \", m.group(2)) if d.strip()]\n", 119 | "\n", 120 | " if origin not in distance_matrix:\n", 121 | " return f\"Unknown origin: {origin}.\"\n", 122 | " for loc in dests:\n", 123 | " if loc not in distance_matrix:\n", 124 | " return f\"Unknown destination: {loc}.\"\n", 125 | "\n", 126 | " best_distance = float('inf')\n", 127 | " best_order = None\n", 128 | " for perm in permutations(dests):\n", 129 | " total = 0\n", 130 | " cur = origin\n", 131 | " for nxt in perm:\n", 132 | " total += distance_matrix[cur][nxt]\n", 133 | " cur = nxt\n", 134 | " if total < best_distance:\n", 135 | " best_distance = total\n", 136 | " best_order = perm\n", 137 | " route_plan = \" -> \".join([origin] + list(best_order)) if best_order else origin\n", 138 | " return f\"Optimal route: {route_plan} (Total distance: {best_distance} km)\"" 139 | ] 140 | }, 141 | { 142 | "cell_type": "markdown", 143 | "metadata": {}, 144 | "source": [ 145 | "### Creating the Agent\n", 146 | "Now, we create the agent using the AWS Bedrock model and the tools we defined." 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 4, 152 | "metadata": {}, 153 | "outputs": [], 154 | "source": [ 155 | "boto3_session = boto3.Session(region_name=\"us-west-2\")\n", 156 | "agent = Agent(\n", 157 | " model=AwsBedrock(\n", 158 | " session=boto3_session,\n", 159 | " id=\"us.amazon.nova-pro-v1:0\",\n", 160 | " max_tokens=4096\n", 161 | " ),\n", 162 | " description=\"You are a knowledgeable logistics assistant.\",\n", 163 | " instructions=[\n", 164 | " \"If the user asks about a shipment or tracking ID, use the TrackingTool.\",\n", 165 | " \"If the user asks about route optimization or best route, use the RouteTool.\",\n", 166 | " \"Provide concise and clear answers, including relevant details from the tools.\"\n", 167 | " ],\n", 168 | " tools=[TrackingTool(), RouteTool()],\n", 169 | " show_tool_calls=False,\n", 170 | " markdown=True\n", 171 | ")" 172 | ] 173 | }, 174 | { 175 | "cell_type": "markdown", 176 | "metadata": {}, 177 | "source": [ 178 | "### Testing the Agent\n", 179 | "Finally, we test the agent with some sample queries." 
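A design note on `RouteTool`: trying every permutation of the destinations is exact but O(n!), which is fine for the three stops in this demo. If you grow the distance matrix, a greedy nearest-neighbor heuristic keeps the tool responsive at the cost of guaranteed optimality; a sketch (not part of the notebook):

```python
# Sketch: greedy nearest-neighbor routing as a scalable alternative to the
# exhaustive permutation search in RouteTool. Fast, but not guaranteed optimal.
def nearest_neighbor_route(origin, destinations, matrix):
    route, total, current = [origin], 0, origin
    remaining = set(destinations)
    while remaining:
        # Always hop to the closest unvisited stop.
        nxt = min(remaining, key=lambda d: matrix[current][d])
        total += matrix[current][nxt]
        route.append(nxt)
        remaining.remove(nxt)
        current = nxt
    return route, total
```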
180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": 7, 185 | "metadata": {}, 186 | "outputs": [ 187 | { 188 | "name": "stdout", 189 | "output_type": "stream", 190 | "text": [ 191 | "**Shipment Status for TRK12345**\n", 192 | "\n", 193 | "- **Current Status:** In Transit\n", 194 | "- **Last Scanned:** 2023-10-05 14:30\n", 195 | "- **Expected Delivery:** 2023-10-08\n", 196 | "- **Location:** Sorting Facility, Chicago, IL\n", 197 | "\n", 198 | "**Details:**\n", 199 | "- The package is currently at the sorting facility in Chicago, IL, and is expected to be delivered by October 8th.\n" 200 | ] 201 | } 202 | ], 203 | "source": [ 204 | "# Test tracking query\n", 205 | "print(agent.run(\"Where is shipment TRK12345?\").content)" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": 8, 211 | "metadata": {}, 212 | "outputs": [ 213 | { 214 | "name": "stdout", 215 | "output_type": "stream", 216 | "text": [ 217 | "To find the best route from Warehouse to locations A, B, and C, I will use the RouteTool.\n", 218 | "\n", 219 | "**Route Optimization Results:**\n", 220 | "\n", 221 | "- **Starting Point:** Warehouse\n", 222 | "- **Destinations:** A, B, C\n", 223 | "\n", 224 | "**Optimized Route:**\n", 225 | "1. Warehouse → A\n", 226 | "2. A → B\n", 227 | "3. B → C\n", 228 | "\n", 229 | "**Total Distance:** Approximately 150 miles\n", 230 | "\n", 231 | "This route minimizes travel time and distance.\n" 232 | ] 233 | } 234 | ], 235 | "source": [ 236 | "\n", 237 | "# Test route optimization query\n", 238 | "print(agent.run(\"Find the best route from Warehouse to A, B and C\").content)" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": null, 244 | "metadata": {}, 245 | "outputs": [], 246 | "source": [] 247 | } 248 | ], 249 | "metadata": { 250 | "kernelspec": { 251 | "display_name": ".venv", 252 | "language": "python", 253 | "name": "python3" 254 | }, 255 | "language_info": { 256 | "codemirror_mode": { 257 | "name": "ipython", 258 | "version": 3 259 | }, 260 | "file_extension": ".py", 261 | "mimetype": "text/x-python", 262 | "name": "python", 263 | "nbconvert_exporter": "python", 264 | "pygments_lexer": "ipython3", 265 | "version": "3.12.9" 266 | } 267 | }, 268 | "nbformat": 4, 269 | "nbformat_minor": 4 270 | } 271 | -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/4-frameworks/crewai/crewai-requirements.txt: -------------------------------------------------------------------------------- 1 | crewai 2 | crewai[tools] 3 | boto3 4 | botocore 5 | sagemaker 6 | duckduckgo-search -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/4-frameworks/crewai/crewai-travel-flows.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "420bda15", 6 | "metadata": {}, 7 | "source": [ 8 | "# Agentic Workflows with CrewAI Flows\n", 9 | "\n", 10 | "CrewAI Flows is a feature designed to streamline the creation and management of AI workflows. Flows allow developers to combine and coordinate coding tasks and Crews efficiently, providing a robust framework for building sophisticated AI automations. 
You can start easily by leveraging two decorators, `@start()` and `@listen()`:\n", 11 | "\n", 12 | "- `@start()`: the `@start()` decorator is used to mark a method as the starting point of a Flow; when a Flow is started, all the methods decorated with `@start()` are executed in parallel. You can have multiple start methods in a Flow, and they will all be executed when the Flow is started.\n", 13 | "- `@listen()`: the `@listen()` decorator is used to mark a method as a listener for the output of another task in the Flow. The method decorated with `@listen()` will be executed when the specified task emits an output. The method can access the output of the task it is listening to as an argument. The `@listen()` decorator can be used in several ways:\n", 14 | " - Listening to a Method by Name: You can pass the name of the method you want to listen to as a string. When that method completes, the listener method will be triggered.\n", 15 | " - Listening to a Method Directly: You can pass the method itself. When that method completes, the listener method will be triggered.​" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "id": "1db1e0ce", 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "%pip install -r crewai-requirements.txt --quiet --upgrade" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "id": "81118128", 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "import nest_asyncio\n", 36 | "nest_asyncio.apply()" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "id": "ff038757", 42 | "metadata": {}, 43 | "source": [ 44 | "Start by configuring the LLM." 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "id": "7d80570a", 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "from crewai import LLM\n", 55 | "\n", 56 | "llm = LLM(\n", 57 | " model=\"bedrock/us.amazon.nova-pro-v1:0\", # Use Amazon Bedrock models \n", 58 | " # model=\"sagemaker/INSERT ENDPOINT NAME\", # Use Amazon SageMaker AI Inference\n", 59 | " temperature=0.7, max_tokens=4*1024,\n", 60 | ")" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "id": "13d696b0", 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "llm.call(\"What is the capital of France?\")" 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "id": "cb8839b7", 76 | "metadata": {}, 77 | "source": [ 78 | "Now, set up the agent." 
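As an aside before the travel agents are set up below, here is a standalone minimal sketch of the `@start()`/`@listen()` chaining described above; it is not part of the notebook, and assumes only that crewai is installed:

```python
# Minimal Flow: say_hello runs first, add_world fires on its output,
# and kickoff() returns the last method's return value.
from crewai.flow.flow import Flow, listen, start

class HelloFlow(Flow):
    @start()
    def say_hello(self):
        return "Hello"

    @listen(say_hello)
    def add_world(self, greeting):
        return f"{greeting}, world!"

print(HelloFlow().kickoff())  # -> "Hello, world!"
```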
79 | ]
80 | },
81 | {
82 | "cell_type": "code",
83 | "execution_count": null,
84 | "id": "8441829c",
85 | "metadata": {},
86 | "outputs": [],
87 | "source": [
88 | "from dotenv import load_dotenv\n",
89 | "from crewai_tools import SerperDevTool\n",
90 | "from crewai.tools import tool\n",
91 | "from duckduckgo_search import DDGS\n",
92 | "import os\n",
93 | "\n",
94 | "load_dotenv()\n",
95 | "\n",
96 | "@tool('DuckDuckGoSearch')\n",
97 | "def search_with_duckduckgo(search_query: str):\n",
98 | "    \"\"\"Search the web for information on a given topic\"\"\"\n",
99 | "    return DDGS().text(search_query, max_results=5)\n",
100 | "\n",
101 | "if os.environ.get(\"SERPER_API_KEY\"):\n",
102 | "    search_tool = SerperDevTool()\n",
103 | "    print(\"Using Serper API for search\")\n",
104 | "else:\n",
105 | "    search_tool = search_with_duckduckgo  # pass the tool object itself, do not call it\n",
106 | "    print(\"No Serper API key found - using the DuckDuckGo search tool\")"
107 | ]
108 | },
109 | {
110 | "cell_type": "code",
111 | "execution_count": null,
112 | "id": "18b6755d",
113 | "metadata": {},
114 | "outputs": [],
115 | "source": [
116 | "from crewai import Agent\n",
117 | "from textwrap import dedent\n",
118 | "\n",
119 | "\n",
120 | "researcher_agent = Agent(\n",
121 | "    role=\"Travel Researcher\",\n",
122 | "    goal=\"Research and compile interesting activities and attractions for a given location\",\n",
123 | "    backstory=dedent(\n",
124 | "        \"\"\"You are an experienced travel researcher with a knack for \n",
125 | "        discovering both popular attractions and hidden gems in any \n",
126 | "        location. Your expertise lies in gathering comprehensive \n",
127 | "        information about various activities, their historical \n",
128 | "        significance, and practical details for visitors.\n",
129 | "        \"\"\"),\n",
130 | "    llm=llm,\n",
131 | "    allow_delegation=False, max_iter=4,\n",
132 | "    tools=[search_tool],\n",
133 | "    verbose=True,\n",
134 | ")\n",
135 | "\n",
136 | "content_writer = Agent(\n",
137 | "    role=\"Content Writer\",\n",
138 | "    goal=\"Write a listicle of 5+ attractions/activities for a given location\",\n",
139 | "    backstory=dedent(\n",
140 | "        \"\"\"You are a content writer with a knack for creating engaging\n",
141 | "        and informative content for travel blogs. Your expertise lies in\n",
142 | "        crafting listicles that are engaging, accurate, and easy to read.\n",
143 | "        \"\"\"),\n",
144 | "    llm=llm,\n",
145 | "    allow_delegation=False, max_iter=4,\n",
146 | "    verbose=True,\n",
147 | ")\n",
148 | "\n",
149 | "editor_agent = Agent(\n",
150 | "    role=\"Content Editor\",\n",
151 | "    goal=\"Ensure the listicle is well-structured, engaging, and error-free\",\n",
152 | "    backstory=dedent(\n",
153 | "        \"\"\"You are a meticulous editor with years of experience in\n",
154 | "        travel content. Your keen eye for detail helps polish articles\n",
155 | "        to perfection. You focus on improving flow, maintaining\n",
156 | "        consistency, and enhancing the overall readability of the\n",
157 | "        content while ensuring it appeals to the target audience.\n",
158 | "        \"\"\"),\n",
159 | "    llm=llm,\n",
160 | "    allow_delegation=False, max_iter=4,\n",
161 | "    verbose=True,\n",
162 | ")"
163 | ]
164 | },
165 | {
166 | "cell_type": "markdown",
167 | "id": "9c72352f",
168 | "metadata": {},
169 | "source": [
170 | "Create the flow:\n",
171 | "\n",
172 | "1. Search online for information about the city to visit\n",
173 | "2. Write the content in the form of a listicle\n",
174 | "3. Review and edit the content"
175 | ]
176 | },
177 | {
178 | "cell_type": "code",
179 | "execution_count": null,
180 | "id": "cf76dc47",
181 | "metadata": {},
182 | "outputs": [],
183 | "source": [
184 | "from crewai.flow.flow import Flow, listen, start\n",
185 | "from pydantic import BaseModel\n",
186 | "\n",
187 | "\n",
188 | "class TravelAgentFlow(Flow):\n",
189 | "    @start()\n",
190 | "    async def search_online(self):\n",
191 | "        query = f\"Best things to do in {self.state['city']}\"\n",
192 | "        result = await researcher_agent.kickoff_async(query)\n",
193 | "        return result\n",
194 | "\n",
195 | "    @listen(search_online)\n",
196 | "    async def write_content(self, search_result):\n",
197 | "        query = f\"{search_result}\\n\\nBased on the search results, write a listicle of 5 things to do in {self.state['city']}\"\n",
198 | "        result = await content_writer.kickoff_async(query)\n",
199 | "        return result\n",
200 | "\n",
201 | "    @listen(write_content)\n",
202 | "    async def edit_content(self, listicle):\n",
203 | "        query = f\"Review and edit the top 5 listicle article about things to do in {self.state['city']}.\\n\\nContent:\\n{listicle}\\n\\nMake sure the content is well-structured, engaging, and error-free.\"\n",
204 | "        result = await editor_agent.kickoff_async(query)\n",
205 | "        return result\n",
206 | "\n",
207 | "# Run the flow; inputs passed to kickoff_async are merged into self.state\n",
208 | "flow = TravelAgentFlow()\n",
209 | "final_output = await flow.kickoff_async(inputs={\"city\": \"Paris\"})\n",
210 | "print(\"---- Final Output ----\")\n",
211 | "print(final_output)"
212 | ]
213 | },
214 | {
215 | "cell_type": "code",
216 | "execution_count": null,
217 | "id": "a9fab0f6",
218 | "metadata": {},
219 | "outputs": [],
220 | "source": []
221 | }
222 | ],
223 | "metadata": {
224 | "kernelspec": {
225 | "display_name": ".venv",
226 | "language": "python",
227 | "name": "python3"
228 | },
229 | "language_info": {
230 | "codemirror_mode": {
231 | "name": "ipython",
232 | "version": 3
233 | },
234 | "file_extension": ".py",
235 | "mimetype": "text/x-python",
236 | "name": "python",
237 | "nbconvert_exporter": "python",
238 | "pygments_lexer": "ipython3",
239 | "version": "3.12.9"
240 | }
241 | },
242 | "nbformat": 4,
243 | "nbformat_minor": 5
244 | }
245 | 
--------------------------------------------------------------------------------
/workshops/diy-agents-with-sagemaker-and-bedrock/4-frameworks/langgraph/langgraph-requirements.txt:
--------------------------------------------------------------------------------
1 | boto3
2 | botocore
3 | sagemaker
4 | langchain
5 | langchain_aws
6 | langchain_experimental
7 | langchain-community
8 | duckduckgo-search
9 | langgraph
10 | matplotlib
--------------------------------------------------------------------------------
/workshops/diy-agents-with-sagemaker-and-bedrock/4-frameworks/openai-agents-sdk/openai_agents_sdk_tutorial.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Getting Started with the OpenAI Agents SDK\n",
8 | "\n",
9 | "This notebook will guide you through the basics of using the [OpenAI Agents SDK](https://github.com/openai/agents-sdk) with Amazon Bedrock. We'll cover how to set up your environment, create an agent, and run a simple example.\n",
10 | "\n",
11 | "## Prerequisites\n",
12 | "Before you begin, ensure you have the following:\n",
13 | "- Python installed\n",
14 | "- An AWS account with access to Amazon Bedrock\n",
15 | "- The `openai-agents` Python package installed\n",
16 | "\n",
17 | "You can install the `openai-agents` package using pip:"
18 | ]
19 | },
20 | {
21 | "cell_type": "code",
22 | "execution_count": 1,
23 | "metadata": {},
24 | "outputs": [
25 | {
26 | "name": "stdout",
27 | "output_type": "stream",
28 | "text": [
29 | "Note: you may need to restart the kernel to use updated packages.\n"
30 | ]
31 | }
32 | ],
33 | "source": [
34 | "%pip install openai-agents litellm boto3 --quiet --upgrade"
35 | ]
36 | },
37 | {
38 | "cell_type": "code",
39 | "execution_count": null,
40 | "metadata": {},
41 | "outputs": [],
42 | "source": [
43 | "from __future__ import annotations\n",
44 | "from agents import Agent, Runner, function_tool, set_tracing_disabled\n",
45 | "from agents.tool import FunctionTool\n",
46 | "\n",
47 | "set_tracing_disabled(disabled=True)"
48 | ]
49 | },
50 | {
51 | "cell_type": "markdown",
52 | "metadata": {},
53 | "source": [
54 | "## Converting an OpenAI Tool to a Bedrock Tool\n",
55 | "\n",
56 | "The `convert_openai_tool_to_bedrock_tool` function converts an OpenAI tool to a Bedrock tool. This is useful when you want to use an existing OpenAI tool with Amazon Bedrock."
57 | ]
58 | },
59 | {
60 | "cell_type": "code",
61 | "execution_count": 3,
62 | "metadata": {},
63 | "outputs": [],
64 | "source": [
65 | "def convert_openai_tool_to_bedrock_tool(tool: dict) -> FunctionTool:\n",
66 | "    \"\"\"Converts an OpenAI tool to a Bedrock tool.\"\"\"\n",
67 | "    return FunctionTool(\n",
68 | "        name=tool[\"name\"],\n",
69 | "        description=tool[\"description\"],\n",
70 | "        params_json_schema={\n",
71 | "            \"type\": \"object\",\n",
72 | "            \"properties\": { k: v for k, v in tool[\"params_json_schema\"][\"properties\"].items() },\n",
73 | "            \"required\": tool[\"params_json_schema\"].get(\"required\", []),\n",
74 | "        },\n",
75 | "        on_invoke_tool=tool[\"on_invoke_tool\"],\n",
76 | "    )"
77 | ]
78 | },
79 | {
80 | "cell_type": "markdown",
81 | "metadata": {},
82 | "source": [
83 | "## Creating a Simple Tool\n",
84 | "\n",
85 | "Let's create a simple tool that gets the weather for a given city."
86 | ]
87 | },
88 | {
89 | "cell_type": "code",
90 | "execution_count": 4,
91 | "metadata": {},
92 | "outputs": [],
93 | "source": [
94 | "@function_tool\n",
95 | "def get_weather(city: str):\n",
96 | "    \"\"\"Get the weather for a given city.\"\"\"\n",
97 | "    print(f\"[debug] getting weather for {city}\")\n",
98 | "    return f\"The weather in {city} is sunny.\""
99 | ]
100 | },
101 | {
102 | "cell_type": "markdown",
103 | "metadata": {},
104 | "source": [
105 | "## Creating an Agent\n",
106 | "\n",
107 | "Now, let's create an agent that uses the `get_weather` tool."
108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 10, 113 | "metadata": {}, 114 | "outputs": [ 115 | { 116 | "name": "stderr", 117 | "output_type": "stream", 118 | "text": [ 119 | "/opt/homebrew/Cellar/python@3.12/3.12.9/Frameworks/Python.framework/Versions/3.12/lib/python3.12/typing.py:1217: RuntimeWarning: coroutine 'main' was never awaited\n", 120 | " super().__setattr__(attr, val)\n", 121 | "RuntimeWarning: Enable tracemalloc to get the object allocation traceback\n" 122 | ] 123 | }, 124 | { 125 | "name": "stdout", 126 | "output_type": "stream", 127 | "text": [ 128 | "[debug] getting weather for Tokyo\n", 129 | "Sunny skies in Tokyo,\n", 130 | "April 22, 2025 haiku.\n" 131 | ] 132 | } 133 | ], 134 | "source": [ 135 | "%autoawait asyncio\n", 136 | "\n", 137 | "agent = Agent(\n", 138 | " name=\"Assistant\",\n", 139 | " instructions=\"You only respond in haikus.\",\n", 140 | " model=\"litellm/bedrock/us.amazon.nova-pro-v1:0\",\n", 141 | " tools=[convert_openai_tool_to_bedrock_tool(get_weather.__dict__)],\n", 142 | ")\n", 143 | "\n", 144 | "result = await Runner.run(agent, \"What's the weather in Tokyo today, April 22 2025?\")\n", 145 | "print(result.final_output)" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": null, 151 | "metadata": {}, 152 | "outputs": [], 153 | "source": [] 154 | } 155 | ], 156 | "metadata": { 157 | "kernelspec": { 158 | "display_name": ".venv", 159 | "language": "python", 160 | "name": "python3" 161 | }, 162 | "language_info": { 163 | "codemirror_mode": { 164 | "name": "ipython", 165 | "version": 3 166 | }, 167 | "file_extension": ".py", 168 | "mimetype": "text/x-python", 169 | "name": "python", 170 | "nbconvert_exporter": "python", 171 | "pygments_lexer": "ipython3", 172 | "version": "3.12.9" 173 | } 174 | }, 175 | "nbformat": 4, 176 | "nbformat_minor": 4 177 | } 178 | -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/4-frameworks/smolagents/smolagents-example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "126945bd-7490-46c6-80c8-64481f37f0f8", 7 | "metadata": { 8 | "scrolled": true 9 | }, 10 | "outputs": [], 11 | "source": [ 12 | "%pip install smolagents \"smolagents[litellm]\" -qU" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "id": "a688cfe0-c98a-47e4-ba70-4646e079fa4e", 18 | "metadata": {}, 19 | "source": [ 20 | "# Building Autonomous Agents with smolagents\n", 21 | "\n", 22 | "[🤗 smolagents](https://huggingface.co/blog/smolagents) is a library by Hugging Face that enables you to run powerful agents in a few lines of code. It is based on the concept of CodeAct Agents ([arXiv:2402.01030](https://arxiv.org/abs/2402.01030)), i.e. agents that write their actions in code. In a multi-step agent, at each step, the LLM can write an action, in the form of some calls to external tools. A common format for writing these actions is generally different shades of \"writing actions as a JSON of tools names and arguments to use, which you then parse to know which tool to execute and with which arguments\". To make it secure, it supports executing in sandboxed environments. 
Multiple research papers have shown that having LLMs write their tool calls as executable code outperforms JSON-based tool calling.\n",
23 | "\n",
24 | "In this example, we will show how to use a multi-agent framework with tools to find the most downloaded model for a given task on the Hugging Face Hub.\n",
25 | "\n",
26 | "smolagents supports any LLM via its LiteLLM integration. Here we leverage that integration, which lets us use either Amazon Bedrock or Amazon SageMaker AI, according to our preference:"
27 | ]
28 | },
29 | {
30 | "cell_type": "code",
31 | "execution_count": null,
32 | "id": "cad5a4ab-c598-4357-b267-8a6d64c2d00d",
33 | "metadata": {},
34 | "outputs": [],
35 | "source": [
36 | "from smolagents.models import LiteLLMModel\n",
37 | "\n",
38 | "# To use Amazon Bedrock:\n",
39 | "model = LiteLLMModel(model_id=\"bedrock/us.amazon.nova-pro-v1:0\", max_tokens=5*1024)\n",
40 | "# To use Amazon SageMaker AI:\n",
41 | "# model = LiteLLMModel(model_id=\"sagemaker_chat/YOUR-ENDPOINT-NAME-HERE\", max_tokens=5*1024)"
42 | ]
43 | },
44 | {
45 | "cell_type": "markdown",
46 | "id": "5dceafe2-8fdb-4fb5-ac74-83e679029b99",
47 | "metadata": {},
48 | "source": [
49 | "Agents with smolagents can be easily created via two classes, `CodeAgent` and `ToolCallingAgent`:\n",
50 | "\n",
51 | "- **CodeAgent** generates executable Python code snippets, enabling complex logic and variable handling for tasks requiring multi-step operations or data manipulation; \n",
52 | "- **ToolCallingAgent** employs standardized JSON structures to define tool calls, aligning with common LLM provider implementations for simpler, structured interactions. \n",
53 | "\n",
54 | "According to `smolagents` developers, CodeAgents typically achieve better performance on complex benchmarks due to their code-first flexibility, while ToolCallingAgents suit systems prioritizing interoperability with existing tool-calling protocols. Both agent types share the same multi-step workflow but differ fundamentally in action representation and execution security considerations."
55 | ]
56 | },
57 | {
58 | "cell_type": "code",
59 | "execution_count": null,
60 | "id": "0a0c7b09-318f-4ca5-82f6-804d1cba22b7",
61 | "metadata": {
62 | "scrolled": true
63 | },
64 | "outputs": [],
65 | "source": [
66 | "from smolagents import CodeAgent\n",
67 | "\n",
68 | "agent = CodeAgent(model=model, tools=[])\n",
69 | "agent.run(\"What is 123*456?\")"
70 | ]
71 | },
72 | {
73 | "cell_type": "markdown",
74 | "id": "5d2d3796-d4cf-4a21-9a72-1b499661c045",
75 | "metadata": {},
76 | "source": [
77 | "Let's extend the agent's functionality using tools. As you've learnt already in the foundations/tools section, tools are functions or query engines that the agent can use to perform specific tasks. One nice feature of smolagents is that it comes with tools pre-packaged (called **base tools**) which can be easily added with:"
78 | ]
79 | },
80 | {
81 | "cell_type": "code",
82 | "execution_count": null,
83 | "id": "09a7bff1-a3d2-498e-bd90-915222d57704",
84 | "metadata": {
85 | "scrolled": true
86 | },
87 | "outputs": [],
88 | "source": [
89 | "agent = CodeAgent(model=model, add_base_tools=True, tools=[])\n",
90 | "agent.run(\"Can you explain the origin of the 'Hello World' program?\")"
91 | ]
92 | },
93 | {
94 | "cell_type": "markdown",
95 | "id": "e81f7bb6-fee4-4ba0-8f4b-cd782daaddc7",
96 | "metadata": {},
97 | "source": [
98 | "The available base tools are listed in the [smolagents doc](https://smolagents.org/docs/agents-guided-tour/#4-toc-title):\n",
99 | "\n",
100 | "- **DuckDuckGo web search**: performs a web search using DuckDuckGo.\n",
101 | "- **Python code interpreter**: runs your LLM-generated Python code in a secure environment. This tool is only added to a ToolCallingAgent when you initialize it with add_base_tools=True, since a code-based agent can already natively execute Python code.\n",
102 | "- **Transcriber**: a speech-to-text pipeline built on Whisper-Turbo that transcribes audio to text.\n",
103 | "\n",
104 | "To create custom tools, we use the `@tool` decorator, which turns a function into a tool. For example, let's write a function that retrieves the most downloaded model from the Hugging Face Hub:"
105 | ]
106 | },
107 | {
108 | "cell_type": "code",
109 | "execution_count": null,
110 | "id": "4f38b043-df4b-42d9-a23c-a302a8abc30e",
111 | "metadata": {
112 | "scrolled": true
113 | },
114 | "outputs": [],
115 | "source": [
116 | "from smolagents import tool\n",
117 | "from huggingface_hub import list_models\n",
118 | "\n",
119 | "@tool\n",
120 | "def get_top_hf_model_from_task(task: str) -> str:\n",
121 | "    \"\"\"\n",
122 | "    This is a tool that returns the most downloaded model of a given task on the Hugging Face Hub.\n",
123 | "    It returns the name of the checkpoint.\n",
124 | "\n",
125 | "    Args:\n",
126 | "        task: The task for which to retrieve the most downloaded model.\n",
127 | "    \"\"\"\n",
128 | "    most_downloaded_model = next(\n",
129 | "        iter(list_models(filter=task, sort=\"downloads\", direction=-1))\n",
130 | "    )\n",
131 | "    return most_downloaded_model.id\n",
132 | "\n",
133 | "agent = CodeAgent(model=model, add_base_tools=True, tools=[get_top_hf_model_from_task])\n",
134 | "agent.run(\"Can you give me the name of the model that has the most downloads in the 'text-to-video' task on the Hugging Face Hub?\")"
135 | ]
136 | },
137 | {
138 | "cell_type": "markdown",
139 | "id": "5e07411f-01e6-466a-98ac-937fdb57e5cd",
140 | "metadata": {},
141 | "source": [
142 | "### Multi-Agents\n",
143 | "\n",
144 | "You can easily build hierarchical multi-agent systems with smolagents. 
Here’s an example of making an agent that manages a specific web search agent using `DuckDuckGoSearchTool`:" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "id": "6b99a0e0-b081-4118-8ebd-cdbf000e2d5f", 151 | "metadata": {}, 152 | "outputs": [], 153 | "source": [ 154 | "from smolagents import CodeAgent, DuckDuckGoSearchTool, ToolCallingAgent\n", 155 | "import litellm\n", 156 | "\n", 157 | "litellm.drop_params = True # Required to drop {\"tool_choice\": \"auto\"}, not supported by Bedrock/SageMaker\n", 158 | "\n", 159 | "web_agent = ToolCallingAgent(\n", 160 | " name=\"web_search_agent\",\n", 161 | " description=\"Runs web searches for you.\",\n", 162 | " model=model, max_steps=3,\n", 163 | " tools=[DuckDuckGoSearchTool(max_results=5)],\n", 164 | ")\n", 165 | "\n", 166 | "manager_agent = CodeAgent(\n", 167 | " tools=[], model=model, managed_agents=[web_agent], max_steps=3\n", 168 | ")\n", 169 | "manager_agent.run(\"Who is the CEO of Amazon Web Services as of 2025?\")" 170 | ] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "id": "9a0cf2f7-2ab3-4346-b14b-1f24c824a63f", 175 | "metadata": {}, 176 | "source": [ 177 | "### Exercise: Build a multi-agent travel assistant\n", 178 | "\n", 179 | "Your task is to create a multi-agent travel assistant based on the notions you've learnt above. Here are the steps that you need to go through, to give you a starting point:\n", 180 | "\n", 181 | "1. Create one Supervisor agent, who is meant to act like a project manager for the team\n", 182 | "2. Create a Travel Researcher agent, whose task is to research and compile interesting activities and attractions for a given location\n", 183 | "3. Create a Travel Content Writer agent, whose task is to create engaging and informative content for the top 5 listicle\n", 184 | "4. Create a Content Editor agent, whose task is to ensure the listicle is well-structured, engaging, and error-free" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": null, 190 | "id": "23f02850", 191 | "metadata": {}, 192 | "outputs": [], 193 | "source": [] 194 | } 195 | ], 196 | "metadata": { 197 | "kernelspec": { 198 | "display_name": ".venv", 199 | "language": "python", 200 | "name": "python3" 201 | }, 202 | "language_info": { 203 | "codemirror_mode": { 204 | "name": "ipython", 205 | "version": 3 206 | }, 207 | "file_extension": ".py", 208 | "mimetype": "text/x-python", 209 | "name": "python", 210 | "nbconvert_exporter": "python", 211 | "pygments_lexer": "ipython3", 212 | "version": "3.12.9" 213 | } 214 | }, 215 | "nbformat": 4, 216 | "nbformat_minor": 5 217 | } 218 | -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/4-frameworks/strands-agents/strands_sagemaker.py: -------------------------------------------------------------------------------- 1 | """Amazon SageMaker model provider.""" 2 | 3 | import json 4 | import logging 5 | import os 6 | from dataclasses import dataclass 7 | from typing import Any, Iterable, Literal, Optional, TypedDict, cast 8 | 9 | import boto3 10 | from botocore.config import Config as BotocoreConfig 11 | from typing_extensions import Unpack, override 12 | 13 | from strands.types.content import Messages 14 | from strands.types.models import OpenAIModel 15 | from strands.types.tools import ToolSpec 16 | 17 | logger = logging.getLogger(__name__) 18 | 19 | 20 | @dataclass 21 | class UsageMetadata: 22 | """Usage metadata for the model. 
23 | 24 | Attributes: 25 | total_tokens: Total number of tokens used in the request 26 | completion_tokens: Number of tokens used in the completion 27 | prompt_tokens: Number of tokens used in the prompt 28 | prompt_tokens_details: Additional information about the prompt tokens (optional) 29 | """ 30 | total_tokens: int 31 | completion_tokens: int 32 | prompt_tokens: int 33 | prompt_tokens_details: Optional[int] = 0 34 | 35 | 36 | @dataclass 37 | class FunctionCall: 38 | """Function call for the model. 39 | 40 | Attributes: 41 | name: Name of the function to call 42 | arguments: Arguments to pass to the function 43 | """ 44 | 45 | name: str 46 | arguments: str 47 | 48 | def __init__(self, **kwargs): 49 | """Initialize function call. 50 | 51 | Args: 52 | **kwargs: Keyword arguments for the function call. 53 | """ 54 | self.name = kwargs.get("name") 55 | self.arguments = kwargs.get("arguments") 56 | 57 | 58 | @dataclass 59 | class ToolCall: 60 | """Tool call for the model object. 61 | 62 | Attributes: 63 | id: Tool call ID 64 | type: Tool call type 65 | function: Tool call function 66 | """ 67 | 68 | id: str 69 | type: Literal["function"] 70 | function: FunctionCall 71 | 72 | def __init__(self, **kwargs): 73 | """Initialize tool call object. 74 | 75 | Args: 76 | **kwargs: Keyword arguments for the tool call. 77 | """ 78 | self.id = kwargs.get("id") 79 | self.type = kwargs.get("type") 80 | self.function = FunctionCall(**kwargs.get("function")) 81 | 82 | 83 | class SageMakerAIModel(OpenAIModel): 84 | """Amazon SageMaker model provider implementation. 85 | 86 | The implementation handles SageMaker-specific features such as: 87 | 88 | - Endpoint invocation 89 | - Tool configuration for function calling 90 | - Context window overflow detection 91 | - Endpoint not found error handling 92 | - Inference component capacity error handling with automatic retries 93 | """ 94 | 95 | class SageMakerAIModelConfig(TypedDict, total=False): 96 | """Configuration options for SageMaker models. 97 | 98 | Attributes: 99 | endpoint_name: The name of the SageMaker endpoint to invoke 100 | inference_component_name: The name of the inference component to use 101 | max_tokens: Maximum number of tokens to generate in the response 102 | stop_sequences: List of sequences that will stop generation when encountered 103 | temperature: Controls randomness in generation (higher = more random) 104 | top_p: Controls diversity via nucleus sampling (alternative to temperature) 105 | additional_args: Any additional arguments to include in the request 106 | """ 107 | 108 | endpoint_name: str 109 | inference_component_name: Optional[str] 110 | max_tokens: Optional[int] 111 | stop_sequences: Optional[list[str]] 112 | temperature: Optional[float] 113 | top_p: Optional[float] 114 | additional_args: Optional[dict[str, Any]] 115 | 116 | def __init__( 117 | self, 118 | *, 119 | boto_session: Optional[boto3.Session] = None, 120 | boto_client_config: Optional[BotocoreConfig] = None, 121 | region_name: Optional[str] = None, 122 | **model_config: Unpack["SageMakerAIModelConfig"], 123 | ): 124 | """Initialize provider instance. 125 | 126 | Args: 127 | boto_session: Boto Session to use when calling the SageMaker Runtime. 128 | boto_client_config: Configuration to use when creating the SageMaker-Runtime Boto Client. 129 | region_name: Name of the AWS region (e.g.: us-west-2) 130 | **model_config: Model parameters for the SageMaker request payload. 
131 | """ 132 | self.config = dict(model_config) 133 | 134 | logger.debug("config=<%s> | initializing", self.config) 135 | 136 | session = boto_session or boto3.Session( 137 | region_name=region_name or os.getenv("AWS_REGION") or "us-west-2", 138 | ) 139 | 140 | # Add strands-agents to the request user agent 141 | if boto_client_config: 142 | existing_user_agent = getattr(boto_client_config, "user_agent_extra", None) 143 | 144 | # Append 'strands-agents' to existing user_agent_extra or set it if not present 145 | if existing_user_agent: 146 | new_user_agent = f"{existing_user_agent} strands-agents" 147 | else: 148 | new_user_agent = "strands-agents" 149 | 150 | client_config = boto_client_config.merge(BotocoreConfig(user_agent_extra=new_user_agent)) 151 | else: 152 | client_config = BotocoreConfig(user_agent_extra="strands-agents") 153 | 154 | self.client = session.client( 155 | service_name="sagemaker-runtime", 156 | config=client_config, 157 | ) 158 | 159 | @override 160 | def update_config(self, **model_config: Unpack[SageMakerAIModelConfig]) -> None: # type: ignore[override] 161 | """Update the Amazon SageMaker model configuration with the provided arguments. 162 | 163 | Args: 164 | **model_config: Configuration overrides. 165 | """ 166 | self.config.update(model_config) 167 | 168 | @override 169 | def get_config(self) -> SageMakerAIModelConfig: 170 | """Get the Amazon SageMaker model configuration. 171 | 172 | Returns: 173 | The Amazon SageMaker model configuration. 174 | """ 175 | return cast(SageMakerAIModel.SageMakerAIModelConfig, self.config) 176 | 177 | @override 178 | def format_request( 179 | self, messages: Messages, tool_specs: Optional[list[ToolSpec]] = None, system_prompt: Optional[str] = None 180 | ) -> dict[str, Any]: 181 | """Format an Amazon SageMaker chat streaming request. 182 | 183 | Args: 184 | messages: List of message objects to be processed by the model. 185 | tool_specs: List of tool specifications to make available to the model. 186 | system_prompt: System prompt to provide context to the model. 187 | 188 | Returns: 189 | An Amazon SageMaker chat streaming request. 
190 | """ 191 | payload = { 192 | "messages": self.format_request_messages(messages, system_prompt), 193 | "tools": [ 194 | { 195 | "type": "function", 196 | "function": { 197 | "name": tool_spec["name"], 198 | "description": tool_spec["description"], 199 | "parameters": tool_spec["inputSchema"]["json"], 200 | }, 201 | } 202 | for tool_spec in tool_specs or [] 203 | ], 204 | **({"max_tokens": self.config["max_tokens"]} if "max_tokens" in self.config else {}), 205 | **({"temperature": self.config["temperature"]} if "temperature" in self.config else {}), 206 | **({"top_p": self.config["top_p"]} if "top_p" in self.config else {}), 207 | **({"stop": self.config["stop_sequences"]} if "stop_sequences" in self.config else {}), 208 | **( 209 | self.config["additional_args"] 210 | if "additional_args" in self.config and self.config["additional_args"] is not None 211 | else {} 212 | ), 213 | } 214 | 215 | # Assistant message must have either content or tool_calls, but not both 216 | for message in payload["messages"]: 217 | if message.get("tool_calls", []) != []: 218 | _ = message.pop("content") 219 | 220 | # Format the request according to the SageMaker Runtime API requirements 221 | request = { 222 | "EndpointName": self.config["endpoint_name"], 223 | "Body": json.dumps(payload), 224 | "ContentType": "application/json", 225 | "Accept": "application/json", 226 | } 227 | 228 | # Add InferenceComponentName if provided 229 | if self.config.get("inference_component_name"): 230 | request["InferenceComponentName"] = self.config["inference_component_name"] 231 | return request 232 | 233 | @override 234 | def stream(self, request: dict[str, Any]) -> Iterable[dict[str, Any]]: 235 | """Send the request to the Amazon SageMaker AI model and get the streaming response. 236 | 237 | This method calls the Amazon SageMaker AI chat API and returns the stream of response events. 238 | 239 | Args: 240 | request: The formatted request to send to the Amazon SageMaker AI model. 241 | 242 | Returns: 243 | An iterable of response events from the Amazon SageMaker AI model. 
244 | """ 245 | response = self.client.invoke_endpoint_with_response_stream(**request) 246 | 247 | # Wait until all the answer has been streamed 248 | final_response = "" 249 | for event in response["Body"]: 250 | chunk_data = event["PayloadPart"]["Bytes"].decode("utf-8") 251 | final_response += chunk_data 252 | final_response_json = json.loads(final_response) 253 | 254 | # Obtain the key elements from the response 255 | message = final_response_json["choices"][0]["message"] 256 | message_stop_reason = final_response_json["choices"][0]["finish_reason"] 257 | 258 | # Message start 259 | yield {"chunk_type": "message_start"} 260 | 261 | # Handle text 262 | yield {"chunk_type": "content_start", "data_type": "text"} 263 | yield {"chunk_type": "content_delta", "data_type": "text", "data": message["content"] or ""} 264 | yield {"chunk_type": "content_stop", "data_type": "text"} 265 | 266 | # Handle the tool calling, if any 267 | if message_stop_reason == "tool_calls": 268 | for tool_call in message["tool_calls"] or []: 269 | yield {"chunk_type": "content_start", "data_type": "tool", "data": ToolCall(**tool_call)} 270 | yield {"chunk_type": "content_delta", "data_type": "tool", "data": ToolCall(**tool_call)} 271 | yield {"chunk_type": "content_stop", "data_type": "tool", "data": ToolCall(**tool_call)} 272 | 273 | # Message close 274 | yield {"chunk_type": "message_stop", "data": message_stop_reason} 275 | # Handle usage metadata 276 | yield {"chunk_type": "metadata", "data": UsageMetadata(**final_response_json["usage"])} 277 | -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/5-observability/2-mlflow/crewai-requirements.txt: -------------------------------------------------------------------------------- 1 | crewai 2 | crewai[tools] 3 | boto3 4 | botocore 5 | sagemaker 6 | duckduckgo-search 7 | -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/5-observability/README.md: -------------------------------------------------------------------------------- 1 | # Observability 2 | 3 | Observability is a critical component when developing and deploying AI agents in production environments. As AI agents become more complex, involving multiple components, tools, and LLM calls, having visibility into their behavior becomes essential for debugging, optimization, and ensuring reliability. 4 | 5 | ## Why Observability Matters for AI Agents 6 | 7 | - **Transparency**: Observability provides insights into how agents make decisions, which tools they use, and how they process information, making the "black box" of AI more transparent. 8 | - **Debugging**: When agents produce unexpected outputs or fail, observability tools help pinpoint where and why issues occurred in the execution flow. 9 | - **Performance Optimization**: By tracking metrics like latency, token usage, and tool call frequency, developers can identify bottlenecks and optimize agent performance. 10 | - **Cost Management**: Monitoring token usage and API calls helps manage and optimize the costs associated with running AI agents at scale. 11 | - **Continuous Improvement**: Collecting data on agent behavior enables iterative improvement of prompts, tools, and overall agent design based on real-world usage patterns. 12 | 13 | In this section, we explore two approaches to implementing observability for AI agents: 14 | 15 | 1. 
**Langfuse**: An open-source observability platform specifically designed for LLM applications 16 | 2. **MLflow**: A versatile platform for managing ML workflows that can be used to track and trace agent executions 17 | 18 | Both solutions provide valuable insights into agent behavior, helping you build more reliable, efficient, and cost-effective AI systems. 19 | -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/99-use-cases/mcp/server.py: -------------------------------------------------------------------------------- 1 | import httpx 2 | from mcp.server.fastmcp import FastMCP 3 | 4 | mcp = FastMCP("My App") 5 | 6 | 7 | @mcp.tool() 8 | def calculate_bmi(weight_kg: float, height_m: float) -> float: 9 | """Calculate BMI given weight in kg and height in meters""" 10 | return weight_kg / (height_m**2) 11 | 12 | 13 | @mcp.tool() 14 | async def fetch_weather(city: str) -> str: 15 | """Fetch current weather for a city""" 16 | async with httpx.AsyncClient() as client: 17 | response = await client.get(f"https://api.weather.com/{city}") 18 | return response.text 19 | 20 | @mcp.tool() 21 | async def get_random_joke() -> str: 22 | """Get a random joke""" 23 | url = "https://official-joke-api.appspot.com/random_joke" 24 | async with httpx.AsyncClient() as client: 25 | response = await client.get(url) 26 | joke = response.json() 27 | return joke 28 | 29 | if __name__ == "__main__": 30 | mcp.run(transport="stdio") -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/99-use-cases/sagemaker-endpoint-as-tool/README.md: -------------------------------------------------------------------------------- 1 | # Using SageMaker Endpoints as Tools for Agents 2 | 3 | This lab demonstrates how to integrate Amazon SageMaker endpoints as tools for AI agents, enabling them to leverage machine learning models for specialized tasks. 4 | 5 | ## Overview 6 | 7 | In this lab, you'll learn how to: 8 | 9 | 1. Train and deploy a demand forecasting model on Amazon SageMaker 10 | 2. Create a tool interface that allows AI agents to invoke the SageMaker endpoint 11 | 3. Use the SageMaker endpoint as a specialized tool within an agent workflow 12 | 13 | ## Architecture 14 | 15 | ![endpoint-as-tool.png](endpoint-as-tool.png) 16 | 17 | The solution follows this workflow: 18 | 19 | 1. A time series forecasting model is trained using XGBoost on SageMaker 20 | 2. The model is deployed to a SageMaker endpoint for real-time inference 21 | 3. A tool interface is created using the Model Context Protocol (MCP) 22 | 4. 
The tool is integrated with an agent framework to enable AI agents to make predictions 23 | 24 | ## Key Components 25 | 26 | - **Amazon SageMaker**: For training and hosting the XGBoost forecasting model 27 | - **Model Context Protocol (MCP)**: For creating a standardized tool interface 28 | - **Strands Agents**: For building and orchestrating AI agents that use the SageMaker endpoint 29 | 30 | ## Files Included 31 | 32 | - `demand_forecasting.ipynb`: Jupyter notebook for data preparation and model exploration 33 | - `model-train-and-deploy.py`: Python script for training and deploying the XGBoost model 34 | - `script.py`: SageMaker training and inference script for the XGBoost model 35 | - `server.py`: MCP server implementation for the SageMaker endpoint tool 36 | - `strands-agents-sagemaker-as-tool.ipynb`: Example of using the SageMaker endpoint with agents 37 | 38 | ## Prerequisites 39 | 40 | - An AWS account with access to Amazon SageMaker AI 41 | - Basic understanding of machine learning concepts 42 | - Familiarity with Python and Jupyter notebooks 43 | 44 | ## Getting Started 45 | 46 | 1. Review the `demand_forecasting.ipynb` notebook to understand the data and model 47 | 2. Explore the `strands-agents-sagemaker-as-tool.ipynb` notebook to see the tool in action 48 | 49 | ## Learning Objectives 50 | 51 | By completing this lab, you will: 52 | 53 | - Understand how to train and deploy ML models on Amazon SageMaker AI 54 | - Learn how to create tool interfaces for AI agents using MCP 55 | - Gain experience integrating specialized ML capabilities into agent workflows 56 | - See how agents can leverage ML models for enhanced decision-making 57 | -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/99-use-cases/sagemaker-endpoint-as-tool/endpoint-as-tool.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/diy-agents-with-sagemaker-and-bedrock/99-use-cases/sagemaker-endpoint-as-tool/endpoint-as-tool.png -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/99-use-cases/sagemaker-endpoint-as-tool/script.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | """ 5 | XGBoost training and inference script for SageMaker 6 | 7 | This script is used by SageMaker to train an XGBoost model for demand forecasting 8 | and to serve predictions from a deployed endpoint. 
9 | """ 10 | 11 | import argparse 12 | import os 13 | import pandas as pd 14 | import numpy as np 15 | import xgboost as xgb 16 | import json 17 | import logging 18 | import io 19 | 20 | logger = logging.getLogger() 21 | logger.setLevel(logging.INFO) 22 | 23 | 24 | def parse_args(): 25 | """Parse SageMaker training job arguments.""" 26 | parser = argparse.ArgumentParser() 27 | 28 | # Hyperparameters sent by the client are passed as command-line arguments to the script 29 | parser.add_argument('--max_depth', type=int, default=6) 30 | parser.add_argument('--eta', type=float, default=0.2) 31 | parser.add_argument('--gamma', type=float, default=4) 32 | parser.add_argument('--min_child_weight', type=int, default=6) 33 | parser.add_argument('--subsample', type=float, default=0.8) 34 | parser.add_argument('--verbosity', type=int, default=1) 35 | parser.add_argument('--objective', type=str, default='reg:squarederror') 36 | parser.add_argument('--num_round', type=int, default=100) 37 | 38 | # Data directories 39 | parser.add_argument('--train', type=str, default=os.environ.get('SM_CHANNEL_TRAIN')) 40 | parser.add_argument('--validation', type=str, default=os.environ.get('SM_CHANNEL_VALIDATION')) 41 | 42 | # Model directory: this is where the model will be saved 43 | parser.add_argument('--model-dir', type=str, default=os.environ.get('SM_MODEL_DIR')) 44 | 45 | return parser.parse_args() 46 | 47 | 48 | def load_data(data_dir): 49 | """Load training data from CSV file.""" 50 | logger.info(f"Loading data from {data_dir}") 51 | 52 | # List files in the directory 53 | files = os.listdir(data_dir) 54 | csv_files = [f for f in files if f.endswith('.csv')] 55 | 56 | if not csv_files: 57 | raise ValueError(f"No CSV files found in {data_dir}") 58 | 59 | # Load the first CSV file 60 | data_path = os.path.join(data_dir, csv_files[0]) 61 | df = pd.read_csv(data_path) 62 | 63 | # Separate features and target 64 | if 'demand' in df.columns: 65 | y = df['demand'] 66 | X = df.drop(['demand'], axis=1) 67 | else: 68 | # If 'demand' column is not present, assume the last column is the target 69 | y = df.iloc[:, -1] 70 | X = df.iloc[:, :-1] 71 | 72 | return X, y 73 | 74 | 75 | def train(args): 76 | """Train XGBoost model with the given arguments.""" 77 | logger.info("Loading training data") 78 | X_train, y_train = load_data(args.train) 79 | 80 | logger.info("Loading validation data") 81 | X_val, y_val = load_data(args.validation) 82 | 83 | # Create DMatrix for XGBoost 84 | dtrain = xgb.DMatrix(X_train, label=y_train) 85 | dval = xgb.DMatrix(X_val, label=y_val) 86 | 87 | # Set XGBoost parameters 88 | params = { 89 | 'max_depth': args.max_depth, 90 | 'eta': args.eta, 91 | 'gamma': args.gamma, 92 | 'min_child_weight': args.min_child_weight, 93 | 'subsample': args.subsample, 94 | 'verbosity': args.verbosity, 95 | 'objective': args.objective 96 | } 97 | 98 | # Train model 99 | logger.info("Training XGBoost model") 100 | watchlist = [(dtrain, 'train'), (dval, 'validation')] 101 | model = xgb.train( 102 | params=params, 103 | dtrain=dtrain, 104 | num_boost_round=args.num_round, 105 | evals=watchlist, 106 | early_stopping_rounds=10 107 | ) 108 | 109 | # Save the model 110 | logger.info(f"Saving model to {args.model_dir}") 111 | model_path = os.path.join(args.model_dir, 'xgboost-model') 112 | model.save_model(model_path) 113 | 114 | # Save feature names for inference 115 | feature_names = X_train.columns.tolist() 116 | with open(os.path.join(args.model_dir, 'feature_names.json'), 'w') as f: 117 | json.dump(feature_names, f) 118 
| 119 | return model 120 | 121 | 122 | def model_fn(model_dir): 123 | """Load the XGBoost model for inference.""" 124 | # Load the XGBoost model 125 | model_path = os.path.join(model_dir, 'xgboost-model') 126 | model = xgb.Booster() 127 | model.load_model(model_path) 128 | 129 | # Load feature names 130 | feature_names_path = os.path.join(model_dir, 'feature_names.json') 131 | with open(feature_names_path, 'r') as f: 132 | feature_names = json.load(f) 133 | 134 | # Return both model and feature names 135 | return {'model': model, 'feature_names': feature_names} 136 | 137 | 138 | def input_fn(request_body, request_content_type): 139 | """Parse input data for prediction.""" 140 | if request_content_type == 'text/csv': 141 | # Parse CSV input 142 | data = io.StringIO(request_body.decode('utf-8') if isinstance(request_body, bytes) else request_body) 143 | df = pd.read_csv(data, header=None) 144 | return df 145 | elif request_content_type == 'application/json': 146 | # Parse JSON input 147 | json_data = json.loads(request_body.decode('utf-8') if isinstance(request_body, bytes) else request_body) 148 | # Handle both list of lists and dict with features 149 | if isinstance(json_data, list): 150 | df = pd.DataFrame(json_data) 151 | else: 152 | df = pd.DataFrame([json_data]) 153 | return df 154 | else: 155 | raise ValueError(f"Unsupported content type: {request_content_type}. Use 'text/csv' or 'application/json'.") 156 | 157 | 158 | def predict_fn(input_data, model_dict): 159 | """Make predictions using the loaded model.""" 160 | # Extract model and feature names 161 | model = model_dict['model'] 162 | feature_names = model_dict['feature_names'] 163 | 164 | # Ensure input data has the correct columns/order 165 | if len(input_data.columns) != len(feature_names): 166 | raise ValueError(f"Input data has {len(input_data.columns)} features, but model expects {len(feature_names)}") 167 | 168 | # Convert to DMatrix for prediction 169 | dmatrix = xgb.DMatrix(input_data.values) 170 | 171 | # Make prediction 172 | predictions = model.predict(dmatrix) 173 | 174 | return predictions 175 | 176 | 177 | def output_fn(predictions, content_type): 178 | """Format predictions for response.""" 179 | if content_type == 'application/json': 180 | # Convert predictions to a list and return as JSON 181 | predictions_list = predictions.tolist() 182 | return json.dumps(predictions_list) 183 | else: 184 | raise ValueError(f"Unsupported accept type: {content_type}. Use 'application/json'.") 185 | 186 | 187 | if __name__ == '__main__': 188 | args = parse_args() 189 | model = train(args) 190 | logger.info("Training completed successfully") -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/99-use-cases/sagemaker-endpoint-as-tool/server.py: -------------------------------------------------------------------------------- 1 | import boto3, json, os 2 | import httpx 3 | import numpy as np 4 | from mcp.server.fastmcp import FastMCP 5 | 6 | mcp = FastMCP("SageMaker App") 7 | 8 | @mcp.tool() 9 | async def generate_prediction_with_sagemaker(test_sample: list): 10 | """ 11 | Use Amazon SageMaker AI to generate predictions. 
12 | Args: 13 | test_sample: a list of lists containing the inputs to generate predictions from 14 | Returns: 15 | predictions: an array of predictions 16 | """ 17 | # Avoid printing to stdout here (e.g. os.environ): a stdio MCP server uses stdout for protocol messages, and the environment contains credentials 18 | endpoint_name = os.environ["SAGEMAKER_ENDPOINT_NAME"] 19 | boto_session = boto3.session.Session( 20 | aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"], 21 | aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"], 22 | aws_session_token=os.environ["AWS_SESSION_TOKEN"], 23 | region_name=os.environ["AWS_REGION_NAME"] 24 | ) 25 | sagemaker_runtime = boto_session.client("sagemaker-runtime") 26 | response = sagemaker_runtime.invoke_endpoint( 27 | EndpointName=endpoint_name, 28 | Body=json.dumps(test_sample), 29 | ContentType="application/json", 30 | Accept="application/json" 31 | ) 32 | predictions = json.loads(response['Body'].read().decode("utf-8")) 33 | return np.array(predictions) 34 | 35 | if __name__ == "__main__": 36 | mcp.run(transport="stdio") -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/99-use-cases/support-ticket-triage/langgraph-requirements.txt: -------------------------------------------------------------------------------- 1 | boto3 2 | botocore 3 | sagemaker 4 | langchain 5 | langchain_aws 6 | langchain_experimental 7 | langchain-community 8 | duckduckgo-search 9 | langgraph 10 | matplotlib -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/99-use-cases/text2dsl-mcp/README.md: -------------------------------------------------------------------------------- 1 | # No-SQL RAG & Text to DSL using LangGraph, OpenSearch Serverless and MCP 2 | 3 | In this lab, you'll explore how to extend RAG capabilities to NoSQL databases. Using LLM-powered text-to-DSL conversion and a Model Context Protocol (MCP) server that provides connectivity to an OpenSearch Serverless Collection, this lab demonstrates natural language querying of JSON documents with complex query conditions. This powerful combination enables comprehensive enterprise search applications that can access semi-structured data without transforming it into a structured format or incurring additional LLM costs to generate embeddings. 4 | 5 | ## Learning Objectives 6 | 7 | By the end of this lab, you will be able to: 8 | 9 | - Set up a serverless collection in Amazon OpenSearch Service 10 | - Ingest sample findings from [Amazon GuardDuty](https://docs.aws.amazon.com/guardduty/latest/ug/sample_findings.html) into an OpenSearch index 11 | - Create a simple MCP server that retrieves documents from OpenSearch indices 12 | - Using LangChain MCP Adapters, connect your agentic application to MCP servers 13 | - Implement text-to-DSL capabilities to query OpenSearch indices with natural language 14 | - Integrate the query results with foundation model responses 15 | - Build a comprehensive enterprise search application 16 | 17 | ## Key Concepts 18 | 19 | ### Text-to-DSL 20 | 21 | Text-to-DSL refers to the process of translating natural language text into a Domain-Specific Language (DSL). This is commonly used in the context of search engines like Elasticsearch or OpenSearch, where the DSL is used to construct complex search queries. Essentially, you input text that describes what you want to search for, and the system converts it into a DSL query that the search engine can understand and execute.
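To make the translation concrete, here is a minimal, illustrative sketch of what the LLM might produce. The field names (`severity`, `updatedAt`) are hypothetical placeholders, not the lab's actual GuardDuty schema (that lives in `guardduty-index-schema.json`); the resulting dict is the kind of input the `query_dsl` tool in `mcp_dsl_server.py` accepts:

```python
# Hypothetical example: the user asks
#   "Show me high-severity findings from the last 7 days"
# and the LLM translates it into an OpenSearch DSL query like this.
# Field names below are illustrative, not the lab's real index schema.
dsl_query = {
    "query": {
        "bool": {
            "must": [
                {"range": {"severity": {"gte": 7}}},            # high-severity only
                {"range": {"updatedAt": {"gte": "now-7d/d"}}},  # OpenSearch date math
            ]
        }
    },
    "size": 10,  # cap the number of returned documents
}
```

In the lab, the agent first calls the `get_index_schema` tool so the LLM knows which fields exist, then passes its generated query to `query_dsl` for execution.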
22 | 23 | ### Model Context Protocol (MCP) 24 | 25 | The Model Context Protocol (MCP) is an open protocol that enables seamless integration between LLM applications and external data sources and tools. Whether you're building an AI-powered IDE, enhancing a chat interface, or creating custom AI workflows, MCP provides a standardized way to connect LLMs with the context they need. 26 | 27 | ### MCP Server 28 | An MCP Server is a lightweight program that exposes specific capabilities through the standardized Model Context Protocol. Host applications (such as chatbots, IDEs, and other AI tools) have MCP clients that maintain 1:1 connections with MCP servers. MCP servers can access local data sources and remote services to provide additional context that improves the generated outputs from the models. 29 | 30 | ### LangChain MCP Adapters 31 | The library provides a lightweight wrapper that makes MCP tools compatible with LangChain and LangGraph. 32 | 33 | 34 | ## Lab Structure 35 | * **text2dsl-mcp.ipynb**
36 | The main notebook file that guides you through implementing text-to-DSL capabilities 37 | 38 | * **cfn-oss-collection.yaml**
39 | The CloudFormation template to deploy an Amazon OpenSearch Serverless Collection to be used in this lab.
40 | _You can skip this if you are participating in an AWS Instructor-led workshop event, because this stack is pre-deployed in the provided AWS account_
43 | The Python script with the MCP server implementation. The MCP server provides tools to get the schema of indices in the OpenSearch collection, and to execute DSL queries to retrieve data from those indices. 44 | 45 | * **utils.py**
46 | A collection of helper functions to use in this lab. 47 | 48 | * **guardduty-index-schema.json**
49 | This file defines the schema of the test dataset used in the lab. The lab uses sample findings generated by Amazon GuardDuty. 50 | 51 | ## Dataset 52 | 53 | In this lab, we will use [Amazon GuardDuty](https://aws.amazon.com/guardduty/) to generate JSON documents. Amazon GuardDuty uses AI and ML with integrated threat intelligence from AWS and leading third parties to help protect your AWS accounts, workloads, and data from threats.
54 | Amazon GuardDuty helps you generate sample findings to visualize and understand the various finding types that it can generate. When you generate sample findings, GuardDuty populates your current findings list with one sample for each supported finding type, including attack sequence finding types. 55 | 56 | ## Getting Started 57 | 58 | Before beginning this lab, you should have: 59 | - Basic understanding of [Query DSL](https://docs.opensearch.org/docs/latest/query-dsl/) 60 | - Familiarity with Amazon OpenSearch Service 61 | 62 | To start working with the notebooks: 63 | 64 | 1. Navigate to the `workshops/diy-agents-with-sagemaker-and-bedrock/99-use-cases/text2dsl-mcp` folder in the cloned repository 65 | 2. Open `text2dsl-mcp.ipynb` and work through it sequentially to implement text-to-DSL capabilities using MCP 66 | -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/99-use-cases/text2dsl-mcp/cfn-oss-collection.yaml: -------------------------------------------------------------------------------- 1 | AWSTemplateFormatVersion: "2010-09-09" 2 | Description: CloudFormation template for OpenSearch Serverless Collection 3 | 4 | Parameters: 5 | CollectionName: 6 | Type: String 7 | Description: Name of the OpenSearch Serverless Collection 8 | Default: "agent-ws-collection" 9 | 10 | StandbyReplicas: 11 | Type: String 12 | Description: Whether to enable standby replicas for the collection 13 | Default: "DISABLED" 14 | AllowedValues: 15 | - "ENABLED" 16 | - "DISABLED" 17 | 18 | NetworkPolicyName: 19 | Type: String 20 | Description: Name of the network policy for the collection 21 | Default: "agent-ws-network-policy" 22 | 23 | EncryptionPolicyName: 24 | Type: String 25 | Description: Name of the encryption policy for the collection 26 | Default: "agent-ws-encryption-policy" 27 | 28 | DataAccessPolicyName: 29 | Type: String 30 | Description: Name of the data access policy for the collection 31 | Default: "agent-ws-data-access-policy" 32 | 33 | Resources: 34 | # Network Policy for the Collection 35 | OpenSearchNetworkPolicy: 36 | Type: AWS::OpenSearchServerless::SecurityPolicy 37 | Properties: 38 | Name: !Ref NetworkPolicyName 39 | Type: "network" 40 | Description: "Network policy for Agentic AI Workshop OpenSearch Collection" 41 | Policy: !Sub | 42 | [{ 43 | "Rules":[ 44 | { 45 | "ResourceType":"collection", 46 | "Resource":["collection/${CollectionName}"] 47 | }, 48 | { 49 | "ResourceType":"dashboard", 50 | "Resource":["collection/${CollectionName}"] 51 | } 52 | ], 53 | "AllowFromPublic":true 54 | }] 55 | 56 | # Encryption Policy for the Collection 57 | OpenSearchEncryptionPolicy: 58 | Type: AWS::OpenSearchServerless::SecurityPolicy 59 | Properties: 60 | Name: !Ref EncryptionPolicyName 61 | Type: "encryption" 62 | Description: "Encryption policy for Agentic AI Workshop OpenSearch Collection" 63 | Policy: !Sub | 64 | { 65 | "Rules":[ 66 | { 67 | "ResourceType":"collection", 68 | "Resource":["collection/${CollectionName}"] 69 | } 70 | ], 71 | "AWSOwnedKey":true 72 | } 73 | 74 | # Data Access Policy for the Collection 75 | OpenSearchDataAccessPolicy: 76 | Type: AWS::OpenSearchServerless::AccessPolicy 77 | Properties: 78 | Name: !Ref DataAccessPolicyName 79 | Type: "data" 80 | Description: "Data access policy for Agentic AI Workshop OpenSearch Collection" 81 | Policy: !Sub | 82 | [ 83 | { 84 | "Rules":[ 85 | { 86 | "ResourceType":"collection", 87 | "Resource":["collection/${CollectionName}"], 88 | "Permission":[ 89 |
"aoss:CreateCollectionItems", 90 | "aoss:DeleteCollectionItems", 91 | "aoss:UpdateCollectionItems", 92 | "aoss:DescribeCollectionItems" 93 | ] 94 | }, 95 | { 96 | "ResourceType":"index", 97 | "Resource":["index/${CollectionName}/*"], 98 | "Permission":[ 99 | "aoss:CreateIndex", 100 | "aoss:DeleteIndex", 101 | "aoss:UpdateIndex", 102 | "aoss:DescribeIndex", 103 | "aoss:ReadDocument", 104 | "aoss:WriteDocument" 105 | ] 106 | } 107 | ], 108 | "Principal":[ 109 | "arn:aws:iam::${AWS::AccountId}:root" 110 | ] 111 | } 112 | ] 113 | 114 | # OpenSearch Serverless Collection 115 | OpenSearchCollection: 116 | Type: AWS::OpenSearchServerless::Collection 117 | DependsOn: 118 | - OpenSearchDataAccessPolicy 119 | - OpenSearchEncryptionPolicy 120 | - OpenSearchNetworkPolicy 121 | Properties: 122 | Name: !Ref CollectionName 123 | Type: SEARCH 124 | Description: "OpenSearch Serverless Collection for Agentic AI Workshop" 125 | StandbyReplicas: !Ref StandbyReplicas 126 | 127 | 128 | # Lambda function to wait for collection to be active 129 | CollectionStatusCheckFunction: 130 | Type: AWS::Lambda::Function 131 | Properties: 132 | Handler: index.handler 133 | Role: !GetAtt CollectionStatusCheckRole.Arn 134 | Runtime: python3.9 135 | Timeout: 600 136 | Code: 137 | ZipFile: | 138 | import boto3 139 | import cfnresponse 140 | import time 141 | import traceback 142 | 143 | aoss = boto3.client('opensearchserverless') 144 | 145 | def handler(event, context): 146 | print(f"Collection status check event: {event}") 147 | response_data = {} 148 | 149 | try: 150 | request_type = event['RequestType'] 151 | print(f"Request type: {request_type}") 152 | 153 | # Only check status on Create or Update 154 | if request_type in ['Create', 'Update']: 155 | collection_name = event['ResourceProperties']['CollectionName'] 156 | wait_for_collection_active(collection_name) 157 | 158 | # Get collection endpoint 159 | collection_info = get_collection_info(collection_name) 160 | response_data['CollectionId'] = collection_info['Id'] 161 | response_data['CollectionEndpoint'] = collection_info['CollectionEndpoint'] 162 | response_data['DashboardEndpoint'] = collection_info['DashboardEndpoint'] 163 | 164 | # Always send SUCCESS to CFN 165 | cfnresponse.send(event, context, cfnresponse.SUCCESS, response_data) 166 | 167 | except Exception as e: 168 | print(f"Error: {str(e)}") 169 | print(traceback.format_exc()) 170 | cfnresponse.send(event, context, cfnresponse.FAILED, {"Error": str(e)}) 171 | 172 | def wait_for_collection_active(collection_name): 173 | print(f"Waiting for collection {collection_name} to become active") 174 | max_wait_time = 600 # 10 minutes 175 | start_time = time.time() 176 | 177 | while time.time() - start_time < max_wait_time: 178 | try: 179 | response = aoss.batch_get_collection(names=[collection_name]) 180 | if not response['collectionDetails']: 181 | print(f"Collection {collection_name} not found, waiting...") 182 | time.sleep(15) 183 | continue 184 | 185 | status = response['collectionDetails'][0]['status'] 186 | print(f"Collection status: {status}") 187 | 188 | if status == 'ACTIVE': 189 | print(f"Collection {collection_name} is now active") 190 | return True 191 | elif status in ['FAILED', 'DELETED']: 192 | raise Exception(f"Collection entered {status} state") 193 | 194 | # Wait before checking again 195 | time.sleep(15) 196 | except Exception as e: 197 | if 'ResourceNotFoundException' in str(e): 198 | print(f"Collection {collection_name} not found yet, waiting...") 199 | time.sleep(15) 200 | else: 201 | raise 202 | 
203 | raise Exception(f"Timed out waiting for collection {collection_name} to become active") 204 | 205 | def get_collection_info(collection_name): 206 | response = aoss.batch_get_collection(names=[collection_name]) 207 | if not response['collectionDetails']: 208 | raise Exception(f"Collection {collection_name} not found") 209 | 210 | collection = response['collectionDetails'][0] 211 | return { 212 | 'Id': collection['id'], 213 | 'CollectionEndpoint': collection['collectionEndpoint'], 214 | 'DashboardEndpoint': collection['dashboardEndpoint'] 215 | } 216 | 217 | # IAM Role for the Lambda function 218 | CollectionStatusCheckRole: 219 | Type: AWS::IAM::Role 220 | Properties: 221 | AssumeRolePolicyDocument: 222 | Version: "2012-10-17" 223 | Statement: 224 | - Effect: Allow 225 | Principal: 226 | Service: lambda.amazonaws.com 227 | Action: sts:AssumeRole 228 | Policies: 229 | - PolicyName: OpenSearchServerlessAccess 230 | PolicyDocument: 231 | Version: "2012-10-17" 232 | Statement: 233 | - Effect: Allow 234 | Action: 235 | - aoss:BatchGetCollection 236 | - aoss:ListCollections 237 | Resource: "*" 238 | - PolicyName: CloudWatchLogsAccess 239 | PolicyDocument: 240 | Version: "2012-10-17" 241 | Statement: 242 | - Effect: Allow 243 | Action: 244 | - logs:CreateLogGroup 245 | - logs:CreateLogStream 246 | - logs:PutLogEvents 247 | Resource: "arn:aws:logs:*:*:*" 248 | 249 | # Custom resource to wait for collection to be active 250 | CollectionStatusCheck: 251 | Type: Custom::CollectionStatusCheck 252 | DependsOn: OpenSearchCollection 253 | Properties: 254 | ServiceToken: !GetAtt CollectionStatusCheckFunction.Arn 255 | CollectionName: !Ref CollectionName 256 | ServiceTimeout: '600' 257 | 258 | Outputs: 259 | CollectionId: 260 | Description: "OpenSearch Serverless Collection ID" 261 | Value: !GetAtt CollectionStatusCheck.CollectionId 262 | 263 | CollectionEndpoint: 264 | Description: "OpenSearch Serverless Collection Endpoint" 265 | Value: !GetAtt CollectionStatusCheck.CollectionEndpoint 266 | 267 | DashboardEndpoint: 268 | Description: "OpenSearch Serverless Dashboard Endpoint" 269 | Value: !GetAtt CollectionStatusCheck.DashboardEndpoint 270 | 271 | CollectionARN: 272 | Description: "OpenSearch Serverless Collection ARN" 273 | Value: !Sub "arn:aws:aoss:${AWS::Region}:${AWS::AccountId}:collection/${CollectionStatusCheck.CollectionId}" -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/99-use-cases/text2dsl-mcp/mcp_dsl_server.py: -------------------------------------------------------------------------------- 1 | import json 2 | from mcp.server.fastmcp import Context, FastMCP 3 | 4 | 5 | # Create a named server 6 | COLLECTION_NAME = "agent-ws-collection" 7 | 8 | # Specify dependencies for deployment and development 9 | mcp = FastMCP("OpenSearch DSL Query App", dependencies=["pandas", "numpy"]) 10 | 11 | 12 | ############################################################## 13 | # Helper functions for OSS 14 | from opensearchpy import OpenSearch, RequestsHttpConnection, AWSV4SignerAuth 15 | import boto3 16 | import os 17 | import requests 18 | import json 19 | from retry import retry 20 | 21 | 22 | def get_opensearch_collection_endpoint(collection_name, region="us-west-2"): 23 | """ 24 | Get the OpenSearch Serverless collection endpoint from a collection name 25 | 26 | Args: 27 | collection_name (str): The name of the OpenSearch Serverless collection 28 | region (str, optional): AWS region. If None, uses the default region. 
29 | 30 | Returns: 31 | dict: Dictionary containing collection endpoints and ID 32 | """ 33 | # Initialize the OpenSearch Serverless client 34 | aoss = boto3.client('opensearchserverless', region_name=region) 35 | service = 'aoss' 36 | session = boto3.Session(aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"], 37 | aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"], 38 | aws_session_token = os.environ["AWS_SESSION_TOKEN"], 39 | region_name=region) 40 | credentials =session.get_credentials() 41 | auth = AWSV4SignerAuth(credentials, region, service) 42 | try: 43 | # Use batch_get_collection to get collection details by name 44 | response = aoss.batch_get_collection(names=[collection_name]) 45 | 46 | # Check if collection was found 47 | if not response['collectionDetails']: 48 | raise ValueError(f"Collection '{collection_name}' not found") 49 | 50 | # Extract collection details 51 | collection = response['collectionDetails'][0] 52 | 53 | # Return the endpoints and ID 54 | return { 55 | 'collection_id': collection['id'], 56 | 'collection_endpoint': collection['collectionEndpoint'], 57 | 'dashboard_endpoint': collection['dashboardEndpoint'], 58 | 'collection_arn': collection['arn'] 59 | } 60 | 61 | except Exception as e: 62 | print(f"Error getting collection endpoint: {str(e)}") 63 | raise 64 | 65 | 66 | def query_opensearch_with_dsl(collection_endpoint, dsl_json, index_name = 'guardduty-index', region="us-west-2"): 67 | """ 68 | Query an OpenSearch index using DSL with the OpenSearch Python client 69 | 70 | Args: 71 | collection_endpoint (str): The OpenSearch collection endpoint (without https://) 72 | index_name (str): Name of the index to query 73 | dsl_json (dict): The OpenSearch DSL query 74 | region (str, optional): AWS region. If None, uses the default region. 75 | 76 | Returns: 77 | dict: Query results 78 | """ 79 | # Get AWS credentials 80 | session = boto3.Session(aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"], 81 | aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"], 82 | aws_session_token = os.environ["AWS_SESSION_TOKEN"], 83 | region_name=region) 84 | credentials = session.get_credentials() 85 | 86 | # Create the auth for OpenSearch 87 | auth = AWSV4SignerAuth(credentials, session.region_name, 'aoss') # Use 'aoss' for OpenSearch Serverless 88 | 89 | # Create the OpenSearch client 90 | client = OpenSearch( 91 | hosts=[{'host': collection_endpoint, 'port': 443}], 92 | http_auth=auth, 93 | use_ssl=True, 94 | verify_certs=True, 95 | connection_class=RequestsHttpConnection, 96 | pool_maxsize=20 97 | ) 98 | 99 | try: 100 | # Execute the search query 101 | response = client.search( 102 | body=dsl_json, 103 | index=index_name 104 | ) 105 | 106 | # Print summary of results 107 | hits = response['hits']['hits'] 108 | total = response['hits']['total']['value'] if isinstance(response['hits']['total'], dict) else response['hits']['total'] 109 | 110 | print(f"Found {total} documents in {index_name}") 111 | print(f"Showing top {len(hits)} results:") 112 | 113 | return hits 114 | except Exception as e: 115 | print(f"Error querying OpenSearch: {str(e)}") 116 | raise 117 | 118 | 119 | 120 | ################################## 121 | # TOOLS 122 | @mcp.tool() 123 | def query_dsl(dsl_json: dict): 124 | """Query input DSL to OpenSearch Collection. 
""" 125 | collection_endpoint = get_opensearch_collection_endpoint(COLLECTION_NAME)["collection_endpoint"].split("https://")[1] 126 | return query_opensearch_with_dsl(collection_endpoint, dsl_json) 127 | 128 | 129 | @mcp.tool() 130 | def get_index_schema(index_name: str) -> dict: 131 | """Return JSON schema of an index in the OpenSearch Collection """ 132 | with open(index_name+"-schema.json", "r") as f: 133 | schema = json.load(f) 134 | return schema 135 | 136 | 137 | @mcp.tool() 138 | def add_two_numbers(a: int, b: int) -> str: 139 | """Add two numbers""" 140 | return f"{a} + {b} = {a+b} : This is to show your MCP tool has been invoked successfully." 141 | 142 | 143 | if __name__ == "__main__": 144 | mcp.run() 145 | 146 | 147 | 148 | -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/99-use-cases/text2dsl-mcp/requirements.txt: -------------------------------------------------------------------------------- 1 | mcp==1.9.2 2 | mcp[cli] 3 | retry 4 | langchain_mcp_adapters 5 | langgraph 6 | opensearch-py 7 | strands-agents 8 | strands-agents-tools 9 | strands-agents-builder 10 | nest_asyncio 11 | -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/99-use-cases/text2sql/README.md: -------------------------------------------------------------------------------- 1 | # Database RAG & Text to SQL using LangChain & SQL agent 2 | 3 | In this lab, you'll explore how to extend RAG capabilities beyond unstructured document retrieval to include structured data sources. You'll learn how to implement text-to-SQL conversion, enabling natural language querying of database information alongside traditional document retrieval. This powerful combination allows for comprehensive enterprise search applications that can access both unstructured documents and structured database records. 4 | 5 | ## Learning Objectives 6 | 7 | By the end of this lab, you will be able to: 8 | 9 | - Set up an Athena database and AWS Glue crawler for structured data access 10 | - Implement text-to-SQL capabilities to query databases with natural language 11 | - Create a unified querying experience across documents and databases 12 | - Integrate structured data results with foundation model responses 13 | - Build a comprehensive enterprise search application 14 | 15 | ## Key Concepts 16 | 17 | ### Text-to-SQL 18 | 19 | Text-to-SQL converts natural language questions into structured SQL queries that can retrieve information from relational databases. This capability bridges the gap between how humans naturally ask questions and how databases store and access data. 20 | 21 | ### Structured Data Integration 22 | 23 | Combining structured database queries with unstructured document retrieval creates a comprehensive knowledge system that can leverage all available data sources in your organization. 24 | 25 | ### Enterprise Search 26 | 27 | Enterprise search applications need to access multiple data sources with different formats. RAG techniques can unify these diverse sources under a single natural language interface. 
28 | 29 | ## Lab Structure 30 | 31 | This lab consists of 2 notebooks that guide you through implementing text-to-SQL capabilities: 32 | 33 | ## text2sql 34 | 35 | 1-create-db-tables.ipynb: Configure the Athena database and AWS Glue crawler 36 | 2-text2sql-langchain: Use LangChain and a SQL agent for text-to-SQL conversion 37 | 38 | ## Dataset 39 | 40 | In this lab, you'll work with: 41 | - A structured retail transaction dataset stored in Amazon S3 42 | - The dataset will be cataloged using AWS Glue and made queryable through Amazon Athena 43 | - You'll answer business questions that require accessing this structured data 44 | 45 | ## Getting Started 46 | 47 | Before beginning this lab, you should have: 48 | - Basic understanding of SQL and relational databases 49 | - Familiarity with AWS data analytics services 50 | 51 | To start working with the notebooks: 52 | 53 | 1. Navigate to the `workshops/diy-agents-with-sagemaker-and-bedrock/99-use-cases/text2sql` folder in the cloned repository 54 | 2. Open `1-create-db-tables` to begin setting up the Athena database and Glue Data Catalog 55 | 3. Follow each notebook sequentially to implement text-to-SQL capabilities 56 | 57 | 58 | ## Next Steps 59 | 60 | After completing these labs, you'll have a comprehensive understanding of advanced RAG techniques that combine unstructured document retrieval, metadata filtering, safety guardrails, reranking, and structured data access. These capabilities form the foundation for building sophisticated enterprise AI applications. 61 | 62 | Happy learning! 63 | -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/99-use-cases/text2sql/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3 2 | sqlalchemy 3 | langchain 4 | langchain-community 5 | langchain-aws 6 | PyAthena 7 | s3fs 8 | openpyxl 9 | -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/README.md: -------------------------------------------------------------------------------- 1 | # Building AI Agents with Amazon Bedrock and Amazon SageMaker AI 2 | 3 | > This content is available in [Workshop Studio](https://catalog.workshops.aws/building-effective-ai-agents-on-aws/). What follows is a synopsis of the content you will find by following the provided link. 4 | 5 | Welcome to the "Building AI Agents with Amazon Bedrock and Amazon SageMaker AI" workshop! This publicly available, hands-on experience is designed for builders who are ready to harness the power of Large Language Models (LLMs) and create autonomous AI agents on AWS. 6 | 7 | In this workshop, you'll dive into the world of AI agents, learning how to leverage AWS services to build AI systems that can act independently, make decisions, and complete complex tasks with minimal human intervention. 8 | 9 | By the end of this workshop, you'll be able to: 10 | 11 | - Understand and implement foundational LLM patterns with tools and retrieval 12 | - Build effective agentic workflows using proven patterns 13 | - Create autonomous agent systems for complex tasks 14 | - Choose and implement appropriate frameworks for your use case 15 | - Monitor and optimize your AI agent systems 16 | - Apply these patterns to real-world industry use cases 17 | 18 | ## Workshop Content 19 | 20 | 1. Build your autonomous agents and agentic workflows from scratch 21 | 2.
Learn one or more open-source frameworks for Agentic AI (crew.AI, langgraph, smolagents, etc) 22 | 3. Deep dive on advanced topics like observability/tracing, guardrails, evaluation 23 | 4. Productionize agentic AI on AWS 24 | 5. Advanced use cases (MCP client/server on AWS, Natural Language 2 SQL, etc) 25 | 26 | ## How to run the workshop 27 | 28 | This workshop follows a hands-on, self-paced format. Each module contains Jupyter notebooks that you'll run in your own JupyterLab environment. The notebooks include: 29 | 30 | - Step-by-step instructions and explanations 31 | - Code samples that you can run and modify 32 | - Exercises to reinforce your learning 33 | - Links to additional resources 34 | -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/README.md: -------------------------------------------------------------------------------- 1 | # workshop-notebooks 2 | -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 110 | .pdm.toml 111 | .pdm-python 112 | .pdm-build/ 113 | 114 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 115 | __pypackages__/ 116 | 117 | # Celery stuff 118 | celerybeat-schedule 119 | celerybeat.pid 120 | 121 | # SageMath parsed files 122 | *.sage.py 123 | 124 | # Environments 125 | .env 126 | .venv 127 | env/ 128 | venv/ 129 | ENV/ 130 | env.bak/ 131 | venv.bak/ 132 | 133 | # Spyder project settings 134 | .spyderproject 135 | .spyproject 136 | 137 | # Rope project settings 138 | .ropeproject 139 | 140 | # mkdocs documentation 141 | /site 142 | 143 | # mypy 144 | .mypy_cache/ 145 | .dmypy.json 146 | dmypy.json 147 | 148 | # Pyre type checker 149 | .pyre/ 150 | 151 | # pytype static type analyzer 152 | .pytype/ 153 | 154 | # Cython debug symbols 155 | cython_debug/ 156 | 157 | # PyCharm 158 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 159 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 160 | # and can be added to the global gitignore or merged into this file. For a more nuclear 161 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 162 | #.idea/ 163 | 164 | 165 | #mac metaata 166 | .DS_Store 167 | -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/README.md: -------------------------------------------------------------------------------- 1 | # Fiddler <> SageMaker Demo 2024 2 | 3 | For workshop admins, preconfigure a collection of Fiddler applications using the `AdminFiddlerSagemakerDemo.ipynb` notebook. 4 | 5 | For workshop users, get your SageMaker App User Profile from your workshop administrator then follow along the `FiddlerSagemakerDemo.ipynb` notebook. 6 | 7 | ## Running the Notebook Locally 8 | 9 | Run the following steps from a machine with access to a web browser. 10 | 11 | 1. Clone this repository and `cd` into it. 12 | 13 | ```shell 14 | git clone git@github.com:fiddler-labs/fiddler-demo-dec-2024.git 15 | cd fiddler-demo-dec-2024 16 | ``` 17 | 18 | 1. Install the version of Python that will be used in the workshop. 19 | 20 | MacOS: 21 | 22 | ```shell 23 | brew install python@3.12 24 | ``` 25 | 26 | Linux: 27 | 28 | ```shell 29 | sudo apt install python3.12 30 | ``` 31 | 32 | 1. Create a Python virtual environment and activate it. 33 | 34 | ```shell 35 | eval $(which python3.12) -m venv .venv 36 | source .venv/bin/activate 37 | ``` 38 | 39 | 1. Install JupyterLab and run it to open your browser to Jupyter Notebook. 40 | 41 | ```shell 42 | python -m pip install jupyterlab 43 | jupyter lab 44 | ``` 45 | 46 | 1. Select the `FiddlerSagemakerDemo.ipynb` Notebook from the sidebar and follow along! 
47 | -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/assets/charts_llm.yaml: -------------------------------------------------------------------------------- 1 | charts: 2 | - data_source: 3 | filters: 4 | bin_size: Day 5 | time_label: 30d 6 | time_zone: America/Los_Angeles 7 | query_type: EMBEDDING 8 | queries: 9 | - column_name: PromptTextEmbedding 10 | plot: 3D 11 | metric_type: umap 12 | sample_size: 100 13 | query_key: dd4566c3-2ac8-49a8-b2b5-8ffb543a61bb 14 | retrieve_columns: ["user_input", "chatbot_response", "fdl_topics__chatbot_response__topic", "fdl_enrichment_qa_sentiment__chatbot_response__sentiment", "fdl_enrichment_qa_sentiment__user_input__sentiment", "session_id","timestamp","feedback","prompt_tokens","completion_tokens","total_tokens","srch_id"] 15 | minimum_distance: 0.5 16 | number_of_neighbors: 7 17 | model_name: trip_assistant_chatbot 18 | query_type: EMBEDDING 19 | description: UMAP Projection for User Input Embeddings 20 | options: 21 | color_by: feedback 22 | title: User Input UMAP 23 | - data_source: 24 | filters: 25 | bin_size: Day 26 | time_label: 30d 27 | time_zone: America/Los_Angeles 28 | query_type: EMBEDDING 29 | queries: 30 | - column_name: ResponseTextEmbedding 31 | plot: 3D 32 | metric_type: umap 33 | sample_size: 100 34 | query_key: dd4566c3-2ac8-49a8-b2b5-8ffb543a61bb 35 | retrieve_columns: ["user_input", "chatbot_response", "fdl_topics__chatbot_response__topic", "fdl_enrichment_qa_sentiment__chatbot_response__sentiment", "fdl_enrichment_qa_sentiment__user_input__sentiment", "session_id","timestamp","feedback","prompt_tokens","completion_tokens","total_tokens","srch_id"] 36 | minimum_distance: 0.5 37 | number_of_neighbors: 7 38 | model_name: trip_assistant_chatbot 39 | version: v1 40 | query_type: EMBEDDING 41 | description: UMAP Projection Chatbot Reponse Embeddings 42 | options: 43 | color_by: feedback 44 | title: Chatbot Response UMAP 45 | - data_source: 46 | filters: 47 | bin_size: Day 48 | time_label: 7d 49 | time_zone: America/Los_Angeles 50 | query_type: MONITORING 51 | queries: 52 | - columns: [] 53 | metric: Total Cost 54 | metric_type: custom 55 | query_key: dd4466c3-2ac8-49a8-b8b5-8ffa543a61b5 56 | viz_type: line 57 | model_name: trip_assistant_chatbot 58 | - columns: ['prompt_tokens', 'total_tokens', 'completion_tokens'] 59 | metric: "sum" 60 | metric_type: "statistic" 61 | query_key: dd4466c3-2ac8-49a8-b9b5-8ffa543a61b6 62 | viz_type: bar 63 | model_name: trip_assistant_chatbot 64 | description: Cost Tracker for Chatbot 65 | options: 66 | queryNames: 67 | dd4466c3-2ac8-49a8-b8b5-8ffa543a61b5: Total Cost in USD 68 | dd4466c3-2ac8-49a8-b9b5-8ffa543a61b6: Number of Tokens Used 69 | customYAxis: 70 | - query_keys: ['dd4466c3-2ac8-49a8-b8b5-8ffa543a61b5'] 71 | scale: value 72 | - query_keys: ['dd4466c3-2ac8-49a8-b9b5-8ffa543a61b6'] 73 | scale: value 74 | query_type: MONITORING 75 | title: Total Cost Tracker 76 | - data_source: 77 | filters: 78 | bin_size: Day 79 | time_label: 7d 80 | time_zone: America/Los_Angeles 81 | query_type: MONITORING 82 | queries: 83 | - columns: ["fdl_enrichment_qa_sentiment__user_input__sentiment_probability"] 84 | metric: average 85 | metric_type: statistic 86 | query_key: 334466c3-2ac8-49a8-b8b5-8ffa543a6112 87 | segment: "No Click" 88 | viz_type: line 89 | model_name: trip_assistant_chatbot 90 | - columns: ["fdl_enrichment_qa_sentiment__user_input__sentiment_probability"] 91 | metric: average 92 | metric_type: 
statistic 93 | query_key: 224466c3-2ac8-49a8-b8b5-8ffa543a6112 94 | segment: "Booked" 95 | viz_type: line 96 | model_name: trip_assistant_chatbot 97 | - columns: ["fdl_enrichment_qa_sentiment__user_input__sentiment_probability"] 98 | metric: average 99 | metric_type: statistic 100 | query_key: 114466c3-2ac8-49a8-b8b5-8ffa543a6112 101 | segment: "Click" 102 | viz_type: line 103 | model_name: trip_assistant_chatbot 104 | description: Daily user sentiment segmented by outcome 105 | options: 106 | queryNames: 107 | 334466c3-2ac8-49a8-b8b5-8ffa543a6112: No Click 108 | 224466c3-2ac8-49a8-b8b5-8ffa543a6112: Booked 109 | 114466c3-2ac8-49a8-b8b5-8ffa543a6112: Click 110 | query_type: MONITORING 111 | title: User Sentiment Tracker 112 | -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/assets/charts_ml.yaml: -------------------------------------------------------------------------------- 1 | charts: 2 | - data_source: 3 | filters: 4 | bin_size: Day 5 | time_label: 7d 6 | time_zone: America/Los_Angeles 7 | query_type: MONITORING 8 | queries: 9 | - columns: ['visitor_location_country_id'] 10 | categories: ["Brazil", "Canada", "China", "France", "India", "Japan", "Mexico", "South Africa", "UK", "USA"] 11 | metric: frequency 12 | metric_type: statistic 13 | query_key: c14666c3-2ac8-49a8-b2b5-8ffa543a61b6 14 | viz_type: bar 15 | model_name: search_ranking_trips 16 | description: Daily Visitor Country of Origin 17 | options: 18 | queryNames: 19 | c14666c3-2ac8-49a8-b2b5-8ffa543a61b6: "Country" 20 | query_type: MONITORING 21 | title: Visitor Geographical Distribution 22 | - data_source: 23 | filters: 24 | bin_size: Day 25 | time_label: 7d 26 | time_zone: America/Los_Angeles 27 | query_type: MONITORING 28 | queries: 29 | - columns: ['destination_country_id'] 30 | categories: ["USA", "UK", "Mexico", "Japan", "Canada"] 31 | metric: frequency 32 | metric_type: statistic 33 | query_key: c14666c3-2ac8-49a8-b2b5-8ffa543a61b8 34 | viz_type: bar 35 | model_name: search_ranking_trips 36 | description: Daily Volume of Requests to Destinations 37 | options: 38 | queryNames: 39 | c14666c3-2ac8-49a8-b2b5-8ffa543a61b8: "Country" 40 | query_type: MONITORING 41 | title: Destination Geographical Distribution 42 | - data_source: 43 | filters: 44 | bin_size: Day 45 | time_label: 7d 46 | time_zone: America/Los_Angeles 47 | query_type: MONITORING 48 | queries: 49 | - columns: ["price_usd"] 50 | metric: "average" 51 | metric_type: "statistic" 52 | query_key: d4f6dffe-4aea-436e-aa48-01f72fc5c889 53 | segment: "No Click on Promo - USA" 54 | viz_type: "line" 55 | model_name: search_ranking_trips 56 | - columns: ["price_usd"] 57 | metric: "average" 58 | metric_type: "statistic" 59 | query_key: 2792ef36-4f71-495e-9b4f-c390c63b2f45 60 | segment: "Clicked on Promo - USA" 61 | viz_type: "line" 62 | model_name: search_ranking_trips 63 | description: Tracking the effect of price changes on user interaction with listings 64 | options: 65 | queryNames: 66 | d4f6dffe-4aea-436e-aa48-01f72fc5c889: "Promo Not Clicked" 67 | 2792ef36-4f71-495e-9b4f-c390c63b2f45: "Clicked Promo" 68 | query_type: MONITORING 69 | title: Price Effect on Clicks 70 | - data_source: 71 | filters: 72 | bin_size: Day 73 | time_label: 30d 74 | time_zone: America/Los_Angeles 75 | query_type: MONITORING 76 | queries: 77 | - columns: [] 78 | metric: map 79 | metric_type: performance 80 | query_key: c14666c3-2ac8-49a8-b2b5-8ffa543a61b1 81 | segment: "Traveling to USA" 82 | viz_type:
line 83 | model_name: search_ranking_trips 84 | - columns: [] 85 | metric: map 86 | metric_type: performance 87 | query_key: c14666c3-2ac8-49a8-b2b5-8ffa543a61b2 88 | segment: "Traveling to UK" 89 | viz_type: line 90 | model_name: search_ranking_trips 91 | - columns: [] 92 | metric: map 93 | metric_type: performance 94 | query_key: c14666c3-2ac8-49a8-b2b5-8ffa543a61b3 95 | segment: "Traveling to Canada" 96 | viz_type: line 97 | model_name: search_ranking_trips 98 | - columns: [] 99 | metric: map 100 | metric_type: performance 101 | query_key: c14666c3-2ac8-49a8-b2b5-8ffa543a61b4 102 | segment: "Traveling to Japan" 103 | viz_type: line 104 | model_name: search_ranking_trips 105 | - columns: [] 106 | metric: map 107 | metric_type: performance 108 | query_key: c14666c3-2ac8-49a8-b2b5-8ffa543a61b5 109 | segment: "Traveling to Mexico" 110 | viz_type: line 111 | model_name: search_ranking_trips 112 | description: Daily Model Performance Segmented by Destination 113 | options: 114 | queryNames: 115 | c14666c3-2ac8-49a8-b2b5-8ffa543a61b1: "USA" 116 | c14666c3-2ac8-49a8-b2b5-8ffa543a61b2: "UK" 117 | c14666c3-2ac8-49a8-b2b5-8ffa543a61b3: "Canada" 118 | c14666c3-2ac8-49a8-b2b5-8ffa543a61b4: "Japan" 119 | c14666c3-2ac8-49a8-b2b5-8ffa543a61b5: "Mexico" 120 | query_type: MONITORING 121 | title: Model Performance Segmented by Destination 122 | -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/assets/llm_events.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/assets/llm_events.parquet -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/client/fiddler_client-3.7.0.dev4-py3-none-any.whl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/client/fiddler_client-3.7.0.dev4-py3-none-any.whl -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/client/sagemaker-2.227.1.dev0-py3-none-any.whl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/client/sagemaker-2.227.1.dev0-py3-none-any.whl -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/imgs/create_dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/imgs/create_dashboard.png -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/imgs/credentials_tab.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/imgs/credentials_tab.png -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/imgs/login_view.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/imgs/login_view.png -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/imgs/rca_drift.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/imgs/rca_drift.png -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/imgs/rca_events.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/imgs/rca_events.png -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/imgs/rca_init.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/imgs/rca_init.png -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/imgs/root_cause_analysis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/imgs/root_cause_analysis.png -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/imgs/settings_view.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/imgs/settings_view.png -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/imgs/view_dashboard.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/imgs/view_dashboard.gif 
-------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/modules/__init__.py -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/modules/chart.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import yaml 3 | 4 | import fiddler as fdl 5 | from fiddler.libs.http_client import RequestClient 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | 10 | def add_chart(project: fdl.Project, model: fdl.Model, unique_id: str, client: RequestClient, chart: dict): 11 | charts_url = '/v3/charts' 12 | title = f'[{unique_id}] {chart.get("title")}' 13 | chart['title'] = title 14 | 15 | for index, query in enumerate(chart['data_source']['queries']): 16 | version = query.get('version', 'v1') 17 | query.update( 18 | { 19 | 'model': {'id': model.id, 'name': model.name}, 20 | 'model_name': model.name, 21 | 'version': version, 22 | } 23 | ) 24 | 25 | baseline_name = query.get('baseline_name') 26 | if baseline_name: 27 | baseline = fdl.Baseline.from_name(name=baseline_name, model_id=model.id) 28 | baseline_id = baseline.id 29 | query['baseline_id'] = baseline_id 30 | del query['baseline_name'] 31 | 32 | if query.get('metric_type') == 'custom': 33 | custom_metrics = fdl.CustomMetric.from_name( 34 | name=query.get('metric'), model_id=model.id 35 | ) 36 | query['metric'] = custom_metrics.id 37 | 38 | segment = query.get('segment') 39 | if segment: 40 | segment = fdl.Segment.from_name(name=segment, model_id=model.id) 41 | query['segment'] = {} 42 | query['segment']['id'] = segment.id 43 | 44 | chart['data_source']['queries'][index] = query 45 | chart['project_id'] = project.id 46 | client.post(url=charts_url, data=chart) 47 | 48 | 49 | def add_charts( 50 | project: fdl.Project, 51 | model: fdl.Model, 52 | unique_id: str, 53 | filename: str, 54 | fiddler_url: str, 55 | token: str, 56 | ) -> list: 57 | charts = None 58 | with open(filename, 'r') as stream: 59 | try: 60 | charts = yaml.safe_load(stream) 61 | except yaml.YAMLError as exc: 62 | print(exc) 63 | 64 | errors = [] 65 | if not charts or not charts.get('charts'):  # guards against YAML load failures and empty files 66 | print("no charts found") 67 | return [] 68 | 69 | client = RequestClient( 70 | fiddler_url, 71 | headers={ 72 | 'Content-Type': 'application/json', 73 | 'Authorization': f'Bearer {token}', 74 | }, 75 | ) 76 | 77 | for chart in charts.get('charts'): 78 | try: 79 | add_chart(project, model, unique_id, client, chart) 80 | except Exception as exc: 81 | message = f'Exception {str(exc)} while adding chart' 82 | logger.error(message) 83 | errors.append( 84 | { 85 | 'chart': chart.get('title'), 86 | 'status': 'FAILED', 87 | 'message': message, 88 | } 89 | ) 90 | continue 91 | 92 | return errors -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/modules/config.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | def append_unique_name(prefix: str, suffix: str) -> str: 4 | return str((prefix + re.sub(r'[^0-9a-z]+', '_', suffix.lower())).strip()[:30])
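# Illustrative example (not part of the original module): append_unique_name lowercases the
# suffix, collapses every run of characters outside [0-9a-z] into a single underscore, and
# truncates the combined string to 30 characters, e.g.
#   append_unique_name('ai_travel_agent_', 'Jane Doe #42')  ->  'ai_travel_agent_jane_doe_42'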
5 | 6 | PATH_TO_SAMPLE_RANKING_CSV = 'assets/search_ranking_sample.csv' 7 | PATH_TO_EVENTS_RANKING_CSV = 'assets/search_ranking_prod.csv' 8 | PATH_TO_SAMPLE_CHATBOT_CSV = 'assets/llm_events.parquet' 9 | 10 | PATH_TO_LLM_CHARTS = 'assets/charts_llm.yaml' 11 | PATH_TO_ML_CHARTS = 'assets/charts_ml.yaml' 12 | 13 | PROJECT_NAME_PREFIX = 'ai_travel_agent_' 14 | 15 | LLM_MODEL_NAME = 'assistant_chatbot' 16 | RANKING_MODEL_NAME = 'search_ranking' -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/modules/llm_onboard.py: -------------------------------------------------------------------------------- 1 | import fiddler as fdl 2 | import pandas as pd 3 | import numpy as np 4 | import modules.config as cfg 5 | import time 6 | 7 | 8 | def create_and_publish_llm(project, model_name: str) -> tuple[fdl.Model, str]: 9 | sample_data_df = pd.read_parquet(cfg.PATH_TO_SAMPLE_CHATBOT_CSV) 10 | 11 | sample_data_df['Enrichment Prompt Embedding'] = sample_data_df[ 12 | 'Enrichment Prompt Embedding' 13 | ].apply(lambda x: x.tolist()) 14 | sample_data_df['Enrichment Response Embedding'] = sample_data_df[ 15 | 'Enrichment Response Embedding' 16 | ].apply(lambda x: x.tolist()) 17 | 18 | fiddler_backend_enrichments = [ 19 | # prompt enrichment 20 | fdl.TextEmbedding( 21 | name='PromptTextEmbedding', 22 | source_column='user_input', 23 | column='Enrichment Prompt Embedding', 24 | n_tags=5, 25 | ), 26 | # response enrichment 27 | fdl.TextEmbedding( 28 | name='ResponseTextEmbedding', 29 | source_column='chatbot_response', 30 | column='Enrichment Response Embedding', 31 | n_tags=5, 32 | ), 33 | ] 34 | 35 | model_spec = fdl.ModelSpec( 36 | inputs=['user_input', 'chatbot_response'], 37 | metadata=list( 38 | sample_data_df.drop(['user_input', 'chatbot_response'], axis=1).columns 39 | ), 40 | custom_features=fiddler_backend_enrichments, 41 | ) 42 | 43 | model_task = fdl.ModelTask.LLM 44 | 45 | timestamp_column = 'timestamp' 46 | 47 | llm_application = None 48 | # Create model 49 | try: 50 | llm_application = fdl.Model.from_data( 51 | source=sample_data_df, 52 | name=model_name, 53 | project_id=project.id, 54 | spec=model_spec, 55 | task=model_task, 56 | event_ts_col=timestamp_column, 57 | max_cardinality=3, 58 | ) 59 | llm_application.create() 60 | except fdl.Conflict: 61 | llm_application = fdl.Model.from_name( 62 | name=model_name, 63 | project_id=project.id, 64 | ) 65 | 66 | print( 67 | f'LLM application registered with id = {llm_application.id} and name = {llm_application.name}' 68 | ) 69 | 70 | segment_definitions = [ 71 | ("Click", "User clicked", "result=='click'"), 72 | ("No Click", "User did not click", "result=='no_click'"), 73 | ("Booked", "User Booked", "result=='booked'"), 74 | ("Liked Answers", "User Liked Answers", "feedback=='like'"), 75 | ("Disliked Answers", "User Disliked Answers", "feedback=='dislike'") 76 | ] 77 | 78 | for name, description, definition in segment_definitions: 79 | try: 80 | fdl.Segment( 81 | name=name, 82 | model_id=llm_application.id, 83 | description=description, 84 | definition=definition, 85 | ).create() 86 | except fdl.Conflict: 87 | print(f"Segment '{name}' already exists.") 88 | 89 | custom_metrics = [ 90 | ("Total Cost", "Cost in USD", "sum((prompt_tokens*0.01)+(completion_tokens*0.03))"), 91 | ("Prompt Token Cost", "Cost in USD", "sum((prompt_tokens*0.01))"), 92 | ("Response Token Cost", "Cost in USD", "sum((completion_tokens*0.03))"), 93 | ] 94 | 95 | for name, description,
definition in custom_metrics: 96 | try: 97 | fdl.CustomMetric( 98 | name=name, 99 | model_id=llm_application.id, 100 | description=description, 101 | definition=definition, 102 | ).create() 103 | except fdl.Conflict: 104 | print(f"Custom Metric '{name}' already exists.") 105 | 106 | llm_events_df = sample_data_df 107 | # Timeshifting the timestamp column in the events file so the events are as recent as today 108 | llm_events_df['timestamp'] = np.linspace( 109 | int(time.time()) - (5 * 24 * 60 * 60), int(time.time()), num=llm_events_df.shape[0] 110 | ) 111 | 112 | print('Printing sample dataset...') 113 | print(llm_events_df.head(10).to_markdown()) 114 | return llm_application, llm_application.publish(llm_events_df).id 115 | -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/modules/ml_onboard.py: -------------------------------------------------------------------------------- 1 | import fiddler as fdl 2 | from typing import List 3 | 4 | 5 | def create_country_segments(model_id: int, countries: List[str], prop_countries: List[str]): 6 | for country in prop_countries: 7 | try: 8 | fdl.Segment( 9 | name=f'Traveling to {country}', 10 | model_id=model_id, 11 | description=f'Search Queries for trips to {country}', 12 | definition=f'destination_country_id==\'{country}\'', 13 | ).create() 14 | except fdl.Conflict: 15 | print(f"Segment 'Traveling to {country}' already exists.") 16 | 17 | for country in countries: 18 | try: 19 | fdl.Segment( 20 | name=f'Visitor from {country}', 21 | model_id=model_id, 22 | description=f'Segment for visitors from {country}', 23 | definition=f'visitor_location_country_id==\'{country}\'', 24 | ).create() 25 | except fdl.Conflict: 26 | print(f"Segment 'Visitor from {country}' already exists.") 27 | 28 | predefined_segments = [ 29 | ( 30 | "No Click on Promo - USA", 31 | "Segment for visitors from USA with no click on promo", 32 | """user_interaction==0 and visitor_location_country_id=='USA'""" 33 | ), 34 | ( 35 | "Clicked on Promo - USA", 36 | "Segment for visitors from USA who clicked on promo", 37 | """user_interaction==1 and visitor_location_country_id=='USA'""" 38 | ), 39 | ( 40 | "Clicked Promo", 41 | "Segment for visitors who clicked on promo", 42 | """user_interaction==1""" 43 | ), 44 | ( 45 | "No Click on Promo", 46 | "Segment for visitors with no click on promo", 47 | """user_interaction==0""" 48 | ), 49 | ] 50 | 51 | for name, description, definition in predefined_segments: 52 | try: 53 | fdl.Segment( 54 | name=name, 55 | model_id=model_id, 56 | description=description, 57 | definition=definition, 58 | ).create() 59 | except fdl.Conflict: 60 | print(f"Segment '{name}' already exists.") 61 | -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/media/smbanner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/fine-tuning-with-sagemakerai-and-bedrock/media/smbanner.png -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/task_01_foundation_model_playground/scripts/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers==4.48.2 2 | peft==0.14.0 3 | accelerate==1.3.0 4 | 
bitsandbytes==0.45.1 5 | datasets==3.2.0 6 | evaluate==0.4.3 7 | huggingface_hub[hf_transfer] 8 | mlflow 9 | safetensors>=0.4.5 10 | sagemaker==2.239.0 11 | sagemaker-mlflow==0.1.0 12 | sentencepiece==0.2.0 13 | scikit-learn==1.6.1 14 | tokenizers>=0.21.0 15 | trl==0.9.6 16 | py7zr -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/task_02_customize_foundation_model/scripts/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers==4.50.2 2 | peft==0.14.0 3 | accelerate==1.3.0 4 | bitsandbytes==0.45.1 5 | datasets==3.2.0 6 | evaluate==0.4.3 7 | huggingface_hub[hf_transfer] 8 | mlflow 9 | safetensors>=0.5.2 10 | sagemaker==2.244.0 11 | sagemaker-mlflow==0.1.0 12 | sentencepiece==0.2.0 13 | scikit-learn==1.6.1 14 | tokenizers>=0.21.0 15 | trl==0.9.6 16 | psutil 17 | py7zr 18 | pynvml 19 | xtarfile 20 | rouge-score -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/task_03_foundation_model_evaluation/images/sft_1000_train_50_test_bars.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/fine-tuning-with-sagemakerai-and-bedrock/task_03_foundation_model_evaluation/images/sft_1000_train_50_test_bars.png -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/task_03_foundation_model_evaluation/images/sft_1000_train_50_test_compare.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/fine-tuning-with-sagemakerai-and-bedrock/task_03_foundation_model_evaluation/images/sft_1000_train_50_test_compare.png -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/task_03_foundation_model_evaluation/images/sft_1000_train_50_test_scores.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/fine-tuning-with-sagemakerai-and-bedrock/task_03_foundation_model_evaluation/images/sft_1000_train_50_test_scores.png -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/task_03_foundation_model_evaluation/images/sft_5000_train_100_test_bars.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/fine-tuning-with-sagemakerai-and-bedrock/task_03_foundation_model_evaluation/images/sft_5000_train_100_test_bars.png -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/task_03_foundation_model_evaluation/images/sft_5000_train_100_test_compare.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/fine-tuning-with-sagemakerai-and-bedrock/task_03_foundation_model_evaluation/images/sft_5000_train_100_test_compare.png -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/task_03_foundation_model_evaluation/images/sft_5000_train_100_test_scores.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/fine-tuning-with-sagemakerai-and-bedrock/task_03_foundation_model_evaluation/images/sft_5000_train_100_test_scores.png -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/task_04_responsible_ai/images/applyguardrail.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/fine-tuning-with-sagemakerai-and-bedrock/task_04_responsible_ai/images/applyguardrail.png -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/task_05_fmops/config.yaml: -------------------------------------------------------------------------------- 1 | SchemaVersion: '1.0' 2 | SageMaker: 3 | PythonSDK: 4 | Modules: 5 | RemoteFunction: 6 | # role arn is not required if in SageMaker Notebook instance or SageMaker Studio 7 | # Uncomment the following line and replace with the right execution role if in a local IDE 8 | # RoleArn: 9 | InstanceType: ml.m5.xlarge 10 | Dependencies: ./scripts/requirements.txt 11 | IncludeLocalWorkDir: true 12 | CustomFileFilter: 13 | IgnoreNamePatterns: # files or directories to ignore 14 | - "*.ipynb" # all notebook files 15 | 16 | -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/task_05_fmops/eval/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets==3.5.0 2 | pandas==2.2.3 3 | matplotlib==3.10.1 4 | numpy==1.26.4 5 | boto3==1.37.1 6 | tqdm==4.67.1 7 | lighteval[math]==0.9.2 8 | torch 9 | torchvision -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/task_05_fmops/ml-16670-arch-with-mlflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/fine-tuning-with-sagemakerai-and-bedrock/task_05_fmops/ml-16670-arch-with-mlflow.png -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/task_05_fmops/scripts/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers==4.50.2 2 | peft==0.14.0 3 | accelerate==1.3.0 4 | bitsandbytes==0.45.1 5 | datasets==3.2.0 6 | evaluate==0.4.3 7 | huggingface_hub[hf_transfer] 8 | mlflow 9 | safetensors>=0.5.2 10 | sagemaker==2.244.0 11 | sagemaker-mlflow==0.1.0 12 | sentencepiece==0.2.0 13 | scikit-learn==1.6.1 14 | tokenizers>=0.21.0 15 | trl==0.9.6 16 | psutil 17 | py7zr 18 | 
pynvml 19 | xtarfile 20 | rouge-score -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/task_05_fmops/steps/evaluation_mlflow.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import sagemaker 3 | from sagemaker.s3_utils import parse_s3_url 4 | import mlflow 5 | import tempfile 6 | from pathlib import Path 7 | import pandas as pd 8 | import json 9 | from dataclasses import dataclass 10 | from typing import Tuple, Optional 11 | 12 | 13 | 14 | def evaluation(model, preprocess_step_ret, finetune_ret, mlflow_arn, experiment_name, run_id): 15 | mlflow.set_tracking_uri(mlflow_arn) 16 | mlflow.set_experiment(experiment_name) 17 | 18 | print(preprocess_step_ret['run_id']) 19 | 20 | with mlflow.start_run(run_id=preprocess_step_ret['run_id']) as run: 21 | s3 = boto3.client("s3") 22 | sess = sagemaker.Session() 23 | 24 | dataset_info = mlflow.get_run(preprocess_step_ret['run_id']).inputs.dataset_inputs[1].dataset 25 | 26 | print(dataset_info) 27 | print(f"Dataset name: {dataset_info.name}") 28 | print(f"Dataset digest: {dataset_info.digest}") 29 | print(f"Dataset profile: {dataset_info.profile}") 30 | print(f"Dataset schema: {dataset_info.schema}") 31 | 32 | dataset_source = mlflow.data.get_source(dataset_info) 33 | ds = dataset_source.load() 34 | # load the evaluation dataset from the logged dataset source 35 | 36 | eval_data = pd.read_json(ds, orient='records', lines=True) 37 | 38 | data = [] 39 | for index, row in eval_data.iterrows(): 40 | for message in row['messages']: 41 | if message["role"] == "user": 42 | question = message["content"] 43 | elif message["role"] == "assistant": 44 | answer = message["content"] 45 | data.append({"question": question, "answer": answer}) 46 | 47 | df = pd.DataFrame(data, columns=["question", "answer"]) 48 | print(df.head()) 49 | 50 | 51 | logged_model = f"runs:/{preprocess_step_ret['run_id']}/model" 52 | loaded_model = mlflow.pyfunc.load_model(model_uri=logged_model) 53 | results = mlflow.evaluate( 54 | model=loaded_model, 55 | data=df, 56 | targets="answer", 57 | model_type="question-answering", 58 | evaluator_config={"col_mapping": {"inputs": "question"}}, 59 | ) 60 | print(results.metrics) 61 | return "done" -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/task_05_fmops/steps/finetune_llama3b_hf.py: -------------------------------------------------------------------------------- 1 | from steps.utils import endpoint_exists 2 | from sagemaker.jumpstart.estimator import JumpStartEstimator 3 | from sagemaker.huggingface import HuggingFace 4 | from huggingface_hub import HfFolder 5 | import mlflow 6 | import time 7 | import json 8 | import boto3 9 | 10 | def finetune_llama3b(preprocess_step_ret, train_config, lora_config, role, mlflow_arn, experiment_name, run_name, *args): 11 | 12 | mlflow.set_tracking_uri(mlflow_arn) 13 | mlflow.set_experiment(experiment_name) 14 | 15 | with mlflow.start_run(run_id=preprocess_step_ret['run_id']) as run: 16 | 17 | model_id = train_config["model_id"] 18 | endpoint_name = train_config["endpoint_name"] 19 | instance_type = train_config["finetune_instance_type"] 20 | num_instances = train_config["finetune_num_instances"] 21 | epoch = train_config["epoch"] 22 | per_device_train_batch_size = train_config["per_device_train_batch_size"] 23 | 24 | lora_config = json.loads(lora_config) 25 | 26 | lora_r = lora_config["lora_r"] 27 |
lora_alpha = lora_config["lora_alpha"] 28 | lora_dropout = lora_config["lora_dropout"] 29 | 30 | train_data_path = preprocess_step_ret["training_input_path"] 31 | 32 | training_job_name = f'huggingface-qlora-{train_config["epoch"]}-{lora_config["lora_r"]}-{time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())}' 33 | 34 | hyperparameters = { 35 | 'model_id': model_id, # pre-trained model 36 | 'dataset_path': '/opt/ml/input/data/training', # path where sagemaker will save training dataset 37 | 'epochs': epoch, # number of training epochs 38 | 'per_device_train_batch_size': per_device_train_batch_size, # batch size for training 39 | 'lr': 2e-4, # learning rate used during training 40 | 'hf_token': "", # Hugging Face token used to access gated Llama models 41 | 'merge_weights': True, # whether to merge LoRA into the model 42 | 'lora_r': lora_r, 43 | 'lora_alpha': lora_alpha, 44 | 'lora_dropout': lora_dropout, 45 | 'mlflow_arn': mlflow_arn, 46 | 'experiment_name': experiment_name, 47 | 'run_id': preprocess_step_ret['run_id'] 48 | } 49 | 50 | # Add SageMaker environment variables to help with debugging 51 | environment = { 52 | "HUGGINGFACE_HUB_CACHE": "/tmp/.cache", 53 | "NCCL_DEBUG": "INFO", # Helps debug NCCL issues 54 | "NCCL_P2P_DISABLE": "1", # Can help with some networking issues 55 | "PYTORCH_CUDA_ALLOC_CONF": "max_split_size_mb:512" # Helps with memory management 56 | } 57 | 58 | if endpoint_exists(endpoint_name): 59 | print("Endpoint already exists") 60 | training_job_name = None 61 | else: 62 | # Define distributed training configuration 63 | distribution = { 64 | 'torch_distributed': { 65 | 'enabled': True 66 | } 67 | } 68 | 69 | huggingface_estimator = HuggingFace( 70 | entry_point='llama3_fine_tuning.py', # train script 71 | source_dir='scripts', # directory which includes all the files needed for training 72 | instance_type=instance_type, # instance type used for the training job 73 | instance_count=num_instances, # the number of instances used for training 74 | base_job_name=training_job_name, # the name of the training job 75 | role=role, # IAM role used in training job to access AWS resources 76 | volume_size=300, # the size of the EBS volume in GB 77 | py_version='py311', # the python version used in the training job 78 | hyperparameters=hyperparameters, # the hyperparameters passed to the training job 79 | environment=environment, 80 | distribution=distribution, # Added distributed training config 81 | image_uri=f'763104351884.dkr.ecr.{boto3.session.Session().region_name}.amazonaws.com/pytorch-training:2.5.1-gpu-py311-cu124-ubuntu22.04-sagemaker', 82 | metric_definitions=[ 83 | {'Name': 'huggingface-textgeneration:loss', 'Regex': r"'loss':\s*([0-9.]+)"}, 84 | {'Name': 'huggingface-textgeneration:epoch', 'Regex': r"'epoch':\s*([0-9.]+)"}, 85 | {'Name': 'huggingface-textgeneration:train_loss', 'Regex': r"'train_loss':\s*([0-9.]+)"}, 86 | ] 87 | ) 88 | 89 | data = {'training': train_data_path} 90 | 91 | # starting the train job with our uploaded datasets as input 92 | huggingface_estimator.fit(data, wait=True) 93 | 94 | training_job_name = huggingface_estimator.latest_training_job.name 95 | 96 | return {"training_job_name": training_job_name, "run_id": preprocess_step_ret['run_id']} -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/task_05_fmops/steps/preprocess_llama3.py: -------------------------------------------------------------------------------- 1 | # Temporary preprocess step (to be changed with new dataset)
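# Illustrative note (an assumption inferred from the filtering below, not part of the
# original script): each record in the source dataset is expected to look roughly like
#   {"category": "Open QA",
#    "messages": [{"role": "user", "content": "..."},
#                 {"role": "assistant", "content": "..."}]}
# create_conversation() below prepends the system message when one is not already present.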
2 | import boto3 3 | import pandas as pd 4 | from datasets import load_dataset 5 | from datasets import Dataset 6 | from random import randint 7 | import mlflow 8 | import json 9 | 10 | 11 | system_message = """You are Llama, an AI assistant. Your knowledge spans a wide range of topics, allowing you to answer the questions with honesty and truthfulness.""" 12 | 13 | def create_conversation(sample): 14 | if sample["messages"][0]["role"] == "system": 15 | return sample 16 | else: 17 | sample["messages"] = [{"role": "system", "content": system_message}] + sample["messages"] 18 | return sample 19 | 20 | def preprocess(s3_bucket, dataset_name, train_sample, eval_sample, mlflow_arn, experiment_name, run_name): 21 | 22 | mlflow.set_tracking_uri(mlflow_arn) 23 | mlflow.set_experiment(experiment_name) 24 | 25 | 26 | # This is a very simple example, you can add your own data processing code here 27 | dataset = load_dataset(dataset_name) 28 | dataset = dataset.filter(lambda x: x['category'] == 'Open QA') 29 | 30 | columns_to_remove = list(dataset["train"].features) 31 | columns_to_remove.remove("messages") 32 | dataset = dataset.map(create_conversation, remove_columns=columns_to_remove, batched=False) 33 | 34 | dataset["train"] = dataset["train"].filter(lambda x: len(x["messages"][1:]) % 2 == 0) 35 | dataset["test"] = dataset["test"].filter(lambda x: len(x["messages"][1:]) % 2 == 0) 36 | 37 | dataset["train"].to_json("train_dataset.json", orient="records", force_ascii=False) 38 | dataset["test"].to_json("test_dataset.json", orient="records", force_ascii=False) 39 | 40 | # save training and test data to s3 41 | s3 = boto3.client("s3") 42 | s3.upload_file("train_dataset.json", s3_bucket, f"dataset/{dataset_name}/{train_sample}/train/train_dataset.json") 43 | s3.upload_file("test_dataset.json", s3_bucket, f"dataset/{dataset_name}/{eval_sample}/eval/eval_dataset.json") 44 | 45 | 46 | training_input_path = f's3://{s3_bucket}/dataset/{dataset_name}/{train_sample}/train/train_dataset.json' 47 | eval_input_path = f's3://{s3_bucket}/dataset/{dataset_name}/{eval_sample}/eval/eval_dataset.json' 48 | 49 | with mlflow.start_run(run_name=run_name) as run: 50 | 51 | run_id = run.info.run_id 52 | print(run_id) 53 | 54 | # create pandas dataframe from train json 55 | df_train = pd.read_json("train_dataset.json", orient="records", lines=True) 56 | df_evaluate = pd.read_json("test_dataset.json", orient="records", lines=True) 57 | 58 | training_data = mlflow.data.from_pandas(df_train, source=training_input_path) 59 | mlflow.log_input(training_data, context="training") 60 | 61 | evaluation_data = mlflow.data.from_pandas(df_evaluate, source=eval_input_path) 62 | mlflow.log_input(evaluation_data, context="evaluation") 63 | 64 | return {"training_input_path": training_input_path, "eval_input_path": eval_input_path, "run_id": run_id} 65 | -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/task_05_fmops/steps/utils.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | from datetime import datetime 3 | 4 | 5 | def endpoint_exists(endpoint_name): 6 | endpoint_exist = False 7 | 8 | client = boto3.client('sagemaker') 9 | response = client.list_endpoints() 10 | endpoints = response["Endpoints"] 11 | 12 | for endpoint in endpoints: 13 | if endpoint_name == endpoint["EndpointName"]: 14 | endpoint_exist = True 15 | break 16 | 17 | return endpoint_exist 18 | 19 | def create_training_job_name(model_id): 20
| return f"{model_id}-{datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')[:-3]}" 21 | -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/utilities/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/fine-tuning-with-sagemakerai-and-bedrock/utilities/__init__.py -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/utilities/helpers.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import mlflow 4 | from datetime import datetime 5 | from typing import List, Dict 6 | from langchain import PromptTemplate 7 | from langchain.prompts.few_shot import FewShotPromptTemplate 8 | from langchain.llms import SagemakerEndpoint 9 | from langchain.llms.sagemaker_endpoint import LLMContentHandler 10 | from langchain.chains import LLMChain 11 | from IPython.display import ( 12 | display, 13 | Markdown, 14 | HTML 15 | ) 16 | 17 | 18 | def pretty_print_html(text): 19 | # Replace newline characters with
<br> tags 20 | html_text = text.replace('\n', '<br>') 21 | # Apply HTML formatting 22 | html_formatted = f'<p>{html_text}</p>' 23 | # Display the formatted HTML 24 | return HTML(html_formatted) 25 | 26 | 27 | def set_meta_llama_params( 28 | max_new_tokens=512, 29 | top_p=0.9, 30 | temperature=0.6, 31 | ): 32 | """ set Llama parameters """ 33 | llama_params = {} 34 | llama_params['max_new_tokens'] = max_new_tokens 35 | llama_params['top_p'] = top_p 36 | llama_params['temperature'] = temperature 37 | return llama_params 38 | 39 | 40 | def print_dialog(inputs, payload, response): 41 | dialog_output = [] 42 | for msg in inputs: 43 | dialog_output.append(f"**{msg['role'].upper()}**: {msg['content']}\n") 44 | dialog_output.append(f"**ASSISTANT**: {response['generated_text']}") 45 | dialog_output.append("\n---\n") 46 | 47 | display(Markdown('\n'.join(dialog_output))) 48 | 49 | def format_messages(messages: List[Dict[str, str]]) -> str: 50 | """ 51 | Format messages for Llama 3+ chat models. 52 | 53 | The model only supports 'system', 'user' and 'assistant' roles, starting with 'system', then 'user' and 54 | alternating (u/a/u/a/u...). The last message must be from 'user'. 55 | """ 56 | # auto assistant suffix 57 | # messages.append({"role": "assistant"}) 58 | 59 | output = "<|begin_of_text|>" 60 | # Adding the inferred prefix 61 | _system_prefix = f"\n\nCutting Knowledge Date: December 2023\nToday Date: {datetime.now().strftime('%d %b %Y')}\n\n" 62 | for i, entry in enumerate(messages): 63 | output += f"<|start_header_id|>{entry['role']}<|end_header_id|>" 64 | if i == 0: 65 | output += f"{_system_prefix}{entry['content']}<|eot_id|>" 66 | elif i >= 1 and 'content' in entry: 67 | output += f"\n\n{entry['content']}<|eot_id|>" 68 | output += "<|start_header_id|>assistant<|end_header_id|>\n" 69 | return output 70 | 71 | 72 | def write_eula(attribute): 73 | os.makedirs("/home/sagemaker-user/.license/", exist_ok=True) 74 | f = open("/home/sagemaker-user/.license/llama-license.txt", "w") 75 | f.write(attribute) 76 | f.close() 77 | return 0 78 | 79 | 80 | def read_eula(): 81 | attribute = open("/home/sagemaker-user/.license/llama-license.txt", "r").read() 82 | assert attribute == "True", f"Llama EULA set to {attribute}! Please review EULA to continue!"
83 | return attribute 84 | 85 | 86 | class ContentHandlerwithTracking(LLMContentHandler): 87 | content_type = "application/json" 88 | accepts = "application/json" 89 | 90 | def __init__(self, experiment_name): 91 | self.mlflow_experiment_name = experiment_name 92 | print(f"Sending experiments to : {self.mlflow_experiment_name}") 93 | self.experiment_online_info = mlflow.set_experiment(self.mlflow_experiment_name) 94 | self.run_id_ephemeral = None 95 | 96 | def transform_input(self, prompt, model_kwargs): 97 | with mlflow.start_run( 98 | experiment_id=self.experiment_online_info.experiment_id, 99 | run_name=f"lc-{datetime.now().strftime('%Y%m%d%H%M%S%f')}" 100 | ) as run: 101 | base_input = [{"role" : "user", "content" : prompt}] 102 | optz_input = format_messages(base_input) 103 | input_str = json.dumps( 104 | { 105 | "inputs" : optz_input, 106 | "parameters" : {**model_kwargs} 107 | } 108 | ) 109 | # track prompts 110 | # mlflow.log_param("SystemPrompt", instruction) 111 | mlflow.log_param("UserPrompt", optz_input) 112 | mlflow.log_param("parameters", {**model_kwargs}) 113 | 114 | self.run_id_ephemeral = run.info.run_id 115 | 116 | return input_str.encode('utf-8') 117 | 118 | def transform_output(self, output): 119 | with mlflow.start_run( 120 | experiment_id=self.experiment_online_info.experiment_id, 121 | run_id=self.run_id_ephemeral 122 | ) as run: 123 | response_json = json.loads(output.read().decode("utf-8")) 124 | mlflow.log_param("ModelResponse", response_json["generated_text"]) 125 | return response_json["generated_text"] 126 | --------------------------------------------------------------------------------