├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md └── workshops ├── building-rag-workflows-with-sagemaker-and-bedrock ├── 00-00_prerequisites │ └── prerequisites.ipynb ├── 01-01_local-experimentation │ ├── building-an-experimental-rag-app.ipynb │ └── utils │ │ ├── TokenCounterHandler.py │ │ └── __init__.py ├── 02-01_sagemaker-opensearch-rag │ ├── SageMak-Embedding-Model-OpenSearch.ipynb │ ├── extracted_context.json │ ├── images │ │ └── LangfuseTraces.png │ └── ori_pqal_10_records.csv ├── 03-02_fine-tuning-embedding │ ├── 01-ft_embedding_with_sagemaker_eval.ipynb │ ├── 02-embeddings-eval.ipynb │ ├── images │ │ ├── eval2-31k_context-5k_sample.png │ │ ├── training-9000_test-20000.png │ │ └── training-9000_test-50000.png │ ├── requirements.txt │ └── scripts │ │ └── trainer.py ├── 03-03_raft-customization │ ├── 01-build_raft_dataset.ipynb │ ├── 02-raft_finetune.ipynb │ ├── 03-deploy_and_evaluate_models.ipynb │ ├── 04-evaluate.ipynb │ ├── 05-deploy-to-BR.ipynb │ ├── eval.json │ ├── full_eval.json │ ├── images │ │ ├── x1.png │ │ ├── x5.png │ │ └── x6.png │ └── scripts │ │ ├── launch_fsdp_qlora.py │ │ ├── merge_model_adapter.py │ │ └── requirements.txt ├── 04-01_guardrails │ └── sagemaker-inference-bedrock-guardrails-medical-theme.ipynb ├── README.md ├── SageMaker Training Hosting and Custom Model Import_V3.pdf └── images │ └── rag-mind-map.png ├── distributed-training-deployment-on-sagemaker-ai ├── README.md ├── solution-1-sagemaker-jumpstart │ ├── jumpstart-llama3.1-8b-instruct-ft.ipynb │ └── telco_promotions.json ├── solution-2-sagemaker-training │ ├── option-1-continued-pre-training │ │ ├── model-trainer-fsdp-qlora.ipynb │ │ └── scripts │ │ │ ├── requirements.txt │ │ │ └── train.py │ └── option-2-supervised-fine-tuning │ │ ├── model-trainer-fsdp-qlora.ipynb │ │ └── scripts │ │ ├── requirements.txt │ │ ├── rouge │ │ └── rouge.py │ │ ├── rouge_evaluation.py │ │ └── train.py └── solution-3-sagemaker-hyperpod-k8 │ ├── README.md │ ├── args.yaml │ ├── download_model.py │ ├── pod-finetuning.yaml │ ├── requirements.txt │ └── scripts │ ├── dataprep.py │ └── train.py ├── diy-agents-with-sagemaker-and-bedrock ├── 0-setup │ └── setup-sagemaker-endpoint.ipynb ├── 1-inference │ ├── 1-inference-bedrock.ipynb │ ├── 2-inference-sagemaker.ipynb │ └── README.md ├── 2-tool-calling │ ├── 1-tool-calling-bedrock.ipynb │ ├── 2-tool-calling-sagemaker.ipynb │ └── README.md ├── 3-agent-patterns │ ├── README.md │ ├── autonomous_agent.ipynb │ ├── basic_workflows.ipynb │ ├── evaluator_optimizer.ipynb │ └── orchestrator_workers.ipynb ├── 4-frameworks │ ├── README.md │ ├── agno-ai │ │ └── agno-ai-logistics.ipynb │ ├── crewai │ │ ├── crewAI-langfuse-observability.ipynb │ │ ├── crewAI-travel-agent-hierarchical.ipynb │ │ ├── crewai-requirements.txt │ │ ├── crewai-travel-agent-sequential.ipynb │ │ └── crewai-travel-flows.ipynb │ ├── langgraph │ │ ├── langgraph-hierarchical-agent-teams.ipynb │ │ ├── langgraph-requirements.txt │ │ └── langgraph-sequential-agent-teams.ipynb │ ├── openai-agents-sdk │ │ └── openai_agents_sdk_tutorial.ipynb │ ├── smolagents │ │ └── smolagents-example.ipynb │ └── strands-agents │ │ ├── strands-agents-bedrock.ipynb │ │ ├── strands-agents-sagemaker.ipynb │ │ └── strands_sagemaker.py ├── 5-observability │ ├── 1-langfuse │ │ ├── crewAI-langfuse-observability.ipynb │ │ └── litellm-langfuse-observability.ipynb │ ├── 2-mlflow │ │ ├── crewai-requirements.txt │ │ ├── mlflow-crewAI-observability.ipynb │ │ └── mlflow-langgraph-observability.ipynb │ └── README.md ├── 99-use-cases 
│ ├── mcp │ │ ├── mcp-exploration.ipynb │ │ └── server.py │ ├── sagemaker-endpoint-as-tool │ │ ├── README.md │ │ ├── demand_forecasting.ipynb │ │ ├── endpoint-as-tool.png │ │ ├── script.py │ │ ├── server.py │ │ └── strands-agents-sagemaker-as-tool.ipynb │ ├── strands │ │ └── Strands_Agents.ipynb │ ├── support-ticket-triage │ │ ├── langgraph-requirements.txt │ │ └── support-system.ipynb │ ├── text2dsl-mcp │ │ ├── README.md │ │ ├── cfn-oss-collection.yaml │ │ ├── guardduty-index-schema.json │ │ ├── mcp_dsl_server.py │ │ ├── requirements.txt │ │ ├── text2dsl-mcp.ipynb │ │ └── utils.py │ └── text2sql │ │ ├── 1-create-db-tables.ipynb │ │ ├── 2-text2sql-langchain.ipynb │ │ ├── README.md │ │ └── requirements.txt └── README.md └── fine-tuning-with-sagemakerai-and-bedrock ├── archive ├── README.md ├── comet │ ├── comet-intro.ipynb │ └── comet-opik.ipynb ├── deepchecks │ └── deepchecks.ipynb ├── fiddler │ ├── .gitignore │ ├── README.md │ ├── assets │ │ ├── charts_llm.yaml │ │ ├── charts_ml.yaml │ │ ├── llm_events.parquet │ │ ├── search_ranking_prod.csv │ │ └── search_ranking_sample.csv │ ├── client │ │ ├── fiddler_client-3.7.0.dev4-py3-none-any.whl │ │ └── sagemaker-2.227.1.dev0-py3-none-any.whl │ ├── fiddler.ipynb │ ├── imgs │ │ ├── create_dashboard.png │ │ ├── credentials_tab.png │ │ ├── login_view.png │ │ ├── rca_drift.png │ │ ├── rca_events.png │ │ ├── rca_init.png │ │ ├── root_cause_analysis.png │ │ ├── settings_view.png │ │ └── view_dashboard.gif │ └── modules │ │ ├── __init__.py │ │ ├── chart.py │ │ ├── config.py │ │ ├── llm_onboard.py │ │ └── ml_onboard.py └── lakera │ └── lakera.ipynb ├── media └── smbanner.png ├── task_01_foundation_model_playground ├── 01.01_search_and_deploy_huggingface_llm.ipynb └── scripts │ └── requirements.txt ├── task_02_customize_foundation_model ├── 02.01_finetune_deepseekr1.ipynb └── scripts │ ├── requirements.txt │ └── train.py ├── task_03_foundation_model_evaluation ├── 03.01_foundation_model_evaluation_lighteval.ipynb └── images │ ├── sft_1000_train_50_test_bars.png │ ├── sft_1000_train_50_test_compare.png │ ├── sft_1000_train_50_test_scores.png │ ├── sft_5000_train_100_test_bars.png │ ├── sft_5000_train_100_test_compare.png │ └── sft_5000_train_100_test_scores.png ├── task_04_responsible_ai ├── 04.01_bedrock_guardrails_apply_guardrail_api.ipynb └── images │ └── applyguardrail.png ├── task_05_fmops ├── 05.01_fine-tuning-pipeline.ipynb ├── config.yaml ├── eval │ └── requirements.txt ├── ml-16670-arch-with-mlflow.png ├── scripts │ ├── requirements.txt │ └── train.py └── steps │ ├── evaluation_mlflow.py │ ├── finetune_llama3b_hf.py │ ├── preprocess_llama3.py │ └── utils.py └── utilities ├── __init__.py └── helpers.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | *.pylint.d/ 52 | pylint-report.txt 53 | pylint-global.rc 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | target/ 75 | 76 | # PyBuilder 77 | target/ 78 | 79 | # IPython 80 | profile_default/ 81 | ipython_config.py 82 | 83 | # PyCharm 84 | .idea/ 85 | .idea/* 86 | .idea/workspace.xml 87 | .idea/tasks.xml 88 | .idea/dictionaries 89 | .idea/usage.statistics.xml 90 | .idea/contentModel.xml 91 | .idea/dataSources/ 92 | .idea/vcs.xml 93 | .idea/jsLibraryMappings.xml 94 | .idea/modules.xml 95 | .idea/docker.xml 96 | .idea/gradle.xml 97 | .idea/misc.xml 98 | .idea/modules.xml 99 | .idea/scopes/ 100 | .idea/runConfigurations/ 101 | 102 | # VS Code 103 | .vscode/* 104 | .history/ 105 | 106 | # Environments 107 | .env 108 | .venv 109 | env/ 110 | venv/ 111 | ENV/ 112 | env.bak/ 113 | venv.bak/ 114 | env.bak/ 115 | venv.bak/ 116 | 117 | # Spyder project settings 118 | .spyderproject 119 | .spyproject 120 | 121 | # Rope project settings 122 | .ropeproject 123 | 124 | # mkdocs documentation 125 | /site 126 | 127 | # mypy 128 | .mypy_cache/ 129 | .dmypy.json 130 | dmypy.json 131 | 132 | # Pyre type checker 133 | .pyre/ 134 | 135 | # Celery stuff 136 | celerybeat-schedule 137 | celerybeat.pid 138 | 139 | # SageMath parsed files 140 | *.sage.py 141 | 142 | # Environments for pyenv, pipenv, poetry 143 | .python-version 144 | Pipfile.lock 145 | poetry.lock 146 | 147 | # dotenv 148 | .env 149 | 150 | # VS Code 151 | .vscode/ 152 | 153 | # MacOS files 154 | .DS_Store 155 | 156 | # Backup files 157 | *~ 158 | 159 | # Files that might appear anywhere in the directory 160 | .DS_Store 161 | **/.DS_Store 162 | .ipynb_checkpoints 163 | repl_state -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 
13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute to. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project, we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 60 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT No Attribution 2 | 3 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so.
10 | 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 13 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 14 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 15 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 16 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 17 | 18 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Generative AI Fine-tuning with Amazon SageMaker AI and Amazon Bedrock 2 | 3 | ## Welcome! 4 | 5 | Welcome to the home of Generative AI Fine-tuning with Amazon SageMaker AI and Amazon Bedrock. This repository is a growing collection of generative AI samples covering workflows such as: 6 | 1. Setting up a Foundation Model Playground on Amazon SageMaker AI 7 | 2. Customizing Foundation Models on Amazon SageMaker AI 8 | 3. Deploying, Evaluating and Monitoring Foundation Models on Amazon SageMaker AI 9 | 4. Creating Bedrock Guardrails with a SageMaker Endpoint 10 | 5. Developing FMOps fine-tuning workflows with SageMaker Pipelines 11 | 12 | ## Security 13 | 14 | See [CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications) for more information. 15 | 16 | ## License 17 | 18 | This library is licensed under the MIT-0 License. See the LICENSE file. 19 | 20 | -------------------------------------------------------------------------------- /workshops/building-rag-workflows-with-sagemaker-and-bedrock/01-01_local-experimentation/utils/TokenCounterHandler.py: -------------------------------------------------------------------------------- 1 | from langchain.callbacks.base import BaseCallbackHandler 2 | from langchain.schema import LLMResult 3 | from typing import List, Dict, Any 4 | import tiktoken 5 | 6 | 7 | class TokenCounterHandler(BaseCallbackHandler): 8 | 9 | # tiktoken's gpt-3.5-turbo encoding is used as an approximation; token 10 | # counts for other model families will be close but not exact. 11 | MODEL_ENCODING = "gpt-3.5-turbo" 12 | ENCODING = tiktoken.encoding_for_model(MODEL_ENCODING) 13 | 14 | def __init__(self, clear_report_on_chain_start=True): 15 | self.clear_report_on_chain_start = clear_report_on_chain_start 16 | self.tokens = 0 17 | self.embedding_tokens = 0 18 | self.prompt_tokens = 0 19 | self.generation_tokens = 0 20 | 21 | def on_chain_start(self, serialized, inputs: Dict[str, Any], **kwargs): 22 | if self.clear_report_on_chain_start: 23 | self.clear_report() 24 | 25 | def on_retriever_start(self, query: str, **kwargs): 26 | numtokens = len(self.ENCODING.encode(query)) 27 | self.tokens += numtokens 28 | self.embedding_tokens += numtokens 29 | 30 | def on_llm_start(self, serialized, prompts: List[str], **kwargs): 31 | for prompt in prompts: 32 | numtokens = len(self.ENCODING.encode(prompt)) 33 | self.tokens += numtokens 34 | self.prompt_tokens += numtokens 35 | 36 | def on_llm_end(self, response: LLMResult, **kwargs): 37 | for generation in response.generations: 38 | numtokens = len(self.ENCODING.encode(generation[0].text)) 39 | self.tokens += numtokens 40 | self.generation_tokens += numtokens 41 | 42 | def on_chain_end(self, outputs: Dict[str, Any], **kwargs): 43 | self.report() 44 | 45 | def clear_report(self): 46 | self.tokens = 0 47 | self.embedding_tokens = 0 48 | self.prompt_tokens = 0 49 | self.generation_tokens = 0 50 | 51 | def report(self): 52 | print(f"\nToken Counts:\nTotal: {self.tokens}\nEmbedding: {self.embedding_tokens}\nPrompt: {self.prompt_tokens}\nGeneration: {self.generation_tokens}\n") --------------------------------------------------------------------------------
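The handler above is a LangChain callback, so it has to be attached to whatever runnable it should observe. As a quick illustration that is not part of the workshop code, the sketch below exercises `TokenCounterHandler` with LangChain's `FakeListLLM` test stub, so it runs without any SageMaker or Bedrock endpoint. The import paths assume a recent `langchain`/`langchain_community`; older releases expose `FakeListLLM` under `langchain.llms.fake`, and the `utils` import assumes you run from the `01-01_local-experimentation` directory.

```python
# Minimal sketch (assumed setup): tally tokens with TokenCounterHandler
# using a canned LLM, so no model endpoint is needed.
from langchain_community.llms.fake import FakeListLLM

from utils.TokenCounterHandler import TokenCounterHandler

token_counter = TokenCounterHandler()
llm = FakeListLLM(responses=["Paris is the capital of France."])

# Passing the handler via the run config fires on_llm_start/on_llm_end,
# which count prompt and generation tokens respectively.
llm.invoke("What is the capital of France?", config={"callbacks": [token_counter]})
token_counter.report()
```

In the workshop notebook, the same handler would instead be passed to the RAG chain's callbacks, so retriever queries contribute to the embedding-token count as well.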
/workshops/building-rag-workflows-with-sagemaker-and-bedrock/01-01_local-experimentation/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | """General helper utilities for the workshop notebooks""" 4 | # Python Built-Ins: 5 | from io import StringIO 6 | import sys 7 | import textwrap 8 | 9 | 10 | def print_ww(*args, width: int = 100, **kwargs): 11 | """Like print(), but wraps output to `width` characters (default 100)""" 12 | buffer = StringIO() 13 | try: 14 | _stdout = sys.stdout 15 | sys.stdout = buffer 16 | print(*args, **kwargs) 17 | output = buffer.getvalue() 18 | finally: 19 | sys.stdout = _stdout 20 | for line in output.splitlines(): 21 | print("\n".join(textwrap.wrap(line, width=width))) 22 | -------------------------------------------------------------------------------- /workshops/building-rag-workflows-with-sagemaker-and-bedrock/02-01_sagemaker-opensearch-rag/images/LangfuseTraces.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/building-rag-workflows-with-sagemaker-and-bedrock/02-01_sagemaker-opensearch-rag/images/LangfuseTraces.png -------------------------------------------------------------------------------- /workshops/building-rag-workflows-with-sagemaker-and-bedrock/03-02_fine-tuning-embedding/images/eval2-31k_context-5k_sample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/building-rag-workflows-with-sagemaker-and-bedrock/03-02_fine-tuning-embedding/images/eval2-31k_context-5k_sample.png -------------------------------------------------------------------------------- /workshops/building-rag-workflows-with-sagemaker-and-bedrock/03-02_fine-tuning-embedding/images/training-9000_test-20000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/building-rag-workflows-with-sagemaker-and-bedrock/03-02_fine-tuning-embedding/images/training-9000_test-20000.png -------------------------------------------------------------------------------- /workshops/building-rag-workflows-with-sagemaker-and-bedrock/03-02_fine-tuning-embedding/images/training-9000_test-50000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/building-rag-workflows-with-sagemaker-and-bedrock/03-02_fine-tuning-embedding/images/training-9000_test-50000.png -------------------------------------------------------------------------------- /workshops/building-rag-workflows-with-sagemaker-and-bedrock/03-02_fine-tuning-embedding/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers==4.40.2 2 | sentence-transformers==3.1.1 3 | datasets==2.19.2 4 | accelerate==1.1.0 5 | --------------------------------------------------------------------------------
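The `trainer.py` script that follows fine-tunes an embedding model with a Matryoshka objective, which trains nested sub-dimensions of the same embedding so that one checkpoint can serve several vector sizes. A sketch of consuming such a checkpoint is below; the local path is hypothetical (point it at wherever the SageMaker model artifact was unpacked), and `truncate_dim` is supported by the pinned sentence-transformers 3.1.1.

```python
# Minimal sketch: load the fine-tuned Matryoshka checkpoint at a reduced
# dimension and score a query/passage pair. "./model" is a placeholder path.
from sentence_transformers import SentenceTransformer
from sentence_transformers.util import cos_sim

model = SentenceTransformer("./model", truncate_dim=256)

query_emb = model.encode("What is the effect of the treatment on blood pressure?")
doc_emb = model.encode("The study found a significant reduction in systolic blood pressure.")

# Similarity is computed on the 256-dim prefix of the full embedding space.
print(cos_sim(query_emb, doc_emb))
```

Because the Matryoshka loss optimizes each prefix length explicitly (the script uses dimensions up to 768), retrieval quality degrades gracefully as `truncate_dim` shrinks, trading accuracy for index size and latency.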
/workshops/building-rag-workflows-with-sagemaker-and-bedrock/03-02_fine-tuning-embedding/scripts/trainer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import torch 4 | import argparse 5 | from datasets import load_dataset, concatenate_datasets 6 | from sentence_transformers import SentenceTransformer 7 | from sentence_transformers.losses import MatryoshkaLoss, MultipleNegativesRankingLoss 8 | from sentence_transformers.evaluation import InformationRetrievalEvaluator, SequentialEvaluator 9 | from sentence_transformers.util import cos_sim 10 | from sentence_transformers import SentenceTransformerTrainer 11 | from sentence_transformers.training_args import SentenceTransformerTrainingArguments 12 | from sentence_transformers.training_args import BatchSamplers 13 | 14 | def load_data(train_file, test_file): 15 | train_dataset = load_dataset("json", data_dir=train_file, split="train") 16 | test_dataset = load_dataset("json", data_dir=test_file, split="test") 17 | corpus_dataset = concatenate_datasets([train_dataset, test_dataset]) 18 | return train_dataset, test_dataset, corpus_dataset 19 | 20 | def prepare_ir_evaluator(test_dataset, corpus_dataset, matryoshka_dimensions): 21 | corpus = dict(zip(corpus_dataset["id"], corpus_dataset["context"])) 22 | queries = dict(zip(test_dataset["id"], test_dataset["question"])) 23 | relevant_docs = {q_id: [q_id] for q_id in queries} 24 | 25 | matryoshka_evaluators = [] 26 | for dim in matryoshka_dimensions: 27 | evaluator = InformationRetrievalEvaluator( 28 | queries=queries, 29 | corpus=corpus, 30 | relevant_docs=relevant_docs, 31 | name=f"dim_{dim}", 32 | truncate_dim=dim, 33 | score_functions={"cosine": cos_sim}, 34 | ) 35 | matryoshka_evaluators.append(evaluator) 36 | 37 | return SequentialEvaluator(matryoshka_evaluators) 38 | 39 | def main(args): 40 | print("Loading datasets...") 41 | train_dataset, test_dataset, corpus_dataset = load_data(args.train_data, args.validation_data) 42 | 43 | base_model_id_safe = args.model_name.replace("/", "_") 44 | output_dir = f"{args.model_output}/{base_model_id_safe}_ds={len(train_dataset)}_bs={args.batch_size}_e={args.epochs}" 45 | 46 | print("Loading model...") 47 | model = SentenceTransformer( 48 | args.model_name, 49 | model_kwargs={"attn_implementation": "eager"}, 50 | trust_remote_code=True 51 | ) 52 | 53 | print("Preparing loss function...") 54 | model_dim = model.get_sentence_embedding_dimension() 55 | matryoshka_dimensions = [dim for dim in [768, 512, 384, 256, 128, 64] if dim <= model_dim] 56 | # matryoshka_dimensions = [768, 512, 256, 128, 64] 57 | inner_train_loss = MultipleNegativesRankingLoss(model) 58 | train_loss = MatryoshkaLoss(model, inner_train_loss, matryoshka_dims=matryoshka_dimensions) 59 | 60 | print("Configuring evaluator...") 61 | evaluator = prepare_ir_evaluator(test_dataset, corpus_dataset, matryoshka_dimensions) 62 | 63 | print("Setting training arguments...") 64 | training_args = SentenceTransformerTrainingArguments( 65 | output_dir=output_dir, 66 | num_train_epochs=args.epochs, 67 | per_device_train_batch_size=args.batch_size, 68 | gradient_accumulation_steps=16, 69 | per_device_eval_batch_size=args.batch_size, 70 | warmup_ratio=0.1, 71 | learning_rate=2e-5, 72 | lr_scheduler_type="cosine", 73 | optim="adamw_torch_fused", 74 | tf32=True, 75 | bf16=True, 76 | batch_sampler=BatchSamplers.NO_DUPLICATES, 77 | evaluation_strategy="epoch", 78 | save_strategy="epoch", 79 | logging_steps=10, 80 | save_total_limit=3, 81 | 
load_best_model_at_end=True, 82 | metric_for_best_model="eval_dim_128_cosine_ndcg@10", 83 | ) 84 | 85 | print("Starting training...") 86 | trainer = SentenceTransformerTrainer( 87 | model=model, 88 | args=training_args, 89 | train_dataset=train_dataset.select_columns(["question", "context"]), 90 | loss=train_loss, 91 | evaluator=evaluator, 92 | ) 93 | 94 | trainer.train() 95 | trainer.save_model() 96 | 97 | if __name__ == "__main__": 98 | parser = argparse.ArgumentParser() 99 | parser.add_argument("--train_data", type=str, default="/opt/ml/input/data/train") 100 | parser.add_argument("--validation_data", type=str, default="/opt/ml/input/data/validation") 101 | parser.add_argument("--model_name", type=str, default="") 102 | parser.add_argument("--epochs", type=int, default=4) 103 | parser.add_argument("--batch_size", type=int, default=16) 104 | parser.add_argument("--model_output", type=str, default="/opt/ml/model") 105 | args = parser.parse_args() 106 | 107 | main(args) 108 | -------------------------------------------------------------------------------- /workshops/building-rag-workflows-with-sagemaker-and-bedrock/03-03_raft-customization/images/x1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/building-rag-workflows-with-sagemaker-and-bedrock/03-03_raft-customization/images/x1.png -------------------------------------------------------------------------------- /workshops/building-rag-workflows-with-sagemaker-and-bedrock/03-03_raft-customization/images/x5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/building-rag-workflows-with-sagemaker-and-bedrock/03-03_raft-customization/images/x5.png -------------------------------------------------------------------------------- /workshops/building-rag-workflows-with-sagemaker-and-bedrock/03-03_raft-customization/images/x6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/building-rag-workflows-with-sagemaker-and-bedrock/03-03_raft-customization/images/x6.png -------------------------------------------------------------------------------- /workshops/building-rag-workflows-with-sagemaker-and-bedrock/03-03_raft-customization/scripts/merge_model_adapter.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import random 4 | import torch 5 | from transformers import AutoTokenizer, TrainingArguments 6 | from transformers import ( 7 | AutoModelForCausalLM, 8 | AutoTokenizer, 9 | BitsAndBytesConfig, 10 | set_seed, 11 | ) 12 | from typing import Dict, Optional, Tuple 13 | import argparse 14 | from datasets import load_dataset 15 | from pprint import pprint 16 | 17 | import subprocess as sb 18 | 19 | def set_custom_env(env_vars: Dict[str, str]) -> None: 20 | """ 21 | Set custom environment variables. 22 | 23 | Args: 24 | env_vars (Dict[str, str]): A dictionary of environment variables to set. 25 | Keys are variable names, values are their corresponding values. 26 | 27 | Returns: 28 | None 29 | 30 | Raises: 31 | TypeError: If env_vars is not a dictionary. 
32 | ValueError: If any key or value in env_vars is not a string. 33 | """ 34 | if not isinstance(env_vars, dict): 35 | raise TypeError("env_vars must be a dictionary") 36 | 37 | for key, value in env_vars.items(): 38 | if not isinstance(key, str) or not isinstance(value, str): 39 | raise ValueError("All keys and values in env_vars must be strings") 40 | 41 | os.environ.update(env_vars) 42 | 43 | # Optionally, print the updated environment variables 44 | print("Updated environment variables:") 45 | for key, value in env_vars.items(): 46 | print(f" {key}: {value}") 47 | 48 | def create_test_prompt(): 49 | dataset = load_dataset( 50 | "json", 51 | #data_files=os.path.join(args.testdata, "dataset.json"), 52 | data_dir=args.testdata, 53 | split="test" 54 | ) 55 | 56 | # Shuffle the dataset and select the first row 57 | random_row = dataset.shuffle().select(range(1))[0] 58 | 59 | return random_row 60 | 61 | # Generate in-memory inference 62 | def generate_text(model, prompt, max_length=2048, num_return_sequences=1): 63 | # Encode the input prompt 64 | 65 | device = "cuda" if torch.cuda.is_available() else "cpu" 66 | 67 | #model = model.to(device) 68 | 69 | tokenizer = AutoTokenizer.from_pretrained( 70 | args.basemodel if args.use_local else args.model_id, 71 | use_fast=True 72 | ) 73 | 74 | tokenizer.pad_token = tokenizer.eos_token 75 | 76 | tokenizer.save_pretrained("/opt/ml/model/merged/") 77 | 78 | prompt_input=prompt['prompt'].split("### Summary")[0] 79 | 80 | input_ids = tokenizer.encode(prompt_input, return_tensors="pt")#.to(device) 81 | 82 | # Generate text 83 | with torch.no_grad(): 84 | output = model.generate( 85 | input_ids, 86 | max_length=max_length, 87 | num_return_sequences=num_return_sequences, 88 | no_repeat_ngram_size=2, 89 | top_k=50, 90 | top_p=0.95, 91 | temperature=0.7 92 | ) 93 | 94 | # Decode and return the generated text 95 | generated_texts = [tokenizer.decode(seq, skip_special_tokens=True) for seq in output] 96 | 97 | return generated_texts 98 | 99 | # Merge the trained adapter with the base model and test it 100 | def merge_and_save_model(model_id, adapter_dir, output_dir): 101 | from peft import PeftModel 102 | 103 | ################## 104 | # Load Base Model 105 | ################## 106 | print("Trying to load a Peft model. 
It might take a while without feedback") 107 | base_model = AutoModelForCausalLM.from_pretrained( 108 | args.basemodel if args.use_local else model_id, 109 | low_cpu_mem_usage=True, 110 | torch_dtype=torch.float32, 111 | device_map="auto", 112 | # offload_folder="/opt/ml/model/" 113 | ) 114 | 115 | print("Loaded base model") 116 | 117 | ############################# 118 | # Run Inference - Base Model 119 | ############################# 120 | prompt=create_test_prompt() 121 | 122 | #pprint(f"*** Generating Inference on Base Model: {generate_text(base_model,prompt)}") 123 | 124 | base_model.config.use_cache = False 125 | 126 | ################ 127 | # Load Adapter 128 | ################ 129 | # Load the adapter 130 | peft_model = PeftModel.from_pretrained( 131 | base_model, 132 | adapter_dir, 133 | torch_dtype=torch.float32, # Load adapter weights in float32 to match the base model 134 | # offload_folder="/opt/ml/model/" 135 | ) 136 | 137 | ############################### 138 | # Merge Adapter and Base Model 139 | ############################### 140 | print("Loaded peft model") 141 | model = peft_model.merge_and_unload() 142 | print("Merge done") 143 | 144 | model.eval() 145 | model.active_adapters = "default" 146 | ############################# 147 | # Run Inference - Trained Model 148 | ############################# 149 | pprint(f"*** Generating Inference on Trained Model: {generate_text(model,prompt)}") 150 | 151 | os.makedirs(output_dir, exist_ok=True) 152 | 153 | ################################## 154 | # Save Merged Model and Tokenizer 155 | ################################## 156 | print(f"Saving the newly created merged model to {output_dir}") 157 | model.save_pretrained(output_dir, safe_serialization=True) 158 | base_model.config.save_pretrained(output_dir) 159 | 160 | # Parse CLI arguments passed by SageMaker Jobs 161 | def parse_args(): 162 | 163 | parser = argparse.ArgumentParser() 164 | 165 | # infra configuration 166 | parser.add_argument("--adapterdir", type=str, default=os.environ["SM_CHANNEL_ADAPTER"]) 167 | parser.add_argument("--testdata", type=str, default=os.environ["SM_CHANNEL_TESTDATA"]) 168 | 169 | parser.add_argument("--basemodel", type=str, default=os.environ.get("SM_CHANNEL_BASEMODEL","")) 170 | parser.add_argument('--use_local', type=lambda x: str(x).lower() in ['true', '1', 't', 'y', 'yes'], help="A boolean flag") 171 | 172 | parser.add_argument("--model_id", type=str, default="meta-llama/Meta-Llama-3.1-8B") 173 | parser.add_argument("--hf_token", type=str, default="") 174 | parser.add_argument("--dataset_name", type=str, default="") 175 | 176 | args = parser.parse_known_args() 177 | 178 | return args 179 | 180 | if __name__ == "__main__": 181 | 182 | args, _ = parse_args() 183 | 184 | custom_env: Dict[str, str] = {"HF_DATASETS_TRUST_REMOTE_CODE": "TRUE", 185 | "HF_TOKEN": args.hf_token 186 | } 187 | set_custom_env(custom_env) 188 | 189 | print("***** Printing adapter artifacts") 190 | 191 | # Run the command to list the adapter artifacts 192 | sb.run(["ls", "-ltr", args.adapterdir]) 193 | 194 | # Merge the trained adapters with the base model and save the result 195 | merge_and_save_model(args.model_id, args.adapterdir,"/opt/ml/model/merged/") 196 | -------------------------------------------------------------------------------- /workshops/building-rag-workflows-with-sagemaker-and-bedrock/03-03_raft-customization/scripts/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers==4.44.2 2 | datasets==2.18.0 3 | accelerate==0.33.0 4 | evaluate==0.4.1 5 |
bitsandbytes==0.43.3 6 | huggingface_hub==0.23.2 7 | trl==0.9.6 8 | peft==0.12.0 9 | wandb 10 | py7zr 11 | mlflow==2.16.0 12 | sagemaker-mlflow==0.1.0 -------------------------------------------------------------------------------- /workshops/building-rag-workflows-with-sagemaker-and-bedrock/README.md: -------------------------------------------------------------------------------- 1 | # Building RAG workflows with Amazon SageMaker AI and Amazon Bedrock 2 | 3 | Welcome to **Building Retrieval Augmented Generation (RAG) Workflows with Amazon SageMaker and Amazon Bedrock**! 4 | 5 | In this workshop, you will journey through all the steps of building the GenAI components of a RAG application, starting with experimentation and working your way through to the repeatable, scalable components that will become part of production applications. 6 | 7 | ![](images/rag-mind-map.png) 8 | 9 | You'll start small, working in a SageMaker Studio environment with a basic dataset to cover the fundamentals of embedding models, vector databases, and RAG orchestration. Once you have a basic application, your focus will shift to understanding and measuring the objective quality of its outputs, using different techniques for vector search and model output evaluation. Afterwards, you will build more scalable external components, which are critical for reaching the PoC/Pilot phases. Once those components are in place, you'll learn techniques to optimize the performance and quality of the system by adjusting parameters, and see where fine-tuning your models can be beneficial. Upon meeting performance and quality KPIs, you'll focus on real-world readiness by implementing safety and security measures, such as guardrails to protect inputs and outputs. 10 | -------------------------------------------------------------------------------- /workshops/building-rag-workflows-with-sagemaker-and-bedrock/SageMaker Training Hosting and Custom Model Import_V3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/building-rag-workflows-with-sagemaker-and-bedrock/SageMaker Training Hosting and Custom Model Import_V3.pdf -------------------------------------------------------------------------------- /workshops/building-rag-workflows-with-sagemaker-and-bedrock/images/rag-mind-map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/building-rag-workflows-with-sagemaker-and-bedrock/images/rag-mind-map.png -------------------------------------------------------------------------------- /workshops/distributed-training-deployment-on-sagemaker-ai/README.md: -------------------------------------------------------------------------------- 1 | # Distributed Training and Deployment on SageMaker AI 2 | 3 | > This content is available in [Distributed Training and Deployment on SageMaker AI]. The following is a synopsis of the content you will find by following the provided link. 4 | > Welcome to the "Distributed Training and Deployment on SageMaker AI" workshop! This publicly available, hands-on experience is designed for data scientists who are ready to harness the power of Large Language Models (LLMs) and experiment with different customization techniques on AWS.
5 | 6 | In this workshop, you'll dive into different fine-tuning techniques, deployment options, and evaluation by leveraging SageMaker AI capabilities! 7 | 8 | By the end of this workshop, you'll be able to: 9 | 10 | - Understand how to prepare datasets for different types of model customization techniques 11 | - Run fine-tuning workloads by leveraging SageMaker AI capabilities 12 | - Deploy and test your fine-tuned model 13 | 14 | ## Workshop Content 15 | 16 | 1. Solution 1: Large scale distributed training for Data/ML engineers using Amazon SageMaker JumpStart 17 | 2. Solution 2: Large scale distributed training for Resident Data Scientists using Amazon SageMaker Training 18 | 1. Option 1: Continued pre-training of LLMs using Amazon SageMaker Training 19 | 2. Option 2: Supervised fine-tuning of LLMs using Amazon SageMaker Training 20 | 3. Solution 3: Large scale distributed training for Researchers using Amazon SageMaker Hyperpod with EKS integration 21 | 22 | ## How to run the workshop 23 | 24 | This workshop follows a hands-on, self-paced format: each module contains Jupyter notebooks and code that you'll run in your own JupyterLab or Code Editor environment, and provides: 25 | 26 | - Step-by-step instructions and explanations 27 | - Code samples that you can run and modify 28 | - Links to additional resources 29 | 30 | **⚠️ Important**: Solution 3 requires an Amazon SageMaker Hyperpod cluster with EKS up and running in your AWS account. 31 | -------------------------------------------------------------------------------- /workshops/distributed-training-deployment-on-sagemaker-ai/solution-2-sagemaker-training/option-1-continued-pre-training/scripts/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers==4.50.2 2 | peft==0.14.0 3 | accelerate==1.3.0 4 | bitsandbytes==0.45.1 5 | datasets==3.2.0 6 | evaluate==0.4.3 7 | huggingface_hub[hf_transfer] 8 | mlflow 9 | safetensors>=0.5.2 10 | sagemaker==2.244.0 11 | sagemaker-mlflow==0.1.0 12 | sentencepiece==0.2.0 13 | scikit-learn==1.6.1 14 | tokenizers>=0.21.0 15 | trl==0.9.6 16 | psutil 17 | py7zr 18 | pynvml 19 | -------------------------------------------------------------------------------- /workshops/distributed-training-deployment-on-sagemaker-ai/solution-2-sagemaker-training/option-2-supervised-fine-tuning/scripts/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers==4.50.2 2 | peft==0.14.0 3 | accelerate==1.3.0 4 | bitsandbytes==0.45.1 5 | datasets==3.2.0 6 | evaluate==0.4.3 7 | huggingface_hub[hf_transfer] 8 | mlflow 9 | safetensors>=0.5.2 10 | sagemaker==2.244.0 11 | sagemaker-mlflow==0.1.0 12 | sentencepiece==0.2.0 13 | scikit-learn==1.6.1 14 | tokenizers>=0.21.0 15 | trl==0.9.6 16 | psutil 17 | py7zr 18 | pynvml 19 | xtarfile 20 | rouge-score -------------------------------------------------------------------------------- /workshops/distributed-training-deployment-on-sagemaker-ai/solution-2-sagemaker-training/option-2-supervised-fine-tuning/scripts/rouge/rouge.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Evaluate Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ ROUGE metric from Google Research github repo. """ 15 | 16 | # The dependencies in https://github.com/google-research/google-research/blob/master/rouge/requirements.txt 17 | import absl # Here to have a nice missing dependency error message early on 18 | import datasets 19 | import nltk # Here to have a nice missing dependency error message early on 20 | import numpy # Here to have a nice missing dependency error message early on 21 | import six # Here to have a nice missing dependency error message early on 22 | from rouge_score import rouge_scorer, scoring 23 | 24 | import evaluate 25 | 26 | 27 | _CITATION = """\ 28 | @inproceedings{lin-2004-rouge, 29 | title = "{ROUGE}: A Package for Automatic Evaluation of Summaries", 30 | author = "Lin, Chin-Yew", 31 | booktitle = "Text Summarization Branches Out", 32 | month = jul, 33 | year = "2004", 34 | address = "Barcelona, Spain", 35 | publisher = "Association for Computational Linguistics", 36 | url = "https://www.aclweb.org/anthology/W04-1013", 37 | pages = "74--81", 38 | } 39 | """ 40 | 41 | _DESCRIPTION = """\ 42 | ROUGE, or Recall-Oriented Understudy for Gisting Evaluation, is a set of metrics and a software package used for 43 | evaluating automatic summarization and machine translation software in natural language processing. 44 | The metrics compare an automatically produced summary or translation against a reference or a set of references (human-produced) summary or translation. 45 | 46 | Note that ROUGE is case insensitive, meaning that upper case letters are treated the same way as lower case letters. 47 | 48 | This metrics is a wrapper around Google Research reimplementation of ROUGE: 49 | https://github.com/google-research/google-research/tree/master/rouge 50 | """ 51 | 52 | _KWARGS_DESCRIPTION = """ 53 | Calculates average rouge scores for a list of hypotheses and references 54 | Args: 55 | predictions: list of predictions to score. Each prediction 56 | should be a string with tokens separated by spaces. 57 | references: list of reference for each prediction. Each 58 | reference should be a string with tokens separated by spaces. 59 | rouge_types: A list of rouge types to calculate. 60 | Valid names: 61 | `"rouge{n}"` (e.g. `"rouge1"`, `"rouge2"`) where: {n} is the n-gram based scoring, 62 | `"rougeL"`: Longest common subsequence based scoring. 63 | `"rougeLsum"`: rougeLsum splits text using `"\n"`. 64 | See details in https://github.com/huggingface/datasets/issues/617 65 | use_stemmer: Bool indicating whether Porter stemmer should be used to strip word suffixes. 
66 | use_aggregator: Return aggregates if this is set to True 67 | Returns: 68 | rouge1: rouge_1 (f1), 69 | rouge2: rouge_2 (f1), 70 | rougeL: rouge_l (f1), 71 | rougeLsum: rouge_lsum (f1) 72 | Examples: 73 | 74 | >>> rouge = evaluate.load('rouge') 75 | >>> predictions = ["hello there", "general kenobi"] 76 | >>> references = ["hello there", "general kenobi"] 77 | >>> results = rouge.compute(predictions=predictions, references=references) 78 | >>> print(results) 79 | {'rouge1': 1.0, 'rouge2': 1.0, 'rougeL': 1.0, 'rougeLsum': 1.0} 80 | """ 81 | 82 | 83 | class Tokenizer: 84 | """Helper class to wrap a callable into a class with a `tokenize` method as used by rouge-score.""" 85 | 86 | def __init__(self, tokenizer_func): 87 | self.tokenizer_func = tokenizer_func 88 | 89 | def tokenize(self, text): 90 | return self.tokenizer_func(text) 91 | 92 | 93 | @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION) 94 | class Rouge(evaluate.Metric): 95 | def _info(self): 96 | return evaluate.MetricInfo( 97 | description=_DESCRIPTION, 98 | citation=_CITATION, 99 | inputs_description=_KWARGS_DESCRIPTION, 100 | features=[ 101 | datasets.Features( 102 | { 103 | "predictions": datasets.Value("string", id="sequence"), 104 | "references": datasets.Sequence(datasets.Value("string", id="sequence")), 105 | } 106 | ), 107 | datasets.Features( 108 | { 109 | "predictions": datasets.Value("string", id="sequence"), 110 | "references": datasets.Value("string", id="sequence"), 111 | } 112 | ), 113 | ], 114 | codebase_urls=["https://github.com/google-research/google-research/tree/master/rouge"], 115 | reference_urls=[ 116 | "https://en.wikipedia.org/wiki/ROUGE_(metric)", 117 | "https://github.com/google-research/google-research/tree/master/rouge", 118 | ], 119 | ) 120 | 121 | def _compute( 122 | self, predictions, references, rouge_types=None, use_aggregator=True, use_stemmer=False, tokenizer=None 123 | ): 124 | if rouge_types is None: 125 | rouge_types = ["rouge1", "rouge2", "rougeL", "rougeLsum"] 126 | 127 | multi_ref = isinstance(references[0], list) 128 | 129 | if tokenizer is not None: 130 | tokenizer = Tokenizer(tokenizer) 131 | 132 | scorer = rouge_scorer.RougeScorer(rouge_types=rouge_types, use_stemmer=use_stemmer, tokenizer=tokenizer) 133 | if use_aggregator: 134 | aggregator = scoring.BootstrapAggregator() 135 | else: 136 | scores = [] 137 | 138 | for ref, pred in zip(references, predictions): 139 | if multi_ref: 140 | score = scorer.score_multi(ref, pred) 141 | else: 142 | score = scorer.score(ref, pred) 143 | if use_aggregator: 144 | aggregator.add_scores(score) 145 | else: 146 | scores.append(score) 147 | 148 | if use_aggregator: 149 | result = aggregator.aggregate() 150 | for key in result: 151 | result[key] = result[key].mid.fmeasure 152 | 153 | else: 154 | result = {} 155 | for key in scores[0]: 156 | result[key] = list(score[key].fmeasure for score in scores) 157 | 158 | return result -------------------------------------------------------------------------------- /workshops/distributed-training-deployment-on-sagemaker-ai/solution-3-sagemaker-hyperpod-k8/README.md: -------------------------------------------------------------------------------- 1 | ## Prerequisites 2 | 3 | - Amazon SageMaker Studio domain with a user profile 4 | - Access to Amazon SageMaker Hyperpod with EKS (Elastic Kubernetes Service) 5 | 6 | Please follow [SageMaker Studio + Hyperpod Integration](https://catalog.workshops.aws/sagemaker-hyperpod-eks/en-US/11-tips/08-studio-integration) 7 | 8 | ## Required 
Changes Before Running 9 | 10 | Before running the scripts, you need to make the following changes: 11 | 12 | ### 1. Copy the `solution-3-sagemaker-hyperpod-k8` folder into the FSx for Lustre volume 13 | 14 | To leverage the integration through the shared FSx for Lustre volume between Amazon SageMaker Studio and SageMaker Hyperpod, copy the `solution-3-sagemaker-hyperpod-k8` folder and its contents into the FSx for Lustre volume mounted on both SageMaker Studio and the Hyperpod cluster. 15 | 16 | ### 2. Update `args.yaml` 17 | 18 | Replace all instances of `<user-profile>` with your SageMaker Studio user profile name: 19 | 20 | ```yaml 21 | model_id: "/data/<user-profile>/solution-3-sagemaker-hyperpod-k8/DeepSeek-R1-Distill-Qwen-7B" 22 | output_dir: "/data/<user-profile>/solution-3-sagemaker-hyperpod-k8/model/" 23 | train_dataset_path: "/data/<user-profile>/solution-3-sagemaker-hyperpod-k8/data/train/" 24 | test_dataset_path: "/data/<user-profile>/solution-3-sagemaker-hyperpod-k8/data/test/" 25 | ``` 26 | 27 | ### 3. Update `pod-finetuning.yaml` 28 | 29 | Replace all instances of `<user-profile>` with your SageMaker Studio user profile name: 30 | 31 | ```yaml 32 | command: 33 | - /bin/bash 34 | - -c 35 | - | 36 | pip install -r /data/<user-profile>/solution-3-sagemaker-hyperpod-k8/requirements.txt && \ 37 | torchrun \ 38 | --nnodes=2 \ 39 | --nproc_per_node=4 \ 40 | /data/<user-profile>/solution-3-sagemaker-hyperpod-k8/scripts/train.py \ 41 | --config /data/<user-profile>/solution-3-sagemaker-hyperpod-k8/args.yaml 42 | ``` 43 | -------------------------------------------------------------------------------- /workshops/distributed-training-deployment-on-sagemaker-ai/solution-3-sagemaker-hyperpod-k8/args.yaml: -------------------------------------------------------------------------------- 1 | model_id: "/data/<user-profile>/solution-3-sagemaker-hyperpod-k8/DeepSeek-R1-Distill-Qwen-7B" # Hugging Face model id 2 | mlflow_uri: "" 3 | mlflow_experiment_name: "deepseek-r1-distill-qwen-7b-sft" 4 | # sagemaker specific parameters 5 | output_dir: "/data/<user-profile>/solution-3-sagemaker-hyperpod-k8/model/" # path to where SageMaker will upload the model 6 | train_dataset_path: "/data/<user-profile>/solution-3-sagemaker-hyperpod-k8/data/train/" # path to where FSx saves train dataset 7 | test_dataset_path: "/data/<user-profile>/solution-3-sagemaker-hyperpod-k8/data/test/" # path to where FSx saves test dataset 8 | # training parameters 9 | lora_r: 8 10 | lora_alpha: 16 11 | lora_dropout: 0.1 12 | learning_rate: 2e-4 # learning rate 13 | num_train_epochs: 2 # number of training epochs 14 | per_device_train_batch_size: 2 # batch size per device during training 15 | per_device_eval_batch_size: 2 # batch size for evaluation 16 | gradient_accumulation_steps: 2 # number of steps before performing a backward/update pass 17 | gradient_checkpointing: true # use gradient checkpointing 18 | bf16: true # use bfloat16 precision 19 | tf32: false # use tf32 precision 20 | fsdp: "full_shard auto_wrap offload" 21 | fsdp_config: 22 | backward_prefetch: "backward_pre" 23 | cpu_ram_efficient_loading: true 24 | offload_params: true 25 | forward_prefetch: false 26 | use_orig_params: true 27 | merge_weights: true # merge weights in the base model 28 | -------------------------------------------------------------------------------- /workshops/distributed-training-deployment-on-sagemaker-ai/solution-3-sagemaker-hyperpod-k8/download_model.py: -------------------------------------------------------------------------------- 1 | from huggingface_hub import snapshot_download 2 | import os 3 | 4 | 5 | MODEL_ID = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B" 6 | 7 | 8 | def download_model(path, model_name): 9 | print("Downloading model ", model_name) 10 | 11 | os.makedirs(path, exist_ok=True) 12 | 13 |
snapshot_download(repo_id=model_name, local_dir=path) 14 | 15 | print(f"Model {model_name} downloaded under {path}") 16 | 17 | 18 | if __name__ == "__main__": 19 | script_path = os.path.abspath(__file__) 20 | script_dir = os.path.dirname(script_path) 21 | script_dir = f"/mnt/custom-file-systems/{'/'.join(script_dir.split('/')[4:])}" 22 | 23 | download_model(f"{script_dir}/{MODEL_ID.split('/')[-1]}", MODEL_ID) 24 | -------------------------------------------------------------------------------- /workshops/distributed-training-deployment-on-sagemaker-ai/solution-3-sagemaker-hyperpod-k8/pod-finetuning.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: etcd 5 | spec: 6 | ports: 7 | - name: etcd-client-port 8 | port: 2379 9 | protocol: TCP 10 | targetPort: 2379 11 | selector: 12 | app: etcd 13 | 14 | --- 15 | apiVersion: apps/v1 16 | kind: Deployment 17 | metadata: 18 | labels: 19 | app: etcd 20 | name: etcd 21 | spec: 22 | replicas: 1 23 | selector: 24 | matchLabels: 25 | app: etcd 26 | template: 27 | metadata: 28 | labels: 29 | app: etcd 30 | spec: 31 | containers: 32 | - name: etcd 33 | command: ["/usr/local/bin/etcd"] 34 | args: 35 | - "--data-dir" 36 | - "/var/lib/etcd" 37 | - "--enable-v2" 38 | - "--listen-client-urls" 39 | - "http://0.0.0.0:2379" 40 | - "--advertise-client-urls" 41 | - "http://0.0.0.0:2379" 42 | - "--initial-cluster-state" 43 | - "new" 44 | image: quay.io/coreos/etcd:v3.5.19 45 | ports: 46 | - containerPort: 2379 47 | name: client 48 | protocol: TCP 49 | - containerPort: 2380 50 | name: server 51 | protocol: TCP 52 | restartPolicy: Always 53 | --- 54 | apiVersion: "kubeflow.org/v1" 55 | kind: PyTorchJob 56 | metadata: 57 | name: deepseek-r1-distill-qwen-7b-fine-tuning 58 | spec: 59 | elasticPolicy: 60 | rdzvBackend: etcd 61 | rdzvHost: etcd 62 | rdzvPort: 2379 63 | minReplicas: 1 64 | maxReplicas: 64 65 | maxRestarts: 100 66 | metrics: 67 | - type: Resource 68 | resource: 69 | name: cpu 70 | target: 71 | type: Utilization 72 | averageUtilization: 90 73 | pytorchReplicaSpecs: 74 | Worker: 75 | replicas: 2 76 | restartPolicy: OnFailure 77 | template: 78 | metadata: 79 | labels: 80 | app: deepseek-r1-distill-qwen-7b-fine-tuning 81 | spec: 82 | volumes: 83 | - name: shmem 84 | hostPath: 85 | path: /dev/shm 86 | - name: local 87 | hostPath: 88 | path: /mnt/k8s-disks/0 89 | - name: fsx-volume 90 | persistentVolumeClaim: 91 | claimName: fsx-claim 92 | serviceAccountName: eks-hyperpod-sa # Must match association 93 | containers: 94 | - name: pytorch 95 | image: 763104351884.dkr.ecr.us-east-1.amazonaws.com/pytorch-training:2.5.1-gpu-py311-cu124-ubuntu22.04-ec2 96 | imagePullPolicy: Always 97 | resources: 98 | requests: 99 | nvidia.com/gpu: 1 100 | vpc.amazonaws.com/efa: 1 101 | limits: 102 | nvidia.com/gpu: 1 103 | vpc.amazonaws.com/efa: 1 104 | env: 105 | - name: LOGLEVEL 106 | value: "DEBUG" 107 | - name: TORCH_DISTRIBUTED_DEBUG 108 | value: "DETAIL" 109 | - name: TORCH_NCCL_ENABLE_MONITORING 110 | value: "1" 111 | - name: TORCH_NCCL_TRACE_BUFFER_SIZE 112 | value: "20000" 113 | - name: TORCH_NCCL_DUMP_ON_TIMEOUT 114 | value: "1" 115 | - name: TORCH_NCCL_DEBUG_INFO_TEMP_FILE 116 | value: "/local/nccl_trace_rank_" 117 | - name: PYTORCH_CUDA_ALLOC_CONF 118 | value: "expandable_segments:True" 119 | - name: NCCL_DEBUG 120 | value: "INFO" 121 | - name: NCCL_SOCKET_IFNAME 122 | value: "^lo" 123 | - name: TORCH_NCCL_ASYNC_ERROR_HANDLING 124 | value: "1" 125 | command: 126 | - /bin/bash 127 
| - -c 128 | - | 129 | pip install -r /data/<user-profile>/solution-3-sagemaker-hyperpod-k8/requirements.txt && \ 130 | torchrun \ 131 | --nnodes=2 \ 132 | --nproc_per_node=4 \ 133 | /data/<user-profile>/solution-3-sagemaker-hyperpod-k8/scripts/train.py \ 134 | --config /data/<user-profile>/solution-3-sagemaker-hyperpod-k8/args.yaml 135 | volumeMounts: 136 | - name: shmem 137 | mountPath: /dev/shm 138 | - name: local 139 | mountPath: /local 140 | - name: fsx-volume 141 | mountPath: /data 142 | -------------------------------------------------------------------------------- /workshops/distributed-training-deployment-on-sagemaker-ai/solution-3-sagemaker-hyperpod-k8/requirements.txt: -------------------------------------------------------------------------------- 1 | python-etcd 2 | transformers==4.48.2 3 | peft==0.14.0 4 | accelerate==1.3.0 5 | bitsandbytes==0.45.1 6 | datasets==3.2.0 7 | evaluate==0.4.3 8 | huggingface_hub[hf_transfer] 9 | mlflow 10 | safetensors>=0.4.5 11 | sagemaker==2.243.0 12 | sagemaker-mlflow==0.1.0 13 | sentencepiece==0.2.0 14 | scikit-learn==1.6.1 15 | tokenizers>=0.21.0 16 | trl==0.9.6 17 | psutil 18 | py7zr 19 | pynvml 20 | wandb 21 | -------------------------------------------------------------------------------- /workshops/distributed-training-deployment-on-sagemaker-ai/solution-3-sagemaker-hyperpod-k8/scripts/dataprep.py: -------------------------------------------------------------------------------- 1 | from datasets import load_dataset, Dataset, DatasetDict 2 | import os 3 | import pandas as pd 4 | from random import randint 5 | from sklearn.model_selection import train_test_split 6 | from transformers import AutoTokenizer 7 | 8 | DATASET_NAME = "NousResearch/hermes-function-calling-v1" 9 | MODEL_ID = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B" 10 | HF_TOKEN = "" 11 | 12 | 13 | def read_dataset(dataset_name): 14 | dataset = load_dataset( 15 | dataset_name, data_files={"train": ["json-mode-agentic.json"]} 16 | ) 17 | 18 | df = pd.DataFrame(dataset["train"]) 19 | 20 | train, test = train_test_split(df, test_size=0.1, random_state=42) 21 | 22 | return train, test 23 | 24 | 25 | def prompt_format(df, tokenizer): 26 | """Apply the model's chat template to each conversation in ``df``. 27 | 28 | Returns a ``Dataset`` with a single ``text`` column containing the 29 | fully templated conversations. Mirrors the formatting applied in the 30 | ``__main__`` block below, packaged as a reusable helper. 31 | """ 32 | texts = df["conversations"].apply( 33 | lambda conv: tokenizer.apply_chat_template(transform_conversation(conv), tokenize=False) 34 | ) 35 | 36 | # Build a Hugging Face Dataset holding only the templated text column. 37 | dataset = Dataset.from_pandas( 38 | pd.DataFrame({"text": texts}).reset_index(drop=True) 39 | ) 40 | 41 | return dataset 42 | 43 | 44 | 45 | def transform_conversation(conversation): 46 | transformed = [] 47 | for msg in conversation: 48 | # Create a new dictionary with the renamed keys 49 | new_msg = { 50 | "role": ( 51 | "user" 52 | if msg["from"] == "human" 53 | else "assistant" if msg["from"] == "gpt" else "system" 54 | ), 55 | "content": msg["value"], 56 | } 57 | transformed.append(new_msg) 58 | return transformed 59 | 60 | 61 | if __name__ == "__main__": 62 | if HF_TOKEN != "": 63 | os.environ.update({"HF_TOKEN": HF_TOKEN}) 64 | 65 | script_path = os.path.abspath(__file__) 66 | script_dir = os.path.dirname(script_path) 67 | parent_dir = os.path.dirname(script_dir) 68 | parent_dir = f"/mnt/custom-file-systems/{'/'.join(parent_dir.split('/')[4:])}" 69 | 70 | tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) 71 | 72 | train, test =
read_dataset(DATASET_NAME) 73 | 74 | train["conversations"] = train["conversations"].apply(transform_conversation) 75 | test["conversations"] = test["conversations"].apply(transform_conversation) 76 | 77 | train["text"] = train["conversations"].apply( 78 | lambda x: tokenizer.apply_chat_template(x, tokenize=False) 79 | ) 80 | test["text"] = test["conversations"].apply( 81 | lambda x: tokenizer.apply_chat_template(x, tokenize=False) 82 | ) 83 | 84 | train = train[["text"]] 85 | test = test[["text"]] 86 | 87 | train_dataset = Dataset.from_pandas(train) 88 | test_dataset = Dataset.from_pandas(test) 89 | 90 | train_dataset.to_json(f"{parent_dir}/data/train/dataset.json", orient="records") 91 | test_dataset.to_json(f"{parent_dir}/data/test/dataset.json", orient="records") 92 | -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/0-setup/setup-sagemaker-endpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "09c6bf13", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "%pip install sagemaker boto3 litellm aiohttp -qU" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "id": "5eb5e6d3", 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "from IPython import get_ipython\n", 21 | "get_ipython().kernel.do_shutdown(True)" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "id": "bb51e51a", 27 | "metadata": {}, 28 | "source": [ 29 | "## Deploy the model from SageMaker JumpStart on a SageMaker Inference endpoint\n", 30 | "\n", 31 | "> Note: skip the cell below if you have already deployed your model." 
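If you're not sure whether an endpoint from a previous run is still live, a quick check with `boto3` can tell you whether it's safe to skip the deployment cell. This helper is a sketch, not part of the original notebook; `describe_endpoint` raises a `ClientError` when the endpoint does not exist.

```python
# Sketch: check whether a SageMaker endpoint is already in service before redeploying.
# Not part of the workshop notebook; pass the endpoint name you used previously.
import boto3
from botocore.exceptions import ClientError

def endpoint_in_service(endpoint_name: str) -> bool:
    sm_client = boto3.client("sagemaker")
    try:
        status = sm_client.describe_endpoint(EndpointName=endpoint_name)["EndpointStatus"]
        return status == "InService"
    except ClientError:
        # describe_endpoint raises ValidationException when the endpoint doesn't exist
        return False
```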
32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "id": "fd08268e", 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "from sagemaker.jumpstart.model import JumpStartModel\n", 42 | "from sagemaker.serializers import JSONSerializer\n", 43 | "from sagemaker.deserializers import JSONDeserializer\n", 44 | "from sagemaker.enums import EndpointType\n", 45 | "from sagemaker.compute_resource_requirements.resource_requirements import ResourceRequirements\n", 46 | "\n", 47 | "\n", 48 | "resources = ResourceRequirements(\n", 49 | " requests = {\n", 50 | " \"num_accelerators\": 4, # Number of accelerators required\n", 51 | " \"memory\": 96*1024, # Minimum memory required in Mb (required)\n", 52 | " \"copies\": 1,\n", 53 | " }\n", 54 | ")\n", 55 | "\n", 56 | "model = JumpStartModel(\n", 57 | " model_id=\"huggingface-llm-mistral-small-24B-Instruct-2501\", model_version=\"2.0.1\",\n", 58 | " instance_type=\"ml.g5.12xlarge\"\n", 59 | ")\n", 60 | "predictor = model.deploy(\n", 61 | " accept_eula=True,\n", 62 | " initial_instance_count=1,\n", 63 | " instance_type=\"ml.g5.12xlarge\",\n", 64 | " serializer=JSONSerializer(), deserializer=JSONDeserializer(),\n", 65 | " endpoint_type=EndpointType.INFERENCE_COMPONENT_BASED,\n", 66 | " resources=resources,\n", 67 | " managed_instance_scaling={\n", 68 | " \"MinInstanceCount\": 0,\n", 69 | " \"MaxInstanceCount\": 1\n", 70 | " }\n", 71 | ")" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "id": "0e15b39c", 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "endpoint_name = predictor.endpoint_name\n", 82 | "component_name = predictor.component_name\n", 83 | "print(f\"Endpoint name: {endpoint_name}\")\n", 84 | "print(f\"Inference component name: {component_name}\")" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "id": "ac12f8be", 90 | "metadata": {}, 91 | "source": [ 92 | "
\n", 93 | "⚠️ Note: deployment will take 5~7 minutes. Take note of the endpoint name and the inference component names, as they will be needed later.\n", 94 | "
" 95 | ] 96 | } 97 | ], 98 | "metadata": { 99 | "language_info": { 100 | "name": "python" 101 | } 102 | }, 103 | "nbformat": 4, 104 | "nbformat_minor": 5 105 | } 106 | -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/1-inference/2-inference-sagemaker.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "8038eb9f-d925-437b-8f2e-e9b4e78c8976", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "%pip install sagemaker boto3 litellm aiohttp -qU" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "id": "681fe3e4", 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "from IPython import get_ipython\n", 21 | "get_ipython().kernel.do_shutdown(True)" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "id": "18881de7-0fc6-4490-9992-a12fd05da7eb", 27 | "metadata": {}, 28 | "source": [ 29 | "## Inference with Amazon SageMaker AI" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "id": "67e0d089-1492-4615-b952-d12e96278dcb", 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "import boto3\n", 40 | "from sagemaker.session import Session\n", 41 | "from sagemaker.predictor import Predictor\n", 42 | "from sagemaker.serializers import JSONSerializer\n", 43 | "from sagemaker.deserializers import JSONDeserializer\n", 44 | "\n", 45 | "endpoint_name = \"YOUR-ENDPOINT-NAME-HERE\"\n", 46 | "component_name = \"YOUR-INFERENCE-COMPONENT-NAME-HERE\"\n", 47 | "\n", 48 | "boto_session = boto3.session.Session(region_name=boto3.Session().region_name)\n", 49 | "session = Session(boto_session=boto_session)\n", 50 | "\n", 51 | "predictor = Predictor(\n", 52 | " sagemaker_session=session,\n", 53 | " endpoint_name=endpoint_name, component_name=component_name,\n", 54 | " serializer=JSONSerializer(), deserializer=JSONDeserializer()\n", 55 | ")" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "id": "6a99d49a-6b06-49cb-b32c-412e4a0a6e44", 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "%%time\n", 66 | "prompt = \"What is the town of Bari, Italy, known for?\"\n", 67 | "payload = {\n", 68 | " \"messages\": [\n", 69 | " {\n", 70 | " \"role\": \"user\",\n", 71 | " \"content\": prompt\n", 72 | " }\n", 73 | " ],\n", 74 | " \"max_tokens\": 4*1024,\n", 75 | " \"temperature\": 0.1,\n", 76 | " \"top_p\": 0.9,\n", 77 | "}\n", 78 | "\n", 79 | "response = predictor.predict(payload)\n", 80 | "print(response['choices'][0]['message']['content'])" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "id": "9712cd57-f8d8-4f60-8813-91e2951092cb", 86 | "metadata": {}, 87 | "source": [ 88 | "### Using Boto3" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "id": "a5e86c2f-b0cf-428e-b06b-4ad74637c1a6", 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "%%time\n", 99 | "import boto3\n", 100 | "import json\n", 101 | "\n", 102 | "payload = {\n", 103 | " \"inputs\": \"What is the town of Bari, Italy, known for? 
Provide a short answer.\",\n", 104 | " \"parameters\": {\n", 105 | " \"max_new_tokens\": 4*1024,\n", 106 | " \"top_p\": 0.9,\n", 107 | " \"temperature\": 0.2,\n", 108 | " }\n", 109 | "}\n", 110 | "\n", 111 | "runtime = boto3.client('sagemaker-runtime', region_name=boto3.Session().region_name)\n", 112 | "response = runtime.invoke_endpoint(\n", 113 | " EndpointName=endpoint_name,\n", 114 | " InferenceComponentName=component_name or None,\n", 115 | " ContentType='application/json',\n", 116 | " Body=json.dumps(payload)\n", 117 | ")\n", 118 | "\n", 119 | "result = json.loads(response['Body'].read().decode())\n", 120 | "print(result['generated_text'])" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "id": "9fe7fd8e-de2c-4353-8ea9-18455efc7db7", 126 | "metadata": {}, 127 | "source": [ 128 | "### Using Boto3 and the Messages API (for compatible models only)" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "id": "fcff55c3-5610-4298-9880-a83668c34a63", 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "%%time\n", 139 | "payload = {\n", 140 | " \"messages\": [\n", 141 | " {\"role\": \"system\", \"content\": \"You are a helpful and honest assistant.\"},\n", 142 | " {\"role\": \"user\", \"content\": \"What is the town of Bari, Italy, known for? Provide a short answer.\"}\n", 143 | " ],\n", 144 | " \"max_tokens\": 4*1024,\n", 145 | " \"top_p\": 0.9,\n", 146 | " \"temperature\": 0.6,\n", 147 | "}\n", 148 | "\n", 149 | "response = runtime.invoke_endpoint(\n", 150 | " EndpointName=endpoint_name,\n", 151 | " InferenceComponentName=component_name,\n", 152 | " ContentType='application/json',\n", 153 | " Body=json.dumps(payload)\n", 154 | ")\n", 155 | "\n", 156 | "result = json.loads(response['Body'].read().decode())\n", 157 | "print(result['choices'][0]['message'])" 158 | ] 159 | }, 160 | { 161 | "cell_type": "markdown", 162 | "id": "0ee0c9e7-c4b4-4401-a666-0f561bb8f24e", 163 | "metadata": {}, 164 | "source": [ 165 | "## Using LiteLLM" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "id": "0c3f2f8e-e740-46b8-b136-666de9613c13", 172 | "metadata": {}, 173 | "outputs": [], 174 | "source": [ 175 | "from litellm import completion\n", 176 | "\n", 177 | "\n", 178 | "response = completion(\n", 179 | " model=f\"sagemaker/{endpoint_name}\", \n", 180 | " model_id=component_name,\n", 181 | " messages=[\n", 182 | " {\"role\": \"system\", \"content\": \"You are a helpful and honest assistant.\"},\n", 183 | " {\"role\": \"user\", \"content\": \"What is the town of Bari, Italy, known for? Provide a short answer.\"}\n", 184 | " ],\n", 185 | " temperature=0.2,\n", 186 | " max_tokens=1024\n", 187 | ")\n", 188 | "response.choices[0].message.content" 189 | ] 190 | }, 191 | { 192 | "cell_type": "markdown", 193 | "id": "361da444", 194 | "metadata": {}, 195 | "source": [ 196 | "
\n", 197 | "⚠️ Important: as of LiteLLM v1.67.2, `sagemaker_chat` provider does not not correctly pass the inference component name, causing `HTTPStatusError: Client error '400 Bad Request'`. Please use `sagemaker` provider instead.\n", 198 | "
" 199 | ] 200 | } 201 | ], 202 | "metadata": { 203 | "kernelspec": { 204 | "display_name": ".venv", 205 | "language": "python", 206 | "name": "python3" 207 | }, 208 | "language_info": { 209 | "codemirror_mode": { 210 | "name": "ipython", 211 | "version": 3 212 | }, 213 | "file_extension": ".py", 214 | "mimetype": "text/x-python", 215 | "name": "python", 216 | "nbconvert_exporter": "python", 217 | "pygments_lexer": "ipython3", 218 | "version": "3.12.9" 219 | } 220 | }, 221 | "nbformat": 4, 222 | "nbformat_minor": 5 223 | } 224 | -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/1-inference/README.md: -------------------------------------------------------------------------------- 1 | # Inference with Amazon Bedrock and Amazon SageMaker AI 2 | 3 | By running the notebooks in this folder, you will learn: 4 | 5 | - how to invoke an Amazon Bedrock model using AWS SDK for Python (`boto3`) 6 | - how to invoke an Amazon Bedrock model using LiteLLM 7 | - how to invoke a model hosted on Amazon SageMaker AI inference endpoints using AWS SDK for Python (`boto3`) 8 | - how to invoke a model hosted on Amazon SageMaker AI inference endpoints using the Amazon SageMaker Python SDK 9 | - how to invoke a model hosted on Amazon SageMaker AI inference endpoints using LiteLLM 10 | -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/2-tool-calling/2-tool-calling-sagemaker.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "425e8538", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "%pip install sagemaker boto3 sagemaker litellm aiohttp -qU" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "id": "d2c66240", 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "from IPython import get_ipython\n", 21 | "get_ipython().kernel.do_shutdown(True)" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "id": "5fbb3aca", 27 | "metadata": {}, 28 | "source": [ 29 | "# Tool calling with Amazon SageMaker AI\n", 30 | "\n", 31 | "
\n", 32 | "
Make sure you've deployed the model as described in the previous lab before proceeding.
\n", 33 | "
\n", 34 | "\n", 35 | "Amazon SageMaker AI APIs do not natively support tool calling. To achieve this, we have to embed the tool definition in the prompt we send to the model. We recommend using models that have been fine-tuned for function calling in order to make sure tool calling works as expected." 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "id": "d9e893fb", 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "try: \n", 46 | " predictor\n", 47 | "except:\n", 48 | " import boto3\n", 49 | " from sagemaker.session import Session\n", 50 | " from sagemaker.predictor import Predictor\n", 51 | " from sagemaker.serializers import JSONSerializer\n", 52 | " from sagemaker.deserializers import JSONDeserializer\n", 53 | " \n", 54 | " endpoint_name = input(\"> Enter your endpoint name: \")\n", 55 | " component_name = input(\"> Enter your inference component name (leave empty if not using a component): \") or None\n", 56 | "\n", 57 | " boto_session = boto3.session.Session(region_name=boto3.Session().region_name)\n", 58 | " session = Session(boto_session=boto_session)\n", 59 | " \n", 60 | " predictor = Predictor(\n", 61 | " sagemaker_session=session,\n", 62 | " endpoint_name=endpoint_name, component_name=component_name,\n", 63 | " serializer=JSONSerializer(), deserializer=JSONDeserializer()\n", 64 | " )" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "id": "2a5a2ad1-b58a-42e3-a5b4-6e44fd2b2cce", 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "def get_top_song(sign):\n", 75 | " \"\"\"Returns the most popular song for the requested station.\n", 76 | " Args:\n", 77 | " call_sign (str): The call sign for the station for which you want\n", 78 | " the most popular song.\n", 79 | "\n", 80 | " Returns:\n", 81 | " response (json): The most popular song and artist.\n", 82 | " \"\"\"\n", 83 | "\n", 84 | " song = \"\"\n", 85 | " artist = \"\"\n", 86 | " if sign == 'WZPZ':\n", 87 | " song = \"Elemental Hotel\"\n", 88 | " artist = \"8 Storey Hike\"\n", 89 | "\n", 90 | " else:\n", 91 | " raise Exception(f\"Station {sign} not found.\")\n", 92 | "\n", 93 | " return {\n", 94 | " \"song\": song,\n", 95 | " \"artist\": artist\n", 96 | " }" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "id": "61008585-1216-4b43-9cf6-cbeef6133a9c", 102 | "metadata": {}, 103 | "source": [ 104 | "In order for the LLM to know that it can use this tool, we have to pass the tool definition to the LLM." 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "id": "8dfe3413-cb63-47b1-8050-88170e6c6fbf", 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [ 114 | "tools = [\n", 115 | " {\n", 116 | " \"type\": \"function\",\n", 117 | " \"function\": {\n", 118 | " \"name\": \"get_top_song\",\n", 119 | " \"description\": \"Get the most popular song played on a radio station.\",\n", 120 | " \"parameters\": {\n", 121 | " \"type\": \"object\",\n", 122 | " \"properties\": {\n", 123 | " \"sign\": {\n", 124 | " \"type\": \"string\",\n", 125 | " \"description\": \"The call sign for the radio station for which you want the most popular song. Example calls signs are WZPZ and WKRP.\"\n", 126 | " }\n", 127 | " },\n", 128 | " \"required\": [\"sign\"],\n", 129 | " },\n", 130 | " },\n", 131 | " }\n", 132 | "]" 133 | ] 134 | }, 135 | { 136 | "cell_type": "markdown", 137 | "id": "087c42f5-f61a-49f9-8225-49c15ffca562", 138 | "metadata": {}, 139 | "source": [ 140 | "Now we can start conversing with the model." 
141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": null, 146 | "id": "061cd5f3-4b56-4670-ab13-56f7ea0be237", 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [ 150 | "input_text = \"What is the most popular song on WZPZ?\"" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "id": "b72afc20-d847-48e3-96e1-1abc9e4252a1", 157 | "metadata": {}, 158 | "outputs": [], 159 | "source": [ 160 | "from datetime import datetime\n", 161 | "\n", 162 | "system_prompt = \"\"\"\\\n", 163 | "You are an AI assistant, created by AWS and powered by Amazon SageMaker AI.\n", 164 | "Your goal is to help the user by answering their questions honestly, helpfully and truthfully.\n", 165 | "The current date is {currentDateTime} .\n", 166 | "\n", 167 | "Follow these principles when responding to queries:\n", 168 | "1. Avoid tool calls if not needed\n", 169 | "2. If uncertain, answer normally and offer to use tools\n", 170 | "3. Always use the best tools for the query\n", 171 | "\"\"\"\n", 172 | "messages = [\n", 173 | " {'role':'system', 'content':system_prompt.format(currentDateTime=datetime.now())},\n", 174 | " {'role':'user', 'content':input_text}\n", 175 | "]\n", 176 | "payload = {'messages': messages, 'max_tokens': 4*1024, 'tools':tools, 'tool_choice':'auto'}" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": null, 182 | "id": "257853dd-365e-4843-bf10-394807aead0d", 183 | "metadata": { 184 | "scrolled": true 185 | }, 186 | "outputs": [], 187 | "source": [ 188 | "import boto3, json\n", 189 | "\n", 190 | "sagemaker_runtime = boto3.client(\"sagemaker-runtime\", region_name=boto3.Session().region_name)\n", 191 | "response = sagemaker_runtime.invoke_endpoint(\n", 192 | " EndpointName=endpoint_name,\n", 193 | " InferenceComponentName=component_name or None,\n", 194 | " ContentType=\"application/json\",\n", 195 | " Body=json.dumps(payload)\n", 196 | ")\n", 197 | "output = json.loads(response['Body'].read().decode())\n", 198 | "output" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "id": "f3903050-6e51-43d4-a366-7cc6955d29fc", 205 | "metadata": {}, 206 | "outputs": [], 207 | "source": [ 208 | "clean_message = {k: v or \"Thinking ...\" for k, v in output['choices'][0]['message'].items() if k in ['role', 'content']}\n", 209 | "messages.append(clean_message)\n", 210 | "messages" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "id": "78effb0c-13f8-497a-a14f-733702423161", 217 | "metadata": {}, 218 | "outputs": [], 219 | "source": [ 220 | "# If stop_reason == \"tool_calls\", then you need to perform tool calling!\n", 221 | "stop_reason = output['choices'][0]['finish_reason']\n", 222 | "tool_calls = output['choices'][0]['message']['tool_calls']\n", 223 | "stop_reason, tool_calls" 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": null, 229 | "id": "43c11d16-7a35-4cad-8652-bda29d9b6df1", 230 | "metadata": {}, 231 | "outputs": [], 232 | "source": [ 233 | "import sys\n", 234 | "if stop_reason == \"tool_calls\":\n", 235 | " tool_calls = output['choices'][0]['message']['tool_calls']\n", 236 | " for tool_call in tool_calls:\n", 237 | " if tool_call['type'] == 'function':\n", 238 | " name = tool_call['function']['name']\n", 239 | " args = json.loads(tool_call['function']['arguments'])\n", 240 | " # Execute the function with name from tool_call['function']['name']\n", 241 | " tool_foo = getattr(sys.modules[__name__], name)\n", 
242 | " output = tool_foo(**args)\n", 243 | " output" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": null, 249 | "id": "67923d59-5040-49d5-a231-cc7e3ffd21ff", 250 | "metadata": {}, 251 | "outputs": [], 252 | "source": [ 253 | "tool_result_message = {\n", 254 | " \"role\": \"user\", \"content\": json.dumps(output)\n", 255 | "}\n", 256 | "messages.append(tool_result_message)" 257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": null, 262 | "id": "3536c762", 263 | "metadata": {}, 264 | "outputs": [], 265 | "source": [ 266 | "messages" 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": null, 272 | "id": "deeab939-6c51-4698-872d-70a29faca6f4", 273 | "metadata": {}, 274 | "outputs": [], 275 | "source": [ 276 | "payload = {'messages': messages, 'max_tokens': 4*1024}\n", 277 | "response = sagemaker_runtime.invoke_endpoint(\n", 278 | " EndpointName=endpoint_name,\n", 279 | " InferenceComponentName=component_name or None,\n", 280 | " ContentType=\"application/json\",\n", 281 | " Body=json.dumps(payload)\n", 282 | ")\n", 283 | "output = json.loads(response['Body'].read().decode())\n", 284 | "output" 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": null, 290 | "id": "a4e4f552", 291 | "metadata": {}, 292 | "outputs": [], 293 | "source": [] 294 | } 295 | ], 296 | "metadata": { 297 | "kernelspec": { 298 | "display_name": ".venv", 299 | "language": "python", 300 | "name": "python3" 301 | }, 302 | "language_info": { 303 | "codemirror_mode": { 304 | "name": "ipython", 305 | "version": 3 306 | }, 307 | "file_extension": ".py", 308 | "mimetype": "text/x-python", 309 | "name": "python", 310 | "nbconvert_exporter": "python", 311 | "pygments_lexer": "ipython3", 312 | "version": "3.12.9" 313 | } 314 | }, 315 | "nbformat": 4, 316 | "nbformat_minor": 5 317 | } 318 | -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/2-tool-calling/README.md: -------------------------------------------------------------------------------- 1 | # Tool Calling with Amazon Bedrock and Amazon SageMaker AI 2 | 3 | By running the notebooks in this folder, you will learn: 4 | 5 | - how to perform tool calling with an Amazon Bedrock model using AWS SDK for Python (`boto3`) 6 | - how to perform tool calling with an Amazon Bedrock model using LiteLLM 7 | - how to perform tool calling with a model hosted on Amazon SageMaker AI inference endpoints using AWS SDK for Python (`boto3`) 8 | - how to invoke a model hosted on Amazon SageMaker AI inference endpoints using the Amazon SageMaker Python SDK 9 | -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/3-agent-patterns/README.md: -------------------------------------------------------------------------------- 1 | # Agentic Workflow Patterns 2 | 3 | Agentic workflows represent a sophisticated approach to task automation that combines the power of LLMs with structured processes and tool integration. This section explores the fundamental characteristics and patterns that make agentic workflows effective. 
4 | -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/4-frameworks/README.md: -------------------------------------------------------------------------------- 1 | # Using open-source frameworks 2 | 3 | In this lab, you will learn how to use open-source frameworks with Amazon Bedrock and Amazon SageMaker AI to build autonomous agents. 4 | -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/4-frameworks/agno-ai/agno-ai-logistics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Getting Started with Agno AI\n", 8 | "\n", 9 | "[Agno](https://www.agno.com/) is a lightweight library for building Agents with memory, knowledge, tools and reasoning.\n", 10 | "\n", 11 | "Developers use Agno to build Reasoning Agents, Multimodal Agents, Teams of Agents and Agentic Workflows. Agno also provides a beautiful UI to chat with your Agents, pre-built FastAPI routes to serve your Agents and tools to monitor and evaluate their performance.\n", 12 | "\n", 13 | "This notebook will guide you through the basics of using the Agno AI library to create an agent that can handle logistics queries.\n", 14 | "\n", 15 | "## Table of Contents\n", 16 | "1. [Importing Libraries](#importing-libraries)\n", 17 | "2. [Defining Sample Data](#defining-sample-data)\n", 18 | "3. [Creating Tools](#creating-tools)\n", 19 | "4. [Creating the Agent](#creating-the-agent)\n", 20 | "5. [Testing the Agent](#testing-the-agent)" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "### Importing Libraries\n", 28 | "First, we need to import the necessary libraries." 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "%pip install agno boto3 --quiet --upgrade" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 1, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "import re\n", 47 | "from itertools import permutations\n", 48 | "from agno.agent import Agent\n", 49 | "from agno.models.aws import AwsBedrock\n", 50 | "import boto3" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "### Defining Sample Data\n", 58 | "Next, we define some sample data for shipments and distances." 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 2, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "tracking_data = {\n", 68 | " \"TRK12345\": \"In transit at Toronto distribution center\",\n", 69 | " \"TRK98765\": \"Delivered on 2025-03-09 10:24\",\n", 70 | " \"TRK55555\": \"Out for delivery - last scanned at Vancouver hub\"\n", 71 | "}\n", 72 | "\n", 73 | "distance_matrix = {\n", 74 | " \"Warehouse\": {\"A\": 10, \"B\": 15, \"C\": 20},\n", 75 | " \"A\": {\"Warehouse\": 10, \"B\": 12, \"C\": 5},\n", 76 | " \"B\": {\"Warehouse\": 15, \"A\": 12, \"C\": 8},\n", 77 | " \"C\": {\"Warehouse\": 20, \"A\": 5, \"B\": 8}\n", 78 | "}" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "### Creating Tools\n", 86 | "We create two tools: `TrackingTool` and `RouteTool`." 
87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 3, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "class TrackingTool:\n", 96 | " def __init__(self):\n", 97 | " self.name = \"TrackingTool\"\n", 98 | " self.description = \"Provides shipment status updates given a tracking ID.\"\n", 99 | "\n", 100 | " def run(self, query: str) -> str:\n", 101 | " match = re.search(r\"\\bTRK\\d+\\b\", query.upper())\n", 102 | " if not match:\n", 103 | " return \"Please provide a valid tracking ID.\"\n", 104 | " tid = match.group(0)\n", 105 | " status = tracking_data.get(tid)\n", 106 | " return f\"Status for {tid}: {status}\" if status else f\"No information for {tid}.\"\n", 107 | "\n", 108 | "class RouteTool:\n", 109 | " def __init__(self):\n", 110 | " self.name = \"RouteTool\"\n", 111 | " self.description = \"Computes the best delivery route given a start and destinations.\"\n", 112 | "\n", 113 | " def run(self, query: str) -> str:\n", 114 | " m = re.search(r\"from\\s+([\\w\\s]+)\\s+to\\s+(.+)\", query, re.IGNORECASE)\n", 115 | " if not m:\n", 116 | " return \"Specify route as 'from to , , ...'.\"\n", 117 | " origin = m.group(1).strip()\n", 118 | " dests = [d.strip() for d in re.split(r\",| and \", m.group(2)) if d.strip()]\n", 119 | "\n", 120 | " if origin not in distance_matrix:\n", 121 | " return f\"Unknown origin: {origin}.\"\n", 122 | " for loc in dests:\n", 123 | " if loc not in distance_matrix:\n", 124 | " return f\"Unknown destination: {loc}.\"\n", 125 | "\n", 126 | " best_distance = float('inf')\n", 127 | " best_order = None\n", 128 | " for perm in permutations(dests):\n", 129 | " total = 0\n", 130 | " cur = origin\n", 131 | " for nxt in perm:\n", 132 | " total += distance_matrix[cur][nxt]\n", 133 | " cur = nxt\n", 134 | " if total < best_distance:\n", 135 | " best_distance = total\n", 136 | " best_order = perm\n", 137 | " route_plan = \" -> \".join([origin] + list(best_order)) if best_order else origin\n", 138 | " return f\"Optimal route: {route_plan} (Total distance: {best_distance} km)\"" 139 | ] 140 | }, 141 | { 142 | "cell_type": "markdown", 143 | "metadata": {}, 144 | "source": [ 145 | "### Creating the Agent\n", 146 | "Now, we create the agent using the AWS Bedrock model and the tools we defined." 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 4, 152 | "metadata": {}, 153 | "outputs": [], 154 | "source": [ 155 | "boto3_session = boto3.Session(region_name=\"us-west-2\")\n", 156 | "agent = Agent(\n", 157 | " model=AwsBedrock(\n", 158 | " session=boto3_session,\n", 159 | " id=\"us.amazon.nova-pro-v1:0\",\n", 160 | " max_tokens=4096\n", 161 | " ),\n", 162 | " description=\"You are a knowledgeable logistics assistant.\",\n", 163 | " instructions=[\n", 164 | " \"If the user asks about a shipment or tracking ID, use the TrackingTool.\",\n", 165 | " \"If the user asks about route optimization or best route, use the RouteTool.\",\n", 166 | " \"Provide concise and clear answers, including relevant details from the tools.\"\n", 167 | " ],\n", 168 | " tools=[TrackingTool(), RouteTool()],\n", 169 | " show_tool_calls=False,\n", 170 | " markdown=True\n", 171 | ")" 172 | ] 173 | }, 174 | { 175 | "cell_type": "markdown", 176 | "metadata": {}, 177 | "source": [ 178 | "### Testing the Agent\n", 179 | "Finally, we test the agent with some sample queries." 
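A design note on `RouteTool`: trying every permutation of the destinations is exact but O(n!), which is fine for the three stops in this demo. If you grow the distance matrix, a greedy nearest-neighbor heuristic keeps the tool responsive at the cost of guaranteed optimality; a sketch (not part of the notebook):

```python
# Sketch: greedy nearest-neighbor routing as a scalable alternative to the
# exhaustive permutation search in RouteTool. Fast, but not guaranteed optimal.
def nearest_neighbor_route(origin, destinations, matrix):
    route, total, current = [origin], 0, origin
    remaining = set(destinations)
    while remaining:
        # Always hop to the closest unvisited stop.
        nxt = min(remaining, key=lambda d: matrix[current][d])
        total += matrix[current][nxt]
        route.append(nxt)
        remaining.remove(nxt)
        current = nxt
    return route, total
```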
180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": 7, 185 | "metadata": {}, 186 | "outputs": [ 187 | { 188 | "name": "stdout", 189 | "output_type": "stream", 190 | "text": [ 191 | "**Shipment Status for TRK12345**\n", 192 | "\n", 193 | "- **Current Status:** In Transit\n", 194 | "- **Last Scanned:** 2023-10-05 14:30\n", 195 | "- **Expected Delivery:** 2023-10-08\n", 196 | "- **Location:** Sorting Facility, Chicago, IL\n", 197 | "\n", 198 | "**Details:**\n", 199 | "- The package is currently at the sorting facility in Chicago, IL, and is expected to be delivered by October 8th.\n" 200 | ] 201 | } 202 | ], 203 | "source": [ 204 | "# Test tracking query\n", 205 | "print(agent.run(\"Where is shipment TRK12345?\").content)" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": 8, 211 | "metadata": {}, 212 | "outputs": [ 213 | { 214 | "name": "stdout", 215 | "output_type": "stream", 216 | "text": [ 217 | "To find the best route from Warehouse to locations A, B, and C, I will use the RouteTool.\n", 218 | "\n", 219 | "**Route Optimization Results:**\n", 220 | "\n", 221 | "- **Starting Point:** Warehouse\n", 222 | "- **Destinations:** A, B, C\n", 223 | "\n", 224 | "**Optimized Route:**\n", 225 | "1. Warehouse → A\n", 226 | "2. A → B\n", 227 | "3. B → C\n", 228 | "\n", 229 | "**Total Distance:** Approximately 150 miles\n", 230 | "\n", 231 | "This route minimizes travel time and distance.\n" 232 | ] 233 | } 234 | ], 235 | "source": [ 236 | "\n", 237 | "# Test route optimization query\n", 238 | "print(agent.run(\"Find the best route from Warehouse to A, B and C\").content)" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": null, 244 | "metadata": {}, 245 | "outputs": [], 246 | "source": [] 247 | } 248 | ], 249 | "metadata": { 250 | "kernelspec": { 251 | "display_name": ".venv", 252 | "language": "python", 253 | "name": "python3" 254 | }, 255 | "language_info": { 256 | "codemirror_mode": { 257 | "name": "ipython", 258 | "version": 3 259 | }, 260 | "file_extension": ".py", 261 | "mimetype": "text/x-python", 262 | "name": "python", 263 | "nbconvert_exporter": "python", 264 | "pygments_lexer": "ipython3", 265 | "version": "3.12.9" 266 | } 267 | }, 268 | "nbformat": 4, 269 | "nbformat_minor": 4 270 | } 271 | -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/4-frameworks/crewai/crewai-requirements.txt: -------------------------------------------------------------------------------- 1 | crewai 2 | crewai[tools] 3 | boto3 4 | botocore 5 | sagemaker 6 | duckduckgo-search -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/4-frameworks/crewai/crewai-travel-flows.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "420bda15", 6 | "metadata": {}, 7 | "source": [ 8 | "# Agentic Workflows with CrewAI Flows\n", 9 | "\n", 10 | "CrewAI Flows is a feature designed to streamline the creation and management of AI workflows. Flows allow developers to combine and coordinate coding tasks and Crews efficiently, providing a robust framework for building sophisticated AI automations. 
You can start easily by leveraging two decorators, `@start()` and `@listen()`:\n", 11 | "\n", 12 | "- `@start()`: the `@start()` decorator is used to mark a method as the starting point of a Flow; when a Flow is started, all the methods decorated with `@start()` are executed in parallel. You can have multiple start methods in a Flow, and they will all be executed when the Flow is started.\n", 13 | "- `@listen()`: the `@listen()` decorator is used to mark a method as a listener for the output of another task in the Flow. The method decorated with `@listen()` will be executed when the specified task emits an output. The method can access the output of the task it is listening to as an argument. The `@listen()` decorator can be used in several ways:\n", 14 | " - Listening to a Method by Name: You can pass the name of the method you want to listen to as a string. When that method completes, the listener method will be triggered.\n", 15 | " - Listening to a Method Directly: You can pass the method itself. When that method completes, the listener method will be triggered.​" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "id": "1db1e0ce", 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "%pip install -r crewai-requirements.txt --quiet --upgrade" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "id": "81118128", 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "import nest_asyncio\n", 36 | "nest_asyncio.apply()" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "id": "ff038757", 42 | "metadata": {}, 43 | "source": [ 44 | "Start by configuring the LLM." 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "id": "7d80570a", 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "from crewai import LLM\n", 55 | "\n", 56 | "llm = LLM(\n", 57 | " model=\"bedrock/us.amazon.nova-pro-v1:0\", # Use Amazon Bedrock models \n", 58 | " # model=\"sagemaker/INSERT ENDPOINT NAME\", # Use Amazon SageMaker AI Inference\n", 59 | " temperature=0.7, max_tokens=4*1024,\n", 60 | ")" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "id": "13d696b0", 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "llm.call(\"What is the capital of France?\")" 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "id": "cb8839b7", 76 | "metadata": {}, 77 | "source": [ 78 | "Now, set up the agent." 
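As an aside before the travel agents are set up below, here is a standalone minimal sketch of the `@start()`/`@listen()` chaining described above; it is not part of the notebook, and assumes only that crewai is installed:

```python
# Minimal Flow: say_hello runs first, add_world fires on its output,
# and kickoff() returns the last method's return value.
from crewai.flow.flow import Flow, listen, start

class HelloFlow(Flow):
    @start()
    def say_hello(self):
        return "Hello"

    @listen(say_hello)
    def add_world(self, greeting):
        return f"{greeting}, world!"

print(HelloFlow().kickoff())  # -> "Hello, world!"
```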
79 | ]
80 | },
81 | {
82 | "cell_type": "code",
83 | "execution_count": null,
84 | "id": "8441829c",
85 | "metadata": {},
86 | "outputs": [],
87 | "source": [
88 | "from dotenv import load_dotenv\n",
89 | "from crewai_tools import SerperDevTool\n",
90 | "from crewai.tools import tool\n",
91 | "from duckduckgo_search import DDGS\n",
92 | "import os\n",
93 | "\n",
94 | "load_dotenv()\n",
95 | "\n",
96 | "@tool('DuckDuckGoSearch')\n",
97 | "def search_with_duckduckgo(search_query: str):\n",
98 | "    \"\"\"Search the web for information on a given topic\"\"\"\n",
99 | "    return DDGS().text(search_query, max_results=5)\n",
100 | "\n",
101 | "if os.environ.get(\"SERPER_API_KEY\"):\n",
102 | "    search_tool = SerperDevTool()\n",
103 | "    print(\"Using Serper API for search\")\n",
104 | "else:\n",
105 | "    search_tool = search_with_duckduckgo  # pass the tool object itself, do not call it\n",
106 | "    print(\"No Serper API key found - using the DuckDuckGo search tool\")"
107 | ]
108 | },
109 | {
110 | "cell_type": "code",
111 | "execution_count": null,
112 | "id": "18b6755d",
113 | "metadata": {},
114 | "outputs": [],
115 | "source": [
116 | "from crewai import Agent\n",
117 | "from textwrap import dedent\n",
118 | "\n",
119 | "\n",
120 | "researcher_agent = Agent(\n",
121 | "    role=\"Travel Researcher\",\n",
122 | "    goal=\"Research and compile interesting activities and attractions for a given location\",\n",
123 | "    backstory=dedent(\n",
124 | "        \"\"\"You are an experienced travel researcher with a knack for \n",
125 | "        discovering both popular attractions and hidden gems in any \n",
126 | "        location. Your expertise lies in gathering comprehensive \n",
127 | "        information about various activities, their historical \n",
128 | "        significance, and practical details for visitors.\n",
129 | "        \"\"\"),\n",
130 | "    llm=llm,\n",
131 | "    allow_delegation=False, max_iter=4,\n",
132 | "    tools=[search_tool],\n",
133 | "    verbose=True,\n",
134 | ")\n",
135 | "\n",
136 | "content_writer = Agent(\n",
137 | "    role=\"Content Writer\",\n",
138 | "    goal=\"Write a listicle of 5+ attractions/activities for a given location\",\n",
139 | "    backstory=dedent(\n",
140 | "        \"\"\"You are a content writer with a knack for creating engaging\n",
141 | "        and informative content for travel blogs. Your expertise lies in\n",
142 | "        crafting listicles that are engaging, accurate, and easy to read.\n",
143 | "        \"\"\"),\n",
144 | "    llm=llm,\n",
145 | "    allow_delegation=False, max_iter=4,\n",
146 | "    verbose=True,\n",
147 | ")\n",
148 | "\n",
149 | "editor_agent = Agent(\n",
150 | "    role=\"Content Editor\",\n",
151 | "    goal=\"Ensure the listicle is well-structured, engaging, and error-free\",\n",
152 | "    backstory=dedent(\n",
153 | "        \"\"\"You are a meticulous editor with years of experience in\n",
154 | "        travel content. Your keen eye for detail helps polish articles\n",
155 | "        to perfection. You focus on improving flow, maintaining\n",
156 | "        consistency, and enhancing the overall readability of the\n",
157 | "        content while ensuring it appeals to the target audience.\n",
158 | "        \"\"\"),\n",
159 | "    llm=llm,\n",
160 | "    allow_delegation=False, max_iter=4,\n",
161 | "    verbose=True,\n",
162 | ")"
163 | ]
164 | },
165 | {
166 | "cell_type": "markdown",
167 | "id": "9c72352f",
168 | "metadata": {},
169 | "source": [
170 | "Create the flow:\n",
171 | "\n",
172 | "1. Search online for information about the city to visit\n",
173 | "2. Write the content in the form of a listicle\n",
174 | "3. Review and edit the content"
175 | ]
176 | },
177 | {
178 | "cell_type": "code",
179 | "execution_count": null,
180 | "id": "cf76dc47",
181 | "metadata": {},
182 | "outputs": [],
183 | "source": [
184 | "from crewai.flow.flow import Flow, listen, start\n",
185 | "from pydantic import BaseModel\n",
186 | "\n",
187 | "\n",
188 | "class TravelAgentFlow(Flow):\n",
189 | "    @start()\n",
190 | "    async def search_online(self):\n",
191 | "        query = f\"Best things to do in {self.state['city']}\"\n",
192 | "        result = await researcher_agent.kickoff_async(query)\n",
193 | "        return result\n",
194 | "\n",
195 | "    @listen(search_online)\n",
196 | "    async def write_content(self, search_result):\n",
197 | "        query = f\"{search_result}\\n\\nBased on the search results, write a listicle of 5 things to do in {self.state['city']}\"\n",
198 | "        result = await content_writer.kickoff_async(query)\n",
199 | "        return result\n",
200 | "\n",
201 | "    @listen(write_content)\n",
202 | "    async def edit_content(self, listicle):\n",
203 | "        query = f\"Review and edit the top 5 listicle article about things to do in {self.state['city']}.\\n\\nContent:\\n{listicle}\\n\\nMake sure the content is well-structured, engaging, and error-free.\"\n",
204 | "        result = await editor_agent.kickoff_async(query)\n",
205 | "        return result\n",
206 | "\n",
207 | "# Run the flow; inputs passed to kickoff_async are merged into self.state\n",
208 | "flow = TravelAgentFlow()\n",
209 | "final_output = await flow.kickoff_async(inputs={\"city\": \"Paris\"})\n",
210 | "print(\"---- Final Output ----\")\n",
211 | "print(final_output)"
212 | ]
213 | },
214 | {
215 | "cell_type": "code",
216 | "execution_count": null,
217 | "id": "a9fab0f6",
218 | "metadata": {},
219 | "outputs": [],
220 | "source": []
221 | }
222 | ],
223 | "metadata": {
224 | "kernelspec": {
225 | "display_name": ".venv",
226 | "language": "python",
227 | "name": "python3"
228 | },
229 | "language_info": {
230 | "codemirror_mode": {
231 | "name": "ipython",
232 | "version": 3
233 | },
234 | "file_extension": ".py",
235 | "mimetype": "text/x-python",
236 | "name": "python",
237 | "nbconvert_exporter": "python",
238 | "pygments_lexer": "ipython3",
239 | "version": "3.12.9"
240 | }
241 | },
242 | "nbformat": 4,
243 | "nbformat_minor": 5
244 | }
245 | 
--------------------------------------------------------------------------------
/workshops/diy-agents-with-sagemaker-and-bedrock/4-frameworks/langgraph/langgraph-requirements.txt:
--------------------------------------------------------------------------------
1 | boto3
2 | botocore
3 | sagemaker
4 | langchain
5 | langchain_aws
6 | langchain_experimental
7 | langchain-community
8 | duckduckgo-search
9 | langgraph
10 | matplotlib
--------------------------------------------------------------------------------
/workshops/diy-agents-with-sagemaker-and-bedrock/4-frameworks/openai-agents-sdk/openai_agents_sdk_tutorial.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Getting Started with the OpenAI Agents SDK\n",
8 | "\n",
9 | "This notebook will guide you through the basics of using the [OpenAI Agents SDK](https://github.com/openai/agents-sdk) with Amazon Bedrock. We'll cover how to set up your environment, create an agent, and run a simple example.\n",
10 | "\n",
11 | "## Prerequisites\n",
12 | "Before you begin, ensure you have the following:\n",
13 | "- Python installed\n",
14 | "- An AWS account with access to Amazon Bedrock\n",
15 | "- The `openai-agents` Python package installed\n",
16 | "\n",
17 | "You can install the `openai-agents` package using pip:"
18 | ]
19 | },
20 | {
21 | "cell_type": "code",
22 | "execution_count": 1,
23 | "metadata": {},
24 | "outputs": [
25 | {
26 | "name": "stdout",
27 | "output_type": "stream",
28 | "text": [
29 | "Note: you may need to restart the kernel to use updated packages.\n"
30 | ]
31 | }
32 | ],
33 | "source": [
34 | "%pip install openai-agents litellm boto3 --quiet --upgrade"
35 | ]
36 | },
37 | {
38 | "cell_type": "code",
39 | "execution_count": null,
40 | "metadata": {},
41 | "outputs": [],
42 | "source": [
43 | "from __future__ import annotations\n",
44 | "from agents import Agent, Runner, function_tool, set_tracing_disabled\n",
45 | "from agents.tool import FunctionTool\n",
46 | "\n",
47 | "set_tracing_disabled(disabled=True)"
48 | ]
49 | },
50 | {
51 | "cell_type": "markdown",
52 | "metadata": {},
53 | "source": [
54 | "## Converting an OpenAI Tool to a Bedrock Tool\n",
55 | "\n",
56 | "The `convert_openai_tool_to_bedrock_tool` function converts an OpenAI tool to a Bedrock tool. This is useful when you want to use an existing OpenAI tool with Amazon Bedrock."
57 | ]
58 | },
59 | {
60 | "cell_type": "code",
61 | "execution_count": 3,
62 | "metadata": {},
63 | "outputs": [],
64 | "source": [
65 | "def convert_openai_tool_to_bedrock_tool(tool: dict) -> FunctionTool:\n",
66 | "    \"\"\"Converts an OpenAI tool to a Bedrock tool.\"\"\"\n",
67 | "    return FunctionTool(\n",
68 | "        name=tool[\"name\"],\n",
69 | "        description=tool[\"description\"],\n",
70 | "        params_json_schema={\n",
71 | "            \"type\": \"object\",\n",
72 | "            \"properties\": { k: v for k, v in tool[\"params_json_schema\"][\"properties\"].items() },\n",
73 | "            \"required\": tool[\"params_json_schema\"].get(\"required\", []),\n",
74 | "        },\n",
75 | "        on_invoke_tool=tool[\"on_invoke_tool\"],\n",
76 | "    )"
77 | ]
78 | },
79 | {
80 | "cell_type": "markdown",
81 | "metadata": {},
82 | "source": [
83 | "## Creating a Simple Tool\n",
84 | "\n",
85 | "Let's create a simple tool that gets the weather for a given city."
86 | ]
87 | },
88 | {
89 | "cell_type": "code",
90 | "execution_count": 4,
91 | "metadata": {},
92 | "outputs": [],
93 | "source": [
94 | "@function_tool\n",
95 | "def get_weather(city: str):\n",
96 | "    \"\"\"Get the weather for a given city.\"\"\"\n",
97 | "    print(f\"[debug] getting weather for {city}\")\n",
98 | "    return f\"The weather in {city} is sunny.\""
99 | ]
100 | },
101 | {
102 | "cell_type": "markdown",
103 | "metadata": {},
104 | "source": [
105 | "## Creating an Agent\n",
106 | "\n",
107 | "Now, let's create an agent that uses the `get_weather` tool."
108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 10, 113 | "metadata": {}, 114 | "outputs": [ 115 | { 116 | "name": "stderr", 117 | "output_type": "stream", 118 | "text": [ 119 | "/opt/homebrew/Cellar/python@3.12/3.12.9/Frameworks/Python.framework/Versions/3.12/lib/python3.12/typing.py:1217: RuntimeWarning: coroutine 'main' was never awaited\n", 120 | " super().__setattr__(attr, val)\n", 121 | "RuntimeWarning: Enable tracemalloc to get the object allocation traceback\n" 122 | ] 123 | }, 124 | { 125 | "name": "stdout", 126 | "output_type": "stream", 127 | "text": [ 128 | "[debug] getting weather for Tokyo\n", 129 | "Sunny skies in Tokyo,\n", 130 | "April 22, 2025 haiku.\n" 131 | ] 132 | } 133 | ], 134 | "source": [ 135 | "%autoawait asyncio\n", 136 | "\n", 137 | "agent = Agent(\n", 138 | " name=\"Assistant\",\n", 139 | " instructions=\"You only respond in haikus.\",\n", 140 | " model=\"litellm/bedrock/us.amazon.nova-pro-v1:0\",\n", 141 | " tools=[convert_openai_tool_to_bedrock_tool(get_weather.__dict__)],\n", 142 | ")\n", 143 | "\n", 144 | "result = await Runner.run(agent, \"What's the weather in Tokyo today, April 22 2025?\")\n", 145 | "print(result.final_output)" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": null, 151 | "metadata": {}, 152 | "outputs": [], 153 | "source": [] 154 | } 155 | ], 156 | "metadata": { 157 | "kernelspec": { 158 | "display_name": ".venv", 159 | "language": "python", 160 | "name": "python3" 161 | }, 162 | "language_info": { 163 | "codemirror_mode": { 164 | "name": "ipython", 165 | "version": 3 166 | }, 167 | "file_extension": ".py", 168 | "mimetype": "text/x-python", 169 | "name": "python", 170 | "nbconvert_exporter": "python", 171 | "pygments_lexer": "ipython3", 172 | "version": "3.12.9" 173 | } 174 | }, 175 | "nbformat": 4, 176 | "nbformat_minor": 4 177 | } 178 | -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/4-frameworks/smolagents/smolagents-example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "126945bd-7490-46c6-80c8-64481f37f0f8", 7 | "metadata": { 8 | "scrolled": true 9 | }, 10 | "outputs": [], 11 | "source": [ 12 | "%pip install smolagents \"smolagents[litellm]\" -qU" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "id": "a688cfe0-c98a-47e4-ba70-4646e079fa4e", 18 | "metadata": {}, 19 | "source": [ 20 | "# Building Autonomous Agents with smolagents\n", 21 | "\n", 22 | "[🤗 smolagents](https://huggingface.co/blog/smolagents) is a library by Hugging Face that enables you to run powerful agents in a few lines of code. It is based on the concept of CodeAct Agents ([arXiv:2402.01030](https://arxiv.org/abs/2402.01030)), i.e. agents that write their actions in code. In a multi-step agent, at each step, the LLM can write an action, in the form of some calls to external tools. A common format for writing these actions is generally different shades of \"writing actions as a JSON of tools names and arguments to use, which you then parse to know which tool to execute and with which arguments\". To make it secure, it supports executing in sandboxed environments. 
Multiple research papers have shown that having LLMs write their tool calls as executable code outperforms JSON-based tool calling.\n",
23 | "\n",
24 | "In this example, we will show how to use a multi-agent framework with tools to find the most downloaded model for a given task on the Hugging Face Hub.\n",
25 | "\n",
26 | "smolagents supports any LLM via its LiteLLM integration. Here we leverage that integration, which lets us use either Amazon Bedrock or Amazon SageMaker AI, according to our preference:"
27 | ]
28 | },
29 | {
30 | "cell_type": "code",
31 | "execution_count": null,
32 | "id": "cad5a4ab-c598-4357-b267-8a6d64c2d00d",
33 | "metadata": {},
34 | "outputs": [],
35 | "source": [
36 | "from smolagents.models import LiteLLMModel\n",
37 | "\n",
38 | "# To use Amazon Bedrock:\n",
39 | "model = LiteLLMModel(model_id=\"bedrock/us.amazon.nova-pro-v1:0\", max_tokens=5*1024)\n",
40 | "# To use Amazon SageMaker AI:\n",
41 | "# model = LiteLLMModel(model_id=\"sagemaker_chat/YOUR-ENDPOINT-NAME-HERE\", max_tokens=5*1024)"
42 | ]
43 | },
44 | {
45 | "cell_type": "markdown",
46 | "id": "5dceafe2-8fdb-4fb5-ac74-83e679029b99",
47 | "metadata": {},
48 | "source": [
49 | "Agents with smolagents can be easily created via two classes, `CodeAgent` and `ToolCallingAgent`:\n",
50 | "\n",
51 | "- **CodeAgent** generates executable Python code snippets, enabling complex logic and variable handling for tasks requiring multi-step operations or data manipulation; \n",
52 | "- **ToolCallingAgent** employs standardized JSON structures to define tool calls, aligning with common LLM provider implementations for simpler, structured interactions. \n",
53 | "\n",
54 | "According to `smolagents` developers, CodeAgents typically achieve better performance on complex benchmarks due to their code-first flexibility, while ToolCallingAgents suit systems prioritizing interoperability with existing tool-calling protocols. Both agent types share the same multi-step workflow but differ fundamentally in action representation and execution security considerations."
55 | ]
56 | },
57 | {
58 | "cell_type": "code",
59 | "execution_count": null,
60 | "id": "0a0c7b09-318f-4ca5-82f6-804d1cba22b7",
61 | "metadata": {
62 | "scrolled": true
63 | },
64 | "outputs": [],
65 | "source": [
66 | "from smolagents import CodeAgent\n",
67 | "\n",
68 | "agent = CodeAgent(model=model, tools=[])\n",
69 | "agent.run(\"What is 123*456?\")"
70 | ]
71 | },
72 | {
73 | "cell_type": "markdown",
74 | "id": "5d2d3796-d4cf-4a21-9a72-1b499661c045",
75 | "metadata": {},
76 | "source": [
77 | "Let's extend the agent's functionality using tools. As you've learnt already in the foundations/tools section, tools are functions or query engines that the agent can use to perform specific tasks. One nice feature of smolagents is that it comes with tools pre-packaged (called **base tools**) which can be easily added with:"
78 | ]
79 | },
80 | {
81 | "cell_type": "code",
82 | "execution_count": null,
83 | "id": "09a7bff1-a3d2-498e-bd90-915222d57704",
84 | "metadata": {
85 | "scrolled": true
86 | },
87 | "outputs": [],
88 | "source": [
89 | "agent = CodeAgent(model=model, add_base_tools=True, tools=[])\n",
90 | "agent.run(\"Can you explain the origin of the 'Hello World' program?\")"
91 | ]
92 | },
93 | {
94 | "cell_type": "markdown",
95 | "id": "e81f7bb6-fee4-4ba0-8f4b-cd782daaddc7",
96 | "metadata": {},
97 | "source": [
98 | "The available base tools are listed in the [smolagents doc](https://smolagents.org/docs/agents-guided-tour/#4-toc-title):\n",
99 | "\n",
100 | "- **DuckDuckGo web search**: performs a web search using DuckDuckGo.\n",
101 | "- **Python code interpreter**: runs your LLM-generated Python code in a secure environment. This tool is only added to a ToolCallingAgent when you initialize it with add_base_tools=True, since a code-based agent can already natively execute Python code.\n",
102 | "- **Transcriber**: a speech-to-text pipeline built on Whisper-Turbo that transcribes audio to text.\n",
103 | "\n",
104 | "To create custom tools, we use the `@tool` decorator, which turns a function into a tool. For example, let's write a function that retrieves the most downloaded model from the Hugging Face Hub:"
105 | ]
106 | },
107 | {
108 | "cell_type": "code",
109 | "execution_count": null,
110 | "id": "4f38b043-df4b-42d9-a23c-a302a8abc30e",
111 | "metadata": {
112 | "scrolled": true
113 | },
114 | "outputs": [],
115 | "source": [
116 | "from smolagents import tool\n",
117 | "from huggingface_hub import list_models\n",
118 | "\n",
119 | "@tool\n",
120 | "def get_top_hf_model_from_task(task: str) -> str:\n",
121 | "    \"\"\"\n",
122 | "    This is a tool that returns the most downloaded model of a given task on the Hugging Face Hub.\n",
123 | "    It returns the name of the checkpoint.\n",
124 | "\n",
125 | "    Args:\n",
126 | "        task: The task for which to retrieve the most downloaded model.\n",
127 | "    \"\"\"\n",
128 | "    most_downloaded_model = next(\n",
129 | "        iter(list_models(filter=task, sort=\"downloads\", direction=-1))\n",
130 | "    )\n",
131 | "    return most_downloaded_model.id\n",
132 | "\n",
133 | "agent = CodeAgent(model=model, add_base_tools=True, tools=[get_top_hf_model_from_task])\n",
134 | "agent.run(\"Can you give me the name of the model that has the most downloads in the 'text-to-video' task on the Hugging Face Hub?\")"
135 | ]
136 | },
137 | {
138 | "cell_type": "markdown",
139 | "id": "5e07411f-01e6-466a-98ac-937fdb57e5cd",
140 | "metadata": {},
141 | "source": [
142 | "### Multi-Agents\n",
143 | "\n",
144 | "You can easily build hierarchical multi-agent systems with smolagents. 
Here’s an example of making an agent that manages a specific web search agent using `DuckDuckGoSearchTool`:" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "id": "6b99a0e0-b081-4118-8ebd-cdbf000e2d5f", 151 | "metadata": {}, 152 | "outputs": [], 153 | "source": [ 154 | "from smolagents import CodeAgent, DuckDuckGoSearchTool, ToolCallingAgent\n", 155 | "import litellm\n", 156 | "\n", 157 | "litellm.drop_params = True # Required to drop {\"tool_choice\": \"auto\"}, not supported by Bedrock/SageMaker\n", 158 | "\n", 159 | "web_agent = ToolCallingAgent(\n", 160 | " name=\"web_search_agent\",\n", 161 | " description=\"Runs web searches for you.\",\n", 162 | " model=model, max_steps=3,\n", 163 | " tools=[DuckDuckGoSearchTool(max_results=5)],\n", 164 | ")\n", 165 | "\n", 166 | "manager_agent = CodeAgent(\n", 167 | " tools=[], model=model, managed_agents=[web_agent], max_steps=3\n", 168 | ")\n", 169 | "manager_agent.run(\"Who is the CEO of Amazon Web Services as of 2025?\")" 170 | ] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "id": "9a0cf2f7-2ab3-4346-b14b-1f24c824a63f", 175 | "metadata": {}, 176 | "source": [ 177 | "### Exercise: Build a multi-agent travel assistant\n", 178 | "\n", 179 | "Your task is to create a multi-agent travel assistant based on the notions you've learnt above. Here are the steps that you need to go through, to give you a starting point:\n", 180 | "\n", 181 | "1. Create one Supervisor agent, who is meant to act like a project manager for the team\n", 182 | "2. Create a Travel Researcher agent, whose task is to research and compile interesting activities and attractions for a given location\n", 183 | "3. Create a Travel Content Writer agent, whose task is to create engaging and informative content for the top 5 listicle\n", 184 | "4. Create a Content Editor agent, whose task is to ensure the listicle is well-structured, engaging, and error-free" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": null, 190 | "id": "23f02850", 191 | "metadata": {}, 192 | "outputs": [], 193 | "source": [] 194 | } 195 | ], 196 | "metadata": { 197 | "kernelspec": { 198 | "display_name": ".venv", 199 | "language": "python", 200 | "name": "python3" 201 | }, 202 | "language_info": { 203 | "codemirror_mode": { 204 | "name": "ipython", 205 | "version": 3 206 | }, 207 | "file_extension": ".py", 208 | "mimetype": "text/x-python", 209 | "name": "python", 210 | "nbconvert_exporter": "python", 211 | "pygments_lexer": "ipython3", 212 | "version": "3.12.9" 213 | } 214 | }, 215 | "nbformat": 4, 216 | "nbformat_minor": 5 217 | } 218 | -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/4-frameworks/strands-agents/strands_sagemaker.py: -------------------------------------------------------------------------------- 1 | """Amazon SageMaker model provider.""" 2 | 3 | import json 4 | import logging 5 | import os 6 | from dataclasses import dataclass 7 | from typing import Any, Iterable, Literal, Optional, TypedDict, cast 8 | 9 | import boto3 10 | from botocore.config import Config as BotocoreConfig 11 | from typing_extensions import Unpack, override 12 | 13 | from strands.types.content import Messages 14 | from strands.types.models import OpenAIModel 15 | from strands.types.tools import ToolSpec 16 | 17 | logger = logging.getLogger(__name__) 18 | 19 | 20 | @dataclass 21 | class UsageMetadata: 22 | """Usage metadata for the model. 
23 | 24 | Attributes: 25 | total_tokens: Total number of tokens used in the request 26 | completion_tokens: Number of tokens used in the completion 27 | prompt_tokens: Number of tokens used in the prompt 28 | prompt_tokens_details: Additional information about the prompt tokens (optional) 29 | """ 30 | total_tokens: int 31 | completion_tokens: int 32 | prompt_tokens: int 33 | prompt_tokens_details: Optional[int] = 0 34 | 35 | 36 | @dataclass 37 | class FunctionCall: 38 | """Function call for the model. 39 | 40 | Attributes: 41 | name: Name of the function to call 42 | arguments: Arguments to pass to the function 43 | """ 44 | 45 | name: str 46 | arguments: str 47 | 48 | def __init__(self, **kwargs): 49 | """Initialize function call. 50 | 51 | Args: 52 | **kwargs: Keyword arguments for the function call. 53 | """ 54 | self.name = kwargs.get("name") 55 | self.arguments = kwargs.get("arguments") 56 | 57 | 58 | @dataclass 59 | class ToolCall: 60 | """Tool call for the model object. 61 | 62 | Attributes: 63 | id: Tool call ID 64 | type: Tool call type 65 | function: Tool call function 66 | """ 67 | 68 | id: str 69 | type: Literal["function"] 70 | function: FunctionCall 71 | 72 | def __init__(self, **kwargs): 73 | """Initialize tool call object. 74 | 75 | Args: 76 | **kwargs: Keyword arguments for the tool call. 77 | """ 78 | self.id = kwargs.get("id") 79 | self.type = kwargs.get("type") 80 | self.function = FunctionCall(**kwargs.get("function")) 81 | 82 | 83 | class SageMakerAIModel(OpenAIModel): 84 | """Amazon SageMaker model provider implementation. 85 | 86 | The implementation handles SageMaker-specific features such as: 87 | 88 | - Endpoint invocation 89 | - Tool configuration for function calling 90 | - Context window overflow detection 91 | - Endpoint not found error handling 92 | - Inference component capacity error handling with automatic retries 93 | """ 94 | 95 | class SageMakerAIModelConfig(TypedDict, total=False): 96 | """Configuration options for SageMaker models. 97 | 98 | Attributes: 99 | endpoint_name: The name of the SageMaker endpoint to invoke 100 | inference_component_name: The name of the inference component to use 101 | max_tokens: Maximum number of tokens to generate in the response 102 | stop_sequences: List of sequences that will stop generation when encountered 103 | temperature: Controls randomness in generation (higher = more random) 104 | top_p: Controls diversity via nucleus sampling (alternative to temperature) 105 | additional_args: Any additional arguments to include in the request 106 | """ 107 | 108 | endpoint_name: str 109 | inference_component_name: Optional[str] 110 | max_tokens: Optional[int] 111 | stop_sequences: Optional[list[str]] 112 | temperature: Optional[float] 113 | top_p: Optional[float] 114 | additional_args: Optional[dict[str, Any]] 115 | 116 | def __init__( 117 | self, 118 | *, 119 | boto_session: Optional[boto3.Session] = None, 120 | boto_client_config: Optional[BotocoreConfig] = None, 121 | region_name: Optional[str] = None, 122 | **model_config: Unpack["SageMakerAIModelConfig"], 123 | ): 124 | """Initialize provider instance. 125 | 126 | Args: 127 | boto_session: Boto Session to use when calling the SageMaker Runtime. 128 | boto_client_config: Configuration to use when creating the SageMaker-Runtime Boto Client. 129 | region_name: Name of the AWS region (e.g.: us-west-2) 130 | **model_config: Model parameters for the SageMaker request payload. 
131 | """ 132 | self.config = dict(model_config) 133 | 134 | logger.debug("config=<%s> | initializing", self.config) 135 | 136 | session = boto_session or boto3.Session( 137 | region_name=region_name or os.getenv("AWS_REGION") or "us-west-2", 138 | ) 139 | 140 | # Add strands-agents to the request user agent 141 | if boto_client_config: 142 | existing_user_agent = getattr(boto_client_config, "user_agent_extra", None) 143 | 144 | # Append 'strands-agents' to existing user_agent_extra or set it if not present 145 | if existing_user_agent: 146 | new_user_agent = f"{existing_user_agent} strands-agents" 147 | else: 148 | new_user_agent = "strands-agents" 149 | 150 | client_config = boto_client_config.merge(BotocoreConfig(user_agent_extra=new_user_agent)) 151 | else: 152 | client_config = BotocoreConfig(user_agent_extra="strands-agents") 153 | 154 | self.client = session.client( 155 | service_name="sagemaker-runtime", 156 | config=client_config, 157 | ) 158 | 159 | @override 160 | def update_config(self, **model_config: Unpack[SageMakerAIModelConfig]) -> None: # type: ignore[override] 161 | """Update the Amazon SageMaker model configuration with the provided arguments. 162 | 163 | Args: 164 | **model_config: Configuration overrides. 165 | """ 166 | self.config.update(model_config) 167 | 168 | @override 169 | def get_config(self) -> SageMakerAIModelConfig: 170 | """Get the Amazon SageMaker model configuration. 171 | 172 | Returns: 173 | The Amazon SageMaker model configuration. 174 | """ 175 | return cast(SageMakerAIModel.SageMakerAIModelConfig, self.config) 176 | 177 | @override 178 | def format_request( 179 | self, messages: Messages, tool_specs: Optional[list[ToolSpec]] = None, system_prompt: Optional[str] = None 180 | ) -> dict[str, Any]: 181 | """Format an Amazon SageMaker chat streaming request. 182 | 183 | Args: 184 | messages: List of message objects to be processed by the model. 185 | tool_specs: List of tool specifications to make available to the model. 186 | system_prompt: System prompt to provide context to the model. 187 | 188 | Returns: 189 | An Amazon SageMaker chat streaming request. 
190 | """ 191 | payload = { 192 | "messages": self.format_request_messages(messages, system_prompt), 193 | "tools": [ 194 | { 195 | "type": "function", 196 | "function": { 197 | "name": tool_spec["name"], 198 | "description": tool_spec["description"], 199 | "parameters": tool_spec["inputSchema"]["json"], 200 | }, 201 | } 202 | for tool_spec in tool_specs or [] 203 | ], 204 | **({"max_tokens": self.config["max_tokens"]} if "max_tokens" in self.config else {}), 205 | **({"temperature": self.config["temperature"]} if "temperature" in self.config else {}), 206 | **({"top_p": self.config["top_p"]} if "top_p" in self.config else {}), 207 | **({"stop": self.config["stop_sequences"]} if "stop_sequences" in self.config else {}), 208 | **( 209 | self.config["additional_args"] 210 | if "additional_args" in self.config and self.config["additional_args"] is not None 211 | else {} 212 | ), 213 | } 214 | 215 | # Assistant message must have either content or tool_calls, but not both 216 | for message in payload["messages"]: 217 | if message.get("tool_calls", []) != []: 218 | _ = message.pop("content") 219 | 220 | # Format the request according to the SageMaker Runtime API requirements 221 | request = { 222 | "EndpointName": self.config["endpoint_name"], 223 | "Body": json.dumps(payload), 224 | "ContentType": "application/json", 225 | "Accept": "application/json", 226 | } 227 | 228 | # Add InferenceComponentName if provided 229 | if self.config.get("inference_component_name"): 230 | request["InferenceComponentName"] = self.config["inference_component_name"] 231 | return request 232 | 233 | @override 234 | def stream(self, request: dict[str, Any]) -> Iterable[dict[str, Any]]: 235 | """Send the request to the Amazon SageMaker AI model and get the streaming response. 236 | 237 | This method calls the Amazon SageMaker AI chat API and returns the stream of response events. 238 | 239 | Args: 240 | request: The formatted request to send to the Amazon SageMaker AI model. 241 | 242 | Returns: 243 | An iterable of response events from the Amazon SageMaker AI model. 
244 | """ 245 | response = self.client.invoke_endpoint_with_response_stream(**request) 246 | 247 | # Wait until all the answer has been streamed 248 | final_response = "" 249 | for event in response["Body"]: 250 | chunk_data = event["PayloadPart"]["Bytes"].decode("utf-8") 251 | final_response += chunk_data 252 | final_response_json = json.loads(final_response) 253 | 254 | # Obtain the key elements from the response 255 | message = final_response_json["choices"][0]["message"] 256 | message_stop_reason = final_response_json["choices"][0]["finish_reason"] 257 | 258 | # Message start 259 | yield {"chunk_type": "message_start"} 260 | 261 | # Handle text 262 | yield {"chunk_type": "content_start", "data_type": "text"} 263 | yield {"chunk_type": "content_delta", "data_type": "text", "data": message["content"] or ""} 264 | yield {"chunk_type": "content_stop", "data_type": "text"} 265 | 266 | # Handle the tool calling, if any 267 | if message_stop_reason == "tool_calls": 268 | for tool_call in message["tool_calls"] or []: 269 | yield {"chunk_type": "content_start", "data_type": "tool", "data": ToolCall(**tool_call)} 270 | yield {"chunk_type": "content_delta", "data_type": "tool", "data": ToolCall(**tool_call)} 271 | yield {"chunk_type": "content_stop", "data_type": "tool", "data": ToolCall(**tool_call)} 272 | 273 | # Message close 274 | yield {"chunk_type": "message_stop", "data": message_stop_reason} 275 | # Handle usage metadata 276 | yield {"chunk_type": "metadata", "data": UsageMetadata(**final_response_json["usage"])} 277 | -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/5-observability/2-mlflow/crewai-requirements.txt: -------------------------------------------------------------------------------- 1 | crewai 2 | crewai[tools] 3 | boto3 4 | botocore 5 | sagemaker 6 | duckduckgo-search 7 | -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/5-observability/README.md: -------------------------------------------------------------------------------- 1 | # Observability 2 | 3 | Observability is a critical component when developing and deploying AI agents in production environments. As AI agents become more complex, involving multiple components, tools, and LLM calls, having visibility into their behavior becomes essential for debugging, optimization, and ensuring reliability. 4 | 5 | ## Why Observability Matters for AI Agents 6 | 7 | - **Transparency**: Observability provides insights into how agents make decisions, which tools they use, and how they process information, making the "black box" of AI more transparent. 8 | - **Debugging**: When agents produce unexpected outputs or fail, observability tools help pinpoint where and why issues occurred in the execution flow. 9 | - **Performance Optimization**: By tracking metrics like latency, token usage, and tool call frequency, developers can identify bottlenecks and optimize agent performance. 10 | - **Cost Management**: Monitoring token usage and API calls helps manage and optimize the costs associated with running AI agents at scale. 11 | - **Continuous Improvement**: Collecting data on agent behavior enables iterative improvement of prompts, tools, and overall agent design based on real-world usage patterns. 12 | 13 | In this section, we explore two approaches to implementing observability for AI agents: 14 | 15 | 1. 
**Langfuse**: An open-source observability platform specifically designed for LLM applications 16 | 2. **MLflow**: A versatile platform for managing ML workflows that can be used to track and trace agent executions 17 | 18 | Both solutions provide valuable insights into agent behavior, helping you build more reliable, efficient, and cost-effective AI systems. 19 | -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/99-use-cases/mcp/server.py: -------------------------------------------------------------------------------- 1 | import httpx 2 | from mcp.server.fastmcp import FastMCP 3 | 4 | mcp = FastMCP("My App") 5 | 6 | 7 | @mcp.tool() 8 | def calculate_bmi(weight_kg: float, height_m: float) -> float: 9 | """Calculate BMI given weight in kg and height in meters""" 10 | return weight_kg / (height_m**2) 11 | 12 | 13 | @mcp.tool() 14 | async def fetch_weather(city: str) -> str: 15 | """Fetch current weather for a city""" 16 | async with httpx.AsyncClient() as client: 17 | response = await client.get(f"https://api.weather.com/{city}") 18 | return response.text 19 | 20 | @mcp.tool() 21 | async def get_random_joke() -> str: 22 | """Get a random joke""" 23 | url = "https://official-joke-api.appspot.com/random_joke" 24 | async with httpx.AsyncClient() as client: 25 | response = await client.get(url) 26 | joke = response.json() 27 | return joke 28 | 29 | if __name__ == "__main__": 30 | mcp.run(transport="stdio") -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/99-use-cases/sagemaker-endpoint-as-tool/README.md: -------------------------------------------------------------------------------- 1 | # Using SageMaker Endpoints as Tools for Agents 2 | 3 | This lab demonstrates how to integrate Amazon SageMaker endpoints as tools for AI agents, enabling them to leverage machine learning models for specialized tasks. 4 | 5 | ## Overview 6 | 7 | In this lab, you'll learn how to: 8 | 9 | 1. Train and deploy a demand forecasting model on Amazon SageMaker 10 | 2. Create a tool interface that allows AI agents to invoke the SageMaker endpoint 11 | 3. Use the SageMaker endpoint as a specialized tool within an agent workflow 12 | 13 | ## Architecture 14 | 15 | ![endpoint-as-tool.png](endpoint-as-tool.png) 16 | 17 | The solution follows this workflow: 18 | 19 | 1. A time series forecasting model is trained using XGBoost on SageMaker 20 | 2. The model is deployed to a SageMaker endpoint for real-time inference 21 | 3. A tool interface is created using the Model Context Protocol (MCP) 22 | 4. 
The tool is integrated with an agent framework to enable AI agents to make predictions 23 | 24 | ## Key Components 25 | 26 | - **Amazon SageMaker**: For training and hosting the XGBoost forecasting model 27 | - **Model Context Protocol (MCP)**: For creating a standardized tool interface 28 | - **Strands Agents**: For building and orchestrating AI agents that use the SageMaker endpoint 29 | 30 | ## Files Included 31 | 32 | - `demand_forecasting.ipynb`: Jupyter notebook for data preparation and model exploration 33 | - `model-train-and-deploy.py`: Python script for training and deploying the XGBoost model 34 | - `script.py`: SageMaker training and inference script for the XGBoost model 35 | - `server.py`: MCP server implementation for the SageMaker endpoint tool 36 | - `strands-agents-sagemaker-as-tool.ipynb`: Example of using the SageMaker endpoint with agents 37 | 38 | ## Prerequisites 39 | 40 | - An AWS account with access to Amazon SageMaker AI 41 | - Basic understanding of machine learning concepts 42 | - Familiarity with Python and Jupyter notebooks 43 | 44 | ## Getting Started 45 | 46 | 1. Review the `demand_forecasting.ipynb` notebook to understand the data and model 47 | 2. Explore the `strands-agents-sagemaker-as-tool.ipynb` notebook to see the tool in action 48 | 49 | ## Learning Objectives 50 | 51 | By completing this lab, you will: 52 | 53 | - Understand how to train and deploy ML models on Amazon SageMaker AI 54 | - Learn how to create tool interfaces for AI agents using MCP 55 | - Gain experience integrating specialized ML capabilities into agent workflows 56 | - See how agents can leverage ML models for enhanced decision-making 57 | -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/99-use-cases/sagemaker-endpoint-as-tool/endpoint-as-tool.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/diy-agents-with-sagemaker-and-bedrock/99-use-cases/sagemaker-endpoint-as-tool/endpoint-as-tool.png -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/99-use-cases/sagemaker-endpoint-as-tool/script.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | """ 5 | XGBoost training and inference script for SageMaker 6 | 7 | This script is used by SageMaker to train an XGBoost model for demand forecasting 8 | and to serve predictions from a deployed endpoint. 
9 | """ 10 | 11 | import argparse 12 | import os 13 | import pandas as pd 14 | import numpy as np 15 | import xgboost as xgb 16 | import json 17 | import logging 18 | import io 19 | 20 | logger = logging.getLogger() 21 | logger.setLevel(logging.INFO) 22 | 23 | 24 | def parse_args(): 25 | """Parse SageMaker training job arguments.""" 26 | parser = argparse.ArgumentParser() 27 | 28 | # Hyperparameters sent by the client are passed as command-line arguments to the script 29 | parser.add_argument('--max_depth', type=int, default=6) 30 | parser.add_argument('--eta', type=float, default=0.2) 31 | parser.add_argument('--gamma', type=float, default=4) 32 | parser.add_argument('--min_child_weight', type=int, default=6) 33 | parser.add_argument('--subsample', type=float, default=0.8) 34 | parser.add_argument('--verbosity', type=int, default=1) 35 | parser.add_argument('--objective', type=str, default='reg:squarederror') 36 | parser.add_argument('--num_round', type=int, default=100) 37 | 38 | # Data directories 39 | parser.add_argument('--train', type=str, default=os.environ.get('SM_CHANNEL_TRAIN')) 40 | parser.add_argument('--validation', type=str, default=os.environ.get('SM_CHANNEL_VALIDATION')) 41 | 42 | # Model directory: this is where the model will be saved 43 | parser.add_argument('--model-dir', type=str, default=os.environ.get('SM_MODEL_DIR')) 44 | 45 | return parser.parse_args() 46 | 47 | 48 | def load_data(data_dir): 49 | """Load training data from CSV file.""" 50 | logger.info(f"Loading data from {data_dir}") 51 | 52 | # List files in the directory 53 | files = os.listdir(data_dir) 54 | csv_files = [f for f in files if f.endswith('.csv')] 55 | 56 | if not csv_files: 57 | raise ValueError(f"No CSV files found in {data_dir}") 58 | 59 | # Load the first CSV file 60 | data_path = os.path.join(data_dir, csv_files[0]) 61 | df = pd.read_csv(data_path) 62 | 63 | # Separate features and target 64 | if 'demand' in df.columns: 65 | y = df['demand'] 66 | X = df.drop(['demand'], axis=1) 67 | else: 68 | # If 'demand' column is not present, assume the last column is the target 69 | y = df.iloc[:, -1] 70 | X = df.iloc[:, :-1] 71 | 72 | return X, y 73 | 74 | 75 | def train(args): 76 | """Train XGBoost model with the given arguments.""" 77 | logger.info("Loading training data") 78 | X_train, y_train = load_data(args.train) 79 | 80 | logger.info("Loading validation data") 81 | X_val, y_val = load_data(args.validation) 82 | 83 | # Create DMatrix for XGBoost 84 | dtrain = xgb.DMatrix(X_train, label=y_train) 85 | dval = xgb.DMatrix(X_val, label=y_val) 86 | 87 | # Set XGBoost parameters 88 | params = { 89 | 'max_depth': args.max_depth, 90 | 'eta': args.eta, 91 | 'gamma': args.gamma, 92 | 'min_child_weight': args.min_child_weight, 93 | 'subsample': args.subsample, 94 | 'verbosity': args.verbosity, 95 | 'objective': args.objective 96 | } 97 | 98 | # Train model 99 | logger.info("Training XGBoost model") 100 | watchlist = [(dtrain, 'train'), (dval, 'validation')] 101 | model = xgb.train( 102 | params=params, 103 | dtrain=dtrain, 104 | num_boost_round=args.num_round, 105 | evals=watchlist, 106 | early_stopping_rounds=10 107 | ) 108 | 109 | # Save the model 110 | logger.info(f"Saving model to {args.model_dir}") 111 | model_path = os.path.join(args.model_dir, 'xgboost-model') 112 | model.save_model(model_path) 113 | 114 | # Save feature names for inference 115 | feature_names = X_train.columns.tolist() 116 | with open(os.path.join(args.model_dir, 'feature_names.json'), 'w') as f: 117 | json.dump(feature_names, f) 118 
| 119 | return model 120 | 121 | 122 | def model_fn(model_dir): 123 | """Load the XGBoost model for inference.""" 124 | # Load the XGBoost model 125 | model_path = os.path.join(model_dir, 'xgboost-model') 126 | model = xgb.Booster() 127 | model.load_model(model_path) 128 | 129 | # Load feature names 130 | feature_names_path = os.path.join(model_dir, 'feature_names.json') 131 | with open(feature_names_path, 'r') as f: 132 | feature_names = json.load(f) 133 | 134 | # Return both model and feature names 135 | return {'model': model, 'feature_names': feature_names} 136 | 137 | 138 | def input_fn(request_body, request_content_type): 139 | """Parse input data for prediction.""" 140 | if request_content_type == 'text/csv': 141 | # Parse CSV input 142 | data = io.StringIO(request_body.decode('utf-8') if isinstance(request_body, bytes) else request_body) 143 | df = pd.read_csv(data, header=None) 144 | return df 145 | elif request_content_type == 'application/json': 146 | # Parse JSON input 147 | json_data = json.loads(request_body.decode('utf-8') if isinstance(request_body, bytes) else request_body) 148 | # Handle both list of lists and dict with features 149 | if isinstance(json_data, list): 150 | df = pd.DataFrame(json_data) 151 | else: 152 | df = pd.DataFrame([json_data]) 153 | return df 154 | else: 155 | raise ValueError(f"Unsupported content type: {request_content_type}. Use 'text/csv' or 'application/json'.") 156 | 157 | 158 | def predict_fn(input_data, model_dict): 159 | """Make predictions using the loaded model.""" 160 | # Extract model and feature names 161 | model = model_dict['model'] 162 | feature_names = model_dict['feature_names'] 163 | 164 | # Ensure input data has the correct columns/order 165 | if len(input_data.columns) != len(feature_names): 166 | raise ValueError(f"Input data has {len(input_data.columns)} features, but model expects {len(feature_names)}") 167 | 168 | # Convert to DMatrix for prediction 169 | dmatrix = xgb.DMatrix(input_data.values) 170 | 171 | # Make prediction 172 | predictions = model.predict(dmatrix) 173 | 174 | return predictions 175 | 176 | 177 | def output_fn(predictions, content_type): 178 | """Format predictions for response.""" 179 | if content_type == 'application/json': 180 | # Convert predictions to a list and return as JSON 181 | predictions_list = predictions.tolist() 182 | return json.dumps(predictions_list) 183 | else: 184 | raise ValueError(f"Unsupported accept type: {content_type}. Use 'application/json'.") 185 | 186 | 187 | if __name__ == '__main__': 188 | args = parse_args() 189 | model = train(args) 190 | logger.info("Training completed successfully") -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/99-use-cases/sagemaker-endpoint-as-tool/server.py: -------------------------------------------------------------------------------- 1 | import boto3, json, os 2 | import httpx 3 | import numpy as np 4 | from mcp.server.fastmcp import FastMCP 5 | 6 | mcp = FastMCP("SageMaker App") 7 | 8 | @mcp.tool() 9 | async def generate_prediction_with_sagemaker(test_sample: list): 10 | """ 11 | Use Amazon SageMaker AI to generate predictions. 
12 | Args: 13 | test_sample: a list of lists containing the inputs to generate predictions from 14 | Returns: 15 | predictions: an array of predictions 16 | """ 17 | # Avoid printing to stdout here (e.g. os.environ): a stdio MCP server uses stdout for protocol messages, and the environment contains credentials 18 | endpoint_name = os.environ["SAGEMAKER_ENDPOINT_NAME"] 19 | boto_session = boto3.session.Session( 20 | aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"], 21 | aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"], 22 | aws_session_token=os.environ["AWS_SESSION_TOKEN"], 23 | region_name=os.environ["AWS_REGION_NAME"] 24 | ) 25 | sagemaker_runtime = boto_session.client("sagemaker-runtime") 26 | response = sagemaker_runtime.invoke_endpoint( 27 | EndpointName=endpoint_name, 28 | Body=json.dumps(test_sample), 29 | ContentType="application/json", 30 | Accept="application/json" 31 | ) 32 | predictions = json.loads(response['Body'].read().decode("utf-8")) 33 | return np.array(predictions) 34 | 35 | if __name__ == "__main__": 36 | mcp.run(transport="stdio") -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/99-use-cases/support-ticket-triage/langgraph-requirements.txt: -------------------------------------------------------------------------------- 1 | boto3 2 | botocore 3 | sagemaker 4 | langchain 5 | langchain_aws 6 | langchain_experimental 7 | langchain-community 8 | duckduckgo-search 9 | langgraph 10 | matplotlib -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/99-use-cases/text2dsl-mcp/README.md: -------------------------------------------------------------------------------- 1 | # No-SQL RAG & Text to DSL using LangGraph, OpenSearch Serverless and MCP 2 | 3 | In this lab, you'll explore how to extend RAG capabilities to NoSQL databases. Using LLM-powered text-to-DSL conversion and a Model Context Protocol (MCP) server that provides connectivity to an OpenSearch Serverless Collection, this lab demonstrates natural language querying of JSON documents with complex query conditions. This powerful combination enables comprehensive enterprise search applications that can access semi-structured data without transforming it into a structured format or incurring additional LLM costs to generate embeddings. 4 | 5 | ## Learning Objectives 6 | 7 | By the end of this lab, you will be able to: 8 | 9 | - Set up a serverless collection in Amazon OpenSearch Service 10 | - Ingest sample findings from [Amazon GuardDuty](https://docs.aws.amazon.com/guardduty/latest/ug/sample_findings.html) into an OpenSearch index 11 | - Create a simple MCP server that retrieves documents from OpenSearch indices 12 | - Using LangChain MCP Adapters, connect your agentic application to MCP servers 13 | - Implement text-to-DSL capabilities to query OpenSearch indices with natural language 14 | - Integrate the query results with foundation model responses 15 | - Build a comprehensive enterprise search application 16 | 17 | ## Key Concepts 18 | 19 | ### Text-to-DSL 20 | 21 | Text-to-DSL refers to the process of translating natural language text into a Domain-Specific Language (DSL). This is commonly used in the context of search engines like Elasticsearch or OpenSearch, where the DSL is used to construct complex search queries. Essentially, you input text that describes what you want to search for, and the system converts it into a DSL query that the search engine can understand and execute.
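To make the translation concrete, here is a minimal, illustrative sketch of what the LLM might produce. The field names (`severity`, `updatedAt`) are hypothetical placeholders, not the lab's actual GuardDuty schema (that lives in `guardduty-index-schema.json`); the resulting dict is the kind of input the `query_dsl` tool in `mcp_dsl_server.py` accepts:

```python
# Hypothetical example: the user asks
#   "Show me high-severity findings from the last 7 days"
# and the LLM translates it into an OpenSearch DSL query like this.
# Field names below are illustrative, not the lab's real index schema.
dsl_query = {
    "query": {
        "bool": {
            "must": [
                {"range": {"severity": {"gte": 7}}},            # high-severity only
                {"range": {"updatedAt": {"gte": "now-7d/d"}}},  # OpenSearch date math
            ]
        }
    },
    "size": 10,  # cap the number of returned documents
}
```

In the lab, the agent first calls the `get_index_schema` tool so the LLM knows which fields exist, then passes its generated query to `query_dsl` for execution.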
22 | 23 | ### Model Context Protocol (MCP) 24 | 25 | The Model Context Protocol (MCP) is an open protocol that enables seamless integration between LLM applications and external data sources and tools. Whether you're building an AI-powered IDE, enhancing a chat interface, or creating custom AI workflows, MCP provides a standardized way to connect LLMs with the context they need. 26 | 27 | ### MCP Server 28 | An MCP Server is a lightweight program that exposes specific capabilities through the standardized Model Context Protocol. Host applications (such as chatbots, IDEs, and other AI tools) have MCP clients that maintain 1:1 connections with MCP servers. MCP servers can access local data sources and remote services to provide additional context that improves the generated outputs from the models. 29 | 30 | ### LangChain MCP Adapters 31 | The library provides a lightweight wrapper that makes MCP tools compatible with LangChain and LangGraph. 32 | 33 | 34 | ## Lab Structure 35 | * **text2dsl-mcp.ipynb**
36 | The main notebook file that guides you through implementing text-to-DSL capabilities 37 | 38 | * **cfn-oss-collection.yaml**
39 | The CloudFormation template to deploy an Amazon OpenSearch Serverless Collection to be used in this lab.
40 | _You can skip this if you are participating in an AWS Instructor-led workshop event, because this stack is pre-deployed in the provided AWS account_
43 | The Python script with the MCP server implementation. The MCP server provides tools to get the schema of indices in the OpenSearch collection, and to execute DSL queries to retrieve data from those indices. 44 | 45 | * **utils.py**
46 | A collection of helper functions to use in this lab. 47 | 48 | * **guardduty-index-schema.json**
49 | This file defines the schema of the test dataset used in the lab. The lab uses sample findings generated by Amazon GuardDuty. 50 | 51 | ## Dataset 52 | 53 | In this lab, we will use [Amazon GuardDuty](https://aws.amazon.com/guardduty/) to generate JSON documents. Amazon GuardDuty uses AI and ML with integrated threat intelligence from AWS and leading third parties to help protect your AWS accounts, workloads, and data from threats.
54 | Amazon GuardDuty helps you generate sample findings to visualize and understand the various finding types that it can generate. When you generate sample findings, GuardDuty populates your current findings list with one sample for each supported finding type, including attack sequence finding types. 55 | 56 | ## Getting Started 57 | 58 | Before beginning this lab, you should have: 59 | - Basic understanding of [Query DSL](https://docs.opensearch.org/docs/latest/query-dsl/) 60 | - Familiarity with Amazon OpenSearch Service 61 | 62 | To start working with the notebooks: 63 | 64 | 1. Navigate to the `workshops/diy-agents-with-sagemaker-and-bedrock/99-use-cases/text2dsl-mcp` folder in the cloned repository 65 | 2. Open `text2dsl-mcp.ipynb` and work through it sequentially to implement text-to-DSL capabilities using MCP 66 | -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/99-use-cases/text2dsl-mcp/cfn-oss-collection.yaml: -------------------------------------------------------------------------------- 1 | AWSTemplateFormatVersion: "2010-09-09" 2 | Description: CloudFormation template for OpenSearch Serverless Collection 3 | 4 | Parameters: 5 | CollectionName: 6 | Type: String 7 | Description: Name of the OpenSearch Serverless Collection 8 | Default: "agent-ws-collection" 9 | 10 | StandbyReplicas: 11 | Type: String 12 | Description: Whether to enable standby replicas for the collection 13 | Default: "DISABLED" 14 | AllowedValues: 15 | - "ENABLED" 16 | - "DISABLED" 17 | 18 | NetworkPolicyName: 19 | Type: String 20 | Description: Name of the network policy for the collection 21 | Default: "agent-ws-network-policy" 22 | 23 | EncryptionPolicyName: 24 | Type: String 25 | Description: Name of the encryption policy for the collection 26 | Default: "agent-ws-encryption-policy" 27 | 28 | DataAccessPolicyName: 29 | Type: String 30 | Description: Name of the data access policy for the collection 31 | Default: "agent-ws-data-access-policy" 32 | 33 | Resources: 34 | # Network Policy for the Collection 35 | OpenSearchNetworkPolicy: 36 | Type: AWS::OpenSearchServerless::SecurityPolicy 37 | Properties: 38 | Name: !Ref NetworkPolicyName 39 | Type: "network" 40 | Description: "Network policy for Agentic AI Workshop OpenSearch Collection" 41 | Policy: !Sub | 42 | [{ 43 | "Rules":[ 44 | { 45 | "ResourceType":"collection", 46 | "Resource":["collection/${CollectionName}"] 47 | }, 48 | { 49 | "ResourceType":"dashboard", 50 | "Resource":["collection/${CollectionName}"] 51 | } 52 | ], 53 | "AllowFromPublic":true 54 | }] 55 | 56 | # Encryption Policy for the Collection 57 | OpenSearchEncryptionPolicy: 58 | Type: AWS::OpenSearchServerless::SecurityPolicy 59 | Properties: 60 | Name: !Ref EncryptionPolicyName 61 | Type: "encryption" 62 | Description: "Encryption policy for Agentic AI Workshop OpenSearch Collection" 63 | Policy: !Sub | 64 | { 65 | "Rules":[ 66 | { 67 | "ResourceType":"collection", 68 | "Resource":["collection/${CollectionName}"] 69 | } 70 | ], 71 | "AWSOwnedKey":true 72 | } 73 | 74 | # Data Access Policy for the Collection 75 | OpenSearchDataAccessPolicy: 76 | Type: AWS::OpenSearchServerless::AccessPolicy 77 | Properties: 78 | Name: !Ref DataAccessPolicyName 79 | Type: "data" 80 | Description: "Data access policy for Agentic AI Workshop OpenSearch Collection" 81 | Policy: !Sub | 82 | [ 83 | { 84 | "Rules":[ 85 | { 86 | "ResourceType":"collection", 87 | "Resource":["collection/${CollectionName}"], 88 | "Permission":[ 89 |
"aoss:CreateCollectionItems", 90 | "aoss:DeleteCollectionItems", 91 | "aoss:UpdateCollectionItems", 92 | "aoss:DescribeCollectionItems" 93 | ] 94 | }, 95 | { 96 | "ResourceType":"index", 97 | "Resource":["index/${CollectionName}/*"], 98 | "Permission":[ 99 | "aoss:CreateIndex", 100 | "aoss:DeleteIndex", 101 | "aoss:UpdateIndex", 102 | "aoss:DescribeIndex", 103 | "aoss:ReadDocument", 104 | "aoss:WriteDocument" 105 | ] 106 | } 107 | ], 108 | "Principal":[ 109 | "arn:aws:iam::${AWS::AccountId}:root" 110 | ] 111 | } 112 | ] 113 | 114 | # OpenSearch Serverless Collection 115 | OpenSearchCollection: 116 | Type: AWS::OpenSearchServerless::Collection 117 | DependsOn: 118 | - OpenSearchDataAccessPolicy 119 | - OpenSearchEncryptionPolicy 120 | - OpenSearchNetworkPolicy 121 | Properties: 122 | Name: !Ref CollectionName 123 | Type: SEARCH 124 | Description: "OpenSearch Serverless Collection for Agentic AI Workshop" 125 | StandbyReplicas: !Ref StandbyReplicas 126 | 127 | 128 | # Lambda function to wait for collection to be active 129 | CollectionStatusCheckFunction: 130 | Type: AWS::Lambda::Function 131 | Properties: 132 | Handler: index.handler 133 | Role: !GetAtt CollectionStatusCheckRole.Arn 134 | Runtime: python3.9 135 | Timeout: 600 136 | Code: 137 | ZipFile: | 138 | import boto3 139 | import cfnresponse 140 | import time 141 | import traceback 142 | 143 | aoss = boto3.client('opensearchserverless') 144 | 145 | def handler(event, context): 146 | print(f"Collection status check event: {event}") 147 | response_data = {} 148 | 149 | try: 150 | request_type = event['RequestType'] 151 | print(f"Request type: {request_type}") 152 | 153 | # Only check status on Create or Update 154 | if request_type in ['Create', 'Update']: 155 | collection_name = event['ResourceProperties']['CollectionName'] 156 | wait_for_collection_active(collection_name) 157 | 158 | # Get collection endpoint 159 | collection_info = get_collection_info(collection_name) 160 | response_data['CollectionId'] = collection_info['Id'] 161 | response_data['CollectionEndpoint'] = collection_info['CollectionEndpoint'] 162 | response_data['DashboardEndpoint'] = collection_info['DashboardEndpoint'] 163 | 164 | # Always send SUCCESS to CFN 165 | cfnresponse.send(event, context, cfnresponse.SUCCESS, response_data) 166 | 167 | except Exception as e: 168 | print(f"Error: {str(e)}") 169 | print(traceback.format_exc()) 170 | cfnresponse.send(event, context, cfnresponse.FAILED, {"Error": str(e)}) 171 | 172 | def wait_for_collection_active(collection_name): 173 | print(f"Waiting for collection {collection_name} to become active") 174 | max_wait_time = 600 # 10 minutes 175 | start_time = time.time() 176 | 177 | while time.time() - start_time < max_wait_time: 178 | try: 179 | response = aoss.batch_get_collection(names=[collection_name]) 180 | if not response['collectionDetails']: 181 | print(f"Collection {collection_name} not found, waiting...") 182 | time.sleep(15) 183 | continue 184 | 185 | status = response['collectionDetails'][0]['status'] 186 | print(f"Collection status: {status}") 187 | 188 | if status == 'ACTIVE': 189 | print(f"Collection {collection_name} is now active") 190 | return True 191 | elif status in ['FAILED', 'DELETED']: 192 | raise Exception(f"Collection entered {status} state") 193 | 194 | # Wait before checking again 195 | time.sleep(15) 196 | except Exception as e: 197 | if 'ResourceNotFoundException' in str(e): 198 | print(f"Collection {collection_name} not found yet, waiting...") 199 | time.sleep(15) 200 | else: 201 | raise 202 | 
203 | raise Exception(f"Timed out waiting for collection {collection_name} to become active") 204 | 205 | def get_collection_info(collection_name): 206 | response = aoss.batch_get_collection(names=[collection_name]) 207 | if not response['collectionDetails']: 208 | raise Exception(f"Collection {collection_name} not found") 209 | 210 | collection = response['collectionDetails'][0] 211 | return { 212 | 'Id': collection['id'], 213 | 'CollectionEndpoint': collection['collectionEndpoint'], 214 | 'DashboardEndpoint': collection['dashboardEndpoint'] 215 | } 216 | 217 | # IAM Role for the Lambda function 218 | CollectionStatusCheckRole: 219 | Type: AWS::IAM::Role 220 | Properties: 221 | AssumeRolePolicyDocument: 222 | Version: "2012-10-17" 223 | Statement: 224 | - Effect: Allow 225 | Principal: 226 | Service: lambda.amazonaws.com 227 | Action: sts:AssumeRole 228 | Policies: 229 | - PolicyName: OpenSearchServerlessAccess 230 | PolicyDocument: 231 | Version: "2012-10-17" 232 | Statement: 233 | - Effect: Allow 234 | Action: 235 | - aoss:BatchGetCollection 236 | - aoss:ListCollections 237 | Resource: "*" 238 | - PolicyName: CloudWatchLogsAccess 239 | PolicyDocument: 240 | Version: "2012-10-17" 241 | Statement: 242 | - Effect: Allow 243 | Action: 244 | - logs:CreateLogGroup 245 | - logs:CreateLogStream 246 | - logs:PutLogEvents 247 | Resource: "arn:aws:logs:*:*:*" 248 | 249 | # Custom resource to wait for collection to be active 250 | CollectionStatusCheck: 251 | Type: Custom::CollectionStatusCheck 252 | DependsOn: OpenSearchCollection 253 | Properties: 254 | ServiceToken: !GetAtt CollectionStatusCheckFunction.Arn 255 | CollectionName: !Ref CollectionName 256 | ServiceTimeout: '600' 257 | 258 | Outputs: 259 | CollectionId: 260 | Description: "OpenSearch Serverless Collection ID" 261 | Value: !GetAtt CollectionStatusCheck.CollectionId 262 | 263 | CollectionEndpoint: 264 | Description: "OpenSearch Serverless Collection Endpoint" 265 | Value: !GetAtt CollectionStatusCheck.CollectionEndpoint 266 | 267 | DashboardEndpoint: 268 | Description: "OpenSearch Serverless Dashboard Endpoint" 269 | Value: !GetAtt CollectionStatusCheck.DashboardEndpoint 270 | 271 | CollectionARN: 272 | Description: "OpenSearch Serverless Collection ARN" 273 | Value: !Sub "arn:aws:aoss:${AWS::Region}:${AWS::AccountId}:collection/${CollectionStatusCheck.CollectionId}" -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/99-use-cases/text2dsl-mcp/mcp_dsl_server.py: -------------------------------------------------------------------------------- 1 | import json 2 | from mcp.server.fastmcp import Context, FastMCP 3 | 4 | 5 | # Create a named server 6 | COLLECTION_NAME = "agent-ws-collection" 7 | 8 | # Specify dependencies for deployment and development 9 | mcp = FastMCP("OpenSearch DSL Query App", dependencies=["pandas", "numpy"]) 10 | 11 | 12 | ############################################################## 13 | # Helper functions for OSS 14 | from opensearchpy import OpenSearch, RequestsHttpConnection, AWSV4SignerAuth 15 | import boto3 16 | import os 17 | import requests 18 | import json 19 | from retry import retry 20 | 21 | 22 | def get_opensearch_collection_endpoint(collection_name, region="us-west-2"): 23 | """ 24 | Get the OpenSearch Serverless collection endpoint from a collection name 25 | 26 | Args: 27 | collection_name (str): The name of the OpenSearch Serverless collection 28 | region (str, optional): AWS region. If None, uses the default region. 
29 | 30 | Returns: 31 | dict: Dictionary containing collection endpoints and ID 32 | """ 33 | # Initialize the OpenSearch Serverless client 34 | aoss = boto3.client('opensearchserverless', region_name=region) 35 | service = 'aoss' 36 | session = boto3.Session(aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"], 37 | aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"], 38 | aws_session_token = os.environ["AWS_SESSION_TOKEN"], 39 | region_name=region) 40 | credentials =session.get_credentials() 41 | auth = AWSV4SignerAuth(credentials, region, service) 42 | try: 43 | # Use batch_get_collection to get collection details by name 44 | response = aoss.batch_get_collection(names=[collection_name]) 45 | 46 | # Check if collection was found 47 | if not response['collectionDetails']: 48 | raise ValueError(f"Collection '{collection_name}' not found") 49 | 50 | # Extract collection details 51 | collection = response['collectionDetails'][0] 52 | 53 | # Return the endpoints and ID 54 | return { 55 | 'collection_id': collection['id'], 56 | 'collection_endpoint': collection['collectionEndpoint'], 57 | 'dashboard_endpoint': collection['dashboardEndpoint'], 58 | 'collection_arn': collection['arn'] 59 | } 60 | 61 | except Exception as e: 62 | print(f"Error getting collection endpoint: {str(e)}") 63 | raise 64 | 65 | 66 | def query_opensearch_with_dsl(collection_endpoint, dsl_json, index_name = 'guardduty-index', region="us-west-2"): 67 | """ 68 | Query an OpenSearch index using DSL with the OpenSearch Python client 69 | 70 | Args: 71 | collection_endpoint (str): The OpenSearch collection endpoint (without https://) 72 | index_name (str): Name of the index to query 73 | dsl_json (dict): The OpenSearch DSL query 74 | region (str, optional): AWS region. If None, uses the default region. 75 | 76 | Returns: 77 | dict: Query results 78 | """ 79 | # Get AWS credentials 80 | session = boto3.Session(aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"], 81 | aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"], 82 | aws_session_token = os.environ["AWS_SESSION_TOKEN"], 83 | region_name=region) 84 | credentials = session.get_credentials() 85 | 86 | # Create the auth for OpenSearch 87 | auth = AWSV4SignerAuth(credentials, session.region_name, 'aoss') # Use 'aoss' for OpenSearch Serverless 88 | 89 | # Create the OpenSearch client 90 | client = OpenSearch( 91 | hosts=[{'host': collection_endpoint, 'port': 443}], 92 | http_auth=auth, 93 | use_ssl=True, 94 | verify_certs=True, 95 | connection_class=RequestsHttpConnection, 96 | pool_maxsize=20 97 | ) 98 | 99 | try: 100 | # Execute the search query 101 | response = client.search( 102 | body=dsl_json, 103 | index=index_name 104 | ) 105 | 106 | # Print summary of results 107 | hits = response['hits']['hits'] 108 | total = response['hits']['total']['value'] if isinstance(response['hits']['total'], dict) else response['hits']['total'] 109 | 110 | print(f"Found {total} documents in {index_name}") 111 | print(f"Showing top {len(hits)} results:") 112 | 113 | return hits 114 | except Exception as e: 115 | print(f"Error querying OpenSearch: {str(e)}") 116 | raise 117 | 118 | 119 | 120 | ################################## 121 | # TOOLS 122 | @mcp.tool() 123 | def query_dsl(dsl_json: dict): 124 | """Query input DSL to OpenSearch Collection. 
""" 125 | collection_endpoint = get_opensearch_collection_endpoint(COLLECTION_NAME)["collection_endpoint"].split("https://")[1] 126 | return query_opensearch_with_dsl(collection_endpoint, dsl_json) 127 | 128 | 129 | @mcp.tool() 130 | def get_index_schema(index_name: str) -> dict: 131 | """Return JSON schema of an index in the OpenSearch Collection """ 132 | with open(index_name+"-schema.json", "r") as f: 133 | schema = json.load(f) 134 | return schema 135 | 136 | 137 | @mcp.tool() 138 | def add_two_numbers(a: int, b: int) -> str: 139 | """Add two numbers""" 140 | return f"{a} + {b} = {a+b} : This is to show your MCP tool has been invoked successfully." 141 | 142 | 143 | if __name__ == "__main__": 144 | mcp.run() 145 | 146 | 147 | 148 | -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/99-use-cases/text2dsl-mcp/requirements.txt: -------------------------------------------------------------------------------- 1 | mcp==1.9.2 2 | mcp[cli] 3 | retry 4 | langchain_mcp_adapters 5 | langgraph 6 | opensearch-py 7 | strands-agents 8 | strands-agents-tools 9 | strands-agents-builder 10 | nest_asyncio 11 | -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/99-use-cases/text2sql/README.md: -------------------------------------------------------------------------------- 1 | # Database RAG & Text to SQL using LangChain & SQL agent 2 | 3 | In this lab, you'll explore how to extend RAG capabilities beyond unstructured document retrieval to include structured data sources. You'll learn how to implement text-to-SQL conversion, enabling natural language querying of database information alongside traditional document retrieval. This powerful combination allows for comprehensive enterprise search applications that can access both unstructured documents and structured database records. 4 | 5 | ## Learning Objectives 6 | 7 | By the end of this lab, you will be able to: 8 | 9 | - Set up an Athena database and AWS Glue crawler for structured data access 10 | - Implement text-to-SQL capabilities to query databases with natural language 11 | - Create a unified querying experience across documents and databases 12 | - Integrate structured data results with foundation model responses 13 | - Build a comprehensive enterprise search application 14 | 15 | ## Key Concepts 16 | 17 | ### Text-to-SQL 18 | 19 | Text-to-SQL converts natural language questions into structured SQL queries that can retrieve information from relational databases. This capability bridges the gap between how humans naturally ask questions and how databases store and access data. 20 | 21 | ### Structured Data Integration 22 | 23 | Combining structured database queries with unstructured document retrieval creates a comprehensive knowledge system that can leverage all available data sources in your organization. 24 | 25 | ### Enterprise Search 26 | 27 | Enterprise search applications need to access multiple data sources with different formats. RAG techniques can unify these diverse sources under a single natural language interface. 
28 | 29 | ## Lab Structure 30 | 31 | This lab consists of 2 notebooks that guide you through implementing text-to-SQL capabilities: 32 | 33 | ## text2sql 34 | 35 | 1-create-db-tables.ipynb: Configure the Athena database and AWS Glue crawler 36 | 2-text2sql-langchain: Use LangChain and a SQL agent for text-to-SQL conversion 37 | 38 | ## Dataset 39 | 40 | In this lab, you'll work with: 41 | - A structured retail transaction dataset stored in Amazon S3 42 | - The dataset will be cataloged using AWS Glue and made queryable through Amazon Athena 43 | - You'll answer business questions that require accessing this structured data 44 | 45 | ## Getting Started 46 | 47 | Before beginning this lab, you should have: 48 | - Basic understanding of SQL and relational databases 49 | - Familiarity with AWS data analytics services 50 | 51 | To start working with the notebooks: 52 | 53 | 1. Navigate to the `workshops/diy-agents-with-sagemaker-and-bedrock/99-use-cases/text2sql` folder in the cloned repository 54 | 2. Open `1-create-db-tables` to begin setting up the Athena database and Glue Data Catalog 55 | 3. Follow each notebook sequentially to implement text-to-SQL capabilities 56 | 57 | 58 | ## Next Steps 59 | 60 | After completing these labs, you'll have a comprehensive understanding of advanced RAG techniques that combine unstructured document retrieval, metadata filtering, safety guardrails, reranking, and structured data access. These capabilities form the foundation for building sophisticated enterprise AI applications. 61 | 62 | Happy learning! 63 | -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/99-use-cases/text2sql/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3 2 | sqlalchemy 3 | langchain 4 | langchain-community 5 | langchain-aws 6 | PyAthena 7 | s3fs 8 | openpyxl 9 | -------------------------------------------------------------------------------- /workshops/diy-agents-with-sagemaker-and-bedrock/README.md: -------------------------------------------------------------------------------- 1 | # Building AI Agents with Amazon Bedrock and Amazon SageMaker AI 2 | 3 | > This content is available in [Workshop Studio](https://catalog.workshops.aws/building-effective-ai-agents-on-aws/). What follows is a synopsis of the content you will find by following the provided link. 4 | 5 | Welcome to the "Building AI Agents with Amazon Bedrock and Amazon SageMaker AI" workshop! This publicly available, hands-on experience is designed for builders who are ready to harness the power of Large Language Models (LLMs) and create autonomous AI agents on AWS. 6 | 7 | In this workshop, you'll dive into the world of AI agents, learning how to leverage AWS services to build AI systems that can act independently, make decisions, and complete complex tasks with minimal human intervention. 8 | 9 | By the end of this workshop, you'll be able to: 10 | 11 | - Understand and implement foundational LLM patterns with tools and retrieval 12 | - Build effective agentic workflows using proven patterns 13 | - Create autonomous agent systems for complex tasks 14 | - Choose and implement appropriate frameworks for your use case 15 | - Monitor and optimize your AI agent systems 16 | - Apply these patterns to real-world industry use cases 17 | 18 | ## Workshop Content 19 | 20 | 1. Build your autonomous agents and agentic workflows from scratch 21 | 2.
Learn one or more open-source frameworks for Agentic AI (crew.AI, langgraph, smolagents, etc) 22 | 3. Deep dive on advanced topics like observability/tracing, guardrails, evaluation 23 | 4. Productionize agentic AI on AWS 24 | 5. Advanced use cases (MCP client/server on AWS, Natural Language 2 SQL, etc) 25 | 26 | ## How to run the workshop 27 | 28 | This workshop follows a hands-on, self-paced format. Each module contains Jupyter notebooks that you'll run in your own JupyterLab environment. The notebooks include: 29 | 30 | - Step-by-step instructions and explanations 31 | - Code samples that you can run and modify 32 | - Exercises to reinforce your learning 33 | - Links to additional resources 34 | -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/README.md: -------------------------------------------------------------------------------- 1 | # workshop-notebooks 2 | -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 110 | .pdm.toml 111 | .pdm-python 112 | .pdm-build/ 113 | 114 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 115 | __pypackages__/ 116 | 117 | # Celery stuff 118 | celerybeat-schedule 119 | celerybeat.pid 120 | 121 | # SageMath parsed files 122 | *.sage.py 123 | 124 | # Environments 125 | .env 126 | .venv 127 | env/ 128 | venv/ 129 | ENV/ 130 | env.bak/ 131 | venv.bak/ 132 | 133 | # Spyder project settings 134 | .spyderproject 135 | .spyproject 136 | 137 | # Rope project settings 138 | .ropeproject 139 | 140 | # mkdocs documentation 141 | /site 142 | 143 | # mypy 144 | .mypy_cache/ 145 | .dmypy.json 146 | dmypy.json 147 | 148 | # Pyre type checker 149 | .pyre/ 150 | 151 | # pytype static type analyzer 152 | .pytype/ 153 | 154 | # Cython debug symbols 155 | cython_debug/ 156 | 157 | # PyCharm 158 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 159 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 160 | # and can be added to the global gitignore or merged into this file. For a more nuclear 161 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 162 | #.idea/ 163 | 164 | 165 | #mac metaata 166 | .DS_Store 167 | -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/README.md: -------------------------------------------------------------------------------- 1 | # Fiddler <> SageMaker Demo 2024 2 | 3 | For workshop admins, preconfigure a collection of Fiddler applications using the `AdminFiddlerSagemakerDemo.ipynb` notebook. 4 | 5 | For workshop users, get your SageMaker App User Profile from your workshop administrator then follow along the `FiddlerSagemakerDemo.ipynb` notebook. 6 | 7 | ## Running the Notebook Locally 8 | 9 | Run the following steps from a machine with access to a web browser. 10 | 11 | 1. Clone this repository and `cd` into it. 12 | 13 | ```shell 14 | git clone git@github.com:fiddler-labs/fiddler-demo-dec-2024.git 15 | cd fiddler-demo-dec-2024 16 | ``` 17 | 18 | 1. Install the version of Python that will be used in the workshop. 19 | 20 | MacOS: 21 | 22 | ```shell 23 | brew install python@3.12 24 | ``` 25 | 26 | Linux: 27 | 28 | ```shell 29 | sudo apt install python3.12 30 | ``` 31 | 32 | 1. Create a Python virtual environment and activate it. 33 | 34 | ```shell 35 | eval $(which python3.12) -m venv .venv 36 | source .venv/bin/activate 37 | ``` 38 | 39 | 1. Install JupyterLab and run it to open your browser to Jupyter Notebook. 40 | 41 | ```shell 42 | python -m pip install jupyterlab 43 | jupyter lab 44 | ``` 45 | 46 | 1. Select the `FiddlerSagemakerDemo.ipynb` Notebook from the sidebar and follow along! 
47 | -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/assets/charts_llm.yaml: -------------------------------------------------------------------------------- 1 | charts: 2 | - data_source: 3 | filters: 4 | bin_size: Day 5 | time_label: 30d 6 | time_zone: America/Los_Angeles 7 | query_type: EMBEDDING 8 | queries: 9 | - column_name: PromptTextEmbedding 10 | plot: 3D 11 | metric_type: umap 12 | sample_size: 100 13 | query_key: dd4566c3-2ac8-49a8-b2b5-8ffb543a61bb 14 | retrieve_columns: ["user_input", "chatbot_response", "fdl_topics__chatbot_response__topic", "fdl_enrichment_qa_sentiment__chatbot_response__sentiment", "fdl_enrichment_qa_sentiment__user_input__sentiment", "session_id","timestamp","feedback","prompt_tokens","completion_tokens","total_tokens","srch_id"] 15 | minimum_distance: 0.5 16 | number_of_neighbors: 7 17 | model_name: trip_assistant_chatbot 18 | query_type: EMBEDDING 19 | description: UMAP Projection for User Input Embeddings 20 | options: 21 | color_by: feedback 22 | title: User Input UMAP 23 | - data_source: 24 | filters: 25 | bin_size: Day 26 | time_label: 30d 27 | time_zone: America/Los_Angeles 28 | query_type: EMBEDDING 29 | queries: 30 | - column_name: ResponseTextEmbedding 31 | plot: 3D 32 | metric_type: umap 33 | sample_size: 100 34 | query_key: dd4566c3-2ac8-49a8-b2b5-8ffb543a61bb 35 | retrieve_columns: ["user_input", "chatbot_response", "fdl_topics__chatbot_response__topic", "fdl_enrichment_qa_sentiment__chatbot_response__sentiment", "fdl_enrichment_qa_sentiment__user_input__sentiment", "session_id","timestamp","feedback","prompt_tokens","completion_tokens","total_tokens","srch_id"] 36 | minimum_distance: 0.5 37 | number_of_neighbors: 7 38 | model_name: trip_assistant_chatbot 39 | version: v1 40 | query_type: EMBEDDING 41 | description: UMAP Projection Chatbot Reponse Embeddings 42 | options: 43 | color_by: feedback 44 | title: Chatbot Response UMAP 45 | - data_source: 46 | filters: 47 | bin_size: Day 48 | time_label: 7d 49 | time_zone: America/Los_Angeles 50 | query_type: MONITORING 51 | queries: 52 | - columns: [] 53 | metric: Total Cost 54 | metric_type: custom 55 | query_key: dd4466c3-2ac8-49a8-b8b5-8ffa543a61b5 56 | viz_type: line 57 | model_name: trip_assistant_chatbot 58 | - columns: ['prompt_tokens', 'total_tokens', 'completion_tokens'] 59 | metric: "sum" 60 | metric_type: "statistic" 61 | query_key: dd4466c3-2ac8-49a8-b9b5-8ffa543a61b6 62 | viz_type: bar 63 | model_name: trip_assistant_chatbot 64 | description: Cost Tracker for Chatbot 65 | options: 66 | queryNames: 67 | dd4466c3-2ac8-49a8-b8b5-8ffa543a61b5: Total Cost in USD 68 | dd4466c3-2ac8-49a8-b9b5-8ffa543a61b6: Number of Tokens Used 69 | customYAxis: 70 | - query_keys: ['dd4466c3-2ac8-49a8-b8b5-8ffa543a61b5'] 71 | scale: value 72 | - query_keys: ['dd4466c3-2ac8-49a8-b9b5-8ffa543a61b6'] 73 | scale: value 74 | query_type: MONITORING 75 | title: Total Cost Tracker 76 | - data_source: 77 | filters: 78 | bin_size: Day 79 | time_label: 7d 80 | time_zone: America/Los_Angeles 81 | query_type: MONITORING 82 | queries: 83 | - columns: ["fdl_enrichment_qa_sentiment__user_input__sentiment_probability"] 84 | metric: average 85 | metric_type: statistic 86 | query_key: 334466c3-2ac8-49a8-b8b5-8ffa543a6112 87 | segment: "No Click" 88 | viz_type: line 89 | model_name: trip_assistant_chatbot 90 | - columns: ["fdl_enrichment_qa_sentiment__user_input__sentiment_probability"] 91 | metric: average 92 | metric_type: 
statistic 93 | query_key: 224466c3-2ac8-49a8-b8b5-8ffa543a6112 94 | segment: "Booked" 95 | viz_type: line 96 | model_name: trip_assistant_chatbot 97 | - columns: ["fdl_enrichment_qa_sentiment__user_input__sentiment_probability"] 98 | metric: average 99 | metric_type: statistic 100 | query_key: 114466c3-2ac8-49a8-b8b5-8ffa543a6112 101 | segment: "Click" 102 | viz_type: line 103 | model_name: trip_assistant_chatbot 104 | description: Daily user sentiment segmented by outcome 105 | options: 106 | queryNames: 107 | 334466c3-2ac8-49a8-b8b5-8ffa543a6112: No Click 108 | 224466c3-2ac8-49a8-b8b5-8ffa543a6112: Booked 109 | 114466c3-2ac8-49a8-b8b5-8ffa543a6112: Click 110 | query_type: MONITORING 111 | title: User Sentiment Tracker 112 | -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/assets/charts_ml.yaml: -------------------------------------------------------------------------------- 1 | charts: 2 | - data_source: 3 | filters: 4 | bin_size: Day 5 | time_label: 7d 6 | time_zone: America/Los_Angeles 7 | query_type: MONITORING 8 | queries: 9 | - columns: ['visitor_location_country_id'] 10 | categories: ["Brazil", "Canada", "China", "France", "India", "Japan", "Mexico", "South Africa", "UK", "USA"] 11 | metric: frequency 12 | metric_type: statistic 13 | query_key: c14666c3-2ac8-49a8-b2b5-8ffa543a61b6 14 | viz_type: bar 15 | model_name: search_ranking_trips 16 | description: Daily Visitor Country of Origin 17 | options: 18 | queryNames: 19 | c14666c3-2ac8-49a8-b2b5-8ffa543a61b6: "Country" 20 | query_type: MONITORING 21 | title: Visitor Geographical Distribution 22 | - data_source: 23 | filters: 24 | bin_size: Day 25 | time_label: 7d 26 | time_zone: America/Los_Angeles 27 | query_type: MONITORING 28 | queries: 29 | - columns: ['destination_country_id'] 30 | categories: ["USA", "UK", "Mexico", "Japan", "Canada"] 31 | metric: frequency 32 | metric_type: statistic 33 | query_key: c14666c3-2ac8-49a8-b2b5-8ffa543a61b8 34 | viz_type: bar 35 | model_name: search_ranking_trips 36 | description: Daily Volume of Requests to Destinations 37 | options: 38 | queryNames: 39 | c14666c3-2ac8-49a8-b2b5-8ffa543a61b8: "Country" 40 | query_type: MONITORING 41 | title: Destination Geographical Distribution 42 | - data_source: 43 | filters: 44 | bin_size: Day 45 | time_label: 7d 46 | time_zone: America/Los_Angeles 47 | query_type: MONITORING 48 | queries: 49 | - columns: ["price_usd"] 50 | metric: "average" 51 | metric_type: "statistic" 52 | query_key: d4f6dffe-4aea-436e-aa48-01f72fc5c889 53 | segment: "No Click on Promo - USA" 54 | viz_type: "line" 55 | model_name: search_ranking_trips 56 | - columns: ["price_usd"] 57 | metric: "average" 58 | metric_type: "statistic" 59 | query_key: 2792ef36-4f71-495e-9b4f-c390c63b2f45 60 | segment: "Clicked on Promo - USA" 61 | viz_type: "line" 62 | model_name: search_ranking_trips 63 | description: Tracking the effect of price changes on user interaction with listings 64 | options: 65 | queryNames: 66 | d4f6dffe-4aea-436e-aa48-01f72fc5c889: "Promo Not Clicked" 67 | 2792ef36-4f71-495e-9b4f-c390c63b2f45: "Clicked Promo" 68 | query_type: MONITORING 69 | title: Price Effect on Clicks 70 | - data_source: 71 | filters: 72 | bin_size: Day 73 | time_label: 30d 74 | time_zone: America/Los_Angeles 75 | query_type: MONITORING 76 | queries: 77 | - columns: [] 78 | metric: map 79 | metric_type: performance 80 | query_key: c14666c3-2ac8-49a8-b2b5-8ffa543a61b1 81 | segment: "Traveling to USA" 82 | viz_type:
line 83 | model_name: search_ranking_trips 84 | - columns: [] 85 | metric: map 86 | metric_type: performance 87 | query_key: c14666c3-2ac8-49a8-b2b5-8ffa543a61b2 88 | segment: "Traveling to UK" 89 | viz_type: line 90 | model_name: search_ranking_trips 91 | - columns: [] 92 | metric: map 93 | metric_type: performance 94 | query_key: c14666c3-2ac8-49a8-b2b5-8ffa543a61b3 95 | segment: "Traveling to Canada" 96 | viz_type: line 97 | model_name: search_ranking_trips 98 | - columns: [] 99 | metric: map 100 | metric_type: performance 101 | query_key: c14666c3-2ac8-49a8-b2b5-8ffa543a61b4 102 | segment: "Traveling to Japan" 103 | viz_type: line 104 | model_name: search_ranking_trips 105 | - columns: [] 106 | metric: map 107 | metric_type: performance 108 | query_key: c14666c3-2ac8-49a8-b2b5-8ffa543a61b5 109 | segment: "Traveling to Mexico" 110 | viz_type: line 111 | model_name: search_ranking_trips 112 | description: Daily Model Performance Segmented by Destination 113 | options: 114 | queryNames: 115 | c14666c3-2ac8-49a8-b2b5-8ffa543a61b1: "USA" 116 | c14666c3-2ac8-49a8-b2b5-8ffa543a61b2: "UK" 117 | c14666c3-2ac8-49a8-b2b5-8ffa543a61b3: "Canada" 118 | c14666c3-2ac8-49a8-b2b5-8ffa543a61b4: "Japan" 119 | c14666c3-2ac8-49a8-b2b5-8ffa543a61b5: "Mexico" 120 | query_type: MONITORING 121 | title: Model Performance Segmented by Destination 122 | -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/assets/llm_events.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/assets/llm_events.parquet -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/client/fiddler_client-3.7.0.dev4-py3-none-any.whl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/client/fiddler_client-3.7.0.dev4-py3-none-any.whl -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/client/sagemaker-2.227.1.dev0-py3-none-any.whl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/client/sagemaker-2.227.1.dev0-py3-none-any.whl -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/imgs/create_dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/imgs/create_dashboard.png -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/imgs/credentials_tab.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/imgs/credentials_tab.png -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/imgs/login_view.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/imgs/login_view.png -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/imgs/rca_drift.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/imgs/rca_drift.png -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/imgs/rca_events.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/imgs/rca_events.png -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/imgs/rca_init.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/imgs/rca_init.png -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/imgs/root_cause_analysis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/imgs/root_cause_analysis.png -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/imgs/settings_view.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/imgs/settings_view.png -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/imgs/view_dashboard.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/imgs/view_dashboard.gif 
-------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/modules/__init__.py -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/modules/chart.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import yaml 3 | 4 | import fiddler as fdl 5 | from fiddler.libs.http_client import RequestClient 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | 10 | def add_chart(project: fdl.Project, model: fdl.Model, unique_id: str, client: RequestClient, chart: dict): 11 | charts_url = '/v3/charts' 12 | title = f'[{unique_id}] {chart.get("title")}' 13 | chart['title'] = title 14 | 15 | for index, query in enumerate(chart['data_source']['queries']): 16 | version = query.get('version', 'v1') 17 | query.update( 18 | { 19 | 'model': {'id': model.id, 'name': model.name}, 20 | 'model_name': model.name, 21 | 'version': version, 22 | } 23 | ) 24 | 25 | baseline_name = query.get('baseline_name') 26 | if baseline_name: 27 | baseline = fdl.Baseline.from_name(name=baseline_name, model_id=model.id) 28 | baseline_id = baseline.id 29 | query['baseline_id'] = baseline_id 30 | del query['baseline_name'] 31 | 32 | if query.get('metric_type') == 'custom': 33 | custom_metrics = fdl.CustomMetric.from_name( 34 | name=query.get('metric'), model_id=model.id 35 | ) 36 | query['metric'] = custom_metrics.id 37 | 38 | segment = query.get('segment') 39 | if segment: 40 | segment = fdl.Segment.from_name(name=segment, model_id=model.id) 41 | query['segment'] = {} 42 | query['segment']['id'] = segment.id 43 | 44 | chart['data_source']['queries'][index] = query 45 | chart['project_id'] = project.id 46 | client.post(url=charts_url, data=chart) 47 | 48 | 49 | def add_charts( 50 | project: fdl.Project, 51 | model: fdl.Model, 52 | unique_id: str, 53 | filename: str, 54 | fiddler_url: str, 55 | token: str, 56 | ) -> list: 57 | charts = None 58 | with open(filename, 'r') as stream: 59 | try: 60 | charts = yaml.safe_load(stream) 61 | except yaml.YAMLError as exc: 62 | print(exc) 63 | 64 | errors = [] 65 | if not charts or not charts.get('charts'):  # guards against YAML load failures and empty files 66 | print("no charts found") 67 | return [] 68 | 69 | client = RequestClient( 70 | fiddler_url, 71 | headers={ 72 | 'Content-Type': 'application/json', 73 | 'Authorization': f'Bearer {token}', 74 | }, 75 | ) 76 | 77 | for chart in charts.get('charts'): 78 | try: 79 | add_chart(project, model, unique_id, client, chart) 80 | except Exception as exc: 81 | message = f'Exception {str(exc)} while adding chart' 82 | logger.error(message) 83 | errors.append( 84 | { 85 | 'chart': chart.get('title'), 86 | 'status': 'FAILED', 87 | 'message': message, 88 | } 89 | ) 90 | continue 91 | 92 | return errors -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/modules/config.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | def append_unique_name(prefix: str, suffix: str) -> str: 4 | return str((prefix + re.sub(r'[^0-9a-z]+', '_', suffix.lower())).strip()[:30])
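# Illustrative example (not part of the original module): append_unique_name lowercases the
# suffix, collapses every run of characters outside [0-9a-z] into a single underscore, and
# truncates the combined string to 30 characters, e.g.
#   append_unique_name('ai_travel_agent_', 'Jane Doe #42')  ->  'ai_travel_agent_jane_doe_42'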
5 | 6 | PATH_TO_SAMPLE_RANKING_CSV = 'assets/search_ranking_sample.csv' 7 | PATH_TO_EVENTS_RANKING_CSV = 'assets/search_ranking_prod.csv' 8 | PATH_TO_SAMPLE_CHATBOT_CSV = 'assets/llm_events.parquet' 9 | 10 | PATH_TO_LLM_CHARTS = 'assets/charts_llm.yaml' 11 | PATH_TO_ML_CHARTS = 'assets/charts_ml.yaml' 12 | 13 | PROJECT_NAME_PREFIX = 'ai_travel_agent_' 14 | 15 | LLM_MODEL_NAME = 'assistant_chatbot' 16 | RANKING_MODEL_NAME = 'search_ranking' -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/modules/llm_onboard.py: -------------------------------------------------------------------------------- 1 | import fiddler as fdl 2 | import pandas as pd 3 | import numpy as np 4 | import modules.config as cfg 5 | import time 6 | 7 | 8 | def create_and_publish_llm(project, model_name: str) -> tuple[fdl.Model, str]: 9 | sample_data_df = pd.read_parquet(cfg.PATH_TO_SAMPLE_CHATBOT_CSV) 10 | 11 | sample_data_df['Enrichment Prompt Embedding'] = sample_data_df[ 12 | 'Enrichment Prompt Embedding' 13 | ].apply(lambda x: x.tolist()) 14 | sample_data_df['Enrichment Response Embedding'] = sample_data_df[ 15 | 'Enrichment Response Embedding' 16 | ].apply(lambda x: x.tolist()) 17 | 18 | fiddler_backend_enrichments = [ 19 | # prompt enrichment 20 | fdl.TextEmbedding( 21 | name='PromptTextEmbedding', 22 | source_column='user_input', 23 | column='Enrichment Prompt Embedding', 24 | n_tags=5, 25 | ), 26 | # response enrichment 27 | fdl.TextEmbedding( 28 | name='ResponseTextEmbedding', 29 | source_column='chatbot_response', 30 | column='Enrichment Response Embedding', 31 | n_tags=5, 32 | ), 33 | ] 34 | 35 | model_spec = fdl.ModelSpec( 36 | inputs=['user_input', 'chatbot_response'], 37 | metadata=list( 38 | sample_data_df.drop(['user_input', 'chatbot_response'], axis=1).columns 39 | ), 40 | custom_features=fiddler_backend_enrichments, 41 | ) 42 | 43 | model_task = fdl.ModelTask.LLM 44 | 45 | timestamp_column = 'timestamp' 46 | 47 | llm_application = None 48 | # Create model 49 | try: 50 | llm_application = fdl.Model.from_data( 51 | source=sample_data_df, 52 | name=model_name, 53 | project_id=project.id, 54 | spec=model_spec, 55 | task=model_task, 56 | event_ts_col=timestamp_column, 57 | max_cardinality=3, 58 | ) 59 | llm_application.create() 60 | except fdl.Conflict: 61 | llm_application = fdl.Model.from_name( 62 | name=model_name, 63 | project_id=project.id, 64 | ) 65 | 66 | print( 67 | f'LLM application registered with id = {llm_application.id} and name = {llm_application.name}' 68 | ) 69 | 70 | segment_definitions = [ 71 | ("Click", "User clicked", "result=='click'"), 72 | ("No Click", "User did not click", "result=='no_click'"), 73 | ("Booked", "User Booked", "result=='booked'"), 74 | ("Liked Answers", "User Liked Answers", "feedback=='like'"), 75 | ("Disliked Answers", "User Disliked Answers", "feedback=='dislike'") 76 | ] 77 | 78 | for name, description, definition in segment_definitions: 79 | try: 80 | fdl.Segment( 81 | name=name, 82 | model_id=llm_application.id, 83 | description=description, 84 | definition=definition, 85 | ).create() 86 | except fdl.Conflict: 87 | print(f"Segment '{name}' already exists.") 88 | 89 | custom_metrics = [ 90 | ("Total Cost", "Cost in USD", "sum((prompt_tokens*0.01)+(completion_tokens*0.03))"), 91 | ("Prompt Token Cost", "Cost in USD", "sum((prompt_tokens*0.01))"), 92 | ("Response Token Cost", "Cost in USD", "sum((completion_tokens*0.03))"), 93 | ] 94 | 95 | for name, description,
definition in custom_metrics: 96 | try: 97 | fdl.CustomMetric( 98 | name=name, 99 | model_id=llm_application.id, 100 | description=description, 101 | definition=definition, 102 | ).create() 103 | except fdl.Conflict: 104 | print(f"Custom Metric '{name}' already exists.") 105 | 106 | llm_events_df = sample_data_df 107 | # Timeshifting the timestamp column in the events file so the events are as recent as today 108 | llm_events_df['timestamp'] = np.linspace( 109 | int(time.time()) - (5 * 24 * 60 * 60), int(time.time()), num=llm_events_df.shape[0] 110 | ) 111 | 112 | print('Printing sample dataset...') 113 | print(llm_events_df.head(10).to_markdown()) 114 | return llm_application, llm_application.publish(llm_events_df).id 115 | -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/archive/fiddler/modules/ml_onboard.py: -------------------------------------------------------------------------------- 1 | import fiddler as fdl 2 | from typing import List 3 | 4 | 5 | def create_country_segments(model_id: int, countries: List[str], prop_countries: List[str]): 6 | for country in prop_countries: 7 | try: 8 | fdl.Segment( 9 | name=f'Traveling to {country}', 10 | model_id=model_id, 11 | description=f'Search Queries for trips to {country}', 12 | definition=f'destination_country_id==\'{country}\'', 13 | ).create() 14 | except fdl.Conflict: 15 | print(f"Segment 'Traveling to {country}' already exists.") 16 | 17 | for country in countries: 18 | try: 19 | fdl.Segment( 20 | name=f'Visitor from {country}', 21 | model_id=model_id, 22 | description=f'Segment for visitors from {country}', 23 | definition=f'visitor_location_country_id==\'{country}\'', 24 | ).create() 25 | except fdl.Conflict: 26 | print(f"Segment 'Visitor from {country}' already exists.") 27 | 28 | predefined_segments = [ 29 | ( 30 | "No Click on Promo - USA", 31 | "Segment for visitors from USA with no click on promo", 32 | """user_interaction==0 and visitor_location_country_id=='USA'""" 33 | ), 34 | ( 35 | "Clicked on Promo - USA", 36 | "Segment for visitors from USA who clicked on promo", 37 | """user_interaction==1 and visitor_location_country_id=='USA'""" 38 | ), 39 | ( 40 | "Clicked Promo", 41 | "Segment for visitors who clicked on promo", 42 | """user_interaction==1""" 43 | ), 44 | ( 45 | "No Click on Promo", 46 | "Segment for visitors with no click on promo", 47 | """user_interaction==0""" 48 | ), 49 | ] 50 | 51 | for name, description, definition in predefined_segments: 52 | try: 53 | fdl.Segment( 54 | name=name, 55 | model_id=model_id, 56 | description=description, 57 | definition=definition, 58 | ).create() 59 | except fdl.Conflict: 60 | print(f"Segment '{name}' already exists.") 61 | -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/media/smbanner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/fine-tuning-with-sagemakerai-and-bedrock/media/smbanner.png -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/task_01_foundation_model_playground/scripts/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers==4.48.2 2 | peft==0.14.0 3 | accelerate==1.3.0 4 | 
bitsandbytes==0.45.1 5 | datasets==3.2.0 6 | evaluate==0.4.3 7 | huggingface_hub[hf_transfer] 8 | mlflow 9 | safetensors>=0.4.5 10 | sagemaker==2.239.0 11 | sagemaker-mlflow==0.1.0 12 | sentencepiece==0.2.0 13 | scikit-learn==1.6.1 14 | tokenizers>=0.21.0 15 | trl==0.9.6 16 | py7zr -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/task_02_customize_foundation_model/scripts/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers==4.50.2 2 | peft==0.14.0 3 | accelerate==1.3.0 4 | bitsandbytes==0.45.1 5 | datasets==3.2.0 6 | evaluate==0.4.3 7 | huggingface_hub[hf_transfer] 8 | mlflow 9 | safetensors>=0.5.2 10 | sagemaker==2.244.0 11 | sagemaker-mlflow==0.1.0 12 | sentencepiece==0.2.0 13 | scikit-learn==1.6.1 14 | tokenizers>=0.21.0 15 | trl==0.9.6 16 | psutil 17 | py7zr 18 | pynvml 19 | xtarfile 20 | rouge-score -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/task_03_foundation_model_evaluation/images/sft_1000_train_50_test_bars.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/fine-tuning-with-sagemakerai-and-bedrock/task_03_foundation_model_evaluation/images/sft_1000_train_50_test_bars.png -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/task_03_foundation_model_evaluation/images/sft_1000_train_50_test_compare.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/fine-tuning-with-sagemakerai-and-bedrock/task_03_foundation_model_evaluation/images/sft_1000_train_50_test_compare.png -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/task_03_foundation_model_evaluation/images/sft_1000_train_50_test_scores.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/fine-tuning-with-sagemakerai-and-bedrock/task_03_foundation_model_evaluation/images/sft_1000_train_50_test_scores.png -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/task_03_foundation_model_evaluation/images/sft_5000_train_100_test_bars.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/fine-tuning-with-sagemakerai-and-bedrock/task_03_foundation_model_evaluation/images/sft_5000_train_100_test_bars.png -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/task_03_foundation_model_evaluation/images/sft_5000_train_100_test_compare.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/fine-tuning-with-sagemakerai-and-bedrock/task_03_foundation_model_evaluation/images/sft_5000_train_100_test_compare.png -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/task_03_foundation_model_evaluation/images/sft_5000_train_100_test_scores.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/fine-tuning-with-sagemakerai-and-bedrock/task_03_foundation_model_evaluation/images/sft_5000_train_100_test_scores.png -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/task_04_responsible_ai/images/applyguardrail.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/fine-tuning-with-sagemakerai-and-bedrock/task_04_responsible_ai/images/applyguardrail.png -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/task_05_fmops/config.yaml: -------------------------------------------------------------------------------- 1 | SchemaVersion: '1.0' 2 | SageMaker: 3 | PythonSDK: 4 | Modules: 5 | RemoteFunction: 6 | # role arn is not required if in SageMaker Notebook instance or SageMaker Studio 7 | # Uncomment the following line and replace with the right execution role if in a local IDE 8 | # RoleArn: 9 | InstanceType: ml.m5.xlarge 10 | Dependencies: ./scripts/requirements.txt 11 | IncludeLocalWorkDir: true 12 | CustomFileFilter: 13 | IgnoreNamePatterns: # files or directories to ignore 14 | - "*.ipynb" # all notebook files 15 | 16 | -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/task_05_fmops/eval/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets==3.5.0 2 | pandas==2.2.3 3 | matplotlib==3.10.1 4 | numpy==1.26.4 5 | boto3==1.37.1 6 | tqdm==4.67.1 7 | lighteval[math]==0.9.2 8 | torch 9 | torchvision -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/task_05_fmops/ml-16670-arch-with-mlflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/fine-tuning-with-sagemakerai-and-bedrock/task_05_fmops/ml-16670-arch-with-mlflow.png -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/task_05_fmops/scripts/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers==4.50.2 2 | peft==0.14.0 3 | accelerate==1.3.0 4 | bitsandbytes==0.45.1 5 | datasets==3.2.0 6 | evaluate==0.4.3 7 | huggingface_hub[hf_transfer] 8 | mlflow 9 | safetensors>=0.5.2 10 | sagemaker==2.244.0 11 | sagemaker-mlflow==0.1.0 12 | sentencepiece==0.2.0 13 | scikit-learn==1.6.1 14 | tokenizers>=0.21.0 15 | trl==0.9.6 16 | psutil 17 | py7zr 18 | 
pynvml 19 | xtarfile 20 | rouge-score -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/task_05_fmops/steps/evaluation_mlflow.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import sagemaker 3 | from sagemaker.s3_utils import parse_s3_url 4 | import mlflow 5 | import tempfile 6 | from pathlib import Path 7 | import pandas as pd 8 | import json 9 | from dataclasses import dataclass 10 | from typing import Tuple, Optional 11 | 12 | 13 | 14 | def evaluation(model, preprocess_step_ret, finetune_ret, mlflow_arn, experiment_name, run_id): 15 | mlflow.set_tracking_uri(mlflow_arn) 16 | mlflow.set_experiment(experiment_name) 17 | 18 | print(preprocess_step_ret['run_id']) 19 | 20 | with mlflow.start_run(run_id=preprocess_step_ret['run_id']) as run: 21 | s3 = boto3.client("s3") 22 | sess = sagemaker.Session() 23 | 24 | dataset_info = mlflow.get_run(preprocess_step_ret['run_id']).inputs.dataset_inputs[1].dataset 25 | 26 | print(dataset_info) 27 | print(f"Dataset name: {dataset_info.name}") 28 | print(f"Dataset digest: {dataset_info.digest}") 29 | print(f"Dataset profile: {dataset_info.profile}") 30 | print(f"Dataset schema: {dataset_info.schema}") 31 | 32 | dataset_source = mlflow.data.get_source(dataset_info) 33 | ds = dataset_source.load() 34 | # load the evaluation dataset from the logged dataset source 35 | 36 | eval_data = pd.read_json(ds, orient='records', lines=True) 37 | 38 | data = [] 39 | for index, row in eval_data.iterrows(): 40 | for message in row['messages']: 41 | if message["role"] == "user": 42 | question = message["content"] 43 | elif message["role"] == "assistant": 44 | answer = message["content"] 45 | data.append({"question": question, "answer": answer}) 46 | 47 | df = pd.DataFrame(data, columns=["question", "answer"]) 48 | print(df.head()) 49 | 50 | 51 | logged_model = f"runs:/{preprocess_step_ret['run_id']}/model" 52 | loaded_model = mlflow.pyfunc.load_model(model_uri=logged_model) 53 | results = mlflow.evaluate( 54 | model=loaded_model, 55 | data=df, 56 | targets="answer", 57 | model_type="question-answering", 58 | evaluator_config={"col_mapping": {"inputs": "question"}}, 59 | ) 60 | print(results.metrics) 61 | return "done" -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/task_05_fmops/steps/finetune_llama3b_hf.py: -------------------------------------------------------------------------------- 1 | from steps.utils import endpoint_exists 2 | from sagemaker.jumpstart.estimator import JumpStartEstimator 3 | from sagemaker.huggingface import HuggingFace 4 | from huggingface_hub import HfFolder 5 | import mlflow 6 | import time 7 | import json 8 | import boto3 9 | 10 | def finetune_llama3b(preprocess_step_ret, train_config, lora_config, role, mlflow_arn, experiment_name, run_name, *args): 11 | 12 | mlflow.set_tracking_uri(mlflow_arn) 13 | mlflow.set_experiment(experiment_name) 14 | 15 | with mlflow.start_run(run_id=preprocess_step_ret['run_id']) as run: 16 | 17 | model_id = train_config["model_id"] 18 | endpoint_name = train_config["endpoint_name"] 19 | instance_type = train_config["finetune_instance_type"] 20 | num_instances = train_config["finetune_num_instances"] 21 | epoch = train_config["epoch"] 22 | per_device_train_batch_size = train_config["per_device_train_batch_size"] 23 | 24 | lora_config = json.loads(lora_config) 25 | 26 | lora_r = lora_config["lora_r"] 27 |
lora_alpha = lora_config["lora_alpha"] 28 | lora_dropout = lora_config["lora_dropout"] 29 | 30 | train_data_path = preprocess_step_ret["training_input_path"] 31 | 32 | training_job_name = f'huggingface-qlora-{train_config["epoch"]}-{lora_config["lora_r"]}-{time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())}' 33 | 34 | hyperparameters = { 35 | 'model_id': model_id, # pre-trained model 36 | 'dataset_path': '/opt/ml/input/data/training', # path where sagemaker will save training dataset 37 | 'epochs': epoch, # number of training epochs 38 | 'per_device_train_batch_size': per_device_train_batch_size, # batch size for training 39 | 'lr': 2e-4, # learning rate used during training 40 | 'hf_token': "", # Hugging Face token used to access gated Llama models 41 | 'merge_weights': True, # whether to merge LoRA into the model 42 | 'lora_r': lora_r, 43 | 'lora_alpha': lora_alpha, 44 | 'lora_dropout': lora_dropout, 45 | 'mlflow_arn': mlflow_arn, 46 | 'experiment_name': experiment_name, 47 | 'run_id': preprocess_step_ret['run_id'] 48 | } 49 | 50 | # Add SageMaker environment variables to help with debugging 51 | environment = { 52 | "HUGGINGFACE_HUB_CACHE": "/tmp/.cache", 53 | "NCCL_DEBUG": "INFO", # Helps debug NCCL issues 54 | "NCCL_P2P_DISABLE": "1", # Can help with some networking issues 55 | "PYTORCH_CUDA_ALLOC_CONF": "max_split_size_mb:512" # Helps with memory management 56 | } 57 | 58 | if endpoint_exists(endpoint_name): 59 | print("Endpoint already exists") 60 | training_job_name = None 61 | else: 62 | # Define distributed training configuration 63 | distribution = { 64 | 'torch_distributed': { 65 | 'enabled': True 66 | } 67 | } 68 | 69 | huggingface_estimator = HuggingFace( 70 | entry_point='llama3_fine_tuning.py', # train script 71 | source_dir='scripts', # directory which includes all the files needed for training 72 | instance_type=instance_type, # instance type used for the training job 73 | instance_count=num_instances, # the number of instances used for training 74 | base_job_name=training_job_name, # the name of the training job 75 | role=role, # IAM role used in training job to access AWS resources 76 | volume_size=300, # the size of the EBS volume in GB 77 | py_version='py311', # the python version used in the training job 78 | hyperparameters=hyperparameters, # the hyperparameters passed to the training job 79 | environment=environment, 80 | distribution=distribution, # Added distributed training config 81 | image_uri=f'763104351884.dkr.ecr.{boto3.session.Session().region_name}.amazonaws.com/pytorch-training:2.5.1-gpu-py311-cu124-ubuntu22.04-sagemaker', 82 | metric_definitions=[ 83 | {'Name': 'huggingface-textgeneration:loss', 'Regex': r"'loss':\s*([0-9.]+)"}, 84 | {'Name': 'huggingface-textgeneration:epoch', 'Regex': r"'epoch':\s*([0-9.]+)"}, 85 | {'Name': 'huggingface-textgeneration:train_loss', 'Regex': r"'train_loss':\s*([0-9.]+)"}, 86 | ] 87 | ) 88 | 89 | data = {'training': train_data_path} 90 | 91 | # starting the train job with our uploaded datasets as input 92 | huggingface_estimator.fit(data, wait=True) 93 | 94 | training_job_name = huggingface_estimator.latest_training_job.name 95 | 96 | return {"training_job_name": training_job_name, "run_id": preprocess_step_ret['run_id']} -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/task_05_fmops/steps/preprocess_llama3.py: -------------------------------------------------------------------------------- 1 | # Temporary preprocess step (to be changed with new dataset)
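# Illustrative note (an assumption inferred from the filtering below, not part of the
# original script): each record in the source dataset is expected to look roughly like
#   {"category": "Open QA",
#    "messages": [{"role": "user", "content": "..."},
#                 {"role": "assistant", "content": "..."}]}
# create_conversation() below prepends the system message when one is not already present.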
2 | import boto3 3 | import pandas as pd 4 | from datasets import load_dataset 5 | from datasets import Dataset 6 | from random import randint 7 | import mlflow 8 | import json 9 | 10 | 11 | system_message = """You are Llama, an AI assistant. Your knowledge spans a wide range of topics, allowing you to answer the questions with honesty and truthfulness.""" 12 | 13 | def create_conversation(sample): 14 | if sample["messages"][0]["role"] == "system": 15 | return sample 16 | else: 17 | sample["messages"] = [{"role": "system", "content": system_message}] + sample["messages"] 18 | return sample 19 | 20 | def preprocess(s3_bucket, dataset_name, train_sample, eval_sample, mlflow_arn, experiment_name, run_name): 21 | 22 | mlflow.set_tracking_uri(mlflow_arn) 23 | mlflow.set_experiment(experiment_name) 24 | 25 | 26 | # This is a very simple example, you can add your own data processing code here 27 | dataset = load_dataset(dataset_name) 28 | dataset = dataset.filter(lambda x: x['category'] == 'Open QA') 29 | 30 | columns_to_remove = list(dataset["train"].features) 31 | columns_to_remove.remove("messages") 32 | dataset = dataset.map(create_conversation, remove_columns=columns_to_remove, batched=False) 33 | 34 | dataset["train"] = dataset["train"].filter(lambda x: len(x["messages"][1:]) % 2 == 0) 35 | dataset["test"] = dataset["test"].filter(lambda x: len(x["messages"][1:]) % 2 == 0) 36 | 37 | dataset["train"].to_json("train_dataset.json", orient="records", force_ascii=False) 38 | dataset["test"].to_json("test_dataset.json", orient="records", force_ascii=False) 39 | 40 | # save training and test data to s3 41 | s3 = boto3.client("s3") 42 | s3.upload_file("train_dataset.json", s3_bucket, f"dataset/{dataset_name}/{train_sample}/train/train_dataset.json") 43 | s3.upload_file("test_dataset.json", s3_bucket, f"dataset/{dataset_name}/{eval_sample}/eval/eval_dataset.json") 44 | 45 | 46 | training_input_path = f's3://{s3_bucket}/dataset/{dataset_name}/{train_sample}/train/train_dataset.json' 47 | eval_input_path = f's3://{s3_bucket}/dataset/{dataset_name}/{eval_sample}/eval/eval_dataset.json' 48 | 49 | with mlflow.start_run(run_name=run_name) as run: 50 | 51 | run_id = run.info.run_id 52 | print(run_id) 53 | 54 | # create pandas dataframe from train json 55 | df_train = pd.read_json("train_dataset.json", orient="records", lines=True) 56 | df_evaluate = pd.read_json("test_dataset.json", orient="records", lines=True) 57 | 58 | training_data = mlflow.data.from_pandas(df_train, source=training_input_path) 59 | mlflow.log_input(training_data, context="training") 60 | 61 | evaluation_data = mlflow.data.from_pandas(df_evaluate, source=eval_input_path) 62 | mlflow.log_input(evaluation_data, context="evaluation") 63 | 64 | return {"training_input_path": training_input_path, "eval_input_path": eval_input_path, "run_id": run_id} 65 | -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/task_05_fmops/steps/utils.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | from datetime import datetime 3 | 4 | 5 | def endpoint_exists(endpoint_name): 6 | endpoint_exist = False 7 | 8 | client = boto3.client('sagemaker') 9 | response = client.list_endpoints() 10 | endpoints = response["Endpoints"] 11 | 12 | for endpoint in endpoints: 13 | if endpoint_name == endpoint["EndpointName"]: 14 | endpoint_exist = True 15 | break 16 | 17 | return endpoint_exist 18 | 19 | def create_training_job_name(model_id): 20
| return f"{model_id}-{datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')[:-3]}" 21 | -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/utilities/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-on-amazon-sagemaker/708d1e598d37e7a00639ded4a0d02334dfd29fe9/workshops/fine-tuning-with-sagemakerai-and-bedrock/utilities/__init__.py -------------------------------------------------------------------------------- /workshops/fine-tuning-with-sagemakerai-and-bedrock/utilities/helpers.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import mlflow 4 | from datetime import datetime 5 | from typing import List, Dict 6 | from langchain import PromptTemplate 7 | from langchain.prompts.few_shot import FewShotPromptTemplate 8 | from langchain.llms import SagemakerEndpoint 9 | from langchain.llms.sagemaker_endpoint import LLMContentHandler 10 | from langchain.chains import LLMChain 11 | from IPython.display import ( 12 | display, 13 | Markdown, 14 | HTML 15 | ) 16 | 17 | 18 | def pretty_print_html(text): 19 | # Replace newline characters with
<br> tags 20 | html_text = text.replace('\n', '<br>') 21 | # Apply HTML formatting 22 | html_formatted = f'<p>{html_text}</p>' 23 | # Display the formatted HTML 24 | return HTML(html_formatted) 25 | 26 | 27 | def set_meta_llama_params( 28 | max_new_tokens=512, 29 | top_p=0.9, 30 | temperature=0.6, 31 | ): 32 | """ set Llama parameters """ 33 | llama_params = {} 34 | llama_params['max_new_tokens'] = max_new_tokens 35 | llama_params['top_p'] = top_p 36 | llama_params['temperature'] = temperature 37 | return llama_params 38 | 39 | 40 | def print_dialog(inputs, payload, response): 41 | dialog_output = [] 42 | for msg in inputs: 43 | dialog_output.append(f"**{msg['role'].upper()}**: {msg['content']}\n") 44 | dialog_output.append(f"**ASSISTANT**: {response['generated_text']}") 45 | dialog_output.append("\n---\n") 46 | 47 | display(Markdown('\n'.join(dialog_output))) 48 | 49 | def format_messages(messages: List[Dict[str, str]]) -> str: 50 | """ 51 | Format messages for Llama 3+ chat models. 52 | 53 | The model only supports 'system', 'user' and 'assistant' roles, starting with 'system', then 'user' and 54 | alternating (u/a/u/a/u...). The last message must be from 'user'. 55 | """ 56 | # auto assistant suffix 57 | # messages.append({"role": "assistant"}) 58 | 59 | output = "<|begin_of_text|>" 60 | # Adding the inferred prefix 61 | _system_prefix = f"\n\nCutting Knowledge Date: December 2023\nToday Date: {datetime.now().strftime('%d %b %Y')}\n\n" 62 | for i, entry in enumerate(messages): 63 | output += f"<|start_header_id|>{entry['role']}<|end_header_id|>" 64 | if i == 0: 65 | output += f"{_system_prefix}{entry['content']}<|eot_id|>" 66 | elif i >= 1 and 'content' in entry: 67 | output += f"\n\n{entry['content']}<|eot_id|>" 68 | output += "<|start_header_id|>assistant<|end_header_id|>\n" 69 | return output 70 | 71 | 72 | def write_eula(attribute): 73 | os.makedirs("/home/sagemaker-user/.license/", exist_ok=True) 74 | f = open("/home/sagemaker-user/.license/llama-license.txt", "w") 75 | f.write(attribute) 76 | f.close() 77 | return 0 78 | 79 | 80 | def read_eula(): 81 | attribute = open("/home/sagemaker-user/.license/llama-license.txt", "r").read() 82 | assert attribute == "True", f"Llama EULA set to {attribute}! Please review EULA to continue!"
83 | return attribute 84 | 85 | 86 | class ContentHandlerwithTracking(LLMContentHandler): 87 | content_type = "application/json" 88 | accepts = "application/json" 89 | 90 | def __init__(self, experiment_name): 91 | self.mlflow_experiment_name = experiment_name 92 | print(f"Sending experiments to : {self.mlflow_experiment_name}") 93 | self.experiment_online_info = mlflow.set_experiment(self.mlflow_experiment_name) 94 | self.run_id_ephemeral = None 95 | 96 | def transform_input(self, prompt, model_kwargs): 97 | with mlflow.start_run( 98 | experiment_id=self.experiment_online_info.experiment_id, 99 | run_name=f"lc-{datetime.now().strftime('%Y%m%d%H%M%S%f')}" 100 | ) as run: 101 | base_input = [{"role" : "user", "content" : prompt}] 102 | optz_input = format_messages(base_input) 103 | input_str = json.dumps( 104 | { 105 | "inputs" : optz_input, 106 | "parameters" : {**model_kwargs} 107 | } 108 | ) 109 | # track prompts 110 | # mlflow.log_param("SystemPrompt", instruction) 111 | mlflow.log_param("UserPrompt", optz_input) 112 | mlflow.log_param("parameters", {**model_kwargs}) 113 | 114 | self.run_id_ephemeral = run.info.run_id 115 | 116 | return input_str.encode('utf-8') 117 | 118 | def transform_output(self, output): 119 | with mlflow.start_run( 120 | experiment_id=self.experiment_online_info.experiment_id, 121 | run_id=self.run_id_ephemeral 122 | ) as run: 123 | response_json = json.loads(output.read().decode("utf-8")) 124 | mlflow.log_param("ModelResponse", response_json["generated_text"]) 125 | return response_json["generated_text"] 126 | --------------------------------------------------------------------------------