├── tests ├── __init__.py ├── data │ └── 000000039769.png ├── conftest.py ├── utils │ └── markers.py ├── test_search.py ├── test_import.py ├── test_final_answer.py ├── fixtures │ ├── tools.py │ └── agents.py ├── test_types.py ├── test_remote_executors.py ├── test_tool_validation.py ├── test_default_tools.py ├── test_cli.py ├── test_gradio_ui.py └── test_memory.py ├── .github ├── ISSUE_TEMPLATE │ ├── custom.md │ ├── bug_report.md │ └── feature_request.md └── workflows │ ├── trufflehog.yml │ ├── upload_pr_documentation.yml │ ├── build_pr_documentation.yml │ ├── build_documentation.yml │ ├── quality.yml │ └── tests.yml ├── examples ├── open_deep_research │ ├── app.py │ ├── requirements.txt │ ├── README.md │ ├── scripts │ │ ├── run_agents.py │ │ ├── gaia_scorer.py │ │ ├── reformulator.py │ │ └── text_inspector_tool.py │ └── run.py ├── sandboxed_execution.py ├── gradio_ui.py ├── inspect_multiagent_run.py ├── rag.py ├── text_to_sql.py ├── agent_from_any_llm.py └── rag_using_chromadb.py ├── .pre-commit-config.yaml ├── Makefile ├── docs └── source │ ├── en │ ├── _config.py │ ├── _toctree.yml │ ├── reference │ │ ├── agents.mdx │ │ └── tools.mdx │ ├── conceptual_guides │ │ └── react.mdx │ ├── index.mdx │ └── tutorials │ │ └── memory.mdx │ ├── hi │ ├── _config.py │ ├── _toctree.yml │ ├── conceptual_guides │ │ └── react.mdx │ ├── reference │ │ ├── tools.mdx │ │ └── agents.mdx │ ├── index.mdx │ └── tutorials │ │ ├── inspect_runs.mdx │ │ └── secure_code_execution.mdx │ └── zh │ ├── _config.py │ ├── _toctree.yml │ ├── conceptual_guides │ ├── react.mdx │ └── intro_agents.mdx │ ├── reference │ ├── agents.mdx │ ├── tools.mdx │ └── models.mdx │ ├── index.mdx │ ├── tutorials │ ├── secure_code_execution.mdx │ └── memory.mdx │ └── examples │ ├── rag.mdx │ └── text_to_sql.mdx ├── e2b.toml ├── src └── smolagents │ ├── __init__.py │ └── cli.py ├── utils └── check_tests_in_ci.py ├── .gitignore ├── pyproject.toml ├── CONTRIBUTING.md └── CODE_OF_CONDUCT.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/data/000000039769.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/merveenoyan/smolagents/main/tests/data/000000039769.png -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/custom.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Custom issue template 3 | about: Describe this issue template's purpose here. 
4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | 11 | -------------------------------------------------------------------------------- /examples/open_deep_research/app.py: -------------------------------------------------------------------------------- 1 | from run import create_agent 2 | 3 | from smolagents.gradio_ui import GradioUI 4 | 5 | 6 | agent = create_agent() 7 | 8 | demo = GradioUI(agent) 9 | 10 | if __name__ == "__main__": 11 | demo.launch() 12 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/astral-sh/ruff-pre-commit 3 | rev: v0.2.1 4 | hooks: 5 | - id: ruff 6 | args: 7 | - --fix 8 | - id: ruff-format 9 | - repo: https://github.com/pre-commit/pre-commit-hooks 10 | rev: v4.5.0 11 | hooks: 12 | - id: check-merge-conflict 13 | - id: check-yaml 14 | -------------------------------------------------------------------------------- /.github/workflows/trufflehog.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | 4 | name: Secret Leaks 5 | 6 | permissions: 7 | contents: read 8 | 9 | jobs: 10 | trufflehog: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Checkout code 14 | uses: actions/checkout@v4 15 | with: 16 | fetch-depth: 0 17 | - name: Secret Scanning 18 | uses: trufflesecurity/trufflehog@main -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: quality style test docs utils 2 | 3 | check_dirs := examples src tests utils 4 | 5 | # Check code quality of the source code 6 | quality: 7 | ruff check $(check_dirs) 8 | ruff format --check $(check_dirs) 9 | python utils/check_tests_in_ci.py 10 | 11 | # Format source code automatically 12 | style: 13 | ruff check $(check_dirs) --fix 14 | ruff format $(check_dirs) 15 | 16 | # Run smolagents tests 17 | test: 18 | pytest ./tests/ -------------------------------------------------------------------------------- /.github/workflows/upload_pr_documentation.yml: -------------------------------------------------------------------------------- 1 | name: Upload PR Documentation 2 | 3 | on: 4 | workflow_run: 5 | workflows: ["Build PR Documentation"] 6 | types: 7 | - completed 8 | 9 | jobs: 10 | build: 11 | uses: huggingface/doc-builder/.github/workflows/upload_pr_documentation.yml@main 12 | with: 13 | package_name: smolagents 14 | secrets: 15 | hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }} 16 | comment_bot_token: ${{ secrets.COMMENT_BOT_TOKEN }} -------------------------------------------------------------------------------- /docs/source/en/_config.py: -------------------------------------------------------------------------------- 1 | # docstyle-ignore 2 | INSTALL_CONTENT = """ 3 | # Installation 4 | ! pip install smolagents 5 | # To install from source instead of the last release, comment the command above and uncomment the following one. 6 | # ! 
pip install git+https://github.com/huggingface/smolagents.git 7 | """ 8 | 9 | notebook_first_cells = [{"type": "code", "content": INSTALL_CONTENT}] 10 | black_avoid_patterns = { 11 | "{processor_class}": "FakeProcessorClass", 12 | "{model_class}": "FakeModelClass", 13 | "{object_class}": "FakeObjectClass", 14 | } 15 | -------------------------------------------------------------------------------- /docs/source/hi/_config.py: -------------------------------------------------------------------------------- 1 | # docstyle-ignore 2 | INSTALL_CONTENT = """ 3 | # Installation 4 | ! pip install smolagents 5 | # To install from source instead of the last release, comment the command above and uncomment the following one. 6 | # ! pip install git+https://github.com/huggingface/smolagents.git 7 | """ 8 | 9 | notebook_first_cells = [{"type": "code", "content": INSTALL_CONTENT}] 10 | black_avoid_patterns = { 11 | "{processor_class}": "FakeProcessorClass", 12 | "{model_class}": "FakeModelClass", 13 | "{object_class}": "FakeObjectClass", 14 | } 15 | -------------------------------------------------------------------------------- /docs/source/zh/_config.py: -------------------------------------------------------------------------------- 1 | # docstyle-ignore 2 | INSTALL_CONTENT = """ 3 | # Installation 4 | ! pip install smolagents 5 | # To install from source instead of the last release, comment the command above and uncomment the following one. 6 | # ! pip install git+https://github.com/huggingface/smolagents.git 7 | """ 8 | 9 | notebook_first_cells = [{"type": "code", "content": INSTALL_CONTENT}] 10 | black_avoid_patterns = { 11 | "{processor_class}": "FakeProcessorClass", 12 | "{model_class}": "FakeModelClass", 13 | "{object_class}": "FakeObjectClass", 14 | } 15 | -------------------------------------------------------------------------------- /e2b.toml: -------------------------------------------------------------------------------- 1 | # This is a config for E2B sandbox template. 
2 | # You can use template ID (qywp2ctmu2q7jzprcf4j) to create a sandbox: 3 | 4 | # Python SDK 5 | # from e2b import Sandbox, AsyncSandbox 6 | # sandbox = Sandbox("qywp2ctmu2q7jzprcf4j") # Sync sandbox 7 | # sandbox = await AsyncSandbox.create("qywp2ctmu2q7jzprcf4j") # Async sandbox 8 | 9 | # JS SDK 10 | # import { Sandbox } from 'e2b' 11 | # const sandbox = await Sandbox.create('qywp2ctmu2q7jzprcf4j') 12 | 13 | team_id = "f8776d3a-df2f-4a1d-af48-68c2e13b3b87" 14 | start_cmd = "/root/.jupyter/start-up.sh" 15 | dockerfile = "e2b.Dockerfile" 16 | template_id = "qywp2ctmu2q7jzprcf4j" 17 | -------------------------------------------------------------------------------- /examples/sandboxed_execution.py: -------------------------------------------------------------------------------- 1 | from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel 2 | 3 | 4 | model = HfApiModel() 5 | 6 | agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=model, executor_type="docker") 7 | output = agent.run("How many seconds would it take for a leopard at full speed to run through Pont des Arts?") 8 | print("Docker executor result:", output) 9 | 10 | agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=model, executor_type="e2b") 11 | output = agent.run("How many seconds would it take for a leopard at full speed to run through Pont des Arts?") 12 | print("E2B executor result:", output) 13 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: The clearer your bug report, the faster it will be fixed! 4 | title: "[BUG]" 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **Code to reproduce the error** 14 | The simplest code snippet that produces your bug. 15 | 16 | **Error logs (if any)** 17 | Provide error logs if there are any. 18 | 19 | **Expected behavior** 20 | A clear and concise description of what you expected to happen. 21 | 22 | **Packages version:** 23 | Run `pip freeze | grep smolagents` and paste it here. 24 | 25 | **Additional context** 26 | Add any other context about the problem here. 
27 | -------------------------------------------------------------------------------- /.github/workflows/build_pr_documentation.yml: -------------------------------------------------------------------------------- 1 | name: Build PR Documentation 2 | 3 | on: 4 | pull_request: 5 | paths: 6 | - 'docs/source/**' 7 | - 'assets/**' 8 | - '.github/workflows/doc-pr-build.yml' 9 | 10 | concurrency: 11 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 12 | cancel-in-progress: true 13 | 14 | jobs: 15 | build: 16 | uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@main 17 | with: 18 | commit_sha: ${{ github.event.pull_request.head.sha }} 19 | pr_number: ${{ github.event.number }} 20 | package: smolagents 21 | languages: en 22 | # additional_args: --not_python_module # use this arg if repository is documentation only -------------------------------------------------------------------------------- /.github/workflows/build_documentation.yml: -------------------------------------------------------------------------------- 1 | name: Build documentation 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - doc-builder* 8 | - v*-release 9 | - use_templates 10 | paths: 11 | - 'docs/source/**' 12 | - 'assets/**' 13 | - '.github/workflows/doc-build.yml' 14 | - 'pyproject.toml' 15 | 16 | jobs: 17 | build: 18 | uses: huggingface/doc-builder/.github/workflows/build_main_documentation.yml@main 19 | with: 20 | commit_sha: ${{ github.sha }} 21 | package: smolagents 22 | languages: en 23 | notebook_folder: smolagents_doc 24 | # additional_args: --not_python_module # use this arg if repository is documentation only 25 | secrets: 26 | token: ${{ secrets.HUGGINGFACE_PUSH }} 27 | hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }} -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import patch 2 | 3 | import pytest 4 | 5 | from smolagents.agents import MultiStepAgent 6 | from smolagents.monitoring import LogLevel 7 | 8 | 9 | # Import fixture modules as plugins 10 | pytest_plugins = ["tests.fixtures.agents", "tests.fixtures.tools"] 11 | 12 | original_multi_step_agent_init = MultiStepAgent.__init__ 13 | 14 | 15 | @pytest.fixture(autouse=True) 16 | def patch_multi_step_agent_with_suppressed_logging(): 17 | with patch.object(MultiStepAgent, "__init__", autospec=True) as mock_init: 18 | 19 | def init_with_suppressed_logging(self, *args, verbosity_level=LogLevel.OFF, **kwargs): 20 | original_multi_step_agent_init(self, *args, verbosity_level=verbosity_level, **kwargs) 21 | 22 | mock_init.side_effect = init_with_suppressed_logging 23 | yield 24 | -------------------------------------------------------------------------------- /examples/gradio_ui.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | 3 | import requests 4 | from PIL import Image 5 | 6 | from smolagents import CodeAgent, GradioUI, HfApiModel 7 | 8 | 9 | def add_agent_image(memory_step, agent): 10 | url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/smolagents/smolagents.png" 11 | response = requests.get(url) 12 | memory_step.observations_images = [Image.open(BytesIO(response.content))] 13 | 14 | 15 | agent = CodeAgent( 16 | tools=[], 17 | model=HfApiModel(), 18 | verbosity_level=1, 19 | planning_interval=3, 20 | name="example_agent", 21 | description="This is an example agent that 
has no tools but will always see an image at the end of its step.",
22 |     step_callbacks=[add_agent_image],
23 | )
24 | 
25 | GradioUI(agent, file_upload_folder="./data").launch()
26 | 
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest an idea for this project
4 | title: ''
5 | labels: enhancement
6 | assignees: ''
7 | 
8 | ---
9 | 
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 | 
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 | 
16 | **Is this not possible with the current options?**
17 | Make sure to consider if what you're requesting can be done with current abstractions.
18 | 
19 | **Describe alternatives you've considered**
20 | A clear and concise description of any alternative solutions or features you've considered.
21 | 
22 | **Additional context**
23 | Add any other context or screenshots about the feature request here.
24 | 
--------------------------------------------------------------------------------
/.github/workflows/quality.yml:
--------------------------------------------------------------------------------
1 | name: Quality Check
2 | 
3 | on: [pull_request]
4 | 
5 | env:
6 |   UV_SYSTEM_PYTHON: 1
7 | 
8 | jobs:
9 |   check_code_quality:
10 |     runs-on: ubuntu-latest
11 |     env:
12 |       UV_HTTP_TIMEOUT: 600 # max 10min to install deps
13 | 
14 |     steps:
15 |       - uses: actions/checkout@v2
16 |       - name: Set up Python
17 |         uses: actions/setup-python@v2
18 |         with:
19 |           python-version: "3.12"
20 | 
21 |       # Setup venv
22 |       - name: Setup uv
23 |         run: |
24 |           pip install --upgrade uv
25 | 
26 |       - name: Install dependencies
27 |         run: uv pip install "smolagents[quality] @ ."
28 | 
29 |       # Equivalent of "make quality" but step by step
30 |       - run: ruff check examples src tests utils # linter
31 |       - run: ruff format --check examples src tests utils # formatter
32 |       - run: python utils/check_tests_in_ci.py
33 | 
--------------------------------------------------------------------------------
/examples/inspect_multiagent_run.py:
--------------------------------------------------------------------------------
1 | from openinference.instrumentation.smolagents import SmolagentsInstrumentor
2 | from phoenix.otel import register
3 | 
4 | 
5 | register()
6 | SmolagentsInstrumentor().instrument(skip_dep_check=True)
7 | 
8 | 
9 | from smolagents import (
10 |     CodeAgent,
11 |     DuckDuckGoSearchTool,
12 |     HfApiModel,
13 |     ToolCallingAgent,
14 |     VisitWebpageTool,
15 | )
16 | 
17 | 
18 | # Then we run the agentic part!
19 | model = HfApiModel() 20 | 21 | search_agent = ToolCallingAgent( 22 | tools=[DuckDuckGoSearchTool(), VisitWebpageTool()], 23 | model=model, 24 | name="search_agent", 25 | description="This is an agent that can do web search.", 26 | ) 27 | 28 | manager_agent = CodeAgent( 29 | tools=[], 30 | model=model, 31 | managed_agents=[search_agent], 32 | ) 33 | manager_agent.run("If the US keeps it 2024 growth rate, how many years would it take for the GDP to double?") 34 | -------------------------------------------------------------------------------- /examples/open_deep_research/requirements.txt: -------------------------------------------------------------------------------- 1 | anthropic>=0.37.1 2 | audioop-lts<1.0; python_version >= "3.13" # required to use pydub in Python >=3.13; LTS port of the removed Python builtin module audioop 3 | beautifulsoup4>=4.12.3 4 | datasets>=2.21.0 5 | google_search_results>=2.4.2 6 | huggingface_hub>=0.23.4 7 | mammoth>=1.8.0 8 | markdownify>=0.13.1 9 | numexpr>=2.10.1 10 | numpy>=2.1.2 11 | openai>=1.52.2 12 | openpyxl 13 | pandas>=2.2.3 14 | pathvalidate>=3.2.1 15 | pdfminer>=20191125 16 | pdfminer.six>=20240706 17 | Pillow>=11.0.0 18 | puremagic>=1.28 19 | pypdf>=5.1.0 20 | python-dotenv>=1.0.1 21 | python_pptx>=1.0.2 22 | Requests>=2.32.3 23 | serpapi>=0.1.5 24 | tqdm>=4.66.4 25 | torch>=2.2.2 26 | torchvision>=0.17.2 27 | transformers>=4.46.0 28 | youtube_transcript_api>=0.6.2 29 | chess 30 | sympy 31 | pubchempy 32 | Bio 33 | scikit-learn 34 | scipy 35 | pydub 36 | PyPDF2 37 | python-pptx 38 | torch 39 | xlrd 40 | SpeechRecognition -------------------------------------------------------------------------------- /tests/utils/markers.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2024 HuggingFace Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Markers for tests .""" 16 | 17 | import os 18 | from importlib.util import find_spec 19 | 20 | import pytest 21 | 22 | 23 | require_run_all = pytest.mark.skipif(not os.getenv("RUN_ALL"), reason="requires RUN_ALL environment variable") 24 | require_torch = pytest.mark.skipif(find_spec("torch") is None, reason="requires torch") 25 | -------------------------------------------------------------------------------- /tests/test_search.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2024 HuggingFace Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | from smolagents import DuckDuckGoSearchTool 18 | 19 | from .test_tools import ToolTesterMixin 20 | from .utils.markers import require_run_all 21 | 22 | 23 | class TestDuckDuckGoSearchTool(ToolTesterMixin): 24 | def setup_method(self): 25 | self.tool = DuckDuckGoSearchTool() 26 | self.tool.setup() 27 | 28 | @require_run_all 29 | def test_exact_match_arg(self): 30 | result = self.tool("Agents") 31 | assert isinstance(result, str) 32 | -------------------------------------------------------------------------------- /tests/test_import.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import tempfile 4 | 5 | 6 | def test_import_smolagents_without_extras(monkeypatch): 7 | monkeypatch.delenv("VIRTUAL_ENV", raising=False) 8 | with tempfile.TemporaryDirectory() as temp_dir: 9 | # Create a virtual environment 10 | venv_dir = os.path.join(temp_dir, "venv") 11 | subprocess.run(["uv", "venv", venv_dir], check=True) 12 | 13 | # Install smolagents in the virtual environment 14 | subprocess.run( 15 | ["uv", "pip", "install", "--python", os.path.join(venv_dir, "bin", "python"), "smolagents @ ."], check=True 16 | ) 17 | 18 | # Run the import test in the virtual environment 19 | result = subprocess.run( 20 | [os.path.join(venv_dir, "bin", "python"), "-c", "import smolagents"], 21 | capture_output=True, 22 | text=True, 23 | ) 24 | 25 | # Check if the import was successful 26 | assert result.returncode == 0, ( 27 | "Import failed with error: " 28 | + (result.stderr.splitlines()[-1] if result.stderr else "No error message") 29 | + "\n" 30 | + result.stderr 31 | ) 32 | -------------------------------------------------------------------------------- /src/smolagents/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf-8 3 | 4 | # Copyright 2024 The HuggingFace Inc. team. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | __version__ = "1.14.0.dev0" 18 | 19 | from .agent_types import * # noqa: I001 20 | from .agents import * # Above noqa avoids a circular dependency due to cli.py 21 | from .default_tools import * 22 | from .gradio_ui import * 23 | from .local_python_executor import * 24 | from .memory import * 25 | from .models import * 26 | from .monitoring import * 27 | from .remote_executors import * 28 | from .tools import * 29 | from .utils import * 30 | from .cli import * 31 | -------------------------------------------------------------------------------- /docs/source/hi/_toctree.yml: -------------------------------------------------------------------------------- 1 | - title: Get started 2 | sections: 3 | - local: index 4 | title: 🤗 Agents 5 | - local: guided_tour 6 | title: गाइडेड टूर 7 | - title: Tutorials 8 | sections: 9 | - local: tutorials/building_good_agents 10 | title: ✨ अच्छे Agents का निर्माण 11 | - local: tutorials/inspect_runs 12 | title: 📊 OpenTelemetry के साथ runs का निरीक्षण 13 | - local: tutorials/tools 14 | title: 🛠️ Tools - in-depth guide 15 | - local: tutorials/secure_code_execution 16 | title: 🛡️ E2B के साथ अपने कोड एक्जीक्यूशन को सुरक्षित करें 17 | - title: Conceptual guides 18 | sections: 19 | - local: conceptual_guides/intro_agents 20 | title: 🤖 Agentic सिस्टम का परिचय 21 | - local: conceptual_guides/react 22 | title: 🤔 मल्टी-स्टेप एजेंट कैसे काम करते हैं? 23 | - title: Examples 24 | sections: 25 | - local: examples/text_to_sql 26 | title: सेल्फ करेक्टिंग Text-to-SQL 27 | - local: examples/rag 28 | title: एजेंटिक RAG के साथ अपनी ज्ञान आधारित को मास्टर करें 29 | - local: examples/multiagents 30 | title: एक बहु-एजेंट प्रणाली का आयोजन करें 31 | - title: Reference 32 | sections: 33 | - local: reference/agents 34 | title: एजेंट से संबंधित ऑब्जेक्ट्स 35 | - local: reference/tools 36 | title: टूल्स से संबंधित ऑब्जेक्ट्स 37 | -------------------------------------------------------------------------------- /docs/source/zh/_toctree.yml: -------------------------------------------------------------------------------- 1 | - title: 起步 2 | sections: 3 | - local: index 4 | title: 🤗 Agents 5 | - local: guided_tour 6 | title: 导览 7 | - title: Tutorials 8 | sections: 9 | - local: tutorials/building_good_agents 10 | title: ✨ 构建好用的 agents 11 | - local: tutorials/inspect_runs 12 | title: 📊 监控 Agent 的运行 13 | - local: tutorials/tools 14 | title: 🛠️ 工具 - 深度指南 15 | - local: tutorials/secure_code_execution 16 | title: 🛡️ 使用 E2B 保护你的代码执行 17 | - local: tutorials/memory 18 | title: 📚 管理 Agent 的记忆 19 | - title: Conceptual guides 20 | sections: 21 | - local: conceptual_guides/intro_agents 22 | title: 🤖 Agent 化系统介绍 23 | - local: conceptual_guides/react 24 | title: 🤔 多步骤 Agent 是如何工作的? 
25 | - title: Examples 26 | sections: 27 | - local: examples/text_to_sql 28 | title: 自我修正 Text-to-SQL 29 | - local: examples/rag 30 | title: 借助 agentic RAG 掌控知识库 31 | - local: examples/multiagents 32 | title: 编排 multi-agent 系统 33 | - local: examples/web_browser 34 | title: 基于视觉模型构建能够浏览网页的agent 35 | - title: Reference 36 | sections: 37 | - local: reference/agents 38 | title: Agent-related objects 39 | - local: reference/models 40 | title: Model-related objects 41 | - local: reference/tools 42 | title: Tool-related objects 43 | -------------------------------------------------------------------------------- /docs/source/en/_toctree.yml: -------------------------------------------------------------------------------- 1 | - title: Get started 2 | sections: 3 | - local: index 4 | title: 🤗 Agents 5 | - local: guided_tour 6 | title: Guided tour 7 | - title: Tutorials 8 | sections: 9 | - local: tutorials/building_good_agents 10 | title: ✨ Building good agents 11 | - local: tutorials/inspect_runs 12 | title: 📊 Inspect your agent runs using telemetry 13 | - local: tutorials/tools 14 | title: 🛠️ Tools - in-depth guide 15 | - local: tutorials/secure_code_execution 16 | title: 🛡️ Secure code execution 17 | - local: tutorials/memory 18 | title: 📚 Manage your agent's memory 19 | - title: Conceptual guides 20 | sections: 21 | - local: conceptual_guides/intro_agents 22 | title: 🤖 An introduction to agentic systems 23 | - local: conceptual_guides/react 24 | title: 🤔 How do Multi-step agents work? 25 | - title: Examples 26 | sections: 27 | - local: examples/text_to_sql 28 | title: Self-correcting Text-to-SQL 29 | - local: examples/rag 30 | title: Master you knowledge base with agentic RAG 31 | - local: examples/multiagents 32 | title: Orchestrate a multi-agent system 33 | - local: examples/web_browser 34 | title: Build a web browser agent using vision models 35 | - title: Reference 36 | sections: 37 | - local: reference/agents 38 | title: Agent-related objects 39 | - local: reference/models 40 | title: Model-related objects 41 | - local: reference/tools 42 | title: Tool-related objects 43 | -------------------------------------------------------------------------------- /docs/source/zh/conceptual_guides/react.mdx: -------------------------------------------------------------------------------- 1 | 16 | # 多步骤 agent 是如何工作的? 17 | 18 | ReAct 框架([Yao et al., 2022](https://huggingface.co/papers/2210.03629))是目前构建 agent 的主要方法。 19 | 20 | 该名称基于两个词的组合:"Reason" (推理)和 "Act" (行动)。实际上,遵循此架构的 agent 将根据需要尽可能多的步骤来解决其任务,每个步骤包括一个推理步骤,然后是一个行动步骤,在该步骤中,它制定工具调用,使其更接近解决手头的任务。 21 | 22 | ReAct 过程涉及保留过去步骤的记忆。 23 | 24 | > [!TIP] 25 | > 阅读 [Open-source LLMs as LangChain Agents](https://huggingface.co/blog/open-source-llms-as-agents) 博客文章以了解更多关于多步 agent 的信息。 26 | 27 | 以下是其工作原理的视频概述: 28 | 29 |
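下面是一个极简的、仅作示意的 Python 草图(并非 smolagents 的真实实现,其中的函数与工具名均为假设),用来展示"推理 → 行动 → 观察"循环以及对过去步骤的记忆:

```py
# 一个极简的示意性 ReAct 循环(并非 smolagents 的真实实现,仅用于说明概念)
def llm_decide(memory: list[str]) -> str:
    # 这里用写死的规则代替真实的 LLM 推理
    return "final_answer" if any(m.startswith("观察") for m in memory) else "web_search"


def execute_tool(action: str) -> str:
    # 两个假设的"工具":一次网页搜索和一次最终回答
    tools = {
        "web_search": lambda: "观察:巴黎是法国的首都。",
        "final_answer": lambda: "巴黎",
    }
    return tools[action]()


memory = ["任务:法国的首都是哪里?"]
while True:
    action = llm_decide(memory)         # 推理步骤:根据记忆决定下一个行动
    observation = execute_tool(action)  # 行动步骤:执行工具调用并获得观察结果
    memory += [f"行动:{action}", observation]  # 保留过去步骤的记忆
    if action == "final_answer":        # 得到最终答案后停止
        break

print(memory)
```

真实的 agent 会用 LLM 的推理来代替这里写死的规则,并在每一步把完整的记忆重新交给模型。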
39 | 40 | ![ReAct agent 的框架](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/blog/open-source-llms-as-agents/ReAct.png) 41 | 42 | 我们实现了两个版本的 ToolCallingAgent: 43 | - [`ToolCallingAgent`] 在其输出中生成 JSON 格式的工具调用。 44 | - [`CodeAgent`] 是一种新型的 ToolCallingAgent,它生成代码块形式的工具调用,这对于具有强大编码性能的 LLM 非常有效。 45 | -------------------------------------------------------------------------------- /examples/open_deep_research/README.md: -------------------------------------------------------------------------------- 1 | # Open Deep Research 2 | 3 | Welcome to this open replication of [OpenAI's Deep Research](https://openai.com/index/introducing-deep-research/)! This agent attempts to replicate OpenAI's model and achieve similar performance on research tasks. 4 | 5 | Read more about this implementation's goal and methods in our [blog post](https://huggingface.co/blog/open-deep-research). 6 | 7 | 8 | This agent achieves **55% pass@1** on the GAIA validation set, compared to **67%** for the original Deep Research. 9 | 10 | ## Setup 11 | 12 | To get started, follow the steps below: 13 | 14 | ### Clone the repository 15 | 16 | ```bash 17 | git clone https://github.com/huggingface/smolagents.git 18 | cd smolagents/examples/open_deep_research 19 | ``` 20 | 21 | ### Install dependencies 22 | 23 | Run the following command to install the required dependencies from the `requirements.txt` file: 24 | 25 | ```bash 26 | pip install -r requirements.txt 27 | ``` 28 | 29 | ### Install the development version of `smolagents` 30 | 31 | ```bash 32 | pip install -e ../../.[dev] 33 | ``` 34 | 35 | ### Set up environment variables 36 | 37 | The agent uses the `GoogleSearchTool` for web search, which requires an environment variable with the corresponding API key, based on the selected provider: 38 | - `SERPAPI_API_KEY` for SerpApi: [Sign up here to get a key](https://serpapi.com/users/sign_up) 39 | - `SERPER_API_KEY` for Serper: [Sign up here to get a key](https://serper.dev/signup) 40 | 41 | Depending on the model you want to use, you may need to set environment variables. 42 | For example, to use the default `o1` model, you need to set the `OPENAI_API_KEY` environment variable. 43 | [Sign up here to get a key](https://platform.openai.com/signup). 44 | 45 | > [!WARNING] 46 | > The use of the default `o1` model is restricted to tier-3 access: https://help.openai.com/en/articles/10362446-api-access-to-o1-and-o3-mini 47 | 48 | 49 | ## Usage 50 | 51 | Then you're good to go! Run the run.py script, as in: 52 | ```bash 53 | python run.py --model-id "o1" "Your question here!" 
54 | ``` -------------------------------------------------------------------------------- /docs/source/zh/reference/agents.mdx: -------------------------------------------------------------------------------- 1 | 2 | 17 | # Agents(智能体) 18 | 19 | 20 | 21 | Smolagents 是一个实验性的 API,可能会随时发生变化。由于 API 或底层模型可能发生变化,代理返回的结果也可能有所不同。 22 | 23 | 24 | 25 | 要了解有关智能体和工具的更多信息,请务必阅读[入门指南](../index)。本页面包含基础类的 API 文档。 26 | 27 | ## 智能体(Agents) 28 | 29 | 我们的智能体继承自 [`MultiStepAgent`],这意味着它们可以执行多步操作,每一步包含一个思考(thought),然后是一个工具调用和执行。请阅读[概念指南](../conceptual_guides/react)以了解更多信息。 30 | 31 | 我们提供两种类型的代理,它们基于主要的 [`Agent`] 类: 32 | - [`CodeAgent`] 是默认代理,它以 Python 代码编写工具调用。 33 | - [`ToolCallingAgent`] 以 JSON 编写工具调用。 34 | 35 | 两者在初始化时都需要提供参数 `model` 和工具列表 `tools`。 36 | 37 | ### 智能体类 38 | 39 | [[autodoc]] MultiStepAgent 40 | 41 | [[autodoc]] CodeAgent 42 | 43 | [[autodoc]] ToolCallingAgent 44 | 45 | ### ManagedAgent 46 | 47 | _此类自 1.8.0 起已被弃用:现在您只需向普通代理传递 `name` 和 `description` 属性即可使其可被管理代理调用。_ 48 | 49 | ### stream_to_gradio 50 | 51 | [[autodoc]] stream_to_gradio 52 | 53 | ### GradioUI 54 | 55 | > [!TIP] 56 | > 您必须安装 `gradio` 才能使用 UI。如果尚未安装,请运行 `pip install smolagents[gradio]`。 57 | 58 | [[autodoc]] GradioUI 59 | 60 | ## 提示(Prompts) 61 | 62 | [[autodoc]] smolagents.agents.PromptTemplates 63 | 64 | [[autodoc]] smolagents.agents.PlanningPromptTemplate 65 | 66 | [[autodoc]] smolagents.agents.ManagedAgentPromptTemplate 67 | 68 | [[autodoc]] smolagents.agents.FinalAnswerPromptTemplate 69 | -------------------------------------------------------------------------------- /tests/test_final_answer.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2024 HuggingFace Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | 17 | import numpy as np 18 | import PIL.Image 19 | import pytest 20 | 21 | from smolagents.agent_types import _AGENT_TYPE_MAPPING 22 | from smolagents.default_tools import FinalAnswerTool 23 | 24 | from .test_tools import ToolTesterMixin 25 | from .utils.markers import require_torch 26 | 27 | 28 | class TestFinalAnswerTool(ToolTesterMixin): 29 | def setup_method(self): 30 | self.inputs = {"answer": "Final answer"} 31 | self.tool = FinalAnswerTool() 32 | 33 | def test_exact_match_arg(self): 34 | result = self.tool("Final answer") 35 | assert result == "Final answer" 36 | 37 | def test_exact_match_kwarg(self): 38 | result = self.tool(answer=self.inputs["answer"]) 39 | assert result == "Final answer" 40 | 41 | @require_torch 42 | def test_agent_type_output(self, inputs): 43 | for input_type, input in inputs.items(): 44 | output = self.tool(**input, sanitize_inputs_outputs=True) 45 | agent_type = _AGENT_TYPE_MAPPING[input_type] 46 | assert isinstance(output, agent_type) 47 | 48 | @pytest.fixture 49 | def inputs(self, shared_datadir): 50 | import torch 51 | 52 | return { 53 | "string": {"answer": "Text input"}, 54 | "image": {"answer": PIL.Image.open(shared_datadir / "000000039769.png").resize((512, 512))}, 55 | "audio": {"answer": torch.Tensor(np.ones(3000))}, 56 | } 57 | -------------------------------------------------------------------------------- /utils/check_tests_in_ci.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2025-present, the HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Check that all tests are called in CI.""" 16 | 17 | from pathlib import Path 18 | 19 | 20 | ROOT = Path(__file__).parent.parent 21 | 22 | TESTS_FOLDER = ROOT / "tests" 23 | CI_WORKFLOW_FILE = ROOT / ".github" / "workflows" / "tests.yml" 24 | 25 | 26 | def check_tests_in_ci(): 27 | """List all test files in `./tests/` and check if they are listed in the CI workflow. 28 | 29 | Since each test file is triggered separately in the CI workflow, it is easy to forget a new one when adding new 30 | tests, hence this check. 31 | 32 | NOTE: current implementation is quite naive but should work for now. Must be updated if one want to ignore some 33 | tests or if file naming is updated (currently only files starting by `test_*` are checked) 34 | """ 35 | test_files = [ 36 | path.relative_to(TESTS_FOLDER).as_posix() 37 | for path in TESTS_FOLDER.glob("**/*.py") 38 | if path.name.startswith("test_") 39 | ] 40 | ci_workflow_file_content = CI_WORKFLOW_FILE.read_text() 41 | missing_test_files = [test_file for test_file in test_files if test_file not in ci_workflow_file_content] 42 | if missing_test_files: 43 | print( 44 | "❌ Some test files seem to be ignored in the CI:\n" 45 | + "\n".join(f" - {test_file}" for test_file in missing_test_files) 46 | + f"\n Please add them manually in {CI_WORKFLOW_FILE}." 
47 | ) 48 | exit(1) 49 | else: 50 | print("✅ All good!") 51 | exit(0) 52 | 53 | 54 | if __name__ == "__main__": 55 | check_tests_in_ci() 56 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Logging 2 | logs 3 | tmp 4 | wandb 5 | 6 | # Data 7 | data 8 | outputs 9 | data/ 10 | 11 | # Apple 12 | .DS_Store 13 | 14 | # VS Code 15 | .vscode 16 | 17 | # Byte-compiled / optimized / DLL files 18 | __pycache__/ 19 | *.py[cod] 20 | *$py.class 21 | 22 | # C extensions 23 | *.so 24 | 25 | # Distribution / packaging 26 | .Python 27 | build/ 28 | develop-eggs/ 29 | dist/ 30 | downloads/ 31 | eggs/ 32 | .eggs/ 33 | lib/ 34 | lib64/ 35 | parts/ 36 | sdist/ 37 | var/ 38 | wheels/ 39 | share/python-wheels/ 40 | node_modules/ 41 | *.egg-info/ 42 | .installed.cfg 43 | *.egg 44 | MANIFEST 45 | 46 | # PyInstaller 47 | *.manifest 48 | *.spec 49 | 50 | # Installer logs 51 | pip-log.txt 52 | pip-delete-this-directory.txt 53 | 54 | # Unit test / coverage reports 55 | htmlcov/ 56 | .tox/ 57 | .nox/ 58 | .coverage 59 | .coverage.* 60 | .cache 61 | nosetests.xml 62 | coverage.xml 63 | *.cover 64 | *.py,cover 65 | .hypothesis/ 66 | .pytest_cache/ 67 | cover/ 68 | uv.lock 69 | 70 | # Translations 71 | *.mo 72 | *.pot 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | .pybuilder/ 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # pyenv 89 | # .python-version 90 | 91 | # pipenv 92 | #Pipfile.lock 93 | 94 | # UV 95 | #uv.lock 96 | 97 | # poetry 98 | #poetry.lock 99 | 100 | # pdm 101 | .pdm.toml 102 | .pdm-python 103 | .pdm-build/ 104 | 105 | # PEP 582 106 | __pypackages__/ 107 | 108 | # Celery stuff 109 | celerybeat-schedule 110 | celerybeat.pid 111 | 112 | # SageMath parsed files 113 | *.sage.py 114 | 115 | # Environments 116 | .env 117 | .venv 118 | env/ 119 | venv/ 120 | ENV/ 121 | env.bak/ 122 | venv.bak/ 123 | 124 | 125 | # mkdocs documentation 126 | /site 127 | 128 | # mypy 129 | .mypy_cache/ 130 | .dmypy.json 131 | dmypy.json 132 | 133 | # Pyre type checker 134 | .pyre/ 135 | 136 | # pytype static type analyzer 137 | .pytype/ 138 | 139 | # Cython debug symbols 140 | cython_debug/ 141 | 142 | # PyCharm 143 | .idea/ 144 | 145 | # Interpreter 146 | interpreter_workspace/ 147 | 148 | # Archive 149 | archive/ 150 | savedir/ 151 | output/ 152 | tool_output/ 153 | -------------------------------------------------------------------------------- /examples/rag.py: -------------------------------------------------------------------------------- 1 | # from huggingface_hub import login 2 | 3 | # login() 4 | import datasets 5 | from langchain.docstore.document import Document 6 | from langchain.text_splitter import RecursiveCharacterTextSplitter 7 | from langchain_community.retrievers import BM25Retriever 8 | 9 | 10 | knowledge_base = datasets.load_dataset("m-ric/huggingface_doc", split="train") 11 | knowledge_base = knowledge_base.filter(lambda row: row["source"].startswith("huggingface/transformers")) 12 | 13 | source_docs = [ 14 | Document(page_content=doc["text"], metadata={"source": doc["source"].split("/")[1]}) for doc in knowledge_base 15 | ] 16 | 17 | text_splitter = RecursiveCharacterTextSplitter( 18 | chunk_size=500, 19 | chunk_overlap=50, 20 | add_start_index=True, 21 | strip_whitespace=True, 22 | separators=["\n\n", "\n", ".", " ", ""], 23 | ) 24 | docs_processed = 
text_splitter.split_documents(source_docs) 25 | 26 | from smolagents import Tool 27 | 28 | 29 | class RetrieverTool(Tool): 30 | name = "retriever" 31 | description = "Uses semantic search to retrieve the parts of transformers documentation that could be most relevant to answer your query." 32 | inputs = { 33 | "query": { 34 | "type": "string", 35 | "description": "The query to perform. This should be semantically close to your target documents. Use the affirmative form rather than a question.", 36 | } 37 | } 38 | output_type = "string" 39 | 40 | def __init__(self, docs, **kwargs): 41 | super().__init__(**kwargs) 42 | self.retriever = BM25Retriever.from_documents(docs, k=10) 43 | 44 | def forward(self, query: str) -> str: 45 | assert isinstance(query, str), "Your search query must be a string" 46 | 47 | docs = self.retriever.invoke( 48 | query, 49 | ) 50 | return "\nRetrieved documents:\n" + "".join( 51 | [f"\n\n===== Document {str(i)} =====\n" + doc.page_content for i, doc in enumerate(docs)] 52 | ) 53 | 54 | 55 | from smolagents import CodeAgent, HfApiModel 56 | 57 | 58 | retriever_tool = RetrieverTool(docs_processed) 59 | agent = CodeAgent( 60 | tools=[retriever_tool], 61 | model=HfApiModel(model_id="meta-llama/Llama-3.3-70B-Instruct"), 62 | max_steps=4, 63 | verbosity_level=2, 64 | ) 65 | 66 | agent_output = agent.run("For a transformers model training, which is slower, the forward or the backward pass?") 67 | 68 | print("Final output:") 69 | print(agent_output) 70 | -------------------------------------------------------------------------------- /examples/text_to_sql.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy import ( 2 | Column, 3 | Float, 4 | Integer, 5 | MetaData, 6 | String, 7 | Table, 8 | create_engine, 9 | insert, 10 | inspect, 11 | text, 12 | ) 13 | 14 | 15 | engine = create_engine("sqlite:///:memory:") 16 | metadata_obj = MetaData() 17 | 18 | # create city SQL table 19 | table_name = "receipts" 20 | receipts = Table( 21 | table_name, 22 | metadata_obj, 23 | Column("receipt_id", Integer, primary_key=True), 24 | Column("customer_name", String(16), primary_key=True), 25 | Column("price", Float), 26 | Column("tip", Float), 27 | ) 28 | metadata_obj.create_all(engine) 29 | 30 | rows = [ 31 | {"receipt_id": 1, "customer_name": "Alan Payne", "price": 12.06, "tip": 1.20}, 32 | {"receipt_id": 2, "customer_name": "Alex Mason", "price": 23.86, "tip": 0.24}, 33 | {"receipt_id": 3, "customer_name": "Woodrow Wilson", "price": 53.43, "tip": 5.43}, 34 | {"receipt_id": 4, "customer_name": "Margaret James", "price": 21.11, "tip": 1.00}, 35 | ] 36 | for row in rows: 37 | stmt = insert(receipts).values(**row) 38 | with engine.begin() as connection: 39 | cursor = connection.execute(stmt) 40 | 41 | inspector = inspect(engine) 42 | columns_info = [(col["name"], col["type"]) for col in inspector.get_columns("receipts")] 43 | 44 | table_description = "Columns:\n" + "\n".join([f" - {name}: {col_type}" for name, col_type in columns_info]) 45 | print(table_description) 46 | 47 | from smolagents import tool 48 | 49 | 50 | @tool 51 | def sql_engine(query: str) -> str: 52 | """ 53 | Allows you to perform SQL queries on the table. Returns a string representation of the result. 54 | The table is named 'receipts'. Its description is as follows: 55 | Columns: 56 | - receipt_id: INTEGER 57 | - customer_name: VARCHAR(16) 58 | - price: FLOAT 59 | - tip: FLOAT 60 | 61 | Args: 62 | query: The query to perform. This should be correct SQL. 
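            Example of a valid query (illustrative only): SELECT customer_name, price FROM receipts ORDER BY price DESC LIMIT 1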
63 | """ 64 | output = "" 65 | with engine.connect() as con: 66 | rows = con.execute(text(query)) 67 | for row in rows: 68 | output += "\n" + str(row) 69 | return output 70 | 71 | 72 | from smolagents import CodeAgent, HfApiModel 73 | 74 | 75 | agent = CodeAgent( 76 | tools=[sql_engine], 77 | model=HfApiModel(model_id="meta-llama/Meta-Llama-3.1-8B-Instruct"), 78 | ) 79 | agent.run("Can you give me the name of the client who got the most expensive receipt?") 80 | -------------------------------------------------------------------------------- /tests/fixtures/tools.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import pytest 4 | 5 | from smolagents.tools import Tool, tool 6 | 7 | 8 | @pytest.fixture 9 | def boolean_default_tool_class(): 10 | class BooleanDefaultTool(Tool): 11 | name = "boolean_default_tool" 12 | description = "A tool with a boolean default parameter" 13 | inputs = { 14 | "text": {"type": "string", "description": "Input text"}, 15 | "flag": {"type": "boolean", "description": "Boolean flag with default value", "nullable": True}, 16 | } 17 | output_type = "string" 18 | 19 | def forward(self, text: str, flag: bool = False) -> str: 20 | return f"Text: {text}, Flag: {flag}" 21 | 22 | return BooleanDefaultTool() 23 | 24 | 25 | @pytest.fixture 26 | def boolean_default_tool_function(): 27 | @tool 28 | def boolean_default_tool(text: str, flag: bool = False) -> str: 29 | """ 30 | A tool with a boolean default parameter. 31 | 32 | Args: 33 | text: Input text 34 | flag: Boolean flag with default value 35 | """ 36 | return f"Text: {text}, Flag: {flag}" 37 | 38 | return boolean_default_tool 39 | 40 | 41 | @pytest.fixture 42 | def optional_input_tool_class(): 43 | class OptionalInputTool(Tool): 44 | name = "optional_input_tool" 45 | description = "A tool with an optional input parameter" 46 | inputs = { 47 | "required_text": {"type": "string", "description": "Required input text"}, 48 | "optional_text": {"type": "string", "description": "Optional input text", "nullable": True}, 49 | } 50 | output_type = "string" 51 | 52 | def forward(self, required_text: str, optional_text: Optional[str] = None) -> str: 53 | if optional_text: 54 | return f"{required_text} + {optional_text}" 55 | return required_text 56 | 57 | return OptionalInputTool() 58 | 59 | 60 | @pytest.fixture 61 | def optional_input_tool_function(): 62 | @tool 63 | def optional_input_tool(required_text: str, optional_text: Optional[str] = None) -> str: 64 | """ 65 | A tool with an optional input parameter. 66 | 67 | Args: 68 | required_text: Required input text 69 | optional_text: Optional input text 70 | """ 71 | if optional_text: 72 | return f"{required_text} + {optional_text}" 73 | return required_text 74 | 75 | return optional_input_tool 76 | -------------------------------------------------------------------------------- /examples/agent_from_any_llm.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from smolagents import HfApiModel, LiteLLMModel, OpenAIServerModel, TransformersModel, tool 4 | from smolagents.agents import CodeAgent, ToolCallingAgent 5 | 6 | 7 | # Choose which inference type to use! 
8 | 9 | available_inferences = ["hf_api", "hf_api_provider", "transformers", "ollama", "litellm", "openai"] 10 | chosen_inference = "hf_api_provider" 11 | 12 | print(f"Chose model: '{chosen_inference}'") 13 | 14 | if chosen_inference == "hf_api": 15 | model = HfApiModel(model_id="meta-llama/Llama-3.3-70B-Instruct") 16 | 17 | elif chosen_inference == "hf_api_provider": 18 | model = HfApiModel(provider="together") 19 | 20 | elif chosen_inference == "transformers": 21 | model = TransformersModel(model_id="HuggingFaceTB/SmolLM2-1.7B-Instruct", device_map="auto", max_new_tokens=1000) 22 | 23 | elif chosen_inference == "ollama": 24 | model = LiteLLMModel( 25 | model_id="ollama_chat/llama3.2", 26 | api_base="http://localhost:11434", # replace with remote open-ai compatible server if necessary 27 | api_key="your-api-key", # replace with API key if necessary 28 | num_ctx=8192, # ollama default is 2048 which will often fail horribly. 8192 works for easy tasks, more is better. Check https://huggingface.co/spaces/NyxKrage/LLM-Model-VRAM-Calculator to calculate how much VRAM this will need for the selected model. 29 | ) 30 | 31 | elif chosen_inference == "litellm": 32 | # For anthropic: change model_id below to 'anthropic/claude-3-5-sonnet-latest' 33 | model = LiteLLMModel(model_id="gpt-4o") 34 | 35 | elif chosen_inference == "openai": 36 | # For anthropic: change model_id below to 'anthropic/claude-3-5-sonnet-latest' 37 | model = OpenAIServerModel(model_id="gpt-4o") 38 | 39 | 40 | @tool 41 | def get_weather(location: str, celsius: Optional[bool] = False) -> str: 42 | """ 43 | Get weather in the next days at given location. 44 | Secretly this tool does not care about the location, it hates the weather everywhere. 45 | 46 | Args: 47 | location: the location 48 | celsius: the temperature 49 | """ 50 | return "The weather is UNGODLY with torrential rains and temperatures below -10°C" 51 | 52 | 53 | agent = ToolCallingAgent(tools=[get_weather], model=model, verbosity_level=2) 54 | 55 | print("ToolCallingAgent:", agent.run("What's the weather like in Paris?")) 56 | 57 | agent = CodeAgent(tools=[get_weather], model=model, verbosity_level=2) 58 | 59 | print("CodeAgent:", agent.run("What's the weather like in Paris?")) 60 | -------------------------------------------------------------------------------- /docs/source/hi/conceptual_guides/react.mdx: -------------------------------------------------------------------------------- 1 | 16 | # मल्टी-स्टेप एजेंट्स कैसे काम करते हैं? 17 | 18 | ReAct फ्रेमवर्क ([Yao et al., 2022](https://huggingface.co/papers/2210.03629)) वर्तमान में एजेंट्स बनाने का मुख्य दृष्टिकोण है। 19 | 20 | नाम दो शब्दों, "Reason" (तर्क) और "Act" (क्रिया) के संयोजन पर आधारित है। वास्तव में, इस आर्किटेक्चर का पालन करने वाले एजेंट अपने कार्य को उतने चरणों में हल करेंगे जितने आवश्यक हों, प्रत्येक चरण में एक Reasoning कदम होगा, फिर एक Action कदम होगा, जहाँ यह टूल कॉल्स तैयार करेगा जो उसे कार्य को हल करने के करीब ले जाएंगे। 21 | 22 | ReAct प्रक्रिया में पिछले चरणों की मेमोरी रखना शामिल है। 23 | 24 | > [!TIP] 25 | > मल्टी-स्टेप एजेंट्स के बारे में अधिक जानने के लिए [Open-source LLMs as LangChain Agents](https://huggingface.co/blog/open-source-llms-as-agents) ब्लॉग पोस्ट पढ़ें। 26 | 27 | यहाँ एक वीडियो ओवरव्यू है कि यह कैसे काम करता है: 28 | 29 |
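नीचे एक बहुत ही सरल, केवल उदाहरण के लिए लिखा गया Python स्केच है (यह smolagents का वास्तविक इम्प्लीमेंटेशन नहीं है; इसमें दिए गए फ़ंक्शन और टूल नाम काल्पनिक हैं), जो Reason → Act → Observe लूप और पिछले चरणों की मेमोरी को दिखाता है:

```py
# A tiny illustrative ReAct loop (not the real smolagents implementation, concept only)
def llm_decide(memory: list[str]) -> str:
    # A hard-coded rule stands in for the real LLM reasoning step
    return "final_answer" if any(m.startswith("Observation") for m in memory) else "web_search"


def execute_tool(action: str) -> str:
    # Two hypothetical "tools": a web search and a final answer
    tools = {
        "web_search": lambda: "Observation: Paris is the capital of France.",
        "final_answer": lambda: "Paris",
    }
    return tools[action]()


memory = ["Task: What is the capital of France?"]
while True:
    action = llm_decide(memory)         # reasoning step: decide the next action from memory
    observation = execute_tool(action)  # action step: run the tool call and observe the result
    memory += [f"Action: {action}", observation]  # keep a memory of past steps
    if action == "final_answer":        # stop once a final answer is produced
        break

print(memory)
```

वास्तविक agent में यह hard-coded नियम LLM की reasoning से बदल जाता है, और हर चरण में पूरी मेमोरी मॉडल को दोबारा दी जाती है।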
39 | 40 | ![ReAct एजेंट का फ्रेमवर्क](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/blog/open-source-llms-as-agents/ReAct.png) 41 | 42 | हम दो प्रकार के ToolCallingAgent को लागू करते हैं: 43 | - [`ToolCallingAgent`] अपने आउटपुट में टूल कॉल को JSON के रूप में जनरेट करता है। 44 | - [`CodeAgent`] ToolCallingAgent का एक नया प्रकार है जो अपने टूल कॉल को कोड के ब्लॉब्स के रूप में जनरेट करता है, जो उन LLM के लिए वास्तव में अच्छी तरह काम करता है जिनका कोडिंग प्रदर्शन मजबूत है। 45 | -------------------------------------------------------------------------------- /docs/source/zh/reference/tools.mdx: -------------------------------------------------------------------------------- 1 | 2 | 17 | # 工具 18 | 19 | 20 | 21 | Smolagents 是一个实验性 API,可能会随时更改。由于 API 或底层模型可能发生变化,代理返回的结果可能会有所不同。 22 | 23 | 24 | 25 | 要了解更多关于智能体和工具的信息,请务必阅读[入门指南](../index)。本页面包含底层类的 API 文档。 26 | 27 | ## 工具 28 | 29 | ### load_tool 30 | 31 | [[autodoc]] load_tool 32 | 33 | ### tool 34 | 35 | [[autodoc]] tool 36 | 37 | ### Tool 38 | 39 | [[autodoc]] Tool 40 | 41 | ### launch_gradio_demo 42 | 43 | [[autodoc]] launch_gradio_demo 44 | 45 | ## 默认工具 46 | 47 | ### PythonInterpreterTool 48 | 49 | [[autodoc]] PythonInterpreterTool 50 | 51 | ### FinalAnswerTool 52 | 53 | [[autodoc]] FinalAnswerTool 54 | 55 | ### UserInputTool 56 | 57 | [[autodoc]] UserInputTool 58 | 59 | ### DuckDuckGoSearchTool 60 | 61 | [[autodoc]] DuckDuckGoSearchTool 62 | 63 | ### GoogleSearchTool 64 | 65 | [[autodoc]] GoogleSearchTool 66 | 67 | ### VisitWebpageTool 68 | 69 | [[autodoc]] VisitWebpageTool 70 | 71 | ### SpeechToTextTool 72 | 73 | [[autodoc]] SpeechToTextTool 74 | 75 | ## 工具集合 76 | 77 | [[autodoc]] ToolCollection 78 | 79 | ## 智能体类型 80 | 81 | 智能体可以处理工具之间的任何类型的对象;工具是完全多模态的,可以接受和返回文本、图像、音频、视频以及其他类型的对象。为了增加工具之间的兼容性,以及正确呈现在 ipython(jupyter、colab、ipython notebooks 等)中的返回结果,我们为这些类型实现了包装类。 82 | 83 | 被包装的对象应该继续保持其初始行为;例如,一个文本对象应继续表现为字符串,一个图像对象应继续表现为 `PIL.Image`。 84 | 85 | 这些类型有三个特定的用途: 86 | 87 | - 调用 `to_raw` 方法时,应返回底层对象 88 | - 调用 `to_string` 方法时,应将对象转换为字符串:对于 `AgentText` 类型,可以直接返回字符串;对于其他实例,则返回对象序列化版本的路径 89 | - 在 ipython 内核中显示时,应正确显示对象 90 | 91 | ### AgentText 92 | 93 | [[autodoc]] smolagents.agent_types.AgentText 94 | 95 | ### AgentImage 96 | 97 | [[autodoc]] smolagents.agent_types.AgentImage 98 | 99 | ### AgentAudio 100 | 101 | [[autodoc]] smolagents.agent_types.AgentAudio 102 | -------------------------------------------------------------------------------- /docs/source/en/reference/agents.mdx: -------------------------------------------------------------------------------- 1 | 16 | # Agents 17 | 18 | 19 | 20 | Smolagents is an experimental API which is subject to change at any time. Results returned by the agents 21 | can vary as the APIs or underlying models are prone to change. 22 | 23 | 24 | 25 | To learn more about agents and tools make sure to read the [introductory guide](../index). This page 26 | contains the API docs for the underlying classes. 27 | 28 | ## Agents 29 | 30 | Our agents inherit from [`MultiStepAgent`], which means they can act in multiple steps, each step consisting of one thought, then one tool call and execution. Read more in [this conceptual guide](../conceptual_guides/react). 31 | 32 | We provide two types of agents, based on the main [`Agent`] class. 33 | - [`CodeAgent`] is the default agent, it writes its tool calls in Python code. 34 | - [`ToolCallingAgent`] writes its tool calls in JSON. 35 | 36 | Both require arguments `model` and list of tools `tools` at initialization. 
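For instance, a minimal setup might look like the sketch below (the model and tool choices are only illustrative, mirroring the repository examples; any supported model class and tool list can be swapped in):

```python
from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel

model = HfApiModel()  # any supported model class can be used here
agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=model)
agent.run("How many seconds would it take for a leopard at full speed to run through Pont des Arts?")
```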
37 | 38 | ### Classes of agents 39 | 40 | [[autodoc]] MultiStepAgent 41 | 42 | [[autodoc]] CodeAgent 43 | 44 | [[autodoc]] ToolCallingAgent 45 | 46 | ### ManagedAgent 47 | 48 | _This class is deprecated since 1.8.0: now you simply need to pass attributes `name` and `description` to a normal agent to make it callable by a manager agent._ 49 | 50 | ### stream_to_gradio 51 | 52 | [[autodoc]] stream_to_gradio 53 | 54 | ### GradioUI 55 | 56 | > [!TIP] 57 | > You must have `gradio` installed to use the UI. Please run `pip install smolagents[gradio]` if it's not the case. 58 | 59 | [[autodoc]] GradioUI 60 | 61 | ## Prompts 62 | 63 | [[autodoc]] smolagents.agents.PromptTemplates 64 | 65 | [[autodoc]] smolagents.agents.PlanningPromptTemplate 66 | 67 | [[autodoc]] smolagents.agents.ManagedAgentPromptTemplate 68 | 69 | [[autodoc]] smolagents.agents.FinalAnswerPromptTemplate 70 | -------------------------------------------------------------------------------- /docs/source/zh/index.mdx: -------------------------------------------------------------------------------- 1 | 15 | 16 | # `smolagents` 17 | 18 | 这是构建强大 agent 的最简单框架!顺便问一下,什么是 "agent"?我们在[此页面](conceptual_guides/intro_agents)提供了我们的定义,您还可以找到关于何时使用或不使用它们的建议(剧透:通常不使用 agent 会更好)。 19 | 20 | > [!TIP] 21 | > 译者注:Agent 的业内术语是“智能体”。本译文将保留 agent,不作翻译,以带来更高效的阅读体验。(在中文为主的文章中,It's easier to 注意到英文。Attention Is All You Need!) 22 | 23 | 本库提供: 24 | 25 | ✨ **简洁性**:Agent 逻辑仅需约千行代码。我们将抽象保持在原始代码之上的最小形态! 26 | 27 | 🌐 **支持任何 LLM**:支持通过 Hub 托管的模型,使用其 `transformers` 版本或通过我们的推理 API 加载,也支持 OpenAI、Anthropic 等模型。使用任何 LLM 为 agent 提供动力都非常容易。 28 | 29 | 🧑‍💻 **一流的代码 agent 支持**,即编写代码作为其操作的 agent(与"用于编写代码的 agent"相对),[在此了解更多](tutorials/secure_code_execution)。 30 | 31 | 🤗 **Hub 集成**:您可以在 Hub 上共享和加载工具,更多功能即将推出! 32 | 33 |
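下面是一个极简的上手示例草图(模型与工具的选择仅作演示,与仓库中的示例保持一致,可按需替换):

```py
from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel

agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=HfApiModel())
agent.run("如果猎豹以全速奔跑,穿过巴黎艺术桥需要多少秒?")
```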
34 | 
36 | - **导览**:学习基础知识并熟悉使用 agent。如果您是第一次使用 agent,请从这里开始!
40 | - **操作指南**:实用指南,帮助您实现特定目标:创建一个生成和测试 SQL 查询的 agent!
44 | - **概念指南**:高级解释,帮助您更好地理解重要主题。
48 | - **教程**:涵盖构建 agent 重要方面的横向教程。
52 | 
53 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "smolagents" 7 | version = "1.14.0.dev0" 8 | description = "🤗 smolagents: a barebones library for agents. Agents write python code to call tools or orchestrate other agents." 9 | authors = [ 10 | { name="Aymeric Roucher", email="aymeric@hf.co" }, 11 | ] 12 | readme = "README.md" 13 | requires-python = ">=3.10" 14 | dependencies = [ 15 | "huggingface-hub>=0.28.0", 16 | "requests>=2.32.3", 17 | "rich>=13.9.4", 18 | "jinja2>=3.1.4", 19 | "pillow>=11.0.0", 20 | "markdownify>=0.14.1", 21 | "duckduckgo-search>=6.3.7", 22 | "python-dotenv" 23 | ] 24 | 25 | [project.optional-dependencies] 26 | bedrock = [ 27 | "boto3>=1.36.18" 28 | ] 29 | torch = [ 30 | "torch", 31 | "torchvision", 32 | "numpy>=1.21.2", 33 | ] 34 | audio = [ 35 | "soundfile", 36 | "smolagents[torch]", 37 | ] 38 | docker = [ 39 | "docker>=7.1.0", 40 | "websocket-client", 41 | ] 42 | e2b = [ 43 | "e2b-code-interpreter>=1.0.3", 44 | "python-dotenv>=1.0.1", 45 | ] 46 | gradio = [ 47 | "gradio>=5.13.2", 48 | ] 49 | litellm = [ 50 | "litellm>=1.60.2", 51 | ] 52 | mcp = [ 53 | "mcpadapt>=0.0.15", 54 | "mcp", 55 | ] 56 | mlx-lm = [ 57 | "mlx-lm" 58 | ] 59 | openai = [ 60 | "openai>=1.58.1" 61 | ] 62 | telemetry = [ 63 | "arize-phoenix", 64 | "opentelemetry-sdk", 65 | "opentelemetry-exporter-otlp", 66 | "openinference-instrumentation-smolagents>=0.1.4" 67 | ] 68 | transformers = [ 69 | "accelerate", 70 | "transformers>=4.0.0", 71 | "smolagents[torch]", 72 | ] 73 | vision = [ 74 | "helium", 75 | "selenium", 76 | ] 77 | vllm = [ 78 | "vllm", 79 | "torch" 80 | ] 81 | all = [ 82 | "smolagents[audio,docker,e2b,gradio,litellm,mcp,mlx-lm,openai,telemetry,transformers,vision,bedrock]", 83 | ] 84 | quality = [ 85 | "ruff>=0.9.0", 86 | ] 87 | test = [ 88 | "ipython>=8.31.0", # for interactive environment tests 89 | "pandas>=2.2.3", 90 | "pytest>=8.1.0", 91 | "pytest-datadir", 92 | "python-dotenv>=1.0.1", # For test_all_docs 93 | "smolagents[all]", 94 | "rank-bm25", # For test_all_docs 95 | "Wikipedia-API>=0.8.1", 96 | ] 97 | dev = [ 98 | "smolagents[quality,test]", 99 | "sqlalchemy", # for ./examples 100 | ] 101 | 102 | [tool.pytest.ini_options] 103 | # Add the specified `OPTS` to the set of command line arguments as if they had been specified by the user. 
104 | addopts = "-sv --durations=0" 105 | 106 | [tool.ruff] 107 | line-length = 119 108 | lint.ignore = [ 109 | "F403", # undefined-local-with-import-star 110 | "E501", # line-too-long 111 | ] 112 | lint.select = ["E", "F", "I", "W"] 113 | 114 | [tool.ruff.lint.per-file-ignores] 115 | "examples/*" = [ 116 | "E402", # module-import-not-at-top-of-file 117 | ] 118 | 119 | [tool.ruff.lint.isort] 120 | known-first-party = ["smolagents"] 121 | lines-after-imports = 2 122 | 123 | [tool.setuptools.package-data] 124 | "smolagents.prompts" = ["*.yaml"] 125 | 126 | [project.scripts] 127 | smolagent = "smolagents.cli:main" 128 | webagent = "smolagents.vision_web_browser:main" 129 | -------------------------------------------------------------------------------- /docs/source/hi/reference/tools.mdx: -------------------------------------------------------------------------------- 1 | 16 | # Tools 17 | 18 | 19 | 20 | Smolagents एक experimental API है जो किसी भी समय बदल सकता है। एजेंट्स द्वारा लौटाए गए परिणाम भिन्न हो सकते हैं क्योंकि APIs या underlying मॉडल बदलने की संभावना रखते हैं। 21 | 22 | 23 | 24 | एजेंट्स और टूल्स के बारे में अधिक जानने के लिए [introductory guide](../index) पढ़ना सुनिश्चित करें। 25 | यह पेज underlying क्लासेज के लिए API docs को शामिल करता है। 26 | 27 | ## Tools 28 | 29 | ### load_tool 30 | 31 | [[autodoc]] load_tool 32 | 33 | ### tool 34 | 35 | [[autodoc]] tool 36 | 37 | ### Tool 38 | 39 | [[autodoc]] Tool 40 | 41 | ### launch_gradio_demo 42 | 43 | [[autodoc]] launch_gradio_demo 44 | 45 | ## Default Tools 46 | 47 | ### PythonInterpreterTool 48 | 49 | [[autodoc]] PythonInterpreterTool 50 | 51 | ### DuckDuckGoSearchTool 52 | 53 | [[autodoc]] DuckDuckGoSearchTool 54 | 55 | ### VisitWebpageTool 56 | 57 | [[autodoc]] VisitWebpageTool 58 | 59 | ### UserInputTool 60 | 61 | [[autodoc]] UserInputTool 62 | 63 | ## ToolCollection 64 | 65 | [[autodoc]] ToolCollection 66 | 67 | ## Agent टाइप्स 68 | 69 | एजेंट्स टूल्स के बीच किसी भी प्रकार की ऑब्जेक्ट को संभाल सकते हैं; टूल्स, पूरी तरह से मल्टीमोडल होने के कारण, टेक्स्ट, इमेज, ऑडियो, वीडियो सहित अन्य प्रकारों को स्वीकार और रिटर्न कर सकते हैं। 70 | टूल्स के बीच अनुकूलता बढ़ाने के साथ-साथ इन रिटर्न्स को ipython (jupyter, colab, ipython notebooks, ...) 
में सही ढंग से रेंडर करने के लिए, हम इन टाइप्स के आसपास रैपर क्लासेज को लागू करते हैं। 71 | 72 | रैप किए गए ऑब्जेक्ट्स को प्रारंभ में जैसा व्यवहार करना चाहिए वैसा ही करना जारी रखना चाहिए; एक टेक्स्ट ऑब्जेक्ट को अभी भी स्ट्रिंग की तरह व्यवहार करना चाहिए| 73 | एक इमेज ऑब्जेक्ट को अभी भी `PIL.Image` की तरह व्यवहार करना चाहिए। 74 | 75 | इन टाइप्स के तीन विशिष्ट उद्देश्य हैं: 76 | 77 | - टाइप पर `to_raw` को कॉल करने से अंतर्निहित ऑब्जेक्ट रिटर्न होना चाहिए 78 | - टाइप पर `to_string` को कॉल करने से ऑब्जेक्ट को स्ट्रिंग के रूप में रिटर्न होना चाहिए: वह `AgentText` के मामले में स्ट्रिंग हो सकती है लेकिन अन्य उदाहरणों में ऑब्जेक्ट के सीरियलाइज्ड वर्जन का पाथ होगा 79 | - इसे एक ipython kernel में प्रदर्शित करने पर ऑब्जेक्ट को सही ढंग से प्रदर्शित करना चाहिए 80 | 81 | ### AgentText 82 | 83 | [[autodoc]] smolagents.agent_types.AgentText 84 | 85 | ### AgentImage 86 | 87 | [[autodoc]] smolagents.agent_types.AgentImage 88 | 89 | ### AgentAudio 90 | 91 | [[autodoc]] smolagents.agent_types.AgentAudio 92 | -------------------------------------------------------------------------------- /docs/source/zh/tutorials/secure_code_execution.mdx: -------------------------------------------------------------------------------- 1 | 16 | # 安全代码执行 17 | 18 | [[open-in-colab]] 19 | 20 | > [!TIP] 21 | > 如果你是第一次构建 agent,请先阅读 [agent 介绍](../conceptual_guides/intro_agents) 和 [smolagents 导览](../guided_tour)。 22 | 23 | ### 代码智能体 24 | 25 | [多项](https://huggingface.co/papers/2402.01030) [研究](https://huggingface.co/papers/2411.01747) [表明](https://huggingface.co/papers/2401.00812),让大语言模型用代码编写其动作(工具调用)比当前标准的工具调用格式要好得多,目前行业标准是 "将动作写成包含工具名称和参数的 JSON" 的各种变体。 26 | 27 | 为什么代码更好?因为我们专门为计算机执行的动作而设计编程语言。如果 JSON 片段是更好的方式,那么这个工具包就应该是用 JSON 片段编写的,魔鬼就会嘲笑我们。 28 | 29 | 代码就是表达计算机动作的更好方式。它具有更好的: 30 | - **组合性**:你能像定义 Python 函数那样,在 JSON 动作中嵌套其他 JSON 动作,或者定义一组 JSON 动作以便以后重用吗? 31 | - **对象管理**:你如何在 JSON 中存储像 `generate_image` 这样的动作的输出? 32 | - **通用性**:代码是为了简单地表达任何可以让计算机做的事情而构建的。 33 | - **在 LLM 训练语料库中的表示**:天赐良机,为什么不利用已经包含在 LLM 训练语料库中的大量高质量动作呢? 34 | 35 | 下图展示了这一点,取自 [可执行代码动作引出更好的 LLM 智能体](https://huggingface.co/papers/2402.01030)。 36 | 37 | 38 | 39 | 这就是为什么我们强调提出代码智能体,在本例中是 Python 智能体,这意味着我们要在构建安全的 Python 解释器上投入更多精力。 40 | 41 | ### 本地 Python 解释器 42 | 43 | 默认情况下,`CodeAgent` 会在你的环境中运行 LLM 生成的代码。 44 | 这个执行不是由普通的 Python 解释器完成的:我们从零开始重新构建了一个更安全的 `LocalPythonExecutor`。 45 | 这个解释器通过以下方式设计以确保安全: 46 | - 将导入限制为用户显式传递的列表 47 | - 限制操作次数以防止无限循环和资源膨胀 48 | - 不会执行任何未预定义的操作 49 | 50 | 我们已经在许多用例中使用了这个解释器,从未观察到对环境造成任何损害。 51 | 52 | 然而,这个解决方案并不是万无一失的:可以想象,如果 LLM 被微调用于恶意操作,仍然可能损害你的环境。例如,如果你允许像 `Pillow` 这样无害的包处理图像,LLM 可能会生成数千张图像保存以膨胀你的硬盘。 53 | 如果你自己选择了 LLM 引擎,这当然不太可能,但它可能会发生。 54 | 55 | 所以如果你想格外谨慎,可以使用下面描述的远程代码执行选项。 56 | 57 | ### E2B 代码执行器 58 | 59 | 为了最大程度的安全性,你可以使用我们与 E2B 的集成在沙盒环境中运行代码。这是一个远程执行服务,可以在隔离的容器中运行你的代码,使代码无法影响你的本地环境。 60 | 61 | 为此,你需要设置你的 E2B 账户并在环境变量中设置 `E2B_API_KEY`。请前往 [E2B 快速入门文档](https://e2b.dev/docs/quickstart) 了解更多信息。 62 | 63 | 然后你可以通过 `pip install e2b-code-interpreter python-dotenv` 安装它。 64 | 65 | 现在你已经准备好了! 66 | 67 | 要将代码执行器设置为 E2B,只需在初始化 `CodeAgent` 时传递标志 `executor_type="e2b"`。 68 | 请注意,你应该将所有工具的依赖项添加到 `additional_authorized_imports` 中,以便执行器安装它们。 69 | 70 | ```py 71 | from smolagents import CodeAgent, VisitWebpageTool, HfApiModel 72 | agent = CodeAgent( 73 | tools = [VisitWebpageTool()], 74 | model=HfApiModel(), 75 | additional_authorized_imports=["requests", "markdownify"], 76 | executor_type="e2b" 77 | ) 78 | 79 | agent.run("What was Abraham Lincoln's preferred pet?") 80 | ``` 81 | 82 | 目前 E2B 代码执行暂不兼容多 agent——因为把 agent 调用放在应该在远程执行的代码块里,是非常混乱的。但我们正在努力做到这件事! 
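作为对上文"本地 Python 解释器"一节的补充,下面给出一个仅使用默认本地执行器的最小示意(并非完整实现:`requests` 与 `markdownify` 只是示例性的授权导入,可按需替换):

```py
from smolagents import CodeAgent, HfApiModel

# 最小示意:不指定 executor_type 时,代码默认由本地的 LocalPythonExecutor 执行。
# 只有列在 additional_authorized_imports 中的模块才允许被导入,其他导入会被拒绝。
agent = CodeAgent(
    tools=[],
    model=HfApiModel(),
    additional_authorized_imports=["requests", "markdownify"],  # 示例性的授权导入列表
)

agent.run("访问 https://huggingface.co 并用一句话总结页面标题。")
```

与上文的 E2B 示例相比,这里的代码完全在本地运行,因此请只授权你信任的导入。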
83 | -------------------------------------------------------------------------------- /docs/source/en/reference/tools.mdx: -------------------------------------------------------------------------------- 1 | 16 | # Tools 17 | 18 | 19 | 20 | Smolagents is an experimental API which is subject to change at any time. Results returned by the agents 21 | can vary as the APIs or underlying models are prone to change. 22 | 23 | 24 | 25 | To learn more about agents and tools make sure to read the [introductory guide](../index). This page 26 | contains the API docs for the underlying classes. 27 | 28 | ## Tools 29 | 30 | ### load_tool 31 | 32 | [[autodoc]] load_tool 33 | 34 | ### tool 35 | 36 | [[autodoc]] tool 37 | 38 | ### Tool 39 | 40 | [[autodoc]] Tool 41 | 42 | ### launch_gradio_demo 43 | 44 | [[autodoc]] launch_gradio_demo 45 | 46 | ## Default tools 47 | 48 | ### PythonInterpreterTool 49 | 50 | [[autodoc]] PythonInterpreterTool 51 | 52 | ### FinalAnswerTool 53 | 54 | [[autodoc]] FinalAnswerTool 55 | 56 | ### UserInputTool 57 | 58 | [[autodoc]] UserInputTool 59 | 60 | ### DuckDuckGoSearchTool 61 | 62 | [[autodoc]] DuckDuckGoSearchTool 63 | 64 | ### GoogleSearchTool 65 | 66 | [[autodoc]] GoogleSearchTool 67 | 68 | ### VisitWebpageTool 69 | 70 | [[autodoc]] VisitWebpageTool 71 | 72 | ### SpeechToTextTool 73 | 74 | [[autodoc]] SpeechToTextTool 75 | 76 | ## ToolCollection 77 | 78 | [[autodoc]] ToolCollection 79 | 80 | ## Agent Types 81 | 82 | Agents can handle any type of object in-between tools; tools, being completely multimodal, can accept and return 83 | text, image, audio, video, among other types. In order to increase compatibility between tools, as well as to 84 | correctly render these returns in ipython (jupyter, colab, ipython notebooks, ...), we implement wrapper classes 85 | around these types. 86 | 87 | The wrapped objects should continue behaving as initially; a text object should still behave as a string, an image 88 | object should still behave as a `PIL.Image`. 89 | 90 | These types have three specific purposes: 91 | 92 | - Calling `to_raw` on the type should return the underlying object 93 | - Calling `to_string` on the type should return the object as a string: that can be the string in case of an `AgentText` 94 | but will be the path of the serialized version of the object in other instances 95 | - Displaying it in an ipython kernel should display the object correctly 96 | 97 | ### AgentText 98 | 99 | [[autodoc]] smolagents.agent_types.AgentText 100 | 101 | ### AgentImage 102 | 103 | [[autodoc]] smolagents.agent_types.AgentImage 104 | 105 | ### AgentAudio 106 | 107 | [[autodoc]] smolagents.agent_types.AgentAudio 108 | -------------------------------------------------------------------------------- /docs/source/en/conceptual_guides/react.mdx: -------------------------------------------------------------------------------- 1 | 16 | # How do multi-step agents work? 17 | 18 | The ReAct framework ([Yao et al., 2022](https://huggingface.co/papers/2210.03629)) is currently the main approach to building agents. 19 | 20 | The name is based on the concatenation of two words, "Reason" and "Act." Indeed, agents following this architecture will solve their task in as many steps as needed, each step consisting of a Reasoning step, then an Action step where it formulates tool calls that will bring it closer to solving the task at hand. 21 | 22 | All agents in `smolagents` are based on singular `MultiStepAgent` class, which is an abstraction of ReAct framework. 
23 | 24 | On a basic level, this class performs actions in a cycle of the following steps, where existing variables and knowledge are incorporated into the agent logs as shown below: 25 | 26 | Initialization: the system prompt is stored in a `SystemPromptStep`, and the user query is logged into a `TaskStep`. 27 | 28 | While loop (ReAct loop): 29 | 30 | - Use `agent.write_memory_to_messages()` to write the agent logs into a list of LLM-readable [chat messages](https://huggingface.co/docs/transformers/en/chat_templating). 31 | - Send these messages to a `Model` object to get its completion. Parse the completion to get the action (a JSON blob for `ToolCallingAgent`, a code snippet for `CodeAgent`). 32 | - Execute the action and log the result into memory (an `ActionStep`). 33 | - At the end of each step, we run all callback functions defined in `agent.step_callbacks`. 34 | 35 | Optionally, when planning is activated, a plan can be periodically revised and stored in a `PlanningStep`. This includes feeding facts about the task at hand to the memory. 36 | 37 | For a `CodeAgent`, it looks like the figure below. 38 |
40 | [Figure: the ReAct loop of a `CodeAgent`] 43 |
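In rough pseudocode, the loop described above can be sketched as follows (an illustrative outline only, not the actual smolagents source; `parse_action` and `execute_action` are hypothetical stand-ins for the real parsing and execution logic):

```py
from smolagents import ActionStep, TaskStep


def parse_action(completion):
    # Hypothetical helper: a ToolCallingAgent would parse a JSON tool call here,
    # while a CodeAgent would extract a code snippet from the completion.
    return completion.content


def execute_action(action):
    # Hypothetical helper: run the tool call or code snippet and report whether
    # it produced a final answer.
    observations, final_answer = f"Executed: {action}", None
    return observations, final_answer


def react_loop(agent, task: str, max_steps: int = 10):
    agent.memory.steps.append(TaskStep(task=task))            # log the user query
    for step_number in range(1, max_steps + 1):
        messages = agent.write_memory_to_messages()           # memory -> LLM-readable chat messages
        completion = agent.model(messages)                     # get the model's completion
        action = parse_action(completion)                      # JSON blob or code snippet
        observations, final_answer = execute_action(action)    # act on the environment
        memory_step = ActionStep(step_number=step_number, observations=observations)
        agent.memory.steps.append(memory_step)                 # log the result
        for callback in agent.step_callbacks:                  # run end-of-step callbacks
            callback(memory_step)
        if final_answer is not None:
            return final_answer
```

The actual implementation layers error handling, optional planning steps, and logging on top of this control flow.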
44 | 45 | Here is a video overview of how that works: 46 | 47 |
48 | [Embedded video overview] 56 |
57 | 58 | We implement two versions of agents: 59 | - [`CodeAgent`] is the preferred type of agent: it generates its tool calls as blobs of code. 60 | - [`ToolCallingAgent`] generates tool calls as a JSON in its output, as is commonly done in agentic frameworks. We incorporate this option because it can be useful in some narrow cases where you can do fine with only one tool call per step: for instance, for web browsing, you need to wait after each action on the page to monitor how the page changes. 61 | 62 | > [!TIP] 63 | > Read [Open-source LLMs as LangChain Agents](https://huggingface.co/blog/open-source-llms-as-agents) blog post to learn more about multi-step agents. 64 | -------------------------------------------------------------------------------- /tests/fixtures/agents.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | 4 | AGENT_DICTS = { 5 | "v1.9": { 6 | "tools": [], 7 | "model": { 8 | "class": "HfApiModel", 9 | "data": { 10 | "last_input_token_count": None, 11 | "last_output_token_count": None, 12 | "model_id": "Qwen/Qwen2.5-Coder-32B-Instruct", 13 | "provider": None, 14 | }, 15 | }, 16 | "managed_agents": {}, 17 | "prompt_templates": { 18 | "system_prompt": "dummy system prompt", 19 | "planning": { 20 | "initial_facts": "dummy planning initial facts", 21 | "initial_plan": "dummy planning initial plan", 22 | "update_facts_pre_messages": "dummy planning update facts pre messages", 23 | "update_facts_post_messages": "dummy planning update facts post messages", 24 | "update_plan_pre_messages": "dummy planning update plan pre messages", 25 | "update_plan_post_messages": "dummy planning update plan post messages", 26 | }, 27 | "managed_agent": { 28 | "task": "dummy managed agent task", 29 | "report": "dummy managed agent report", 30 | }, 31 | "final_answer": { 32 | "pre_messages": "dummy final answer pre messages", 33 | "post_messages": "dummy final answer post messages", 34 | }, 35 | }, 36 | "max_steps": 10, 37 | "verbosity_level": 2, 38 | "grammar": None, 39 | "planning_interval": 2, 40 | "name": "test_agent", 41 | "description": "dummy description", 42 | "requirements": ["smolagents"], 43 | "authorized_imports": ["pandas"], 44 | }, 45 | # Added: executor_type, executor_kwargs, max_print_outputs_length 46 | "v1.10": { 47 | "tools": [], 48 | "model": { 49 | "class": "HfApiModel", 50 | "data": { 51 | "last_input_token_count": None, 52 | "last_output_token_count": None, 53 | "model_id": "Qwen/Qwen2.5-Coder-32B-Instruct", 54 | "provider": None, 55 | }, 56 | }, 57 | "managed_agents": {}, 58 | "prompt_templates": { 59 | "system_prompt": "dummy system prompt", 60 | "planning": { 61 | "initial_facts": "dummy planning initial facts", 62 | "initial_plan": "dummy planning initial plan", 63 | "update_facts_pre_messages": "dummy planning update facts pre messages", 64 | "update_facts_post_messages": "dummy planning update facts post messages", 65 | "update_plan_pre_messages": "dummy planning update plan pre messages", 66 | "update_plan_post_messages": "dummy planning update plan post messages", 67 | }, 68 | "managed_agent": { 69 | "task": "dummy managed agent task", 70 | "report": "dummy managed agent report", 71 | }, 72 | "final_answer": { 73 | "pre_messages": "dummy final answer pre messages", 74 | "post_messages": "dummy final answer post messages", 75 | }, 76 | }, 77 | "max_steps": 10, 78 | "verbosity_level": 2, 79 | "grammar": None, 80 | "planning_interval": 2, 81 | "name": "test_agent", 82 | "description": "dummy description", 83 
| "requirements": ["smolagents"], 84 | "authorized_imports": ["pandas"], 85 | "executor_type": "local", 86 | "executor_kwargs": {}, 87 | "max_print_outputs_length": None, 88 | }, 89 | } 90 | 91 | 92 | @pytest.fixture 93 | def get_agent_dict(): 94 | def _get_agent_dict(agent_dict_key): 95 | return AGENT_DICTS[agent_dict_key] 96 | 97 | return _get_agent_dict 98 | -------------------------------------------------------------------------------- /docs/source/hi/index.mdx: -------------------------------------------------------------------------------- 1 | 15 | 16 | # `smolagents` 17 | 18 |
19 | 20 |
21 | 22 | यह लाइब्रेरी पावरफुल एजेंट्स बनाने के लिए सबसे सरल फ्रेमवर्क है! वैसे, "एजेंट्स" हैं क्या? हम अपनी परिभाषा [इस पेज पर](conceptual_guides/intro_agents) प्रदान करते हैं, जहाँ आपको यह भी पता चलेगा कि इन्हें कब उपयोग करें या न करें (स्पॉइलर: आप अक्सर एजेंट्स के बिना बेहतर काम कर सकते हैं)। 23 | 24 | यह लाइब्रेरी प्रदान करती है: 25 | 26 | ✨ **सरलता**: Agents का लॉजिक लगभग एक हजार लाइन्स ऑफ़ कोड में समाहित है। हमने रॉ कोड के ऊपर एब्स्ट्रैक्शन को न्यूनतम आकार में रखा है! 27 | 28 | 🌐 **सभी LLM के लिए सपोर्ट**: यह हब पर होस्ट किए गए मॉडल्स को उनके `transformers` वर्जन में या हमारे इन्फरेंस API के माध्यम से सपोर्ट करता है, साथ ही OpenAI, Anthropic से भी... किसी भी LLM से एजेंट को पावर करना वास्तव में आसान है। 29 | 30 | 🧑‍💻 **कोड Agents के लिए फर्स्ट-क्लास सपोर्ट**, यानी ऐसे एजेंट्स जो अपनी एक्शन्स को कोड में लिखते हैं (कोड लिखने के लिए उपयोग किए जाने वाले एजेंट्स के विपरीत), [यहाँ और पढ़ें](tutorials/secure_code_execution)। 31 | 32 | 🤗 **हब इंटीग्रेशन**: आप टूल्स को हब पर शेयर और लोड कर सकते हैं, और आगे और भी बहुत कुछ आने वाला है! 33 | ! 34 | 35 |
36 |
37 |
गाइडेड टूर
39 |

बेसिक्स सीखें और एजेंट्स का उपयोग करने में परिचित हों। यदि आप पहली बार एजेंट्स का उपयोग कर रहे हैं तो यहाँ से शुरू करें!

40 |
41 |
हाउ-टू गाइड्स
43 |

एक विशिष्ट लक्ष्य प्राप्त करने में मदद के लिए गाइड: SQL क्वेरी जनरेट और टेस्ट करने के लिए एजेंट बनाएं!

44 |
45 |
कॉन्सेप्चुअल गाइड्स
47 |

महत्वपूर्ण विषयों की बेहतर समझ बनाने के लिए उच्च-स्तरीय व्याख्याएं।

48 |
49 |
ट्यूटोरियल्स
51 |

एजेंट्स बनाने के महत्वपूर्ण पहलुओं को कवर करने वाले क्ट्यूटोरियल्स।

52 |
53 |
54 |
-------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Python tests 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: 7 | - ci-* 8 | 9 | env: 10 | UV_SYSTEM_PYTHON: 1 11 | 12 | jobs: 13 | build-ubuntu: 14 | runs-on: ubuntu-latest 15 | env: 16 | UV_HTTP_TIMEOUT: 600 # max 10min to install deps 17 | 18 | strategy: 19 | fail-fast: false 20 | matrix: 21 | python-version: ["3.10", "3.12"] 22 | 23 | steps: 24 | - uses: actions/checkout@v2 25 | - name: Set up Python ${{ matrix.python-version }} 26 | uses: actions/setup-python@v2 27 | with: 28 | python-version: ${{ matrix.python-version }} 29 | 30 | # Setup venv 31 | - name: Setup uv 32 | run: | 33 | pip install --upgrade uv 34 | 35 | # Install dependencies 36 | - name: Install dependencies 37 | run: | 38 | uv pip install "smolagents[test] @ ." 39 | 40 | # Run all tests separately for individual feedback 41 | # Use 'if success() || failure()' so that all tests are run even if one failed 42 | # See https://stackoverflow.com/a/62112985 43 | - name: Import tests 44 | run: | 45 | pytest ./tests/test_import.py 46 | if: ${{ success() || failure() }} 47 | 48 | - name: Agent tests 49 | run: | 50 | pytest ./tests/test_agents.py 51 | if: ${{ success() || failure() }} 52 | 53 | - name: Default tools tests 54 | run: | 55 | pytest ./tests/test_default_tools.py 56 | if: ${{ success() || failure() }} 57 | 58 | # - name: Docs tests # Disabled for now (slow test + requires API keys) 59 | # run: | 60 | # pytest ./tests/test_all_docs.py 61 | 62 | - name: CLI tests 63 | run: | 64 | pytest ./tests/test_cli.py 65 | if: ${{ success() || failure() }} 66 | 67 | - name: Final answer tests 68 | run: | 69 | pytest ./tests/test_final_answer.py 70 | if: ${{ success() || failure() }} 71 | 72 | - name: Models tests 73 | run: | 74 | pytest ./tests/test_models.py 75 | if: ${{ success() || failure() }} 76 | 77 | - name: Memory tests 78 | run: | 79 | pytest ./tests/test_memory.py 80 | if: ${{ success() || failure() }} 81 | 82 | - name: Monitoring tests 83 | run: | 84 | pytest ./tests/test_monitoring.py 85 | if: ${{ success() || failure() }} 86 | 87 | - name: Local Python executor tests 88 | run: | 89 | pytest ./tests/test_local_python_executor.py 90 | if: ${{ success() || failure() }} 91 | 92 | - name: Remote executor tests 93 | run: | 94 | pytest ./tests/test_remote_executors.py 95 | if: ${{ success() || failure() }} 96 | 97 | - name: Search tests 98 | run: | 99 | pytest ./tests/test_search.py 100 | if: ${{ success() || failure() }} 101 | 102 | - name: Tools tests 103 | run: | 104 | pytest ./tests/test_tools.py 105 | if: ${{ success() || failure() }} 106 | 107 | - name: Tool validation tests 108 | run: | 109 | pytest ./tests/test_tool_validation.py 110 | if: ${{ success() || failure() }} 111 | 112 | - name: Types tests 113 | run: | 114 | pytest ./tests/test_types.py 115 | if: ${{ success() || failure() }} 116 | 117 | - name: Utils tests 118 | run: | 119 | pytest ./tests/test_utils.py 120 | if: ${{ success() || failure() }} 121 | 122 | - name: Gradio UI tests 123 | run: | 124 | pytest ./tests/test_gradio_ui.py 125 | if: ${{ success() || failure() }} 126 | 127 | - name: Function type hints utils tests 128 | run: | 129 | pytest ./tests/test_function_type_hints_utils.py 130 | if: ${{ success() || failure() }} 131 | -------------------------------------------------------------------------------- /docs/source/en/index.mdx: 
-------------------------------------------------------------------------------- 1 | 15 | 16 | # `smolagents` 17 | 18 |
19 | 20 |
21 | 22 | This library is the simplest framework out there to build powerful agents! By the way, wtf are "agents"? We provide our definition [in this page](conceptual_guides/intro_agents), where you'll also find tips for when to use them or not (spoilers: you'll often be better off without agents). 23 | 24 | This library offers: 25 | 26 | ✨ **Simplicity**: the logic for agents fits in ~thousand lines of code. We kept abstractions to their minimal shape above raw code! 27 | 28 | 🌐 **Support for any LLM**: it supports models hosted on the Hub loaded in their `transformers` version or through our inference API and Inference providers, but also models from OpenAI, Anthropic... it's really easy to power an agent with any LLM. 29 | 30 | 🧑‍💻 **First-class support for Code Agents**, i.e. agents that write their actions in code (as opposed to "agents being used to write code"), [read more here](tutorials/secure_code_execution). 31 | 32 | 🤗 **Hub integrations**: you can share and load Gradio Spaces as tools to/from the Hub, and more is to come! 33 | 34 |
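As a minimal, illustrative sketch (not a full quickstart; the web-search tool and Hub-hosted model below are just one possible combination), powering a code agent looks roughly like this:

```py
from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel

# A CodeAgent writes its actions as Python snippets and can call the provided
# tools from that code; HfApiModel queries a model hosted on the Hugging Face Hub.
agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=HfApiModel())

agent.run("How many seconds would it take for a leopard at full speed to run through Pont des Arts?")
```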
35 | - **Guided tour**: Learn the basics and become familiar with using Agents. Start here if you are using Agents for the first time!
- **How-to guides**: Practical guides to help you achieve a specific goal: create an agent to generate and test SQL queries!
- **Conceptual guides**: High-level explanations for building a better understanding of important topics.
- **Tutorials**: Horizontal tutorials that cover important aspects of building agents. 53 |
54 | -------------------------------------------------------------------------------- /examples/open_deep_research/scripts/run_agents.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import shutil 4 | import textwrap 5 | from pathlib import Path 6 | 7 | # import tqdm.asyncio 8 | from smolagents.utils import AgentError 9 | 10 | 11 | def serialize_agent_error(obj): 12 | if isinstance(obj, AgentError): 13 | return {"error_type": obj.__class__.__name__, "message": obj.message} 14 | else: 15 | return str(obj) 16 | 17 | 18 | def get_image_description(file_name: str, question: str, visual_inspection_tool) -> str: 19 | prompt = f"""Write a caption of 5 sentences for this image. Pay special attention to any details that might be useful for someone answering the following question: 20 | {question}. But do not try to answer the question directly! 21 | Do not add any information that is not present in the image.""" 22 | return visual_inspection_tool(image_path=file_name, question=prompt) 23 | 24 | 25 | def get_document_description(file_path: str, question: str, document_inspection_tool) -> str: 26 | prompt = f"""Write a caption of 5 sentences for this document. Pay special attention to any details that might be useful for someone answering the following question: 27 | {question}. But do not try to answer the question directly! 28 | Do not add any information that is not present in the document.""" 29 | return document_inspection_tool.forward_initial_exam_mode(file_path=file_path, question=prompt) 30 | 31 | 32 | def get_single_file_description(file_path: str, question: str, visual_inspection_tool, document_inspection_tool): 33 | file_extension = file_path.split(".")[-1] 34 | if file_extension in ["png", "jpg", "jpeg"]: 35 | file_description = f" - Attached image: {file_path}" 36 | file_description += ( 37 | f"\n -> Image description: {get_image_description(file_path, question, visual_inspection_tool)}" 38 | ) 39 | return file_description 40 | elif file_extension in ["pdf", "xls", "xlsx", "docx", "doc", "xml"]: 41 | file_description = f" - Attached document: {file_path}" 42 | image_path = file_path.split(".")[0] + ".png" 43 | if os.path.exists(image_path): 44 | description = get_image_description(image_path, question, visual_inspection_tool) 45 | else: 46 | description = get_document_description(file_path, question, document_inspection_tool) 47 | file_description += f"\n -> File description: {description}" 48 | return file_description 49 | elif file_extension in ["mp3", "m4a", "wav"]: 50 | return f" - Attached audio: {file_path}" 51 | else: 52 | return f" - Attached file: {file_path}" 53 | 54 | 55 | def get_zip_description(file_path: str, question: str, visual_inspection_tool, document_inspection_tool): 56 | folder_path = file_path.replace(".zip", "") 57 | os.makedirs(folder_path, exist_ok=True) 58 | shutil.unpack_archive(file_path, folder_path) 59 | 60 | prompt_use_files = "" 61 | for root, dirs, files in os.walk(folder_path): 62 | for file in files: 63 | file_path = os.path.join(root, file) 64 | prompt_use_files += "\n" + textwrap.indent( 65 | get_single_file_description(file_path, question, visual_inspection_tool, document_inspection_tool), 66 | prefix=" ", 67 | ) 68 | return prompt_use_files 69 | 70 | 71 | def get_tasks_to_run(data, total: int, base_filename: Path, tasks_ids: list[int]): 72 | f = base_filename.parent / f"{base_filename.stem}_answers.jsonl" 73 | done = set() 74 | if f.exists(): 75 | with open(f, encoding="utf-8") 
as fh: 76 | done = {json.loads(line)["task_id"] for line in fh if line.strip()} 77 | 78 | tasks = [] 79 | for i in range(total): 80 | task_id = int(data[i]["task_id"]) 81 | if task_id not in done: 82 | if tasks_ids is not None: 83 | if task_id in tasks_ids: 84 | tasks.append(data[i]) 85 | else: 86 | tasks.append(data[i]) 87 | return tasks 88 | -------------------------------------------------------------------------------- /tests/test_types.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2024 HuggingFace Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | import os 16 | import tempfile 17 | import unittest 18 | import uuid 19 | 20 | import PIL.Image 21 | from transformers.testing_utils import ( 22 | require_soundfile, 23 | ) 24 | 25 | from smolagents.agent_types import AgentAudio, AgentImage, AgentText 26 | 27 | from .utils.markers import require_torch 28 | 29 | 30 | def get_new_path(suffix="") -> str: 31 | directory = tempfile.mkdtemp() 32 | return os.path.join(directory, str(uuid.uuid4()) + suffix) 33 | 34 | 35 | @require_soundfile 36 | @require_torch 37 | class AgentAudioTests(unittest.TestCase): 38 | def test_from_tensor(self): 39 | import soundfile as sf 40 | import torch 41 | 42 | tensor = torch.rand(12, dtype=torch.float64) - 0.5 43 | agent_type = AgentAudio(tensor) 44 | path = str(agent_type.to_string()) 45 | 46 | # Ensure that the tensor and the agent_type's tensor are the same 47 | self.assertTrue(torch.allclose(tensor, agent_type.to_raw(), atol=1e-4)) 48 | 49 | del agent_type 50 | 51 | # Ensure the path remains even after the object deletion 52 | self.assertTrue(os.path.exists(path)) 53 | 54 | # Ensure that the file contains the same value as the original tensor 55 | new_tensor, _ = sf.read(path) 56 | self.assertTrue(torch.allclose(tensor, torch.tensor(new_tensor), atol=1e-4)) 57 | 58 | def test_from_string(self): 59 | import soundfile as sf 60 | import torch 61 | 62 | tensor = torch.rand(12, dtype=torch.float64) - 0.5 63 | path = get_new_path(suffix=".wav") 64 | sf.write(path, tensor, 16000) 65 | 66 | agent_type = AgentAudio(path) 67 | 68 | self.assertTrue(torch.allclose(tensor, agent_type.to_raw(), atol=1e-4)) 69 | self.assertEqual(agent_type.to_string(), path) 70 | 71 | 72 | @require_torch 73 | class TestAgentImage: 74 | def test_from_tensor(self): 75 | import torch 76 | 77 | tensor = torch.randint(0, 256, (64, 64, 3)) 78 | agent_type = AgentImage(tensor) 79 | path = str(agent_type.to_string()) 80 | 81 | # Ensure that the tensor and the agent_type's tensor are the same 82 | assert torch.allclose(tensor, agent_type._tensor, atol=1e-4) 83 | 84 | assert isinstance(agent_type.to_raw(), PIL.Image.Image) 85 | 86 | # Ensure the path remains even after the object deletion 87 | del agent_type 88 | assert os.path.exists(path) 89 | 90 | def test_from_string(self, shared_datadir): 91 | path = shared_datadir / "000000039769.png" 92 | image = PIL.Image.open(path) 93 | 
agent_type = AgentImage(path) 94 | 95 | assert path.samefile(agent_type.to_string()) 96 | assert image == agent_type.to_raw() 97 | 98 | # Ensure the path remains even after the object deletion 99 | del agent_type 100 | assert os.path.exists(path) 101 | 102 | def test_from_image(self, shared_datadir): 103 | path = shared_datadir / "000000039769.png" 104 | image = PIL.Image.open(path) 105 | agent_type = AgentImage(image) 106 | 107 | assert not path.samefile(agent_type.to_string()) 108 | assert image == agent_type.to_raw() 109 | 110 | # Ensure the path remains even after the object deletion 111 | del agent_type 112 | assert os.path.exists(path) 113 | 114 | 115 | class AgentTextTests(unittest.TestCase): 116 | def test_from_string(self): 117 | string = "Hey!" 118 | agent_type = AgentText(string) 119 | 120 | self.assertEqual(string, agent_type.to_string()) 121 | self.assertEqual(string, agent_type.to_raw()) 122 | -------------------------------------------------------------------------------- /examples/open_deep_research/scripts/gaia_scorer.py: -------------------------------------------------------------------------------- 1 | import re 2 | import string 3 | import warnings 4 | 5 | 6 | def normalize_number_str(number_str: str) -> float: 7 | # we replace these common units and commas to allow 8 | # conversion to float 9 | for char in ["$", "%", ","]: 10 | number_str = number_str.replace(char, "") 11 | try: 12 | return float(number_str) 13 | except ValueError: 14 | print(f"String {number_str} cannot be normalized to number str.") 15 | return float("inf") 16 | 17 | 18 | def split_string( 19 | s: str, 20 | char_list: list[str] = [",", ";"], 21 | ) -> list[str]: 22 | pattern = f"[{''.join(char_list)}]" 23 | return re.split(pattern, s) 24 | 25 | 26 | def is_float(element: any) -> bool: 27 | try: 28 | float(element) 29 | return True 30 | except ValueError: 31 | return False 32 | 33 | 34 | def question_scorer( 35 | model_answer: str, 36 | ground_truth: str, 37 | ) -> bool: 38 | # if gt is a number 39 | if is_float(ground_truth): 40 | normalized_answer = normalize_number_str(str(model_answer)) 41 | return normalized_answer == float(ground_truth) 42 | 43 | # if gt is a list 44 | elif any(char in ground_truth for char in [",", ";"]): 45 | # question with the fish: normalization removes punct 46 | 47 | gt_elems = split_string(ground_truth) 48 | ma_elems = split_string(model_answer) 49 | 50 | # check length is the same 51 | if len(gt_elems) != len(ma_elems): 52 | warnings.warn("Answer lists have different lengths, returning False.", UserWarning) 53 | return False 54 | 55 | # compare each element as float or str 56 | comparisons = [] 57 | for ma_elem, gt_elem in zip(ma_elems, gt_elems): 58 | if is_float(gt_elem): 59 | normalized_ma_elem = normalize_number_str(ma_elem) 60 | comparisons.append(normalized_ma_elem == float(gt_elem)) 61 | else: 62 | # we do not remove punct since comparisons can include punct 63 | comparisons.append( 64 | normalize_str(ma_elem, remove_punct=False) == normalize_str(gt_elem, remove_punct=False) 65 | ) 66 | return all(comparisons) 67 | 68 | # if gt is a str 69 | else: 70 | return normalize_str(model_answer) == normalize_str(ground_truth) 71 | 72 | 73 | def check_prediction_contains_answer_letters_in_order(prediction, true_answer): 74 | prediction = prediction.lower() 75 | true_answer = true_answer.lower() 76 | if len(prediction) > len(true_answer) * 3: 77 | return False 78 | i = 0 79 | for letter in true_answer: 80 | if letter in prediction[i:]: 81 | i += 
prediction[i:].index(letter) 82 | else: 83 | return False 84 | return True 85 | 86 | 87 | def check_close_call(prediction, true_answer, is_correct): 88 | if is_correct: 89 | return True 90 | else: 91 | if is_float(true_answer): 92 | return is_correct 93 | else: 94 | if ( 95 | check_prediction_contains_answer_letters_in_order(str(prediction), str(true_answer)) 96 | and len(str(true_answer)) * 0.5 <= len(str(prediction)) <= len(str(true_answer)) * 2 97 | ): 98 | print(f"Close call: {prediction} vs {true_answer}") 99 | return True 100 | else: 101 | return False 102 | 103 | 104 | def normalize_str(input_str, remove_punct=True) -> str: 105 | """ 106 | Normalize a string by: 107 | - Removing all white spaces 108 | - Optionally removing punctuation (if remove_punct is True) 109 | - Converting to lowercase 110 | Parameters: 111 | - input_str: str, the string to normalize 112 | - remove_punct: bool, whether to remove punctuation (default: True) 113 | Returns: 114 | - str, the normalized string 115 | """ 116 | # Remove all white spaces. Required e.g for seagull vs. sea gull 117 | no_spaces = re.sub(r"\s", "", input_str) 118 | 119 | # Remove punctuation, if specified. 120 | if remove_punct: 121 | translator = str.maketrans("", "", string.punctuation) 122 | return no_spaces.lower().translate(translator) 123 | else: 124 | return no_spaces.lower() 125 | -------------------------------------------------------------------------------- /tests/test_remote_executors.py: -------------------------------------------------------------------------------- 1 | from textwrap import dedent 2 | from unittest.mock import MagicMock, patch 3 | 4 | import docker 5 | import PIL.Image 6 | import pytest 7 | 8 | from smolagents.monitoring import AgentLogger, LogLevel 9 | from smolagents.remote_executors import DockerExecutor, E2BExecutor 10 | from smolagents.utils import AgentError 11 | 12 | from .utils.markers import require_run_all 13 | 14 | 15 | class TestE2BExecutorMock: 16 | def test_e2b_executor_instantiation(self): 17 | logger = MagicMock() 18 | with patch("e2b_code_interpreter.Sandbox") as mock_sandbox: 19 | mock_sandbox.return_value.commands.run.return_value.error = None 20 | mock_sandbox.return_value.run_code.return_value.error = None 21 | executor = E2BExecutor( 22 | additional_imports=[], logger=logger, api_key="dummy-api-key", template="dummy-template-id", timeout=60 23 | ) 24 | assert isinstance(executor, E2BExecutor) 25 | assert executor.logger == logger 26 | assert executor.final_answer_pattern.pattern == r"^final_answer\((.*)\)$" 27 | assert executor.sandbox == mock_sandbox.return_value 28 | assert mock_sandbox.call_count == 1 29 | assert mock_sandbox.call_args.kwargs == { 30 | "api_key": "dummy-api-key", 31 | "template": "dummy-template-id", 32 | "timeout": 60, 33 | } 34 | 35 | 36 | @pytest.fixture 37 | def docker_executor(): 38 | executor = DockerExecutor(additional_imports=["pillow", "numpy"], logger=AgentLogger(level=LogLevel.INFO)) 39 | yield executor 40 | executor.delete() 41 | 42 | 43 | @require_run_all 44 | class TestDockerExecutor: 45 | @pytest.fixture(autouse=True) 46 | def set_executor(self, docker_executor): 47 | self.executor = docker_executor 48 | 49 | def test_initialization(self): 50 | """Check if DockerExecutor initializes without errors""" 51 | assert self.executor.container is not None, "Container should be initialized" 52 | 53 | def test_state_persistence(self): 54 | """Test that variables and imports form one snippet persist in the next""" 55 | code_action = "import numpy as np; a = 2" 56 
| self.executor(code_action) 57 | 58 | code_action = "print(np.sqrt(a))" 59 | result, logs, final_answer = self.executor(code_action) 60 | assert "1.41421" in logs 61 | 62 | def test_execute_output(self): 63 | """Test execution that returns a string""" 64 | code_action = 'final_answer("This is the final answer")' 65 | result, logs, final_answer = self.executor(code_action) 66 | assert result == "This is the final answer", "Result should be 'This is the final answer'" 67 | 68 | def test_execute_multiline_output(self): 69 | """Test execution that returns a string""" 70 | code_action = 'result = "This is the final answer"\nfinal_answer(result)' 71 | result, logs, final_answer = self.executor(code_action) 72 | assert result == "This is the final answer", "Result should be 'This is the final answer'" 73 | 74 | def test_execute_image_output(self): 75 | """Test execution that returns a base64 image""" 76 | code_action = dedent(""" 77 | import base64 78 | from PIL import Image 79 | from io import BytesIO 80 | image = Image.new("RGB", (10, 10), (255, 0, 0)) 81 | final_answer(image) 82 | """) 83 | result, logs, final_answer = self.executor(code_action) 84 | assert isinstance(result, PIL.Image.Image), "Result should be a PIL Image" 85 | 86 | def test_syntax_error_handling(self): 87 | """Test handling of syntax errors""" 88 | code_action = 'print("Missing Parenthesis' # Syntax error 89 | with pytest.raises(AgentError) as exception_info: 90 | self.executor(code_action) 91 | assert "SyntaxError" in str(exception_info.value), "Should raise a syntax error" 92 | 93 | def test_cleanup_on_deletion(self): 94 | """Test if Docker container stops and removes on deletion""" 95 | container_id = self.executor.container.id 96 | self.executor.delete() # Trigger cleanup 97 | 98 | client = docker.from_env() 99 | containers = [c.id for c in client.containers.list(all=True)] 100 | assert container_id not in containers, "Container should be removed" 101 | -------------------------------------------------------------------------------- /examples/rag_using_chromadb.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import datasets 4 | from langchain.docstore.document import Document 5 | from langchain.text_splitter import RecursiveCharacterTextSplitter 6 | from langchain_chroma import Chroma 7 | 8 | # from langchain_community.document_loaders import PyPDFLoader 9 | from langchain_huggingface import HuggingFaceEmbeddings 10 | from tqdm import tqdm 11 | from transformers import AutoTokenizer 12 | 13 | # from langchain_openai import OpenAIEmbeddings 14 | from smolagents import LiteLLMModel, Tool 15 | from smolagents.agents import CodeAgent 16 | 17 | 18 | # from smolagents.agents import ToolCallingAgent 19 | 20 | 21 | knowledge_base = datasets.load_dataset("m-ric/huggingface_doc", split="train") 22 | 23 | source_docs = [ 24 | Document(page_content=doc["text"], metadata={"source": doc["source"].split("/")[1]}) for doc in knowledge_base 25 | ] 26 | 27 | ## For your own PDFs, you can use the following code to load them into source_docs 28 | # pdf_directory = "pdfs" 29 | # pdf_files = [ 30 | # os.path.join(pdf_directory, f) 31 | # for f in os.listdir(pdf_directory) 32 | # if f.endswith(".pdf") 33 | # ] 34 | # source_docs = [] 35 | 36 | # for file_path in pdf_files: 37 | # loader = PyPDFLoader(file_path) 38 | # docs.extend(loader.load()) 39 | 40 | text_splitter = RecursiveCharacterTextSplitter.from_huggingface_tokenizer( 41 | AutoTokenizer.from_pretrained("thenlper/gte-small"), 
42 | chunk_size=200, 43 | chunk_overlap=20, 44 | add_start_index=True, 45 | strip_whitespace=True, 46 | separators=["\n\n", "\n", ".", " ", ""], 47 | ) 48 | 49 | # Split docs and keep only unique ones 50 | print("Splitting documents...") 51 | docs_processed = [] 52 | unique_texts = {} 53 | for doc in tqdm(source_docs): 54 | new_docs = text_splitter.split_documents([doc]) 55 | for new_doc in new_docs: 56 | if new_doc.page_content not in unique_texts: 57 | unique_texts[new_doc.page_content] = True 58 | docs_processed.append(new_doc) 59 | 60 | 61 | print("Embedding documents... This should take a few minutes (5 minutes on MacBook with M1 Pro)") 62 | # Initialize embeddings and ChromaDB vector store 63 | embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2") 64 | 65 | 66 | # embeddings = OpenAIEmbeddings(model="text-embedding-3-small") 67 | 68 | vector_store = Chroma.from_documents(docs_processed, embeddings, persist_directory="./chroma_db") 69 | 70 | 71 | class RetrieverTool(Tool): 72 | name = "retriever" 73 | description = ( 74 | "Uses semantic search to retrieve the parts of documentation that could be most relevant to answer your query." 75 | ) 76 | inputs = { 77 | "query": { 78 | "type": "string", 79 | "description": "The query to perform. This should be semantically close to your target documents. Use the affirmative form rather than a question.", 80 | } 81 | } 82 | output_type = "string" 83 | 84 | def __init__(self, vector_store, **kwargs): 85 | super().__init__(**kwargs) 86 | self.vector_store = vector_store 87 | 88 | def forward(self, query: str) -> str: 89 | assert isinstance(query, str), "Your search query must be a string" 90 | docs = self.vector_store.similarity_search(query, k=3) 91 | return "\nRetrieved documents:\n" + "".join( 92 | [f"\n\n===== Document {str(i)} =====\n" + doc.page_content for i, doc in enumerate(docs)] 93 | ) 94 | 95 | 96 | retriever_tool = RetrieverTool(vector_store) 97 | 98 | # Choose which LLM engine to use! 99 | 100 | # from smolagents import HfApiModel 101 | # model = HfApiModel(model_id="meta-llama/Llama-3.3-70B-Instruct") 102 | 103 | # from smolagents import TransformersModel 104 | # model = TransformersModel(model_id="meta-llama/Llama-3.2-2B-Instruct") 105 | 106 | # For anthropic: change model_id below to 'anthropic/claude-3-5-sonnet-20240620' and also change 'os.environ.get("ANTHROPIC_API_KEY")' 107 | model = LiteLLMModel( 108 | model_id="groq/llama-3.3-70b-versatile", 109 | api_key=os.environ.get("GROQ_API_KEY"), 110 | ) 111 | 112 | # # You can also use the ToolCallingAgent class 113 | # agent = ToolCallingAgent( 114 | # tools=[retriever_tool], 115 | # model=model, 116 | # verbose=True, 117 | # ) 118 | 119 | agent = CodeAgent( 120 | tools=[retriever_tool], 121 | model=model, 122 | max_steps=4, 123 | verbosity_level=2, 124 | ) 125 | 126 | agent_output = agent.run("How can I push a model to the Hub?") 127 | 128 | 129 | print("Final output:") 130 | print(agent_output) 131 | -------------------------------------------------------------------------------- /examples/open_deep_research/scripts/reformulator.py: -------------------------------------------------------------------------------- 1 | # Shamelessly stolen from Microsoft Autogen team: thanks to them for this great resource! 
2 | # https://github.com/microsoft/autogen/blob/gaia_multiagent_v01_march_1st/autogen/browser_utils.py 3 | import copy 4 | 5 | from smolagents.models import MessageRole, Model 6 | 7 | 8 | def prepare_response(original_task: str, inner_messages, reformulation_model: Model) -> str: 9 | messages = [ 10 | { 11 | "role": MessageRole.SYSTEM, 12 | "content": [ 13 | { 14 | "type": "text", 15 | "text": f"""Earlier you were asked the following: 16 | 17 | {original_task} 18 | 19 | Your team then worked diligently to address that request. Read below a transcript of that conversation:""", 20 | } 21 | ], 22 | } 23 | ] 24 | 25 | # The first message just repeats the question, so remove it 26 | # if len(inner_messages) > 1: 27 | # del inner_messages[0] 28 | 29 | # copy them to this context 30 | try: 31 | for message in inner_messages: 32 | if not message.get("content"): 33 | continue 34 | message = copy.deepcopy(message) 35 | message["role"] = MessageRole.USER 36 | messages.append(message) 37 | except Exception: 38 | messages += [{"role": MessageRole.ASSISTANT, "content": str(inner_messages)}] 39 | 40 | # ask for the final answer 41 | messages.append( 42 | { 43 | "role": MessageRole.USER, 44 | "content": [ 45 | { 46 | "type": "text", 47 | "text": f""" 48 | Read the above conversation and output a FINAL ANSWER to the question. The question is repeated here for convenience: 49 | 50 | {original_task} 51 | 52 | To output the final answer, use the following template: FINAL ANSWER: [YOUR FINAL ANSWER] 53 | Your FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. 54 | ADDITIONALLY, your FINAL ANSWER MUST adhere to any formatting instructions specified in the original question (e.g., alphabetization, sequencing, units, rounding, decimal places, etc.) 55 | If you are asked for a number, express it numerically (i.e., with digits rather than words), don't use commas, and DO NOT INCLUDE UNITS such as $ or USD or percent signs unless specified otherwise. 56 | If you are asked for a string, don't use articles or abbreviations (e.g. for cities), unless specified otherwise. Don't output any final sentence punctuation such as '.', '!', or '?'. 57 | If you are asked for a comma separated list, apply the above rules depending on whether the elements are numbers or strings. 58 | If you are unable to determine the final answer, output 'FINAL ANSWER: Unable to determine' 59 | """, 60 | } 61 | ], 62 | } 63 | ) 64 | 65 | response = reformulation_model(messages).content 66 | 67 | final_answer = response.split("FINAL ANSWER: ")[-1].strip() 68 | print("> Reformulated answer: ", final_answer) 69 | 70 | # if "unable to determine" in final_answer.lower(): 71 | # messages.append({"role": MessageRole.ASSISTANT, "content": response }) 72 | # messages.append({"role": MessageRole.USER, "content": [{"type": "text", "text": """ 73 | # I understand that a definitive answer could not be determined. Please make a well-informed EDUCATED GUESS based on the conversation. 74 | 75 | # To output the educated guess, use the following template: EDUCATED GUESS: [YOUR EDUCATED GUESS] 76 | # Your EDUCATED GUESS should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. DO NOT OUTPUT 'I don't know', 'Unable to determine', etc. 77 | # ADDITIONALLY, your EDUCATED GUESS MUST adhere to any formatting instructions specified in the original question (e.g., alphabetization, sequencing, units, rounding, decimal places, etc.) 
78 | # If you are asked for a number, express it numerically (i.e., with digits rather than words), don't use commas, and don't include units such as $ or percent signs unless specified otherwise. 79 | # If you are asked for a string, don't use articles or abbreviations (e.g. cit for cities), unless specified otherwise. Don't output any final sentence punctuation such as '.', '!', or '?'. 80 | # If you are asked for a comma separated list, apply the above rules depending on whether the elements are numbers or strings. 81 | # """.strip()}]}) 82 | 83 | # response = model(messages).content 84 | # print("\n>>>Making an educated guess.\n", response) 85 | # final_answer = response.split("EDUCATED GUESS: ")[-1].strip() 86 | return final_answer 87 | -------------------------------------------------------------------------------- /tests/test_tool_validation.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from smolagents.default_tools import DuckDuckGoSearchTool, GoogleSearchTool, SpeechToTextTool, VisitWebpageTool 4 | from smolagents.tool_validation import validate_tool_attributes 5 | from smolagents.tools import Tool, tool 6 | 7 | 8 | UNDEFINED_VARIABLE = "undefined_variable" 9 | 10 | 11 | @pytest.mark.parametrize("tool_class", [DuckDuckGoSearchTool, GoogleSearchTool, SpeechToTextTool, VisitWebpageTool]) 12 | def test_validate_tool_attributes_with_default_tools(tool_class): 13 | assert validate_tool_attributes(tool_class) is None, f"failed for {tool_class.name} tool" 14 | 15 | 16 | class ValidTool(Tool): 17 | name = "valid_tool" 18 | description = "A valid tool" 19 | inputs = {"input": {"type": "string", "description": "input"}} 20 | output_type = "string" 21 | simple_attr = "string" 22 | dict_attr = {"key": "value"} 23 | 24 | def __init__(self, optional_param="default"): 25 | super().__init__() 26 | self.param = optional_param 27 | 28 | def forward(self, input: str) -> str: 29 | return input.upper() 30 | 31 | 32 | @tool 33 | def valid_tool_function(input: str) -> str: 34 | """A valid tool function. 35 | 36 | Args: 37 | input (str): Input string. 
38 | """ 39 | return input.upper() 40 | 41 | 42 | @pytest.mark.parametrize("tool_class", [ValidTool, valid_tool_function.__class__]) 43 | def test_validate_tool_attributes_valid(tool_class): 44 | assert validate_tool_attributes(tool_class) is None 45 | 46 | 47 | class InvalidToolName(Tool): 48 | name = "invalid tool name" 49 | description = "Tool with invalid name" 50 | inputs = {"input": {"type": "string", "description": "input"}} 51 | output_type = "string" 52 | 53 | def __init__(self): 54 | super().__init__() 55 | 56 | def forward(self, input: str) -> str: 57 | return input 58 | 59 | 60 | class InvalidToolComplexAttrs(Tool): 61 | name = "invalid_tool" 62 | description = "Tool with complex class attributes" 63 | inputs = {"input": {"type": "string", "description": "input"}} 64 | output_type = "string" 65 | complex_attr = [x for x in range(3)] # Complex class attribute 66 | 67 | def __init__(self): 68 | super().__init__() 69 | 70 | def forward(self, input: str) -> str: 71 | return input 72 | 73 | 74 | class InvalidToolRequiredParams(Tool): 75 | name = "invalid_tool" 76 | description = "Tool with required params" 77 | inputs = {"input": {"type": "string", "description": "input"}} 78 | output_type = "string" 79 | 80 | def __init__(self, required_param, kwarg1=1): # No default value 81 | super().__init__() 82 | self.param = required_param 83 | 84 | def forward(self, input: str) -> str: 85 | return input 86 | 87 | 88 | class InvalidToolNonLiteralDefaultParam(Tool): 89 | name = "invalid_tool" 90 | description = "Tool with non-literal default parameter value" 91 | inputs = {"input": {"type": "string", "description": "input"}} 92 | output_type = "string" 93 | 94 | def __init__(self, default_param=UNDEFINED_VARIABLE): # UNDEFINED_VARIABLE as default is non-literal 95 | super().__init__() 96 | self.default_param = default_param 97 | 98 | def forward(self, input: str) -> str: 99 | return input 100 | 101 | 102 | class InvalidToolUndefinedNames(Tool): 103 | name = "invalid_tool" 104 | description = "Tool with undefined names" 105 | inputs = {"input": {"type": "string", "description": "input"}} 106 | output_type = "string" 107 | 108 | def forward(self, input: str) -> str: 109 | return UNDEFINED_VARIABLE # Undefined name 110 | 111 | 112 | @pytest.mark.parametrize( 113 | "tool_class, expected_error", 114 | [ 115 | ( 116 | InvalidToolName, 117 | "Class attribute 'name' must be a valid Python identifier and not a reserved keyword, found 'invalid tool name'", 118 | ), 119 | (InvalidToolComplexAttrs, "Complex attributes should be defined in __init__, not as class attributes"), 120 | (InvalidToolRequiredParams, "Parameters in __init__ must have default values, found required parameters"), 121 | ( 122 | InvalidToolNonLiteralDefaultParam, 123 | "Parameters in __init__ must have literal default values, found non-literal defaults", 124 | ), 125 | (InvalidToolUndefinedNames, "Name 'UNDEFINED_VARIABLE' is undefined"), 126 | ], 127 | ) 128 | def test_validate_tool_attributes_exceptions(tool_class, expected_error): 129 | with pytest.raises(ValueError, match=expected_error): 130 | validate_tool_attributes(tool_class) 131 | -------------------------------------------------------------------------------- /docs/source/hi/tutorials/inspect_runs.mdx: -------------------------------------------------------------------------------- 1 | 16 | # OpenTelemetry के साथ runs का निरीक्षण 17 | 18 | [[open-in-colab]] 19 | 20 | > [!TIP] 21 | > यदि आप एजेंट्स बनाने में नए हैं, तो पहले [एजेंट्स का 
परिचय](../conceptual_guides/intro_agents) और [smolagents की गाइडेड टूर](../guided_tour) पढ़ना सुनिश्चित करें। 22 | 23 | ### Agents runs को लॉग क्यों करें? 24 | 25 | Agent runs को डीबग करना जटिल होता है। 26 | 27 | यह सत्यापित करना कठिन है कि एक रन ठीक से चला या नहीं, क्योंकि एजेंट वर्कफ़्लो [डिज़ाइन के अनुसार अप्रत्याशित](../conceptual_guides/intro_agents) होते हैं (यदि वे प्रत्याशित होते, तो आप पुराने अच्छे कोड का ही उपयोग कर रहे होते)। 28 | 29 | और रन का निरीक्षण करना भी कठिन है: मल्टी-स्टेप एजेंट्स जल्दी ही कंसोल को लॉग से भर देते हैं, और अधिकांश त्रुटियां केवल "LLM dumb" प्रकार की त्रुटियां होती हैं, जिनसे LLM अगले चरण में बेहतर कोड या टूल कॉल लिखकर स्वयं को सुधार लेता है। 30 | 31 | इसलिए बाद के निरीक्षण और मॉनिटरिंग के लिए प्रोडक्शन में agent runs को रिकॉर्ड करने के लिए इंस्ट्रुमेंटेशन का उपयोग करना आवश्यक है! 32 | 33 | हमने agent runs को इंस्ट्रुमेंट करने के लिए [OpenTelemetry](https://opentelemetry.io/) मानक को अपनाया है। 34 | 35 | इसका मतलब है कि आप बस कुछ इंस्ट्रुमेंटेशन कोड चला सकते हैं, फिर अपने एजेंट्स को सामान्य रूप से चला सकते हैं, और सब कुछ आपके प्लेटफॉर्म में लॉग हो जाता है। 36 | 37 | यह इस प्रकार होता है: 38 | पहले आवश्यक पैकेज इंस्टॉल करें। यहां हम [Phoenix by Arize AI](https://github.com/Arize-ai/phoenix) इंस्टॉल करते हैं क्योंकि यह लॉग्स को एकत्र और निरीक्षण करने का एक अच्छा समाधान है, लेकिन इस संग्रह और निरीक्षण भाग के लिए आप अन्य OpenTelemetry-कम्पैटिबल प्लेटफॉर्म्स का उपयोग कर सकते हैं। 39 | 40 | ```shell 41 | pip install smolagents 42 | pip install arize-phoenix opentelemetry-sdk opentelemetry-exporter-otlp openinference-instrumentation-smolagents 43 | ``` 44 | 45 | फिर कलेक्टर को बैकग्राउंड में चलाएं। 46 | 47 | ```shell 48 | python -m phoenix.server.main serve 49 | ``` 50 | 51 | अंत में, अपने एजेंट्स को ट्रेस करने और ट्रेस को नीचे परिभाषित एंडपॉइंट पर Phoenix को भेजने के लिए `SmolagentsInstrumentor` को सेट करें। 52 | 53 | ```python 54 | from opentelemetry import trace 55 | from opentelemetry.sdk.trace import TracerProvider 56 | from opentelemetry.sdk.trace.export import BatchSpanProcessor 57 | 58 | from openinference.instrumentation.smolagents import SmolagentsInstrumentor 59 | from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter 60 | from opentelemetry.sdk.trace.export import ConsoleSpanExporter, SimpleSpanProcessor 61 | 62 | endpoint = "http://0.0.0.0:6006/v1/traces" 63 | trace_provider = TracerProvider() 64 | trace_provider.add_span_processor(SimpleSpanProcessor(OTLPSpanExporter(endpoint))) 65 | 66 | SmolagentsInstrumentor().instrument(tracer_provider=trace_provider) 67 | ``` 68 | तब आप अपने एजेंट चला सकते हैं! 69 | 70 | ```py 71 | from smolagents import ( 72 | CodeAgent, 73 | ToolCallingAgent, 74 | DuckDuckGoSearchTool, 75 | VisitWebpageTool, 76 | HfApiModel, 77 | ) 78 | 79 | model = HfApiModel() 80 | 81 | managed_agent = ToolCallingAgent( 82 | tools=[DuckDuckGoSearchTool(), VisitWebpageTool()], 83 | model=model, 84 | name="managed_agent", 85 | description="This is an agent that can do web search.", 86 | ) 87 | 88 | manager_agent = CodeAgent( 89 | tools=[], 90 | model=model, 91 | managed_agents=[managed_agent], 92 | ) 93 | manager_agent.run( 94 | "If the US keeps its 2024 growth rate, how many years will it take for the GDP to double?" 95 | ) 96 | ``` 97 | और फिर आप अपने रन का निरीक्षण करने के लिए `http://0.0.0.0:6006/projects/` पर जा सकते हैं! 98 | 99 | 100 | 101 | आप देख सकते हैं कि CodeAgent ने अपने मैनेज्ड ToolCallingAgent को (वैसे, मैनेज्ड एजेंट एक CodeAgent भी हो सकता था) U.S. 
2024 ग्रोथ रेट के लिए वेब सर्च चलाने के लिए कॉल किया। फिर मैनेज्ड एजेंट ने अपनी रिपोर्ट लौटाई और मैनेजर एजेंट ने अर्थव्यवस्था के दोगुना होने का समय गणना करने के लिए उस पर कार्य किया! अच्छा है, है ना? -------------------------------------------------------------------------------- /examples/open_deep_research/run.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import threading 4 | 5 | from dotenv import load_dotenv 6 | from huggingface_hub import login 7 | from scripts.text_inspector_tool import TextInspectorTool 8 | from scripts.text_web_browser import ( 9 | ArchiveSearchTool, 10 | FinderTool, 11 | FindNextTool, 12 | PageDownTool, 13 | PageUpTool, 14 | SimpleTextBrowser, 15 | VisitTool, 16 | ) 17 | from scripts.visual_qa import visualizer 18 | 19 | from smolagents import ( 20 | CodeAgent, 21 | GoogleSearchTool, 22 | # HfApiModel, 23 | LiteLLMModel, 24 | ToolCallingAgent, 25 | ) 26 | 27 | 28 | AUTHORIZED_IMPORTS = [ 29 | "requests", 30 | "zipfile", 31 | "os", 32 | "pandas", 33 | "numpy", 34 | "sympy", 35 | "json", 36 | "bs4", 37 | "pubchempy", 38 | "xml", 39 | "yahoo_finance", 40 | "Bio", 41 | "sklearn", 42 | "scipy", 43 | "pydub", 44 | "io", 45 | "PIL", 46 | "chess", 47 | "PyPDF2", 48 | "pptx", 49 | "torch", 50 | "datetime", 51 | "fractions", 52 | "csv", 53 | ] 54 | load_dotenv(override=True) 55 | login(os.getenv("HF_TOKEN")) 56 | 57 | append_answer_lock = threading.Lock() 58 | 59 | 60 | def parse_args(): 61 | parser = argparse.ArgumentParser() 62 | parser.add_argument( 63 | "question", type=str, help="for example: 'How many studio albums did Mercedes Sosa release before 2007?'" 64 | ) 65 | parser.add_argument("--model-id", type=str, default="o1") 66 | return parser.parse_args() 67 | 68 | 69 | custom_role_conversions = {"tool-call": "assistant", "tool-response": "user"} 70 | 71 | user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0" 72 | 73 | BROWSER_CONFIG = { 74 | "viewport_size": 1024 * 5, 75 | "downloads_folder": "downloads_folder", 76 | "request_kwargs": { 77 | "headers": {"User-Agent": user_agent}, 78 | "timeout": 300, 79 | }, 80 | "serpapi_key": os.getenv("SERPAPI_API_KEY"), 81 | } 82 | 83 | os.makedirs(f"./{BROWSER_CONFIG['downloads_folder']}", exist_ok=True) 84 | 85 | 86 | def create_agent(model_id="o1"): 87 | model_params = { 88 | "model_id": model_id, 89 | "custom_role_conversions": custom_role_conversions, 90 | "max_completion_tokens": 8192, 91 | } 92 | if model_id == "o1": 93 | model_params["reasoning_effort"] = "high" 94 | model = LiteLLMModel(**model_params) 95 | 96 | text_limit = 100000 97 | browser = SimpleTextBrowser(**BROWSER_CONFIG) 98 | WEB_TOOLS = [ 99 | GoogleSearchTool(provider="serper"), 100 | VisitTool(browser), 101 | PageUpTool(browser), 102 | PageDownTool(browser), 103 | FinderTool(browser), 104 | FindNextTool(browser), 105 | ArchiveSearchTool(browser), 106 | TextInspectorTool(model, text_limit), 107 | ] 108 | text_webbrowser_agent = ToolCallingAgent( 109 | model=model, 110 | tools=WEB_TOOLS, 111 | max_steps=20, 112 | verbosity_level=2, 113 | planning_interval=4, 114 | name="search_agent", 115 | description="""A team member that will search the internet to answer your question. 116 | Ask him for all your questions that require browsing the web. 117 | Provide him as much context as possible, in particular if you need to search on a specific timeframe! 
118 | And don't hesitate to provide him with a complex search task, like finding a difference between two webpages. 119 | Your request must be a real sentence, not a google search! Like "Find me this information (...)" rather than a few keywords. 120 | """, 121 | provide_run_summary=True, 122 | ) 123 | text_webbrowser_agent.prompt_templates["managed_agent"]["task"] += """You can navigate to .txt online files. 124 | If a non-html page is in another format, especially .pdf or a Youtube video, use tool 'inspect_file_as_text' to inspect it. 125 | Additionally, if after some searching you find out that you need more information to answer the question, you can use `final_answer` with your request for clarification as argument to request for more information.""" 126 | 127 | manager_agent = CodeAgent( 128 | model=model, 129 | tools=[visualizer, TextInspectorTool(model, text_limit)], 130 | max_steps=12, 131 | verbosity_level=2, 132 | additional_authorized_imports=AUTHORIZED_IMPORTS, 133 | planning_interval=4, 134 | managed_agents=[text_webbrowser_agent], 135 | ) 136 | 137 | return manager_agent 138 | 139 | 140 | def main(): 141 | args = parse_args() 142 | 143 | agent = create_agent(model_id=args.model_id) 144 | 145 | answer = agent.run(args.question) 146 | 147 | print(f"Got this answer: {answer}") 148 | 149 | 150 | if __name__ == "__main__": 151 | main() 152 | -------------------------------------------------------------------------------- /docs/source/zh/tutorials/memory.mdx: -------------------------------------------------------------------------------- 1 | 16 | # 📚 管理Agent的记忆 17 | 18 | [[open-in-colab]] 19 | 20 | 归根结底,Agent可以定义为由几个简单组件构成:它拥有工具、提示词。最重要的是,它具备对过往步骤的记忆,能够追溯完整的规划、执行和错误历史。 21 | 22 | ### 回放Agent的记忆 23 | 24 | 我们提供了多项功能来审查Agent的过往运行记录。 25 | 26 | 您可以通过插装(instrumentation)在可视化界面中查看Agent的运行过程,该界面支持对特定步骤进行缩放操作,具体方法参见[插装指南](./inspect_runs)。 27 | 28 | 您也可以使用`agent.replay()`方法实现回放: 29 | 30 | 当Agent完成运行后: 31 | ```py 32 | from smolagents import HfApiModel, CodeAgent 33 | 34 | agent = CodeAgent(tools=[], model=HfApiModel(), verbosity_level=0) 35 | 36 | result = agent.run("What's the 20th Fibonacci number?") 37 | ``` 38 | 39 | 若要回放最近一次运行,只需使用: 40 | ```py 41 | agent.replay() 42 | ``` 43 | 44 | ### 动态修改Agent的记忆 45 | 46 | 许多高级应用场景需要对Agent的记忆进行动态修改。 47 | 48 | 您可以通过以下方式访问Agent的记忆: 49 | 50 | ```py 51 | from smolagents import ActionStep 52 | 53 | system_prompt_step = agent.memory.system_prompt 54 | print("The system prompt given to the agent was:") 55 | print(system_prompt_step.system_prompt) 56 | 57 | task_step = agent.memory.steps[0] 58 | print("\n\nThe first task step was:") 59 | print(task_step.task) 60 | 61 | for step in agent.memory.steps: 62 | if isinstance(step, ActionStep): 63 | if step.error is not None: 64 | print(f"\nStep {step.step_number} got this error:\n{step.error}\n") 65 | else: 66 | print(f"\nStep {step.step_number} got these observations:\n{step.observations}\n") 67 | ``` 68 | 69 | 使用`agent.memory.get_full_steps()`可获取完整步骤字典数据。 70 | 71 | 您还可以通过步骤回调(step callbacks)实现记忆的动态修改。 72 | 73 | 步骤回调函数可通过参数直接访问`agent`对象,因此能够访问所有记忆步骤并根据需要进行修改。例如,假设您正在监控网页浏览Agent每个步骤的屏幕截图,希望保留最新截图同时删除旧步骤的图片以节省token消耗。 74 | 75 | 可参考以下代码示例: 76 | _注:此代码片段不完整,部分导入语句和对象定义已精简,完整代码请访问[原始脚本](https://github.com/huggingface/smolagents/blob/main/src/smolagents/vision_web_browser.py)_ 77 | 78 | ```py 79 | import helium 80 | from PIL import Image 81 | from io import BytesIO 82 | from time import sleep 83 | 84 | def update_screenshot(memory_step: ActionStep, agent: CodeAgent) -> None: 85 | sleep(1.0) # Let JavaScript animations 
happen before taking the screenshot 86 | driver = helium.get_driver() 87 | latest_step = memory_step.step_number 88 | for previous_memory_step in agent.memory.steps: # Remove previous screenshots from logs for lean processing 89 | if isinstance(previous_memory_step, ActionStep) and previous_memory_step.step_number <= latest_step - 2: 90 | previous_memory_step.observations_images = None 91 | png_bytes = driver.get_screenshot_as_png() 92 | image = Image.open(BytesIO(png_bytes)) 93 | memory_step.observations_images = [image.copy()] 94 | ``` 95 | 96 | 最后在初始化Agent时,将此函数传入`step_callbacks`参数: 97 | 98 | ```py 99 | CodeAgent( 100 | tools=[DuckDuckGoSearchTool(), go_back, close_popups, search_item_ctrl_f], 101 | model=model, 102 | additional_authorized_imports=["helium"], 103 | step_callbacks=[update_screenshot], 104 | max_steps=20, 105 | verbosity_level=2, 106 | ) 107 | ``` 108 | 109 | 请访问我们的 [vision web browser code](https://github.com/huggingface/smolagents/blob/main/src/smolagents/vision_web_browser.py) 查看完整可运行示例。 110 | 111 | ### 分步运行 Agents 112 | 113 | 当您需要处理耗时数天的工具调用时,这种方式特别有用:您可以逐步执行Agents。这还允许您在每一步更新记忆。 114 | 115 | ```py 116 | from smolagents import HfApiModel, CodeAgent, ActionStep, TaskStep 117 | 118 | agent = CodeAgent(tools=[], model=HfApiModel(), verbosity_level=1) 119 | print(agent.memory.system_prompt) 120 | 121 | task = "What is the 20th Fibonacci number?" 122 | 123 | # You could modify the memory as needed here by inputting the memory of another agent. 124 | # agent.memory.steps = previous_agent.memory.steps 125 | 126 | # Let's start a new task! 127 | agent.memory.steps.append(TaskStep(task=task, task_images=[])) 128 | 129 | final_answer = None 130 | step_number = 1 131 | while final_answer is None and step_number <= 10: 132 | memory_step = ActionStep( 133 | step_number=step_number, 134 | observations_images=[], 135 | ) 136 | # Run one step. 137 | final_answer = agent.step(memory_step) 138 | agent.memory.steps.append(memory_step) 139 | step_number += 1 140 | 141 | # Change the memory as you please! 142 | # For instance to update the latest step: 143 | # agent.memory.steps[-1] = ... 144 | 145 | print("The final answer is:", final_answer) 146 | ``` -------------------------------------------------------------------------------- /examples/open_deep_research/scripts/text_inspector_tool.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from smolagents import Tool 4 | from smolagents.models import MessageRole, Model 5 | 6 | from .mdconvert import MarkdownConverter 7 | 8 | 9 | class TextInspectorTool(Tool): 10 | name = "inspect_file_as_text" 11 | description = """ 12 | You cannot load files yourself: instead call this tool to read a file as markdown text and ask questions about it. 13 | This tool handles the following file extensions: [".html", ".htm", ".xlsx", ".pptx", ".wav", ".mp3", ".m4a", ".flac", ".pdf", ".docx"], and all other types of text files. IT DOES NOT HANDLE IMAGES.""" 14 | 15 | inputs = { 16 | "file_path": { 17 | "description": "The path to the file you want to read as text. Must be a '.something' file, like '.pdf'. If it is an image, use the visualizer tool instead! DO NOT use this tool for an HTML webpage: use the web_search tool instead!", 18 | "type": "string", 19 | }, 20 | "question": { 21 | "description": "[Optional]: Your question, as a natural language sentence. Provide as much context as possible. 
Do not pass this parameter if you just want to directly return the content of the file.", 22 | "type": "string", 23 | "nullable": True, 24 | }, 25 | } 26 | output_type = "string" 27 | md_converter = MarkdownConverter() 28 | 29 | def __init__(self, model: Model, text_limit: int): 30 | super().__init__() 31 | self.model = model 32 | self.text_limit = text_limit 33 | 34 | def forward_initial_exam_mode(self, file_path, question): 35 | result = self.md_converter.convert(file_path) 36 | 37 | if file_path[-4:] in [".png", ".jpg"]: 38 | raise Exception("Cannot use inspect_file_as_text tool with images: use visualizer instead!") 39 | 40 | if ".zip" in file_path: 41 | return result.text_content 42 | 43 | if not question: 44 | return result.text_content 45 | 46 | if len(result.text_content) < 4000: 47 | return "Document content: " + result.text_content 48 | 49 | messages = [ 50 | { 51 | "role": MessageRole.SYSTEM, 52 | "content": [ 53 | { 54 | "type": "text", 55 | "text": "Here is a file:\n### " 56 | + str(result.title) 57 | + "\n\n" 58 | + result.text_content[: self.text_limit], 59 | } 60 | ], 61 | }, 62 | { 63 | "role": MessageRole.USER, 64 | "content": [ 65 | { 66 | "type": "text", 67 | "text": "Now please write a short, 5 sentence caption for this document, that could help someone asking this question: " 68 | + question 69 | + "\n\nDon't answer the question yourself! Just provide useful notes on the document", 70 | } 71 | ], 72 | }, 73 | ] 74 | return self.model(messages).content 75 | 76 | def forward(self, file_path, question: Optional[str] = None) -> str: 77 | result = self.md_converter.convert(file_path) 78 | 79 | if file_path[-4:] in [".png", ".jpg"]: 80 | raise Exception("Cannot use inspect_file_as_text tool with images: use visualizer instead!") 81 | 82 | if ".zip" in file_path: 83 | return result.text_content 84 | 85 | if not question: 86 | return result.text_content 87 | 88 | messages = [ 89 | { 90 | "role": MessageRole.SYSTEM, 91 | "content": [ 92 | { 93 | "type": "text", 94 | "text": "You will have to write a short caption for this file, then answer this question:" 95 | + question, 96 | } 97 | ], 98 | }, 99 | { 100 | "role": MessageRole.USER, 101 | "content": [ 102 | { 103 | "type": "text", 104 | "text": "Here is the complete file:\n### " 105 | + str(result.title) 106 | + "\n\n" 107 | + result.text_content[: self.text_limit], 108 | } 109 | ], 110 | }, 111 | { 112 | "role": MessageRole.USER, 113 | "content": [ 114 | { 115 | "type": "text", 116 | "text": "Now answer the question below. Use these three headings: '1. Short answer', '2. Extremely detailed answer', '3. Additional Context on the document and question asked'." 117 | + question, 118 | } 119 | ], 120 | }, 121 | ] 122 | return self.model(messages).content 123 | -------------------------------------------------------------------------------- /tests/test_default_tools.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2024 HuggingFace Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | import unittest 16 | 17 | import pytest 18 | 19 | from smolagents.agent_types import _AGENT_TYPE_MAPPING 20 | from smolagents.default_tools import ( 21 | DuckDuckGoSearchTool, 22 | PythonInterpreterTool, 23 | SpeechToTextTool, 24 | VisitWebpageTool, 25 | WikipediaSearchTool, 26 | ) 27 | 28 | from .test_tools import ToolTesterMixin 29 | 30 | 31 | class DefaultToolTests(unittest.TestCase): 32 | def test_visit_webpage(self): 33 | arguments = {"url": "https://en.wikipedia.org/wiki/United_States_Secretary_of_Homeland_Security"} 34 | result = VisitWebpageTool()(arguments) 35 | assert isinstance(result, str) 36 | assert "* [About Wikipedia](/wiki/Wikipedia:About)" in result # Proper wikipedia pages have an About 37 | 38 | def test_ddgs_with_kwargs(self): 39 | result = DuckDuckGoSearchTool(timeout=20)("DeepSeek parent company") 40 | assert isinstance(result, str) 41 | 42 | 43 | class TestPythonInterpreterTool(ToolTesterMixin): 44 | def setup_method(self): 45 | self.tool = PythonInterpreterTool(authorized_imports=["numpy"]) 46 | self.tool.setup() 47 | 48 | def test_exact_match_arg(self): 49 | result = self.tool("(2 / 2) * 4") 50 | assert result == "Stdout:\n\nOutput: 4.0" 51 | 52 | def test_exact_match_kwarg(self): 53 | result = self.tool(code="(2 / 2) * 4") 54 | assert result == "Stdout:\n\nOutput: 4.0" 55 | 56 | def test_agent_type_output(self): 57 | inputs = ["2 * 2"] 58 | output = self.tool(*inputs, sanitize_inputs_outputs=True) 59 | output_type = _AGENT_TYPE_MAPPING[self.tool.output_type] 60 | assert isinstance(output, output_type) 61 | 62 | def test_agent_types_inputs(self): 63 | inputs = ["2 * 2"] 64 | _inputs = [] 65 | 66 | for _input, expected_input in zip(inputs, self.tool.inputs.values()): 67 | input_type = expected_input["type"] 68 | if isinstance(input_type, list): 69 | _inputs.append([_AGENT_TYPE_MAPPING[_input_type](_input) for _input_type in input_type]) 70 | else: 71 | _inputs.append(_AGENT_TYPE_MAPPING[input_type](_input)) 72 | 73 | # Should not raise an error 74 | output = self.tool(*inputs, sanitize_inputs_outputs=True) 75 | output_type = _AGENT_TYPE_MAPPING[self.tool.output_type] 76 | assert isinstance(output, output_type) 77 | 78 | def test_imports_work(self): 79 | result = self.tool("import numpy as np") 80 | assert "import from numpy is not allowed" not in result.lower() 81 | 82 | def test_unauthorized_imports_fail(self): 83 | with pytest.raises(Exception) as e: 84 | self.tool("import sympy as sp") 85 | assert "sympy" in str(e).lower() 86 | 87 | 88 | class TestSpeechToTextTool: 89 | def test_new_instance(self): 90 | from transformers.models.whisper import WhisperForConditionalGeneration, WhisperProcessor 91 | 92 | tool = SpeechToTextTool() 93 | assert tool is not None 94 | assert tool.pre_processor_class == WhisperProcessor 95 | assert tool.model_class == WhisperForConditionalGeneration 96 | 97 | 98 | @pytest.mark.parametrize( 99 | "language, content_type, extract_format, query", 100 | [ 101 | ("en", "summary", "HTML", "Python_(programming_language)"), # English, Summary Mode, HTML format 102 | ("en", "text", "WIKI", "Python_(programming_language)"), # English, Full Text Mode, WIKI format 103 | ("es", "summary", "HTML", "Python_(lenguaje_de_programación)"), # Spanish, Summary Mode, HTML format 104 | ("es", "text", "WIKI", "Python_(lenguaje_de_programación)"), # Spanish, Full Text Mode, WIKI format 105 | ], 106 | ) 107 | def test_wikipedia_search(language, 
content_type, extract_format, query): 108 | tool = WikipediaSearchTool( 109 | user_agent="TestAgent (test@example.com)", 110 | language=language, 111 | content_type=content_type, 112 | extract_format=extract_format, 113 | ) 114 | 115 | result = tool.forward(query) 116 | 117 | assert isinstance(result, str), "Output should be a string" 118 | assert "✅ **Wikipedia Page:**" in result, "Response should contain Wikipedia page title" 119 | assert "🔗 **Read more:**" in result, "Response should contain Wikipedia page URL" 120 | 121 | if content_type == "summary": 122 | assert len(result.split()) < 1000, "Summary mode should return a shorter text" 123 | if content_type == "text": 124 | assert len(result.split()) > 1000, "Full text mode should return a longer text" 125 | -------------------------------------------------------------------------------- /src/smolagents/cli.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf-8 3 | 4 | # Copyright 2025 The HuggingFace Inc. team. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | import argparse 18 | import os 19 | 20 | from dotenv import load_dotenv 21 | 22 | from smolagents import CodeAgent, HfApiModel, LiteLLMModel, Model, OpenAIServerModel, Tool, TransformersModel 23 | from smolagents.default_tools import TOOL_MAPPING 24 | 25 | 26 | leopard_prompt = "How many seconds would it take for a leopard at full speed to run through Pont des Arts?" 
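# Example invocation, assuming the package is installed (the console script is typically named `smolagent`):
#   smolagent "How many seconds would it take for a leopard at full speed to run through Pont des Arts?" --model-type HfApiModel --tools web_search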
27 | 28 | 29 | def parse_arguments(): 30 | parser = argparse.ArgumentParser(description="Run a CodeAgent with all specified parameters") 31 | parser.add_argument( 32 | "prompt", 33 | type=str, 34 | nargs="?", # Makes it optional 35 | default=leopard_prompt, 36 | help="The prompt to run with the agent", 37 | ) 38 | parser.add_argument( 39 | "--model-type", 40 | type=str, 41 | default="HfApiModel", 42 | help="The model type to use (e.g., HfApiModel, OpenAIServerModel, LiteLLMModel, TransformersModel)", 43 | ) 44 | parser.add_argument( 45 | "--model-id", 46 | type=str, 47 | default="Qwen/Qwen2.5-Coder-32B-Instruct", 48 | help="The model ID to use for the specified model type", 49 | ) 50 | parser.add_argument( 51 | "--imports", 52 | nargs="*", # accepts zero or more arguments 53 | default=[], 54 | help="Space-separated list of imports to authorize (e.g., 'numpy pandas')", 55 | ) 56 | parser.add_argument( 57 | "--tools", 58 | nargs="*", 59 | default=["web_search"], 60 | help="Space-separated list of tools that the agent can use (e.g., 'tool1 tool2 tool3')", 61 | ) 62 | parser.add_argument( 63 | "--verbosity-level", 64 | type=int, 65 | default=1, 66 | help="The verbosity level, as an int in [0, 1, 2].", 67 | ) 68 | group = parser.add_argument_group("api options", "Options for API-based model types") 69 | group.add_argument( 70 | "--api-base", 71 | type=str, 72 | help="The base URL for the model", 73 | ) 74 | group.add_argument( 75 | "--api-key", 76 | type=str, 77 | help="The API key for the model", 78 | ) 79 | return parser.parse_args() 80 | 81 | 82 | def load_model(model_type: str, model_id: str, api_base: str | None = None, api_key: str | None = None) -> Model: 83 | if model_type == "OpenAIServerModel": 84 | return OpenAIServerModel( 85 | api_key=api_key or os.getenv("FIREWORKS_API_KEY"), 86 | api_base=api_base or "https://api.fireworks.ai/inference/v1", 87 | model_id=model_id, 88 | ) 89 | elif model_type == "LiteLLMModel": 90 | return LiteLLMModel( 91 | model_id=model_id, 92 | api_key=api_key, 93 | api_base=api_base, 94 | ) 95 | elif model_type == "TransformersModel": 96 | return TransformersModel(model_id=model_id, device_map="auto") 97 | elif model_type == "HfApiModel": 98 | return HfApiModel( 99 | model_id=model_id, 100 | token=api_key or os.getenv("HF_API_KEY"), 101 | ) 102 | else: 103 | raise ValueError(f"Unsupported model type: {model_type}") 104 | 105 | 106 | def run_smolagent( 107 | prompt: str, 108 | tools: list[str], 109 | model_type: str, 110 | model_id: str, 111 | api_base: str | None = None, 112 | api_key: str | None = None, 113 | imports: list[str] | None = None, 114 | ) -> None: 115 | load_dotenv() 116 | 117 | model = load_model(model_type, model_id, api_base=api_base, api_key=api_key) 118 | 119 | available_tools = [] 120 | for tool_name in tools: 121 | if "/" in tool_name: 122 | available_tools.append(Tool.from_space(tool_name)) 123 | else: 124 | if tool_name in TOOL_MAPPING: 125 | available_tools.append(TOOL_MAPPING[tool_name]()) 126 | else: 127 | raise ValueError(f"Tool {tool_name} is not recognized either as a default tool or a Space.") 128 | 129 | print(f"Running agent with these tools: {tools}") 130 | agent = CodeAgent(tools=available_tools, model=model, additional_authorized_imports=imports) 131 | 132 | agent.run(prompt) 133 | 134 | 135 | def main() -> None: 136 | args = parse_arguments() 137 | run_smolagent( 138 | args.prompt, 139 | args.tools, 140 | args.model_type, 141 | args.model_id, 142 | api_base=args.api_base, 143 | api_key=args.api_key, 144 | 
imports=args.imports, 145 | ) 146 | 147 | 148 | if __name__ == "__main__": 149 | main() 150 | -------------------------------------------------------------------------------- /tests/test_cli.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import patch 2 | 3 | import pytest 4 | 5 | from smolagents.cli import load_model 6 | from smolagents.local_python_executor import LocalPythonExecutor 7 | from smolagents.models import HfApiModel, LiteLLMModel, OpenAIServerModel, TransformersModel 8 | 9 | 10 | @pytest.fixture 11 | def set_env_vars(monkeypatch): 12 | monkeypatch.setenv("FIREWORKS_API_KEY", "test_fireworks_api_key") 13 | monkeypatch.setenv("HF_TOKEN", "test_hf_api_key") 14 | 15 | 16 | def test_load_model_openai_server_model(set_env_vars): 17 | with patch("openai.OpenAI") as MockOpenAI: 18 | model = load_model("OpenAIServerModel", "test_model_id") 19 | assert isinstance(model, OpenAIServerModel) 20 | assert model.model_id == "test_model_id" 21 | assert MockOpenAI.call_count == 1 22 | assert MockOpenAI.call_args.kwargs["base_url"] == "https://api.fireworks.ai/inference/v1" 23 | assert MockOpenAI.call_args.kwargs["api_key"] == "test_fireworks_api_key" 24 | 25 | 26 | def test_load_model_litellm_model(): 27 | model = load_model("LiteLLMModel", "test_model_id", api_key="test_api_key", api_base="https://api.test.com") 28 | assert isinstance(model, LiteLLMModel) 29 | assert model.api_key == "test_api_key" 30 | assert model.api_base == "https://api.test.com" 31 | assert model.model_id == "test_model_id" 32 | 33 | 34 | def test_load_model_transformers_model(): 35 | with ( 36 | patch( 37 | "transformers.AutoModelForImageTextToText.from_pretrained", 38 | side_effect=ValueError("Unrecognized configuration class"), 39 | ), 40 | patch("transformers.AutoModelForCausalLM.from_pretrained"), 41 | patch("transformers.AutoTokenizer.from_pretrained"), 42 | ): 43 | model = load_model("TransformersModel", "test_model_id") 44 | assert isinstance(model, TransformersModel) 45 | assert model.model_id == "test_model_id" 46 | 47 | 48 | def test_load_model_hf_api_model(set_env_vars): 49 | with patch("huggingface_hub.InferenceClient") as huggingface_hub_InferenceClient: 50 | model = load_model("HfApiModel", "test_model_id") 51 | assert isinstance(model, HfApiModel) 52 | assert model.model_id == "test_model_id" 53 | assert huggingface_hub_InferenceClient.call_count == 1 54 | assert huggingface_hub_InferenceClient.call_args.kwargs["token"] == "test_hf_api_key" 55 | 56 | 57 | def test_load_model_invalid_model_type(): 58 | with pytest.raises(ValueError, match="Unsupported model type: InvalidModel"): 59 | load_model("InvalidModel", "test_model_id") 60 | 61 | 62 | def test_cli_main(capsys): 63 | with patch("smolagents.cli.load_model") as mock_load_model: 64 | mock_load_model.return_value = "mock_model" 65 | with patch("smolagents.cli.CodeAgent") as mock_code_agent: 66 | from smolagents.cli import run_smolagent 67 | 68 | run_smolagent("test_prompt", [], "HfApiModel", "test_model_id") 69 | # load_model 70 | assert len(mock_load_model.call_args_list) == 1 71 | assert mock_load_model.call_args.args == ("HfApiModel", "test_model_id") 72 | assert mock_load_model.call_args.kwargs == {"api_base": None, "api_key": None} 73 | # CodeAgent 74 | assert len(mock_code_agent.call_args_list) == 1 75 | assert mock_code_agent.call_args.args == () 76 | assert mock_code_agent.call_args.kwargs == { 77 | "tools": [], 78 | "model": "mock_model", 79 | "additional_authorized_imports": None, 80 | 
} 81 | # agent.run 82 | assert len(mock_code_agent.return_value.run.call_args_list) == 1 83 | assert mock_code_agent.return_value.run.call_args.args == ("test_prompt",) 84 | # print 85 | captured = capsys.readouterr() 86 | assert "Running agent with these tools: []" in captured.out 87 | 88 | 89 | def test_vision_web_browser_main(): 90 | with patch("smolagents.vision_web_browser.helium"): 91 | with patch("smolagents.vision_web_browser.load_model") as mock_load_model: 92 | mock_load_model.return_value = "mock_model" 93 | with patch("smolagents.vision_web_browser.CodeAgent") as mock_code_agent: 94 | from smolagents.vision_web_browser import helium_instructions, run_webagent 95 | 96 | run_webagent("test_prompt", "HfApiModel", "test_model_id") 97 | # load_model 98 | assert len(mock_load_model.call_args_list) == 1 99 | assert mock_load_model.call_args.args == ("HfApiModel", "test_model_id") 100 | # CodeAgent 101 | assert len(mock_code_agent.call_args_list) == 1 102 | assert mock_code_agent.call_args.args == () 103 | assert len(mock_code_agent.call_args.kwargs["tools"]) == 4 104 | assert mock_code_agent.call_args.kwargs["model"] == "mock_model" 105 | assert mock_code_agent.call_args.kwargs["additional_authorized_imports"] == ["helium"] 106 | # agent.python_executor 107 | assert len(mock_code_agent.return_value.python_executor.call_args_list) == 1 108 | assert mock_code_agent.return_value.python_executor.call_args.args == ("from helium import *",) 109 | assert LocalPythonExecutor(["helium"])("from helium import *") == (None, "", False) 110 | # agent.run 111 | assert len(mock_code_agent.return_value.run.call_args_list) == 1 112 | assert mock_code_agent.return_value.run.call_args.args == ("test_prompt" + helium_instructions,) 113 | -------------------------------------------------------------------------------- /docs/source/hi/tutorials/secure_code_execution.mdx: -------------------------------------------------------------------------------- 1 | 16 | # सुरक्षित कोड एक्जीक्यूशन 17 | 18 | [[open-in-colab]] 19 | 20 | > [!TIP] 21 | > यदि आप एजेंट्स बनाने में नए हैं, तो सबसे पहले [एजेंट्स का परिचय](../conceptual_guides/intro_agents) और [smolagents की गाइडेड टूर](../guided_tour) पढ़ना सुनिश्चित करें। 22 | 23 | ### कोड Agents 24 | 25 | [कई](https://huggingface.co/papers/2402.01030) [शोध](https://huggingface.co/papers/2411.01747) [पत्रों](https://huggingface.co/papers/2401.00812) ने दिखाया है कि LLM द्वारा अपनी क्रियाओं (टूल कॉल्स) को कोड में लिखना, टूल कॉलिंग के वर्तमान मानक प्रारूप से बहुत बेहतर है, जो industry में "टूल्स नेम्स और आर्ग्यूमेंट्स को JSON के रूप में लिखने" के विभिन्न रूप हैं। 26 | 27 | कोड बेहतर क्यों है? क्योंकि हमने अपनी कोड भाषाओं को विशेष रूप से कंप्यूटर द्वारा की जाने वाली क्रियाओं को व्यक्त करने के लिए तैयार किया है। यदि JSON स्निपेट्स एक बेहतर तरीका होता, तो यह पैकेज JSON स्निपेट्स में लिखा गया होता और शैतान हम पर हंस रहा होता। 28 | 29 | कोड कंप्यूटर पर क्रियाएँ व्यक्त करने का बेहतर तरीका है। इसमें बेहतर है: 30 | - **कंपोज़ेबिलिटी:** क्या आप JSON क्रियाओं को एक-दूसरे के भीतर नेस्ट कर सकते हैं, या बाद में पुन: उपयोग करने के लिए JSON क्रियाओं का एक सेट परिभाषित कर सकते हैं, जैसे आप बस एक पायथन फ़ंक्शन परिभाषित कर सकते हैं? 31 | - **ऑब्जेक्ट प्रबंधन:** JSON में `generate_image` जैसी क्रिया का आउटपुट कैसे स्टोर करें? 32 | - **सामान्यता:** कोड किसी भी कंप्यूटर कार्य को व्यक्त करने के लिए बनाया गया है। 33 | - **LLM प्रशिक्षण कॉर्पस में प्रतिनिधित्व:** क्यों न इस आशीर्वाद का लाभ उठाएं कि उच्च गुणवत्ता वाले कोड उदाहरण पहले से ही LLM प्रशिक्षण डेटा में शामिल हैं? 
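उदाहरण के लिए, नीचे एक छोटा सा काल्पनिक स्केच है जो दिखाता है कि कोड में लिखी गई क्रियाएँ कितनी आसानी से compose और reuse की जा सकती हैं — JSON स्निपेट्स में इस तरह की नेस्टिंग करना कहीं अधिक कठिन है (यहाँ `get_weather` और `weather_report` केवल उदाहरण के लिए बनाए गए काल्पनिक फ़ंक्शन हैं, किसी वास्तविक टूल का हिस्सा नहीं):

```py
# काल्पनिक उदाहरण: वास्तविक API कॉल की जगह एक डमी वैल्यू लौटाता है
def get_weather(city: str) -> str:
    return f"{city}: 21°C, साफ़ आसमान"


# क्रियाओं को नेस्ट और पुन: उपयोग करना — यही कंपोज़ेबिलिटी है
def weather_report(cities: list[str]) -> str:
    return "\n".join(get_weather(city) for city in cities)


print(weather_report(["Paris", "Tokyo"]))
```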
34 | 35 | यह नीचे दी गई छवि में दर्शाया गया है, जो [Executable Code Actions Elicit Better LLM Agents](https://huggingface.co/papers/2402.01030) से ली गई है। 36 | 37 | 38 | 39 | यही कारण है कि हमने कोड एजेंट्स, इस मामले में पायथन एजेंट्स पर जोर दिया, जिसका मतलब सुरक्षित पायथन इंटरप्रेटर बनाने पर अधिक प्रयास करना था। 40 | 41 | ### लोकल पायथन इंटरप्रेटर 42 | 43 | डिफ़ॉल्ट रूप से, `CodeAgent` LLM-जनरेटेड कोड को आपके एनवायरनमेंट में चलाता है। 44 | यह एक्जीक्यूशन वैनिला पायथन इंटरप्रेटर द्वारा नहीं किया जाता: हमने एक अधिक सुरक्षित `LocalPythonExecutor` को शुरू से फिर से बनाया है। 45 | यह इंटरप्रेटर सुरक्षा के लिए डिज़ाइन किया गया है: 46 | - इम्पोर्ट्स को उपयोगकर्ता द्वारा स्पष्ट रूप से पास की गई सूची तक सीमित करना 47 | - इनफिनिट लूप्स और रिसोर्स ब्लोटिंग को रोकने के लिए ऑपरेशंस की संख्या को कैप करना 48 | - कोई भी ऐसा ऑपरेशन नहीं करेगा जो पूर्व-परिभाषित नहीं है 49 | 50 | हमने इसे कई उपयोग मामलों में इस्तेमाल किया है, और कभी भी एनवायरनमेंट को कोई नुकसान नहीं देखा। 51 | 52 | हालांकि यह समाधान पूरी तरह से सुरक्षित नहीं है: कोई ऐसे अवसरों की कल्पना कर सकता है जहां दुर्भावनापूर्ण कार्यों के लिए फाइन-ट्यून किए गए LLM अभी भी आपके एनवायरनमेंट को नुकसान पहुंचा सकते हैं। उदाहरण के लिए यदि आपने छवियों को प्रोसेस करने के लिए `Pillow` जैसे मासूम पैकेज की अनुमति दी है, तो LLM आपकी हार्ड ड्राइव को ब्लोट करने के लिए हजारों छवियों को सेव कर सकता है। 53 | यदि आपने खुद LLM इंजन चुना है तो यह निश्चित रूप से संभावित नहीं है, लेकिन यह हो सकता है। 54 | 55 | तो यदि आप अतिरिक्त सावधानी बरतना चाहते हैं, तो आप नीचे वर्णित रिमोट कोड एक्जीक्यूशन विकल्प का उपयोग कर सकते हैं। 56 | 57 | ### E2B कोड एक्जीक्यूटर 58 | 59 | अधिकतम सुरक्षा के लिए, आप कोड को सैंडबॉक्स्ड एनवायरनमेंट में चलाने के लिए E2B के साथ हमारे एकीकरण का उपयोग कर सकते हैं। यह एक रिमोट एक्जीक्यूशन सेवा है जो आपके कोड को एक आइसोलेटेड कंटेनर में चलाती है, जिससे कोड का आपके स्थानीय एनवायरनमेंट को प्रभावित करना असंभव हो जाता है। 60 | 61 | इसके लिए, आपको अपना E2B अकाउंट सेटअप करने और अपने एनवायरनमेंट वेरिएबल्स में अपना `E2B_API_KEY` सेट करने की आवश्यकता होगी। अधिक जानकारी के लिए [E2B की क्विकस्टार्ट डॉक्यूमेंटेशन](https://e2b.dev/docs/quickstart) पर जाएं। 62 | 63 | फिर आप इसे `pip install e2b-code-interpreter python-dotenv` के साथ इंस्टॉल कर सकते हैं। 64 | 65 | अब आप तैयार हैं! 66 | 67 | कोड एक्जीक्यूटर को E2B पर सेट करने के लिए, बस अपने `CodeAgent` को इनिशियलाइज़ करते समय `executor_type="e2b"` फ्लैग पास करें। 68 | ध्यान दें कि आपको `additional_authorized_imports` में सभी टूल की डिपेंडेंसीज़ जोड़नी चाहिए, ताकि एक्जीक्यूटर उन्हें इंस्टॉल करे। 69 | 70 | ```py 71 | from smolagents import CodeAgent, VisitWebpageTool, HfApiModel 72 | agent = CodeAgent( 73 | tools = [VisitWebpageTool()], 74 | model=HfApiModel(), 75 | additional_authorized_imports=["requests", "markdownify"], 76 | executor_type="e2b" 77 | ) 78 | 79 | agent.run("What was Abraham Lincoln's preferred pet?") 80 | ``` 81 | 82 | E2B कोड एक्जीक्यूशन वर्तमान में मल्टी-एजेंट्स के साथ काम नहीं करता है - क्योंकि कोड ब्लॉब में एक एजेंट कॉल करना जो रिमोटली एक्जीक्यूट किया जाना चाहिए, यह एक गड़बड़ है। लेकिन हम इसे जोड़ने पर काम कर रहे हैं! 
83 | -------------------------------------------------------------------------------- /docs/source/zh/reference/models.mdx: -------------------------------------------------------------------------------- 1 | 2 | 17 | # 模型 18 | 19 | 20 | 21 | Smolagents 是一个实验性 API,其可能会随时发生更改。由于 API 或底层模型可能会变化,智能体返回的结果可能会有所不同。 22 | 23 | 24 | 25 | 要了解有关智能体和工具的更多信息,请务必阅读[入门指南](../index)。此页面包含底层类的 API 文档。 26 | 27 | ## 模型 28 | 29 | 您可以自由创建和使用自己的模型为智能体提供支持。 30 | 31 | 您可以使用任何 `model` 可调用对象作为智能体的模型,只要满足以下条件: 32 | 1. 它遵循[消息格式](./chat_templating)(`List[Dict[str, str]]`),将其作为输入 `messages`,并返回一个 `str`。 33 | 2. 它在生成的序列到达 `stop_sequences` 参数中指定的内容之前停止生成输出。 34 | 35 | 要定义您的 LLM,可以创建一个 `custom_model` 方法,该方法接受一个 [messages](./chat_templating) 列表,并返回一个包含 `.content` 属性的对象,其中包含生成的文本。此可调用对象还需要接受一个 `stop_sequences` 参数,用于指示何时停止生成。 36 | 37 | ```python 38 | from huggingface_hub import login, InferenceClient 39 | 40 | login("") 41 | 42 | model_id = "meta-llama/Llama-3.3-70B-Instruct" 43 | 44 | client = InferenceClient(model=model_id) 45 | 46 | def custom_model(messages, stop_sequences=["Task"]): 47 | response = client.chat_completion(messages, stop=stop_sequences, max_tokens=1000) 48 | answer = response.choices[0].message 49 | return answer 50 | ``` 51 | 52 | 此外,`custom_model` 还可以接受一个 `grammar` 参数。如果在智能体初始化时指定了 `grammar`,则此参数将在调用模型时传递,以便进行[约束生成](https://huggingface.co/docs/text-generation-inference/conceptual/guidance),从而强制生成格式正确的智能体输出。 53 | 54 | ### TransformersModel 55 | 56 | 为了方便起见,我们添加了一个 `TransformersModel`,该模型通过为初始化时指定的 `model_id` 构建一个本地 `transformers` pipeline 来实现上述功能。 57 | 58 | ```python 59 | from smolagents import TransformersModel 60 | 61 | model = TransformersModel(model_id="HuggingFaceTB/SmolLM-135M-Instruct") 62 | 63 | print(model([{"role": "user", "content": [{"type": "text", "text": "Ok!"}]}], stop_sequences=["great"])) 64 | ``` 65 | ```text 66 | >>> What a 67 | ``` 68 | 69 | > [!TIP] 70 | > 您必须在机器上安装 `transformers` 和 `torch`。如果尚未安装,请运行 `pip install smolagents[transformers]`。 71 | 72 | [[autodoc]] TransformersModel 73 | 74 | ### HfApiModel 75 | 76 | `HfApiModel` 封装了 huggingface_hub 的 [InferenceClient](https://huggingface.co/docs/huggingface_hub/main/en/guides/inference),用于执行 LLM。它支持 HF 的 [Inference API](https://huggingface.co/docs/api-inference/index) 以及 Hub 上所有可用的[Inference Providers](https://huggingface.co/blog/inference-providers)。 77 | 78 | ```python 79 | from smolagents import HfApiModel 80 | 81 | messages = [ 82 | {"role": "user", "content": [{"type": "text", "text": "Hello, how are you?"}]} 83 | ] 84 | 85 | model = HfApiModel() 86 | print(model(messages)) 87 | ``` 88 | ```text 89 | >>> Of course! If you change your mind, feel free to reach out. Take care! 
90 | ``` 91 | [[autodoc]] HfApiModel 92 | 93 | ### LiteLLMModel 94 | 95 | `LiteLLMModel` 利用 [LiteLLM](https://www.litellm.ai/) 支持来自不同提供商的 100+ 个 LLM。您可以在模型初始化时传递 `kwargs`,这些参数将在每次使用模型时被使用,例如下面的示例中传递了 `temperature`。 96 | 97 | ```python 98 | from smolagents import LiteLLMModel 99 | 100 | messages = [ 101 | {"role": "user", "content": [{"type": "text", "text": "Hello, how are you?"}]} 102 | ] 103 | 104 | model = LiteLLMModel(model_id="anthropic/claude-3-5-sonnet-latest", temperature=0.2, max_tokens=10) 105 | print(model(messages)) 106 | ``` 107 | 108 | [[autodoc]] LiteLLMModel 109 | 110 | ### OpenAIServerModel 111 | 112 | 此类允许您调用任何 OpenAIServer 兼容模型。 113 | 以下是设置方法(您可以自定义 `api_base` URL 指向其他服务器): 114 | ```py 115 | import os 116 | from smolagents import OpenAIServerModel 117 | 118 | model = OpenAIServerModel( 119 | model_id="gpt-4o", 120 | api_base="https://api.openai.com/v1", 121 | api_key=os.environ["OPENAI_API_KEY"], 122 | ) 123 | ``` 124 | 125 | [[autodoc]] OpenAIServerModel 126 | 127 | ### AzureOpenAIServerModel 128 | 129 | `AzureOpenAIServerModel` 允许您连接到任何 Azure OpenAI 部署。 130 | 131 | 下面是设置示例,请注意,如果已经设置了相应的环境变量,您可以省略 `azure_endpoint`、`api_key` 和 `api_version` 参数——环境变量包括 `AZURE_OPENAI_ENDPOINT`、`AZURE_OPENAI_API_KEY` 和 `OPENAI_API_VERSION`。 132 | 133 | 请注意,`OPENAI_API_VERSION` 没有 `AZURE_` 前缀,这是由于底层 [openai](https://github.com/openai/openai-python) 包的设计所致。 134 | 135 | ```py 136 | import os 137 | 138 | from smolagents import AzureOpenAIServerModel 139 | 140 | model = AzureOpenAIServerModel( 141 | model_id = os.environ.get("AZURE_OPENAI_MODEL"), 142 | azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"), 143 | api_key=os.environ.get("AZURE_OPENAI_API_KEY"), 144 | api_version=os.environ.get("OPENAI_API_VERSION") 145 | ) 146 | ``` 147 | 148 | [[autodoc]] AzureOpenAIServerModel 149 | 150 | ### MLXModel 151 | 152 | ```python 153 | from smolagents import MLXModel 154 | 155 | model = MLXModel(model_id="HuggingFaceTB/SmolLM-135M-Instruct") 156 | 157 | print(model([{"role": "user", "content": "Ok!"}], stop_sequences=["great"])) 158 | ``` 159 | ```text 160 | >>> What a 161 | ``` 162 | 163 | > [!TIP] 164 | > 您必须在机器上安装 `mlx-lm`。如果尚未安装,请运行 `pip install smolagents[mlx-lm]`。 165 | 166 | [[autodoc]] MLXModel 167 | -------------------------------------------------------------------------------- /docs/source/zh/conceptual_guides/intro_agents.mdx: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Agent 简介 18 | 19 | > [!TIP] 20 | > 译者注:Agent 的业内术语是“智能体”。本译文将保留 agent,不作翻译,以带来更高效的阅读体验。(在中文为主的文章中,It's easier to 注意到英文。Attention Is All You Need!) 21 | 22 | ## 🤔 什么是 agent? 
23 | 24 | 任何使用 AI 的高效系统都需要为 LLM 提供某种访问现实世界的方式:例如调用搜索工具获取外部信息,或者操作某些程序以完成任务。换句话说,LLM 应该具有 **_Agent 能力_**。Agent 程序是 LLM 通往外部世界的门户。 25 | 26 | > [!TIP] 27 | > AI agent 是 **LLM 输出控制工作流的程序**。 28 | 29 | 任何利用 LLM 的系统都会将 LLM 输出集成到代码中。LLM 输入对代码工作流的影响程度就是 LLM 在系统中的 agent 能力级别。 30 | 31 | 请注意,根据这个定义,"Agent" 不是一个离散的、非 0 即 1 的定义:相反,"Agent 能力" 是一个连续谱系,随着你在工作流中给予 LLM 更多或更少的权力而变化。 32 | 33 | 请参见下表中 agent 能力在不同系统中的变化: 34 | 35 | | Agent 能力级别 | 描述 | 名称 | 示例模式 | 36 | | ------------ | ---------------------------------------------- | ---------- | -------------------------------------------------- | 37 | | ☆☆☆ | LLM 输出对程序流程没有影响 | 简单处理器 | `process_llm_output(llm_response)` | 38 | | ★☆☆ | LLM 输出决定 if/else 分支 | 路由 | `if llm_decision(): path_a() else: path_b()` | 39 | | ★★☆ | LLM 输出决定函数执行 | 工具调用者 | `run_function(llm_chosen_tool, llm_chosen_args)` | 40 | | ★★★ | LLM 输出控制迭代和程序继续 | 多步 Agent | `while llm_should_continue(): execute_next_step()` | 41 | | ★★★ | 一个 agent 工作流可以启动另一个 agent 工作流 | 多 Agent | `if llm_trigger(): execute_agent()` | 42 | 43 | 多步 agent 具有以下代码结构: 44 | 45 | ```python 46 | memory = [user_defined_task] 47 | while llm_should_continue(memory): # 这个循环是多步部分 48 | action = llm_get_next_action(memory) # 这是工具调用部分 49 | observations = execute_action(action) 50 | memory += [action, observations] 51 | ``` 52 | 53 | 这个 agent 系统在一个循环中运行,每一步执行一个新动作(该动作可能涉及调用一些预定义的 *工具*,这些工具只是函数),直到其观察结果表明已达到解决给定任务的满意状态。以下是一个多步 agent 如何解决简单数学问题的示例: 54 | 55 |
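下面是一个仅作示意的最小代码草图(假设你已安装 `smolagents` 并配置好推理所需的 token;这里不提供任何额外工具,`CodeAgent` 会按上面的循环逐步编写并执行 Python 代码,直到得出最终答案):

```python
from smolagents import CodeAgent, HfApiModel

# 不提供额外工具:agent 将在多步循环中编写并执行 Python 代码来完成计算
agent = CodeAgent(tools=[], model=HfApiModel())

result = agent.run("计算 122 的 36.8% 是多少,结果保留两位小数。")
print(result)
```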
56 | 57 |
58 | 59 | ## ✅ 何时使用 agent / ⛔ 何时避免使用 60 | 61 | 当你需要 LLM 确定应用程序的工作流时,agent 很有用。但它们通常有些过度。问题是:我真的需要工作流的灵活性来有效解决手头的任务吗? 62 | 如果预定义的工作流经常不足,这意味着你需要更多的灵活性。 63 | 让我们举个例子:假设你正在开发一个处理冲浪旅行网站客户请求的应用程序。 64 | 65 | 你可以提前知道请求将属于 2 个类别之一(基于用户选择),并且你为这 2 种情况都有预定义的工作流。 66 | 67 | 1. 想要了解旅行信息?⇒ 给他们访问搜索栏以搜索你的知识库 68 | 2. 想与销售交谈?⇒ 让他们填写联系表单。 69 | 70 | 如果这个确定性工作流适合所有查询,那就直接编码吧!这将为你提供一个 100% 可靠的系统,没有让不可预测的 LLM 干扰你的工作流而引入错误的风险。为了简单和稳健起见,建议规范化不使用任何 agent 行为。 71 | 72 | 但如果工作流不能提前确定得那么好呢? 73 | 74 | 例如,用户想问:`"I can come on Monday, but I forgot my passport so risk being delayed to Wednesday, is it possible to take me and my stuff to surf on Tuesday morning, with a cancellation insurance?"` 这个问题涉及许多因素,可能上述预定的标准都不足以满足这个请求。 75 | 76 | 如果预定义的工作流经常不足,这意味着你需要更多的灵活性。 77 | 78 | 这就是 agent 设置发挥作用的地方。 79 | 80 | 在上面的例子中,你可以创建一个多步 agent,它可以访问天气 API 获取天气预报,Google Maps API 计算旅行距离,员工在线仪表板和你的知识库上的 RAG 系统。 81 | 82 | 直到最近,计算机程序还局限于预定义的工作流,试图通过堆积 if/else 分支来处理复杂性。它们专注于极其狭窄的任务,如"计算这些数字的总和"或"找到这个图中的最短路径"。但实际上,大多数现实生活中的任务,如我们上面的旅行示例,都不适合预定义的工作流。agent 系统为程序打开了现实世界任务的大门! 83 | 84 | ## 为什么选择 `smolagents`? 85 | 86 | 对于一些低级的 agent 用例,如链或路由器,你可以自己编写所有代码。这样会更好,因为它可以让你更好地控制和理解你的系统。 87 | 88 | 但一旦你开始追求更复杂的行为,比如让 LLM 调用函数(即"工具调用")或让 LLM 运行 while 循环("多步 agent"),一些抽象就变得必要: 89 | 90 | - 对于工具调用,你需要解析 agent 的输出,因此这个输出需要一个预定义的格式,如"Thought: I should call tool 'get_weather'. Action: get_weather(Paris).",你用预定义的函数解析它,并且给 LLM 的系统提示应该通知它这个格式。 91 | - 对于 LLM 输出决定循环的多步 agent,你需要根据上次循环迭代中发生的情况给 LLM 不同的提示:所以你需要某种记忆能力。 92 | 93 | 看到了吗?通过这两个例子,我们已经发现需要一些项目来帮助我们: 94 | 95 | - 当然,一个作为系统引擎的 LLM 96 | - agent 可以访问的工具列表 97 | - 从 LLM 输出中提取工具调用的解析器 98 | - 与解析器同步的系统提示 99 | - 记忆能力 100 | 101 | 但是等等,既然我们给 LLM 在决策中留出了空间,它们肯定会犯错误:所以我们需要错误日志记录和重试机制。 102 | 103 | 所有这些元素都需要紧密耦合才能形成一个功能良好的系统。这就是为什么我们决定需要制作基本构建块来让所有这些东西协同工作。 104 | 105 | ## 代码 agent 106 | 107 | 在多步 agent 中,每一步 LLM 都可以编写一个动作,形式为调用外部工具。编写这些动作的常见格式(由 Anthropic、OpenAI 等使用)通常是"将动作编写为工具名称和要使用的参数的 JSON,然后解析以知道要执行哪个工具以及使用哪些参数"的不同变体。 108 | 109 | [多项](https://huggingface.co/papers/2402.01030) [研究](https://huggingface.co/papers/2411.01747) [论文](https://huggingface.co/papers/2401.00812) 表明,在代码中进行工具调用的 LLM 要好得多。 110 | 111 | 原因很简单,_我们专门设计了我们的代码语言,使其成为表达计算机执行动作的最佳方式_。如果 JSON 片段是更好的表达方式,JSON 将成为顶级编程语言,编程将变得非常困难。 112 | 113 | 下图取自 [Executable Code Actions Elicit Better LLM Agents](https://huggingface.co/papers/2402.01030),说明了用代码编写动作的一些优势: 114 | 115 | 116 | 117 | 与 JSON 片段相比,用代码编写动作提供了更好的: 118 | 119 | - **可组合性:** 你能像定义 python 函数一样,将 JSON 动作嵌套在一起,或定义一组 JSON 动作以供重用吗? 120 | - **对象管理:** 你如何在 JSON 中存储像 `generate_image` 这样的动作的输出? 121 | - **通用性:** 代码被构建为简单地表达任何你可以让计算机做的事情。 122 | - **LLM 训练数据中的表示:** 大量高质量的代码动作已经包含在 LLM 的训练数据中,这意味着它们已经为此进行了训练! 123 | -------------------------------------------------------------------------------- /tests/test_gradio_ui.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2024 HuggingFace Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | import os 17 | import shutil 18 | import tempfile 19 | import unittest 20 | from unittest.mock import Mock, patch 21 | 22 | from smolagents.gradio_ui import GradioUI 23 | 24 | 25 | class GradioUITester(unittest.TestCase): 26 | def setUp(self): 27 | """Initialize test environment""" 28 | self.temp_dir = tempfile.mkdtemp() 29 | self.mock_agent = Mock() 30 | self.ui = GradioUI(agent=self.mock_agent, file_upload_folder=self.temp_dir) 31 | self.allowed_types = [".pdf", ".docx", ".txt"] 32 | 33 | def tearDown(self): 34 | """Clean up test environment""" 35 | shutil.rmtree(self.temp_dir) 36 | 37 | def test_upload_file_default_types(self): 38 | """Test default allowed file types""" 39 | default_types = [".pdf", ".docx", ".txt"] 40 | for file_type in default_types: 41 | with tempfile.NamedTemporaryFile(suffix=file_type) as temp_file: 42 | mock_file = Mock() 43 | mock_file.name = temp_file.name 44 | 45 | textbox, uploads_log = self.ui.upload_file(mock_file, []) 46 | 47 | self.assertIn("File uploaded:", textbox.value) 48 | self.assertEqual(len(uploads_log), 1) 49 | self.assertTrue(os.path.exists(os.path.join(self.temp_dir, os.path.basename(temp_file.name)))) 50 | 51 | def test_upload_file_default_types_disallowed(self): 52 | """Test default disallowed file types""" 53 | disallowed_types = [".exe", ".sh", ".py", ".jpg"] 54 | for file_type in disallowed_types: 55 | with tempfile.NamedTemporaryFile(suffix=file_type) as temp_file: 56 | mock_file = Mock() 57 | mock_file.name = temp_file.name 58 | 59 | textbox, uploads_log = self.ui.upload_file(mock_file, []) 60 | 61 | self.assertEqual(textbox.value, "File type disallowed") 62 | self.assertEqual(len(uploads_log), 0) 63 | 64 | def test_upload_file_success(self): 65 | """Test successful file upload scenario""" 66 | with tempfile.NamedTemporaryFile(suffix=".txt") as temp_file: 67 | mock_file = Mock() 68 | mock_file.name = temp_file.name 69 | 70 | textbox, uploads_log = self.ui.upload_file(mock_file, []) 71 | 72 | self.assertIn("File uploaded:", textbox.value) 73 | self.assertEqual(len(uploads_log), 1) 74 | self.assertTrue(os.path.exists(os.path.join(self.temp_dir, os.path.basename(temp_file.name)))) 75 | self.assertEqual(uploads_log[0], os.path.join(self.temp_dir, os.path.basename(temp_file.name))) 76 | 77 | def test_upload_file_none(self): 78 | """Test scenario when no file is selected""" 79 | textbox, uploads_log = self.ui.upload_file(None, []) 80 | 81 | self.assertEqual(textbox.value, "No file uploaded") 82 | self.assertEqual(len(uploads_log), 0) 83 | 84 | def test_upload_file_invalid_type(self): 85 | """Test disallowed file type""" 86 | with tempfile.NamedTemporaryFile(suffix=".exe") as temp_file: 87 | mock_file = Mock() 88 | mock_file.name = temp_file.name 89 | 90 | textbox, uploads_log = self.ui.upload_file(mock_file, []) 91 | 92 | self.assertEqual(textbox.value, "File type disallowed") 93 | self.assertEqual(len(uploads_log), 0) 94 | 95 | def test_upload_file_special_chars(self): 96 | """Test scenario with special characters in filename""" 97 | with tempfile.NamedTemporaryFile(suffix=".txt") as temp_file: 98 | # Create a new temporary file with special characters 99 | special_char_name = os.path.join(os.path.dirname(temp_file.name), "test@#$%^&*.txt") 100 | shutil.copy(temp_file.name, special_char_name) 101 | try: 102 | mock_file = Mock() 103 | mock_file.name = special_char_name 104 | 105 | with patch("shutil.copy"): 106 | textbox, uploads_log = self.ui.upload_file(mock_file, []) 107 | 108 | self.assertIn("File uploaded:", textbox.value) 109 | 
self.assertEqual(len(uploads_log), 1) 110 | self.assertIn("test_____", uploads_log[0]) 111 | finally: 112 | # Clean up the special character file 113 | if os.path.exists(special_char_name): 114 | os.remove(special_char_name) 115 | 116 | def test_upload_file_custom_types(self): 117 | """Test custom allowed file types""" 118 | with tempfile.NamedTemporaryFile(suffix=".csv") as temp_file: 119 | mock_file = Mock() 120 | mock_file.name = temp_file.name 121 | 122 | textbox, uploads_log = self.ui.upload_file(mock_file, [], allowed_file_types=[".csv"]) 123 | 124 | self.assertIn("File uploaded:", textbox.value) 125 | self.assertEqual(len(uploads_log), 1) 126 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Contribute to smolagents 18 | 19 | Everyone is welcome to contribute, and we value everybody's contribution. Code 20 | contributions are not the only way to help the community. Answering questions, helping 21 | others, and improving the documentation are also immensely valuable. 22 | 23 | It also helps us if you spread the word! Reference the library in blog posts 24 | about the awesome projects it made possible, shout out on Twitter every time it has 25 | helped you, or simply ⭐️ the repository to say thank you. 26 | 27 | However you choose to contribute, please be mindful and respect our 28 | [code of conduct](https://github.com/huggingface/smolagents/blob/main/CODE_OF_CONDUCT.md). 29 | 30 | **This guide was heavily inspired by the awesome [scikit-learn guide to contributing](https://github.com/scikit-learn/scikit-learn/blob/main/CONTRIBUTING.md).** 31 | 32 | ## Ways to contribute 33 | 34 | There are several ways you can contribute to smolagents. 35 | 36 | * Submit issues related to bugs or desired new features. 37 | * Contribute to the examples or to the documentation. 38 | * Fix outstanding issues with the existing code. 39 | 40 | > All contributions are equally valuable to the community. 🥰 41 | 42 | ## Submitting a bug-related issue or feature request 43 | 44 | At any moment, feel welcome to open an issue, citing your exact error traces and package versions if it's a bug. 45 | It's often even better to open a PR with your proposed fixes/changes! 46 | 47 | Do your best to follow these guidelines when submitting a bug-related issue or a feature 48 | request. It will make it easier for us to come back to you quickly and with good 49 | feedback. 50 | 51 | ### Did you find a bug? 52 | 53 | The smolagents library is robust and reliable thanks to users who report the problems they encounter. 54 | 55 | Before you report an issue, we would really appreciate it if you could **make sure the bug was not 56 | already reported** (use the search bar on GitHub under Issues). Your issue should also be related to bugs in the 57 | library itself, and not your code. 58 | 59 | Once you've confirmed the bug hasn't already been reported, please include the following information in your issue so 60 | we can quickly resolve it: 61 | 62 | * Your **OS type and version**, as well as your environment versions (versions of rust, python, and dependencies). 63 | * A short, self-contained, code snippet that allows us to reproduce the bug. 64 | * The *full* traceback if an exception is raised. 65 | * Attach any other additional information, like screenshots, you think may help. 66 | 67 | ### Do you want a new feature? 
68 | 69 | If there is a new feature you'd like to see in smolagents, please open an issue and describe: 70 | 71 | 1. What is the *motivation* behind this feature? Is it related to a problem or frustration with the library? Is it 72 | a feature related to something you need for a project? Is it something you worked on and think it could benefit 73 | the community? 74 | 75 | Whatever it is, we'd love to hear about it! 76 | 77 | 2. Describe your requested feature in as much detail as possible. The more you can tell us about it, the better 78 | we'll be able to help you. 79 | 3. Provide a *code snippet* that demonstrates the feature's usage. 80 | 4. If the feature is related to a paper, please include a link. 81 | 82 | If your issue is well written we're already 80% of the way there by the time you create it. 83 | 84 | ## Do you want to add documentation? 85 | 86 | We're always looking for improvements to the documentation that make it more clear and accurate. Please let us know 87 | how the documentation can be improved such as typos and any content that is missing, unclear or inaccurate. We'll be 88 | happy to make the changes or help you make a contribution if you're interested! 89 | 90 | ## Fixing outstanding issues 91 | 92 | If you notice an issue with the existing code and have a fix in mind, feel free to [start contributing](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request) and open 93 | a Pull Request! 94 | 95 | ### Making code changes 96 | 97 | To install dev dependencies, run: 98 | ``` 99 | pip install -e ".[dev]" 100 | ``` 101 | 102 | When making changes to the codebase, please check that it follows the repo's code quality requirements by running: 103 | To check code quality of the source code: 104 | ``` 105 | make quality 106 | ``` 107 | 108 | If the checks fail, you can run the formatter with: 109 | ``` 110 | make style 111 | ``` 112 | 113 | And commit the changes. 114 | 115 | To run tests locally, run this command: 116 | ```bash 117 | make test 118 | ``` 119 | 120 | 121 | ## I want to become a maintainer of the project. How do I get there? 122 | 123 | smolagents is a project led and managed by Hugging Face. We are more than 124 | happy to have motivated individuals from other organizations join us as maintainers with the goal of helping smolagents 125 | make a dent in the world of Agents. 126 | 127 | If you are such an individual (or organization), please reach out to us and let's collaborate. 128 | -------------------------------------------------------------------------------- /docs/source/zh/examples/rag.mdx: -------------------------------------------------------------------------------- 1 | 16 | # Agentic RAG 17 | 18 | [[open-in-colab]] 19 | 20 | Retrieval-Augmented-Generation (RAG) 是“使用大语言模型(LLM)来回答用户查询,但基于从知识库中检索的信息”。它比使用普通或微调的 LLM 具有许多优势:举几个例子,它允许将答案基于真实事实并减少虚构;它允许提供 LLM 领域特定的知识;并允许对知识库中的信息访问进行精细控制。 21 | 22 | 但是,普通的 RAG 存在一些局限性,以下两点尤为突出: 23 | 24 | - 它只执行一次检索步骤:如果结果不好,生成的内容也会不好。 25 | - 语义相似性是以用户查询为参考计算的,这可能不是最优的:例如,用户查询通常是一个问题,而包含真实答案的文档通常是肯定语态,因此其相似性得分会比其他以疑问形式呈现的源文档低,从而导致错失相关信息的风险。 26 | 27 | 我们可以通过制作一个 RAG agent来缓解这些问题:非常简单,一个配备了检索工具的agent!这个 agent 将 28 | 会:✅ 自己构建查询和检索,✅ 如果需要的话会重新检索。 29 | 30 | 因此,它将比普通 RAG 更智能,因为它可以自己构建查询,而不是直接使用用户查询作为参考。这样,它可以更 31 | 接近目标文档,从而提高检索的准确性, [HyDE](https://huggingface.co/papers/2212.10496)。此 agent 可以 32 | 使用生成的片段,并在需要时重新检索,就像 [Self-Query](https://docs.llamaindex.ai/en/stable/examples/evaluation/RetryQuery/)。 33 | 34 | 我们现在开始构建这个系统. 
🛠️ 35 | 36 | 运行以下代码以安装所需的依赖包: 37 | ```bash 38 | !pip install smolagents pandas langchain langchain-community sentence-transformers rank_bm25 --upgrade -q 39 | ``` 40 | 41 | 你需要一个有效的 token 作为环境变量 `HF_TOKEN` 来调用 HF Inference API。我们使用 python-dotenv 来加载它。 42 | ```py 43 | from dotenv import load_dotenv 44 | load_dotenv() 45 | ``` 46 | 47 | 我们首先加载一个知识库以在其上执行 RAG:此数据集是许多 Hugging Face 库的文档页面的汇编,存储为 markdown 格式。我们将仅保留 `transformers` 库的文档。然后通过处理数据集并将其存储到向量数据库中,为检索器准备知识库。我们将使用 [LangChain](https://python.langchain.com/docs/introduction/) 来利用其出色的向量数据库工具。 48 | ```py 49 | import datasets 50 | from langchain.docstore.document import Document 51 | from langchain.text_splitter import RecursiveCharacterTextSplitter 52 | from langchain_community.retrievers import BM25Retriever 53 | 54 | knowledge_base = datasets.load_dataset("m-ric/huggingface_doc", split="train") 55 | knowledge_base = knowledge_base.filter(lambda row: row["source"].startswith("huggingface/transformers")) 56 | 57 | source_docs = [ 58 | Document(page_content=doc["text"], metadata={"source": doc["source"].split("/")[1]}) 59 | for doc in knowledge_base 60 | ] 61 | 62 | text_splitter = RecursiveCharacterTextSplitter( 63 | chunk_size=500, 64 | chunk_overlap=50, 65 | add_start_index=True, 66 | strip_whitespace=True, 67 | separators=["\n\n", "\n", ".", " ", ""], 68 | ) 69 | docs_processed = text_splitter.split_documents(source_docs) 70 | ``` 71 | 72 | 现在文档已准备好。我们来一起构建我们的 agent RAG 系统! 73 | 👉 我们只需要一个 RetrieverTool,我们的 agent 可以利用它从知识库中检索信息。 74 | 75 | 由于我们需要将 vectordb 添加为工具的属性,我们不能简单地使用带有 `@tool` 装饰器的简单工具构造函数:因此我们将遵循 [tools 教程](../tutorials/tools) 中突出显示的高级设置。 76 | 77 | ```py 78 | from smolagents import Tool 79 | 80 | class RetrieverTool(Tool): 81 | name = "retriever" 82 | description = "Uses semantic search to retrieve the parts of transformers documentation that could be most relevant to answer your query." 83 | inputs = { 84 | "query": { 85 | "type": "string", 86 | "description": "The query to perform. This should be semantically close to your target documents. Use the affirmative form rather than a question.", 87 | } 88 | } 89 | output_type = "string" 90 | 91 | def __init__(self, docs, **kwargs): 92 | super().__init__(**kwargs) 93 | self.retriever = BM25Retriever.from_documents( 94 | docs, k=10 95 | ) 96 | 97 | def forward(self, query: str) -> str: 98 | assert isinstance(query, str), "Your search query must be a string" 99 | 100 | docs = self.retriever.invoke( 101 | query, 102 | ) 103 | return "\nRetrieved documents:\n" + "".join( 104 | [ 105 | f"\n\n===== Document {str(i)} =====\n" + doc.page_content 106 | for i, doc in enumerate(docs) 107 | ] 108 | ) 109 | 110 | retriever_tool = RetrieverTool(docs_processed) 111 | ``` 112 | BM25 检索方法是一个经典的检索方法,因为它的设置速度非常快。为了提高检索准确性,你可以使用语义搜索,使用文档的向量表示替换 BM25:因此你可以前往 [MTEB Leaderboard](https://huggingface.co/spaces/mteb/leaderboard) 选择一个好的嵌入模型。 113 | 114 | 现在我们已经创建了一个可以从知识库中检索信息的工具,现在我们可以很容易地创建一个利用这个 115 | `retriever_tool` 的 agent!此 agent 将使用如下参数初始化: 116 | - `tools`:代理将能够调用的工具列表。 117 | - `model`:为代理提供动力的 LLM。 118 | 119 | 我们的 `model` 必须是一个可调用对象,它接受一个消息的 list 作为输入,并返回文本。它还需要接受一个 stop_sequences 参数,指示何时停止生成。为了方便起见,我们直接使用包中提供的 `HfEngine` 类来获取调用 Hugging Face 的 Inference API 的 LLM 引擎。 120 | 121 | 接着,我们将使用 [meta-llama/Llama-3.3-70B-Instruct](meta-llama/Llama-3.3-70B-Instruct) 作为 llm 引 122 | 擎,因为: 123 | - 它有一个长 128k 上下文,这对处理长源文档很有用。 124 | - 它在 HF 的 Inference API 上始终免费提供! 
125 | 126 | _Note:_ 此 Inference API 托管基于各种标准的模型,部署的模型可能会在没有事先通知的情况下进行更新或替换。了解更多信息,请点击[这里](https://huggingface.co/docs/api-inference/supported-models)。 127 | 128 | ```py 129 | from smolagents import HfApiModel, CodeAgent 130 | 131 | agent = CodeAgent( 132 | tools=[retriever_tool], model=HfApiModel(model_id="meta-llama/Llama-3.3-70B-Instruct"), max_steps=4, verbose=True 133 | ) 134 | ``` 135 | 136 | 当我们初始化 CodeAgent 时,它已经自动获得了一个默认的系统提示,告诉 LLM 引擎按步骤处理并生成工具调用作为代码片段,但你可以根据需要替换此提示模板。接着,当其 `.run()` 方法被调用时,代理将负责调用 LLM 引擎,并在循环中执行工具调用,直到工具 `final_answer` 被调用,而其参数为最终答案。 137 | 138 | ```py 139 | agent_output = agent.run("For a transformers model training, which is slower, the forward or the backward pass?") 140 | 141 | print("Final output:") 142 | print(agent_output) 143 | ``` 144 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | 2 | # Contributor Covenant Code of Conduct 3 | 4 | ## Our Pledge 5 | 6 | We as members, contributors, and leaders pledge to make participation in our 7 | community a harassment-free experience for everyone, regardless of age, body 8 | size, visible or invisible disability, ethnicity, sex characteristics, gender 9 | identity and expression, level of experience, education, socio-economic status, 10 | nationality, personal appearance, race, caste, color, religion, or sexual 11 | identity and orientation. 12 | 13 | We pledge to act and interact in ways that contribute to an open, welcoming, 14 | diverse, inclusive, and healthy community. 15 | 16 | ## Our Standards 17 | 18 | Examples of behavior that contributes to a positive environment for our 19 | community include: 20 | 21 | * Demonstrating empathy and kindness toward other people 22 | * Being respectful of differing opinions, viewpoints, and experiences 23 | * Giving and gracefully accepting constructive feedback 24 | * Accepting responsibility and apologizing to those affected by our mistakes, 25 | and learning from the experience 26 | * Focusing on what is best not just for us as individuals, but for the overall 27 | community 28 | 29 | Examples of unacceptable behavior include: 30 | 31 | * The use of sexualized language or imagery, and sexual attention or advances of 32 | any kind 33 | * Trolling, insulting or derogatory comments, and personal or political attacks 34 | * Public or private harassment 35 | * Publishing others' private information, such as a physical or email address, 36 | without their explicit permission 37 | * Other conduct which could reasonably be considered inappropriate in a 38 | professional setting 39 | 40 | ## Enforcement Responsibilities 41 | 42 | Community leaders are responsible for clarifying and enforcing our standards of 43 | acceptable behavior and will take appropriate and fair corrective action in 44 | response to any behavior that they deem inappropriate, threatening, offensive, 45 | or harmful. 46 | 47 | Community leaders have the right and responsibility to remove, edit, or reject 48 | comments, commits, code, wiki edits, issues, and other contributions that are 49 | not aligned to this Code of Conduct, and will communicate reasons for moderation 50 | decisions when appropriate. 51 | 52 | ## Scope 53 | 54 | This Code of Conduct applies within all community spaces, and also applies when 55 | an individual is officially representing the community in public spaces. 
56 | Examples of representing our community include using an official e-mail address, 57 | posting via an official social media account, or acting as an appointed 58 | representative at an online or offline event. 59 | 60 | ## Enforcement 61 | 62 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 63 | reported to the community leaders responsible for enforcement at 64 | feedback@huggingface.co. 65 | All complaints will be reviewed and investigated promptly and fairly. 66 | 67 | All community leaders are obligated to respect the privacy and security of the 68 | reporter of any incident. 69 | 70 | ## Enforcement Guidelines 71 | 72 | Community leaders will follow these Community Impact Guidelines in determining 73 | the consequences for any action they deem in violation of this Code of Conduct: 74 | 75 | ### 1. Correction 76 | 77 | **Community Impact**: Use of inappropriate language or other behavior deemed 78 | unprofessional or unwelcome in the community. 79 | 80 | **Consequence**: A private, written warning from community leaders, providing 81 | clarity around the nature of the violation and an explanation of why the 82 | behavior was inappropriate. A public apology may be requested. 83 | 84 | ### 2. Warning 85 | 86 | **Community Impact**: A violation through a single incident or series of 87 | actions. 88 | 89 | **Consequence**: A warning with consequences for continued behavior. No 90 | interaction with the people involved, including unsolicited interaction with 91 | those enforcing the Code of Conduct, for a specified period of time. This 92 | includes avoiding interactions in community spaces as well as external channels 93 | like social media. Violating these terms may lead to a temporary or permanent 94 | ban. 95 | 96 | ### 3. Temporary Ban 97 | 98 | **Community Impact**: A serious violation of community standards, including 99 | sustained inappropriate behavior. 100 | 101 | **Consequence**: A temporary ban from any sort of interaction or public 102 | communication with the community for a specified period of time. No public or 103 | private interaction with the people involved, including unsolicited interaction 104 | with those enforcing the Code of Conduct, is allowed during this period. 105 | Violating these terms may lead to a permanent ban. 106 | 107 | ### 4. Permanent Ban 108 | 109 | **Community Impact**: Demonstrating a pattern of violation of community 110 | standards, including sustained inappropriate behavior, harassment of an 111 | individual, or aggression toward or disparagement of classes of individuals. 112 | 113 | **Consequence**: A permanent ban from any sort of public interaction within the 114 | community. 115 | 116 | ## Attribution 117 | 118 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 119 | version 2.1, available at 120 | [https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1]. 121 | 122 | Community Impact Guidelines were inspired by 123 | [Mozilla's code of conduct enforcement ladder][Mozilla CoC]. 124 | 125 | For answers to common questions about this code of conduct, see the FAQ at 126 | [https://www.contributor-covenant.org/faq][FAQ]. Translations are available at 127 | [https://www.contributor-covenant.org/translations][translations]. 
128 |
129 | [homepage]: https://www.contributor-covenant.org
130 | [v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
131 | [Mozilla CoC]: https://github.com/mozilla/diversity
132 | [FAQ]: https://www.contributor-covenant.org/faq
133 | [translations]: https://www.contributor-covenant.org/translations
--------------------------------------------------------------------------------
/docs/source/en/tutorials/memory.mdx:
--------------------------------------------------------------------------------
1 | 16 | # 📚 Manage your agent's memory
17 |
18 | [[open-in-colab]]
19 |
20 | In the end, an agent can be defined by simple components: it has tools and prompts.
21 | And most importantly, it has a memory of past steps, drawing a history of planning, execution, and errors.
22 |
23 | ### Replay your agent's memory
24 |
25 | We provide several features to inspect a past agent run.
26 |
27 | You can instrument the agent's run to display it in a great UI that lets you zoom in/out on specific steps, as highlighted in the [instrumentation guide](./inspect_runs).
28 |
29 | You can also use `agent.replay()`, as follows:
30 |
31 | After the agent has run:
32 | ```py
33 | from smolagents import HfApiModel, CodeAgent
34 |
35 | agent = CodeAgent(tools=[], model=HfApiModel(), verbosity_level=0)
36 |
37 | result = agent.run("What's the 20th Fibonacci number?")
38 | ```
39 |
40 | If you want to replay this last run, just use:
41 | ```py
42 | agent.replay()
43 | ```
44 |
45 | ### Dynamically change the agent's memory
46 |
47 | Many advanced use cases require dynamic modification of the agent's memory.
48 |
49 | You can access the agent's memory using:
50 |
51 | ```py
52 | from smolagents import ActionStep
53 |
54 | system_prompt_step = agent.memory.system_prompt
55 | print("The system prompt given to the agent was:")
56 | print(system_prompt_step.system_prompt)
57 |
58 | task_step = agent.memory.steps[0]
59 | print("\n\nThe first task step was:")
60 | print(task_step.task)
61 |
62 | for step in agent.memory.steps:
63 |     if isinstance(step, ActionStep):
64 |         if step.error is not None:
65 |             print(f"\nStep {step.step_number} got this error:\n{step.error}\n")
66 |         else:
67 |             print(f"\nStep {step.step_number} got these observations:\n{step.observations}\n")
68 | ```
69 |
70 | Use `agent.memory.get_full_steps()` to get the full steps as dictionaries.
71 |
72 | You can also use step callbacks to dynamically change the agent's memory.
73 |
74 | Step callbacks can access the `agent` itself in their arguments, so they can access any memory step as highlighted above, and change it if needed. For instance, let's say you are observing screenshots of each step performed by a web browser agent. You want to log the newest screenshot, and remove the images from older steps to save on token costs.
75 |
76 | You could run something like the following.
77 | _Note: this code is incomplete; some imports and object definitions have been removed for the sake of concision. Visit [the original script](https://github.com/huggingface/smolagents/blob/main/src/smolagents/vision_web_browser.py) to get the full working code._
78 |
79 | ```py
80 | import helium
81 | from PIL import Image
82 | from io import BytesIO
83 | from time import sleep
84 |
85 | def update_screenshot(memory_step: ActionStep, agent: CodeAgent) -> None:
86 |     sleep(1.0)  # Let JavaScript animations happen before taking the screenshot
87 |     driver = helium.get_driver()
88 |     latest_step = memory_step.step_number
89 |     for previous_memory_step in agent.memory.steps:  # Remove previous screenshots from logs for lean processing
90 |         if isinstance(previous_memory_step, ActionStep) and previous_memory_step.step_number <= latest_step - 2:
91 |             previous_memory_step.observations_images = None
92 |     png_bytes = driver.get_screenshot_as_png()
93 |     image = Image.open(BytesIO(png_bytes))
94 |     memory_step.observations_images = [image.copy()]
95 | ```
96 |
97 | Then you should pass this function to the `step_callbacks` argument upon initialization of your agent:
98 |
99 | ```py
100 | CodeAgent(
101 |     tools=[DuckDuckGoSearchTool(), go_back, close_popups, search_item_ctrl_f],
102 |     model=model,
103 |     additional_authorized_imports=["helium"],
104 |     step_callbacks=[update_screenshot],
105 |     max_steps=20,
106 |     verbosity_level=2,
107 | )
108 | ```
109 |
110 | Head to our [vision web browser code](https://github.com/huggingface/smolagents/blob/main/src/smolagents/vision_web_browser.py) to see the full working example.
111 |
112 | ### Run agents one step at a time
113 |
114 | This can be useful in case you have tool calls that take days: you can just run your agent step by step.
115 | This will also let you update the memory on each step.
116 |
117 | ```py
118 | from smolagents import HfApiModel, CodeAgent, ActionStep, TaskStep
119 |
120 | agent = CodeAgent(tools=[], model=HfApiModel(), verbosity_level=1)
121 | print(agent.memory.system_prompt)
122 |
123 | task = "What is the 20th Fibonacci number?"
124 |
125 | # You could modify the memory as needed here by inputting the memory of another agent.
126 | # agent.memory.steps = previous_agent.memory.steps
127 |
128 | # Let's start a new task!
129 | agent.memory.steps.append(TaskStep(task=task, task_images=[]))
130 |
131 | final_answer = None
132 | step_number = 1
133 | while final_answer is None and step_number <= 10:
134 |     memory_step = ActionStep(
135 |         step_number=step_number,
136 |         observations_images=[],
137 |     )
138 |     # Run one step.
139 |     final_answer = agent.step(memory_step)
140 |     agent.memory.steps.append(memory_step)
141 |     step_number += 1
142 |
143 |     # Change the memory as you please!
144 |     # For instance to update the latest step:
145 |     # agent.memory.steps[-1] = ...
146 |
147 | print("The final answer is:", final_answer)
148 | ```
--------------------------------------------------------------------------------
/docs/source/zh/examples/text_to_sql.mdx:
--------------------------------------------------------------------------------
1 | 16 | # Text-to-SQL
17 |
18 | [[open-in-colab]]
19 |
20 | In this tutorial, we'll see how to implement an agent that leverages SQL using `smolagents`.
21 |
22 | > Let's start with the classic question: why not simply use a standard text-to-SQL pipeline?
23 |
24 | A standard text-to-SQL pipeline is brittle, since the generated SQL query can be incorrect. Even worse, the query could be wrong without raising an error, instead returning incorrect or useless results.
25 |
26 | 👉 An agent system, by contrast, can critically inspect the output and decide whether the query needs to be changed, which brings a huge performance boost.
27 |
28 | Let's build this agent together!
💪 29 |
30 | First, let's set up a SQL environment:
31 | ```py
32 | from sqlalchemy import (
33 |     create_engine,
34 |     MetaData,
35 |     Table,
36 |     Column,
37 |     String,
38 |     Integer,
39 |     Float,
40 |     insert,
41 |     inspect,
42 |     text,
43 | )
44 |
45 | engine = create_engine("sqlite:///:memory:")
46 | metadata_obj = MetaData()
47 |
48 | # create receipts SQL table
49 | table_name = "receipts"
50 | receipts = Table(
51 |     table_name,
52 |     metadata_obj,
53 |     Column("receipt_id", Integer, primary_key=True),
54 |     Column("customer_name", String(16), primary_key=True),
55 |     Column("price", Float),
56 |     Column("tip", Float),
57 | )
58 | metadata_obj.create_all(engine)
59 |
60 | rows = [
61 |     {"receipt_id": 1, "customer_name": "Alan Payne", "price": 12.06, "tip": 1.20},
62 |     {"receipt_id": 2, "customer_name": "Alex Mason", "price": 23.86, "tip": 0.24},
63 |     {"receipt_id": 3, "customer_name": "Woodrow Wilson", "price": 53.43, "tip": 5.43},
64 |     {"receipt_id": 4, "customer_name": "Margaret James", "price": 21.11, "tip": 1.00},
65 | ]
66 | for row in rows:
67 |     stmt = insert(receipts).values(**row)
68 |     with engine.begin() as connection:
69 |         cursor = connection.execute(stmt)
70 | ```
71 |
72 | ### Build the agent
73 |
74 | Now let's build an agent that will answer questions using SQL queries. The tool's `description` attribute will be embedded in the LLM's prompt by the agent system: it gives the LLM information about how to use the tool. This is exactly where we want to describe the SQL table.
75 |
76 | ```py
77 | inspector = inspect(engine)
78 | columns_info = [(col["name"], col["type"]) for col in inspector.get_columns("receipts")]
79 |
80 | table_description = "Columns:\n" + "\n".join([f" - {name}: {col_type}" for name, col_type in columns_info])
81 | print(table_description)
82 | ```
83 |
84 | ```text
85 | Columns:
86 |  - receipt_id: INTEGER
87 |  - customer_name: VARCHAR(16)
88 |  - price: FLOAT
89 |  - tip: FLOAT
90 | ```
91 |
92 | Now let's build our tool. It needs the following (see the [tools documentation](../tutorials/tools) for more detail):
93 |
94 | - A docstring with an `Args:` section listing its arguments.
95 | - Type hints on both inputs and output.
96 |
97 | ```py
98 | from smolagents import tool
99 |
100 | @tool
101 | def sql_engine(query: str) -> str:
102 |     """
103 |     Allows you to perform SQL queries on the table. Returns a string representation of the result.
104 |     The table is named 'receipts'. Its description is as follows:
105 |         Columns:
106 |         - receipt_id: INTEGER
107 |         - customer_name: VARCHAR(16)
108 |         - price: FLOAT
109 |         - tip: FLOAT
110 |
111 |     Args:
112 |         query: The query to perform. This should be correct SQL.
113 |     """
114 |     output = ""
115 |     with engine.connect() as con:
116 |         rows = con.execute(text(query))
117 |         for row in rows:
118 |             output += "\n" + str(row)
119 |     return output
120 | ```
121 |
122 | We now use this tool to create an agent. We use `CodeAgent`, smolagents' main agent class: an agent that writes its actions in code and iterates on previous outputs following the ReAct framework.
123 |
124 | The model is the LLM that powers the agent system. `HfApiModel` lets you call an LLM through HF's Inference API, either via a serverless or a dedicated endpoint, but you could also use any proprietary API.
125 |
126 | ```py
127 | from smolagents import CodeAgent, HfApiModel
128 |
129 | agent = CodeAgent(
130 |     tools=[sql_engine],
131 |     model=HfApiModel(model_id="meta-llama/Meta-Llama-3.1-8B-Instruct"),
132 | )
133 | agent.run("Can you give me the name of the client who got the most expensive receipt?")
134 | ```
135 |
136 | ### Level 2: Table joins
137 |
138 | Now let's make it more challenging! We want our agent to handle joins across multiple tables. So let's create a second table recording the name of the waiter for each receipt_id!
139 |
140 | ```py
141 | table_name = "waiters"
142 | waiters = Table(
143 |     table_name,
144 |     metadata_obj,
145 |     Column("receipt_id", Integer, primary_key=True),
146 |     Column("waiter_name", String(16), primary_key=True),
147 | )
148 | metadata_obj.create_all(engine)
149 |
150 | rows = [
151 |     {"receipt_id": 1, "waiter_name": "Corey Johnson"},
152 |     {"receipt_id": 2, "waiter_name": "Michael Watts"},
153 |     {"receipt_id": 3, "waiter_name": "Michael Watts"},
154 |     {"receipt_id": 4, "waiter_name": "Margaret James"},
155 | ]
156 | for row in rows:
157 |     stmt = insert(waiters).values(**row)
158 |     with engine.begin() as connection:
159 |         cursor = connection.execute(stmt)
160 | ```
161 |
162 | Since we changed the tables, we need to update the description of our `sql_engine` tool so the LLM can properly leverage the information from both tables.
163 |
164 | ```py
165 | updated_description = """Allows you to perform SQL queries on the table. Beware that this tool's output is a string representation of the execution output.
166 | It can use the following tables:"""
167 |
168 | inspector = inspect(engine)
169 | for table in ["receipts", "waiters"]:
170 |     columns_info = [(col["name"], col["type"]) for col in inspector.get_columns(table)]
171 |
172 |     table_description = f"Table '{table}':\n"
173 |
174 |     table_description += "Columns:\n" + "\n".join([f" - {name}: {col_type}" for name, col_type in columns_info])
175 |     updated_description += "\n\n" + table_description
176 |
177 | print(updated_description)
178 | ```
179 |
180 | Since this request is a bit harder than the previous one, we'll switch the LLM engine to the more powerful [Qwen/Qwen2.5-Coder-32B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct)!
181 |
182 | ```py
183 | sql_engine.description = updated_description
184 |
185 | agent = CodeAgent(
186 |     tools=[sql_engine],
187 |     model=HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct"),
188 | )
189 |
190 | agent.run("Which waiter got more total money from tips?")
191 | ```
192 |
193 | It works right away! And the setup was so simple, wasn't it?
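If you want to double-check the agent's conclusion, you can also call the `sql_engine` tool directly (it still behaves like a plain function) with a hand-written join. The query below is just one possible phrasing, shown for illustration:

```py
# Optional sanity check: compute total tips per waiter without the agent.
result = sql_engine(
    """
    SELECT waiters.waiter_name, SUM(receipts.tip) AS total_tips
    FROM receipts
    JOIN waiters ON receipts.receipt_id = waiters.receipt_id
    GROUP BY waiters.waiter_name
    ORDER BY total_tips DESC
    """
)
print(result)  # With the sample rows above, Michael Watts should come out on top.
```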
194 |
195 | This concludes the example! We've covered these concepts:
196 |
197 | - Building new tools.
198 | - Updating a tool's description.
199 | - Switching to a stronger LLM helps agent reasoning.
200 |
201 | ✅ Now you can go build the text-to-SQL system you've always dreamed of! ✨
202 |
--------------------------------------------------------------------------------
/docs/source/hi/reference/agents.mdx:
--------------------------------------------------------------------------------
1 | 16 | # Agents
17 |
18 |
19 |
20 | Smolagents is an experimental API that may change at any time. Results returned by the agents can vary, as the APIs or the underlying models are prone to change.
21 |
22 |
23 |
24 | To learn more about agents and tools, make sure to read the [introductory guide](../index).
25 | This page contains the API docs for the underlying classes.
26 |
27 | ## Agents
28 |
29 | Our agents inherit from [`MultiStepAgent`], which means they can act in multiple steps, each step consisting of one thought, then one tool call and execution. Read more in [this conceptual guide](../conceptual_guides/react).
30 |
31 | We provide two types of agents, based on the main [`Agent`] class.
32 | - [`CodeAgent`] is the default agent; it writes its tool calls in Python code.
33 | - [`ToolCallingAgent`] writes its tool calls in JSON.
34 |
35 | Both require a `model` and a list of tools `tools` at initialization.
36 |
37 | ### Classes of agents
38 |
39 | [[autodoc]] MultiStepAgent
40 |
41 | [[autodoc]] CodeAgent
42 |
43 | [[autodoc]] ToolCallingAgent
44 |
45 | ### ManagedAgent
46 |
47 | _This class is deprecated since 1.8.0: now you just need to pass name and description attributes to an agent to directly use it as previously done with a ManagedAgent._
48 |
49 | ### stream_to_gradio
50 |
51 | [[autodoc]] stream_to_gradio
52 |
53 | ### GradioUI
54 |
55 | [[autodoc]] GradioUI
56 |
57 | ## Models
58 |
59 | You're free to create and use your own models to power your agent.
60 |
61 | You can use any `model` callable for your agent, as long as:
62 | 1. It follows the [messages format](./chat_templating) (`List[Dict[str, str]]`) for its input `messages`, and it returns a `str`.
63 | 2.
It stops generating output at the sequences passed in the `stop_sequences` argument.
64 |
65 | To define your LLM, you can write a `custom_model` method that accepts a list of [messages](./chat_templating) and returns an object with a `.content` attribute containing the generated text. This callable also needs to accept a `stop_sequences` argument that indicates when to stop generating.
66 |
67 | ```python
68 | from huggingface_hub import login, InferenceClient
69 |
70 | login("")  # paste your Hugging Face API token here
71 |
72 | model_id = "meta-llama/Llama-3.3-70B-Instruct"
73 |
74 | client = InferenceClient(model=model_id)
75 |
76 | def custom_model(messages, stop_sequences=["Task"]):
77 |     response = client.chat_completion(messages, stop=stop_sequences, max_tokens=1000)
78 |     answer = response.choices[0].message
79 |     return answer
80 | ```
81 |
82 | Additionally, `custom_model` can also take a `grammar` argument. If you specify a `grammar` at agent initialization, this argument will be passed along to calls to the model, together with the `grammar` you defined at initialization, to allow [constrained generation](https://huggingface.co/docs/text-generation-inference/conceptual/guidance) and force properly-formatted agent outputs.
83 |
84 | ### TransformersModel
85 |
86 | For convenience, we added a `TransformersModel` that implements the points above by building a local `transformers` pipeline for the model_id given at initialization.
87 |
88 | ```python
89 | from smolagents import TransformersModel
90 |
91 | model = TransformersModel(model_id="HuggingFaceTB/SmolLM-135M-Instruct")
92 |
93 | print(model([{"role": "user", "content": "Ok!"}], stop_sequences=["great"]))
94 | ```
95 | ```text
96 | >>> What a
97 | ```
98 |
99 | [[autodoc]] TransformersModel
100 |
101 | ### HfApiModel
102 |
103 | `HfApiModel` wraps an [HF Inference API](https://huggingface.co/docs/api-inference/index) client for the execution of the LLM.
104 |
105 | ```python
106 | from smolagents import HfApiModel
107 |
108 | messages = [
109 |     {"role": "user", "content": "Hello, how are you?"},
110 |     {"role": "assistant", "content": "I'm doing great. How can I help you today?"},
111 |     {"role": "user", "content": "No need to help, take it easy."},
112 | ]
113 |
114 | model = HfApiModel()
115 | print(model(messages))
116 | ```
117 | ```text
118 | >>> Of course! If you change your mind, feel free to reach out. Take care!
119 | ```
120 | [[autodoc]] HfApiModel
121 |
122 | ### LiteLLMModel
123 |
124 | `LiteLLMModel` leverages [LiteLLM](https://www.litellm.ai/) to support 100+ LLMs from various providers.
125 | You can pass kwargs at model initialization that will then be used whenever the model is called; for instance, below we pass `temperature`.
126 |
127 | ```python
128 | from smolagents import LiteLLMModel
129 |
130 | messages = [
131 |     {"role": "user", "content": "Hello, how are you?"},
132 |     {"role": "assistant", "content": "I'm doing great.
How can I help you today?"}, 133 | {"role": "user", "content": "No need to help, take it easy."}, 134 | ] 135 | 136 | model = LiteLLMModel(model_id="anthropic/claude-3-5-sonnet-latest", temperature=0.2, max_tokens=10) 137 | print(model(messages)) 138 | ``` 139 | 140 | [[autodoc]] LiteLLMModel 141 | 142 | ### OpenAiServerModel 143 | 144 | 145 | यह क्लास आपको किसी भी OpenAIServer कम्पैटिबल मॉडल को कॉल करने देती है। 146 | यहाँ बताया गया है कि आप इसे कैसे सेट कर सकते हैं (आप दूसरे सर्वर को पॉइंट करने के लिए `api_base` url को कस्टमाइज़ कर सकते हैं): 147 | ```py 148 | import os 149 | from smolagents import OpenAIServerModel 150 | 151 | model = OpenAIServerModel( 152 | model_id="gpt-4o", 153 | api_base="https://api.openai.com/v1", 154 | api_key=os.environ["OPENAI_API_KEY"], 155 | ) 156 | ``` 157 | 158 | ## Prompts 159 | 160 | [[autodoc]] smolagents.agents.PromptTemplates 161 | 162 | [[autodoc]] smolagents.agents.PlanningPromptTemplate 163 | 164 | [[autodoc]] smolagents.agents.ManagedAgentPromptTemplate 165 | 166 | [[autodoc]] smolagents.agents.FinalAnswerPromptTemplate 167 | -------------------------------------------------------------------------------- /tests/test_memory.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from smolagents.agents import ToolCall 4 | from smolagents.memory import ( 5 | ActionStep, 6 | AgentMemory, 7 | ChatMessage, 8 | MemoryStep, 9 | Message, 10 | MessageRole, 11 | PlanningStep, 12 | SystemPromptStep, 13 | TaskStep, 14 | ) 15 | 16 | 17 | class TestAgentMemory: 18 | def test_initialization(self): 19 | system_prompt = "This is a system prompt." 20 | memory = AgentMemory(system_prompt=system_prompt) 21 | assert memory.system_prompt.system_prompt == system_prompt 22 | assert memory.steps == [] 23 | 24 | 25 | class TestMemoryStep: 26 | def test_initialization(self): 27 | step = MemoryStep() 28 | assert isinstance(step, MemoryStep) 29 | 30 | def test_dict(self): 31 | step = MemoryStep() 32 | assert step.dict() == {} 33 | 34 | def test_to_messages(self): 35 | step = MemoryStep() 36 | with pytest.raises(NotImplementedError): 37 | step.to_messages() 38 | 39 | 40 | def test_action_step_to_messages(): 41 | action_step = ActionStep( 42 | model_input_messages=[Message(role=MessageRole.USER, content="Hello")], 43 | tool_calls=[ 44 | ToolCall(id="id", name="get_weather", arguments={"location": "Paris"}), 45 | ], 46 | start_time=0.0, 47 | end_time=1.0, 48 | step_number=1, 49 | error=None, 50 | duration=1.0, 51 | model_output_message=ChatMessage(role=MessageRole.ASSISTANT, content="Hi"), 52 | model_output="Hi", 53 | observations="This is a nice observation", 54 | observations_images=["image1.png"], 55 | action_output="Output", 56 | ) 57 | messages = action_step.to_messages() 58 | assert len(messages) == 4 59 | for message in messages: 60 | assert isinstance(message, dict) 61 | assert "role" in message 62 | assert "content" in message 63 | assert isinstance(message["role"], MessageRole) 64 | assert isinstance(message["content"], list) 65 | assistant_message = messages[0] 66 | assert assistant_message["role"] == MessageRole.ASSISTANT 67 | assert len(assistant_message["content"]) == 1 68 | for content in assistant_message["content"]: 69 | assert isinstance(content, dict) 70 | assert "type" in content 71 | assert "text" in content 72 | message = messages[1] 73 | assert message["role"] == MessageRole.TOOL_CALL 74 | 75 | assert len(message["content"]) == 1 76 | text_content = message["content"][0] 77 | assert 
isinstance(text_content, dict) 78 | assert "type" in text_content 79 | assert "text" in text_content 80 | 81 | image_message = messages[2] 82 | image_content = image_message["content"][0] 83 | assert isinstance(image_content, dict) 84 | assert "type" in image_content 85 | assert "image" in image_content 86 | 87 | observation_message = messages[3] 88 | assert observation_message["role"] == MessageRole.TOOL_RESPONSE 89 | assert "Observation:\nThis is a nice observation" in observation_message["content"][0]["text"] 90 | 91 | 92 | def test_action_step_to_messages_no_tool_calls_with_observations(): 93 | action_step = ActionStep( 94 | model_input_messages=None, 95 | tool_calls=None, 96 | start_time=None, 97 | end_time=None, 98 | step_number=None, 99 | error=None, 100 | duration=None, 101 | model_output_message=None, 102 | model_output=None, 103 | observations="This is an observation.", 104 | observations_images=None, 105 | action_output=None, 106 | ) 107 | messages = action_step.to_messages() 108 | assert len(messages) == 1 109 | observation_message = messages[0] 110 | assert observation_message["role"] == MessageRole.TOOL_RESPONSE 111 | assert "Observation:\nThis is an observation." in observation_message["content"][0]["text"] 112 | 113 | 114 | def test_planning_step_to_messages(): 115 | planning_step = PlanningStep( 116 | model_input_messages=[Message(role=MessageRole.USER, content="Hello")], 117 | model_output_message=ChatMessage(role=MessageRole.ASSISTANT, content="Plan"), 118 | plan="This is a plan.", 119 | ) 120 | messages = planning_step.to_messages(summary_mode=False) 121 | assert len(messages) == 2 122 | for message in messages: 123 | assert isinstance(message, dict) 124 | assert "role" in message 125 | assert "content" in message 126 | assert isinstance(message["content"], list) 127 | assert len(message["content"]) == 1 128 | for content in message["content"]: 129 | assert isinstance(content, dict) 130 | assert "type" in content 131 | assert "text" in content 132 | assert messages[0]["role"] == MessageRole.ASSISTANT 133 | assert messages[1]["role"] == MessageRole.USER 134 | 135 | 136 | def test_task_step_to_messages(): 137 | task_step = TaskStep(task="This is a task.", task_images=["task_image1.png"]) 138 | messages = task_step.to_messages(summary_mode=False) 139 | assert len(messages) == 1 140 | for message in messages: 141 | assert isinstance(message, dict) 142 | assert "role" in message 143 | assert "content" in message 144 | assert isinstance(message["role"], MessageRole) 145 | assert message["role"] == MessageRole.USER 146 | assert isinstance(message["content"], list) 147 | assert len(message["content"]) == 2 148 | text_content = message["content"][0] 149 | assert isinstance(text_content, dict) 150 | assert "type" in text_content 151 | assert "text" in text_content 152 | for image_content in message["content"][1:]: 153 | assert isinstance(image_content, dict) 154 | assert "type" in image_content 155 | assert "image" in image_content 156 | 157 | 158 | def test_system_prompt_step_to_messages(): 159 | system_prompt_step = SystemPromptStep(system_prompt="This is a system prompt.") 160 | messages = system_prompt_step.to_messages(summary_mode=False) 161 | assert len(messages) == 1 162 | for message in messages: 163 | assert isinstance(message, dict) 164 | assert "role" in message 165 | assert "content" in message 166 | assert isinstance(message["role"], MessageRole) 167 | assert message["role"] == MessageRole.SYSTEM 168 | assert isinstance(message["content"], list) 169 | assert 
len(message["content"]) == 1 170 | for content in message["content"]: 171 | assert isinstance(content, dict) 172 | assert "type" in content 173 | assert "text" in content 174 | --------------------------------------------------------------------------------