├── tests ├── __init__.py ├── data │ └── 000000039769.png ├── conftest.py ├── utils │ └── markers.py ├── test_search.py ├── test_import.py ├── test_final_answer.py ├── fixtures │ ├── tools.py │ └── agents.py ├── test_types.py ├── test_remote_executors.py ├── test_tool_validation.py ├── test_default_tools.py ├── test_cli.py ├── test_gradio_ui.py └── test_memory.py ├── .github ├── ISSUE_TEMPLATE │ ├── custom.md │ ├── bug_report.md │ └── feature_request.md └── workflows │ ├── trufflehog.yml │ ├── upload_pr_documentation.yml │ ├── build_pr_documentation.yml │ ├── build_documentation.yml │ ├── quality.yml │ └── tests.yml ├── examples ├── open_deep_research │ ├── app.py │ ├── requirements.txt │ ├── README.md │ ├── scripts │ │ ├── run_agents.py │ │ ├── gaia_scorer.py │ │ ├── reformulator.py │ │ └── text_inspector_tool.py │ └── run.py ├── sandboxed_execution.py ├── gradio_ui.py ├── inspect_multiagent_run.py ├── rag.py ├── text_to_sql.py ├── agent_from_any_llm.py └── rag_using_chromadb.py ├── .pre-commit-config.yaml ├── Makefile ├── docs └── source │ ├── en │ ├── _config.py │ ├── _toctree.yml │ ├── reference │ │ ├── agents.mdx │ │ └── tools.mdx │ ├── conceptual_guides │ │ └── react.mdx │ ├── index.mdx │ └── tutorials │ │ └── memory.mdx │ ├── hi │ ├── _config.py │ ├── _toctree.yml │ ├── conceptual_guides │ │ └── react.mdx │ ├── reference │ │ ├── tools.mdx │ │ └── agents.mdx │ ├── index.mdx │ └── tutorials │ │ ├── inspect_runs.mdx │ │ └── secure_code_execution.mdx │ └── zh │ ├── _config.py │ ├── _toctree.yml │ ├── conceptual_guides │ ├── react.mdx │ └── intro_agents.mdx │ ├── reference │ ├── agents.mdx │ ├── tools.mdx │ └── models.mdx │ ├── index.mdx │ ├── tutorials │ ├── secure_code_execution.mdx │ └── memory.mdx │ └── examples │ ├── rag.mdx │ └── text_to_sql.mdx ├── e2b.toml ├── src └── smolagents │ ├── __init__.py │ └── cli.py ├── utils └── check_tests_in_ci.py ├── .gitignore ├── pyproject.toml ├── CONTRIBUTING.md └── CODE_OF_CONDUCT.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/data/000000039769.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/merveenoyan/smolagents/main/tests/data/000000039769.png -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/custom.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Custom issue template 3 | about: Describe this issue template's purpose here. 
4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | 11 | -------------------------------------------------------------------------------- /examples/open_deep_research/app.py: -------------------------------------------------------------------------------- 1 | from run import create_agent 2 | 3 | from smolagents.gradio_ui import GradioUI 4 | 5 | 6 | agent = create_agent() 7 | 8 | demo = GradioUI(agent) 9 | 10 | if __name__ == "__main__": 11 | demo.launch() 12 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/astral-sh/ruff-pre-commit 3 | rev: v0.2.1 4 | hooks: 5 | - id: ruff 6 | args: 7 | - --fix 8 | - id: ruff-format 9 | - repo: https://github.com/pre-commit/pre-commit-hooks 10 | rev: v4.5.0 11 | hooks: 12 | - id: check-merge-conflict 13 | - id: check-yaml 14 | -------------------------------------------------------------------------------- /.github/workflows/trufflehog.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | 4 | name: Secret Leaks 5 | 6 | permissions: 7 | contents: read 8 | 9 | jobs: 10 | trufflehog: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Checkout code 14 | uses: actions/checkout@v4 15 | with: 16 | fetch-depth: 0 17 | - name: Secret Scanning 18 | uses: trufflesecurity/trufflehog@main -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: quality style test docs utils 2 | 3 | check_dirs := examples src tests utils 4 | 5 | # Check code quality of the source code 6 | quality: 7 | ruff check $(check_dirs) 8 | ruff format --check $(check_dirs) 9 | python utils/check_tests_in_ci.py 10 | 11 | # Format source code automatically 12 | style: 13 | ruff check $(check_dirs) --fix 14 | ruff format $(check_dirs) 15 | 16 | # Run smolagents tests 17 | test: 18 | pytest ./tests/ -------------------------------------------------------------------------------- /.github/workflows/upload_pr_documentation.yml: -------------------------------------------------------------------------------- 1 | name: Upload PR Documentation 2 | 3 | on: 4 | workflow_run: 5 | workflows: ["Build PR Documentation"] 6 | types: 7 | - completed 8 | 9 | jobs: 10 | build: 11 | uses: huggingface/doc-builder/.github/workflows/upload_pr_documentation.yml@main 12 | with: 13 | package_name: smolagents 14 | secrets: 15 | hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }} 16 | comment_bot_token: ${{ secrets.COMMENT_BOT_TOKEN }} -------------------------------------------------------------------------------- /docs/source/en/_config.py: -------------------------------------------------------------------------------- 1 | # docstyle-ignore 2 | INSTALL_CONTENT = """ 3 | # Installation 4 | ! pip install smolagents 5 | # To install from source instead of the last release, comment the command above and uncomment the following one. 6 | # ! 
pip install git+https://github.com/huggingface/smolagents.git 7 | """ 8 | 9 | notebook_first_cells = [{"type": "code", "content": INSTALL_CONTENT}] 10 | black_avoid_patterns = { 11 | "{processor_class}": "FakeProcessorClass", 12 | "{model_class}": "FakeModelClass", 13 | "{object_class}": "FakeObjectClass", 14 | } 15 | -------------------------------------------------------------------------------- /docs/source/hi/_config.py: -------------------------------------------------------------------------------- 1 | # docstyle-ignore 2 | INSTALL_CONTENT = """ 3 | # Installation 4 | ! pip install smolagents 5 | # To install from source instead of the last release, comment the command above and uncomment the following one. 6 | # ! pip install git+https://github.com/huggingface/smolagents.git 7 | """ 8 | 9 | notebook_first_cells = [{"type": "code", "content": INSTALL_CONTENT}] 10 | black_avoid_patterns = { 11 | "{processor_class}": "FakeProcessorClass", 12 | "{model_class}": "FakeModelClass", 13 | "{object_class}": "FakeObjectClass", 14 | } 15 | -------------------------------------------------------------------------------- /docs/source/zh/_config.py: -------------------------------------------------------------------------------- 1 | # docstyle-ignore 2 | INSTALL_CONTENT = """ 3 | # Installation 4 | ! pip install smolagents 5 | # To install from source instead of the last release, comment the command above and uncomment the following one. 6 | # ! pip install git+https://github.com/huggingface/smolagents.git 7 | """ 8 | 9 | notebook_first_cells = [{"type": "code", "content": INSTALL_CONTENT}] 10 | black_avoid_patterns = { 11 | "{processor_class}": "FakeProcessorClass", 12 | "{model_class}": "FakeModelClass", 13 | "{object_class}": "FakeObjectClass", 14 | } 15 | -------------------------------------------------------------------------------- /e2b.toml: -------------------------------------------------------------------------------- 1 | # This is a config for E2B sandbox template. 
2 | # You can use template ID (qywp2ctmu2q7jzprcf4j) to create a sandbox: 3 | 4 | # Python SDK 5 | # from e2b import Sandbox, AsyncSandbox 6 | # sandbox = Sandbox("qywp2ctmu2q7jzprcf4j") # Sync sandbox 7 | # sandbox = await AsyncSandbox.create("qywp2ctmu2q7jzprcf4j") # Async sandbox 8 | 9 | # JS SDK 10 | # import { Sandbox } from 'e2b' 11 | # const sandbox = await Sandbox.create('qywp2ctmu2q7jzprcf4j') 12 | 13 | team_id = "f8776d3a-df2f-4a1d-af48-68c2e13b3b87" 14 | start_cmd = "/root/.jupyter/start-up.sh" 15 | dockerfile = "e2b.Dockerfile" 16 | template_id = "qywp2ctmu2q7jzprcf4j" 17 | -------------------------------------------------------------------------------- /examples/sandboxed_execution.py: -------------------------------------------------------------------------------- 1 | from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel 2 | 3 | 4 | model = HfApiModel() 5 | 6 | agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=model, executor_type="docker") 7 | output = agent.run("How many seconds would it take for a leopard at full speed to run through Pont des Arts?") 8 | print("Docker executor result:", output) 9 | 10 | agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=model, executor_type="e2b") 11 | output = agent.run("How many seconds would it take for a leopard at full speed to run through Pont des Arts?") 12 | print("E2B executor result:", output) 13 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: The clearer your bug report, the faster it will be fixed! 4 | title: "[BUG]" 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **Code to reproduce the error** 14 | The simplest code snippet that produces your bug. 15 | 16 | **Error logs (if any)** 17 | Provide error logs if there are any. 18 | 19 | **Expected behavior** 20 | A clear and concise description of what you expected to happen. 21 | 22 | **Packages version:** 23 | Run `pip freeze | grep smolagents` and paste it here. 24 | 25 | **Additional context** 26 | Add any other context about the problem here. 
27 | -------------------------------------------------------------------------------- /.github/workflows/build_pr_documentation.yml: -------------------------------------------------------------------------------- 1 | name: Build PR Documentation 2 | 3 | on: 4 | pull_request: 5 | paths: 6 | - 'docs/source/**' 7 | - 'assets/**' 8 | - '.github/workflows/doc-pr-build.yml' 9 | 10 | concurrency: 11 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 12 | cancel-in-progress: true 13 | 14 | jobs: 15 | build: 16 | uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@main 17 | with: 18 | commit_sha: ${{ github.event.pull_request.head.sha }} 19 | pr_number: ${{ github.event.number }} 20 | package: smolagents 21 | languages: en 22 | # additional_args: --not_python_module # use this arg if repository is documentation only -------------------------------------------------------------------------------- /.github/workflows/build_documentation.yml: -------------------------------------------------------------------------------- 1 | name: Build documentation 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - doc-builder* 8 | - v*-release 9 | - use_templates 10 | paths: 11 | - 'docs/source/**' 12 | - 'assets/**' 13 | - '.github/workflows/doc-build.yml' 14 | - 'pyproject.toml' 15 | 16 | jobs: 17 | build: 18 | uses: huggingface/doc-builder/.github/workflows/build_main_documentation.yml@main 19 | with: 20 | commit_sha: ${{ github.sha }} 21 | package: smolagents 22 | languages: en 23 | notebook_folder: smolagents_doc 24 | # additional_args: --not_python_module # use this arg if repository is documentation only 25 | secrets: 26 | token: ${{ secrets.HUGGINGFACE_PUSH }} 27 | hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }} -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import patch 2 | 3 | import pytest 4 | 5 | from smolagents.agents import MultiStepAgent 6 | from smolagents.monitoring import LogLevel 7 | 8 | 9 | # Import fixture modules as plugins 10 | pytest_plugins = ["tests.fixtures.agents", "tests.fixtures.tools"] 11 | 12 | original_multi_step_agent_init = MultiStepAgent.__init__ 13 | 14 | 15 | @pytest.fixture(autouse=True) 16 | def patch_multi_step_agent_with_suppressed_logging(): 17 | with patch.object(MultiStepAgent, "__init__", autospec=True) as mock_init: 18 | 19 | def init_with_suppressed_logging(self, *args, verbosity_level=LogLevel.OFF, **kwargs): 20 | original_multi_step_agent_init(self, *args, verbosity_level=verbosity_level, **kwargs) 21 | 22 | mock_init.side_effect = init_with_suppressed_logging 23 | yield 24 | -------------------------------------------------------------------------------- /examples/gradio_ui.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | 3 | import requests 4 | from PIL import Image 5 | 6 | from smolagents import CodeAgent, GradioUI, HfApiModel 7 | 8 | 9 | def add_agent_image(memory_step, agent): 10 | url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/smolagents/smolagents.png" 11 | response = requests.get(url) 12 | memory_step.observations_images = [Image.open(BytesIO(response.content))] 13 | 14 | 15 | agent = CodeAgent( 16 | tools=[], 17 | model=HfApiModel(), 18 | verbosity_level=1, 19 | planning_interval=3, 20 | name="example_agent", 21 | description="This is an example agent that 
has no tools but will always see an image at the end of its step.",
22 |     step_callbacks=[add_agent_image],
23 | )
24 | 
25 | GradioUI(agent, file_upload_folder="./data").launch()
26 | 
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest an idea for this project
4 | title: ''
5 | labels: enhancement
6 | assignees: ''
7 | 
8 | ---
9 | 
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 | 
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 | 
16 | **Is this not possible with the current options?**
17 | Make sure to consider if what you're requesting can be done with current abstractions.
18 | 
19 | **Describe alternatives you've considered**
20 | A clear and concise description of any alternative solutions or features you've considered.
21 | 
22 | **Additional context**
23 | Add any other context or screenshots about the feature request here.
24 | 
--------------------------------------------------------------------------------
/.github/workflows/quality.yml:
--------------------------------------------------------------------------------
1 | name: Quality Check
2 | 
3 | on: [pull_request]
4 | 
5 | env:
6 |   UV_SYSTEM_PYTHON: 1
7 | 
8 | jobs:
9 |   check_code_quality:
10 |     runs-on: ubuntu-latest
11 |     env:
12 |       UV_HTTP_TIMEOUT: 600 # max 10min to install deps
13 | 
14 |     steps:
15 |       - uses: actions/checkout@v2
16 |       - name: Set up Python
17 |         uses: actions/setup-python@v2
18 |         with:
19 |           python-version: "3.12"
20 | 
21 |       # Setup venv
22 |       - name: Setup uv
23 |         run: |
24 |           pip install --upgrade uv
25 | 
26 |       - name: Install dependencies
27 |         run: uv pip install "smolagents[quality] @ ."
28 | 
29 |       # Equivalent of "make quality" but step by step
30 |       - run: ruff check examples src tests utils # linter
31 |       - run: ruff format --check examples src tests utils # formatter
32 |       - run: python utils/check_tests_in_ci.py
33 | 
--------------------------------------------------------------------------------
/examples/inspect_multiagent_run.py:
--------------------------------------------------------------------------------
1 | from openinference.instrumentation.smolagents import SmolagentsInstrumentor
2 | from phoenix.otel import register
3 | 
4 | 
5 | register()
6 | SmolagentsInstrumentor().instrument(skip_dep_check=True)
7 | 
8 | 
9 | from smolagents import (
10 |     CodeAgent,
11 |     DuckDuckGoSearchTool,
12 |     HfApiModel,
13 |     ToolCallingAgent,
14 |     VisitWebpageTool,
15 | )
16 | 
17 | 
18 | # Then we run the agentic part!
19 | model = HfApiModel() 20 | 21 | search_agent = ToolCallingAgent( 22 | tools=[DuckDuckGoSearchTool(), VisitWebpageTool()], 23 | model=model, 24 | name="search_agent", 25 | description="This is an agent that can do web search.", 26 | ) 27 | 28 | manager_agent = CodeAgent( 29 | tools=[], 30 | model=model, 31 | managed_agents=[search_agent], 32 | ) 33 | manager_agent.run("If the US keeps it 2024 growth rate, how many years would it take for the GDP to double?") 34 | -------------------------------------------------------------------------------- /examples/open_deep_research/requirements.txt: -------------------------------------------------------------------------------- 1 | anthropic>=0.37.1 2 | audioop-lts<1.0; python_version >= "3.13" # required to use pydub in Python >=3.13; LTS port of the removed Python builtin module audioop 3 | beautifulsoup4>=4.12.3 4 | datasets>=2.21.0 5 | google_search_results>=2.4.2 6 | huggingface_hub>=0.23.4 7 | mammoth>=1.8.0 8 | markdownify>=0.13.1 9 | numexpr>=2.10.1 10 | numpy>=2.1.2 11 | openai>=1.52.2 12 | openpyxl 13 | pandas>=2.2.3 14 | pathvalidate>=3.2.1 15 | pdfminer>=20191125 16 | pdfminer.six>=20240706 17 | Pillow>=11.0.0 18 | puremagic>=1.28 19 | pypdf>=5.1.0 20 | python-dotenv>=1.0.1 21 | python_pptx>=1.0.2 22 | Requests>=2.32.3 23 | serpapi>=0.1.5 24 | tqdm>=4.66.4 25 | torch>=2.2.2 26 | torchvision>=0.17.2 27 | transformers>=4.46.0 28 | youtube_transcript_api>=0.6.2 29 | chess 30 | sympy 31 | pubchempy 32 | Bio 33 | scikit-learn 34 | scipy 35 | pydub 36 | PyPDF2 37 | python-pptx 38 | torch 39 | xlrd 40 | SpeechRecognition -------------------------------------------------------------------------------- /tests/utils/markers.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2024 HuggingFace Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Markers for tests .""" 16 | 17 | import os 18 | from importlib.util import find_spec 19 | 20 | import pytest 21 | 22 | 23 | require_run_all = pytest.mark.skipif(not os.getenv("RUN_ALL"), reason="requires RUN_ALL environment variable") 24 | require_torch = pytest.mark.skipif(find_spec("torch") is None, reason="requires torch") 25 | -------------------------------------------------------------------------------- /tests/test_search.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2024 HuggingFace Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | from smolagents import DuckDuckGoSearchTool 18 | 19 | from .test_tools import ToolTesterMixin 20 | from .utils.markers import require_run_all 21 | 22 | 23 | class TestDuckDuckGoSearchTool(ToolTesterMixin): 24 | def setup_method(self): 25 | self.tool = DuckDuckGoSearchTool() 26 | self.tool.setup() 27 | 28 | @require_run_all 29 | def test_exact_match_arg(self): 30 | result = self.tool("Agents") 31 | assert isinstance(result, str) 32 | -------------------------------------------------------------------------------- /tests/test_import.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import tempfile 4 | 5 | 6 | def test_import_smolagents_without_extras(monkeypatch): 7 | monkeypatch.delenv("VIRTUAL_ENV", raising=False) 8 | with tempfile.TemporaryDirectory() as temp_dir: 9 | # Create a virtual environment 10 | venv_dir = os.path.join(temp_dir, "venv") 11 | subprocess.run(["uv", "venv", venv_dir], check=True) 12 | 13 | # Install smolagents in the virtual environment 14 | subprocess.run( 15 | ["uv", "pip", "install", "--python", os.path.join(venv_dir, "bin", "python"), "smolagents @ ."], check=True 16 | ) 17 | 18 | # Run the import test in the virtual environment 19 | result = subprocess.run( 20 | [os.path.join(venv_dir, "bin", "python"), "-c", "import smolagents"], 21 | capture_output=True, 22 | text=True, 23 | ) 24 | 25 | # Check if the import was successful 26 | assert result.returncode == 0, ( 27 | "Import failed with error: " 28 | + (result.stderr.splitlines()[-1] if result.stderr else "No error message") 29 | + "\n" 30 | + result.stderr 31 | ) 32 | -------------------------------------------------------------------------------- /src/smolagents/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf-8 3 | 4 | # Copyright 2024 The HuggingFace Inc. team. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | __version__ = "1.14.0.dev0" 18 | 19 | from .agent_types import * # noqa: I001 20 | from .agents import * # Above noqa avoids a circular dependency due to cli.py 21 | from .default_tools import * 22 | from .gradio_ui import * 23 | from .local_python_executor import * 24 | from .memory import * 25 | from .models import * 26 | from .monitoring import * 27 | from .remote_executors import * 28 | from .tools import * 29 | from .utils import * 30 | from .cli import * 31 | -------------------------------------------------------------------------------- /docs/source/hi/_toctree.yml: -------------------------------------------------------------------------------- 1 | - title: Get started 2 | sections: 3 | - local: index 4 | title: 🤗 Agents 5 | - local: guided_tour 6 | title: गाइडेड टूर 7 | - title: Tutorials 8 | sections: 9 | - local: tutorials/building_good_agents 10 | title: ✨ अच्छे Agents का निर्माण 11 | - local: tutorials/inspect_runs 12 | title: 📊 OpenTelemetry के साथ runs का निरीक्षण 13 | - local: tutorials/tools 14 | title: 🛠️ Tools - in-depth guide 15 | - local: tutorials/secure_code_execution 16 | title: 🛡️ E2B के साथ अपने कोड एक्जीक्यूशन को सुरक्षित करें 17 | - title: Conceptual guides 18 | sections: 19 | - local: conceptual_guides/intro_agents 20 | title: 🤖 Agentic सिस्टम का परिचय 21 | - local: conceptual_guides/react 22 | title: 🤔 मल्टी-स्टेप एजेंट कैसे काम करते हैं? 23 | - title: Examples 24 | sections: 25 | - local: examples/text_to_sql 26 | title: सेल्फ करेक्टिंग Text-to-SQL 27 | - local: examples/rag 28 | title: एजेंटिक RAG के साथ अपनी ज्ञान आधारित को मास्टर करें 29 | - local: examples/multiagents 30 | title: एक बहु-एजेंट प्रणाली का आयोजन करें 31 | - title: Reference 32 | sections: 33 | - local: reference/agents 34 | title: एजेंट से संबंधित ऑब्जेक्ट्स 35 | - local: reference/tools 36 | title: टूल्स से संबंधित ऑब्जेक्ट्स 37 | -------------------------------------------------------------------------------- /docs/source/zh/_toctree.yml: -------------------------------------------------------------------------------- 1 | - title: 起步 2 | sections: 3 | - local: index 4 | title: 🤗 Agents 5 | - local: guided_tour 6 | title: 导览 7 | - title: Tutorials 8 | sections: 9 | - local: tutorials/building_good_agents 10 | title: ✨ 构建好用的 agents 11 | - local: tutorials/inspect_runs 12 | title: 📊 监控 Agent 的运行 13 | - local: tutorials/tools 14 | title: 🛠️ 工具 - 深度指南 15 | - local: tutorials/secure_code_execution 16 | title: 🛡️ 使用 E2B 保护你的代码执行 17 | - local: tutorials/memory 18 | title: 📚 管理 Agent 的记忆 19 | - title: Conceptual guides 20 | sections: 21 | - local: conceptual_guides/intro_agents 22 | title: 🤖 Agent 化系统介绍 23 | - local: conceptual_guides/react 24 | title: 🤔 多步骤 Agent 是如何工作的? 
25 | - title: Examples 26 | sections: 27 | - local: examples/text_to_sql 28 | title: 自我修正 Text-to-SQL 29 | - local: examples/rag 30 | title: 借助 agentic RAG 掌控知识库 31 | - local: examples/multiagents 32 | title: 编排 multi-agent 系统 33 | - local: examples/web_browser 34 | title: 基于视觉模型构建能够浏览网页的agent 35 | - title: Reference 36 | sections: 37 | - local: reference/agents 38 | title: Agent-related objects 39 | - local: reference/models 40 | title: Model-related objects 41 | - local: reference/tools 42 | title: Tool-related objects 43 | -------------------------------------------------------------------------------- /docs/source/en/_toctree.yml: -------------------------------------------------------------------------------- 1 | - title: Get started 2 | sections: 3 | - local: index 4 | title: 🤗 Agents 5 | - local: guided_tour 6 | title: Guided tour 7 | - title: Tutorials 8 | sections: 9 | - local: tutorials/building_good_agents 10 | title: ✨ Building good agents 11 | - local: tutorials/inspect_runs 12 | title: 📊 Inspect your agent runs using telemetry 13 | - local: tutorials/tools 14 | title: 🛠️ Tools - in-depth guide 15 | - local: tutorials/secure_code_execution 16 | title: 🛡️ Secure code execution 17 | - local: tutorials/memory 18 | title: 📚 Manage your agent's memory 19 | - title: Conceptual guides 20 | sections: 21 | - local: conceptual_guides/intro_agents 22 | title: 🤖 An introduction to agentic systems 23 | - local: conceptual_guides/react 24 | title: 🤔 How do Multi-step agents work? 25 | - title: Examples 26 | sections: 27 | - local: examples/text_to_sql 28 | title: Self-correcting Text-to-SQL 29 | - local: examples/rag 30 | title: Master you knowledge base with agentic RAG 31 | - local: examples/multiagents 32 | title: Orchestrate a multi-agent system 33 | - local: examples/web_browser 34 | title: Build a web browser agent using vision models 35 | - title: Reference 36 | sections: 37 | - local: reference/agents 38 | title: Agent-related objects 39 | - local: reference/models 40 | title: Model-related objects 41 | - local: reference/tools 42 | title: Tool-related objects 43 | -------------------------------------------------------------------------------- /docs/source/zh/conceptual_guides/react.mdx: -------------------------------------------------------------------------------- 1 | 16 | # 多步骤 agent 是如何工作的? 17 | 18 | ReAct 框架([Yao et al., 2022](https://huggingface.co/papers/2210.03629))是目前构建 agent 的主要方法。 19 | 20 | 该名称基于两个词的组合:"Reason" (推理)和 "Act" (行动)。实际上,遵循此架构的 agent 将根据需要尽可能多的步骤来解决其任务,每个步骤包括一个推理步骤,然后是一个行动步骤,在该步骤中,它制定工具调用,使其更接近解决手头的任务。 21 | 22 | ReAct 过程涉及保留过去步骤的记忆。 23 | 24 | > [!TIP] 25 | > 阅读 [Open-source LLMs as LangChain Agents](https://huggingface.co/blog/open-source-llms-as-agents) 博客文章以了解更多关于多步 agent 的信息。 26 | 27 | 以下是其工作原理的视频概述: 28 | 29 |
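下面是一个极简的、仅作示意的 Python 草图(并非 smolagents 的真实实现,其中的函数与工具名均为假设),用来展示"推理 → 行动 → 观察"循环以及对过去步骤的记忆:

```py
# 一个极简的示意性 ReAct 循环(并非 smolagents 的真实实现,仅用于说明概念)
def llm_decide(memory: list[str]) -> str:
    # 这里用写死的规则代替真实的 LLM 推理
    return "final_answer" if any(m.startswith("观察") for m in memory) else "web_search"


def execute_tool(action: str) -> str:
    # 两个假设的"工具":一次网页搜索和一次最终回答
    tools = {
        "web_search": lambda: "观察:巴黎是法国的首都。",
        "final_answer": lambda: "巴黎",
    }
    return tools[action]()


memory = ["任务:法国的首都是哪里?"]
while True:
    action = llm_decide(memory)         # 推理步骤:根据记忆决定下一个行动
    observation = execute_tool(action)  # 行动步骤:执行工具调用并获得观察结果
    memory += [f"行动:{action}", observation]  # 保留过去步骤的记忆
    if action == "final_answer":        # 得到最终答案后停止
        break

print(memory)
```

真实的 agent 会用 LLM 的推理来代替这里写死的规则,并在每一步把完整的记忆重新交给模型。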
39 | 40 | ![ReAct agent 的框架](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/blog/open-source-llms-as-agents/ReAct.png) 41 | 42 | 我们实现了两个版本的 ToolCallingAgent: 43 | - [`ToolCallingAgent`] 在其输出中生成 JSON 格式的工具调用。 44 | - [`CodeAgent`] 是一种新型的 ToolCallingAgent,它生成代码块形式的工具调用,这对于具有强大编码性能的 LLM 非常有效。 45 | -------------------------------------------------------------------------------- /examples/open_deep_research/README.md: -------------------------------------------------------------------------------- 1 | # Open Deep Research 2 | 3 | Welcome to this open replication of [OpenAI's Deep Research](https://openai.com/index/introducing-deep-research/)! This agent attempts to replicate OpenAI's model and achieve similar performance on research tasks. 4 | 5 | Read more about this implementation's goal and methods in our [blog post](https://huggingface.co/blog/open-deep-research). 6 | 7 | 8 | This agent achieves **55% pass@1** on the GAIA validation set, compared to **67%** for the original Deep Research. 9 | 10 | ## Setup 11 | 12 | To get started, follow the steps below: 13 | 14 | ### Clone the repository 15 | 16 | ```bash 17 | git clone https://github.com/huggingface/smolagents.git 18 | cd smolagents/examples/open_deep_research 19 | ``` 20 | 21 | ### Install dependencies 22 | 23 | Run the following command to install the required dependencies from the `requirements.txt` file: 24 | 25 | ```bash 26 | pip install -r requirements.txt 27 | ``` 28 | 29 | ### Install the development version of `smolagents` 30 | 31 | ```bash 32 | pip install -e ../../.[dev] 33 | ``` 34 | 35 | ### Set up environment variables 36 | 37 | The agent uses the `GoogleSearchTool` for web search, which requires an environment variable with the corresponding API key, based on the selected provider: 38 | - `SERPAPI_API_KEY` for SerpApi: [Sign up here to get a key](https://serpapi.com/users/sign_up) 39 | - `SERPER_API_KEY` for Serper: [Sign up here to get a key](https://serper.dev/signup) 40 | 41 | Depending on the model you want to use, you may need to set environment variables. 42 | For example, to use the default `o1` model, you need to set the `OPENAI_API_KEY` environment variable. 43 | [Sign up here to get a key](https://platform.openai.com/signup). 44 | 45 | > [!WARNING] 46 | > The use of the default `o1` model is restricted to tier-3 access: https://help.openai.com/en/articles/10362446-api-access-to-o1-and-o3-mini 47 | 48 | 49 | ## Usage 50 | 51 | Then you're good to go! Run the run.py script, as in: 52 | ```bash 53 | python run.py --model-id "o1" "Your question here!" 
54 | ``` -------------------------------------------------------------------------------- /docs/source/zh/reference/agents.mdx: -------------------------------------------------------------------------------- 1 | 2 | 17 | # Agents(智能体) 18 | 19 | 20 | 21 | Smolagents 是一个实验性的 API,可能会随时发生变化。由于 API 或底层模型可能发生变化,代理返回的结果也可能有所不同。 22 | 23 | 24 | 25 | 要了解有关智能体和工具的更多信息,请务必阅读[入门指南](../index)。本页面包含基础类的 API 文档。 26 | 27 | ## 智能体(Agents) 28 | 29 | 我们的智能体继承自 [`MultiStepAgent`],这意味着它们可以执行多步操作,每一步包含一个思考(thought),然后是一个工具调用和执行。请阅读[概念指南](../conceptual_guides/react)以了解更多信息。 30 | 31 | 我们提供两种类型的代理,它们基于主要的 [`Agent`] 类: 32 | - [`CodeAgent`] 是默认代理,它以 Python 代码编写工具调用。 33 | - [`ToolCallingAgent`] 以 JSON 编写工具调用。 34 | 35 | 两者在初始化时都需要提供参数 `model` 和工具列表 `tools`。 36 | 37 | ### 智能体类 38 | 39 | [[autodoc]] MultiStepAgent 40 | 41 | [[autodoc]] CodeAgent 42 | 43 | [[autodoc]] ToolCallingAgent 44 | 45 | ### ManagedAgent 46 | 47 | _此类自 1.8.0 起已被弃用:现在您只需向普通代理传递 `name` 和 `description` 属性即可使其可被管理代理调用。_ 48 | 49 | ### stream_to_gradio 50 | 51 | [[autodoc]] stream_to_gradio 52 | 53 | ### GradioUI 54 | 55 | > [!TIP] 56 | > 您必须安装 `gradio` 才能使用 UI。如果尚未安装,请运行 `pip install smolagents[gradio]`。 57 | 58 | [[autodoc]] GradioUI 59 | 60 | ## 提示(Prompts) 61 | 62 | [[autodoc]] smolagents.agents.PromptTemplates 63 | 64 | [[autodoc]] smolagents.agents.PlanningPromptTemplate 65 | 66 | [[autodoc]] smolagents.agents.ManagedAgentPromptTemplate 67 | 68 | [[autodoc]] smolagents.agents.FinalAnswerPromptTemplate 69 | -------------------------------------------------------------------------------- /tests/test_final_answer.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2024 HuggingFace Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | 17 | import numpy as np 18 | import PIL.Image 19 | import pytest 20 | 21 | from smolagents.agent_types import _AGENT_TYPE_MAPPING 22 | from smolagents.default_tools import FinalAnswerTool 23 | 24 | from .test_tools import ToolTesterMixin 25 | from .utils.markers import require_torch 26 | 27 | 28 | class TestFinalAnswerTool(ToolTesterMixin): 29 | def setup_method(self): 30 | self.inputs = {"answer": "Final answer"} 31 | self.tool = FinalAnswerTool() 32 | 33 | def test_exact_match_arg(self): 34 | result = self.tool("Final answer") 35 | assert result == "Final answer" 36 | 37 | def test_exact_match_kwarg(self): 38 | result = self.tool(answer=self.inputs["answer"]) 39 | assert result == "Final answer" 40 | 41 | @require_torch 42 | def test_agent_type_output(self, inputs): 43 | for input_type, input in inputs.items(): 44 | output = self.tool(**input, sanitize_inputs_outputs=True) 45 | agent_type = _AGENT_TYPE_MAPPING[input_type] 46 | assert isinstance(output, agent_type) 47 | 48 | @pytest.fixture 49 | def inputs(self, shared_datadir): 50 | import torch 51 | 52 | return { 53 | "string": {"answer": "Text input"}, 54 | "image": {"answer": PIL.Image.open(shared_datadir / "000000039769.png").resize((512, 512))}, 55 | "audio": {"answer": torch.Tensor(np.ones(3000))}, 56 | } 57 | -------------------------------------------------------------------------------- /utils/check_tests_in_ci.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2025-present, the HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Check that all tests are called in CI.""" 16 | 17 | from pathlib import Path 18 | 19 | 20 | ROOT = Path(__file__).parent.parent 21 | 22 | TESTS_FOLDER = ROOT / "tests" 23 | CI_WORKFLOW_FILE = ROOT / ".github" / "workflows" / "tests.yml" 24 | 25 | 26 | def check_tests_in_ci(): 27 | """List all test files in `./tests/` and check if they are listed in the CI workflow. 28 | 29 | Since each test file is triggered separately in the CI workflow, it is easy to forget a new one when adding new 30 | tests, hence this check. 31 | 32 | NOTE: current implementation is quite naive but should work for now. Must be updated if one want to ignore some 33 | tests or if file naming is updated (currently only files starting by `test_*` are checked) 34 | """ 35 | test_files = [ 36 | path.relative_to(TESTS_FOLDER).as_posix() 37 | for path in TESTS_FOLDER.glob("**/*.py") 38 | if path.name.startswith("test_") 39 | ] 40 | ci_workflow_file_content = CI_WORKFLOW_FILE.read_text() 41 | missing_test_files = [test_file for test_file in test_files if test_file not in ci_workflow_file_content] 42 | if missing_test_files: 43 | print( 44 | "❌ Some test files seem to be ignored in the CI:\n" 45 | + "\n".join(f" - {test_file}" for test_file in missing_test_files) 46 | + f"\n Please add them manually in {CI_WORKFLOW_FILE}." 
47 | ) 48 | exit(1) 49 | else: 50 | print("✅ All good!") 51 | exit(0) 52 | 53 | 54 | if __name__ == "__main__": 55 | check_tests_in_ci() 56 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Logging 2 | logs 3 | tmp 4 | wandb 5 | 6 | # Data 7 | data 8 | outputs 9 | data/ 10 | 11 | # Apple 12 | .DS_Store 13 | 14 | # VS Code 15 | .vscode 16 | 17 | # Byte-compiled / optimized / DLL files 18 | __pycache__/ 19 | *.py[cod] 20 | *$py.class 21 | 22 | # C extensions 23 | *.so 24 | 25 | # Distribution / packaging 26 | .Python 27 | build/ 28 | develop-eggs/ 29 | dist/ 30 | downloads/ 31 | eggs/ 32 | .eggs/ 33 | lib/ 34 | lib64/ 35 | parts/ 36 | sdist/ 37 | var/ 38 | wheels/ 39 | share/python-wheels/ 40 | node_modules/ 41 | *.egg-info/ 42 | .installed.cfg 43 | *.egg 44 | MANIFEST 45 | 46 | # PyInstaller 47 | *.manifest 48 | *.spec 49 | 50 | # Installer logs 51 | pip-log.txt 52 | pip-delete-this-directory.txt 53 | 54 | # Unit test / coverage reports 55 | htmlcov/ 56 | .tox/ 57 | .nox/ 58 | .coverage 59 | .coverage.* 60 | .cache 61 | nosetests.xml 62 | coverage.xml 63 | *.cover 64 | *.py,cover 65 | .hypothesis/ 66 | .pytest_cache/ 67 | cover/ 68 | uv.lock 69 | 70 | # Translations 71 | *.mo 72 | *.pot 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | .pybuilder/ 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # pyenv 89 | # .python-version 90 | 91 | # pipenv 92 | #Pipfile.lock 93 | 94 | # UV 95 | #uv.lock 96 | 97 | # poetry 98 | #poetry.lock 99 | 100 | # pdm 101 | .pdm.toml 102 | .pdm-python 103 | .pdm-build/ 104 | 105 | # PEP 582 106 | __pypackages__/ 107 | 108 | # Celery stuff 109 | celerybeat-schedule 110 | celerybeat.pid 111 | 112 | # SageMath parsed files 113 | *.sage.py 114 | 115 | # Environments 116 | .env 117 | .venv 118 | env/ 119 | venv/ 120 | ENV/ 121 | env.bak/ 122 | venv.bak/ 123 | 124 | 125 | # mkdocs documentation 126 | /site 127 | 128 | # mypy 129 | .mypy_cache/ 130 | .dmypy.json 131 | dmypy.json 132 | 133 | # Pyre type checker 134 | .pyre/ 135 | 136 | # pytype static type analyzer 137 | .pytype/ 138 | 139 | # Cython debug symbols 140 | cython_debug/ 141 | 142 | # PyCharm 143 | .idea/ 144 | 145 | # Interpreter 146 | interpreter_workspace/ 147 | 148 | # Archive 149 | archive/ 150 | savedir/ 151 | output/ 152 | tool_output/ 153 | -------------------------------------------------------------------------------- /examples/rag.py: -------------------------------------------------------------------------------- 1 | # from huggingface_hub import login 2 | 3 | # login() 4 | import datasets 5 | from langchain.docstore.document import Document 6 | from langchain.text_splitter import RecursiveCharacterTextSplitter 7 | from langchain_community.retrievers import BM25Retriever 8 | 9 | 10 | knowledge_base = datasets.load_dataset("m-ric/huggingface_doc", split="train") 11 | knowledge_base = knowledge_base.filter(lambda row: row["source"].startswith("huggingface/transformers")) 12 | 13 | source_docs = [ 14 | Document(page_content=doc["text"], metadata={"source": doc["source"].split("/")[1]}) for doc in knowledge_base 15 | ] 16 | 17 | text_splitter = RecursiveCharacterTextSplitter( 18 | chunk_size=500, 19 | chunk_overlap=50, 20 | add_start_index=True, 21 | strip_whitespace=True, 22 | separators=["\n\n", "\n", ".", " ", ""], 23 | ) 24 | docs_processed = 
text_splitter.split_documents(source_docs) 25 | 26 | from smolagents import Tool 27 | 28 | 29 | class RetrieverTool(Tool): 30 | name = "retriever" 31 | description = "Uses semantic search to retrieve the parts of transformers documentation that could be most relevant to answer your query." 32 | inputs = { 33 | "query": { 34 | "type": "string", 35 | "description": "The query to perform. This should be semantically close to your target documents. Use the affirmative form rather than a question.", 36 | } 37 | } 38 | output_type = "string" 39 | 40 | def __init__(self, docs, **kwargs): 41 | super().__init__(**kwargs) 42 | self.retriever = BM25Retriever.from_documents(docs, k=10) 43 | 44 | def forward(self, query: str) -> str: 45 | assert isinstance(query, str), "Your search query must be a string" 46 | 47 | docs = self.retriever.invoke( 48 | query, 49 | ) 50 | return "\nRetrieved documents:\n" + "".join( 51 | [f"\n\n===== Document {str(i)} =====\n" + doc.page_content for i, doc in enumerate(docs)] 52 | ) 53 | 54 | 55 | from smolagents import CodeAgent, HfApiModel 56 | 57 | 58 | retriever_tool = RetrieverTool(docs_processed) 59 | agent = CodeAgent( 60 | tools=[retriever_tool], 61 | model=HfApiModel(model_id="meta-llama/Llama-3.3-70B-Instruct"), 62 | max_steps=4, 63 | verbosity_level=2, 64 | ) 65 | 66 | agent_output = agent.run("For a transformers model training, which is slower, the forward or the backward pass?") 67 | 68 | print("Final output:") 69 | print(agent_output) 70 | -------------------------------------------------------------------------------- /examples/text_to_sql.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy import ( 2 | Column, 3 | Float, 4 | Integer, 5 | MetaData, 6 | String, 7 | Table, 8 | create_engine, 9 | insert, 10 | inspect, 11 | text, 12 | ) 13 | 14 | 15 | engine = create_engine("sqlite:///:memory:") 16 | metadata_obj = MetaData() 17 | 18 | # create city SQL table 19 | table_name = "receipts" 20 | receipts = Table( 21 | table_name, 22 | metadata_obj, 23 | Column("receipt_id", Integer, primary_key=True), 24 | Column("customer_name", String(16), primary_key=True), 25 | Column("price", Float), 26 | Column("tip", Float), 27 | ) 28 | metadata_obj.create_all(engine) 29 | 30 | rows = [ 31 | {"receipt_id": 1, "customer_name": "Alan Payne", "price": 12.06, "tip": 1.20}, 32 | {"receipt_id": 2, "customer_name": "Alex Mason", "price": 23.86, "tip": 0.24}, 33 | {"receipt_id": 3, "customer_name": "Woodrow Wilson", "price": 53.43, "tip": 5.43}, 34 | {"receipt_id": 4, "customer_name": "Margaret James", "price": 21.11, "tip": 1.00}, 35 | ] 36 | for row in rows: 37 | stmt = insert(receipts).values(**row) 38 | with engine.begin() as connection: 39 | cursor = connection.execute(stmt) 40 | 41 | inspector = inspect(engine) 42 | columns_info = [(col["name"], col["type"]) for col in inspector.get_columns("receipts")] 43 | 44 | table_description = "Columns:\n" + "\n".join([f" - {name}: {col_type}" for name, col_type in columns_info]) 45 | print(table_description) 46 | 47 | from smolagents import tool 48 | 49 | 50 | @tool 51 | def sql_engine(query: str) -> str: 52 | """ 53 | Allows you to perform SQL queries on the table. Returns a string representation of the result. 54 | The table is named 'receipts'. Its description is as follows: 55 | Columns: 56 | - receipt_id: INTEGER 57 | - customer_name: VARCHAR(16) 58 | - price: FLOAT 59 | - tip: FLOAT 60 | 61 | Args: 62 | query: The query to perform. This should be correct SQL. 
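            Example of a valid query (illustrative only): SELECT customer_name, price FROM receipts ORDER BY price DESC LIMIT 1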
63 | """ 64 | output = "" 65 | with engine.connect() as con: 66 | rows = con.execute(text(query)) 67 | for row in rows: 68 | output += "\n" + str(row) 69 | return output 70 | 71 | 72 | from smolagents import CodeAgent, HfApiModel 73 | 74 | 75 | agent = CodeAgent( 76 | tools=[sql_engine], 77 | model=HfApiModel(model_id="meta-llama/Meta-Llama-3.1-8B-Instruct"), 78 | ) 79 | agent.run("Can you give me the name of the client who got the most expensive receipt?") 80 | -------------------------------------------------------------------------------- /tests/fixtures/tools.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import pytest 4 | 5 | from smolagents.tools import Tool, tool 6 | 7 | 8 | @pytest.fixture 9 | def boolean_default_tool_class(): 10 | class BooleanDefaultTool(Tool): 11 | name = "boolean_default_tool" 12 | description = "A tool with a boolean default parameter" 13 | inputs = { 14 | "text": {"type": "string", "description": "Input text"}, 15 | "flag": {"type": "boolean", "description": "Boolean flag with default value", "nullable": True}, 16 | } 17 | output_type = "string" 18 | 19 | def forward(self, text: str, flag: bool = False) -> str: 20 | return f"Text: {text}, Flag: {flag}" 21 | 22 | return BooleanDefaultTool() 23 | 24 | 25 | @pytest.fixture 26 | def boolean_default_tool_function(): 27 | @tool 28 | def boolean_default_tool(text: str, flag: bool = False) -> str: 29 | """ 30 | A tool with a boolean default parameter. 31 | 32 | Args: 33 | text: Input text 34 | flag: Boolean flag with default value 35 | """ 36 | return f"Text: {text}, Flag: {flag}" 37 | 38 | return boolean_default_tool 39 | 40 | 41 | @pytest.fixture 42 | def optional_input_tool_class(): 43 | class OptionalInputTool(Tool): 44 | name = "optional_input_tool" 45 | description = "A tool with an optional input parameter" 46 | inputs = { 47 | "required_text": {"type": "string", "description": "Required input text"}, 48 | "optional_text": {"type": "string", "description": "Optional input text", "nullable": True}, 49 | } 50 | output_type = "string" 51 | 52 | def forward(self, required_text: str, optional_text: Optional[str] = None) -> str: 53 | if optional_text: 54 | return f"{required_text} + {optional_text}" 55 | return required_text 56 | 57 | return OptionalInputTool() 58 | 59 | 60 | @pytest.fixture 61 | def optional_input_tool_function(): 62 | @tool 63 | def optional_input_tool(required_text: str, optional_text: Optional[str] = None) -> str: 64 | """ 65 | A tool with an optional input parameter. 66 | 67 | Args: 68 | required_text: Required input text 69 | optional_text: Optional input text 70 | """ 71 | if optional_text: 72 | return f"{required_text} + {optional_text}" 73 | return required_text 74 | 75 | return optional_input_tool 76 | -------------------------------------------------------------------------------- /examples/agent_from_any_llm.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from smolagents import HfApiModel, LiteLLMModel, OpenAIServerModel, TransformersModel, tool 4 | from smolagents.agents import CodeAgent, ToolCallingAgent 5 | 6 | 7 | # Choose which inference type to use! 
8 | 9 | available_inferences = ["hf_api", "hf_api_provider", "transformers", "ollama", "litellm", "openai"] 10 | chosen_inference = "hf_api_provider" 11 | 12 | print(f"Chose model: '{chosen_inference}'") 13 | 14 | if chosen_inference == "hf_api": 15 | model = HfApiModel(model_id="meta-llama/Llama-3.3-70B-Instruct") 16 | 17 | elif chosen_inference == "hf_api_provider": 18 | model = HfApiModel(provider="together") 19 | 20 | elif chosen_inference == "transformers": 21 | model = TransformersModel(model_id="HuggingFaceTB/SmolLM2-1.7B-Instruct", device_map="auto", max_new_tokens=1000) 22 | 23 | elif chosen_inference == "ollama": 24 | model = LiteLLMModel( 25 | model_id="ollama_chat/llama3.2", 26 | api_base="http://localhost:11434", # replace with remote open-ai compatible server if necessary 27 | api_key="your-api-key", # replace with API key if necessary 28 | num_ctx=8192, # ollama default is 2048 which will often fail horribly. 8192 works for easy tasks, more is better. Check https://huggingface.co/spaces/NyxKrage/LLM-Model-VRAM-Calculator to calculate how much VRAM this will need for the selected model. 29 | ) 30 | 31 | elif chosen_inference == "litellm": 32 | # For anthropic: change model_id below to 'anthropic/claude-3-5-sonnet-latest' 33 | model = LiteLLMModel(model_id="gpt-4o") 34 | 35 | elif chosen_inference == "openai": 36 | # For anthropic: change model_id below to 'anthropic/claude-3-5-sonnet-latest' 37 | model = OpenAIServerModel(model_id="gpt-4o") 38 | 39 | 40 | @tool 41 | def get_weather(location: str, celsius: Optional[bool] = False) -> str: 42 | """ 43 | Get weather in the next days at given location. 44 | Secretly this tool does not care about the location, it hates the weather everywhere. 45 | 46 | Args: 47 | location: the location 48 | celsius: the temperature 49 | """ 50 | return "The weather is UNGODLY with torrential rains and temperatures below -10°C" 51 | 52 | 53 | agent = ToolCallingAgent(tools=[get_weather], model=model, verbosity_level=2) 54 | 55 | print("ToolCallingAgent:", agent.run("What's the weather like in Paris?")) 56 | 57 | agent = CodeAgent(tools=[get_weather], model=model, verbosity_level=2) 58 | 59 | print("CodeAgent:", agent.run("What's the weather like in Paris?")) 60 | -------------------------------------------------------------------------------- /docs/source/hi/conceptual_guides/react.mdx: -------------------------------------------------------------------------------- 1 | 16 | # मल्टी-स्टेप एजेंट्स कैसे काम करते हैं? 17 | 18 | ReAct फ्रेमवर्क ([Yao et al., 2022](https://huggingface.co/papers/2210.03629)) वर्तमान में एजेंट्स बनाने का मुख्य दृष्टिकोण है। 19 | 20 | नाम दो शब्दों, "Reason" (तर्क) और "Act" (क्रिया) के संयोजन पर आधारित है। वास्तव में, इस आर्किटेक्चर का पालन करने वाले एजेंट अपने कार्य को उतने चरणों में हल करेंगे जितने आवश्यक हों, प्रत्येक चरण में एक Reasoning कदम होगा, फिर एक Action कदम होगा, जहाँ यह टूल कॉल्स तैयार करेगा जो उसे कार्य को हल करने के करीब ले जाएंगे। 21 | 22 | ReAct प्रक्रिया में पिछले चरणों की मेमोरी रखना शामिल है। 23 | 24 | > [!TIP] 25 | > मल्टी-स्टेप एजेंट्स के बारे में अधिक जानने के लिए [Open-source LLMs as LangChain Agents](https://huggingface.co/blog/open-source-llms-as-agents) ब्लॉग पोस्ट पढ़ें। 26 | 27 | यहाँ एक वीडियो ओवरव्यू है कि यह कैसे काम करता है: 28 | 29 |
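नीचे एक बहुत ही सरल, केवल उदाहरण के लिए लिखा गया Python स्केच है (यह smolagents का वास्तविक इम्प्लीमेंटेशन नहीं है; इसमें दिए गए फ़ंक्शन और टूल नाम काल्पनिक हैं), जो Reason → Act → Observe लूप और पिछले चरणों की मेमोरी को दिखाता है:

```py
# A tiny illustrative ReAct loop (not the real smolagents implementation, concept only)
def llm_decide(memory: list[str]) -> str:
    # A hard-coded rule stands in for the real LLM reasoning step
    return "final_answer" if any(m.startswith("Observation") for m in memory) else "web_search"


def execute_tool(action: str) -> str:
    # Two hypothetical "tools": a web search and a final answer
    tools = {
        "web_search": lambda: "Observation: Paris is the capital of France.",
        "final_answer": lambda: "Paris",
    }
    return tools[action]()


memory = ["Task: What is the capital of France?"]
while True:
    action = llm_decide(memory)         # reasoning step: decide the next action from memory
    observation = execute_tool(action)  # action step: run the tool call and observe the result
    memory += [f"Action: {action}", observation]  # keep a memory of past steps
    if action == "final_answer":        # stop once a final answer is produced
        break

print(memory)
```

वास्तविक agent में यह hard-coded नियम LLM की reasoning से बदल जाता है, और हर चरण में पूरी मेमोरी मॉडल को दोबारा दी जाती है।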
39 | 40 | ![ReAct एजेंट का फ्रेमवर्क](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/blog/open-source-llms-as-agents/ReAct.png) 41 | 42 | हम दो प्रकार के ToolCallingAgent को लागू करते हैं: 43 | - [`ToolCallingAgent`] अपने आउटपुट में टूल कॉल को JSON के रूप में जनरेट करता है। 44 | - [`CodeAgent`] ToolCallingAgent का एक नया प्रकार है जो अपने टूल कॉल को कोड के ब्लॉब्स के रूप में जनरेट करता है, जो उन LLM के लिए वास्तव में अच्छी तरह काम करता है जिनका कोडिंग प्रदर्शन मजबूत है। 45 | -------------------------------------------------------------------------------- /docs/source/zh/reference/tools.mdx: -------------------------------------------------------------------------------- 1 | 2 | 17 | # 工具 18 | 19 | 20 | 21 | Smolagents 是一个实验性 API,可能会随时更改。由于 API 或底层模型可能发生变化,代理返回的结果可能会有所不同。 22 | 23 | 24 | 25 | 要了解更多关于智能体和工具的信息,请务必阅读[入门指南](../index)。本页面包含底层类的 API 文档。 26 | 27 | ## 工具 28 | 29 | ### load_tool 30 | 31 | [[autodoc]] load_tool 32 | 33 | ### tool 34 | 35 | [[autodoc]] tool 36 | 37 | ### Tool 38 | 39 | [[autodoc]] Tool 40 | 41 | ### launch_gradio_demo 42 | 43 | [[autodoc]] launch_gradio_demo 44 | 45 | ## 默认工具 46 | 47 | ### PythonInterpreterTool 48 | 49 | [[autodoc]] PythonInterpreterTool 50 | 51 | ### FinalAnswerTool 52 | 53 | [[autodoc]] FinalAnswerTool 54 | 55 | ### UserInputTool 56 | 57 | [[autodoc]] UserInputTool 58 | 59 | ### DuckDuckGoSearchTool 60 | 61 | [[autodoc]] DuckDuckGoSearchTool 62 | 63 | ### GoogleSearchTool 64 | 65 | [[autodoc]] GoogleSearchTool 66 | 67 | ### VisitWebpageTool 68 | 69 | [[autodoc]] VisitWebpageTool 70 | 71 | ### SpeechToTextTool 72 | 73 | [[autodoc]] SpeechToTextTool 74 | 75 | ## 工具集合 76 | 77 | [[autodoc]] ToolCollection 78 | 79 | ## 智能体类型 80 | 81 | 智能体可以处理工具之间的任何类型的对象;工具是完全多模态的,可以接受和返回文本、图像、音频、视频以及其他类型的对象。为了增加工具之间的兼容性,以及正确呈现在 ipython(jupyter、colab、ipython notebooks 等)中的返回结果,我们为这些类型实现了包装类。 82 | 83 | 被包装的对象应该继续保持其初始行为;例如,一个文本对象应继续表现为字符串,一个图像对象应继续表现为 `PIL.Image`。 84 | 85 | 这些类型有三个特定的用途: 86 | 87 | - 调用 `to_raw` 方法时,应返回底层对象 88 | - 调用 `to_string` 方法时,应将对象转换为字符串:对于 `AgentText` 类型,可以直接返回字符串;对于其他实例,则返回对象序列化版本的路径 89 | - 在 ipython 内核中显示时,应正确显示对象 90 | 91 | ### AgentText 92 | 93 | [[autodoc]] smolagents.agent_types.AgentText 94 | 95 | ### AgentImage 96 | 97 | [[autodoc]] smolagents.agent_types.AgentImage 98 | 99 | ### AgentAudio 100 | 101 | [[autodoc]] smolagents.agent_types.AgentAudio 102 | -------------------------------------------------------------------------------- /docs/source/en/reference/agents.mdx: -------------------------------------------------------------------------------- 1 | 16 | # Agents 17 | 18 | 19 | 20 | Smolagents is an experimental API which is subject to change at any time. Results returned by the agents 21 | can vary as the APIs or underlying models are prone to change. 22 | 23 | 24 | 25 | To learn more about agents and tools make sure to read the [introductory guide](../index). This page 26 | contains the API docs for the underlying classes. 27 | 28 | ## Agents 29 | 30 | Our agents inherit from [`MultiStepAgent`], which means they can act in multiple steps, each step consisting of one thought, then one tool call and execution. Read more in [this conceptual guide](../conceptual_guides/react). 31 | 32 | We provide two types of agents, based on the main [`Agent`] class. 33 | - [`CodeAgent`] is the default agent, it writes its tool calls in Python code. 34 | - [`ToolCallingAgent`] writes its tool calls in JSON. 35 | 36 | Both require arguments `model` and list of tools `tools` at initialization. 
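For instance, a minimal setup might look like the sketch below (the model and tool choices are only illustrative, mirroring the repository examples; any supported model class and tool list can be swapped in):

```python
from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel

model = HfApiModel()  # any supported model class can be used here
agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=model)
agent.run("How many seconds would it take for a leopard at full speed to run through Pont des Arts?")
```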
37 | 38 | ### Classes of agents 39 | 40 | [[autodoc]] MultiStepAgent 41 | 42 | [[autodoc]] CodeAgent 43 | 44 | [[autodoc]] ToolCallingAgent 45 | 46 | ### ManagedAgent 47 | 48 | _This class is deprecated since 1.8.0: now you simply need to pass attributes `name` and `description` to a normal agent to make it callable by a manager agent._ 49 | 50 | ### stream_to_gradio 51 | 52 | [[autodoc]] stream_to_gradio 53 | 54 | ### GradioUI 55 | 56 | > [!TIP] 57 | > You must have `gradio` installed to use the UI. Please run `pip install smolagents[gradio]` if it's not the case. 58 | 59 | [[autodoc]] GradioUI 60 | 61 | ## Prompts 62 | 63 | [[autodoc]] smolagents.agents.PromptTemplates 64 | 65 | [[autodoc]] smolagents.agents.PlanningPromptTemplate 66 | 67 | [[autodoc]] smolagents.agents.ManagedAgentPromptTemplate 68 | 69 | [[autodoc]] smolagents.agents.FinalAnswerPromptTemplate 70 | -------------------------------------------------------------------------------- /docs/source/zh/index.mdx: -------------------------------------------------------------------------------- 1 | 15 | 16 | # `smolagents` 17 | 18 | 这是构建强大 agent 的最简单框架!顺便问一下,什么是 "agent"?我们在[此页面](conceptual_guides/intro_agents)提供了我们的定义,您还可以找到关于何时使用或不使用它们的建议(剧透:通常不使用 agent 会更好)。 19 | 20 | > [!TIP] 21 | > 译者注:Agent 的业内术语是“智能体”。本译文将保留 agent,不作翻译,以带来更高效的阅读体验。(在中文为主的文章中,It's easier to 注意到英文。Attention Is All You Need!) 22 | 23 | 本库提供: 24 | 25 | ✨ **简洁性**:Agent 逻辑仅需约千行代码。我们将抽象保持在原始代码之上的最小形态! 26 | 27 | 🌐 **支持任何 LLM**:支持通过 Hub 托管的模型,使用其 `transformers` 版本或通过我们的推理 API 加载,也支持 OpenAI、Anthropic 等模型。使用任何 LLM 为 agent 提供动力都非常容易。 28 | 29 | 🧑‍💻 **一流的代码 agent 支持**,即编写代码作为其操作的 agent(与"用于编写代码的 agent"相对),[在此了解更多](tutorials/secure_code_execution)。 30 | 31 | 🤗 **Hub 集成**:您可以在 Hub 上共享和加载工具,更多功能即将推出! 32 | 33 |
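下面是一个极简的上手示例草图(模型与工具的选择仅作演示,与仓库中的示例保持一致,可按需替换):

```py
from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel

agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=HfApiModel())
agent.run("如果猎豹以全速奔跑,穿过巴黎艺术桥需要多少秒?")
```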
34 | 
36 | - **导览**:学习基础知识并熟悉使用 agent。如果您是第一次使用 agent,请从这里开始!
40 | - **操作指南**:实用指南,帮助您实现特定目标:创建一个生成和测试 SQL 查询的 agent!
44 | - **概念指南**:高级解释,帮助您更好地理解重要主题。
48 | - **教程**:涵盖构建 agent 重要方面的横向教程。
52 | 
53 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "smolagents" 7 | version = "1.14.0.dev0" 8 | description = "🤗 smolagents: a barebones library for agents. Agents write python code to call tools or orchestrate other agents." 9 | authors = [ 10 | { name="Aymeric Roucher", email="aymeric@hf.co" }, 11 | ] 12 | readme = "README.md" 13 | requires-python = ">=3.10" 14 | dependencies = [ 15 | "huggingface-hub>=0.28.0", 16 | "requests>=2.32.3", 17 | "rich>=13.9.4", 18 | "jinja2>=3.1.4", 19 | "pillow>=11.0.0", 20 | "markdownify>=0.14.1", 21 | "duckduckgo-search>=6.3.7", 22 | "python-dotenv" 23 | ] 24 | 25 | [project.optional-dependencies] 26 | bedrock = [ 27 | "boto3>=1.36.18" 28 | ] 29 | torch = [ 30 | "torch", 31 | "torchvision", 32 | "numpy>=1.21.2", 33 | ] 34 | audio = [ 35 | "soundfile", 36 | "smolagents[torch]", 37 | ] 38 | docker = [ 39 | "docker>=7.1.0", 40 | "websocket-client", 41 | ] 42 | e2b = [ 43 | "e2b-code-interpreter>=1.0.3", 44 | "python-dotenv>=1.0.1", 45 | ] 46 | gradio = [ 47 | "gradio>=5.13.2", 48 | ] 49 | litellm = [ 50 | "litellm>=1.60.2", 51 | ] 52 | mcp = [ 53 | "mcpadapt>=0.0.15", 54 | "mcp", 55 | ] 56 | mlx-lm = [ 57 | "mlx-lm" 58 | ] 59 | openai = [ 60 | "openai>=1.58.1" 61 | ] 62 | telemetry = [ 63 | "arize-phoenix", 64 | "opentelemetry-sdk", 65 | "opentelemetry-exporter-otlp", 66 | "openinference-instrumentation-smolagents>=0.1.4" 67 | ] 68 | transformers = [ 69 | "accelerate", 70 | "transformers>=4.0.0", 71 | "smolagents[torch]", 72 | ] 73 | vision = [ 74 | "helium", 75 | "selenium", 76 | ] 77 | vllm = [ 78 | "vllm", 79 | "torch" 80 | ] 81 | all = [ 82 | "smolagents[audio,docker,e2b,gradio,litellm,mcp,mlx-lm,openai,telemetry,transformers,vision,bedrock]", 83 | ] 84 | quality = [ 85 | "ruff>=0.9.0", 86 | ] 87 | test = [ 88 | "ipython>=8.31.0", # for interactive environment tests 89 | "pandas>=2.2.3", 90 | "pytest>=8.1.0", 91 | "pytest-datadir", 92 | "python-dotenv>=1.0.1", # For test_all_docs 93 | "smolagents[all]", 94 | "rank-bm25", # For test_all_docs 95 | "Wikipedia-API>=0.8.1", 96 | ] 97 | dev = [ 98 | "smolagents[quality,test]", 99 | "sqlalchemy", # for ./examples 100 | ] 101 | 102 | [tool.pytest.ini_options] 103 | # Add the specified `OPTS` to the set of command line arguments as if they had been specified by the user. 
104 | addopts = "-sv --durations=0" 105 | 106 | [tool.ruff] 107 | line-length = 119 108 | lint.ignore = [ 109 | "F403", # undefined-local-with-import-star 110 | "E501", # line-too-long 111 | ] 112 | lint.select = ["E", "F", "I", "W"] 113 | 114 | [tool.ruff.lint.per-file-ignores] 115 | "examples/*" = [ 116 | "E402", # module-import-not-at-top-of-file 117 | ] 118 | 119 | [tool.ruff.lint.isort] 120 | known-first-party = ["smolagents"] 121 | lines-after-imports = 2 122 | 123 | [tool.setuptools.package-data] 124 | "smolagents.prompts" = ["*.yaml"] 125 | 126 | [project.scripts] 127 | smolagent = "smolagents.cli:main" 128 | webagent = "smolagents.vision_web_browser:main" 129 | -------------------------------------------------------------------------------- /docs/source/hi/reference/tools.mdx: -------------------------------------------------------------------------------- 1 | 16 | # Tools 17 | 18 | 19 | 20 | Smolagents एक experimental API है जो किसी भी समय बदल सकता है। एजेंट्स द्वारा लौटाए गए परिणाम भिन्न हो सकते हैं क्योंकि APIs या underlying मॉडल बदलने की संभावना रखते हैं। 21 | 22 | 23 | 24 | एजेंट्स और टूल्स के बारे में अधिक जानने के लिए [introductory guide](../index) पढ़ना सुनिश्चित करें। 25 | यह पेज underlying क्लासेज के लिए API docs को शामिल करता है। 26 | 27 | ## Tools 28 | 29 | ### load_tool 30 | 31 | [[autodoc]] load_tool 32 | 33 | ### tool 34 | 35 | [[autodoc]] tool 36 | 37 | ### Tool 38 | 39 | [[autodoc]] Tool 40 | 41 | ### launch_gradio_demo 42 | 43 | [[autodoc]] launch_gradio_demo 44 | 45 | ## Default Tools 46 | 47 | ### PythonInterpreterTool 48 | 49 | [[autodoc]] PythonInterpreterTool 50 | 51 | ### DuckDuckGoSearchTool 52 | 53 | [[autodoc]] DuckDuckGoSearchTool 54 | 55 | ### VisitWebpageTool 56 | 57 | [[autodoc]] VisitWebpageTool 58 | 59 | ### UserInputTool 60 | 61 | [[autodoc]] UserInputTool 62 | 63 | ## ToolCollection 64 | 65 | [[autodoc]] ToolCollection 66 | 67 | ## Agent टाइप्स 68 | 69 | एजेंट्स टूल्स के बीच किसी भी प्रकार की ऑब्जेक्ट को संभाल सकते हैं; टूल्स, पूरी तरह से मल्टीमोडल होने के कारण, टेक्स्ट, इमेज, ऑडियो, वीडियो सहित अन्य प्रकारों को स्वीकार और रिटर्न कर सकते हैं। 70 | टूल्स के बीच अनुकूलता बढ़ाने के साथ-साथ इन रिटर्न्स को ipython (jupyter, colab, ipython notebooks, ...) 
में सही ढंग से रेंडर करने के लिए, हम इन टाइप्स के आसपास रैपर क्लासेज को लागू करते हैं। 71 | 72 | रैप किए गए ऑब्जेक्ट्स को प्रारंभ में जैसा व्यवहार करना चाहिए वैसा ही करना जारी रखना चाहिए; एक टेक्स्ट ऑब्जेक्ट को अभी भी स्ट्रिंग की तरह व्यवहार करना चाहिए| 73 | एक इमेज ऑब्जेक्ट को अभी भी `PIL.Image` की तरह व्यवहार करना चाहिए। 74 | 75 | इन टाइप्स के तीन विशिष्ट उद्देश्य हैं: 76 | 77 | - टाइप पर `to_raw` को कॉल करने से अंतर्निहित ऑब्जेक्ट रिटर्न होना चाहिए 78 | - टाइप पर `to_string` को कॉल करने से ऑब्जेक्ट को स्ट्रिंग के रूप में रिटर्न होना चाहिए: वह `AgentText` के मामले में स्ट्रिंग हो सकती है लेकिन अन्य उदाहरणों में ऑब्जेक्ट के सीरियलाइज्ड वर्जन का पाथ होगा 79 | - इसे एक ipython kernel में प्रदर्शित करने पर ऑब्जेक्ट को सही ढंग से प्रदर्शित करना चाहिए 80 | 81 | ### AgentText 82 | 83 | [[autodoc]] smolagents.agent_types.AgentText 84 | 85 | ### AgentImage 86 | 87 | [[autodoc]] smolagents.agent_types.AgentImage 88 | 89 | ### AgentAudio 90 | 91 | [[autodoc]] smolagents.agent_types.AgentAudio 92 | -------------------------------------------------------------------------------- /docs/source/zh/tutorials/secure_code_execution.mdx: -------------------------------------------------------------------------------- 1 | 16 | # 安全代码执行 17 | 18 | [[open-in-colab]] 19 | 20 | > [!TIP] 21 | > 如果你是第一次构建 agent,请先阅读 [agent 介绍](../conceptual_guides/intro_agents) 和 [smolagents 导览](../guided_tour)。 22 | 23 | ### 代码智能体 24 | 25 | [多项](https://huggingface.co/papers/2402.01030) [研究](https://huggingface.co/papers/2411.01747) [表明](https://huggingface.co/papers/2401.00812),让大语言模型用代码编写其动作(工具调用)比当前标准的工具调用格式要好得多,目前行业标准是 "将动作写成包含工具名称和参数的 JSON" 的各种变体。 26 | 27 | 为什么代码更好?因为我们专门为计算机执行的动作而设计编程语言。如果 JSON 片段是更好的方式,那么这个工具包就应该是用 JSON 片段编写的,魔鬼就会嘲笑我们。 28 | 29 | 代码就是表达计算机动作的更好方式。它具有更好的: 30 | - **组合性**:你能像定义 Python 函数那样,在 JSON 动作中嵌套其他 JSON 动作,或者定义一组 JSON 动作以便以后重用吗? 31 | - **对象管理**:你如何在 JSON 中存储像 `generate_image` 这样的动作的输出? 32 | - **通用性**:代码是为了简单地表达任何可以让计算机做的事情而构建的。 33 | - **在 LLM 训练语料库中的表示**:天赐良机,为什么不利用已经包含在 LLM 训练语料库中的大量高质量动作呢? 34 | 35 | 下图展示了这一点,取自 [可执行代码动作引出更好的 LLM 智能体](https://huggingface.co/papers/2402.01030)。 36 | 37 | 38 | 39 | 这就是为什么我们强调提出代码智能体,在本例中是 Python 智能体,这意味着我们要在构建安全的 Python 解释器上投入更多精力。 40 | 41 | ### 本地 Python 解释器 42 | 43 | 默认情况下,`CodeAgent` 会在你的环境中运行 LLM 生成的代码。 44 | 这个执行不是由普通的 Python 解释器完成的:我们从零开始重新构建了一个更安全的 `LocalPythonExecutor`。 45 | 这个解释器通过以下方式设计以确保安全: 46 | - 将导入限制为用户显式传递的列表 47 | - 限制操作次数以防止无限循环和资源膨胀 48 | - 不会执行任何未预定义的操作 49 | 50 | 我们已经在许多用例中使用了这个解释器,从未观察到对环境造成任何损害。 51 | 52 | 然而,这个解决方案并不是万无一失的:可以想象,如果 LLM 被微调用于恶意操作,仍然可能损害你的环境。例如,如果你允许像 `Pillow` 这样无害的包处理图像,LLM 可能会生成数千张图像保存以膨胀你的硬盘。 53 | 如果你自己选择了 LLM 引擎,这当然不太可能,但它可能会发生。 54 | 55 | 所以如果你想格外谨慎,可以使用下面描述的远程代码执行选项。 56 | 57 | ### E2B 代码执行器 58 | 59 | 为了最大程度的安全性,你可以使用我们与 E2B 的集成在沙盒环境中运行代码。这是一个远程执行服务,可以在隔离的容器中运行你的代码,使代码无法影响你的本地环境。 60 | 61 | 为此,你需要设置你的 E2B 账户并在环境变量中设置 `E2B_API_KEY`。请前往 [E2B 快速入门文档](https://e2b.dev/docs/quickstart) 了解更多信息。 62 | 63 | 然后你可以通过 `pip install e2b-code-interpreter python-dotenv` 安装它。 64 | 65 | 现在你已经准备好了! 66 | 67 | 要将代码执行器设置为 E2B,只需在初始化 `CodeAgent` 时传递标志 `executor_type="e2b"`。 68 | 请注意,你应该将所有工具的依赖项添加到 `additional_authorized_imports` 中,以便执行器安装它们。 69 | 70 | ```py 71 | from smolagents import CodeAgent, VisitWebpageTool, HfApiModel 72 | agent = CodeAgent( 73 | tools = [VisitWebpageTool()], 74 | model=HfApiModel(), 75 | additional_authorized_imports=["requests", "markdownify"], 76 | executor_type="e2b" 77 | ) 78 | 79 | agent.run("What was Abraham Lincoln's preferred pet?") 80 | ``` 81 | 82 | 目前 E2B 代码执行暂不兼容多 agent——因为把 agent 调用放在应该在远程执行的代码块里,是非常混乱的。但我们正在努力做到这件事! 
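作为对上文"本地 Python 解释器"一节的补充,下面给出一个仅使用默认本地执行器的最小示意(并非完整实现:`requests` 与 `markdownify` 只是示例性的授权导入,可按需替换):

```py
from smolagents import CodeAgent, HfApiModel

# 最小示意:不指定 executor_type 时,代码默认由本地的 LocalPythonExecutor 执行。
# 只有列在 additional_authorized_imports 中的模块才允许被导入,其他导入会被拒绝。
agent = CodeAgent(
    tools=[],
    model=HfApiModel(),
    additional_authorized_imports=["requests", "markdownify"],  # 示例性的授权导入列表
)

agent.run("访问 https://huggingface.co 并用一句话总结页面标题。")
```

与上文的 E2B 示例相比,这里的代码完全在本地运行,因此请只授权你信任的导入。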
83 | -------------------------------------------------------------------------------- /docs/source/en/reference/tools.mdx: -------------------------------------------------------------------------------- 1 | 16 | # Tools 17 | 18 | 19 | 20 | Smolagents is an experimental API which is subject to change at any time. Results returned by the agents 21 | can vary as the APIs or underlying models are prone to change. 22 | 23 | 24 | 25 | To learn more about agents and tools make sure to read the [introductory guide](../index). This page 26 | contains the API docs for the underlying classes. 27 | 28 | ## Tools 29 | 30 | ### load_tool 31 | 32 | [[autodoc]] load_tool 33 | 34 | ### tool 35 | 36 | [[autodoc]] tool 37 | 38 | ### Tool 39 | 40 | [[autodoc]] Tool 41 | 42 | ### launch_gradio_demo 43 | 44 | [[autodoc]] launch_gradio_demo 45 | 46 | ## Default tools 47 | 48 | ### PythonInterpreterTool 49 | 50 | [[autodoc]] PythonInterpreterTool 51 | 52 | ### FinalAnswerTool 53 | 54 | [[autodoc]] FinalAnswerTool 55 | 56 | ### UserInputTool 57 | 58 | [[autodoc]] UserInputTool 59 | 60 | ### DuckDuckGoSearchTool 61 | 62 | [[autodoc]] DuckDuckGoSearchTool 63 | 64 | ### GoogleSearchTool 65 | 66 | [[autodoc]] GoogleSearchTool 67 | 68 | ### VisitWebpageTool 69 | 70 | [[autodoc]] VisitWebpageTool 71 | 72 | ### SpeechToTextTool 73 | 74 | [[autodoc]] SpeechToTextTool 75 | 76 | ## ToolCollection 77 | 78 | [[autodoc]] ToolCollection 79 | 80 | ## Agent Types 81 | 82 | Agents can handle any type of object in-between tools; tools, being completely multimodal, can accept and return 83 | text, image, audio, video, among other types. In order to increase compatibility between tools, as well as to 84 | correctly render these returns in ipython (jupyter, colab, ipython notebooks, ...), we implement wrapper classes 85 | around these types. 86 | 87 | The wrapped objects should continue behaving as initially; a text object should still behave as a string, an image 88 | object should still behave as a `PIL.Image`. 89 | 90 | These types have three specific purposes: 91 | 92 | - Calling `to_raw` on the type should return the underlying object 93 | - Calling `to_string` on the type should return the object as a string: that can be the string in case of an `AgentText` 94 | but will be the path of the serialized version of the object in other instances 95 | - Displaying it in an ipython kernel should display the object correctly 96 | 97 | ### AgentText 98 | 99 | [[autodoc]] smolagents.agent_types.AgentText 100 | 101 | ### AgentImage 102 | 103 | [[autodoc]] smolagents.agent_types.AgentImage 104 | 105 | ### AgentAudio 106 | 107 | [[autodoc]] smolagents.agent_types.AgentAudio 108 | -------------------------------------------------------------------------------- /docs/source/en/conceptual_guides/react.mdx: -------------------------------------------------------------------------------- 1 | 16 | # How do multi-step agents work? 17 | 18 | The ReAct framework ([Yao et al., 2022](https://huggingface.co/papers/2210.03629)) is currently the main approach to building agents. 19 | 20 | The name is based on the concatenation of two words, "Reason" and "Act." Indeed, agents following this architecture will solve their task in as many steps as needed, each step consisting of a Reasoning step, then an Action step where it formulates tool calls that will bring it closer to solving the task at hand. 21 | 22 | All agents in `smolagents` are based on singular `MultiStepAgent` class, which is an abstraction of ReAct framework. 
23 | 24 | On a basic level, this class performs actions in a cycle of the following steps, where existing variables and knowledge are incorporated into the agent logs as shown below: 25 | 26 | Initialization: the system prompt is stored in a `SystemPromptStep`, and the user query is logged into a `TaskStep`. 27 | 28 | While loop (ReAct loop): 29 | 30 | - Use `agent.write_memory_to_messages()` to write the agent logs into a list of LLM-readable [chat messages](https://huggingface.co/docs/transformers/en/chat_templating). 31 | - Send these messages to a `Model` object to get its completion. Parse the completion to get the action (a JSON blob for `ToolCallingAgent`, a code snippet for `CodeAgent`). 32 | - Execute the action and log the result into memory (an `ActionStep`). 33 | - At the end of each step, we run all callback functions defined in `agent.step_callbacks`. 34 | 35 | Optionally, when planning is activated, a plan can be periodically revised and stored in a `PlanningStep`. This includes feeding facts about the task at hand to the memory. 36 | 37 | For a `CodeAgent`, it looks like the figure below. 38 |
40 | [Figure: the ReAct loop of a `CodeAgent`] 43 |
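In rough pseudocode, the loop described above can be sketched as follows (an illustrative outline only, not the actual smolagents source; `parse_action` and `execute_action` are hypothetical stand-ins for the real parsing and execution logic):

```py
from smolagents import ActionStep, TaskStep


def parse_action(completion):
    # Hypothetical helper: a ToolCallingAgent would parse a JSON tool call here,
    # while a CodeAgent would extract a code snippet from the completion.
    return completion.content


def execute_action(action):
    # Hypothetical helper: run the tool call or code snippet and report whether
    # it produced a final answer.
    observations, final_answer = f"Executed: {action}", None
    return observations, final_answer


def react_loop(agent, task: str, max_steps: int = 10):
    agent.memory.steps.append(TaskStep(task=task))            # log the user query
    for step_number in range(1, max_steps + 1):
        messages = agent.write_memory_to_messages()           # memory -> LLM-readable chat messages
        completion = agent.model(messages)                     # get the model's completion
        action = parse_action(completion)                      # JSON blob or code snippet
        observations, final_answer = execute_action(action)    # act on the environment
        memory_step = ActionStep(step_number=step_number, observations=observations)
        agent.memory.steps.append(memory_step)                 # log the result
        for callback in agent.step_callbacks:                  # run end-of-step callbacks
            callback(memory_step)
        if final_answer is not None:
            return final_answer
```

The actual implementation layers error handling, optional planning steps, and logging on top of this control flow.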
44 | 45 | Here is a video overview of how that works: 46 | 47 |
48 | [Embedded video overview] 56 |
57 | 58 | We implement two versions of agents: 59 | - [`CodeAgent`] is the preferred type of agent: it generates its tool calls as blobs of code. 60 | - [`ToolCallingAgent`] generates tool calls as a JSON in its output, as is commonly done in agentic frameworks. We incorporate this option because it can be useful in some narrow cases where you can do fine with only one tool call per step: for instance, for web browsing, you need to wait after each action on the page to monitor how the page changes. 61 | 62 | > [!TIP] 63 | > Read [Open-source LLMs as LangChain Agents](https://huggingface.co/blog/open-source-llms-as-agents) blog post to learn more about multi-step agents. 64 | -------------------------------------------------------------------------------- /tests/fixtures/agents.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | 4 | AGENT_DICTS = { 5 | "v1.9": { 6 | "tools": [], 7 | "model": { 8 | "class": "HfApiModel", 9 | "data": { 10 | "last_input_token_count": None, 11 | "last_output_token_count": None, 12 | "model_id": "Qwen/Qwen2.5-Coder-32B-Instruct", 13 | "provider": None, 14 | }, 15 | }, 16 | "managed_agents": {}, 17 | "prompt_templates": { 18 | "system_prompt": "dummy system prompt", 19 | "planning": { 20 | "initial_facts": "dummy planning initial facts", 21 | "initial_plan": "dummy planning initial plan", 22 | "update_facts_pre_messages": "dummy planning update facts pre messages", 23 | "update_facts_post_messages": "dummy planning update facts post messages", 24 | "update_plan_pre_messages": "dummy planning update plan pre messages", 25 | "update_plan_post_messages": "dummy planning update plan post messages", 26 | }, 27 | "managed_agent": { 28 | "task": "dummy managed agent task", 29 | "report": "dummy managed agent report", 30 | }, 31 | "final_answer": { 32 | "pre_messages": "dummy final answer pre messages", 33 | "post_messages": "dummy final answer post messages", 34 | }, 35 | }, 36 | "max_steps": 10, 37 | "verbosity_level": 2, 38 | "grammar": None, 39 | "planning_interval": 2, 40 | "name": "test_agent", 41 | "description": "dummy description", 42 | "requirements": ["smolagents"], 43 | "authorized_imports": ["pandas"], 44 | }, 45 | # Added: executor_type, executor_kwargs, max_print_outputs_length 46 | "v1.10": { 47 | "tools": [], 48 | "model": { 49 | "class": "HfApiModel", 50 | "data": { 51 | "last_input_token_count": None, 52 | "last_output_token_count": None, 53 | "model_id": "Qwen/Qwen2.5-Coder-32B-Instruct", 54 | "provider": None, 55 | }, 56 | }, 57 | "managed_agents": {}, 58 | "prompt_templates": { 59 | "system_prompt": "dummy system prompt", 60 | "planning": { 61 | "initial_facts": "dummy planning initial facts", 62 | "initial_plan": "dummy planning initial plan", 63 | "update_facts_pre_messages": "dummy planning update facts pre messages", 64 | "update_facts_post_messages": "dummy planning update facts post messages", 65 | "update_plan_pre_messages": "dummy planning update plan pre messages", 66 | "update_plan_post_messages": "dummy planning update plan post messages", 67 | }, 68 | "managed_agent": { 69 | "task": "dummy managed agent task", 70 | "report": "dummy managed agent report", 71 | }, 72 | "final_answer": { 73 | "pre_messages": "dummy final answer pre messages", 74 | "post_messages": "dummy final answer post messages", 75 | }, 76 | }, 77 | "max_steps": 10, 78 | "verbosity_level": 2, 79 | "grammar": None, 80 | "planning_interval": 2, 81 | "name": "test_agent", 82 | "description": "dummy description", 83 
| "requirements": ["smolagents"], 84 | "authorized_imports": ["pandas"], 85 | "executor_type": "local", 86 | "executor_kwargs": {}, 87 | "max_print_outputs_length": None, 88 | }, 89 | } 90 | 91 | 92 | @pytest.fixture 93 | def get_agent_dict(): 94 | def _get_agent_dict(agent_dict_key): 95 | return AGENT_DICTS[agent_dict_key] 96 | 97 | return _get_agent_dict 98 | -------------------------------------------------------------------------------- /docs/source/hi/index.mdx: -------------------------------------------------------------------------------- 1 | 15 | 16 | # `smolagents` 17 | 18 |
19 | 20 |
21 | 22 | यह लाइब्रेरी पावरफुल एजेंट्स बनाने के लिए सबसे सरल फ्रेमवर्क है! वैसे, "एजेंट्स" हैं क्या? हम अपनी परिभाषा [इस पेज पर](conceptual_guides/intro_agents) प्रदान करते हैं, जहाँ आपको यह भी पता चलेगा कि इन्हें कब उपयोग करें या न करें (स्पॉइलर: आप अक्सर एजेंट्स के बिना बेहतर काम कर सकते हैं)। 23 | 24 | यह लाइब्रेरी प्रदान करती है: 25 | 26 | ✨ **सरलता**: Agents का लॉजिक लगभग एक हजार लाइन्स ऑफ़ कोड में समाहित है। हमने रॉ कोड के ऊपर एब्स्ट्रैक्शन को न्यूनतम आकार में रखा है! 27 | 28 | 🌐 **सभी LLM के लिए सपोर्ट**: यह हब पर होस्ट किए गए मॉडल्स को उनके `transformers` वर्जन में या हमारे इन्फरेंस API के माध्यम से सपोर्ट करता है, साथ ही OpenAI, Anthropic से भी... किसी भी LLM से एजेंट को पावर करना वास्तव में आसान है। 29 | 30 | 🧑‍💻 **कोड Agents के लिए फर्स्ट-क्लास सपोर्ट**, यानी ऐसे एजेंट्स जो अपनी एक्शन्स को कोड में लिखते हैं (कोड लिखने के लिए उपयोग किए जाने वाले एजेंट्स के विपरीत), [यहाँ और पढ़ें](tutorials/secure_code_execution)। 31 | 32 | 🤗 **हब इंटीग्रेशन**: आप टूल्स को हब पर शेयर और लोड कर सकते हैं, और आगे और भी बहुत कुछ आने वाला है! 33 | ! 34 | 35 |
36 |
37 |
गाइडेड टूर
39 |

बेसिक्स सीखें और एजेंट्स का उपयोग करने में परिचित हों। यदि आप पहली बार एजेंट्स का उपयोग कर रहे हैं तो यहाँ से शुरू करें!

40 |
41 |
हाउ-टू गाइड्स
43 |

एक विशिष्ट लक्ष्य प्राप्त करने में मदद के लिए गाइड: SQL क्वेरी जनरेट और टेस्ट करने के लिए एजेंट बनाएं!

44 |
45 |
कॉन्सेप्चुअल गाइड्स
47 |

महत्वपूर्ण विषयों की बेहतर समझ बनाने के लिए उच्च-स्तरीय व्याख्याएं।

48 |
49 |
ट्यूटोरियल्स
51 |

एजेंट्स बनाने के महत्वपूर्ण पहलुओं को कवर करने वाले क्ट्यूटोरियल्स।

52 |
53 |
54 |
-------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Python tests 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: 7 | - ci-* 8 | 9 | env: 10 | UV_SYSTEM_PYTHON: 1 11 | 12 | jobs: 13 | build-ubuntu: 14 | runs-on: ubuntu-latest 15 | env: 16 | UV_HTTP_TIMEOUT: 600 # max 10min to install deps 17 | 18 | strategy: 19 | fail-fast: false 20 | matrix: 21 | python-version: ["3.10", "3.12"] 22 | 23 | steps: 24 | - uses: actions/checkout@v2 25 | - name: Set up Python ${{ matrix.python-version }} 26 | uses: actions/setup-python@v2 27 | with: 28 | python-version: ${{ matrix.python-version }} 29 | 30 | # Setup venv 31 | - name: Setup uv 32 | run: | 33 | pip install --upgrade uv 34 | 35 | # Install dependencies 36 | - name: Install dependencies 37 | run: | 38 | uv pip install "smolagents[test] @ ." 39 | 40 | # Run all tests separately for individual feedback 41 | # Use 'if success() || failure()' so that all tests are run even if one failed 42 | # See https://stackoverflow.com/a/62112985 43 | - name: Import tests 44 | run: | 45 | pytest ./tests/test_import.py 46 | if: ${{ success() || failure() }} 47 | 48 | - name: Agent tests 49 | run: | 50 | pytest ./tests/test_agents.py 51 | if: ${{ success() || failure() }} 52 | 53 | - name: Default tools tests 54 | run: | 55 | pytest ./tests/test_default_tools.py 56 | if: ${{ success() || failure() }} 57 | 58 | # - name: Docs tests # Disabled for now (slow test + requires API keys) 59 | # run: | 60 | # pytest ./tests/test_all_docs.py 61 | 62 | - name: CLI tests 63 | run: | 64 | pytest ./tests/test_cli.py 65 | if: ${{ success() || failure() }} 66 | 67 | - name: Final answer tests 68 | run: | 69 | pytest ./tests/test_final_answer.py 70 | if: ${{ success() || failure() }} 71 | 72 | - name: Models tests 73 | run: | 74 | pytest ./tests/test_models.py 75 | if: ${{ success() || failure() }} 76 | 77 | - name: Memory tests 78 | run: | 79 | pytest ./tests/test_memory.py 80 | if: ${{ success() || failure() }} 81 | 82 | - name: Monitoring tests 83 | run: | 84 | pytest ./tests/test_monitoring.py 85 | if: ${{ success() || failure() }} 86 | 87 | - name: Local Python executor tests 88 | run: | 89 | pytest ./tests/test_local_python_executor.py 90 | if: ${{ success() || failure() }} 91 | 92 | - name: Remote executor tests 93 | run: | 94 | pytest ./tests/test_remote_executors.py 95 | if: ${{ success() || failure() }} 96 | 97 | - name: Search tests 98 | run: | 99 | pytest ./tests/test_search.py 100 | if: ${{ success() || failure() }} 101 | 102 | - name: Tools tests 103 | run: | 104 | pytest ./tests/test_tools.py 105 | if: ${{ success() || failure() }} 106 | 107 | - name: Tool validation tests 108 | run: | 109 | pytest ./tests/test_tool_validation.py 110 | if: ${{ success() || failure() }} 111 | 112 | - name: Types tests 113 | run: | 114 | pytest ./tests/test_types.py 115 | if: ${{ success() || failure() }} 116 | 117 | - name: Utils tests 118 | run: | 119 | pytest ./tests/test_utils.py 120 | if: ${{ success() || failure() }} 121 | 122 | - name: Gradio UI tests 123 | run: | 124 | pytest ./tests/test_gradio_ui.py 125 | if: ${{ success() || failure() }} 126 | 127 | - name: Function type hints utils tests 128 | run: | 129 | pytest ./tests/test_function_type_hints_utils.py 130 | if: ${{ success() || failure() }} 131 | -------------------------------------------------------------------------------- /docs/source/en/index.mdx: 
-------------------------------------------------------------------------------- 1 | 15 | 16 | # `smolagents` 17 | 18 |
19 | 20 |
21 | 22 | This library is the simplest framework out there to build powerful agents! By the way, wtf are "agents"? We provide our definition [in this page](conceptual_guides/intro_agents), where you'll also find tips for when to use them or not (spoilers: you'll often be better off without agents). 23 | 24 | This library offers: 25 | 26 | ✨ **Simplicity**: the logic for agents fits in ~thousand lines of code. We kept abstractions to their minimal shape above raw code! 27 | 28 | 🌐 **Support for any LLM**: it supports models hosted on the Hub loaded in their `transformers` version or through our inference API and Inference providers, but also models from OpenAI, Anthropic... it's really easy to power an agent with any LLM. 29 | 30 | 🧑‍💻 **First-class support for Code Agents**, i.e. agents that write their actions in code (as opposed to "agents being used to write code"), [read more here](tutorials/secure_code_execution). 31 | 32 | 🤗 **Hub integrations**: you can share and load Gradio Spaces as tools to/from the Hub, and more is to come! 33 | 34 |
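As a minimal, illustrative sketch (not a full quickstart; the web-search tool and Hub-hosted model below are just one possible combination), powering a code agent looks roughly like this:

```py
from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel

# A CodeAgent writes its actions as Python snippets and can call the provided
# tools from that code; HfApiModel queries a model hosted on the Hugging Face Hub.
agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=HfApiModel())

agent.run("How many seconds would it take for a leopard at full speed to run through Pont des Arts?")
```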
35 | - **Guided tour**: Learn the basics and become familiar with using Agents. Start here if you are using Agents for the first time!
- **How-to guides**: Practical guides to help you achieve a specific goal: create an agent to generate and test SQL queries!
- **Conceptual guides**: High-level explanations for building a better understanding of important topics.
- **Tutorials**: Horizontal tutorials that cover important aspects of building agents. 53 |
54 | -------------------------------------------------------------------------------- /examples/open_deep_research/scripts/run_agents.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import shutil 4 | import textwrap 5 | from pathlib import Path 6 | 7 | # import tqdm.asyncio 8 | from smolagents.utils import AgentError 9 | 10 | 11 | def serialize_agent_error(obj): 12 | if isinstance(obj, AgentError): 13 | return {"error_type": obj.__class__.__name__, "message": obj.message} 14 | else: 15 | return str(obj) 16 | 17 | 18 | def get_image_description(file_name: str, question: str, visual_inspection_tool) -> str: 19 | prompt = f"""Write a caption of 5 sentences for this image. Pay special attention to any details that might be useful for someone answering the following question: 20 | {question}. But do not try to answer the question directly! 21 | Do not add any information that is not present in the image.""" 22 | return visual_inspection_tool(image_path=file_name, question=prompt) 23 | 24 | 25 | def get_document_description(file_path: str, question: str, document_inspection_tool) -> str: 26 | prompt = f"""Write a caption of 5 sentences for this document. Pay special attention to any details that might be useful for someone answering the following question: 27 | {question}. But do not try to answer the question directly! 28 | Do not add any information that is not present in the document.""" 29 | return document_inspection_tool.forward_initial_exam_mode(file_path=file_path, question=prompt) 30 | 31 | 32 | def get_single_file_description(file_path: str, question: str, visual_inspection_tool, document_inspection_tool): 33 | file_extension = file_path.split(".")[-1] 34 | if file_extension in ["png", "jpg", "jpeg"]: 35 | file_description = f" - Attached image: {file_path}" 36 | file_description += ( 37 | f"\n -> Image description: {get_image_description(file_path, question, visual_inspection_tool)}" 38 | ) 39 | return file_description 40 | elif file_extension in ["pdf", "xls", "xlsx", "docx", "doc", "xml"]: 41 | file_description = f" - Attached document: {file_path}" 42 | image_path = file_path.split(".")[0] + ".png" 43 | if os.path.exists(image_path): 44 | description = get_image_description(image_path, question, visual_inspection_tool) 45 | else: 46 | description = get_document_description(file_path, question, document_inspection_tool) 47 | file_description += f"\n -> File description: {description}" 48 | return file_description 49 | elif file_extension in ["mp3", "m4a", "wav"]: 50 | return f" - Attached audio: {file_path}" 51 | else: 52 | return f" - Attached file: {file_path}" 53 | 54 | 55 | def get_zip_description(file_path: str, question: str, visual_inspection_tool, document_inspection_tool): 56 | folder_path = file_path.replace(".zip", "") 57 | os.makedirs(folder_path, exist_ok=True) 58 | shutil.unpack_archive(file_path, folder_path) 59 | 60 | prompt_use_files = "" 61 | for root, dirs, files in os.walk(folder_path): 62 | for file in files: 63 | file_path = os.path.join(root, file) 64 | prompt_use_files += "\n" + textwrap.indent( 65 | get_single_file_description(file_path, question, visual_inspection_tool, document_inspection_tool), 66 | prefix=" ", 67 | ) 68 | return prompt_use_files 69 | 70 | 71 | def get_tasks_to_run(data, total: int, base_filename: Path, tasks_ids: list[int]): 72 | f = base_filename.parent / f"{base_filename.stem}_answers.jsonl" 73 | done = set() 74 | if f.exists(): 75 | with open(f, encoding="utf-8") 
as fh: 76 | done = {json.loads(line)["task_id"] for line in fh if line.strip()} 77 | 78 | tasks = [] 79 | for i in range(total): 80 | task_id = int(data[i]["task_id"]) 81 | if task_id not in done: 82 | if tasks_ids is not None: 83 | if task_id in tasks_ids: 84 | tasks.append(data[i]) 85 | else: 86 | tasks.append(data[i]) 87 | return tasks 88 | -------------------------------------------------------------------------------- /tests/test_types.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2024 HuggingFace Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | import os 16 | import tempfile 17 | import unittest 18 | import uuid 19 | 20 | import PIL.Image 21 | from transformers.testing_utils import ( 22 | require_soundfile, 23 | ) 24 | 25 | from smolagents.agent_types import AgentAudio, AgentImage, AgentText 26 | 27 | from .utils.markers import require_torch 28 | 29 | 30 | def get_new_path(suffix="") -> str: 31 | directory = tempfile.mkdtemp() 32 | return os.path.join(directory, str(uuid.uuid4()) + suffix) 33 | 34 | 35 | @require_soundfile 36 | @require_torch 37 | class AgentAudioTests(unittest.TestCase): 38 | def test_from_tensor(self): 39 | import soundfile as sf 40 | import torch 41 | 42 | tensor = torch.rand(12, dtype=torch.float64) - 0.5 43 | agent_type = AgentAudio(tensor) 44 | path = str(agent_type.to_string()) 45 | 46 | # Ensure that the tensor and the agent_type's tensor are the same 47 | self.assertTrue(torch.allclose(tensor, agent_type.to_raw(), atol=1e-4)) 48 | 49 | del agent_type 50 | 51 | # Ensure the path remains even after the object deletion 52 | self.assertTrue(os.path.exists(path)) 53 | 54 | # Ensure that the file contains the same value as the original tensor 55 | new_tensor, _ = sf.read(path) 56 | self.assertTrue(torch.allclose(tensor, torch.tensor(new_tensor), atol=1e-4)) 57 | 58 | def test_from_string(self): 59 | import soundfile as sf 60 | import torch 61 | 62 | tensor = torch.rand(12, dtype=torch.float64) - 0.5 63 | path = get_new_path(suffix=".wav") 64 | sf.write(path, tensor, 16000) 65 | 66 | agent_type = AgentAudio(path) 67 | 68 | self.assertTrue(torch.allclose(tensor, agent_type.to_raw(), atol=1e-4)) 69 | self.assertEqual(agent_type.to_string(), path) 70 | 71 | 72 | @require_torch 73 | class TestAgentImage: 74 | def test_from_tensor(self): 75 | import torch 76 | 77 | tensor = torch.randint(0, 256, (64, 64, 3)) 78 | agent_type = AgentImage(tensor) 79 | path = str(agent_type.to_string()) 80 | 81 | # Ensure that the tensor and the agent_type's tensor are the same 82 | assert torch.allclose(tensor, agent_type._tensor, atol=1e-4) 83 | 84 | assert isinstance(agent_type.to_raw(), PIL.Image.Image) 85 | 86 | # Ensure the path remains even after the object deletion 87 | del agent_type 88 | assert os.path.exists(path) 89 | 90 | def test_from_string(self, shared_datadir): 91 | path = shared_datadir / "000000039769.png" 92 | image = PIL.Image.open(path) 93 | 
agent_type = AgentImage(path) 94 | 95 | assert path.samefile(agent_type.to_string()) 96 | assert image == agent_type.to_raw() 97 | 98 | # Ensure the path remains even after the object deletion 99 | del agent_type 100 | assert os.path.exists(path) 101 | 102 | def test_from_image(self, shared_datadir): 103 | path = shared_datadir / "000000039769.png" 104 | image = PIL.Image.open(path) 105 | agent_type = AgentImage(image) 106 | 107 | assert not path.samefile(agent_type.to_string()) 108 | assert image == agent_type.to_raw() 109 | 110 | # Ensure the path remains even after the object deletion 111 | del agent_type 112 | assert os.path.exists(path) 113 | 114 | 115 | class AgentTextTests(unittest.TestCase): 116 | def test_from_string(self): 117 | string = "Hey!" 118 | agent_type = AgentText(string) 119 | 120 | self.assertEqual(string, agent_type.to_string()) 121 | self.assertEqual(string, agent_type.to_raw()) 122 | -------------------------------------------------------------------------------- /examples/open_deep_research/scripts/gaia_scorer.py: -------------------------------------------------------------------------------- 1 | import re 2 | import string 3 | import warnings 4 | 5 | 6 | def normalize_number_str(number_str: str) -> float: 7 | # we replace these common units and commas to allow 8 | # conversion to float 9 | for char in ["$", "%", ","]: 10 | number_str = number_str.replace(char, "") 11 | try: 12 | return float(number_str) 13 | except ValueError: 14 | print(f"String {number_str} cannot be normalized to number str.") 15 | return float("inf") 16 | 17 | 18 | def split_string( 19 | s: str, 20 | char_list: list[str] = [",", ";"], 21 | ) -> list[str]: 22 | pattern = f"[{''.join(char_list)}]" 23 | return re.split(pattern, s) 24 | 25 | 26 | def is_float(element: any) -> bool: 27 | try: 28 | float(element) 29 | return True 30 | except ValueError: 31 | return False 32 | 33 | 34 | def question_scorer( 35 | model_answer: str, 36 | ground_truth: str, 37 | ) -> bool: 38 | # if gt is a number 39 | if is_float(ground_truth): 40 | normalized_answer = normalize_number_str(str(model_answer)) 41 | return normalized_answer == float(ground_truth) 42 | 43 | # if gt is a list 44 | elif any(char in ground_truth for char in [",", ";"]): 45 | # question with the fish: normalization removes punct 46 | 47 | gt_elems = split_string(ground_truth) 48 | ma_elems = split_string(model_answer) 49 | 50 | # check length is the same 51 | if len(gt_elems) != len(ma_elems): 52 | warnings.warn("Answer lists have different lengths, returning False.", UserWarning) 53 | return False 54 | 55 | # compare each element as float or str 56 | comparisons = [] 57 | for ma_elem, gt_elem in zip(ma_elems, gt_elems): 58 | if is_float(gt_elem): 59 | normalized_ma_elem = normalize_number_str(ma_elem) 60 | comparisons.append(normalized_ma_elem == float(gt_elem)) 61 | else: 62 | # we do not remove punct since comparisons can include punct 63 | comparisons.append( 64 | normalize_str(ma_elem, remove_punct=False) == normalize_str(gt_elem, remove_punct=False) 65 | ) 66 | return all(comparisons) 67 | 68 | # if gt is a str 69 | else: 70 | return normalize_str(model_answer) == normalize_str(ground_truth) 71 | 72 | 73 | def check_prediction_contains_answer_letters_in_order(prediction, true_answer): 74 | prediction = prediction.lower() 75 | true_answer = true_answer.lower() 76 | if len(prediction) > len(true_answer) * 3: 77 | return False 78 | i = 0 79 | for letter in true_answer: 80 | if letter in prediction[i:]: 81 | i += 
prediction[i:].index(letter) 82 | else: 83 | return False 84 | return True 85 | 86 | 87 | def check_close_call(prediction, true_answer, is_correct): 88 | if is_correct: 89 | return True 90 | else: 91 | if is_float(true_answer): 92 | return is_correct 93 | else: 94 | if ( 95 | check_prediction_contains_answer_letters_in_order(str(prediction), str(true_answer)) 96 | and len(str(true_answer)) * 0.5 <= len(str(prediction)) <= len(str(true_answer)) * 2 97 | ): 98 | print(f"Close call: {prediction} vs {true_answer}") 99 | return True 100 | else: 101 | return False 102 | 103 | 104 | def normalize_str(input_str, remove_punct=True) -> str: 105 | """ 106 | Normalize a string by: 107 | - Removing all white spaces 108 | - Optionally removing punctuation (if remove_punct is True) 109 | - Converting to lowercase 110 | Parameters: 111 | - input_str: str, the string to normalize 112 | - remove_punct: bool, whether to remove punctuation (default: True) 113 | Returns: 114 | - str, the normalized string 115 | """ 116 | # Remove all white spaces. Required e.g for seagull vs. sea gull 117 | no_spaces = re.sub(r"\s", "", input_str) 118 | 119 | # Remove punctuation, if specified. 120 | if remove_punct: 121 | translator = str.maketrans("", "", string.punctuation) 122 | return no_spaces.lower().translate(translator) 123 | else: 124 | return no_spaces.lower() 125 | -------------------------------------------------------------------------------- /tests/test_remote_executors.py: -------------------------------------------------------------------------------- 1 | from textwrap import dedent 2 | from unittest.mock import MagicMock, patch 3 | 4 | import docker 5 | import PIL.Image 6 | import pytest 7 | 8 | from smolagents.monitoring import AgentLogger, LogLevel 9 | from smolagents.remote_executors import DockerExecutor, E2BExecutor 10 | from smolagents.utils import AgentError 11 | 12 | from .utils.markers import require_run_all 13 | 14 | 15 | class TestE2BExecutorMock: 16 | def test_e2b_executor_instantiation(self): 17 | logger = MagicMock() 18 | with patch("e2b_code_interpreter.Sandbox") as mock_sandbox: 19 | mock_sandbox.return_value.commands.run.return_value.error = None 20 | mock_sandbox.return_value.run_code.return_value.error = None 21 | executor = E2BExecutor( 22 | additional_imports=[], logger=logger, api_key="dummy-api-key", template="dummy-template-id", timeout=60 23 | ) 24 | assert isinstance(executor, E2BExecutor) 25 | assert executor.logger == logger 26 | assert executor.final_answer_pattern.pattern == r"^final_answer\((.*)\)$" 27 | assert executor.sandbox == mock_sandbox.return_value 28 | assert mock_sandbox.call_count == 1 29 | assert mock_sandbox.call_args.kwargs == { 30 | "api_key": "dummy-api-key", 31 | "template": "dummy-template-id", 32 | "timeout": 60, 33 | } 34 | 35 | 36 | @pytest.fixture 37 | def docker_executor(): 38 | executor = DockerExecutor(additional_imports=["pillow", "numpy"], logger=AgentLogger(level=LogLevel.INFO)) 39 | yield executor 40 | executor.delete() 41 | 42 | 43 | @require_run_all 44 | class TestDockerExecutor: 45 | @pytest.fixture(autouse=True) 46 | def set_executor(self, docker_executor): 47 | self.executor = docker_executor 48 | 49 | def test_initialization(self): 50 | """Check if DockerExecutor initializes without errors""" 51 | assert self.executor.container is not None, "Container should be initialized" 52 | 53 | def test_state_persistence(self): 54 | """Test that variables and imports form one snippet persist in the next""" 55 | code_action = "import numpy as np; a = 2" 56 
| self.executor(code_action) 57 | 58 | code_action = "print(np.sqrt(a))" 59 | result, logs, final_answer = self.executor(code_action) 60 | assert "1.41421" in logs 61 | 62 | def test_execute_output(self): 63 | """Test execution that returns a string""" 64 | code_action = 'final_answer("This is the final answer")' 65 | result, logs, final_answer = self.executor(code_action) 66 | assert result == "This is the final answer", "Result should be 'This is the final answer'" 67 | 68 | def test_execute_multiline_output(self): 69 | """Test execution that returns a string""" 70 | code_action = 'result = "This is the final answer"\nfinal_answer(result)' 71 | result, logs, final_answer = self.executor(code_action) 72 | assert result == "This is the final answer", "Result should be 'This is the final answer'" 73 | 74 | def test_execute_image_output(self): 75 | """Test execution that returns a base64 image""" 76 | code_action = dedent(""" 77 | import base64 78 | from PIL import Image 79 | from io import BytesIO 80 | image = Image.new("RGB", (10, 10), (255, 0, 0)) 81 | final_answer(image) 82 | """) 83 | result, logs, final_answer = self.executor(code_action) 84 | assert isinstance(result, PIL.Image.Image), "Result should be a PIL Image" 85 | 86 | def test_syntax_error_handling(self): 87 | """Test handling of syntax errors""" 88 | code_action = 'print("Missing Parenthesis' # Syntax error 89 | with pytest.raises(AgentError) as exception_info: 90 | self.executor(code_action) 91 | assert "SyntaxError" in str(exception_info.value), "Should raise a syntax error" 92 | 93 | def test_cleanup_on_deletion(self): 94 | """Test if Docker container stops and removes on deletion""" 95 | container_id = self.executor.container.id 96 | self.executor.delete() # Trigger cleanup 97 | 98 | client = docker.from_env() 99 | containers = [c.id for c in client.containers.list(all=True)] 100 | assert container_id not in containers, "Container should be removed" 101 | -------------------------------------------------------------------------------- /examples/rag_using_chromadb.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import datasets 4 | from langchain.docstore.document import Document 5 | from langchain.text_splitter import RecursiveCharacterTextSplitter 6 | from langchain_chroma import Chroma 7 | 8 | # from langchain_community.document_loaders import PyPDFLoader 9 | from langchain_huggingface import HuggingFaceEmbeddings 10 | from tqdm import tqdm 11 | from transformers import AutoTokenizer 12 | 13 | # from langchain_openai import OpenAIEmbeddings 14 | from smolagents import LiteLLMModel, Tool 15 | from smolagents.agents import CodeAgent 16 | 17 | 18 | # from smolagents.agents import ToolCallingAgent 19 | 20 | 21 | knowledge_base = datasets.load_dataset("m-ric/huggingface_doc", split="train") 22 | 23 | source_docs = [ 24 | Document(page_content=doc["text"], metadata={"source": doc["source"].split("/")[1]}) for doc in knowledge_base 25 | ] 26 | 27 | ## For your own PDFs, you can use the following code to load them into source_docs 28 | # pdf_directory = "pdfs" 29 | # pdf_files = [ 30 | # os.path.join(pdf_directory, f) 31 | # for f in os.listdir(pdf_directory) 32 | # if f.endswith(".pdf") 33 | # ] 34 | # source_docs = [] 35 | 36 | # for file_path in pdf_files: 37 | # loader = PyPDFLoader(file_path) 38 | # docs.extend(loader.load()) 39 | 40 | text_splitter = RecursiveCharacterTextSplitter.from_huggingface_tokenizer( 41 | AutoTokenizer.from_pretrained("thenlper/gte-small"), 
42 | chunk_size=200, 43 | chunk_overlap=20, 44 | add_start_index=True, 45 | strip_whitespace=True, 46 | separators=["\n\n", "\n", ".", " ", ""], 47 | ) 48 | 49 | # Split docs and keep only unique ones 50 | print("Splitting documents...") 51 | docs_processed = [] 52 | unique_texts = {} 53 | for doc in tqdm(source_docs): 54 | new_docs = text_splitter.split_documents([doc]) 55 | for new_doc in new_docs: 56 | if new_doc.page_content not in unique_texts: 57 | unique_texts[new_doc.page_content] = True 58 | docs_processed.append(new_doc) 59 | 60 | 61 | print("Embedding documents... This should take a few minutes (5 minutes on MacBook with M1 Pro)") 62 | # Initialize embeddings and ChromaDB vector store 63 | embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2") 64 | 65 | 66 | # embeddings = OpenAIEmbeddings(model="text-embedding-3-small") 67 | 68 | vector_store = Chroma.from_documents(docs_processed, embeddings, persist_directory="./chroma_db") 69 | 70 | 71 | class RetrieverTool(Tool): 72 | name = "retriever" 73 | description = ( 74 | "Uses semantic search to retrieve the parts of documentation that could be most relevant to answer your query." 75 | ) 76 | inputs = { 77 | "query": { 78 | "type": "string", 79 | "description": "The query to perform. This should be semantically close to your target documents. Use the affirmative form rather than a question.", 80 | } 81 | } 82 | output_type = "string" 83 | 84 | def __init__(self, vector_store, **kwargs): 85 | super().__init__(**kwargs) 86 | self.vector_store = vector_store 87 | 88 | def forward(self, query: str) -> str: 89 | assert isinstance(query, str), "Your search query must be a string" 90 | docs = self.vector_store.similarity_search(query, k=3) 91 | return "\nRetrieved documents:\n" + "".join( 92 | [f"\n\n===== Document {str(i)} =====\n" + doc.page_content for i, doc in enumerate(docs)] 93 | ) 94 | 95 | 96 | retriever_tool = RetrieverTool(vector_store) 97 | 98 | # Choose which LLM engine to use! 99 | 100 | # from smolagents import HfApiModel 101 | # model = HfApiModel(model_id="meta-llama/Llama-3.3-70B-Instruct") 102 | 103 | # from smolagents import TransformersModel 104 | # model = TransformersModel(model_id="meta-llama/Llama-3.2-2B-Instruct") 105 | 106 | # For anthropic: change model_id below to 'anthropic/claude-3-5-sonnet-20240620' and also change 'os.environ.get("ANTHROPIC_API_KEY")' 107 | model = LiteLLMModel( 108 | model_id="groq/llama-3.3-70b-versatile", 109 | api_key=os.environ.get("GROQ_API_KEY"), 110 | ) 111 | 112 | # # You can also use the ToolCallingAgent class 113 | # agent = ToolCallingAgent( 114 | # tools=[retriever_tool], 115 | # model=model, 116 | # verbose=True, 117 | # ) 118 | 119 | agent = CodeAgent( 120 | tools=[retriever_tool], 121 | model=model, 122 | max_steps=4, 123 | verbosity_level=2, 124 | ) 125 | 126 | agent_output = agent.run("How can I push a model to the Hub?") 127 | 128 | 129 | print("Final output:") 130 | print(agent_output) 131 | -------------------------------------------------------------------------------- /examples/open_deep_research/scripts/reformulator.py: -------------------------------------------------------------------------------- 1 | # Shamelessly stolen from Microsoft Autogen team: thanks to them for this great resource! 
2 | # https://github.com/microsoft/autogen/blob/gaia_multiagent_v01_march_1st/autogen/browser_utils.py 3 | import copy 4 | 5 | from smolagents.models import MessageRole, Model 6 | 7 | 8 | def prepare_response(original_task: str, inner_messages, reformulation_model: Model) -> str: 9 | messages = [ 10 | { 11 | "role": MessageRole.SYSTEM, 12 | "content": [ 13 | { 14 | "type": "text", 15 | "text": f"""Earlier you were asked the following: 16 | 17 | {original_task} 18 | 19 | Your team then worked diligently to address that request. Read below a transcript of that conversation:""", 20 | } 21 | ], 22 | } 23 | ] 24 | 25 | # The first message just repeats the question, so remove it 26 | # if len(inner_messages) > 1: 27 | # del inner_messages[0] 28 | 29 | # copy them to this context 30 | try: 31 | for message in inner_messages: 32 | if not message.get("content"): 33 | continue 34 | message = copy.deepcopy(message) 35 | message["role"] = MessageRole.USER 36 | messages.append(message) 37 | except Exception: 38 | messages += [{"role": MessageRole.ASSISTANT, "content": str(inner_messages)}] 39 | 40 | # ask for the final answer 41 | messages.append( 42 | { 43 | "role": MessageRole.USER, 44 | "content": [ 45 | { 46 | "type": "text", 47 | "text": f""" 48 | Read the above conversation and output a FINAL ANSWER to the question. The question is repeated here for convenience: 49 | 50 | {original_task} 51 | 52 | To output the final answer, use the following template: FINAL ANSWER: [YOUR FINAL ANSWER] 53 | Your FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. 54 | ADDITIONALLY, your FINAL ANSWER MUST adhere to any formatting instructions specified in the original question (e.g., alphabetization, sequencing, units, rounding, decimal places, etc.) 55 | If you are asked for a number, express it numerically (i.e., with digits rather than words), don't use commas, and DO NOT INCLUDE UNITS such as $ or USD or percent signs unless specified otherwise. 56 | If you are asked for a string, don't use articles or abbreviations (e.g. for cities), unless specified otherwise. Don't output any final sentence punctuation such as '.', '!', or '?'. 57 | If you are asked for a comma separated list, apply the above rules depending on whether the elements are numbers or strings. 58 | If you are unable to determine the final answer, output 'FINAL ANSWER: Unable to determine' 59 | """, 60 | } 61 | ], 62 | } 63 | ) 64 | 65 | response = reformulation_model(messages).content 66 | 67 | final_answer = response.split("FINAL ANSWER: ")[-1].strip() 68 | print("> Reformulated answer: ", final_answer) 69 | 70 | # if "unable to determine" in final_answer.lower(): 71 | # messages.append({"role": MessageRole.ASSISTANT, "content": response }) 72 | # messages.append({"role": MessageRole.USER, "content": [{"type": "text", "text": """ 73 | # I understand that a definitive answer could not be determined. Please make a well-informed EDUCATED GUESS based on the conversation. 74 | 75 | # To output the educated guess, use the following template: EDUCATED GUESS: [YOUR EDUCATED GUESS] 76 | # Your EDUCATED GUESS should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. DO NOT OUTPUT 'I don't know', 'Unable to determine', etc. 77 | # ADDITIONALLY, your EDUCATED GUESS MUST adhere to any formatting instructions specified in the original question (e.g., alphabetization, sequencing, units, rounding, decimal places, etc.) 
78 | # If you are asked for a number, express it numerically (i.e., with digits rather than words), don't use commas, and don't include units such as $ or percent signs unless specified otherwise. 79 | # If you are asked for a string, don't use articles or abbreviations (e.g. cit for cities), unless specified otherwise. Don't output any final sentence punctuation such as '.', '!', or '?'. 80 | # If you are asked for a comma separated list, apply the above rules depending on whether the elements are numbers or strings. 81 | # """.strip()}]}) 82 | 83 | # response = model(messages).content 84 | # print("\n>>>Making an educated guess.\n", response) 85 | # final_answer = response.split("EDUCATED GUESS: ")[-1].strip() 86 | return final_answer 87 | -------------------------------------------------------------------------------- /tests/test_tool_validation.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from smolagents.default_tools import DuckDuckGoSearchTool, GoogleSearchTool, SpeechToTextTool, VisitWebpageTool 4 | from smolagents.tool_validation import validate_tool_attributes 5 | from smolagents.tools import Tool, tool 6 | 7 | 8 | UNDEFINED_VARIABLE = "undefined_variable" 9 | 10 | 11 | @pytest.mark.parametrize("tool_class", [DuckDuckGoSearchTool, GoogleSearchTool, SpeechToTextTool, VisitWebpageTool]) 12 | def test_validate_tool_attributes_with_default_tools(tool_class): 13 | assert validate_tool_attributes(tool_class) is None, f"failed for {tool_class.name} tool" 14 | 15 | 16 | class ValidTool(Tool): 17 | name = "valid_tool" 18 | description = "A valid tool" 19 | inputs = {"input": {"type": "string", "description": "input"}} 20 | output_type = "string" 21 | simple_attr = "string" 22 | dict_attr = {"key": "value"} 23 | 24 | def __init__(self, optional_param="default"): 25 | super().__init__() 26 | self.param = optional_param 27 | 28 | def forward(self, input: str) -> str: 29 | return input.upper() 30 | 31 | 32 | @tool 33 | def valid_tool_function(input: str) -> str: 34 | """A valid tool function. 35 | 36 | Args: 37 | input (str): Input string. 
38 | """ 39 | return input.upper() 40 | 41 | 42 | @pytest.mark.parametrize("tool_class", [ValidTool, valid_tool_function.__class__]) 43 | def test_validate_tool_attributes_valid(tool_class): 44 | assert validate_tool_attributes(tool_class) is None 45 | 46 | 47 | class InvalidToolName(Tool): 48 | name = "invalid tool name" 49 | description = "Tool with invalid name" 50 | inputs = {"input": {"type": "string", "description": "input"}} 51 | output_type = "string" 52 | 53 | def __init__(self): 54 | super().__init__() 55 | 56 | def forward(self, input: str) -> str: 57 | return input 58 | 59 | 60 | class InvalidToolComplexAttrs(Tool): 61 | name = "invalid_tool" 62 | description = "Tool with complex class attributes" 63 | inputs = {"input": {"type": "string", "description": "input"}} 64 | output_type = "string" 65 | complex_attr = [x for x in range(3)] # Complex class attribute 66 | 67 | def __init__(self): 68 | super().__init__() 69 | 70 | def forward(self, input: str) -> str: 71 | return input 72 | 73 | 74 | class InvalidToolRequiredParams(Tool): 75 | name = "invalid_tool" 76 | description = "Tool with required params" 77 | inputs = {"input": {"type": "string", "description": "input"}} 78 | output_type = "string" 79 | 80 | def __init__(self, required_param, kwarg1=1): # No default value 81 | super().__init__() 82 | self.param = required_param 83 | 84 | def forward(self, input: str) -> str: 85 | return input 86 | 87 | 88 | class InvalidToolNonLiteralDefaultParam(Tool): 89 | name = "invalid_tool" 90 | description = "Tool with non-literal default parameter value" 91 | inputs = {"input": {"type": "string", "description": "input"}} 92 | output_type = "string" 93 | 94 | def __init__(self, default_param=UNDEFINED_VARIABLE): # UNDEFINED_VARIABLE as default is non-literal 95 | super().__init__() 96 | self.default_param = default_param 97 | 98 | def forward(self, input: str) -> str: 99 | return input 100 | 101 | 102 | class InvalidToolUndefinedNames(Tool): 103 | name = "invalid_tool" 104 | description = "Tool with undefined names" 105 | inputs = {"input": {"type": "string", "description": "input"}} 106 | output_type = "string" 107 | 108 | def forward(self, input: str) -> str: 109 | return UNDEFINED_VARIABLE # Undefined name 110 | 111 | 112 | @pytest.mark.parametrize( 113 | "tool_class, expected_error", 114 | [ 115 | ( 116 | InvalidToolName, 117 | "Class attribute 'name' must be a valid Python identifier and not a reserved keyword, found 'invalid tool name'", 118 | ), 119 | (InvalidToolComplexAttrs, "Complex attributes should be defined in __init__, not as class attributes"), 120 | (InvalidToolRequiredParams, "Parameters in __init__ must have default values, found required parameters"), 121 | ( 122 | InvalidToolNonLiteralDefaultParam, 123 | "Parameters in __init__ must have literal default values, found non-literal defaults", 124 | ), 125 | (InvalidToolUndefinedNames, "Name 'UNDEFINED_VARIABLE' is undefined"), 126 | ], 127 | ) 128 | def test_validate_tool_attributes_exceptions(tool_class, expected_error): 129 | with pytest.raises(ValueError, match=expected_error): 130 | validate_tool_attributes(tool_class) 131 | -------------------------------------------------------------------------------- /docs/source/hi/tutorials/inspect_runs.mdx: -------------------------------------------------------------------------------- 1 | 16 | # OpenTelemetry के साथ runs का निरीक्षण 17 | 18 | [[open-in-colab]] 19 | 20 | > [!TIP] 21 | > यदि आप एजेंट्स बनाने में नए हैं, तो पहले [एजेंट्स का 
परिचय](../conceptual_guides/intro_agents) और [smolagents की गाइडेड टूर](../guided_tour) पढ़ना सुनिश्चित करें। 22 | 23 | ### Agents runs को लॉग क्यों करें? 24 | 25 | Agent runs को डीबग करना जटिल होता है। 26 | 27 | यह सत्यापित करना कठिन है कि एक रन ठीक से चला या नहीं, क्योंकि एजेंट वर्कफ़्लो [डिज़ाइन के अनुसार अप्रत्याशित](../conceptual_guides/intro_agents) होते हैं (यदि वे प्रत्याशित होते, तो आप पुराने अच्छे कोड का ही उपयोग कर रहे होते)। 28 | 29 | और रन का निरीक्षण करना भी कठिन है: मल्टी-स्टेप एजेंट्स जल्दी ही कंसोल को लॉग से भर देते हैं, और अधिकांश त्रुटियां केवल "LLM dumb" प्रकार की त्रुटियां होती हैं, जिनसे LLM अगले चरण में बेहतर कोड या टूल कॉल लिखकर स्वयं को सुधार लेता है। 30 | 31 | इसलिए बाद के निरीक्षण और मॉनिटरिंग के लिए प्रोडक्शन में agent runs को रिकॉर्ड करने के लिए इंस्ट्रुमेंटेशन का उपयोग करना आवश्यक है! 32 | 33 | हमने agent runs को इंस्ट्रुमेंट करने के लिए [OpenTelemetry](https://opentelemetry.io/) मानक को अपनाया है। 34 | 35 | इसका मतलब है कि आप बस कुछ इंस्ट्रुमेंटेशन कोड चला सकते हैं, फिर अपने एजेंट्स को सामान्य रूप से चला सकते हैं, और सब कुछ आपके प्लेटफॉर्म में लॉग हो जाता है। 36 | 37 | यह इस प्रकार होता है: 38 | पहले आवश्यक पैकेज इंस्टॉल करें। यहां हम [Phoenix by Arize AI](https://github.com/Arize-ai/phoenix) इंस्टॉल करते हैं क्योंकि यह लॉग्स को एकत्र और निरीक्षण करने का एक अच्छा समाधान है, लेकिन इस संग्रह और निरीक्षण भाग के लिए आप अन्य OpenTelemetry-कम्पैटिबल प्लेटफॉर्म्स का उपयोग कर सकते हैं। 39 | 40 | ```shell 41 | pip install smolagents 42 | pip install arize-phoenix opentelemetry-sdk opentelemetry-exporter-otlp openinference-instrumentation-smolagents 43 | ``` 44 | 45 | फिर कलेक्टर को बैकग्राउंड में चलाएं। 46 | 47 | ```shell 48 | python -m phoenix.server.main serve 49 | ``` 50 | 51 | अंत में, अपने एजेंट्स को ट्रेस करने और ट्रेस को नीचे परिभाषित एंडपॉइंट पर Phoenix को भेजने के लिए `SmolagentsInstrumentor` को सेट करें। 52 | 53 | ```python 54 | from opentelemetry import trace 55 | from opentelemetry.sdk.trace import TracerProvider 56 | from opentelemetry.sdk.trace.export import BatchSpanProcessor 57 | 58 | from openinference.instrumentation.smolagents import SmolagentsInstrumentor 59 | from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter 60 | from opentelemetry.sdk.trace.export import ConsoleSpanExporter, SimpleSpanProcessor 61 | 62 | endpoint = "http://0.0.0.0:6006/v1/traces" 63 | trace_provider = TracerProvider() 64 | trace_provider.add_span_processor(SimpleSpanProcessor(OTLPSpanExporter(endpoint))) 65 | 66 | SmolagentsInstrumentor().instrument(tracer_provider=trace_provider) 67 | ``` 68 | तब आप अपने एजेंट चला सकते हैं! 69 | 70 | ```py 71 | from smolagents import ( 72 | CodeAgent, 73 | ToolCallingAgent, 74 | DuckDuckGoSearchTool, 75 | VisitWebpageTool, 76 | HfApiModel, 77 | ) 78 | 79 | model = HfApiModel() 80 | 81 | managed_agent = ToolCallingAgent( 82 | tools=[DuckDuckGoSearchTool(), VisitWebpageTool()], 83 | model=model, 84 | name="managed_agent", 85 | description="This is an agent that can do web search.", 86 | ) 87 | 88 | manager_agent = CodeAgent( 89 | tools=[], 90 | model=model, 91 | managed_agents=[managed_agent], 92 | ) 93 | manager_agent.run( 94 | "If the US keeps its 2024 growth rate, how many years will it take for the GDP to double?" 95 | ) 96 | ``` 97 | और फिर आप अपने रन का निरीक्षण करने के लिए `http://0.0.0.0:6006/projects/` पर जा सकते हैं! 98 | 99 | 100 | 101 | आप देख सकते हैं कि CodeAgent ने अपने मैनेज्ड ToolCallingAgent को (वैसे, मैनेज्ड एजेंट एक CodeAgent भी हो सकता था) U.S. 
2024 ग्रोथ रेट के लिए वेब सर्च चलाने के लिए कॉल किया। फिर मैनेज्ड एजेंट ने अपनी रिपोर्ट लौटाई और मैनेजर एजेंट ने अर्थव्यवस्था के दोगुना होने का समय गणना करने के लिए उस पर कार्य किया! अच्छा है, है ना? -------------------------------------------------------------------------------- /examples/open_deep_research/run.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import threading 4 | 5 | from dotenv import load_dotenv 6 | from huggingface_hub import login 7 | from scripts.text_inspector_tool import TextInspectorTool 8 | from scripts.text_web_browser import ( 9 | ArchiveSearchTool, 10 | FinderTool, 11 | FindNextTool, 12 | PageDownTool, 13 | PageUpTool, 14 | SimpleTextBrowser, 15 | VisitTool, 16 | ) 17 | from scripts.visual_qa import visualizer 18 | 19 | from smolagents import ( 20 | CodeAgent, 21 | GoogleSearchTool, 22 | # HfApiModel, 23 | LiteLLMModel, 24 | ToolCallingAgent, 25 | ) 26 | 27 | 28 | AUTHORIZED_IMPORTS = [ 29 | "requests", 30 | "zipfile", 31 | "os", 32 | "pandas", 33 | "numpy", 34 | "sympy", 35 | "json", 36 | "bs4", 37 | "pubchempy", 38 | "xml", 39 | "yahoo_finance", 40 | "Bio", 41 | "sklearn", 42 | "scipy", 43 | "pydub", 44 | "io", 45 | "PIL", 46 | "chess", 47 | "PyPDF2", 48 | "pptx", 49 | "torch", 50 | "datetime", 51 | "fractions", 52 | "csv", 53 | ] 54 | load_dotenv(override=True) 55 | login(os.getenv("HF_TOKEN")) 56 | 57 | append_answer_lock = threading.Lock() 58 | 59 | 60 | def parse_args(): 61 | parser = argparse.ArgumentParser() 62 | parser.add_argument( 63 | "question", type=str, help="for example: 'How many studio albums did Mercedes Sosa release before 2007?'" 64 | ) 65 | parser.add_argument("--model-id", type=str, default="o1") 66 | return parser.parse_args() 67 | 68 | 69 | custom_role_conversions = {"tool-call": "assistant", "tool-response": "user"} 70 | 71 | user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0" 72 | 73 | BROWSER_CONFIG = { 74 | "viewport_size": 1024 * 5, 75 | "downloads_folder": "downloads_folder", 76 | "request_kwargs": { 77 | "headers": {"User-Agent": user_agent}, 78 | "timeout": 300, 79 | }, 80 | "serpapi_key": os.getenv("SERPAPI_API_KEY"), 81 | } 82 | 83 | os.makedirs(f"./{BROWSER_CONFIG['downloads_folder']}", exist_ok=True) 84 | 85 | 86 | def create_agent(model_id="o1"): 87 | model_params = { 88 | "model_id": model_id, 89 | "custom_role_conversions": custom_role_conversions, 90 | "max_completion_tokens": 8192, 91 | } 92 | if model_id == "o1": 93 | model_params["reasoning_effort"] = "high" 94 | model = LiteLLMModel(**model_params) 95 | 96 | text_limit = 100000 97 | browser = SimpleTextBrowser(**BROWSER_CONFIG) 98 | WEB_TOOLS = [ 99 | GoogleSearchTool(provider="serper"), 100 | VisitTool(browser), 101 | PageUpTool(browser), 102 | PageDownTool(browser), 103 | FinderTool(browser), 104 | FindNextTool(browser), 105 | ArchiveSearchTool(browser), 106 | TextInspectorTool(model, text_limit), 107 | ] 108 | text_webbrowser_agent = ToolCallingAgent( 109 | model=model, 110 | tools=WEB_TOOLS, 111 | max_steps=20, 112 | verbosity_level=2, 113 | planning_interval=4, 114 | name="search_agent", 115 | description="""A team member that will search the internet to answer your question. 116 | Ask him for all your questions that require browsing the web. 117 | Provide him as much context as possible, in particular if you need to search on a specific timeframe! 
118 | And don't hesitate to provide him with a complex search task, like finding a difference between two webpages. 119 | Your request must be a real sentence, not a google search! Like "Find me this information (...)" rather than a few keywords. 120 | """, 121 | provide_run_summary=True, 122 | ) 123 | text_webbrowser_agent.prompt_templates["managed_agent"]["task"] += """You can navigate to .txt online files. 124 | If a non-html page is in another format, especially .pdf or a Youtube video, use tool 'inspect_file_as_text' to inspect it. 125 | Additionally, if after some searching you find out that you need more information to answer the question, you can use `final_answer` with your request for clarification as argument to request for more information.""" 126 | 127 | manager_agent = CodeAgent( 128 | model=model, 129 | tools=[visualizer, TextInspectorTool(model, text_limit)], 130 | max_steps=12, 131 | verbosity_level=2, 132 | additional_authorized_imports=AUTHORIZED_IMPORTS, 133 | planning_interval=4, 134 | managed_agents=[text_webbrowser_agent], 135 | ) 136 | 137 | return manager_agent 138 | 139 | 140 | def main(): 141 | args = parse_args() 142 | 143 | agent = create_agent(model_id=args.model_id) 144 | 145 | answer = agent.run(args.question) 146 | 147 | print(f"Got this answer: {answer}") 148 | 149 | 150 | if __name__ == "__main__": 151 | main() 152 | -------------------------------------------------------------------------------- /docs/source/zh/tutorials/memory.mdx: -------------------------------------------------------------------------------- 1 | 16 | # 📚 管理Agent的记忆 17 | 18 | [[open-in-colab]] 19 | 20 | 归根结底,Agent可以定义为由几个简单组件构成:它拥有工具、提示词。最重要的是,它具备对过往步骤的记忆,能够追溯完整的规划、执行和错误历史。 21 | 22 | ### 回放Agent的记忆 23 | 24 | 我们提供了多项功能来审查Agent的过往运行记录。 25 | 26 | 您可以通过插装(instrumentation)在可视化界面中查看Agent的运行过程,该界面支持对特定步骤进行缩放操作,具体方法参见[插装指南](./inspect_runs)。 27 | 28 | 您也可以使用`agent.replay()`方法实现回放: 29 | 30 | 当Agent完成运行后: 31 | ```py 32 | from smolagents import HfApiModel, CodeAgent 33 | 34 | agent = CodeAgent(tools=[], model=HfApiModel(), verbosity_level=0) 35 | 36 | result = agent.run("What's the 20th Fibonacci number?") 37 | ``` 38 | 39 | 若要回放最近一次运行,只需使用: 40 | ```py 41 | agent.replay() 42 | ``` 43 | 44 | ### 动态修改Agent的记忆 45 | 46 | 许多高级应用场景需要对Agent的记忆进行动态修改。 47 | 48 | 您可以通过以下方式访问Agent的记忆: 49 | 50 | ```py 51 | from smolagents import ActionStep 52 | 53 | system_prompt_step = agent.memory.system_prompt 54 | print("The system prompt given to the agent was:") 55 | print(system_prompt_step.system_prompt) 56 | 57 | task_step = agent.memory.steps[0] 58 | print("\n\nThe first task step was:") 59 | print(task_step.task) 60 | 61 | for step in agent.memory.steps: 62 | if isinstance(step, ActionStep): 63 | if step.error is not None: 64 | print(f"\nStep {step.step_number} got this error:\n{step.error}\n") 65 | else: 66 | print(f"\nStep {step.step_number} got these observations:\n{step.observations}\n") 67 | ``` 68 | 69 | 使用`agent.memory.get_full_steps()`可获取完整步骤字典数据。 70 | 71 | 您还可以通过步骤回调(step callbacks)实现记忆的动态修改。 72 | 73 | 步骤回调函数可通过参数直接访问`agent`对象,因此能够访问所有记忆步骤并根据需要进行修改。例如,假设您正在监控网页浏览Agent每个步骤的屏幕截图,希望保留最新截图同时删除旧步骤的图片以节省token消耗。 74 | 75 | 可参考以下代码示例: 76 | _注:此代码片段不完整,部分导入语句和对象定义已精简,完整代码请访问[原始脚本](https://github.com/huggingface/smolagents/blob/main/src/smolagents/vision_web_browser.py)_ 77 | 78 | ```py 79 | import helium 80 | from PIL import Image 81 | from io import BytesIO 82 | from time import sleep 83 | 84 | def update_screenshot(memory_step: ActionStep, agent: CodeAgent) -> None: 85 | sleep(1.0) # Let JavaScript animations 
happen before taking the screenshot 86 | driver = helium.get_driver() 87 | latest_step = memory_step.step_number 88 | for previous_memory_step in agent.memory.steps: # Remove previous screenshots from logs for lean processing 89 | if isinstance(previous_memory_step, ActionStep) and previous_memory_step.step_number <= latest_step - 2: 90 | previous_memory_step.observations_images = None 91 | png_bytes = driver.get_screenshot_as_png() 92 | image = Image.open(BytesIO(png_bytes)) 93 | memory_step.observations_images = [image.copy()] 94 | ``` 95 | 96 | 最后在初始化Agent时,将此函数传入`step_callbacks`参数: 97 | 98 | ```py 99 | CodeAgent( 100 | tools=[DuckDuckGoSearchTool(), go_back, close_popups, search_item_ctrl_f], 101 | model=model, 102 | additional_authorized_imports=["helium"], 103 | step_callbacks=[update_screenshot], 104 | max_steps=20, 105 | verbosity_level=2, 106 | ) 107 | ``` 108 | 109 | 请访问我们的 [vision web browser code](https://github.com/huggingface/smolagents/blob/main/src/smolagents/vision_web_browser.py) 查看完整可运行示例。 110 | 111 | ### 分步运行 Agents 112 | 113 | 当您需要处理耗时数天的工具调用时,这种方式特别有用:您可以逐步执行Agents。这还允许您在每一步更新记忆。 114 | 115 | ```py 116 | from smolagents import HfApiModel, CodeAgent, ActionStep, TaskStep 117 | 118 | agent = CodeAgent(tools=[], model=HfApiModel(), verbosity_level=1) 119 | print(agent.memory.system_prompt) 120 | 121 | task = "What is the 20th Fibonacci number?" 122 | 123 | # You could modify the memory as needed here by inputting the memory of another agent. 124 | # agent.memory.steps = previous_agent.memory.steps 125 | 126 | # Let's start a new task! 127 | agent.memory.steps.append(TaskStep(task=task, task_images=[])) 128 | 129 | final_answer = None 130 | step_number = 1 131 | while final_answer is None and step_number <= 10: 132 | memory_step = ActionStep( 133 | step_number=step_number, 134 | observations_images=[], 135 | ) 136 | # Run one step. 137 | final_answer = agent.step(memory_step) 138 | agent.memory.steps.append(memory_step) 139 | step_number += 1 140 | 141 | # Change the memory as you please! 142 | # For instance to update the latest step: 143 | # agent.memory.steps[-1] = ... 144 | 145 | print("The final answer is:", final_answer) 146 | ``` -------------------------------------------------------------------------------- /examples/open_deep_research/scripts/text_inspector_tool.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from smolagents import Tool 4 | from smolagents.models import MessageRole, Model 5 | 6 | from .mdconvert import MarkdownConverter 7 | 8 | 9 | class TextInspectorTool(Tool): 10 | name = "inspect_file_as_text" 11 | description = """ 12 | You cannot load files yourself: instead call this tool to read a file as markdown text and ask questions about it. 13 | This tool handles the following file extensions: [".html", ".htm", ".xlsx", ".pptx", ".wav", ".mp3", ".m4a", ".flac", ".pdf", ".docx"], and all other types of text files. IT DOES NOT HANDLE IMAGES.""" 14 | 15 | inputs = { 16 | "file_path": { 17 | "description": "The path to the file you want to read as text. Must be a '.something' file, like '.pdf'. If it is an image, use the visualizer tool instead! DO NOT use this tool for an HTML webpage: use the web_search tool instead!", 18 | "type": "string", 19 | }, 20 | "question": { 21 | "description": "[Optional]: Your question, as a natural language sentence. Provide as much context as possible. 
Do not pass this parameter if you just want to directly return the content of the file.", 22 | "type": "string", 23 | "nullable": True, 24 | }, 25 | } 26 | output_type = "string" 27 | md_converter = MarkdownConverter() 28 | 29 | def __init__(self, model: Model, text_limit: int): 30 | super().__init__() 31 | self.model = model 32 | self.text_limit = text_limit 33 | 34 | def forward_initial_exam_mode(self, file_path, question): 35 | result = self.md_converter.convert(file_path) 36 | 37 | if file_path[-4:] in [".png", ".jpg"]: 38 | raise Exception("Cannot use inspect_file_as_text tool with images: use visualizer instead!") 39 | 40 | if ".zip" in file_path: 41 | return result.text_content 42 | 43 | if not question: 44 | return result.text_content 45 | 46 | if len(result.text_content) < 4000: 47 | return "Document content: " + result.text_content 48 | 49 | messages = [ 50 | { 51 | "role": MessageRole.SYSTEM, 52 | "content": [ 53 | { 54 | "type": "text", 55 | "text": "Here is a file:\n### " 56 | + str(result.title) 57 | + "\n\n" 58 | + result.text_content[: self.text_limit], 59 | } 60 | ], 61 | }, 62 | { 63 | "role": MessageRole.USER, 64 | "content": [ 65 | { 66 | "type": "text", 67 | "text": "Now please write a short, 5 sentence caption for this document, that could help someone asking this question: " 68 | + question 69 | + "\n\nDon't answer the question yourself! Just provide useful notes on the document", 70 | } 71 | ], 72 | }, 73 | ] 74 | return self.model(messages).content 75 | 76 | def forward(self, file_path, question: Optional[str] = None) -> str: 77 | result = self.md_converter.convert(file_path) 78 | 79 | if file_path[-4:] in [".png", ".jpg"]: 80 | raise Exception("Cannot use inspect_file_as_text tool with images: use visualizer instead!") 81 | 82 | if ".zip" in file_path: 83 | return result.text_content 84 | 85 | if not question: 86 | return result.text_content 87 | 88 | messages = [ 89 | { 90 | "role": MessageRole.SYSTEM, 91 | "content": [ 92 | { 93 | "type": "text", 94 | "text": "You will have to write a short caption for this file, then answer this question:" 95 | + question, 96 | } 97 | ], 98 | }, 99 | { 100 | "role": MessageRole.USER, 101 | "content": [ 102 | { 103 | "type": "text", 104 | "text": "Here is the complete file:\n### " 105 | + str(result.title) 106 | + "\n\n" 107 | + result.text_content[: self.text_limit], 108 | } 109 | ], 110 | }, 111 | { 112 | "role": MessageRole.USER, 113 | "content": [ 114 | { 115 | "type": "text", 116 | "text": "Now answer the question below. Use these three headings: '1. Short answer', '2. Extremely detailed answer', '3. Additional Context on the document and question asked'." 117 | + question, 118 | } 119 | ], 120 | }, 121 | ] 122 | return self.model(messages).content 123 | -------------------------------------------------------------------------------- /tests/test_default_tools.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2024 HuggingFace Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | import unittest 16 | 17 | import pytest 18 | 19 | from smolagents.agent_types import _AGENT_TYPE_MAPPING 20 | from smolagents.default_tools import ( 21 | DuckDuckGoSearchTool, 22 | PythonInterpreterTool, 23 | SpeechToTextTool, 24 | VisitWebpageTool, 25 | WikipediaSearchTool, 26 | ) 27 | 28 | from .test_tools import ToolTesterMixin 29 | 30 | 31 | class DefaultToolTests(unittest.TestCase): 32 | def test_visit_webpage(self): 33 | arguments = {"url": "https://en.wikipedia.org/wiki/United_States_Secretary_of_Homeland_Security"} 34 | result = VisitWebpageTool()(arguments) 35 | assert isinstance(result, str) 36 | assert "* [About Wikipedia](/wiki/Wikipedia:About)" in result # Proper wikipedia pages have an About 37 | 38 | def test_ddgs_with_kwargs(self): 39 | result = DuckDuckGoSearchTool(timeout=20)("DeepSeek parent company") 40 | assert isinstance(result, str) 41 | 42 | 43 | class TestPythonInterpreterTool(ToolTesterMixin): 44 | def setup_method(self): 45 | self.tool = PythonInterpreterTool(authorized_imports=["numpy"]) 46 | self.tool.setup() 47 | 48 | def test_exact_match_arg(self): 49 | result = self.tool("(2 / 2) * 4") 50 | assert result == "Stdout:\n\nOutput: 4.0" 51 | 52 | def test_exact_match_kwarg(self): 53 | result = self.tool(code="(2 / 2) * 4") 54 | assert result == "Stdout:\n\nOutput: 4.0" 55 | 56 | def test_agent_type_output(self): 57 | inputs = ["2 * 2"] 58 | output = self.tool(*inputs, sanitize_inputs_outputs=True) 59 | output_type = _AGENT_TYPE_MAPPING[self.tool.output_type] 60 | assert isinstance(output, output_type) 61 | 62 | def test_agent_types_inputs(self): 63 | inputs = ["2 * 2"] 64 | _inputs = [] 65 | 66 | for _input, expected_input in zip(inputs, self.tool.inputs.values()): 67 | input_type = expected_input["type"] 68 | if isinstance(input_type, list): 69 | _inputs.append([_AGENT_TYPE_MAPPING[_input_type](_input) for _input_type in input_type]) 70 | else: 71 | _inputs.append(_AGENT_TYPE_MAPPING[input_type](_input)) 72 | 73 | # Should not raise an error 74 | output = self.tool(*inputs, sanitize_inputs_outputs=True) 75 | output_type = _AGENT_TYPE_MAPPING[self.tool.output_type] 76 | assert isinstance(output, output_type) 77 | 78 | def test_imports_work(self): 79 | result = self.tool("import numpy as np") 80 | assert "import from numpy is not allowed" not in result.lower() 81 | 82 | def test_unauthorized_imports_fail(self): 83 | with pytest.raises(Exception) as e: 84 | self.tool("import sympy as sp") 85 | assert "sympy" in str(e).lower() 86 | 87 | 88 | class TestSpeechToTextTool: 89 | def test_new_instance(self): 90 | from transformers.models.whisper import WhisperForConditionalGeneration, WhisperProcessor 91 | 92 | tool = SpeechToTextTool() 93 | assert tool is not None 94 | assert tool.pre_processor_class == WhisperProcessor 95 | assert tool.model_class == WhisperForConditionalGeneration 96 | 97 | 98 | @pytest.mark.parametrize( 99 | "language, content_type, extract_format, query", 100 | [ 101 | ("en", "summary", "HTML", "Python_(programming_language)"), # English, Summary Mode, HTML format 102 | ("en", "text", "WIKI", "Python_(programming_language)"), # English, Full Text Mode, WIKI format 103 | ("es", "summary", "HTML", "Python_(lenguaje_de_programación)"), # Spanish, Summary Mode, HTML format 104 | ("es", "text", "WIKI", "Python_(lenguaje_de_programación)"), # Spanish, Full Text Mode, WIKI format 105 | ], 106 | ) 107 | def test_wikipedia_search(language, 
content_type, extract_format, query): 108 | tool = WikipediaSearchTool( 109 | user_agent="TestAgent (test@example.com)", 110 | language=language, 111 | content_type=content_type, 112 | extract_format=extract_format, 113 | ) 114 | 115 | result = tool.forward(query) 116 | 117 | assert isinstance(result, str), "Output should be a string" 118 | assert "✅ **Wikipedia Page:**" in result, "Response should contain Wikipedia page title" 119 | assert "🔗 **Read more:**" in result, "Response should contain Wikipedia page URL" 120 | 121 | if content_type == "summary": 122 | assert len(result.split()) < 1000, "Summary mode should return a shorter text" 123 | if content_type == "text": 124 | assert len(result.split()) > 1000, "Full text mode should return a longer text" 125 | -------------------------------------------------------------------------------- /src/smolagents/cli.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf-8 3 | 4 | # Copyright 2025 The HuggingFace Inc. team. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | import argparse 18 | import os 19 | 20 | from dotenv import load_dotenv 21 | 22 | from smolagents import CodeAgent, HfApiModel, LiteLLMModel, Model, OpenAIServerModel, Tool, TransformersModel 23 | from smolagents.default_tools import TOOL_MAPPING 24 | 25 | 26 | leopard_prompt = "How many seconds would it take for a leopard at full speed to run through Pont des Arts?" 
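# Example invocation, assuming the package is installed (the console script is typically named `smolagent`):
#   smolagent "How many seconds would it take for a leopard at full speed to run through Pont des Arts?" --model-type HfApiModel --tools web_search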
27 | 28 | 29 | def parse_arguments(): 30 | parser = argparse.ArgumentParser(description="Run a CodeAgent with all specified parameters") 31 | parser.add_argument( 32 | "prompt", 33 | type=str, 34 | nargs="?", # Makes it optional 35 | default=leopard_prompt, 36 | help="The prompt to run with the agent", 37 | ) 38 | parser.add_argument( 39 | "--model-type", 40 | type=str, 41 | default="HfApiModel", 42 | help="The model type to use (e.g., HfApiModel, OpenAIServerModel, LiteLLMModel, TransformersModel)", 43 | ) 44 | parser.add_argument( 45 | "--model-id", 46 | type=str, 47 | default="Qwen/Qwen2.5-Coder-32B-Instruct", 48 | help="The model ID to use for the specified model type", 49 | ) 50 | parser.add_argument( 51 | "--imports", 52 | nargs="*", # accepts zero or more arguments 53 | default=[], 54 | help="Space-separated list of imports to authorize (e.g., 'numpy pandas')", 55 | ) 56 | parser.add_argument( 57 | "--tools", 58 | nargs="*", 59 | default=["web_search"], 60 | help="Space-separated list of tools that the agent can use (e.g., 'tool1 tool2 tool3')", 61 | ) 62 | parser.add_argument( 63 | "--verbosity-level", 64 | type=int, 65 | default=1, 66 | help="The verbosity level, as an int in [0, 1, 2].", 67 | ) 68 | group = parser.add_argument_group("api options", "Options for API-based model types") 69 | group.add_argument( 70 | "--api-base", 71 | type=str, 72 | help="The base URL for the model", 73 | ) 74 | group.add_argument( 75 | "--api-key", 76 | type=str, 77 | help="The API key for the model", 78 | ) 79 | return parser.parse_args() 80 | 81 | 82 | def load_model(model_type: str, model_id: str, api_base: str | None = None, api_key: str | None = None) -> Model: 83 | if model_type == "OpenAIServerModel": 84 | return OpenAIServerModel( 85 | api_key=api_key or os.getenv("FIREWORKS_API_KEY"), 86 | api_base=api_base or "https://api.fireworks.ai/inference/v1", 87 | model_id=model_id, 88 | ) 89 | elif model_type == "LiteLLMModel": 90 | return LiteLLMModel( 91 | model_id=model_id, 92 | api_key=api_key, 93 | api_base=api_base, 94 | ) 95 | elif model_type == "TransformersModel": 96 | return TransformersModel(model_id=model_id, device_map="auto") 97 | elif model_type == "HfApiModel": 98 | return HfApiModel( 99 | model_id=model_id, 100 | token=api_key or os.getenv("HF_API_KEY"), 101 | ) 102 | else: 103 | raise ValueError(f"Unsupported model type: {model_type}") 104 | 105 | 106 | def run_smolagent( 107 | prompt: str, 108 | tools: list[str], 109 | model_type: str, 110 | model_id: str, 111 | api_base: str | None = None, 112 | api_key: str | None = None, 113 | imports: list[str] | None = None, 114 | ) -> None: 115 | load_dotenv() 116 | 117 | model = load_model(model_type, model_id, api_base=api_base, api_key=api_key) 118 | 119 | available_tools = [] 120 | for tool_name in tools: 121 | if "/" in tool_name: 122 | available_tools.append(Tool.from_space(tool_name)) 123 | else: 124 | if tool_name in TOOL_MAPPING: 125 | available_tools.append(TOOL_MAPPING[tool_name]()) 126 | else: 127 | raise ValueError(f"Tool {tool_name} is not recognized either as a default tool or a Space.") 128 | 129 | print(f"Running agent with these tools: {tools}") 130 | agent = CodeAgent(tools=available_tools, model=model, additional_authorized_imports=imports) 131 | 132 | agent.run(prompt) 133 | 134 | 135 | def main() -> None: 136 | args = parse_arguments() 137 | run_smolagent( 138 | args.prompt, 139 | args.tools, 140 | args.model_type, 141 | args.model_id, 142 | api_base=args.api_base, 143 | api_key=args.api_key, 144 | 
imports=args.imports, 145 | ) 146 | 147 | 148 | if __name__ == "__main__": 149 | main() 150 | -------------------------------------------------------------------------------- /tests/test_cli.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import patch 2 | 3 | import pytest 4 | 5 | from smolagents.cli import load_model 6 | from smolagents.local_python_executor import LocalPythonExecutor 7 | from smolagents.models import HfApiModel, LiteLLMModel, OpenAIServerModel, TransformersModel 8 | 9 | 10 | @pytest.fixture 11 | def set_env_vars(monkeypatch): 12 | monkeypatch.setenv("FIREWORKS_API_KEY", "test_fireworks_api_key") 13 | monkeypatch.setenv("HF_TOKEN", "test_hf_api_key") 14 | 15 | 16 | def test_load_model_openai_server_model(set_env_vars): 17 | with patch("openai.OpenAI") as MockOpenAI: 18 | model = load_model("OpenAIServerModel", "test_model_id") 19 | assert isinstance(model, OpenAIServerModel) 20 | assert model.model_id == "test_model_id" 21 | assert MockOpenAI.call_count == 1 22 | assert MockOpenAI.call_args.kwargs["base_url"] == "https://api.fireworks.ai/inference/v1" 23 | assert MockOpenAI.call_args.kwargs["api_key"] == "test_fireworks_api_key" 24 | 25 | 26 | def test_load_model_litellm_model(): 27 | model = load_model("LiteLLMModel", "test_model_id", api_key="test_api_key", api_base="https://api.test.com") 28 | assert isinstance(model, LiteLLMModel) 29 | assert model.api_key == "test_api_key" 30 | assert model.api_base == "https://api.test.com" 31 | assert model.model_id == "test_model_id" 32 | 33 | 34 | def test_load_model_transformers_model(): 35 | with ( 36 | patch( 37 | "transformers.AutoModelForImageTextToText.from_pretrained", 38 | side_effect=ValueError("Unrecognized configuration class"), 39 | ), 40 | patch("transformers.AutoModelForCausalLM.from_pretrained"), 41 | patch("transformers.AutoTokenizer.from_pretrained"), 42 | ): 43 | model = load_model("TransformersModel", "test_model_id") 44 | assert isinstance(model, TransformersModel) 45 | assert model.model_id == "test_model_id" 46 | 47 | 48 | def test_load_model_hf_api_model(set_env_vars): 49 | with patch("huggingface_hub.InferenceClient") as huggingface_hub_InferenceClient: 50 | model = load_model("HfApiModel", "test_model_id") 51 | assert isinstance(model, HfApiModel) 52 | assert model.model_id == "test_model_id" 53 | assert huggingface_hub_InferenceClient.call_count == 1 54 | assert huggingface_hub_InferenceClient.call_args.kwargs["token"] == "test_hf_api_key" 55 | 56 | 57 | def test_load_model_invalid_model_type(): 58 | with pytest.raises(ValueError, match="Unsupported model type: InvalidModel"): 59 | load_model("InvalidModel", "test_model_id") 60 | 61 | 62 | def test_cli_main(capsys): 63 | with patch("smolagents.cli.load_model") as mock_load_model: 64 | mock_load_model.return_value = "mock_model" 65 | with patch("smolagents.cli.CodeAgent") as mock_code_agent: 66 | from smolagents.cli import run_smolagent 67 | 68 | run_smolagent("test_prompt", [], "HfApiModel", "test_model_id") 69 | # load_model 70 | assert len(mock_load_model.call_args_list) == 1 71 | assert mock_load_model.call_args.args == ("HfApiModel", "test_model_id") 72 | assert mock_load_model.call_args.kwargs == {"api_base": None, "api_key": None} 73 | # CodeAgent 74 | assert len(mock_code_agent.call_args_list) == 1 75 | assert mock_code_agent.call_args.args == () 76 | assert mock_code_agent.call_args.kwargs == { 77 | "tools": [], 78 | "model": "mock_model", 79 | "additional_authorized_imports": None, 80 | 
} 81 | # agent.run 82 | assert len(mock_code_agent.return_value.run.call_args_list) == 1 83 | assert mock_code_agent.return_value.run.call_args.args == ("test_prompt",) 84 | # print 85 | captured = capsys.readouterr() 86 | assert "Running agent with these tools: []" in captured.out 87 | 88 | 89 | def test_vision_web_browser_main(): 90 | with patch("smolagents.vision_web_browser.helium"): 91 | with patch("smolagents.vision_web_browser.load_model") as mock_load_model: 92 | mock_load_model.return_value = "mock_model" 93 | with patch("smolagents.vision_web_browser.CodeAgent") as mock_code_agent: 94 | from smolagents.vision_web_browser import helium_instructions, run_webagent 95 | 96 | run_webagent("test_prompt", "HfApiModel", "test_model_id") 97 | # load_model 98 | assert len(mock_load_model.call_args_list) == 1 99 | assert mock_load_model.call_args.args == ("HfApiModel", "test_model_id") 100 | # CodeAgent 101 | assert len(mock_code_agent.call_args_list) == 1 102 | assert mock_code_agent.call_args.args == () 103 | assert len(mock_code_agent.call_args.kwargs["tools"]) == 4 104 | assert mock_code_agent.call_args.kwargs["model"] == "mock_model" 105 | assert mock_code_agent.call_args.kwargs["additional_authorized_imports"] == ["helium"] 106 | # agent.python_executor 107 | assert len(mock_code_agent.return_value.python_executor.call_args_list) == 1 108 | assert mock_code_agent.return_value.python_executor.call_args.args == ("from helium import *",) 109 | assert LocalPythonExecutor(["helium"])("from helium import *") == (None, "", False) 110 | # agent.run 111 | assert len(mock_code_agent.return_value.run.call_args_list) == 1 112 | assert mock_code_agent.return_value.run.call_args.args == ("test_prompt" + helium_instructions,) 113 | -------------------------------------------------------------------------------- /docs/source/hi/tutorials/secure_code_execution.mdx: -------------------------------------------------------------------------------- 1 | 16 | # सुरक्षित कोड एक्जीक्यूशन 17 | 18 | [[open-in-colab]] 19 | 20 | > [!TIP] 21 | > यदि आप एजेंट्स बनाने में नए हैं, तो सबसे पहले [एजेंट्स का परिचय](../conceptual_guides/intro_agents) और [smolagents की गाइडेड टूर](../guided_tour) पढ़ना सुनिश्चित करें। 22 | 23 | ### कोड Agents 24 | 25 | [कई](https://huggingface.co/papers/2402.01030) [शोध](https://huggingface.co/papers/2411.01747) [पत्रों](https://huggingface.co/papers/2401.00812) ने दिखाया है कि LLM द्वारा अपनी क्रियाओं (टूल कॉल्स) को कोड में लिखना, टूल कॉलिंग के वर्तमान मानक प्रारूप से बहुत बेहतर है, जो industry में "टूल्स नेम्स और आर्ग्यूमेंट्स को JSON के रूप में लिखने" के विभिन्न रूप हैं। 26 | 27 | कोड बेहतर क्यों है? क्योंकि हमने अपनी कोड भाषाओं को विशेष रूप से कंप्यूटर द्वारा की जाने वाली क्रियाओं को व्यक्त करने के लिए तैयार किया है। यदि JSON स्निपेट्स एक बेहतर तरीका होता, तो यह पैकेज JSON स्निपेट्स में लिखा गया होता और शैतान हम पर हंस रहा होता। 28 | 29 | कोड कंप्यूटर पर क्रियाएँ व्यक्त करने का बेहतर तरीका है। इसमें बेहतर है: 30 | - **कंपोज़ेबिलिटी:** क्या आप JSON क्रियाओं को एक-दूसरे के भीतर नेस्ट कर सकते हैं, या बाद में पुन: उपयोग करने के लिए JSON क्रियाओं का एक सेट परिभाषित कर सकते हैं, जैसे आप बस एक पायथन फ़ंक्शन परिभाषित कर सकते हैं? 31 | - **ऑब्जेक्ट प्रबंधन:** JSON में `generate_image` जैसी क्रिया का आउटपुट कैसे स्टोर करें? 32 | - **सामान्यता:** कोड किसी भी कंप्यूटर कार्य को व्यक्त करने के लिए बनाया गया है। 33 | - **LLM प्रशिक्षण कॉर्पस में प्रतिनिधित्व:** क्यों न इस आशीर्वाद का लाभ उठाएं कि उच्च गुणवत्ता वाले कोड उदाहरण पहले से ही LLM प्रशिक्षण डेटा में शामिल हैं? 
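उदाहरण के लिए, नीचे एक छोटा सा काल्पनिक स्केच है जो दिखाता है कि कोड में लिखी गई क्रियाएँ कितनी आसानी से compose और reuse की जा सकती हैं — JSON स्निपेट्स में इस तरह की नेस्टिंग करना कहीं अधिक कठिन है (यहाँ `get_weather` और `weather_report` केवल उदाहरण के लिए बनाए गए काल्पनिक फ़ंक्शन हैं, किसी वास्तविक टूल का हिस्सा नहीं):

```py
# काल्पनिक उदाहरण: वास्तविक API कॉल की जगह एक डमी वैल्यू लौटाता है
def get_weather(city: str) -> str:
    return f"{city}: 21°C, साफ़ आसमान"


# क्रियाओं को नेस्ट और पुन: उपयोग करना — यही कंपोज़ेबिलिटी है
def weather_report(cities: list[str]) -> str:
    return "\n".join(get_weather(city) for city in cities)


print(weather_report(["Paris", "Tokyo"]))
```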
34 | 35 | यह नीचे दी गई छवि में दर्शाया गया है, जो [Executable Code Actions Elicit Better LLM Agents](https://huggingface.co/papers/2402.01030) से ली गई है। 36 | 37 | 38 | 39 | यही कारण है कि हमने कोड एजेंट्स, इस मामले में पायथन एजेंट्स पर जोर दिया, जिसका मतलब सुरक्षित पायथन इंटरप्रेटर बनाने पर अधिक प्रयास करना था। 40 | 41 | ### लोकल पायथन इंटरप्रेटर 42 | 43 | डिफ़ॉल्ट रूप से, `CodeAgent` LLM-जनरेटेड कोड को आपके एनवायरनमेंट में चलाता है। 44 | यह एक्जीक्यूशन वैनिला पायथन इंटरप्रेटर द्वारा नहीं किया जाता: हमने एक अधिक सुरक्षित `LocalPythonExecutor` को शुरू से फिर से बनाया है। 45 | यह इंटरप्रेटर सुरक्षा के लिए डिज़ाइन किया गया है: 46 | - इम्पोर्ट्स को उपयोगकर्ता द्वारा स्पष्ट रूप से पास की गई सूची तक सीमित करना 47 | - इनफिनिट लूप्स और रिसोर्स ब्लोटिंग को रोकने के लिए ऑपरेशंस की संख्या को कैप करना 48 | - कोई भी ऐसा ऑपरेशन नहीं करेगा जो पूर्व-परिभाषित नहीं है 49 | 50 | हमने इसे कई उपयोग मामलों में इस्तेमाल किया है, और कभी भी एनवायरनमेंट को कोई नुकसान नहीं देखा। 51 | 52 | हालांकि यह समाधान पूरी तरह से सुरक्षित नहीं है: कोई ऐसे अवसरों की कल्पना कर सकता है जहां दुर्भावनापूर्ण कार्यों के लिए फाइन-ट्यून किए गए LLM अभी भी आपके एनवायरनमेंट को नुकसान पहुंचा सकते हैं। उदाहरण के लिए यदि आपने छवियों को प्रोसेस करने के लिए `Pillow` जैसे मासूम पैकेज की अनुमति दी है, तो LLM आपकी हार्ड ड्राइव को ब्लोट करने के लिए हजारों छवियों को सेव कर सकता है। 53 | यदि आपने खुद LLM इंजन चुना है तो यह निश्चित रूप से संभावित नहीं है, लेकिन यह हो सकता है। 54 | 55 | तो यदि आप अतिरिक्त सावधानी बरतना चाहते हैं, तो आप नीचे वर्णित रिमोट कोड एक्जीक्यूशन विकल्प का उपयोग कर सकते हैं। 56 | 57 | ### E2B कोड एक्जीक्यूटर 58 | 59 | अधिकतम सुरक्षा के लिए, आप कोड को सैंडबॉक्स्ड एनवायरनमेंट में चलाने के लिए E2B के साथ हमारे एकीकरण का उपयोग कर सकते हैं। यह एक रिमोट एक्जीक्यूशन सेवा है जो आपके कोड को एक आइसोलेटेड कंटेनर में चलाती है, जिससे कोड का आपके स्थानीय एनवायरनमेंट को प्रभावित करना असंभव हो जाता है। 60 | 61 | इसके लिए, आपको अपना E2B अकाउंट सेटअप करने और अपने एनवायरनमेंट वेरिएबल्स में अपना `E2B_API_KEY` सेट करने की आवश्यकता होगी। अधिक जानकारी के लिए [E2B की क्विकस्टार्ट डॉक्यूमेंटेशन](https://e2b.dev/docs/quickstart) पर जाएं। 62 | 63 | फिर आप इसे `pip install e2b-code-interpreter python-dotenv` के साथ इंस्टॉल कर सकते हैं। 64 | 65 | अब आप तैयार हैं! 66 | 67 | कोड एक्जीक्यूटर को E2B पर सेट करने के लिए, बस अपने `CodeAgent` को इनिशियलाइज़ करते समय `executor_type="e2b"` फ्लैग पास करें। 68 | ध्यान दें कि आपको `additional_authorized_imports` में सभी टूल की डिपेंडेंसीज़ जोड़नी चाहिए, ताकि एक्जीक्यूटर उन्हें इंस्टॉल करे। 69 | 70 | ```py 71 | from smolagents import CodeAgent, VisitWebpageTool, HfApiModel 72 | agent = CodeAgent( 73 | tools = [VisitWebpageTool()], 74 | model=HfApiModel(), 75 | additional_authorized_imports=["requests", "markdownify"], 76 | executor_type="e2b" 77 | ) 78 | 79 | agent.run("What was Abraham Lincoln's preferred pet?") 80 | ``` 81 | 82 | E2B कोड एक्जीक्यूशन वर्तमान में मल्टी-एजेंट्स के साथ काम नहीं करता है - क्योंकि कोड ब्लॉब में एक एजेंट कॉल करना जो रिमोटली एक्जीक्यूट किया जाना चाहिए, यह एक गड़बड़ है। लेकिन हम इसे जोड़ने पर काम कर रहे हैं! 
83 | -------------------------------------------------------------------------------- /docs/source/zh/reference/models.mdx: -------------------------------------------------------------------------------- 1 | 2 | 17 | # 模型 18 | 19 | 20 | 21 | Smolagents 是一个实验性 API,其可能会随时发生更改。由于 API 或底层模型可能会变化,智能体返回的结果可能会有所不同。 22 | 23 | 24 | 25 | 要了解有关智能体和工具的更多信息,请务必阅读[入门指南](../index)。此页面包含底层类的 API 文档。 26 | 27 | ## 模型 28 | 29 | 您可以自由创建和使用自己的模型为智能体提供支持。 30 | 31 | 您可以使用任何 `model` 可调用对象作为智能体的模型,只要满足以下条件: 32 | 1. 它遵循[消息格式](./chat_templating)(`List[Dict[str, str]]`),将其作为输入 `messages`,并返回一个 `str`。 33 | 2. 它在生成的序列到达 `stop_sequences` 参数中指定的内容之前停止生成输出。 34 | 35 | 要定义您的 LLM,可以创建一个 `custom_model` 方法,该方法接受一个 [messages](./chat_templating) 列表,并返回一个包含 `.content` 属性的对象,其中包含生成的文本。此可调用对象还需要接受一个 `stop_sequences` 参数,用于指示何时停止生成。 36 | 37 | ```python 38 | from huggingface_hub import login, InferenceClient 39 | 40 | login("") 41 | 42 | model_id = "meta-llama/Llama-3.3-70B-Instruct" 43 | 44 | client = InferenceClient(model=model_id) 45 | 46 | def custom_model(messages, stop_sequences=["Task"]): 47 | response = client.chat_completion(messages, stop=stop_sequences, max_tokens=1000) 48 | answer = response.choices[0].message 49 | return answer 50 | ``` 51 | 52 | 此外,`custom_model` 还可以接受一个 `grammar` 参数。如果在智能体初始化时指定了 `grammar`,则此参数将在调用模型时传递,以便进行[约束生成](https://huggingface.co/docs/text-generation-inference/conceptual/guidance),从而强制生成格式正确的智能体输出。 53 | 54 | ### TransformersModel 55 | 56 | 为了方便起见,我们添加了一个 `TransformersModel`,该模型通过为初始化时指定的 `model_id` 构建一个本地 `transformers` pipeline 来实现上述功能。 57 | 58 | ```python 59 | from smolagents import TransformersModel 60 | 61 | model = TransformersModel(model_id="HuggingFaceTB/SmolLM-135M-Instruct") 62 | 63 | print(model([{"role": "user", "content": [{"type": "text", "text": "Ok!"}]}], stop_sequences=["great"])) 64 | ``` 65 | ```text 66 | >>> What a 67 | ``` 68 | 69 | > [!TIP] 70 | > 您必须在机器上安装 `transformers` 和 `torch`。如果尚未安装,请运行 `pip install smolagents[transformers]`。 71 | 72 | [[autodoc]] TransformersModel 73 | 74 | ### HfApiModel 75 | 76 | `HfApiModel` 封装了 huggingface_hub 的 [InferenceClient](https://huggingface.co/docs/huggingface_hub/main/en/guides/inference),用于执行 LLM。它支持 HF 的 [Inference API](https://huggingface.co/docs/api-inference/index) 以及 Hub 上所有可用的[Inference Providers](https://huggingface.co/blog/inference-providers)。 77 | 78 | ```python 79 | from smolagents import HfApiModel 80 | 81 | messages = [ 82 | {"role": "user", "content": [{"type": "text", "text": "Hello, how are you?"}]} 83 | ] 84 | 85 | model = HfApiModel() 86 | print(model(messages)) 87 | ``` 88 | ```text 89 | >>> Of course! If you change your mind, feel free to reach out. Take care! 
90 | ``` 91 | [[autodoc]] HfApiModel 92 | 93 | ### LiteLLMModel 94 | 95 | `LiteLLMModel` 利用 [LiteLLM](https://www.litellm.ai/) 支持来自不同提供商的 100+ 个 LLM。您可以在模型初始化时传递 `kwargs`,这些参数将在每次使用模型时被使用,例如下面的示例中传递了 `temperature`。 96 | 97 | ```python 98 | from smolagents import LiteLLMModel 99 | 100 | messages = [ 101 | {"role": "user", "content": [{"type": "text", "text": "Hello, how are you?"}]} 102 | ] 103 | 104 | model = LiteLLMModel(model_id="anthropic/claude-3-5-sonnet-latest", temperature=0.2, max_tokens=10) 105 | print(model(messages)) 106 | ``` 107 | 108 | [[autodoc]] LiteLLMModel 109 | 110 | ### OpenAIServerModel 111 | 112 | 此类允许您调用任何 OpenAIServer 兼容模型。 113 | 以下是设置方法(您可以自定义 `api_base` URL 指向其他服务器): 114 | ```py 115 | import os 116 | from smolagents import OpenAIServerModel 117 | 118 | model = OpenAIServerModel( 119 | model_id="gpt-4o", 120 | api_base="https://api.openai.com/v1", 121 | api_key=os.environ["OPENAI_API_KEY"], 122 | ) 123 | ``` 124 | 125 | [[autodoc]] OpenAIServerModel 126 | 127 | ### AzureOpenAIServerModel 128 | 129 | `AzureOpenAIServerModel` 允许您连接到任何 Azure OpenAI 部署。 130 | 131 | 下面是设置示例,请注意,如果已经设置了相应的环境变量,您可以省略 `azure_endpoint`、`api_key` 和 `api_version` 参数——环境变量包括 `AZURE_OPENAI_ENDPOINT`、`AZURE_OPENAI_API_KEY` 和 `OPENAI_API_VERSION`。 132 | 133 | 请注意,`OPENAI_API_VERSION` 没有 `AZURE_` 前缀,这是由于底层 [openai](https://github.com/openai/openai-python) 包的设计所致。 134 | 135 | ```py 136 | import os 137 | 138 | from smolagents import AzureOpenAIServerModel 139 | 140 | model = AzureOpenAIServerModel( 141 | model_id = os.environ.get("AZURE_OPENAI_MODEL"), 142 | azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"), 143 | api_key=os.environ.get("AZURE_OPENAI_API_KEY"), 144 | api_version=os.environ.get("OPENAI_API_VERSION") 145 | ) 146 | ``` 147 | 148 | [[autodoc]] AzureOpenAIServerModel 149 | 150 | ### MLXModel 151 | 152 | ```python 153 | from smolagents import MLXModel 154 | 155 | model = MLXModel(model_id="HuggingFaceTB/SmolLM-135M-Instruct") 156 | 157 | print(model([{"role": "user", "content": "Ok!"}], stop_sequences=["great"])) 158 | ``` 159 | ```text 160 | >>> What a 161 | ``` 162 | 163 | > [!TIP] 164 | > 您必须在机器上安装 `mlx-lm`。如果尚未安装,请运行 `pip install smolagents[mlx-lm]`。 165 | 166 | [[autodoc]] MLXModel 167 | -------------------------------------------------------------------------------- /docs/source/zh/conceptual_guides/intro_agents.mdx: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Agent 简介 18 | 19 | > [!TIP] 20 | > 译者注:Agent 的业内术语是“智能体”。本译文将保留 agent,不作翻译,以带来更高效的阅读体验。(在中文为主的文章中,It's easier to 注意到英文。Attention Is All You Need!) 21 | 22 | ## 🤔 什么是 agent? 
23 | 24 | 任何使用 AI 的高效系统都需要为 LLM 提供某种访问现实世界的方式:例如调用搜索工具获取外部信息,或者操作某些程序以完成任务。换句话说,LLM 应该具有 **_Agent 能力_**。Agent 程序是 LLM 通往外部世界的门户。 25 | 26 | > [!TIP] 27 | > AI agent 是 **LLM 输出控制工作流的程序**。 28 | 29 | 任何利用 LLM 的系统都会将 LLM 输出集成到代码中。LLM 输入对代码工作流的影响程度就是 LLM 在系统中的 agent 能力级别。 30 | 31 | 请注意,根据这个定义,"Agent" 不是一个离散的、非 0 即 1 的定义:相反,"Agent 能力" 是一个连续谱系,随着你在工作流中给予 LLM 更多或更少的权力而变化。 32 | 33 | 请参见下表中 agent 能力在不同系统中的变化: 34 | 35 | | Agent 能力级别 | 描述 | 名称 | 示例模式 | 36 | | ------------ | ---------------------------------------------- | ---------- | -------------------------------------------------- | 37 | | ☆☆☆ | LLM 输出对程序流程没有影响 | 简单处理器 | `process_llm_output(llm_response)` | 38 | | ★☆☆ | LLM 输出决定 if/else 分支 | 路由 | `if llm_decision(): path_a() else: path_b()` | 39 | | ★★☆ | LLM 输出决定函数执行 | 工具调用者 | `run_function(llm_chosen_tool, llm_chosen_args)` | 40 | | ★★★ | LLM 输出控制迭代和程序继续 | 多步 Agent | `while llm_should_continue(): execute_next_step()` | 41 | | ★★★ | 一个 agent 工作流可以启动另一个 agent 工作流 | 多 Agent | `if llm_trigger(): execute_agent()` | 42 | 43 | 多步 agent 具有以下代码结构: 44 | 45 | ```python 46 | memory = [user_defined_task] 47 | while llm_should_continue(memory): # 这个循环是多步部分 48 | action = llm_get_next_action(memory) # 这是工具调用部分 49 | observations = execute_action(action) 50 | memory += [action, observations] 51 | ``` 52 | 53 | 这个 agent 系统在一个循环中运行,每一步执行一个新动作(该动作可能涉及调用一些预定义的 *工具*,这些工具只是函数),直到其观察结果表明已达到解决给定任务的满意状态。以下是一个多步 agent 如何解决简单数学问题的示例: 54 | 55 |
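下面是一个仅作示意的最小代码草图(假设你已安装 `smolagents` 并配置好推理所需的 token;这里不提供任何额外工具,`CodeAgent` 会按上面的循环逐步编写并执行 Python 代码,直到得出最终答案):

```python
from smolagents import CodeAgent, HfApiModel

# 不提供额外工具:agent 将在多步循环中编写并执行 Python 代码来完成计算
agent = CodeAgent(tools=[], model=HfApiModel())

result = agent.run("计算 122 的 36.8% 是多少,结果保留两位小数。")
print(result)
```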
56 | 57 |
58 | 59 | ## ✅ 何时使用 agent / ⛔ 何时避免使用 60 | 61 | 当你需要 LLM 确定应用程序的工作流时,agent 很有用。但它们通常有些过度。问题是:我真的需要工作流的灵活性来有效解决手头的任务吗? 62 | 如果预定义的工作流经常不足,这意味着你需要更多的灵活性。 63 | 让我们举个例子:假设你正在开发一个处理冲浪旅行网站客户请求的应用程序。 64 | 65 | 你可以提前知道请求将属于 2 个类别之一(基于用户选择),并且你为这 2 种情况都有预定义的工作流。 66 | 67 | 1. 想要了解旅行信息?⇒ 给他们访问搜索栏以搜索你的知识库 68 | 2. 想与销售交谈?⇒ 让他们填写联系表单。 69 | 70 | 如果这个确定性工作流适合所有查询,那就直接编码吧!这将为你提供一个 100% 可靠的系统,没有让不可预测的 LLM 干扰你的工作流而引入错误的风险。为了简单和稳健起见,建议规范化不使用任何 agent 行为。 71 | 72 | 但如果工作流不能提前确定得那么好呢? 73 | 74 | 例如,用户想问:`"I can come on Monday, but I forgot my passport so risk being delayed to Wednesday, is it possible to take me and my stuff to surf on Tuesday morning, with a cancellation insurance?"` 这个问题涉及许多因素,可能上述预定的标准都不足以满足这个请求。 75 | 76 | 如果预定义的工作流经常不足,这意味着你需要更多的灵活性。 77 | 78 | 这就是 agent 设置发挥作用的地方。 79 | 80 | 在上面的例子中,你可以创建一个多步 agent,它可以访问天气 API 获取天气预报,Google Maps API 计算旅行距离,员工在线仪表板和你的知识库上的 RAG 系统。 81 | 82 | 直到最近,计算机程序还局限于预定义的工作流,试图通过堆积 if/else 分支来处理复杂性。它们专注于极其狭窄的任务,如"计算这些数字的总和"或"找到这个图中的最短路径"。但实际上,大多数现实生活中的任务,如我们上面的旅行示例,都不适合预定义的工作流。agent 系统为程序打开了现实世界任务的大门! 83 | 84 | ## 为什么选择 `smolagents`? 85 | 86 | 对于一些低级的 agent 用例,如链或路由器,你可以自己编写所有代码。这样会更好,因为它可以让你更好地控制和理解你的系统。 87 | 88 | 但一旦你开始追求更复杂的行为,比如让 LLM 调用函数(即"工具调用")或让 LLM 运行 while 循环("多步 agent"),一些抽象就变得必要: 89 | 90 | - 对于工具调用,你需要解析 agent 的输出,因此这个输出需要一个预定义的格式,如"Thought: I should call tool 'get_weather'. Action: get_weather(Paris).",你用预定义的函数解析它,并且给 LLM 的系统提示应该通知它这个格式。 91 | - 对于 LLM 输出决定循环的多步 agent,你需要根据上次循环迭代中发生的情况给 LLM 不同的提示:所以你需要某种记忆能力。 92 | 93 | 看到了吗?通过这两个例子,我们已经发现需要一些项目来帮助我们: 94 | 95 | - 当然,一个作为系统引擎的 LLM 96 | - agent 可以访问的工具列表 97 | - 从 LLM 输出中提取工具调用的解析器 98 | - 与解析器同步的系统提示 99 | - 记忆能力 100 | 101 | 但是等等,既然我们给 LLM 在决策中留出了空间,它们肯定会犯错误:所以我们需要错误日志记录和重试机制。 102 | 103 | 所有这些元素都需要紧密耦合才能形成一个功能良好的系统。这就是为什么我们决定需要制作基本构建块来让所有这些东西协同工作。 104 | 105 | ## 代码 agent 106 | 107 | 在多步 agent 中,每一步 LLM 都可以编写一个动作,形式为调用外部工具。编写这些动作的常见格式(由 Anthropic、OpenAI 等使用)通常是"将动作编写为工具名称和要使用的参数的 JSON,然后解析以知道要执行哪个工具以及使用哪些参数"的不同变体。 108 | 109 | [多项](https://huggingface.co/papers/2402.01030) [研究](https://huggingface.co/papers/2411.01747) [论文](https://huggingface.co/papers/2401.00812) 表明,在代码中进行工具调用的 LLM 要好得多。 110 | 111 | 原因很简单,_我们专门设计了我们的代码语言,使其成为表达计算机执行动作的最佳方式_。如果 JSON 片段是更好的表达方式,JSON 将成为顶级编程语言,编程将变得非常困难。 112 | 113 | 下图取自 [Executable Code Actions Elicit Better LLM Agents](https://huggingface.co/papers/2402.01030),说明了用代码编写动作的一些优势: 114 | 115 | 116 | 117 | 与 JSON 片段相比,用代码编写动作提供了更好的: 118 | 119 | - **可组合性:** 你能像定义 python 函数一样,将 JSON 动作嵌套在一起,或定义一组 JSON 动作以供重用吗? 120 | - **对象管理:** 你如何在 JSON 中存储像 `generate_image` 这样的动作的输出? 121 | - **通用性:** 代码被构建为简单地表达任何你可以让计算机做的事情。 122 | - **LLM 训练数据中的表示:** 大量高质量的代码动作已经包含在 LLM 的训练数据中,这意味着它们已经为此进行了训练! 123 | -------------------------------------------------------------------------------- /tests/test_gradio_ui.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2024 HuggingFace Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | import os 17 | import shutil 18 | import tempfile 19 | import unittest 20 | from unittest.mock import Mock, patch 21 | 22 | from smolagents.gradio_ui import GradioUI 23 | 24 | 25 | class GradioUITester(unittest.TestCase): 26 | def setUp(self): 27 | """Initialize test environment""" 28 | self.temp_dir = tempfile.mkdtemp() 29 | self.mock_agent = Mock() 30 | self.ui = GradioUI(agent=self.mock_agent, file_upload_folder=self.temp_dir) 31 | self.allowed_types = [".pdf", ".docx", ".txt"] 32 | 33 | def tearDown(self): 34 | """Clean up test environment""" 35 | shutil.rmtree(self.temp_dir) 36 | 37 | def test_upload_file_default_types(self): 38 | """Test default allowed file types""" 39 | default_types = [".pdf", ".docx", ".txt"] 40 | for file_type in default_types: 41 | with tempfile.NamedTemporaryFile(suffix=file_type) as temp_file: 42 | mock_file = Mock() 43 | mock_file.name = temp_file.name 44 | 45 | textbox, uploads_log = self.ui.upload_file(mock_file, []) 46 | 47 | self.assertIn("File uploaded:", textbox.value) 48 | self.assertEqual(len(uploads_log), 1) 49 | self.assertTrue(os.path.exists(os.path.join(self.temp_dir, os.path.basename(temp_file.name)))) 50 | 51 | def test_upload_file_default_types_disallowed(self): 52 | """Test default disallowed file types""" 53 | disallowed_types = [".exe", ".sh", ".py", ".jpg"] 54 | for file_type in disallowed_types: 55 | with tempfile.NamedTemporaryFile(suffix=file_type) as temp_file: 56 | mock_file = Mock() 57 | mock_file.name = temp_file.name 58 | 59 | textbox, uploads_log = self.ui.upload_file(mock_file, []) 60 | 61 | self.assertEqual(textbox.value, "File type disallowed") 62 | self.assertEqual(len(uploads_log), 0) 63 | 64 | def test_upload_file_success(self): 65 | """Test successful file upload scenario""" 66 | with tempfile.NamedTemporaryFile(suffix=".txt") as temp_file: 67 | mock_file = Mock() 68 | mock_file.name = temp_file.name 69 | 70 | textbox, uploads_log = self.ui.upload_file(mock_file, []) 71 | 72 | self.assertIn("File uploaded:", textbox.value) 73 | self.assertEqual(len(uploads_log), 1) 74 | self.assertTrue(os.path.exists(os.path.join(self.temp_dir, os.path.basename(temp_file.name)))) 75 | self.assertEqual(uploads_log[0], os.path.join(self.temp_dir, os.path.basename(temp_file.name))) 76 | 77 | def test_upload_file_none(self): 78 | """Test scenario when no file is selected""" 79 | textbox, uploads_log = self.ui.upload_file(None, []) 80 | 81 | self.assertEqual(textbox.value, "No file uploaded") 82 | self.assertEqual(len(uploads_log), 0) 83 | 84 | def test_upload_file_invalid_type(self): 85 | """Test disallowed file type""" 86 | with tempfile.NamedTemporaryFile(suffix=".exe") as temp_file: 87 | mock_file = Mock() 88 | mock_file.name = temp_file.name 89 | 90 | textbox, uploads_log = self.ui.upload_file(mock_file, []) 91 | 92 | self.assertEqual(textbox.value, "File type disallowed") 93 | self.assertEqual(len(uploads_log), 0) 94 | 95 | def test_upload_file_special_chars(self): 96 | """Test scenario with special characters in filename""" 97 | with tempfile.NamedTemporaryFile(suffix=".txt") as temp_file: 98 | # Create a new temporary file with special characters 99 | special_char_name = os.path.join(os.path.dirname(temp_file.name), "test@#$%^&*.txt") 100 | shutil.copy(temp_file.name, special_char_name) 101 | try: 102 | mock_file = Mock() 103 | mock_file.name = special_char_name 104 | 105 | with patch("shutil.copy"): 106 | textbox, uploads_log = self.ui.upload_file(mock_file, []) 107 | 108 | self.assertIn("File uploaded:", textbox.value) 109 | 
self.assertEqual(len(uploads_log), 1) 110 | self.assertIn("test_____", uploads_log[0]) 111 | finally: 112 | # Clean up the special character file 113 | if os.path.exists(special_char_name): 114 | os.remove(special_char_name) 115 | 116 | def test_upload_file_custom_types(self): 117 | """Test custom allowed file types""" 118 | with tempfile.NamedTemporaryFile(suffix=".csv") as temp_file: 119 | mock_file = Mock() 120 | mock_file.name = temp_file.name 121 | 122 | textbox, uploads_log = self.ui.upload_file(mock_file, [], allowed_file_types=[".csv"]) 123 | 124 | self.assertIn("File uploaded:", textbox.value) 125 | self.assertEqual(len(uploads_log), 1) 126 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Contribute to smolagents 18 | 19 | Everyone is welcome to contribute, and we value everybody's contribution. Code 20 | contributions are not the only way to help the community. Answering questions, helping 21 | others, and improving the documentation are also immensely valuable. 22 | 23 | It also helps us if you spread the word! Reference the library in blog posts 24 | about the awesome projects it made possible, shout out on Twitter every time it has 25 | helped you, or simply ⭐️ the repository to say thank you. 26 | 27 | However you choose to contribute, please be mindful and respect our 28 | [code of conduct](https://github.com/huggingface/smolagents/blob/main/CODE_OF_CONDUCT.md). 29 | 30 | **This guide was heavily inspired by the awesome [scikit-learn guide to contributing](https://github.com/scikit-learn/scikit-learn/blob/main/CONTRIBUTING.md).** 31 | 32 | ## Ways to contribute 33 | 34 | There are several ways you can contribute to smolagents. 35 | 36 | * Submit issues related to bugs or desired new features. 37 | * Contribute to the examples or to the documentation. 38 | * Fix outstanding issues with the existing code. 39 | 40 | > All contributions are equally valuable to the community. 🥰 41 | 42 | ## Submitting a bug-related issue or feature request 43 | 44 | At any moment, feel welcome to open an issue, citing your exact error traces and package versions if it's a bug. 45 | It's often even better to open a PR with your proposed fixes/changes! 46 | 47 | Do your best to follow these guidelines when submitting a bug-related issue or a feature 48 | request. It will make it easier for us to come back to you quickly and with good 49 | feedback. 50 | 51 | ### Did you find a bug? 52 | 53 | The smolagents library is robust and reliable thanks to users who report the problems they encounter. 54 | 55 | Before you report an issue, we would really appreciate it if you could **make sure the bug was not 56 | already reported** (use the search bar on GitHub under Issues). Your issue should also be related to bugs in the 57 | library itself, and not your code. 58 | 59 | Once you've confirmed the bug hasn't already been reported, please include the following information in your issue so 60 | we can quickly resolve it: 61 | 62 | * Your **OS type and version**, as well as your environment versions (versions of rust, python, and dependencies). 63 | * A short, self-contained, code snippet that allows us to reproduce the bug. 64 | * The *full* traceback if an exception is raised. 65 | * Attach any other additional information, like screenshots, you think may help. 66 | 67 | ### Do you want a new feature? 
68 | 69 | If there is a new feature you'd like to see in smolagents, please open an issue and describe: 70 | 71 | 1. What is the *motivation* behind this feature? Is it related to a problem or frustration with the library? Is it 72 | a feature related to something you need for a project? Is it something you worked on and think it could benefit 73 | the community? 74 | 75 | Whatever it is, we'd love to hear about it! 76 | 77 | 2. Describe your requested feature in as much detail as possible. The more you can tell us about it, the better 78 | we'll be able to help you. 79 | 3. Provide a *code snippet* that demonstrates the feature's usage. 80 | 4. If the feature is related to a paper, please include a link. 81 | 82 | If your issue is well written we're already 80% of the way there by the time you create it. 83 | 84 | ## Do you want to add documentation? 85 | 86 | We're always looking for improvements to the documentation that make it more clear and accurate. Please let us know 87 | how the documentation can be improved such as typos and any content that is missing, unclear or inaccurate. We'll be 88 | happy to make the changes or help you make a contribution if you're interested! 89 | 90 | ## Fixing outstanding issues 91 | 92 | If you notice an issue with the existing code and have a fix in mind, feel free to [start contributing](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request) and open 93 | a Pull Request! 94 | 95 | ### Making code changes 96 | 97 | To install dev dependencies, run: 98 | ``` 99 | pip install -e ".[dev]" 100 | ``` 101 | 102 | When making changes to the codebase, please check that it follows the repo's code quality requirements by running: 103 | To check code quality of the source code: 104 | ``` 105 | make quality 106 | ``` 107 | 108 | If the checks fail, you can run the formatter with: 109 | ``` 110 | make style 111 | ``` 112 | 113 | And commit the changes. 114 | 115 | To run tests locally, run this command: 116 | ```bash 117 | make test 118 | ``` 119 | 120 | 121 | ## I want to become a maintainer of the project. How do I get there? 122 | 123 | smolagents is a project led and managed by Hugging Face. We are more than 124 | happy to have motivated individuals from other organizations join us as maintainers with the goal of helping smolagents 125 | make a dent in the world of Agents. 126 | 127 | If you are such an individual (or organization), please reach out to us and let's collaborate. 128 | -------------------------------------------------------------------------------- /docs/source/zh/examples/rag.mdx: -------------------------------------------------------------------------------- 1 | 16 | # Agentic RAG 17 | 18 | [[open-in-colab]] 19 | 20 | Retrieval-Augmented-Generation (RAG) 是“使用大语言模型(LLM)来回答用户查询,但基于从知识库中检索的信息”。它比使用普通或微调的 LLM 具有许多优势:举几个例子,它允许将答案基于真实事实并减少虚构;它允许提供 LLM 领域特定的知识;并允许对知识库中的信息访问进行精细控制。 21 | 22 | 但是,普通的 RAG 存在一些局限性,以下两点尤为突出: 23 | 24 | - 它只执行一次检索步骤:如果结果不好,生成的内容也会不好。 25 | - 语义相似性是以用户查询为参考计算的,这可能不是最优的:例如,用户查询通常是一个问题,而包含真实答案的文档通常是肯定语态,因此其相似性得分会比其他以疑问形式呈现的源文档低,从而导致错失相关信息的风险。 26 | 27 | 我们可以通过制作一个 RAG agent来缓解这些问题:非常简单,一个配备了检索工具的agent!这个 agent 将 28 | 会:✅ 自己构建查询和检索,✅ 如果需要的话会重新检索。 29 | 30 | 因此,它将比普通 RAG 更智能,因为它可以自己构建查询,而不是直接使用用户查询作为参考。这样,它可以更 31 | 接近目标文档,从而提高检索的准确性, [HyDE](https://huggingface.co/papers/2212.10496)。此 agent 可以 32 | 使用生成的片段,并在需要时重新检索,就像 [Self-Query](https://docs.llamaindex.ai/en/stable/examples/evaluation/RetryQuery/)。 33 | 34 | 我们现在开始构建这个系统. 
🛠️ 35 | 36 | 运行以下代码以安装所需的依赖包: 37 | ```bash 38 | !pip install smolagents pandas langchain langchain-community sentence-transformers rank_bm25 --upgrade -q 39 | ``` 40 | 41 | 你需要一个有效的 token 作为环境变量 `HF_TOKEN` 来调用 HF Inference API。我们使用 python-dotenv 来加载它。 42 | ```py 43 | from dotenv import load_dotenv 44 | load_dotenv() 45 | ``` 46 | 47 | 我们首先加载一个知识库以在其上执行 RAG:此数据集是许多 Hugging Face 库的文档页面的汇编,存储为 markdown 格式。我们将仅保留 `transformers` 库的文档。然后通过处理数据集并将其存储到向量数据库中,为检索器准备知识库。我们将使用 [LangChain](https://python.langchain.com/docs/introduction/) 来利用其出色的向量数据库工具。 48 | ```py 49 | import datasets 50 | from langchain.docstore.document import Document 51 | from langchain.text_splitter import RecursiveCharacterTextSplitter 52 | from langchain_community.retrievers import BM25Retriever 53 | 54 | knowledge_base = datasets.load_dataset("m-ric/huggingface_doc", split="train") 55 | knowledge_base = knowledge_base.filter(lambda row: row["source"].startswith("huggingface/transformers")) 56 | 57 | source_docs = [ 58 | Document(page_content=doc["text"], metadata={"source": doc["source"].split("/")[1]}) 59 | for doc in knowledge_base 60 | ] 61 | 62 | text_splitter = RecursiveCharacterTextSplitter( 63 | chunk_size=500, 64 | chunk_overlap=50, 65 | add_start_index=True, 66 | strip_whitespace=True, 67 | separators=["\n\n", "\n", ".", " ", ""], 68 | ) 69 | docs_processed = text_splitter.split_documents(source_docs) 70 | ``` 71 | 72 | 现在文档已准备好。我们来一起构建我们的 agent RAG 系统! 73 | 👉 我们只需要一个 RetrieverTool,我们的 agent 可以利用它从知识库中检索信息。 74 | 75 | 由于我们需要将 vectordb 添加为工具的属性,我们不能简单地使用带有 `@tool` 装饰器的简单工具构造函数:因此我们将遵循 [tools 教程](../tutorials/tools) 中突出显示的高级设置。 76 | 77 | ```py 78 | from smolagents import Tool 79 | 80 | class RetrieverTool(Tool): 81 | name = "retriever" 82 | description = "Uses semantic search to retrieve the parts of transformers documentation that could be most relevant to answer your query." 83 | inputs = { 84 | "query": { 85 | "type": "string", 86 | "description": "The query to perform. This should be semantically close to your target documents. Use the affirmative form rather than a question.", 87 | } 88 | } 89 | output_type = "string" 90 | 91 | def __init__(self, docs, **kwargs): 92 | super().__init__(**kwargs) 93 | self.retriever = BM25Retriever.from_documents( 94 | docs, k=10 95 | ) 96 | 97 | def forward(self, query: str) -> str: 98 | assert isinstance(query, str), "Your search query must be a string" 99 | 100 | docs = self.retriever.invoke( 101 | query, 102 | ) 103 | return "\nRetrieved documents:\n" + "".join( 104 | [ 105 | f"\n\n===== Document {str(i)} =====\n" + doc.page_content 106 | for i, doc in enumerate(docs) 107 | ] 108 | ) 109 | 110 | retriever_tool = RetrieverTool(docs_processed) 111 | ``` 112 | BM25 检索方法是一个经典的检索方法,因为它的设置速度非常快。为了提高检索准确性,你可以使用语义搜索,使用文档的向量表示替换 BM25:因此你可以前往 [MTEB Leaderboard](https://huggingface.co/spaces/mteb/leaderboard) 选择一个好的嵌入模型。 113 | 114 | 现在我们已经创建了一个可以从知识库中检索信息的工具,现在我们可以很容易地创建一个利用这个 115 | `retriever_tool` 的 agent!此 agent 将使用如下参数初始化: 116 | - `tools`:代理将能够调用的工具列表。 117 | - `model`:为代理提供动力的 LLM。 118 | 119 | 我们的 `model` 必须是一个可调用对象,它接受一个消息的 list 作为输入,并返回文本。它还需要接受一个 stop_sequences 参数,指示何时停止生成。为了方便起见,我们直接使用包中提供的 `HfEngine` 类来获取调用 Hugging Face 的 Inference API 的 LLM 引擎。 120 | 121 | 接着,我们将使用 [meta-llama/Llama-3.3-70B-Instruct](meta-llama/Llama-3.3-70B-Instruct) 作为 llm 引 122 | 擎,因为: 123 | - 它有一个长 128k 上下文,这对处理长源文档很有用。 124 | - 它在 HF 的 Inference API 上始终免费提供! 
125 | 126 | _Note:_ 此 Inference API 托管基于各种标准的模型,部署的模型可能会在没有事先通知的情况下进行更新或替换。了解更多信息,请点击[这里](https://huggingface.co/docs/api-inference/supported-models)。 127 | 128 | ```py 129 | from smolagents import HfApiModel, CodeAgent 130 | 131 | agent = CodeAgent( 132 | tools=[retriever_tool], model=HfApiModel(model_id="meta-llama/Llama-3.3-70B-Instruct"), max_steps=4, verbose=True 133 | ) 134 | ``` 135 | 136 | 当我们初始化 CodeAgent 时,它已经自动获得了一个默认的系统提示,告诉 LLM 引擎按步骤处理并生成工具调用作为代码片段,但你可以根据需要替换此提示模板。接着,当其 `.run()` 方法被调用时,代理将负责调用 LLM 引擎,并在循环中执行工具调用,直到工具 `final_answer` 被调用,而其参数为最终答案。 137 | 138 | ```py 139 | agent_output = agent.run("For a transformers model training, which is slower, the forward or the backward pass?") 140 | 141 | print("Final output:") 142 | print(agent_output) 143 | ``` 144 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | 2 | # Contributor Covenant Code of Conduct 3 | 4 | ## Our Pledge 5 | 6 | We as members, contributors, and leaders pledge to make participation in our 7 | community a harassment-free experience for everyone, regardless of age, body 8 | size, visible or invisible disability, ethnicity, sex characteristics, gender 9 | identity and expression, level of experience, education, socio-economic status, 10 | nationality, personal appearance, race, caste, color, religion, or sexual 11 | identity and orientation. 12 | 13 | We pledge to act and interact in ways that contribute to an open, welcoming, 14 | diverse, inclusive, and healthy community. 15 | 16 | ## Our Standards 17 | 18 | Examples of behavior that contributes to a positive environment for our 19 | community include: 20 | 21 | * Demonstrating empathy and kindness toward other people 22 | * Being respectful of differing opinions, viewpoints, and experiences 23 | * Giving and gracefully accepting constructive feedback 24 | * Accepting responsibility and apologizing to those affected by our mistakes, 25 | and learning from the experience 26 | * Focusing on what is best not just for us as individuals, but for the overall 27 | community 28 | 29 | Examples of unacceptable behavior include: 30 | 31 | * The use of sexualized language or imagery, and sexual attention or advances of 32 | any kind 33 | * Trolling, insulting or derogatory comments, and personal or political attacks 34 | * Public or private harassment 35 | * Publishing others' private information, such as a physical or email address, 36 | without their explicit permission 37 | * Other conduct which could reasonably be considered inappropriate in a 38 | professional setting 39 | 40 | ## Enforcement Responsibilities 41 | 42 | Community leaders are responsible for clarifying and enforcing our standards of 43 | acceptable behavior and will take appropriate and fair corrective action in 44 | response to any behavior that they deem inappropriate, threatening, offensive, 45 | or harmful. 46 | 47 | Community leaders have the right and responsibility to remove, edit, or reject 48 | comments, commits, code, wiki edits, issues, and other contributions that are 49 | not aligned to this Code of Conduct, and will communicate reasons for moderation 50 | decisions when appropriate. 51 | 52 | ## Scope 53 | 54 | This Code of Conduct applies within all community spaces, and also applies when 55 | an individual is officially representing the community in public spaces. 
56 | Examples of representing our community include using an official e-mail address, 57 | posting via an official social media account, or acting as an appointed 58 | representative at an online or offline event. 59 | 60 | ## Enforcement 61 | 62 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 63 | reported to the community leaders responsible for enforcement at 64 | feedback@huggingface.co. 65 | All complaints will be reviewed and investigated promptly and fairly. 66 | 67 | All community leaders are obligated to respect the privacy and security of the 68 | reporter of any incident. 69 | 70 | ## Enforcement Guidelines 71 | 72 | Community leaders will follow these Community Impact Guidelines in determining 73 | the consequences for any action they deem in violation of this Code of Conduct: 74 | 75 | ### 1. Correction 76 | 77 | **Community Impact**: Use of inappropriate language or other behavior deemed 78 | unprofessional or unwelcome in the community. 79 | 80 | **Consequence**: A private, written warning from community leaders, providing 81 | clarity around the nature of the violation and an explanation of why the 82 | behavior was inappropriate. A public apology may be requested. 83 | 84 | ### 2. Warning 85 | 86 | **Community Impact**: A violation through a single incident or series of 87 | actions. 88 | 89 | **Consequence**: A warning with consequences for continued behavior. No 90 | interaction with the people involved, including unsolicited interaction with 91 | those enforcing the Code of Conduct, for a specified period of time. This 92 | includes avoiding interactions in community spaces as well as external channels 93 | like social media. Violating these terms may lead to a temporary or permanent 94 | ban. 95 | 96 | ### 3. Temporary Ban 97 | 98 | **Community Impact**: A serious violation of community standards, including 99 | sustained inappropriate behavior. 100 | 101 | **Consequence**: A temporary ban from any sort of interaction or public 102 | communication with the community for a specified period of time. No public or 103 | private interaction with the people involved, including unsolicited interaction 104 | with those enforcing the Code of Conduct, is allowed during this period. 105 | Violating these terms may lead to a permanent ban. 106 | 107 | ### 4. Permanent Ban 108 | 109 | **Community Impact**: Demonstrating a pattern of violation of community 110 | standards, including sustained inappropriate behavior, harassment of an 111 | individual, or aggression toward or disparagement of classes of individuals. 112 | 113 | **Consequence**: A permanent ban from any sort of public interaction within the 114 | community. 115 | 116 | ## Attribution 117 | 118 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 119 | version 2.1, available at 120 | [https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1]. 121 | 122 | Community Impact Guidelines were inspired by 123 | [Mozilla's code of conduct enforcement ladder][Mozilla CoC]. 124 | 125 | For answers to common questions about this code of conduct, see the FAQ at 126 | [https://www.contributor-covenant.org/faq][FAQ]. Translations are available at 127 | [https://www.contributor-covenant.org/translations][translations]. 
128 |
129 | [homepage]: https://www.contributor-covenant.org
130 | [v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
131 | [Mozilla CoC]: https://github.com/mozilla/diversity
132 | [FAQ]: https://www.contributor-covenant.org/faq
133 | [translations]: https://www.contributor-covenant.org/translations
--------------------------------------------------------------------------------
/docs/source/en/tutorials/memory.mdx:
--------------------------------------------------------------------------------
1 | 16 | # 📚 Manage your agent's memory
17 |
18 | [[open-in-colab]]
19 |
20 | In the end, an agent can be defined by simple components: it has tools and prompts.
21 | And most importantly, it has a memory of past steps, drawing a history of planning, execution, and errors.
22 |
23 | ### Replay your agent's memory
24 |
25 | We provide several features to inspect a past agent run.
26 |
27 | You can instrument the agent's run to display it in a great UI that lets you zoom in/out on specific steps, as highlighted in the [instrumentation guide](./inspect_runs).
28 |
29 | You can also use `agent.replay()`, as follows:
30 |
31 | After the agent has run:
32 | ```py
33 | from smolagents import HfApiModel, CodeAgent
34 |
35 | agent = CodeAgent(tools=[], model=HfApiModel(), verbosity_level=0)
36 |
37 | result = agent.run("What's the 20th Fibonacci number?")
38 | ```
39 |
40 | If you want to replay this last run, just use:
41 | ```py
42 | agent.replay()
43 | ```
44 |
45 | ### Dynamically change the agent's memory
46 |
47 | Many advanced use cases require dynamic modification of the agent's memory.
48 |
49 | You can access the agent's memory using:
50 |
51 | ```py
52 | from smolagents import ActionStep
53 |
54 | system_prompt_step = agent.memory.system_prompt
55 | print("The system prompt given to the agent was:")
56 | print(system_prompt_step.system_prompt)
57 |
58 | task_step = agent.memory.steps[0]
59 | print("\n\nThe first task step was:")
60 | print(task_step.task)
61 |
62 | for step in agent.memory.steps:
63 |     if isinstance(step, ActionStep):
64 |         if step.error is not None:
65 |             print(f"\nStep {step.step_number} got this error:\n{step.error}\n")
66 |         else:
67 |             print(f"\nStep {step.step_number} got these observations:\n{step.observations}\n")
68 | ```
69 |
70 | Use `agent.memory.get_full_steps()` to get the full steps as dictionaries.
71 |
72 | You can also use step callbacks to dynamically change the agent's memory.
73 |
74 | Step callbacks can access the `agent` itself in their arguments, so they can access any memory step as highlighted above, and change it if needed. For instance, let's say you are observing screenshots of each step performed by a web browser agent. You want to log the newest screenshot, and remove the images from older steps to save on token costs.
75 |
76 | You could run something like the following.
77 | _Note: this code is incomplete; some imports and object definitions have been removed for the sake of concision. Visit [the original script](https://github.com/huggingface/smolagents/blob/main/src/smolagents/vision_web_browser.py) to get the full working code._
78 |
79 | ```py
80 | import helium
81 | from PIL import Image
82 | from io import BytesIO
83 | from time import sleep
84 |
85 | def update_screenshot(memory_step: ActionStep, agent: CodeAgent) -> None:
86 |     sleep(1.0)  # Let JavaScript animations happen before taking the screenshot
87 |     driver = helium.get_driver()
88 |     latest_step = memory_step.step_number
89 |     for previous_memory_step in agent.memory.steps:  # Remove previous screenshots from logs for lean processing
90 |         if isinstance(previous_memory_step, ActionStep) and previous_memory_step.step_number <= latest_step - 2:
91 |             previous_memory_step.observations_images = None
92 |     png_bytes = driver.get_screenshot_as_png()
93 |     image = Image.open(BytesIO(png_bytes))
94 |     memory_step.observations_images = [image.copy()]
95 | ```
96 |
97 | Then you should pass this function to the `step_callbacks` argument upon initialization of your agent:
98 |
99 | ```py
100 | CodeAgent(
101 |     tools=[DuckDuckGoSearchTool(), go_back, close_popups, search_item_ctrl_f],
102 |     model=model,
103 |     additional_authorized_imports=["helium"],
104 |     step_callbacks=[update_screenshot],
105 |     max_steps=20,
106 |     verbosity_level=2,
107 | )
108 | ```
109 |
110 | Head to our [vision web browser code](https://github.com/huggingface/smolagents/blob/main/src/smolagents/vision_web_browser.py) to see the full working example.
111 |
112 | ### Run agents one step at a time
113 |
114 | This can be useful in case you have tool calls that take days: you can just run your agent step by step.
115 | This will also let you update the memory on each step.
116 |
117 | ```py
118 | from smolagents import HfApiModel, CodeAgent, ActionStep, TaskStep
119 |
120 | agent = CodeAgent(tools=[], model=HfApiModel(), verbosity_level=1)
121 | print(agent.memory.system_prompt)
122 |
123 | task = "What is the 20th Fibonacci number?"
124 |
125 | # You could modify the memory as needed here by inputting the memory of another agent.
126 | # agent.memory.steps = previous_agent.memory.steps
127 |
128 | # Let's start a new task!
129 | agent.memory.steps.append(TaskStep(task=task, task_images=[]))
130 |
131 | final_answer = None
132 | step_number = 1
133 | while final_answer is None and step_number <= 10:
134 |     memory_step = ActionStep(
135 |         step_number=step_number,
136 |         observations_images=[],
137 |     )
138 |     # Run one step.
139 |     final_answer = agent.step(memory_step)
140 |     agent.memory.steps.append(memory_step)
141 |     step_number += 1
142 |
143 |     # Change the memory as you please!
144 |     # For instance to update the latest step:
145 |     # agent.memory.steps[-1] = ...
146 |
147 | print("The final answer is:", final_answer)
148 | ```
--------------------------------------------------------------------------------
/docs/source/zh/examples/text_to_sql.mdx:
--------------------------------------------------------------------------------
1 | 16 | # Text-to-SQL
17 |
18 | [[open-in-colab]]
19 |
20 | In this tutorial, we'll see how to implement an agent that leverages SQL using `smolagents`.
21 |
22 | > Let's start with the classic question: why not simply use a standard text-to-SQL pipeline?
23 |
24 | A standard text-to-SQL pipeline is brittle, since the generated SQL query can be incorrect. Even worse, the query could be wrong without raising an error, instead returning incorrect or useless results.
25 |
26 | 👉 An agent system, by contrast, can critically inspect the output and decide whether the query needs to be changed, which brings a huge performance boost.
27 |
28 | Let's build this agent together!
💪 29 |
30 | First, let's set up a SQL environment:
31 | ```py
32 | from sqlalchemy import (
33 |     create_engine,
34 |     MetaData,
35 |     Table,
36 |     Column,
37 |     String,
38 |     Integer,
39 |     Float,
40 |     insert,
41 |     inspect,
42 |     text,
43 | )
44 |
45 | engine = create_engine("sqlite:///:memory:")
46 | metadata_obj = MetaData()
47 |
48 | # create receipts SQL table
49 | table_name = "receipts"
50 | receipts = Table(
51 |     table_name,
52 |     metadata_obj,
53 |     Column("receipt_id", Integer, primary_key=True),
54 |     Column("customer_name", String(16), primary_key=True),
55 |     Column("price", Float),
56 |     Column("tip", Float),
57 | )
58 | metadata_obj.create_all(engine)
59 |
60 | rows = [
61 |     {"receipt_id": 1, "customer_name": "Alan Payne", "price": 12.06, "tip": 1.20},
62 |     {"receipt_id": 2, "customer_name": "Alex Mason", "price": 23.86, "tip": 0.24},
63 |     {"receipt_id": 3, "customer_name": "Woodrow Wilson", "price": 53.43, "tip": 5.43},
64 |     {"receipt_id": 4, "customer_name": "Margaret James", "price": 21.11, "tip": 1.00},
65 | ]
66 | for row in rows:
67 |     stmt = insert(receipts).values(**row)
68 |     with engine.begin() as connection:
69 |         cursor = connection.execute(stmt)
70 | ```
71 |
72 | ### Build the agent
73 |
74 | Now let's build an agent that will answer questions using SQL queries. The tool's `description` attribute will be embedded in the LLM's prompt by the agent system: it gives the LLM information about how to use the tool. This is exactly where we want to describe the SQL table.
75 |
76 | ```py
77 | inspector = inspect(engine)
78 | columns_info = [(col["name"], col["type"]) for col in inspector.get_columns("receipts")]
79 |
80 | table_description = "Columns:\n" + "\n".join([f" - {name}: {col_type}" for name, col_type in columns_info])
81 | print(table_description)
82 | ```
83 |
84 | ```text
85 | Columns:
86 |  - receipt_id: INTEGER
87 |  - customer_name: VARCHAR(16)
88 |  - price: FLOAT
89 |  - tip: FLOAT
90 | ```
91 |
92 | Now let's build our tool. It needs the following (see the [tools documentation](../tutorials/tools) for more detail):
93 |
94 | - A docstring with an `Args:` section listing its arguments.
95 | - Type hints on both inputs and output.
96 |
97 | ```py
98 | from smolagents import tool
99 |
100 | @tool
101 | def sql_engine(query: str) -> str:
102 |     """
103 |     Allows you to perform SQL queries on the table. Returns a string representation of the result.
104 |     The table is named 'receipts'. Its description is as follows:
105 |         Columns:
106 |         - receipt_id: INTEGER
107 |         - customer_name: VARCHAR(16)
108 |         - price: FLOAT
109 |         - tip: FLOAT
110 |
111 |     Args:
112 |         query: The query to perform. This should be correct SQL.
113 |     """
114 |     output = ""
115 |     with engine.connect() as con:
116 |         rows = con.execute(text(query))
117 |         for row in rows:
118 |             output += "\n" + str(row)
119 |     return output
120 | ```
121 |
122 | We now use this tool to create an agent. We use `CodeAgent`, smolagents' main agent class: an agent that writes its actions in code and iterates on previous outputs following the ReAct framework.
123 |
124 | The model is the LLM that powers the agent system. `HfApiModel` lets you call an LLM through HF's Inference API, either via a serverless or a dedicated endpoint, but you could also use any proprietary API.
125 |
126 | ```py
127 | from smolagents import CodeAgent, HfApiModel
128 |
129 | agent = CodeAgent(
130 |     tools=[sql_engine],
131 |     model=HfApiModel(model_id="meta-llama/Meta-Llama-3.1-8B-Instruct"),
132 | )
133 | agent.run("Can you give me the name of the client who got the most expensive receipt?")
134 | ```
135 |
136 | ### Level 2: Table joins
137 |
138 | Now let's make it more challenging! We want our agent to handle joins across multiple tables. So let's create a second table recording the name of the waiter for each receipt_id!
139 |
140 | ```py
141 | table_name = "waiters"
142 | waiters = Table(
143 |     table_name,
144 |     metadata_obj,
145 |     Column("receipt_id", Integer, primary_key=True),
146 |     Column("waiter_name", String(16), primary_key=True),
147 | )
148 | metadata_obj.create_all(engine)
149 |
150 | rows = [
151 |     {"receipt_id": 1, "waiter_name": "Corey Johnson"},
152 |     {"receipt_id": 2, "waiter_name": "Michael Watts"},
153 |     {"receipt_id": 3, "waiter_name": "Michael Watts"},
154 |     {"receipt_id": 4, "waiter_name": "Margaret James"},
155 | ]
156 | for row in rows:
157 |     stmt = insert(waiters).values(**row)
158 |     with engine.begin() as connection:
159 |         cursor = connection.execute(stmt)
160 | ```
161 |
162 | Since we changed the tables, we need to update the description of our `sql_engine` tool so the LLM can properly leverage the information from both tables.
163 |
164 | ```py
165 | updated_description = """Allows you to perform SQL queries on the table. Beware that this tool's output is a string representation of the execution output.
166 | It can use the following tables:"""
167 |
168 | inspector = inspect(engine)
169 | for table in ["receipts", "waiters"]:
170 |     columns_info = [(col["name"], col["type"]) for col in inspector.get_columns(table)]
171 |
172 |     table_description = f"Table '{table}':\n"
173 |
174 |     table_description += "Columns:\n" + "\n".join([f" - {name}: {col_type}" for name, col_type in columns_info])
175 |     updated_description += "\n\n" + table_description
176 |
177 | print(updated_description)
178 | ```
179 |
180 | Since this request is a bit harder than the previous one, we'll switch the LLM engine to the more powerful [Qwen/Qwen2.5-Coder-32B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct)!
181 |
182 | ```py
183 | sql_engine.description = updated_description
184 |
185 | agent = CodeAgent(
186 |     tools=[sql_engine],
187 |     model=HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct"),
188 | )
189 |
190 | agent.run("Which waiter got more total money from tips?")
191 | ```
192 |
193 | It works right away! And the setup was so simple, wasn't it?
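If you want to double-check the agent's conclusion, you can also call the `sql_engine` tool directly (it still behaves like a plain function) with a hand-written join. The query below is just one possible phrasing, shown for illustration:

```py
# Optional sanity check: compute total tips per waiter without the agent.
result = sql_engine(
    """
    SELECT waiters.waiter_name, SUM(receipts.tip) AS total_tips
    FROM receipts
    JOIN waiters ON receipts.receipt_id = waiters.receipt_id
    GROUP BY waiters.waiter_name
    ORDER BY total_tips DESC
    """
)
print(result)  # With the sample rows above, Michael Watts should come out on top.
```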
194 |
195 | This concludes the example! We've covered these concepts:
196 |
197 | - Building new tools.
198 | - Updating a tool's description.
199 | - Switching to a stronger LLM helps agent reasoning.
200 |
201 | ✅ Now you can go build the text-to-SQL system you've always dreamed of! ✨
202 |
--------------------------------------------------------------------------------
/docs/source/hi/reference/agents.mdx:
--------------------------------------------------------------------------------
1 | 16 | # Agents
17 |
18 |
19 |
20 | Smolagents is an experimental API that may change at any time. Results returned by the agents can vary, as the APIs or the underlying models are prone to change.
21 |
22 |
23 |
24 | To learn more about agents and tools, make sure to read the [introductory guide](../index).
25 | This page contains the API docs for the underlying classes.
26 |
27 | ## Agents
28 |
29 | Our agents inherit from [`MultiStepAgent`], which means they can act in multiple steps, each step consisting of one thought, then one tool call and execution. Read more in [this conceptual guide](../conceptual_guides/react).
30 |
31 | We provide two types of agents, based on the main [`Agent`] class.
32 | - [`CodeAgent`] is the default agent; it writes its tool calls in Python code.
33 | - [`ToolCallingAgent`] writes its tool calls in JSON.
34 |
35 | Both require a `model` and a list of tools `tools` at initialization.
36 |
37 | ### Classes of agents
38 |
39 | [[autodoc]] MultiStepAgent
40 |
41 | [[autodoc]] CodeAgent
42 |
43 | [[autodoc]] ToolCallingAgent
44 |
45 | ### ManagedAgent
46 |
47 | _This class is deprecated since 1.8.0: now you just need to pass name and description attributes to an agent to directly use it as previously done with a ManagedAgent._
48 |
49 | ### stream_to_gradio
50 |
51 | [[autodoc]] stream_to_gradio
52 |
53 | ### GradioUI
54 |
55 | [[autodoc]] GradioUI
56 |
57 | ## Models
58 |
59 | You're free to create and use your own models to power your agent.
60 |
61 | You can use any `model` callable for your agent, as long as:
62 | 1. It follows the [messages format](./chat_templating) (`List[Dict[str, str]]`) for its input `messages`, and it returns a `str`.
63 | 2.
It stops generating output at the sequences passed in the `stop_sequences` argument.
64 |
65 | To define your LLM, you can write a `custom_model` method that accepts a list of [messages](./chat_templating) and returns an object with a `.content` attribute containing the generated text. This callable also needs to accept a `stop_sequences` argument that indicates when to stop generating.
66 |
67 | ```python
68 | from huggingface_hub import login, InferenceClient
69 |
70 | login("")  # paste your Hugging Face API token here
71 |
72 | model_id = "meta-llama/Llama-3.3-70B-Instruct"
73 |
74 | client = InferenceClient(model=model_id)
75 |
76 | def custom_model(messages, stop_sequences=["Task"]):
77 |     response = client.chat_completion(messages, stop=stop_sequences, max_tokens=1000)
78 |     answer = response.choices[0].message
79 |     return answer
80 | ```
81 |
82 | Additionally, `custom_model` can also take a `grammar` argument. If you specify a `grammar` at agent initialization, this argument will be passed along to calls to the model, together with the `grammar` you defined at initialization, to allow [constrained generation](https://huggingface.co/docs/text-generation-inference/conceptual/guidance) and force properly-formatted agent outputs.
83 |
84 | ### TransformersModel
85 |
86 | For convenience, we added a `TransformersModel` that implements the points above by building a local `transformers` pipeline for the model_id given at initialization.
87 |
88 | ```python
89 | from smolagents import TransformersModel
90 |
91 | model = TransformersModel(model_id="HuggingFaceTB/SmolLM-135M-Instruct")
92 |
93 | print(model([{"role": "user", "content": "Ok!"}], stop_sequences=["great"]))
94 | ```
95 | ```text
96 | >>> What a
97 | ```
98 |
99 | [[autodoc]] TransformersModel
100 |
101 | ### HfApiModel
102 |
103 | `HfApiModel` wraps an [HF Inference API](https://huggingface.co/docs/api-inference/index) client for the execution of the LLM.
104 |
105 | ```python
106 | from smolagents import HfApiModel
107 |
108 | messages = [
109 |     {"role": "user", "content": "Hello, how are you?"},
110 |     {"role": "assistant", "content": "I'm doing great. How can I help you today?"},
111 |     {"role": "user", "content": "No need to help, take it easy."},
112 | ]
113 |
114 | model = HfApiModel()
115 | print(model(messages))
116 | ```
117 | ```text
118 | >>> Of course! If you change your mind, feel free to reach out. Take care!
119 | ```
120 | [[autodoc]] HfApiModel
121 |
122 | ### LiteLLMModel
123 |
124 | `LiteLLMModel` leverages [LiteLLM](https://www.litellm.ai/) to support 100+ LLMs from various providers.
125 | You can pass kwargs at model initialization that will then be used whenever the model is called; for instance, below we pass `temperature`.
126 |
127 | ```python
128 | from smolagents import LiteLLMModel
129 |
130 | messages = [
131 |     {"role": "user", "content": "Hello, how are you?"},
132 |     {"role": "assistant", "content": "I'm doing great.
How can I help you today?"}, 133 | {"role": "user", "content": "No need to help, take it easy."}, 134 | ] 135 | 136 | model = LiteLLMModel(model_id="anthropic/claude-3-5-sonnet-latest", temperature=0.2, max_tokens=10) 137 | print(model(messages)) 138 | ``` 139 | 140 | [[autodoc]] LiteLLMModel 141 | 142 | ### OpenAiServerModel 143 | 144 | 145 | यह क्लास आपको किसी भी OpenAIServer कम्पैटिबल मॉडल को कॉल करने देती है। 146 | यहाँ बताया गया है कि आप इसे कैसे सेट कर सकते हैं (आप दूसरे सर्वर को पॉइंट करने के लिए `api_base` url को कस्टमाइज़ कर सकते हैं): 147 | ```py 148 | import os 149 | from smolagents import OpenAIServerModel 150 | 151 | model = OpenAIServerModel( 152 | model_id="gpt-4o", 153 | api_base="https://api.openai.com/v1", 154 | api_key=os.environ["OPENAI_API_KEY"], 155 | ) 156 | ``` 157 | 158 | ## Prompts 159 | 160 | [[autodoc]] smolagents.agents.PromptTemplates 161 | 162 | [[autodoc]] smolagents.agents.PlanningPromptTemplate 163 | 164 | [[autodoc]] smolagents.agents.ManagedAgentPromptTemplate 165 | 166 | [[autodoc]] smolagents.agents.FinalAnswerPromptTemplate 167 | -------------------------------------------------------------------------------- /tests/test_memory.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from smolagents.agents import ToolCall 4 | from smolagents.memory import ( 5 | ActionStep, 6 | AgentMemory, 7 | ChatMessage, 8 | MemoryStep, 9 | Message, 10 | MessageRole, 11 | PlanningStep, 12 | SystemPromptStep, 13 | TaskStep, 14 | ) 15 | 16 | 17 | class TestAgentMemory: 18 | def test_initialization(self): 19 | system_prompt = "This is a system prompt." 20 | memory = AgentMemory(system_prompt=system_prompt) 21 | assert memory.system_prompt.system_prompt == system_prompt 22 | assert memory.steps == [] 23 | 24 | 25 | class TestMemoryStep: 26 | def test_initialization(self): 27 | step = MemoryStep() 28 | assert isinstance(step, MemoryStep) 29 | 30 | def test_dict(self): 31 | step = MemoryStep() 32 | assert step.dict() == {} 33 | 34 | def test_to_messages(self): 35 | step = MemoryStep() 36 | with pytest.raises(NotImplementedError): 37 | step.to_messages() 38 | 39 | 40 | def test_action_step_to_messages(): 41 | action_step = ActionStep( 42 | model_input_messages=[Message(role=MessageRole.USER, content="Hello")], 43 | tool_calls=[ 44 | ToolCall(id="id", name="get_weather", arguments={"location": "Paris"}), 45 | ], 46 | start_time=0.0, 47 | end_time=1.0, 48 | step_number=1, 49 | error=None, 50 | duration=1.0, 51 | model_output_message=ChatMessage(role=MessageRole.ASSISTANT, content="Hi"), 52 | model_output="Hi", 53 | observations="This is a nice observation", 54 | observations_images=["image1.png"], 55 | action_output="Output", 56 | ) 57 | messages = action_step.to_messages() 58 | assert len(messages) == 4 59 | for message in messages: 60 | assert isinstance(message, dict) 61 | assert "role" in message 62 | assert "content" in message 63 | assert isinstance(message["role"], MessageRole) 64 | assert isinstance(message["content"], list) 65 | assistant_message = messages[0] 66 | assert assistant_message["role"] == MessageRole.ASSISTANT 67 | assert len(assistant_message["content"]) == 1 68 | for content in assistant_message["content"]: 69 | assert isinstance(content, dict) 70 | assert "type" in content 71 | assert "text" in content 72 | message = messages[1] 73 | assert message["role"] == MessageRole.TOOL_CALL 74 | 75 | assert len(message["content"]) == 1 76 | text_content = message["content"][0] 77 | assert 
isinstance(text_content, dict) 78 | assert "type" in text_content 79 | assert "text" in text_content 80 | 81 | image_message = messages[2] 82 | image_content = image_message["content"][0] 83 | assert isinstance(image_content, dict) 84 | assert "type" in image_content 85 | assert "image" in image_content 86 | 87 | observation_message = messages[3] 88 | assert observation_message["role"] == MessageRole.TOOL_RESPONSE 89 | assert "Observation:\nThis is a nice observation" in observation_message["content"][0]["text"] 90 | 91 | 92 | def test_action_step_to_messages_no_tool_calls_with_observations(): 93 | action_step = ActionStep( 94 | model_input_messages=None, 95 | tool_calls=None, 96 | start_time=None, 97 | end_time=None, 98 | step_number=None, 99 | error=None, 100 | duration=None, 101 | model_output_message=None, 102 | model_output=None, 103 | observations="This is an observation.", 104 | observations_images=None, 105 | action_output=None, 106 | ) 107 | messages = action_step.to_messages() 108 | assert len(messages) == 1 109 | observation_message = messages[0] 110 | assert observation_message["role"] == MessageRole.TOOL_RESPONSE 111 | assert "Observation:\nThis is an observation." in observation_message["content"][0]["text"] 112 | 113 | 114 | def test_planning_step_to_messages(): 115 | planning_step = PlanningStep( 116 | model_input_messages=[Message(role=MessageRole.USER, content="Hello")], 117 | model_output_message=ChatMessage(role=MessageRole.ASSISTANT, content="Plan"), 118 | plan="This is a plan.", 119 | ) 120 | messages = planning_step.to_messages(summary_mode=False) 121 | assert len(messages) == 2 122 | for message in messages: 123 | assert isinstance(message, dict) 124 | assert "role" in message 125 | assert "content" in message 126 | assert isinstance(message["content"], list) 127 | assert len(message["content"]) == 1 128 | for content in message["content"]: 129 | assert isinstance(content, dict) 130 | assert "type" in content 131 | assert "text" in content 132 | assert messages[0]["role"] == MessageRole.ASSISTANT 133 | assert messages[1]["role"] == MessageRole.USER 134 | 135 | 136 | def test_task_step_to_messages(): 137 | task_step = TaskStep(task="This is a task.", task_images=["task_image1.png"]) 138 | messages = task_step.to_messages(summary_mode=False) 139 | assert len(messages) == 1 140 | for message in messages: 141 | assert isinstance(message, dict) 142 | assert "role" in message 143 | assert "content" in message 144 | assert isinstance(message["role"], MessageRole) 145 | assert message["role"] == MessageRole.USER 146 | assert isinstance(message["content"], list) 147 | assert len(message["content"]) == 2 148 | text_content = message["content"][0] 149 | assert isinstance(text_content, dict) 150 | assert "type" in text_content 151 | assert "text" in text_content 152 | for image_content in message["content"][1:]: 153 | assert isinstance(image_content, dict) 154 | assert "type" in image_content 155 | assert "image" in image_content 156 | 157 | 158 | def test_system_prompt_step_to_messages(): 159 | system_prompt_step = SystemPromptStep(system_prompt="This is a system prompt.") 160 | messages = system_prompt_step.to_messages(summary_mode=False) 161 | assert len(messages) == 1 162 | for message in messages: 163 | assert isinstance(message, dict) 164 | assert "role" in message 165 | assert "content" in message 166 | assert isinstance(message["role"], MessageRole) 167 | assert message["role"] == MessageRole.SYSTEM 168 | assert isinstance(message["content"], list) 169 | assert 
len(message["content"]) == 1 170 | for content in message["content"]: 171 | assert isinstance(content, dict) 172 | assert "type" in content 173 | assert "text" in content 174 | --------------------------------------------------------------------------------