├── tests ├── __init__.py ├── data │ └── 000000039769.png ├── conftest.py ├── utils │ └── markers.py ├── test_search.py ├── test_import.py ├── test_final_answer.py ├── fixtures │ ├── tools.py │ └── agents.py ├── test_types.py ├── test_remote_executors.py ├── test_tool_validation.py ├── test_default_tools.py ├── test_cli.py ├── test_gradio_ui.py └── test_memory.py ├── .github ├── ISSUE_TEMPLATE │ ├── custom.md │ ├── bug_report.md │ └── feature_request.md └── workflows │ ├── trufflehog.yml │ ├── upload_pr_documentation.yml │ ├── build_pr_documentation.yml │ ├── build_documentation.yml │ ├── quality.yml │ └── tests.yml ├── examples ├── open_deep_research │ ├── app.py │ ├── requirements.txt │ ├── README.md │ ├── scripts │ │ ├── run_agents.py │ │ ├── gaia_scorer.py │ │ ├── reformulator.py │ │ └── text_inspector_tool.py │ └── run.py ├── sandboxed_execution.py ├── gradio_ui.py ├── inspect_multiagent_run.py ├── rag.py ├── text_to_sql.py ├── agent_from_any_llm.py └── rag_using_chromadb.py ├── .pre-commit-config.yaml ├── Makefile ├── docs └── source │ ├── en │ ├── _config.py │ ├── _toctree.yml │ ├── reference │ │ ├── agents.mdx │ │ └── tools.mdx │ ├── conceptual_guides │ │ └── react.mdx │ ├── index.mdx │ └── tutorials │ │ └── memory.mdx │ ├── hi │ ├── _config.py │ ├── _toctree.yml │ ├── conceptual_guides │ │ └── react.mdx │ ├── reference │ │ ├── tools.mdx │ │ └── agents.mdx │ ├── index.mdx │ └── tutorials │ │ ├── inspect_runs.mdx │ │ └── secure_code_execution.mdx │ └── zh │ ├── _config.py │ ├── _toctree.yml │ ├── conceptual_guides │ ├── react.mdx │ └── intro_agents.mdx │ ├── reference │ ├── agents.mdx │ ├── tools.mdx │ └── models.mdx │ ├── index.mdx │ ├── tutorials │ ├── secure_code_execution.mdx │ └── memory.mdx │ └── examples │ ├── rag.mdx │ └── text_to_sql.mdx ├── e2b.toml ├── src └── smolagents │ ├── __init__.py │ └── cli.py ├── utils └── check_tests_in_ci.py ├── .gitignore ├── pyproject.toml ├── CONTRIBUTING.md └── CODE_OF_CONDUCT.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/data/000000039769.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/merveenoyan/smolagents/main/tests/data/000000039769.png -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/custom.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Custom issue template 3 | about: Describe this issue template's purpose here. 
4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | 11 | -------------------------------------------------------------------------------- /examples/open_deep_research/app.py: -------------------------------------------------------------------------------- 1 | from run import create_agent 2 | 3 | from smolagents.gradio_ui import GradioUI 4 | 5 | 6 | agent = create_agent() 7 | 8 | demo = GradioUI(agent) 9 | 10 | if __name__ == "__main__": 11 | demo.launch() 12 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/astral-sh/ruff-pre-commit 3 | rev: v0.2.1 4 | hooks: 5 | - id: ruff 6 | args: 7 | - --fix 8 | - id: ruff-format 9 | - repo: https://github.com/pre-commit/pre-commit-hooks 10 | rev: v4.5.0 11 | hooks: 12 | - id: check-merge-conflict 13 | - id: check-yaml 14 | -------------------------------------------------------------------------------- /.github/workflows/trufflehog.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | 4 | name: Secret Leaks 5 | 6 | permissions: 7 | contents: read 8 | 9 | jobs: 10 | trufflehog: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Checkout code 14 | uses: actions/checkout@v4 15 | with: 16 | fetch-depth: 0 17 | - name: Secret Scanning 18 | uses: trufflesecurity/trufflehog@main -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: quality style test docs utils 2 | 3 | check_dirs := examples src tests utils 4 | 5 | # Check code quality of the source code 6 | quality: 7 | ruff check $(check_dirs) 8 | ruff format --check $(check_dirs) 9 | python utils/check_tests_in_ci.py 10 | 11 | # Format source code automatically 12 | style: 13 | ruff check $(check_dirs) --fix 14 | ruff format $(check_dirs) 15 | 16 | # Run smolagents tests 17 | test: 18 | pytest ./tests/ -------------------------------------------------------------------------------- /.github/workflows/upload_pr_documentation.yml: -------------------------------------------------------------------------------- 1 | name: Upload PR Documentation 2 | 3 | on: 4 | workflow_run: 5 | workflows: ["Build PR Documentation"] 6 | types: 7 | - completed 8 | 9 | jobs: 10 | build: 11 | uses: huggingface/doc-builder/.github/workflows/upload_pr_documentation.yml@main 12 | with: 13 | package_name: smolagents 14 | secrets: 15 | hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }} 16 | comment_bot_token: ${{ secrets.COMMENT_BOT_TOKEN }} -------------------------------------------------------------------------------- /docs/source/en/_config.py: -------------------------------------------------------------------------------- 1 | # docstyle-ignore 2 | INSTALL_CONTENT = """ 3 | # Installation 4 | ! pip install smolagents 5 | # To install from source instead of the last release, comment the command above and uncomment the following one. 6 | # ! 
pip install git+https://github.com/huggingface/smolagents.git 7 | """ 8 | 9 | notebook_first_cells = [{"type": "code", "content": INSTALL_CONTENT}] 10 | black_avoid_patterns = { 11 | "{processor_class}": "FakeProcessorClass", 12 | "{model_class}": "FakeModelClass", 13 | "{object_class}": "FakeObjectClass", 14 | } 15 | -------------------------------------------------------------------------------- /docs/source/hi/_config.py: -------------------------------------------------------------------------------- 1 | # docstyle-ignore 2 | INSTALL_CONTENT = """ 3 | # Installation 4 | ! pip install smolagents 5 | # To install from source instead of the last release, comment the command above and uncomment the following one. 6 | # ! pip install git+https://github.com/huggingface/smolagents.git 7 | """ 8 | 9 | notebook_first_cells = [{"type": "code", "content": INSTALL_CONTENT}] 10 | black_avoid_patterns = { 11 | "{processor_class}": "FakeProcessorClass", 12 | "{model_class}": "FakeModelClass", 13 | "{object_class}": "FakeObjectClass", 14 | } 15 | -------------------------------------------------------------------------------- /docs/source/zh/_config.py: -------------------------------------------------------------------------------- 1 | # docstyle-ignore 2 | INSTALL_CONTENT = """ 3 | # Installation 4 | ! pip install smolagents 5 | # To install from source instead of the last release, comment the command above and uncomment the following one. 6 | # ! pip install git+https://github.com/huggingface/smolagents.git 7 | """ 8 | 9 | notebook_first_cells = [{"type": "code", "content": INSTALL_CONTENT}] 10 | black_avoid_patterns = { 11 | "{processor_class}": "FakeProcessorClass", 12 | "{model_class}": "FakeModelClass", 13 | "{object_class}": "FakeObjectClass", 14 | } 15 | -------------------------------------------------------------------------------- /e2b.toml: -------------------------------------------------------------------------------- 1 | # This is a config for E2B sandbox template. 
2 | # You can use template ID (qywp2ctmu2q7jzprcf4j) to create a sandbox: 3 | 4 | # Python SDK 5 | # from e2b import Sandbox, AsyncSandbox 6 | # sandbox = Sandbox("qywp2ctmu2q7jzprcf4j") # Sync sandbox 7 | # sandbox = await AsyncSandbox.create("qywp2ctmu2q7jzprcf4j") # Async sandbox 8 | 9 | # JS SDK 10 | # import { Sandbox } from 'e2b' 11 | # const sandbox = await Sandbox.create('qywp2ctmu2q7jzprcf4j') 12 | 13 | team_id = "f8776d3a-df2f-4a1d-af48-68c2e13b3b87" 14 | start_cmd = "/root/.jupyter/start-up.sh" 15 | dockerfile = "e2b.Dockerfile" 16 | template_id = "qywp2ctmu2q7jzprcf4j" 17 | -------------------------------------------------------------------------------- /examples/sandboxed_execution.py: -------------------------------------------------------------------------------- 1 | from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel 2 | 3 | 4 | model = HfApiModel() 5 | 6 | agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=model, executor_type="docker") 7 | output = agent.run("How many seconds would it take for a leopard at full speed to run through Pont des Arts?") 8 | print("Docker executor result:", output) 9 | 10 | agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=model, executor_type="e2b") 11 | output = agent.run("How many seconds would it take for a leopard at full speed to run through Pont des Arts?") 12 | print("E2B executor result:", output) 13 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: The clearer your bug report, the faster it will be fixed! 4 | title: "[BUG]" 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **Code to reproduce the error** 14 | The simplest code snippet that produces your bug. 15 | 16 | **Error logs (if any)** 17 | Provide error logs if there are any. 18 | 19 | **Expected behavior** 20 | A clear and concise description of what you expected to happen. 21 | 22 | **Packages version:** 23 | Run `pip freeze | grep smolagents` and paste it here. 24 | 25 | **Additional context** 26 | Add any other context about the problem here. 
27 | -------------------------------------------------------------------------------- /.github/workflows/build_pr_documentation.yml: -------------------------------------------------------------------------------- 1 | name: Build PR Documentation 2 | 3 | on: 4 | pull_request: 5 | paths: 6 | - 'docs/source/**' 7 | - 'assets/**' 8 | - '.github/workflows/doc-pr-build.yml' 9 | 10 | concurrency: 11 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 12 | cancel-in-progress: true 13 | 14 | jobs: 15 | build: 16 | uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@main 17 | with: 18 | commit_sha: ${{ github.event.pull_request.head.sha }} 19 | pr_number: ${{ github.event.number }} 20 | package: smolagents 21 | languages: en 22 | # additional_args: --not_python_module # use this arg if repository is documentation only -------------------------------------------------------------------------------- /.github/workflows/build_documentation.yml: -------------------------------------------------------------------------------- 1 | name: Build documentation 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - doc-builder* 8 | - v*-release 9 | - use_templates 10 | paths: 11 | - 'docs/source/**' 12 | - 'assets/**' 13 | - '.github/workflows/doc-build.yml' 14 | - 'pyproject.toml' 15 | 16 | jobs: 17 | build: 18 | uses: huggingface/doc-builder/.github/workflows/build_main_documentation.yml@main 19 | with: 20 | commit_sha: ${{ github.sha }} 21 | package: smolagents 22 | languages: en 23 | notebook_folder: smolagents_doc 24 | # additional_args: --not_python_module # use this arg if repository is documentation only 25 | secrets: 26 | token: ${{ secrets.HUGGINGFACE_PUSH }} 27 | hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }} -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import patch 2 | 3 | import pytest 4 | 5 | from smolagents.agents import MultiStepAgent 6 | from smolagents.monitoring import LogLevel 7 | 8 | 9 | # Import fixture modules as plugins 10 | pytest_plugins = ["tests.fixtures.agents", "tests.fixtures.tools"] 11 | 12 | original_multi_step_agent_init = MultiStepAgent.__init__ 13 | 14 | 15 | @pytest.fixture(autouse=True) 16 | def patch_multi_step_agent_with_suppressed_logging(): 17 | with patch.object(MultiStepAgent, "__init__", autospec=True) as mock_init: 18 | 19 | def init_with_suppressed_logging(self, *args, verbosity_level=LogLevel.OFF, **kwargs): 20 | original_multi_step_agent_init(self, *args, verbosity_level=verbosity_level, **kwargs) 21 | 22 | mock_init.side_effect = init_with_suppressed_logging 23 | yield 24 | -------------------------------------------------------------------------------- /examples/gradio_ui.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | 3 | import requests 4 | from PIL import Image 5 | 6 | from smolagents import CodeAgent, GradioUI, HfApiModel 7 | 8 | 9 | def add_agent_image(memory_step, agent): 10 | url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/smolagents/smolagents.png" 11 | response = requests.get(url) 12 | memory_step.observations_images = [Image.open(BytesIO(response.content))] 13 | 14 | 15 | agent = CodeAgent( 16 | tools=[], 17 | model=HfApiModel(), 18 | verbosity_level=1, 19 | planning_interval=3, 20 | name="example_agent", 21 | description="This is an example agent that 
has not tool but will always see an agent at the end of its step.", 22 | step_callbacks=[add_agent_image], 23 | ) 24 | 25 | GradioUI(agent, file_upload_folder="./data").launch() 26 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Is this not possible with the current options.** 17 | Make sure to consider if what you're requesting can be done with current abstractions. 18 | 19 | **Describe alternatives you've considered** 20 | A clear and concise description of any alternative solutions or features you've considered. 21 | 22 | **Additional context** 23 | Add any other context or screenshots about the feature request here. 24 | -------------------------------------------------------------------------------- /.github/workflows/quality.yml: -------------------------------------------------------------------------------- 1 | name: Quality Check 2 | 3 | on: [pull_request] 4 | 5 | env: 6 | UV_SYSTEM_PYTHON: 1 7 | 8 | jobs: 9 | check_code_quality: 10 | runs-on: ubuntu-latest 11 | env: 12 | UV_HTTP_TIMEOUT: 600 # max 10min to install deps 13 | 14 | steps: 15 | - uses: actions/checkout@v2 16 | - name: Set up Python 17 | uses: actions/setup-python@v2 18 | with: 19 | python-version: "3.12" 20 | 21 | # Setup venv 22 | - name: Setup uv 23 | run: | 24 | pip install --upgrade uv 25 | 26 | - name: Install dependencies 27 | run: uv pip install "smolagents[quality] @ ." 28 | 29 | # Equivalent of "make quality" but step by step 30 | - run: ruff check examples src tests utils # linter 31 | - run: ruff format --check examples src tests utils # formatter 32 | - run: python utils/check_tests_in_ci.py 33 | -------------------------------------------------------------------------------- /examples/inspect_multiagent_run.py: -------------------------------------------------------------------------------- 1 | from openinference.instrumentation.smolagents import SmolagentsInstrumentor 2 | from phoenix.otel import register 3 | 4 | 5 | register() 6 | SmolagentsInstrumentor().instrument(skip_dep_check=True) 7 | 8 | 9 | from smolagents import ( 10 | CodeAgent, 11 | DuckDuckGoSearchTool, 12 | HfApiModel, 13 | ToolCallingAgent, 14 | VisitWebpageTool, 15 | ) 16 | 17 | 18 | # Then we run the agentic part! 
19 | model = HfApiModel() 20 | 21 | search_agent = ToolCallingAgent( 22 | tools=[DuckDuckGoSearchTool(), VisitWebpageTool()], 23 | model=model, 24 | name="search_agent", 25 | description="This is an agent that can do web search.", 26 | ) 27 | 28 | manager_agent = CodeAgent( 29 | tools=[], 30 | model=model, 31 | managed_agents=[search_agent], 32 | ) 33 | manager_agent.run("If the US keeps it 2024 growth rate, how many years would it take for the GDP to double?") 34 | -------------------------------------------------------------------------------- /examples/open_deep_research/requirements.txt: -------------------------------------------------------------------------------- 1 | anthropic>=0.37.1 2 | audioop-lts<1.0; python_version >= "3.13" # required to use pydub in Python >=3.13; LTS port of the removed Python builtin module audioop 3 | beautifulsoup4>=4.12.3 4 | datasets>=2.21.0 5 | google_search_results>=2.4.2 6 | huggingface_hub>=0.23.4 7 | mammoth>=1.8.0 8 | markdownify>=0.13.1 9 | numexpr>=2.10.1 10 | numpy>=2.1.2 11 | openai>=1.52.2 12 | openpyxl 13 | pandas>=2.2.3 14 | pathvalidate>=3.2.1 15 | pdfminer>=20191125 16 | pdfminer.six>=20240706 17 | Pillow>=11.0.0 18 | puremagic>=1.28 19 | pypdf>=5.1.0 20 | python-dotenv>=1.0.1 21 | python_pptx>=1.0.2 22 | Requests>=2.32.3 23 | serpapi>=0.1.5 24 | tqdm>=4.66.4 25 | torch>=2.2.2 26 | torchvision>=0.17.2 27 | transformers>=4.46.0 28 | youtube_transcript_api>=0.6.2 29 | chess 30 | sympy 31 | pubchempy 32 | Bio 33 | scikit-learn 34 | scipy 35 | pydub 36 | PyPDF2 37 | python-pptx 38 | torch 39 | xlrd 40 | SpeechRecognition -------------------------------------------------------------------------------- /tests/utils/markers.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2024 HuggingFace Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Markers for tests .""" 16 | 17 | import os 18 | from importlib.util import find_spec 19 | 20 | import pytest 21 | 22 | 23 | require_run_all = pytest.mark.skipif(not os.getenv("RUN_ALL"), reason="requires RUN_ALL environment variable") 24 | require_torch = pytest.mark.skipif(find_spec("torch") is None, reason="requires torch") 25 | -------------------------------------------------------------------------------- /tests/test_search.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2024 HuggingFace Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | from smolagents import DuckDuckGoSearchTool 18 | 19 | from .test_tools import ToolTesterMixin 20 | from .utils.markers import require_run_all 21 | 22 | 23 | class TestDuckDuckGoSearchTool(ToolTesterMixin): 24 | def setup_method(self): 25 | self.tool = DuckDuckGoSearchTool() 26 | self.tool.setup() 27 | 28 | @require_run_all 29 | def test_exact_match_arg(self): 30 | result = self.tool("Agents") 31 | assert isinstance(result, str) 32 | -------------------------------------------------------------------------------- /tests/test_import.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import tempfile 4 | 5 | 6 | def test_import_smolagents_without_extras(monkeypatch): 7 | monkeypatch.delenv("VIRTUAL_ENV", raising=False) 8 | with tempfile.TemporaryDirectory() as temp_dir: 9 | # Create a virtual environment 10 | venv_dir = os.path.join(temp_dir, "venv") 11 | subprocess.run(["uv", "venv", venv_dir], check=True) 12 | 13 | # Install smolagents in the virtual environment 14 | subprocess.run( 15 | ["uv", "pip", "install", "--python", os.path.join(venv_dir, "bin", "python"), "smolagents @ ."], check=True 16 | ) 17 | 18 | # Run the import test in the virtual environment 19 | result = subprocess.run( 20 | [os.path.join(venv_dir, "bin", "python"), "-c", "import smolagents"], 21 | capture_output=True, 22 | text=True, 23 | ) 24 | 25 | # Check if the import was successful 26 | assert result.returncode == 0, ( 27 | "Import failed with error: " 28 | + (result.stderr.splitlines()[-1] if result.stderr else "No error message") 29 | + "\n" 30 | + result.stderr 31 | ) 32 | -------------------------------------------------------------------------------- /src/smolagents/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf-8 3 | 4 | # Copyright 2024 The HuggingFace Inc. team. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | __version__ = "1.14.0.dev0" 18 | 19 | from .agent_types import * # noqa: I001 20 | from .agents import * # Above noqa avoids a circular dependency due to cli.py 21 | from .default_tools import * 22 | from .gradio_ui import * 23 | from .local_python_executor import * 24 | from .memory import * 25 | from .models import * 26 | from .monitoring import * 27 | from .remote_executors import * 28 | from .tools import * 29 | from .utils import * 30 | from .cli import * 31 | -------------------------------------------------------------------------------- /docs/source/hi/_toctree.yml: -------------------------------------------------------------------------------- 1 | - title: Get started 2 | sections: 3 | - local: index 4 | title: 🤗 Agents 5 | - local: guided_tour 6 | title: गाइडेड टूर 7 | - title: Tutorials 8 | sections: 9 | - local: tutorials/building_good_agents 10 | title: ✨ अच्छे Agents का निर्माण 11 | - local: tutorials/inspect_runs 12 | title: 📊 OpenTelemetry के साथ runs का निरीक्षण 13 | - local: tutorials/tools 14 | title: 🛠️ Tools - in-depth guide 15 | - local: tutorials/secure_code_execution 16 | title: 🛡️ E2B के साथ अपने कोड एक्जीक्यूशन को सुरक्षित करें 17 | - title: Conceptual guides 18 | sections: 19 | - local: conceptual_guides/intro_agents 20 | title: 🤖 Agentic सिस्टम का परिचय 21 | - local: conceptual_guides/react 22 | title: 🤔 मल्टी-स्टेप एजेंट कैसे काम करते हैं? 23 | - title: Examples 24 | sections: 25 | - local: examples/text_to_sql 26 | title: सेल्फ करेक्टिंग Text-to-SQL 27 | - local: examples/rag 28 | title: एजेंटिक RAG के साथ अपनी ज्ञान आधारित को मास्टर करें 29 | - local: examples/multiagents 30 | title: एक बहु-एजेंट प्रणाली का आयोजन करें 31 | - title: Reference 32 | sections: 33 | - local: reference/agents 34 | title: एजेंट से संबंधित ऑब्जेक्ट्स 35 | - local: reference/tools 36 | title: टूल्स से संबंधित ऑब्जेक्ट्स 37 | -------------------------------------------------------------------------------- /docs/source/zh/_toctree.yml: -------------------------------------------------------------------------------- 1 | - title: 起步 2 | sections: 3 | - local: index 4 | title: 🤗 Agents 5 | - local: guided_tour 6 | title: 导览 7 | - title: Tutorials 8 | sections: 9 | - local: tutorials/building_good_agents 10 | title: ✨ 构建好用的 agents 11 | - local: tutorials/inspect_runs 12 | title: 📊 监控 Agent 的运行 13 | - local: tutorials/tools 14 | title: 🛠️ 工具 - 深度指南 15 | - local: tutorials/secure_code_execution 16 | title: 🛡️ 使用 E2B 保护你的代码执行 17 | - local: tutorials/memory 18 | title: 📚 管理 Agent 的记忆 19 | - title: Conceptual guides 20 | sections: 21 | - local: conceptual_guides/intro_agents 22 | title: 🤖 Agent 化系统介绍 23 | - local: conceptual_guides/react 24 | title: 🤔 多步骤 Agent 是如何工作的? 
25 | - title: Examples 26 | sections: 27 | - local: examples/text_to_sql 28 | title: 自我修正 Text-to-SQL 29 | - local: examples/rag 30 | title: 借助 agentic RAG 掌控知识库 31 | - local: examples/multiagents 32 | title: 编排 multi-agent 系统 33 | - local: examples/web_browser 34 | title: 基于视觉模型构建能够浏览网页的agent 35 | - title: Reference 36 | sections: 37 | - local: reference/agents 38 | title: Agent-related objects 39 | - local: reference/models 40 | title: Model-related objects 41 | - local: reference/tools 42 | title: Tool-related objects 43 | -------------------------------------------------------------------------------- /docs/source/en/_toctree.yml: -------------------------------------------------------------------------------- 1 | - title: Get started 2 | sections: 3 | - local: index 4 | title: 🤗 Agents 5 | - local: guided_tour 6 | title: Guided tour 7 | - title: Tutorials 8 | sections: 9 | - local: tutorials/building_good_agents 10 | title: ✨ Building good agents 11 | - local: tutorials/inspect_runs 12 | title: 📊 Inspect your agent runs using telemetry 13 | - local: tutorials/tools 14 | title: 🛠️ Tools - in-depth guide 15 | - local: tutorials/secure_code_execution 16 | title: 🛡️ Secure code execution 17 | - local: tutorials/memory 18 | title: 📚 Manage your agent's memory 19 | - title: Conceptual guides 20 | sections: 21 | - local: conceptual_guides/intro_agents 22 | title: 🤖 An introduction to agentic systems 23 | - local: conceptual_guides/react 24 | title: 🤔 How do Multi-step agents work? 25 | - title: Examples 26 | sections: 27 | - local: examples/text_to_sql 28 | title: Self-correcting Text-to-SQL 29 | - local: examples/rag 30 | title: Master you knowledge base with agentic RAG 31 | - local: examples/multiagents 32 | title: Orchestrate a multi-agent system 33 | - local: examples/web_browser 34 | title: Build a web browser agent using vision models 35 | - title: Reference 36 | sections: 37 | - local: reference/agents 38 | title: Agent-related objects 39 | - local: reference/models 40 | title: Model-related objects 41 | - local: reference/tools 42 | title: Tool-related objects 43 | -------------------------------------------------------------------------------- /docs/source/zh/conceptual_guides/react.mdx: -------------------------------------------------------------------------------- 1 | 16 | # 多步骤 agent 是如何工作的? 17 | 18 | ReAct 框架([Yao et al., 2022](https://huggingface.co/papers/2210.03629))是目前构建 agent 的主要方法。 19 | 20 | 该名称基于两个词的组合:"Reason" (推理)和 "Act" (行动)。实际上,遵循此架构的 agent 将根据需要尽可能多的步骤来解决其任务,每个步骤包括一个推理步骤,然后是一个行动步骤,在该步骤中,它制定工具调用,使其更接近解决手头的任务。 21 | 22 | ReAct 过程涉及保留过去步骤的记忆。 23 | 24 | > [!TIP] 25 | > 阅读 [Open-source LLMs as LangChain Agents](https://huggingface.co/blog/open-source-llms-as-agents) 博客文章以了解更多关于多步 agent 的信息。 26 | 27 | 以下是其工作原理的视频概述: 28 | 29 |
--------------------------------------------------------------------------------
/docs/source/zh/tutorials/secure_code_execution.mdx:
--------------------------------------------------------------------------------
39 | 这就是为什么我们强调提出代码智能体,在本例中是 Python 智能体,这意味着我们要在构建安全的 Python 解释器上投入更多精力。
40 |
41 | ### 本地 Python 解释器
42 |
43 | 默认情况下,`CodeAgent` 会在你的环境中运行 LLM 生成的代码。
44 | 这个执行不是由普通的 Python 解释器完成的:我们从零开始重新构建了一个更安全的 `LocalPythonExecutor`。
45 | 这个解释器通过以下方式设计以确保安全:
46 | - 将导入限制为用户显式传递的列表
47 | - 限制操作次数以防止无限循环和资源膨胀
48 | - 不会执行任何未预定义的操作
49 |
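例如，下面是一个简要示意（其中的授权导入列表与任务内容仅为假设）：本地执行器只会放行通过 `additional_authorized_imports` 显式授权的模块，其余导入会被直接拒绝。

```py
from smolagents import CodeAgent, HfApiModel

# 仅作示意：CodeAgent 默认使用本地的 LocalPythonExecutor 来执行 LLM 生成的代码
agent = CodeAgent(
    tools=[],
    model=HfApiModel(),
    additional_authorized_imports=["numpy"],  # 示例：只授权 numpy，诸如 os、subprocess 的导入会被拒绝
)
agent.run("Use numpy to compute the mean of [2, 3, 5, 7].")
```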
50 | 我们已经在许多用例中使用了这个解释器,从未观察到对环境造成任何损害。
51 |
52 | 然而,这个解决方案并不是万无一失的:可以想象,如果 LLM 被微调用于恶意操作,仍然可能损害你的环境。例如,如果你允许像 `Pillow` 这样无害的包处理图像,LLM 可能会生成数千张图像保存以膨胀你的硬盘。
53 | 如果你自己选择了 LLM 引擎,这当然不太可能,但它可能会发生。
54 |
55 | 所以如果你想格外谨慎,可以使用下面描述的远程代码执行选项。
56 |
57 | ### E2B 代码执行器
58 |
59 | 为了最大程度的安全性,你可以使用我们与 E2B 的集成在沙盒环境中运行代码。这是一个远程执行服务,可以在隔离的容器中运行你的代码,使代码无法影响你的本地环境。
60 |
61 | 为此,你需要设置你的 E2B 账户并在环境变量中设置 `E2B_API_KEY`。请前往 [E2B 快速入门文档](https://e2b.dev/docs/quickstart) 了解更多信息。
62 |
63 | 然后你可以通过 `pip install e2b-code-interpreter python-dotenv` 安装它。
64 |
65 | 现在你已经准备好了!
66 |
67 | 要将代码执行器设置为 E2B,只需在初始化 `CodeAgent` 时传递标志 `executor_type="e2b"`。
68 | 请注意,你应该将所有工具的依赖项添加到 `additional_authorized_imports` 中,以便执行器安装它们。
69 |
70 | ```py
71 | from smolagents import CodeAgent, VisitWebpageTool, HfApiModel
72 | agent = CodeAgent(
73 | tools=[VisitWebpageTool()],
74 | model=HfApiModel(),
75 | additional_authorized_imports=["requests", "markdownify"],
76 | executor_type="e2b"
77 | )
78 |
79 | agent.run("What was Abraham Lincoln's preferred pet?")
80 | ```
81 |
82 | 目前 E2B 代码执行暂不兼容多 agent——因为把 agent 调用放在应该在远程执行的代码块里,是非常混乱的。但我们正在努力做到这件事!
83 |
--------------------------------------------------------------------------------
/docs/source/en/reference/tools.mdx:
--------------------------------------------------------------------------------
16 | # Tools
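As a minimal sketch of the pattern used elsewhere in this repository (the tool name and logic below are illustrative, not part of the library), a custom tool subclasses `Tool` and declares `name`, `description`, `inputs`, `output_type`, and a `forward` method:

```py
from smolagents import Tool


class ReverseStringTool(Tool):
    # Illustrative tool: the metadata below tells the agent when and how to call it
    name = "reverse_string"
    description = "Reverses the characters of the provided string."
    inputs = {
        "text": {"description": "The string to reverse.", "type": "string"},
    }
    output_type = "string"

    def forward(self, text: str) -> str:
        return text[::-1]
```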
--------------------------------------------------------------------------------
/docs/source/hi/index.mdx:
--------------------------------------------------------------------------------
20 | बेसिक्स सीखें और एजेंट्स का उपयोग करने में परिचित हों। यदि आप पहली बार एजेंट्स का उपयोग कर रहे हैं तो यहाँ से शुरू करें!
41 | एक विशिष्ट लक्ष्य प्राप्त करने में मदद के लिए गाइड: SQL क्वेरी जनरेट और टेस्ट करने के लिए एजेंट बनाएं!
45 | महत्वपूर्ण विषयों की बेहतर समझ बनाने के लिए उच्च-स्तरीय व्याख्याएं।
49 | एजेंट्स बनाने के महत्वपूर्ण पहलुओं को कवर करने वाले ट्यूटोरियल्स।
--------------------------------------------------------------------------------
/docs/source/en/index.mdx:
--------------------------------------------------------------------------------
20 | Learn the basics and become familiar with using Agents. Start here if you are using Agents for the first time!
40 | Practical guides to help you achieve a specific goal: create an agent to generate and test SQL queries!
44 | High-level explanations for building a better understanding of important topics.
48 | Horizontal tutorials that cover important aspects of building agents.
100 |
101 | आप देख सकते हैं कि CodeAgent ने अपने मैनेज्ड ToolCallingAgent को (वैसे, मैनेज्ड एजेंट एक CodeAgent भी हो सकता था) U.S. 2024 ग्रोथ रेट के लिए वेब सर्च चलाने के लिए कॉल किया। फिर मैनेज्ड एजेंट ने अपनी रिपोर्ट लौटाई और मैनेजर एजेंट ने अर्थव्यवस्था के दोगुना होने का समय गणना करने के लिए उस पर कार्य किया! अच्छा है, है ना?
--------------------------------------------------------------------------------
/examples/open_deep_research/run.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 | import threading
4 |
5 | from dotenv import load_dotenv
6 | from huggingface_hub import login
7 | from scripts.text_inspector_tool import TextInspectorTool
8 | from scripts.text_web_browser import (
9 | ArchiveSearchTool,
10 | FinderTool,
11 | FindNextTool,
12 | PageDownTool,
13 | PageUpTool,
14 | SimpleTextBrowser,
15 | VisitTool,
16 | )
17 | from scripts.visual_qa import visualizer
18 |
19 | from smolagents import (
20 | CodeAgent,
21 | GoogleSearchTool,
22 | # HfApiModel,
23 | LiteLLMModel,
24 | ToolCallingAgent,
25 | )
26 |
27 |
28 | AUTHORIZED_IMPORTS = [
29 | "requests",
30 | "zipfile",
31 | "os",
32 | "pandas",
33 | "numpy",
34 | "sympy",
35 | "json",
36 | "bs4",
37 | "pubchempy",
38 | "xml",
39 | "yahoo_finance",
40 | "Bio",
41 | "sklearn",
42 | "scipy",
43 | "pydub",
44 | "io",
45 | "PIL",
46 | "chess",
47 | "PyPDF2",
48 | "pptx",
49 | "torch",
50 | "datetime",
51 | "fractions",
52 | "csv",
53 | ]
54 | load_dotenv(override=True)
55 | login(os.getenv("HF_TOKEN"))
56 |
57 | append_answer_lock = threading.Lock()
58 |
59 |
60 | def parse_args():
61 | parser = argparse.ArgumentParser()
62 | parser.add_argument(
63 | "question", type=str, help="for example: 'How many studio albums did Mercedes Sosa release before 2007?'"
64 | )
65 | parser.add_argument("--model-id", type=str, default="o1")
66 | return parser.parse_args()
67 |
68 |
69 | custom_role_conversions = {"tool-call": "assistant", "tool-response": "user"}
70 |
71 | user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0"
72 |
73 | BROWSER_CONFIG = {
74 | "viewport_size": 1024 * 5,
75 | "downloads_folder": "downloads_folder",
76 | "request_kwargs": {
77 | "headers": {"User-Agent": user_agent},
78 | "timeout": 300,
79 | },
80 | "serpapi_key": os.getenv("SERPAPI_API_KEY"),
81 | }
82 |
83 | os.makedirs(f"./{BROWSER_CONFIG['downloads_folder']}", exist_ok=True)
84 |
85 |
86 | def create_agent(model_id="o1"):
87 | model_params = {
88 | "model_id": model_id,
89 | "custom_role_conversions": custom_role_conversions,
90 | "max_completion_tokens": 8192,
91 | }
92 | if model_id == "o1":
93 | model_params["reasoning_effort"] = "high"
94 | model = LiteLLMModel(**model_params)
95 |
96 | text_limit = 100000
97 | browser = SimpleTextBrowser(**BROWSER_CONFIG)
98 | WEB_TOOLS = [
99 | GoogleSearchTool(provider="serper"),
100 | VisitTool(browser),
101 | PageUpTool(browser),
102 | PageDownTool(browser),
103 | FinderTool(browser),
104 | FindNextTool(browser),
105 | ArchiveSearchTool(browser),
106 | TextInspectorTool(model, text_limit),
107 | ]
108 | text_webbrowser_agent = ToolCallingAgent(
109 | model=model,
110 | tools=WEB_TOOLS,
111 | max_steps=20,
112 | verbosity_level=2,
113 | planning_interval=4,
114 | name="search_agent",
115 | description="""A team member that will search the internet to answer your question.
116 | Ask him for all your questions that require browsing the web.
117 | Provide him as much context as possible, in particular if you need to search on a specific timeframe!
118 | And don't hesitate to provide him with a complex search task, like finding a difference between two webpages.
119 | Your request must be a real sentence, not a google search! Like "Find me this information (...)" rather than a few keywords.
120 | """,
121 | provide_run_summary=True,
122 | )
123 | text_webbrowser_agent.prompt_templates["managed_agent"]["task"] += """You can navigate to .txt online files.
124 | If a non-html page is in another format, especially .pdf or a Youtube video, use tool 'inspect_file_as_text' to inspect it.
125 | Additionally, if after some searching you find out that you need more information to answer the question, you can use `final_answer` with your request for clarification as argument to request for more information."""
126 |
127 | manager_agent = CodeAgent(
128 | model=model,
129 | tools=[visualizer, TextInspectorTool(model, text_limit)],
130 | max_steps=12,
131 | verbosity_level=2,
132 | additional_authorized_imports=AUTHORIZED_IMPORTS,
133 | planning_interval=4,
134 | managed_agents=[text_webbrowser_agent],
135 | )
136 |
137 | return manager_agent
138 |
139 |
140 | def main():
141 | args = parse_args()
142 |
143 | agent = create_agent(model_id=args.model_id)
144 |
145 | answer = agent.run(args.question)
146 |
147 | print(f"Got this answer: {answer}")
148 |
149 |
150 | if __name__ == "__main__":
151 | main()
152 |
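Besides the CLI entry point above, `create_agent()` can be reused directly, as `app.py` does. A minimal sketch, assuming the required API keys are already set in the environment:

```py
# Illustrative only: build the manager agent and run a single question programmatically.
from run import create_agent

agent = create_agent(model_id="o1")
answer = agent.run("How many studio albums did Mercedes Sosa release before 2007?")
print(f"Got this answer: {answer}")
```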
--------------------------------------------------------------------------------
/docs/source/zh/tutorials/memory.mdx:
--------------------------------------------------------------------------------
1 |
16 | # 📚 管理Agent的记忆
17 |
18 | [[open-in-colab]]
19 |
20 | 归根结底,Agent可以定义为由几个简单组件构成:它拥有工具、提示词。最重要的是,它具备对过往步骤的记忆,能够追溯完整的规划、执行和错误历史。
21 |
22 | ### 回放Agent的记忆
23 |
24 | 我们提供了多项功能来审查Agent的过往运行记录。
25 |
26 | 您可以通过插装(instrumentation)在可视化界面中查看Agent的运行过程,该界面支持对特定步骤进行缩放操作,具体方法参见[插装指南](./inspect_runs)。
27 |
28 | 您也可以使用`agent.replay()`方法实现回放:
29 |
30 | 当Agent完成运行后:
31 | ```py
32 | from smolagents import HfApiModel, CodeAgent
33 |
34 | agent = CodeAgent(tools=[], model=HfApiModel(), verbosity_level=0)
35 |
36 | result = agent.run("What's the 20th Fibonacci number?")
37 | ```
38 |
39 | 若要回放最近一次运行,只需使用:
40 | ```py
41 | agent.replay()
42 | ```
43 |
44 | ### 动态修改Agent的记忆
45 |
46 | 许多高级应用场景需要对Agent的记忆进行动态修改。
47 |
48 | 您可以通过以下方式访问Agent的记忆:
49 |
50 | ```py
51 | from smolagents import ActionStep
52 |
53 | system_prompt_step = agent.memory.system_prompt
54 | print("The system prompt given to the agent was:")
55 | print(system_prompt_step.system_prompt)
56 |
57 | task_step = agent.memory.steps[0]
58 | print("\n\nThe first task step was:")
59 | print(task_step.task)
60 |
61 | for step in agent.memory.steps:
62 | if isinstance(step, ActionStep):
63 | if step.error is not None:
64 | print(f"\nStep {step.step_number} got this error:\n{step.error}\n")
65 | else:
66 | print(f"\nStep {step.step_number} got these observations:\n{step.observations}\n")
67 | ```
68 |
69 | 使用`agent.memory.get_full_steps()`可获取完整步骤字典数据。
70 |
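例如（仅作示意，沿用上文创建的 `agent`）：

```py
full_steps = agent.memory.get_full_steps()  # 每个步骤对应一个字典
print(f"共记录了 {len(full_steps)} 个步骤")
```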
71 | 您还可以通过步骤回调(step callbacks)实现记忆的动态修改。
72 |
73 | 步骤回调函数可通过参数直接访问`agent`对象,因此能够访问所有记忆步骤并根据需要进行修改。例如,假设您正在监控网页浏览Agent每个步骤的屏幕截图,希望保留最新截图同时删除旧步骤的图片以节省token消耗。
74 |
75 | 可参考以下代码示例:
76 | _注:此代码片段不完整,部分导入语句和对象定义已精简,完整代码请访问[原始脚本](https://github.com/huggingface/smolagents/blob/main/src/smolagents/vision_web_browser.py)_
77 |
78 | ```py
79 | import helium
80 | from PIL import Image
81 | from io import BytesIO
82 | from time import sleep
83 |
84 | def update_screenshot(memory_step: ActionStep, agent: CodeAgent) -> None:
85 | sleep(1.0) # Let JavaScript animations happen before taking the screenshot
86 | driver = helium.get_driver()
87 | latest_step = memory_step.step_number
88 | for previous_memory_step in agent.memory.steps: # Remove previous screenshots from logs for lean processing
89 | if isinstance(previous_memory_step, ActionStep) and previous_memory_step.step_number <= latest_step - 2:
90 | previous_memory_step.observations_images = None
91 | png_bytes = driver.get_screenshot_as_png()
92 | image = Image.open(BytesIO(png_bytes))
93 | memory_step.observations_images = [image.copy()]
94 | ```
95 |
96 | 最后在初始化Agent时,将此函数传入`step_callbacks`参数:
97 |
98 | ```py
99 | CodeAgent(
100 | tools=[DuckDuckGoSearchTool(), go_back, close_popups, search_item_ctrl_f],
101 | model=model,
102 | additional_authorized_imports=["helium"],
103 | step_callbacks=[update_screenshot],
104 | max_steps=20,
105 | verbosity_level=2,
106 | )
107 | ```
108 |
109 | 请访问我们的 [vision web browser code](https://github.com/huggingface/smolagents/blob/main/src/smolagents/vision_web_browser.py) 查看完整可运行示例。
110 |
111 | ### 分步运行 Agents
112 |
113 | 当您需要处理耗时数天的工具调用时,这种方式特别有用:您可以逐步执行Agents。这还允许您在每一步更新记忆。
114 |
115 | ```py
116 | from smolagents import HfApiModel, CodeAgent, ActionStep, TaskStep
117 |
118 | agent = CodeAgent(tools=[], model=HfApiModel(), verbosity_level=1)
119 | print(agent.memory.system_prompt)
120 |
121 | task = "What is the 20th Fibonacci number?"
122 |
123 | # You could modify the memory as needed here by inputting the memory of another agent.
124 | # agent.memory.steps = previous_agent.memory.steps
125 |
126 | # Let's start a new task!
127 | agent.memory.steps.append(TaskStep(task=task, task_images=[]))
128 |
129 | final_answer = None
130 | step_number = 1
131 | while final_answer is None and step_number <= 10:
132 | memory_step = ActionStep(
133 | step_number=step_number,
134 | observations_images=[],
135 | )
136 | # Run one step.
137 | final_answer = agent.step(memory_step)
138 | agent.memory.steps.append(memory_step)
139 | step_number += 1
140 |
141 | # Change the memory as you please!
142 | # For instance to update the latest step:
143 | # agent.memory.steps[-1] = ...
144 |
145 | print("The final answer is:", final_answer)
146 | ```
--------------------------------------------------------------------------------
/examples/open_deep_research/scripts/text_inspector_tool.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | from smolagents import Tool
4 | from smolagents.models import MessageRole, Model
5 |
6 | from .mdconvert import MarkdownConverter
7 |
8 |
9 | class TextInspectorTool(Tool):
10 | name = "inspect_file_as_text"
11 | description = """
12 | You cannot load files yourself: instead call this tool to read a file as markdown text and ask questions about it.
13 | This tool handles the following file extensions: [".html", ".htm", ".xlsx", ".pptx", ".wav", ".mp3", ".m4a", ".flac", ".pdf", ".docx"], and all other types of text files. IT DOES NOT HANDLE IMAGES."""
14 |
15 | inputs = {
16 | "file_path": {
17 | "description": "The path to the file you want to read as text. Must be a '.something' file, like '.pdf'. If it is an image, use the visualizer tool instead! DO NOT use this tool for an HTML webpage: use the web_search tool instead!",
18 | "type": "string",
19 | },
20 | "question": {
21 | "description": "[Optional]: Your question, as a natural language sentence. Provide as much context as possible. Do not pass this parameter if you just want to directly return the content of the file.",
22 | "type": "string",
23 | "nullable": True,
24 | },
25 | }
26 | output_type = "string"
27 | md_converter = MarkdownConverter()
28 |
29 | def __init__(self, model: Model, text_limit: int):
30 | super().__init__()
31 | self.model = model
32 | self.text_limit = text_limit
33 |
34 | def forward_initial_exam_mode(self, file_path, question):
35 | result = self.md_converter.convert(file_path)
36 |
37 | if file_path[-4:] in [".png", ".jpg"]:
38 | raise Exception("Cannot use inspect_file_as_text tool with images: use visualizer instead!")
39 |
40 | if ".zip" in file_path:
41 | return result.text_content
42 |
43 | if not question:
44 | return result.text_content
45 |
46 | if len(result.text_content) < 4000:
47 | return "Document content: " + result.text_content
48 |
49 | messages = [
50 | {
51 | "role": MessageRole.SYSTEM,
52 | "content": [
53 | {
54 | "type": "text",
55 | "text": "Here is a file:\n### "
56 | + str(result.title)
57 | + "\n\n"
58 | + result.text_content[: self.text_limit],
59 | }
60 | ],
61 | },
62 | {
63 | "role": MessageRole.USER,
64 | "content": [
65 | {
66 | "type": "text",
67 | "text": "Now please write a short, 5 sentence caption for this document, that could help someone asking this question: "
68 | + question
69 | + "\n\nDon't answer the question yourself! Just provide useful notes on the document",
70 | }
71 | ],
72 | },
73 | ]
74 | return self.model(messages).content
75 |
76 | def forward(self, file_path, question: Optional[str] = None) -> str:
77 | result = self.md_converter.convert(file_path)
78 |
79 | if file_path[-4:] in [".png", ".jpg"]:
80 | raise Exception("Cannot use inspect_file_as_text tool with images: use visualizer instead!")
81 |
82 | if ".zip" in file_path:
83 | return result.text_content
84 |
85 | if not question:
86 | return result.text_content
87 |
88 | messages = [
89 | {
90 | "role": MessageRole.SYSTEM,
91 | "content": [
92 | {
93 | "type": "text",
94 | "text": "You will have to write a short caption for this file, then answer this question:"
95 | + question,
96 | }
97 | ],
98 | },
99 | {
100 | "role": MessageRole.USER,
101 | "content": [
102 | {
103 | "type": "text",
104 | "text": "Here is the complete file:\n### "
105 | + str(result.title)
106 | + "\n\n"
107 | + result.text_content[: self.text_limit],
108 | }
109 | ],
110 | },
111 | {
112 | "role": MessageRole.USER,
113 | "content": [
114 | {
115 | "type": "text",
116 | "text": "Now answer the question below. Use these three headings: '1. Short answer', '2. Extremely detailed answer', '3. Additional Context on the document and question asked'."
117 | + question,
118 | }
119 | ],
120 | },
121 | ]
122 | return self.model(messages).content
123 |
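A hedged usage sketch of the tool above: the model choice, token limit, and file path are placeholders, but the wiring mirrors how `run.py` instantiates `TextInspectorTool`:

```py
# Illustrative only: inspect a local document and ask a question about it.
from scripts.text_inspector_tool import TextInspectorTool

from smolagents import LiteLLMModel

model = LiteLLMModel(model_id="o1", max_completion_tokens=8192)  # placeholder model id
inspector = TextInspectorTool(model, text_limit=100000)
print(inspector.forward("report.pdf", question="What is the main conclusion of this document?"))
```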
--------------------------------------------------------------------------------
/tests/test_default_tools.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2024 HuggingFace Inc.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | import unittest
16 |
17 | import pytest
18 |
19 | from smolagents.agent_types import _AGENT_TYPE_MAPPING
20 | from smolagents.default_tools import (
21 | DuckDuckGoSearchTool,
22 | PythonInterpreterTool,
23 | SpeechToTextTool,
24 | VisitWebpageTool,
25 | WikipediaSearchTool,
26 | )
27 |
28 | from .test_tools import ToolTesterMixin
29 |
30 |
31 | class DefaultToolTests(unittest.TestCase):
32 | def test_visit_webpage(self):
33 | arguments = {"url": "https://en.wikipedia.org/wiki/United_States_Secretary_of_Homeland_Security"}
34 | result = VisitWebpageTool()(arguments)
35 | assert isinstance(result, str)
36 | assert "* [About Wikipedia](/wiki/Wikipedia:About)" in result # Proper wikipedia pages have an About
37 |
38 | def test_ddgs_with_kwargs(self):
39 | result = DuckDuckGoSearchTool(timeout=20)("DeepSeek parent company")
40 | assert isinstance(result, str)
41 |
42 |
43 | class TestPythonInterpreterTool(ToolTesterMixin):
44 | def setup_method(self):
45 | self.tool = PythonInterpreterTool(authorized_imports=["numpy"])
46 | self.tool.setup()
47 |
48 | def test_exact_match_arg(self):
49 | result = self.tool("(2 / 2) * 4")
50 | assert result == "Stdout:\n\nOutput: 4.0"
51 |
52 | def test_exact_match_kwarg(self):
53 | result = self.tool(code="(2 / 2) * 4")
54 | assert result == "Stdout:\n\nOutput: 4.0"
55 |
56 | def test_agent_type_output(self):
57 | inputs = ["2 * 2"]
58 | output = self.tool(*inputs, sanitize_inputs_outputs=True)
59 | output_type = _AGENT_TYPE_MAPPING[self.tool.output_type]
60 | assert isinstance(output, output_type)
61 |
62 | def test_agent_types_inputs(self):
63 | inputs = ["2 * 2"]
64 | _inputs = []
65 |
66 | for _input, expected_input in zip(inputs, self.tool.inputs.values()):
67 | input_type = expected_input["type"]
68 | if isinstance(input_type, list):
69 | _inputs.append([_AGENT_TYPE_MAPPING[_input_type](_input) for _input_type in input_type])
70 | else:
71 | _inputs.append(_AGENT_TYPE_MAPPING[input_type](_input))
72 |
73 | # Should not raise an error
74 | output = self.tool(*inputs, sanitize_inputs_outputs=True)
75 | output_type = _AGENT_TYPE_MAPPING[self.tool.output_type]
76 | assert isinstance(output, output_type)
77 |
78 | def test_imports_work(self):
79 | result = self.tool("import numpy as np")
80 | assert "import from numpy is not allowed" not in result.lower()
81 |
82 | def test_unauthorized_imports_fail(self):
83 | with pytest.raises(Exception) as e:
84 | self.tool("import sympy as sp")
85 | assert "sympy" in str(e).lower()
86 |
87 |
88 | class TestSpeechToTextTool:
89 | def test_new_instance(self):
90 | from transformers.models.whisper import WhisperForConditionalGeneration, WhisperProcessor
91 |
92 | tool = SpeechToTextTool()
93 | assert tool is not None
94 | assert tool.pre_processor_class == WhisperProcessor
95 | assert tool.model_class == WhisperForConditionalGeneration
96 |
97 |
98 | @pytest.mark.parametrize(
99 | "language, content_type, extract_format, query",
100 | [
101 | ("en", "summary", "HTML", "Python_(programming_language)"), # English, Summary Mode, HTML format
102 | ("en", "text", "WIKI", "Python_(programming_language)"), # English, Full Text Mode, WIKI format
103 | ("es", "summary", "HTML", "Python_(lenguaje_de_programación)"), # Spanish, Summary Mode, HTML format
104 | ("es", "text", "WIKI", "Python_(lenguaje_de_programación)"), # Spanish, Full Text Mode, WIKI format
105 | ],
106 | )
107 | def test_wikipedia_search(language, content_type, extract_format, query):
108 | tool = WikipediaSearchTool(
109 | user_agent="TestAgent (test@example.com)",
110 | language=language,
111 | content_type=content_type,
112 | extract_format=extract_format,
113 | )
114 |
115 | result = tool.forward(query)
116 |
117 | assert isinstance(result, str), "Output should be a string"
118 | assert "✅ **Wikipedia Page:**" in result, "Response should contain Wikipedia page title"
119 | assert "🔗 **Read more:**" in result, "Response should contain Wikipedia page URL"
120 |
121 | if content_type == "summary":
122 | assert len(result.split()) < 1000, "Summary mode should return a shorter text"
123 | if content_type == "text":
124 | assert len(result.split()) > 1000, "Full text mode should return a longer text"
125 |
--------------------------------------------------------------------------------
/src/smolagents/cli.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding=utf-8
3 |
4 | # Copyright 2025 The HuggingFace Inc. team. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | import argparse
18 | import os
19 |
20 | from dotenv import load_dotenv
21 |
22 | from smolagents import CodeAgent, HfApiModel, LiteLLMModel, Model, OpenAIServerModel, Tool, TransformersModel
23 | from smolagents.default_tools import TOOL_MAPPING
24 |
25 |
26 | leopard_prompt = "How many seconds would it take for a leopard at full speed to run through Pont des Arts?"
27 |
28 |
29 | def parse_arguments():
30 | parser = argparse.ArgumentParser(description="Run a CodeAgent with all specified parameters")
31 | parser.add_argument(
32 | "prompt",
33 | type=str,
34 | nargs="?", # Makes it optional
35 | default=leopard_prompt,
36 | help="The prompt to run with the agent",
37 | )
38 | parser.add_argument(
39 | "--model-type",
40 | type=str,
41 | default="HfApiModel",
42 | help="The model type to use (e.g., HfApiModel, OpenAIServerModel, LiteLLMModel, TransformersModel)",
43 | )
44 | parser.add_argument(
45 | "--model-id",
46 | type=str,
47 | default="Qwen/Qwen2.5-Coder-32B-Instruct",
48 | help="The model ID to use for the specified model type",
49 | )
50 | parser.add_argument(
51 | "--imports",
52 | nargs="*", # accepts zero or more arguments
53 | default=[],
54 | help="Space-separated list of imports to authorize (e.g., 'numpy pandas')",
55 | )
56 | parser.add_argument(
57 | "--tools",
58 | nargs="*",
59 | default=["web_search"],
60 | help="Space-separated list of tools that the agent can use (e.g., 'tool1 tool2 tool3')",
61 | )
62 | parser.add_argument(
63 | "--verbosity-level",
64 | type=int,
65 | default=1,
66 | help="The verbosity level, as an int in [0, 1, 2].",
67 | )
68 | group = parser.add_argument_group("api options", "Options for API-based model types")
69 | group.add_argument(
70 | "--api-base",
71 | type=str,
72 | help="The base URL for the model",
73 | )
74 | group.add_argument(
75 | "--api-key",
76 | type=str,
77 | help="The API key for the model",
78 | )
79 | return parser.parse_args()
80 |
81 |
82 | def load_model(model_type: str, model_id: str, api_base: str | None = None, api_key: str | None = None) -> Model:
83 | if model_type == "OpenAIServerModel":
84 | return OpenAIServerModel(
85 | api_key=api_key or os.getenv("FIREWORKS_API_KEY"),
86 | api_base=api_base or "https://api.fireworks.ai/inference/v1",
87 | model_id=model_id,
88 | )
89 | elif model_type == "LiteLLMModel":
90 | return LiteLLMModel(
91 | model_id=model_id,
92 | api_key=api_key,
93 | api_base=api_base,
94 | )
95 | elif model_type == "TransformersModel":
96 | return TransformersModel(model_id=model_id, device_map="auto")
97 | elif model_type == "HfApiModel":
98 | return HfApiModel(
99 | model_id=model_id,
100 | token=api_key or os.getenv("HF_API_KEY"),
101 | )
102 | else:
103 | raise ValueError(f"Unsupported model type: {model_type}")
104 |
105 |
106 | def run_smolagent(
107 | prompt: str,
108 | tools: list[str],
109 | model_type: str,
110 | model_id: str,
111 | api_base: str | None = None,
112 | api_key: str | None = None,
113 | imports: list[str] | None = None,
114 | ) -> None:
115 | load_dotenv()
116 |
117 | model = load_model(model_type, model_id, api_base=api_base, api_key=api_key)
118 |
119 | available_tools = []
120 | for tool_name in tools:
121 | if "/" in tool_name:
122 | available_tools.append(Tool.from_space(tool_name))
123 | else:
124 | if tool_name in TOOL_MAPPING:
125 | available_tools.append(TOOL_MAPPING[tool_name]())
126 | else:
127 | raise ValueError(f"Tool {tool_name} is not recognized either as a default tool or a Space.")
128 |
129 | print(f"Running agent with these tools: {tools}")
130 | agent = CodeAgent(tools=available_tools, model=model, additional_authorized_imports=imports)
131 |
132 | agent.run(prompt)
133 |
134 |
135 | def main() -> None:
136 | args = parse_arguments()
137 | run_smolagent(
138 | args.prompt,
139 | args.tools,
140 | args.model_type,
141 | args.model_id,
142 | api_base=args.api_base,
143 | api_key=args.api_key,
144 | imports=args.imports,
145 | )
146 |
147 |
148 | if __name__ == "__main__":
149 | main()
150 |
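A hedged sketch of driving these helpers from Python rather than the command line; the prompt and defaults mirror `parse_arguments` above:

```py
# Illustrative only: roughly equivalent to running the CLI with its default arguments.
from smolagents.cli import run_smolagent

run_smolagent(
    "How many seconds would it take for a leopard at full speed to run through Pont des Arts?",
    tools=["web_search"],
    model_type="HfApiModel",
    model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
    imports=[],
)
```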
--------------------------------------------------------------------------------
/tests/test_cli.py:
--------------------------------------------------------------------------------
1 | from unittest.mock import patch
2 |
3 | import pytest
4 |
5 | from smolagents.cli import load_model
6 | from smolagents.local_python_executor import LocalPythonExecutor
7 | from smolagents.models import HfApiModel, LiteLLMModel, OpenAIServerModel, TransformersModel
8 |
9 |
10 | @pytest.fixture
11 | def set_env_vars(monkeypatch):
12 | monkeypatch.setenv("FIREWORKS_API_KEY", "test_fireworks_api_key")
13 | monkeypatch.setenv("HF_TOKEN", "test_hf_api_key")
14 |
15 |
16 | def test_load_model_openai_server_model(set_env_vars):
17 | with patch("openai.OpenAI") as MockOpenAI:
18 | model = load_model("OpenAIServerModel", "test_model_id")
19 | assert isinstance(model, OpenAIServerModel)
20 | assert model.model_id == "test_model_id"
21 | assert MockOpenAI.call_count == 1
22 | assert MockOpenAI.call_args.kwargs["base_url"] == "https://api.fireworks.ai/inference/v1"
23 | assert MockOpenAI.call_args.kwargs["api_key"] == "test_fireworks_api_key"
24 |
25 |
26 | def test_load_model_litellm_model():
27 | model = load_model("LiteLLMModel", "test_model_id", api_key="test_api_key", api_base="https://api.test.com")
28 | assert isinstance(model, LiteLLMModel)
29 | assert model.api_key == "test_api_key"
30 | assert model.api_base == "https://api.test.com"
31 | assert model.model_id == "test_model_id"
32 |
33 |
34 | def test_load_model_transformers_model():
35 | with (
36 | patch(
37 | "transformers.AutoModelForImageTextToText.from_pretrained",
38 | side_effect=ValueError("Unrecognized configuration class"),
39 | ),
40 | patch("transformers.AutoModelForCausalLM.from_pretrained"),
41 | patch("transformers.AutoTokenizer.from_pretrained"),
42 | ):
43 | model = load_model("TransformersModel", "test_model_id")
44 | assert isinstance(model, TransformersModel)
45 | assert model.model_id == "test_model_id"
46 |
47 |
48 | def test_load_model_hf_api_model(set_env_vars):
49 | with patch("huggingface_hub.InferenceClient") as huggingface_hub_InferenceClient:
50 | model = load_model("HfApiModel", "test_model_id")
51 | assert isinstance(model, HfApiModel)
52 | assert model.model_id == "test_model_id"
53 | assert huggingface_hub_InferenceClient.call_count == 1
54 | assert huggingface_hub_InferenceClient.call_args.kwargs["token"] == "test_hf_api_key"
55 |
56 |
57 | def test_load_model_invalid_model_type():
58 | with pytest.raises(ValueError, match="Unsupported model type: InvalidModel"):
59 | load_model("InvalidModel", "test_model_id")
60 |
61 |
62 | def test_cli_main(capsys):
63 | with patch("smolagents.cli.load_model") as mock_load_model:
64 | mock_load_model.return_value = "mock_model"
65 | with patch("smolagents.cli.CodeAgent") as mock_code_agent:
66 | from smolagents.cli import run_smolagent
67 |
68 | run_smolagent("test_prompt", [], "HfApiModel", "test_model_id")
69 | # load_model
70 | assert len(mock_load_model.call_args_list) == 1
71 | assert mock_load_model.call_args.args == ("HfApiModel", "test_model_id")
72 | assert mock_load_model.call_args.kwargs == {"api_base": None, "api_key": None}
73 | # CodeAgent
74 | assert len(mock_code_agent.call_args_list) == 1
75 | assert mock_code_agent.call_args.args == ()
76 | assert mock_code_agent.call_args.kwargs == {
77 | "tools": [],
78 | "model": "mock_model",
79 | "additional_authorized_imports": None,
80 | }
81 | # agent.run
82 | assert len(mock_code_agent.return_value.run.call_args_list) == 1
83 | assert mock_code_agent.return_value.run.call_args.args == ("test_prompt",)
84 | # print
85 | captured = capsys.readouterr()
86 | assert "Running agent with these tools: []" in captured.out
87 |
88 |
89 | def test_vision_web_browser_main():
90 | with patch("smolagents.vision_web_browser.helium"):
91 | with patch("smolagents.vision_web_browser.load_model") as mock_load_model:
92 | mock_load_model.return_value = "mock_model"
93 | with patch("smolagents.vision_web_browser.CodeAgent") as mock_code_agent:
94 | from smolagents.vision_web_browser import helium_instructions, run_webagent
95 |
96 | run_webagent("test_prompt", "HfApiModel", "test_model_id")
97 | # load_model
98 | assert len(mock_load_model.call_args_list) == 1
99 | assert mock_load_model.call_args.args == ("HfApiModel", "test_model_id")
100 | # CodeAgent
101 | assert len(mock_code_agent.call_args_list) == 1
102 | assert mock_code_agent.call_args.args == ()
103 | assert len(mock_code_agent.call_args.kwargs["tools"]) == 4
104 | assert mock_code_agent.call_args.kwargs["model"] == "mock_model"
105 | assert mock_code_agent.call_args.kwargs["additional_authorized_imports"] == ["helium"]
106 | # agent.python_executor
107 | assert len(mock_code_agent.return_value.python_executor.call_args_list) == 1
108 | assert mock_code_agent.return_value.python_executor.call_args.args == ("from helium import *",)
109 | assert LocalPythonExecutor(["helium"])("from helium import *") == (None, "", False)
110 | # agent.run
111 | assert len(mock_code_agent.return_value.run.call_args_list) == 1
112 | assert mock_code_agent.return_value.run.call_args.args == ("test_prompt" + helium_instructions,)
113 |
--------------------------------------------------------------------------------
/docs/source/hi/tutorials/secure_code_execution.mdx:
--------------------------------------------------------------------------------
1 |
16 | # सुरक्षित कोड एक्जीक्यूशन
17 |
18 | [[open-in-colab]]
19 |
20 | > [!TIP]
21 | > यदि आप एजेंट्स बनाने में नए हैं, तो सबसे पहले [एजेंट्स का परिचय](../conceptual_guides/intro_agents) और [smolagents की गाइडेड टूर](../guided_tour) पढ़ना सुनिश्चित करें।
22 |
23 | ### कोड Agents
24 |
25 | [कई](https://huggingface.co/papers/2402.01030) [शोध](https://huggingface.co/papers/2411.01747) [पत्रों](https://huggingface.co/papers/2401.00812) ने दिखाया है कि LLM द्वारा अपनी क्रियाओं (टूल कॉल्स) को कोड में लिखना, टूल कॉलिंग के वर्तमान मानक प्रारूप से बहुत बेहतर है, जो industry में "टूल्स नेम्स और आर्ग्यूमेंट्स को JSON के रूप में लिखने" के विभिन्न रूप हैं।
26 |
27 | कोड बेहतर क्यों है? क्योंकि हमने अपनी कोड भाषाओं को विशेष रूप से कंप्यूटर द्वारा की जाने वाली क्रियाओं को व्यक्त करने के लिए तैयार किया है। यदि JSON स्निपेट्स एक बेहतर तरीका होता, तो यह पैकेज JSON स्निपेट्स में लिखा गया होता और शैतान हम पर हंस रहा होता।
28 |
29 | कोड कंप्यूटर पर क्रियाएँ व्यक्त करने का बेहतर तरीका है। इसमें बेहतर है:
30 | - **कंपोज़ेबिलिटी:** क्या आप JSON क्रियाओं को एक-दूसरे के भीतर नेस्ट कर सकते हैं, या बाद में पुन: उपयोग करने के लिए JSON क्रियाओं का एक सेट परिभाषित कर सकते हैं, जैसे आप बस एक पायथन फ़ंक्शन परिभाषित कर सकते हैं?
31 | - **ऑब्जेक्ट प्रबंधन:** JSON में `generate_image` जैसी क्रिया का आउटपुट कैसे स्टोर करें?
32 | - **सामान्यता:** कोड किसी भी कंप्यूटर कार्य को व्यक्त करने के लिए बनाया गया है।
33 | - **LLM प्रशिक्षण कॉर्पस में प्रतिनिधित्व:** क्यों न इस आशीर्वाद का लाभ उठाएं कि उच्च गुणवत्ता वाले कोड उदाहरण पहले से ही LLM प्रशिक्षण डेटा में शामिल हैं?
34 |
35 | यह नीचे दी गई छवि में दर्शाया गया है, जो [Executable Code Actions Elicit Better LLM Agents](https://huggingface.co/papers/2402.01030) से ली गई है।
36 |
37 |
38 |
39 | यही कारण है कि हमने कोड एजेंट्स, इस मामले में पायथन एजेंट्स पर जोर दिया, जिसका मतलब सुरक्षित पायथन इंटरप्रेटर बनाने पर अधिक प्रयास करना था।
40 |
41 | ### लोकल पायथन इंटरप्रेटर
42 |
43 | डिफ़ॉल्ट रूप से, `CodeAgent` LLM-जनरेटेड कोड को आपके एनवायरनमेंट में चलाता है।
44 | यह एक्जीक्यूशन वैनिला पायथन इंटरप्रेटर द्वारा नहीं किया जाता: हमने एक अधिक सुरक्षित `LocalPythonExecutor` को शुरू से फिर से बनाया है।
45 | यह इंटरप्रेटर सुरक्षा के लिए डिज़ाइन किया गया है:
46 | - इम्पोर्ट्स को उपयोगकर्ता द्वारा स्पष्ट रूप से पास की गई सूची तक सीमित करना
47 | - इनफिनिट लूप्स और रिसोर्स ब्लोटिंग को रोकने के लिए ऑपरेशंस की संख्या को कैप करना
48 | - कोई भी ऐसा ऑपरेशन नहीं करेगा जो पूर्व-परिभाषित नहीं है
49 |
50 | हमने इसे कई उपयोग मामलों में इस्तेमाल किया है, और कभी भी एनवायरनमेंट को कोई नुकसान नहीं देखा।
51 |
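नीचे एक छोटा स्केच दिया गया है (मान्यता: `LocalPythonExecutor` को सीधे इम्पोर्ट करके कॉल किया जा सकता है; सटीक API आपके smolagents संस्करण के अनुसार बदल सकती है), जो दिखाता है कि अनधिकृत इम्पोर्ट्स कैसे ब्लॉक किए जाते हैं:

```py
from smolagents.local_python_executor import LocalPythonExecutor

# केवल "math" इम्पोर्ट की अनुमति दें
executor = LocalPythonExecutor(["math"])

# अनुमत इम्पोर्ट वाला कोड चलता है; output में अंतिम एक्सप्रेशन का मान मिलना चाहिए
output, logs, is_final_answer = executor("import math\nmath.sqrt(16)")
print(output)  # अपेक्षित: 4.0

# अनधिकृत इम्पोर्ट (जैसे "os") एक एरर उठाएगा
try:
    executor("import os")
except Exception as error:
    print("ब्लॉक किया गया:", error)
```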
52 | हालांकि यह समाधान पूरी तरह से सुरक्षित नहीं है: कोई ऐसे अवसरों की कल्पना कर सकता है जहां दुर्भावनापूर्ण कार्यों के लिए फाइन-ट्यून किए गए LLM अभी भी आपके एनवायरनमेंट को नुकसान पहुंचा सकते हैं। उदाहरण के लिए यदि आपने छवियों को प्रोसेस करने के लिए `Pillow` जैसे मासूम पैकेज की अनुमति दी है, तो LLM आपकी हार्ड ड्राइव को ब्लोट करने के लिए हजारों छवियों को सेव कर सकता है।
53 | यदि आपने खुद LLM इंजन चुना है तो यह निश्चित रूप से संभावित नहीं है, लेकिन यह हो सकता है।
54 |
55 | तो यदि आप अतिरिक्त सावधानी बरतना चाहते हैं, तो आप नीचे वर्णित रिमोट कोड एक्जीक्यूशन विकल्प का उपयोग कर सकते हैं।
56 |
57 | ### E2B कोड एक्जीक्यूटर
58 |
59 | अधिकतम सुरक्षा के लिए, आप कोड को सैंडबॉक्स्ड एनवायरनमेंट में चलाने के लिए E2B के साथ हमारे एकीकरण का उपयोग कर सकते हैं। यह एक रिमोट एक्जीक्यूशन सेवा है जो आपके कोड को एक आइसोलेटेड कंटेनर में चलाती है, जिससे कोड का आपके स्थानीय एनवायरनमेंट को प्रभावित करना असंभव हो जाता है।
60 |
61 | इसके लिए, आपको अपना E2B अकाउंट सेटअप करने और अपने एनवायरनमेंट वेरिएबल्स में अपना `E2B_API_KEY` सेट करने की आवश्यकता होगी। अधिक जानकारी के लिए [E2B की क्विकस्टार्ट डॉक्यूमेंटेशन](https://e2b.dev/docs/quickstart) पर जाएं।
62 |
63 | फिर आप इसे `pip install e2b-code-interpreter python-dotenv` के साथ इंस्टॉल कर सकते हैं।
64 |
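उदाहरण के लिए (एक छोटा स्केच, यह मानते हुए कि आपने कुंजी को एक `.env` फ़ाइल में रखा है), आप `python-dotenv` से `E2B_API_KEY` लोड कर सकते हैं:

```py
from dotenv import load_dotenv

load_dotenv()  # .env फ़ाइल से E2B_API_KEY को एनवायरनमेंट वेरिएबल के रूप में लोड करता है
```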
65 | अब आप तैयार हैं!
66 |
67 | कोड एक्जीक्यूटर को E2B पर सेट करने के लिए, बस अपने `CodeAgent` को इनिशियलाइज़ करते समय `executor_type="e2b"` फ्लैग पास करें।
68 | ध्यान दें कि आपको `additional_authorized_imports` में सभी टूल की डिपेंडेंसीज़ जोड़नी चाहिए, ताकि एक्जीक्यूटर उन्हें इंस्टॉल करे।
69 |
70 | ```py
71 | from smolagents import CodeAgent, VisitWebpageTool, HfApiModel
72 | agent = CodeAgent(
73 | tools = [VisitWebpageTool()],
74 | model=HfApiModel(),
75 | additional_authorized_imports=["requests", "markdownify"],
76 | executor_type="e2b"
77 | )
78 |
79 | agent.run("What was Abraham Lincoln's preferred pet?")
80 | ```
81 |
82 | E2B कोड एक्जीक्यूशन वर्तमान में मल्टी-एजेंट्स के साथ काम नहीं करता है - क्योंकि कोड ब्लॉब में एक एजेंट कॉल करना जो रिमोटली एक्जीक्यूट किया जाना चाहिए, यह एक गड़बड़ है। लेकिन हम इसे जोड़ने पर काम कर रहे हैं!
83 |
--------------------------------------------------------------------------------
/docs/source/zh/reference/models.mdx:
--------------------------------------------------------------------------------
1 |
2 |
17 | # 模型
18 |
19 |
57 |
116 |
117 | 与 JSON 片段相比,用代码编写动作提供了更好的:
118 |
119 | - **可组合性:** 你能像定义 python 函数一样,将 JSON 动作嵌套在一起,或定义一组 JSON 动作以供重用吗?
120 | - **对象管理:** 你如何在 JSON 中存储像 `generate_image` 这样的动作的输出?
121 | - **通用性:** 代码被构建为简单地表达任何你可以让计算机做的事情。
122 | - **LLM 训练数据中的表示:** 大量高质量的代码动作已经包含在 LLM 的训练数据中,这意味着它们已经为此进行了训练!
123 |
--------------------------------------------------------------------------------
/tests/test_gradio_ui.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2024 HuggingFace Inc.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | import os
17 | import shutil
18 | import tempfile
19 | import unittest
20 | from unittest.mock import Mock, patch
21 |
22 | from smolagents.gradio_ui import GradioUI
23 |
24 |
25 | class GradioUITester(unittest.TestCase):
26 | def setUp(self):
27 | """Initialize test environment"""
28 | self.temp_dir = tempfile.mkdtemp()
29 | self.mock_agent = Mock()
30 | self.ui = GradioUI(agent=self.mock_agent, file_upload_folder=self.temp_dir)
31 | self.allowed_types = [".pdf", ".docx", ".txt"]
32 |
33 | def tearDown(self):
34 | """Clean up test environment"""
35 | shutil.rmtree(self.temp_dir)
36 |
37 | def test_upload_file_default_types(self):
38 | """Test default allowed file types"""
39 | default_types = [".pdf", ".docx", ".txt"]
40 | for file_type in default_types:
41 | with tempfile.NamedTemporaryFile(suffix=file_type) as temp_file:
42 | mock_file = Mock()
43 | mock_file.name = temp_file.name
44 |
45 | textbox, uploads_log = self.ui.upload_file(mock_file, [])
46 |
47 | self.assertIn("File uploaded:", textbox.value)
48 | self.assertEqual(len(uploads_log), 1)
49 | self.assertTrue(os.path.exists(os.path.join(self.temp_dir, os.path.basename(temp_file.name))))
50 |
51 | def test_upload_file_default_types_disallowed(self):
52 | """Test default disallowed file types"""
53 | disallowed_types = [".exe", ".sh", ".py", ".jpg"]
54 | for file_type in disallowed_types:
55 | with tempfile.NamedTemporaryFile(suffix=file_type) as temp_file:
56 | mock_file = Mock()
57 | mock_file.name = temp_file.name
58 |
59 | textbox, uploads_log = self.ui.upload_file(mock_file, [])
60 |
61 | self.assertEqual(textbox.value, "File type disallowed")
62 | self.assertEqual(len(uploads_log), 0)
63 |
64 | def test_upload_file_success(self):
65 | """Test successful file upload scenario"""
66 | with tempfile.NamedTemporaryFile(suffix=".txt") as temp_file:
67 | mock_file = Mock()
68 | mock_file.name = temp_file.name
69 |
70 | textbox, uploads_log = self.ui.upload_file(mock_file, [])
71 |
72 | self.assertIn("File uploaded:", textbox.value)
73 | self.assertEqual(len(uploads_log), 1)
74 | self.assertTrue(os.path.exists(os.path.join(self.temp_dir, os.path.basename(temp_file.name))))
75 | self.assertEqual(uploads_log[0], os.path.join(self.temp_dir, os.path.basename(temp_file.name)))
76 |
77 | def test_upload_file_none(self):
78 | """Test scenario when no file is selected"""
79 | textbox, uploads_log = self.ui.upload_file(None, [])
80 |
81 | self.assertEqual(textbox.value, "No file uploaded")
82 | self.assertEqual(len(uploads_log), 0)
83 |
84 | def test_upload_file_invalid_type(self):
85 | """Test disallowed file type"""
86 | with tempfile.NamedTemporaryFile(suffix=".exe") as temp_file:
87 | mock_file = Mock()
88 | mock_file.name = temp_file.name
89 |
90 | textbox, uploads_log = self.ui.upload_file(mock_file, [])
91 |
92 | self.assertEqual(textbox.value, "File type disallowed")
93 | self.assertEqual(len(uploads_log), 0)
94 |
95 | def test_upload_file_special_chars(self):
96 | """Test scenario with special characters in filename"""
97 | with tempfile.NamedTemporaryFile(suffix=".txt") as temp_file:
98 | # Create a new temporary file with special characters
99 | special_char_name = os.path.join(os.path.dirname(temp_file.name), "test@#$%^&*.txt")
100 | shutil.copy(temp_file.name, special_char_name)
101 | try:
102 | mock_file = Mock()
103 | mock_file.name = special_char_name
104 |
105 | with patch("shutil.copy"):
106 | textbox, uploads_log = self.ui.upload_file(mock_file, [])
107 |
108 | self.assertIn("File uploaded:", textbox.value)
109 | self.assertEqual(len(uploads_log), 1)
110 | self.assertIn("test_____", uploads_log[0])
111 | finally:
112 | # Clean up the special character file
113 | if os.path.exists(special_char_name):
114 | os.remove(special_char_name)
115 |
116 | def test_upload_file_custom_types(self):
117 | """Test custom allowed file types"""
118 | with tempfile.NamedTemporaryFile(suffix=".csv") as temp_file:
119 | mock_file = Mock()
120 | mock_file.name = temp_file.name
121 |
122 | textbox, uploads_log = self.ui.upload_file(mock_file, [], allowed_file_types=[".csv"])
123 |
124 | self.assertIn("File uploaded:", textbox.value)
125 | self.assertEqual(len(uploads_log), 1)
126 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 |
16 |
17 | # Contribute to smolagents
18 |
19 | Everyone is welcome to contribute, and we value everybody's contribution. Code
20 | contributions are not the only way to help the community. Answering questions, helping
21 | others, and improving the documentation are also immensely valuable.
22 |
23 | It also helps us if you spread the word! Reference the library in blog posts
24 | about the awesome projects it made possible, shout out on Twitter every time it has
25 | helped you, or simply ⭐️ the repository to say thank you.
26 |
27 | However you choose to contribute, please be mindful and respect our
28 | [code of conduct](https://github.com/huggingface/smolagents/blob/main/CODE_OF_CONDUCT.md).
29 |
30 | **This guide was heavily inspired by the awesome [scikit-learn guide to contributing](https://github.com/scikit-learn/scikit-learn/blob/main/CONTRIBUTING.md).**
31 |
32 | ## Ways to contribute
33 |
34 | There are several ways you can contribute to smolagents.
35 |
36 | * Submit issues related to bugs or desired new features.
37 | * Contribute to the examples or to the documentation.
38 | * Fix outstanding issues with the existing code.
39 |
40 | > All contributions are equally valuable to the community. 🥰
41 |
42 | ## Submitting a bug-related issue or feature request
43 |
44 | At any moment, feel welcome to open an issue, citing your exact error traces and package versions if it's a bug.
45 | It's often even better to open a PR with your proposed fixes/changes!
46 |
47 | Do your best to follow these guidelines when submitting a bug-related issue or a feature
48 | request. It will make it easier for us to come back to you quickly and with good
49 | feedback.
50 |
51 | ### Did you find a bug?
52 |
53 | The smolagents library is robust and reliable thanks to users who report the problems they encounter.
54 |
55 | Before you report an issue, we would really appreciate it if you could **make sure the bug was not
56 | already reported** (use the search bar on GitHub under Issues). Your issue should also be related to bugs in the
57 | library itself, and not your code.
58 |
59 | Once you've confirmed the bug hasn't already been reported, please include the following information in your issue so
60 | we can quickly resolve it:
61 |
62 | * Your **OS type and version**, as well as your environment versions (your Python version and the versions of relevant dependencies).
63 | * A short, self-contained, code snippet that allows us to reproduce the bug.
64 | * The *full* traceback if an exception is raised.
65 | * Attach any other additional information, like screenshots, you think may help.
66 |
67 | ### Do you want a new feature?
68 |
69 | If there is a new feature you'd like to see in smolagents, please open an issue and describe:
70 |
71 | 1. What is the *motivation* behind this feature? Is it related to a problem or frustration with the library? Is it
72 | a feature related to something you need for a project? Is it something you worked on and think it could benefit
73 | the community?
74 |
75 | Whatever it is, we'd love to hear about it!
76 |
77 | 2. Describe your requested feature in as much detail as possible. The more you can tell us about it, the better
78 | we'll be able to help you.
79 | 3. Provide a *code snippet* that demonstrates the feature's usage.
80 | 4. If the feature is related to a paper, please include a link.
81 |
82 | If your issue is well written we're already 80% of the way there by the time you create it.
83 |
84 | ## Do you want to add documentation?
85 |
86 | We're always looking for improvements to the documentation that make it more clear and accurate. Please let us know
87 | how the documentation can be improved such as typos and any content that is missing, unclear or inaccurate. We'll be
88 | happy to make the changes or help you make a contribution if you're interested!
89 |
90 | ## Fixing outstanding issues
91 |
92 | If you notice an issue with the existing code and have a fix in mind, feel free to [start contributing](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request) and open
93 | a Pull Request!
94 |
95 | ### Making code changes
96 |
97 | To install dev dependencies, run:
98 | ```
99 | pip install -e ".[dev]"
100 | ```
101 |
102 | When making changes to the codebase, please check that they follow the repo's code quality requirements.
103 | To check the code quality of the source code, run:
104 | ```
105 | make quality
106 | ```
107 |
108 | If the checks fail, you can run the formatter with:
109 | ```
110 | make style
111 | ```
112 |
113 | And commit the changes.
114 |
115 | To run tests locally, run this command:
116 | ```bash
117 | make test
118 | ```
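While iterating on a change, you can also run a single test file directly, for example with `pytest ./tests/test_memory.py`.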
119 |
120 |
121 | ## I want to become a maintainer of the project. How do I get there?
122 |
123 | smolagents is a project led and managed by Hugging Face. We are more than
124 | happy to have motivated individuals from other organizations join us as maintainers with the goal of helping smolagents
125 | make a dent in the world of Agents.
126 |
127 | If you are such an individual (or organization), please reach out to us and let's collaborate.
128 |
--------------------------------------------------------------------------------
/docs/source/zh/examples/rag.mdx:
--------------------------------------------------------------------------------
1 |
16 | # Agentic RAG
17 |
18 | [[open-in-colab]]
19 |
20 | Retrieval-Augmented-Generation (RAG) 是“使用大语言模型(LLM)来回答用户查询,但基于从知识库中检索的信息”。它比使用普通或微调的 LLM 具有许多优势:举几个例子,它允许将答案基于真实事实并减少虚构;它允许提供 LLM 领域特定的知识;并允许对知识库中的信息访问进行精细控制。
21 |
22 | 但是,普通的 RAG 存在一些局限性,以下两点尤为突出:
23 |
24 | - 它只执行一次检索步骤:如果结果不好,生成的内容也会不好。
25 | - 语义相似性是以用户查询为参考计算的,这可能不是最优的:例如,用户查询通常是一个问题,而包含真实答案的文档通常是肯定语态,因此其相似性得分会比其他以疑问形式呈现的源文档低,从而导致错失相关信息的风险。
26 |
27 | 我们可以通过制作一个 RAG agent来缓解这些问题:非常简单,一个配备了检索工具的agent!这个 agent 将
28 | 会:✅ 自己构建查询和检索,✅ 如果需要的话会重新检索。
29 |
30 | 因此,它将比普通 RAG 更智能,因为它可以自己构建查询,而不是直接使用用户查询作为参考。这样,它可以更
31 | 接近目标文档,从而提高检索的准确性,类似 [HyDE](https://huggingface.co/papers/2212.10496) 的做法。此 agent 可以
32 | 使用生成的片段,并在需要时重新检索,就像 [Self-Query](https://docs.llamaindex.ai/en/stable/examples/evaluation/RetryQuery/) 那样。
33 |
34 | 我们现在开始构建这个系统. 🛠️
35 |
36 | 运行以下代码以安装所需的依赖包:
37 | ```bash
38 | !pip install smolagents pandas langchain langchain-community sentence-transformers rank_bm25 --upgrade -q
39 | ```
40 |
41 | 你需要一个有效的 token 作为环境变量 `HF_TOKEN` 来调用 HF Inference API。我们使用 python-dotenv 来加载它。
42 | ```py
43 | from dotenv import load_dotenv
44 | load_dotenv()
45 | ```
46 |
47 | 我们首先加载一个知识库以在其上执行 RAG:此数据集是许多 Hugging Face 库的文档页面的汇编,存储为 markdown 格式。我们将仅保留 `transformers` 库的文档。然后通过处理数据集并将其存储到向量数据库中,为检索器准备知识库。我们将使用 [LangChain](https://python.langchain.com/docs/introduction/) 来利用其出色的向量数据库工具。
48 | ```py
49 | import datasets
50 | from langchain.docstore.document import Document
51 | from langchain.text_splitter import RecursiveCharacterTextSplitter
52 | from langchain_community.retrievers import BM25Retriever
53 |
54 | knowledge_base = datasets.load_dataset("m-ric/huggingface_doc", split="train")
55 | knowledge_base = knowledge_base.filter(lambda row: row["source"].startswith("huggingface/transformers"))
56 |
57 | source_docs = [
58 | Document(page_content=doc["text"], metadata={"source": doc["source"].split("/")[1]})
59 | for doc in knowledge_base
60 | ]
61 |
62 | text_splitter = RecursiveCharacterTextSplitter(
63 | chunk_size=500,
64 | chunk_overlap=50,
65 | add_start_index=True,
66 | strip_whitespace=True,
67 | separators=["\n\n", "\n", ".", " ", ""],
68 | )
69 | docs_processed = text_splitter.split_documents(source_docs)
70 | ```
71 |
72 | 现在文档已准备好。我们来一起构建我们的 agent RAG 系统!
73 | 👉 我们只需要一个 RetrieverTool,我们的 agent 可以利用它从知识库中检索信息。
74 |
75 | 由于我们需要将 vectordb 添加为工具的属性,我们不能简单地使用带有 `@tool` 装饰器的简单工具构造函数:因此我们将遵循 [tools 教程](../tutorials/tools) 中突出显示的高级设置。
76 |
77 | ```py
78 | from smolagents import Tool
79 |
80 | class RetrieverTool(Tool):
81 | name = "retriever"
82 | description = "Uses semantic search to retrieve the parts of transformers documentation that could be most relevant to answer your query."
83 | inputs = {
84 | "query": {
85 | "type": "string",
86 | "description": "The query to perform. This should be semantically close to your target documents. Use the affirmative form rather than a question.",
87 | }
88 | }
89 | output_type = "string"
90 |
91 | def __init__(self, docs, **kwargs):
92 | super().__init__(**kwargs)
93 | self.retriever = BM25Retriever.from_documents(
94 | docs, k=10
95 | )
96 |
97 | def forward(self, query: str) -> str:
98 | assert isinstance(query, str), "Your search query must be a string"
99 |
100 | docs = self.retriever.invoke(
101 | query,
102 | )
103 | return "\nRetrieved documents:\n" + "".join(
104 | [
105 | f"\n\n===== Document {str(i)} =====\n" + doc.page_content
106 | for i, doc in enumerate(docs)
107 | ]
108 | )
109 |
110 | retriever_tool = RetrieverTool(docs_processed)
111 | ```
112 | 我们使用 BM25 这一经典检索方法,因为它的设置速度非常快。为了提高检索准确性,你可以改用语义搜索,即用文档的向量表示来替换 BM25:你可以前往 [MTEB Leaderboard](https://huggingface.co/spaces/mteb/leaderboard) 选择一个好的嵌入模型,下面给出一个替换示例。
113 |
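这个示意(非官方实现)假设已安装 `faiss-cpu` 与 `sentence-transformers`,嵌入模型名仅作占位,并复用上文定义的 `docs_processed`:

```py
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

# 嵌入模型名仅作示意,可在 MTEB Leaderboard 上挑选更合适的模型
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vector_db = FAISS.from_documents(docs_processed, embeddings)
semantic_retriever = vector_db.as_retriever(search_kwargs={"k": 10})

# 与 BM25Retriever 一样,semantic_retriever.invoke(query) 返回文档列表,
# 因此可以在 RetrieverTool.__init__ 中用它替换 self.retriever
docs = semantic_retriever.invoke("How to run a model on multiple GPUs")
```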
114 | 现在我们已经创建了一个可以从知识库中检索信息的工具,接下来就可以很容易地创建一个利用这个
115 | `retriever_tool` 的 agent!此 agent 将使用如下参数初始化:
116 | - `tools`:代理将能够调用的工具列表。
117 | - `model`:为代理提供动力的 LLM。
118 |
119 | 我们的 `model` 必须是一个可调用对象,它接受一个消息的 list 作为输入,并返回文本。它还需要接受一个 stop_sequences 参数,指示何时停止生成。为了方便起见,我们直接使用包中提供的 `HfApiModel` 类来获取调用 Hugging Face 的 Inference API 的 LLM 引擎。
120 |
121 | 接着,我们将使用 [meta-llama/Llama-3.3-70B-Instruct](https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct) 作为 llm 引
122 | 擎,因为:
123 | - 它有一个长 128k 上下文,这对处理长源文档很有用。
124 | - 它在 HF 的 Inference API 上始终免费提供!
125 |
126 | _Note:_ 此 Inference API 托管基于各种标准的模型,部署的模型可能会在没有事先通知的情况下进行更新或替换。了解更多信息,请点击[这里](https://huggingface.co/docs/api-inference/supported-models)。
127 |
128 | ```py
129 | from smolagents import HfApiModel, CodeAgent
130 |
131 | agent = CodeAgent(
132 | tools=[retriever_tool], model=HfApiModel(model_id="meta-llama/Llama-3.3-70B-Instruct"), max_steps=4, verbose=True
133 | )
134 | ```
135 |
136 | 当我们初始化 CodeAgent 时,它已经自动获得了一个默认的系统提示,告诉 LLM 引擎按步骤处理并生成工具调用作为代码片段,但你可以根据需要替换此提示模板。接着,当其 `.run()` 方法被调用时,代理将负责调用 LLM 引擎,并在循环中执行工具调用,直到工具 `final_answer` 被调用,而其参数为最终答案。
137 |
138 | ```py
139 | agent_output = agent.run("For a transformers model training, which is slower, the forward or the backward pass?")
140 |
141 | print("Final output:")
142 | print(agent_output)
143 | ```
144 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 |
2 | # Contributor Covenant Code of Conduct
3 |
4 | ## Our Pledge
5 |
6 | We as members, contributors, and leaders pledge to make participation in our
7 | community a harassment-free experience for everyone, regardless of age, body
8 | size, visible or invisible disability, ethnicity, sex characteristics, gender
9 | identity and expression, level of experience, education, socio-economic status,
10 | nationality, personal appearance, race, caste, color, religion, or sexual
11 | identity and orientation.
12 |
13 | We pledge to act and interact in ways that contribute to an open, welcoming,
14 | diverse, inclusive, and healthy community.
15 |
16 | ## Our Standards
17 |
18 | Examples of behavior that contributes to a positive environment for our
19 | community include:
20 |
21 | * Demonstrating empathy and kindness toward other people
22 | * Being respectful of differing opinions, viewpoints, and experiences
23 | * Giving and gracefully accepting constructive feedback
24 | * Accepting responsibility and apologizing to those affected by our mistakes,
25 | and learning from the experience
26 | * Focusing on what is best not just for us as individuals, but for the overall
27 | community
28 |
29 | Examples of unacceptable behavior include:
30 |
31 | * The use of sexualized language or imagery, and sexual attention or advances of
32 | any kind
33 | * Trolling, insulting or derogatory comments, and personal or political attacks
34 | * Public or private harassment
35 | * Publishing others' private information, such as a physical or email address,
36 | without their explicit permission
37 | * Other conduct which could reasonably be considered inappropriate in a
38 | professional setting
39 |
40 | ## Enforcement Responsibilities
41 |
42 | Community leaders are responsible for clarifying and enforcing our standards of
43 | acceptable behavior and will take appropriate and fair corrective action in
44 | response to any behavior that they deem inappropriate, threatening, offensive,
45 | or harmful.
46 |
47 | Community leaders have the right and responsibility to remove, edit, or reject
48 | comments, commits, code, wiki edits, issues, and other contributions that are
49 | not aligned to this Code of Conduct, and will communicate reasons for moderation
50 | decisions when appropriate.
51 |
52 | ## Scope
53 |
54 | This Code of Conduct applies within all community spaces, and also applies when
55 | an individual is officially representing the community in public spaces.
56 | Examples of representing our community include using an official e-mail address,
57 | posting via an official social media account, or acting as an appointed
58 | representative at an online or offline event.
59 |
60 | ## Enforcement
61 |
62 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
63 | reported to the community leaders responsible for enforcement at
64 | feedback@huggingface.co.
65 | All complaints will be reviewed and investigated promptly and fairly.
66 |
67 | All community leaders are obligated to respect the privacy and security of the
68 | reporter of any incident.
69 |
70 | ## Enforcement Guidelines
71 |
72 | Community leaders will follow these Community Impact Guidelines in determining
73 | the consequences for any action they deem in violation of this Code of Conduct:
74 |
75 | ### 1. Correction
76 |
77 | **Community Impact**: Use of inappropriate language or other behavior deemed
78 | unprofessional or unwelcome in the community.
79 |
80 | **Consequence**: A private, written warning from community leaders, providing
81 | clarity around the nature of the violation and an explanation of why the
82 | behavior was inappropriate. A public apology may be requested.
83 |
84 | ### 2. Warning
85 |
86 | **Community Impact**: A violation through a single incident or series of
87 | actions.
88 |
89 | **Consequence**: A warning with consequences for continued behavior. No
90 | interaction with the people involved, including unsolicited interaction with
91 | those enforcing the Code of Conduct, for a specified period of time. This
92 | includes avoiding interactions in community spaces as well as external channels
93 | like social media. Violating these terms may lead to a temporary or permanent
94 | ban.
95 |
96 | ### 3. Temporary Ban
97 |
98 | **Community Impact**: A serious violation of community standards, including
99 | sustained inappropriate behavior.
100 |
101 | **Consequence**: A temporary ban from any sort of interaction or public
102 | communication with the community for a specified period of time. No public or
103 | private interaction with the people involved, including unsolicited interaction
104 | with those enforcing the Code of Conduct, is allowed during this period.
105 | Violating these terms may lead to a permanent ban.
106 |
107 | ### 4. Permanent Ban
108 |
109 | **Community Impact**: Demonstrating a pattern of violation of community
110 | standards, including sustained inappropriate behavior, harassment of an
111 | individual, or aggression toward or disparagement of classes of individuals.
112 |
113 | **Consequence**: A permanent ban from any sort of public interaction within the
114 | community.
115 |
116 | ## Attribution
117 |
118 | This Code of Conduct is adapted from the [Contributor Covenant][homepage],
119 | version 2.1, available at
120 | [https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
121 |
122 | Community Impact Guidelines were inspired by
123 | [Mozilla's code of conduct enforcement ladder][Mozilla CoC].
124 |
125 | For answers to common questions about this code of conduct, see the FAQ at
126 | [https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
127 | [https://www.contributor-covenant.org/translations][translations].
128 |
129 | [homepage]: https://www.contributor-covenant.org
130 | [v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
131 | [Mozilla CoC]: https://github.com/mozilla/diversity
132 | [FAQ]: https://www.contributor-covenant.org/faq
133 | [translations]: https://www.contributor-covenant.org/translations
--------------------------------------------------------------------------------
/docs/source/en/tutorials/memory.mdx:
--------------------------------------------------------------------------------
1 |
16 | # 📚 Manage your agent's memory
17 |
18 | [[open-in-colab]]
19 |
20 | At its core, an agent can be defined by a few simple components: it has tools and prompts.
21 | Most importantly, it has a memory of past steps, which records a history of planning, execution, and errors.
22 |
23 | ### Replay your agent's memory
24 |
25 | We propose several features to inspect a past agent run.
26 |
27 | You can instrument the agent's run to display it in a great UI that lets you zoom in/out on specific steps, as highlighted in the [instrumentation guide](./inspect_runs).
28 |
29 | You can also use `agent.replay()`, as follows:
30 |
31 | After the agent has run:
32 | ```py
33 | from smolagents import HfApiModel, CodeAgent
34 |
35 | agent = CodeAgent(tools=[], model=HfApiModel(), verbosity_level=0)
36 |
37 | result = agent.run("What's the 20th Fibonacci number?")
38 | ```
39 |
40 | If you want to replay this last run, just use:
41 | ```py
42 | agent.replay()
43 | ```
44 |
45 | ### Dynamically change the agent's memory
46 |
47 | Many advanced use cases require dynamic modification of the agent's memory.
48 |
49 | You can access the agent's memory using:
50 |
51 | ```py
52 | from smolagents import ActionStep
53 |
54 | system_prompt_step = agent.memory.system_prompt
55 | print("The system prompt given to the agent was:")
56 | print(system_prompt_step.system_prompt)
57 |
58 | task_step = agent.memory.steps[0]
59 | print("\n\nThe first task step was:")
60 | print(task_step.task)
61 |
62 | for step in agent.memory.steps:
63 | if isinstance(step, ActionStep):
64 | if step.error is not None:
65 | print(f"\nStep {step.step_number} got this error:\n{step.error}\n")
66 | else:
67 | print(f"\nStep {step.step_number} got these observations:\n{step.observations}\n")
68 | ```
69 |
70 | Use `agent.memory.get_full_steps()` to get full steps as dictionaries.
71 |
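For instance, here is a minimal sketch of dumping a run for logging (the exact keys depend on the step types and your smolagents version):

```py
full_steps = agent.memory.get_full_steps()  # list of dicts, one per memory step

for step_dict in full_steps:
    # Each dict holds the serialized fields of its step (model output, observations, errors, ...)
    print(step_dict.keys())
```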
72 | You can also use step callbacks to dynamically change the agent's memory.
73 |
74 | Step callbacks receive the `agent` itself among their arguments, so they can access and modify any memory step as highlighted above. For instance, say you are observing screenshots of each step performed by a web browser agent: you want to keep the latest screenshot and remove the images from older steps to save on token costs.
75 |
76 | You could run something like the following.
77 | _Note: this code is incomplete, some imports and object definitions have been removed for the sake of concision, visit [the original script](https://github.com/huggingface/smolagents/blob/main/src/smolagents/vision_web_browser.py) to get the full working code._
78 |
79 | ```py
80 | import helium
81 | from PIL import Image
82 | from io import BytesIO
83 | from time import sleep
84 |
85 | def update_screenshot(memory_step: ActionStep, agent: CodeAgent) -> None:
86 | sleep(1.0) # Let JavaScript animations happen before taking the screenshot
87 | driver = helium.get_driver()
88 | latest_step = memory_step.step_number
89 | for previous_memory_step in agent.memory.steps: # Remove previous screenshots from logs for lean processing
90 | if isinstance(previous_memory_step, ActionStep) and previous_memory_step.step_number <= latest_step - 2:
91 | previous_memory_step.observations_images = None
92 | png_bytes = driver.get_screenshot_as_png()
93 | image = Image.open(BytesIO(png_bytes))
94 | memory_step.observations_images = [image.copy()]
95 | ```
96 |
97 | Then you should pass this function in the `step_callbacks` argument upon initialization of your agent:
98 |
99 | ```py
100 | CodeAgent(
101 | tools=[DuckDuckGoSearchTool(), go_back, close_popups, search_item_ctrl_f],
102 | model=model,
103 | additional_authorized_imports=["helium"],
104 | step_callbacks=[update_screenshot],
105 | max_steps=20,
106 | verbosity_level=2,
107 | )
108 | ```
109 |
110 | Head to our [vision web browser code](https://github.com/huggingface/smolagents/blob/main/src/smolagents/vision_web_browser.py) to see the full working example.
111 |
112 | ### Run agents one step at a time
113 |
114 | This can be useful if you have tool calls that take days to complete: you can run your agent one step at a time.
115 | This will also let you update the memory on each step.
116 |
117 | ```py
118 | from smolagents import HfApiModel, CodeAgent, ActionStep, TaskStep
119 |
120 | agent = CodeAgent(tools=[], model=HfApiModel(), verbosity_level=1)
121 | print(agent.memory.system_prompt)
122 |
123 | task = "What is the 20th Fibonacci number?"
124 |
125 | # You could modify the memory as needed here by inputting the memory of another agent.
126 | # agent.memory.steps = previous_agent.memory.steps
127 |
128 | # Let's start a new task!
129 | agent.memory.steps.append(TaskStep(task=task, task_images=[]))
130 |
131 | final_answer = None
132 | step_number = 1
133 | while final_answer is None and step_number <= 10:
134 | memory_step = ActionStep(
135 | step_number=step_number,
136 | observations_images=[],
137 | )
138 | # Run one step.
139 | final_answer = agent.step(memory_step)
140 | agent.memory.steps.append(memory_step)
141 | step_number += 1
142 |
143 | # Change the memory as you please!
144 | # For instance to update the latest step:
145 | # agent.memory.steps[-1] = ...
146 |
147 | print("The final answer is:", final_answer)
148 | ```
--------------------------------------------------------------------------------
/docs/source/zh/examples/text_to_sql.mdx:
--------------------------------------------------------------------------------
1 |
16 | # Text-to-SQL
17 |
18 | [[open-in-colab]]
19 |
20 | 在此教程中,我们将看到如何使用 `smolagents` 实现一个利用 SQL 的 agent。
21 |
22 | > 让我们从经典问题开始:为什么不简单地使用标准的 text-to-SQL pipeline 呢?
23 |
24 | 标准的 text-to-SQL pipeline 很脆弱,因为生成的 SQL 查询可能会出错。更糟糕的是,查询可能出错却不引发错误警报,从而返回一些不正确或无用的结果。
25 |
26 | 👉 相反,agent 系统则可以检视输出结果并决定查询是否需要被更改,因此带来巨大的性能提升。
27 |
28 | 让我们来一起构建这个 agent! 💪
29 |
30 | 首先,我们构建一个 SQL 的环境:
31 | ```py
32 | from sqlalchemy import (
33 | create_engine,
34 | MetaData,
35 | Table,
36 | Column,
37 | String,
38 | Integer,
39 | Float,
40 | insert,
41 | inspect,
42 | text,
43 | )
44 |
45 | engine = create_engine("sqlite:///:memory:")
46 | metadata_obj = MetaData()
47 |
48 | # create city SQL table
49 | table_name = "receipts"
50 | receipts = Table(
51 | table_name,
52 | metadata_obj,
53 | Column("receipt_id", Integer, primary_key=True),
54 | Column("customer_name", String(16), primary_key=True),
55 | Column("price", Float),
56 | Column("tip", Float),
57 | )
58 | metadata_obj.create_all(engine)
59 |
60 | rows = [
61 | {"receipt_id": 1, "customer_name": "Alan Payne", "price": 12.06, "tip": 1.20},
62 | {"receipt_id": 2, "customer_name": "Alex Mason", "price": 23.86, "tip": 0.24},
63 | {"receipt_id": 3, "customer_name": "Woodrow Wilson", "price": 53.43, "tip": 5.43},
64 | {"receipt_id": 4, "customer_name": "Margaret James", "price": 21.11, "tip": 1.00},
65 | ]
66 | for row in rows:
67 | stmt = insert(receipts).values(**row)
68 | with engine.begin() as connection:
69 | cursor = connection.execute(stmt)
70 | ```
71 |
72 | ### 构建 agent
73 |
74 | 现在,我们构建一个 agent,它将使用 SQL 查询来回答问题。工具的 description 属性将被 agent 系统嵌入到 LLM 的提示中:它为 LLM 提供有关如何使用该工具的信息。这正是我们描述 SQL 表的地方。
75 |
76 | ```py
77 | inspector = inspect(engine)
78 | columns_info = [(col["name"], col["type"]) for col in inspector.get_columns("receipts")]
79 |
80 | table_description = "Columns:\n" + "\n".join([f" - {name}: {col_type}" for name, col_type in columns_info])
81 | print(table_description)
82 | ```
83 |
84 | ```text
85 | Columns:
86 | - receipt_id: INTEGER
87 | - customer_name: VARCHAR(16)
88 | - price: FLOAT
89 | - tip: FLOAT
90 | ```
91 |
92 | 现在让我们构建我们的工具。它需要以下内容:(更多细节请参阅[工具文档](../tutorials/tools))
93 |
94 | - 一个带有 `Args:` 部分列出参数的 docstring。
95 | - 输入和输出的type hints。
96 |
97 | ```py
98 | from smolagents import tool
99 |
100 | @tool
101 | def sql_engine(query: str) -> str:
102 | """
103 | Allows you to perform SQL queries on the table. Returns a string representation of the result.
104 | The table is named 'receipts'. Its description is as follows:
105 | Columns:
106 | - receipt_id: INTEGER
107 | - customer_name: VARCHAR(16)
108 | - price: FLOAT
109 | - tip: FLOAT
110 |
111 | Args:
112 | query: The query to perform. This should be correct SQL.
113 | """
114 | output = ""
115 | with engine.connect() as con:
116 | rows = con.execute(text(query))
117 | for row in rows:
118 | output += "\n" + str(row)
119 | return output
120 | ```
121 |
122 | 我们现在使用这个工具来创建一个 agent。我们使用 `CodeAgent`,这是 smolagents 的主要 agent 类:一个在代码中编写操作并根据 ReAct 框架迭代先前输出的 agent。
123 |
124 | 这个模型是驱动 agent 系统的 LLM。`HfApiModel` 允许你使用 HF Inference API 调用 LLM,无论是通过 Serverless 还是 Dedicated endpoint,但你也可以使用任何专有 API。
125 |
126 | ```py
127 | from smolagents import CodeAgent, HfApiModel
128 |
129 | agent = CodeAgent(
130 | tools=[sql_engine],
131 | model=HfApiModel(model_id="meta-llama/Meta-Llama-3.1-8B-Instruct"),
132 | )
133 | agent.run("Can you give me the name of the client who got the most expensive receipt?")
134 | ```
135 |
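上面的例子使用了 `HfApiModel`;如果想改用专有 API,下面是一个最小示意(假设:以 OpenAI 兼容的 endpoint 为例,模型名、URL 和密钥仅作占位):

```py
from smolagents import CodeAgent, OpenAIServerModel

model = OpenAIServerModel(
    model_id="gpt-4o-mini",                 # 仅作示意
    api_base="https://api.openai.com/v1",   # 任意兼容 OpenAI 的 endpoint
    api_key="YOUR_API_KEY",
)
agent = CodeAgent(tools=[sql_engine], model=model)
agent.run("Can you give me the name of the client who got the most expensive receipt?")
```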
136 | ### Level 2: 表连接
137 |
138 | 现在让我们增加一些挑战!我们希望我们的 agent 能够处理跨多个表的连接。因此,我们创建一个新表,记录每个 receipt_id 的服务员名字!
139 |
140 | ```py
141 | table_name = "waiters"
142 | receipts = Table(
143 | table_name,
144 | metadata_obj,
145 | Column("receipt_id", Integer, primary_key=True),
146 | Column("waiter_name", String(16), primary_key=True),
147 | )
148 | metadata_obj.create_all(engine)
149 |
150 | rows = [
151 | {"receipt_id": 1, "waiter_name": "Corey Johnson"},
152 | {"receipt_id": 2, "waiter_name": "Michael Watts"},
153 | {"receipt_id": 3, "waiter_name": "Michael Watts"},
154 | {"receipt_id": 4, "waiter_name": "Margaret James"},
155 | ]
156 | for row in rows:
157 | stmt = insert(receipts).values(**row)
158 | with engine.begin() as connection:
159 | cursor = connection.execute(stmt)
160 | ```
161 |
162 | 因为我们新增了表,所以需要更新 `sql_engine` 工具的 description,让 LLM 能够正确利用新表的信息。
163 |
164 | ```py
165 | updated_description = """Allows you to perform SQL queries on the table. Beware that this tool's output is a string representation of the execution output.
166 | It can use the following tables:"""
167 |
168 | inspector = inspect(engine)
169 | for table in ["receipts", "waiters"]:
170 | columns_info = [(col["name"], col["type"]) for col in inspector.get_columns(table)]
171 |
172 | table_description = f"Table '{table}':\n"
173 |
174 | table_description += "Columns:\n" + "\n".join([f" - {name}: {col_type}" for name, col_type in columns_info])
175 | updated_description += "\n\n" + table_description
176 |
177 | print(updated_description)
178 | ```
179 |
180 | 因为这个request 比之前的要难一些,我们将 LLM 引擎切换到更强大的 [Qwen/Qwen2.5-Coder-32B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct)!
181 |
182 | ```py
183 | sql_engine.description = updated_description
184 |
185 | agent = CodeAgent(
186 | tools=[sql_engine],
187 | model=HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct"),
188 | )
189 |
190 | agent.run("Which waiter got more total money from tips?")
191 | ```
192 |
193 | 它直接就能工作!设置过程非常简单,难道不是吗?
194 |
195 | 这个例子到此结束!我们涵盖了这些概念:
196 |
197 | - 构建新工具。
198 | - 更新工具的描述。
199 | - 切换到更强大的 LLM 有助于 agent 推理。
200 |
201 | ✅ 现在你可以构建你一直梦寐以求的 text-to-SQL 系统了!✨
202 |
--------------------------------------------------------------------------------
/docs/source/hi/reference/agents.mdx:
--------------------------------------------------------------------------------
1 |
16 | # Agents
17 |
18 |