├── tests ├── __init__.py ├── unit │ ├── __init__.py │ └── test_replace_variables.py └── integration │ ├── __init__.py │ ├── sandbox.json │ ├── conftest.py │ ├── mcp_server.py │ ├── test_mcp_apigen.py │ ├── test_mcp_server.py │ ├── test_tool_approval.py │ └── test_code_exec.py ├── .python-version ├── ipybox ├── tool_exec │ ├── __init__.py │ ├── approval │ │ ├── __init__.py │ │ ├── server.py │ │ └── client.py │ ├── client.py │ └── server.py ├── kernel_mgr │ ├── __init__.py │ ├── sandbox.json │ └── server.py ├── __init__.py ├── utils.py ├── vars.py ├── mcp_client.py ├── mcp_apigen.py └── mcp_server.py ├── docs ├── generated │ ├── mcptools │ │ ├── __init__.py │ │ └── github │ │ │ ├── __init__.py │ │ │ ├── list_commits.py │ │ │ ├── search_repositories_orig.py │ │ │ └── search_repositories.py │ ├── gentools │ │ └── github │ │ │ ├── __init__.py │ │ │ └── commits_of_top_repos │ │ │ ├── __init__.py │ │ │ ├── api.py │ │ │ └── impl.py │ └── mcpparse │ │ ├── __init__.py │ │ └── github │ │ ├── __init__.py │ │ └── search_repositories.py ├── images │ ├── architecture-dark.png │ ├── architecture-light.png │ ├── architecture-light-annotated.jpg │ └── nano-banana-prompt.txt ├── api │ ├── code_executor.md │ ├── kernel_manager.md │ └── tool_executor.md ├── stylesheets │ └── extra.css ├── installation.md ├── apigen.md ├── sandbox.md ├── index.md ├── codeexec.md ├── quickstart.md └── mcpserver.md ├── docker-build.sh ├── .gitignore ├── .dockerignore ├── docker-entrypoint.sh ├── examples ├── sandbox-mcp.json ├── sandbox-kernel.json ├── apigen.py ├── quickstart.py ├── sandbox.py └── codexec.py ├── DEVELOPMENT.md ├── .github └── workflows │ ├── quality.yml │ ├── publish_docs.yml │ ├── test.yml │ ├── release.yml │ ├── test_package.yml │ └── publish_mcp.yml ├── .pre-commit-config.yaml ├── .claude-plugin └── marketplace.json ├── server.json ├── Dockerfile ├── CLAUDE.md ├── pyproject.toml ├── plugin └── skills │ └── codeact │ ├── SKILL.md │ └── references │ ├── saving-codeacts.md │ └── 
output-parsers.md ├── tasks.py ├── README.md ├── mkdocs.yml └── LICENSE /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/unit/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.13 2 | -------------------------------------------------------------------------------- /ipybox/tool_exec/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ipybox/kernel_mgr/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/integration/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/generated/mcptools/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ipybox/tool_exec/approval/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/generated/gentools/github/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/generated/mcpparse/__init__.py: -------------------------------------------------------------------------------- 1 | # mcpparse - Output 
parsers for mcptools 2 | -------------------------------------------------------------------------------- /docs/generated/mcpparse/github/__init__.py: -------------------------------------------------------------------------------- 1 | # mcpparse.github - Output parsers for GitHub tools 2 | -------------------------------------------------------------------------------- /docs/images/architecture-dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gradion-ai/ipybox/HEAD/docs/images/architecture-dark.png -------------------------------------------------------------------------------- /docs/images/architecture-light.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gradion-ai/ipybox/HEAD/docs/images/architecture-light.png -------------------------------------------------------------------------------- /docs/images/architecture-light-annotated.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gradion-ai/ipybox/HEAD/docs/images/architecture-light-annotated.jpg -------------------------------------------------------------------------------- /docs/generated/gentools/github/commits_of_top_repos/__init__.py: -------------------------------------------------------------------------------- 1 | from .api import CommitInfo, RepoCommits, run 2 | 3 | __all__ = ["RepoCommits", "CommitInfo", "run"] 4 | -------------------------------------------------------------------------------- /docker-build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | docker build \ 5 | --build-arg UID=$(id -u) \ 6 | --build-arg GID=$(id -g) \ 7 | -t ipybox \ 8 | "$@" \ 9 | . 
10 | -------------------------------------------------------------------------------- /docs/api/code_executor.md: -------------------------------------------------------------------------------- 1 | ::: ipybox.CodeExecutor 2 | ::: ipybox.CodeExecutionChunk 3 | ::: ipybox.CodeExecutionResult 4 | ::: ipybox.CodeExecutionError 5 | ::: ipybox.generate_mcp_sources 6 | -------------------------------------------------------------------------------- /docs/stylesheets/extra.css: -------------------------------------------------------------------------------- 1 | .md-typeset figure { 2 | width: 100%; 3 | display: block; 4 | } 5 | 6 | .md-typeset figcaption { 7 | width: 100%; 8 | max-width: none; 9 | } 10 | -------------------------------------------------------------------------------- /docs/api/kernel_manager.md: -------------------------------------------------------------------------------- 1 | ::: ipybox.kernel_mgr.server.KernelGateway 2 | ::: ipybox.kernel_mgr.client.KernelClient 3 | ::: ipybox.kernel_mgr.client.ExecutionResult 4 | ::: ipybox.kernel_mgr.client.ExecutionError 5 | -------------------------------------------------------------------------------- /ipybox/__init__.py: -------------------------------------------------------------------------------- 1 | from ipybox.code_exec import CodeExecutionChunk, CodeExecutionError, CodeExecutionResult, CodeExecutor 2 | from ipybox.mcp_apigen import generate_mcp_sources 3 | from ipybox.tool_exec.approval.client import ApprovalRequest 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | .DS_Store 3 | .pytest_cache/ 4 | .ruff_cache/ 5 | .mypy_cache/ 6 | .vscode/ 7 | .venv/ 8 | site/ 9 | /images/ 10 | /mcptools/ 11 | /mcpparse/ 12 | /codeact/ 13 | /sandbox.* 14 | .claude/settings.local.json 15 | .mcpregistry* 16 | .env 17 | GEMINI.md 18 | 
-------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | .git 2 | .github 3 | .venv 4 | .mypy_cache 5 | .pytest_cache 6 | .ruff_cache 7 | __pycache__ 8 | *.pyc 9 | *.pyo 10 | *.egg-info 11 | dist 12 | build 13 | docs 14 | tests 15 | *.md 16 | !README.md 17 | !pyproject.toml 18 | !uv.lock 19 | .pre-commit-config.yaml 20 | .gitignore 21 | -------------------------------------------------------------------------------- /docker-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | exec ${HOME}/.venv/bin/ipybox \ 5 | --workspace /app/workspace \ 6 | --tool-server-host localhost \ 7 | --tool-server-port 8900 \ 8 | --kernel-gateway-host localhost \ 9 | --kernel-gateway-port 8888 \ 10 | --log-level INFO 11 | -------------------------------------------------------------------------------- /docs/api/tool_executor.md: -------------------------------------------------------------------------------- 1 | ::: ipybox.tool_exec.server.ToolServer 2 | ::: ipybox.tool_exec.client.ToolRunner 3 | ::: ipybox.tool_exec.client.ToolRunnerError 4 | ::: ipybox.tool_exec.approval.client.ApprovalClient 5 | ::: ipybox.tool_exec.approval.client.ApprovalRequest 6 | ::: ipybox.tool_exec.approval.server.ApprovalChannel 7 | -------------------------------------------------------------------------------- /ipybox/kernel_mgr/sandbox.json: -------------------------------------------------------------------------------- 1 | { 2 | "enableWeakerNestedSandbox": false, 3 | "filesystem": { 4 | "denyRead": [".env"], 5 | "allowWrite": [".", "~/Library/Jupyter", "~/.ipython"], 6 | "denyWrite": [] 7 | }, 8 | "network": { 9 | "allowedDomains": [], 10 | "deniedDomains": [], 11 | "allowLocalBinding": true 12 | } 13 | } 14 | -------------------------------------------------------------------------------- 
/examples/sandbox-mcp.json: -------------------------------------------------------------------------------- 1 | { 2 | "enableWeakerNestedSandbox": false, 3 | "filesystem": { 4 | "denyRead": [".env"], 5 | "allowWrite": [".", "~/.npm"], 6 | "denyWrite": [] 7 | }, 8 | "network": { 9 | "allowedDomains": ["registry.npmjs.org"], 10 | "deniedDomains": [], 11 | "allowLocalBinding": true 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /examples/sandbox-kernel.json: -------------------------------------------------------------------------------- 1 | { 2 | "enableWeakerNestedSandbox": false, 3 | "filesystem": { 4 | "denyRead": [".env"], 5 | "allowWrite": [".", "~/Library/Jupyter", "~/.ipython"], 6 | "denyWrite": [] 7 | }, 8 | "network": { 9 | "allowedDomains": ["example.org"], 10 | "deniedDomains": [], 11 | "allowLocalBinding": true 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /tests/integration/sandbox.json: -------------------------------------------------------------------------------- 1 | { 2 | "enableWeakerNestedSandbox": false, 3 | "filesystem": { 4 | "denyRead": [".env"], 5 | "allowWrite": [".", "~/Library/Jupyter", "~/.ipython"], 6 | "denyWrite": [] 7 | }, 8 | "network": { 9 | "allowedDomains": ["example.org"], 10 | "deniedDomains": [], 11 | "allowLocalBinding": true 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /docs/generated/mcptools/github/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from ipybox.tool_exec.client import ToolRunner 4 | 5 | CLIENT = ToolRunner( 6 | server_name="github", 7 | server_params={ 8 | "url": "https://api.githubcopilot.com/mcp/", 9 | "headers": {"Authorization": "Bearer ${GITHUB_API_KEY}"}, 10 | }, 11 | host=os.environ.get("TOOL_SERVER_HOST", "localhost"), 12 | port=int(os.environ.get("TOOL_SERVER_PORT", "8900")), 
13 | ) 14 | -------------------------------------------------------------------------------- /ipybox/utils.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import socket 3 | from functools import partial 4 | from typing import Callable, TypeVar 5 | 6 | T = TypeVar("T") 7 | 8 | 9 | async def arun(func: Callable[..., T], *args, **kwargs) -> T: 10 | return await asyncio.get_running_loop().run_in_executor(None, partial(func, *args, **kwargs)) 11 | 12 | 13 | def find_free_port() -> int: 14 | with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: 15 | s.bind(("", 0)) 16 | s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) 17 | return s.getsockname()[1] 18 | -------------------------------------------------------------------------------- /DEVELOPMENT.md: -------------------------------------------------------------------------------- 1 | # Development Environment 2 | 3 | Clone the repository: 4 | 5 | ```bash 6 | git clone https://github.com/gradion-ai/ipybox.git 7 | cd ipybox 8 | ``` 9 | 10 | Create a virtual environment and install dependencies: 11 | 12 | ```bash 13 | uv sync 14 | ``` 15 | 16 | Activate the virtual environment: 17 | 18 | ```bash 19 | source .venv/bin/activate 20 | ``` 21 | 22 | Install pre-commit hooks: 23 | 24 | ```bash 25 | invoke precommit-install 26 | ``` 27 | 28 | Enforce coding conventions (also enforced by pre-commit hooks): 29 | 30 | ```bash 31 | invoke cc 32 | ``` 33 | 34 | Run tests: 35 | 36 | ```bash 37 | pytest -s tests 38 | ``` 39 | -------------------------------------------------------------------------------- /.github/workflows/quality.yml: -------------------------------------------------------------------------------- 1 | name: Code Quality 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | code-quality-check: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v4 14 | 15 | - name: Set up Python 
16 | uses: actions/setup-python@v5 17 | with: 18 | python-version: ${{ vars.CI_PYTHON_VERSION }} 19 | 20 | - name: Install uv 21 | uses: astral-sh/setup-uv@v6 22 | with: 23 | version: ${{ vars.CI_UV_VERSION }} 24 | enable-cache: true 25 | 26 | - name: Run pre-commit 27 | uses: pre-commit/action@v3.0.1 28 | -------------------------------------------------------------------------------- /.github/workflows/publish_docs.yml: -------------------------------------------------------------------------------- 1 | name: Deploy Documentation 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | 7 | jobs: 8 | deploy-docs: 9 | runs-on: ubuntu-latest 10 | 11 | permissions: 12 | contents: write 13 | 14 | steps: 15 | - uses: actions/checkout@v4 16 | with: 17 | fetch-depth: 0 18 | 19 | - name: Configure Git credentials 20 | run: | 21 | git config user.name github-actions[bot] 22 | git config user.email 41898282+github-actions[bot]@users.noreply.github.com 23 | 24 | - name: Install uv 25 | uses: astral-sh/setup-uv@v6 26 | with: 27 | version: ${{ vars.CI_UV_VERSION }} 28 | enable-cache: true 29 | 30 | - name: Install Python 31 | run: uv python install 32 | 33 | - name: Install dependencies 34 | run: | 35 | uv sync --locked --dev 36 | uv pip list 37 | 38 | - name: Deploy documentation 39 | run: | 40 | uv run mkdocs gh-deploy --force 41 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | default_language_version: 2 | python: python 3 | 4 | repos: 5 | - repo: https://github.com/pre-commit/pre-commit-hooks 6 | rev: v5.0.0 7 | hooks: 8 | - id: check-yaml 9 | args: [--unsafe] 10 | exclude: ^mkdocs\.yml$ 11 | - id: end-of-file-fixer 12 | - id: trailing-whitespace 13 | exclude: ^docs/ 14 | 15 | - repo: https://github.com/astral-sh/ruff-pre-commit 16 | rev: v0.4.4 17 | hooks: 18 | - id: ruff 19 | args: [ --fix, --extend-select=I001 ] 20 | - id: ruff-format 
21 | 22 | - repo: https://github.com/pre-commit/mirrors-mypy 23 | rev: v1.10.0 24 | hooks: 25 | - id: mypy 26 | args: ["--config-file=pyproject.toml"] 27 | additional_dependencies: 28 | - types-requests>=2.32.4.20250913 29 | - types-aiofiles>=25.1.0.20251011 30 | 31 | - repo: local 32 | hooks: 33 | - id: uv-lock-check 34 | name: Check uv.lock is up to date 35 | entry: uv lock --check 36 | language: system 37 | files: pyproject.toml 38 | pass_filenames: false 39 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | test: 11 | runs-on: ${{ matrix.os }} 12 | strategy: 13 | matrix: 14 | os: [macos-latest, ubuntu-latest] 15 | 16 | steps: 17 | - uses: actions/checkout@v4 18 | 19 | - name: Install uv 20 | uses: astral-sh/setup-uv@v6 21 | with: 22 | version: ${{ vars.CI_UV_VERSION }} 23 | enable-cache: true 24 | 25 | - name: Install Python 26 | run: uv python install 27 | 28 | - name: Set up Node.js 29 | uses: actions/setup-node@v6 30 | with: 31 | node-version: '24' 32 | 33 | - name: Install sandbox-runtime 34 | run: npm install -g @anthropic-ai/sandbox-runtime@0.0.21 35 | 36 | - name: Install ripgrep (macOS) 37 | if: runner.os == 'macOS' 38 | run: brew install ripgrep 39 | 40 | - name: Install dependencies 41 | run: | 42 | uv sync --locked --all-extras --dev 43 | uv pip list 44 | 45 | - name: Run tests 46 | run: | 47 | uv run pytest -s tests 48 | -------------------------------------------------------------------------------- /docs/images/nano-banana-prompt.txt: -------------------------------------------------------------------------------- 1 | Edit this architecture diagram to add callout info boxes with descriptions pointing to each component. 
Create small rounded rectangles with a light blue background color for each callout. 2 | 3 | Add these callout boxes: 4 | 5 | - Point to "Application": "AI agent or user interface that sends code and approves tool calls" 6 | - Point to "Code Executor": "Orchestrates code execution and tool approval workflow" 7 | - Point to "IPython Kernel": "Stateful Jupyter kernel that executes Python code in a sandbox" 8 | - Point to "Python tool API": "Auto-generated Python API for MCP tools" 9 | - Point to "Tool Executor": "Server managing MCP connections and tool execution". Make sure the pointer ends in the yello box, not on an MCP server box. 10 | - Point to "MCP server" (inside sandbox): "Stdio-based MCP server running in a sandbox" 11 | - Point to "MCP server" (external, bottom): "Remote MCP server accessed via HTTP" 12 | 13 | Position callouts around the edges of the diagram where there's empty space. Use connector lines with small arrows or dots at the endpoint touching each target box. Keep the callout text in a clean sans-serif font, smaller than the main labels. 14 | 15 | Change the bright white background to a softer light gray to reduce contrast and eye strain. 
16 | -------------------------------------------------------------------------------- /tests/integration/conftest.py: -------------------------------------------------------------------------------- 1 | import socket 2 | from typing import Any, AsyncIterator 3 | 4 | import pytest 5 | import pytest_asyncio 6 | 7 | from tests.integration.mcp_server import STDIO_SERVER_PATH, sse_server, streamable_http_server 8 | 9 | 10 | @pytest.fixture(scope="package") 11 | def ip_address() -> str: 12 | """Get the primary non-loopback IP address.""" 13 | s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) 14 | s.settimeout(0.1) 15 | s.connect(("8.8.8.8", 80)) 16 | return s.getsockname()[0] 17 | 18 | 19 | @pytest_asyncio.fixture 20 | async def stdio_server_params() -> AsyncIterator[dict[str, Any]]: 21 | yield { 22 | "command": "python", 23 | "args": [str(STDIO_SERVER_PATH)], 24 | } 25 | 26 | 27 | @pytest_asyncio.fixture 28 | async def http_server_params(ip_address) -> AsyncIterator[dict[str, Any]]: 29 | async with streamable_http_server() as server: 30 | yield { 31 | "type": "streamable_http", 32 | "url": f"http://{ip_address}:{server.settings.port}/mcp", 33 | } 34 | 35 | 36 | @pytest_asyncio.fixture 37 | async def sse_server_params(ip_address) -> AsyncIterator[dict[str, Any]]: 38 | async with sse_server() as server: 39 | yield { 40 | "type": "sse", 41 | "url": f"http://{ip_address}:{server.settings.port}/sse", 42 | } 43 | -------------------------------------------------------------------------------- /docs/generated/gentools/github/commits_of_top_repos/api.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from pydantic import BaseModel, Field 4 | 5 | 6 | class CommitInfo(BaseModel): 7 | """Information about a single commit.""" 8 | 9 | sha: str = Field(..., title="Short SHA") 10 | message: str = Field(..., title="First line of commit message") 11 | url: str = Field(..., title="Link to commit on 
GitHub") 12 | 13 | 14 | class RepoCommits(BaseModel): 15 | """Repository with its latest commits.""" 16 | 17 | name: str = Field(..., title="Repository name") 18 | stars: int = Field(..., title="Star count") 19 | commits: list[CommitInfo] = Field(..., title="Latest commits") 20 | 21 | 22 | def run( 23 | username: str, 24 | top_n_repos: int = 3, 25 | last_n_commits: int = 5, 26 | ) -> list[RepoCommits]: 27 | """Get latest commits from a GitHub user's most starred repositories. 28 | 29 | Args: 30 | username: GitHub username to search repos for 31 | top_n_repos: Number of top repos to fetch (default: 3) 32 | last_n_commits: Number of latest commits per repo (default: 5) 33 | 34 | Returns: 35 | List of RepoCommits with repo info and latest commits 36 | """ 37 | from .impl import get_commits_of_top_repos 38 | 39 | return get_commits_of_top_repos(username, top_n_repos, last_n_commits) 40 | -------------------------------------------------------------------------------- /docs/generated/mcptools/github/list_commits.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import Optional 4 | 5 | from pydantic import BaseModel, ConfigDict, confloat 6 | 7 | from . import CLIENT 8 | 9 | 10 | class Params(BaseModel): 11 | model_config = ConfigDict( 12 | use_enum_values=True, 13 | ) 14 | author: Optional[str] = None 15 | """ 16 | Author username or email address to filter commits by 17 | """ 18 | owner: str 19 | """ 20 | Repository owner 21 | """ 22 | page: Optional[confloat(ge=1.0)] = None 23 | """ 24 | Page number for pagination (min 1) 25 | """ 26 | perPage: Optional[confloat(ge=1.0, le=100.0)] = None 27 | """ 28 | Results per page for pagination (min 1, max 100) 29 | """ 30 | repo: str 31 | """ 32 | Repository name 33 | """ 34 | sha: Optional[str] = None 35 | """ 36 | Commit SHA, branch or tag name to list commits of. If not provided, uses the default branch of the repository. 
If a commit SHA is provided, will list commits up to that SHA. 37 | """ 38 | 39 | 40 | def run(params: Params) -> str: 41 | """Get list of commits of a branch in a GitHub repository. Returns at least 30 results per page by default, but can return more if specified using the perPage parameter (up to 100).""" 42 | return CLIENT.run_sync(tool_name="list_commits", tool_args=params.model_dump(exclude_none=True)) 43 | -------------------------------------------------------------------------------- /examples/apigen.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from pathlib import Path 3 | 4 | # --8<-- [start:imports] 5 | from ipybox import generate_mcp_sources 6 | 7 | # --8<-- [end:imports] 8 | 9 | 10 | async def generate_brave_search_wrappers(): 11 | # --8<-- [start:gen_brave_search_wrappers] 12 | brave_mcp_params = { 13 | "command": "npx", 14 | "args": ["-y", "@brave/brave-search-mcp-server", "--transport", "stdio"], 15 | "env": {"BRAVE_API_KEY": "${BRAVE_API_KEY}"}, 16 | } 17 | 18 | await generate_mcp_sources( 19 | server_name="brave_search", 20 | server_params=brave_mcp_params, 21 | root_dir=Path("mcptools"), 22 | ) 23 | # --8<-- [end:gen_brave_search_wrappers] 24 | 25 | 26 | async def generate_github_wrappers(): 27 | # --8<-- [start:gen_github_wrappers] 28 | github_mcp_params = { 29 | "url": "https://api.githubcopilot.com/mcp/", 30 | "headers": {"Authorization": "Bearer ${GITHUB_API_KEY}"}, 31 | } 32 | 33 | await generate_mcp_sources( 34 | server_name="github", 35 | server_params=github_mcp_params, 36 | root_dir=Path("mcptools"), 37 | ) 38 | # --8<-- [end:gen_github_wrappers] 39 | 40 | 41 | async def main(): 42 | await generate_brave_search_wrappers() 43 | await generate_github_wrappers() 44 | 45 | 46 | if __name__ == "__main__": 47 | from dotenv import load_dotenv 48 | 49 | load_dotenv() 50 | asyncio.run(main()) 51 | -------------------------------------------------------------------------------- 
/docs/installation.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | ## Python package 4 | 5 | Install ipybox using `pip`: 6 | 7 | ```bash 8 | pip install ipybox 9 | ``` 10 | 11 | or `uv`: 12 | 13 | ```bash 14 | uv add ipybox 15 | ``` 16 | 17 | ## MCP server 18 | 19 | ipybox can also be run as an [MCP server](mcpserver.md) using `uvx`: 20 | 21 | ```bash 22 | uvx ipybox --workspace /path/to/workspace 23 | ``` 24 | 25 | See the [MCP server documentation](mcpserver.md) for configuration details. 26 | 27 | ## sandbox-runtime 28 | 29 | To use ipybox's [sandboxing](sandbox.md) features, you need to install Anthropic's [sandbox-runtime](https://github.com/anthropic-experimental/sandbox-runtime) separately. This provides the `srt` command for IPython kernel and MCP server isolation. 30 | 31 | Install via npm: 32 | 33 | ```bash 34 | npm install -g @anthropic-ai/sandbox-runtime@0.0.21 35 | ``` 36 | 37 | ### Mac OS 38 | 39 | On Mac OS, `sandbox-runtime` requires `ripgrep`. Install it using Homebrew: 40 | 41 | ```bash 42 | brew install ripgrep 43 | ``` 44 | 45 | No other dependencies are needed on Mac OS, as `sandbox-runtime` uses the native `sandbox-exec` command for sandboxing. 46 | 47 | ### Linux 48 | 49 | On Linux, install the required system packages: 50 | 51 | ```bash 52 | apt-get install bubblewrap socat ripgrep 53 | ``` 54 | 55 | !!! info 56 | 57 | [Sandboxing](sandbox.md) with `srt` currently doesn't work with ipybox on Linux, a fix is work in progress. You can still use ipybox on Linux with `sandbox=False`, or run the ipybox [MCP server](mcpserver.md) as a Docker container. 
58 | -------------------------------------------------------------------------------- /.claude-plugin/marketplace.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "ipybox", 3 | "version": "0.0.2", 4 | "description": "Code action plugins for Claude Code using ipybox", 5 | "owner": { 6 | "name": "gradion-ai" 7 | }, 8 | "plugins": [ 9 | { 10 | "name": "codeact-default", 11 | "source": "./plugin", 12 | "description": "Runs ipybox without sandbox-runtime ", 13 | "version": "0.0.2", 14 | "keywords": ["codeact", "python", "mcp", "ipybox"], 15 | "mcpServers": { 16 | "ipybox": { 17 | "command": "uvx", 18 | "args": ["ipybox", "--workspace", "${PWD}"] 19 | } 20 | } 21 | }, 22 | { 23 | "name": "codeact-sandbox", 24 | "source": "./plugin", 25 | "description": "Runs ipybox with sandbox-runtime", 26 | "version": "0.0.2", 27 | "keywords": ["codeact", "python", "mcp", "ipybox", "sandbox"], 28 | "mcpServers": { 29 | "ipybox": { 30 | "command": "uvx", 31 | "args": ["ipybox", "--workspace", "${PWD}", "--sandbox", "--sandbox-config", "sandbox-config.json"] 32 | } 33 | } 34 | }, 35 | { 36 | "name": "codeact-docker", 37 | "source": "./plugin", 38 | "description": "Runs ipybox as a Docker container", 39 | "version": "0.0.2", 40 | "keywords": ["codeact", "python", "mcp", "ipybox", "docker"], 41 | "mcpServers": { 42 | "ipybox": { 43 | "command": "docker", 44 | "args": ["run", "-i", "--rm", "-v", "${PWD}:/app/workspace", "ipybox"] 45 | } 46 | } 47 | } 48 | ] 49 | } 50 | -------------------------------------------------------------------------------- /server.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://static.modelcontextprotocol.io/schemas/2025-10-17/server.schema.json", 3 | "name": "io.github.gradion-ai/ipybox", 4 | "description": "Python code execution sandbox with first-class support for programmatic MCP tool calling.", 5 | "repository": { 6 | "url": 
"https://github.com/gradion-ai/ipybox", 7 | "source": "github" 8 | }, 9 | "version": "0.0.0", 10 | "packages": [ 11 | { 12 | "registryType": "pypi", 13 | "registryBaseUrl": "https://pypi.org", 14 | "identifier": "ipybox", 15 | "version": "0.0.0", 16 | "runtimeHint": "uvx", 17 | "transport": { 18 | "type": "stdio" 19 | }, 20 | "packageArguments": [ 21 | { 22 | "type": "named", 23 | "name": "--workspace", 24 | "description": "Code workspace directory", 25 | "valueHint": "directory_path", 26 | "isRequired": false 27 | }, 28 | { 29 | "type": "named", 30 | "name": "--sandbox", 31 | "description": "Run kernel gateway in sandbox (macOS only)", 32 | "isRequired": false 33 | }, 34 | { 35 | "type": "named", 36 | "name": "--sandbox-config", 37 | "description": "Sandbox config file path", 38 | "valueHint": "file_path", 39 | "isRequired": false 40 | }, 41 | { 42 | "type": "named", 43 | "name": "--log-level", 44 | "description": "Logging level", 45 | "valueHint": "DEBUG|INFO|WARNING|ERROR|CRITICAL", 46 | "isRequired": false 47 | } 48 | ] 49 | } 50 | ] 51 | } 52 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release Package 2 | 3 | on: 4 | push: 5 | tags: 6 | - '[0-9]+.[0-9]+.[0-9]+*' 7 | 8 | jobs: 9 | release-build: 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - uses: actions/checkout@v4 14 | with: 15 | fetch-depth: 0 16 | fetch-tags: true 17 | 18 | - name: Verify current tag is on main branch 19 | run: | 20 | # Exit with error if current tag is not on main 21 | git merge-base --is-ancestor ${{ github.sha }} origin/main || exit 1 22 | 23 | - name: Install uv 24 | uses: astral-sh/setup-uv@v6 25 | with: 26 | version: ${{ vars.CI_UV_VERSION }} 27 | enable-cache: true 28 | 29 | - name: Install Python 30 | run: uv python install 31 | 32 | - name: Build package 33 | run: | 34 | uv build 35 | ls -la dist/ 36 | 37 | - name: 
Upload distributions 38 | uses: actions/upload-artifact@v4 39 | with: 40 | name: release-dists 41 | path: dist/ 42 | retention-days: 1 43 | 44 | pypi-publish: 45 | runs-on: ubuntu-latest 46 | 47 | needs: 48 | - release-build 49 | 50 | permissions: 51 | id-token: write 52 | 53 | environment: 54 | name: pypi 55 | url: https://pypi.org/project/ipybox 56 | 57 | steps: 58 | - name: Retrieve release distributions 59 | uses: actions/download-artifact@v4 60 | with: 61 | name: release-dists 62 | path: dist/ 63 | 64 | - name: Publish package distributions to PyPI 65 | uses: pypa/gh-action-pypi-publish@release/v1 66 | with: 67 | verbose: true 68 | -------------------------------------------------------------------------------- /docs/generated/gentools/github/commits_of_top_repos/impl.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import json 4 | 5 | from mcptools.github import list_commits, search_repositories 6 | 7 | from .api import CommitInfo, RepoCommits 8 | 9 | 10 | def get_commits_of_top_repos( 11 | username: str, 12 | top_n_repos: int, 13 | last_n_commits: int, 14 | ) -> list[RepoCommits]: 15 | """Fetch top repos by stars and their latest commits.""" 16 | # Get user's repos sorted by stars 17 | repos_result = search_repositories.run_parsed( 18 | search_repositories.Params( 19 | query=f"user:{username}", 20 | sort=search_repositories.Sort.stars, 21 | order=search_repositories.Order.desc, 22 | perPage=top_n_repos, 23 | ) 24 | ) 25 | 26 | result = [] 27 | for repo in repos_result.repositories[:top_n_repos]: 28 | # Get latest commits for this repo 29 | commits_raw = list_commits.run( 30 | list_commits.Params( 31 | owner=username, 32 | repo=repo.name, 33 | perPage=last_n_commits, 34 | ) 35 | ) 36 | 37 | commits_data = json.loads(commits_raw) 38 | commits = [ 39 | CommitInfo( 40 | sha=c["sha"][:7], 41 | message=c["commit"]["message"].split("\n")[0], 42 | url=c["html_url"], 43 | ) 44 | for c 
in commits_data[:last_n_commits] 45 | ] 46 | 47 | result.append( 48 | RepoCommits( 49 | name=repo.name, 50 | stars=repo.stargazers_count, 51 | commits=commits, 52 | ) 53 | ) 54 | 55 | return result 56 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:22.04 2 | 3 | # Build arguments for user ID matching 4 | ARG UID=1000 5 | ARG GID=1000 6 | 7 | # Create user with matching UID/GID 8 | RUN groupadd -g ${GID} ipybox && \ 9 | useradd -m -u ${UID} -g ipybox ipybox 10 | 11 | ENV HOME=/home/ipybox 12 | 13 | # Install system dependencies 14 | RUN apt-get update && apt-get install -y \ 15 | curl \ 16 | wget \ 17 | ca-certificates \ 18 | git \ 19 | && rm -rf /var/lib/apt/lists/* 20 | 21 | # Install Node.js 22.x 22 | RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - \ 23 | && apt-get install -y nodejs \ 24 | && rm -rf /var/lib/apt/lists/* 25 | 26 | # Install uv 27 | COPY --from=ghcr.io/astral-sh/uv:0.9.15 /uv /uvx /bin/ 28 | 29 | WORKDIR ${HOME} 30 | 31 | # Bypass dynamic versioning since .git is not available 32 | ENV UV_DYNAMIC_VERSIONING_BYPASS=0.0.0+docker 33 | 34 | # Create workspace directory with correct ownership 35 | RUN mkdir -p /app/workspace && chown ipybox:ipybox /app/workspace 36 | 37 | # Copy entrypoint script (requires root) 38 | COPY docker-entrypoint.sh /usr/local/bin/ 39 | RUN chmod +x /usr/local/bin/docker-entrypoint.sh 40 | 41 | # Copy dependency files first for better Docker layer caching 42 | COPY --chown=ipybox:ipybox pyproject.toml uv.lock .python-version ${HOME}/ 43 | 44 | # Switch to non-root user before installing dependencies 45 | USER ipybox 46 | 47 | # Install dependencies only (not the project itself) 48 | RUN uv sync --no-install-project --no-dev 49 | 50 | # Copy source code and README (required for package metadata) 51 | COPY --chown=ipybox:ipybox ipybox ${HOME}/ipybox/ 52 | COPY 
--chown=ipybox:ipybox README.md ${HOME}/ 53 | 54 | # Install the project 55 | RUN uv sync --no-dev 56 | 57 | WORKDIR /app/workspace 58 | 59 | ENTRYPOINT ["/usr/local/bin/docker-entrypoint.sh"] 60 | -------------------------------------------------------------------------------- /CLAUDE.md: -------------------------------------------------------------------------------- 1 | # CLAUDE.md 2 | 3 | This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. 4 | 5 | ## Architecture 6 | 7 | ### Key Modules 8 | 9 | - `ipybox/code_exec.py`: `CodeExecutor` - main API, orchestrates kernel and tool execution 10 | - `ipybox/kernel_mgr/server.py`: `KernelGateway` - manages Jupyter Kernel Gateway subprocess 11 | - `ipybox/kernel_mgr/client.py`: `KernelClient` - WebSocket client for kernel communication 12 | - `ipybox/tool_exec/server.py`: `ToolServer` - FastAPI server managing MCP servers and tool calls 13 | - `ipybox/tool_exec/client.py`: `ToolRunner` - client for executing MCP tools on ToolServer 14 | - `ipybox/tool_exec/approval/`: `ApprovalChannel`/`ApprovalClient` - approval request workflow 15 | - `ipybox/mcp_apigen.py`: `generate_mcp_sources()` - generates typed Python wrappers from MCP schemas 16 | - `ipybox/mcp_server.py`: `IpyboxMCPServer` - MCP server exposing ipybox capabilities 17 | - `ipybox/mcp_client.py`: `MCPClient` - generic MCP client (stdio, SSE, streamable HTTP) 18 | 19 | ### Execution Flow 20 | 21 | 1. User code calls a generated MCP wrapper function 22 | 2. Wrapper -> `ToolRunner.run_sync()` -> HTTP POST to ToolServer `/run` 23 | 3. ToolServer -> `ApprovalChannel.request()` -> WebSocket -> `ApprovalClient` 24 | 4. Application callback receives `ApprovalRequest`, calls `accept()`/`reject()` 25 | 5. If accepted: ToolServer executes the MCP tool on the MCP server 26 | 6. 
Result returned by generated wrapper function 27 | 28 | ### Code Generation 29 | 30 | `generate_mcp_sources()` connects to an MCP server, discovers tools, and generates: 31 | - One module per tool with `Params` (Pydantic), optional `Result`, and `run()` function 32 | - `__init__.py` with ToolRunner setup 33 | - Uses `datamodel-code-generator` for schema -> Pydantic conversion 34 | -------------------------------------------------------------------------------- /docs/generated/mcptools/github/search_repositories_orig.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from enum import Enum 4 | from typing import Optional 5 | 6 | from pydantic import BaseModel, ConfigDict, confloat 7 | 8 | from . import CLIENT 9 | 10 | 11 | class Order(Enum): 12 | asc = "asc" 13 | desc = "desc" 14 | 15 | 16 | class Sort(Enum): 17 | stars = "stars" 18 | forks = "forks" 19 | help_wanted_issues = "help-wanted-issues" 20 | updated = "updated" 21 | 22 | 23 | class Params(BaseModel): 24 | model_config = ConfigDict( 25 | use_enum_values=True, 26 | ) 27 | minimal_output: Optional[bool] = True 28 | """ 29 | Return minimal repository information (default: true). When false, returns full GitHub API repository objects. 30 | """ 31 | order: Optional[Order] = None 32 | """ 33 | Sort order 34 | """ 35 | page: Optional[confloat(ge=1.0)] = None 36 | """ 37 | Page number for pagination (min 1) 38 | """ 39 | perPage: Optional[confloat(ge=1.0, le=100.0)] = None 40 | """ 41 | Results per page for pagination (min 1, max 100) 42 | """ 43 | query: str 44 | """ 45 | Repository search query. Examples: 'machine learning in:name stars:>1000 language:python', 'topic:react', 'user:facebook'. Supports advanced search syntax for precise filtering. 
46 | """ 47 | sort: Optional[Sort] = None 48 | """ 49 | Sort repositories by field, defaults to best match 50 | """ 51 | 52 | 53 | def run(params: Params) -> str: 54 | """Find GitHub repositories by name, description, readme, topics, or other metadata. Perfect for discovering projects, finding examples, or locating specific repositories across GitHub.""" 55 | return CLIENT.run_sync(tool_name="search_repositories", tool_args=params.model_dump(exclude_none=True)) 56 | -------------------------------------------------------------------------------- /examples/quickstart.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from pathlib import Path 3 | 4 | from ipybox import ApprovalRequest, CodeExecutionResult, CodeExecutor, generate_mcp_sources 5 | from ipybox.utils import arun 6 | 7 | SERVER_PARAMS = { 8 | "command": "npx", 9 | "args": [ 10 | "-y", 11 | "@brave/brave-search-mcp-server", 12 | "--transport", 13 | "stdio", 14 | ], 15 | "env": { 16 | "BRAVE_API_KEY": "${BRAVE_API_KEY}", 17 | }, 18 | } 19 | 20 | CODE = """ 21 | from mcptools.brave_search.brave_image_search import Params, Result, run 22 | 23 | result: Result = run(Params(query="neural topic models", count=3)) 24 | 25 | for image in result.items: 26 | print(f"- [{image.title}]({image.properties.url})") 27 | """ 28 | 29 | 30 | async def main(): 31 | # Generate a Python tool API 32 | # for the Brave Search MCP server 33 | await generate_mcp_sources( 34 | server_name="brave_search", 35 | server_params=SERVER_PARAMS, 36 | root_dir=Path("mcptools"), 37 | ) 38 | 39 | # Launch ipybox code executor 40 | async with CodeExecutor() as executor: 41 | # Execute code that calls an MCP tool 42 | # programmatically in an IPython kernel 43 | async for item in executor.stream(CODE): 44 | match item: 45 | # Handle approval requests 46 | case ApprovalRequest() as req: 47 | # Prompt user to approve or reject MCP tool call 48 | prompt = f"Tool call: [{req}]\nApprove? 
(Y/n): " 49 | if await arun(input, prompt) in ["y", ""]: 50 | await req.accept() 51 | else: 52 | await req.reject() 53 | # Handle final execution result 54 | case CodeExecutionResult(text=text): 55 | print(text) 56 | 57 | 58 | if __name__ == "__main__": 59 | asyncio.run(main()) 60 | -------------------------------------------------------------------------------- /docs/generated/mcpparse/github/search_repositories.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from mcptools.github.search_repositories import ParseResult, Repository 4 | 5 | 6 | class SearchRepositoriesParseError(Exception): 7 | """Exception raised when parsing search_repositories results fails.""" 8 | 9 | pass 10 | 11 | 12 | def parse(result: str) -> ParseResult: 13 | """Parse search_repositories result into structured data. 14 | 15 | Args: 16 | result: Raw JSON string result from the tool 17 | 18 | Returns: 19 | ParseResult with structured repository search data 20 | 21 | Raises: 22 | SearchRepositoriesParseError: If parsing fails 23 | """ 24 | try: 25 | data = json.loads(result) 26 | except json.JSONDecodeError as e: 27 | raise SearchRepositoriesParseError(f"Failed to parse JSON: {e}") from e 28 | 29 | try: 30 | repositories = [ 31 | Repository( 32 | id=item["id"], 33 | name=item["name"], 34 | full_name=item["full_name"], 35 | description=item.get("description"), 36 | html_url=item["html_url"], 37 | language=item.get("language"), 38 | stargazers_count=item["stargazers_count"], 39 | forks_count=item["forks_count"], 40 | open_issues_count=item["open_issues_count"], 41 | updated_at=item["updated_at"], 42 | created_at=item["created_at"], 43 | private=item["private"], 44 | fork=item["fork"], 45 | archived=item["archived"], 46 | default_branch=item["default_branch"], 47 | ) 48 | for item in data["items"] 49 | ] 50 | 51 | return ParseResult( 52 | total_count=data["total_count"], 53 | incomplete_results=data["incomplete_results"], 54 | 
repositories=repositories, 55 | ) 56 | except KeyError as e: 57 | raise SearchRepositoriesParseError(f"Missing required field: {e}") from e 58 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "ipybox" 3 | dynamic = ["version"] 4 | description = "ipybox" 5 | authors = [ 6 | { name = "Martin Krasser", email = "martin@gradion.ai" }, 7 | ] 8 | requires-python = ">=3.11,<3.15" 9 | readme = "README.md" 10 | license = "Apache-2.0" 11 | dependencies = [ 12 | "aiofiles>=25.1.0", 13 | "aiohttp>=3.13.2", 14 | "datamodel-code-generator>=0.35.0", 15 | "fastapi>=0.121.2", 16 | "ipykernel>=7.1.0", 17 | "jupyter-kernel-gateway>=3.0.1", 18 | "mcp>=1.21.2", 19 | "pillow>=12.0.0", 20 | "pip>=25.3", 21 | "psutil>=7.1.3", 22 | "python-dotenv>=1.2.1", 23 | "requests>=2.32.5", 24 | "uvicorn>=0.38.0", 25 | "websockets>=15.0.1", 26 | ] 27 | 28 | [tool.uv] 29 | default-groups = [ 30 | "docs", 31 | "dev", 32 | ] 33 | 34 | [dependency-groups] 35 | docs = [ 36 | "griffe-pydantic>=1.1.8", 37 | "mkdocs>=1.6.1,<2", 38 | "mkdocs-material>=9.5.48,<10", 39 | "mkdocs-redirects>=1.2.2", 40 | "mkdocstrings-python>=1.12.2,<2", 41 | "mkdocs-llmstxt>=0.4.0", 42 | "click<=8.2.1", 43 | ] 44 | dev = [ 45 | "flaky>=3.8.1", 46 | "invoke>=2.2,<3", 47 | "jsonschema>=4.25", 48 | "matplotlib>=3.10.7", 49 | "pre-commit>=4.3.0,<5", 50 | "pytest>=8.4.2,<9", 51 | "pytest-asyncio>=1.2.0,<2", 52 | "pytest-cov>=4.1.0,<5", 53 | ] 54 | 55 | [project.scripts] 56 | ipybox = "ipybox.mcp_server:cli" 57 | 58 | [build-system] 59 | requires = ["hatchling", "uv-dynamic-versioning"] 60 | build-backend = "hatchling.build" 61 | 62 | [tool.hatch.version] 63 | source = "uv-dynamic-versioning" 64 | 65 | [tool.uv-dynamic-versioning] 66 | vcs = "git" 67 | pattern = "default-unprefixed" 68 | style = "pep440" 69 | 70 | [tool.hatch.build.targets.sdist] 71 | include = ["ipybox"] 72 | 73 | 
[tool.hatch.build.targets.wheel] 74 | include = ["ipybox"] 75 | 76 | [tool.ruff] 77 | line-length = 120 78 | 79 | [tool.ruff.lint.per-file-ignores] 80 | "**/__init__.py" = ["F401"] 81 | 82 | [tool.mypy] 83 | check_untyped_defs = true 84 | ignore_missing_imports = true 85 | 86 | [[tool.mypy.overrides]] 87 | ignore_errors = true 88 | module = [ 89 | "mcptools.*", 90 | "aiofiles.*", 91 | "requests.*", 92 | ] 93 | -------------------------------------------------------------------------------- /docs/apigen.md: -------------------------------------------------------------------------------- 1 | # Python tool API generation 2 | 3 | ```python 4 | --8<-- "examples/apigen.py:imports" 5 | ``` 6 | 7 | [`generate_mcp_sources()`][ipybox.generate_mcp_sources] generates a typed Python tool API from MCP server tool schemas. Each tool becomes a module with a Pydantic `Params` class, a `Result` class or `str` return type, and a `run()` function. 8 | 9 | ## Stdio servers 10 | 11 | For MCP servers that run as local processes, specify `command`, `args`, and optional `env`: 12 | 13 | ```python 14 | --8<-- "examples/apigen.py:gen_brave_search_wrappers" 15 | ``` 16 | 17 | ## HTTP servers 18 | 19 | For remote MCP servers over HTTP, specify `url` and optional `headers`: 20 | 21 | ```python 22 | --8<-- "examples/apigen.py:gen_github_wrappers" 23 | ``` 24 | 25 | ipybox auto-detects the transport type from the URL. URLs containing `/mcp` use streamable HTTP, URLs containing `/sse` use SSE. You can also set `type` explicitly to `"streamable_http"` or `"sse"`. 26 | 27 | ## Environment variable substitution 28 | 29 | You can use `${VAR_NAME}` placeholders in `server_params` values. ipybox replaces them with the corresponding environment variable when connecting to the MCP server. This keeps secrets out of your code. 
30 | 31 | ## Generated package structure 32 | 33 | The Brave Search MCP server [example above](#stdio-servers) generates a package structure like this: 34 | 35 | ``` 36 | mcptools/ 37 | └── brave_search/ 38 | ├── __init__.py 39 | ├── brave_web_search.py 40 | ├── brave_local_search.py 41 | ├── brave_image_search.py 42 | └── ... 43 | ``` 44 | 45 | For each MCP server tool, a separate Python module is generated, named after the tool. 46 | 47 | ## Using the generated API 48 | 49 | Each module provides a typed interface for programmatic MCP tool calls: 50 | 51 | ```python 52 | from mcptools.brave_search.brave_image_search import Params, Result, run 53 | 54 | # Params validates input 55 | params = Params(query="neural topic models", count=3) 56 | 57 | # run() calls the MCP tool and returns a Result (or str for untyped tools) 58 | result: Result = run(params) 59 | 60 | for image in result.items: 61 | print(image.title) 62 | ``` 63 | 64 | The `Params` class is generated from the tool's input schema. Tools with an output schema get a typed `Result` class; others return `str`. The MCP tool itself is called via its `run()` function. 65 | -------------------------------------------------------------------------------- /docs/sandbox.md: -------------------------------------------------------------------------------- 1 | # Sandboxing 2 | 3 | ipybox uses Anthropic's [sandbox-runtime](https://github.com/anthropic-experimental/sandbox-runtime) to isolate code execution. When enabled, the IPython kernel runs with restricted filesystem and network access. 4 | 5 | ```python 6 | --8<-- "examples/sandbox.py:imports" 7 | ``` 8 | 9 | ## Default sandbox 10 | 11 | Enable sandboxing with `sandbox=True`. 
12 | 13 | ```python 14 | --8<-- "examples/sandbox.py:default_sandbox" 15 | ``` 16 | 17 | The default sandbox configuration allows: 18 | 19 | - Reading all files except `.env` 20 | - Writing to the current directory and subdirectories (plus IPython directories) 21 | - Local network access to the tool execution server 22 | 23 | ```json title="Default sandbox configuration" 24 | --8<-- "ipybox/kernel_mgr/sandbox.json" 25 | ``` 26 | 27 | Internet access is blocked as demonstrated in the example above. See the [sandbox-runtime](https://github.com/anthropic-experimental/sandbox-runtime) documentation for all configuration options. 28 | 29 | ## Custom sandbox 30 | 31 | To allow access to `example.org`, provide a custom sandbox configuration file: 32 | 33 | ```json title="examples/sandbox-kernel.json" 34 | --8<-- "examples/sandbox-kernel.json" 35 | ``` 36 | 37 | and pass it as `sandbox_config` argument: 38 | 39 | ```python 40 | --8<-- "examples/sandbox.py:custom_sandbox" 41 | ``` 42 | 43 | ## Sandboxing MCP servers 44 | 45 | stdio MCP servers like the [filesystem MCP server](https://github.com/modelcontextprotocol/servers/tree/main/src/filesystem) can be configured to run in a sandbox using `srt` as command: 46 | 47 | ```python 48 | --8<-- "examples/sandbox.py:sandboxed_mcp_server_params" 49 | ``` 50 | 51 | The sandbox configuration is: 52 | 53 | ```json title="examples/sandbox-mcp.json" 54 | --8<-- "examples/sandbox-mcp.json" 55 | ``` 56 | 57 | The server itself is configured with permissions to access all files in the current directory (`"."`), but the sandbox additionally blocks read access to `.env`. The sandbox also allows access to `registry.npmjs.org` for downloading the server package via `npx`, and `~/.npm` for the local `npm` cache. 58 | 59 | ```python 60 | --8<-- "examples/sandbox.py:sandboxed_mcp_server_usage" 61 | ``` 62 | 63 | !!! 
Info 64 | 65 | MCP server sandboxing is independent of kernel sandboxing and usually not needed when using trusted servers, but provides an additional security layer when needed. 66 | -------------------------------------------------------------------------------- /plugin/skills/codeact/SKILL.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: codeact 3 | description: Generate and execute code for acting with Python tools. Activate when user explicitly requests to "use the codeact skill" or similar phrases. 4 | --- 5 | 6 | Use Python tools to perform tasks. 7 | 8 | You must use the `execute_ipython_cell` tool of the `ipybox` MCP server for executing Python code. 9 | 10 | All operations must follow the tool usage restrictions and workflows defined below. 11 | 12 | ## Tool Directories 13 | 14 | The `gentools/` and `mcptools/` directories are in the **working directory** shown in your `` block. All paths are relative to the working directory, NOT to this skill's base directory. 15 | 16 | ## Tool Usage Restrictions 17 | 18 | You are restricted to these tools only: 19 | 20 | ### Python Tools 21 | 22 | - Functions in `mcptools//.py` (use `run_parsed` if defined, otherwise `run`) 23 | - Functions in `gentools///api.py` 24 | 25 | ### `ipybox` MCP Server Tools 26 | 27 | - `execute_ipython_cell` - Execute Python code 28 | - `reset` - Reset the IPython kernel 29 | 30 | ### Claude Code Filesystem Tools 31 | 32 | - All filesystem tools for reading, writing files, and listing directories. 33 | 34 | ## Workflow 35 | 36 | ### 1. Python Tool Selection 37 | 38 | 1. List available categories in `gentools/` and `mcptools/` 39 | 2. List available tools in relevant categories 40 | 3. Read tool files to understand interfaces and parameters. 41 | 42 | ### 2. Python Tool Priority 43 | 44 | 1. Search `gentools` package first 45 | 2. If not found, search `mcptools` package 46 | 3. 
If no appropriate tool exists, generate custom code 47 | 48 | ### 3. Code Generation and Python Tool Chaining 49 | 50 | - Generate code that uses selected Python tools as argument for `execute_ipython_cell`. 51 | - Chain Python tools in the generated code if the structured output of one tool can be used as input for another tool. 52 | 53 | ### 4. Code Execution 54 | 55 | - Use the `execute_ipython_cell` for Python code execution 56 | - Print only required information, not intermediate results 57 | - Store intermediate results in variables 58 | 59 | ## Output Parsers 60 | 61 | When generating output parsers for Python tools in the `mcptools` package, see [references/output-parsers.md](references/output-parsers.md). 62 | 63 | ## Saving Code Actions 64 | 65 | To save executed code as a reusable gentools tool, see [references/saving-codeacts.md](references/saving-codeacts.md). 66 | -------------------------------------------------------------------------------- /ipybox/vars.py: -------------------------------------------------------------------------------- 1 | import re 2 | from dataclasses import dataclass 3 | from typing import Any, Generic, Mapping, TypeVar 4 | 5 | T = TypeVar("T", str, dict[str, Any]) 6 | 7 | 8 | @dataclass 9 | class ReplaceResult(Generic[T]): 10 | replaced: T 11 | replaced_variables: set[str] 12 | missing_variables: set[str] 13 | 14 | @property 15 | def total_variables(self) -> int: 16 | return len(self.replaced_variables) + len(self.missing_variables) 17 | 18 | 19 | def replace_variables(template: dict[str, Any], variables: Mapping[str, str]) -> ReplaceResult[dict[str, Any]]: 20 | """Recursively replace variables in all string values within a dict.""" 21 | all_replaced_vars = set() 22 | all_missing_vars = set() 23 | 24 | def process_value(value: Any) -> Any: 25 | """Process a value, replacing variables if it's a string or recursing if it's a container.""" 26 | if isinstance(value, str): 27 | result = _replace_variables(value, variables) 28 | 
all_replaced_vars.update(result.replaced_variables) 29 | all_missing_vars.update(result.missing_variables) 30 | return result.replaced 31 | elif isinstance(value, dict): 32 | processed_dict = {} 33 | for k, v in value.items(): 34 | processed_dict[k] = process_value(v) 35 | return processed_dict 36 | elif isinstance(value, list): 37 | return [process_value(item) for item in value] 38 | else: 39 | # Return non-string, non-container values unchanged 40 | return value 41 | 42 | return ReplaceResult( 43 | replaced=process_value(template), 44 | replaced_variables=all_replaced_vars, 45 | missing_variables=all_missing_vars, 46 | ) 47 | 48 | 49 | def _replace_variables(template: str, variables: Mapping[str, str]) -> ReplaceResult[str]: 50 | """Replace variables of pattern ${VAR_NAME} with values from dict.""" 51 | # Find all variable patterns (a-zA-Z0-9_) 52 | pattern = r"\$\{([a-zA-Z0-9_]+)\}" 53 | matches = re.findall(pattern, template) 54 | 55 | # Track what we've seen 56 | found_vars = set(matches) 57 | replaced_vars = set() 58 | missing_vars = set() 59 | 60 | # Replace variables 61 | rendered = template 62 | for var_name in found_vars: 63 | if var_name in variables: 64 | rendered = rendered.replace(f"${{{var_name}}}", variables[var_name]) 65 | replaced_vars.add(var_name) 66 | else: 67 | missing_vars.add(var_name) 68 | 69 | return ReplaceResult( 70 | replaced=rendered, 71 | replaced_variables=replaced_vars, 72 | missing_variables=missing_vars, 73 | ) 74 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # ipybox 2 | 3 | !!! info "Next generation ipybox" 4 | 5 | This is the next generation of ipybox, a complete rewrite. Older versions are maintained on the [0.6.x branch](https://github.com/gradion-ai/ipybox/tree/0.6.x) and can be obtained with `pip install ipybox<0.7`. 
6 | 7 | ipybox is a Python code execution sandbox with first-class support for programmatic MCP tool calling. It generates a typed Python tool API from MCP server tool schemas, supporting both local stdio and remote HTTP servers. Code that calls the generated API executes in a sandboxed IPython kernel, providing a stateful environment where variables and definitions persist across executions. The generated API delegates MCP tool execution to a separate environment that enforces tool call approval, requiring applications to explicitly accept or reject each tool call before it executes. 8 | 9 |
10 | ![Architecture](images/architecture-light-annotated.jpg){ width="100%" } 11 |
CodeExecutor coordinates sandboxed code execution, tool execution, and tool call approval.
12 |
13 | 14 | ## Agent integration 15 | 16 | ipybox is designed for agents that interact with their environment through [code actions](https://arxiv.org/abs/2402.01030) rather than JSON tool calls, a more reliable approach since LLMs are heavily pretrained on Python code compared to JSON tool call post-training. Agents generate and execute Python code that composes multiple MCP tool calls into a single action, using loops, conditionals, and data transformations that keep intermediate results out of the agent's context window. Since agent-generated code cannot be trusted, it must run in a secure sandboxed environment, and all MCP tool calls must be approved by the application. ipybox supports both with minimal setup. 17 | 18 | ## Features 19 | 20 | - **Stateful code execution** — state persists across executions in IPython kernels 21 | - **Lightweight sandboxing** — kernel isolation via Anthropic's [sandbox-runtime](https://github.com/anthropic-experimental/sandbox-runtime) 22 | - **Generated Python tool API** — functions and models generated from MCP tool schemas 23 | - **Programmatic MCP tool calling** — MCP tools called via Python code, not JSON directly 24 | - **MCP tool call approval** — every MCP tool call requires application-level approval 25 | - **Any MCP server** — supports stdio, Streamable HTTP, and SSE transports 26 | - **Any Python package** — install and use any Python package in IPython kernels 27 | - **Local code execution** — no cloud dependencies, everything runs on your machine 28 | - **Python SDK and MCP server** — use ipybox programmatically or as an MCP server 29 | - **Claude Code plugin** — a plugin for [programmatic tool calling in Claude Code](ccplugin.md) 30 | 31 | ## LLM-friendly documentation 32 | 33 | For LLM-friendly versions of this documentation, see [llms.txt](llms.txt) and [llms-full.txt](llms-full.txt). 
34 | -------------------------------------------------------------------------------- /docs/codeexec.md: -------------------------------------------------------------------------------- 1 | # Code execution 2 | 3 | ```python 4 | --8<-- "examples/codexec.py:imports" 5 | ``` 6 | 7 | [`CodeExecutor`][ipybox.CodeExecutor] runs Python code in an IPython kernel where variables and definitions persist across executions. 8 | 9 | ## Basic execution 10 | 11 | Use `execute()` for non-interactive execution where MCP tool calls, if any, are auto-approved: 12 | 13 | ```python 14 | --8<-- "examples/codexec.py:basic_execution" 15 | ``` 16 | 17 | For application-level approval control, use `stream()` instead. 18 | 19 | ## Tool call approval 20 | 21 | When code calls the [generated Python tool API](apigen.md), ipybox suspends execution and yields an [`ApprovalRequest`][ipybox.ApprovalRequest]. You must call `accept()` to continue execution: 22 | 23 | ```python 24 | --8<-- "examples/codexec.py:basic_approval" 25 | ``` 26 | 27 | The approval request includes `tool_name` and `tool_args` so you can inspect what's being called. Calling `reject()` raises a [`CodeExecutionError`][ipybox.CodeExecutionError]. 28 | 29 | ## Stream output chunks 30 | 31 | Enable `chunks=True` to receive output incrementally as it's produced: 32 | 33 | ```python 34 | --8<-- "examples/codexec.py:basic_chunks" 35 | ``` 36 | 37 | [`CodeExecutionChunk`][ipybox.CodeExecutionChunk] events contain partial output. The final [`CodeExecutionResult`][ipybox.CodeExecutionResult] still contains the complete output. 38 | 39 | ## Capturing plots 40 | 41 | Plots are automatically captured as PNG files in the `images` directory. Use `images_dir` to customize the location: 42 | 43 | ```python 44 | --8<-- "examples/codexec.py:basic_plotting" 45 | ``` 46 | 47 | Generated images are available in `result.images` as a list of `Path` objects. 
48 | 49 | ## Custom timeouts 50 | 51 | Configure approval and execution timeouts: 52 | 53 | ```python 54 | --8<-- "examples/codexec.py:custom_timeouts" 55 | ``` 56 | 57 | - `approval_timeout`: How long to wait for `accept()`/`reject()` (default: 60s) 58 | - `timeout` in `stream()`: Maximum total execution time including approval waits (default: 120s) 59 | 60 | ## Kernel environment 61 | 62 | The IPython kernel does not inherit environment variables from the parent process. You can pass them explicitly with `kernel_env`: 63 | 64 | ```python 65 | --8<-- "examples/codexec.py:kernel_environment" 66 | ``` 67 | 68 | !!! note 69 | 70 | Environment variables referenced in `server_params` via `${VAR_NAME}` are taken from the parent process and do not need to be passed to `kernel_env`. 71 | 72 | ## Kernel reset 73 | 74 | Clear all variables and definitions by resetting the IPython kernel with `reset()`: 75 | 76 | ```python 77 | --8<-- "examples/codexec.py:kernel_reset" 78 | ``` 79 | 80 | This also stops any MCP servers started during execution. They restart lazily on their next tool call. 81 | 82 | ## Working directory 83 | 84 | The kernel shares the working directory with the parent process: 85 | 86 | ```python 87 | --8<-- "examples/codexec.py:working_directory" 88 | ``` 89 | -------------------------------------------------------------------------------- /plugin/skills/codeact/references/saving-codeacts.md: -------------------------------------------------------------------------------- 1 | # Saving Code Actions as Reusable Tools 2 | 3 | Save executed Python code as a tool for later reuse. 4 | 5 | ## Package Structure 6 | 7 | ``` 8 | gentools/// 9 | ├── __init__.py # Empty file 10 | ├── api.py # Public interface with structured models 11 | └── impl.py # Implementation details 12 | ``` 13 | 14 | ## Procedure 15 | 16 | ### 1. Create Package Directory 17 | 18 | ```bash 19 | mkdir -p gentools// 20 | ``` 21 | 22 | Create empty `__init__.py` files in both `` and `` directories. 
23 | 24 | ### 2. Define Tool API (`api.py`) 25 | 26 | ```python 27 | from __future__ import annotations 28 | 29 | from pydantic import BaseModel, Field 30 | 31 | 32 | class OutputModel(BaseModel): 33 | """Description of output.""" 34 | field: type = Field(..., title="Description") 35 | 36 | 37 | def run(param1: type, param2: type = default) -> OutputModel: 38 | """Tool description. 39 | 40 | Args: 41 | param1: Description 42 | param2: Description (default: value) 43 | 44 | Returns: 45 | OutputModel with structured data 46 | """ 47 | from .impl import implementation_function 48 | return implementation_function(param1, param2) 49 | ``` 50 | 51 | Requirements: 52 | - Define Pydantic models for structured output 53 | - Create `run()` function with typed parameters 54 | - Use lazy import from `impl.py` inside `run()` 55 | - Include comprehensive docstring 56 | - Export `OutputModel` and `run` in `gentools///__init__.py`: 57 | 58 | ```python 59 | from .api import OutputModel, run 60 | 61 | __all__ = ["OutputModel", "run"] 62 | ``` 63 | 64 | ### 3. Implement Details (`impl.py`) 65 | 66 | ```python 67 | from __future__ import annotations 68 | 69 | from mcptools.. import Params, run_parsed 70 | from .api import OutputModel 71 | 72 | 73 | def implementation_function(param1: type, param2: type) -> OutputModel: 74 | """Implementation description.""" 75 | # Use tools from mcptools or gentools packages 76 | result = run_parsed(Params(...)) 77 | 78 | # Transform and return structured output 79 | return OutputModel(field=result.data) 80 | ``` 81 | 82 | Requirements: 83 | - Import tools from `mcptools` or `gentools` packages 84 | - Import models from `api.py` 85 | - Return structured models defined in `api.py` 86 | 87 | ### 4. 
Test the Tool 88 | 89 | ```python 90 | from gentools...api import run 91 | 92 | result = run(param1=value1, param2=value2) 93 | print(result) 94 | ``` 95 | 96 | ## Best Practices 97 | 98 | - **Separation**: Keep API clean; hide complexity in implementation 99 | - **Type Safety**: Use Pydantic models for all outputs 100 | - **Modularity**: Break complex logic into smaller functions 101 | - **Defaults**: Provide sensible defaults for optional parameters 102 | -------------------------------------------------------------------------------- /.github/workflows/test_package.yml: -------------------------------------------------------------------------------- 1 | name: Package Installation Tests 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | package-build: 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - uses: actions/checkout@v4 15 | with: 16 | fetch-depth: 0 17 | fetch-tags: true 18 | 19 | - name: Install uv 20 | uses: astral-sh/setup-uv@v6 21 | with: 22 | version: ${{ vars.CI_UV_VERSION }} 23 | enable-cache: true 24 | 25 | - name: Install Python 26 | run: uv python install 27 | 28 | - name: Build package 29 | run: | 30 | uv build 31 | 32 | - name: Upload dist artifacts 33 | uses: actions/upload-artifact@v4 34 | with: 35 | name: dist 36 | path: dist/ 37 | retention-days: 1 38 | 39 | package-test: 40 | needs: package-build 41 | 42 | strategy: 43 | fail-fast: false 44 | matrix: 45 | os: [ubuntu-latest, windows-latest, macos-latest] 46 | python-version: ['3.11', '3.13'] 47 | 48 | runs-on: ${{ matrix.os }} 49 | 50 | steps: 51 | - uses: actions/checkout@v4 52 | 53 | - name: Set up Python ${{ matrix.python-version }} 54 | uses: actions/setup-python@v5 55 | with: 56 | python-version: ${{ matrix.python-version }} 57 | 58 | - name: Download package 59 | uses: actions/download-artifact@v4 60 | with: 61 | name: dist 62 | path: dist/ 63 | 64 | - name: Test wheel installation (Windows) 65 | if: runner.os == 'Windows' 66 | 
shell: pwsh 67 | run: | 68 | $wheel = Get-ChildItem dist/*.whl | Select-Object -First 1 69 | pip install $wheel 70 | python -c "import ipybox" 71 | if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE } 72 | pip uninstall -y ipybox 73 | 74 | - name: Test wheel installation (Unix) 75 | if: runner.os != 'Windows' 76 | run: | 77 | pip install dist/*.whl 78 | python -c "import ipybox" 79 | pip uninstall -y ipybox 80 | 81 | - name: Test tarball installation (Windows) 82 | if: runner.os == 'Windows' 83 | shell: pwsh 84 | run: | 85 | $tarball = Get-ChildItem dist/*.tar.gz | Select-Object -First 1 86 | pip install $tarball 87 | python -c "import ipybox" 88 | if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE } 89 | pip uninstall -y ipybox 90 | 91 | - name: Test tarball installation (Unix) 92 | if: runner.os != 'Windows' 93 | run: | 94 | pip install dist/*.tar.gz 95 | python -c "import ipybox" 96 | pip uninstall -y ipybox 97 | 98 | - name: Run smoke test (Linux) 99 | if: runner.os == 'Linux' 100 | run: | 101 | pip install dist/*.whl 102 | pip install pytest pytest-asyncio 103 | pytest tests/unit/test_replace_variables.py 104 | pip uninstall -y ipybox 105 | -------------------------------------------------------------------------------- /tasks.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import urllib.request 4 | from sys import platform 5 | 6 | import jsonschema 7 | from invoke import task 8 | 9 | 10 | @task 11 | def precommit_install(c): 12 | """Install pre-commit hooks.""" 13 | c.run("pre-commit install") 14 | 15 | 16 | @task(aliases=["cc"]) 17 | def code_check(c): 18 | """Run coding conventions checks.""" 19 | c.run("pre-commit run --all-files") 20 | 21 | 22 | @task 23 | def test(c, cov=False): 24 | _run_pytest(c, "tests", cov) 25 | 26 | 27 | @task(aliases=["ut"]) 28 | def unit_test(c, cov=False): 29 | _run_pytest(c, "tests/unit", cov) 30 | 31 | 32 | @task(aliases=["it"]) 33 | def integration_test(c, 
cov=False): 34 | _run_pytest(c, "tests/integration", cov) 35 | 36 | 37 | def _run_pytest(c, test_dir, cov=False): 38 | c.run(f"pytest -xsv {test_dir} {_pytest_cov_options(cov)}", pty=_use_pty()) 39 | 40 | 41 | def _use_pty(): 42 | return platform != "win32" 43 | 44 | 45 | def _pytest_cov_options(use_cov: bool): 46 | if not use_cov: 47 | return "" 48 | return "--cov=ipybox --cov-report=term" 49 | 50 | 51 | @task 52 | def build_docs(c): 53 | """Build documentation with MkDocs.""" 54 | c.run("mkdocs build") 55 | 56 | 57 | @task 58 | def serve_docs(c): 59 | """Serve documentation locally with MkDocs.""" 60 | c.run("mkdocs serve -a 0.0.0.0:8000") 61 | 62 | 63 | @task 64 | def deploy_docs(c): 65 | """Deploy documentation to GitHub Pages.""" 66 | c.run("mkdocs gh-deploy --force") 67 | 68 | 69 | @task 70 | def latest_tag(c): 71 | """Get the latest git tag.""" 72 | result = c.run("git describe --tags --abbrev=0", hide=True, warn=True) 73 | if result.ok: 74 | return result.stdout.strip() 75 | return "0.0.0-dev" 76 | 77 | 78 | @task() 79 | def mcp_sync(c): 80 | """Update server.json with version from environment or git tag.""" 81 | version = os.environ.get("VERSION") or latest_tag(c) 82 | 83 | with open("server.json", "r") as f: 84 | data = json.load(f) 85 | 86 | data["version"] = version 87 | if "packages" in data: 88 | for package in data["packages"]: 89 | package["version"] = version 90 | 91 | with open("server.json", "w") as f: 92 | json.dump(data, f, indent=2) 93 | f.write("\n") 94 | 95 | 96 | @task(aliases=["mcp-val"]) 97 | def mcp_validate(c, schema_path=".mcpregistry_schema.json"): 98 | """Validate server.json against MCP schema.""" 99 | schema_url = "https://static.modelcontextprotocol.io/schemas/2025-10-17/server.schema.json" 100 | 101 | if not os.path.exists(schema_path): 102 | with urllib.request.urlopen(schema_url) as response: 103 | schema_content = response.read().decode("utf-8") 104 | with open(schema_path, "w") as f: 105 | f.write(schema_content) 106 | 107 | 
with open("server.json", "r") as f: 108 | server_data = json.load(f) 109 | 110 | with open(schema_path, "r") as f: 111 | schema = json.load(f) 112 | 113 | jsonschema.validate(server_data, schema) 114 | -------------------------------------------------------------------------------- /examples/sandbox.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from pathlib import Path 3 | 4 | # --8<-- [start:imports] 5 | from ipybox import CodeExecutionError, CodeExecutor, generate_mcp_sources 6 | 7 | # --8<-- [end:imports] 8 | 9 | 10 | async def default_sandbox(): 11 | # --8<-- [start:default_sandbox] 12 | async with CodeExecutor(sandbox=True) as executor: 13 | result = await executor.execute("print('hello world')") 14 | assert result.text == "hello world" 15 | 16 | code = """ 17 | import requests 18 | try: 19 | requests.get('https://example.org') 20 | except Exception as e: 21 | print(e) 22 | """ 23 | 24 | # Default sandbox config blocks internet access 25 | result = await executor.execute(code) 26 | assert "Failed to resolve 'example.org'" in result.text 27 | # --8<-- [end:default_sandbox] 28 | 29 | 30 | async def custom_sandbox(): 31 | # --8<-- [start:custom_sandbox] 32 | code = """ 33 | import requests 34 | result = requests.get('https://example.org') 35 | print(result.text) 36 | """ 37 | async with CodeExecutor( 38 | sandbox=True, 39 | sandbox_config=Path("examples/sandbox-kernel.json"), 40 | log_level="WARNING", 41 | ) as executor: 42 | result = await executor.execute(code) 43 | assert "Example Domain" in result.text 44 | # --8<-- [end:custom_sandbox] 45 | 46 | 47 | async def sandboxed_mcp_server(): 48 | # --8<-- [start:sandboxed_mcp_server_params] 49 | server_params = { 50 | "command": "srt", 51 | "args": [ 52 | "--settings", 53 | "examples/sandbox-mcp.json", 54 | "npx", 55 | "-y", 56 | "@modelcontextprotocol/server-filesystem", 57 | ".", 58 | ], 59 | } 60 | # --8<-- [end:sandboxed_mcp_server_params] 61 | 62 | # 
--8<-- [start:sandboxed_mcp_server_usage] 63 | await generate_mcp_sources("filesystem", server_params, Path("mcptools")) 64 | 65 | list_dir_code = """ 66 | from mcptools.filesystem.list_directory import run, Params 67 | result = run(Params(path=".")) 68 | print(result.content) 69 | """ 70 | 71 | read_env_code = """ 72 | from mcptools.filesystem.read_file import run, Params 73 | result = run(Params(path=".env")) 74 | print(result.content) 75 | """ 76 | 77 | async with CodeExecutor(sandbox=True) as executor: 78 | # allowed by MCP server and sandbox 79 | result = await executor.execute(list_dir_code) 80 | assert "README.md" in result.text 81 | 82 | try: 83 | # allowed by MCP server but blocked by sandbox 84 | result = await executor.execute(read_env_code) 85 | assert False, "Read access to .env not blocked" 86 | except CodeExecutionError as e: 87 | assert "operation not permitted" in str(e) 88 | # --8<-- [end:sandboxed_mcp_server_usage] 89 | 90 | 91 | async def main(): 92 | await default_sandbox() 93 | await custom_sandbox() 94 | await sandboxed_mcp_server() 95 | 96 | 97 | if __name__ == "__main__": 98 | asyncio.run(main()) 99 | -------------------------------------------------------------------------------- /.github/workflows/publish_mcp.yml: -------------------------------------------------------------------------------- 1 | name: Publish to MCP Registry 2 | 3 | on: 4 | push: 5 | tags: 6 | - '[0-9]+.[0-9]+.[0-9]+*' 7 | 8 | permissions: 9 | id-token: write 10 | contents: read 11 | 12 | jobs: 13 | publish-mcp: 14 | runs-on: ubuntu-latest 15 | needs: [] # Run independently from other release jobs 16 | 17 | steps: 18 | - name: Checkout code 19 | uses: actions/checkout@v4 20 | with: 21 | fetch-depth: 0 22 | fetch-tags: true 23 | 24 | - name: Verify current tag is on main branch 25 | run: | 26 | # Exit with error if current tag is not on main 27 | git merge-base --is-ancestor ${{ github.sha }} origin/main || exit 1 28 | 29 | - name: Install uv 30 | uses: 
astral-sh/setup-uv@v6 31 | with: 32 | version: ${{ vars.CI_UV_VERSION }} 33 | enable-cache: true 34 | 35 | - name: Install Python 36 | run: uv python install 37 | 38 | - name: Build package 39 | run: | 40 | uv build 41 | ls -la dist/ 42 | 43 | - name: Wait for PyPI package availability 44 | run: | 45 | # Extract version from tag 46 | VERSION=${GITHUB_REF#refs/tags/} 47 | echo "Waiting for ipybox==$VERSION to be available on PyPI..." 48 | 49 | # Wait up to 10 minutes for the package to be available 50 | for i in {1..60}; do 51 | if pip index versions ipybox 2>/dev/null | grep -q "$VERSION"; then 52 | echo "Package ipybox==$VERSION is now available on PyPI" 53 | break 54 | fi 55 | echo "Attempt $i/60: Package not yet available, waiting 10 seconds..." 56 | sleep 10 57 | done 58 | 59 | # Final check 60 | if ! pip index versions ipybox 2>/dev/null | grep -q "$VERSION"; then 61 | echo "ERROR: Package ipybox==$VERSION not found on PyPI after 10 minutes" 62 | exit 1 63 | fi 64 | 65 | - name: Install MCP Publisher 66 | run: | 67 | curl -L "https://github.com/modelcontextprotocol/registry/releases/latest/download/mcp-publisher_$(uname -s | tr '[:upper:]' '[:lower:]')_$(uname -m | sed 's/x86_64/amd64/;s/aarch64/arm64/').tar.gz" | tar xz mcp-publisher && sudo mv mcp-publisher /usr/local/bin/ 68 | 69 | - name: Update server.json version 70 | run: | 71 | # Extract version from tag 72 | VERSION=${GITHUB_REF#refs/tags/} 73 | echo "Updating server.json to version $VERSION" 74 | 75 | # Update version using invoke task 76 | VERSION=$VERSION uv run invoke mcp-sync 77 | 78 | # Show the updated server.json 79 | cat server.json 80 | 81 | - name: Validate server.json 82 | run: | 83 | # Validate using invoke task 84 | uv run invoke mcp-val 85 | 86 | - name: Login to MCP Registry 87 | run: | 88 | # Use GitHub OIDC for authentication 89 | mcp-publisher login github-oidc 90 | 91 | - name: Publish to MCP Registry 92 | run: | 93 | mcp-publisher publish 94 | 95 | - name: Verify publication 96 | 
async def tool_1(s: str) -> str:
    """
    This is tool 1.

    Args:
        s: A string
    """
    # NOTE: the docstring above is kept verbatim — it is passed to
    # server.add_tool() and presumably surfaced as the MCP tool description.
    echoed = "You passed to tool 1: " + s
    return echoed


async def tool_2(s: str) -> str:
    """
    This is tool 2.
    """
    # Same echo pattern as tool_1, registered without an explicit name.
    echoed = "You passed to tool 2: " + s
    return echoed
def create_server(**kwargs) -> FastMCP:
    """Create the test FastMCP server with tool-1, tool_2, and tool_3 registered."""
    server = FastMCP("Test MCP Server", log_level="ERROR", **kwargs)
    # "tool-1" is not a valid Python identifier — presumably registered under a
    # hyphenated name to exercise tool-name handling downstream. TODO confirm.
    server.add_tool(tool_1, structured_output=False, name="tool-1")
    server.add_tool(tool_2, structured_output=False)
    server.add_tool(tool_3)  # structured (nested) output: OuterResult
    return server


@asynccontextmanager
async def streamable_http_server(
    host: str = "0.0.0.0",
    port: int = 8710,
    json_response: bool = True,
) -> AsyncIterator[FastMCP]:
    """Serve the test server over Streamable HTTP while the context is open."""
    server = create_server(host=host, port=port, json_response=json_response)
    async with _server(server.streamable_http_app(), server.settings):
        yield server


@asynccontextmanager
async def sse_server(
    host: str = "0.0.0.0",
    port: int = 8711,
) -> AsyncIterator[FastMCP]:
    """Serve the test server over SSE while the context is open."""
    server = create_server(host=host, port=port)
    async with _server(server.sse_app(), server.settings):
        yield server


@asynccontextmanager
async def _server(app, settings):
    """Run an ASGI app with uvicorn in a background task for the context's lifetime."""
    import uvicorn

    cfg = uvicorn.Config(
        app,
        host=settings.host,
        port=settings.port,
        log_level=settings.log_level.lower(),
    )
    server = uvicorn.Server(cfg)
    task = asyncio.create_task(server.serve())
    # Poll until uvicorn reports startup so callers can connect immediately.
    while not server.started:
        await asyncio.sleep(0.01)

    yield

    # Request a graceful shutdown and wait for the serve task to complete.
    server.should_exit = True
    await task


def main():
    """Run the test server on stdio (entry point when executed as a script)."""
    server = create_server()

    try:
        server.run(transport="stdio")
    except KeyboardInterrupt:
        pass


if __name__ == "__main__":
    main()
# Sort order values: ascending or descending.
class Order(Enum):
    asc = "asc"
    desc = "desc"


# Repository fields the search results can be sorted by.
class Sort(Enum):
    stars = "stars"
    forks = "forks"
    help_wanted_issues = "help-wanted-issues"
    updated = "updated"


# Input parameters for the `search_repositories` MCP tool.
# NOTE(review): this module lives under docs/generated and appears to be
# machine-generated from the tool's schema — confirm before hand-editing.
# Comments (not docstrings) are used here so the pydantic JSON schema output
# is unchanged.
class Params(BaseModel):
    model_config = ConfigDict(
        use_enum_values=True,
    )
    minimal_output: Optional[bool] = True
    """
    Return minimal repository information (default: true). When false, returns full GitHub API repository objects.
    """
    order: Optional[Order] = None
    """
    Sort order
    """
    page: Optional[confloat(ge=1.0)] = None
    """
    Page number for pagination (min 1)
    """
    perPage: Optional[confloat(ge=1.0, le=100.0)] = None
    """
    Results per page for pagination (min 1, max 100)
    """
    query: str
    """
    Repository search query. Examples: 'machine learning in:name stars:>1000 language:python', 'topic:react', 'user:facebook'. Supports advanced search syntax for precise filtering.
    """
    sort: Optional[Sort] = None
    """
    Sort repositories by field, defaults to best match
    """
Perfect for discovering projects, finding examples, or locating specific repositories across GitHub.""" 55 | return CLIENT.run_sync(tool_name="search_repositories", tool_args=params.model_dump(exclude_none=True)) 56 | 57 | 58 | class Repository(BaseModel): 59 | """A GitHub repository from search results.""" 60 | 61 | model_config = ConfigDict( 62 | use_enum_values=True, 63 | ) 64 | id: int = Field(..., title="Repository ID") 65 | name: str = Field(..., title="Repository Name") 66 | full_name: str = Field(..., title="Full Name") 67 | description: str | None = Field(None, title="Description") 68 | html_url: str = Field(..., title="HTML URL") 69 | language: str | None = Field(None, title="Primary Language") 70 | stargazers_count: int = Field(..., title="Stars") 71 | forks_count: int = Field(..., title="Forks") 72 | open_issues_count: int = Field(..., title="Open Issues") 73 | updated_at: str = Field(..., title="Last Updated") 74 | created_at: str = Field(..., title="Created At") 75 | private: bool = Field(..., title="Is Private") 76 | fork: bool = Field(..., title="Is Fork") 77 | archived: bool = Field(..., title="Is Archived") 78 | default_branch: str = Field(..., title="Default Branch") 79 | 80 | 81 | class ParseResult(BaseModel): 82 | """Parsed result containing structured repository search data.""" 83 | 84 | model_config = ConfigDict( 85 | use_enum_values=True, 86 | ) 87 | total_count: int = Field(..., title="Total Count") 88 | incomplete_results: bool = Field(..., title="Incomplete Results") 89 | repositories: list[Repository] = Field(..., title="Repositories") 90 | 91 | 92 | def run_parsed(params: Params) -> ParseResult: 93 | """Run tool and return parsed structured data. 
94 | 95 | Args: 96 | params: Tool parameters 97 | 98 | Returns: 99 | ParseResult with structured repository search data 100 | """ 101 | from mcpparse.github.search_repositories import parse 102 | 103 | result = run(params) 104 | return parse(result) 105 | -------------------------------------------------------------------------------- /docs/quickstart.md: -------------------------------------------------------------------------------- 1 | # Quickstart 2 | 3 | This guide walks through a complete example: generating a Python tool API for the [Brave Search MCP server](https://github.com/brave/brave-search-mcp-server), executing code that calls it, and handling tool call approvals. 4 | 5 | ## Installation 6 | 7 | ```bash 8 | pip install ipybox 9 | ``` 10 | 11 | ## Get a Brave API key 12 | 13 | Sign up for a free API key at [api.search.brave.com](https://api.search.brave.com). Once you have your key, set it as an environment variable: 14 | 15 | ```bash 16 | export BRAVE_API_KEY=your_api_key_here 17 | ``` 18 | 19 | Or create a `.env` file in your project root (ipybox loads it automatically): 20 | 21 | ```env 22 | BRAVE_API_KEY=your_api_key_here 23 | ``` 24 | 25 | ## Complete example 26 | 27 | ```python 28 | --8<-- "examples/quickstart.py" 29 | ``` 30 | 31 | ## How it works 32 | 33 | ### Server parameters 34 | 35 | The `server_params` dict defines how to connect to an MCP server. For stdio servers (local processes), you specify: 36 | 37 | - `command`: The executable to run 38 | - `args`: Command-line arguments 39 | - `env`: Environment variables to pass 40 | 41 | ```python 42 | SERVER_PARAMS = { 43 | "command": "npx", 44 | "args": ["-y", "@brave/brave-search-mcp-server", "--transport", "stdio"], 45 | "env": {"BRAVE_API_KEY": "${BRAVE_API_KEY}"}, 46 | } 47 | ``` 48 | 49 | The `${BRAVE_API_KEY}` placeholder is replaced with the actual value from your environment when ipybox starts the MCP server. 
50 | 51 | ### Generating a Python tool API 52 | 53 | [`generate_mcp_sources()`][ipybox.generate_mcp_sources] connects to the MCP server, discovers its tools, and generates a typed Python API from their schema: 54 | 55 | ```python 56 | await generate_mcp_sources( 57 | server_name="brave_search", 58 | server_params=SERVER_PARAMS, 59 | root_dir=Path("mcptools"), 60 | ) 61 | ``` 62 | 63 | This creates an `mcptools/brave_search` package with a Python module for each MCP server tool: 64 | 65 | ``` 66 | mcptools/brave_search/ 67 | ├── __init__.py 68 | ├── brave_web_search.py 69 | ├── brave_local_search.py 70 | ├── brave_image_search.py 71 | └── ... 72 | ``` 73 | 74 | Each module contains a Pydantic `Params` class for input validation, a `Result` class or `str` return type, and a `run()` function that executes the MCP tool. 75 | 76 | ### Code execution 77 | 78 | [`CodeExecutor`][ipybox.CodeExecutor] runs Python code in an IPython kernel. Variables and definitions persist across executions, enabling stateful workflows. 79 | 80 | ```python 81 | async with CodeExecutor() as executor: 82 | async for item in executor.stream(CODE): 83 | ... 84 | ``` 85 | 86 | The `stream()` method yields events as execution progresses. You'll receive [`ApprovalRequest`][ipybox.ApprovalRequest] events when the code calls an MCP tool, and a final [`CodeExecutionResult`][ipybox.CodeExecutionResult] with the output. 87 | 88 | ### Tool call approval 89 | 90 | When an MCP tool is called during code execution, ipybox pauses execution and sends an [`ApprovalRequest`][ipybox.ApprovalRequest] to your application. You must explicitly approve or reject each tool call: 91 | 92 | ```python 93 | case ApprovalRequest() as req: 94 | if user_approves: 95 | await req.accept() 96 | else: 97 | await req.reject() 98 | ``` 99 | 100 | The [`ApprovalRequest`][ipybox.ApprovalRequest] includes the server name, tool name, and arguments, so you can make informed decisions or implement custom approval logic. 
101 | 102 | ## Next steps 103 | 104 | - [API Generation](apigen.md) - Generating typed Python APIs from MCP tools 105 | - [Code Execution](codeexec.md) - Running code and handling tool approvals 106 | - [Sandboxing](sandbox.md) - Secure execution with network and filesystem isolation 107 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ipybox 2 | 3 | mcp-name: io.github.gradion-ai/ipybox 4 | 5 |

6 | Website 7 | PyPI - Version 8 | GitHub Release 9 | GitHub Actions Workflow Status 10 | GitHub License 11 |

12 | 13 | > [!NOTE] 14 | > **Next generation ipybox** 15 | > 16 | > This is the next generation of ipybox, a complete rewrite. Older versions are maintained on the [0.6.x branch](https://github.com/gradion-ai/ipybox/tree/0.6.x) and can be obtained with `pip install ipybox<0.7`. 17 | 18 | [ipybox](https://gradion-ai.github.io/ipybox/) is a Python code execution sandbox with first-class support for programmatic MCP tool calling. It generates a typed Python tool API from MCP server tool schemas, supporting both local stdio and remote HTTP servers. Code that calls the generated API executes in a sandboxed IPython kernel, providing a stateful environment where variables and definitions persist across executions. The generated API delegates MCP tool execution to a separate environment that enforces tool call approval, requiring applications to explicitly accept or reject each tool call before it executes. 19 | 20 | ![Architecture](docs/images/architecture-light-annotated.jpg) 21 | 22 | *`CodeExecutor` coordinates sandboxed code execution, tool execution, and tool call approval.* 23 | 24 | ## Agent integration 25 | 26 | ipybox is designed for agents that interact with their environment through [code actions](https://arxiv.org/abs/2402.01030) rather than JSON tool calls, a more reliable approach since LLMs are heavily pretrained on Python code compared to JSON tool call post-training. Agents generate and execute Python code that composes multiple MCP tool calls into a single action, using loops, conditionals, and data transformations that keep intermediate results out of the agent's context window. Since agent-generated code cannot be trusted, it must run in a secure sandboxed environment, and all MCP tool calls must be approved by the application. ipybox supports both with minimal setup. 
27 | 28 | ## Features 29 | 30 | - **Stateful code execution** — state persists across executions in IPython kernels 31 | - **Lightweight sandboxing** — kernel isolation via Anthropic's [sandbox-runtime](https://github.com/anthropic-experimental/sandbox-runtime) 32 | - **Generated Python tool API** — functions and models generated from MCP tool schemas 33 | - **Programmatic MCP tool calling** — MCP tools called via Python code, not JSON directly 34 | - **MCP tool call approval** — every MCP tool call requires application-level approval 35 | - **Any MCP server** — supports stdio, Streamable HTTP, and SSE transports 36 | - **Any Python package** — install and use any Python package in IPython kernels 37 | - **Local code execution** — no cloud dependencies, everything runs on your machine 38 | - **Python SDK and MCP server** — use ipybox programmatically or as an MCP server 39 | - **Claude Code plugin** — a plugin for [programmatic tool calling in Claude Code](https://gradion-ai.github.io/ipybox/ccplugin/) 40 | 41 | ## Documentation 42 | 43 | See the [documentation](https://gradion-ai.github.io/ipybox/) for installation instructions, quickstart guide, and detailed usage information. For LLM-friendly documentation, see [llms.txt](https://gradion-ai.github.io/ipybox/llms.txt) and [llms-full.txt](https://gradion-ai.github.io/ipybox/llms-full.txt). 
class MCPClient:
    """Async client for a single MCP server over stdio, Streamable HTTP, or SSE.

    The transport is inferred from `server_params`: a "command" key selects a
    stdio server, a "url" key selects Streamable HTTP or SSE (see `_mcp_client`).
    Use as an async context manager, or call `start()`/`stop()` explicitly.
    """

    def __init__(
        self,
        server_params: dict[str, Any],
        connect_timeout: float = 10,
    ):
        """
        Args:
            server_params: MCP server parameters ("command"/"args"/... for stdio
                servers, "url"/... for HTTP-based servers). Variable placeholders
                (e.g. ${VAR}) are resolved against the process environment.
            connect_timeout: Maximum seconds to wait for session initialization.
        """
        self.connect_timeout = connect_timeout
        # Resolve variable placeholders in server_params from os.environ
        # (see ipybox.vars.replace_variables).
        self.server_params = replace_variables(server_params, os.environ).replaced

        self._session: ClientSession | None = None  # set by start(), cleared by stop()
        self._exit_stack = AsyncExitStack()  # owns transport and session lifetimes

    async def __aenter__(self):
        await self.start()
        return self

    async def __aexit__(self, exc_type, exc_value, traceback):
        await self.stop()

    @property
    def session(self) -> ClientSession:
        # Guard against use before start() or after stop().
        if not self._session:
            raise RuntimeError("MCP client not started")
        return self._session

    async def start(self):
        """Connect to the MCP server and initialize the client session."""
        self._session = await self._exit_stack.enter_async_context(self._mcp_session())

    async def stop(self):
        """Close the session and transport. Safe to call more than once."""
        try:
            await self._exit_stack.aclose()
        except RuntimeError:
            # RuntimeError on close is swallowed deliberately — presumably raised
            # when the exit stack is closed from a different task/cancel scope
            # than the one that opened it. TODO confirm.
            pass
        finally:
            self._session = None

    async def list_tools(self) -> list[Tool]:
        """Return the tools advertised by the connected MCP server."""
        return (await self.session.list_tools()).tools

    async def run(self, tool_name: str, tool_args: dict[str, Any]) -> ToolResult:
        """Call a tool on the connected MCP server and return its result.

        Returns structured content if the server provides it, otherwise the
        concatenated text content, or None if there is no content at all.

        Raises:
            Exception: If the server reports a tool error; the message is the
                error's text content.
            RuntimeError: If the client is not started.
        """
        result = await self.session.call_tool(tool_name, arguments=tool_args)

        if result.isError:
            raise Exception(self._extract_text(result.content))

        if result.structuredContent:
            return result.structuredContent

        if content := result.content:
            # "or None" maps an all-empty text result to None.
            return self._extract_text(content) or None

        return None

    def _extract_text(self, content: list[ContentBlock]) -> str:
        """Join the text of all TextContent blocks; non-text blocks are ignored."""
        text_elems = []
        for elem in content:
            if isinstance(elem, TextContent):
                text_elems.append(elem.text)
        return "\n".join(text_elems)

    @asynccontextmanager
    async def _mcp_session(self) -> AsyncIterator[ClientSession]:
        """Open the transport, create a session, and initialize it with a timeout."""
        # The transport context yields at least (read_stream, write_stream); the
        # streamable HTTP client yields an extra element, hence the *_ catch-all.
        async with self._mcp_client() as (read, write, *_):
            async with ClientSession(read, write) as session:
                await asyncio.wait_for(session.initialize(), timeout=self.connect_timeout)
                yield session

    def _mcp_client(self):
        """Select the transport client from server_params.

        "command" -> stdio; "url" -> Streamable HTTP or SSE, chosen by URL path
        ('/mcp' vs '/sse') or an explicit 'type' entry.
        """
        if "command" in self.server_params:
            return stdio_client(StdioServerParameters(**self.server_params))
        elif "url" in self.server_params:
            url = self.server_params["url"]
            # Everything except the routing keys is forwarded to the transport client.
            kwargs = {k: v for k, v in self.server_params.items() if k not in ["url", "type"]}

            if "/mcp" in url or self.server_params.get("type") == "streamable_http":
                return streamablehttp_client(url, **kwargs)
            elif "/sse" in url or self.server_params.get("type") == "sse":
                return sse_client(url, **kwargs)
            else:
                raise ValueError(
                    f"Unable to determine MCP client type from URL: {url}. "
                    "URL should contain '/mcp' or '/sse', or specify 'type' "
                    "as 'streamable_http' or 'sse'."
                )
        else:
            raise ValueError(f'Neither a "command" nor a "url" key in server_params: {self.server_params}')
60 | 61 | Raises: 62 | ToolRunnerError: If tool execution fails or approval is denied. 63 | """ 64 | async with aiohttp.ClientSession() as session: 65 | async with session.post(url=self.url, json=self._create_input_data(tool_name, tool_args)) as response: 66 | response.raise_for_status() 67 | response_json = await response.json() 68 | 69 | if "error" in response_json: 70 | raise ToolRunnerError(response_json["error"]) 71 | 72 | return response_json["result"] 73 | 74 | def run_sync(self, tool_name: str, tool_args: dict[str, Any]) -> dict[str, Any] | str | None: 75 | """Synchronous version of [`run`][ipybox.tool_exec.client.ToolRunner.run]. 76 | 77 | Args: 78 | tool_name: Name of the tool to execute. 79 | tool_args: Arguments to pass to the tool. 80 | 81 | Returns: 82 | The tool execution result. 83 | 84 | Raises: 85 | ToolRunnerError: If tool execution fails or approval is denied. 86 | """ 87 | response = requests.post(url=self.url, json=self._create_input_data(tool_name, tool_args)) 88 | response.raise_for_status() 89 | response_json = response.json() 90 | 91 | if "error" in response_json: 92 | raise ToolRunnerError(response_json["error"]) 93 | 94 | return response_json["result"] 95 | 96 | def _create_input_data(self, tool_name: str, tool_args: dict[str, Any]) -> dict[str, Any]: 97 | return { 98 | "server_name": self.server_name, 99 | "server_params": self.server_params, 100 | "tool_name": tool_name, 101 | "tool_args": to_jsonable_python(tool_args), 102 | } 103 | 104 | 105 | async def reset(host: str = "localhost", port: int = 8900): 106 | """Reset a `ToolServer`, stopping all started MCP servers. 107 | 108 | Args: 109 | host: Hostname of the `ToolServer`. 110 | port: Port number of the `ToolServer`. 
```python
from mcptools.<category>.<tool> import run, Params

result = run(Params(...))
print(result)  # or print(result.result) for Result types
```
A `ParseResult` model: 40 | 41 | ```python 42 | class ParseResult(BaseModel): 43 | """Parsed result containing structured data.""" 44 | 45 | model_config = ConfigDict( 46 | use_enum_values=True, 47 | ) 48 | <field_name>: <type> = Field(..., title="<Field title>") 49 | ``` 50 | 51 | 2. A `run_parsed()` function: 52 | 53 | ```python 54 | def run_parsed(params: Params) -> ParseResult: 55 | """Run tool and return parsed structured data. 56 | 57 | Args: 58 | params: Tool parameters 59 | 60 | Returns: 61 | ParseResult with structured data 62 | """ 63 | from mcpparse.<category>.<tool> import parse 64 | 65 | result = run(params) 66 | # For str return: return parse(result) 67 | # For Result return: return parse(result.result) 68 | return parse(result) 69 | ``` 70 | 71 | ### 4. Create parser module 72 | 73 | Create `mcpparse/<category>/<tool>.py` with: 74 | 75 | ```python 76 | from mcptools.<category>.<tool> import ParseResult 77 | 78 | 79 | class <Tool>ParseError(Exception): 80 | """Exception raised when parsing <tool> results fails.""" 81 | pass 82 | 83 | 84 | def parse(result: str) -> ParseResult: 85 | """Parse <tool> result into structured data. 86 | 87 | Args: 88 | result: Raw string result from the tool 89 | 90 | Returns: 91 | ParseResult with structured data 92 | 93 | Raises: 94 | <Tool>ParseError: If parsing fails 95 | """ 96 | # Implementation based on observed output structure 97 | ... 98 | return ParseResult(...) 99 | ``` 100 | 101 | ### 5. Test run_parsed() 102 | 103 | Call the `reset` tool of the `ipybox` MCP server to restart the IPython kernel so the next import loads the modified module. 104 | 105 | Then test with `execute_ipython_cell` using the same example inputs from step 1: 106 | 107 | ```python 108 | from mcptools.<category>.<tool> import run_parsed, Params 109 | 110 | result = run_parsed(Params(...)) 111 | print(result) 112 | ``` 113 | 114 | Verify that the `ParseResult` fields are correctly populated.
115 | 116 | ## Examples 117 | 118 | ### str return type (brave_web_search) 119 | 120 | **Original:** `run(params: Params) -> str` returns JSONL 121 | 122 | **Extended with:** 123 | - `SearchResult` model for individual items 124 | - `ParseResult` with `results: list[SearchResult]` 125 | - `run_parsed()` that parses JSONL into structured objects 126 | 127 | ### Result return type (search_abstracts) 128 | 129 | **Original:** `run(params: Params) -> Result` where `Result.result: str` 130 | 131 | **Extended with:** 132 | - `Article` model for individual items 133 | - `ParseResult` with `articles: list[Article]` 134 | - `run_parsed()` that parses `result.result` into structured objects 135 | -------------------------------------------------------------------------------- /examples/codexec.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import tempfile 3 | from pathlib import Path 4 | 5 | # --8<-- [start:imports] 6 | from ipybox import ( 7 | ApprovalRequest, 8 | CodeExecutionChunk, 9 | CodeExecutionResult, 10 | CodeExecutor, 11 | ) 12 | 13 | # --8<-- [end:imports] 14 | 15 | 16 | async def basic_execution(): 17 | # --8<-- [start:basic_execution] 18 | async with CodeExecutor() as executor: 19 | result = await executor.execute("print('hello world')") 20 | assert result.text == "hello world" 21 | # --8<-- [end:basic_execution] 22 | 23 | 24 | async def basic_approval(): 25 | # --8<-- [start:basic_approval] 26 | code = """ 27 | from mcptools.brave_search.brave_image_search import Params, Result, run 28 | 29 | result: Result = run(Params(query="neural topic models", count=3)) 30 | print(f"num results = {len(result.items)}") 31 | """ 32 | async with CodeExecutor() as executor: 33 | async for item in executor.stream(code): 34 | match item: 35 | case ApprovalRequest(): 36 | assert item.tool_name == "brave_image_search" 37 | assert item.tool_args["query"] == "neural topic models" 38 | assert item.tool_args["count"] == 3 39 
| await item.accept() 40 | case CodeExecutionResult(): 41 | assert item.text == "num results = 3" 42 | # --8<-- [end:basic_approval] 43 | 44 | 45 | async def basic_chunks(): 46 | # --8<-- [start:basic_chunks] 47 | code = """ 48 | from time import sleep 49 | print("chunk 1") 50 | sleep(0.5) 51 | print("chunk 2") 52 | """ 53 | async with CodeExecutor() as executor: 54 | async for item in executor.stream(code, chunks=True): 55 | match item: 56 | case CodeExecutionChunk(): 57 | assert item.text.strip() in ["chunk 1", "chunk 2"] 58 | case CodeExecutionResult(): 59 | assert item.text == "chunk 1\nchunk 2" 60 | # --8<-- [end:basic_chunks] 61 | 62 | 63 | async def basic_plotting(): 64 | # --8<-- [start:basic_plotting] 65 | code = """ 66 | import matplotlib.pyplot as plt 67 | plt.plot([1, 2, 3], [1, 4, 9]) 68 | plt.show() 69 | """ 70 | with tempfile.TemporaryDirectory() as images_dir: 71 | async with CodeExecutor(images_dir=Path(images_dir)) as executor: 72 | result = await executor.execute(code) 73 | assert len(result.images) == 1 74 | assert result.images[0].exists() 75 | assert result.images[0].suffix == ".png" 76 | # --8<-- [end:basic_plotting] 77 | 78 | 79 | async def custom_timeouts(): 80 | # --8<-- [start:custom_timeouts] 81 | # set custom approval timeout, default is 60 seconds 82 | async with CodeExecutor(approval_timeout=10) as executor: 83 | # set custom execution timeout, default is 120 seconds 84 | async for item in executor.stream("...", timeout=10): 85 | ... 
86 | # --8<-- [end:custom_timeouts] 87 | 88 | 89 | async def kernel_environment(): 90 | # --8<-- [start:kernel_environment] 91 | # IPython kernel does not inherit environment variables from parent process 92 | # Kernel environment must be explicitly set using the kernel_env parameter 93 | async with CodeExecutor(kernel_env={"TEST_VAR": "test_val"}) as executor: 94 | result = await executor.execute("import os; print(os.environ['TEST_VAR'])") 95 | assert result.text == "test_val" 96 | # --8<-- [end:kernel_environment] 97 | 98 | 99 | async def kernel_reset(): 100 | # --8<-- [start:kernel_reset] 101 | async with CodeExecutor() as executor: 102 | await executor.execute("x = 42") 103 | result = await executor.execute("print(x)") 104 | assert result.text == "42" 105 | 106 | await executor.reset() 107 | 108 | code = """ 109 | try: 110 | print(x) 111 | except NameError: 112 | print("x not defined") 113 | """ 114 | result = await executor.execute(code) 115 | assert result.text == "x not defined" 116 | # --8<-- [end:kernel_reset] 117 | 118 | 119 | async def working_directory(): 120 | # --8<-- [start:working_directory] 121 | async with CodeExecutor() as executor: 122 | import os 123 | 124 | result = await executor.execute("import os; print(os.getcwd())") 125 | assert result.text == os.getcwd() 126 | # --8<-- [end:working_directory] 127 | 128 | 129 | async def main(): 130 | await basic_execution() 131 | await basic_approval() 132 | await basic_chunks() 133 | await basic_plotting() 134 | await custom_timeouts() 135 | await kernel_environment() 136 | await kernel_reset() 137 | await working_directory() 138 | 139 | 140 | if __name__ == "__main__": 141 | asyncio.run(main()) 142 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: ipybox 2 | site_description: ipybox 3 | site_url: https://gradion-ai.github.io/ipybox/ 4 | 5 | repo_name: 
gradion-ai/ipybox 6 | repo_url: https://github.com/gradion-ai/ipybox 7 | 8 | copyright: Copyright © 2025 Gradion AI 9 | 10 | theme: 11 | name: material 12 | font: 13 | text: Fira 14 | code: Fira Code 15 | palette: 16 | - media: "(prefers-color-scheme)" 17 | scheme: default 18 | primary: indigo 19 | accent: indigo 20 | toggle: 21 | icon: material/link 22 | name: Switch to light mode 23 | - media: "(prefers-color-scheme: light)" 24 | scheme: default 25 | primary: indigo 26 | accent: indigo 27 | toggle: 28 | icon: material/toggle-switch 29 | name: Switch to dark mode 30 | - media: "(prefers-color-scheme: dark)" 31 | scheme: slate 32 | primary: black 33 | accent: indigo 34 | toggle: 35 | icon: material/toggle-switch-off 36 | name: Switch to system preference 37 | features: 38 | - navigation.tabs 39 | - navigation.sections 40 | - navigation.instant 41 | - navigation.instant.prefetch 42 | - navigation.instant.preview 43 | - navigation.instant.progress 44 | - navigation.top 45 | - navigation.tracking 46 | - search.suggest 47 | - toc.follow 48 | - content.code.copy 49 | - content.code.select 50 | - content.code.annotate 51 | 52 | plugins: 53 | - search 54 | - llmstxt: 55 | markdown_description: | 56 | ipybox is a Python code execution sandbox with first-class support for programmatic MCP tool calling. 57 | It generates a typed Python tool API from MCP server tool schemas, supporting both local stdio and 58 | remote HTTP servers. Code that calls the generated API executes in a sandboxed IPython kernel, 59 | providing a stateful environment where variables and definitions persist across executions. The 60 | generated API delegates MCP tool execution to a separate environment that enforces tool call 61 | approval, requiring applications to explicitly accept or reject each tool call before it executes. 
62 | ipybox is designed for agents that interact with their environment through code actions rather 63 | than JSON tool calls, a more reliable approach since LLMs are heavily pretrained on Python code. 64 | full_output: llms-full.txt 65 | sections: 66 | User Guide: 67 | - index.md: Project overview and features 68 | - installation.md: Installation and setup instructions 69 | - quickstart.md: Getting started guide 70 | - apigen.md: Generating typed Python API from MCP server schemas 71 | - codeexec.md: Stateful code execution in IPython kernels 72 | - sandbox.md: Lightweight sandboxing with Anthropic's sandbox-runtime 73 | - mcpserver.md: Using ipybox as an MCP server 74 | - ccplugin.md: a plugin for programmatic tool calling in Claude Code 75 | API Reference: 76 | - api/code_executor.md: CodeExecutor high-level API 77 | - api/tool_executor.md: ToolRunner and ToolServer low-level API 78 | - api/kernel_manager.md: KernelGateway and KernelClient low-level API 79 | - mkdocstrings: 80 | default_handler: python 81 | handlers: 82 | python: 83 | paths: [ipybox] 84 | options: 85 | show_root_heading: true 86 | show_source: false 87 | show_signature_annotations: true 88 | signature_crossrefs: true 89 | separate_signature: true 90 | merge_init_into_class: true 91 | extensions: 92 | - griffe_pydantic 93 | inventories: 94 | - url: https://docs.python.org/3/objects.inv 95 | domains: [py, std] 96 | - redirects: 97 | redirect_maps: 98 | 'mcp-client.md': 'apigen.md' 99 | 'mcp-server.md': 'mcpserver.md' 100 | 'docker.md': 'mcpserver.md#docker' 101 | 102 | 103 | markdown_extensions: 104 | - footnotes 105 | - pymdownx.highlight: 106 | anchor_linenums: true 107 | line_spans: __span 108 | pygments_lang_class: true 109 | - pymdownx.inlinehilite 110 | - pymdownx.snippets: 111 | dedent_subsections: true 112 | - pymdownx.superfences 113 | - pymdownx.details 114 | - attr_list 115 | - md_in_html 116 | - admonition 117 | - pymdownx.emoji: 118 | emoji_index: 
!!python/name:material.extensions.emoji.twemoji 119 | emoji_generator: !!python/name:material.extensions.emoji.to_svg 120 | 121 | extra_css: 122 | - stylesheets/extra.css 123 | 124 | nav: 125 | - User Guide: 126 | - Overview: index.md 127 | - Installation: installation.md 128 | - Quickstart: quickstart.md 129 | - API generation: apigen.md 130 | - Code execution: codeexec.md 131 | - Sandboxing: sandbox.md 132 | - Agent Integration: 133 | - ipybox MCP Server: mcpserver.md 134 | - Claude Code plugin: ccplugin.md 135 | - API Reference: 136 | - High-level API: 137 | - Code Executor: api/code_executor.md 138 | - Low-level API: 139 | - Tool Executor: api/tool_executor.md 140 | - Kernel Manager: api/kernel_manager.md 141 | -------------------------------------------------------------------------------- /ipybox/tool_exec/approval/server.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import uuid 3 | from typing import Any 4 | 5 | from fastapi import WebSocket, WebSocketDisconnect 6 | 7 | 8 | class ApprovalChannel: 9 | """Server-side channel for tool call approval over WebSocket. 10 | 11 | `ApprovalChannel` accepts WebSocket connections from an 12 | [`ApprovalClient`][ipybox.tool_exec.approval.client.ApprovalClient], sends approval 13 | requests via JSON-RPC, and processes approval responses. 14 | 15 | When `approval_required` is `False`, all approval requests are automatically granted. 16 | When `True`, requests are sent to the connected `ApprovalClient` and the channel waits 17 | for a response within the configured timeout. 18 | """ 19 | 20 | def __init__( 21 | self, 22 | approval_required: bool = False, 23 | approval_timeout: float = 60, 24 | ): 25 | """ 26 | Args: 27 | approval_required: Whether approval is required for tool execution. 28 | approval_timeout: Timeout in seconds for approval requests. 
29 | """ 30 | self.approval_required = approval_required 31 | self.approval_timeout = approval_timeout 32 | 33 | self._websocket: WebSocket | None = None 34 | self._requests: dict[str, asyncio.Future[bool]] = {} 35 | 36 | self._closed = asyncio.Event() 37 | self._closed.set() # Initially closed 38 | 39 | @property 40 | def open(self) -> bool: 41 | """Whether an `ApprovalClient` is currently connected.""" 42 | return not self._closed.is_set() 43 | 44 | async def join(self, timeout: float = 5): 45 | """Wait for the this approval channel to close. 46 | 47 | Args: 48 | timeout: Timeout in seconds to wait. 49 | """ 50 | await asyncio.wait_for(self._closed.wait(), timeout=timeout) 51 | 52 | async def connect(self, websocket: WebSocket): 53 | """Accept a WebSocket connection and process approval responses. 54 | 55 | This method runs until the WebSocket disconnects. 56 | 57 | Args: 58 | websocket: The WebSocket connection to accept. 59 | """ 60 | await websocket.accept() 61 | self._websocket = websocket 62 | self._closed.clear() # Mark as open 63 | 64 | try: 65 | while True: 66 | approval_response = await self._websocket.receive_json() 67 | await self._handle_approval_response(approval_response) 68 | except WebSocketDisconnect: 69 | await self.disconnect() 70 | 71 | async def disconnect(self): 72 | """Disconnect the WebSocket and error all pending approval requests.""" 73 | if self._websocket is not None: 74 | self._websocket = None 75 | for future in self._requests.values(): 76 | if not future.done(): 77 | future.set_exception(RuntimeError("Approval channel disconnected")) 78 | self._requests.clear() 79 | self._closed.set() # Signal closed 80 | 81 | async def request(self, server_name: str, tool_name: str, tool_args: dict[str, Any]) -> bool: 82 | """Request approval for a tool call. 83 | 84 | If `approval_required` is `False`, returns `True` immediately. Otherwise, sends an 85 | approval request to the connected `ApprovalClient` and waits for a response. 
86 | 87 | Args: 88 | server_name: Name of the MCP server providing the tool. 89 | tool_name: Name of the tool to execute. 90 | tool_args: Arguments to pass to the tool. 91 | 92 | Returns: 93 | `True` if accepted, `False` if rejected. 94 | 95 | Raises: 96 | RuntimeError: If no `ApprovalClient` is connected. 97 | TimeoutError: If the approval request times out. 98 | """ 99 | if not self.approval_required: 100 | return True 101 | 102 | if self._websocket is None: 103 | raise RuntimeError("Approval channel not connected") 104 | 105 | request_id: str | None = None 106 | 107 | try: 108 | async with asyncio.timeout(self.approval_timeout): 109 | request_id = await self._send_approval_request(server_name, tool_name, tool_args) 110 | return await self._requests[request_id] 111 | finally: 112 | if request_id is not None: 113 | self._requests.pop(request_id, None) 114 | 115 | async def _send_approval_request(self, server_name: str, tool_name: str, tool_args: dict[str, Any]) -> str: 116 | request_id = str(uuid.uuid4()) 117 | approval_request = { 118 | "jsonrpc": "2.0", 119 | "method": "approve", 120 | "params": {"server_name": server_name, "tool_name": tool_name, "tool_args": tool_args}, 121 | "id": request_id, 122 | } 123 | 124 | future = asyncio.Future[bool]() 125 | self._requests[request_id] = future 126 | 127 | await self._websocket.send_json(approval_request) # type: ignore 128 | return request_id 129 | 130 | async def _handle_approval_response(self, response: dict[str, Any]): 131 | request_id = response["id"] 132 | if future := self._requests.get(request_id, None): 133 | future.set_result(response["result"]) 134 | -------------------------------------------------------------------------------- /tests/integration/test_mcp_apigen.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import os 3 | import sys 4 | import tempfile 5 | from pathlib import Path 6 | 7 | import pytest 8 | import pytest_asyncio 9 | 10 | from 
ipybox.mcp_apigen import generate_mcp_sources 11 | from ipybox.tool_exec.server import ToolServer 12 | from ipybox.utils import arun 13 | from tests.integration.mcp_server import STDIO_SERVER_PATH 14 | 15 | TOOL_SERVER_PORT = 8920 16 | MCP_SERVER_NAME = "test_mcp" 17 | 18 | 19 | @pytest_asyncio.fixture(scope="module") 20 | async def generated_package(): 21 | """Generate a Python tool API to a temp directory.""" 22 | server_params = { 23 | "command": "python", 24 | "args": [str(STDIO_SERVER_PATH)], 25 | } 26 | 27 | with tempfile.TemporaryDirectory() as tmp_dir: 28 | root_dir = Path(tmp_dir) 29 | 30 | # Generate sources 31 | tool_names = await generate_mcp_sources( 32 | server_name=MCP_SERVER_NAME, 33 | server_params=server_params, 34 | root_dir=root_dir, 35 | ) 36 | 37 | # Add temp dir to sys.path for imports 38 | sys.path.insert(0, str(root_dir)) 39 | 40 | yield { 41 | "root_dir": root_dir, 42 | "package_dir": root_dir / MCP_SERVER_NAME, 43 | "tool_names": tool_names, 44 | "server_params": server_params, 45 | } 46 | 47 | # Cleanup sys.path and modules 48 | sys.path.remove(str(root_dir)) 49 | modules_to_remove = [k for k in sys.modules if k.startswith(MCP_SERVER_NAME)] 50 | for mod in modules_to_remove: 51 | del sys.modules[mod] 52 | 53 | 54 | @pytest_asyncio.fixture 55 | async def tool_server(): 56 | """Start a ToolServer for executing the generated API.""" 57 | async with ToolServer(port=TOOL_SERVER_PORT, log_level="WARNING") as server: 58 | yield server 59 | 60 | 61 | class TestGenerateMcpSources: 62 | """Tests for generate_mcp_sources function.""" 63 | 64 | def test_generates_expected_files(self, generated_package: dict): 65 | """Test that expected module files are generated.""" 66 | package_dir = generated_package["package_dir"] 67 | 68 | assert (package_dir / "__init__.py").exists() 69 | assert (package_dir / "tool_1.py").exists() # tool-1 sanitized 70 | assert (package_dir / "tool_2.py").exists() 71 | assert (package_dir / "tool_3.py").exists() 72 | 73 | def 
test_returns_sanitized_tool_names(self, generated_package: dict): 74 | """Test that generate_mcp_sources returns sanitized tool names.""" 75 | tool_names = generated_package["tool_names"] 76 | 77 | assert "tool_1" in tool_names # tool-1 sanitized to tool_1 78 | assert "tool_2" in tool_names 79 | assert "tool_3" in tool_names 80 | 81 | @pytest.mark.asyncio 82 | async def test_tool_with_unstructured_output(self, generated_package: dict, tool_server: ToolServer): 83 | """Test executing a generated tool with unstructured (string) output.""" 84 | # Set environment variables for the generated CLIENT 85 | os.environ["TOOL_SERVER_PORT"] = str(TOOL_SERVER_PORT) 86 | 87 | # Import generated module 88 | tool_2 = importlib.import_module(f"{MCP_SERVER_NAME}.tool_2") 89 | 90 | def call_tool(): 91 | return tool_2.run(tool_2.Params(s="hello")) 92 | 93 | result = await arun(call_tool) 94 | assert result == "You passed to tool 2: hello" 95 | 96 | @pytest.mark.asyncio 97 | async def test_hyphenated_tool_name_works(self, generated_package: dict, tool_server: ToolServer): 98 | """Test that tool-1 (hyphenated) is accessible via sanitized name tool_1.""" 99 | os.environ["TOOL_SERVER_PORT"] = str(TOOL_SERVER_PORT) 100 | 101 | # tool-1 was renamed in MCP server, but we access it via sanitized module name tool_1 102 | tool_1 = importlib.import_module(f"{MCP_SERVER_NAME}.tool_1") 103 | 104 | def call_tool(): 105 | return tool_1.run(tool_1.Params(s="hyphen_test")) 106 | 107 | result = await arun(call_tool) 108 | assert result == "You passed to tool 1: hyphen_test" 109 | 110 | @pytest.mark.asyncio 111 | async def test_tool_with_structured_output(self, generated_package: dict, tool_server: ToolServer): 112 | """Test executing a generated tool with structured output.""" 113 | os.environ["TOOL_SERVER_PORT"] = str(TOOL_SERVER_PORT) 114 | 115 | # Import generated module with structured output 116 | tool_3 = importlib.import_module(f"{MCP_SERVER_NAME}.tool_3") 117 | 118 | def call_tool(): 119 | 
return tool_3.run(tool_3.Params(name="test", level=2)) 120 | 121 | result = await arun(call_tool) 122 | 123 | # Result should be a Pydantic model instance 124 | assert hasattr(result, "status") 125 | assert hasattr(result, "inner") 126 | assert hasattr(result, "count") 127 | 128 | assert result.status == "completed_test" 129 | assert result.count == 4 # len("test") 130 | assert result.inner.code == 200 # level * 100 131 | assert result.inner.details == "Processing test at level 2" 132 | 133 | @pytest.mark.asyncio 134 | async def test_multiple_tool_calls(self, generated_package: dict, tool_server: ToolServer): 135 | """Test multiple sequential calls to the generated API.""" 136 | os.environ["TOOL_SERVER_PORT"] = str(TOOL_SERVER_PORT) 137 | 138 | tool_1 = importlib.import_module(f"{MCP_SERVER_NAME}.tool_1") 139 | tool_2 = importlib.import_module(f"{MCP_SERVER_NAME}.tool_2") 140 | 141 | def call_tools(): 142 | r1 = tool_1.run(tool_1.Params(s="first")) 143 | r2 = tool_2.run(tool_2.Params(s="second")) 144 | r3 = tool_1.run(tool_1.Params(s="third")) 145 | return r1, r2, r3 146 | 147 | result = await arun(call_tools) 148 | assert result[0] == "You passed to tool 1: first" 149 | assert result[1] == "You passed to tool 2: second" 150 | assert result[2] == "You passed to tool 1: third" 151 | -------------------------------------------------------------------------------- /ipybox/tool_exec/approval/client.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import json 3 | import logging 4 | from functools import partial 5 | from typing import Any, Awaitable, Callable 6 | 7 | import websockets 8 | from websockets import ClientConnection, ConnectionClosed 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | class ApprovalRequest: 14 | """An MCP tool call approval request. 
15 | 16 | `ApprovalRequest` instances are passed to the approval callback registered with 17 | [`ApprovalClient`][ipybox.tool_exec.approval.client.ApprovalClient]. The callback 18 | must call [`accept`][ipybox.tool_exec.approval.client.ApprovalRequest.accept] 19 | or [`reject`][ipybox.tool_exec.approval.client.ApprovalRequest.reject] for making 20 | an approval decision. 21 | 22 | Example: 23 | ```python 24 | async def on_approval_request(request: ApprovalRequest): 25 | print(f"Approval request: {request}") 26 | if request.tool_name == "dangerous_tool": 27 | await request.reject() 28 | else: 29 | await request.accept() 30 | ``` 31 | """ 32 | 33 | def __init__( 34 | self, 35 | server_name: str, 36 | tool_name: str, 37 | tool_args: dict[str, Any], 38 | respond: Callable[[bool], Awaitable[None]], 39 | ): 40 | """ 41 | Args: 42 | server_name: Name of the MCP server providing the tool. 43 | tool_name: Name of the tool to execute. 44 | tool_args: Arguments to pass to the tool. 45 | respond: Function to make an approval decision. 46 | """ 47 | self.server_name = server_name 48 | self.tool_name = tool_name 49 | self.tool_args = tool_args 50 | self._respond = respond 51 | 52 | def __str__(self) -> str: 53 | kwargs_str = ", ".join([f"{k}={repr(v)}" for k, v in self.tool_args.items()]) 54 | return f"{self.server_name}.{self.tool_name}({kwargs_str})" 55 | 56 | async def accept(self): 57 | """Accept the approval request.""" 58 | return await self._respond(True) 59 | 60 | async def reject(self): 61 | """Reject the approval request.""" 62 | return await self._respond(False) 63 | 64 | 65 | ApprovalCallback = Callable[[ApprovalRequest], Awaitable[None]] 66 | """Type alias for approval callback functions. 67 | 68 | An approval callback is an async function that receives an 69 | [`ApprovalRequest`][ipybox.tool_exec.approval.client.ApprovalRequest] and must call 70 | one of its response methods (`accept()` or `reject()`) to make an approval decision. 
71 | """ 72 | 73 | 74 | class ApprovalClient: 75 | """Client for handling tool call approval requests. 76 | 77 | `ApprovalClient` connects to a [`ToolServer`][ipybox.tool_exec.server.ToolServer]'s 78 | [`ApprovalChannel`][ipybox.tool_exec.approval.server.ApprovalChannel] and receives 79 | approval requests. Each request is passed to the registered callback, which must 80 | accept or reject the request. 81 | 82 | Example: 83 | ```python 84 | async def on_approval_request(request: ApprovalRequest): 85 | print(f"Approval request: {request}") 86 | await request.accept() 87 | 88 | async with ApprovalClient(callback=on_approval_request): 89 | # Execute code that triggers MCP tool calls 90 | ... 91 | ``` 92 | """ 93 | 94 | def __init__( 95 | self, 96 | callback: ApprovalCallback, 97 | host: str = "localhost", 98 | port: int = 8900, 99 | ): 100 | """ 101 | Args: 102 | callback: Async function called for each approval request. 103 | host: Hostname of the `ToolServer`. 104 | port: Port number of the `ToolServer`. 
105 | """ 106 | self.callback = callback 107 | self.host = host 108 | self.port = port 109 | 110 | self._uri = f"ws://{host}:{port}/approval" 111 | self._conn: ClientConnection | None = None 112 | self._task: asyncio.Task | None = None 113 | 114 | async def __aenter__(self): 115 | await self.connect() 116 | return self 117 | 118 | async def __aexit__(self, exc_type, exc_val, exc_tb): 119 | await self.disconnect() 120 | 121 | async def connect(self): 122 | """Connect to a `ToolServer`'s `ApprovalChannel`.""" 123 | self._conn = await websockets.connect(self._uri) 124 | self._task = asyncio.create_task(self._recv()) 125 | 126 | async def disconnect(self): 127 | """Disconnect from the `ToolServer`'s `ApprovalChannel`.""" 128 | if self._conn: 129 | await self._conn.close() 130 | self._conn = None 131 | if self._task: 132 | await self._task 133 | self._task = None 134 | 135 | async def _send(self, result: bool, request_id: str): 136 | if not self._conn: 137 | raise RuntimeError("Not connected") 138 | 139 | response = { 140 | "jsonrpc": "2.0", 141 | "result": result, 142 | "id": request_id, 143 | } 144 | await self._conn.send(json.dumps(response)) 145 | 146 | async def _recv(self): 147 | if not self._conn: 148 | raise RuntimeError("Not connected") 149 | 150 | try: 151 | async for msg in self._conn: 152 | data = json.loads(msg) 153 | 154 | if data.get("method") == "approve": 155 | params = data.get("params", {}) 156 | approval = ApprovalRequest( 157 | server_name=params["server_name"], 158 | tool_name=params["tool_name"], 159 | tool_args=params["tool_args"], 160 | respond=partial(self._send, request_id=data["id"]), 161 | ) 162 | try: 163 | await self.callback(approval) 164 | except Exception: 165 | logger.exception("Error in approval callback") 166 | 167 | except ConnectionClosed: 168 | pass 169 | -------------------------------------------------------------------------------- /ipybox/kernel_mgr/server.py: 
-------------------------------------------------------------------------------- 1 | import argparse 2 | import asyncio 3 | import os 4 | import sys 5 | from pathlib import Path 6 | 7 | import psutil 8 | 9 | 10 | class KernelGateway: 11 | """Manages a Jupyter Kernel Gateway process. 12 | 13 | The kernel gateway provides a REST and WebSocket API for creating and 14 | communicating with IPython kernels. Use 15 | [`KernelClient`][ipybox.kernel_mgr.client.KernelClient] to create and 16 | connect to an IPython kernel and execute code. 17 | 18 | When sandboxing is enabled, the gateway runs inside Anthropic's 19 | [sandbox-runtime](https://github.com/anthropic-experimental/sandbox-runtime), 20 | providing secure isolation for code execution. 21 | 22 | Example: 23 | ```python 24 | async with KernelGateway(host="localhost", port=8888) as gateway: 25 | # Gateway is running, connect with KernelClient 26 | await gateway.join() # Wait until gateway stops 27 | ``` 28 | """ 29 | 30 | def __init__( 31 | self, 32 | host: str = "localhost", 33 | port: int = 8888, 34 | sandbox: bool = False, 35 | sandbox_config: Path | None = None, 36 | log_level: str = "INFO", 37 | log_to_stderr: bool = False, 38 | env: dict[str, str] | None = None, 39 | ): 40 | """ 41 | Args: 42 | host: Hostname or IP address to bind the gateway to. 43 | port: Port number the gateway listens on. 44 | sandbox: Whether to run the gateway inside the sandbox-runtime. 45 | sandbox_config: Path to a JSON file with sandbox configuration. 46 | See the Configuration section of the 47 | [sandbox-runtime](https://github.com/anthropic-experimental/sandbox-runtime) 48 | README for available options. 49 | log_level: Logging level for the gateway process. 50 | log_to_stderr: Whether to redirect gateway logs to stderr. 51 | env: Environment variables to set for kernels created by the gateway. 52 | Kernels do not inherit environment variables from the parent 53 | process, so any required variables must be explicitly provided. 
54 | """ 55 | self.host = host 56 | self.port = port 57 | 58 | self.sandbox = sandbox 59 | self.sandbox_config = sandbox_config 60 | 61 | self.log_level = log_level 62 | self.log_to_stderr = log_to_stderr 63 | self.env = env or {} 64 | 65 | self._process: asyncio.subprocess.Process | None = None 66 | 67 | async def __aenter__(self): 68 | await self.start() 69 | return self 70 | 71 | async def __aexit__(self, exc_type, exc_value, traceback): 72 | await self.stop() 73 | 74 | async def start(self): 75 | """Starts the kernel gateway process. 76 | 77 | Raises: 78 | RuntimeError: If the gateway is already running. 79 | """ 80 | if self._process is not None: 81 | raise RuntimeError("Kernel gateway is already running") 82 | 83 | jupyter_path = Path(sys.prefix) / "bin" / "jupyter" 84 | log_level = "WARN" if self.log_level == "WARNING" else self.log_level 85 | 86 | cmd = [ 87 | str(jupyter_path), 88 | "kernelgateway", 89 | f"--KernelGatewayApp.ip={self.host}", 90 | f"--KernelGatewayApp.port={self.port}", 91 | f"--KernelGatewayApp.log_level={log_level}", 92 | "--KernelGatewayApp.port_retries=0", 93 | "--KernelGatewayApp.answer_yes=True", 94 | ] 95 | 96 | if self.sandbox: 97 | settings_path = self.sandbox_config or Path(__file__).parent / "sandbox.json" 98 | cmd = ["srt", "--settings", str(settings_path)] + cmd 99 | 100 | process_env = {"PATH": os.environ.get("PATH", "")} 101 | process_env.update(self.env) 102 | 103 | self._process = await asyncio.create_subprocess_exec( 104 | *cmd, 105 | env=process_env, 106 | stdout=sys.stderr if self.log_to_stderr else None, 107 | ) 108 | 109 | async def stop(self, timeout: float = 10): 110 | """Stops the kernel gateway process. 111 | 112 | Terminates the gateway and all child processes. If the process doesn't 113 | stop within the timeout, it is forcefully killed. 114 | 115 | Args: 116 | timeout: Maximum time in seconds to wait for graceful termination. 
117 | """ 118 | if self._process is None: 119 | return 120 | 121 | if self._process.returncode is None: 122 | try: 123 | parent = psutil.Process(self._process.pid) 124 | children = parent.children(recursive=True) 125 | except psutil.NoSuchProcess: 126 | children = [] 127 | 128 | for child in children: 129 | try: 130 | child.terminate() 131 | except psutil.NoSuchProcess: 132 | pass 133 | 134 | self._process.terminate() 135 | 136 | try: 137 | await asyncio.wait_for(self.join(), timeout=timeout) 138 | except asyncio.TimeoutError: 139 | self._process.kill() 140 | await self.join() 141 | 142 | self._process = None 143 | 144 | async def join(self): 145 | """Waits for the kernel gateway process to exit.""" 146 | if self._process is None: 147 | return 148 | 149 | try: 150 | await self._process.wait() 151 | except asyncio.CancelledError: 152 | pass 153 | 154 | 155 | async def main(): 156 | parser = argparse.ArgumentParser( 157 | description="Start a Jupyter Kernel Gateway", 158 | formatter_class=argparse.ArgumentDefaultsHelpFormatter, 159 | ) 160 | parser.add_argument( 161 | "--host", 162 | type=str, 163 | default="localhost", 164 | help="The hostname to bind the gateway to", 165 | ) 166 | parser.add_argument( 167 | "--port", 168 | type=int, 169 | default=8888, 170 | help="The port to bind the gateway to", 171 | ) 172 | parser.add_argument( 173 | "--log-level", 174 | type=str, 175 | default="INFO", 176 | choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], 177 | help="The logging level for the gateway", 178 | ) 179 | args = parser.parse_args() 180 | 181 | async with KernelGateway( 182 | host=args.host, 183 | port=args.port, 184 | log_level=args.log_level, 185 | ) as gateway: 186 | await gateway.join() 187 | 188 | 189 | if __name__ == "__main__": 190 | asyncio.run(main()) 191 | -------------------------------------------------------------------------------- /docs/mcpserver.md: -------------------------------------------------------------------------------- 1 | # MCP 
server

[ipybox](index.md) is a Python code execution sandbox with first-class support for programmatic MCP tool calling. Code executes in a sandboxed IPython Kernel, providing a stateful environment where variables and definitions persist across executions.

When run as an MCP server, it exposes these capabilities to MCP clients like Claude Code or Claude Desktop. Agents can register MCP servers, then execute Python code that uses them programmatically:

1. Agent calls [`register_mcp_server`](#register_mcp_server) to [generate a typed Python API](apigen.md) for the tools of an MCP server
2. Agent calls [`execute_ipython_cell`](#execute_ipython_cell) to [execute Python code](codeexec.md) that imports and uses the generated API

!!! tip "Application example"

    An application example of this MCP server is the [programmatic tool calling plugin](ccplugin.md) for Claude Code.

## Configuration

```json
{
  "mcpServers": {
    "ipybox": {
      "command": "uvx",
      "args": [
        "ipybox",
        "--workspace",
        "/path/to/workspace"
      ]
    }
  }
}
```

## Workspace

The `--workspace` option specifies the ipybox working directory; the default is `"."`. Generated [Python tool APIs](apigen.md) are written to `mcptools/` in the workspace, and [code execution](#execute_ipython_cell) uses the workspace as its working directory.

## Environment variables

Environment variables can be passed to ipybox either via an `"env"` key in the MCP [configuration](#configuration) or in a `.env` file in the workspace directory:

```env title="/path/to/workspace/.env"
API_KEY_1=...
API_KEY_2=...
KERNEL_ENV_SECRET_1=...
KERNEL_ENV_SECRET_2=...
```

These variables are available to MCP servers registered with ipybox but are not passed to the IPython kernel by default. To make them available to the kernel, use the `KERNEL_ENV_` prefix.
For example, `KERNEL_ENV_SECRET_1` is available as `SECRET_1` in the kernel. 47 | 48 | ## Usage example 49 | 50 | This example shows a typical workflow using the [Brave Search MCP server](https://github.com/modelcontextprotocol/servers/tree/main/src/brave-search). First, configure the ipybox MCP server with a [BRAVE_API_KEY](quickstart.md#get-a-brave-api-key): 51 | 52 | ```json 53 | { 54 | "mcpServers": { 55 | "ipybox": { 56 | "command": "uvx", 57 | "args": ["ipybox", "--workspace", "/path/to/workspace"], 58 | "env": { 59 | "BRAVE_API_KEY": "your-api-key" 60 | } 61 | } 62 | } 63 | } 64 | ``` 65 | 66 | or add the API key to a `.env` file in the workspace directory: 67 | 68 | ```env title="/path/to/workspace/.env" 69 | BRAVE_API_KEY=your-api-key 70 | ``` 71 | 72 | An agent then registers the Brave Search MCP server by calling `register_mcp_server` with the following argument: 73 | 74 | ```json 75 | { 76 | "server_name": "brave_search", 77 | "server_params": { 78 | "command": "npx", 79 | "args": ["-y", "@anthropic/mcp-server-brave-search"], 80 | "env": {"BRAVE_API_KEY": "${BRAVE_API_KEY}"} 81 | } 82 | } 83 | ``` 84 | 85 | The `${BRAVE_API_KEY}` placeholder is replaced with the actual value from the MCP configuration or the `.env` file. ipybox connects to the Brave Search MCP server and generates a Python tool API under `mcptools/brave_search/`. 86 | 87 | After registration, the agent calls `execute_ipython_cell` with Python code that uses the generated API: 88 | 89 | ```python 90 | from mcptools.brave_search.brave_web_search import Params, run 91 | 92 | result = run(Params(query="Python asyncio tutorial", count=3)) 93 | print(result) 94 | ``` 95 | 96 | The printed result is returned and added to the agent's context window. 97 | 98 | ## Tools 99 | 100 | The ipybox MCP server exposes four tools. 101 | 102 | ### `register_mcp_server` 103 | 104 | Connects to an MCP server and [generates a Python API](apigen.md) for its tools under `mcptools/{server_name}/`. 
105 | 106 | Parameters: 107 | 108 | - `server_name` — Application-defined MCP server name (valid Python identifier) 109 | - `server_params` — Server config: `{"command", "args", "env"}` for stdio or `{"url", "headers"}` for HTTP 110 | 111 | ### `execute_ipython_cell` 112 | 113 | Executes Python code in a stateful IPython kernel. Executed code can use the generated Python tool API of [registered MCP servers](#register_mcp_server). MCP tool calls from executed code are [auto-approved](codeexec.md#basic-execution). 114 | 115 | Parameters: 116 | 117 | - `code` — Python code to execute 118 | - `timeout` — Maximum execution time in seconds (default: 120) 119 | - `max_output_chars` — Output character limit (default: 5000) 120 | 121 | Returns the execution output. 122 | 123 | ### `install_package` 124 | 125 | Installs a Python package via `pip`. Supports version specifiers and git URLs. 126 | 127 | Parameters: 128 | 129 | - `package_name` — Package spec (e.g., `requests`, `numpy>=1.20.0`, or `git+https://...`) 130 | 131 | ### `reset` 132 | 133 | Creates a new kernel, clearing all variables and imports. Installed packages and generated `mcptools/` persist. 134 | 135 | ## Sandboxing 136 | 137 | To isolate code execution via Anthropic's [sandbox-runtime](https://github.com/anthropic-experimental/sandbox-runtime), enable [sandboxing](sandbox.md) with the `--sandbox` option: 138 | 139 | ```json 140 | { 141 | "mcpServers": { 142 | "ipybox": { 143 | "command": "uvx", 144 | "args": [ 145 | "ipybox", 146 | "--workspace", 147 | "/path/to/workspace", 148 | "--sandbox", 149 | "--sandbox-config", 150 | "/path/to/sandbox-config.json" 151 | ] 152 | } 153 | } 154 | } 155 | ``` 156 | 157 | The default sandbox configuration permits reading all files except `.env` and writing to the current directory and subdirectories (plus IPython directories). Access to internet is blocked. An optional custom sandbox configuration can be passed with the `--sandbox-config` option. 158 | 159 | !!! 
info 160 | 161 | Sandboxing with [sandbox-runtime](https://github.com/anthropic-experimental/sandbox-runtime) currently works on Mac OS only. On Linux and Windows, you can either run ipybox without sandboxing or as a [Docker container](#docker). 162 | 163 | ## Docker 164 | 165 | ipybox can be run as a Docker container. Clone the [project](https://github.com/gradion-ai/ipybox) and build the image: 166 | 167 | ```bash 168 | git clone https://github.com/gradion-ai/ipybox.git 169 | cd ipybox 170 | ./docker-build.sh 171 | ``` 172 | 173 | The build script creates a container user with your UID/GID, ensuring files generated by ipybox in the mounted workspace are owned by you and can be edited on the host. 174 | 175 | Then configure the MCP server: 176 | 177 | ```json 178 | { 179 | "mcpServers": { 180 | "ipybox": { 181 | "command": "docker", 182 | "args": [ 183 | "run", 184 | "-i", 185 | "--rm", 186 | "-v", 187 | "/path/to/workspace:/app/workspace", 188 | "ipybox" 189 | ] 190 | } 191 | } 192 | } 193 | ``` 194 | 195 | The workspace `/path/to/workspace` on the host is mounted to `/app/workspace` inside the container. 
196 | -------------------------------------------------------------------------------- /ipybox/mcp_apigen.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import json 3 | import re 4 | import shutil 5 | from collections import defaultdict 6 | from pathlib import Path 7 | from typing import Any 8 | 9 | import aiofiles 10 | import aiofiles.os 11 | from datamodel_code_generator import DataModelType, PythonVersion 12 | from datamodel_code_generator.model import get_data_model_types 13 | from datamodel_code_generator.model.base import ALL_MODEL 14 | from datamodel_code_generator.parser.jsonschema import JsonSchemaParser 15 | 16 | from ipybox.mcp_client import MCPClient 17 | 18 | INIT_TEMPLATE = """ 19 | import os 20 | from ipybox.tool_exec.client import ToolRunner 21 | 22 | CLIENT = ToolRunner( 23 | server_name="{server_name}", 24 | server_params={server_params}, 25 | host=os.environ.get("TOOL_SERVER_HOST", "localhost"), 26 | port=int(os.environ.get("TOOL_SERVER_PORT", "8900")), 27 | ) 28 | """ 29 | 30 | FUNCTION_TEMPLATE_UNSTRUCTURED = ''' 31 | from . import CLIENT 32 | 33 | def run(params: Params) -> str: 34 | """{description} 35 | """ 36 | return CLIENT.run_sync(tool_name="{original_name}", tool_args=params.model_dump(exclude_none=True)) 37 | ''' 38 | 39 | FUNCTION_TEMPLATE_STRUCTURED = ''' 40 | from . 
import CLIENT

def run(params: Params) -> Result:
    """{description}
    """
    result = CLIENT.run_sync(tool_name="{original_name}", tool_args=params.model_dump(exclude_none=True))
    return Result.model_validate(result)
'''


def generate_init_definition(server_name: str, server_params: dict[str, Any]):
    # Render the package __init__.py that wires the generated modules to a
    # ToolRunner client. server_params is embedded via its repr() into the
    # generated source.
    return INIT_TEMPLATE.format(server_name=server_name, server_params=server_params)


def indent_description(description: str, indent: str = "    ") -> str:
    """Indent all lines of a description after the first line."""
    # The first line stays flush with the opening triple quote of the
    # generated docstring; continuation lines are indented to match.
    # Empty lines are left empty (no trailing whitespace).
    lines = description.split("\n")
    if len(lines) <= 1:
        return description
    return lines[0] + "\n" + "\n".join(indent + line if line else line for line in lines[1:])


def generate_function_definition(original_name: str, description: str, structured_output: bool) -> str:
    # Render the `run()` function for one tool. The structured template
    # returns a validated `Result` model; the unstructured one returns str.
    template = FUNCTION_TEMPLATE_STRUCTURED if structured_output else FUNCTION_TEMPLATE_UNSTRUCTURED
    indented_description = indent_description(description)
    return template.format(
        original_name=original_name,
        # Escape embedded triple quotes so the tool description cannot
        # terminate the generated docstring prematurely.
        description=indented_description.replace('"""', '\\"\\"\\"'),
    )


def generate_input_model_code(schema: dict[str, Any]) -> str:
    # Input schema becomes the `Params` Pydantic model.
    return _generate_model_code(schema, "Params")


def generate_output_model_code(schema: dict[str, Any]) -> str:
    # Output schema becomes the `Result` Pydantic model.
    return _generate_model_code(schema, "Result")


def _generate_model_code(schema: dict[str, Any], class_name: str) -> str:
    # Generate Pydantic v2 model source named `class_name` from a JSON schema
    # using datamodel-code-generator.
    data_model_types = get_data_model_types(
        data_model_type=DataModelType.PydanticV2BaseModel,
        target_python_version=PythonVersion.PY_311,
    )

    # Apply use_enum_values to every generated model so enum-typed fields
    # carry plain values after validation.
    extra_template_data = defaultdict(dict)  # type: ignore
    extra_template_data[ALL_MODEL]["config"] = {"use_enum_values": True}

    parser = JsonSchemaParser(
        source=json.dumps(schema),
        class_name=class_name,
        data_model_type=data_model_types.data_model,
        data_model_root_type=data_model_types.root_model,
        data_model_field_type=data_model_types.field_model,
        data_type_manager_type=data_model_types.data_type_manager,
        dump_resolve_reference_action=data_model_types.dump_resolve_reference_action,
        use_field_description=True,
        use_double_quotes=True,
        extra_template_data=extra_template_data,
    )
    return parser.parse()


async def generate_mcp_sources(server_name: str, server_params: dict[str, Any], root_dir: Path) -> list[str]:
    """Generate a typed Python tool API for an MCP server.

    Connects to an MCP server, discovers available tools, and generates a Python
    package with typed functions backed by Pydantic models. Each tool becomes a
    module with a `Params` class for input validation and a `run()` function to
    invoke the tool.

    When calling the generated API, the corresponding tools are executed on a
    [`ToolServer`][ipybox.tool_exec.server.ToolServer].

    If a directory for the server already exists under `root_dir`, it is removed
    and recreated.

    Args:
        server_name: Name for the generated package directory. Also used to
            identify the server in the generated client code.
        server_params: MCP server connection parameters. For stdio servers,
            provide `command`, `args`, and optionally `env`. For HTTP servers,
            provide `url` and optionally `headers`.
        root_dir: Parent directory where the package will be created. The
            generated package is written to `root_dir/server_name/`.

    Returns:
        List of sanitized tool names corresponding to the generated module files.
128 | 129 | Example: 130 | Generate a Python tool API for the fetch MCP server: 131 | 132 | ```python 133 | server_params = { 134 | "command": "uvx", 135 | "args": ["mcp-server-fetch"], 136 | } 137 | await generate_mcp_sources("fetch_mcp", server_params, Path("mcptools")) 138 | ``` 139 | 140 | Execute code that uses the generated API: 141 | 142 | ```python 143 | from ipybox.code_exec import CodeExecutor 144 | 145 | code = \"\"\" 146 | from mcptools.fetch_mcp import fetch 147 | 148 | result = fetch.run(fetch.Params(url="https://example.com")) 149 | print(result) 150 | \"\"\" 151 | 152 | async with CodeExecutor() as executor: 153 | async for item in executor.execute(code): 154 | ... 155 | ``` 156 | """ 157 | async with MCPClient(server_params) as server: 158 | if await aiofiles.os.path.exists(root_dir / server_name): 159 | await asyncio.get_running_loop().run_in_executor(None, shutil.rmtree, root_dir / server_name) 160 | 161 | await aiofiles.os.makedirs(root_dir / server_name) 162 | 163 | async with aiofiles.open(root_dir / server_name / "__init__.py", "w") as f: 164 | await f.write(generate_init_definition(server_name, server_params)) 165 | 166 | result = [] # type: ignore 167 | 168 | for tool in await server.list_tools(): 169 | original_name = tool.name 170 | sanitized_name = sanitize_name(tool.name) 171 | result.append(sanitized_name) 172 | 173 | # Generate input model (Params) 174 | input_model_code = generate_input_model_code(tool.inputSchema) 175 | 176 | if output_schema := tool.outputSchema: 177 | output_model_code = generate_output_model_code(output_schema) 178 | output_model_code = strip_imports(output_model_code) 179 | 180 | # Generate function with appropriate return type 181 | function_definition = generate_function_definition( 182 | original_name=original_name, 183 | description=tool.description or "", 184 | structured_output=output_schema is not None, 185 | ) 186 | 187 | # Write file with models and function 188 | async with aiofiles.open(root_dir / 
server_name / f"{sanitized_name}.py", "w") as f: 189 | if output_schema: 190 | await f.write(f"{input_model_code}\n\n{output_model_code}\n\n{function_definition}") 191 | else: 192 | await f.write(f"{input_model_code}\n\n{function_definition}") 193 | 194 | return result 195 | 196 | 197 | def strip_imports(code: str) -> str: 198 | filtered_lines = [] 199 | for line in code.split("\n"): 200 | if line.strip() == "from __future__ import annotations": 201 | continue 202 | filtered_lines.append(line) 203 | return "\n".join(filtered_lines) 204 | 205 | 206 | def sanitize_name(name: str) -> str: 207 | """Sanitize a name for being used as module name.""" 208 | return re.sub(r"[^a-zA-Z0-9_]", "_", name).lower() 209 | -------------------------------------------------------------------------------- /tests/unit/test_replace_variables.py: -------------------------------------------------------------------------------- 1 | from ipybox.vars import replace_variables 2 | 3 | 4 | class TestReplaceVariables: 5 | """Tests for replace_variables function.""" 6 | 7 | def test_basic_replacement(self): 8 | template = {"env": {"KEY": "${VAR}"}} 9 | variables = {"VAR": "value"} 10 | 11 | result = replace_variables(template, variables) 12 | 13 | assert result.replaced == {"env": {"KEY": "value"}} 14 | assert result.replaced_variables == {"VAR"} 15 | assert result.missing_variables == set() 16 | 17 | def test_mcp_env_params(self): 18 | template = { 19 | "command": "npx", 20 | "args": ["-y", "@brave/brave-search-mcp-server"], 21 | "env": {"BRAVE_API_KEY": "${BRAVE_API_KEY}"}, 22 | } 23 | variables = {"BRAVE_API_KEY": "secret123"} 24 | 25 | result = replace_variables(template, variables) 26 | 27 | assert result.replaced["env"]["BRAVE_API_KEY"] == "secret123" 28 | assert result.replaced["command"] == "npx" 29 | assert result.replaced["args"] == ["-y", "@brave/brave-search-mcp-server"] 30 | assert result.replaced_variables == {"BRAVE_API_KEY"} 31 | assert result.missing_variables == set() 32 | 33 | 
def test_mcp_headers_params(self): 34 | template = { 35 | "url": "https://api.github.com/mcp/", 36 | "headers": {"Authorization": "Bearer ${GITHUB_API_KEY}"}, 37 | } 38 | variables = {"GITHUB_API_KEY": "ghp_token123"} 39 | 40 | result = replace_variables(template, variables) 41 | 42 | assert result.replaced["headers"]["Authorization"] == "Bearer ghp_token123" 43 | assert result.replaced["url"] == "https://api.github.com/mcp/" 44 | assert result.replaced_variables == {"GITHUB_API_KEY"} 45 | 46 | def test_missing_variable_preserved(self): 47 | template = {"env": {"KEY": "${MISSING_VAR}"}} 48 | variables = {} # type: ignore 49 | 50 | result = replace_variables(template, variables) 51 | 52 | assert result.replaced == {"env": {"KEY": "${MISSING_VAR}"}} 53 | assert result.replaced_variables == set() 54 | assert result.missing_variables == {"MISSING_VAR"} 55 | 56 | def test_mixed_replaced_and_missing(self): 57 | template = { 58 | "env": { 59 | "API_KEY": "${API_KEY}", 60 | "SECRET": "${SECRET}", 61 | } 62 | } 63 | variables = {"API_KEY": "provided_key"} 64 | 65 | result = replace_variables(template, variables) 66 | 67 | assert result.replaced["env"]["API_KEY"] == "provided_key" 68 | assert result.replaced["env"]["SECRET"] == "${SECRET}" 69 | assert result.replaced_variables == {"API_KEY"} 70 | assert result.missing_variables == {"SECRET"} 71 | 72 | def test_nested_dicts(self): 73 | template = {"a": {"b": {"c": {"d": "${VAR}"}}}} 74 | variables = {"VAR": "deep_value"} 75 | 76 | result = replace_variables(template, variables) 77 | 78 | assert result.replaced == {"a": {"b": {"c": {"d": "deep_value"}}}} 79 | assert result.replaced_variables == {"VAR"} 80 | 81 | def test_list_of_strings(self): 82 | template = {"args": ["-y", "${PKG}", "--option", "${OPT}"]} 83 | variables = {"PKG": "my-package", "OPT": "value"} 84 | 85 | result = replace_variables(template, variables) 86 | 87 | assert result.replaced["args"] == ["-y", "my-package", "--option", "value"] 88 | assert 
result.replaced_variables == {"PKG", "OPT"} 89 | 90 | def test_list_of_dicts(self): 91 | template = { 92 | "servers": [ 93 | {"name": "server1", "token": "${TOKEN1}"}, 94 | {"name": "server2", "token": "${TOKEN2}"}, 95 | ] 96 | } 97 | variables = {"TOKEN1": "t1", "TOKEN2": "t2"} 98 | 99 | result = replace_variables(template, variables) 100 | 101 | assert result.replaced["servers"][0]["token"] == "t1" 102 | assert result.replaced["servers"][1]["token"] == "t2" 103 | assert result.replaced_variables == {"TOKEN1", "TOKEN2"} 104 | 105 | def test_non_string_passthrough(self): 106 | template = { 107 | "port": 8080, 108 | "enabled": True, 109 | "disabled": False, 110 | "data": None, 111 | "ratio": 3.14, 112 | } 113 | variables = {} # type: ignore 114 | 115 | result = replace_variables(template, variables) 116 | 117 | assert result.replaced == template 118 | assert result.replaced_variables == set() 119 | assert result.missing_variables == set() 120 | 121 | def test_multiple_variables_in_one_string(self): 122 | template = {"auth": "Bearer ${TOKEN} for user ${USER}"} 123 | variables = {"TOKEN": "abc123", "USER": "john"} 124 | 125 | result = replace_variables(template, variables) 126 | 127 | assert result.replaced["auth"] == "Bearer abc123 for user john" 128 | assert result.replaced_variables == {"TOKEN", "USER"} 129 | 130 | def test_empty_dict(self): 131 | result = replace_variables({}, {"VAR": "value"}) 132 | 133 | assert result.replaced == {} 134 | assert result.replaced_variables == set() 135 | assert result.missing_variables == set() 136 | 137 | def test_no_variables_passthrough(self): 138 | template = { 139 | "command": "python", 140 | "args": ["-m", "mymodule"], 141 | "env": {"PATH": "/usr/bin"}, 142 | } 143 | variables = {"UNUSED": "value"} 144 | 145 | result = replace_variables(template, variables) 146 | 147 | assert result.replaced == template 148 | assert result.replaced_variables == set() 149 | assert result.missing_variables == set() 150 | 151 | def 
test_total_variables_property(self): 152 | template = {"a": "${VAR1}", "b": "${VAR2}", "c": "${VAR3}"} 153 | variables = {"VAR1": "v1", "VAR2": "v2"} 154 | 155 | result = replace_variables(template, variables) 156 | 157 | assert result.total_variables == 3 158 | assert len(result.replaced_variables) == 2 159 | assert len(result.missing_variables) == 1 160 | 161 | def test_empty_string_value(self): 162 | template = {"key": ""} 163 | variables = {"VAR": "value"} 164 | 165 | result = replace_variables(template, variables) 166 | 167 | assert result.replaced == {"key": ""} 168 | assert result.replaced_variables == set() 169 | 170 | def test_special_chars_not_matched(self): 171 | # Patterns with special chars don't match: ${foo-bar} has hyphen 172 | template = {"key": "${foo-bar}", "other": "${valid}"} 173 | variables = {"foo-bar": "should_not_match", "valid": "matched"} 174 | 175 | result = replace_variables(template, variables) 176 | 177 | assert result.replaced["key"] == "${foo-bar}" # Not replaced (hyphen not in pattern) 178 | assert result.replaced["other"] == "matched" 179 | assert result.replaced_variables == {"valid"} 180 | assert result.missing_variables == set() 181 | 182 | def test_same_variable_multiple_occurrences(self): 183 | template = { 184 | "first": "${VAR}", 185 | "second": "${VAR}", 186 | "nested": {"third": "${VAR}"}, 187 | } 188 | variables = {"VAR": "value"} 189 | 190 | result = replace_variables(template, variables) 191 | 192 | assert result.replaced["first"] == "value" 193 | assert result.replaced["second"] == "value" 194 | assert result.replaced["nested"]["third"] == "value" 195 | # Variable should only appear once in the set 196 | assert result.replaced_variables == {"VAR"} 197 | 198 | def test_mixed_content_list(self): 199 | template = {"items": ["${VAR}", 123, True, None, {"nested": "${VAR2}"}]} 200 | variables = {"VAR": "str_val", "VAR2": "nested_val"} 201 | 202 | result = replace_variables(template, variables) 203 | 204 | assert 
result.replaced["items"] == [ 205 | "str_val", 206 | 123, 207 | True, 208 | None, 209 | {"nested": "nested_val"}, 210 | ] 211 | assert result.replaced_variables == {"VAR", "VAR2"} 212 | -------------------------------------------------------------------------------- /ipybox/tool_exec/server.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import asyncio 3 | import copy 4 | from contextlib import AsyncExitStack 5 | from typing import Any 6 | 7 | import aiohttp 8 | import uvicorn 9 | import uvicorn.config 10 | from fastapi import FastAPI, WebSocket 11 | from pydantic import BaseModel 12 | 13 | from ipybox.mcp_client import MCPClient 14 | from ipybox.tool_exec.approval.server import ApprovalChannel 15 | 16 | 17 | class ToolCall(BaseModel): 18 | server_name: str 19 | server_params: dict[str, Any] 20 | tool_name: str 21 | tool_args: dict[str, Any] 22 | 23 | 24 | class ToolServer: 25 | """HTTP server that manages MCP servers and executes their tools with optional approval. 26 | 27 | ToolServer provides HTTP endpoints for executing MCP tools and a WebSocket endpoint 28 | for sending approval requests to clients. MCP servers are started on demand when tools 29 | are first executed and cached for subsequent calls. 30 | 31 | Endpoints: 32 | 33 | - `PUT /reset`: Closes all started MCP servers 34 | - `POST /run`: Executes an MCP tool (with optional approval) 35 | - `WS /approval`: WebSocket endpoint for 36 | [`ApprovalClient`][ipybox.tool_exec.approval.client.ApprovalClient] connections 37 | 38 | Example: 39 | ```python 40 | async with ToolServer(approval_required=True) as server: 41 | async with ApprovalClient(callback=on_approval_request): 42 | # Execute code that calls MCP tools 43 | ... 
44 | ``` 45 | """ 46 | 47 | def __init__( 48 | self, 49 | host="localhost", 50 | port: int = 8900, 51 | approval_required: bool = False, 52 | approval_timeout: float = 60, 53 | connect_timeout: float = 30, 54 | log_to_stderr: bool = False, 55 | log_level: str = "INFO", 56 | ): 57 | """ 58 | Args: 59 | host: Hostname the server binds to. 60 | port: Port number the server listens on. 61 | approval_required: Whether tool calls require approval. 62 | approval_timeout: Timeout in seconds for approval requests. 63 | connect_timeout: Timeout in seconds for starting MCP servers. 64 | log_to_stderr: Whether to log to stderr instead of stdout. 65 | log_level: Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL). 66 | """ 67 | self.host = host 68 | self.port = port 69 | 70 | self.approval_timeout = approval_timeout 71 | self.connect_timeout = connect_timeout 72 | 73 | self.log_to_stderr = log_to_stderr 74 | self.log_level = log_level 75 | 76 | self.ready_checks: int = 50 77 | self.ready_check_interval: float = 0.2 78 | 79 | self.app = FastAPI(title="MCP tool runner") 80 | self.app.websocket("/approval")(self.approval) 81 | self.app.get("/status")(self.status) 82 | self.app.put("/reset")(self.reset) 83 | self.app.post("/run", response_model=None)(self.run) 84 | 85 | self._server: uvicorn.Server | None = None 86 | self._server_task: asyncio.Task | None = None 87 | self._approval_channel: ApprovalChannel = ApprovalChannel( 88 | approval_required=approval_required, 89 | approval_timeout=approval_timeout, 90 | ) 91 | 92 | self._mcp_client_lifecycle_lock = asyncio.Lock() 93 | self._mcp_client_exit_stack: AsyncExitStack = AsyncExitStack() 94 | self._mcp_clients: dict[str, MCPClient] = {} 95 | 96 | async def approval(self, websocket: WebSocket): 97 | if self._approval_channel.open: 98 | try: 99 | await self._approval_channel.join() 100 | except asyncio.TimeoutError: 101 | message = "Timed out waiting for previous connection to close" 102 | await websocket.close(code=1008, 
reason=message) 103 | return 104 | 105 | await self._approval_channel.connect(websocket) 106 | 107 | async def status(self): 108 | return {"status": "ok"} 109 | 110 | async def reset(self): 111 | await self._close_mcp_clients() 112 | return {"reset": "success"} 113 | 114 | async def run(self, call: ToolCall) -> dict[str, Any] | str | None: 115 | try: 116 | if not await self._approval_channel.request(call.server_name, call.tool_name, call.tool_args): 117 | return {"error": f"Approval request for {call.server_name}.{call.tool_name} rejected"} 118 | except asyncio.TimeoutError: 119 | return {"error": f"Approval request for {call.server_name}.{call.tool_name} expired"} 120 | except Exception as e: 121 | return {"error": f"Approval request for {call.server_name}.{call.tool_name} failed: {str(e)}"} 122 | 123 | try: 124 | client = await self._get_mcp_client( 125 | call.server_name, 126 | call.server_params, 127 | ) 128 | result = await client.run( 129 | call.tool_name, 130 | call.tool_args, 131 | ) 132 | except Exception as e: 133 | return {"error": str(e)} 134 | else: 135 | return {"result": result} 136 | 137 | async def __aenter__(self): 138 | await self.start() 139 | return self 140 | 141 | async def __aexit__(self, exc_type, exc_value, traceback): 142 | await self.stop() 143 | 144 | async def start(self): 145 | """Start the HTTP server. 146 | 147 | Raises: 148 | RuntimeError: If the server is already running. 
149 | """ 150 | if self._server_task is not None: 151 | raise RuntimeError("Server already running") 152 | 153 | LOGGING_CONFIG = uvicorn.config.LOGGING_CONFIG 154 | 155 | if self.log_to_stderr: 156 | LOGGING_CONFIG = copy.deepcopy(LOGGING_CONFIG) 157 | LOGGING_CONFIG["handlers"]["default"]["stream"] = "ext://sys.stderr" 158 | LOGGING_CONFIG["handlers"]["access"]["stream"] = "ext://sys.stderr" 159 | 160 | config = uvicorn.Config( 161 | self.app, 162 | host=self.host, 163 | port=self.port, 164 | log_config=LOGGING_CONFIG, 165 | log_level=self.log_level.lower(), 166 | ) 167 | 168 | self._server = uvicorn.Server(config) 169 | self._server_task = asyncio.create_task(self._server.serve()) 170 | 171 | await self._ready() 172 | 173 | async def stop(self): 174 | """Stop the HTTP server and close all managed MCP servers.""" 175 | if self._server_task is None: 176 | return 177 | 178 | await self._close_mcp_clients() 179 | await self._approval_channel.disconnect() 180 | 181 | if self._server is not None: 182 | self._server.should_exit = True 183 | 184 | await self.join() 185 | 186 | self._server_task = None 187 | self._server = None 188 | 189 | async def join(self): 190 | """Wait for the HTTP server task to stop.""" 191 | if self._server_task is not None: 192 | try: 193 | await self._server_task 194 | except asyncio.CancelledError: 195 | pass 196 | 197 | async def _get_mcp_client(self, server_name: str, server_params: dict[str, Any]) -> MCPClient: 198 | async with self._mcp_client_lifecycle_lock: 199 | if server_name not in self._mcp_clients: 200 | client = MCPClient(server_params) 201 | client = await self._mcp_client_exit_stack.enter_async_context(client) 202 | self._mcp_clients[server_name] = client 203 | return self._mcp_clients[server_name] 204 | 205 | async def _close_mcp_clients(self): 206 | async with self._mcp_client_lifecycle_lock: 207 | await self._mcp_client_exit_stack.aclose() 208 | self._mcp_client_exit_stack = AsyncExitStack() 209 | self._mcp_clients.clear() 

    async def _ready(self):
        # Poll the /status endpoint until the uvicorn server answers, so
        # start() only returns once the server actually accepts requests.
        status_url = f"http://{self.host}:{self.port}/status"

        async with aiohttp.ClientSession() as session:
            for _ in range(self.ready_checks):
                try:
                    async with session.get(status_url) as response:
                        response.raise_for_status()
                    # Server responded successfully — ready.
                    break
                except Exception:
                    # Not up yet (connection refused or error status); retry
                    # after a short delay.
                    await asyncio.sleep(self.ready_check_interval)
            else:
                # for/else: all ready checks exhausted without a success.
                raise RuntimeError("Server not ready")


async def main():
    # CLI entry point: run a ToolServer until it is stopped.
    parser = argparse.ArgumentParser()
    parser.add_argument("--host", type=str, default="localhost")
    parser.add_argument("--port", type=int, default=8900)
    parser.add_argument("--log-level", type=str, default="INFO")
    args = parser.parse_args()

    async with ToolServer(
        host=args.host,
        port=args.port,
        log_level=args.log_level,
    ) as server:
        await server.join()


if __name__ == "__main__":
    asyncio.run(main())

--------------------------------------------------------------------------------
/tests/integration/test_mcp_server.py:
--------------------------------------------------------------------------------
import sys
from pathlib import Path

import pytest
import pytest_asyncio

from ipybox.mcp_client import MCPClient
from tests.integration.mcp_server import STDIO_SERVER_PATH

MCP_SERVER_NAME = "test_mcp"


@pytest_asyncio.fixture
async def mcp_client(tmp_path: Path):
    """Create an MCPClient connected to the ipybox MCP server."""
    # Create .env file with KERNEL_ENV_ prefixed variable for testing
    dotenv_file = tmp_path / ".env"
    dotenv_file.write_text("KERNEL_ENV_TEST_VAR=test_value_from_dotenv\n")

    # Launch the ipybox MCP server as a stdio subprocess using the current
    # interpreter, with the pytest tmp_path as its workspace.
    server_params = {
        "command": sys.executable,
        "args": ["-m", "ipybox.mcp_server", "--workspace", str(tmp_path), "--log-level", "ERROR"],
    }
    async with MCPClient(server_params, connect_timeout=30) as client:
        yield
client 26 | 27 | 28 | class TestBasicExecution: 29 | """Basic MCP server functionality.""" 30 | 31 | @pytest.mark.asyncio 32 | async def test_simple_code_execution(self, mcp_client: MCPClient): 33 | """Test executing a simple print statement.""" 34 | result = await mcp_client.run("execute_ipython_cell", {"code": "print('hello world')"}) 35 | 36 | assert result == "hello world" 37 | 38 | @pytest.mark.asyncio 39 | async def test_expression_result(self, mcp_client: MCPClient): 40 | """Test that expression results are returned.""" 41 | result = await mcp_client.run("execute_ipython_cell", {"code": "2 + 2"}) 42 | 43 | assert result == "4" 44 | 45 | @pytest.mark.asyncio 46 | async def test_code_execution_error(self, mcp_client: MCPClient): 47 | """Test that execution errors are raised.""" 48 | with pytest.raises(Exception) as exc_info: 49 | await mcp_client.run("execute_ipython_cell", {"code": "raise ValueError('test error')"}) 50 | 51 | assert "ValueError" in str(exc_info.value) 52 | assert "test error" in str(exc_info.value) 53 | 54 | @pytest.mark.asyncio 55 | async def test_state_persistence(self, mcp_client: MCPClient): 56 | """Test that kernel state persists across executions.""" 57 | await mcp_client.run("execute_ipython_cell", {"code": "x = 42"}) 58 | result = await mcp_client.run("execute_ipython_cell", {"code": "print(x)"}) 59 | 60 | assert result == "42" 61 | 62 | @pytest.mark.asyncio 63 | async def test_max_output_chars_truncation(self, mcp_client: MCPClient): 64 | """Test that output is truncated when exceeding max_output_chars.""" 65 | # Generate output longer than the limit 66 | code = "print('x' * 100)" 67 | result = await mcp_client.run("execute_ipython_cell", {"code": code, "max_output_chars": 50}) 68 | 69 | assert isinstance(result, str) 70 | assert len(result) > 50 # Includes truncation message 71 | assert result.startswith("x" * 50) 72 | assert "[Output truncated: exceeded 50 character limit]" in result 73 | 74 | @pytest.mark.asyncio 75 | async def 
test_max_output_chars_no_truncation(self, mcp_client: MCPClient): 76 | """Test that output is not truncated when within max_output_chars.""" 77 | code = "print('hello world')" 78 | result = await mcp_client.run("execute_ipython_cell", {"code": code, "max_output_chars": 100}) 79 | 80 | assert isinstance(result, str) 81 | assert result == "hello world" 82 | assert "[Output truncated" not in result 83 | 84 | @pytest.mark.asyncio 85 | async def test_max_output_chars_default(self, mcp_client: MCPClient): 86 | """Test that default max_output_chars (5000) is used when not specified.""" 87 | # Generate output slightly over 5000 chars 88 | code = "print('x' * 5001)" 89 | result = await mcp_client.run("execute_ipython_cell", {"code": code}) 90 | 91 | assert isinstance(result, str) 92 | assert "[Output truncated: exceeded 5000 character limit]" in result 93 | assert result.startswith("x" * 5000) 94 | 95 | @pytest.mark.asyncio 96 | async def test_dotenv_kernel_env_var_available(self, mcp_client: MCPClient): 97 | """Test that KERNEL_ENV_ variables from .env are available in kernel.""" 98 | code = "import os; print(os.environ.get('TEST_VAR', 'NOT_FOUND'))" 99 | result = await mcp_client.run("execute_ipython_cell", {"code": code}) 100 | 101 | assert result == "test_value_from_dotenv" 102 | 103 | 104 | class TestMcpServerRegistration: 105 | """MCP server registration tests.""" 106 | 107 | @pytest.mark.asyncio 108 | async def test_register_mcp_server_returns_tool_names(self, mcp_client: MCPClient): 109 | """Test that register_mcp_server returns tool names.""" 110 | server_params = { 111 | "command": "python", 112 | "args": [str(STDIO_SERVER_PATH)], 113 | } 114 | 115 | result = await mcp_client.run( 116 | "register_mcp_server", 117 | {"server_name": MCP_SERVER_NAME, "server_params": server_params}, 118 | ) 119 | assert isinstance(result, str) 120 | 121 | tool_names = result.split("\n") 122 | assert "tool_1" in tool_names 123 | assert "tool_2" in tool_names 124 | assert "tool_3" in 
tool_names 125 | 126 | @pytest.mark.asyncio 127 | async def test_registered_tools_generate_sources(self, mcp_client: MCPClient, tmp_path: Path): 128 | """Test that registration generates importable sources in the workspace.""" 129 | server_params = { 130 | "command": "python", 131 | "args": [str(STDIO_SERVER_PATH)], 132 | } 133 | 134 | await mcp_client.run( 135 | "register_mcp_server", 136 | {"server_name": MCP_SERVER_NAME, "server_params": server_params}, 137 | ) 138 | 139 | # Verify the package was generated 140 | package_dir = tmp_path / "mcptools" / MCP_SERVER_NAME 141 | assert package_dir.exists() 142 | assert (package_dir / "__init__.py").exists() 143 | assert (package_dir / "tool_1.py").exists() 144 | assert (package_dir / "tool_2.py").exists() 145 | assert (package_dir / "tool_3.py").exists() 146 | 147 | @pytest.mark.asyncio 148 | async def test_registered_tools_are_callable(self, mcp_client: MCPClient): 149 | """Test that registered tools can be imported and called via execute_ipython_cell.""" 150 | server_params = { 151 | "command": "python", 152 | "args": [str(STDIO_SERVER_PATH)], 153 | } 154 | 155 | await mcp_client.run( 156 | "register_mcp_server", 157 | {"server_name": MCP_SERVER_NAME, "server_params": server_params}, 158 | ) 159 | 160 | # Sources are generated at mcptools/{server_name}/ 161 | code = f""" 162 | from mcptools.{MCP_SERVER_NAME}.tool_2 import run, Params 163 | result = run(Params(s="hello")) 164 | print(result) 165 | """ 166 | result = await mcp_client.run("execute_ipython_cell", {"code": code}) 167 | assert isinstance(result, str) 168 | 169 | assert "You passed to tool 2: hello" in result 170 | 171 | 172 | class TestReset: 173 | """Kernel reset tests.""" 174 | 175 | @pytest.mark.asyncio 176 | async def test_reset_clears_kernel_state(self, mcp_client: MCPClient): 177 | """Test that reset clears kernel state.""" 178 | # Set a variable 179 | await mcp_client.run("execute_ipython_cell", {"code": "x = 42"}) 180 | 181 | # Verify it exists 182 
| result = await mcp_client.run("execute_ipython_cell", {"code": "print(x)"}) 183 | assert result == "42" 184 | 185 | # Reset 186 | await mcp_client.run("reset", {}) 187 | 188 | # Verify the variable no longer exists 189 | with pytest.raises(Exception) as exc_info: 190 | await mcp_client.run("execute_ipython_cell", {"code": "print(x)"}) 191 | assert "NameError" in str(exc_info.value) 192 | 193 | @pytest.mark.asyncio 194 | async def test_reset_allows_continued_execution(self, mcp_client: MCPClient): 195 | """Test that reset allows continued execution.""" 196 | # Set state and reset 197 | await mcp_client.run("execute_ipython_cell", {"code": "x = 42"}) 198 | await mcp_client.run("reset", {}) 199 | 200 | # Verify we can still execute code 201 | result = await mcp_client.run("execute_ipython_cell", {"code": "print('after reset')"}) 202 | 203 | assert result == "after reset" 204 | 205 | 206 | @pytest.mark.skipif(sys.platform != "darwin", reason="Sandbox tests only run on macOS") 207 | class TestSandbox: 208 | """Tests for sandbox configuration.""" 209 | 210 | HTTP_CODE = """ 211 | import urllib.request 212 | response = urllib.request.urlopen('https://example.org') 213 | content = response.read().decode('utf-8') 214 | print(content) 215 | """ 216 | 217 | @pytest_asyncio.fixture 218 | async def mcp_client_custom_sandbox(self, tmp_path: Path): 219 | """Create an MCPClient with custom sandbox config (example.org allowed).""" 220 | sandbox_config = Path("tests", "integration", "sandbox.json").absolute() 221 | server_params = { 222 | "command": sys.executable, 223 | "args": [ 224 | "-m", 225 | "ipybox.mcp_server", 226 | "--workspace", 227 | str(tmp_path), 228 | "--log-level", 229 | "ERROR", 230 | "--sandbox", 231 | "--sandbox-config", 232 | str(sandbox_config), 233 | ], 234 | } 235 | async with MCPClient(server_params, connect_timeout=30) as client: 236 | yield client 237 | 238 | @pytest.mark.asyncio 239 | async def test_custom_sandbox_allows_example_org(self, 
mcp_client_custom_sandbox: MCPClient): 240 | """Test that custom sandbox config allows example.org access.""" 241 | result = await mcp_client_custom_sandbox.run("execute_ipython_cell", {"code": self.HTTP_CODE}) 242 | 243 | assert result is not None 244 | assert "Example Domain" in result 245 | -------------------------------------------------------------------------------- /tests/integration/test_tool_approval.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | 3 | import pytest 4 | import pytest_asyncio 5 | import uvicorn 6 | from fastapi import FastAPI 7 | 8 | from ipybox.tool_exec.approval.client import ApprovalClient, ApprovalRequest 9 | from ipybox.tool_exec.approval.server import ApprovalChannel 10 | 11 | HOST = "localhost" 12 | PORT = 8901 13 | 14 | 15 | @pytest_asyncio.fixture 16 | async def approval_channel(): 17 | async with await _serve_channel() as channel: 18 | yield channel 19 | 20 | 21 | async def _serve_channel(approval_required: bool = True, approval_timeout: float = 5.0): 22 | """Context manager that serves an ApprovalChannel over websocket.""" 23 | channel = ApprovalChannel(approval_required, approval_timeout) 24 | app = FastAPI() 25 | app.websocket("/approval")(channel.connect) 26 | 27 | config = uvicorn.Config(app, HOST, PORT, log_level="warning") 28 | server = uvicorn.Server(config) 29 | task = asyncio.create_task(server.serve()) 30 | 31 | while not server.started: 32 | await asyncio.sleep(0.01) 33 | 34 | class ChannelContext: 35 | async def __aenter__(self): 36 | return channel 37 | 38 | async def __aexit__(self, *_): 39 | await channel.disconnect() 40 | server.should_exit = True 41 | await task 42 | 43 | return ChannelContext() 44 | 45 | 46 | class TestApprovalBasics: 47 | """Tests for basic approval request/response flow.""" 48 | 49 | @pytest.mark.asyncio 50 | async def test_accept_request(self, approval_channel: ApprovalChannel): 51 | """Test that accepting a request returns True.""" 

        async def on_approval(request: ApprovalRequest):
            await request.accept()

        async with ApprovalClient(callback=on_approval, host=HOST, port=PORT):
            result = await approval_channel.request("test_server", "test_tool", {"arg1": "value1"})
            assert result is True

    @pytest.mark.asyncio
    async def test_reject_request(self, approval_channel: ApprovalChannel):
        """Test that rejecting a request returns False."""

        async def on_approval(request: ApprovalRequest):
            await request.reject()

        async with ApprovalClient(callback=on_approval, host=HOST, port=PORT):
            result = await approval_channel.request("test_server", "test_tool", {})
            assert result is False


class TestApprovalRequestData:
    """Tests for approval request data passing."""

    @pytest.mark.asyncio
    async def test_request_receives_server_name(self, approval_channel: ApprovalChannel):
        """Test that the callback receives the correct server name."""
        received_server_name = None

        async def on_approval(request: ApprovalRequest):
            nonlocal received_server_name
            received_server_name = request.server_name
            await request.accept()

        async with ApprovalClient(callback=on_approval, host=HOST, port=PORT):
            await approval_channel.request("my_server", "my_tool", {})

        assert received_server_name == "my_server"

    @pytest.mark.asyncio
    async def test_request_receives_tool_name(self, approval_channel: ApprovalChannel):
        """Test that the callback receives the correct tool name."""
        received_tool_name = None

        async def on_approval(request: ApprovalRequest):
            nonlocal received_tool_name
            received_tool_name = request.tool_name
            await request.accept()

        async with ApprovalClient(callback=on_approval, host=HOST, port=PORT):
            await approval_channel.request("my_server", "my_tool", {})

        assert received_tool_name == "my_tool"

    @pytest.mark.asyncio
    async def test_request_receives_tool_args(self, approval_channel: ApprovalChannel):
        """Test that the callback receives the correct tool args."""
        received_tool_args = None

        async def on_approval(request: ApprovalRequest):
            nonlocal received_tool_args
            received_tool_args = request.tool_args
            await request.accept()

        async with ApprovalClient(callback=on_approval, host=HOST, port=PORT):
            await approval_channel.request("my_server", "my_tool", {"key1": "value1", "key2": 42, "key3": [1, 2, 3]})

        assert received_tool_args == {"key1": "value1", "key2": 42, "key3": [1, 2, 3]}

    @pytest.mark.asyncio
    async def test_request_str_representation(self, approval_channel: ApprovalChannel):
        """Test ApprovalRequest string representation."""
        request_str = None

        async def on_approval(request: ApprovalRequest):
            nonlocal request_str
            request_str = str(request)
            await request.accept()

        async with ApprovalClient(callback=on_approval, host=HOST, port=PORT):
            await approval_channel.request("server", "tool", {"name": "test"})

        assert request_str == "server.tool(name='test')"


class TestApprovalNotRequired:
    """Tests for approval_required=False behavior."""

    @pytest.mark.asyncio
    async def test_auto_accept_when_not_required(self):
        """Test that requests are auto-accepted when approval_required=False."""
        # No client/websocket needed: the channel short-circuits to True.
        channel = ApprovalChannel(approval_required=False)
        result = await channel.request("server", "tool", {})
        assert result is True


class TestApprovalTimeout:
    """Tests for approval timeout behavior."""

    @pytest.mark.asyncio
    async def test_timeout_when_no_response(self):
        """Test that request times out when client doesn't respond."""

        async def on_approval(request: ApprovalRequest):
            await asyncio.sleep(1)  # Longer than timeout

        async with await _serve_channel(approval_timeout=0.2) as channel:
            async with ApprovalClient(callback=on_approval, host=HOST, port=PORT):
                with pytest.raises(TimeoutError):
                    await channel.request("server", "tool", {})

    @pytest.mark.asyncio
    async def test_slow_response_within_timeout(self):
        """Test that a slow response within timeout succeeds."""

        async def on_approval(request: ApprovalRequest):
            await asyncio.sleep(0.2)
            await request.accept()

        async with await _serve_channel(approval_timeout=1.0) as channel:
            async with ApprovalClient(callback=on_approval, host=HOST, port=PORT):
                result = await channel.request("server", "tool", {})
                assert result is True


class TestApprovalConnection:
    """Tests for connection handling."""

    @pytest.mark.asyncio
    async def test_no_client_connected_raises_error(self, approval_channel: ApprovalChannel):
        """Test that requests fail when no client is connected."""
        with pytest.raises(RuntimeError, match="not connected"):
            await approval_channel.request("server", "tool", {})

    @pytest.mark.asyncio
    async def test_channel_open_property(self, approval_channel: ApprovalChannel):
        """Test the open property reflects connection state."""
        assert approval_channel.open is False

        async def on_approval(request: ApprovalRequest):
            await request.accept()

        async with ApprovalClient(callback=on_approval, host=HOST, port=PORT):
            assert approval_channel.open is True

        await asyncio.sleep(0.1)  # Wait for disconnect to propagate
        assert approval_channel.open is False

    @pytest.mark.asyncio
    async def test_client_disconnect_errors_pending_requests(self, approval_channel: ApprovalChannel):
        """Test that pending requests error when client disconnects."""
        request_received = asyncio.Event()

        async def on_approval(request: ApprovalRequest):
            # skip approval response
            request_received.set()

        client = ApprovalClient(callback=on_approval, host=HOST, port=PORT)
        await client.connect()

        request_task = asyncio.create_task(approval_channel.request("server", "tool", {}))

        await request_received.wait()
        await client.disconnect()

        with pytest.raises(RuntimeError, match="disconnected"):
            # waits for response but disconnect() errors future
            await request_task


class TestMultipleRequests:
    """Tests for handling multiple concurrent requests."""

    @pytest.mark.asyncio
    async def test_sequential_requests(self, approval_channel: ApprovalChannel):
        """Test multiple sequential requests."""
        request_count = 0

        async def on_approval(request: ApprovalRequest):
            nonlocal request_count
            request_count += 1
            await request.accept()

        async with ApprovalClient(callback=on_approval, host=HOST, port=PORT):
            for i in range(5):
                result = await approval_channel.request("server", f"tool_{i}", {"index": i})
                assert result is True

        assert request_count == 5

    @pytest.mark.asyncio
    async def test_concurrent_requests(self, approval_channel: ApprovalChannel):
        """Test multiple concurrent requests."""
        requests_received = []

        async def on_approval(request: ApprovalRequest):
            requests_received.append(request.tool_name)
            await asyncio.sleep(0.1)
            await request.accept()

        async with ApprovalClient(callback=on_approval, host=HOST, port=PORT):
            tasks = [approval_channel.request("server", f"tool_{i}", {}) for i in range(3)]
            results = await asyncio.gather(*tasks)

        assert all(r is True for r in results)
        assert len(requests_received) == 3
        assert set(requests_received) == {"tool_0", "tool_1", "tool_2"}

    @pytest.mark.asyncio
    async def test_mixed_accept_reject_concurrent(self, approval_channel: ApprovalChannel):
        """Test concurrent requests with mixed accept/reject responses."""

        async def on_approval(request: ApprovalRequest):
            # Even indices are accepted, odd indices rejected.
            if request.tool_args.get("index", 0) % 2 == 0:
                await request.accept()
            else:
                await request.reject()

        async with ApprovalClient(callback=on_approval, host=HOST, port=PORT):
            tasks = [approval_channel.request("server", "tool", {"index": i}) for i in range(4)]
            results = await asyncio.gather(*tasks)

        assert results == [True, False, True, False]
--------------------------------------------------------------------------------
/ipybox/mcp_server.py:
--------------------------------------------------------------------------------
import argparse
import asyncio
import logging
import os
import signal
from contextlib import asynccontextmanager
from pathlib import Path
from typing import Annotated, Any

from dotenv import load_dotenv
from mcp.server.fastmcp import FastMCP
from pydantic import Field

from ipybox.kernel_mgr.client import KernelClient
from ipybox.kernel_mgr.server import KernelGateway
from ipybox.mcp_apigen import generate_mcp_sources
from ipybox.tool_exec.client import reset
from ipybox.tool_exec.server import ToolServer
from ipybox.utils import find_free_port

logger = logging.getLogger(__name__)

# Environment variables with this prefix are forwarded (prefix stripped)
# into the kernel environment; see extract_kernel_env().
KERNEL_ENV_PREFIX = "KERNEL_ENV_"


class MCPServer:
    # MCP server (stdio transport) exposing a stateful IPython kernel plus
    # tooling to register other MCP servers and install packages.
    def __init__(
        self,
        tool_server_host: str = "localhost",
        tool_server_port: int | None = None,
        kernel_gateway_host: str = "localhost",
        kernel_gateway_port: int | None = None,
        kernel_env: dict[str, str] | None = None,
        sandbox: bool = False,
        sandbox_config: Path | None = None,
        log_level: str = "INFO",
    ):
        self.tool_server_host = tool_server_host
        # Ports default to a dynamically discovered free port.
        self.tool_server_port = tool_server_port or find_free_port()

        self.kernel_gateway_host = kernel_gateway_host
        self.kernel_gateway_port = kernel_gateway_port or find_free_port()

        self.sandbox = sandbox
        self.sandbox_config = sandbox_config
        self.log_level = log_level
        self.kernel_env = kernel_env or {}

        # Expose the public methods below as MCP tools (plain-text results).
        self._mcp = FastMCP("ipybox", lifespan=self.server_lifespan, log_level=log_level)
        self._mcp.tool(structured_output=False)(self.register_mcp_server)
        self._mcp.tool(structured_output=False)(self.install_package)
        self._mcp.tool(structured_output=False)(self.execute_ipython_cell)
        self._mcp.tool(structured_output=False)(self.reset)

        # Assigned in server_lifespan once the kernel client is connected.
        self._client: KernelClient
        # Serializes execute/reset so cell executions never interleave.
        self._lock = asyncio.Lock()

    @asynccontextmanager
    async def server_lifespan(self, server: FastMCP):
        # Nested lifecycles: tool server -> kernel gateway -> kernel client.
        # The gateway's kernel learns the tool server's address via env vars.
        async with ToolServer(
            host=self.tool_server_host,
            port=self.tool_server_port,
            log_to_stderr=True,
            log_level=self.log_level,
        ):
            async with KernelGateway(
                host=self.kernel_gateway_host,
                port=self.kernel_gateway_port,
                sandbox=self.sandbox,
                sandbox_config=self.sandbox_config,
                log_to_stderr=True,
                log_level=self.log_level,
                env=self.kernel_env
                | {
                    "TOOL_SERVER_HOST": self.tool_server_host,
                    "TOOL_SERVER_PORT": str(self.tool_server_port),
                },
            ):
                async with KernelClient(
                    host=self.kernel_gateway_host,
                    port=self.kernel_gateway_port,
                ) as client:
                    self._client = client
                    yield

    async def register_mcp_server(self, server_name: str, server_params: dict[str, Any]) -> list[str]:
        """Register an MCP server and generate importable Python tool functions.

        Connects to an MCP server, generates a package at mcptools/{server_name}/ with
        type-safe client functions. Use generated tools via:
            from mcptools.{server_name} import {tool_name}
            result = {tool_name}.run({tool_name}.Params(...))

        - Environment variable placeholders like {API_KEY} in server_params are auto-replaced
        - Re-registering overwrites previous; call reset() to re-import updated tools
        - Generated mcptools/ persists across reset() calls

        Args:
            server_name: Package name (valid Python identifier: lowercase, underscores, no leading digit).
            server_params: Server config - stdio: {"command", "args", "env"} or http: {"url", "headers"}.

        Returns:
            List of tool names available for import from mcptools.{server_name}.
        """

        # root_dir is relative: resolved against the workspace cwd set in main().
        return await generate_mcp_sources(
            server_name=server_name,
            server_params=server_params,
            root_dir=Path("mcptools"),
        )

    async def install_package(self, package_name: str) -> str:
        """Install a Python package via pip.

        Installed packages persist across reset() calls and are immediately importable.
        Supports version specifiers (e.g., "numpy>=1.20.0") and git URLs.

        Args:
            package_name: Package spec (name, name==version, or git+https://... URL).

        Returns:
            Pip output including success messages, warnings, and errors.
        """
        import sys

        # Runs pip in this process's interpreter — presumably shared with the
        # kernel so installs are importable there; TODO confirm.
        process = await asyncio.create_subprocess_exec(
            sys.executable,
            "-m",
            "pip",
            "install",
            "--no-input",
            package_name,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )

        stdout, stderr = await process.communicate()

        # Concatenate stdout and stderr so warnings/errors are not lost.
        output = ""
        if stdout:
            output += stdout.decode()
        if stderr:
            output += stderr.decode()

        return output

    async def execute_ipython_cell(
        self,
        code: Annotated[
            str,
            Field(description="Python code to execute in the IPython kernel"),
        ],
        timeout: Annotated[
            float,
            Field(description="Maximum execution time in seconds before kernel interruption"),
        ] = 120,
        max_output_chars: Annotated[
            int,
            Field(description="Maximum number of characters to return in output (truncates if exceeded)"),
        ] = 5000,
    ) -> str:
        """Execute Python code in a stateful IPython kernel.

        State (variables, imports, definitions) persists across calls. Executions are sequential.
        For async code, use 'await' directly (kernel has an active event loop).

        Returns:
            Execution output (stdout, stderr, last expression) plus image paths as markdown links.
            Empty string if no output.

        Raises:
            ExecutionError: Code raised an exception (includes traceback).
            ToolRunnerError: MCP tool call failed.
            asyncio.TimeoutError: Execution exceeded timeout.
        """
        async with self._lock:
            result = await self._client.execute(code, timeout=timeout)
            output = result.text or ""
            if result.images:
                # Images are referenced by absolute path as markdown links.
                output += "\n\nGenerated images:\n\n"
                for img_path in result.images:
                    output += f"- [{img_path.stem}]({img_path.absolute()})\n"

            # Truncate AFTER appending image links, so the limit bounds the
            # total returned text.
            if len(output) > max_output_chars:
                output = (
                    output[:max_output_chars] + f"\n\n[Output truncated: exceeded {max_output_chars} character limit]"
                )

            return output

    async def reset(self):
        """Reset the IPython kernel to a clean state.

        Creates a new kernel, clearing all variables, imports, and definitions.

        - Cleared: all in-memory state, MCP server connections (auto-reconnect on next use)
        - Persists: installed packages, filesystem files, mcptools/ directory
        """
        async with self._lock:
            # Reset the tool server first, then replace the kernel.
            await reset(
                host=self.tool_server_host,
                port=self.tool_server_port,
            )
            await self._client.reset()

    async def run(self):
        # Serve MCP over stdio until the client disconnects.
        await self._mcp.run_stdio_async()


def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser(description="ipybox MCP Server")
    parser.add_argument(
        "--workspace",
        type=Path,
        default=Path("."),
        help="Code workspace (default: .)",
    )
    parser.add_argument(
        "--tool-server-host",
        type=str,
        default="localhost",
        help="Tool server host (default: localhost)",
    )
    parser.add_argument(
        "--tool-server-port",
        type=int,
        default=None,
        help="Tool server port (default: dynamic)",
    )
    parser.add_argument(
        "--kernel-gateway-host",
        type=str,
        default="localhost",
        help="Kernel gateway host (default: localhost)",
    )
    parser.add_argument(
        "--kernel-gateway-port",
        type=int,
        default=None,
        help="Kernel gateway port (default: dynamic)",
    )
parser.add_argument( 243 | "--sandbox", 244 | action="store_true", 245 | help="Run kernel gateway in sandbox", 246 | ) 247 | parser.add_argument( 248 | "--sandbox-config", 249 | type=Path, 250 | default=None, 251 | help="Sandbox config file (default: None)", 252 | ) 253 | parser.add_argument( 254 | "--log-level", 255 | type=str, 256 | default="INFO", 257 | choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], 258 | help="Logging level (default: INFO)", 259 | ) 260 | return parser.parse_args() 261 | 262 | 263 | def extract_kernel_env() -> dict[str, str]: 264 | kernel_env = {} 265 | 266 | for key, value in os.environ.items(): 267 | if key.startswith(KERNEL_ENV_PREFIX): 268 | kernel_env[key[len(KERNEL_ENV_PREFIX) :]] = value 269 | 270 | return kernel_env 271 | 272 | 273 | async def main(): 274 | args = parse_args() 275 | 276 | os.makedirs(args.workspace, exist_ok=True) 277 | os.chdir(args.workspace) 278 | 279 | load_dotenv(args.workspace.absolute() / ".env") 280 | 281 | sandbox_config = None 282 | 283 | if args.sandbox_config: 284 | if args.sandbox_config.exists(): 285 | sandbox_config = args.sandbox_config 286 | else: 287 | logger.warning(f"Sandbox config file {args.sandbox_config} does not exist, Using default config") 288 | 289 | server = MCPServer( 290 | tool_server_host=args.tool_server_host, 291 | tool_server_port=args.tool_server_port, 292 | kernel_gateway_host=args.kernel_gateway_host, 293 | kernel_gateway_port=args.kernel_gateway_port, 294 | sandbox=args.sandbox, 295 | sandbox_config=sandbox_config, 296 | log_level=args.log_level, 297 | kernel_env=extract_kernel_env(), 298 | ) 299 | 300 | loop = asyncio.get_running_loop() 301 | 302 | def handle_signal(): 303 | for task in asyncio.all_tasks(loop): 304 | task.cancel() 305 | 306 | for sig in (signal.SIGINT, signal.SIGTERM): 307 | loop.add_signal_handler(sig, handle_signal) 308 | 309 | await server.run() 310 | 311 | 312 | def cli(): 313 | asyncio.run(main()) 314 | 315 | 316 | if __name__ == "__main__": 317 | 
cli() 318 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 
40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /tests/integration/test_code_exec.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import tempfile 3 | from pathlib import Path 4 | 5 | import pytest 6 | import pytest_asyncio 7 | 8 | from ipybox import ApprovalRequest, CodeExecutionChunk, CodeExecutionError, CodeExecutionResult, CodeExecutor 9 | from ipybox.mcp_apigen import generate_mcp_sources 10 | from tests.integration.mcp_server import STDIO_SERVER_PATH 11 | 12 | MCP_SERVER_NAME = "test_mcp" 13 | 14 | 15 | @pytest_asyncio.fixture(scope="module") 16 | async def generated_mcp_package(): 17 | """Generate a Python tool API to a temp directory.""" 18 | server_params = { 19 | "command": "python", 20 | "args": [str(STDIO_SERVER_PATH)], 21 | } 22 | 23 | with tempfile.TemporaryDirectory() as tmp_dir: 24 | root_dir = Path(tmp_dir) 25 | 26 | tool_names = await generate_mcp_sources( 27 | server_name=MCP_SERVER_NAME, 28 | server_params=server_params, 29 | root_dir=root_dir, 30 | ) 31 | 32 | yield { 33 | "root_dir": root_dir, 34 | "package_dir": 
root_dir / MCP_SERVER_NAME, 35 | "tool_names": tool_names, 36 | "server_params": server_params, 37 | } 38 | 39 | 40 | @pytest_asyncio.fixture 41 | async def code_executor(generated_mcp_package: dict): 42 | """Create a CodeExecutor with access to generated MCP package.""" 43 | root_dir = generated_mcp_package["root_dir"] 44 | 45 | async with CodeExecutor( 46 | kernel_env={"PYTHONPATH": str(root_dir)}, 47 | log_level="ERROR", 48 | ) as executor: 49 | yield executor 50 | 51 | 52 | class TestBasicExecution: 53 | """Basic code execution functionality without MCP tools.""" 54 | 55 | @pytest.mark.asyncio 56 | async def test_simple_code_execution(self, code_executor: CodeExecutor): 57 | """Test executing a simple print statement.""" 58 | result = await code_executor.execute("print('hello world')") 59 | 60 | assert result.text == "hello world" 61 | 62 | @pytest.mark.asyncio 63 | async def test_code_execution_error(self, code_executor: CodeExecutor): 64 | """Test that CodeExecutionError is raised on runtime error.""" 65 | with pytest.raises(CodeExecutionError) as exc_info: 66 | await code_executor.execute("raise ValueError('test error')") 67 | 68 | assert "ValueError" in str(exc_info.value) 69 | assert "test error" in str(exc_info.value) 70 | 71 | @pytest.mark.asyncio 72 | async def test_streaming_execution(self, code_executor: CodeExecutor): 73 | """Test that CodeExecutionChunk is yielded when chunks=True.""" 74 | code = """ 75 | import time 76 | for i in range(3): 77 | print(f'chunk {i}', flush=True) 78 | time.sleep(0.05) 79 | """ 80 | 81 | chunks = [] 82 | result = None 83 | async for item in code_executor.stream(code, chunks=True): 84 | match item: 85 | case CodeExecutionChunk(): 86 | chunks.append(item) 87 | case CodeExecutionResult(): 88 | result = item 89 | 90 | assert len(chunks) > 0 91 | combined_text = "".join(c.text for c in chunks) 92 | assert "chunk 0" in combined_text 93 | assert "chunk 1" in combined_text 94 | assert "chunk 2" in combined_text 95 | assert 
result is not None 96 | 97 | 98 | class TestMcpToolExecution: 99 | """Core integration: kernel code calling MCP tools through approval.""" 100 | 101 | @pytest.mark.asyncio 102 | async def test_tool_call_with_approval_accepted(self, code_executor: CodeExecutor): 103 | """Test calling a tool and accepting the approval request.""" 104 | code = f""" 105 | from {MCP_SERVER_NAME}.tool_2 import run, Params 106 | result = run(Params(s="hello")) 107 | print(result) 108 | """ 109 | 110 | results = [] 111 | async for item in code_executor.stream(code): 112 | match item: 113 | case ApprovalRequest(): 114 | await item.accept() 115 | case CodeExecutionResult(): 116 | results.append(item) 117 | 118 | assert len(results) == 1 119 | assert results[0].text is not None 120 | assert "You passed to tool 2: hello" in results[0].text 121 | 122 | @pytest.mark.asyncio 123 | async def test_run_auto_approves_tool_calls(self, code_executor: CodeExecutor): 124 | """Test that execute() auto-approves tool calls and returns result.""" 125 | code = f""" 126 | from {MCP_SERVER_NAME}.tool_2 import run, Params 127 | result = run(Params(s="hello")) 128 | print(result) 129 | """ 130 | 131 | result = await code_executor.execute(code) 132 | 133 | assert isinstance(result, CodeExecutionResult) 134 | assert result.text is not None 135 | assert "You passed to tool 2: hello" in result.text 136 | 137 | @pytest.mark.asyncio 138 | async def test_tool_call_with_approval_rejected(self, code_executor: CodeExecutor): 139 | """Test calling a tool and rejecting the approval request.""" 140 | code = f""" 141 | from {MCP_SERVER_NAME}.tool_2 import run, Params 142 | result = run(Params(s="hello")) 143 | print(result) 144 | """ 145 | 146 | with pytest.raises(CodeExecutionError) as exc_info: 147 | async for item in code_executor.stream(code): 148 | match item: 149 | case ApprovalRequest(): 150 | await item.reject() 151 | 152 | assert "rejected" in str(exc_info.value).lower() 153 | 154 | @pytest.mark.asyncio 155 | async 
def test_tool_with_structured_output(self, code_executor: CodeExecutor): 156 | """Test calling a tool with structured output (Pydantic model).""" 157 | code = f""" 158 | from {MCP_SERVER_NAME}.tool_3 import run, Params 159 | result = run(Params(name="test", level=2)) 160 | print(f"status={{result.status}}") 161 | print(f"count={{result.count}}") 162 | print(f"inner_code={{result.inner.code}}") 163 | """ 164 | 165 | result = await code_executor.execute(code) 166 | 167 | assert result.text is not None 168 | assert "status=completed_test" in result.text 169 | assert "count=4" in result.text 170 | assert "inner_code=200" in result.text 171 | 172 | @pytest.mark.asyncio 173 | async def test_multiple_tool_calls_in_sequence(self, code_executor: CodeExecutor): 174 | """Test multiple tool calls in one code block, handling each approval.""" 175 | code = f""" 176 | from {MCP_SERVER_NAME}.tool_1 import run as run_1, Params as Params1 177 | from {MCP_SERVER_NAME}.tool_2 import run as run_2, Params as Params2 178 | 179 | r1 = run_1(Params1(s="first")) 180 | print(f"result1: {{r1}}") 181 | 182 | r2 = run_2(Params2(s="second")) 183 | print(f"result2: {{r2}}") 184 | """ 185 | 186 | approvals = [] 187 | results = [] 188 | async for item in code_executor.stream(code): 189 | match item: 190 | case ApprovalRequest(): 191 | approvals.append(item) 192 | await item.accept() 193 | case CodeExecutionResult(): 194 | results.append(item) 195 | 196 | assert len(approvals) == 2 197 | assert len(results) == 1 198 | assert results[0].text is not None 199 | assert "result1: You passed to tool 1: first" in results[0].text 200 | assert "result2: You passed to tool 2: second" in results[0].text 201 | 202 | @pytest.mark.asyncio 203 | async def test_tool_call_with_streaming(self, code_executor: CodeExecutor): 204 | """Test combining streaming output with tool approval.""" 205 | code = f""" 206 | from {MCP_SERVER_NAME}.tool_2 import run, Params 207 | 208 | print("before tool call", flush=True) 209 | 
result = run(Params(s="test")) 210 | print(f"after tool call: {{result}}", flush=True) 211 | """ 212 | 213 | chunks = [] 214 | approvals = [] 215 | result = None 216 | async for item in code_executor.stream(code, chunks=True): 217 | match item: 218 | case CodeExecutionChunk(): 219 | chunks.append(item) 220 | case ApprovalRequest(): 221 | approvals.append(item) 222 | await item.accept() 223 | case CodeExecutionResult(): 224 | result = item 225 | 226 | assert len(approvals) == 1 227 | combined_text = "".join(c.text for c in chunks) 228 | assert "before tool call" in combined_text 229 | assert "after tool call" in combined_text 230 | assert result is not None 231 | 232 | 233 | class TestApprovalFlow: 234 | """Detailed approval behavior tests.""" 235 | 236 | @pytest.mark.asyncio 237 | async def test_approval_request_contains_tool_info(self, code_executor: CodeExecutor): 238 | """Test that approval request contains server_name, tool_name, and tool_args.""" 239 | code = f""" 240 | from {MCP_SERVER_NAME}.tool_2 import run, Params 241 | result = run(Params(s="hello")) 242 | """ 243 | 244 | approval = None 245 | async for item in code_executor.stream(code): 246 | match item: 247 | case ApprovalRequest(): 248 | approval = item 249 | await item.accept() 250 | 251 | assert approval is not None 252 | assert approval.server_name == MCP_SERVER_NAME 253 | assert approval.tool_name == "tool_2" 254 | assert approval.tool_args == {"s": "hello"} 255 | 256 | @pytest.mark.asyncio 257 | async def test_hyphenated_tool_name_preserved(self, code_executor: CodeExecutor): 258 | """Test that hyphenated tool names are preserved in approval request.""" 259 | code = f""" 260 | from {MCP_SERVER_NAME}.tool_1 import run, Params 261 | result = run(Params(s="test")) 262 | """ 263 | 264 | approval = None 265 | async for item in code_executor.stream(code): 266 | match item: 267 | case ApprovalRequest(): 268 | approval = item 269 | await item.accept() 270 | 271 | assert approval is not None 272 | # 
tool-1 is the original MCP name, tool_1 is the sanitized module name 273 | assert approval.tool_name == "tool-1" 274 | 275 | 276 | class TestExecutorLifecycle: 277 | """Tests for executor lifecycle management.""" 278 | 279 | @pytest.mark.asyncio 280 | async def test_reset_clears_kernel_state(self, code_executor: CodeExecutor): 281 | """Test that reset() clears kernel state but allows continued execution.""" 282 | # Set a variable 283 | await code_executor.execute("x = 42") 284 | 285 | # Verify it exists 286 | result = await code_executor.execute("print(x)") 287 | assert result.text == "42" 288 | 289 | # Reset the executor 290 | await code_executor.reset() 291 | 292 | # Verify the variable no longer exists 293 | with pytest.raises(CodeExecutionError) as exc_info: 294 | await code_executor.execute("print(x)") 295 | assert "NameError" in str(exc_info.value) 296 | 297 | # Verify we can still execute code 298 | result = await code_executor.execute("print('after reset')") 299 | assert result.text == "after reset" 300 | 301 | 302 | @pytest.mark.skipif(sys.platform != "darwin", reason="Sandbox tests only run on macOS") 303 | class TestSandbox: 304 | """Tests for sandbox configuration.""" 305 | 306 | HTTP_CODE = """ 307 | import urllib.request 308 | response = urllib.request.urlopen('https://example.org') 309 | content = response.read().decode('utf-8') 310 | print(content) 311 | """ 312 | 313 | @pytest.mark.asyncio 314 | async def test_custom_sandbox_allows_example_org(self): 315 | """Test that custom sandbox config allows example.org access.""" 316 | async with CodeExecutor( 317 | sandbox=True, 318 | sandbox_config=Path("tests", "integration", "sandbox.json"), 319 | log_level="WARNING", 320 | ) as executor: 321 | result = await executor.execute(self.HTTP_CODE) 322 | 323 | assert result.text is not None 324 | assert "Example Domain" in result.text 325 | --------------------------------------------------------------------------------