├── server ├── __init__.py ├── prompts │ ├── __init__.py │ └── python_programmer.py ├── sandbox │ ├── __init__.py │ ├── env.py │ ├── downloader.py │ └── runner.py ├── tools │ ├── __init__.py │ ├── mount_file.py │ ├── persist_artifact.py │ ├── workspace_inspect.py │ └── run_code.py ├── config.py └── main.py ├── tests ├── __init__.py ├── integration │ ├── __init__.py │ └── test_mcp_protocol.py ├── unit │ ├── test_main.py │ ├── test_config.py │ ├── test_run_code_tool.py │ ├── test_sandbox_env.py │ └── test_sandbox_runner.py └── conftest.py ├── requirements.txt ├── primslogo.png ├── glama.json ├── examples ├── run_code.py ├── list_tools.py ├── mount_and_run.py ├── session_persistence.py ├── inspect_workspace.py └── artifact_download.py ├── Dockerfile ├── requirements-dev.txt ├── .flake8 ├── .gitignore ├── LICENSE ├── ruff.toml ├── scripts ├── docker_run.sh ├── setup_env.sh └── test.sh ├── pyproject.toml ├── README.md └── .github └── workflows └── ci.yml /server/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /server/prompts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /server/sandbox/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Tests package 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | fastmcp 2 | aiohttp 3 | aiofiles -------------------------------------------------------------------------------- /tests/integration/__init__.py: -------------------------------------------------------------------------------- 1 | # Integration tests package 2 | -------------------------------------------------------------------------------- /primslogo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hileamlakB/Python-Runtime-Interpreter-MCP-Server/HEAD/primslogo.png -------------------------------------------------------------------------------- /glama.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://glama.ai/mcp/schemas/server.json", 3 | "maintainers": [ 4 | "hileamlakb" 5 | ] 6 | } 7 | -------------------------------------------------------------------------------- /server/tools/__init__.py: -------------------------------------------------------------------------------- 1 | # register is called from server.main, so import here is enough 2 | from . import mount_file # noqa: F401 3 | from . import persist_artifact # noqa: F401 4 | from . 
import workspace_inspect # noqa: F401 5 | -------------------------------------------------------------------------------- /examples/run_code.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | 3 | from fastmcp import Client 4 | 5 | 6 | async def main(): 7 | async with Client("http://localhost:9000/mcp") as client: 8 | code = "print('Hello from FastMCP!')" 9 | result = await client.call_tool("run_code", {"code": code}) 10 | print("Result: \n\t", result) 11 | 12 | 13 | if __name__ == "__main__": 14 | asyncio.run(main()) 15 | -------------------------------------------------------------------------------- /examples/list_tools.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import json 3 | 4 | from fastmcp import Client 5 | 6 | 7 | async def main(): 8 | async with Client("http://localhost:9000/mcp") as client: 9 | tools = await client.list_tools() 10 | print("Available tools:") 11 | for tool in tools: 12 | print(f"- {tool.name}: {tool.description}") 13 | print(json.dumps(tool.inputSchema, indent=2)) 14 | 15 | 16 | if __name__ == "__main__": 17 | asyncio.run(main()) 18 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.13-slim 2 | 3 | # System deps 4 | RUN apt-get update && apt-get install -y --no-install-recommends \ 5 | build-essential curl git unzip wget ca-certificates \ 6 | && rm -rf /var/lib/apt/lists/* 7 | 8 | WORKDIR /app 9 | 10 | COPY requirements.txt . 11 | RUN pip install --upgrade pip && pip install -r requirements.txt 12 | 13 | COPY ./server ./server 14 | 15 | ENV PYTHONPATH=/app 16 | 17 | CMD ["fastmcp", "run", "server/main.py", "--transport", "http", "--host", "0.0.0.0", "--port", "9000"] -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | # Development dependencies for testing, linting, and CI/CD 2 | pytest>=7.4.0 3 | pytest-asyncio>=0.21.0 4 | pytest-cov>=4.1.0 5 | pytest-mock>=3.11.0 6 | pytest-timeout>=2.1.0 7 | 8 | # Code quality and formatting 9 | black>=23.7.0 10 | isort>=5.12.0 11 | ruff>=0.0.280 12 | mypy>=1.5.0 13 | 14 | # Security scanning 15 | bandit[toml]>=1.7.5 16 | safety>=2.3.0 17 | 18 | # HTTP testing 19 | httpx>=0.24.0 20 | pytest-httpx>=0.21.0 21 | 22 | # Additional testing utilities 23 | factory-boy>=3.3.0 24 | freezegun>=1.2.0 -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 88 3 | # Black's default line length 4 | extend-ignore = 5 | # E203: whitespace before ':' (conflicts with black) 6 | E203, 7 | # W503: line break before binary operator (conflicts with black) 8 | W503, 9 | # E501: line too long (we set max-line-length to 88) 10 | E501 11 | 12 | exclude = 13 | .git, 14 | __pycache__, 15 | .venv, 16 | venv, 17 | build, 18 | dist, 19 | *.egg-info 20 | 21 | per-file-ignores = 22 | # Allow unused imports in __init__.py files 23 | __init__.py:F401 -------------------------------------------------------------------------------- /server/config.py: -------------------------------------------------------------------------------- 1 | """Centralised configuration for PRIMCS. 
2 | 3 | Environment variables: 4 | • PRIMCS_TMP_DIR – custom temp directory 5 | • PRIMCS_TIMEOUT – max seconds per run (default 100) 6 | • PRIMCS_MAX_OUTPUT – cap on stdout/stderr bytes (default 1 MB) 7 | """ 8 | 9 | import os 10 | from pathlib import Path 11 | 12 | TMP_DIR = Path(os.getenv("PRIMCS_TMP_DIR", "/tmp/primcs")) 13 | TMP_DIR.mkdir(parents=True, exist_ok=True) 14 | 15 | TIMEOUT_SECONDS = int(os.getenv("PRIMCS_TIMEOUT", "100")) 16 | MAX_OUTPUT_BYTES = int(os.getenv("PRIMCS_MAX_OUTPUT", str(1024 * 1024))) # 1MB 17 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # Virtual environments 27 | .venv/ 28 | venv/ 29 | env/ 30 | ENV/ 31 | 32 | # PyInstaller 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | 53 | # Jupyter Notebook 54 | .ipynb_checkpoints 55 | 56 | # pyenv 57 | .python-version 58 | 59 | # dotenv 60 | .env 61 | .env.* 62 | 63 | # mypy 64 | .mypy_cache/ 65 | .dmypy.json 66 | 67 | # VS Code 68 | .vscode/ 69 | 70 | # macOS 71 | .DS_Store 72 | TODO.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Hileamlak Mulugeta Yitayew 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE.
22 | -------------------------------------------------------------------------------- /examples/mount_and_run.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import json 3 | 4 | from fastmcp import Client 5 | 6 | SERVER = "http://localhost:9000/mcp" 7 | CSV_URL = "https://raw.githubusercontent.com/cs109/2014_data/master/countries.csv" 8 | 9 | 10 | async def main() -> None: 11 | async with Client(SERVER) as client: 12 | # 1. Mount the CSV once for this session 13 | mount_params = { 14 | "url": CSV_URL, 15 | "mount_path": "data/countries.csv", 16 | } 17 | mount_resp = await client.call_tool("mount_file", mount_params) 18 | print("Mount response:", mount_resp[0].text) 19 | 20 | # 2. Run code that reads the mounted CSV without passing `files` 21 | code = """ 22 | import pandas as pd 23 | import os 24 | path = 'mounts/data/countries.csv' 25 | print('File exists:', os.path.exists(path)) 26 | print('Row count:', len(pd.read_csv(path))) 27 | """ 28 | run_resp = await client.call_tool("run_code", {"code": code}) 29 | print("Run result:") 30 | print(json.dumps(json.loads(run_resp[0].text), indent=2)) 31 | 32 | 33 | if __name__ == "__main__": 34 | asyncio.run(main()) 35 | -------------------------------------------------------------------------------- /tests/unit/test_main.py: -------------------------------------------------------------------------------- 1 | """Unit tests for server.main module.""" 2 | 3 | from unittest.mock import Mock, patch 4 | 5 | from server.main import mcp 6 | 7 | 8 | class TestMainModule: 9 | """Test main module functionality.""" 10 | 11 | def test_mcp_instance_exists(self) -> None: 12 | """Test that MCP instance is created.""" 13 | assert mcp is not None 14 | 15 | def test_mcp_instance_type(self) -> None: 16 | """Test that MCP instance has correct type.""" 17 | from fastmcp import FastMCP 18 | 19 | assert isinstance(mcp, FastMCP) 20 | 21 | @patch("server.tools.run_code.register") 22 | def test_tool_registration_called(self, mock_register: Mock) -> None: 23 | """Test that tool registration is called during import.""" 24 | # Re-import the module to trigger registration 25 | import importlib 26 | 27 | import server.main 28 | 29 | importlib.reload(server.main) 30 | 31 | # Verify register was called with the MCP instance 32 | mock_register.assert_called_once() 33 | call_args = mock_register.call_args[0] 34 | assert len(call_args) == 1 35 | # The argument should be a FastMCP instance 36 | from fastmcp import FastMCP 37 | 38 | assert isinstance(call_args[0], FastMCP) -------------------------------------------------------------------------------- /ruff.toml: -------------------------------------------------------------------------------- 1 | line-length = 88 2 | target-version = "py313" 3 | 4 | [lint] 5 | # Select specific rule categories that are important 6 | select = [ 7 | # pycodestyle 8 | "E", 9 | "W", 10 | # Pyflakes 11 | "F", 12 | # pyupgrade 13 | "UP", 14 | # flake8-bugbear 15 | "B", 16 | # isort 17 | "I", 18 | # flake8-unused-arguments 19 | "ARG", 20 | # flake8-use-pathlib 21 | "PTH", 22 | ] 23 | 24 | ignore = [ 25 | # Line too long - handled by formatter 26 | "E501", 27 | # Too many arguments 28 | "PLR0913", 29 | # Too many branches 30 | "PLR0912", 31 | # Too many statements 32 | "PLR0915", 33 | # Magic value comparison 34 | "PLR2004", 35 | # Exception string formatting 36 | "EM101", "EM102", "TRY003", 37 | # Docstring rules - can be added later 38 | "D100", "D101", "D102", "D103", "D104", "D202", "D205", 39 | # 
Assert statement 40 | "S101", 41 | ] 42 | 43 | [lint.per-file-ignores] 44 | "__init__.py" = ["F401"] # Allow unused imports 45 | "server/tools/__init__.py" = ["F401", "I001"] # Allow unused imports and import formatting 46 | "**/test_*.py" = ["ARG", "S"] # Relax some rules for tests 47 | "**/conftest.py" = ["ARG", "S"] 48 | 49 | [lint.isort] 50 | known-first-party = ["server"] 51 | force-single-line = false -------------------------------------------------------------------------------- /scripts/docker_run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # scripts/docker_run.sh - Build and run the PRIMCS Docker container, then print the MCP server URL. 3 | set -euo pipefail 4 | 5 | IMAGE_NAME=primcs 6 | CONTAINER_NAME=primcs_server 7 | PORT=9000 8 | 9 | # Build the Docker image 10 | 11 | echo "[docker_run] Building Docker image..." 12 | docker build -t $IMAGE_NAME . 13 | 14 | # Stop and remove any existing container with the same name 15 | if docker ps -a --format '{{.Names}}' | grep -Eq "^${CONTAINER_NAME}$"; then 16 | echo "[docker_run] Removing existing container..." 17 | docker rm -f $CONTAINER_NAME 18 | fi 19 | 20 | # Run the container 21 | 22 | echo "[docker_run] Starting Docker container..." 23 | docker run -d --name $CONTAINER_NAME -p $PORT:9000 $IMAGE_NAME 24 | 25 | # Print the MCP server URL 26 | 27 | echo "[docker_run] MCP server is running at: http://localhost:${PORT}/mcp" 28 | 29 | # Define cleanup function 30 | cleanup() { 31 | echo "[docker_run] Stopping Docker container..." 32 | docker stop $CONTAINER_NAME 33 | docker rm $CONTAINER_NAME 34 | echo "[docker_run] Container stopped and removed." 35 | exit 0 36 | } 37 | 38 | # Trap SIGINT and SIGTERM to cleanup 39 | trap cleanup SIGINT SIGTERM 40 | 41 | # Wait until told to exit (block forever, or until killed) 42 | docker logs -f $CONTAINER_NAME & 43 | wait $! -------------------------------------------------------------------------------- /server/sandbox/env.py: -------------------------------------------------------------------------------- 1 | """Utility helpers for creating isolated virtual environments.""" 2 | 3 | import asyncio 4 | import sys 5 | import venv 6 | from pathlib import Path 7 | 8 | # Default libraries always installed in every sandbox environment. 9 | _DEFAULT_PACKAGES: list[str] = ["pandas", "openpyxl", "requests"] 10 | 11 | 12 | async def create_virtualenv(requirements: list[str], run_dir: Path) -> Path: 13 | """Create a venv in run_dir/venv and install *requirements*.""" 14 | venv_dir = run_dir / "venv" 15 | venv.EnvBuilder(with_pip=True, clear=True).create(venv_dir) 16 | 17 | python = ( 18 | venv_dir / ("Scripts" if sys.platform.startswith("win") else "bin") / "python" 19 | ) 20 | 21 | # Combine caller-specified requirements with default packages. 
22 | all_requirements = list(dict.fromkeys(requirements + _DEFAULT_PACKAGES)) 23 | 24 | if all_requirements: 25 | proc = await asyncio.create_subprocess_exec( 26 | str(python), 27 | "-m", 28 | "pip", 29 | "install", 30 | "--no-cache-dir", 31 | *all_requirements, 32 | stdout=asyncio.subprocess.PIPE, 33 | stderr=asyncio.subprocess.PIPE, 34 | ) 35 | _, err = await proc.communicate() 36 | if proc.returncode != 0: 37 | raise RuntimeError(f"pip install failed: {err.decode()}") 38 | 39 | return python 40 | -------------------------------------------------------------------------------- /server/sandbox/downloader.py: -------------------------------------------------------------------------------- 1 | """Download remote files to the sandbox run directory.""" 2 | 3 | import asyncio 4 | from pathlib import Path 5 | 6 | import aiohttp 7 | 8 | __all__ = ["download_files"] 9 | 10 | 11 | async def _fetch(session: aiohttp.ClientSession, url: str, path: Path) -> None: 12 | async with session.get(url) as resp: 13 | resp.raise_for_status() 14 | path.write_bytes(await resp.read()) 15 | # Make the file read-only 16 | try: 17 | path.chmod(0o444) 18 | except PermissionError: # fallback on platforms that forbid chmod inside container 19 | pass 20 | 21 | 22 | async def download_files(files: list[dict[str, str]], dest: Path) -> list[Path]: 23 | """Download *files* concurrently into *dest*. 24 | 25 | Each element in *files* must be a dict with keys ``url`` and **``mountPath``** (required). 26 | 27 | Returns list of local paths (relative to *dest*). 28 | """ 29 | if not files: 30 | return [] 31 | 32 | dest.mkdir(parents=True, exist_ok=True) 33 | 34 | async with aiohttp.ClientSession() as session: 35 | tasks = [] 36 | for meta in files: 37 | url = meta["url"] 38 | if "mountPath" not in meta or not meta["mountPath"]: 39 | raise ValueError( 40 | "Each file entry must include a non-empty 'mountPath' key." 41 | ) 42 | 43 | relative = Path(meta["mountPath"]) 44 | local = dest / relative 45 | local.parent.mkdir(parents=True, exist_ok=True) 46 | tasks.append(_fetch(session, url, local)) 47 | await asyncio.gather(*tasks) 48 | 49 | return [dest / Path(f["mountPath"]) for f in files] 50 | -------------------------------------------------------------------------------- /examples/session_persistence.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import json 3 | 4 | from fastmcp import Client 5 | 6 | SERVER_URL = "http://localhost:9000/mcp" 7 | 8 | 9 | a_sync_code_first = """ 10 | import pandas as pd 11 | # Dataset was downloaded via `files` parameter. 12 | df = pd.read_csv('mounts/countries.csv') 13 | print("First 5 rows:\\n", df.head()) 14 | """ 15 | 16 | code_second = """ 17 | import pandas as pd 18 | df = pd.read_csv('mounts/countries.csv') 19 | print("Row count:", len(df)) 20 | """ 21 | 22 | 23 | async def main() -> None: 24 | """Demonstrate that files persist for the lifetime of an MCP session.""" 25 | async with Client("http://localhost:9000/mcp") as client: 26 | # 1. Run code that downloads a CSV file into the workspace mounts directory. 
27 | first_params = { 28 | "code": a_sync_code_first, 29 | "files": [ 30 | { 31 | "url": "https://raw.githubusercontent.com/cs109/2014_data/master/countries.csv", 32 | "mountPath": "countries.csv", 33 | } 34 | ], 35 | } 36 | run1 = await client.call_tool("run_code", first_params) 37 | data1 = json.loads(run1[0].text) 38 | print("\n=== Run #1 ===") 39 | print("STDOUT:\n", data1.get("stdout")) 40 | print("STDERR:\n", data1.get("stderr")) 41 | print("ARTIFACTS:", data1.get("artifacts")) 42 | 43 | # 2. Execute a second snippet in the SAME client session. 44 | # We do NOT pass the `files` parameter again. The CSV should still exist. 45 | run2 = await client.call_tool("run_code", {"code": code_second}) 46 | data2 = json.loads(run2[0].text) 47 | print("\n=== Run #2 ===") 48 | print("STDOUT:\n", data2.get("stdout")) 49 | print("STDERR:\n", data2.get("stderr")) 50 | print("ARTIFACTS:", data2.get("artifacts")) 51 | 52 | 53 | if __name__ == "__main__": 54 | asyncio.run(main()) 55 | -------------------------------------------------------------------------------- /examples/inspect_workspace.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import json 3 | 4 | from fastmcp import Client 5 | 6 | SERVER_URL = "http://localhost:9000/mcp" 7 | 8 | 9 | async def main() -> None: 10 | """Demonstrate workspace inspection tools.""" 11 | async with Client(SERVER_URL) as client: 12 | # 1. Create a small text file via run_code 13 | code = ( 14 | "with open('output/hello.txt', 'w') as f:\n" 15 | " f.write('Hello inspection!\\nThis is a test file.')\n" 16 | ) 17 | await client.call_tool("run_code", {"code": code}) 18 | 19 | def parse_dir_response(resp): 20 | """Convert streamed TextContents into a list of DirEntry dicts, handling both list and single-entry payloads.""" 21 | entries = [] 22 | for msg in resp: 23 | obj = json.loads(msg.text) 24 | if isinstance(obj, list): 25 | entries.extend(obj) 26 | else: 27 | entries.append(obj) 28 | return entries 29 | 30 | # 2. List root of session workspace 31 | root_resp = await client.call_tool("list_dir", {}) 32 | root_listing = parse_dir_response(root_resp) 33 | print("\n=== Workspace root ===") 34 | for entry in root_listing: 35 | print(f"{entry['type']:9} {entry['path']}") 36 | 37 | # 3. List contents of output/ 38 | out_resp = await client.call_tool("list_dir", {"dir_path": "output"}) 39 | out_listing = parse_dir_response(out_resp) 40 | print("\n=== output/ ===") 41 | for entry in out_listing: 42 | print(f"{entry['type']:9} {entry['path']} {entry['size']} bytes") 43 | 44 | # 4. 
Preview the text file we just created 45 | preview_resp = await client.call_tool( 46 | "preview_file", {"relative_path": "output/hello.txt"} 47 | ) 48 | preview = json.loads(preview_resp[0].text) 49 | print("\n=== Preview of output/hello.txt ===") 50 | print(preview["content"]) 51 | 52 | 53 | if __name__ == "__main__": 54 | asyncio.run(main()) 55 | -------------------------------------------------------------------------------- /server/tools/mount_file.py: -------------------------------------------------------------------------------- 1 | """MCP tool: download one or more remote files into mounts/ for the current session.""" 2 | 3 | from pathlib import Path 4 | 5 | from fastmcp import Context, FastMCP 6 | 7 | from server.config import TMP_DIR 8 | from server.sandbox.downloader import download_files 9 | 10 | 11 | def _session_root(ctx: Context | None) -> Path: 12 | sid: str | None = None 13 | if ctx: 14 | sid = ctx.session_id 15 | if not sid and ctx.request_context.request: 16 | sid = ctx.request_context.request.headers.get("mcp-session-id") 17 | if not sid: 18 | raise ValueError( 19 | "Missing session_id; include mcp-session-id header or create session-aware client." 20 | ) 21 | root = TMP_DIR / f"session_{sid}" 22 | root.mkdir(parents=True, exist_ok=True) 23 | (root / "mounts").mkdir(parents=True, exist_ok=True) 24 | return root 25 | 26 | 27 | def register(mcp: FastMCP) -> None: 28 | """Register the mount_file tool.""" 29 | 30 | @mcp.tool( 31 | name="mount_file", 32 | description=( 33 | "Download a remote file once per session and store it under mounts/. " 34 | "Subsequent run_code calls can access it via that path without re-downloading." 35 | ), 36 | ) 37 | async def _mount_file( 38 | url: str, 39 | mount_path: str, 40 | ctx: Context | None = None, 41 | ) -> dict: # {"mounted_as": "mounts/data/my.csv", "bytes": N} 42 | if ( 43 | Path(mount_path).is_absolute() 44 | or ".." in Path(mount_path).parts 45 | or not mount_path 46 | ): 47 | raise ValueError("mount_path must be a relative path without '..'") 48 | root = _session_root(ctx) 49 | mounts_dir = root / "mounts" 50 | spec: dict[str, str] = {"url": url, "mountPath": mount_path} 51 | downloaded: list[Path] = await download_files([spec], mounts_dir) 52 | local = downloaded[0] 53 | return { 54 | "mounted_as": str(local.relative_to(root)), 55 | "bytes": local.stat().st_size, 56 | } 57 | -------------------------------------------------------------------------------- /examples/artifact_download.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import json 3 | 4 | import aiohttp 5 | from fastmcp import Client 6 | 7 | 8 | async def main(): 9 | # Step 1: Run code that generates an artifact (a PNG file) 10 | code = """ 11 | import matplotlib.pyplot as plt 12 | plt.plot([1,2,3], [4,5,6]) 13 | plt.title('Test Plot') 14 | plt.savefig('output/plot.png') 15 | print('Plot saved!') 16 | """ 17 | async with Client("http://localhost:9000/mcp") as client: 18 | # Call the run_code tool 19 | params = { 20 | "code": code, 21 | "requirements": ["matplotlib"], # install matplotlib so the plot code runs 22 | } 23 | result = await client.call_tool("run_code", params) 24 | print("Result:", result) 25 | # Parse the result 26 | data = json.loads(result[0].text) 27 | artifacts = data.get("artifacts", []) 28 | if not artifacts: 29 | print("No artifacts returned!") 30 | return 31 | artifact = artifacts[0] 32 | rel_path = artifact["relative_path"] # e.g. 
"plots/plot.png" 33 | print(f"Artifact relative path: {rel_path}") 34 | 35 | # Session ID is included in the tool response 36 | session_id = data.get("session_id") 37 | if not session_id: 38 | print("No session_id returned – cannot download artifact.") 39 | return 40 | 41 | # Step 2: Download the artifact using aiohttp with the required header 42 | artifact_url = f"http://localhost:9000/artifacts/{rel_path}" 43 | headers = {"mcp-session-id": session_id} 44 | print( 45 | f"Downloading artifact from: {artifact_url} with session_id: {session_id}" 46 | ) 47 | async with aiohttp.ClientSession() as session: 48 | async with session.get(artifact_url, headers=headers) as resp: 49 | if resp.status == 200: 50 | content = await resp.read() 51 | from pathlib import Path 52 | 53 | with Path("downloaded_plot.png").open("wb") as f: 54 | f.write(content) 55 | print("Artifact downloaded as downloaded_plot.png") 56 | else: 57 | print(f"Failed to download artifact: {resp.status}") 58 | 59 | 60 | if __name__ == "__main__": 61 | asyncio.run(main()) 62 | -------------------------------------------------------------------------------- /server/tools/persist_artifact.py: -------------------------------------------------------------------------------- 1 | """MCP tool: persist an artifact to a client-provided presigned URL.""" 2 | 3 | from pathlib import Path 4 | 5 | import aiohttp 6 | from fastmcp import Context, FastMCP 7 | 8 | from server.config import TMP_DIR 9 | 10 | MAX_UPLOAD_BYTES = 1024 * 1024 * 20 # 20 MB cap for safety 11 | 12 | 13 | def register(mcp: FastMCP) -> None: 14 | """Register the `persist_artifact` tool on a FastMCP server instance.""" 15 | 16 | @mcp.tool( 17 | name="persist_artifact", 18 | description=( 19 | "Upload a file previously created by run_code to a presigned URL. " 20 | "The file path must be relative to the output/ directory of the current session, " 21 | "for example 'reports/report.pdf'. The client must include the same mcp-session-id " 22 | "header used for run_code so the tool can locate the correct session workspace." 23 | ), 24 | ) 25 | async def _persist_artifact( 26 | relative_path: str, 27 | presigned_url: str, 28 | ctx: Context | None = None, 29 | ) -> dict: # {uploaded_bytes: int, status: int} 30 | """Upload *relative_path* to *presigned_url* and return upload stats.""" 31 | 32 | # Basic sanitisation 33 | if Path(relative_path).is_absolute() or ".." 
in Path(relative_path).parts: 34 | raise ValueError( 35 | "relative_path must be inside output/ and cannot contain '..'" 36 | ) 37 | 38 | # Determine session ID 39 | sid = ctx.session_id 40 | if not sid and ctx.request_context.request: 41 | sid = ctx.request_context.request.headers.get("mcp-session-id") 42 | if not sid: 43 | raise ValueError("Missing session_id; ensure mcp-session-id header is set.") 44 | 45 | output_dir = TMP_DIR / f"session_{sid}" / "output" 46 | file_path = output_dir / relative_path 47 | if not file_path.is_file(): 48 | raise FileNotFoundError("Artifact not found: " + relative_path) 49 | 50 | size = file_path.stat().st_size 51 | if size > MAX_UPLOAD_BYTES: 52 | raise ValueError(f"Artifact exceeds size limit ({MAX_UPLOAD_BYTES} bytes)") 53 | 54 | async with aiohttp.ClientSession() as session: 55 | with file_path.open("rb") as fh: 56 | resp = await session.put(presigned_url, data=fh) 57 | status = resp.status 58 | await resp.release() 59 | if status >= 400: 60 | raise RuntimeError(f"Upload failed with HTTP {status}") 61 | 62 | return {"uploaded_bytes": size, "status": status} 63 | -------------------------------------------------------------------------------- /server/prompts/python_programmer.py: -------------------------------------------------------------------------------- 1 | """Python programmer prompt for FastMCP. 2 | Generates instructions for an agent that outputs Python code to be executed via the `run_code` tool. 3 | """ 4 | 5 | from fastmcp import FastMCP 6 | 7 | _TEMPLATE = ( 8 | "PythonProgrammerAgent:\n" 9 | " instructions: |\n" 10 | " You are an AI assistant specialised in Python coding. Your task is to generate Python code based on a given task description. The code will be executed in a secure sandbox via the `run_code` tool. Follow these rules:\n\n" 11 | " 1. Task description:\n \n {task}\n \n\n" 12 | " \n {mounted_files}\n \n\n" 13 | " 2. Guidelines for your code:\n" 14 | " • The sandbox is stateless unless the client reuses a session_id; treat each call as a fresh environment with the mounted files available at start\n" 15 | " • ALWAYS use print() (or log to stderr) for any output you want returned (e.g. print(df.head())). Expressions alone are ignored.\n" 16 | " • Keep the code concise yet complete.\n" 17 | " • If additional packages are required, declare them under as a Python list of pip specs.\n" 18 | " • The files listed above are ALREADY mounted read-only at ./mounts/. Access them directly without downloading.\n" 19 | " • If you also need to download NEW remote files, list them under as {{'url': URL, 'mountPath': PATH}}. They'll be downloaded before execution.\n" 20 | " • Use pd.set_option('display.max_columns', None) and pd.set_option('display.width', 10000) for full DataFrame output.\n\n" 21 | " 3. Response format (exactly this structure):\n\n" 22 | " \n # your python here\n \n\n" 23 | " \n # optional list, e.g. ['pandas']\n \n\n" 24 | " \n # optional list for NEW downloads, e.g. 
[{{'url': 'https://...', 'mountPath': 'data.csv'}}]\n \n\n" 25 | " Ensure the code is fully self-contained and runnable as a script.\n" 26 | ) 27 | 28 | 29 | def register(mcp: FastMCP) -> None: 30 | """Register the python_programmer prompt on the given FastMCP server.""" 31 | 32 | @mcp.prompt( 33 | name="python_programmer", 34 | description="Return a template that instructs an LLM to produce Python code suitable for the run_code tool.", 35 | ) 36 | def _python_programmer_prompt( 37 | task: str, 38 | mounted_files: list[str] | None = None, 39 | ) -> str: 40 | joined = "\n".join(mounted_files or []) 41 | return _TEMPLATE.format(task=task.strip(), mounted_files=joined) 42 | -------------------------------------------------------------------------------- /scripts/setup_env.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # scripts/setup_env.sh ── quick dev-environment bootstrap 3 | # 4 | # Usage: 5 | # ./scripts/setup_env.sh # creates .venv & installs requirements 6 | # source .venv/bin/activate # activate the venv (remember each shell) 7 | set -euo pipefail 8 | 9 | # Determine which Python interpreter to use. 10 | # 11 | # Priority order: 12 | # 1. Respect an explicit $PYTHON environment variable. 13 | # 2. If a `python3.13` executable exists in PATH, use that (it is 14 | # typically installed alongside other minor versions and avoids any 15 | # ambiguity with an older default `python3`). 16 | # 3. Fallback to the generic `python3` binary. 17 | 18 | if [[ -n "${PYTHON:-}" ]]; then 19 | PYTHON_BIN="${PYTHON}" 20 | else 21 | if command -v python3.13 &>/dev/null; then 22 | PYTHON_BIN="python3.13" 23 | else 24 | PYTHON_BIN="python3" 25 | fi 26 | fi 27 | 28 | VENV_DIR=".venv" 29 | 30 | # Require Python ≥ 3.13 31 | REQ_MAJOR=3 32 | REQ_MINOR=13 33 | 34 | # Helper: ensure a command exists 35 | have() { command -v "$1" &>/dev/null; } 36 | 37 | if ! have "$PYTHON_BIN"; then 38 | cat >&2 <&1 | awk '{print $2}') 61 | MAJOR=${CURRENT%%.*} 62 | MINOR=$(echo "$CURRENT" | cut -d. -f2) 63 | 64 | if (( MAJOR < REQ_MAJOR || ( MAJOR == REQ_MAJOR && MINOR < REQ_MINOR ) )); then 65 | cat >&2 < Response: 37 | """ 38 | Serve an artifact file for the current session. The client must include 39 | the session ID in the "mcp-session-id" header. The URL path is the 40 | relative path returned by the tool (e.g. "plots/plot.png"), which is 41 | resolved under session_/output/. 
42 | """ 43 | relative_path = request.path_params["relative_path"] 44 | relative_path = os.path.normpath(relative_path) 45 | path_obj = Path(relative_path) 46 | if relative_path.startswith("..") or path_obj.is_absolute(): 47 | return Response("Invalid artifact path", status_code=400) 48 | 49 | session_id = request.headers.get("mcp-session-id") 50 | if not session_id: 51 | return Response("Missing mcp-session-id header", status_code=400) 52 | 53 | base_dir = TMP_DIR / f"session_{session_id}" / "output" 54 | file_path = base_dir / relative_path 55 | 56 | try: 57 | file_path = file_path.resolve(strict=True) 58 | except FileNotFoundError: 59 | return Response("File not found", status_code=404) 60 | 61 | # Ensure file is within the output directory 62 | if not str(file_path).startswith(str(base_dir.resolve())): 63 | return Response("Forbidden", status_code=403) 64 | if not file_path.is_file(): 65 | return Response("Not a file", status_code=404) 66 | 67 | return FileResponse(str(file_path), filename=file_path.name) 68 | 69 | 70 | if __name__ == "__main__": # pragma: no cover 71 | port = int(os.getenv("PORT", "9000")) 72 | # Start the server with HTTP transport (modern replacement for SSE) 73 | mcp.run(transport="streamable-http", host="0.0.0.0", port=port) 74 | -------------------------------------------------------------------------------- /tests/unit/test_config.py: -------------------------------------------------------------------------------- 1 | """Unit tests for server.config module.""" 2 | 3 | import os 4 | from pathlib import Path 5 | from unittest.mock import patch 6 | 7 | import pytest 8 | 9 | from server import config 10 | 11 | 12 | class TestConfig: 13 | """Test configuration management.""" 14 | 15 | def test_default_tmp_dir(self) -> None: 16 | """Test default TMP_DIR configuration.""" 17 | with patch.dict(os.environ, {}, clear=True): 18 | # Re-import to get fresh configuration 19 | import importlib 20 | 21 | importlib.reload(config) 22 | 23 | expected_path = Path("/tmp/primcs") 24 | assert config.TMP_DIR == expected_path 25 | 26 | def test_custom_tmp_dir(self, tmp_path: Path) -> None: 27 | """Test custom TMP_DIR from environment variable.""" 28 | custom_path = str(tmp_path / "custom_tmp") 29 | with patch.dict(os.environ, {"PRIMCS_TMP_DIR": custom_path}): 30 | import importlib 31 | 32 | importlib.reload(config) 33 | 34 | assert config.TMP_DIR == Path(custom_path) 35 | 36 | def test_default_timeout(self) -> None: 37 | """Test default timeout configuration.""" 38 | with patch.dict(os.environ, {}, clear=True): 39 | import importlib 40 | 41 | importlib.reload(config) 42 | 43 | assert config.TIMEOUT_SECONDS == 100 44 | 45 | def test_custom_timeout(self) -> None: 46 | """Test custom timeout from environment variable.""" 47 | custom_timeout = "60" 48 | with patch.dict(os.environ, {"PRIMCS_TIMEOUT": custom_timeout}): 49 | import importlib 50 | 51 | importlib.reload(config) 52 | 53 | assert config.TIMEOUT_SECONDS == 60 54 | 55 | def test_default_max_output(self) -> None: 56 | """Test default max output configuration.""" 57 | with patch.dict(os.environ, {}, clear=True): 58 | import importlib 59 | 60 | importlib.reload(config) 61 | 62 | assert config.MAX_OUTPUT_BYTES == 1024 * 1024 # 1MB 63 | 64 | def test_custom_max_output(self) -> None: 65 | """Test custom max output from environment variable.""" 66 | custom_max = "2048000" # 2MB 67 | with patch.dict(os.environ, {"PRIMCS_MAX_OUTPUT": custom_max}): 68 | import importlib 69 | 70 | importlib.reload(config) 71 | 72 | assert 
config.MAX_OUTPUT_BYTES == 2048000 73 | 74 | def test_invalid_timeout_falls_back_to_default(self) -> None: 75 | """Test that invalid timeout values fall back to default.""" 76 | with patch.dict(os.environ, {"PRIMCS_TIMEOUT": "invalid"}): 77 | with pytest.raises(ValueError): 78 | import importlib 79 | 80 | importlib.reload(config) 81 | 82 | def test_invalid_max_output_falls_back_to_default(self) -> None: 83 | """Test that invalid max output values fall back to default.""" 84 | with patch.dict(os.environ, {"PRIMCS_MAX_OUTPUT": "invalid"}): 85 | with pytest.raises(ValueError): 86 | import importlib 87 | 88 | importlib.reload(config) 89 | 90 | def test_tmp_dir_creation( 91 | self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch 92 | ) -> None: 93 | """Test that TMP_DIR is created if it doesn't exist.""" 94 | test_dir = tmp_path / "test_primcs" 95 | monkeypatch.setenv("PRIMCS_TMP_DIR", str(test_dir)) 96 | 97 | import importlib 98 | 99 | importlib.reload(config) 100 | 101 | assert test_dir.exists() 102 | assert test_dir.is_dir() 103 | -------------------------------------------------------------------------------- /server/sandbox/runner.py: -------------------------------------------------------------------------------- 1 | """Orchestrate sandbox execution of untrusted Python code.""" 2 | 3 | import asyncio 4 | import mimetypes 5 | import shutil 6 | import textwrap 7 | from typing import TypedDict 8 | 9 | from server.config import TIMEOUT_SECONDS, TMP_DIR 10 | from server.sandbox.downloader import download_files 11 | from server.sandbox.env import create_virtualenv 12 | 13 | __all__ = ["run_code"] 14 | 15 | 16 | # Precise schema for each artifact entry. 17 | class ArtifactMeta(TypedDict): 18 | name: str 19 | relative_path: str 20 | size: int 21 | mime: str 22 | 23 | 24 | # Typed return for run_code results. 25 | class RunCodeResult(TypedDict, total=False): 26 | """Result of running code in the sandbox. 27 | Optionally includes a feedback field with suggestions or warnings (list of strings). 28 | """ 29 | 30 | stdout: str 31 | stderr: str 32 | artifacts: list[ArtifactMeta] 33 | feedback: str 34 | 35 | 36 | async def run_code( 37 | *, 38 | code: str, 39 | requirements: list[str], 40 | files: list[dict[str, str]], 41 | run_id: str, 42 | session_id: str | None = None, 43 | ) -> RunCodeResult: 44 | """Execute *code* inside an isolated virtual-env and return captured output. Artifacts are returned as paths relative to the output directory. Only files inside output/ are included.""" 45 | 46 | if session_id: 47 | # Persist workspace for the lifetime of the client session. 48 | work = TMP_DIR / f"session_{session_id}" 49 | work.mkdir(parents=True, exist_ok=True) 50 | else: 51 | # Legacy per-run workspace (stateless behaviour). 52 | work = TMP_DIR / f"run_{run_id}" 53 | if work.exists(): 54 | shutil.rmtree(work) 55 | work.mkdir(parents=True, exist_ok=True) 56 | 57 | # Ensure mounts directory exists for all modes. 58 | (work / "mounts").mkdir(parents=True, exist_ok=True) 59 | # Directory where user code should place output/artifacts. 
60 | (work / "output").mkdir(parents=True, exist_ok=True) 61 | 62 | await download_files(files, work / "mounts") 63 | 64 | py = await create_virtualenv(requirements, work) 65 | 66 | script_name = f"script_{run_id}.py" if session_id else "script.py" 67 | script = work / script_name 68 | script.write_text(textwrap.dedent(code)) 69 | 70 | proc = await asyncio.create_subprocess_exec( 71 | str(py), 72 | str(script), 73 | stdout=asyncio.subprocess.PIPE, 74 | stderr=asyncio.subprocess.PIPE, 75 | cwd=work, 76 | ) 77 | 78 | try: 79 | out, err = await asyncio.wait_for(proc.communicate(), timeout=TIMEOUT_SECONDS) 80 | except TimeoutError as err: 81 | proc.kill() 82 | await proc.wait() 83 | msg = f"Execution timed out after {TIMEOUT_SECONDS}s" 84 | raise RuntimeError(msg) from err 85 | 86 | # Collect artifacts inside the output directory. 87 | artifacts: list[ArtifactMeta] = [] 88 | output_dir = work / "output" 89 | for p in output_dir.rglob("*"): 90 | if p.is_file(): 91 | try: 92 | rel_path = p.relative_to(output_dir) 93 | except ValueError: 94 | continue # skip files not in output_dir 95 | size = p.stat().st_size 96 | mime, _ = mimetypes.guess_type(str(p)) 97 | artifacts.append( 98 | { 99 | "name": rel_path.name, 100 | "relative_path": rel_path.as_posix(), 101 | "size": size, 102 | "mime": mime or "application/octet-stream", 103 | } 104 | ) 105 | 106 | return {"stdout": out.decode(), "stderr": err.decode(), "artifacts": artifacts} 107 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 88 3 | target-version = ['py313'] 4 | include = '\.pyi?$' 5 | extend-exclude = ''' 6 | /( 7 | # directories 8 | \.eggs 9 | | \.git 10 | | \.hg 11 | | \.mypy_cache 12 | | \.tox 13 | | \.venv 14 | | venv 15 | | _build 16 | | buck-out 17 | | build 18 | | dist 19 | )/ 20 | ''' 21 | 22 | [build-system] 23 | requires = ["setuptools>=61.0", "wheel"] 24 | build-backend = "setuptools.build_meta" 25 | 26 | [project] 27 | name = "prims" 28 | version = "0.1.0" 29 | description = "Python Runtime Interpreter MCP Server" 30 | readme = "README.md" 31 | license = {file = "LICENSE"} 32 | authors = [ 33 | {name = "PRIMS Contributors"} 34 | ] 35 | requires-python = ">=3.13" 36 | dependencies = [ 37 | "fastmcp", 38 | "aiohttp", 39 | "aiofiles", 40 | ] 41 | 42 | [project.optional-dependencies] 43 | dev = [ 44 | "pytest>=7.4.0", 45 | "pytest-asyncio>=0.21.0", 46 | "pytest-cov>=4.1.0", 47 | "pytest-mock>=3.11.0", 48 | "pytest-timeout>=2.1.0", 49 | "black>=23.7.0", 50 | "isort>=5.12.0", 51 | "ruff>=0.0.280", 52 | "mypy>=1.5.0", 53 | "bandit[toml]>=1.7.5", 54 | "safety>=2.3.0", 55 | "httpx>=0.24.0", 56 | "pytest-httpx>=0.21.0", 57 | "factory-boy>=3.3.0", 58 | "freezegun>=1.2.0", 59 | ] 60 | 61 | [tool.pytest.ini_options] 62 | minversion = "7.0" 63 | addopts = [ 64 | "-ra", 65 | "--strict-markers", 66 | "--strict-config", 67 | "--cov=server", 68 | "--cov-report=term-missing", 69 | "--cov-report=html", 70 | "--cov-report=xml", 71 | "--cov-fail-under=10", 72 | ] 73 | testpaths = ["tests"] 74 | markers = [ 75 | "slow: marks tests as slow (deselect with '-m \"not slow\"')", 76 | "integration: marks tests as integration tests", 77 | "unit: marks tests as unit tests", 78 | "e2e: marks tests as end-to-end tests", 79 | ] 80 | asyncio_mode = "auto" 81 | timeout = 30 82 | 83 | [tool.coverage.run] 84 | source = ["server"] 85 | omit = [ 86 | "*/tests/*", 87 | "*/test_*", 88 | 
"server/__init__.py", 89 | "*/venv/*", 90 | "*/.venv/*", 91 | ] 92 | 93 | [tool.coverage.report] 94 | exclude_lines = [ 95 | "pragma: no cover", 96 | "def __repr__", 97 | "if self.debug:", 98 | "if settings.DEBUG", 99 | "raise AssertionError", 100 | "raise NotImplementedError", 101 | "if 0:", 102 | "if __name__ == .__main__.:", 103 | "class .*\\bProtocol\\):", 104 | "@(abc\\.)?abstractmethod", 105 | ] 106 | 107 | [tool.isort] 108 | profile = "black" 109 | line_length = 88 110 | multi_line_output = 3 111 | include_trailing_comma = true 112 | force_grid_wrap = 0 113 | use_parentheses = true 114 | ensure_newline_before_comments = true 115 | 116 | [tool.ruff] 117 | line-length = 88 118 | select = [ 119 | "E", # pycodestyle errors 120 | "W", # pycodestyle warnings 121 | "F", # pyflakes 122 | "I", # isort 123 | "B", # flake8-bugbear 124 | "C4", # flake8-comprehensions 125 | "UP", # pyupgrade 126 | ] 127 | ignore = [ 128 | "E501", # line too long, handled by black 129 | "B008", # do not perform function calls in argument defaults 130 | "C901", # too complex 131 | ] 132 | 133 | [tool.ruff.per-file-ignores] 134 | "__init__.py" = ["F401"] 135 | "tests/*" = ["B011"] 136 | 137 | [tool.mypy] 138 | python_version = "3.13" 139 | check_untyped_defs = true 140 | disallow_any_generics = true 141 | disallow_incomplete_defs = true 142 | disallow_untyped_defs = true 143 | no_implicit_optional = true 144 | warn_redundant_casts = true 145 | warn_unused_ignores = true 146 | warn_return_any = true 147 | strict_equality = true 148 | 149 | [[tool.mypy.overrides]] 150 | module = [ 151 | "fastmcp.*", 152 | "aiofiles.*", 153 | ] 154 | ignore_missing_imports = true 155 | 156 | [tool.bandit] 157 | exclude_dirs = ["tests", "venv", ".venv"] 158 | skips = ["B101", "B601"] 159 | -------------------------------------------------------------------------------- /scripts/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Test runner script for PRIMS 4 | set -e 5 | 6 | # Colors for output 7 | RED='\033[0;31m' 8 | GREEN='\033[0;32m' 9 | YELLOW='\033[1;33m' 10 | NC='\033[0m' # No Color 11 | 12 | # Default values 13 | TEST_TYPE="all" 14 | COVERAGE=true 15 | VERBOSE=false 16 | PARALLEL=false 17 | 18 | # Function to print usage 19 | usage() { 20 | echo "Usage: $0 [OPTIONS]" 21 | echo "" 22 | echo "Options:" 23 | echo " -t, --type TYPE Test type: unit, integration, all (default: all)" 24 | echo " -c, --no-coverage Disable coverage reporting" 25 | echo " -v, --verbose Enable verbose output" 26 | echo " -p, --parallel Run tests in parallel" 27 | echo " -h, --help Show this help message" 28 | echo "" 29 | echo "Examples:" 30 | echo " $0 # Run all tests with coverage" 31 | echo " $0 -t unit # Run only unit tests" 32 | echo " $0 -t integration -c # Run integration tests without coverage" 33 | echo " $0 -v -p # Run all tests verbosely in parallel" 34 | } 35 | 36 | # Parse command line arguments 37 | while [[ $# -gt 0 ]]; do 38 | case $1 in 39 | -t|--type) 40 | TEST_TYPE="$2" 41 | shift 2 42 | ;; 43 | -c|--no-coverage) 44 | COVERAGE=false 45 | shift 46 | ;; 47 | -v|--verbose) 48 | VERBOSE=true 49 | shift 50 | ;; 51 | -p|--parallel) 52 | PARALLEL=true 53 | shift 54 | ;; 55 | -h|--help) 56 | usage 57 | exit 0 58 | ;; 59 | *) 60 | echo "Unknown option: $1" 61 | usage 62 | exit 1 63 | ;; 64 | esac 65 | done 66 | 67 | # Validate test type 68 | if [[ ! "$TEST_TYPE" =~ ^(unit|integration|all)$ ]]; then 69 | echo -e "${RED}Error: Invalid test type '$TEST_TYPE'. 
Must be 'unit', 'integration', or 'all'${NC}" 70 | exit 1 71 | fi 72 | 73 | # Activate virtual environment if it exists 74 | if [[ -f ".venv/bin/activate" ]]; then 75 | echo -e "${YELLOW}Activating virtual environment...${NC}" 76 | source .venv/bin/activate 77 | fi 78 | 79 | # Build pytest command 80 | PYTEST_CMD="python -m pytest" 81 | 82 | # Add test directories based on type 83 | case $TEST_TYPE in 84 | unit) 85 | PYTEST_CMD="$PYTEST_CMD tests/unit/" 86 | ;; 87 | integration) 88 | PYTEST_CMD="$PYTEST_CMD tests/integration/" 89 | ;; 90 | all) 91 | PYTEST_CMD="$PYTEST_CMD tests/" 92 | ;; 93 | esac 94 | 95 | # Add coverage options 96 | if [[ "$COVERAGE" == "true" ]]; then 97 | PYTEST_CMD="$PYTEST_CMD --cov=server --cov-report=term-missing --cov-report=html:htmlcov --cov-report=xml" 98 | fi 99 | 100 | # Add verbose option 101 | if [[ "$VERBOSE" == "true" ]]; then 102 | PYTEST_CMD="$PYTEST_CMD -v" 103 | fi 104 | 105 | # Add parallel option 106 | if [[ "$PARALLEL" == "true" ]]; then 107 | # Check if pytest-xdist is installed 108 | if python -c "import xdist" 2>/dev/null; then 109 | PYTEST_CMD="$PYTEST_CMD -n auto" 110 | else 111 | echo -e "${YELLOW}Warning: pytest-xdist not installed. Running tests sequentially.${NC}" 112 | fi 113 | fi 114 | 115 | echo -e "${GREEN}Running $TEST_TYPE tests...${NC}" 116 | echo -e "${YELLOW}Command: $PYTEST_CMD${NC}" 117 | echo "" 118 | 119 | # Run the tests 120 | if eval $PYTEST_CMD; then 121 | echo "" 122 | echo -e "${GREEN}✅ Tests completed successfully!${NC}" 123 | 124 | # Show coverage report location if coverage was enabled 125 | if [[ "$COVERAGE" == "true" ]]; then 126 | echo -e "${YELLOW}📊 Coverage report available at: htmlcov/index.html${NC}" 127 | fi 128 | 129 | exit 0 130 | else 131 | echo "" 132 | echo -e "${RED}❌ Tests failed!${NC}" 133 | exit 1 134 | fi -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | PRIMS Logo 3 | Status: Alpha 4 | License: MIT 5 |
6 | 7 | # PRIMS – Python Runtime Interpreter MCP Server 8 | 9 | PRIMS is a tiny open-source **Model Context Protocol (MCP)** server that lets LLM agents run arbitrary Python code in a secure, throw-away sandbox. 10 | 11 | • **One tool, one job.** Exposes a single MCP tool – `run_code` – that executes user-supplied Python and streams back `stdout / stderr`. 12 | 13 | • **Isolated & reproducible.** Each call spins up a fresh virtual-env, installs any requested pip packages, mounts optional read-only files, then nukes the workspace. 14 | 15 | • **Zero config.** Works over MCP/stdio or drop it in Docker. 16 | 17 | --- 18 | 19 | ## Quick-start 20 | 21 | ### 1. Local development environment 22 | 23 | ```bash 24 | chmod +x scripts/setup_env.sh # once, to make the script executable 25 | ./scripts/setup_env.sh # creates .venv & installs deps 26 | 27 | # activate the venv in each new shell 28 | source .venv/bin/activate 29 | ``` 30 | 31 | ### 2. Launch the server 32 | 33 | ```bash 34 | python -m server.main # binds http://0.0.0.0:9000/mcp 35 | ``` 36 | 37 | ### 3. Docker 38 | 39 | ```bash 40 | # Quick one-liner (build + run) 41 | chmod +x scripts/docker_run.sh 42 | ./scripts/docker_run.sh # prints the MCP URL when ready 43 | ``` 44 | 45 | 46 | ## Examples 47 | 48 | ### List available tools 49 | 50 | You can use the provided script to list all tools exposed by the server: 51 | 52 | ```bash 53 | python examples/list_tools.py 54 | ``` 55 | 56 | Expected output (tool names and descriptions may vary): 57 | 58 | ``` 59 | Available tools: 60 | - run_code: Execute Python code in a secure sandbox with optional dependencies & file mounts. 61 | - list_dir: List files/directories in your session workspace. 62 | - preview_file: Preview up to 8 KB of a text file from your session workspace. 63 | - persist_artifact: Upload an output/ file to a presigned URL for permanent storage. 64 | - mount_file: Download a remote file once per session to `mounts/`. 65 | ``` 66 | 67 | ### Run code via the MCP server 68 | 69 | ```bash 70 | python examples/run_code.py 71 | ``` 72 | 73 | ### Mount a dataset once & reuse it 74 | 75 | ```bash 76 | python examples/mount_and_run.py 77 | ``` 78 | 79 | This mounts a CSV with `mount_file` and then reads it inside `run_code` without re-supplying the URL. 80 | 81 | ### Inspect your session workspace 82 | 83 | ```bash 84 | python examples/inspect_workspace.py 85 | ``` 86 | 87 | This shows how to use the **`list_dir`** and **`preview_file`** tools to browse files your code created. 88 | 89 | ### Persist an artifact to permanent storage 90 | 91 | The **`persist_artifact`** tool uploads a file from your `output/` directory to a presigned URL. 92 | 93 | Example (Python): 94 | 95 | ```python 96 | await client.call_tool("persist_artifact", { 97 | "relative_path": "plots/plot.png", 98 | "presigned_url": "https://bucket.s3.amazonaws.com/...signature...", 99 | }) 100 | ``` 101 | 102 | ### Download an artifact 103 | 104 | Small artifacts can be fetched directly: 105 | 106 | ```bash 107 | curl -H "mcp-session-id: " \ 108 | http://localhost:9000/artifacts/plots/plot.png -o plot.png 109 | ``` 110 | 111 | --- 112 | 113 | ## Available tools 114 | 115 | | Tool | Purpose | 116 | |---------------------|---------------------------------------------------------------| 117 | | `run_code` | Execute Python in an isolated sandbox with optional pip deps. | 118 | | `list_dir` | List files/directories inside your session workspace. | 119 | | `preview_file` | Return up to 8 KB of a text file for quick inspection. 
| 120 | | `persist_artifact` | Upload an `output/` file to a client-provided presigned URL. | 121 | | `mount_file` | Download a remote file once per session to `mounts/`. | 122 | 123 | See the `examples/` directory for end-to-end demos. 124 | 125 | ## Contributing 126 | Contributions are welcome! Feel free to open issues, suggest features, or submit pull requests to help improve PRIMS. 127 | 128 | 129 | If you find this project useful, please consider leaving a ⭐ to show your support. 130 | -------------------------------------------------------------------------------- /server/tools/workspace_inspect.py: -------------------------------------------------------------------------------- 1 | # """Workspace inspection tools for session files.""" 2 | 3 | import mimetypes 4 | import os 5 | from datetime import datetime 6 | from pathlib import Path 7 | from typing import TypedDict 8 | 9 | import aiofiles 10 | from fastmcp import Context, FastMCP 11 | 12 | from server.config import TMP_DIR 13 | 14 | _MAX_PREVIEW_BYTES = 8 * 1024 # 8 KB 15 | 16 | 17 | class DirEntry(TypedDict): 18 | name: str 19 | path: str 20 | type: str # 'file' | 'directory' 21 | size: int 22 | modified: str # ISO timestamp 23 | 24 | 25 | class FilePreview(TypedDict): 26 | name: str 27 | path: str 28 | size: int 29 | mime: str 30 | content: str # UTF-8 text (truncated) 31 | 32 | 33 | def _get_session_root(ctx: Context | None) -> Path: 34 | sid: str | None = None 35 | if ctx: 36 | sid = ctx.session_id 37 | if not sid and ctx.request_context.request: 38 | sid = ctx.request_context.request.headers.get("mcp-session-id") 39 | if not sid: 40 | raise ValueError( 41 | "Missing session_id; ensure the client includes the mcp-session-id header or uses a session-aware context." 42 | ) 43 | root = TMP_DIR / f"session_{sid}" 44 | root.mkdir(parents=True, exist_ok=True) 45 | return root.resolve() 46 | 47 | 48 | def _resolve_in_session(ctx: Context | None, relative_path: str) -> Path: 49 | root = _get_session_root(ctx) 50 | # Normalise & forbid traversal 51 | rel = Path(os.path.normpath(relative_path)) if relative_path else Path() 52 | if rel.is_absolute() or ".." in rel.parts: 53 | raise ValueError( 54 | "Path must be relative to session root and may not contain '..'." 55 | ) 56 | resolved = (root / rel).resolve() 57 | if not str(resolved).startswith(str(root.resolve())): 58 | raise ValueError("Path escapes session workspace.") 59 | return resolved 60 | 61 | 62 | def register(mcp: FastMCP) -> None: 63 | """Register workspace inspection tools on the given MCP server.""" 64 | 65 | @mcp.tool( 66 | name="list_dir", 67 | description=( 68 | "List files and directories within the current session workspace. " 69 | "Parameter `path` is relative to the session root (default '.') and cannot contain '..'." 
70 | ), 71 | ) 72 | async def _list_dir( 73 | dir_path: str | None = None, ctx: Context | None = None 74 | ) -> list[DirEntry]: 75 | target = _resolve_in_session(ctx, dir_path or ".") 76 | if not target.is_dir(): 77 | raise ValueError("Specified path is not a directory") 78 | entries: list[DirEntry] = [] 79 | for p in sorted(target.iterdir(), key=lambda p: p.name): 80 | stat = p.stat() 81 | entries.append( 82 | { 83 | "name": p.name, 84 | "path": str(p.relative_to(_get_session_root(ctx))), 85 | "type": "directory" if p.is_dir() else "file", 86 | "size": stat.st_size, 87 | "modified": datetime.fromtimestamp(stat.st_mtime).isoformat(), 88 | } 89 | ) 90 | return entries 91 | 92 | @mcp.tool( 93 | name="preview_file", 94 | description=( 95 | "Return up to 8 KB of a text file from the session workspace for quick inspection. " 96 | "`relative_path` must point to a file inside the session and not contain '..'." 97 | ), 98 | ) 99 | async def _preview_file( 100 | relative_path: str, ctx: Context | None = None 101 | ) -> FilePreview: 102 | file_path = _resolve_in_session(ctx, relative_path) 103 | if not file_path.is_file(): 104 | raise FileNotFoundError("File not found") 105 | size = file_path.stat().st_size 106 | if ( 107 | size > _MAX_PREVIEW_BYTES * 4 108 | ): # arbitrary limit 32 KB for previewable text files 109 | raise ValueError("File too large for preview") 110 | # Read up to _MAX_PREVIEW_BYTES and decode 111 | async with aiofiles.open(file_path, "rb") as fh: 112 | data = await fh.read(_MAX_PREVIEW_BYTES) 113 | try: 114 | content = data.decode("utf-8", errors="replace") 115 | except Exception: 116 | content = "" 117 | mime, _ = mimetypes.guess_type(str(file_path)) 118 | return { 119 | "name": file_path.name, 120 | "path": str(file_path.relative_to(_get_session_root(ctx))), 121 | "size": size, 122 | "mime": mime or "application/octet-stream", 123 | "content": content, 124 | } 125 | -------------------------------------------------------------------------------- /tests/unit/test_run_code_tool.py: -------------------------------------------------------------------------------- 1 | """Unit tests for server.tools.run_code module.""" 2 | 3 | from unittest.mock import Mock, patch 4 | from types import SimpleNamespace 5 | 6 | import pytest 7 | 8 | from server.tools.run_code import RESPONSE_FEEDBACK, register 9 | 10 | 11 | class TestRunCodeTool: 12 | """Test the run_code MCP tool.""" 13 | 14 | def test_register_function_exists(self) -> None: 15 | """Test that register function is properly defined.""" 16 | assert callable(register) 17 | 18 | @pytest.mark.asyncio 19 | async def test_run_code_tool_success( 20 | self, 21 | mock_fastmcp: Mock, 22 | mock_context: Mock, 23 | sample_python_code: str, 24 | ) -> None: 25 | """Test successful code execution through the MCP tool.""" 26 | # Mock the sandbox execute function 27 | mock_result = { 28 | "stdout": "Hello from sandbox!", 29 | "stderr": "", 30 | "artifacts": [], 31 | } 32 | 33 | with patch( 34 | "server.tools.run_code.sandbox_execute", return_value=mock_result 35 | ) as mock_execute: 36 | # Register the tool 37 | register(mock_fastmcp) 38 | 39 | # Get the registered tool function 40 | tool_calls = mock_fastmcp.tool.call_args_list 41 | assert len(tool_calls) == 1 42 | 43 | # Extract the tool function 44 | tool_decorator_call = tool_calls[0] 45 | tool_kwargs = tool_decorator_call[1] 46 | assert tool_kwargs["name"] == "run_code" 47 | assert "description" in tool_kwargs 48 | 49 | # The actual tool function would be called by FastMCP 50 | # We'll test it by 
calling the sandbox_execute function directly 51 | result = await mock_execute( 52 | code=sample_python_code, 53 | requirements=[], 54 | files=[], 55 | run_id="test-run", 56 | session_id="test-session", 57 | ) 58 | 59 | assert result["stdout"] == "Hello from sandbox!" 60 | assert result["stderr"] == "" 61 | 62 | @pytest.mark.asyncio 63 | async def test_run_code_tool_with_requirements( 64 | self, 65 | mock_fastmcp: Mock, 66 | sample_requirements: list[str], 67 | ) -> None: 68 | """Test code execution with pip requirements.""" 69 | mock_result = { 70 | "stdout": "Package installed successfully", 71 | "stderr": "", 72 | "artifacts": [], 73 | } 74 | 75 | with patch( 76 | "server.tools.run_code.sandbox_execute", return_value=mock_result 77 | ) as mock_execute: 78 | register(mock_fastmcp) 79 | 80 | _ = await mock_execute( 81 | code="import numpy; print('numpy imported')", 82 | requirements=sample_requirements, 83 | files=[], 84 | run_id="test-run", 85 | session_id="test-session", 86 | ) 87 | 88 | # Verify requirements were passed through 89 | mock_execute.assert_called_once() 90 | call_args = mock_execute.call_args 91 | assert call_args[1]["requirements"] == sample_requirements 92 | 93 | @pytest.mark.asyncio 94 | async def test_run_code_tool_with_files( 95 | self, 96 | mock_fastmcp: Mock, 97 | sample_files: list[dict[str, str]], 98 | ) -> None: 99 | """Test code execution with file mounting.""" 100 | mock_result = { 101 | "stdout": "Files mounted successfully", 102 | "stderr": "", 103 | "artifacts": [], 104 | } 105 | 106 | with patch( 107 | "server.tools.run_code.sandbox_execute", return_value=mock_result 108 | ) as mock_execute: 109 | register(mock_fastmcp) 110 | 111 | _ = await mock_execute( 112 | code="print('Files available')", 113 | requirements=[], 114 | files=sample_files, 115 | run_id="test-run", 116 | session_id="test-session", 117 | ) 118 | 119 | # Verify files were passed through 120 | call_args = mock_execute.call_args 121 | assert call_args[1]["files"] == sample_files 122 | 123 | # test what happens when code is empty 124 | # test what happens when requirements is empty when it is needed 125 | # test what happens when files is empty when it is needed 126 | # test what happens when files is empty when it is not needed 127 | # test what happens when files is not empty when it is not needed 128 | # test what happens when files is not empty when it is needed 129 | # test what happens when files is not empty when it is not needed 130 | # test when code is empty 131 | # test when code is too long -------------------------------------------------------------------------------- /server/tools/run_code.py: -------------------------------------------------------------------------------- 1 | """MCP tool: execute Python code in a sandbox.""" 2 | 3 | from fastmcp import Context, FastMCP 4 | 5 | from server.sandbox.runner import RunCodeResult 6 | from server.sandbox.runner import run_code as sandbox_execute 7 | 8 | RESPONSE_FEEDBACK = ( 9 | "No output detected. Use print() (or log to stderr) to display results. " 10 | "For pandas DataFrames, call print(df.head()) instead of just df.head(). " 11 | "To see all columns or wider tables, run " 12 | "pd.set_option('display.max_columns', None) and " 13 | "pd.set_option('display.width', 10000) before printing. " 14 | "Ensure your code is a self-contained script (not notebook style) and " 15 | "reference mounted files " 16 | "with their mount path, e.g. pd.read_csv('mounts/my_data.csv'). " 17 | "If an error occurs, double-check these points first." 
18 | )
19 | 
20 | 
21 | def register(mcp: FastMCP) -> None:
22 |     """Register the `run_code` tool on a FastMCP server instance.
23 | 
24 |     Usage (inside server.main):
25 | 
26 |         from server.tools import run_code
27 |         run_code.register(mcp)
28 |     """
29 | 
30 |     @mcp.tool(
31 |         name="run_code",
32 |         description=(
33 |             "Run self-contained Python scripts in an isolated sandbox. "
34 |             "Send a 'session_id' header to reuse the environment across runs; "
35 |             "otherwise the sandbox is reset each time. "
36 |             "Use print() (or log to stderr) to capture output—expressions "
37 |             "like df.head() alone will not be returned. "
38 |             "Store any artifacts you want back in the output/ directory; they "
39 |             "are returned as relative paths and downloadable via "
40 |             "/artifacts/{relative_path}. "
41 |             "Mounted files are available at mounts/. "
42 |             "If stdout is empty or execution fails, a 'feedback' string is "
43 |             "added to the response with suggestions. "
44 |             "Tip: when printing large pandas DataFrames, call "
45 |             "pd.set_option('display.max_columns', None) and "
46 |             "pd.set_option('display.width', 10000) first. Also, try to get "
47 |             "column names separately. "
48 |             "Optional parameters: requirements (list of pip specs) and files "
49 |             "[{url, mountPath}]. "
50 |             "Each file is downloaded before execution and made available at "
51 |             "./mounts/. "
52 |         ),
53 |     )
54 |     async def _run_code(
55 |         code: str,
56 |         requirements: list[str] | None = None,
57 |         files: list[dict[str, str]] | None = None,
58 |         ctx: Context | None = None,
59 |     ) -> RunCodeResult:
60 |         """Tool implementation compatible with FastMCP.
61 | 
62 |         If a session_id is provided, the environment and files persist for the
63 |         session. If not, the sandbox is stateless and files are deleted after
64 |         each run. Artifacts are returned as relative paths and downloadable via
65 |         /artifacts/{relative_path}. The session_id is always included in the
66 |         response if available.
67 | 
68 |         If stdout is empty or an error occurs, a feedback array is included in
69 |         the response with suggestions to use print statements and ensure code
70 |         is self-contained.
71 |         """
72 | 
73 |         # Default mutable params
74 |         requirements = requirements or []
75 |         files = files or []
76 | 
77 |         if len(code) > 20_000:
78 |             raise ValueError("Code block too large (20k char limit)")
79 | 
80 |         sid = ctx.session_id if ctx else None  # may be None on Streamable-HTTP
81 |         if not sid and ctx and ctx.request_context.request:
82 |             # see issue https://github.com/modelcontextprotocol/python-sdk/
83 |             # issues/1063 for more details
84 |             sid = ctx.request_context.request.headers.get("mcp-session-id")
85 | 
86 |         try:
87 |             result = await sandbox_execute(
88 |                 code=code,
89 |                 requirements=requirements,
90 |                 files=files,
91 |                 run_id=(ctx.request_id if ctx else "local"),
92 |                 session_id=sid,
93 |             )
94 |             # Always include session_id in the response if available
95 |             if sid:
96 |                 result = dict(result)
97 |                 result["session_id"] = sid
98 |             # Add feedback if stdout is empty
99 |             if not result.get("stdout"):
100 |                 result = dict(result)
101 |                 result["feedback"] = RESPONSE_FEEDBACK
102 |             return result
103 |         except Exception as exc:  # noqa: BLE001
104 |             # FastMCP automatically converts exceptions into ToolError
105 |             # responses.
106 |             feedback = [
107 |                 (
108 |                     "An error occurred. Please ensure your code is "
109 |                     "self-contained, uses print statements for output, and is "
110 |                     "not written in notebook style."
111 | ) 112 | ] 113 | raise type(exc)(str(exc) + f"\nFEEDBACK: {feedback[0]}") from exc 114 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | """Shared pytest fixtures for PRIMS testing.""" 2 | 3 | import shutil 4 | import tempfile 5 | from collections.abc import AsyncGenerator, Generator 6 | from pathlib import Path 7 | from unittest.mock import AsyncMock, Mock 8 | 9 | import pytest 10 | from httpx import AsyncClient 11 | 12 | 13 | @pytest.fixture 14 | def temp_dir() -> Generator[Path]: 15 | """Create a temporary directory for test isolation.""" 16 | temp_path = Path(tempfile.mkdtemp()) 17 | try: 18 | yield temp_path 19 | finally: 20 | shutil.rmtree(temp_path, ignore_errors=True) 21 | 22 | 23 | @pytest.fixture 24 | def mock_tmp_dir(temp_dir: Path, monkeypatch: pytest.MonkeyPatch) -> Path: 25 | """Mock the global TMP_DIR with a temporary directory.""" 26 | monkeypatch.setattr("server.config.TMP_DIR", temp_dir) 27 | monkeypatch.setattr("server.sandbox.runner.TMP_DIR", temp_dir) 28 | monkeypatch.setattr("server.tools.workspace_inspect.TMP_DIR", temp_dir) 29 | return temp_dir 30 | 31 | 32 | @pytest.fixture 33 | def session_id() -> str: 34 | """Provide a test session ID.""" 35 | return "test-session-123" 36 | 37 | 38 | @pytest.fixture 39 | def run_id() -> str: 40 | """Provide a test run ID.""" 41 | return "test-run-456" 42 | 43 | 44 | @pytest.fixture 45 | def sample_python_code() -> str: 46 | """Provide sample Python code for testing.""" 47 | return """ 48 | import pandas as pd 49 | import os 50 | 51 | # Test basic functionality 52 | print("Hello from sandbox!") 53 | print(f"Working directory: {os.getcwd()}") 54 | 55 | # Test file operations 56 | with open("output/test_output.txt", "w") as f: 57 | f.write("Test output file") 58 | 59 | # Test pandas 60 | df = pd.DataFrame({"col1": [1, 2, 3], "col2": ["a", "b", "c"]}) 61 | print(df.head()) 62 | """ 63 | 64 | 65 | @pytest.fixture 66 | def invalid_python_code() -> str: 67 | """Provide invalid Python code for testing error handling.""" 68 | return """ 69 | import non_existent_module 70 | print("This should fail") 71 | invalid_syntax here 72 | """ 73 | 74 | 75 | @pytest.fixture 76 | def mock_fastmcp() -> Mock: 77 | """Create a mock FastMCP instance for testing.""" 78 | mock_mcp = Mock() 79 | mock_mcp.name = "test-prims" 80 | mock_mcp.version = "0.1.0" 81 | mock_mcp.tool = Mock() 82 | return mock_mcp 83 | 84 | 85 | @pytest.fixture 86 | def mock_context() -> Mock: 87 | """Create a mock Context for testing.""" 88 | context = Mock() 89 | context.session_id = "test-session-123" 90 | context.request_id = "test-request-456" 91 | context.request_context.request = Mock() 92 | context.request_context.request.headers = {"mcp-session-id": "test-session-123"} 93 | return context 94 | 95 | 96 | @pytest.fixture 97 | async def http_client() -> AsyncGenerator[AsyncClient]: 98 | """Create an HTTP client for integration testing.""" 99 | async with AsyncClient() as client: 100 | yield client 101 | 102 | 103 | @pytest.fixture 104 | def mock_subprocess_success() -> Mock: 105 | """Mock successful subprocess execution.""" 106 | mock_process = AsyncMock() 107 | mock_process.communicate = AsyncMock( 108 | return_value=(b"stdout output", b"stderr output") 109 | ) 110 | mock_process.returncode = 0 111 | mock_process.wait = AsyncMock(return_value=None) 112 | mock_process.kill = Mock(return_value=None) 113 | return mock_process 114 | 115 | 116 
| @pytest.fixture 117 | def mock_subprocess_failure() -> Mock: 118 | """Mock failed subprocess execution.""" 119 | mock_process = AsyncMock() 120 | mock_process.communicate = AsyncMock(return_value=(b"", b"Error: command failed")) 121 | mock_process.returncode = 1 122 | mock_process.wait = AsyncMock(return_value=None) 123 | mock_process.kill = Mock(return_value=None) 124 | return mock_process 125 | 126 | 127 | @pytest.fixture 128 | def sample_requirements() -> list[str]: 129 | """Provide sample pip requirements for testing.""" 130 | return ["numpy>=1.20.0", "matplotlib>=3.5.0"] 131 | 132 | 133 | @pytest.fixture 134 | def sample_files() -> list[dict[str, str]]: 135 | """Provide sample file mounting configuration for testing.""" 136 | return [ 137 | {"url": "https://example.com/data.csv", "mountPath": "data/input.csv"}, 138 | {"url": "https://example.com/config.json", "mountPath": "config.json"}, 139 | ] 140 | 141 | 142 | @pytest.fixture 143 | def mock_download_success(monkeypatch: pytest.MonkeyPatch) -> None: 144 | """Mock successful file downloads.""" 145 | 146 | async def mock_download_files( 147 | files: list[dict[str, str]], mount_dir: Path 148 | ) -> list[Path]: 149 | paths = [] 150 | for file_info in files: 151 | mount_path = mount_dir / file_info["mountPath"] 152 | mount_path.parent.mkdir(parents=True, exist_ok=True) 153 | mount_path.write_text(f"Mock content for {file_info['url']}") 154 | paths.append(mount_path) 155 | return paths 156 | 157 | monkeypatch.setattr("server.sandbox.runner.download_files", mock_download_files) 158 | 159 | 160 | @pytest.fixture 161 | def mock_virtualenv_creation(monkeypatch: pytest.MonkeyPatch, temp_dir: Path) -> Path: 162 | """Mock virtual environment creation.""" 163 | python_path = temp_dir / "venv" / "bin" / "python" 164 | python_path.parent.mkdir(parents=True, exist_ok=True) 165 | python_path.write_text("#!/usr/bin/env python3\n# Mock Python executable") 166 | python_path.chmod(0o755) 167 | 168 | async def mock_create_virtualenv(requirements: list[str], run_dir: Path) -> Path: 169 | return python_path 170 | 171 | monkeypatch.setattr("server.sandbox.env.create_virtualenv", mock_create_virtualenv) 172 | return python_path 173 | -------------------------------------------------------------------------------- /tests/integration/test_mcp_protocol.py: -------------------------------------------------------------------------------- 1 | """Integration tests for MCP protocol functionality.""" 2 | 3 | import asyncio 4 | from pathlib import Path 5 | from unittest.mock import patch 6 | 7 | import pytest 8 | 9 | from server.main import mcp 10 | 11 | 12 | @pytest.mark.integration 13 | class TestMCPIntegration: 14 | """Test MCP protocol integration.""" 15 | 16 | @pytest.mark.asyncio 17 | async def test_mcp_server_startup(self) -> None: 18 | """Test that MCP server can start up properly.""" 19 | # This is a basic integration test - in a real scenario 20 | # we would start the actual server 21 | assert mcp is not None 22 | assert mcp.name == "primcs" 23 | # FastMCP doesn't expose version as an attribute, check initialization instead 24 | assert hasattr(mcp, "name") 25 | assert isinstance(mcp.name, str) 26 | 27 | @pytest.mark.asyncio 28 | async def test_tool_registration(self) -> None: 29 | """Test that all tools are properly registered.""" 30 | # Verify MCP instance has the expected structure 31 | # FastMCP uses different internal structure, check for callable methods 32 | assert hasattr(mcp, "tool") # Decorator method exists 33 | assert callable(mcp.tool) 34 | 35 | 
# Verify the server is properly configured 36 | assert mcp.name == "primcs" 37 | 38 | # In a real test, we would inspect the registered tools 39 | # and verify they match our expected tool set 40 | 41 | @pytest.mark.asyncio 42 | @pytest.mark.slow 43 | async def test_run_code_integration( 44 | self, 45 | mock_tmp_dir: Path, 46 | mock_virtualenv_creation: Path, 47 | mock_download_success: None, 48 | ) -> None: 49 | """Test full run_code tool integration.""" 50 | from fastmcp import FastMCP 51 | 52 | from server.tools.run_code import register 53 | 54 | # Create a test MCP instance 55 | test_mcp = FastMCP(name="test", version="1.0") 56 | register(test_mcp) 57 | 58 | # This would be expanded to test actual tool execution 59 | # in a real integration test environment 60 | 61 | # Mock subprocess for integration test 62 | with patch("server.sandbox.runner.asyncio.create_subprocess_exec"): 63 | mock_process = asyncio.create_subprocess_exec 64 | mock_process.communicate = lambda: (b"Hello World", b"") 65 | mock_process.returncode = 0 66 | 67 | # Test would verify the full flow here 68 | pass 69 | 70 | @pytest.mark.asyncio 71 | async def test_artifact_serving_integration(self, mock_tmp_dir: Path) -> None: 72 | """Test artifact serving through HTTP endpoint.""" 73 | # Create test artifact 74 | session_id = "test-session" 75 | session_dir = mock_tmp_dir / f"session_{session_id}" 76 | output_dir = session_dir / "output" 77 | output_dir.mkdir(parents=True, exist_ok=True) 78 | 79 | test_file = output_dir / "test.txt" 80 | test_file.write_text("Test artifact content") 81 | 82 | # This would test the actual HTTP endpoint in a real scenario 83 | # For now, we just verify the file structure is correct 84 | assert test_file.exists() 85 | assert test_file.read_text() == "Test artifact content" 86 | 87 | @pytest.mark.asyncio 88 | async def test_session_persistence(self, mock_tmp_dir: Path) -> None: 89 | """Test session-based workspace persistence.""" 90 | session_id = "persistent-session" 91 | 92 | # Simulate multiple operations in the same session 93 | session_dir = mock_tmp_dir / f"session_{session_id}" 94 | 95 | # First operation 96 | session_dir.mkdir(parents=True, exist_ok=True) 97 | (session_dir / "mounts").mkdir(exist_ok=True) 98 | (session_dir / "output").mkdir(exist_ok=True) 99 | 100 | # Create some files 101 | (session_dir / "output" / "result1.txt").write_text("First result") 102 | 103 | # Second operation (should see previous files) 104 | (session_dir / "output" / "result2.txt").write_text("Second result") 105 | 106 | # Verify both files exist 107 | assert (session_dir / "output" / "result1.txt").exists() 108 | assert (session_dir / "output" / "result2.txt").exists() 109 | 110 | 111 | @pytest.mark.integration 112 | @pytest.mark.e2e 113 | class TestEndToEnd: 114 | """End-to-end integration tests.""" 115 | 116 | @pytest.mark.asyncio 117 | async def test_complete_workflow(self) -> None: 118 | """Test a complete workflow from code submission to artifact retrieval.""" 119 | # This would be a full end-to-end test in a real scenario 120 | # involving starting the server, making HTTP requests, etc. 121 | 122 | test_code = """ 123 | import pandas as pd 124 | df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}) 125 | print(df) 126 | df.to_csv('output/test.csv', index=False) 127 | """ 128 | 129 | # In a real E2E test, we would: 130 | # 1. Start the MCP server 131 | # 2. Submit the code via MCP protocol 132 | # 3. Verify the output 133 | # 4. Download the artifact 134 | # 5. 
Verify artifact contents 135 | 136 | assert len(test_code) > 0 # Placeholder assertion 137 | 138 | @pytest.mark.asyncio 139 | async def test_error_handling_workflow(self) -> None: 140 | """Test error handling in a complete workflow.""" 141 | invalid_code = """ 142 | import non_existent_module 143 | print("This will fail") 144 | """ 145 | 146 | # Test that errors are properly propagated and handled 147 | assert len(invalid_code) > 0 # Placeholder assertion 148 | 149 | @pytest.mark.asyncio 150 | async def test_file_mounting_workflow(self) -> None: 151 | """Test the complete file mounting workflow.""" 152 | # Test mounting files and using them in code execution 153 | test_files = [{"url": "https://httpbin.org/json", "mountPath": "data.json"}] 154 | 155 | test_code = """ 156 | import json 157 | with open('mounts/data.json', 'r') as f: 158 | data = json.load(f) 159 | print(f"Loaded data: {data}") 160 | """ 161 | 162 | # In a real test, this would verify the complete mounting workflow 163 | assert len(test_files) > 0 164 | assert len(test_code) > 0 165 | -------------------------------------------------------------------------------- /tests/unit/test_sandbox_env.py: -------------------------------------------------------------------------------- 1 | """Unit tests for server.sandbox.env module.""" 2 | 3 | import sys 4 | from pathlib import Path 5 | from unittest.mock import AsyncMock, Mock, patch 6 | 7 | import pytest 8 | 9 | from server.sandbox.env import _DEFAULT_PACKAGES, create_virtualenv 10 | 11 | 12 | class TestCreateVirtualenv: 13 | """Test virtual environment creation.""" 14 | 15 | @pytest.mark.asyncio 16 | async def test_create_virtualenv_success(self, temp_dir: Path) -> None: 17 | """Test successful virtual environment creation.""" 18 | requirements = ["numpy", "pandas"] 19 | 20 | with ( 21 | patch("server.sandbox.env.venv") as mock_venv, 22 | patch( 23 | "server.sandbox.env.asyncio.create_subprocess_exec" 24 | ) as mock_subprocess, 25 | ): 26 | # Mock venv creation 27 | mock_builder = Mock() 28 | mock_venv.EnvBuilder.return_value = mock_builder 29 | 30 | # Mock subprocess for pip install 31 | mock_process = AsyncMock() 32 | mock_process.communicate = AsyncMock(return_value=(b"", b"")) 33 | mock_process.returncode = 0 34 | mock_subprocess.return_value = mock_process 35 | 36 | # Call function 37 | python_path = await create_virtualenv(requirements, temp_dir) 38 | 39 | # Verify venv creation 40 | mock_venv.EnvBuilder.assert_called_once_with(with_pip=True, clear=True) 41 | mock_builder.create.assert_called_once() 42 | 43 | # Verify pip install call 44 | mock_subprocess.assert_called_once() 45 | args = mock_subprocess.call_args[0] 46 | 47 | # Check that python executable path is correct 48 | expected_python = ( 49 | temp_dir 50 | / "venv" 51 | / ("Scripts" if sys.platform.startswith("win") else "bin") 52 | / "python" 53 | ) 54 | assert Path(args[0]) == expected_python 55 | assert args[1:4] == ("-m", "pip", "install") 56 | assert "--no-cache-dir" in args 57 | 58 | # Check that requirements include both custom and default packages 59 | install_args = args[4:] # Skip python, -m, pip, install 60 | install_args = [arg for arg in install_args if arg != "--no-cache-dir"] 61 | 62 | expected_packages = list(dict.fromkeys(requirements + _DEFAULT_PACKAGES)) 63 | for package in expected_packages: 64 | assert package in install_args 65 | 66 | # Check return value 67 | assert python_path == expected_python 68 | 69 | @pytest.mark.asyncio 70 | async def test_create_virtualenv_pip_failure(self, temp_dir: 
Path) -> None: 71 | """Test virtual environment creation with pip install failure.""" 72 | requirements = ["invalid-package"] 73 | 74 | with ( 75 | patch("server.sandbox.env.venv") as mock_venv, 76 | patch( 77 | "server.sandbox.env.asyncio.create_subprocess_exec" 78 | ) as mock_subprocess, 79 | ): 80 | # Mock venv creation 81 | mock_builder = Mock() 82 | mock_venv.EnvBuilder.return_value = mock_builder 83 | 84 | # Mock subprocess for pip install failure 85 | mock_process = AsyncMock() 86 | mock_process.communicate = AsyncMock( 87 | return_value=(b"", b"ERROR: Could not find package") 88 | ) 89 | mock_process.returncode = 1 90 | mock_subprocess.return_value = mock_process 91 | 92 | # Should raise RuntimeError 93 | with pytest.raises(RuntimeError, match="pip install failed"): 94 | _ = await create_virtualenv(requirements, temp_dir) 95 | 96 | @pytest.mark.asyncio 97 | async def test_create_virtualenv_no_requirements(self, temp_dir: Path) -> None: 98 | """Test virtual environment creation with no additional requirements.""" 99 | requirements: list[str] = [] 100 | 101 | with ( 102 | patch("server.sandbox.env.venv") as mock_venv, 103 | patch( 104 | "server.sandbox.env.asyncio.create_subprocess_exec" 105 | ) as mock_subprocess, 106 | ): 107 | # Mock venv creation 108 | mock_builder = Mock() 109 | mock_venv.EnvBuilder.return_value = mock_builder 110 | 111 | # Mock subprocess for pip install 112 | mock_process = AsyncMock() 113 | mock_process.communicate = AsyncMock(return_value=(b"", b"")) 114 | mock_process.returncode = 0 115 | mock_subprocess.return_value = mock_process 116 | 117 | # Call function 118 | _ = await create_virtualenv(requirements, temp_dir) 119 | 120 | # Should still install default packages 121 | mock_subprocess.assert_called_once() 122 | args = mock_subprocess.call_args[0] 123 | install_args = args[4:] # Skip python, -m, pip, install 124 | install_args = [arg for arg in install_args if arg != "--no-cache-dir"] 125 | 126 | for package in _DEFAULT_PACKAGES: 127 | assert package in install_args 128 | 129 | @pytest.mark.asyncio 130 | async def test_create_virtualenv_duplicate_requirements( 131 | self, temp_dir: Path 132 | ) -> None: 133 | """Test that duplicate requirements are deduplicated.""" 134 | requirements = [ 135 | "pandas", 136 | "numpy", 137 | "pandas", 138 | ] # pandas is duplicated and also in defaults 139 | 140 | with ( 141 | patch("server.sandbox.env.venv") as mock_venv, 142 | patch( 143 | "server.sandbox.env.asyncio.create_subprocess_exec" 144 | ) as mock_subprocess, 145 | ): 146 | # Mock venv creation 147 | mock_builder = Mock() 148 | mock_venv.EnvBuilder.return_value = mock_builder 149 | 150 | # Mock subprocess for pip install 151 | mock_process = AsyncMock() 152 | mock_process.communicate = AsyncMock(return_value=(b"", b"")) 153 | mock_process.returncode = 0 154 | mock_subprocess.return_value = mock_process 155 | 156 | # Call function 157 | _ = await create_virtualenv(requirements, temp_dir) 158 | 159 | # Check that duplicates are removed 160 | args = mock_subprocess.call_args[0] 161 | install_args = args[4:] # Skip python, -m, pip, install 162 | install_args = [arg for arg in install_args if arg != "--no-cache-dir"] 163 | 164 | # pandas should appear only once 165 | pandas_count = install_args.count("pandas") 166 | assert pandas_count == 1 167 | 168 | def test_default_packages_constant(self) -> None: 169 | """Test that default packages are properly defined.""" 170 | assert isinstance(_DEFAULT_PACKAGES, list) 171 | assert len(_DEFAULT_PACKAGES) > 0 172 | assert 
"pandas" in _DEFAULT_PACKAGES 173 | assert "openpyxl" in _DEFAULT_PACKAGES 174 | assert "requests" in _DEFAULT_PACKAGES 175 | 176 | @pytest.mark.asyncio 177 | async def test_create_virtualenv_windows_path(self, temp_dir: Path) -> None: 178 | """Test that Windows-style paths are handled correctly.""" 179 | requirements = ["numpy"] 180 | 181 | with ( 182 | patch("server.sandbox.env.venv") as mock_venv, 183 | patch( 184 | "server.sandbox.env.asyncio.create_subprocess_exec" 185 | ) as mock_subprocess, 186 | patch("server.sandbox.env.sys.platform", "win32"), 187 | ): 188 | # Mock venv creation 189 | mock_builder = Mock() 190 | mock_venv.EnvBuilder.return_value = mock_builder 191 | 192 | # Mock subprocess for pip install 193 | mock_process = AsyncMock() 194 | mock_process.communicate = AsyncMock(return_value=(b"", b"")) 195 | mock_process.returncode = 0 196 | mock_subprocess.return_value = mock_process 197 | 198 | # Call function 199 | python_path = await create_virtualenv(requirements, temp_dir) 200 | 201 | # Check that Windows path is used 202 | expected_python = temp_dir / "venv" / "Scripts" / "python" 203 | assert python_path == expected_python 204 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI/CD Pipeline 2 | 3 | on: 4 | push: 5 | branches: [ main, develop ] 6 | pull_request: 7 | branches: [ main, develop ] 8 | schedule: 9 | # Run tests daily at 2 AM UTC 10 | - cron: '0 2 * * *' 11 | 12 | env: 13 | PYTHON_DEFAULT_VERSION: "3.13" 14 | 15 | jobs: 16 | test: 17 | name: Test Suite 18 | runs-on: ubuntu-latest 19 | strategy: 20 | matrix: 21 | python-version: ["3.12", "3.13"] 22 | os: [ubuntu-latest, windows-latest, macos-latest] 23 | fail-fast: false 24 | 25 | steps: 26 | - name: Checkout code 27 | uses: actions/checkout@v4 28 | 29 | - name: Set up Python ${{ matrix.python-version }} 30 | uses: actions/setup-python@v4 31 | with: 32 | python-version: ${{ matrix.python-version }} 33 | cache: 'pip' 34 | 35 | - name: Install system dependencies (Ubuntu) 36 | if: matrix.os == 'ubuntu-latest' 37 | run: | 38 | sudo apt-get update 39 | sudo apt-get install -y build-essential 40 | 41 | - name: Install dependencies 42 | run: | 43 | python -m pip install --upgrade pip setuptools wheel 44 | pip install -r requirements.txt 45 | pip install -r requirements-dev.txt 46 | 47 | - name: Run unit tests 48 | run: | 49 | python -m pytest tests/unit -v --tb=short --cov=server --cov-report=xml --cov-report=term-missing 50 | env: 51 | PYTHONPATH: ${{ github.workspace }} 52 | 53 | - name: Run integration tests 54 | run: | 55 | python -m pytest tests/integration -v --tb=short -m "not slow" 56 | env: 57 | PYTHONPATH: ${{ github.workspace }} 58 | 59 | - name: Upload coverage to Codecov 60 | if: matrix.python-version == env.PYTHON_DEFAULT_VERSION && matrix.os == 'ubuntu-latest' 61 | uses: codecov/codecov-action@v3 62 | with: 63 | file: ./coverage.xml 64 | flags: unittests 65 | name: codecov-umbrella 66 | fail_ci_if_error: false 67 | 68 | lint: 69 | if: false 70 | name: Code Quality 71 | runs-on: ubuntu-latest 72 | 73 | steps: 74 | - name: Checkout code 75 | uses: actions/checkout@v4 76 | 77 | - name: Set up Python 78 | uses: actions/setup-python@v4 79 | with: 80 | python-version: ${{ env.PYTHON_DEFAULT_VERSION }} 81 | cache: 'pip' 82 | 83 | - name: Install dependencies 84 | run: | 85 | python -m pip install --upgrade pip 86 | pip install -r requirements.txt 87 | pip 
install -r requirements-dev.txt 88 | 89 | - name: Run ruff linter 90 | run: | 91 | python -m ruff check . --output-format=github 92 | 93 | - name: Run ruff formatter check 94 | run: | 95 | python -m ruff format --check . 96 | 97 | - name: Run black formatter check 98 | run: | 99 | python -m black --check --diff . 100 | 101 | - name: Run isort import sorting check 102 | run: | 103 | python -m isort --check-only --diff . 104 | 105 | - name: Run mypy type checking 106 | run: | 107 | python -m mypy server/ 108 | continue-on-error: true # Type checking failures shouldn't block CI 109 | 110 | security: 111 | name: Security Scan 112 | runs-on: ubuntu-latest 113 | 114 | steps: 115 | - name: Checkout code 116 | uses: actions/checkout@v4 117 | 118 | - name: Set up Python 119 | uses: actions/setup-python@v4 120 | with: 121 | python-version: ${{ env.PYTHON_DEFAULT_VERSION }} 122 | cache: 'pip' 123 | 124 | - name: Install dependencies 125 | run: | 126 | python -m pip install --upgrade pip 127 | pip install -r requirements.txt 128 | pip install -r requirements-dev.txt 129 | 130 | - name: Run bandit security linter 131 | run: | 132 | python -m bandit -r server/ -f json -o bandit-report.json 133 | continue-on-error: true 134 | 135 | - name: Upload bandit report 136 | uses: actions/upload-artifact@v4 137 | if: always() 138 | with: 139 | name: bandit-report 140 | path: bandit-report.json 141 | 142 | - name: Run safety check 143 | run: | 144 | python -m safety check --json --output safety-report.json 145 | continue-on-error: true 146 | 147 | - name: Upload safety report 148 | uses: actions/upload-artifact@v4 149 | if: always() 150 | with: 151 | name: safety-report 152 | path: safety-report.json 153 | 154 | docker: 155 | name: Docker Build Test 156 | runs-on: ubuntu-latest 157 | 158 | steps: 159 | - name: Checkout code 160 | uses: actions/checkout@v4 161 | 162 | - name: Set up Docker Buildx 163 | uses: docker/setup-buildx-action@v3 164 | 165 | - name: Build Docker image 166 | run: | 167 | docker build -t prims:test . 168 | 169 | - name: Test Docker image 170 | run: | 171 | # Start container in background 172 | docker run -d --name prims-test -p 9000:9000 prims:test 173 | 174 | # Wait for container to start 175 | sleep 10 176 | 177 | # Test health endpoint (if available) 178 | curl -f http://localhost:9000/health || echo "Health endpoint not available yet" 179 | 180 | # Clean up 181 | docker stop prims-test 182 | docker rm prims-test 183 | 184 | integration: 185 | name: Integration Tests 186 | runs-on: ubuntu-latest 187 | needs: [test, lint] 188 | 189 | steps: 190 | - name: Checkout code 191 | uses: actions/checkout@v4 192 | 193 | - name: Set up Python 194 | uses: actions/setup-python@v4 195 | with: 196 | python-version: ${{ env.PYTHON_DEFAULT_VERSION }} 197 | cache: 'pip' 198 | 199 | - name: Install dependencies 200 | run: | 201 | python -m pip install --upgrade pip 202 | pip install -r requirements.txt 203 | pip install -r requirements-dev.txt 204 | 205 | - name: Run integration tests 206 | run: | 207 | python -m pytest tests/integration -v --tb=short 208 | env: 209 | PYTHONPATH: ${{ github.workspace }} 210 | 211 | - name: Run end-to-end tests 212 | run: | 213 | # Start the server in background 214 | python -m server.main & 215 | SERVER_PID=$! 
216 | 217 | # Wait for server to start 218 | sleep 5 219 | 220 | # Run E2E tests against running server 221 | python -m pytest tests/integration -v -m "e2e" || true 222 | 223 | # Clean up 224 | kill $SERVER_PID || true 225 | env: 226 | PYTHONPATH: ${{ github.workspace }} 227 | PORT: 9001 228 | 229 | build-and-publish: 230 | name: Build and Publish 231 | runs-on: ubuntu-latest 232 | needs: [test, lint, security, docker, integration] 233 | if: github.event_name == 'push' && github.ref == 'refs/heads/main' 234 | 235 | steps: 236 | - name: Checkout code 237 | uses: actions/checkout@v4 238 | 239 | - name: Set up Python 240 | uses: actions/setup-python@v4 241 | with: 242 | python-version: ${{ env.PYTHON_DEFAULT_VERSION }} 243 | 244 | - name: Install build dependencies 245 | run: | 246 | python -m pip install --upgrade pip build twine 247 | 248 | - name: Build package 249 | run: | 250 | python -m build 251 | 252 | - name: Upload build artifacts 253 | uses: actions/upload-artifact@v4 254 | with: 255 | name: dist 256 | path: dist/ 257 | 258 | # Uncomment when ready to publish to PyPI 259 | # - name: Publish to PyPI 260 | # if: startsWith(github.ref, 'refs/tags/v') 261 | # env: 262 | # TWINE_USERNAME: __token__ 263 | # TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} 264 | # run: | 265 | # python -m twine upload dist/* 266 | 267 | dependency-review: 268 | name: Dependency Review 269 | runs-on: ubuntu-latest 270 | if: github.event_name == 'pull_request' 271 | 272 | steps: 273 | - name: Checkout code 274 | uses: actions/checkout@v4 275 | 276 | - name: Dependency Review 277 | uses: actions/dependency-review-action@v3 278 | with: 279 | fail-on-severity: moderate 280 | 281 | performance: 282 | name: Performance Tests 283 | runs-on: ubuntu-latest 284 | needs: [test] 285 | if: github.event_name == 'push' && github.ref == 'refs/heads/main' 286 | 287 | steps: 288 | - name: Checkout code 289 | uses: actions/checkout@v4 290 | 291 | - name: Set up Python 292 | uses: actions/setup-python@v4 293 | with: 294 | python-version: ${{ env.PYTHON_DEFAULT_VERSION }} 295 | cache: 'pip' 296 | 297 | - name: Install dependencies 298 | run: | 299 | python -m pip install --upgrade pip 300 | pip install -r requirements.txt 301 | pip install -r requirements-dev.txt 302 | 303 | - name: Run performance tests 304 | run: | 305 | python -m pytest tests/performance -v --tb=short || echo "Performance tests not implemented yet" 306 | env: 307 | PYTHONPATH: ${{ github.workspace }} 308 | 309 | notify: 310 | name: Notify Results 311 | runs-on: ubuntu-latest 312 | needs: [test, lint, security, docker, integration] 313 | if: always() 314 | 315 | steps: 316 | - name: Determine status 317 | id: status 318 | run: | 319 | if [[ "${{ needs.test.result }}" == "success" && "${{ needs.lint.result }}" == "success" ]]; then 320 | echo "status=success" >> $GITHUB_OUTPUT 321 | else 322 | echo "status=failure" >> $GITHUB_OUTPUT 323 | fi 324 | 325 | - name: Create summary 326 | run: | 327 | echo "## CI/CD Pipeline Results" >> $GITHUB_STEP_SUMMARY 328 | echo "" >> $GITHUB_STEP_SUMMARY 329 | echo "| Job | Status |" >> $GITHUB_STEP_SUMMARY 330 | echo "|-----|--------|" >> $GITHUB_STEP_SUMMARY 331 | echo "| Test Suite | ${{ needs.test.result }} |" >> $GITHUB_STEP_SUMMARY 332 | echo "| Code Quality | ${{ needs.lint.result }} |" >> $GITHUB_STEP_SUMMARY 333 | echo "| Security Scan | ${{ needs.security.result }} |" >> $GITHUB_STEP_SUMMARY 334 | echo "| Docker Build | ${{ needs.docker.result }} |" >> $GITHUB_STEP_SUMMARY 335 | echo "| Integration Tests | ${{ 
needs.integration.result }} |" >> $GITHUB_STEP_SUMMARY 336 | -------------------------------------------------------------------------------- /tests/unit/test_sandbox_runner.py: -------------------------------------------------------------------------------- 1 | """Unit tests for server.sandbox.runner module.""" 2 | 3 | from pathlib import Path 4 | from unittest.mock import AsyncMock, Mock, patch 5 | 6 | import pytest 7 | 8 | from server.sandbox.runner import ArtifactMeta, RunCodeResult, run_code 9 | 10 | 11 | class TestRunCode: 12 | """Test code execution functionality.""" 13 | 14 | @pytest.mark.asyncio 15 | async def test_run_code_success_with_session( 16 | self, 17 | mock_tmp_dir: Path, 18 | session_id: str, 19 | run_id: str, 20 | sample_python_code: str, 21 | mock_download_success: None, 22 | mock_virtualenv_creation: Path, 23 | ) -> None: 24 | """Test successful code execution with session persistence.""" 25 | requirements = ["numpy"] 26 | files = [{"url": "https://example.com/data.csv", "mountPath": "data.csv"}] 27 | 28 | with patch( 29 | "server.sandbox.runner.asyncio.create_subprocess_exec" 30 | ) as mock_subprocess: 31 | # Mock subprocess execution 32 | mock_process = AsyncMock() 33 | mock_process.communicate = AsyncMock( 34 | return_value=( 35 | b"Hello from sandbox!\nWorking directory: /tmp/session\n", 36 | b"Warning: some warning\n", 37 | ) 38 | ) 39 | mock_process.returncode = 0 40 | mock_subprocess.return_value = mock_process 41 | 42 | # Create expected output file 43 | session_dir = mock_tmp_dir / f"session_{session_id}" 44 | output_dir = session_dir / "output" 45 | output_dir.mkdir(parents=True, exist_ok=True) 46 | (output_dir / "test_output.txt").write_text("Test output file") 47 | 48 | # Call function 49 | result = await run_code( 50 | code=sample_python_code, 51 | requirements=requirements, 52 | files=files, 53 | run_id=run_id, 54 | session_id=session_id, 55 | ) 56 | 57 | # Verify result structure 58 | assert isinstance(result, dict) 59 | assert "stdout" in result 60 | assert "stderr" in result 61 | assert "artifacts" in result 62 | 63 | # Verify output 64 | assert "Hello from sandbox!" 
in result["stdout"] 65 | assert "Warning: some warning" in result["stderr"] 66 | 67 | # Verify artifacts 68 | artifacts = result["artifacts"] 69 | assert len(artifacts) == 1 70 | artifact = artifacts[0] 71 | assert artifact["name"] == "test_output.txt" 72 | assert artifact["relative_path"] == "test_output.txt" 73 | assert artifact["size"] > 0 74 | assert "text" in artifact["mime"] 75 | 76 | @pytest.mark.asyncio 77 | async def test_run_code_success_without_session( 78 | self, 79 | mock_tmp_dir: Path, 80 | run_id: str, 81 | sample_python_code: str, 82 | mock_download_success: None, 83 | mock_virtualenv_creation: Path, 84 | ) -> None: 85 | """Test successful code execution without session (stateless).""" 86 | requirements: list[str] = [] 87 | files: list[dict[str, str]] = [] 88 | 89 | with patch( 90 | "server.sandbox.runner.asyncio.create_subprocess_exec" 91 | ) as mock_subprocess: 92 | # Mock subprocess execution 93 | mock_process = AsyncMock() 94 | mock_process.communicate = AsyncMock( 95 | return_value=(b"Output without session", b"") 96 | ) 97 | mock_process.returncode = 0 98 | mock_subprocess.return_value = mock_process 99 | 100 | # Call function without session_id 101 | result = await run_code( 102 | code=sample_python_code, 103 | requirements=requirements, 104 | files=files, 105 | run_id=run_id, 106 | session_id=None, 107 | ) 108 | 109 | # Verify result 110 | assert result["stdout"] == "Output without session" 111 | assert result["stderr"] == "" 112 | assert result["artifacts"] == [] 113 | 114 | @pytest.mark.asyncio 115 | async def test_run_code_timeout( 116 | self, 117 | mock_tmp_dir: Path, 118 | run_id: str, 119 | sample_python_code: str, 120 | mock_download_success: None, 121 | mock_virtualenv_creation: Path, 122 | ) -> None: 123 | """Test code execution timeout handling.""" 124 | with ( 125 | patch( 126 | "server.sandbox.runner.asyncio.create_subprocess_exec" 127 | ) as mock_subprocess, 128 | patch("server.sandbox.runner.asyncio.wait_for") as mock_wait_for, 129 | patch("server.sandbox.runner.create_virtualenv") as mock_create_venv, 130 | ): 131 | # Mock virtualenv creation to return the mocked python path 132 | mock_create_venv.return_value = mock_virtualenv_creation 133 | 134 | # Mock subprocess 135 | mock_process = AsyncMock() 136 | mock_process.kill = Mock(return_value=None) 137 | mock_process.wait = AsyncMock(return_value=None) 138 | mock_subprocess.return_value = mock_process 139 | 140 | # Mock timeout on the wait_for call 141 | mock_wait_for.side_effect = TimeoutError() 142 | 143 | # Should raise RuntimeError 144 | with pytest.raises(RuntimeError, match="Execution timed out"): 145 | await run_code( 146 | code=sample_python_code, 147 | requirements=[], 148 | files=[], 149 | run_id=run_id, 150 | session_id=None, 151 | ) 152 | 153 | # Verify process was killed 154 | mock_process.kill.assert_called_once() 155 | mock_process.wait.assert_called_once() 156 | 157 | @pytest.mark.asyncio 158 | async def test_run_code_with_artifacts( 159 | self, 160 | mock_tmp_dir: Path, 161 | session_id: str, 162 | run_id: str, 163 | mock_download_success: None, 164 | mock_virtualenv_creation: Path, 165 | ) -> None: 166 | """Test code execution with multiple artifacts.""" 167 | code = "print('Creating artifacts')" 168 | 169 | with patch( 170 | "server.sandbox.runner.asyncio.create_subprocess_exec" 171 | ) as mock_subprocess: 172 | # Mock subprocess execution 173 | mock_process = AsyncMock() 174 | mock_process.communicate = AsyncMock( 175 | return_value=(b"Creating artifacts", b"") 176 | ) 177 | 
mock_process.returncode = 0 178 | mock_subprocess.return_value = mock_process 179 | 180 | # Create multiple output files 181 | session_dir = mock_tmp_dir / f"session_{session_id}" 182 | output_dir = session_dir / "output" 183 | output_dir.mkdir(parents=True, exist_ok=True) 184 | 185 | # Create various file types 186 | (output_dir / "data.csv").write_text("col1,col2\n1,2\n3,4") 187 | (output_dir / "plot.png").write_bytes( 188 | b"\x89PNG\r\n\x1a\n" 189 | ) # Fake PNG header 190 | (output_dir / "subdir").mkdir() 191 | (output_dir / "subdir" / "nested.txt").write_text("nested file") 192 | 193 | # Call function 194 | result = await run_code( 195 | code=code, 196 | requirements=[], 197 | files=[], 198 | run_id=run_id, 199 | session_id=session_id, 200 | ) 201 | 202 | # Verify artifacts 203 | artifacts = result["artifacts"] 204 | assert len(artifacts) == 3 205 | 206 | # Check artifact details 207 | artifact_names = {a["name"] for a in artifacts} 208 | assert "data.csv" in artifact_names 209 | assert "plot.png" in artifact_names 210 | assert "nested.txt" in artifact_names 211 | 212 | # Check MIME types 213 | csv_artifact = next(a for a in artifacts if a["name"] == "data.csv") 214 | assert csv_artifact["mime"] == "text/csv" 215 | 216 | png_artifact = next(a for a in artifacts if a["name"] == "plot.png") 217 | assert png_artifact["mime"] == "image/png" 218 | 219 | @pytest.mark.asyncio 220 | async def test_run_code_script_naming( 221 | self, 222 | mock_tmp_dir: Path, 223 | session_id: str, 224 | run_id: str, 225 | mock_download_success: None, 226 | mock_virtualenv_creation: Path, 227 | ) -> None: 228 | """Test that script naming varies based on session presence.""" 229 | code = "print('test')" 230 | 231 | with patch( 232 | "server.sandbox.runner.asyncio.create_subprocess_exec" 233 | ) as mock_subprocess: 234 | mock_process = AsyncMock() 235 | mock_process.communicate = AsyncMock(return_value=(b"test", b"")) 236 | mock_process.returncode = 0 237 | mock_subprocess.return_value = mock_process 238 | 239 | # Test with session (should use run_id in script name) 240 | await run_code( 241 | code=code, 242 | requirements=[], 243 | files=[], 244 | run_id=run_id, 245 | session_id=session_id, 246 | ) 247 | 248 | # Check script name includes run_id 249 | session_dir = mock_tmp_dir / f"session_{session_id}" 250 | expected_script = session_dir / f"script_{run_id}.py" 251 | assert expected_script.exists() 252 | 253 | # Test without session (should use generic script name) 254 | await run_code( 255 | code=code, 256 | requirements=[], 257 | files=[], 258 | run_id=run_id, 259 | session_id=None, 260 | ) 261 | 262 | # Check generic script name 263 | run_dir = mock_tmp_dir / f"run_{run_id}" 264 | expected_script = run_dir / "script.py" 265 | assert expected_script.exists() 266 | 267 | @pytest.mark.asyncio 268 | async def test_run_code_directory_creation( 269 | self, 270 | mock_tmp_dir: Path, 271 | session_id: str, 272 | run_id: str, 273 | mock_download_success: None, 274 | mock_virtualenv_creation: Path, 275 | ) -> None: 276 | """Test that required directories are created.""" 277 | code = "print('test')" 278 | 279 | with patch( 280 | "server.sandbox.runner.asyncio.create_subprocess_exec" 281 | ) as mock_subprocess: 282 | mock_process = AsyncMock() 283 | mock_process.communicate = AsyncMock(return_value=(b"test", b"")) 284 | mock_process.returncode = 0 285 | mock_subprocess.return_value = mock_process 286 | 287 | # Call with session 288 | await run_code( 289 | code=code, 290 | requirements=[], 291 | files=[], 292 | 
run_id=run_id, 293 | session_id=session_id, 294 | ) 295 | 296 | # Verify directories exist 297 | session_dir = mock_tmp_dir / f"session_{session_id}" 298 | assert session_dir.exists() 299 | assert (session_dir / "mounts").exists() 300 | assert (session_dir / "output").exists() 301 | 302 | def test_artifact_meta_type(self) -> None: 303 | """Test ArtifactMeta type definition.""" 304 | artifact: ArtifactMeta = { 305 | "name": "test.txt", 306 | "relative_path": "test.txt", 307 | "size": 100, 308 | "mime": "text/plain", 309 | } 310 | 311 | assert artifact["name"] == "test.txt" 312 | assert artifact["relative_path"] == "test.txt" 313 | assert artifact["size"] == 100 314 | assert artifact["mime"] == "text/plain" 315 | 316 | def test_run_code_result_type(self) -> None: 317 | """Test RunCodeResult type definition.""" 318 | result: RunCodeResult = { 319 | "stdout": "test output", 320 | "stderr": "test error", 321 | "artifacts": [], 322 | } 323 | 324 | assert result["stdout"] == "test output" 325 | assert result["stderr"] == "test error" 326 | assert result["artifacts"] == [] 327 | 328 | # Test with feedback 329 | result_with_feedback: RunCodeResult = { 330 | "stdout": "", 331 | "stderr": "", 332 | "artifacts": [], 333 | "feedback": "No output detected", 334 | } 335 | 336 | assert result_with_feedback["feedback"] == "No output detected" 337 | --------------------------------------------------------------------------------
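For quick manual testing, here is a minimal client-side sketch (not a file in the repository) showing how the `list_dir` and `preview_file` tools from `server/tools/workspace_inspect.py` might be called. It assumes the server is running locally on port 9000 over Streamable-HTTP and that the fastmcp `Client` keeps the same `mcp-session-id` for the lifetime of the connection, so files written by `run_code` are visible to the inspection tools:

import asyncio

from fastmcp import Client


async def main():
    async with Client("http://localhost:9000/mcp") as client:
        # Write something into the session workspace first.
        code = "open('output/hello.txt', 'w').write('hello')\nprint('done')"
        await client.call_tool("run_code", {"code": code})

        # List the output directory, then preview the file we just wrote.
        entries = await client.call_tool("list_dir", {"dir_path": "output"})
        print("Entries:", entries)

        preview = await client.call_tool(
            "preview_file", {"relative_path": "output/hello.txt"}
        )
        print("Preview:", preview)


if __name__ == "__main__":
    asyncio.run(main())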