├── server ├── __init__.py ├── prompts │ ├── __init__.py │ └── python_programmer.py ├── sandbox │ ├── __init__.py │ ├── env.py │ ├── downloader.py │ └── runner.py ├── tools │ ├── __init__.py │ ├── mount_file.py │ ├── persist_artifact.py │ ├── workspace_inspect.py │ └── run_code.py ├── config.py └── main.py ├── tests ├── __init__.py ├── integration │ ├── __init__.py │ └── test_mcp_protocol.py ├── unit │ ├── test_main.py │ ├── test_config.py │ ├── test_run_code_tool.py │ ├── test_sandbox_env.py │ └── test_sandbox_runner.py └── conftest.py ├── requirements.txt ├── primslogo.png ├── glama.json ├── examples ├── run_code.py ├── list_tools.py ├── mount_and_run.py ├── session_persistence.py ├── inspect_workspace.py └── artifact_download.py ├── Dockerfile ├── requirements-dev.txt ├── .flake8 ├── .gitignore ├── LICENSE ├── ruff.toml ├── scripts ├── docker_run.sh ├── setup_env.sh └── test.sh ├── pyproject.toml ├── README.md └── .github └── workflows └── ci.yml /server/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /server/prompts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /server/sandbox/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Tests package 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | fastmcp 2 | aiohttp 3 | aiofiles -------------------------------------------------------------------------------- /tests/integration/__init__.py: -------------------------------------------------------------------------------- 1 | # Integration tests package 2 | -------------------------------------------------------------------------------- /primslogo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hileamlakB/Python-Runtime-Interpreter-MCP-Server/HEAD/primslogo.png -------------------------------------------------------------------------------- /glama.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://glama.ai/mcp/schemas/server.json", 3 | "maintainers": [ 4 | "hileamlakb" 5 | ] 6 | } 7 | -------------------------------------------------------------------------------- /server/tools/__init__.py: -------------------------------------------------------------------------------- 1 | # register is called from server.main, so import here is enough 2 | from . import mount_file # noqa: F401 3 | from . import persist_artifact # noqa: F401 4 | from . 
import workspace_inspect # noqa: F401 5 | -------------------------------------------------------------------------------- /examples/run_code.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | 3 | from fastmcp import Client 4 | 5 | 6 | async def main(): 7 | async with Client("http://localhost:9000/mcp") as client: 8 | code = "print('Hello from FastMCP!')" 9 | result = await client.call_tool("run_code", {"code": code}) 10 | print("Result: \n\t", result) 11 | 12 | 13 | if __name__ == "__main__": 14 | asyncio.run(main()) 15 | -------------------------------------------------------------------------------- /examples/list_tools.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import json 3 | 4 | from fastmcp import Client 5 | 6 | 7 | async def main(): 8 | async with Client("http://localhost:9000/mcp") as client: 9 | tools = await client.list_tools() 10 | print("Available tools:") 11 | for tool in tools: 12 | print(f"- {tool.name}: {tool.description}") 13 | print(json.dumps(tool.inputSchema, indent=2)) 14 | 15 | 16 | if __name__ == "__main__": 17 | asyncio.run(main()) 18 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.13-slim 2 | 3 | # System deps 4 | RUN apt-get update && apt-get install -y --no-install-recommends \ 5 | build-essential curl git unzip wget ca-certificates \ 6 | && rm -rf /var/lib/apt/lists/* 7 | 8 | WORKDIR /app 9 | 10 | COPY requirements.txt . 11 | RUN pip install --upgrade pip && pip install -r requirements.txt 12 | 13 | COPY ./server ./server 14 | 15 | ENV PYTHONPATH=/app 16 | 17 | CMD ["fastmcp", "run", "server/main.py", "--transport", "http", "--host", "0.0.0.0", "--port", "9000"] -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | # Development dependencies for testing, linting, and CI/CD 2 | pytest>=7.4.0 3 | pytest-asyncio>=0.21.0 4 | pytest-cov>=4.1.0 5 | pytest-mock>=3.11.0 6 | pytest-timeout>=2.1.0 7 | 8 | # Code quality and formatting 9 | black>=23.7.0 10 | isort>=5.12.0 11 | ruff>=0.0.280 12 | mypy>=1.5.0 13 | 14 | # Security scanning 15 | bandit[toml]>=1.7.5 16 | safety>=2.3.0 17 | 18 | # HTTP testing 19 | httpx>=0.24.0 20 | pytest-httpx>=0.21.0 21 | 22 | # Additional testing utilities 23 | factory-boy>=3.3.0 24 | freezegun>=1.2.0 -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 88 3 | # Black's default line length 4 | extend-ignore = 5 | # E203: whitespace before ':' (conflicts with black) 6 | E203, 7 | # W503: line break before binary operator (conflicts with black) 8 | W503, 9 | # E501: line too long (we set max-line-length to 88) 10 | E501 11 | 12 | exclude = 13 | .git, 14 | __pycache__, 15 | .venv, 16 | venv, 17 | build, 18 | dist, 19 | *.egg-info 20 | 21 | per-file-ignores = 22 | # Allow unused imports in __init__.py files 23 | __init__.py:F401 -------------------------------------------------------------------------------- /server/config.py: -------------------------------------------------------------------------------- 1 | """Centralised configuration for PRIMCS. 
2 | 3 | Environment variables: 4 | • PRIMCS_TMP_DIR – custom temp directory 5 | • PRIMCS_TIMEOUT – max seconds per run (default 100) 6 | • PRIMCS_MAX_OUTPUT – cap on stdout/stderr bytes (default 1 MB) 7 | """ 8 | 9 | import os 10 | from pathlib import Path 11 | 12 | TMP_DIR = Path(os.getenv("PRIMCS_TMP_DIR", "/tmp/primcs")) 13 | TMP_DIR.mkdir(parents=True, exist_ok=True) 14 | 15 | TIMEOUT_SECONDS = int(os.getenv("PRIMCS_TIMEOUT", "100")) 16 | MAX_OUTPUT_BYTES = int(os.getenv("PRIMCS_MAX_OUTPUT", str(1024 * 1024))) # 1MB 17 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # Virtual environments 27 | .venv/ 28 | venv/ 29 | env/ 30 | ENV/ 31 | 32 | # PyInstaller 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | 53 | # Jupyter Notebook 54 | .ipynb_checkpoints 55 | 56 | # pyenv 57 | .python-version 58 | 59 | # dotenv 60 | .env 61 | .env.* 62 | 63 | # mypy 64 | .mypy_cache/ 65 | .dmypy.json 66 | 67 | # VS Code 68 | .vscode/ 69 | 70 | # macOS 71 | .DS_Store 72 | TODO.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Hileamlak Mulugeta Yitayew 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE.
22 | -------------------------------------------------------------------------------- /examples/mount_and_run.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import json 3 | 4 | from fastmcp import Client 5 | 6 | SERVER = "http://localhost:9000/mcp" 7 | CSV_URL = "https://raw.githubusercontent.com/cs109/2014_data/master/countries.csv" 8 | 9 | 10 | async def main() -> None: 11 | async with Client(SERVER) as client: 12 | # 1. Mount the CSV once for this session 13 | mount_params = { 14 | "url": CSV_URL, 15 | "mount_path": "data/countries.csv", 16 | } 17 | mount_resp = await client.call_tool("mount_file", mount_params) 18 | print("Mount response:", mount_resp[0].text) 19 | 20 | # 2. Run code that reads the mounted CSV without passing `files` 21 | code = """ 22 | import pandas as pd 23 | import os 24 | path = 'mounts/data/countries.csv' 25 | print('File exists:', os.path.exists(path)) 26 | print('Row count:', len(pd.read_csv(path))) 27 | """ 28 | run_resp = await client.call_tool("run_code", {"code": code}) 29 | print("Run result:") 30 | print(json.dumps(json.loads(run_resp[0].text), indent=2)) 31 | 32 | 33 | if __name__ == "__main__": 34 | asyncio.run(main()) 35 | -------------------------------------------------------------------------------- /tests/unit/test_main.py: -------------------------------------------------------------------------------- 1 | """Unit tests for server.main module.""" 2 | 3 | from unittest.mock import Mock, patch 4 | 5 | from server.main import mcp 6 | 7 | 8 | class TestMainModule: 9 | """Test main module functionality.""" 10 | 11 | def test_mcp_instance_exists(self) -> None: 12 | """Test that MCP instance is created.""" 13 | assert mcp is not None 14 | 15 | def test_mcp_instance_type(self) -> None: 16 | """Test that MCP instance has correct type.""" 17 | from fastmcp import FastMCP 18 | 19 | assert isinstance(mcp, FastMCP) 20 | 21 | @patch("server.tools.run_code.register") 22 | def test_tool_registration_called(self, mock_register: Mock) -> None: 23 | """Test that tool registration is called during import.""" 24 | # Re-import the module to trigger registration 25 | import importlib 26 | 27 | import server.main 28 | 29 | importlib.reload(server.main) 30 | 31 | # Verify register was called with the MCP instance 32 | mock_register.assert_called_once() 33 | call_args = mock_register.call_args[0] 34 | assert len(call_args) == 1 35 | # The argument should be a FastMCP instance 36 | from fastmcp import FastMCP 37 | 38 | assert isinstance(call_args[0], FastMCP) -------------------------------------------------------------------------------- /ruff.toml: -------------------------------------------------------------------------------- 1 | line-length = 88 2 | target-version = "py313" 3 | 4 | [lint] 5 | # Select specific rule categories that are important 6 | select = [ 7 | # pycodestyle 8 | "E", 9 | "W", 10 | # Pyflakes 11 | "F", 12 | # pyupgrade 13 | "UP", 14 | # flake8-bugbear 15 | "B", 16 | # isort 17 | "I", 18 | # flake8-unused-arguments 19 | "ARG", 20 | # flake8-use-pathlib 21 | "PTH", 22 | ] 23 | 24 | ignore = [ 25 | # Line too long - handled by formatter 26 | "E501", 27 | # Too many arguments 28 | "PLR0913", 29 | # Too many branches 30 | "PLR0912", 31 | # Too many statements 32 | "PLR0915", 33 | # Magic value comparison 34 | "PLR2004", 35 | # Exception string formatting 36 | "EM101", "EM102", "TRY003", 37 | # Docstring rules - can be added later 38 | "D100", "D101", "D102", "D103", "D104", "D202", "D205", 39 | # 
Assert statement 40 | "S101", 41 | ] 42 | 43 | [lint.per-file-ignores] 44 | "__init__.py" = ["F401"] # Allow unused imports 45 | "server/tools/__init__.py" = ["F401", "I001"] # Allow unused imports and import formatting 46 | "**/test_*.py" = ["ARG", "S"] # Relax some rules for tests 47 | "**/conftest.py" = ["ARG", "S"] 48 | 49 | [lint.isort] 50 | known-first-party = ["server"] 51 | force-single-line = false -------------------------------------------------------------------------------- /scripts/docker_run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # scripts/docker_run.sh - Build and run the PRIMCS Docker container, then print the MCP server URL. 3 | set -euo pipefail 4 | 5 | IMAGE_NAME=primcs 6 | CONTAINER_NAME=primcs_server 7 | PORT=9000 8 | 9 | # Build the Docker image 10 | 11 | echo "[docker_run] Building Docker image..." 12 | docker build -t $IMAGE_NAME . 13 | 14 | # Stop and remove any existing container with the same name 15 | if docker ps -a --format '{{.Names}}' | grep -Eq "^${CONTAINER_NAME}$"; then 16 | echo "[docker_run] Removing existing container..." 17 | docker rm -f $CONTAINER_NAME 18 | fi 19 | 20 | # Run the container 21 | 22 | echo "[docker_run] Starting Docker container..." 23 | docker run -d --name $CONTAINER_NAME -p $PORT:9000 $IMAGE_NAME 24 | 25 | # Print the MCP server URL 26 | 27 | echo "[docker_run] MCP server is running at: http://localhost:${PORT}/mcp" 28 | 29 | # Define cleanup function 30 | cleanup() { 31 | echo "[docker_run] Stopping Docker container..." 32 | docker stop $CONTAINER_NAME 33 | docker rm $CONTAINER_NAME 34 | echo "[docker_run] Container stopped and removed." 35 | exit 0 36 | } 37 | 38 | # Trap SIGINT and SIGTERM to cleanup 39 | trap cleanup SIGINT SIGTERM 40 | 41 | # Wait until told to exit (block forever, or until killed) 42 | docker logs -f $CONTAINER_NAME & 43 | wait $! -------------------------------------------------------------------------------- /server/sandbox/env.py: -------------------------------------------------------------------------------- 1 | """Utility helpers for creating isolated virtual environments.""" 2 | 3 | import asyncio 4 | import sys 5 | import venv 6 | from pathlib import Path 7 | 8 | # Default libraries always installed in every sandbox environment. 9 | _DEFAULT_PACKAGES: list[str] = ["pandas", "openpyxl", "requests"] 10 | 11 | 12 | async def create_virtualenv(requirements: list[str], run_dir: Path) -> Path: 13 | """Create a venv in run_dir/venv and install *requirements*.""" 14 | venv_dir = run_dir / "venv" 15 | venv.EnvBuilder(with_pip=True, clear=True).create(venv_dir) 16 | 17 | python = ( 18 | venv_dir / ("Scripts" if sys.platform.startswith("win") else "bin") / "python" 19 | ) 20 | 21 | # Combine caller-specified requirements with default packages. 
22 | all_requirements = list(dict.fromkeys(requirements + _DEFAULT_PACKAGES)) 23 | 24 | if all_requirements: 25 | proc = await asyncio.create_subprocess_exec( 26 | str(python), 27 | "-m", 28 | "pip", 29 | "install", 30 | "--no-cache-dir", 31 | *all_requirements, 32 | stdout=asyncio.subprocess.PIPE, 33 | stderr=asyncio.subprocess.PIPE, 34 | ) 35 | _, err = await proc.communicate() 36 | if proc.returncode != 0: 37 | raise RuntimeError(f"pip install failed: {err.decode()}") 38 | 39 | return python 40 | -------------------------------------------------------------------------------- /server/sandbox/downloader.py: -------------------------------------------------------------------------------- 1 | """Download remote files to the sandbox run directory.""" 2 | 3 | import asyncio 4 | from pathlib import Path 5 | 6 | import aiohttp 7 | 8 | __all__ = ["download_files"] 9 | 10 | 11 | async def _fetch(session: aiohttp.ClientSession, url: str, path: Path) -> None: 12 | async with session.get(url) as resp: 13 | resp.raise_for_status() 14 | path.write_bytes(await resp.read()) 15 | # Make the file read-only 16 | try: 17 | path.chmod(0o444) 18 | except PermissionError: # fallback on platforms that forbid chmod inside container 19 | pass 20 | 21 | 22 | async def download_files(files: list[dict[str, str]], dest: Path) -> list[Path]: 23 | """Download *files* concurrently into *dest*. 24 | 25 | Each element in *files* must be a dict with keys ``url`` and **``mountPath``** (required). 26 | 27 | Returns list of local paths (relative to *dest*). 28 | """ 29 | if not files: 30 | return [] 31 | 32 | dest.mkdir(parents=True, exist_ok=True) 33 | 34 | async with aiohttp.ClientSession() as session: 35 | tasks = [] 36 | for meta in files: 37 | url = meta["url"] 38 | if "mountPath" not in meta or not meta["mountPath"]: 39 | raise ValueError( 40 | "Each file entry must include a non-empty 'mountPath' key." 41 | ) 42 | 43 | relative = Path(meta["mountPath"]) 44 | local = dest / relative 45 | local.parent.mkdir(parents=True, exist_ok=True) 46 | tasks.append(_fetch(session, url, local)) 47 | await asyncio.gather(*tasks) 48 | 49 | return [dest / Path(f["mountPath"]) for f in files] 50 | -------------------------------------------------------------------------------- /examples/session_persistence.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import json 3 | 4 | from fastmcp import Client 5 | 6 | SERVER_URL = "http://localhost:9000/mcp" 7 | 8 | 9 | a_sync_code_first = """ 10 | import pandas as pd 11 | # Dataset was downloaded via `files` parameter. 12 | df = pd.read_csv('mounts/countries.csv') 13 | print("First 5 rows:\\n", df.head()) 14 | """ 15 | 16 | code_second = """ 17 | import pandas as pd 18 | df = pd.read_csv('mounts/countries.csv') 19 | print("Row count:", len(df)) 20 | """ 21 | 22 | 23 | async def main() -> None: 24 | """Demonstrate that files persist for the lifetime of an MCP session.""" 25 | async with Client("http://localhost:9000/mcp") as client: 26 | # 1. Run code that downloads a CSV file into the workspace mounts directory. 
27 | first_params = { 28 | "code": a_sync_code_first, 29 | "files": [ 30 | { 31 | "url": "https://raw.githubusercontent.com/cs109/2014_data/master/countries.csv", 32 | "mountPath": "countries.csv", 33 | } 34 | ], 35 | } 36 | run1 = await client.call_tool("run_code", first_params) 37 | data1 = json.loads(run1[0].text) 38 | print("\n=== Run #1 ===") 39 | print("STDOUT:\n", data1.get("stdout")) 40 | print("STDERR:\n", data1.get("stderr")) 41 | print("ARTIFACTS:", data1.get("artifacts")) 42 | 43 | # 2. Execute a second snippet in the SAME client session. 44 | # We do NOT pass the `files` parameter again. The CSV should still exist. 45 | run2 = await client.call_tool("run_code", {"code": code_second}) 46 | data2 = json.loads(run2[0].text) 47 | print("\n=== Run #2 ===") 48 | print("STDOUT:\n", data2.get("stdout")) 49 | print("STDERR:\n", data2.get("stderr")) 50 | print("ARTIFACTS:", data2.get("artifacts")) 51 | 52 | 53 | if __name__ == "__main__": 54 | asyncio.run(main()) 55 | -------------------------------------------------------------------------------- /examples/inspect_workspace.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import json 3 | 4 | from fastmcp import Client 5 | 6 | SERVER_URL = "http://localhost:9000/mcp" 7 | 8 | 9 | async def main() -> None: 10 | """Demonstrate workspace inspection tools.""" 11 | async with Client(SERVER_URL) as client: 12 | # 1. Create a small text file via run_code 13 | code = ( 14 | "with open('output/hello.txt', 'w') as f:\n" 15 | " f.write('Hello inspection!\\nThis is a test file.')\n" 16 | ) 17 | await client.call_tool("run_code", {"code": code}) 18 | 19 | def parse_dir_response(resp): 20 | """Convert streamed TextContents into a list of DirEntry dicts, handling both list and single-entry payloads.""" 21 | entries = [] 22 | for msg in resp: 23 | obj = json.loads(msg.text) 24 | if isinstance(obj, list): 25 | entries.extend(obj) 26 | else: 27 | entries.append(obj) 28 | return entries 29 | 30 | # 2. List root of session workspace 31 | root_resp = await client.call_tool("list_dir", {}) 32 | root_listing = parse_dir_response(root_resp) 33 | print("\n=== Workspace root ===") 34 | for entry in root_listing: 35 | print(f"{entry['type']:9} {entry['path']}") 36 | 37 | # 3. List contents of output/ 38 | out_resp = await client.call_tool("list_dir", {"dir_path": "output"}) 39 | out_listing = parse_dir_response(out_resp) 40 | print("\n=== output/ ===") 41 | for entry in out_listing: 42 | print(f"{entry['type']:9} {entry['path']} {entry['size']} bytes") 43 | 44 | # 4. 
Preview the text file we just created 45 | preview_resp = await client.call_tool( 46 | "preview_file", {"relative_path": "output/hello.txt"} 47 | ) 48 | preview = json.loads(preview_resp[0].text) 49 | print("\n=== Preview of output/hello.txt ===") 50 | print(preview["content"]) 51 | 52 | 53 | if __name__ == "__main__": 54 | asyncio.run(main()) 55 | -------------------------------------------------------------------------------- /server/tools/mount_file.py: -------------------------------------------------------------------------------- 1 | """MCP tool: download one or more remote files into mounts/ for the current session.""" 2 | 3 | from pathlib import Path 4 | 5 | from fastmcp import Context, FastMCP 6 | 7 | from server.config import TMP_DIR 8 | from server.sandbox.downloader import download_files 9 | 10 | 11 | def _session_root(ctx: Context | None) -> Path: 12 | sid: str | None = None 13 | if ctx: 14 | sid = ctx.session_id 15 | if not sid and ctx.request_context.request: 16 | sid = ctx.request_context.request.headers.get("mcp-session-id") 17 | if not sid: 18 | raise ValueError( 19 | "Missing session_id; include mcp-session-id header or create session-aware client." 20 | ) 21 | root = TMP_DIR / f"session_{sid}" 22 | root.mkdir(parents=True, exist_ok=True) 23 | (root / "mounts").mkdir(parents=True, exist_ok=True) 24 | return root 25 | 26 | 27 | def register(mcp: FastMCP) -> None: 28 | """Register the mount_file tool.""" 29 | 30 | @mcp.tool( 31 | name="mount_file", 32 | description=( 33 | "Download a remote file once per session and store it under mounts/. " 34 | "Subsequent run_code calls can access it via that path without re-downloading." 35 | ), 36 | ) 37 | async def _mount_file( 38 | url: str, 39 | mount_path: str, 40 | ctx: Context | None = None, 41 | ) -> dict: # {"mounted_as": "mounts/data/my.csv", "bytes": N} 42 | if ( 43 | Path(mount_path).is_absolute() 44 | or ".." in Path(mount_path).parts 45 | or not mount_path 46 | ): 47 | raise ValueError("mount_path must be a relative path without '..'") 48 | root = _session_root(ctx) 49 | mounts_dir = root / "mounts" 50 | spec: dict[str, str] = {"url": url, "mountPath": mount_path} 51 | downloaded: list[Path] = await download_files([spec], mounts_dir) 52 | local = downloaded[0] 53 | return { 54 | "mounted_as": str(local.relative_to(root)), 55 | "bytes": local.stat().st_size, 56 | } 57 | -------------------------------------------------------------------------------- /examples/artifact_download.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import json 3 | 4 | import aiohttp 5 | from fastmcp import Client 6 | 7 | 8 | async def main(): 9 | # Step 1: Run code that generates an artifact (a PNG file) 10 | code = """ 11 | import matplotlib.pyplot as plt 12 | plt.plot([1,2,3], [4,5,6]) 13 | plt.title('Test Plot') 14 | plt.savefig('output/plot.png') 15 | print('Plot saved!') 16 | """ 17 | async with Client("http://localhost:9000/mcp") as client: 18 | # Call the run_code tool 19 | params = { 20 | "code": code, 21 | "requirements": ["matplotlib"], # install matplotlib so the plot code runs 22 | } 23 | result = await client.call_tool("run_code", params) 24 | print("Result:", result) 25 | # Parse the result 26 | data = json.loads(result[0].text) 27 | artifacts = data.get("artifacts", []) 28 | if not artifacts: 29 | print("No artifacts returned!") 30 | return 31 | artifact = artifacts[0] 32 | rel_path = artifact["relative_path"] # e.g. 
"plots/plot.png" 33 | print(f"Artifact relative path: {rel_path}") 34 | 35 | # Session ID is included in the tool response 36 | session_id = data.get("session_id") 37 | if not session_id: 38 | print("No session_id returned – cannot download artifact.") 39 | return 40 | 41 | # Step 2: Download the artifact using aiohttp with the required header 42 | artifact_url = f"http://localhost:9000/artifacts/{rel_path}" 43 | headers = {"mcp-session-id": session_id} 44 | print( 45 | f"Downloading artifact from: {artifact_url} with session_id: {session_id}" 46 | ) 47 | async with aiohttp.ClientSession() as session: 48 | async with session.get(artifact_url, headers=headers) as resp: 49 | if resp.status == 200: 50 | content = await resp.read() 51 | from pathlib import Path 52 | 53 | with Path("downloaded_plot.png").open("wb") as f: 54 | f.write(content) 55 | print("Artifact downloaded as downloaded_plot.png") 56 | else: 57 | print(f"Failed to download artifact: {resp.status}") 58 | 59 | 60 | if __name__ == "__main__": 61 | asyncio.run(main()) 62 | -------------------------------------------------------------------------------- /server/tools/persist_artifact.py: -------------------------------------------------------------------------------- 1 | """MCP tool: persist an artifact to a client-provided presigned URL.""" 2 | 3 | from pathlib import Path 4 | 5 | import aiohttp 6 | from fastmcp import Context, FastMCP 7 | 8 | from server.config import TMP_DIR 9 | 10 | MAX_UPLOAD_BYTES = 1024 * 1024 * 20 # 20 MB cap for safety 11 | 12 | 13 | def register(mcp: FastMCP) -> None: 14 | """Register the `persist_artifact` tool on a FastMCP server instance.""" 15 | 16 | @mcp.tool( 17 | name="persist_artifact", 18 | description=( 19 | "Upload a file previously created by run_code to a presigned URL. " 20 | "The file path must be relative to the output/ directory of the current session, " 21 | "for example 'reports/report.pdf'. The client must include the same mcp-session-id " 22 | "header used for run_code so the tool can locate the correct session workspace." 23 | ), 24 | ) 25 | async def _persist_artifact( 26 | relative_path: str, 27 | presigned_url: str, 28 | ctx: Context | None = None, 29 | ) -> dict: # {uploaded_bytes: int, status: int} 30 | """Upload *relative_path* to *presigned_url* and return upload stats.""" 31 | 32 | # Basic sanitisation 33 | if Path(relative_path).is_absolute() or ".." 
in Path(relative_path).parts: 34 | raise ValueError( 35 | "relative_path must be inside output/ and cannot contain '..'" 36 | ) 37 | 38 | # Determine session ID 39 | sid = ctx.session_id 40 | if not sid and ctx.request_context.request: 41 | sid = ctx.request_context.request.headers.get("mcp-session-id") 42 | if not sid: 43 | raise ValueError("Missing session_id; ensure mcp-session-id header is set.") 44 | 45 | output_dir = TMP_DIR / f"session_{sid}" / "output" 46 | file_path = output_dir / relative_path 47 | if not file_path.is_file(): 48 | raise FileNotFoundError("Artifact not found: " + relative_path) 49 | 50 | size = file_path.stat().st_size 51 | if size > MAX_UPLOAD_BYTES: 52 | raise ValueError(f"Artifact exceeds size limit ({MAX_UPLOAD_BYTES} bytes)") 53 | 54 | async with aiohttp.ClientSession() as session: 55 | with file_path.open("rb") as fh: 56 | resp = await session.put(presigned_url, data=fh) 57 | status = resp.status 58 | await resp.release() 59 | if status >= 400: 60 | raise RuntimeError(f"Upload failed with HTTP {status}") 61 | 62 | return {"uploaded_bytes": size, "status": status} 63 | -------------------------------------------------------------------------------- /server/prompts/python_programmer.py: -------------------------------------------------------------------------------- 1 | """Python programmer prompt for FastMCP. 2 | Generates instructions for an agent that outputs Python code to be executed via the `run_code` tool. 3 | """ 4 | 5 | from fastmcp import FastMCP 6 | 7 | _TEMPLATE = ( 8 | "PythonProgrammerAgent:\n" 9 | " instructions: |\n" 10 | " You are an AI assistant specialised in Python coding. Your task is to generate Python code based on a given task description. The code will be executed in a secure sandbox via the `run_code` tool. Follow these rules:\n\n" 11 | " 1. Task description:\n \n {task}\n \n\n" 12 | " \n {mounted_files}\n \n\n" 13 | " 2. Guidelines for your code:\n" 14 | " • The sandbox is stateless unless the client reuses a session_id; treat each call as a fresh environment with the mounted files available at start\n" 15 | " • ALWAYS use print() (or log to stderr) for any output you want returned (e.g. print(df.head())). Expressions alone are ignored.\n" 16 | " • Keep the code concise yet complete.\n" 17 | " • If additional packages are required, declare them under as a Python list of pip specs.\n" 18 | " • The files listed above are ALREADY mounted read-only at ./mounts/. Access them directly without downloading.\n" 19 | " • If you also need to download NEW remote files, list them under as {{'url': URL, 'mountPath': PATH}}. They'll be downloaded before execution.\n" 20 | " • Use pd.set_option('display.max_columns', None) and pd.set_option('display.width', 10000) for full DataFrame output.\n\n" 21 | " 3. Response format (exactly this structure):\n\n" 22 | " \n # your python here\n \n\n" 23 | " \n # optional list, e.g. ['pandas']\n \n\n" 24 | " \n # optional list for NEW downloads, e.g. 
[{{'url': 'https://...', 'mountPath': 'data.csv'}}]\n \n\n" 25 | " Ensure the code is fully self-contained and runnable as a script.\n" 26 | ) 27 | 28 | 29 | def register(mcp: FastMCP) -> None: 30 | """Register the python_programmer prompt on the given FastMCP server.""" 31 | 32 | @mcp.prompt( 33 | name="python_programmer", 34 | description="Return a template that instructs an LLM to produce Python code suitable for the run_code tool.", 35 | ) 36 | def _python_programmer_prompt( 37 | task: str, 38 | mounted_files: list[str] | None = None, 39 | ) -> str: 40 | joined = "\n".join(mounted_files or []) 41 | return _TEMPLATE.format(task=task.strip(), mounted_files=joined) 42 | -------------------------------------------------------------------------------- /scripts/setup_env.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # scripts/setup_env.sh ── quick dev-environment bootstrap 3 | # 4 | # Usage: 5 | # ./scripts/setup_env.sh # creates .venv & installs requirements 6 | # source .venv/bin/activate # activate the venv (remember each shell) 7 | set -euo pipefail 8 | 9 | # Determine which Python interpreter to use. 10 | # 11 | # Priority order: 12 | # 1. Respect an explicit $PYTHON environment variable. 13 | # 2. If a `python3.13` executable exists in PATH, use that (it is 14 | # typically installed alongside other minor versions and avoids any 15 | # ambiguity with an older default `python3`). 16 | # 3. Fallback to the generic `python3` binary. 17 | 18 | if [[ -n "${PYTHON:-}" ]]; then 19 | PYTHON_BIN="${PYTHON}" 20 | else 21 | if command -v python3.13 &>/dev/null; then 22 | PYTHON_BIN="python3.13" 23 | else 24 | PYTHON_BIN="python3" 25 | fi 26 | fi 27 | 28 | VENV_DIR=".venv" 29 | 30 | # Require Python ≥ 3.13 31 | REQ_MAJOR=3 32 | REQ_MINOR=13 33 | 34 | # Helper: ensure a command exists 35 | have() { command -v "$1" &>/dev/null; } 36 | 37 | if ! have "$PYTHON_BIN"; then 38 | cat >&2 <&1 | awk '{print $2}') 61 | MAJOR=${CURRENT%%.*} 62 | MINOR=$(echo "$CURRENT" | cut -d. -f2) 63 | 64 | if (( MAJOR < REQ_MAJOR || ( MAJOR == REQ_MAJOR && MINOR < REQ_MINOR ) )); then 65 | cat >&2 < Response: 37 | """ 38 | Serve an artifact file for the current session. The client must include 39 | the session ID in the "mcp-session-id" header. The URL path is the 40 | relative path returned by the tool (e.g. "plots/plot.png"), which is 41 | resolved under session_/output/. 
42 | """ 43 | relative_path = request.path_params["relative_path"] 44 | relative_path = os.path.normpath(relative_path) 45 | path_obj = Path(relative_path) 46 | if relative_path.startswith("..") or path_obj.is_absolute(): 47 | return Response("Invalid artifact path", status_code=400) 48 | 49 | session_id = request.headers.get("mcp-session-id") 50 | if not session_id: 51 | return Response("Missing mcp-session-id header", status_code=400) 52 | 53 | base_dir = TMP_DIR / f"session_{session_id}" / "output" 54 | file_path = base_dir / relative_path 55 | 56 | try: 57 | file_path = file_path.resolve(strict=True) 58 | except FileNotFoundError: 59 | return Response("File not found", status_code=404) 60 | 61 | # Ensure file is within the output directory 62 | if not str(file_path).startswith(str(base_dir.resolve())): 63 | return Response("Forbidden", status_code=403) 64 | if not file_path.is_file(): 65 | return Response("Not a file", status_code=404) 66 | 67 | return FileResponse(str(file_path), filename=file_path.name) 68 | 69 | 70 | if __name__ == "__main__": # pragma: no cover 71 | port = int(os.getenv("PORT", "9000")) 72 | # Start the server with HTTP transport (modern replacement for SSE) 73 | mcp.run(transport="streamable-http", host="0.0.0.0", port=port) 74 | -------------------------------------------------------------------------------- /tests/unit/test_config.py: -------------------------------------------------------------------------------- 1 | """Unit tests for server.config module.""" 2 | 3 | import os 4 | from pathlib import Path 5 | from unittest.mock import patch 6 | 7 | import pytest 8 | 9 | from server import config 10 | 11 | 12 | class TestConfig: 13 | """Test configuration management.""" 14 | 15 | def test_default_tmp_dir(self) -> None: 16 | """Test default TMP_DIR configuration.""" 17 | with patch.dict(os.environ, {}, clear=True): 18 | # Re-import to get fresh configuration 19 | import importlib 20 | 21 | importlib.reload(config) 22 | 23 | expected_path = Path("/tmp/primcs") 24 | assert config.TMP_DIR == expected_path 25 | 26 | def test_custom_tmp_dir(self, tmp_path: Path) -> None: 27 | """Test custom TMP_DIR from environment variable.""" 28 | custom_path = str(tmp_path / "custom_tmp") 29 | with patch.dict(os.environ, {"PRIMCS_TMP_DIR": custom_path}): 30 | import importlib 31 | 32 | importlib.reload(config) 33 | 34 | assert config.TMP_DIR == Path(custom_path) 35 | 36 | def test_default_timeout(self) -> None: 37 | """Test default timeout configuration.""" 38 | with patch.dict(os.environ, {}, clear=True): 39 | import importlib 40 | 41 | importlib.reload(config) 42 | 43 | assert config.TIMEOUT_SECONDS == 100 44 | 45 | def test_custom_timeout(self) -> None: 46 | """Test custom timeout from environment variable.""" 47 | custom_timeout = "60" 48 | with patch.dict(os.environ, {"PRIMCS_TIMEOUT": custom_timeout}): 49 | import importlib 50 | 51 | importlib.reload(config) 52 | 53 | assert config.TIMEOUT_SECONDS == 60 54 | 55 | def test_default_max_output(self) -> None: 56 | """Test default max output configuration.""" 57 | with patch.dict(os.environ, {}, clear=True): 58 | import importlib 59 | 60 | importlib.reload(config) 61 | 62 | assert config.MAX_OUTPUT_BYTES == 1024 * 1024 # 1MB 63 | 64 | def test_custom_max_output(self) -> None: 65 | """Test custom max output from environment variable.""" 66 | custom_max = "2048000" # 2MB 67 | with patch.dict(os.environ, {"PRIMCS_MAX_OUTPUT": custom_max}): 68 | import importlib 69 | 70 | importlib.reload(config) 71 | 72 | assert 
config.MAX_OUTPUT_BYTES == 2048000 73 | 74 | def test_invalid_timeout_falls_back_to_default(self) -> None: 75 | """Test that invalid timeout values fall back to default.""" 76 | with patch.dict(os.environ, {"PRIMCS_TIMEOUT": "invalid"}): 77 | with pytest.raises(ValueError): 78 | import importlib 79 | 80 | importlib.reload(config) 81 | 82 | def test_invalid_max_output_falls_back_to_default(self) -> None: 83 | """Test that invalid max output values fall back to default.""" 84 | with patch.dict(os.environ, {"PRIMCS_MAX_OUTPUT": "invalid"}): 85 | with pytest.raises(ValueError): 86 | import importlib 87 | 88 | importlib.reload(config) 89 | 90 | def test_tmp_dir_creation( 91 | self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch 92 | ) -> None: 93 | """Test that TMP_DIR is created if it doesn't exist.""" 94 | test_dir = tmp_path / "test_primcs" 95 | monkeypatch.setenv("PRIMCS_TMP_DIR", str(test_dir)) 96 | 97 | import importlib 98 | 99 | importlib.reload(config) 100 | 101 | assert test_dir.exists() 102 | assert test_dir.is_dir() 103 | -------------------------------------------------------------------------------- /server/sandbox/runner.py: -------------------------------------------------------------------------------- 1 | """Orchestrate sandbox execution of untrusted Python code.""" 2 | 3 | import asyncio 4 | import mimetypes 5 | import shutil 6 | import textwrap 7 | from typing import TypedDict 8 | 9 | from server.config import TIMEOUT_SECONDS, TMP_DIR 10 | from server.sandbox.downloader import download_files 11 | from server.sandbox.env import create_virtualenv 12 | 13 | __all__ = ["run_code"] 14 | 15 | 16 | # Precise schema for each artifact entry. 17 | class ArtifactMeta(TypedDict): 18 | name: str 19 | relative_path: str 20 | size: int 21 | mime: str 22 | 23 | 24 | # Typed return for run_code results. 25 | class RunCodeResult(TypedDict, total=False): 26 | """Result of running code in the sandbox. 27 | Optionally includes a feedback field with suggestions or warnings (list of strings). 28 | """ 29 | 30 | stdout: str 31 | stderr: str 32 | artifacts: list[ArtifactMeta] 33 | feedback: str 34 | 35 | 36 | async def run_code( 37 | *, 38 | code: str, 39 | requirements: list[str], 40 | files: list[dict[str, str]], 41 | run_id: str, 42 | session_id: str | None = None, 43 | ) -> RunCodeResult: 44 | """Execute *code* inside an isolated virtual-env and return captured output. Artifacts are returned as paths relative to the output directory. Only files inside output/ are included.""" 45 | 46 | if session_id: 47 | # Persist workspace for the lifetime of the client session. 48 | work = TMP_DIR / f"session_{session_id}" 49 | work.mkdir(parents=True, exist_ok=True) 50 | else: 51 | # Legacy per-run workspace (stateless behaviour). 52 | work = TMP_DIR / f"run_{run_id}" 53 | if work.exists(): 54 | shutil.rmtree(work) 55 | work.mkdir(parents=True, exist_ok=True) 56 | 57 | # Ensure mounts directory exists for all modes. 58 | (work / "mounts").mkdir(parents=True, exist_ok=True) 59 | # Directory where user code should place output/artifacts. 
60 | (work / "output").mkdir(parents=True, exist_ok=True) 61 | 62 | await download_files(files, work / "mounts") 63 | 64 | py = await create_virtualenv(requirements, work) 65 | 66 | script_name = f"script_{run_id}.py" if session_id else "script.py" 67 | script = work / script_name 68 | script.write_text(textwrap.dedent(code)) 69 | 70 | proc = await asyncio.create_subprocess_exec( 71 | str(py), 72 | str(script), 73 | stdout=asyncio.subprocess.PIPE, 74 | stderr=asyncio.subprocess.PIPE, 75 | cwd=work, 76 | ) 77 | 78 | try: 79 | out, err = await asyncio.wait_for(proc.communicate(), timeout=TIMEOUT_SECONDS) 80 | except TimeoutError as err: 81 | proc.kill() 82 | await proc.wait() 83 | msg = f"Execution timed out after {TIMEOUT_SECONDS}s" 84 | raise RuntimeError(msg) from err 85 | 86 | # Collect artifacts inside the output directory. 87 | artifacts: list[ArtifactMeta] = [] 88 | output_dir = work / "output" 89 | for p in output_dir.rglob("*"): 90 | if p.is_file(): 91 | try: 92 | rel_path = p.relative_to(output_dir) 93 | except ValueError: 94 | continue # skip files not in output_dir 95 | size = p.stat().st_size 96 | mime, _ = mimetypes.guess_type(str(p)) 97 | artifacts.append( 98 | { 99 | "name": rel_path.name, 100 | "relative_path": rel_path.as_posix(), 101 | "size": size, 102 | "mime": mime or "application/octet-stream", 103 | } 104 | ) 105 | 106 | return {"stdout": out.decode(), "stderr": err.decode(), "artifacts": artifacts} 107 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 88 3 | target-version = ['py313'] 4 | include = '\.pyi?$' 5 | extend-exclude = ''' 6 | /( 7 | # directories 8 | \.eggs 9 | | \.git 10 | | \.hg 11 | | \.mypy_cache 12 | | \.tox 13 | | \.venv 14 | | venv 15 | | _build 16 | | buck-out 17 | | build 18 | | dist 19 | )/ 20 | ''' 21 | 22 | [build-system] 23 | requires = ["setuptools>=61.0", "wheel"] 24 | build-backend = "setuptools.build_meta" 25 | 26 | [project] 27 | name = "prims" 28 | version = "0.1.0" 29 | description = "Python Runtime Interpreter MCP Server" 30 | readme = "README.md" 31 | license = {file = "LICENSE"} 32 | authors = [ 33 | {name = "PRIMS Contributors"} 34 | ] 35 | requires-python = ">=3.13" 36 | dependencies = [ 37 | "fastmcp", 38 | "aiohttp", 39 | "aiofiles", 40 | ] 41 | 42 | [project.optional-dependencies] 43 | dev = [ 44 | "pytest>=7.4.0", 45 | "pytest-asyncio>=0.21.0", 46 | "pytest-cov>=4.1.0", 47 | "pytest-mock>=3.11.0", 48 | "pytest-timeout>=2.1.0", 49 | "black>=23.7.0", 50 | "isort>=5.12.0", 51 | "ruff>=0.0.280", 52 | "mypy>=1.5.0", 53 | "bandit[toml]>=1.7.5", 54 | "safety>=2.3.0", 55 | "httpx>=0.24.0", 56 | "pytest-httpx>=0.21.0", 57 | "factory-boy>=3.3.0", 58 | "freezegun>=1.2.0", 59 | ] 60 | 61 | [tool.pytest.ini_options] 62 | minversion = "7.0" 63 | addopts = [ 64 | "-ra", 65 | "--strict-markers", 66 | "--strict-config", 67 | "--cov=server", 68 | "--cov-report=term-missing", 69 | "--cov-report=html", 70 | "--cov-report=xml", 71 | "--cov-fail-under=10", 72 | ] 73 | testpaths = ["tests"] 74 | markers = [ 75 | "slow: marks tests as slow (deselect with '-m \"not slow\"')", 76 | "integration: marks tests as integration tests", 77 | "unit: marks tests as unit tests", 78 | "e2e: marks tests as end-to-end tests", 79 | ] 80 | asyncio_mode = "auto" 81 | timeout = 30 82 | 83 | [tool.coverage.run] 84 | source = ["server"] 85 | omit = [ 86 | "*/tests/*", 87 | "*/test_*", 88 | 
"server/__init__.py", 89 | "*/venv/*", 90 | "*/.venv/*", 91 | ] 92 | 93 | [tool.coverage.report] 94 | exclude_lines = [ 95 | "pragma: no cover", 96 | "def __repr__", 97 | "if self.debug:", 98 | "if settings.DEBUG", 99 | "raise AssertionError", 100 | "raise NotImplementedError", 101 | "if 0:", 102 | "if __name__ == .__main__.:", 103 | "class .*\\bProtocol\\):", 104 | "@(abc\\.)?abstractmethod", 105 | ] 106 | 107 | [tool.isort] 108 | profile = "black" 109 | line_length = 88 110 | multi_line_output = 3 111 | include_trailing_comma = true 112 | force_grid_wrap = 0 113 | use_parentheses = true 114 | ensure_newline_before_comments = true 115 | 116 | [tool.ruff] 117 | line-length = 88 118 | select = [ 119 | "E", # pycodestyle errors 120 | "W", # pycodestyle warnings 121 | "F", # pyflakes 122 | "I", # isort 123 | "B", # flake8-bugbear 124 | "C4", # flake8-comprehensions 125 | "UP", # pyupgrade 126 | ] 127 | ignore = [ 128 | "E501", # line too long, handled by black 129 | "B008", # do not perform function calls in argument defaults 130 | "C901", # too complex 131 | ] 132 | 133 | [tool.ruff.per-file-ignores] 134 | "__init__.py" = ["F401"] 135 | "tests/*" = ["B011"] 136 | 137 | [tool.mypy] 138 | python_version = "3.13" 139 | check_untyped_defs = true 140 | disallow_any_generics = true 141 | disallow_incomplete_defs = true 142 | disallow_untyped_defs = true 143 | no_implicit_optional = true 144 | warn_redundant_casts = true 145 | warn_unused_ignores = true 146 | warn_return_any = true 147 | strict_equality = true 148 | 149 | [[tool.mypy.overrides]] 150 | module = [ 151 | "fastmcp.*", 152 | "aiofiles.*", 153 | ] 154 | ignore_missing_imports = true 155 | 156 | [tool.bandit] 157 | exclude_dirs = ["tests", "venv", ".venv"] 158 | skips = ["B101", "B601"] 159 | -------------------------------------------------------------------------------- /scripts/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Test runner script for PRIMS 4 | set -e 5 | 6 | # Colors for output 7 | RED='\033[0;31m' 8 | GREEN='\033[0;32m' 9 | YELLOW='\033[1;33m' 10 | NC='\033[0m' # No Color 11 | 12 | # Default values 13 | TEST_TYPE="all" 14 | COVERAGE=true 15 | VERBOSE=false 16 | PARALLEL=false 17 | 18 | # Function to print usage 19 | usage() { 20 | echo "Usage: $0 [OPTIONS]" 21 | echo "" 22 | echo "Options:" 23 | echo " -t, --type TYPE Test type: unit, integration, all (default: all)" 24 | echo " -c, --no-coverage Disable coverage reporting" 25 | echo " -v, --verbose Enable verbose output" 26 | echo " -p, --parallel Run tests in parallel" 27 | echo " -h, --help Show this help message" 28 | echo "" 29 | echo "Examples:" 30 | echo " $0 # Run all tests with coverage" 31 | echo " $0 -t unit # Run only unit tests" 32 | echo " $0 -t integration -c # Run integration tests without coverage" 33 | echo " $0 -v -p # Run all tests verbosely in parallel" 34 | } 35 | 36 | # Parse command line arguments 37 | while [[ $# -gt 0 ]]; do 38 | case $1 in 39 | -t|--type) 40 | TEST_TYPE="$2" 41 | shift 2 42 | ;; 43 | -c|--no-coverage) 44 | COVERAGE=false 45 | shift 46 | ;; 47 | -v|--verbose) 48 | VERBOSE=true 49 | shift 50 | ;; 51 | -p|--parallel) 52 | PARALLEL=true 53 | shift 54 | ;; 55 | -h|--help) 56 | usage 57 | exit 0 58 | ;; 59 | *) 60 | echo "Unknown option: $1" 61 | usage 62 | exit 1 63 | ;; 64 | esac 65 | done 66 | 67 | # Validate test type 68 | if [[ ! "$TEST_TYPE" =~ ^(unit|integration|all)$ ]]; then 69 | echo -e "${RED}Error: Invalid test type '$TEST_TYPE'. 
Must be 'unit', 'integration', or 'all'${NC}" 70 | exit 1 71 | fi 72 | 73 | # Activate virtual environment if it exists 74 | if [[ -f ".venv/bin/activate" ]]; then 75 | echo -e "${YELLOW}Activating virtual environment...${NC}" 76 | source .venv/bin/activate 77 | fi 78 | 79 | # Build pytest command 80 | PYTEST_CMD="python -m pytest" 81 | 82 | # Add test directories based on type 83 | case $TEST_TYPE in 84 | unit) 85 | PYTEST_CMD="$PYTEST_CMD tests/unit/" 86 | ;; 87 | integration) 88 | PYTEST_CMD="$PYTEST_CMD tests/integration/" 89 | ;; 90 | all) 91 | PYTEST_CMD="$PYTEST_CMD tests/" 92 | ;; 93 | esac 94 | 95 | # Add coverage options 96 | if [[ "$COVERAGE" == "true" ]]; then 97 | PYTEST_CMD="$PYTEST_CMD --cov=server --cov-report=term-missing --cov-report=html:htmlcov --cov-report=xml" 98 | fi 99 | 100 | # Add verbose option 101 | if [[ "$VERBOSE" == "true" ]]; then 102 | PYTEST_CMD="$PYTEST_CMD -v" 103 | fi 104 | 105 | # Add parallel option 106 | if [[ "$PARALLEL" == "true" ]]; then 107 | # Check if pytest-xdist is installed 108 | if python -c "import xdist" 2>/dev/null; then 109 | PYTEST_CMD="$PYTEST_CMD -n auto" 110 | else 111 | echo -e "${YELLOW}Warning: pytest-xdist not installed. Running tests sequentially.${NC}" 112 | fi 113 | fi 114 | 115 | echo -e "${GREEN}Running $TEST_TYPE tests...${NC}" 116 | echo -e "${YELLOW}Command: $PYTEST_CMD${NC}" 117 | echo "" 118 | 119 | # Run the tests 120 | if eval $PYTEST_CMD; then 121 | echo "" 122 | echo -e "${GREEN}✅ Tests completed successfully!${NC}" 123 | 124 | # Show coverage report location if coverage was enabled 125 | if [[ "$COVERAGE" == "true" ]]; then 126 | echo -e "${YELLOW}📊 Coverage report available at: htmlcov/index.html${NC}" 127 | fi 128 | 129 | exit 0 130 | else 131 | echo "" 132 | echo -e "${RED}❌ Tests failed!${NC}" 133 | exit 1 134 | fi -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | PRIMS Logo 3 | Status: Alpha 4 | License: MIT 5 |
6 | 7 | # PRIMS – Python Runtime Interpreter MCP Server 8 | 9 | PRIMS is a tiny open-source **Model Context Protocol (MCP)** server that lets LLM agents run arbitrary Python code in a secure, throw-away sandbox. 10 | 11 | • **One tool, one job.** Exposes a single MCP tool – `run_code` – that executes user-supplied Python and streams back `stdout / stderr`. 12 | 13 | • **Isolated & reproducible.** Each call spins up a fresh virtual-env, installs any requested pip packages, mounts optional read-only files, then nukes the workspace. 14 | 15 | • **Zero config.** Works over MCP/stdio or drop it in Docker. 16 | 17 | --- 18 | 19 | ## Quick-start 20 | 21 | ### 1. Local development environment 22 | 23 | ```bash 24 | chmod +x scripts/setup_env.sh # once, to make the script executable 25 | ./scripts/setup_env.sh # creates .venv & installs deps 26 | 27 | # activate the venv in each new shell 28 | source .venv/bin/activate 29 | ``` 30 | 31 | ### 2. Launch the server 32 | 33 | ```bash 34 | python -m server.main # binds http://0.0.0.0:9000/mcp 35 | ``` 36 | 37 | ### 3. Docker 38 | 39 | ```bash 40 | # Quick one-liner (build + run) 41 | chmod +x scripts/docker_run.sh 42 | ./scripts/docker_run.sh # prints the MCP URL when ready 43 | ``` 44 | 45 | 46 | ## Examples 47 | 48 | ### List available tools 49 | 50 | You can use the provided script to list all tools exposed by the server: 51 | 52 | ```bash 53 | python examples/list_tools.py 54 | ``` 55 | 56 | Expected output (tool names and descriptions may vary): 57 | 58 | ``` 59 | Available tools: 60 | - run_code: Execute Python code in a secure sandbox with optional dependencies & file mounts. 61 | - list_dir: List files/directories in your session workspace. 62 | - preview_file: Preview up to 8 KB of a text file from your session workspace. 63 | - persist_artifact: Upload an output/ file to a presigned URL for permanent storage. 64 | - mount_file: Download a remote file once per session to `mounts/`. 65 | ``` 66 | 67 | ### Run code via the MCP server 68 | 69 | ```bash 70 | python examples/run_code.py 71 | ``` 72 | 73 | ### Mount a dataset once & reuse it 74 | 75 | ```bash 76 | python examples/mount_and_run.py 77 | ``` 78 | 79 | This mounts a CSV with `mount_file` and then reads it inside `run_code` without re-supplying the URL. 80 | 81 | ### Inspect your session workspace 82 | 83 | ```bash 84 | python examples/inspect_workspace.py 85 | ``` 86 | 87 | This shows how to use the **`list_dir`** and **`preview_file`** tools to browse files your code created. 88 | 89 | ### Persist an artifact to permanent storage 90 | 91 | The **`persist_artifact`** tool uploads a file from your `output/` directory to a presigned URL. 92 | 93 | Example (Python): 94 | 95 | ```python 96 | await client.call_tool("persist_artifact", { 97 | "relative_path": "plots/plot.png", 98 | "presigned_url": "https://bucket.s3.amazonaws.com/...signature...", 99 | }) 100 | ``` 101 | 102 | ### Download an artifact 103 | 104 | Small artifacts can be fetched directly: 105 | 106 | ```bash 107 | curl -H "mcp-session-id: " \ 108 | http://localhost:9000/artifacts/plots/plot.png -o plot.png 109 | ``` 110 | 111 | --- 112 | 113 | ## Available tools 114 | 115 | | Tool | Purpose | 116 | |---------------------|---------------------------------------------------------------| 117 | | `run_code` | Execute Python in an isolated sandbox with optional pip deps. | 118 | | `list_dir` | List files/directories inside your session workspace. | 119 | | `preview_file` | Return up to 8 KB of a text file for quick inspection. 
| 120 | | `persist_artifact` | Upload an `output/` file to a client-provided presigned URL. | 121 | | `mount_file` | Download a remote file once per session to `mounts/`. | 122 | 123 | See the `examples/` directory for end-to-end demos. 124 | 125 | ## Contributing 126 | Contributions are welcome! Feel free to open issues, suggest features, or submit pull requests to help improve PRIMS. 127 | 128 | 129 | If you find this project useful, please consider leaving a ⭐ to show your support. 130 | -------------------------------------------------------------------------------- /server/tools/workspace_inspect.py: -------------------------------------------------------------------------------- 1 | # """Workspace inspection tools for session files.""" 2 | 3 | import mimetypes 4 | import os 5 | from datetime import datetime 6 | from pathlib import Path 7 | from typing import TypedDict 8 | 9 | import aiofiles 10 | from fastmcp import Context, FastMCP 11 | 12 | from server.config import TMP_DIR 13 | 14 | _MAX_PREVIEW_BYTES = 8 * 1024 # 8 KB 15 | 16 | 17 | class DirEntry(TypedDict): 18 | name: str 19 | path: str 20 | type: str # 'file' | 'directory' 21 | size: int 22 | modified: str # ISO timestamp 23 | 24 | 25 | class FilePreview(TypedDict): 26 | name: str 27 | path: str 28 | size: int 29 | mime: str 30 | content: str # UTF-8 text (truncated) 31 | 32 | 33 | def _get_session_root(ctx: Context | None) -> Path: 34 | sid: str | None = None 35 | if ctx: 36 | sid = ctx.session_id 37 | if not sid and ctx.request_context.request: 38 | sid = ctx.request_context.request.headers.get("mcp-session-id") 39 | if not sid: 40 | raise ValueError( 41 | "Missing session_id; ensure the client includes the mcp-session-id header or uses a session-aware context." 42 | ) 43 | root = TMP_DIR / f"session_{sid}" 44 | root.mkdir(parents=True, exist_ok=True) 45 | return root.resolve() 46 | 47 | 48 | def _resolve_in_session(ctx: Context | None, relative_path: str) -> Path: 49 | root = _get_session_root(ctx) 50 | # Normalise & forbid traversal 51 | rel = Path(os.path.normpath(relative_path)) if relative_path else Path() 52 | if rel.is_absolute() or ".." in rel.parts: 53 | raise ValueError( 54 | "Path must be relative to session root and may not contain '..'." 55 | ) 56 | resolved = (root / rel).resolve() 57 | if not str(resolved).startswith(str(root.resolve())): 58 | raise ValueError("Path escapes session workspace.") 59 | return resolved 60 | 61 | 62 | def register(mcp: FastMCP) -> None: 63 | """Register workspace inspection tools on the given MCP server.""" 64 | 65 | @mcp.tool( 66 | name="list_dir", 67 | description=( 68 | "List files and directories within the current session workspace. " 69 | "Parameter `path` is relative to the session root (default '.') and cannot contain '..'." 
70 | ), 71 | ) 72 | async def _list_dir( 73 | dir_path: str | None = None, ctx: Context | None = None 74 | ) -> list[DirEntry]: 75 | target = _resolve_in_session(ctx, dir_path or ".") 76 | if not target.is_dir(): 77 | raise ValueError("Specified path is not a directory") 78 | entries: list[DirEntry] = [] 79 | for p in sorted(target.iterdir(), key=lambda p: p.name): 80 | stat = p.stat() 81 | entries.append( 82 | { 83 | "name": p.name, 84 | "path": str(p.relative_to(_get_session_root(ctx))), 85 | "type": "directory" if p.is_dir() else "file", 86 | "size": stat.st_size, 87 | "modified": datetime.fromtimestamp(stat.st_mtime).isoformat(), 88 | } 89 | ) 90 | return entries 91 | 92 | @mcp.tool( 93 | name="preview_file", 94 | description=( 95 | "Return up to 8 KB of a text file from the session workspace for quick inspection. " 96 | "`relative_path` must point to a file inside the session and not contain '..'." 97 | ), 98 | ) 99 | async def _preview_file( 100 | relative_path: str, ctx: Context | None = None 101 | ) -> FilePreview: 102 | file_path = _resolve_in_session(ctx, relative_path) 103 | if not file_path.is_file(): 104 | raise FileNotFoundError("File not found") 105 | size = file_path.stat().st_size 106 | if ( 107 | size > _MAX_PREVIEW_BYTES * 4 108 | ): # arbitrary limit 32 KB for previewable text files 109 | raise ValueError("File too large for preview") 110 | # Read up to _MAX_PREVIEW_BYTES and decode 111 | async with aiofiles.open(file_path, "rb") as fh: 112 | data = await fh.read(_MAX_PREVIEW_BYTES) 113 | try: 114 | content = data.decode("utf-8", errors="replace") 115 | except Exception: 116 | content = "" 117 | mime, _ = mimetypes.guess_type(str(file_path)) 118 | return { 119 | "name": file_path.name, 120 | "path": str(file_path.relative_to(_get_session_root(ctx))), 121 | "size": size, 122 | "mime": mime or "application/octet-stream", 123 | "content": content, 124 | } 125 | -------------------------------------------------------------------------------- /tests/unit/test_run_code_tool.py: -------------------------------------------------------------------------------- 1 | """Unit tests for server.tools.run_code module.""" 2 | 3 | from unittest.mock import Mock, patch 4 | from types import SimpleNamespace 5 | 6 | import pytest 7 | 8 | from server.tools.run_code import RESPONSE_FEEDBACK, register 9 | 10 | 11 | class TestRunCodeTool: 12 | """Test the run_code MCP tool.""" 13 | 14 | def test_register_function_exists(self) -> None: 15 | """Test that register function is properly defined.""" 16 | assert callable(register) 17 | 18 | @pytest.mark.asyncio 19 | async def test_run_code_tool_success( 20 | self, 21 | mock_fastmcp: Mock, 22 | mock_context: Mock, 23 | sample_python_code: str, 24 | ) -> None: 25 | """Test successful code execution through the MCP tool.""" 26 | # Mock the sandbox execute function 27 | mock_result = { 28 | "stdout": "Hello from sandbox!", 29 | "stderr": "", 30 | "artifacts": [], 31 | } 32 | 33 | with patch( 34 | "server.tools.run_code.sandbox_execute", return_value=mock_result 35 | ) as mock_execute: 36 | # Register the tool 37 | register(mock_fastmcp) 38 | 39 | # Get the registered tool function 40 | tool_calls = mock_fastmcp.tool.call_args_list 41 | assert len(tool_calls) == 1 42 | 43 | # Extract the tool function 44 | tool_decorator_call = tool_calls[0] 45 | tool_kwargs = tool_decorator_call[1] 46 | assert tool_kwargs["name"] == "run_code" 47 | assert "description" in tool_kwargs 48 | 49 | # The actual tool function would be called by FastMCP 50 | # We'll test it by 
calling the sandbox_execute function directly 51 | result = await mock_execute( 52 | code=sample_python_code, 53 | requirements=[], 54 | files=[], 55 | run_id="test-run", 56 | session_id="test-session", 57 | ) 58 | 59 | assert result["stdout"] == "Hello from sandbox!" 60 | assert result["stderr"] == "" 61 | 62 | @pytest.mark.asyncio 63 | async def test_run_code_tool_with_requirements( 64 | self, 65 | mock_fastmcp: Mock, 66 | sample_requirements: list[str], 67 | ) -> None: 68 | """Test code execution with pip requirements.""" 69 | mock_result = { 70 | "stdout": "Package installed successfully", 71 | "stderr": "", 72 | "artifacts": [], 73 | } 74 | 75 | with patch( 76 | "server.tools.run_code.sandbox_execute", return_value=mock_result 77 | ) as mock_execute: 78 | register(mock_fastmcp) 79 | 80 | _ = await mock_execute( 81 | code="import numpy; print('numpy imported')", 82 | requirements=sample_requirements, 83 | files=[], 84 | run_id="test-run", 85 | session_id="test-session", 86 | ) 87 | 88 | # Verify requirements were passed through 89 | mock_execute.assert_called_once() 90 | call_args = mock_execute.call_args 91 | assert call_args[1]["requirements"] == sample_requirements 92 | 93 | @pytest.mark.asyncio 94 | async def test_run_code_tool_with_files( 95 | self, 96 | mock_fastmcp: Mock, 97 | sample_files: list[dict[str, str]], 98 | ) -> None: 99 | """Test code execution with file mounting.""" 100 | mock_result = { 101 | "stdout": "Files mounted successfully", 102 | "stderr": "", 103 | "artifacts": [], 104 | } 105 | 106 | with patch( 107 | "server.tools.run_code.sandbox_execute", return_value=mock_result 108 | ) as mock_execute: 109 | register(mock_fastmcp) 110 | 111 | _ = await mock_execute( 112 | code="print('Files available')", 113 | requirements=[], 114 | files=sample_files, 115 | run_id="test-run", 116 | session_id="test-session", 117 | ) 118 | 119 | # Verify files were passed through 120 | call_args = mock_execute.call_args 121 | assert call_args[1]["files"] == sample_files 122 | 123 | # test what happens when code is empty 124 | # test what happens when requirements is empty when it is needed 125 | # test what happens when files is empty when it is needed 126 | # test what happens when files is empty when it is not needed 127 | # test what happens when files is not empty when it is not needed 128 | # test what happens when files is not empty when it is needed 129 | # test what happens when files is not empty when it is not needed 130 | # test when code is empty 131 | # test when code is too long -------------------------------------------------------------------------------- /server/tools/run_code.py: -------------------------------------------------------------------------------- 1 | """MCP tool: execute Python code in a sandbox.""" 2 | 3 | from fastmcp import Context, FastMCP 4 | 5 | from server.sandbox.runner import RunCodeResult 6 | from server.sandbox.runner import run_code as sandbox_execute 7 | 8 | RESPONSE_FEEDBACK = ( 9 | "No output detected. Use print() (or log to stderr) to display results. " 10 | "For pandas DataFrames, call print(df.head()) instead of just df.head(). " 11 | "To see all columns or wider tables, run " 12 | "pd.set_option('display.max_columns', None) and " 13 | "pd.set_option('display.width', 10000) before printing. " 14 | "Ensure your code is a self-contained script (not notebook style) and " 15 | "reference mounted files " 16 | "with their mount path, e.g. pd.read_csv('mounts/my_data.csv'). " 17 | "If an error occurs, double-check these points first." 
18 | )
19 | 
20 | 
21 | def register(mcp: FastMCP) -> None:
22 |     """Register the `run_code` tool on a FastMCP server instance.
23 | 
24 |     Usage (inside server.main):
25 | 
26 |         from server.tools import run_code
27 |         run_code.register(mcp)
28 |     """
29 | 
30 |     @mcp.tool(
31 |         name="run_code",
32 |         description=(
33 |             "Run self-contained Python scripts in an isolated sandbox. "
34 |             "Send a 'session_id' header to reuse the environment across runs; "
35 |             "otherwise the sandbox is reset each time. "
36 |             "Use print() (or log to stderr) to capture output—expressions "
37 |             "like df.head() alone will not be returned. "
38 |             "Store any artifacts you want back in the output/ directory; they "
39 |             "are returned as relative paths and downloadable via "
40 |             "/artifacts/{relative_path}. "
41 |             "Mounted files are available at mounts/. "
42 |             "If stdout is empty or execution fails, a 'feedback' string is "
43 |             "added to the response with suggestions. "
44 |             "Tip: when printing large pandas DataFrames, call "
45 |             "pd.set_option('display.max_columns', None) and "
46 |             "pd.set_option('display.width', 10000) first. Also, try to get "
47 |             "column names separately. "
48 |             "Optional parameters: requirements (list of pip specs) and files "
49 |             "[{url, mountPath}]. "
50 |             "Each file is downloaded before execution and made available at "
51 |             "./mounts/. "
52 |         ),
53 |     )
54 |     async def _run_code(
55 |         code: str,
56 |         requirements: list[str] | None = None,
57 |         files: list[dict[str, str]] | None = None,
58 |         ctx: Context | None = None,
59 |     ) -> RunCodeResult:
60 |         """Tool implementation compatible with FastMCP.
61 | 
62 |         If a session_id is provided, the environment and files persist for the
63 |         session. If not, the sandbox is stateless and files are deleted after
64 |         each run. Artifacts are returned as relative paths and downloadable via
65 |         /artifacts/{relative_path}. The session_id is always included in the
66 |         response if available.
67 | 
68 |         If stdout is empty or an error occurs, a feedback array is included in
69 |         the response with suggestions to use print statements and ensure code
70 |         is self-contained.
71 |         """
72 | 
73 |         # Default mutable params
74 |         requirements = requirements or []
75 |         files = files or []
76 | 
77 |         if len(code) > 20_000:
78 |             raise ValueError("Code block too large (20k char limit)")
79 | 
80 |         sid = ctx.session_id if ctx else None  # may be None on Streamable-HTTP
81 |         if not sid and ctx and ctx.request_context.request:
82 |             # see issue https://github.com/modelcontextprotocol/python-sdk/
83 |             # issues/1063 for more details
84 |             sid = ctx.request_context.request.headers.get("mcp-session-id")
85 | 
86 |         try:
87 |             result = await sandbox_execute(
88 |                 code=code,
89 |                 requirements=requirements,
90 |                 files=files,
91 |                 run_id=(ctx.request_id if ctx else "local"),
92 |                 session_id=sid,
93 |             )
94 |             # Always include session_id in the response if available
95 |             if sid:
96 |                 result = dict(result)
97 |                 result["session_id"] = sid
98 |             # Add feedback if stdout is empty
99 |             if not result.get("stdout"):
100 |                 result = dict(result)
101 |                 result["feedback"] = RESPONSE_FEEDBACK
102 |             return result
103 |         except Exception as exc:  # noqa: BLE001
104 |             # FastMCP automatically converts exceptions into ToolError
105 |             # responses.
106 |             feedback = [
107 |                 (
108 |                     "An error occurred. Please ensure your code is "
109 |                     "self-contained, uses print statements for output, and is "
110 |                     "not written in notebook style."
111 | ) 112 | ] 113 | raise type(exc)(str(exc) + f"\nFEEDBACK: {feedback[0]}") from exc 114 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | """Shared pytest fixtures for PRIMS testing.""" 2 | 3 | import shutil 4 | import tempfile 5 | from collections.abc import AsyncGenerator, Generator 6 | from pathlib import Path 7 | from unittest.mock import AsyncMock, Mock 8 | 9 | import pytest 10 | from httpx import AsyncClient 11 | 12 | 13 | @pytest.fixture 14 | def temp_dir() -> Generator[Path]: 15 | """Create a temporary directory for test isolation.""" 16 | temp_path = Path(tempfile.mkdtemp()) 17 | try: 18 | yield temp_path 19 | finally: 20 | shutil.rmtree(temp_path, ignore_errors=True) 21 | 22 | 23 | @pytest.fixture 24 | def mock_tmp_dir(temp_dir: Path, monkeypatch: pytest.MonkeyPatch) -> Path: 25 | """Mock the global TMP_DIR with a temporary directory.""" 26 | monkeypatch.setattr("server.config.TMP_DIR", temp_dir) 27 | monkeypatch.setattr("server.sandbox.runner.TMP_DIR", temp_dir) 28 | monkeypatch.setattr("server.tools.workspace_inspect.TMP_DIR", temp_dir) 29 | return temp_dir 30 | 31 | 32 | @pytest.fixture 33 | def session_id() -> str: 34 | """Provide a test session ID.""" 35 | return "test-session-123" 36 | 37 | 38 | @pytest.fixture 39 | def run_id() -> str: 40 | """Provide a test run ID.""" 41 | return "test-run-456" 42 | 43 | 44 | @pytest.fixture 45 | def sample_python_code() -> str: 46 | """Provide sample Python code for testing.""" 47 | return """ 48 | import pandas as pd 49 | import os 50 | 51 | # Test basic functionality 52 | print("Hello from sandbox!") 53 | print(f"Working directory: {os.getcwd()}") 54 | 55 | # Test file operations 56 | with open("output/test_output.txt", "w") as f: 57 | f.write("Test output file") 58 | 59 | # Test pandas 60 | df = pd.DataFrame({"col1": [1, 2, 3], "col2": ["a", "b", "c"]}) 61 | print(df.head()) 62 | """ 63 | 64 | 65 | @pytest.fixture 66 | def invalid_python_code() -> str: 67 | """Provide invalid Python code for testing error handling.""" 68 | return """ 69 | import non_existent_module 70 | print("This should fail") 71 | invalid_syntax here 72 | """ 73 | 74 | 75 | @pytest.fixture 76 | def mock_fastmcp() -> Mock: 77 | """Create a mock FastMCP instance for testing.""" 78 | mock_mcp = Mock() 79 | mock_mcp.name = "test-prims" 80 | mock_mcp.version = "0.1.0" 81 | mock_mcp.tool = Mock() 82 | return mock_mcp 83 | 84 | 85 | @pytest.fixture 86 | def mock_context() -> Mock: 87 | """Create a mock Context for testing.""" 88 | context = Mock() 89 | context.session_id = "test-session-123" 90 | context.request_id = "test-request-456" 91 | context.request_context.request = Mock() 92 | context.request_context.request.headers = {"mcp-session-id": "test-session-123"} 93 | return context 94 | 95 | 96 | @pytest.fixture 97 | async def http_client() -> AsyncGenerator[AsyncClient]: 98 | """Create an HTTP client for integration testing.""" 99 | async with AsyncClient() as client: 100 | yield client 101 | 102 | 103 | @pytest.fixture 104 | def mock_subprocess_success() -> Mock: 105 | """Mock successful subprocess execution.""" 106 | mock_process = AsyncMock() 107 | mock_process.communicate = AsyncMock( 108 | return_value=(b"stdout output", b"stderr output") 109 | ) 110 | mock_process.returncode = 0 111 | mock_process.wait = AsyncMock(return_value=None) 112 | mock_process.kill = Mock(return_value=None) 113 | return mock_process 114 | 115 | 116 
| @pytest.fixture 117 | def mock_subprocess_failure() -> Mock: 118 | """Mock failed subprocess execution.""" 119 | mock_process = AsyncMock() 120 | mock_process.communicate = AsyncMock(return_value=(b"", b"Error: command failed")) 121 | mock_process.returncode = 1 122 | mock_process.wait = AsyncMock(return_value=None) 123 | mock_process.kill = Mock(return_value=None) 124 | return mock_process 125 | 126 | 127 | @pytest.fixture 128 | def sample_requirements() -> list[str]: 129 | """Provide sample pip requirements for testing.""" 130 | return ["numpy>=1.20.0", "matplotlib>=3.5.0"] 131 | 132 | 133 | @pytest.fixture 134 | def sample_files() -> list[dict[str, str]]: 135 | """Provide sample file mounting configuration for testing.""" 136 | return [ 137 | {"url": "https://example.com/data.csv", "mountPath": "data/input.csv"}, 138 | {"url": "https://example.com/config.json", "mountPath": "config.json"}, 139 | ] 140 | 141 | 142 | @pytest.fixture 143 | def mock_download_success(monkeypatch: pytest.MonkeyPatch) -> None: 144 | """Mock successful file downloads.""" 145 | 146 | async def mock_download_files( 147 | files: list[dict[str, str]], mount_dir: Path 148 | ) -> list[Path]: 149 | paths = [] 150 | for file_info in files: 151 | mount_path = mount_dir / file_info["mountPath"] 152 | mount_path.parent.mkdir(parents=True, exist_ok=True) 153 | mount_path.write_text(f"Mock content for {file_info['url']}") 154 | paths.append(mount_path) 155 | return paths 156 | 157 | monkeypatch.setattr("server.sandbox.runner.download_files", mock_download_files) 158 | 159 | 160 | @pytest.fixture 161 | def mock_virtualenv_creation(monkeypatch: pytest.MonkeyPatch, temp_dir: Path) -> Path: 162 | """Mock virtual environment creation.""" 163 | python_path = temp_dir / "venv" / "bin" / "python" 164 | python_path.parent.mkdir(parents=True, exist_ok=True) 165 | python_path.write_text("#!/usr/bin/env python3\n# Mock Python executable") 166 | python_path.chmod(0o755) 167 | 168 | async def mock_create_virtualenv(requirements: list[str], run_dir: Path) -> Path: 169 | return python_path 170 | 171 | monkeypatch.setattr("server.sandbox.env.create_virtualenv", mock_create_virtualenv) 172 | return python_path 173 | -------------------------------------------------------------------------------- /tests/integration/test_mcp_protocol.py: -------------------------------------------------------------------------------- 1 | """Integration tests for MCP protocol functionality.""" 2 | 3 | import asyncio 4 | from pathlib import Path 5 | from unittest.mock import patch 6 | 7 | import pytest 8 | 9 | from server.main import mcp 10 | 11 | 12 | @pytest.mark.integration 13 | class TestMCPIntegration: 14 | """Test MCP protocol integration.""" 15 | 16 | @pytest.mark.asyncio 17 | async def test_mcp_server_startup(self) -> None: 18 | """Test that MCP server can start up properly.""" 19 | # This is a basic integration test - in a real scenario 20 | # we would start the actual server 21 | assert mcp is not None 22 | assert mcp.name == "primcs" 23 | # FastMCP doesn't expose version as an attribute, check initialization instead 24 | assert hasattr(mcp, "name") 25 | assert isinstance(mcp.name, str) 26 | 27 | @pytest.mark.asyncio 28 | async def test_tool_registration(self) -> None: 29 | """Test that all tools are properly registered.""" 30 | # Verify MCP instance has the expected structure 31 | # FastMCP uses different internal structure, check for callable methods 32 | assert hasattr(mcp, "tool") # Decorator method exists 33 | assert callable(mcp.tool) 34 | 35 | 
# Verify the server is properly configured 36 | assert mcp.name == "primcs" 37 | 38 | # In a real test, we would inspect the registered tools 39 | # and verify they match our expected tool set 40 | 41 | @pytest.mark.asyncio 42 | @pytest.mark.slow 43 | async def test_run_code_integration( 44 | self, 45 | mock_tmp_dir: Path, 46 | mock_virtualenv_creation: Path, 47 | mock_download_success: None, 48 | ) -> None: 49 | """Test full run_code tool integration.""" 50 | from fastmcp import FastMCP 51 | 52 | from server.tools.run_code import register 53 | 54 | # Create a test MCP instance 55 | test_mcp = FastMCP(name="test", version="1.0") 56 | register(test_mcp) 57 | 58 | # This would be expanded to test actual tool execution 59 | # in a real integration test environment 60 | 61 | # Mock subprocess for integration test 62 | with patch("server.sandbox.runner.asyncio.create_subprocess_exec"): 63 | mock_process = asyncio.create_subprocess_exec 64 | mock_process.communicate = lambda: (b"Hello World", b"") 65 | mock_process.returncode = 0 66 | 67 | # Test would verify the full flow here 68 | pass 69 | 70 | @pytest.mark.asyncio 71 | async def test_artifact_serving_integration(self, mock_tmp_dir: Path) -> None: 72 | """Test artifact serving through HTTP endpoint.""" 73 | # Create test artifact 74 | session_id = "test-session" 75 | session_dir = mock_tmp_dir / f"session_{session_id}" 76 | output_dir = session_dir / "output" 77 | output_dir.mkdir(parents=True, exist_ok=True) 78 | 79 | test_file = output_dir / "test.txt" 80 | test_file.write_text("Test artifact content") 81 | 82 | # This would test the actual HTTP endpoint in a real scenario 83 | # For now, we just verify the file structure is correct 84 | assert test_file.exists() 85 | assert test_file.read_text() == "Test artifact content" 86 | 87 | @pytest.mark.asyncio 88 | async def test_session_persistence(self, mock_tmp_dir: Path) -> None: 89 | """Test session-based workspace persistence.""" 90 | session_id = "persistent-session" 91 | 92 | # Simulate multiple operations in the same session 93 | session_dir = mock_tmp_dir / f"session_{session_id}" 94 | 95 | # First operation 96 | session_dir.mkdir(parents=True, exist_ok=True) 97 | (session_dir / "mounts").mkdir(exist_ok=True) 98 | (session_dir / "output").mkdir(exist_ok=True) 99 | 100 | # Create some files 101 | (session_dir / "output" / "result1.txt").write_text("First result") 102 | 103 | # Second operation (should see previous files) 104 | (session_dir / "output" / "result2.txt").write_text("Second result") 105 | 106 | # Verify both files exist 107 | assert (session_dir / "output" / "result1.txt").exists() 108 | assert (session_dir / "output" / "result2.txt").exists() 109 | 110 | 111 | @pytest.mark.integration 112 | @pytest.mark.e2e 113 | class TestEndToEnd: 114 | """End-to-end integration tests.""" 115 | 116 | @pytest.mark.asyncio 117 | async def test_complete_workflow(self) -> None: 118 | """Test a complete workflow from code submission to artifact retrieval.""" 119 | # This would be a full end-to-end test in a real scenario 120 | # involving starting the server, making HTTP requests, etc. 121 | 122 | test_code = """ 123 | import pandas as pd 124 | df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}) 125 | print(df) 126 | df.to_csv('output/test.csv', index=False) 127 | """ 128 | 129 | # In a real E2E test, we would: 130 | # 1. Start the MCP server 131 | # 2. Submit the code via MCP protocol 132 | # 3. Verify the output 133 | # 4. Download the artifact 134 | # 5. 
Verify artifact contents 135 | 136 | assert len(test_code) > 0 # Placeholder assertion 137 | 138 | @pytest.mark.asyncio 139 | async def test_error_handling_workflow(self) -> None: 140 | """Test error handling in a complete workflow.""" 141 | invalid_code = """ 142 | import non_existent_module 143 | print("This will fail") 144 | """ 145 | 146 | # Test that errors are properly propagated and handled 147 | assert len(invalid_code) > 0 # Placeholder assertion 148 | 149 | @pytest.mark.asyncio 150 | async def test_file_mounting_workflow(self) -> None: 151 | """Test the complete file mounting workflow.""" 152 | # Test mounting files and using them in code execution 153 | test_files = [{"url": "https://httpbin.org/json", "mountPath": "data.json"}] 154 | 155 | test_code = """ 156 | import json 157 | with open('mounts/data.json', 'r') as f: 158 | data = json.load(f) 159 | print(f"Loaded data: {data}") 160 | """ 161 | 162 | # In a real test, this would verify the complete mounting workflow 163 | assert len(test_files) > 0 164 | assert len(test_code) > 0 165 | -------------------------------------------------------------------------------- /tests/unit/test_sandbox_env.py: -------------------------------------------------------------------------------- 1 | """Unit tests for server.sandbox.env module.""" 2 | 3 | import sys 4 | from pathlib import Path 5 | from unittest.mock import AsyncMock, Mock, patch 6 | 7 | import pytest 8 | 9 | from server.sandbox.env import _DEFAULT_PACKAGES, create_virtualenv 10 | 11 | 12 | class TestCreateVirtualenv: 13 | """Test virtual environment creation.""" 14 | 15 | @pytest.mark.asyncio 16 | async def test_create_virtualenv_success(self, temp_dir: Path) -> None: 17 | """Test successful virtual environment creation.""" 18 | requirements = ["numpy", "pandas"] 19 | 20 | with ( 21 | patch("server.sandbox.env.venv") as mock_venv, 22 | patch( 23 | "server.sandbox.env.asyncio.create_subprocess_exec" 24 | ) as mock_subprocess, 25 | ): 26 | # Mock venv creation 27 | mock_builder = Mock() 28 | mock_venv.EnvBuilder.return_value = mock_builder 29 | 30 | # Mock subprocess for pip install 31 | mock_process = AsyncMock() 32 | mock_process.communicate = AsyncMock(return_value=(b"", b"")) 33 | mock_process.returncode = 0 34 | mock_subprocess.return_value = mock_process 35 | 36 | # Call function 37 | python_path = await create_virtualenv(requirements, temp_dir) 38 | 39 | # Verify venv creation 40 | mock_venv.EnvBuilder.assert_called_once_with(with_pip=True, clear=True) 41 | mock_builder.create.assert_called_once() 42 | 43 | # Verify pip install call 44 | mock_subprocess.assert_called_once() 45 | args = mock_subprocess.call_args[0] 46 | 47 | # Check that python executable path is correct 48 | expected_python = ( 49 | temp_dir 50 | / "venv" 51 | / ("Scripts" if sys.platform.startswith("win") else "bin") 52 | / "python" 53 | ) 54 | assert Path(args[0]) == expected_python 55 | assert args[1:4] == ("-m", "pip", "install") 56 | assert "--no-cache-dir" in args 57 | 58 | # Check that requirements include both custom and default packages 59 | install_args = args[4:] # Skip python, -m, pip, install 60 | install_args = [arg for arg in install_args if arg != "--no-cache-dir"] 61 | 62 | expected_packages = list(dict.fromkeys(requirements + _DEFAULT_PACKAGES)) 63 | for package in expected_packages: 64 | assert package in install_args 65 | 66 | # Check return value 67 | assert python_path == expected_python 68 | 69 | @pytest.mark.asyncio 70 | async def test_create_virtualenv_pip_failure(self, temp_dir: 
Path) -> None: 71 | """Test virtual environment creation with pip install failure.""" 72 | requirements = ["invalid-package"] 73 | 74 | with ( 75 | patch("server.sandbox.env.venv") as mock_venv, 76 | patch( 77 | "server.sandbox.env.asyncio.create_subprocess_exec" 78 | ) as mock_subprocess, 79 | ): 80 | # Mock venv creation 81 | mock_builder = Mock() 82 | mock_venv.EnvBuilder.return_value = mock_builder 83 | 84 | # Mock subprocess for pip install failure 85 | mock_process = AsyncMock() 86 | mock_process.communicate = AsyncMock( 87 | return_value=(b"", b"ERROR: Could not find package") 88 | ) 89 | mock_process.returncode = 1 90 | mock_subprocess.return_value = mock_process 91 | 92 | # Should raise RuntimeError 93 | with pytest.raises(RuntimeError, match="pip install failed"): 94 | _ = await create_virtualenv(requirements, temp_dir) 95 | 96 | @pytest.mark.asyncio 97 | async def test_create_virtualenv_no_requirements(self, temp_dir: Path) -> None: 98 | """Test virtual environment creation with no additional requirements.""" 99 | requirements: list[str] = [] 100 | 101 | with ( 102 | patch("server.sandbox.env.venv") as mock_venv, 103 | patch( 104 | "server.sandbox.env.asyncio.create_subprocess_exec" 105 | ) as mock_subprocess, 106 | ): 107 | # Mock venv creation 108 | mock_builder = Mock() 109 | mock_venv.EnvBuilder.return_value = mock_builder 110 | 111 | # Mock subprocess for pip install 112 | mock_process = AsyncMock() 113 | mock_process.communicate = AsyncMock(return_value=(b"", b"")) 114 | mock_process.returncode = 0 115 | mock_subprocess.return_value = mock_process 116 | 117 | # Call function 118 | _ = await create_virtualenv(requirements, temp_dir) 119 | 120 | # Should still install default packages 121 | mock_subprocess.assert_called_once() 122 | args = mock_subprocess.call_args[0] 123 | install_args = args[4:] # Skip python, -m, pip, install 124 | install_args = [arg for arg in install_args if arg != "--no-cache-dir"] 125 | 126 | for package in _DEFAULT_PACKAGES: 127 | assert package in install_args 128 | 129 | @pytest.mark.asyncio 130 | async def test_create_virtualenv_duplicate_requirements( 131 | self, temp_dir: Path 132 | ) -> None: 133 | """Test that duplicate requirements are deduplicated.""" 134 | requirements = [ 135 | "pandas", 136 | "numpy", 137 | "pandas", 138 | ] # pandas is duplicated and also in defaults 139 | 140 | with ( 141 | patch("server.sandbox.env.venv") as mock_venv, 142 | patch( 143 | "server.sandbox.env.asyncio.create_subprocess_exec" 144 | ) as mock_subprocess, 145 | ): 146 | # Mock venv creation 147 | mock_builder = Mock() 148 | mock_venv.EnvBuilder.return_value = mock_builder 149 | 150 | # Mock subprocess for pip install 151 | mock_process = AsyncMock() 152 | mock_process.communicate = AsyncMock(return_value=(b"", b"")) 153 | mock_process.returncode = 0 154 | mock_subprocess.return_value = mock_process 155 | 156 | # Call function 157 | _ = await create_virtualenv(requirements, temp_dir) 158 | 159 | # Check that duplicates are removed 160 | args = mock_subprocess.call_args[0] 161 | install_args = args[4:] # Skip python, -m, pip, install 162 | install_args = [arg for arg in install_args if arg != "--no-cache-dir"] 163 | 164 | # pandas should appear only once 165 | pandas_count = install_args.count("pandas") 166 | assert pandas_count == 1 167 | 168 | def test_default_packages_constant(self) -> None: 169 | """Test that default packages are properly defined.""" 170 | assert isinstance(_DEFAULT_PACKAGES, list) 171 | assert len(_DEFAULT_PACKAGES) > 0 172 | assert 
"pandas" in _DEFAULT_PACKAGES 173 | assert "openpyxl" in _DEFAULT_PACKAGES 174 | assert "requests" in _DEFAULT_PACKAGES 175 | 176 | @pytest.mark.asyncio 177 | async def test_create_virtualenv_windows_path(self, temp_dir: Path) -> None: 178 | """Test that Windows-style paths are handled correctly.""" 179 | requirements = ["numpy"] 180 | 181 | with ( 182 | patch("server.sandbox.env.venv") as mock_venv, 183 | patch( 184 | "server.sandbox.env.asyncio.create_subprocess_exec" 185 | ) as mock_subprocess, 186 | patch("server.sandbox.env.sys.platform", "win32"), 187 | ): 188 | # Mock venv creation 189 | mock_builder = Mock() 190 | mock_venv.EnvBuilder.return_value = mock_builder 191 | 192 | # Mock subprocess for pip install 193 | mock_process = AsyncMock() 194 | mock_process.communicate = AsyncMock(return_value=(b"", b"")) 195 | mock_process.returncode = 0 196 | mock_subprocess.return_value = mock_process 197 | 198 | # Call function 199 | python_path = await create_virtualenv(requirements, temp_dir) 200 | 201 | # Check that Windows path is used 202 | expected_python = temp_dir / "venv" / "Scripts" / "python" 203 | assert python_path == expected_python 204 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI/CD Pipeline 2 | 3 | on: 4 | push: 5 | branches: [ main, develop ] 6 | pull_request: 7 | branches: [ main, develop ] 8 | schedule: 9 | # Run tests daily at 2 AM UTC 10 | - cron: '0 2 * * *' 11 | 12 | env: 13 | PYTHON_DEFAULT_VERSION: "3.13" 14 | 15 | jobs: 16 | test: 17 | name: Test Suite 18 | runs-on: ubuntu-latest 19 | strategy: 20 | matrix: 21 | python-version: ["3.12", "3.13"] 22 | os: [ubuntu-latest, windows-latest, macos-latest] 23 | fail-fast: false 24 | 25 | steps: 26 | - name: Checkout code 27 | uses: actions/checkout@v4 28 | 29 | - name: Set up Python ${{ matrix.python-version }} 30 | uses: actions/setup-python@v4 31 | with: 32 | python-version: ${{ matrix.python-version }} 33 | cache: 'pip' 34 | 35 | - name: Install system dependencies (Ubuntu) 36 | if: matrix.os == 'ubuntu-latest' 37 | run: | 38 | sudo apt-get update 39 | sudo apt-get install -y build-essential 40 | 41 | - name: Install dependencies 42 | run: | 43 | python -m pip install --upgrade pip setuptools wheel 44 | pip install -r requirements.txt 45 | pip install -r requirements-dev.txt 46 | 47 | - name: Run unit tests 48 | run: | 49 | python -m pytest tests/unit -v --tb=short --cov=server --cov-report=xml --cov-report=term-missing 50 | env: 51 | PYTHONPATH: ${{ github.workspace }} 52 | 53 | - name: Run integration tests 54 | run: | 55 | python -m pytest tests/integration -v --tb=short -m "not slow" 56 | env: 57 | PYTHONPATH: ${{ github.workspace }} 58 | 59 | - name: Upload coverage to Codecov 60 | if: matrix.python-version == env.PYTHON_DEFAULT_VERSION && matrix.os == 'ubuntu-latest' 61 | uses: codecov/codecov-action@v3 62 | with: 63 | file: ./coverage.xml 64 | flags: unittests 65 | name: codecov-umbrella 66 | fail_ci_if_error: false 67 | 68 | lint: 69 | if: false 70 | name: Code Quality 71 | runs-on: ubuntu-latest 72 | 73 | steps: 74 | - name: Checkout code 75 | uses: actions/checkout@v4 76 | 77 | - name: Set up Python 78 | uses: actions/setup-python@v4 79 | with: 80 | python-version: ${{ env.PYTHON_DEFAULT_VERSION }} 81 | cache: 'pip' 82 | 83 | - name: Install dependencies 84 | run: | 85 | python -m pip install --upgrade pip 86 | pip install -r requirements.txt 87 | pip 
install -r requirements-dev.txt 88 | 89 | - name: Run ruff linter 90 | run: | 91 | python -m ruff check . --output-format=github 92 | 93 | - name: Run ruff formatter check 94 | run: | 95 | python -m ruff format --check . 96 | 97 | - name: Run black formatter check 98 | run: | 99 | python -m black --check --diff . 100 | 101 | - name: Run isort import sorting check 102 | run: | 103 | python -m isort --check-only --diff . 104 | 105 | - name: Run mypy type checking 106 | run: | 107 | python -m mypy server/ 108 | continue-on-error: true # Type checking failures shouldn't block CI 109 | 110 | security: 111 | name: Security Scan 112 | runs-on: ubuntu-latest 113 | 114 | steps: 115 | - name: Checkout code 116 | uses: actions/checkout@v4 117 | 118 | - name: Set up Python 119 | uses: actions/setup-python@v4 120 | with: 121 | python-version: ${{ env.PYTHON_DEFAULT_VERSION }} 122 | cache: 'pip' 123 | 124 | - name: Install dependencies 125 | run: | 126 | python -m pip install --upgrade pip 127 | pip install -r requirements.txt 128 | pip install -r requirements-dev.txt 129 | 130 | - name: Run bandit security linter 131 | run: | 132 | python -m bandit -r server/ -f json -o bandit-report.json 133 | continue-on-error: true 134 | 135 | - name: Upload bandit report 136 | uses: actions/upload-artifact@v4 137 | if: always() 138 | with: 139 | name: bandit-report 140 | path: bandit-report.json 141 | 142 | - name: Run safety check 143 | run: | 144 | python -m safety check --json --output safety-report.json 145 | continue-on-error: true 146 | 147 | - name: Upload safety report 148 | uses: actions/upload-artifact@v4 149 | if: always() 150 | with: 151 | name: safety-report 152 | path: safety-report.json 153 | 154 | docker: 155 | name: Docker Build Test 156 | runs-on: ubuntu-latest 157 | 158 | steps: 159 | - name: Checkout code 160 | uses: actions/checkout@v4 161 | 162 | - name: Set up Docker Buildx 163 | uses: docker/setup-buildx-action@v3 164 | 165 | - name: Build Docker image 166 | run: | 167 | docker build -t prims:test . 168 | 169 | - name: Test Docker image 170 | run: | 171 | # Start container in background 172 | docker run -d --name prims-test -p 9000:9000 prims:test 173 | 174 | # Wait for container to start 175 | sleep 10 176 | 177 | # Test health endpoint (if available) 178 | curl -f http://localhost:9000/health || echo "Health endpoint not available yet" 179 | 180 | # Clean up 181 | docker stop prims-test 182 | docker rm prims-test 183 | 184 | integration: 185 | name: Integration Tests 186 | runs-on: ubuntu-latest 187 | needs: [test, lint] 188 | 189 | steps: 190 | - name: Checkout code 191 | uses: actions/checkout@v4 192 | 193 | - name: Set up Python 194 | uses: actions/setup-python@v4 195 | with: 196 | python-version: ${{ env.PYTHON_DEFAULT_VERSION }} 197 | cache: 'pip' 198 | 199 | - name: Install dependencies 200 | run: | 201 | python -m pip install --upgrade pip 202 | pip install -r requirements.txt 203 | pip install -r requirements-dev.txt 204 | 205 | - name: Run integration tests 206 | run: | 207 | python -m pytest tests/integration -v --tb=short 208 | env: 209 | PYTHONPATH: ${{ github.workspace }} 210 | 211 | - name: Run end-to-end tests 212 | run: | 213 | # Start the server in background 214 | python -m server.main & 215 | SERVER_PID=$! 
216 | 217 | # Wait for server to start 218 | sleep 5 219 | 220 | # Run E2E tests against running server 221 | python -m pytest tests/integration -v -m "e2e" || true 222 | 223 | # Clean up 224 | kill $SERVER_PID || true 225 | env: 226 | PYTHONPATH: ${{ github.workspace }} 227 | PORT: 9001 228 | 229 | build-and-publish: 230 | name: Build and Publish 231 | runs-on: ubuntu-latest 232 | needs: [test, lint, security, docker, integration] 233 | if: github.event_name == 'push' && github.ref == 'refs/heads/main' 234 | 235 | steps: 236 | - name: Checkout code 237 | uses: actions/checkout@v4 238 | 239 | - name: Set up Python 240 | uses: actions/setup-python@v4 241 | with: 242 | python-version: ${{ env.PYTHON_DEFAULT_VERSION }} 243 | 244 | - name: Install build dependencies 245 | run: | 246 | python -m pip install --upgrade pip build twine 247 | 248 | - name: Build package 249 | run: | 250 | python -m build 251 | 252 | - name: Upload build artifacts 253 | uses: actions/upload-artifact@v4 254 | with: 255 | name: dist 256 | path: dist/ 257 | 258 | # Uncomment when ready to publish to PyPI 259 | # - name: Publish to PyPI 260 | # if: startsWith(github.ref, 'refs/tags/v') 261 | # env: 262 | # TWINE_USERNAME: __token__ 263 | # TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} 264 | # run: | 265 | # python -m twine upload dist/* 266 | 267 | dependency-review: 268 | name: Dependency Review 269 | runs-on: ubuntu-latest 270 | if: github.event_name == 'pull_request' 271 | 272 | steps: 273 | - name: Checkout code 274 | uses: actions/checkout@v4 275 | 276 | - name: Dependency Review 277 | uses: actions/dependency-review-action@v3 278 | with: 279 | fail-on-severity: moderate 280 | 281 | performance: 282 | name: Performance Tests 283 | runs-on: ubuntu-latest 284 | needs: [test] 285 | if: github.event_name == 'push' && github.ref == 'refs/heads/main' 286 | 287 | steps: 288 | - name: Checkout code 289 | uses: actions/checkout@v4 290 | 291 | - name: Set up Python 292 | uses: actions/setup-python@v4 293 | with: 294 | python-version: ${{ env.PYTHON_DEFAULT_VERSION }} 295 | cache: 'pip' 296 | 297 | - name: Install dependencies 298 | run: | 299 | python -m pip install --upgrade pip 300 | pip install -r requirements.txt 301 | pip install -r requirements-dev.txt 302 | 303 | - name: Run performance tests 304 | run: | 305 | python -m pytest tests/performance -v --tb=short || echo "Performance tests not implemented yet" 306 | env: 307 | PYTHONPATH: ${{ github.workspace }} 308 | 309 | notify: 310 | name: Notify Results 311 | runs-on: ubuntu-latest 312 | needs: [test, lint, security, docker, integration] 313 | if: always() 314 | 315 | steps: 316 | - name: Determine status 317 | id: status 318 | run: | 319 | if [[ "${{ needs.test.result }}" == "success" && "${{ needs.lint.result }}" == "success" ]]; then 320 | echo "status=success" >> $GITHUB_OUTPUT 321 | else 322 | echo "status=failure" >> $GITHUB_OUTPUT 323 | fi 324 | 325 | - name: Create summary 326 | run: | 327 | echo "## CI/CD Pipeline Results" >> $GITHUB_STEP_SUMMARY 328 | echo "" >> $GITHUB_STEP_SUMMARY 329 | echo "| Job | Status |" >> $GITHUB_STEP_SUMMARY 330 | echo "|-----|--------|" >> $GITHUB_STEP_SUMMARY 331 | echo "| Test Suite | ${{ needs.test.result }} |" >> $GITHUB_STEP_SUMMARY 332 | echo "| Code Quality | ${{ needs.lint.result }} |" >> $GITHUB_STEP_SUMMARY 333 | echo "| Security Scan | ${{ needs.security.result }} |" >> $GITHUB_STEP_SUMMARY 334 | echo "| Docker Build | ${{ needs.docker.result }} |" >> $GITHUB_STEP_SUMMARY 335 | echo "| Integration Tests | ${{ 
needs.integration.result }} |" >> $GITHUB_STEP_SUMMARY 336 | -------------------------------------------------------------------------------- /tests/unit/test_sandbox_runner.py: -------------------------------------------------------------------------------- 1 | """Unit tests for server.sandbox.runner module.""" 2 | 3 | from pathlib import Path 4 | from unittest.mock import AsyncMock, Mock, patch 5 | 6 | import pytest 7 | 8 | from server.sandbox.runner import ArtifactMeta, RunCodeResult, run_code 9 | 10 | 11 | class TestRunCode: 12 | """Test code execution functionality.""" 13 | 14 | @pytest.mark.asyncio 15 | async def test_run_code_success_with_session( 16 | self, 17 | mock_tmp_dir: Path, 18 | session_id: str, 19 | run_id: str, 20 | sample_python_code: str, 21 | mock_download_success: None, 22 | mock_virtualenv_creation: Path, 23 | ) -> None: 24 | """Test successful code execution with session persistence.""" 25 | requirements = ["numpy"] 26 | files = [{"url": "https://example.com/data.csv", "mountPath": "data.csv"}] 27 | 28 | with patch( 29 | "server.sandbox.runner.asyncio.create_subprocess_exec" 30 | ) as mock_subprocess: 31 | # Mock subprocess execution 32 | mock_process = AsyncMock() 33 | mock_process.communicate = AsyncMock( 34 | return_value=( 35 | b"Hello from sandbox!\nWorking directory: /tmp/session\n", 36 | b"Warning: some warning\n", 37 | ) 38 | ) 39 | mock_process.returncode = 0 40 | mock_subprocess.return_value = mock_process 41 | 42 | # Create expected output file 43 | session_dir = mock_tmp_dir / f"session_{session_id}" 44 | output_dir = session_dir / "output" 45 | output_dir.mkdir(parents=True, exist_ok=True) 46 | (output_dir / "test_output.txt").write_text("Test output file") 47 | 48 | # Call function 49 | result = await run_code( 50 | code=sample_python_code, 51 | requirements=requirements, 52 | files=files, 53 | run_id=run_id, 54 | session_id=session_id, 55 | ) 56 | 57 | # Verify result structure 58 | assert isinstance(result, dict) 59 | assert "stdout" in result 60 | assert "stderr" in result 61 | assert "artifacts" in result 62 | 63 | # Verify output 64 | assert "Hello from sandbox!" 
in result["stdout"] 65 | assert "Warning: some warning" in result["stderr"] 66 | 67 | # Verify artifacts 68 | artifacts = result["artifacts"] 69 | assert len(artifacts) == 1 70 | artifact = artifacts[0] 71 | assert artifact["name"] == "test_output.txt" 72 | assert artifact["relative_path"] == "test_output.txt" 73 | assert artifact["size"] > 0 74 | assert "text" in artifact["mime"] 75 | 76 | @pytest.mark.asyncio 77 | async def test_run_code_success_without_session( 78 | self, 79 | mock_tmp_dir: Path, 80 | run_id: str, 81 | sample_python_code: str, 82 | mock_download_success: None, 83 | mock_virtualenv_creation: Path, 84 | ) -> None: 85 | """Test successful code execution without session (stateless).""" 86 | requirements: list[str] = [] 87 | files: list[dict[str, str]] = [] 88 | 89 | with patch( 90 | "server.sandbox.runner.asyncio.create_subprocess_exec" 91 | ) as mock_subprocess: 92 | # Mock subprocess execution 93 | mock_process = AsyncMock() 94 | mock_process.communicate = AsyncMock( 95 | return_value=(b"Output without session", b"") 96 | ) 97 | mock_process.returncode = 0 98 | mock_subprocess.return_value = mock_process 99 | 100 | # Call function without session_id 101 | result = await run_code( 102 | code=sample_python_code, 103 | requirements=requirements, 104 | files=files, 105 | run_id=run_id, 106 | session_id=None, 107 | ) 108 | 109 | # Verify result 110 | assert result["stdout"] == "Output without session" 111 | assert result["stderr"] == "" 112 | assert result["artifacts"] == [] 113 | 114 | @pytest.mark.asyncio 115 | async def test_run_code_timeout( 116 | self, 117 | mock_tmp_dir: Path, 118 | run_id: str, 119 | sample_python_code: str, 120 | mock_download_success: None, 121 | mock_virtualenv_creation: Path, 122 | ) -> None: 123 | """Test code execution timeout handling.""" 124 | with ( 125 | patch( 126 | "server.sandbox.runner.asyncio.create_subprocess_exec" 127 | ) as mock_subprocess, 128 | patch("server.sandbox.runner.asyncio.wait_for") as mock_wait_for, 129 | patch("server.sandbox.runner.create_virtualenv") as mock_create_venv, 130 | ): 131 | # Mock virtualenv creation to return the mocked python path 132 | mock_create_venv.return_value = mock_virtualenv_creation 133 | 134 | # Mock subprocess 135 | mock_process = AsyncMock() 136 | mock_process.kill = Mock(return_value=None) 137 | mock_process.wait = AsyncMock(return_value=None) 138 | mock_subprocess.return_value = mock_process 139 | 140 | # Mock timeout on the wait_for call 141 | mock_wait_for.side_effect = TimeoutError() 142 | 143 | # Should raise RuntimeError 144 | with pytest.raises(RuntimeError, match="Execution timed out"): 145 | await run_code( 146 | code=sample_python_code, 147 | requirements=[], 148 | files=[], 149 | run_id=run_id, 150 | session_id=None, 151 | ) 152 | 153 | # Verify process was killed 154 | mock_process.kill.assert_called_once() 155 | mock_process.wait.assert_called_once() 156 | 157 | @pytest.mark.asyncio 158 | async def test_run_code_with_artifacts( 159 | self, 160 | mock_tmp_dir: Path, 161 | session_id: str, 162 | run_id: str, 163 | mock_download_success: None, 164 | mock_virtualenv_creation: Path, 165 | ) -> None: 166 | """Test code execution with multiple artifacts.""" 167 | code = "print('Creating artifacts')" 168 | 169 | with patch( 170 | "server.sandbox.runner.asyncio.create_subprocess_exec" 171 | ) as mock_subprocess: 172 | # Mock subprocess execution 173 | mock_process = AsyncMock() 174 | mock_process.communicate = AsyncMock( 175 | return_value=(b"Creating artifacts", b"") 176 | ) 177 | 
mock_process.returncode = 0 178 | mock_subprocess.return_value = mock_process 179 | 180 | # Create multiple output files 181 | session_dir = mock_tmp_dir / f"session_{session_id}" 182 | output_dir = session_dir / "output" 183 | output_dir.mkdir(parents=True, exist_ok=True) 184 | 185 | # Create various file types 186 | (output_dir / "data.csv").write_text("col1,col2\n1,2\n3,4") 187 | (output_dir / "plot.png").write_bytes( 188 | b"\x89PNG\r\n\x1a\n" 189 | ) # Fake PNG header 190 | (output_dir / "subdir").mkdir() 191 | (output_dir / "subdir" / "nested.txt").write_text("nested file") 192 | 193 | # Call function 194 | result = await run_code( 195 | code=code, 196 | requirements=[], 197 | files=[], 198 | run_id=run_id, 199 | session_id=session_id, 200 | ) 201 | 202 | # Verify artifacts 203 | artifacts = result["artifacts"] 204 | assert len(artifacts) == 3 205 | 206 | # Check artifact details 207 | artifact_names = {a["name"] for a in artifacts} 208 | assert "data.csv" in artifact_names 209 | assert "plot.png" in artifact_names 210 | assert "nested.txt" in artifact_names 211 | 212 | # Check MIME types 213 | csv_artifact = next(a for a in artifacts if a["name"] == "data.csv") 214 | assert csv_artifact["mime"] == "text/csv" 215 | 216 | png_artifact = next(a for a in artifacts if a["name"] == "plot.png") 217 | assert png_artifact["mime"] == "image/png" 218 | 219 | @pytest.mark.asyncio 220 | async def test_run_code_script_naming( 221 | self, 222 | mock_tmp_dir: Path, 223 | session_id: str, 224 | run_id: str, 225 | mock_download_success: None, 226 | mock_virtualenv_creation: Path, 227 | ) -> None: 228 | """Test that script naming varies based on session presence.""" 229 | code = "print('test')" 230 | 231 | with patch( 232 | "server.sandbox.runner.asyncio.create_subprocess_exec" 233 | ) as mock_subprocess: 234 | mock_process = AsyncMock() 235 | mock_process.communicate = AsyncMock(return_value=(b"test", b"")) 236 | mock_process.returncode = 0 237 | mock_subprocess.return_value = mock_process 238 | 239 | # Test with session (should use run_id in script name) 240 | await run_code( 241 | code=code, 242 | requirements=[], 243 | files=[], 244 | run_id=run_id, 245 | session_id=session_id, 246 | ) 247 | 248 | # Check script name includes run_id 249 | session_dir = mock_tmp_dir / f"session_{session_id}" 250 | expected_script = session_dir / f"script_{run_id}.py" 251 | assert expected_script.exists() 252 | 253 | # Test without session (should use generic script name) 254 | await run_code( 255 | code=code, 256 | requirements=[], 257 | files=[], 258 | run_id=run_id, 259 | session_id=None, 260 | ) 261 | 262 | # Check generic script name 263 | run_dir = mock_tmp_dir / f"run_{run_id}" 264 | expected_script = run_dir / "script.py" 265 | assert expected_script.exists() 266 | 267 | @pytest.mark.asyncio 268 | async def test_run_code_directory_creation( 269 | self, 270 | mock_tmp_dir: Path, 271 | session_id: str, 272 | run_id: str, 273 | mock_download_success: None, 274 | mock_virtualenv_creation: Path, 275 | ) -> None: 276 | """Test that required directories are created.""" 277 | code = "print('test')" 278 | 279 | with patch( 280 | "server.sandbox.runner.asyncio.create_subprocess_exec" 281 | ) as mock_subprocess: 282 | mock_process = AsyncMock() 283 | mock_process.communicate = AsyncMock(return_value=(b"test", b"")) 284 | mock_process.returncode = 0 285 | mock_subprocess.return_value = mock_process 286 | 287 | # Call with session 288 | await run_code( 289 | code=code, 290 | requirements=[], 291 | files=[], 292 | 
run_id=run_id, 293 | session_id=session_id, 294 | ) 295 | 296 | # Verify directories exist 297 | session_dir = mock_tmp_dir / f"session_{session_id}" 298 | assert session_dir.exists() 299 | assert (session_dir / "mounts").exists() 300 | assert (session_dir / "output").exists() 301 | 302 | def test_artifact_meta_type(self) -> None: 303 | """Test ArtifactMeta type definition.""" 304 | artifact: ArtifactMeta = { 305 | "name": "test.txt", 306 | "relative_path": "test.txt", 307 | "size": 100, 308 | "mime": "text/plain", 309 | } 310 | 311 | assert artifact["name"] == "test.txt" 312 | assert artifact["relative_path"] == "test.txt" 313 | assert artifact["size"] == 100 314 | assert artifact["mime"] == "text/plain" 315 | 316 | def test_run_code_result_type(self) -> None: 317 | """Test RunCodeResult type definition.""" 318 | result: RunCodeResult = { 319 | "stdout": "test output", 320 | "stderr": "test error", 321 | "artifacts": [], 322 | } 323 | 324 | assert result["stdout"] == "test output" 325 | assert result["stderr"] == "test error" 326 | assert result["artifacts"] == [] 327 | 328 | # Test with feedback 329 | result_with_feedback: RunCodeResult = { 330 | "stdout": "", 331 | "stderr": "", 332 | "artifacts": [], 333 | "feedback": "No output detected", 334 | } 335 | 336 | assert result_with_feedback["feedback"] == "No output detected" 337 | --------------------------------------------------------------------------------
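For quick manual testing, here is a minimal client-side sketch (not a file in the repository) showing how the `list_dir` and `preview_file` tools from `server/tools/workspace_inspect.py` might be called. It assumes the server is running locally on port 9000 over Streamable-HTTP and that the fastmcp `Client` keeps the same `mcp-session-id` for the lifetime of the connection, so files written by `run_code` are visible to the inspection tools:

import asyncio

from fastmcp import Client


async def main():
    async with Client("http://localhost:9000/mcp") as client:
        # Write something into the session workspace first.
        code = "open('output/hello.txt', 'w').write('hello')\nprint('done')"
        await client.call_tool("run_code", {"code": code})

        # List the output directory, then preview the file we just wrote.
        entries = await client.call_tool("list_dir", {"dir_path": "output"})
        print("Entries:", entries)

        preview = await client.call_tool(
            "preview_file", {"relative_path": "output/hello.txt"}
        )
        print("Preview:", preview)


if __name__ == "__main__":
    asyncio.run(main())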