├── src ├── codex │ ├── py.typed │ ├── vendor │ │ └── README.md │ ├── types.py │ ├── client.py │ ├── discovery.py │ ├── exceptions.py │ ├── schema.py │ ├── config.py │ ├── __init__.py │ ├── events.py │ ├── thread.py │ ├── exec.py │ └── items.py ├── codex_dspy │ ├── py.typed │ ├── __init__.py │ ├── agent.py │ └── adapter.py └── tests │ ├── property │ ├── __init__.py │ └── test_adapter_props.py │ ├── __init__.py │ └── unit │ ├── __init__.py │ ├── test_discovery.py │ └── test_codex_parsing.py ├── .python-version ├── .claude └── settings.local.json ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── examples ├── main.py └── basic_usage.py ├── flake.nix ├── scripts └── check_test_colocation.py ├── pyproject.toml ├── flake.lock ├── docs ├── CODEX_QUICK_REFERENCE.md ├── CODEX_ARCHITECTURE.md └── CODEX_SDK_API_SURFACE.md └── README.md /src/codex/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/codex_dspy/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.13 2 | -------------------------------------------------------------------------------- /src/tests/property/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Tests package 2 | -------------------------------------------------------------------------------- /src/tests/unit/__init__.py: -------------------------------------------------------------------------------- 1 | # Unit tests package 2 | 
-------------------------------------------------------------------------------- /src/codex/vendor/README.md: -------------------------------------------------------------------------------- 1 | Bundled Codex CLI binaries are placed in this directory under platform triples such as 2 | `x86_64-apple-darwin/codex/codex`. The initial Python SDK scaffolding does not include binaries. 3 | -------------------------------------------------------------------------------- /.claude/settings.local.json: -------------------------------------------------------------------------------- 1 | { 2 | "permissions": { 3 | "additionalDirectories": [ 4 | "/home/darin/proj/delightful-infra/infra/agents/claude-dspy" 5 | ], 6 | "allow": [ 7 | "Skill(superpowers:brainstorming)" 8 | ] 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /src/codex_dspy/__init__.py: -------------------------------------------------------------------------------- 1 | """CodexAgent - DSPy module for OpenAI Codex SDK. 2 | 3 | This package provides a signature-driven interface to the Codex agent SDK, 4 | enabling stateful agentic workflows through DSPy signatures. 
5 | """ 6 | 7 | from codex_dspy.adapter import CodexAdapter 8 | from codex_dspy.agent import CodexAgent 9 | 10 | __all__ = ["CodexAgent", "CodexAdapter"] 11 | __version__ = "0.1.0" 12 | -------------------------------------------------------------------------------- /src/codex/types.py: -------------------------------------------------------------------------------- 1 | """Type definitions for JSON values and other common types.""" 2 | 3 | from __future__ import annotations 4 | 5 | # Python 3.12+ recursive type alias for JSON values 6 | # This properly types any JSON-serializable value 7 | type JsonPrimitive = str | int | float | bool | None 8 | type JsonArray = list[JsonValue] 9 | type JsonObject = dict[str, JsonValue] 10 | type JsonValue = JsonPrimitive | JsonArray | JsonObject 11 | 12 | __all__ = [ 13 | "JsonArray", 14 | "JsonObject", 15 | "JsonPrimitive", 16 | "JsonValue", 17 | ] 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # Virtual environments 29 | .venv/ 30 | venv/ 31 | ENV/ 32 | env/ 33 | 34 | # IDEs 35 | .vscode/ 36 | .idea/ 37 | *.swp 38 | *.swo 39 | *~ 40 | 41 | # Testing 42 | .pytest_cache/ 43 | .coverage 44 | htmlcov/ 45 | .tox/ 46 | 47 | # Type checking 48 | .mypy_cache/ 49 | .pytype/ 50 | .pyre/ 51 | 52 | # OS 53 | .DS_Store 54 | Thumbs.db 55 | tmp/ 56 | /.hypothesis/ 57 | tmp/** 58 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: 
-------------------------------------------------------------------------------- 1 | repos: 2 | - repo: local 3 | hooks: 4 | - id: check-test-colocation 5 | name: Check test co-location 6 | entry: python scripts/check_test_colocation.py 7 | language: python 8 | pass_filenames: false 9 | always_run: true 10 | description: Ensures unit tests are co-located in src/tests/unit/ 11 | 12 | - repo: https://github.com/astral-sh/ruff-pre-commit 13 | rev: v0.8.0 14 | hooks: 15 | - id: ruff 16 | args: [--fix] 17 | - id: ruff-format 18 | 19 | - repo: https://github.com/pre-commit/pre-commit-hooks 20 | rev: v5.0.0 21 | hooks: 22 | - id: trailing-whitespace 23 | - id: end-of-file-fixer 24 | - id: check-yaml 25 | - id: check-added-large-files 26 | -------------------------------------------------------------------------------- /src/codex/client.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from .config import CodexOptions, ThreadOptions 4 | from .exec import CodexExec 5 | from .thread import Thread 6 | 7 | 8 | class Codex: 9 | def __init__(self, options: CodexOptions | None = None) -> None: 10 | opts = options or CodexOptions() 11 | self._options = opts 12 | self._exec = CodexExec(opts.codex_path_override, opts.env) 13 | 14 | def start_thread(self, options: ThreadOptions | None = None) -> Thread: 15 | thread_options = options or ThreadOptions() 16 | return Thread(self._exec, self._options, thread_options) 17 | 18 | def resume_thread(self, thread_id: str, options: ThreadOptions | None = None) -> Thread: 19 | thread_options = options or ThreadOptions() 20 | return Thread(self._exec, self._options, thread_options, thread_id) 21 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Darin Kishore 4 | 5 | Permission is hereby granted, free of 
charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /src/codex/discovery.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import platform 4 | import shutil 5 | import sys 6 | from pathlib import Path 7 | 8 | from .exceptions import UnsupportedPlatformError 9 | 10 | 11 | def _detect_target() -> str: 12 | system = sys.platform 13 | machine = platform.machine().lower() 14 | 15 | if system in {"linux", "linux2"}: 16 | if machine in {"x86_64", "amd64"}: 17 | return "x86_64-unknown-linux-musl" 18 | if machine in {"aarch64", "arm64"}: 19 | return "aarch64-unknown-linux-musl" 20 | elif system == "darwin": 21 | if machine == "x86_64": 22 | return "x86_64-apple-darwin" 23 | if machine in {"arm64", "aarch64"}: 24 | return "aarch64-apple-darwin" 25 | elif system == "win32": 26 | if machine in {"x86_64", "amd64"}: 27 | return "x86_64-pc-windows-msvc" 28 | if machine in {"arm64", "aarch64"}: 29 | return "aarch64-pc-windows-msvc" 30 | 31 | raise UnsupportedPlatformError(system, machine) 32 | 33 | 34 | def find_codex_binary(override: str | None = None) -> Path: 35 | """Find the codex binary. 36 | 37 | Search order: 38 | 1. Explicit override path 39 | 2. `codex` in PATH 40 | 3. 
Vendored binary 41 | """ 42 | if override: 43 | return Path(override) 44 | 45 | # Check PATH first 46 | path_binary = shutil.which("codex") 47 | if path_binary: 48 | return Path(path_binary) 49 | 50 | # Fall back to vendored binary 51 | target = _detect_target() 52 | package_root = Path(__file__).resolve().parent 53 | vendor_root = package_root / "vendor" / target / "codex" 54 | binary_name = "codex.exe" if sys.platform == "win32" else "codex" 55 | return vendor_root / binary_name 56 | -------------------------------------------------------------------------------- /examples/main.py: -------------------------------------------------------------------------------- 1 | """ 2 | codex-dspy - DSPy module for OpenAI Codex SDK 3 | 4 | Two-turn adapter pattern for keeping agents in-distribution. 5 | """ 6 | 7 | 8 | def main(): 9 | print("codex-dspy: DSPy module for OpenAI Codex SDK") 10 | print() 11 | print("Features:") 12 | print(" - Multi-field signatures (any number of inputs/outputs)") 13 | print(" - Two-turn pattern (natural task + structured extraction)") 14 | print(" - Stateful threads (context preserved across calls)") 15 | print(" - BAML-style schemas for Pydantic models") 16 | print() 17 | print("Quick start:") 18 | print() 19 | print(" import dspy") 20 | print(" from codex_dspy import CodexAgent") 21 | print() 22 | print(" # Simple signature") 23 | print(" sig = dspy.Signature('message:str -> answer:str')") 24 | print(" agent = CodexAgent(sig, working_directory='.')") 25 | print(" result = agent(message='Hello!')") 26 | print() 27 | print(" # Multi-field signature with Pydantic") 28 | print(" from pydantic import BaseModel, Field") 29 | print() 30 | print(" class BugReport(BaseModel):") 31 | print(" severity: str = Field(description='Bug severity')") 32 | print(" description: str") 33 | print() 34 | print(" sig = dspy.Signature(") 35 | print(" 'code: str, context: str -> bugs: list[BugReport], summary: str'") 36 | print(" )") 37 | print(" agent = CodexAgent(sig, 
working_directory='.')") 38 | print(" result = agent(code='...', context='...')") 39 | print(" print(result.bugs) # list[BugReport]") 40 | print(" print(result.summary) # str") 41 | print() 42 | print("For more examples, see examples/basic_usage.py") 43 | print("Documentation: https://github.com/darinkishore/codex_dspy#readme") 44 | 45 | 46 | if __name__ == "__main__": 47 | main() 48 | -------------------------------------------------------------------------------- /src/codex/exceptions.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from collections.abc import Sequence 4 | from dataclasses import dataclass 5 | 6 | 7 | class CodexError(Exception): 8 | """Base exception for Codex SDK.""" 9 | 10 | 11 | def _format_command(command: Sequence[str] | None) -> str: 12 | if not command: 13 | return "" 14 | return " ".join(command) 15 | 16 | 17 | class UnsupportedPlatformError(CodexError): 18 | def __init__(self, platform: str, machine: str) -> None: 19 | message = f"Unsupported platform: {platform} ({machine})" 20 | super().__init__(message) 21 | self.platform = platform 22 | self.machine = machine 23 | 24 | 25 | class SpawnError(CodexError): 26 | def __init__(self, command: Sequence[str] | None, error: OSError) -> None: 27 | self.command = list(command) if command else None 28 | self.original_error = error 29 | super().__init__(f"Failed to spawn codex exec: {_format_command(self.command)}: {error}") 30 | 31 | 32 | @dataclass(slots=True) 33 | class ExecExitError(CodexError): 34 | command: tuple[str, ...] 35 | exit_code: int 36 | stderr: str 37 | 38 | def __str__(self) -> str: # pragma: no cover - trivial formatting 39 | stderr = self.stderr.strip() 40 | tail = f": {stderr}" if stderr else "" 41 | return f"codex exec exited with code {self.exit_code}{tail}" 42 | 43 | 44 | @dataclass(slots=True) 45 | class JsonParseError(CodexError): 46 | raw_line: str 47 | command: tuple[str, ...] 
48 | 49 | def __str__(self) -> str: # pragma: no cover - trivial formatting 50 | sample = self.raw_line 51 | if len(sample) > 200: 52 | sample = sample[:197] + "..." 53 | return f"Failed to parse codex event: {sample}" 54 | 55 | 56 | class ThreadRunError(CodexError): 57 | def __init__(self, message: str) -> None: 58 | super().__init__(message) 59 | 60 | 61 | class SchemaValidationError(CodexError): 62 | def __init__(self, message: str) -> None: 63 | super().__init__(message) 64 | -------------------------------------------------------------------------------- /flake.nix: -------------------------------------------------------------------------------- 1 | { 2 | description = "Dev shell for codex_dspy (Python 3.13 + uv)."; 3 | 4 | nixConfig = { 5 | extra-substituters = [ "https://numtide.cachix.org" ]; 6 | extra-trusted-public-keys = [ "numtide.cachix.org-1:2ps1kLBUWjxIneOy1Ik6cQjb41X0iXVXeHigGmycPPE=" ]; 7 | }; 8 | 9 | inputs = { 10 | nixpkgs.url = "github:NixOS/nixpkgs/nixpkgs-unstable"; 11 | flake-parts.url = "github:hercules-ci/flake-parts"; 12 | llm-agents.url = "github:numtide/llm-agents.nix"; 13 | }; 14 | 15 | outputs = inputs@{ nixpkgs, flake-parts, llm-agents, ... }: 16 | flake-parts.lib.mkFlake { inherit inputs; } { 17 | systems = [ 18 | "x86_64-linux" 19 | "aarch64-linux" 20 | "x86_64-darwin" 21 | "aarch64-darwin" 22 | ]; 23 | 24 | perSystem = { system, pkgs, ... 
}: 25 | let 26 | python = pkgs.python313; 27 | cxxLib = pkgs.stdenv.cc.cc.lib; 28 | ldLibPath = pkgs.lib.makeLibraryPath [ 29 | cxxLib # libstdc++.so.6 for tokenizers / rust-backed wheels 30 | pkgs.zlib 31 | pkgs.openssl 32 | ]; 33 | codex = llm-agents.packages.${system}.codex; 34 | in { 35 | devShells.default = pkgs.mkShell { 36 | packages = with pkgs; [ 37 | python 38 | uv 39 | git 40 | pkg-config 41 | openssl 42 | libffi 43 | zlib 44 | cxxLib 45 | codex 46 | ]; 47 | 48 | env = 49 | { 50 | UV_PYTHON = "${python.interpreter}"; 51 | UV_PYTHON_DOWNLOADS = "never"; 52 | } 53 | // pkgs.lib.optionalAttrs pkgs.stdenv.isLinux { 54 | # Only needed on Linux where libstdc++.so.6 must be discoverable for tokenizers. 55 | LD_LIBRARY_PATH = "${ldLibPath}"; 56 | }; 57 | 58 | shellHook = '' 59 | # Create/sync uv environment for this project 60 | if [ -f pyproject.toml ]; then 61 | echo "[devshell] syncing uv env..." 62 | if ! uv sync --frozen; then 63 | uv sync 64 | fi 65 | fi 66 | ''; 67 | }; 68 | }; 69 | }; 70 | } 71 | -------------------------------------------------------------------------------- /scripts/check_test_colocation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """Pre-commit hook to enforce test co-location. 3 | 4 | Ensures that: 5 | 1. All test files (test_*.py) are located in src/tests/unit/ or src/tests/property/ 6 | 2. 
No test files exist outside of these directories 7 | """ 8 | 9 | import sys 10 | from pathlib import Path 11 | 12 | 13 | def main() -> int: 14 | root = Path(__file__).parent.parent 15 | src_dir = root / "src" 16 | allowed_test_dirs = { 17 | src_dir / "tests" / "unit", 18 | src_dir / "tests" / "property", 19 | } 20 | 21 | errors = [] 22 | 23 | # Find all test files in the project 24 | for test_file in src_dir.rglob("test_*.py"): 25 | # Check if the test file is in one of the allowed directories 26 | if not any(test_file.is_relative_to(allowed) for allowed in allowed_test_dirs): 27 | relative_path = test_file.relative_to(root) 28 | allowed_str = " or ".join( 29 | str(p.relative_to(root)) + "/" for p in sorted(allowed_test_dirs) 30 | ) 31 | errors.append(f" {relative_path} -> should be in {allowed_str}") 32 | 33 | # Also check for tests outside src/ (e.g., in project root) 34 | for test_file in root.glob("test_*.py"): 35 | relative_path = test_file.relative_to(root) 36 | errors.append(f" {relative_path} -> should be in src/tests/unit/ or src/tests/property/") 37 | 38 | # Check tests/ directory at root level 39 | root_tests = root / "tests" 40 | if root_tests.exists() and root_tests.is_dir(): 41 | for test_file in root_tests.rglob("test_*.py"): 42 | relative_path = test_file.relative_to(root) 43 | errors.append(f" {relative_path} -> should be in src/tests/unit/ or src/tests/property/") 44 | 45 | if errors: 46 | print("ERROR: Test files must be co-located in src/tests/unit/ or src/tests/property/") 47 | print() 48 | print("Found test files in wrong locations:") 49 | for error in errors: 50 | print(error) 51 | print() 52 | print("Please move these files to src/tests/unit/ or src/tests/property/") 53 | return 1 54 | 55 | # Ensure allowed directories exist (helpful on fresh clones) 56 | for allowed_dir in allowed_test_dirs: 57 | if not allowed_dir.exists(): 58 | print(f"WARNING: {allowed_dir.relative_to(root)}/ directory does not exist") 59 | print("Creating it now...") 60 
| allowed_dir.mkdir(parents=True, exist_ok=True) 61 | 62 | return 0 63 | 64 | 65 | if __name__ == "__main__": 66 | sys.exit(main()) 67 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["uv_build>=0.9.5,<0.10.0"] 3 | build-backend = "uv_build" 4 | 5 | [project] 6 | name = "codex-dspy" 7 | version = "0.1.0" 8 | description = "DSPy module for OpenAI Codex SDK - signature-driven agentic workflows" 9 | readme = "README.md" 10 | requires-python = ">=3.12" 11 | dependencies = [ 12 | "dspy>=3.0.3", 13 | ] 14 | authors = [ 15 | { name = "Darin Kishore" } 16 | ] 17 | keywords = ["dspy", "codex", "agents", "llm", "openai"] 18 | classifiers = [ 19 | "Development Status :: 3 - Alpha", 20 | "Intended Audience :: Developers", 21 | "Programming Language :: Python :: 3.12", 22 | "Programming Language :: Python :: 3.13", 23 | "Topic :: Software Development :: Libraries :: Python Modules", 24 | ] 25 | 26 | [project.optional-dependencies] 27 | dev = [ 28 | "pytest>=8.0.0", 29 | "pytest-asyncio>=0.23.0", 30 | "pre-commit>=3.6.0", 31 | "hypothesis>=6.99.0", 32 | "ruff>=0.8.0", 33 | ] 34 | 35 | [project.urls] 36 | Homepage = "https://github.com/darinkishore/codex_dspy" 37 | Repository = "https://github.com/darinkishore/codex_dspy" 38 | Documentation = "https://github.com/darinkishore/codex_dspy#readme" 39 | 40 | [tool.pytest.ini_options] 41 | testpaths = ["src/tests"] 42 | python_files = ["test_*.py"] 43 | python_classes = ["Test*"] 44 | python_functions = ["test_*"] 45 | asyncio_mode = "auto" 46 | 47 | [tool.uv.build-backend] 48 | # Include both the codex SDK and codex_dspy wrapper 49 | module-root = "src" 50 | module-name = ["codex", "codex_dspy"] 51 | 52 | [tool.ruff] 53 | target-version = "py312" 54 | line-length = 100 55 | src = ["src"] 56 | 57 | [tool.ruff.lint] 58 | select = [ 59 | "E", # pycodestyle errors 60 | "W", 
# pycodestyle warnings 61 | "F", # Pyflakes 62 | "I", # isort 63 | "UP", # pyupgrade - enforce Python 3.12+ idioms 64 | "B", # flake8-bugbear 65 | "C4", # flake8-comprehensions 66 | "SIM", # flake8-simplify 67 | "RUF", # Ruff-specific rules 68 | ] 69 | ignore = [ 70 | "E501", # line too long (handled by formatter) 71 | "B008", # function call in default argument 72 | "SIM105", # contextlib.suppress - prefer explicit try/except 73 | "SIM108", # ternary operator - prefer explicit if/else for clarity 74 | "SIM118", # Use `key in dict` instead of `key in dict.keys()` - explicit is fine 75 | "RUF022", # __all__ not sorted - prefer logical grouping over alphabetical 76 | ] 77 | 78 | [tool.ruff.lint.isort] 79 | known-first-party = ["codex", "codex_dspy"] 80 | 81 | [tool.ruff.lint.pyupgrade] 82 | # Keep runtime evaluated annotations for dataclasses 83 | keep-runtime-typing = true 84 | -------------------------------------------------------------------------------- /src/codex/schema.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import json 4 | import tempfile 5 | from collections.abc import Mapping 6 | from functools import lru_cache 7 | from pathlib import Path 8 | from types import TracebackType 9 | from typing import Any, cast 10 | 11 | from .config import SchemaInput 12 | from .exceptions import SchemaValidationError 13 | 14 | 15 | @lru_cache(maxsize=1) 16 | def _get_pydantic_base_model() -> type[Any] | None: # pragma: no cover - import guard 17 | try: 18 | from pydantic import BaseModel 19 | except ImportError: 20 | return None 21 | return cast("type[Any]", BaseModel) 22 | 23 | 24 | def _is_pydantic_model(value: object) -> bool: 25 | base_model = _get_pydantic_base_model() 26 | return isinstance(value, type) and base_model is not None and issubclass(value, base_model) 27 | 28 | 29 | def _is_pydantic_instance(value: object) -> bool: 30 | base_model = _get_pydantic_base_model() 31 | 
return base_model is not None and isinstance(value, base_model) 32 | 33 | 34 | def _convert_schema_input(schema: SchemaInput | None) -> Mapping[str, object] | None: 35 | if schema is None or isinstance(schema, Mapping): 36 | return schema 37 | 38 | if _is_pydantic_model(schema): 39 | return cast("Mapping[str, object]", schema.model_json_schema()) 40 | 41 | if _is_pydantic_instance(schema): 42 | return cast("Mapping[str, object]", schema.model_json_schema()) 43 | 44 | raise SchemaValidationError( 45 | "output_schema must be a mapping or a Pydantic BaseModel (class or instance)", 46 | ) 47 | 48 | 49 | class SchemaTempFile: 50 | def __init__(self, schema: SchemaInput | None) -> None: 51 | self._raw_schema = schema 52 | self._temp_dir: tempfile.TemporaryDirectory[str] | None = None 53 | self.path: Path | None = None 54 | 55 | def __enter__(self) -> SchemaTempFile: 56 | schema = _convert_schema_input(self._raw_schema) 57 | if schema is None: 58 | return self 59 | 60 | for key in schema.keys(): 61 | if not isinstance(key, str): 62 | raise SchemaValidationError("output_schema keys must be strings") 63 | 64 | self._temp_dir = tempfile.TemporaryDirectory(prefix="codex-output-schema-") 65 | schema_dir = Path(self._temp_dir.name) 66 | schema_path = schema_dir / "schema.json" 67 | 68 | with schema_path.open("w", encoding="utf-8") as handle: 69 | json.dump(schema, handle, ensure_ascii=False) 70 | self.path = schema_path 71 | return self 72 | 73 | def __exit__( 74 | self, 75 | exc_type: type[BaseException] | None, 76 | exc: BaseException | None, 77 | tb: TracebackType | None, 78 | ) -> None: 79 | self.cleanup() 80 | 81 | def cleanup(self) -> None: 82 | if self._temp_dir is not None: 83 | self._temp_dir.cleanup() 84 | self._temp_dir = None 85 | self.path = None 86 | 87 | 88 | def prepare_schema_file(schema: SchemaInput | None) -> SchemaTempFile: 89 | return SchemaTempFile(schema) 90 | -------------------------------------------------------------------------------- 
/src/codex/config.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from collections.abc import Mapping, Sequence 4 | from dataclasses import dataclass 5 | from enum import StrEnum 6 | from typing import TYPE_CHECKING, Any 7 | 8 | if TYPE_CHECKING: # pragma: no cover - typing only 9 | from pydantic import BaseModel as PydanticBaseModel 10 | 11 | SchemaInput = Mapping[str, Any] | type[PydanticBaseModel] | PydanticBaseModel 12 | else: 13 | SchemaInput = Mapping[str, Any] 14 | 15 | 16 | class Model(StrEnum): 17 | """Supported Codex models. 18 | 19 | Default model is GPT_5_1_CODEX_MAX. 20 | """ 21 | 22 | # Production models (recommended) 23 | GPT_5_1_CODEX_MAX = "gpt-5.1-codex-max" 24 | GPT_5_1_CODEX = "gpt-5.1-codex" 25 | GPT_5_1_CODEX_MINI = "gpt-5.1-codex-mini" 26 | GPT_5_1 = "gpt-5.1" 27 | 28 | # Deprecated models (still functional) 29 | GPT_5_CODEX = "gpt-5-codex" 30 | GPT_5_CODEX_MINI = "gpt-5-codex-mini" 31 | GPT_5 = "gpt-5" 32 | 33 | # Other supported models 34 | O3 = "o3" 35 | O4_MINI = "o4-mini" 36 | CODEX_MINI_LATEST = "codex-mini-latest" 37 | GPT_4_1 = "gpt-4.1" 38 | GPT_4O = "gpt-4o" 39 | 40 | 41 | # Default model constant 42 | DEFAULT_MODEL = Model.GPT_5_1_CODEX_MAX 43 | 44 | 45 | class ApprovalMode(StrEnum): 46 | """Command approval policy.""" 47 | 48 | NEVER = "never" 49 | ON_REQUEST = "on-request" 50 | ON_FAILURE = "on-failure" 51 | UNTRUSTED = "untrusted" 52 | 53 | 54 | class SandboxMode(StrEnum): 55 | """Sandbox policy for file system access.""" 56 | 57 | READ_ONLY = "read-only" 58 | WORKSPACE_WRITE = "workspace-write" 59 | DANGER_FULL_ACCESS = "danger-full-access" 60 | 61 | 62 | class ModelReasoningEffort(StrEnum): 63 | """Reasoning effort level for model inference.""" 64 | 65 | NONE = "none" 66 | MINIMAL = "minimal" 67 | LOW = "low" 68 | MEDIUM = "medium" 69 | HIGH = "high" 70 | X_HIGH = "x-high" 71 | 72 | 73 | class ModelVerbosity(StrEnum): 74 | """Output verbosity 
for GPT-5 models.""" 75 | 76 | LOW = "low" 77 | MEDIUM = "medium" 78 | HIGH = "high" 79 | 80 | 81 | @dataclass(frozen=True, slots=True) 82 | class CodexOptions: 83 | """Options for the Codex client.""" 84 | 85 | codex_path_override: str | None = None 86 | base_url: str | None = None 87 | api_key: str | None = None 88 | env: Mapping[str, str] | None = None 89 | 90 | 91 | @dataclass(frozen=True, slots=True) 92 | class ThreadOptions: 93 | """Options for a conversation thread.""" 94 | 95 | model: str | None = None 96 | sandbox_mode: SandboxMode | None = None 97 | working_directory: str | None = None 98 | skip_git_repo_check: bool = False 99 | model_reasoning_effort: ModelReasoningEffort | None = None 100 | network_access_enabled: bool | None = None 101 | web_search_enabled: bool | None = None 102 | approval_policy: ApprovalMode | None = None 103 | additional_directories: Sequence[str] | None = None 104 | 105 | 106 | @dataclass(frozen=True, slots=True) 107 | class TurnOptions: 108 | """Options for a single turn in a thread.""" 109 | 110 | output_schema: SchemaInput | None = None 111 | -------------------------------------------------------------------------------- /src/codex/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from .client import Codex 4 | from .config import ( 5 | DEFAULT_MODEL, 6 | ApprovalMode, 7 | CodexOptions, 8 | Model, 9 | ModelReasoningEffort, 10 | ModelVerbosity, 11 | SandboxMode, 12 | ThreadOptions, 13 | TurnOptions, 14 | ) 15 | from .events import ( 16 | ItemCompletedEvent, 17 | ItemStartedEvent, 18 | ItemUpdatedEvent, 19 | ThreadError, 20 | ThreadErrorEvent, 21 | ThreadEvent, 22 | ThreadStartedEvent, 23 | TurnCompletedEvent, 24 | TurnFailedEvent, 25 | TurnStartedEvent, 26 | Usage, 27 | parse_thread_event, 28 | ) 29 | from .exceptions import ( 30 | CodexError, 31 | ExecExitError, 32 | JsonParseError, 33 | SchemaValidationError, 34 | SpawnError, 35 | 
ThreadRunError, 36 | UnsupportedPlatformError, 37 | ) 38 | from .items import ( 39 | AgentMessageItem, 40 | CommandExecutionItem, 41 | CommandExecutionStatus, 42 | ErrorItem, 43 | FileChangeItem, 44 | FileUpdateChange, 45 | McpContentBlock, 46 | McpToolCallError, 47 | McpToolCallItem, 48 | McpToolCallResult, 49 | McpToolCallStatus, 50 | PatchApplyStatus, 51 | PatchChangeKind, 52 | ReasoningItem, 53 | ThreadItem, 54 | TodoItem, 55 | TodoListItem, 56 | WebSearchItem, 57 | parse_thread_item, 58 | ) 59 | from .thread import ( 60 | Input, 61 | LocalImageInput, 62 | TextInput, 63 | Thread, 64 | ThreadRunResult, 65 | ThreadStream, 66 | UserInput, 67 | ) 68 | from .types import ( 69 | JsonArray, 70 | JsonObject, 71 | JsonPrimitive, 72 | JsonValue, 73 | ) 74 | 75 | __all__ = [ 76 | # Client 77 | "Codex", 78 | # Config 79 | "ApprovalMode", 80 | "CodexOptions", 81 | "DEFAULT_MODEL", 82 | "Model", 83 | "ModelReasoningEffort", 84 | "ModelVerbosity", 85 | "SandboxMode", 86 | "ThreadOptions", 87 | "TurnOptions", 88 | # Thread 89 | "Input", 90 | "LocalImageInput", 91 | "TextInput", 92 | "Thread", 93 | "ThreadRunResult", 94 | "ThreadStream", 95 | "UserInput", 96 | # Events 97 | "ItemCompletedEvent", 98 | "ItemStartedEvent", 99 | "ItemUpdatedEvent", 100 | "ThreadError", 101 | "ThreadErrorEvent", 102 | "ThreadEvent", 103 | "ThreadStartedEvent", 104 | "TurnCompletedEvent", 105 | "TurnFailedEvent", 106 | "TurnStartedEvent", 107 | "Usage", 108 | "parse_thread_event", 109 | # Items 110 | "AgentMessageItem", 111 | "CommandExecutionItem", 112 | "CommandExecutionStatus", 113 | "ErrorItem", 114 | "FileChangeItem", 115 | "FileUpdateChange", 116 | "McpContentBlock", 117 | "McpToolCallError", 118 | "McpToolCallItem", 119 | "McpToolCallResult", 120 | "McpToolCallStatus", 121 | "PatchApplyStatus", 122 | "PatchChangeKind", 123 | "ReasoningItem", 124 | "ThreadItem", 125 | "TodoItem", 126 | "TodoListItem", 127 | "WebSearchItem", 128 | "parse_thread_item", 129 | # Exceptions 130 | "CodexError", 131 | 
"""Unit tests for codex binary discovery."""

from pathlib import Path
from unittest.mock import patch

import pytest

from codex.discovery import find_codex_binary
from codex.exceptions import UnsupportedPlatformError


class TestFindCodexBinary:
    """Behavioral tests for find_codex_binary."""

    def test_override_returns_override_path(self):
        """An explicit override path is returned verbatim as a Path."""
        assert find_codex_binary("/custom/path/to/codex") == Path("/custom/path/to/codex")

    def test_finds_codex_in_path(self):
        """A binary discovered on PATH wins over the vendor fallback."""
        with patch("codex.discovery.shutil.which") as which_mock:
            which_mock.return_value = "/usr/bin/codex"
            found = find_codex_binary()

        assert found == Path("/usr/bin/codex")
        which_mock.assert_called_once_with("codex")

    def test_falls_back_to_vendor_when_not_in_path(self):
        """When PATH has no codex, discovery falls back to the vendor tree."""
        with patch("codex.discovery.shutil.which") as which_mock:
            which_mock.return_value = None
            found = find_codex_binary()

        # The fallback lives under the bundled vendor directory.
        assert "vendor" in str(found)
        assert found.name == "codex"

    def test_path_check_before_vendor(self):
        """The PATH lookup happens (and suffices) before any vendor probing."""
        lookups = []

        def record_which(name):
            lookups.append("which")
            return "/found/codex"

        with patch("codex.discovery.shutil.which", side_effect=record_which):
            find_codex_binary()

        assert lookups == ["which"]

    def test_override_skips_path_check(self):
        """Supplying an override bypasses the PATH lookup entirely."""
        with patch("codex.discovery.shutil.which") as which_mock:
            find_codex_binary("/my/codex")

        which_mock.assert_not_called()


class TestDetectTarget:
    """Platform-triple detection tests for _detect_target."""

    @staticmethod
    def _target_for(plat, machine):
        """Run _detect_target with sys.platform / platform.machine faked."""
        with (
            patch("codex.discovery.sys.platform", plat),
            patch("codex.discovery.platform.machine", return_value=machine),
        ):
            from codex.discovery import _detect_target

            return _detect_target()

    def test_linux_x86_64(self):
        """Linux x86_64 maps to the musl target triple."""
        assert self._target_for("linux", "x86_64") == "x86_64-unknown-linux-musl"

    def test_linux_aarch64(self):
        """Linux aarch64 maps to the musl target triple."""
        assert self._target_for("linux", "aarch64") == "aarch64-unknown-linux-musl"

    def test_darwin_x86_64(self):
        """macOS x86_64 maps to the Apple target triple."""
        assert self._target_for("darwin", "x86_64") == "x86_64-apple-darwin"

    def test_darwin_arm64(self):
        """macOS arm64 maps to the aarch64 Apple target triple."""
        assert self._target_for("darwin", "arm64") == "aarch64-apple-darwin"

    def test_unsupported_platform_raises(self):
        """A platform outside the support matrix raises UnsupportedPlatformError."""
        with pytest.raises(UnsupportedPlatformError):
            self._target_for("freebsd", "x86_64")
"narHash": "sha256-EYHSjVM4Ox4lvCXUMiKKs2vETUSL5mx+J2FfutM7T9w=", 67 | "owner": "NixOS", 68 | "repo": "nixpkgs", 69 | "rev": "a672be65651c80d3f592a89b3945466584a22069", 70 | "type": "github" 71 | }, 72 | "original": { 73 | "owner": "NixOS", 74 | "ref": "nixpkgs-unstable", 75 | "repo": "nixpkgs", 76 | "type": "github" 77 | } 78 | }, 79 | "nixpkgs-lib": { 80 | "locked": { 81 | "lastModified": 1761765539, 82 | "narHash": "sha256-b0yj6kfvO8ApcSE+QmA6mUfu8IYG6/uU28OFn4PaC8M=", 83 | "owner": "nix-community", 84 | "repo": "nixpkgs.lib", 85 | "rev": "719359f4562934ae99f5443f20aa06c2ffff91fc", 86 | "type": "github" 87 | }, 88 | "original": { 89 | "owner": "nix-community", 90 | "repo": "nixpkgs.lib", 91 | "type": "github" 92 | } 93 | }, 94 | "nixpkgs_2": { 95 | "locked": { 96 | "lastModified": 1764947035, 97 | "narHash": "sha256-EYHSjVM4Ox4lvCXUMiKKs2vETUSL5mx+J2FfutM7T9w=", 98 | "owner": "NixOS", 99 | "repo": "nixpkgs", 100 | "rev": "a672be65651c80d3f592a89b3945466584a22069", 101 | "type": "github" 102 | }, 103 | "original": { 104 | "owner": "NixOS", 105 | "ref": "nixpkgs-unstable", 106 | "repo": "nixpkgs", 107 | "type": "github" 108 | } 109 | }, 110 | "root": { 111 | "inputs": { 112 | "flake-parts": "flake-parts", 113 | "llm-agents": "llm-agents", 114 | "nixpkgs": "nixpkgs_2" 115 | } 116 | }, 117 | "systems": { 118 | "locked": { 119 | "lastModified": 1681028828, 120 | "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", 121 | "owner": "nix-systems", 122 | "repo": "default", 123 | "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", 124 | "type": "github" 125 | }, 126 | "original": { 127 | "owner": "nix-systems", 128 | "repo": "default", 129 | "type": "github" 130 | } 131 | }, 132 | "treefmt-nix": { 133 | "inputs": { 134 | "nixpkgs": [ 135 | "llm-agents", 136 | "nixpkgs" 137 | ] 138 | }, 139 | "locked": { 140 | "lastModified": 1762938485, 141 | "narHash": "sha256-AlEObg0syDl+Spi4LsZIBrjw+snSVU4T8MOeuZJUJjM=", 142 | "owner": "numtide", 143 | "repo": "treefmt-nix", 
144 | "rev": "5b4ee75aeefd1e2d5a1cc43cf6ba65eba75e83e4", 145 | "type": "github" 146 | }, 147 | "original": { 148 | "owner": "numtide", 149 | "repo": "treefmt-nix", 150 | "type": "github" 151 | } 152 | } 153 | }, 154 | "root": "root", 155 | "version": 7 156 | } 157 | -------------------------------------------------------------------------------- /src/codex/events.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from dataclasses import dataclass, field 4 | from typing import Literal 5 | 6 | from .exceptions import CodexError 7 | from .items import ThreadItem, parse_thread_item 8 | from .types import JsonObject, JsonValue 9 | 10 | 11 | @dataclass(frozen=True, slots=True) 12 | class Usage: 13 | """Token usage statistics for a turn.""" 14 | 15 | input_tokens: int 16 | cached_input_tokens: int 17 | output_tokens: int 18 | 19 | 20 | @dataclass(frozen=True, slots=True) 21 | class ThreadError: 22 | """Fatal error emitted by the stream.""" 23 | 24 | message: str 25 | 26 | 27 | @dataclass(frozen=True, slots=True) 28 | class ThreadStartedEvent: 29 | """Emitted when a new thread is started.""" 30 | 31 | type: Literal["thread.started"] = field(default="thread.started", init=False) 32 | thread_id: str 33 | 34 | 35 | @dataclass(frozen=True, slots=True) 36 | class TurnStartedEvent: 37 | """Emitted when a turn is started by sending a new prompt.""" 38 | 39 | type: Literal["turn.started"] = field(default="turn.started", init=False) 40 | 41 | 42 | @dataclass(frozen=True, slots=True) 43 | class TurnCompletedEvent: 44 | """Emitted when a turn is completed.""" 45 | 46 | type: Literal["turn.completed"] = field(default="turn.completed", init=False) 47 | usage: Usage 48 | 49 | 50 | @dataclass(frozen=True, slots=True) 51 | class TurnFailedEvent: 52 | """Indicates that a turn failed with an error.""" 53 | 54 | type: Literal["turn.failed"] = field(default="turn.failed", init=False) 55 | error: ThreadError 56 | 
57 | 58 | @dataclass(frozen=True, slots=True) 59 | class ItemStartedEvent: 60 | """Emitted when a new item is added to the thread.""" 61 | 62 | type: Literal["item.started"] = field(default="item.started", init=False) 63 | item: ThreadItem 64 | 65 | 66 | @dataclass(frozen=True, slots=True) 67 | class ItemUpdatedEvent: 68 | """Emitted when an item is updated.""" 69 | 70 | type: Literal["item.updated"] = field(default="item.updated", init=False) 71 | item: ThreadItem 72 | 73 | 74 | @dataclass(frozen=True, slots=True) 75 | class ItemCompletedEvent: 76 | """Signals that an item has reached a terminal state.""" 77 | 78 | type: Literal["item.completed"] = field(default="item.completed", init=False) 79 | item: ThreadItem 80 | 81 | 82 | @dataclass(frozen=True, slots=True) 83 | class ThreadErrorEvent: 84 | """Unrecoverable error emitted by the event stream.""" 85 | 86 | type: Literal["error"] = field(default="error", init=False) 87 | message: str 88 | 89 | 90 | ThreadEvent = ( 91 | ThreadStartedEvent 92 | | TurnStartedEvent 93 | | TurnCompletedEvent 94 | | TurnFailedEvent 95 | | ItemStartedEvent 96 | | ItemUpdatedEvent 97 | | ItemCompletedEvent 98 | | ThreadErrorEvent 99 | ) 100 | 101 | 102 | def _ensure_dict(payload: JsonValue) -> JsonObject: 103 | if isinstance(payload, dict): 104 | return payload 105 | raise CodexError("Event payload must be an object") 106 | 107 | 108 | def _ensure_str(value: JsonValue, field_name: str) -> str: 109 | if isinstance(value, str): 110 | return value 111 | raise CodexError(f"Expected string for {field_name}") 112 | 113 | 114 | def _ensure_int(value: JsonValue, field_name: str) -> int: 115 | if isinstance(value, int) and not isinstance(value, bool): 116 | return value 117 | raise CodexError(f"Expected integer for {field_name}") 118 | 119 | 120 | def _parse_usage(payload: JsonValue) -> Usage: 121 | data = _ensure_dict(payload) 122 | return Usage( 123 | input_tokens=_ensure_int(data.get("input_tokens"), "input_tokens"), 124 | 
cached_input_tokens=_ensure_int(data.get("cached_input_tokens"), "cached_input_tokens"), 125 | output_tokens=_ensure_int(data.get("output_tokens"), "output_tokens"), 126 | ) 127 | 128 | 129 | def parse_thread_event(payload: JsonObject) -> ThreadEvent: 130 | """Parse a JSON object into a ThreadEvent.""" 131 | type_name = _ensure_str(payload.get("type"), "type") 132 | 133 | if type_name == "thread.started": 134 | thread_id = _ensure_str(payload.get("thread_id"), "thread_id") 135 | return ThreadStartedEvent(thread_id=thread_id) 136 | 137 | if type_name == "turn.started": 138 | return TurnStartedEvent() 139 | 140 | if type_name == "turn.completed": 141 | usage = _parse_usage(payload.get("usage")) 142 | return TurnCompletedEvent(usage=usage) 143 | 144 | if type_name == "turn.failed": 145 | error_payload = _ensure_dict(payload.get("error")) 146 | message = _ensure_str(error_payload.get("message"), "error.message") 147 | return TurnFailedEvent(error=ThreadError(message=message)) 148 | 149 | if type_name in {"item.started", "item.updated", "item.completed"}: 150 | item_data = payload.get("item") 151 | if not isinstance(item_data, dict): 152 | raise CodexError("item must be an object") 153 | item = parse_thread_item(item_data) 154 | if type_name == "item.started": 155 | return ItemStartedEvent(item=item) 156 | if type_name == "item.updated": 157 | return ItemUpdatedEvent(item=item) 158 | return ItemCompletedEvent(item=item) 159 | 160 | if type_name == "error": 161 | message = _ensure_str(payload.get("message"), "message") 162 | return ThreadErrorEvent(message=message) 163 | 164 | raise CodexError(f"Unsupported event type: {type_name}") 165 | -------------------------------------------------------------------------------- /src/codex/thread.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import json 4 | from collections.abc import Iterator, Sequence 5 | from dataclasses import dataclass 6 | 
from typing import Literal, TypedDict

from .config import CodexOptions, ThreadOptions, TurnOptions
from .events import (
    ItemCompletedEvent,
    ThreadErrorEvent,
    ThreadEvent,
    ThreadStartedEvent,
    TurnCompletedEvent,
    TurnFailedEvent,
    Usage,
    parse_thread_event,
)
from .exceptions import JsonParseError, ThreadRunError
from .exec import CodexExec, ExecArgs
from .items import AgentMessageItem, ThreadItem
from .schema import prepare_schema_file


class TextInput(TypedDict):
    """A text input to send to the agent."""

    type: Literal["text"]
    text: str


class LocalImageInput(TypedDict):
    """A local image input to send to the agent."""

    type: Literal["local_image"]
    path: str


# A user input is either plain text or a local image reference.
UserInput = TextInput | LocalImageInput
Input = str | Sequence[UserInput]


def _normalize_input(input_value: Input) -> tuple[str, list[str]]:
    """Split a user input into a prompt string and a list of image paths.

    Plain strings pass through unchanged; structured inputs are partitioned
    by their "type" tag, with text parts joined by blank lines.
    """
    if isinstance(input_value, str):
        return input_value, []

    texts: list[str] = []
    image_paths: list[str] = []
    for part in input_value:
        kind = part.get("type")
        if kind == "text":
            texts.append(part.get("text", ""))
        elif kind == "local_image":
            image_paths.append(part.get("path", ""))

    return "\n\n".join(texts), image_paths


@dataclass(frozen=True, slots=True)
class ThreadRunResult:
    """Result of a completed turn."""

    # Items emitted during the turn, in completion order.
    items: tuple[ThreadItem, ...]
    # Text of the last agent message, or "" if none was produced.
    final_response: str
    # Token usage for the turn, if the stream reported it.
    usage: Usage | None
64 | final_response: str 65 | usage: Usage | None 66 | 67 | 68 | @dataclass(frozen=True, slots=True) 69 | class ThreadStream: 70 | """Streaming events from a turn.""" 71 | 72 | events: Iterator[ThreadEvent] 73 | 74 | def __iter__(self) -> Iterator[ThreadEvent]: 75 | return self.events 76 | 77 | 78 | class Thread: 79 | """A conversation thread with the Codex agent.""" 80 | 81 | def __init__( 82 | self, 83 | exec_client: CodexExec, 84 | codex_options: CodexOptions, 85 | thread_options: ThreadOptions, 86 | thread_id: str | None = None, 87 | ) -> None: 88 | self._exec = exec_client 89 | self._codex_options = codex_options 90 | self._thread_options = thread_options 91 | self._id = thread_id 92 | 93 | @property 94 | def id(self) -> str | None: 95 | """Thread ID, populated after the first turn starts.""" 96 | return self._id 97 | 98 | def run_streamed( 99 | self, input_value: Input, turn_options: TurnOptions | None = None 100 | ) -> ThreadStream: 101 | """Run a turn and stream events as they are produced.""" 102 | events = self._stream_events(input_value, turn_options) 103 | return ThreadStream(events=events) 104 | 105 | def run(self, input_value: Input, turn_options: TurnOptions | None = None) -> ThreadRunResult: 106 | """Run a turn and return the completed result.""" 107 | final_response = "" 108 | items: list[ThreadItem] = [] 109 | usage: Usage | None = None 110 | failure_message: str | None = None 111 | 112 | for event in self._stream_events(input_value, turn_options): 113 | if isinstance(event, ThreadErrorEvent): 114 | raise ThreadRunError(event.message) 115 | if isinstance(event, TurnFailedEvent): 116 | failure_message = event.error.message 117 | break 118 | if isinstance(event, TurnCompletedEvent): 119 | usage = event.usage 120 | if isinstance(event, ItemCompletedEvent): 121 | item = event.item 122 | items.append(item) 123 | if isinstance(item, AgentMessageItem): 124 | final_response = item.text 125 | 126 | if failure_message is not None: 127 | raise 
ThreadRunError(failure_message) 128 | 129 | return ThreadRunResult(items=tuple(items), final_response=final_response, usage=usage) 130 | 131 | def _stream_events( 132 | self, 133 | input_value: Input, 134 | turn_options: TurnOptions | None, 135 | ) -> Iterator[ThreadEvent]: 136 | turn = turn_options or TurnOptions() 137 | prompt, images = _normalize_input(input_value) 138 | with prepare_schema_file(turn.output_schema) as schema_file: 139 | exec_args = ExecArgs( 140 | input=prompt, 141 | base_url=self._codex_options.base_url, 142 | api_key=self._codex_options.api_key, 143 | thread_id=self._id, 144 | images=images if images else None, 145 | model=self._thread_options.model, 146 | sandbox_mode=self._thread_options.sandbox_mode, 147 | working_directory=self._thread_options.working_directory, 148 | additional_directories=self._thread_options.additional_directories, 149 | skip_git_repo_check=self._thread_options.skip_git_repo_check, 150 | output_schema_path=str(schema_file.path) if schema_file.path else None, 151 | model_reasoning_effort=self._thread_options.model_reasoning_effort, 152 | network_access_enabled=self._thread_options.network_access_enabled, 153 | web_search_enabled=self._thread_options.web_search_enabled, 154 | approval_policy=self._thread_options.approval_policy, 155 | ) 156 | command = tuple(self._exec.build_command(exec_args)) 157 | for line in self._exec.run_lines(exec_args): 158 | try: 159 | payload = json.loads(line) 160 | except json.JSONDecodeError as error: 161 | raise JsonParseError(line, command) from error 162 | 163 | event = parse_thread_event(payload) 164 | if isinstance(event, ThreadStartedEvent): 165 | self._id = event.thread_id 166 | yield event 167 | -------------------------------------------------------------------------------- /src/codex/exec.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import io 4 | import os 5 | import subprocess 6 | from 
from collections.abc import Iterator, Mapping, Sequence
from dataclasses import dataclass
from threading import Thread

from .config import ApprovalMode, ModelReasoningEffort, SandboxMode
from .discovery import find_codex_binary
from .exceptions import ExecExitError, SpawnError

# Environment variable the CLI reads to identify which SDK spawned it.
INTERNAL_ORIGINATOR_ENV = "CODEX_INTERNAL_ORIGINATOR_OVERRIDE"
PYTHON_SDK_ORIGINATOR = "codex_sdk_py"


@dataclass(frozen=True, slots=True)
class ExecArgs:
    """Arguments for executing Codex CLI."""

    # The prompt text written to the child process's stdin.
    input: str
    base_url: str | None = None
    api_key: str | None = None
    # Resume an existing thread when set; otherwise a new thread is started.
    thread_id: str | None = None
    images: Sequence[str] | None = None
    model: str | None = None
    sandbox_mode: SandboxMode | None = None
    working_directory: str | None = None
    additional_directories: Sequence[str] | None = None
    skip_git_repo_check: bool = False
    output_schema_path: str | None = None
    model_reasoning_effort: ModelReasoningEffort | None = None
    network_access_enabled: bool | None = None
    web_search_enabled: bool | None = None
    approval_policy: ApprovalMode | None = None


def _drain_stderr(pipe: io.TextIOBase, buffer: list[str]) -> None:
    """Collect stderr lines until EOF so the child never blocks on a full pipe."""
    while True:
        try:
            chunk = pipe.readline()
        except ValueError:
            # Pipe was closed under us during shutdown; stop draining.
            break
        if chunk == "":
            break
        buffer.append(chunk)


class CodexExec:
    """Executes the Codex CLI binary and streams its JSONL stdout."""

    def __init__(
        self,
        executable_override: str | None = None,
        env: Mapping[str, str] | None = None,
    ) -> None:
        self._binary = find_codex_binary(executable_override)
        self._env_override = env

    def build_command(self, args: ExecArgs) -> list[str]:
        """Build the CLI command line from arguments.

        Only options that are set produce flags; the `resume <thread_id>`
        subcommand must come last.
        """
        command = [str(self._binary), "exec", "--experimental-json"]

        if args.model:
            command.extend(["--model", args.model])
        if args.sandbox_mode:
            command.extend(["--sandbox", args.sandbox_mode.value])
        if args.working_directory:
            command.extend(["--cd", args.working_directory])
        if args.additional_directories:
            for dir_path in args.additional_directories:
                command.extend(["--add-dir", dir_path])
        if args.skip_git_repo_check:
            command.append("--skip-git-repo-check")
        if args.output_schema_path:
            command.extend(["--output-schema", args.output_schema_path])
        if args.model_reasoning_effort:
            command.extend(
                ["--config", f'model_reasoning_effort="{args.model_reasoning_effort.value}"']
            )
        if args.network_access_enabled is not None:
            value = "true" if args.network_access_enabled else "false"
            command.extend(["--config", f"sandbox_workspace_write.network_access={value}"])
        if args.web_search_enabled is not None:
            value = "true" if args.web_search_enabled else "false"
            command.extend(["--config", f"features.web_search_request={value}"])
        if args.approval_policy:
            command.extend(["--config", f'approval_policy="{args.approval_policy.value}"'])
        if args.images:
            for image_path in args.images:
                command.extend(["--image", image_path])
        if args.thread_id:
            command.extend(["resume", args.thread_id])

        return command

    def _build_env(self, args: ExecArgs) -> dict[str, str]:
        """Compose the child-process environment.

        The SDK originator marker is always set (unless the caller provided
        one) so the CLI can attribute traffic to this SDK.
        """
        if self._env_override is not None:
            env = dict(self._env_override)
        else:
            env = os.environ.copy()
        env.setdefault(INTERNAL_ORIGINATOR_ENV, PYTHON_SDK_ORIGINATOR)
        if args.base_url:
            env["OPENAI_BASE_URL"] = args.base_url
        if args.api_key:
            env["CODEX_API_KEY"] = args.api_key
        return env

    def run_lines(self, args: ExecArgs) -> Iterator[str]:
        """Execute the command and yield stdout lines (newline-stripped).

        Raises:
            SpawnError: if the process cannot be started or lacks stdio pipes.
            ExecExitError: if the process exits with a nonzero status.
        """
        command = self.build_command(args)
        env = self._build_env(args)
        stderr_buffer: list[str] = []

        try:
            process = subprocess.Popen(
                command,
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                encoding="utf-8",
                errors="strict",
                env=env,
            )
        except OSError as error:  # pragma: no cover - exercised indirectly
            raise SpawnError(command, error) from error

        if not process.stdin or not process.stdout:
            process.kill()
            raise SpawnError(command, OSError("Missing stdio pipes"))

        stderr_thread: Thread | None = None
        if process.stderr:
            stderr_thread = Thread(
                target=_drain_stderr,
                args=(process.stderr, stderr_buffer),
                daemon=True,
            )
            stderr_thread.start()

        try:
            process.stdin.write(args.input)
            process.stdin.close()

            for line in iter(process.stdout.readline, ""):
                yield line.rstrip("\n")

            return_code = process.wait()
            if stderr_thread is not None:
                stderr_thread.join()

            if return_code != 0:
                raise ExecExitError(tuple(command), return_code, "".join(stderr_buffer))
        finally:
            # Runs even when the consumer abandons the generator early.
            self._cleanup(process, stderr_thread)

    @staticmethod
    def _cleanup(process: subprocess.Popen[str], stderr_thread: Thread | None) -> None:
        """Close pipes and reap the child, killing it if still running."""
        if process.stdout and not process.stdout.closed:
            process.stdout.close()
        if process.stderr and not process.stderr.closed:
            try:
                process.stderr.close()
            except ValueError:
                pass
        if stderr_thread is not None and stderr_thread.is_alive():
            stderr_thread.join(timeout=0.1)
        if process.poll() is None:
            process.kill()
            try:
                process.wait(timeout=0.5)
            except subprocess.TimeoutExpired:
                process.wait()
"""Example usage of CodexAgent with the two-turn pattern.

Demonstrates:
- Single-field signatures (simple)
- Multi-field signatures with Pydantic models
- Multi-turn conversations
- Inspecting execution trace
"""

from typing import Literal

from pydantic import BaseModel, Field

import dspy
from codex import Model, ModelReasoningEffort, SandboxMode

from codex_dspy import CodexAgent


def example_1_simple_string():
    """Example 1: Simple string input/output."""
    print("=" * 60)
    print("Example 1: Simple String Signature")
    print("=" * 60)

    sig = dspy.Signature("message:str -> answer:str")

    agent = CodexAgent(
        sig,
        working_directory=".",
        model=Model.GPT_5_1_CODEX_MAX,
        sandbox_mode=SandboxMode.READ_ONLY,
        model_reasoning_effort=ModelReasoningEffort.LOW,
    )

    result = agent(message="What files are in this directory? List the top 5.")

    print(f"\nAnswer: {result.answer}")
    print(f"\nThread ID: {agent.thread_id}")
    print(f"Usage: {result.usage}")
    print(f"Trace items: {len(result.trace)}")


def example_2_multi_field_pydantic():
    """Example 2: Multiple fields with Pydantic models."""
    print("\n" + "=" * 60)
    print("Example 2: Multi-Field Signature with Pydantic")
    print("=" * 60)

    class BugReport(BaseModel):
        severity: Literal["low", "medium", "high"] = Field(description="Bug severity")
        location: str = Field(description="File and line number")
        description: str = Field(description="What the bug does")

    # Multiple inputs AND outputs - pass custom types explicitly
    sig = dspy.Signature(
        "code: str, context: str -> bugs: list[BugReport], summary: str",
        "Analyze code for potential bugs",
        custom_types={"BugReport": BugReport},
    )

    agent = CodexAgent(
        sig,
        working_directory=".",
        model=Model.GPT_5_1_CODEX_MAX,
        sandbox_mode=SandboxMode.READ_ONLY,
        model_reasoning_effort=ModelReasoningEffort.LOW,
    )

    result = agent(
        code="""
def divide(a, b):
    return a / b

def get_item(items, index):
    return items[index]
""",
        context="These are utility functions in a production calculator module",
    )

    print(f"\nSummary: {result.summary}")
    print(f"\nBugs found ({len(result.bugs)}):")
    for bug in result.bugs:
        print(f"  [{bug.severity}] {bug.location}")
        print(f"    {bug.description}")


def example_3_multi_turn():
    """Example 3: Multi-turn conversation with context."""
    print("\n" + "=" * 60)
    print("Example 3: Multi-Turn Conversation")
    print("=" * 60)

    sig = dspy.Signature("request: str -> response: str")

    agent = CodexAgent(
        sig,
        working_directory=".",
        model=Model.GPT_5_1_CODEX_MAX,
        sandbox_mode=SandboxMode.READ_ONLY,
        model_reasoning_effort=ModelReasoningEffort.LOW,
    )

    # Turn 1
    result1 = agent(request="What Python files are in this project?")
    print(f"\nTurn 1 Response: {result1.response[:200]}...")

    # Turn 2 - has context from Turn 1
    result2 = agent(request="Which one has the most lines of code?")
    print(f"\nTurn 2 Response: {result2.response[:200]}...")

    # Same thread throughout
    print(f"\nThread ID (same for both): {agent.thread_id}")


def example_4_complex_analysis():
    """Example 4: Complex multi-output analysis."""
    print("\n" + "=" * 60)
    print("Example 4: Complex Analysis with Multiple Outputs")
    print("=" * 60)

    class FileInfo(BaseModel):
        path: str = Field(description="File path")
        purpose: str = Field(description="What this file does")
        key_functions: list[str] = Field(description="Important functions/classes")

    class RepoAnalysis(BaseModel):
        architecture: str = Field(description="High-level architecture description")
        main_files: list[FileInfo] = Field(description="Key files in the project")
        tech_stack: list[str] = Field(description="Technologies used")

    sig = dspy.Signature(
        "directory: str, focus: str -> analysis: RepoAnalysis, recommendations: str",
        "Analyze repository structure and provide recommendations",
        custom_types={"RepoAnalysis": RepoAnalysis, "FileInfo": FileInfo},
    )

    agent = CodexAgent(
        sig,
        working_directory=".",
        model=Model.GPT_5_1_CODEX_MAX,
        sandbox_mode=SandboxMode.READ_ONLY,
        model_reasoning_effort=ModelReasoningEffort.LOW,
    )

    result = agent(
        directory="src/",
        focus="Understand the DSPy integration architecture",
    )

    print(f"\nArchitecture: {result.analysis.architecture}")
    print(f"\nTech Stack: {result.analysis.tech_stack}")
    print(f"\nKey Files ({len(result.analysis.main_files)}):")
    for f in result.analysis.main_files[:3]:
        print(f"  {f.path}: {f.purpose}")
    print(f"\nRecommendations: {result.recommendations[:300]}...")


def example_5_trace_inspection():
    """Example 5: Inspecting the execution trace."""
    print("\n" + "=" * 60)
    print("Example 5: Execution Trace Inspection")
    print("=" * 60)

    from codex import CommandExecutionItem, AgentMessageItem

    sig = dspy.Signature("task: str -> result: str")

    agent = CodexAgent(
        sig,
        working_directory=".",
        model=Model.GPT_5_1_CODEX_MAX,
        sandbox_mode=SandboxMode.READ_ONLY,
        model_reasoning_effort=ModelReasoningEffort.LOW,
    )

    result = agent(task="Count the number of Python files in this project")

    print(f"\nResult: {result.result}")
    print(f"\nExecution Trace ({len(result.trace)} items):")

    for item in result.trace:
        if isinstance(item, CommandExecutionItem):
            print(f"  [CMD] {item.command}")
            print(f"        Exit: {item.exit_code}")
        elif isinstance(item, AgentMessageItem):
            preview = item.text[:100] + "..." if len(item.text) > 100 else item.text
            print(f"  [MSG] {preview}")
        else:
            print(f"  [{item.type}] {item.id}")


if __name__ == "__main__":
    import sys

    examples = {
        "1": ("Simple string", example_1_simple_string),
        "2": ("Multi-field Pydantic", example_2_multi_field_pydantic),
        "3": ("Multi-turn conversation", example_3_multi_turn),
        "4": ("Complex analysis", example_4_complex_analysis),
        "5": ("Trace inspection", example_5_trace_inspection),
    }

    # Fix: "all" was previously handled by a trailing check AFTER the
    # per-example lookup, so `basic_usage.py all` first printed
    # "Unknown example: all" (and the help text) before running everything.
    # A single if/elif chain dispatches each case exactly once.
    if len(sys.argv) < 2:
        print("CodexAgent Examples")
        print("=" * 60)
        print("\nRun a specific example:")
        for key, (name, _) in examples.items():
            print(f"  python examples/basic_usage.py {key}  # {name}")
        print("\nOr run all:")
        print("  python examples/basic_usage.py all")
    elif sys.argv[1] == "all":
        for _, func in examples.values():
            func()
        print("\n" + "=" * 60)
        print("All examples completed!")
        print("=" * 60)
    elif sys.argv[1] in examples:
        examples[sys.argv[1]][1]()
    else:
        print(f"Unknown example: {sys.argv[1]}")
        print(f"Available: {list(examples.keys())}")
"completed" 17 | FAILED = "failed" 18 | DECLINED = "declined" 19 | 20 | 21 | class PatchChangeKind(StrEnum): 22 | """Type of file change in a patch.""" 23 | 24 | ADD = "add" 25 | DELETE = "delete" 26 | UPDATE = "update" 27 | 28 | 29 | class PatchApplyStatus(StrEnum): 30 | """Status of a patch application.""" 31 | 32 | IN_PROGRESS = "in_progress" 33 | COMPLETED = "completed" 34 | FAILED = "failed" 35 | 36 | 37 | class McpToolCallStatus(StrEnum): 38 | """Status of an MCP tool call.""" 39 | 40 | IN_PROGRESS = "in_progress" 41 | COMPLETED = "completed" 42 | FAILED = "failed" 43 | 44 | 45 | @dataclass(frozen=True, slots=True) 46 | class McpContentBlock: 47 | """A content block from an MCP tool result.""" 48 | 49 | type: str 50 | data: JsonObject # The raw content block data (always a JSON object) 51 | 52 | 53 | @dataclass(frozen=True, slots=True) 54 | class McpToolCallResult: 55 | """Result payload returned by the MCP server for successful calls.""" 56 | 57 | content: Sequence[McpContentBlock] 58 | structured_content: JsonValue | None = None 59 | 60 | 61 | @dataclass(frozen=True, slots=True) 62 | class McpToolCallError: 63 | """Error reported for failed MCP tool calls.""" 64 | 65 | message: str 66 | 67 | 68 | @dataclass(frozen=True, slots=True) 69 | class CommandExecutionItem: 70 | """A command executed by the agent.""" 71 | 72 | type: Literal["command_execution"] = field(default="command_execution", init=False) 73 | id: str 74 | command: str 75 | aggregated_output: str 76 | status: CommandExecutionStatus 77 | exit_code: int | None = None 78 | 79 | 80 | @dataclass(frozen=True, slots=True) 81 | class FileUpdateChange: 82 | """A single file change within a patch.""" 83 | 84 | path: str 85 | kind: PatchChangeKind 86 | 87 | 88 | @dataclass(frozen=True, slots=True) 89 | class FileChangeItem: 90 | """A set of file changes by the agent.""" 91 | 92 | type: Literal["file_change"] = field(default="file_change", init=False) 93 | id: str 94 | changes: Sequence[FileUpdateChange] 95 
| status: PatchApplyStatus 96 | 97 | 98 | @dataclass(frozen=True, slots=True) 99 | class McpToolCallItem: 100 | """A call to an MCP tool.""" 101 | 102 | type: Literal["mcp_tool_call"] = field(default="mcp_tool_call", init=False) 103 | id: str 104 | server: str 105 | tool: str 106 | arguments: JsonValue # Can be any JSON-serializable value 107 | status: McpToolCallStatus 108 | result: McpToolCallResult | None = None 109 | error: McpToolCallError | None = None 110 | 111 | 112 | @dataclass(frozen=True, slots=True) 113 | class AgentMessageItem: 114 | """Response from the agent (natural language or JSON for structured output).""" 115 | 116 | type: Literal["agent_message"] = field(default="agent_message", init=False) 117 | id: str 118 | text: str 119 | 120 | 121 | @dataclass(frozen=True, slots=True) 122 | class ReasoningItem: 123 | """Agent's reasoning summary.""" 124 | 125 | type: Literal["reasoning"] = field(default="reasoning", init=False) 126 | id: str 127 | text: str 128 | 129 | 130 | @dataclass(frozen=True, slots=True) 131 | class WebSearchItem: 132 | """A web search request.""" 133 | 134 | type: Literal["web_search"] = field(default="web_search", init=False) 135 | id: str 136 | query: str 137 | 138 | 139 | @dataclass(frozen=True, slots=True) 140 | class ErrorItem: 141 | """A non-fatal error surfaced as an item.""" 142 | 143 | type: Literal["error"] = field(default="error", init=False) 144 | id: str 145 | message: str 146 | 147 | 148 | @dataclass(frozen=True, slots=True) 149 | class TodoItem: 150 | """An item in the agent's to-do list.""" 151 | 152 | text: str 153 | completed: bool 154 | 155 | 156 | @dataclass(frozen=True, slots=True) 157 | class TodoListItem: 158 | """The agent's running to-do list.""" 159 | 160 | type: Literal["todo_list"] = field(default="todo_list", init=False) 161 | id: str 162 | items: Sequence[TodoItem] 163 | 164 | 165 | ThreadItem = ( 166 | AgentMessageItem 167 | | ReasoningItem 168 | | CommandExecutionItem 169 | | FileChangeItem 170 | | 
McpToolCallItem 171 | | WebSearchItem 172 | | TodoListItem 173 | | ErrorItem 174 | ) 175 | 176 | 177 | def _ensure_str(value: JsonValue, field_name: str) -> str: 178 | if isinstance(value, str): 179 | return value 180 | raise CodexError(f"Expected string for {field_name}") 181 | 182 | 183 | def _ensure_sequence(value: JsonValue, field_name: str) -> Sequence[JsonValue]: 184 | if isinstance(value, Sequence) and not isinstance(value, (str, bytes)): 185 | return cast("Sequence[JsonValue]", value) 186 | raise CodexError(f"Expected sequence for {field_name}") 187 | 188 | 189 | def _parse_changes(values: Iterable[JsonValue]) -> list[FileUpdateChange]: 190 | changes: list[FileUpdateChange] = [] 191 | for value in values: 192 | if not isinstance(value, dict): 193 | raise CodexError("Invalid file change entry") 194 | path = _ensure_str(value.get("path"), "path") 195 | kind = _ensure_str(value.get("kind"), "kind") 196 | try: 197 | enum_kind = PatchChangeKind(kind) 198 | except ValueError as exc: 199 | raise CodexError(f"Unsupported file change kind: {kind}") from exc 200 | changes.append(FileUpdateChange(path=path, kind=enum_kind)) 201 | return changes 202 | 203 | 204 | def _parse_todos(values: Iterable[JsonValue]) -> list[TodoItem]: 205 | todos: list[TodoItem] = [] 206 | for value in values: 207 | if not isinstance(value, dict): 208 | raise CodexError("Invalid todo entry") 209 | text = _ensure_str(value.get("text"), "text") 210 | completed = bool(value.get("completed", False)) 211 | todos.append(TodoItem(text=text, completed=completed)) 212 | return todos 213 | 214 | 215 | def parse_thread_item(payload: JsonObject) -> ThreadItem: 216 | """Parse a JSON object into a ThreadItem.""" 217 | type_name = _ensure_str(payload.get("type"), "type") 218 | item_id = _ensure_str(payload.get("id"), "id") 219 | 220 | if type_name == "agent_message": 221 | text = _ensure_str(payload.get("text"), "text") 222 | return AgentMessageItem(id=item_id, text=text) 223 | 224 | if type_name == 
"reasoning": 225 | text = _ensure_str(payload.get("text"), "text") 226 | return ReasoningItem(id=item_id, text=text) 227 | 228 | if type_name == "command_execution": 229 | command = _ensure_str(payload.get("command"), "command") 230 | aggregated_output = _ensure_str(payload.get("aggregated_output"), "aggregated_output") 231 | status_str = _ensure_str(payload.get("status"), "status") 232 | try: 233 | status = CommandExecutionStatus(status_str) 234 | except ValueError as exc: 235 | raise CodexError(f"Unsupported command execution status: {status_str}") from exc 236 | exit_code = payload.get("exit_code") 237 | exit_value = int(exit_code) if isinstance(exit_code, int) else None 238 | return CommandExecutionItem( 239 | id=item_id, 240 | command=command, 241 | aggregated_output=aggregated_output, 242 | status=status, 243 | exit_code=exit_value, 244 | ) 245 | 246 | if type_name == "file_change": 247 | changes_raw = _ensure_sequence(payload.get("changes"), "changes") 248 | status_str = _ensure_str(payload.get("status"), "status") 249 | try: 250 | change_status = PatchApplyStatus(status_str) 251 | except ValueError as exc: 252 | raise CodexError(f"Unsupported file change status: {status_str}") from exc 253 | changes = _parse_changes(changes_raw) 254 | return FileChangeItem(id=item_id, changes=changes, status=change_status) 255 | 256 | if type_name == "mcp_tool_call": 257 | server = _ensure_str(payload.get("server"), "server") 258 | tool = _ensure_str(payload.get("tool"), "tool") 259 | arguments = payload.get("arguments") # Can be any JSON value 260 | status_str = _ensure_str(payload.get("status"), "status") 261 | try: 262 | call_status = McpToolCallStatus(status_str) 263 | except ValueError as exc: 264 | raise CodexError(f"Unsupported MCP tool call status: {status_str}") from exc 265 | 266 | # Parse optional result 267 | result: McpToolCallResult | None = None 268 | result_payload = payload.get("result") 269 | if result_payload is not None and isinstance(result_payload, 
dict): 270 | content_raw = result_payload.get("content", []) 271 | content_blocks: list[McpContentBlock] = [] 272 | if isinstance(content_raw, list): 273 | for block in content_raw: 274 | if isinstance(block, dict): 275 | block_type = block.get("type", "unknown") 276 | content_blocks.append( 277 | McpContentBlock( 278 | type=str(block_type), 279 | data=block, 280 | ) 281 | ) 282 | structured = result_payload.get("structured_content") 283 | result = McpToolCallResult(content=content_blocks, structured_content=structured) 284 | 285 | # Parse optional error 286 | error: McpToolCallError | None = None 287 | error_payload = payload.get("error") 288 | if error_payload is not None and isinstance(error_payload, dict): 289 | error_message = error_payload.get("message", "") 290 | error = McpToolCallError(message=str(error_message)) 291 | 292 | return McpToolCallItem( 293 | id=item_id, 294 | server=server, 295 | tool=tool, 296 | arguments=arguments, 297 | status=call_status, 298 | result=result, 299 | error=error, 300 | ) 301 | 302 | if type_name == "web_search": 303 | query = _ensure_str(payload.get("query"), "query") 304 | return WebSearchItem(id=item_id, query=query) 305 | 306 | if type_name == "error": 307 | message = _ensure_str(payload.get("message"), "message") 308 | return ErrorItem(id=item_id, message=message) 309 | 310 | if type_name == "todo_list": 311 | todos_raw = _ensure_sequence(payload.get("items"), "items") 312 | todos = _parse_todos(todos_raw) 313 | return TodoListItem(id=item_id, items=todos) 314 | 315 | raise CodexError(f"Unsupported item type: {type_name}") 316 | -------------------------------------------------------------------------------- /docs/CODEX_QUICK_REFERENCE.md: -------------------------------------------------------------------------------- 1 | # Codex Python SDK - Quick Reference Guide 2 | 3 | ## Installation & Basic Setup 4 | 5 | ```python 6 | from codex import Codex, CodexOptions, ThreadOptions, TurnOptions 7 | 8 | # Initialize client 9 | 
client = Codex() 10 | 11 | # Or with config 12 | client = Codex(CodexOptions( 13 | base_url="https://api.openai.com/v1", 14 | api_key="sk-..." 15 | )) 16 | ``` 17 | 18 | ## Core Patterns 19 | 20 | ### Pattern 1: Simple Prompt-Response 21 | 22 | ```python 23 | thread = client.start_thread() 24 | result = thread.run("Summarize this repository") 25 | print(result.final_response) 26 | ``` 27 | 28 | ### Pattern 2: Streaming Events 29 | 30 | ```python 31 | from codex import ItemCompletedEvent, TurnCompletedEvent 32 | 33 | stream = thread.run_streamed("Fix the failing test") 34 | for event in stream: 35 | if isinstance(event, ItemCompletedEvent): 36 | print(f"Item: {event.item.type}") 37 | if isinstance(event, TurnCompletedEvent): 38 | print(f"Tokens: {event.usage.input_tokens}") 39 | ``` 40 | 41 | ### Pattern 3: Structured Output 42 | 43 | ```python 44 | from pydantic import BaseModel 45 | 46 | class BugReport(BaseModel): 47 | title: str 48 | severity: str 49 | steps: list[str] 50 | 51 | result = thread.run( 52 | "Analyze the bug", 53 | TurnOptions(output_schema=BugReport) 54 | ) 55 | ``` 56 | 57 | ### Pattern 4: Multi-turn Conversation 58 | 59 | ```python 60 | thread = client.start_thread() 61 | 62 | # Turn 1 63 | resp1 = thread.run("What's wrong?") 64 | 65 | # Turn 2 - context preserved 66 | resp2 = thread.run("How do we fix it?") 67 | 68 | # Resume later 69 | resumed = client.resume_thread(thread.id) 70 | resp3 = resumed.run("Write tests") 71 | ``` 72 | 73 | ### Pattern 5: Configured Execution 74 | 75 | ```python 76 | from codex import SandboxMode 77 | 78 | thread = client.start_thread(ThreadOptions( 79 | model="gpt-5.1-codex-max", 80 | sandbox_mode=SandboxMode.WORKSPACE_WRITE, 81 | working_directory="/path/to/repo" 82 | )) 83 | 84 | result = thread.run("Implement the fix") 85 | ``` 86 | 87 | ## Response Objects 88 | 89 | ### ThreadRunResult (from `thread.run()`) 90 | ```python 91 | result.final_response: str # Final message from agent 92 | result.items: 
list[ThreadItem] # All items (commands, files, etc.) 93 | result.usage: Usage # Token counts 94 | result.usage.input_tokens 95 | result.usage.cached_input_tokens 96 | result.usage.output_tokens 97 | ``` 98 | 99 | ### Thread Items (types in result.items) 100 | 101 | | Type | Fields | Meaning | 102 | |------|--------|---------| 103 | | `AgentMessageItem` | `id, text` | Agent response message | 104 | | `ReasoningItem` | `id, text` | Agent's reasoning/thinking | 105 | | `CommandExecutionItem` | `id, command, aggregated_output, status, exit_code` | Command run | 106 | | `FileChangeItem` | `id, changes, status` | File modifications | 107 | | `McpToolCallItem` | `id, server, tool, status` | MCP tool invocation | 108 | | `WebSearchItem` | `id, query` | Web search performed | 109 | | `TodoListItem` | `id, items` | Task list created | 110 | | `ErrorItem` | `id, message` | Error occurred | 111 | 112 | ### Events (from `thread.run_streamed()`) 113 | 114 | ```python 115 | # Union of: 116 | ThreadStartedEvent # type, thread_id 117 | TurnStartedEvent # type 118 | TurnCompletedEvent # type, usage 119 | TurnFailedEvent # type, error 120 | ItemStartedEvent # type, item 121 | ItemUpdatedEvent # type, item 122 | ItemCompletedEvent # type, item 123 | ThreadErrorEvent # type, message 124 | ``` 125 | 126 | ## Configuration Parameters 127 | 128 | ### Codex Client 129 | ```python 130 | CodexOptions( 131 | codex_path_override=None, # Override binary location 132 | base_url=None, # API endpoint 133 | api_key=None, # Auth key 134 | ) 135 | ``` 136 | 137 | ### Thread 138 | ```python 139 | ThreadOptions( 140 | model=None, # default: "gpt-5.1-codex-max" 141 | sandbox_mode=None, # READ_ONLY, WORKSPACE_WRITE, DANGER_FULL_ACCESS 142 | working_directory=None, # Where to run commands 143 | skip_git_repo_check=False, # Allow non-git directories 144 | approval_mode=None, # ApprovalMode - When to ask for user approval (never, on-request, on-failure, untrusted) 145 | ) 146 | ``` 147 | 148 | ### Turn 149 
| ```python 150 | TurnOptions( 151 | output_schema=None, # Dict or Pydantic model for output schema 152 | ) 153 | ``` 154 | 155 | ## Error Handling 156 | 157 | ```python 158 | from codex import ThreadRunError, SchemaValidationError, CodexError 159 | 160 | try: 161 | result = thread.run(prompt, TurnOptions(output_schema=schema)) 162 | except ThreadRunError as e: 163 | print(f"Turn failed: {e}") 164 | except SchemaValidationError as e: 165 | print(f"Invalid schema: {e}") 166 | except CodexError as e: 167 | print(f"SDK error: {e}") 168 | ``` 169 | 170 | ## Common Tasks 171 | 172 | ### Check if Thread ID Available 173 | ```python 174 | if thread.id is not None: 175 | print(f"Thread ID: {thread.id}") 176 | ``` 177 | 178 | ### Pass JSON Schema for Output 179 | ```python 180 | schema = { 181 | "type": "object", 182 | "properties": { 183 | "name": {"type": "string"}, 184 | "age": {"type": "integer"}, 185 | }, 186 | "required": ["name"], 187 | } 188 | result = thread.run(prompt, TurnOptions(output_schema=schema)) 189 | ``` 190 | 191 | ### Filter Items by Type 192 | ```python 193 | from codex import CommandExecutionItem 194 | 195 | commands = [ 196 | item for item in result.items 197 | if isinstance(item, CommandExecutionItem) 198 | ] 199 | for cmd in commands: 200 | print(f"Command: {cmd.command}") 201 | print(f"Exit code: {cmd.exit_code}") 202 | ``` 203 | 204 | ### Handle Item Updates in Streaming 205 | ```python 206 | from codex import ItemUpdatedEvent, ItemCompletedEvent 207 | 208 | for event in thread.run_streamed(prompt): 209 | if isinstance(event, ItemUpdatedEvent): 210 | print(f"Item {event.item.id} updating...") 211 | if isinstance(event, ItemCompletedEvent): 212 | print(f"Item {event.item.id} done") 213 | ``` 214 | 215 | ### Inspect MCP Tool Calls 216 | ```python 217 | from codex import McpToolCallItem, ItemCompletedEvent 218 | 219 | for event in thread.run_streamed(prompt): 220 | if isinstance(event, ItemCompletedEvent): 221 | if isinstance(event.item, 
McpToolCallItem): 222 | tool = event.item 223 | print(f"Tool: {tool.server}.{tool.tool}") 224 | print(f"Status: {tool.status}") 225 | ``` 226 | 227 | ## 6. DSPy Integration Pattern 228 | 229 | ### Basic Pattern 230 | ```python 231 | import dspy 232 | from codex_dspy import CodexAgent 233 | from codex import SandboxMode 234 | 235 | # Define signature 236 | sig = dspy.Signature('message:str -> answer:str') 237 | 238 | # Create agent (starts thread) 239 | agent = CodexAgent( 240 | sig, 241 | working_directory='.', 242 | sandbox_mode=SandboxMode.READ_ONLY 243 | ) 244 | 245 | # Execute (returns Prediction) 246 | result = agent(message='Your task') 247 | print(result.answer) # Typed output field 248 | print(result.trace) # List[ThreadItem] 249 | print(result.usage) # Token usage 250 | ``` 251 | 252 | ### Pydantic Output Pattern 253 | ```python 254 | from pydantic import BaseModel, Field 255 | 256 | class Analysis(BaseModel): 257 | summary: str 258 | key_points: list[str] 259 | 260 | sig = dspy.Signature('message:str -> analysis:Analysis') 261 | agent = CodexAgent(sig, working_directory='.') 262 | result = agent(message='Analyze this project') 263 | print(result.analysis.summary) # Typed Pydantic access 264 | ``` 265 | 266 | ### Key Points 267 | - One agent instance = one stateful thread 268 | - Multiple forward() calls continue the same conversation 269 | - String outputs: no schema, freeform response 270 | - Pydantic outputs: structured JSON validation 271 | - Access thread_id via `agent.thread_id` 272 | 273 | ## Type Hints 274 | 275 | All exports support full type hints: 276 | 277 | ```python 278 | from codex import ( 279 | Codex, 280 | CodexOptions, 281 | ThreadOptions, 282 | TurnOptions, 283 | SandboxMode, 284 | Thread, 285 | ThreadRunResult, 286 | ThreadStream, 287 | ThreadEvent, 288 | ThreadItem, 289 | AgentMessageItem, 290 | CommandExecutionItem, 291 | FileChangeItem, 292 | McpToolCallItem, 293 | Usage, 294 | CodexError, 295 | ) 296 | ``` 297 | 298 | ## Stateless 
vs Stateful 299 | 300 | **Stateful (Thread):** 301 | - Single `Thread` object across multiple `run()` calls 302 | - History automatically maintained 303 | - `thread.id` persists across sessions 304 | 305 | **Stateless (Individual Runs):** 306 | - Each `run()` call is independent 307 | - Configure turn-by-turn with `TurnOptions` 308 | - No state carried between `TurnOptions` 309 | 310 | ## Environment Variables 311 | 312 | ```bash 313 | CODEX_API_KEY=sk-... # API key (alternative to CodexOptions.api_key) 314 | OPENAI_BASE_URL=https://... # API URL (alternative to CodexOptions.base_url) 315 | ``` 316 | 317 | ## Performance Notes 318 | 319 | 1. **Streaming vs Sync**: Use `run_streamed()` to get real-time feedback, `run()` for final result 320 | 2. **Token Caching**: Check `usage.cached_input_tokens` in response 321 | 3. **Schema Validation**: Pydantic models are auto-converted to JSON Schema 322 | 4. **Working Directory**: Best to use git repos; set `skip_git_repo_check=True` for others 323 | 5. 
**Sandbox Mode**: `READ_ONLY` is most restrictive, `DANGER_FULL_ACCESS` least 324 | 325 | ## Files & Paths 326 | 327 | ```python 328 | # File changes from item 329 | for change in file_item.changes: 330 | print(change.path) # File path 331 | print(change.kind) # "add", "update", "delete" 332 | ``` 333 | 334 | ## Boolean Statuses 335 | 336 | ```python 337 | # Command execution 338 | CommandExecutionStatus.IN_PROGRESS, COMPLETED, FAILED 339 | 340 | # File patches 341 | PatchApplyStatus.COMPLETED, FAILED 342 | 343 | # MCP tool calls 344 | McpToolCallStatus.IN_PROGRESS, COMPLETED, FAILED 345 | ``` 346 | 347 | ## Full Example: Production-Ready Pattern 348 | 349 | ```python 350 | from codex import ( 351 | Codex, CodexOptions, ThreadOptions, TurnOptions, 352 | SandboxMode, ItemCompletedEvent, CommandExecutionItem, 353 | TurnCompletedEvent, ThreadRunError 354 | ) 355 | from pydantic import BaseModel 356 | 357 | class AnalysisResult(BaseModel): 358 | issues: list[str] 359 | summary: str 360 | severity: str 361 | 362 | # Setup 363 | client = Codex(CodexOptions(api_key="sk-...")) 364 | thread = client.start_thread(ThreadOptions( 365 | model="gpt-5.1-codex-max", 366 | sandbox_mode=SandboxMode.WORKSPACE_WRITE, 367 | working_directory="/path/to/repo", 368 | )) 369 | 370 | try: 371 | # Get structured output 372 | result = thread.run( 373 | "Analyze code issues", 374 | TurnOptions(output_schema=AnalysisResult) 375 | ) 376 | 377 | # Extract data 378 | analysis = result.final_response 379 | print(f"Issues found: {len(analysis.issues)}") 380 | print(f"Severity: {analysis.severity}") 381 | 382 | # Process commands 383 | commands = [ 384 | i for i in result.items 385 | if isinstance(i, CommandExecutionItem) 386 | ] 387 | for cmd in commands: 388 | print(f"Ran: {cmd.command}") 389 | 390 | except ThreadRunError as e: 391 | print(f"Analysis failed: {e}") 392 | ``` 393 | 394 | -------------------------------------------------------------------------------- 
/src/tests/unit/test_codex_parsing.py: -------------------------------------------------------------------------------- 1 | """Unit tests for codex module parsing functions. 2 | 3 | Tests JSON → typed dataclass conversions for thread items and events. 4 | """ 5 | 6 | import pytest 7 | 8 | from codex import ( 9 | AgentMessageItem, 10 | CommandExecutionItem, 11 | CommandExecutionStatus, 12 | ErrorItem, 13 | FileChangeItem, 14 | FileUpdateChange, 15 | McpToolCallItem, 16 | McpToolCallStatus, 17 | PatchApplyStatus, 18 | PatchChangeKind, 19 | ReasoningItem, 20 | TodoItem, 21 | TodoListItem, 22 | WebSearchItem, 23 | ) 24 | from codex.events import ( 25 | ItemCompletedEvent, 26 | ItemStartedEvent, 27 | ItemUpdatedEvent, 28 | ThreadErrorEvent, 29 | ThreadStartedEvent, 30 | TurnCompletedEvent, 31 | TurnFailedEvent, 32 | TurnStartedEvent, 33 | Usage, 34 | parse_thread_event, 35 | ) 36 | from codex.exceptions import CodexError 37 | from codex.items import parse_thread_item 38 | 39 | 40 | class TestParseThreadItem: 41 | """Tests for parse_thread_item function.""" 42 | 43 | def test_agent_message(self): 44 | """AgentMessageItem should parse correctly.""" 45 | payload = {"type": "agent_message", "id": "msg_1", "text": "Hello world"} 46 | item = parse_thread_item(payload) 47 | 48 | assert isinstance(item, AgentMessageItem) 49 | assert item.id == "msg_1" 50 | assert item.text == "Hello world" 51 | assert item.type == "agent_message" 52 | 53 | def test_reasoning(self): 54 | """ReasoningItem should parse correctly.""" 55 | payload = {"type": "reasoning", "id": "reason_1", "text": "Thinking..."} 56 | item = parse_thread_item(payload) 57 | 58 | assert isinstance(item, ReasoningItem) 59 | assert item.id == "reason_1" 60 | assert item.text == "Thinking..." 
61 | 62 | def test_command_execution(self): 63 | """CommandExecutionItem should parse with all fields.""" 64 | payload = { 65 | "type": "command_execution", 66 | "id": "cmd_1", 67 | "command": "ls -la", 68 | "aggregated_output": "file1.txt\nfile2.txt", 69 | "status": "completed", 70 | "exit_code": 0, 71 | } 72 | item = parse_thread_item(payload) 73 | 74 | assert isinstance(item, CommandExecutionItem) 75 | assert item.id == "cmd_1" 76 | assert item.command == "ls -la" 77 | assert item.aggregated_output == "file1.txt\nfile2.txt" 78 | assert item.status == CommandExecutionStatus.COMPLETED 79 | assert item.exit_code == 0 80 | 81 | def test_command_execution_failed(self): 82 | """CommandExecutionItem with failed status.""" 83 | payload = { 84 | "type": "command_execution", 85 | "id": "cmd_2", 86 | "command": "false", 87 | "aggregated_output": "", 88 | "status": "failed", 89 | "exit_code": 1, 90 | } 91 | item = parse_thread_item(payload) 92 | 93 | assert item.status == CommandExecutionStatus.FAILED 94 | assert item.exit_code == 1 95 | 96 | def test_command_execution_declined(self): 97 | """CommandExecutionItem with declined status.""" 98 | payload = { 99 | "type": "command_execution", 100 | "id": "cmd_3", 101 | "command": "rm -rf /", 102 | "aggregated_output": "", 103 | "status": "declined", 104 | } 105 | item = parse_thread_item(payload) 106 | 107 | assert item.status == CommandExecutionStatus.DECLINED 108 | assert item.exit_code is None 109 | 110 | def test_file_change(self): 111 | """FileChangeItem should parse with changes list.""" 112 | payload = { 113 | "type": "file_change", 114 | "id": "fc_1", 115 | "changes": [ 116 | {"path": "src/main.py", "kind": "update"}, 117 | {"path": "src/new.py", "kind": "add"}, 118 | {"path": "src/old.py", "kind": "delete"}, 119 | ], 120 | "status": "completed", 121 | } 122 | item = parse_thread_item(payload) 123 | 124 | assert isinstance(item, FileChangeItem) 125 | assert len(item.changes) == 3 126 | assert isinstance(item.changes[0], 
FileUpdateChange) 127 | assert item.changes[0].path == "src/main.py" 128 | assert item.changes[0].kind == PatchChangeKind.UPDATE 129 | assert item.changes[1].kind == PatchChangeKind.ADD 130 | assert item.changes[2].kind == PatchChangeKind.DELETE 131 | assert item.status == PatchApplyStatus.COMPLETED 132 | 133 | def test_mcp_tool_call_in_progress(self): 134 | """McpToolCallItem in progress state.""" 135 | payload = { 136 | "type": "mcp_tool_call", 137 | "id": "mcp_1", 138 | "server": "my-server", 139 | "tool": "search", 140 | "arguments": {"query": "test"}, 141 | "status": "in_progress", 142 | } 143 | item = parse_thread_item(payload) 144 | 145 | assert isinstance(item, McpToolCallItem) 146 | assert item.server == "my-server" 147 | assert item.tool == "search" 148 | assert item.arguments == {"query": "test"} 149 | assert item.status == McpToolCallStatus.IN_PROGRESS 150 | assert item.result is None 151 | assert item.error is None 152 | 153 | def test_mcp_tool_call_completed_with_result(self): 154 | """McpToolCallItem completed with result.""" 155 | payload = { 156 | "type": "mcp_tool_call", 157 | "id": "mcp_2", 158 | "server": "db-server", 159 | "tool": "query", 160 | "arguments": "SELECT * FROM users", 161 | "status": "completed", 162 | "result": { 163 | "content": [{"type": "text", "text": "Found 5 rows"}], 164 | "structured_content": {"rows": 5}, 165 | }, 166 | } 167 | item = parse_thread_item(payload) 168 | 169 | assert item.status == McpToolCallStatus.COMPLETED 170 | assert item.result is not None 171 | assert len(item.result.content) == 1 172 | assert item.result.content[0].type == "text" 173 | assert item.result.structured_content == {"rows": 5} 174 | 175 | def test_mcp_tool_call_failed_with_error(self): 176 | """McpToolCallItem failed with error.""" 177 | payload = { 178 | "type": "mcp_tool_call", 179 | "id": "mcp_3", 180 | "server": "api-server", 181 | "tool": "fetch", 182 | "arguments": None, 183 | "status": "failed", 184 | "error": {"message": "Connection 
refused"}, 185 | } 186 | item = parse_thread_item(payload) 187 | 188 | assert item.status == McpToolCallStatus.FAILED 189 | assert item.error is not None 190 | assert item.error.message == "Connection refused" 191 | 192 | def test_web_search(self): 193 | """WebSearchItem should parse correctly.""" 194 | payload = {"type": "web_search", "id": "ws_1", "query": "python async"} 195 | item = parse_thread_item(payload) 196 | 197 | assert isinstance(item, WebSearchItem) 198 | assert item.query == "python async" 199 | 200 | def test_error_item(self): 201 | """ErrorItem should parse correctly.""" 202 | payload = {"type": "error", "id": "err_1", "message": "Something went wrong"} 203 | item = parse_thread_item(payload) 204 | 205 | assert isinstance(item, ErrorItem) 206 | assert item.message == "Something went wrong" 207 | 208 | def test_todo_list(self): 209 | """TodoListItem should parse with items.""" 210 | payload = { 211 | "type": "todo_list", 212 | "id": "todo_1", 213 | "items": [ 214 | {"text": "Fix bug", "completed": False}, 215 | {"text": "Write tests", "completed": True}, 216 | ], 217 | } 218 | item = parse_thread_item(payload) 219 | 220 | assert isinstance(item, TodoListItem) 221 | assert len(item.items) == 2 222 | assert isinstance(item.items[0], TodoItem) 223 | assert item.items[0].text == "Fix bug" 224 | assert item.items[0].completed is False 225 | assert item.items[1].completed is True 226 | 227 | def test_unsupported_type_raises(self): 228 | """Unknown type should raise CodexError.""" 229 | payload = {"type": "unknown_type", "id": "x"} 230 | 231 | with pytest.raises(CodexError, match="Unsupported item type"): 232 | parse_thread_item(payload) 233 | 234 | def test_missing_type_raises(self): 235 | """Missing type field should raise CodexError.""" 236 | payload = {"id": "x", "text": "hello"} 237 | 238 | with pytest.raises(CodexError, match="Expected string for type"): 239 | parse_thread_item(payload) 240 | 241 | def test_missing_id_raises(self): 242 | """Missing 
id field should raise CodexError.""" 243 | payload = {"type": "agent_message", "text": "hello"} 244 | 245 | with pytest.raises(CodexError, match="Expected string for id"): 246 | parse_thread_item(payload) 247 | 248 | 249 | class TestParseThreadEvent: 250 | """Tests for parse_thread_event function.""" 251 | 252 | def test_thread_started(self): 253 | """ThreadStartedEvent should parse correctly.""" 254 | payload = {"type": "thread.started", "thread_id": "thread_123"} 255 | event = parse_thread_event(payload) 256 | 257 | assert isinstance(event, ThreadStartedEvent) 258 | assert event.thread_id == "thread_123" 259 | assert event.type == "thread.started" 260 | 261 | def test_turn_started(self): 262 | """TurnStartedEvent should parse correctly.""" 263 | payload = {"type": "turn.started"} 264 | event = parse_thread_event(payload) 265 | 266 | assert isinstance(event, TurnStartedEvent) 267 | assert event.type == "turn.started" 268 | 269 | def test_turn_completed(self): 270 | """TurnCompletedEvent should parse with usage.""" 271 | payload = { 272 | "type": "turn.completed", 273 | "usage": { 274 | "input_tokens": 100, 275 | "output_tokens": 50, 276 | "cached_input_tokens": 20, 277 | }, 278 | } 279 | event = parse_thread_event(payload) 280 | 281 | assert isinstance(event, TurnCompletedEvent) 282 | assert isinstance(event.usage, Usage) 283 | assert event.usage.input_tokens == 100 284 | assert event.usage.output_tokens == 50 285 | assert event.usage.cached_input_tokens == 20 286 | 287 | def test_turn_failed(self): 288 | """TurnFailedEvent should parse with error message.""" 289 | payload = { 290 | "type": "turn.failed", 291 | "error": {"message": "Rate limit exceeded"}, 292 | } 293 | event = parse_thread_event(payload) 294 | 295 | assert isinstance(event, TurnFailedEvent) 296 | assert event.error.message == "Rate limit exceeded" 297 | 298 | def test_item_started(self): 299 | """ItemStartedEvent should parse with nested item.""" 300 | payload = { 301 | "type": "item.started", 302 
| "item": {"type": "agent_message", "id": "msg_1", "text": "Starting..."}, 303 | } 304 | event = parse_thread_event(payload) 305 | 306 | assert isinstance(event, ItemStartedEvent) 307 | assert isinstance(event.item, AgentMessageItem) 308 | assert event.item.text == "Starting..." 309 | 310 | def test_item_updated(self): 311 | """ItemUpdatedEvent should parse with nested item.""" 312 | payload = { 313 | "type": "item.updated", 314 | "item": { 315 | "type": "command_execution", 316 | "id": "cmd_1", 317 | "command": "ls", 318 | "aggregated_output": "file1.txt", 319 | "status": "in_progress", 320 | }, 321 | } 322 | event = parse_thread_event(payload) 323 | 324 | assert isinstance(event, ItemUpdatedEvent) 325 | assert isinstance(event.item, CommandExecutionItem) 326 | assert event.item.status == CommandExecutionStatus.IN_PROGRESS 327 | 328 | def test_item_completed(self): 329 | """ItemCompletedEvent should parse with nested item.""" 330 | payload = { 331 | "type": "item.completed", 332 | "item": {"type": "reasoning", "id": "r_1", "text": "Done thinking"}, 333 | } 334 | event = parse_thread_event(payload) 335 | 336 | assert isinstance(event, ItemCompletedEvent) 337 | assert isinstance(event.item, ReasoningItem) 338 | 339 | def test_error_event(self): 340 | """ThreadErrorEvent should parse correctly.""" 341 | payload = {"type": "error", "message": "Connection lost"} 342 | event = parse_thread_event(payload) 343 | 344 | assert isinstance(event, ThreadErrorEvent) 345 | assert event.message == "Connection lost" 346 | 347 | def test_unsupported_event_type_raises(self): 348 | """Unknown event type should raise CodexError.""" 349 | payload = {"type": "unknown.event"} 350 | 351 | with pytest.raises(CodexError, match="Unsupported event type"): 352 | parse_thread_event(payload) 353 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CodexAgent - DSPy 
Module for OpenAI Codex SDK 2 | 3 | A DSPy module that wraps the OpenAI Codex SDK. Uses a **two-turn pattern** that keeps agents "in distribution" during task execution. 4 | 5 | ## Features 6 | 7 | - **Two-turn pattern** - Natural task execution + structured extraction 8 | - **Stateful threads** - Each agent instance = one conversation thread 9 | - **Typed outputs** - Pydantic models, primitives, lists - all work naturally 10 | - **Execution trace** - Full visibility into commands, file changes, reasoning 11 | - **DSPy-native** - Standard signatures, based on TwoStepAdapter patterns 12 | 13 | ## Installation 14 | 15 | ```bash 16 | # Install dependencies 17 | uv sync 18 | 19 | # For development (includes pytest, pre-commit) 20 | uv sync --extra dev 21 | 22 | # Ensure codex CLI is available 23 | which codex 24 | ``` 25 | 26 | ## Quick Start 27 | 28 | ```python 29 | import dspy 30 | from codex_dspy import CodexAgent 31 | 32 | # Simple string signature 33 | sig = dspy.Signature('task: str -> result: str') 34 | agent = CodexAgent(sig, working_directory=".") 35 | 36 | result = agent(task="What files are in this directory?") 37 | print(result.result) # String response 38 | print(result.trace) # Execution trace 39 | print(result.usage) # Token counts 40 | ``` 41 | 42 | ### With Pydantic Models 43 | 44 | ```python 45 | from typing import Literal 46 | from pydantic import BaseModel, Field 47 | 48 | class BugReport(BaseModel): 49 | severity: Literal["low", "medium", "high"] = Field(description="Bug severity") 50 | location: str = Field(description="File and line number") 51 | description: str = Field(description="What the bug does") 52 | 53 | sig = dspy.Signature( 54 | "code: str, context: str -> bugs: list[BugReport], summary: str", 55 | "Analyze code for bugs" 56 | ) 57 | 58 | agent = CodexAgent(sig, working_directory=".") 59 | result = agent( 60 | code="def divide(a, b): return a / b", 61 | context="Production calculator module" 62 | ) 63 | 64 | print(result.summary) # str 
65 | print(result.bugs) # list[BugReport] 66 | print(result.bugs[0].severity) # Typed access 67 | ``` 68 | 69 | ## How It Works: Two-Turn Pattern 70 | 71 | Unlike forcing JSON output during task execution (which pushes models out of distribution), CodexAgent uses a **two-turn pattern**: 72 | 73 | ### Turn 1: Natural Task Execution 74 | 75 | The agent receives a natural prompt and does its work freely: 76 | 77 | ``` 78 | As input, you are provided with: 79 | 1. `code` (str): Source code to analyze 80 | 2. `context` (str): Additional context 81 | 82 | Your task is to produce: 83 | 1. `bugs` (list[BugReport]): Bugs found in the code 84 | 2. `summary` (str): Overall analysis summary 85 | 86 | Instructions: Analyze code for bugs and provide a summary 87 | 88 | --- 89 | 90 | code: def divide(a, b): return a / b 91 | 92 | context: Production calculator module 93 | ``` 94 | 95 | The agent reads files, runs commands, reasons naturally - no JSON pressure. 96 | 97 | ### Turn 2: Structured Extraction 98 | 99 | After the task completes, the agent formats its findings using TypeScript syntax (LLMs are heavily trained on TypeScript, making this format intuitive): 100 | 101 | ``` 102 | Respond with a TypeScript value matching this type: 103 | 104 | ```typescript 105 | interface BugReport { 106 | /** Bug severity */ 107 | severity: "low" | "medium" | "high"; 108 | /** File and line number */ 109 | location: string; 110 | /** What the bug does */ 111 | description: string; 112 | } 113 | 114 | type Response = { 115 | /** Bugs found in the code */ 116 | bugs: BugReport[]; 117 | /** Overall analysis summary */ 118 | summary: string; 119 | }; 120 | ``` 121 | 122 | This separation keeps the agent in-distribution during the actual work. 123 | 124 | ### Static Examples 125 | 126 | You can provide static examples that show the LLM what good output looks like. 
These are defined on the signature and survive DSPy optimization (unlike few-shot demos which optimizers can replace): 127 | 128 | ```python 129 | class BugReport(BaseModel): 130 | severity: Literal["low", "medium", "high"] 131 | location: str 132 | description: str 133 | 134 | class CodeAnalysis(dspy.Signature): 135 | """Analyze code for bugs.""" 136 | 137 | code: str = dspy.InputField() 138 | bugs: list[BugReport] = dspy.OutputField(desc="Bugs found") 139 | summary: str = dspy.OutputField(desc="Overall summary") 140 | 141 | # Static examples - shown in Turn 2 prompt 142 | class Examples: 143 | outputs = [ 144 | { 145 | "bugs": [BugReport(severity="high", location="main.py:42", description="SQL injection")], 146 | "summary": "Found 1 critical security issue", 147 | }, 148 | { 149 | "bugs": [], 150 | "summary": "No issues found", 151 | }, 152 | ] 153 | ``` 154 | 155 | This renders in Turn 2 as: 156 | 157 | ``` 158 | Example outputs: 159 | ```typescript 160 | // Example 1: 161 | { 162 | bugs: [ 163 | { 164 | severity: "high", 165 | location: "main.py:42", 166 | description: "SQL injection", 167 | }, 168 | ], 169 | summary: "Found 1 critical security issue", 170 | } 171 | 172 | // Example 2: 173 | { 174 | bugs: [], 175 | summary: "No issues found", 176 | } 177 | ``` 178 | 179 | **Two types of examples:** 180 | 181 | | Type | Location | Purpose | Survives Optimization? | 182 | |------|----------|---------|------------------------| 183 | | **Static** | `signature.Examples.outputs` | Format documentation - "here's what good output looks like" | Yes | 184 | | **Dynamic** | `predictor.demos` | Few-shot learning for the task itself | No (optimizers replace) | 185 | 186 | Static examples only appear in Turn 2 (extraction) since they demonstrate output format, not task execution. 
187 | 188 | ## API Reference 189 | 190 | ### CodexAgent 191 | 192 | ```python 193 | class CodexAgent(dspy.Module): 194 | def __init__( 195 | self, 196 | signature: str | type[Signature], # Any number of input/output fields 197 | working_directory: str, 198 | model: Optional[str] = None, 199 | sandbox_mode: Optional[SandboxMode] = None, 200 | skip_git_repo_check: bool = False, 201 | api_key: Optional[str] = None, 202 | base_url: Optional[str] = None, 203 | codex_path_override: Optional[str] = None, 204 | ) 205 | ``` 206 | 207 | #### Parameters 208 | 209 | | Parameter | Type | Description | 210 | |-----------|------|-------------| 211 | | `signature` | `str \| type[Signature]` | DSPy signature with any number of input/output fields | 212 | | `working_directory` | `str` | Directory where agent executes commands | 213 | | `model` | `Optional[str]` | Model name (default: "gpt-5.1-codex-max") | 214 | | `sandbox_mode` | `Optional[SandboxMode]` | `READ_ONLY`, `WORKSPACE_WRITE`, or `DANGER_FULL_ACCESS` | 215 | | `skip_git_repo_check` | `bool` | Allow non-git directories | 216 | | `api_key` | `Optional[str]` | OpenAI API key (falls back to `CODEX_API_KEY` env) | 217 | | `base_url` | `Optional[str]` | API base URL (falls back to `OPENAI_BASE_URL` env) | 218 | | `codex_path_override` | `Optional[str]` | Override path to codex binary | 219 | 220 | #### Methods 221 | 222 | ##### `forward(**kwargs) -> Prediction` 223 | 224 | Execute the agent with all input fields. 225 | 226 | **Returns** a `Prediction` with: 227 | - All output fields (typed according to signature) 228 | - `trace` - `list[ThreadItem]` - Execution items (commands, files, etc.) 229 | - `usage` - `Usage` - Token counts 230 | 231 | ### CodexAdapter 232 | 233 | The adapter that formats prompts. 
You usually don't need to use this directly, but it's available: 234 | 235 | ```python 236 | from codex_dspy import CodexAdapter 237 | 238 | adapter = CodexAdapter() 239 | 240 | # Format Turn 1 (task) 241 | turn1 = adapter.format_turn1(signature, inputs) 242 | 243 | # Format Turn 2 - TypeScript format (preferred) 244 | turn2 = adapter.format_turn2_typescript(signature) 245 | 246 | # Alternative Turn 2 formats: 247 | turn2_markers = adapter.format_turn2(signature) # BAML-style [[ ## field ## ]] markers 248 | turn2_json = adapter.format_turn2_json(signature) # JSON schema format 249 | 250 | # Parse [[ ## field ## ]] markers from response 251 | parsed = adapter.parse(signature, completion) 252 | ``` 253 | 254 | The TypeScript format (`format_turn2_typescript`) is preferred because: 255 | - LLMs are heavily trained on TypeScript syntax 256 | - JSDoc comments provide field descriptions naturally 257 | - Output is parseable with `json5` (handles trailing commas, unquoted keys) 258 | 259 | ## Usage Patterns 260 | 261 | ### Pattern 1: Multi-Turn Conversation 262 | 263 | Each agent instance maintains a stateful thread: 264 | 265 | ```python 266 | agent = CodexAgent(sig, working_directory=".") 267 | 268 | # Turn 1 - agent does work 269 | result1 = agent(code="...", context="...") 270 | print(result1.summary) 271 | 272 | # Turn 2 - has full context from Turn 1 273 | result2 = agent(code="...", context="Now fix the bugs you found") 274 | print(result2.summary) 275 | 276 | # Same thread throughout 277 | print(agent.thread_id) 278 | ``` 279 | 280 | ### Pattern 2: Complex Analysis 281 | 282 | ```python 283 | class SecurityAudit(BaseModel): 284 | vulnerabilities: list[Vulnerability] 285 | risk_score: float = Field(description="0-10 risk score") 286 | recommendations: list[str] 287 | 288 | class TestCoverage(BaseModel): 289 | covered_functions: list[str] 290 | uncovered_functions: list[str] 291 | coverage_percent: float 292 | 293 | sig = dspy.Signature( 294 | "codebase: str, 
focus_areas: list[str] -> " 295 | "security: SecurityAudit, tests: TestCoverage, report: str", 296 | "Perform security audit and test coverage analysis" 297 | ) 298 | 299 | agent = CodexAgent(sig, working_directory="/path/to/project") 300 | result = agent( 301 | codebase="src/", 302 | focus_areas=["authentication", "data validation"] 303 | ) 304 | 305 | print(f"Risk Score: {result.security.risk_score}") 306 | print(f"Coverage: {result.tests.coverage_percent}%") 307 | print(f"Report:\n{result.report}") 308 | ``` 309 | 310 | ### Pattern 3: Inspecting Execution Trace 311 | 312 | ```python 313 | from codex import CommandExecutionItem, FileChangeItem 314 | 315 | result = agent(code="...", context="Fix the bug") 316 | 317 | # What commands ran? 318 | commands = [item for item in result.trace if isinstance(item, CommandExecutionItem)] 319 | for cmd in commands: 320 | print(f"$ {cmd.command}") 321 | print(f" Exit: {cmd.exit_code}") 322 | 323 | # What files changed? 324 | files = [item for item in result.trace if isinstance(item, FileChangeItem)] 325 | for f in files: 326 | for change in f.changes: 327 | print(f" {change.kind}: {change.path}") 328 | ``` 329 | 330 | ### Pattern 4: Safe Execution with Sandbox 331 | 332 | ```python 333 | from codex import SandboxMode 334 | 335 | # Read-only (safest - for analysis tasks) 336 | agent = CodexAgent(sig, working_directory=".", sandbox_mode=SandboxMode.READ_ONLY) 337 | 338 | # Can modify workspace (for fix/refactor tasks) 339 | agent = CodexAgent(sig, working_directory=".", sandbox_mode=SandboxMode.WORKSPACE_WRITE) 340 | 341 | # Full access (use with caution!) 
342 | agent = CodexAgent(sig, working_directory=".", sandbox_mode=SandboxMode.DANGER_FULL_ACCESS) 343 | ``` 344 | 345 | ## Advanced Examples 346 | 347 | ### Code Review with Multiple Outputs 348 | 349 | ```python 350 | class Issue(BaseModel): 351 | severity: Literal["critical", "high", "medium", "low"] 352 | file: str 353 | line: int 354 | description: str 355 | suggestion: str 356 | 357 | class ReviewResult(BaseModel): 358 | approved: bool 359 | issues: list[Issue] 360 | 361 | sig = dspy.Signature( 362 | "diff: str, guidelines: str -> review: ReviewResult, summary: str", 363 | "Review code changes against guidelines" 364 | ) 365 | 366 | agent = CodexAgent(sig, working_directory=".", sandbox_mode=SandboxMode.READ_ONLY) 367 | 368 | result = agent( 369 | diff=open("changes.diff").read(), 370 | guidelines="No hardcoded secrets. All functions must have docstrings." 371 | ) 372 | 373 | if not result.review.approved: 374 | print("Review failed!") 375 | for issue in result.review.issues: 376 | print(f" [{issue.severity}] {issue.file}:{issue.line}") 377 | print(f" {issue.description}") 378 | print(f" Suggestion: {issue.suggestion}") 379 | ``` 380 | 381 | ### Repository Analysis Pipeline 382 | 383 | ```python 384 | # Step 1: Gather stats 385 | class RepoStats(BaseModel): 386 | total_files: int 387 | languages: dict[str, int] # language -> file count 388 | largest_files: list[str] 389 | 390 | stats_agent = CodexAgent( 391 | dspy.Signature("path: str -> stats: RepoStats"), 392 | working_directory="." 393 | ) 394 | stats = stats_agent(path=".").stats 395 | 396 | # Step 2: Architecture analysis (uses stats as context) 397 | class Component(BaseModel): 398 | name: str 399 | responsibility: str 400 | dependencies: list[str] 401 | 402 | arch_agent = CodexAgent( 403 | dspy.Signature("repo_info: str -> components: list[Component], diagram: str"), 404 | working_directory="." 
405 | ) 406 | arch = arch_agent( 407 | repo_info=f"Languages: {stats.languages}, Files: {stats.total_files}" 408 | ) 409 | 410 | print("Components:") 411 | for comp in arch.components: 412 | print(f" {comp.name}: {comp.responsibility}") 413 | print(f"\nDiagram:\n{arch.diagram}") 414 | ``` 415 | 416 | ## Trace Item Types 417 | 418 | | Type | Description | 419 | |------|-------------| 420 | | `AgentMessageItem` | Agent's text response | 421 | | `ReasoningItem` | Agent's internal reasoning | 422 | | `CommandExecutionItem` | Shell command execution | 423 | | `FileChangeItem` | File modifications | 424 | | `McpToolCallItem` | MCP tool invocation | 425 | | `WebSearchItem` | Web search performed | 426 | | `TodoListItem` | Task list created | 427 | | `ErrorItem` | Error that occurred | 428 | 429 | ## Design Philosophy 430 | 431 | ### Why Two-Turn Pattern? 432 | 433 | Traditional structured output forces the model to think about JSON formatting while doing complex agentic work. This is out-of-distribution - models are trained to reason naturally, then format at the end. 434 | 435 | Our two-turn pattern: 436 | 1. **Turn 1**: Agent works naturally (reads files, runs commands, reasons) 437 | 2. **Turn 2**: Agent formats findings into structure (quick, focused) 438 | 439 | This keeps the agent in-distribution during the actual work. 440 | 441 | ### Why Stateful Threads? 442 | 443 | Agents often need multi-turn context ("fix the bug" → "write tests for it"). Stateful threads make this natural. Want fresh context? Create a new agent instance. 
444 | 445 | ## Development 446 | 447 | ```bash 448 | # Install dev dependencies 449 | uv sync --extra dev 450 | 451 | # Run tests 452 | uv run pytest 453 | 454 | # Run pre-commit hooks 455 | uv run pre-commit run --all-files 456 | ``` 457 | 458 | ### Test Structure 459 | 460 | Tests are co-located under `src/tests/` and the pre-commit hook enforces the 461 | two approved locations: 462 | 463 | ``` 464 | src/ 465 | ├── codex_dspy/ 466 | │ ├── adapter.py 467 | │ └── agent.py 468 | └── tests/ 469 | ├── unit/ # example-based tests 470 | └── property/ # hypothesis property tests 471 | ``` 472 | 473 | ## Contributing 474 | 475 | Issues and PRs welcome! 476 | 477 | ## License 478 | 479 | See LICENSE file. 480 | 481 | ## Related Documentation 482 | 483 | - [Codex SDK API Reference](./docs/CODEX_SDK_API_SURFACE.md) 484 | - [Codex Architecture](./docs/CODEX_ARCHITECTURE.md) 485 | - [DSPy Documentation](https://dspy-docs.vercel.app/) 486 | -------------------------------------------------------------------------------- /src/tests/property/test_adapter_props.py: -------------------------------------------------------------------------------- 1 | """Property-based tests for adapter parsing, rendering, and schema helpers. 2 | 3 | Runs quickly (low example counts) but now covers a broader grammar: 4 | - primitives, Pydantic models (static + generated), dict[str, T], list[T], T | None, 5 | and simple non-None unions for rendering invariants. 6 | - schema build/parse round-trips for all parse-supported annotations. 
7 | """ 8 | 9 | from __future__ import annotations 10 | 11 | import inspect 12 | import types 13 | from functools import reduce 14 | from typing import Any, Literal, Union, get_args, get_origin 15 | 16 | from hypothesis import assume, given, settings, strategies as st 17 | from pydantic import BaseModel 18 | 19 | from codex_dspy.agent import _build_output_schema, _parse_output_value 20 | from codex_dspy.adapter import CodexAdapter, _is_optional_type, _render_type_str, _ts_type 21 | 22 | # Keep property runs fast 23 | TEST_SETTINGS = settings(max_examples=75, deadline=None) 24 | 25 | 26 | class SmallModel(BaseModel): 27 | number: int 28 | text: str | None = None 29 | 30 | 31 | class FlagModel(BaseModel): 32 | flag: bool 33 | note: str | None = None 34 | 35 | 36 | def _model_dict_strategy() -> st.SearchStrategy[dict[str, Any]]: 37 | return st.builds( 38 | lambda n, t: {"number": n, "text": t}, 39 | n=st.integers(-100, 100), 40 | t=st.one_of(st.none(), st.text(max_size=16)), 41 | ) 42 | 43 | 44 | def _flag_model_dict_strategy() -> st.SearchStrategy[dict[str, Any]]: 45 | return st.builds( 46 | lambda f, n: {"flag": f, "note": n}, 47 | f=st.booleans(), 48 | n=st.one_of(st.none(), st.text(max_size=16)), 49 | ) 50 | 51 | 52 | BASE_ANNOTATIONS = [str, int, float, bool, SmallModel, FlagModel] 53 | 54 | 55 | def _generated_model_strategy() -> st.SearchStrategy[type[BaseModel]]: 56 | """Generate small pydantic models (1-2 fields) over primitives/bool/optional str.""" 57 | 58 | primitive_types = st.sampled_from([str, int, bool]) 59 | 60 | @st.composite 61 | def _model(draw): 62 | field_count = draw(st.integers(min_value=1, max_value=2)) 63 | fields = {} 64 | for i in range(field_count): 65 | base = draw(primitive_types) 66 | optional = draw(st.booleans()) 67 | ann = base | None if optional else base 68 | fields[f"f{i}"] = (ann, ...) 
69 | # create_model gives unique class per draw 70 | from pydantic import create_model 71 | 72 | return create_model("GenModel", **fields) # type: ignore[arg-type] 73 | 74 | return _model() 75 | 76 | 77 | def _annotation_strategy(include_unions: bool = True, include_dict: bool = True, max_union: int = 2) -> st.SearchStrategy[Any]: 78 | """Generate annotations: primitives, models, list, optional, dict[str, T], unions.""" 79 | 80 | literal_values = st.one_of( 81 | st.text(max_size=4), 82 | st.integers(-2, 2), 83 | st.booleans(), 84 | ) 85 | 86 | base = st.one_of( 87 | st.sampled_from(BASE_ANNOTATIONS), 88 | _generated_model_strategy(), 89 | literal_values.map(lambda v: Literal[v]), 90 | ) 91 | 92 | def expand(children: st.SearchStrategy[Any]) -> st.SearchStrategy[Any]: 93 | parts = [ 94 | children.map(lambda ann: list[ann]), # list[T] 95 | children.map(lambda ann: ann | None), # Optional[T] 96 | ] 97 | if include_dict: 98 | parts.append( 99 | st.tuples(st.just(str), children).map(lambda t: dict[t[0], t[1]]) # type: ignore[index] 100 | ) 101 | if include_unions: 102 | # unions of size 2..max_union 103 | union = st.lists(children, min_size=2, max_size=max_union).map(tuple) 104 | parts.append(union.map(_dedupe_union)) 105 | return st.one_of(*parts) 106 | 107 | return st.recursive(base, expand, max_leaves=14) 108 | 109 | 110 | def _annotation_without_optional_strategy() -> st.SearchStrategy[Any]: 111 | return _annotation_strategy(max_union=5).filter(lambda ann: not _is_optional_type(ann)) 112 | 113 | 114 | def _value_strategy_for_annotation(annotation: Any) -> st.SearchStrategy[Any]: 115 | origin = get_origin(annotation) 116 | 117 | if annotation is str: 118 | return st.text(max_size=8) 119 | if annotation is int: 120 | return st.integers(-50, 50) 121 | if annotation is float: 122 | return st.floats(-50, 50, allow_nan=False, allow_infinity=False) 123 | if annotation is bool: 124 | return st.booleans() 125 | 126 | if inspect.isclass(annotation) and issubclass(annotation, 
BaseModel): 127 | fields = annotation.model_fields 128 | if fields: 129 | items = {} 130 | for name, field in fields.items(): 131 | strat = _value_strategy_for_annotation(field.annotation) 132 | if not field.is_required(): 133 | strat = st.one_of(st.none(), strat) 134 | items[name] = strat 135 | return st.fixed_dictionaries(items) 136 | return st.just({}) 137 | 138 | if origin is list: 139 | inner = get_args(annotation)[0] 140 | strat = st.lists(_value_strategy_for_annotation(inner), max_size=4) 141 | # If inner is not optional, avoid generating None elements 142 | inner_origin = get_origin(inner) 143 | inner_has_none = inner_origin in (Union, types.UnionType) and type(None) in get_args(inner) 144 | if not inner_has_none and inner is not type(None): 145 | strat = strat.filter(lambda xs: all(x is not None for x in xs)) 146 | return strat 147 | 148 | if origin is Literal: 149 | allowed = get_args(annotation) 150 | return st.sampled_from(allowed) 151 | 152 | if origin is dict: 153 | _, val_type = get_args(annotation) 154 | strat = st.dictionaries( 155 | keys=st.text(min_size=0, max_size=8), 156 | values=_value_strategy_for_annotation(val_type), 157 | max_size=4, 158 | ) 159 | val_origin = get_origin(val_type) 160 | val_has_none = val_origin in (Union, types.UnionType) and type(None) in get_args(val_type) 161 | if not val_has_none and val_type is not type(None): 162 | strat = strat.filter(lambda d: all(v is not None for v in d.values())) 163 | return strat 164 | 165 | if origin is types.UnionType or origin is getattr(types, "UnionType", object): 166 | args = get_args(annotation) 167 | if type(None) in args: 168 | non_none = [a for a in args if a is not type(None)] 169 | # choose None or one branch 170 | return st.one_of( 171 | st.none(), 172 | st.sampled_from(non_none).flatmap(_value_strategy_for_annotation), 173 | ) 174 | # Non-optional union: pick one branch and generate a value for it 175 | return st.sampled_from(args).flatmap(_value_strategy_for_annotation) 176 | 
177 | return st.just(None) 178 | 179 | 180 | @st.composite 181 | def annotation_and_value(draw) -> tuple[Any, Any]: 182 | ann = draw(_annotation_strategy(include_unions=False)) # parse-supported 183 | val = draw(_value_strategy_for_annotation(ann)) 184 | return ann, val 185 | 186 | 187 | @st.composite 188 | def two_annotations(draw) -> tuple[Any, Any]: 189 | return draw(_annotation_strategy()), draw(_annotation_strategy()) 190 | 191 | 192 | def _assert_matches_annotation(parsed: Any, annotation: Any) -> None: 193 | origin = get_origin(annotation) 194 | 195 | if annotation is str: 196 | assert isinstance(parsed, str) 197 | return 198 | if annotation is int: 199 | assert isinstance(parsed, int) 200 | return 201 | if annotation is float: 202 | assert isinstance(parsed, float) 203 | return 204 | if annotation is bool: 205 | assert isinstance(parsed, bool) 206 | return 207 | if inspect.isclass(annotation) and issubclass(annotation, BaseModel): 208 | assert isinstance(parsed, annotation) 209 | return 210 | 211 | if origin is Literal: 212 | allowed = get_args(annotation) 213 | assert parsed in allowed 214 | return 215 | 216 | if origin is list: 217 | inner = get_args(annotation)[0] 218 | assert isinstance(parsed, list) 219 | for item in parsed: 220 | _assert_matches_annotation(item, inner) 221 | return 222 | 223 | if origin is dict: 224 | val_type = get_args(annotation)[1] 225 | assert isinstance(parsed, dict) 226 | for v in parsed.values(): 227 | _assert_matches_annotation(v, val_type) 228 | return 229 | 230 | if origin is types.UnionType or origin is getattr(types, "UnionType", object): 231 | if parsed is None: 232 | return 233 | args = get_args(annotation) 234 | non_none = [a for a in args if a is not type(None)] 235 | # If optional, use the single non-None branch; otherwise accept any branch match 236 | if len(args) == 2 and type(None) in args and len(non_none) == 1: 237 | _assert_matches_annotation(parsed, non_none[0]) 238 | else: 239 | assert 
any(_matches_annotation(parsed, a) for a in args) 240 | return 241 | 242 | return 243 | 244 | 245 | def _matches_annotation(parsed: Any, annotation: Any) -> bool: 246 | try: 247 | _assert_matches_annotation(parsed, annotation) 248 | return True 249 | except AssertionError: 250 | return False 251 | 252 | 253 | def _dedupe_union(members: tuple[Any, ...]) -> Any: 254 | """Deduplicate while preserving order; collapse to single type if only one remains.""" 255 | 256 | def _equivalent(a: Any, b: Any) -> bool: 257 | if a is b: 258 | return True 259 | if inspect.isclass(a) and inspect.isclass(b): 260 | if issubclass(a, BaseModel) and issubclass(b, BaseModel): 261 | return a.__name__ == b.__name__ 262 | return False 263 | 264 | unique: list[Any] = [] 265 | for m in members: 266 | if not any(_equivalent(m, u) for u in unique): 267 | unique.append(m) 268 | if len(unique) == 1: 269 | return unique[0] 270 | return reduce(lambda a, b: a | b, unique) 271 | 272 | 273 | # --- Parsing invariants (existing coverage) --- 274 | 275 | 276 | @TEST_SETTINGS 277 | @given(st.lists(_model_dict_strategy(), max_size=5)) 278 | def test_list_of_models_validated(dicts: list[dict[str, Any]]): 279 | result = _parse_output_value(dicts, list[SmallModel]) 280 | assert len(result) == len(dicts) 281 | assert all(isinstance(item, SmallModel) for item in result) 282 | 283 | 284 | @TEST_SETTINGS 285 | @given(st.lists(_model_dict_strategy(), max_size=5)) 286 | def test_optional_list_of_models_validated_when_present(dicts: list[dict[str, Any]]): 287 | result = _parse_output_value(dicts, list[SmallModel] | None) 288 | assert result is not None 289 | assert len(result) == len(dicts) 290 | assert all(isinstance(item, SmallModel) for item in result) 291 | 292 | 293 | def test_optional_list_of_models_allows_none(): 294 | assert _parse_output_value(None, list[SmallModel] | None) is None 295 | 296 | 297 | @TEST_SETTINGS 298 | @given(st.lists(st.one_of(_model_dict_strategy(), st.none()), max_size=6)) 299 | def 
test_list_of_optional_models_preserves_nones(items: list[dict[str, Any] | None]): 300 | result = _parse_output_value(items, list[SmallModel | None]) 301 | assert len(result) == len(items) 302 | for source, parsed in zip(items, result): 303 | if source is None: 304 | assert parsed is None 305 | else: 306 | assert isinstance(parsed, SmallModel) 307 | 308 | 309 | @TEST_SETTINGS 310 | @given(_model_dict_strategy()) 311 | def test_optional_model_validates_dict(value: dict[str, Any]): 312 | result = _parse_output_value(value, SmallModel | None) 313 | assert isinstance(result, SmallModel) 314 | 315 | 316 | def test_optional_model_allows_none(): 317 | assert _parse_output_value(None, SmallModel | None) is None 318 | 319 | 320 | @TEST_SETTINGS 321 | @given(st.lists(st.text(max_size=8), max_size=6)) 322 | def test_primitive_list_passthrough(values: list[str]): 323 | assert _parse_output_value(values, list[str]) == values 324 | 325 | 326 | PRIMITIVE_TYPES = st.sampled_from([str, int, float, bool]) 327 | 328 | 329 | @TEST_SETTINGS 330 | @given(PRIMITIVE_TYPES) 331 | def test_is_optional_type_positive(base_type: type): 332 | assert _is_optional_type(base_type | None) is True 333 | 334 | 335 | @TEST_SETTINGS 336 | @given(PRIMITIVE_TYPES, PRIMITIVE_TYPES) 337 | def test_is_optional_type_negative(t1: type, t2: type): 338 | annotation = t1 | t2 339 | assert _is_optional_type(annotation) is False 340 | 341 | 342 | # --- Rendering invariants --- 343 | 344 | 345 | @TEST_SETTINGS 346 | @given(_annotation_without_optional_strategy()) 347 | def test_ts_type_optional_monotonic(annotation: Any): 348 | base = _ts_type(annotation) 349 | optional = _ts_type(annotation | None) 350 | assert "null" in optional 351 | base_set = set(part.strip() for part in base.split("|")) 352 | opt_set = set(part.strip() for part in optional.split("|")) 353 | assert base_set.issubset(opt_set) 354 | 355 | 356 | @TEST_SETTINGS 357 | @given(_annotation_without_optional_strategy()) 358 | def 
test_render_type_str_optional_monotonic(annotation: Any): 359 | base = _render_type_str(annotation) 360 | optional = _render_type_str(annotation | None) 361 | assert "null" in optional 362 | simplified_base = base.replace(" or null", "") 363 | assert simplified_base in optional or base in optional 364 | 365 | 366 | # --- Schema/build/parse coverage --- 367 | 368 | 369 | class _MockFieldInfo: 370 | def __init__(self, annotation: Any, description: str | None = None): 371 | self.annotation = annotation 372 | self.description = description 373 | 374 | 375 | class _MockSignature: 376 | def __init__(self, output_fields: dict[str, tuple[Any, str | None]]): 377 | self.input_fields = {} 378 | self.instructions = "" 379 | self.output_fields = { 380 | name: _MockFieldInfo(ann, desc) for name, (ann, desc) in output_fields.items() 381 | } 382 | 383 | 384 | @TEST_SETTINGS 385 | @given(annotation_and_value()) 386 | def test_schema_build_and_parse_round_trip(pair: tuple[Any, Any]): 387 | annotation, value = pair 388 | sig = _MockSignature({"result": (annotation, None)}) 389 | 390 | schema = _build_output_schema(sig) 391 | assert "properties" in schema and "result" in schema["properties"] 392 | 393 | parsed = _parse_output_value(value, annotation) 394 | _assert_matches_annotation(parsed, annotation) 395 | 396 | 397 | @TEST_SETTINGS 398 | @given(_annotation_strategy(max_union=5), st.data()) 399 | def test_parse_union_multi_branch(annotation: Any, data): 400 | # Only test unions with >=2 branches 401 | origin = get_origin(annotation) 402 | assume(origin is types.UnionType or origin is Union) 403 | args = get_args(annotation) 404 | assume(len(args) >= 2) 405 | 406 | # Generate a value matching one branch 407 | branch = data.draw(st.sampled_from(args)) 408 | if branch is type(None): 409 | value = None 410 | else: 411 | value = data.draw(_value_strategy_for_annotation(branch)) 412 | 413 | parsed = _parse_output_value(value, annotation) 414 | if branch is type(None): 415 | assert parsed 
is None 416 | else: 417 | assert any(_matches_annotation(parsed, a) for a in args if a is not type(None)) 418 | 419 | 420 | @TEST_SETTINGS 421 | @given(two_annotations()) 422 | def test_format_turn2_json_handles_multiple_outputs(pair: tuple[Any, Any]): 423 | ann1, ann2 = pair 424 | sig = _MockSignature( 425 | { 426 | "first": (ann1, "first output"), 427 | "second": (ann2, "second output"), 428 | } 429 | ) 430 | 431 | adapter = CodexAdapter() 432 | out = adapter.format_turn2_json(sig) 433 | assert "first" in out and "second" in out 434 | assert "{" in out and "}" in out 435 | -------------------------------------------------------------------------------- /src/codex_dspy/agent.py: -------------------------------------------------------------------------------- 1 | """CodexAgent - DSPy module wrapping OpenAI Codex SDK. 2 | 3 | This module provides a signature-driven interface to the Codex agent SDK. 4 | Each CodexAgent instance maintains a stateful thread that accumulates context 5 | across multiple forward() calls. 6 | 7 | Uses a two-turn pattern: 8 | - Turn 1: Natural task execution (agent does work) 9 | - Turn 2: Structured output extraction (agent formats findings) 10 | """ 11 | 12 | import inspect 13 | import json 14 | import re 15 | import types 16 | from typing import Any, Literal, Optional, Union, get_args, get_origin 17 | 18 | from pydantic import BaseModel 19 | 20 | import dspy 21 | from dspy.primitives.prediction import Prediction 22 | from dspy.signatures.signature import Signature, ensure_signature 23 | 24 | from codex import Codex, CodexOptions, ModelReasoningEffort, SandboxMode, ThreadOptions, TurnOptions 25 | from codex_dspy.adapter import CodexAdapter 26 | 27 | 28 | def _combine_usage(usage1, usage2): 29 | """Combine token usage from two turns. 
30 | 31 | Args: 32 | usage1: Usage from first turn (may be None) 33 | usage2: Usage from second turn (may be None) 34 | 35 | Returns: 36 | Combined usage with summed token counts, or whichever is not None 37 | """ 38 | if usage1 is None: 39 | return usage2 40 | if usage2 is None: 41 | return usage1 42 | 43 | # Both exist - sum the token counts 44 | # Create a new usage-like object with combined counts 45 | from codex import Usage 46 | return Usage( 47 | input_tokens=(usage1.input_tokens or 0) + (usage2.input_tokens or 0), 48 | output_tokens=(usage1.output_tokens or 0) + (usage2.output_tokens or 0), 49 | cached_input_tokens=(usage1.cached_input_tokens or 0) + (usage2.cached_input_tokens or 0), 50 | ) 51 | 52 | 53 | def _strip_json_fences(text: str) -> str: 54 | """Strip markdown JSON fences from response if present. 55 | 56 | Handles: 57 | ```json\n{...}\n``` 58 | ```\n{...}\n``` 59 | {..} (no fences - returned as-is) 60 | """ 61 | text = text.strip() 62 | 63 | # Pattern for ```json ... ``` or ``` ... ``` 64 | fence_pattern = re.compile(r'^```(?:json)?\s*\n?(.*?)\n?```$', re.DOTALL) 65 | match = fence_pattern.match(text) 66 | if match: 67 | return match.group(1).strip() 68 | 69 | return text 70 | 71 | 72 | def _parse_output_value(value: Any, annotation: type) -> Any: 73 | """Parse a single output value according to its type annotation. 
74 | 75 | Handles: 76 | - None values (pass through) 77 | - list[T] with recursive validation (including optional/model inner types) 78 | - dict[K, V] with recursive validation of V 79 | - Model | None - validate if dict, pass through if None 80 | - Direct PydanticModel - validate dict 81 | - Primitives and other types - pass through 82 | 83 | Args: 84 | value: The raw value from JSON 85 | annotation: The type annotation for this field 86 | 87 | Returns: 88 | The parsed/validated value 89 | """ 90 | if value is None: 91 | return None 92 | 93 | origin = get_origin(annotation) 94 | 95 | # Handle list types 96 | if origin is list and isinstance(value, list): 97 | inner_type = get_args(annotation)[0] if get_args(annotation) else None 98 | if inner_type: 99 | return [_parse_output_value(v, inner_type) for v in value] 100 | return value 101 | 102 | # Handle dict types (validate values recursively) 103 | if origin is dict and isinstance(value, dict): 104 | key_type, val_type = (get_args(annotation) + (None, None))[:2] 105 | if val_type: 106 | return {k: _parse_output_value(v, val_type) for k, v in value.items()} 107 | return value 108 | 109 | # Handle Literal[...] 
validation 110 | if origin is Literal: 111 | allowed = set(get_args(annotation)) 112 | if value in allowed: 113 | return value 114 | raise ValueError(f"Literal value {value!r} not in allowed set {allowed!r}") 115 | 116 | # Handle Union types (including Optional and multi-branch unions) 117 | if origin is Union or origin is types.UnionType: 118 | args = get_args(annotation) 119 | has_none = type(None) in args 120 | 121 | # Optional shortcut 122 | if value is None and has_none: 123 | return None 124 | 125 | def _matches_annotation(val: Any, ann: Any) -> bool: 126 | ann_origin = get_origin(ann) 127 | if ann is str: 128 | return isinstance(val, str) 129 | if ann is int: 130 | return isinstance(val, int) 131 | if ann is float: 132 | return isinstance(val, float) 133 | if ann is bool: 134 | return isinstance(val, bool) 135 | if inspect.isclass(ann) and issubclass(ann, BaseModel): 136 | return isinstance(val, ann) 137 | if ann_origin is list: 138 | if not isinstance(val, list): 139 | return False 140 | inner = get_args(ann)[0] if get_args(ann) else Any 141 | return all(_matches_annotation(elem, inner) for elem in val) 142 | if ann_origin is dict: 143 | if not isinstance(val, dict): 144 | return False 145 | val_type = get_args(ann)[1] if len(get_args(ann)) > 1 else Any 146 | return all(_matches_annotation(v, val_type) for v in val.values()) 147 | if ann_origin is Literal: 148 | return val in get_args(ann) 149 | if ann_origin is Union or ann_origin is types.UnionType: 150 | inner_args = get_args(ann) 151 | if type(None) in inner_args and val is None: 152 | return True 153 | return any(_matches_annotation(val, b) for b in inner_args if b is not type(None)) 154 | if isinstance(ann, type): 155 | return isinstance(val, ann) 156 | return True 157 | 158 | # Try each non-None branch until one succeeds 159 | last_error = None 160 | for branch in args: 161 | if branch is type(None): 162 | continue 163 | try: 164 | candidate = _parse_output_value(value, branch) 165 | if 
_matches_annotation(candidate, branch): 166 | return candidate 167 | except Exception as e: # noqa: BLE001 168 | last_error = e 169 | continue 170 | 171 | raise ValueError( 172 | f"Value {value!r} did not match any Union branch {args}" 173 | ) from last_error 174 | 175 | # Handle direct Pydantic model 176 | if hasattr(annotation, "model_validate"): 177 | if isinstance(value, dict): 178 | return annotation.model_validate(value) 179 | return value 180 | 181 | # Primitives and other types - pass through 182 | return value 183 | 184 | 185 | def _is_all_str_outputs(signature: Signature) -> bool: 186 | """Check if all output fields are str or Optional[str].""" 187 | for field in signature.output_fields.values(): 188 | annotation = field.annotation 189 | if annotation == str: 190 | continue 191 | origin = get_origin(annotation) 192 | # Handle both typing.Union and types.UnionType (PEP 604: str | None) 193 | if origin is Union or origin is types.UnionType: 194 | args = get_args(annotation) 195 | if len(args) == 2 and str in args and type(None) in args: 196 | continue 197 | return False 198 | return True 199 | 200 | 201 | def _ensure_additional_properties_false(schema: dict[str, Any]) -> None: 202 | """Recursively add additionalProperties: false to all object schemas. 203 | 204 | The OpenAI API requires all object schemas to have additionalProperties: false. 205 | This mutates the schema in place. 
    """
    # Non-dict nodes (e.g. booleans inside a schema) are left untouched.
    if not isinstance(schema, dict):
        return

    # If this is an object type, ensure additionalProperties is false
    if schema.get("type") == "object":
        schema["additionalProperties"] = False

    # Recurse into properties
    if "properties" in schema:
        for prop_schema in schema["properties"].values():
            _ensure_additional_properties_false(prop_schema)

    # Recurse into $defs
    if "$defs" in schema:
        for def_schema in schema["$defs"].values():
            _ensure_additional_properties_false(def_schema)

    # Recurse into array items
    # NOTE(review): assumes "items" is a single schema (draft 2020-12 style);
    # tuple-style "prefixItems" is not visited — confirm it cannot occur here.
    if "items" in schema:
        _ensure_additional_properties_false(schema["items"])

    # Recurse into allOf, anyOf, oneOf
    for key in ("allOf", "anyOf", "oneOf"):
        if key in schema:
            for sub_schema in schema[key]:
                _ensure_additional_properties_false(sub_schema)


def _build_output_schema(signature: Signature) -> dict[str, Any]:
    """Build a combined JSON schema for all output fields.

    Hoists $defs from individual field schemas to the root level so that
    $ref pointers resolve correctly.
    """
    properties = {}
    required = []
    all_defs: dict[str, Any] = {}

    for name, field in signature.output_fields.items():
        annotation = field.annotation
        if annotation == str:
            # Plain strings need no generated schema.
            properties[name] = {"type": "string"}
        elif hasattr(annotation, "model_json_schema"):
            # Pydantic model
            field_schema = annotation.model_json_schema()
            # Hoist $defs to root
            # NOTE(review): colliding $defs names across different fields
            # overwrite each other — confirm model names are unique.
            if "$defs" in field_schema:
                all_defs.update(field_schema.pop("$defs"))
            properties[name] = field_schema
        else:
            # Fallback - try to get schema via pydantic TypeAdapter
            # (handles list[Model], Optional[...], primitives, etc.)
            from pydantic import TypeAdapter
            field_schema = TypeAdapter(annotation).json_schema()
            # Hoist $defs to root
            if "$defs" in field_schema:
                all_defs.update(field_schema.pop("$defs"))
            properties[name] = field_schema

        # Check if required (not Optional)
        # Handle both typing.Union and types.UnionType (PEP 604: str | None)
        origin = get_origin(annotation)
        is_optional = (origin is Union or origin is types.UnionType) and type(None) in get_args(annotation)
        if not is_optional:
            required.append(name)

    schema: dict[str, Any] = {
        "type": "object",
        "properties": properties,
        "required": required,
        "additionalProperties": False,
    }

    # Add hoisted $defs at root level
    if all_defs:
        schema["$defs"] = all_defs

    # Ensure all nested objects have additionalProperties: false
    _ensure_additional_properties_false(schema)

    return schema


class CodexAgent(dspy.Module):
    """DSPy module for Codex SDK integration.

    Creates a stateful agent where each instance maintains one conversation thread.
    Multiple forward() calls on the same instance continue the same conversation.

    Supports multiple input and output fields.
    Uses a two-turn pattern:
    - Turn 1: Agent receives task naturally and does work
    - Turn 2: Agent formats findings into structured output

    Args:
        signature: DSPy signature with any number of input/output fields
        working_directory: Directory where Codex agent will execute commands
        model: Model to use. Defaults to "gpt-5.1-codex-max".
        sandbox_mode: Execution sandbox level (READ_ONLY, WORKSPACE_WRITE, DANGER_FULL_ACCESS)
        model_reasoning_effort: Reasoning-effort setting passed through to ThreadOptions
        skip_git_repo_check: Allow non-git directories as working_directory
        api_key: OpenAI API key (falls back to CODEX_API_KEY env var)
        base_url: API base URL (falls back to OPENAI_BASE_URL env var)
        codex_path_override: Override path to codex binary (for testing)

    Example with multiple fields:
        >>> class BugReport(BaseModel):
        ...     severity: str
        ...     description: str
        >>> sig = dspy.Signature(
        ...     "code: str, context: str -> bugs: list[BugReport], summary: str",
        ...     "Analyze code for bugs"
        ... )
        >>> agent = CodexAgent(sig, working_directory=".")
        >>> result = agent(code="def foo(): ...", context="Production code")
        >>> print(result.bugs)  # list[BugReport]
        >>> print(result.summary)  # str
        >>> print(result.trace)  # execution trace
    """

    def __init__(
        self,
        signature: str | type[Signature],
        working_directory: str,
        model: Optional[str] = None,
        sandbox_mode: Optional[SandboxMode] = None,
        model_reasoning_effort: Optional[ModelReasoningEffort] = None,
        skip_git_repo_check: bool = False,
        api_key: Optional[str] = None,
        base_url: Optional[str] = None,
        codex_path_override: Optional[str] = None,
    ):
        super().__init__()

        # Ensure signature is valid (accepts a signature string or class)
        self.signature = ensure_signature(signature)

        # Validate: at least 1 input and 1 output field.
        # Multiple fields of each kind are supported.
        if len(self.signature.input_fields) < 1:
            raise ValueError(
                "CodexAgent requires at least 1 input field.\n"
                "Example: dspy.Signature('message:str -> answer:str')"
            )

        if len(self.signature.output_fields) < 1:
            raise ValueError(
                "CodexAgent requires at least 1 output field.\n"
                "Example: dspy.Signature('message:str -> answer:str')"
            )

        # Create adapter for formatting turn prompts and parsing responses
        self.adapter = CodexAdapter()

        # Create Codex client (api_key/base_url fall back to env vars inside the SDK)
        self.client = Codex(
            options=CodexOptions(
                api_key=api_key,
                base_url=base_url,
                codex_path_override=codex_path_override,
            )
        )

        # Start thread (1 agent instance = 1 stateful thread); conversation
        # history accumulates across forward() calls on this instance.
        self.thread = self.client.start_thread(
            options=ThreadOptions(
                working_directory=working_directory,
                model=model,
                sandbox_mode=sandbox_mode,
                model_reasoning_effort=model_reasoning_effort,
                skip_git_repo_check=skip_git_repo_check,
            )
        )

    def forward(self, **kwargs) -> Prediction:
        """Execute agent with
        input fields.

        Args:
            **kwargs: Must contain all input fields specified in signature

        Returns:
            Prediction with:
            - All output fields (typed according to signature)
            - trace: list[ThreadItem] - chronological items (commands, files, etc.)
            - usage: Usage - token counts (input_tokens, cached_input_tokens, output_tokens)

        Raises:
            ValueError: If parsing fails for typed outputs
        """
        # Validate all input fields are provided
        for field_name in self.signature.input_fields:
            if field_name not in kwargs:
                raise ValueError(f"Missing required input field: {field_name}")

        # Turn 1: Natural task execution (agent does the actual work)
        turn1_prompt = self.adapter.format_turn1(self.signature, kwargs)
        task_result = self.thread.run(turn1_prompt)

        # Check if we need structured output extraction
        if _is_all_str_outputs(self.signature):
            # All outputs are strings - parse from natural response
            # For single string output, just return the response
            if len(self.signature.output_fields) == 1:
                output_name = list(self.signature.output_fields.keys())[0]
                return Prediction(
                    **{output_name: task_result.final_response},
                    trace=task_result.items,
                    usage=task_result.usage,
                )
            else:
                # Multiple string outputs - need extraction turn (Turn 2)
                turn2_prompt = self.adapter.format_turn2(self.signature)
                extract_result = self.thread.run(turn2_prompt)
                parsed = self.adapter.parse(self.signature, extract_result.final_response)

                return Prediction(
                    **parsed,
                    trace=task_result.items + extract_result.items,
                    usage=_combine_usage(task_result.usage, extract_result.usage),
                )
        else:
            # Need structured output - Turn 2 with JSON schema enforced by the API
            turn2_prompt = self.adapter.format_turn2_json(self.signature)
            output_schema = _build_output_schema(self.signature)
            turn_options = TurnOptions(output_schema=output_schema)

            extract_result = self.thread.run(turn2_prompt, turn_options)

            # Parse JSON response (strip fences if present)
            try:
                json_str = _strip_json_fences(extract_result.final_response)
                raw_output = json.loads(json_str)
            except json.JSONDecodeError as e:
                raise ValueError(
                    f"Failed to parse JSON response: {e}\n"
                    f"Response: {extract_result.final_response[:500]}"
                ) from e

            # Convert to typed outputs using centralized parsing logic.
            # NOTE(review): raw_output.get(name) yields None when the model
            # omits a key, even for required (non-Optional) fields, so the
            # parsed value silently becomes None — confirm acceptable.
            parsed_outputs = {}
            for name, field in self.signature.output_fields.items():
                value = raw_output.get(name)
                parsed_outputs[name] = _parse_output_value(value, field.annotation)

            return Prediction(
                **parsed_outputs,
                trace=task_result.items + extract_result.items,
                usage=_combine_usage(task_result.usage, extract_result.usage),
            )

    @property
    def thread_id(self) -> Optional[str]:
        """Get thread ID for this agent instance.

        The thread ID is assigned after the first forward() call.
        Useful for debugging and visibility into the conversation state.
459 | 460 | Returns: 461 | Thread ID string, or None if no forward() calls have been made yet 462 | """ 463 | return self.thread.id 464 | -------------------------------------------------------------------------------- /docs/CODEX_ARCHITECTURE.md: -------------------------------------------------------------------------------- 1 | # Codex Python SDK - Architecture & Data Flow 2 | 3 | ## High-Level Architecture 4 | 5 | ``` 6 | ┌─────────────────────────────────────────────────────────────┐ 7 | │ Python Application │ 8 | ├─────────────────────────────────────────────────────────────┤ 9 | │ Codex Python SDK │ 10 | │ ┌────────────┐ │ 11 | │ │ Codex │ Client (main entry point) │ 12 | │ │ Client │ │ 13 | │ └──────┬─────┘ │ 14 | │ │ │ 15 | │ ┌────┴────────────────┐ │ 16 | │ │ │ │ 17 | │ ┌─────────────┐ ┌────────────────┐ │ 18 | │ │ start_thread│ │ resume_thread │ Create/resume thread │ 19 | │ └──────┬──────┘ └────────┬───────┘ │ 20 | │ │ │ │ 21 | │ ┌────▼──────────────────▼────┐ │ 22 | │ │ Thread (conversation) │ │ 23 | │ │ ┌──────────────────────┐ │ │ 24 | │ │ │ run() - sync execute │ │ │ 25 | │ │ │ run_streamed() - async │ Thread Methods │ 26 | │ │ └──────────────────────┘ │ │ 27 | │ └────┬──────────────────────┬─┘ │ 28 | │ │ │ │ 29 | │ ┌──────▼───┐ ┌────────▼────────┐ │ 30 | │ │ThreadRun │ │ ThreadStream │ │ 31 | │ │ Result │ │ (events) │ Response Types │ 32 | │ └──────────┘ └─────────────────┘ │ 33 | │ │ 34 | └─────────────────────────────────────────────────────────────┘ 35 | │ │ 36 | │ (stdin/stdout pipes) │ 37 | │ │ 38 | ┌────────▼──────────────────────────────▼────────────────────┐ 39 | │ Native Codex Binary (Rust) │ 40 | │ (codex exec --experimental-json ...) 
│ 41 | └────────────────────┬─────────────────────────────────────┘ 42 | │ 43 | │ (JSON-Lines events) 44 | │ 45 | ┌────────▼──────────────────────────────────────────────────┐ 46 | │ OpenAI API / Model Backend │ 47 | └───────────────────────────────────────────────────────────┘ 48 | ``` 49 | 50 | ## Data Flow - Synchronous Execution 51 | 52 | ``` 53 | Application Code 54 | │ 55 | │ thread.run("prompt") 56 | ▼ 57 | ┌─────────────────────────────┐ 58 | │ Thread.run() │ 59 | ├─────────────────────────────┤ 60 | │ 1. Prepare schema file │ 61 | │ 2. Build command args │ 62 | │ 3. Call _stream_events() │ 63 | └──────────┬──────────────────┘ 64 | │ 65 | ▼ 66 | ┌─────────────────────────────┐ 67 | │ Thread._stream_events() │ 68 | ├─────────────────────────────┤ 69 | │ 1. Create ExecArgs │ 70 | │ 2. Build CLI command │ 71 | │ 3. Spawn subprocess │ 72 | └──────────┬──────────────────┘ 73 | │ 74 | ▼ 75 | ┌─────────────────────────────┐ 76 | │ CodexExec.run_lines() │ 77 | ├─────────────────────────────┤ 78 | │ 1. Write prompt to stdin │ 79 | │ 2. Read stdout line-by-line │ 80 | │ 3. Parse JSON events │ 81 | │ 4. Yield parsed events │ 82 | └──────────┬──────────────────┘ 83 | │ 84 | ▼ 85 | ┌─────────────────────────────┐ 86 | │ Event Parsing │ 87 | ├─────────────────────────────┤ 88 | │ JSON → ThreadEvent objects │ 89 | │ (ItemStarted, Complete etc) │ 90 | └──────────┬──────────────────┘ 91 | │ 92 | ▼ 93 | ┌─────────────────────────────┐ 94 | │ Thread.run() aggregates │ 95 | ├─────────────────────────────┤ 96 | │ 1. Collect all items │ 97 | │ 2. Extract final response │ 98 | │ 3. 
Get usage info │ 99 | └──────────┬──────────────────┘ 100 | │ 101 | ▼ 102 | ┌─────────────────────────────┐ 103 | │ ThreadRunResult │ 104 | │ {items, final_response, │ 105 | │ usage} │ 106 | └──────────┬──────────────────┘ 107 | │ 108 | ▼ 109 | Application Code (with result) 110 | ``` 111 | 112 | ## Data Flow - Streaming Execution 113 | 114 | ``` 115 | Application Code 116 | │ 117 | │ thread.run_streamed("prompt") 118 | ▼ 119 | ┌─────────────────────────────┐ 120 | │ Thread.run_streamed() │ 121 | ├─────────────────────────────┤ 122 | │ 1. Call _stream_events() │ 123 | │ 2. Wrap in ThreadStream │ 124 | │ 3. Return iterator │ 125 | └──────────┬──────────────────┘ 126 | │ 127 | ▼ 128 | ┌─────────────────────────────┐ 129 | │ ThreadStream (iterator) │ 130 | └──────────┬──────────────────┘ 131 | │ 132 | ▼ 133 | for event in stream: 134 | │ 135 | │ (lazy evaluation) 136 | ▼ 137 | ┌─────────────────────────────┐ 138 | │ _stream_events() generator │ 139 | └──────────┬──────────────────┘ 140 | │ 141 | ├──────────────────────────┐ 142 | │ │ 143 | ▼ ▼ 144 | ┌──────────────────────┐ ┌────────────────────┐ 145 | │ Subprocess stdout │ │ Real-time event │ 146 | │ (JSON-Lines) │ │ processing │ 147 | └────────┬─────────────┘ └────────┬───────────┘ 148 | │ │ 149 | ├─ThreadStartedEvent───────┤ 150 | │ │ 151 | ├─TurnStartedEvent─────────┤ 152 | │ │ 153 | ├─ItemStartedEvent─────────┤ 154 | │ │ 155 | ├─ItemUpdatedEvent─────────┤ 156 | │ │ 157 | ├─ItemCompletedEvent───────┤ 158 | │ │ 159 | ├─TurnCompletedEvent───────┤ 160 | │ ▼ 161 | │ Application processes 162 | │ each event in real-time 163 | │ 164 | └─(yields back to for loop) 165 | ``` 166 | 167 | ## Configuration Hierarchy 168 | 169 | ``` 170 | Global (Codex Client) 171 | │ 172 | ├─ codex_path_override ──┐ 173 | │ │ 174 | ├─ base_url │ Applied to all threads 175 | │ │ 176 | └─ api_key ──────────────┤ 177 | │ 178 | ┌──────┘ 179 | │ 180 | ▼ 181 | Thread-Level 182 | │ 183 | ├─ model ────────┐ 184 | │ │ Applied to all turns 185 | ├─ 
sandbox_mode │ in this thread 186 | │ │ 187 | ├─ working_dir │ 188 | │ │ 189 | └─ skip_git_check┤ 190 | │ 191 | ┌──────┘ 192 | │ 193 | ▼ 194 | Turn-Level 195 | │ 196 | └─ output_schema ◄─ Only for this turn 197 | ``` 198 | 199 | ## Event Lifecycle Sequence 200 | 201 | ``` 202 | ┌──────────────────────────────────────────────────────────┐ 203 | │ Event Sequence │ 204 | ├──────────────────────────────────────────────────────────┤ 205 | │ │ 206 | │ 1. ThreadStartedEvent │ 207 | │ └─ thread_id assigned │ 208 | │ │ 209 | │ 2. TurnStartedEvent │ 210 | │ └─ turn begins │ 211 | │ │ 212 | │ 3. ItemStartedEvent (0 or more) │ 213 | │ └─ item.id, item.type assigned │ 214 | │ │ 215 | │ 4. ItemUpdatedEvent (0 or more) │ 216 | │ └─ item state changes (mid-execution) │ 217 | │ │ 218 | │ 5. ItemCompletedEvent (matches ItemStarted count) │ 219 | │ └─ final item.status set │ 220 | │ │ 221 | │ 6. TurnCompletedEvent or TurnFailedEvent │ 222 | │ └─ turn.usage populated (if completed) │ 223 | │ └─ error info (if failed) │ 224 | │ │ 225 | │ 7. 
ThreadErrorEvent (only if SDK error) │ 226 | │ └─ unrecoverable error │ 227 | │ │ 228 | └──────────────────────────────────────────────────────────┘ 229 | ``` 230 | 231 | ## Item Types & Their Lifecycles 232 | 233 | ``` 234 | ┌─ AgentMessageItem 235 | │ Status: Instant (starts and completes together) 236 | │ Lifecycle: ItemStarted → ItemCompleted 237 | │ 238 | ├─ ReasoningItem 239 | │ Status: Instant (agent's thinking) 240 | │ Lifecycle: ItemStarted → ItemCompleted 241 | │ 242 | ├─ CommandExecutionItem 243 | │ Status: Evolves (IN_PROGRESS → COMPLETED/FAILED) 244 | │ Lifecycle: ItemStarted → ItemUpdated* → ItemCompleted 245 | │ 246 | ├─ FileChangeItem 247 | │ Status: Evolves (patch application) 248 | │ Lifecycle: ItemStarted → ItemUpdated* → ItemCompleted 249 | │ Status Values: COMPLETED or FAILED 250 | │ 251 | ├─ McpToolCallItem 252 | │ Status: Evolves (IN_PROGRESS → COMPLETED/FAILED) 253 | │ Lifecycle: ItemStarted → ItemUpdated* → ItemCompleted 254 | │ 255 | ├─ WebSearchItem 256 | │ Status: Instant 257 | │ Lifecycle: ItemStarted → ItemCompleted 258 | │ 259 | ├─ TodoListItem 260 | │ Status: Static list 261 | │ Lifecycle: ItemStarted → ItemCompleted 262 | │ 263 | └─ ErrorItem 264 | Status: Error state 265 | Lifecycle: ItemStarted → ItemCompleted 266 | ``` 267 | 268 | ## Message Flow to API 269 | 270 | ``` 271 | ┌─ Thread.run(prompt, TurnOptions) 272 | │ 273 | ├─ Previous turn items collected into history 274 | │ 275 | ├─ Prompt converted to user message 276 | │ 277 | ├─ Schema validated & written to temp file (if needed) 278 | │ 279 | └─ Binary invoked with: 280 | │ 281 | ├─ stdin: prompt 282 | ├─ --model: model name 283 | ├─ --sandbox: sandbox mode 284 | ├─ --cd: working directory 285 | ├─ --output-schema: schema file path (if provided) 286 | ├─ resume: thread_id (if resuming) 287 | │ 288 | └─ environ: 289 | ├─ OPENAI_BASE_URL: from CodexOptions.base_url 290 | ├─ CODEX_API_KEY: from CodexOptions.api_key 291 | └─ CODEX_INTERNAL_ORIGINATOR_OVERRIDE: "codex_sdk_py" 
292 | ``` 293 | 294 | ## State Management 295 | 296 | ``` 297 | Thread Object State 298 | │ 299 | ├─ _id: Optional[str] 300 | │ │ Initially None 301 | │ │ Set when ThreadStartedEvent received 302 | │ │ Accessible via thread.id property 303 | │ │ 304 | │ └─ Persists across multiple run() calls 305 | │ 306 | ├─ _codex_options: CodexOptions (immutable) 307 | │ │ Set at client creation 308 | │ │ Never changes 309 | │ │ 310 | │ └─ Shared across threads 311 | │ 312 | ├─ _thread_options: ThreadOptions (immutable) 313 | │ │ Set at thread creation 314 | │ │ Never changes 315 | │ │ 316 | │ └─ Per-thread configuration 317 | │ 318 | └─ _exec: CodexExec (immutable) 319 | │ Set at client creation 320 | │ Manages binary invocation 321 | │ 322 | └─ Shared across threads 323 | ``` 324 | 325 | ## Memory & Resource Management 326 | 327 | ``` 328 | ┌─ Subprocess Lifecycle 329 | │ │ 330 | │ ├─ Spawned: subprocess.Popen() 331 | │ │ 332 | │ ├─ Input: prompt written to stdin 333 | │ │ 334 | │ ├─ Output: stdout read line-by-line 335 | │ │ 336 | │ ├─ Stderr: collected in background thread 337 | │ │ 338 | │ └─ Cleanup: 339 | │ ├─ stdout.close() 340 | │ ├─ stderr.close() 341 | │ ├─ process.wait() or process.kill() 342 | │ └─ Guaranteed in finally block 343 | │ 344 | ├─ Schema File Lifecycle (TempFile Context Manager) 345 | │ │ 346 | │ ├─ Created: tempfile.TemporaryDirectory() 347 | │ │ 348 | │ ├─ Used: schema.json written 349 | │ │ 350 | │ ├─ Passed: --output-schema /tmp/codex-output-schema-{id}/schema.json 351 | │ │ 352 | │ └─ Cleaned: __exit__() removes temp directory 353 | │ 354 | └─ Event Objects (Immutable) 355 | │ 356 | ├─ All ThreadEvent objects are frozen dataclasses 357 | │ 358 | ├─ All ThreadItem objects are frozen dataclasses 359 | │ 360 | └─ Safe to store and reference across code 361 | ``` 362 | 363 | ## Error Handling Flow 364 | 365 | ``` 366 | ┌─ Exception Raised 367 | │ 368 | ├──────────────────────────────────┐ 369 | │ │ 370 | ▼ ▼ 371 | Platform/Binary Error SDK/API Error 372 
| │ │ 373 | ├─ UnsupportedPlatformError ├─ SchemaValidationError 374 | ├─ SpawnError ├─ JsonParseError 375 | ├─ ExecExitError ├─ ThreadRunError 376 | │ └─ ThreadErrorEvent 377 | │ 378 | └─ All inherit from CodexError 379 | ``` 380 | 381 | ## Type System 382 | 383 | ``` 384 | ThreadEvent (Union) 385 | ├─ ThreadStartedEvent 386 | ├─ TurnStartedEvent 387 | ├─ TurnCompletedEvent 388 | ├─ TurnFailedEvent 389 | ├─ ItemStartedEvent 390 | ├─ ItemUpdatedEvent 391 | ├─ ItemCompletedEvent 392 | └─ ThreadErrorEvent 393 | 394 | ThreadItem (Union) 395 | ├─ AgentMessageItem 396 | ├─ ReasoningItem 397 | ├─ CommandExecutionItem 398 | ├─ FileChangeItem 399 | ├─ McpToolCallItem 400 | ├─ WebSearchItem 401 | ├─ TodoListItem 402 | └─ ErrorItem 403 | 404 | Status Enums 405 | ├─ CommandExecutionStatus: IN_PROGRESS, COMPLETED, FAILED 406 | ├─ PatchApplyStatus: COMPLETED, FAILED 407 | ├─ McpToolCallStatus: IN_PROGRESS, COMPLETED, FAILED 408 | ├─ PatchChangeKind: ADD, DELETE, UPDATE 409 | ├─ SandboxMode: READ_ONLY, WORKSPACE_WRITE, DANGER_FULL_ACCESS 410 | └─ ApprovalMode: NEVER, ON_REQUEST, ON_FAILURE, UNTRUSTED 411 | ``` 412 | 413 | ## Configuration Resolution 414 | 415 | ``` 416 | CodexOptions 417 | (from Codex constructor) 418 | │ 419 | ├─ api_key 420 | │ │ Priority: constructor → environment (CODEX_API_KEY) 421 | │ └─ Passed as CODEX_API_KEY env var 422 | │ 423 | ├─ base_url 424 | │ │ Priority: constructor → environment (OPENAI_BASE_URL) 425 | │ └─ Passed as OPENAI_BASE_URL env var 426 | │ 427 | └─ codex_path_override 428 | │ Priority: constructor → binary discovery → PATH lookup 429 | └─ Binary location: 430 | │ 1. Check codex_path_override if provided 431 | │ 2. Check src/codex/vendor/{target}/codex 432 | │ 3. Fall back to system PATH (binaries not yet vendored) 433 | ``` 434 | 435 | ## Binary Discovery 436 | 437 | ``` 438 | find_codex_binary(override: str | None) -> Path 439 | 440 | Discovery Flow: 441 | │ 442 | ├─ Override provided? 
443 | │ └─ Yes → return Path(override) 444 | │ 445 | ├─ Detect platform target: 446 | │ ├─ Linux x86_64 → x86_64-unknown-linux-musl 447 | │ ├─ Linux aarch64 → aarch64-unknown-linux-musl 448 | │ ├─ macOS x86_64 → x86_64-apple-darwin 449 | │ ├─ macOS aarch64 → aarch64-apple-darwin 450 | │ ├─ Windows x86_64 → x86_64-pc-windows-msvc 451 | │ └─ Windows arm64 → aarch64-pc-windows-msvc 452 | │ 453 | ├─ Build vendor path: 454 | │ └─ src/codex/vendor/{target}/codex[.exe] 455 | │ 456 | └─ Return vendor path 457 | │ 458 | └─ Note: vendor/ directory exists but binaries are not yet vendored. 459 | The SDK will attempt to use the binary from this path, 460 | but currently relies on system PATH for execution. 461 | ``` 462 | 463 | ## DSPy Wrapper Integration 464 | 465 | The `codex_dspy` package provides a DSPy module that wraps the Codex SDK for signature-driven workflows. This enables using Codex agents as declarative components in DSPy programs. 466 | 467 | ### CodexAgent Module 468 | 469 | ``` 470 | ┌─────────────────────────────────────────────────────────┐ 471 | │ DSPy Application │ 472 | ├─────────────────────────────────────────────────────────┤ 473 | │ CodexAgent Module │ 474 | │ ┌────────────────────────────────────────────┐ │ 475 | │ │ __init__(signature, working_directory) │ │ 476 | │ │ - Validates signature (1 input, 1 output) │ 477 | │ │ - Creates Codex client │ │ 478 | │ │ - Starts thread (1 agent = 1 thread) │ │ 479 | │ └────────────────────────────────────────────┘ │ 480 | │ ┌────────────────────────────────────────────┐ │ 481 | │ │ forward(**kwargs) -> Prediction │ │ 482 | │ │ - Extracts input from kwargs │ │ 483 | │ │ - Calls thread.run() with message │ │ 484 | │ │ - Parses response (str or Pydantic) │ │ 485 | │ │ - Returns Prediction with trace/usage │ │ 486 | │ └────────────────────────────────────────────┘ │ 487 | │ ┌────────────────────────────────────────────┐ │ 488 | │ │ thread_id property │ │ 489 | │ │ - Returns thread.id for debugging │ │ 490 | │ 
└────────────────────────────────────────────┘      │
└────────────────┬────────────────────────────────────────┘
                 │
                 ▼
         Codex SDK (as documented above)
```

### Key Characteristics

**Thread State Management:**
- Each CodexAgent instance maintains exactly one conversation thread
- Multiple `forward()` calls on the same instance continue the same conversation
- Thread context accumulates across calls (conversation history is preserved)
- Access thread ID via `agent.thread_id` property for debugging

**Signature-Driven Interface:**
- Requires at least 1 input field and at least 1 output field; multiple fields of each kind are supported
- Input fields: names determine the kwarg names in `forward()`
- Output fields: names determine the field names in the returned Prediction

**Output Type Handling:**
- String output: Returns final_response as-is (no schema required)
- Pydantic output:
  - Generates JSON schema from Pydantic model
  - Sets `additionalProperties: false` for strict validation
  - Passes schema to Codex via TurnOptions
  - Parses final_response as JSON into Pydantic model
  - Raises ValueError with response preview if parsing fails

**Return Value:**
- Returns DSPy `Prediction` object with:
  - Typed output field (str or Pydantic model instance)
  - `trace`: List[ThreadItem] - chronological items (commands, files, reasoning, etc.)
523 | - `usage`: Usage object - token counts (input_tokens, cached_input_tokens, output_tokens) 524 | 525 | ### Example Usage 526 | 527 | **Basic String I/O:** 528 | ```python 529 | import dspy 530 | from codex_dspy import CodexAgent 531 | from codex import SandboxMode 532 | 533 | # Create signature with string input/output 534 | sig = dspy.Signature('message:str -> answer:str') 535 | 536 | # Create agent (starts a thread) 537 | agent = CodexAgent( 538 | sig, 539 | working_directory='.', 540 | sandbox_mode=SandboxMode.READ_ONLY 541 | ) 542 | 543 | # First forward call 544 | result = agent(message='What files are here?') 545 | print(result.answer) # Clean string response 546 | print(result.trace) # List of items (commands, files, etc.) 547 | print(result.usage) # Usage(input_tokens=..., output_tokens=...) 548 | 549 | # Second forward call (continues same thread) 550 | result = agent(message='Count the Python files') 551 | print(result.answer) # Agent has context from previous call 552 | print(agent.thread_id) # Thread ID for debugging 553 | ``` 554 | 555 | **Pydantic-Typed Output:** 556 | ```python 557 | from pydantic import BaseModel 558 | import dspy 559 | from codex_dspy import CodexAgent 560 | 561 | # Define typed output 562 | class FileAnalysis(BaseModel): 563 | total_files: int 564 | languages: list[str] 565 | has_tests: bool 566 | 567 | # Create signature with Pydantic output 568 | sig = dspy.Signature('directory:str -> analysis:FileAnalysis') 569 | 570 | # Create agent 571 | agent = CodexAgent(sig, working_directory='.') 572 | 573 | # Forward call with typed parsing 574 | result = agent(directory='src/') 575 | print(result.analysis.total_files) # Type-safe access 576 | print(result.analysis.languages) # Parsed from JSON 577 | print(result.analysis.has_tests) # Boolean field 578 | ``` 579 | 580 | ### Integration with DSPy Optimizers 581 | 582 | CodexAgent is a standard DSPy module and can be used in: 583 | - Pipelines with other modules 584 | - Optimizers 
(though Codex doesn't use traditional prompts)
- Multi-agent workflows (each agent maintains separate thread state)

**Important:** Because each CodexAgent instance is stateful, create new instances when you need independent conversation contexts.

### Configuration Parameters

```
CodexAgent Constructor Parameters:

Required:
├─ signature: str | type[Signature]
│    └─ Must have at least 1 input field and at least 1 output field
│
└─ working_directory: str
     └─ Directory where agent executes commands

Optional:
├─ model: Optional[str]
│    └─ Model name (default: "gpt-5.1-codex-max")
│
├─ sandbox_mode: Optional[SandboxMode]
│    ├─ READ_ONLY: No file modifications
│    ├─ WORKSPACE_WRITE: Modifications within working_directory
│    └─ DANGER_FULL_ACCESS: Unrestricted access
│
├─ skip_git_repo_check: bool (default: False)
│    └─ Allow non-git directories as working_directory
│
├─ api_key: Optional[str]
│    └─ Falls back to CODEX_API_KEY env var
│
├─ base_url: Optional[str]
│    └─ Falls back to OPENAI_BASE_URL env var
│
└─ codex_path_override: Optional[str]
     └─ Override binary path (for testing)
```

### Error Handling

**Signature Validation:**
- Raises ValueError if the signature does not have at least 1 input field and at least 1 output field
- Provides helpful error message with example

**Pydantic Parsing:**
- Raises ValueError if response doesn't match Pydantic schema
- Includes response preview (first 500 chars) in error message
- Original exception included in chain for debugging

**SDK Errors:**
- All Codex SDK exceptions propagate unchanged
- See "Error Handling Flow" section for SDK error types

-------------------------------------------------------------------------------- /src/codex_dspy/adapter.py:
-------------------------------------------------------------------------------- 1 | """CodexAdapter - Two-turn adapter for agentic workflows. 2 | 3 | Turn 1: Natural task prompt (agent does work) 4 | Turn 2: Structured output extraction (agent formats findings) 5 | 6 | Based on DSPy's TwoStepAdapter and BAMLAdapter patterns. 7 | """ 8 | 9 | import inspect 10 | import json 11 | import types 12 | from typing import Any, Literal, Union, get_args, get_origin 13 | 14 | from pydantic import BaseModel 15 | from pydantic.fields import FieldInfo 16 | 17 | 18 | # --- TypeScript Conversion --- 19 | 20 | def _is_optional_type(annotation: Any) -> bool: 21 | """Check if a type annotation is optional (Union with None).""" 22 | origin = get_origin(annotation) 23 | if origin is Union or origin is types.UnionType: 24 | return type(None) in get_args(annotation) 25 | return False 26 | 27 | 28 | def _ts_type(annotation: Any, seen: set[type] | None = None) -> str: 29 | """Convert Python type annotation to TypeScript type string.""" 30 | seen = seen or set() 31 | 32 | # Primitives 33 | if annotation is str: 34 | return "string" 35 | if annotation is int or annotation is float: 36 | return "number" 37 | if annotation is bool: 38 | return "boolean" 39 | if annotation is type(None): 40 | return "null" 41 | 42 | # Pydantic model - just use the name (interface defined separately) 43 | if inspect.isclass(annotation) and issubclass(annotation, BaseModel): 44 | return annotation.__name__ 45 | 46 | origin = get_origin(annotation) 47 | args = get_args(annotation) 48 | 49 | # Optional / Union 50 | if origin is Union or origin is types.UnionType: 51 | parts = [_ts_type(a, seen) for a in args] 52 | return " | ".join(parts) 53 | 54 | # Literal 55 | if origin is Literal: 56 | return " | ".join(f'"{a}"' if isinstance(a, str) else str(a).lower() for a in args) 57 | 58 | # list / Array 59 | if origin is list: 60 | inner = _ts_type(args[0], seen) if args else "any" 61 | # Wrap union types in parens for 
array 62 | if " | " in inner: 63 | return f"Array<{inner}>" 64 | return f"{inner}[]" 65 | 66 | # dict / Record 67 | if origin is dict: 68 | key_type = _ts_type(args[0], seen) if args else "string" 69 | val_type = _ts_type(args[1], seen) if len(args) > 1 else "any" 70 | return f"Record<{key_type}, {val_type}>" 71 | 72 | # Fallback 73 | if hasattr(annotation, "__name__"): 74 | return annotation.__name__ 75 | return "any" 76 | 77 | 78 | def _collect_models(annotation: Any, collected: set[type] | None = None) -> set[type]: 79 | """Recursively collect all Pydantic models referenced in a type annotation.""" 80 | if collected is None: 81 | collected = set() 82 | 83 | if inspect.isclass(annotation) and issubclass(annotation, BaseModel): 84 | if annotation not in collected: 85 | collected.add(annotation) 86 | # Recurse into model fields 87 | for field in annotation.model_fields.values(): 88 | _collect_models(field.annotation, collected) 89 | return collected 90 | 91 | origin = get_origin(annotation) 92 | args = get_args(annotation) 93 | 94 | if args: 95 | for arg in args: 96 | _collect_models(arg, collected) 97 | 98 | return collected 99 | 100 | 101 | def pydantic_to_typescript(models: list[type[BaseModel]] | type[BaseModel]) -> str: 102 | """Convert Pydantic models to TypeScript interfaces. 103 | 104 | Args: 105 | models: A single model or list of models to convert. 106 | Recursively includes all referenced models. 107 | 108 | Returns: 109 | TypeScript interface definitions as a string. 
110 | """ 111 | if not isinstance(models, list): 112 | models = [models] 113 | 114 | # Collect all referenced models 115 | all_models: set[type] = set() 116 | for model in models: 117 | _collect_models(model, all_models) 118 | 119 | # Sort for deterministic output (dependencies first would be ideal, but alphabetical is fine) 120 | sorted_models = sorted(all_models, key=lambda m: m.__name__) 121 | 122 | interfaces = [] 123 | for model in sorted_models: 124 | lines = [f"interface {model.__name__} {{"] 125 | 126 | for name, field in model.model_fields.items(): 127 | # JSDoc comment for description 128 | if field.description: 129 | lines.append(f" /** {field.description} */") 130 | 131 | # Check if optional (not required by Pydantic) 132 | is_optional = not field.is_required() 133 | 134 | ts_type = _ts_type(field.annotation) 135 | # Remove null from type if we're marking as optional with ? 136 | if is_optional and " | null" in ts_type: 137 | ts_type = ts_type.replace(" | null", "") 138 | 139 | optional_marker = "?" if is_optional else "" 140 | lines.append(f" {name}{optional_marker}: {ts_type};") 141 | 142 | lines.append("}") 143 | interfaces.append("\n".join(lines)) 144 | 145 | return "\n\n".join(interfaces) 146 | 147 | 148 | def value_to_typescript(value: Any, indent: int = 0) -> str: 149 | """Convert a Python value to TypeScript literal syntax. 150 | 151 | Handles Pydantic models, dicts, lists, and primitives. 
152 | """ 153 | prefix = " " * indent 154 | 155 | if value is None: 156 | return "null" 157 | 158 | if isinstance(value, bool): 159 | return "true" if value else "false" 160 | 161 | if isinstance(value, (int, float)): 162 | return str(value) 163 | 164 | if isinstance(value, str): 165 | # Escape quotes and use double quotes 166 | escaped = value.replace("\\", "\\\\").replace('"', '\\"').replace("\n", "\\n") 167 | return f'"{escaped}"' 168 | 169 | if isinstance(value, BaseModel): 170 | return value_to_typescript(value.model_dump(), indent) 171 | 172 | if isinstance(value, dict): 173 | if not value: 174 | return "{}" 175 | lines = ["{"] 176 | items = list(value.items()) 177 | for i, (k, v) in enumerate(items): 178 | comma = "," if i < len(items) - 1 else "," # trailing comma 179 | val_str = value_to_typescript(v, indent + 1) 180 | # Handle multi-line values 181 | if "\n" in val_str: 182 | lines.append(f"{prefix} {k}: {val_str}{comma}") 183 | else: 184 | lines.append(f"{prefix} {k}: {val_str}{comma}") 185 | lines.append(f"{prefix}}}") 186 | return "\n".join(lines) 187 | 188 | if isinstance(value, list): 189 | if not value: 190 | return "[]" 191 | # Check if simple list (all primitives on one line) 192 | if all(isinstance(v, (str, int, float, bool, type(None))) for v in value): 193 | items = [value_to_typescript(v, 0) for v in value] 194 | return f"[{', '.join(items)}]" 195 | # Complex list - multi-line 196 | lines = ["["] 197 | for i, v in enumerate(value): 198 | comma = "," if i < len(value) - 1 else "," 199 | val_str = value_to_typescript(v, indent + 1) 200 | lines.append(f"{prefix} {val_str}{comma}") 201 | lines.append(f"{prefix}]") 202 | return "\n".join(lines) 203 | 204 | # Fallback 205 | return str(value) 206 | 207 | 208 | # --- Schema Rendering (from BAML) - kept for backwards compat --- 209 | 210 | def _render_type_str( 211 | annotation: Any, 212 | indent: int = 0, 213 | seen_models: set[type] | None = None, 214 | ) -> str: 215 | """Render a type annotation 
into a simplified, human-readable string. 216 | 217 | Examples: 218 | str -> "string" 219 | int -> "int" 220 | list[Bug] -> "[\n { ... }\n]" 221 | Literal["a", "b"] -> '"a" or "b"' 222 | Optional[str] -> "string or null" 223 | """ 224 | # Primitives 225 | if annotation is str: 226 | return "string" 227 | if annotation is int: 228 | return "int" 229 | if annotation is float: 230 | return "float" 231 | if annotation is bool: 232 | return "boolean" 233 | 234 | # Pydantic models 235 | if inspect.isclass(annotation) and issubclass(annotation, BaseModel): 236 | return _build_simplified_schema(annotation, indent, seen_models) 237 | 238 | try: 239 | origin = get_origin(annotation) 240 | args = get_args(annotation) 241 | except Exception: 242 | return str(annotation) 243 | 244 | # Optional[T] or T | None (handles both typing.Union and types.UnionType) 245 | if origin is Union or origin is types.UnionType: 246 | non_none_args = [arg for arg in args if arg is not type(None)] 247 | type_render = " or ".join([_render_type_str(arg, indent) for arg in non_none_args]) 248 | if len(non_none_args) < len(args): 249 | return f"{type_render} or null" 250 | return type_render 251 | 252 | # Literal["a", "b", ...] 
253 | if origin is Literal: 254 | return " or ".join(f'"{arg}"' for arg in args) 255 | 256 | # list[T] 257 | if origin is list: 258 | inner_type = args[0] if args else Any 259 | # Direct Pydantic model 260 | if inspect.isclass(inner_type) and issubclass(inner_type, BaseModel): 261 | inner_schema = _build_simplified_schema(inner_type, indent + 1, seen_models) 262 | current_indent = " " * indent 263 | return f"[\n{inner_schema}\n{current_indent}]" 264 | # list[Model | None] - Optional Pydantic model 265 | inner_origin = get_origin(inner_type) 266 | if inner_origin is Union or inner_origin is types.UnionType: 267 | inner_args = get_args(inner_type) 268 | non_none = [a for a in inner_args if a is not type(None)] 269 | if len(non_none) == 1 and inspect.isclass(non_none[0]) and issubclass(non_none[0], BaseModel): 270 | inner_schema = _build_simplified_schema(non_none[0], indent + 1, seen_models) 271 | current_indent = " " * indent 272 | return f"[\n{inner_schema}, // or null\n{current_indent}]" 273 | # Other list types (primitives, nested lists, etc.) 274 | return f"{_render_type_str(inner_type, indent)}[]" 275 | 276 | # dict[K, V] 277 | if origin is dict: 278 | key_type = _render_type_str(args[0], indent) if args else "string" 279 | val_type = _render_type_str(args[1], indent) if len(args) > 1 else "any" 280 | return f"dict[{key_type}, {val_type}]" 281 | 282 | # Fallback 283 | if hasattr(annotation, "__name__"): 284 | return annotation.__name__ 285 | return str(annotation) 286 | 287 | 288 | def _build_simplified_schema( 289 | pydantic_model: type[BaseModel], 290 | indent: int = 0, 291 | seen_models: set[type] | None = None, 292 | ) -> str: 293 | """Build a simplified, human-readable schema from a Pydantic model. 
294 | 295 | Example output: 296 | { 297 | # Bug severity level 298 | severity: "low" or "medium" or "high", 299 | # Where in the code 300 | location: string, 301 | } 302 | """ 303 | seen_models = seen_models or set() 304 | 305 | if pydantic_model in seen_models: 306 | return f"{pydantic_model.__name__} (recursive)" 307 | 308 | seen_models = seen_models | {pydantic_model} 309 | 310 | lines = [] 311 | current_indent = " " * indent 312 | next_indent = " " * (indent + 1) 313 | 314 | lines.append(f"{current_indent}{{") 315 | 316 | fields = pydantic_model.model_fields 317 | if not fields: 318 | lines.append(f"{next_indent}# No fields defined") 319 | 320 | for name, field in fields.items(): 321 | # Add description as comment 322 | if field.description: 323 | lines.append(f"{next_indent}# {field.description}") 324 | 325 | rendered_type = _render_type_str(field.annotation, indent=indent + 1, seen_models=seen_models) 326 | lines.append(f"{next_indent}{name}: {rendered_type},") 327 | 328 | lines.append(f"{current_indent}}}") 329 | return "\n".join(lines) 330 | 331 | 332 | # --- Field Description (from TwoStepAdapter) --- 333 | 334 | def get_annotation_name(annotation: Any) -> str: 335 | """Get a human-readable name for a type annotation.""" 336 | origin = get_origin(annotation) 337 | args = get_args(annotation) 338 | 339 | if origin is None: 340 | if hasattr(annotation, "__name__"): 341 | return annotation.__name__ 342 | return str(annotation) 343 | 344 | if origin is Literal: 345 | args_str = ", ".join(f'"{a}"' if isinstance(a, str) else str(a) for a in args) 346 | return f"Literal[{args_str}]" 347 | 348 | args_str = ", ".join(get_annotation_name(a) for a in args) 349 | origin_name = getattr(origin, "__name__", str(origin)) 350 | return f"{origin_name}[{args_str}]" 351 | 352 | 353 | def format_field_description(fields: dict[str, FieldInfo]) -> str: 354 | """Format field descriptions as a numbered list. 355 | 356 | Example: 357 | 1. 
`context` (CodeContext): The code to analyze 358 | 2. `user_request` (str): What to look for 359 | """ 360 | descriptions = [] 361 | for idx, (name, field) in enumerate(fields.items(), 1): 362 | type_name = get_annotation_name(field.annotation) 363 | desc = f": {field.description}" if field.description else "" 364 | descriptions.append(f"{idx}. `{name}` ({type_name}){desc}") 365 | return "\n".join(descriptions) 366 | 367 | 368 | # --- CodexAdapter --- 369 | 370 | class CodexAdapter: 371 | """Two-turn adapter for Codex agentic workflows. 372 | 373 | Turn 1 (format_turn1): Natural task prompt 374 | - Describes input and output fields 375 | - Shows input values 376 | - Includes task instructions 377 | - Agent works naturally, no structured output required 378 | 379 | Turn 2 (format_turn2): Structured extraction 380 | - BAML-style schemas for each output field 381 | - Agent formats its findings into the structure 382 | """ 383 | 384 | def format_turn1( 385 | self, 386 | signature, # DSPy Signature 387 | inputs: dict[str, Any], 388 | ) -> str: 389 | """Format the task turn prompt. 390 | 391 | Agent receives this and does its work naturally. 392 | Output fields are declared (so agent knows the goal) but not structured. 
393 | """ 394 | parts = [] 395 | 396 | # Input field descriptions 397 | if signature.input_fields: 398 | parts.append("As input, you are provided with:") 399 | parts.append(format_field_description(signature.input_fields)) 400 | parts.append("") 401 | 402 | # Output field descriptions (declare the goal) 403 | if signature.output_fields: 404 | parts.append("Your task is to produce:") 405 | parts.append(format_field_description(signature.output_fields)) 406 | parts.append("") 407 | 408 | # Task instructions from signature 409 | if signature.instructions: 410 | parts.append(f"Instructions: {signature.instructions}") 411 | parts.append("") 412 | 413 | # Separator 414 | parts.append("---") 415 | parts.append("") 416 | 417 | # Input values 418 | for name, field in signature.input_fields.items(): 419 | if name in inputs: 420 | value = inputs[name] 421 | # Format Pydantic models as JSON 422 | if isinstance(value, BaseModel): 423 | formatted = value.model_dump_json(indent=2) 424 | elif isinstance(value, (dict, list)): 425 | formatted = json.dumps(value, indent=2, ensure_ascii=False) 426 | else: 427 | formatted = str(value) 428 | parts.append(f"{name}: {formatted}") 429 | parts.append("") 430 | 431 | return "\n".join(parts).strip() 432 | 433 | def format_turn2(self, signature) -> str: 434 | """Format the extraction turn prompt. 435 | 436 | Agent receives this after completing the task. 437 | Uses BAML-style schemas to request structured output. 
438 | """ 439 | parts = [] 440 | parts.append("Now provide your findings in the following format:") 441 | parts.append("") 442 | 443 | for name, field in signature.output_fields.items(): 444 | parts.append(f"[[ ## {name} ## ]]") 445 | 446 | annotation = field.annotation 447 | if inspect.isclass(annotation) and issubclass(annotation, BaseModel): 448 | # Pydantic model - show simplified schema 449 | schema = _build_simplified_schema(annotation, indent=0) 450 | parts.append(schema) 451 | elif get_origin(annotation) is list: 452 | # list[T] - show array schema 453 | inner = get_args(annotation)[0] if get_args(annotation) else Any 454 | if inspect.isclass(inner) and issubclass(inner, BaseModel): 455 | # list[Model] 456 | inner_schema = _build_simplified_schema(inner, indent=1) 457 | parts.append(f"[\n{inner_schema}\n]") 458 | else: 459 | # Check for list[Model | None] 460 | inner_origin = get_origin(inner) 461 | if inner_origin is Union or inner_origin is types.UnionType: 462 | inner_args = get_args(inner) 463 | non_none = [a for a in inner_args if a is not type(None)] 464 | if len(non_none) == 1 and inspect.isclass(non_none[0]) and issubclass(non_none[0], BaseModel): 465 | inner_schema = _build_simplified_schema(non_none[0], indent=1) 466 | parts.append(f"[\n{inner_schema}, // or null\n]") 467 | continue 468 | # Other list types 469 | parts.append(f"{_render_type_str(inner)}[]") 470 | else: 471 | # Primitive or other type 472 | parts.append(f"<{_render_type_str(annotation)}>") 473 | 474 | parts.append("") 475 | 476 | parts.append("[[ ## completed ## ]]") 477 | return "\n".join(parts) 478 | 479 | def format_turn2_typescript(self, signature) -> str: 480 | """Format Turn 2 using TypeScript interfaces. 481 | 482 | This is the preferred format: 483 | - Uses real TypeScript syntax (LLMs know it well) 484 | - JSDoc comments for field descriptions 485 | - Optional fields marked with ? 
486 | - Includes static examples from signature if defined 487 | 488 | Expected output from LLM: TypeScript object literal (parseable with json5) 489 | """ 490 | parts = [] 491 | parts.append("Respond with a TypeScript value matching this type:") 492 | parts.append("") 493 | parts.append("```typescript") 494 | 495 | # Collect all Pydantic models from output fields 496 | models_to_render = [] 497 | for field in signature.output_fields.values(): 498 | if inspect.isclass(field.annotation) and issubclass(field.annotation, BaseModel): 499 | models_to_render.append(field.annotation) 500 | else: 501 | # Check for models inside generics (list[Model], etc.) 502 | _collect_models(field.annotation, set()) # warm up 503 | for model in _collect_models(field.annotation): 504 | if model not in models_to_render: 505 | models_to_render.append(model) 506 | 507 | # Render TypeScript interfaces 508 | if models_to_render: 509 | parts.append(pydantic_to_typescript(models_to_render)) 510 | parts.append("") 511 | 512 | # Build the Response type from output fields 513 | parts.append("type Response = {") 514 | for name, field in signature.output_fields.items(): 515 | if field.description: 516 | parts.append(f" /** {field.description} */") 517 | ts_type = _ts_type(field.annotation) 518 | optional_marker = "?" 
if _is_optional_type(field.annotation) else "" 519 | parts.append(f" {name}{optional_marker}: {ts_type};") 520 | parts.append("};") 521 | parts.append("```") 522 | 523 | # Add static examples if defined on signature 524 | examples = getattr(signature, 'Examples', None) 525 | if examples: 526 | output_examples = getattr(examples, 'outputs', None) 527 | if output_examples: 528 | parts.append("") 529 | if len(output_examples) == 1: 530 | parts.append("Example output:") 531 | parts.append("```typescript") 532 | parts.append(value_to_typescript(output_examples[0])) 533 | parts.append("```") 534 | else: 535 | parts.append("Example outputs:") 536 | parts.append("```typescript") 537 | for i, ex in enumerate(output_examples): 538 | parts.append(f"// Example {i + 1}:") 539 | parts.append(value_to_typescript(ex)) 540 | if i < len(output_examples) - 1: 541 | parts.append("") 542 | parts.append("```") 543 | 544 | return "\n".join(parts) 545 | 546 | def format_turn2_json(self, signature) -> str: 547 | """Alternative: request JSON output for Turn 2. 548 | 549 | Use this if you want to use output_schema with the LLM 550 | instead of parsing [[ ## field ## ]] markers. 551 | """ 552 | parts = [] 553 | parts.append("Now provide your findings as JSON with the following structure:") 554 | parts.append("") 555 | parts.append("```json") 556 | parts.append("{") 557 | 558 | field_lines = [] 559 | for name, field in signature.output_fields.items(): 560 | schema = _render_type_str(field.annotation, indent=1) 561 | desc = f" // {field.description}" if field.description else "" 562 | field_lines.append(f' "{name}": {schema}{desc}') 563 | 564 | parts.append(",\n".join(field_lines)) 565 | parts.append("}") 566 | parts.append("```") 567 | 568 | return "\n".join(parts) 569 | 570 | def parse(self, signature, completion: str) -> dict[str, Any]: 571 | """Parse [[ ## field ## ]] markers from completion. 572 | 573 | Returns a dict mapping field names to their string values. 
574 | Caller is responsible for type conversion (e.g., JSON parsing for Pydantic). 575 | """ 576 | import re 577 | 578 | field_header_pattern = re.compile(r"\[\[ ## (\w+) ## \]\]") 579 | 580 | sections = [(None, [])] 581 | for line in completion.splitlines(): 582 | match = field_header_pattern.match(line.strip()) 583 | if match: 584 | header = match.group(1) 585 | remaining = line[match.end():].strip() 586 | sections.append((header, [remaining] if remaining else [])) 587 | else: 588 | sections[-1][1].append(line) 589 | 590 | sections = [(k, "\n".join(v).strip()) for k, v in sections] 591 | 592 | fields = {} 593 | for name, value in sections: 594 | if name and name in signature.output_fields and name not in fields: 595 | if name == "completed": 596 | continue 597 | fields[name] = value 598 | 599 | return fields 600 | -------------------------------------------------------------------------------- /docs/CODEX_SDK_API_SURFACE.md: -------------------------------------------------------------------------------- 1 | # Codex Python SDK - Comprehensive API Surface Area 2 | 3 | ## Overview 4 | 5 | The Codex Python SDK is a Python interface to the Codex agent CLI. It shells out to a bundled native `codex` binary, streams structured JSON events, and provides strongly-typed helpers for synchronous and streaming turns. The SDK is designed for Python 3.12+ and is currently in pre-alpha status. 6 | 7 | --- 8 | 9 | ## 1. Main SDK Entry Points 10 | 11 | ### 1.1 Client Initialization 12 | 13 | **Class: `Codex`** 14 | 15 | ```python 16 | from codex import Codex, CodexOptions 17 | 18 | # Basic initialization 19 | client = Codex() 20 | 21 | # With configuration options 22 | client = Codex(options=CodexOptions( 23 | codex_path_override="/path/to/codex/binary", 24 | base_url="https://api.openai.com/v1", 25 | api_key="sk-..." 
26 | )) 27 | ``` 28 | 29 | **Parameters:** 30 | - `options` (Optional[CodexOptions]): Global SDK configuration 31 | 32 | ### 1.2 Thread Management 33 | 34 | **Methods:** 35 | - `start_thread(options: Optional[ThreadOptions] = None) -> Thread` 36 | - Creates a new thread for a conversation session 37 | - Returns a `Thread` object to interact with 38 | 39 | - `resume_thread(thread_id: str, options: Optional[ThreadOptions] = None) -> Thread` 40 | - Resumes an existing thread by ID 41 | - Useful for multi-turn conversations or resuming interrupted work 42 | 43 | **Example:** 44 | ```python 45 | # Start new thread 46 | thread = client.start_thread() 47 | 48 | # Later, resume the same thread 49 | thread_id = thread.id # Available after first run 50 | resumed_thread = client.resume_thread(thread_id) 51 | ``` 52 | 53 | --- 54 | 55 | ## 2. Configuration Options 56 | 57 | ### 2.1 CodexOptions (Global/Client-level) 58 | 59 | ```python 60 | from codex import CodexOptions 61 | 62 | @dataclass(frozen=True, slots=True) 63 | class CodexOptions: 64 | codex_path_override: Optional[str] = None 65 | base_url: Optional[str] = None 66 | api_key: Optional[str] = None 67 | ``` 68 | 69 | **Fields:** 70 | - `codex_path_override`: Override the bundled codex binary location (for testing/custom builds) 71 | - `base_url`: OpenAI API base URL (default: official OpenAI endpoint) 72 | - `api_key`: API authentication key (read from CODEX_API_KEY env var if not set) 73 | 74 | ### 2.2 ThreadOptions (Thread-level Configuration) 75 | 76 | ```python 77 | from codex import ThreadOptions, SandboxMode 78 | 79 | @dataclass(frozen=True, slots=True) 80 | class ThreadOptions: 81 | model: Optional[str] = None 82 | sandbox_mode: Optional[SandboxMode] = None 83 | working_directory: Optional[str] = None 84 | skip_git_repo_check: bool = False 85 | ``` 86 | 87 | **Fields:** 88 | - `model`: Model to use (default: "gpt-5.1-codex-max") 89 | - `sandbox_mode`: Execution sandbox level - SandboxMode enum: 90 | - 
`READ_ONLY`: No file modifications allowed 91 | - `WORKSPACE_WRITE`: Can modify files in workspace 92 | - `DANGER_FULL_ACCESS`: Full system access 93 | - `working_directory`: Directory to run commands in (requires git repo unless `skip_git_repo_check=True`) 94 | - `skip_git_repo_check`: Allow non-git directories as working directory 95 | 96 | **ApprovalMode (Enum):** 97 | ```python 98 | from codex import ApprovalMode 99 | 100 | class ApprovalMode(StrEnum): 101 | NEVER = "never" 102 | ON_REQUEST = "on-request" 103 | ON_FAILURE = "on-failure" 104 | UNTRUSTED = "untrusted" 105 | ``` 106 | 107 | **Values:** 108 | - `NEVER = "never"` - Never ask for user approval 109 | - `ON_REQUEST = "on-request"` - Model decides when to ask 110 | - `ON_FAILURE = "on-failure"` - Ask only if command fails 111 | - `UNTRUSTED = "untrusted"` - Ask for untrusted commands only 112 | 113 | **Example:** 114 | ```python 115 | from codex import Codex, ThreadOptions, SandboxMode 116 | 117 | client = Codex() 118 | thread = client.start_thread(ThreadOptions( 119 | model="gpt-5.1-codex-max", 120 | sandbox_mode=SandboxMode.WORKSPACE_WRITE, 121 | working_directory="/path/to/project", 122 | skip_git_repo_check=False 123 | )) 124 | ``` 125 | 126 | ### 2.3 TurnOptions (Turn/Request-level Configuration) 127 | 128 | ```python 129 | from codex import TurnOptions 130 | 131 | @dataclass(frozen=True, slots=True) 132 | class TurnOptions: 133 | output_schema: Optional[SchemaInput] = None 134 | ``` 135 | 136 | **Fields:** 137 | - `output_schema`: Schema to constrain output format 138 | - Can be a dict (JSON Schema) 139 | - Can be a Pydantic BaseModel class or instance 140 | - If provided, output will be validated against schema 141 | 142 | **Example:** 143 | ```python 144 | from pydantic import BaseModel 145 | from codex import TurnOptions 146 | 147 | class StatusReport(BaseModel): 148 | summary: str 149 | status: str 150 | action_required: bool 151 | 152 | turn = thread.run( 153 | "Summarize repository status", 
154 | TurnOptions(output_schema=StatusReport) 155 | ) 156 | 157 | # Or with JSON Schema dict: 158 | schema = { 159 | "type": "object", 160 | "properties": { 161 | "summary": {"type": "string"}, 162 | "status": {"type": "string", "enum": ["ok", "action_required"]}, 163 | }, 164 | "required": ["summary", "status"], 165 | "additionalProperties": False, 166 | } 167 | turn = thread.run("Summarize", TurnOptions(output_schema=schema)) 168 | ``` 169 | 170 | --- 171 | 172 | ## 3. Thread Methods - What Can Be Passed 173 | 174 | ### 3.1 Synchronous Execution 175 | 176 | **Method: `thread.run(prompt: str, turn_options: Optional[TurnOptions] = None) -> ThreadRunResult`** 177 | 178 | Executes a prompt and blocks until completion, collecting all events. 179 | 180 | **Parameters:** 181 | - `prompt` (str): User input/prompt for the agent 182 | - `turn_options` (Optional[TurnOptions]): Turn-level configuration (output schema, etc.) 183 | 184 | **Returns: `ThreadRunResult`** 185 | ```python 186 | @dataclass(frozen=True, slots=True) 187 | class ThreadRunResult: 188 | items: list[ThreadItem] # All completed items from the turn 189 | final_response: str # Final agent message text 190 | usage: Optional[Usage] # Token usage information 191 | ``` 192 | 193 | **Example:** 194 | ```python 195 | thread = client.start_thread() 196 | result = thread.run("What's the repository status?") 197 | 198 | print(result.final_response) # Agent's response 199 | print(result.usage.input_tokens) # Token info 200 | for item in result.items: 201 | print(f"Item type: {item.type}") 202 | ``` 203 | 204 | ### 3.2 Streaming Execution 205 | 206 | **Method: `thread.run_streamed(prompt: str, turn_options: Optional[TurnOptions] = None) -> ThreadStream`** 207 | 208 | Executes a prompt and streams events in real-time. 
209 | 210 | **Parameters:** 211 | - Same as `thread.run()` 212 | 213 | **Returns: `ThreadStream`** (iterable of events) 214 | 215 | **Example:** 216 | ```python 217 | from codex import ItemCompletedEvent, TurnCompletedEvent 218 | 219 | stream = thread.run_streamed("Fix the bug") 220 | for event in stream: 221 | if isinstance(event, ItemCompletedEvent): 222 | print(f"Item completed: {event.item.type}") 223 | elif isinstance(event, TurnCompletedEvent): 224 | print(f"Turn complete, tokens: {event.usage}") 225 | ``` 226 | 227 | --- 228 | 229 | ## 4. Response Structure & What Comes Back 230 | 231 | ### 4.1 Events 232 | 233 | The SDK streams structured **ThreadEvent** objects. All events come back as strongly-typed dataclasses. 234 | 235 | **ThreadEvent Union Type:** 236 | ``` 237 | ThreadEvent = ( 238 | ThreadStartedEvent 239 | | TurnStartedEvent 240 | | TurnCompletedEvent 241 | | TurnFailedEvent 242 | | ItemStartedEvent 243 | | ItemUpdatedEvent 244 | | ItemCompletedEvent 245 | | ThreadErrorEvent 246 | ) 247 | ``` 248 | 249 | ### 4.2 Event Types 250 | 251 | #### ThreadStartedEvent 252 | ```python 253 | @dataclass(frozen=True, slots=True) 254 | class ThreadStartedEvent: 255 | type: Literal["thread.started"] = "thread.started" 256 | thread_id: str 257 | ``` 258 | - Fired when thread is created 259 | - Assigns thread ID for later resumption 260 | - Automatically updates `thread.id` property 261 | 262 | #### TurnStartedEvent 263 | ```python 264 | @dataclass(frozen=True, slots=True) 265 | class TurnStartedEvent: 266 | type: Literal["turn.started"] = "turn.started" 267 | ``` 268 | - Marks beginning of a turn 269 | 270 | #### TurnCompletedEvent 271 | ```python 272 | @dataclass(frozen=True, slots=True) 273 | class TurnCompletedEvent: 274 | type: Literal["turn.completed"] = "turn.completed" 275 | usage: Usage 276 | ``` 277 | - Fired when turn completes successfully 278 | - Includes token usage metadata 279 | 280 | #### TurnFailedEvent 281 | ```python 282 | 
@dataclass(frozen=True, slots=True) 283 | class TurnFailedEvent: 284 | type: Literal["turn.failed"] = "turn.failed" 285 | error: ThreadError 286 | ``` 287 | - Turn encountered an error 288 | - Contains error message 289 | 290 | #### ItemStartedEvent, ItemUpdatedEvent, ItemCompletedEvent 291 | ```python 292 | @dataclass(frozen=True, slots=True) 293 | class ItemStartedEvent: 294 | type: Literal["item.started"] = "item.started" 295 | item: ThreadItem 296 | 297 | @dataclass(frozen=True, slots=True) 298 | class ItemUpdatedEvent: 299 | type: Literal["item.updated"] = "item.updated" 300 | item: ThreadItem 301 | 302 | @dataclass(frozen=True, slots=True) 303 | class ItemCompletedEvent: 304 | type: Literal["item.completed"] = "item.completed" 305 | item: ThreadItem 306 | ``` 307 | - Track item lifecycle (reasoning, commands, file changes, etc.) 308 | 309 | #### ThreadErrorEvent 310 | ```python 311 | @dataclass(frozen=True, slots=True) 312 | class ThreadErrorEvent: 313 | type: Literal["error"] = "error" 314 | message: str 315 | ``` 316 | - Unrecoverable SDK-level error 317 | 318 | ### 4.3 Usage Information 319 | 320 | ```python 321 | @dataclass(frozen=True, slots=True) 322 | class Usage: 323 | input_tokens: int 324 | cached_input_tokens: int 325 | output_tokens: int 326 | ``` 327 | 328 | Available in `TurnCompletedEvent` and `ThreadRunResult.usage` 329 | 330 | ### 4.4 Thread Items 331 | 332 | **ThreadItem Union Type:** 333 | ``` 334 | ThreadItem = ( 335 | AgentMessageItem 336 | | ReasoningItem 337 | | CommandExecutionItem 338 | | FileChangeItem 339 | | McpToolCallItem 340 | | WebSearchItem 341 | | TodoListItem 342 | | ErrorItem 343 | ) 344 | ``` 345 | 346 | #### AgentMessageItem 347 | ```python 348 | @dataclass(frozen=True, slots=True) 349 | class AgentMessageItem: 350 | type: Literal["agent_message"] = "agent_message" 351 | id: str 352 | text: str 353 | ``` 354 | - Agent's text response 355 | 356 | #### ReasoningItem 357 | ```python 358 | @dataclass(frozen=True, slots=True) 
359 | class ReasoningItem: 360 | type: Literal["reasoning"] = "reasoning" 361 | id: str 362 | text: str 363 | ``` 364 | - Agent's internal reasoning/thinking 365 | 366 | #### CommandExecutionItem 367 | ```python 368 | @dataclass(frozen=True, slots=True) 369 | class CommandExecutionItem: 370 | type: Literal["command_execution"] = "command_execution" 371 | id: str 372 | command: str 373 | aggregated_output: str 374 | status: CommandExecutionStatus 375 | exit_code: int | None = None 376 | ``` 377 | - Command execution with output 378 | - Status: `IN_PROGRESS`, `COMPLETED`, or `FAILED` 379 | 380 | #### FileChangeItem 381 | ```python 382 | @dataclass(frozen=True, slots=True) 383 | class FileChangeItem: 384 | type: Literal["file_change"] = "file_change" 385 | id: str 386 | changes: Sequence[FileUpdateChange] 387 | status: PatchApplyStatus 388 | 389 | @dataclass(frozen=True, slots=True) 390 | class FileUpdateChange: 391 | path: str 392 | kind: PatchChangeKind # ADD, DELETE, UPDATE 393 | ``` 394 | - File modifications with patch tracking 395 | - Status: `COMPLETED` or `FAILED` 396 | 397 | #### McpToolCallItem 398 | ```python 399 | @dataclass(frozen=True, slots=True) 400 | class McpToolCallItem: 401 | type: Literal["mcp_tool_call"] = "mcp_tool_call" 402 | id: str 403 | server: str 404 | tool: str 405 | status: McpToolCallStatus 406 | ``` 407 | - MCP (Model Context Protocol) tool invocation 408 | - Status: `IN_PROGRESS`, `COMPLETED`, or `FAILED` 409 | 410 | #### WebSearchItem 411 | ```python 412 | @dataclass(frozen=True, slots=True) 413 | class WebSearchItem: 414 | type: Literal["web_search"] = "web_search" 415 | id: str 416 | query: str 417 | ``` 418 | - Web search execution 419 | 420 | #### TodoListItem 421 | ```python 422 | @dataclass(frozen=True, slots=True) 423 | class TodoListItem: 424 | type: Literal["todo_list"] = "todo_list" 425 | id: str 426 | items: Sequence[TodoItem] 427 | 428 | @dataclass(frozen=True, slots=True) 429 | class TodoItem: 430 | text: str 431 | 
completed: bool 432 | ``` 433 | - Task lists generated by agent 434 | 435 | #### ErrorItem 436 | ```python 437 | @dataclass(frozen=True, slots=True) 438 | class ErrorItem: 439 | type: Literal["error"] = "error" 440 | id: str 441 | message: str 442 | ``` 443 | - Item-level errors 444 | 445 | --- 446 | 447 | ## 5. Advanced Features 448 | 449 | ### 5.1 Streaming 450 | 451 | Real-time event streaming with `thread.run_streamed()`: 452 | 453 | ```python 454 | stream = thread.run_streamed("Implement the fix") 455 | for event in stream: 456 | match event: 457 | case ThreadStartedEvent() as e: 458 | print(f"Thread: {e.thread_id}") 459 | case ItemCompletedEvent(item=item): 460 | print(f"Item: {item.type}") 461 | case TurnCompletedEvent(usage=usage): 462 | print(f"Tokens: {usage.input_tokens}") 463 | case TurnFailedEvent(error=err): 464 | print(f"Failed: {err.message}") 465 | case ThreadErrorEvent(message=msg): 466 | print(f"Error: {msg}") 467 | ``` 468 | 469 | ### 5.2 Structured Output with Schema Validation 470 | 471 | Constrain agent output to structured format: 472 | 473 | ```python 474 | from pydantic import BaseModel 475 | from codex import TurnOptions 476 | 477 | class BugReport(BaseModel): 478 | severity: str 479 | component: str 480 | fix_steps: list[str] 481 | 482 | schema = BugReport # Can pass class directly 483 | 484 | result = thread.run( 485 | "Analyze the bug in error.log", 486 | TurnOptions(output_schema=schema) 487 | ) 488 | # Output will be validated against schema 489 | print(result.final_response) # Structured JSON response 490 | ``` 491 | 492 | ### 5.3 Tool Calling (MCP Integration) 493 | 494 | The SDK automatically handles MCP (Model Context Protocol) tool calls through `McpToolCallItem` events: 495 | 496 | ```python 497 | for event in thread.run_streamed(prompt): 498 | if isinstance(event, ItemCompletedEvent): 499 | if isinstance(event.item, McpToolCallItem): 500 | print(f"Tool called: {event.item.server}.{event.item.tool}") 501 | print(f"Status: 
{event.item.status}") 502 | ``` 503 | 504 | ### 5.4 Multi-turn Conversations 505 | 506 | Threads maintain state across multiple turns: 507 | 508 | ```python 509 | thread = client.start_thread() 510 | 511 | # First turn 512 | result1 = thread.run("What's the problem?") 513 | print(result1.final_response) 514 | 515 | # Second turn - previous context is maintained 516 | result2 = thread.run("What's the fix?") 517 | print(result2.final_response) 518 | 519 | # Can also resume later 520 | thread_id = thread.id 521 | resumed = client.resume_thread(thread_id) 522 | result3 = resumed.run("How to test it?") 523 | ``` 524 | 525 | ### 5.5 Stateful vs Stateless 526 | 527 | **Stateful (Thread):** 528 | - Thread maintains conversation history 529 | - Each `run()` or `run_streamed()` adds to the thread's context 530 | - Can `resume_thread()` by ID across sessions 531 | - Items from previous turns are included in subsequent turns 532 | 533 | **Stateless (Turn):** 534 | - Each `run()` call is independent in terms of what you can control 535 | - The actual threading and history is managed by the underlying Codex CLI 536 | - SDK aggregates results into clean `ThreadRunResult` or event streams 537 | 538 | ### 5.6 DSPy Wrapper - CodexAgent 539 | 540 | **Module:** `codex_dspy` 541 | 542 | The `CodexAgent` class provides a DSPy module interface to the Codex SDK, enabling signature-driven agent interactions with type-safe inputs and outputs. 
543 | 544 | **Class: CodexAgent** 545 | 546 | ```python 547 | class CodexAgent(dspy.Module): 548 | def __init__( 549 | self, 550 | signature: str | type[Signature], 551 | working_directory: str, 552 | model: Optional[str] = None, 553 | sandbox_mode: Optional[SandboxMode] = None, 554 | skip_git_repo_check: bool = False, 555 | api_key: Optional[str] = None, 556 | base_url: Optional[str] = None, 557 | codex_path_override: Optional[str] = None, 558 | ) 559 | ``` 560 | 561 | **Parameters:** 562 | - `signature`: DSPy signature defining input/output interface (must have exactly 1 input field and 1 output field) 563 | - `working_directory`: Directory for agent to execute commands in 564 | - `model`: Model to use (e.g. "gpt-5.1-codex-max"); when omitted, the underlying Codex CLI default is used 565 | - `sandbox_mode`: Execution sandbox level (READ_ONLY, WORKSPACE_WRITE, DANGER_FULL_ACCESS) 566 | - `skip_git_repo_check`: Allow non-git directories as working_directory 567 | - `api_key`: OpenAI API key (falls back to CODEX_API_KEY env var) 568 | - `base_url`: API base URL (falls back to OPENAI_BASE_URL env var) 569 | - `codex_path_override`: Override path to codex binary (for testing/custom builds) 570 | 571 | **Methods:** 572 | 573 | ```python 574 | def forward(self, **kwargs) -> Prediction: 575 | """Execute agent with input message.
576 | 577 | Returns: 578 | Prediction with: 579 | - Typed output field (name from signature) 580 | - trace: list[ThreadItem] 581 | - usage: Usage 582 | """ 583 | 584 | @property 585 | def thread_id(self) -> Optional[str]: 586 | """Get thread ID for this agent instance.""" 587 | ``` 588 | 589 | **Supported Output Types:** 590 | - String types: `str`, `Optional[str]` 591 | - Pydantic types: Any `BaseModel` subclass 592 | 593 | **Thread Management:** 594 | - Each CodexAgent instance = one stateful thread 595 | - Multiple forward() calls continue the same conversation 596 | - Thread ID assigned after first forward() call 597 | - Access thread ID via `agent.thread_id` property 598 | 599 | **Example - Basic String Output:** 600 | ```python 601 | import dspy 602 | from codex_dspy import CodexAgent 603 | 604 | sig = dspy.Signature('message:str -> answer:str') 605 | agent = CodexAgent(sig, working_directory='.') 606 | result = agent(message='List files in this directory') 607 | 608 | print(result.answer) # str - final response 609 | print(result.trace) # list[ThreadItem] - chronological items 610 | print(result.usage) # Usage - token counts 611 | ``` 612 | 613 | **Example - Structured Pydantic Output:** 614 | ```python 615 | from pydantic import BaseModel 616 | import dspy 617 | from codex_dspy import CodexAgent 618 | 619 | class BugReport(BaseModel): 620 | severity: str 621 | component: str 622 | fix_steps: list[str] 623 | 624 | sig = dspy.Signature('message:str -> report:BugReport') 625 | agent = CodexAgent(sig, working_directory='.') 626 | result = agent(message='Analyze the bug in src/main.py') 627 | 628 | print(result.report.severity) # Typed access to Pydantic model 629 | print(result.report.fix_steps) 630 | print(result.trace) # Full trace of agent actions 631 | ``` 632 | 633 | **Example - Multi-turn Conversation:** 634 | ```python 635 | import dspy 636 | from codex_dspy import CodexAgent 637 | 638 | sig = dspy.Signature('message:str -> answer:str') 639 | agent = 
CodexAgent(sig, working_directory='.') 640 | 641 | # First turn 642 | result1 = agent(message="What's the repository status?") 643 | print(result1.answer) 644 | 645 | # Second turn - context is preserved 646 | result2 = agent(message="What needs to be fixed?") 647 | print(result2.answer) 648 | 649 | # Thread ID available after first call 650 | print(agent.thread_id) 651 | ``` 652 | 653 | **Return Value - Prediction:** 654 | The `forward()` method returns a `dspy.Prediction` object with: 655 | - Named output field (from signature): Either `str` or typed `BaseModel` instance 656 | - `trace`: `list[ThreadItem]` - All items from the turn (commands, file changes, reasoning, etc.) 657 | - `usage`: `Usage` - Token usage information (input_tokens, cached_input_tokens, output_tokens) 658 | 659 | **Signature Requirements:** 660 | - Must have exactly 1 input field 661 | - Must have exactly 1 output field 662 | - Output field must be either: 663 | - `str` or `Optional[str]` for text responses 664 | - A Pydantic `BaseModel` subclass for structured output 665 | 666 | --- 667 | 668 | ## 6. Exception Handling 669 | 670 | ```python 671 | from codex import ( 672 | CodexError, # Base exception 673 | UnsupportedPlatformError, # Unsupported OS/arch 674 | SpawnError, # Failed to start CLI 675 | ExecExitError, # CLI exited with error 676 | JsonParseError, # Event parsing failed 677 | ThreadRunError, # Turn execution failed 678 | SchemaValidationError, # Invalid output schema 679 | ) 680 | 681 | try: 682 | result = thread.run(prompt, TurnOptions(output_schema=schema)) 683 | except ThreadRunError as e: 684 | print(f"Turn failed: {e}") 685 | except SchemaValidationError as e: 686 | print(f"Invalid schema: {e}") 687 | except CodexError as e: 688 | print(f"SDK error: {e}") 689 | ``` 690 | 691 | --- 692 | 693 | ## 7. 
Usage Examples 694 | 695 | ### Example 1: Basic Synchronous Usage 696 | 697 | ```python 698 | from codex import Codex 699 | 700 | client = Codex() 701 | thread = client.start_thread() 702 | result = thread.run("Summarize the latest CI failure") 703 | 704 | print(f"Response: {result.final_response}") 705 | print(f"Items processed: {len(result.items)}") 706 | print(f"Tokens used: {result.usage.input_tokens}") 707 | ``` 708 | 709 | ### Example 2: Streaming with Pattern Matching 710 | 711 | ```python 712 | from codex import ( 713 | Codex, 714 | ItemCompletedEvent, 715 | CommandExecutionItem, 716 | TurnCompletedEvent, 717 | ) 718 | 719 | client = Codex() 720 | thread = client.start_thread() 721 | 722 | stream = thread.run_streamed("Fix the failing test") 723 | for event in stream: 724 | match event: 725 | case ItemCompletedEvent(item=CommandExecutionItem() as cmd): 726 | print(f"Command: {cmd.command}") 727 | print(f"Output: {cmd.aggregated_output}") 728 | case ItemCompletedEvent(item=item): 729 | print(f"Item {item.id}: {item.type}") 730 | case TurnCompletedEvent(usage=usage): 731 | print(f"Done! 
Used {usage.input_tokens} input tokens") 732 | ``` 733 | 734 | ### Example 3: Structured Output with Pydantic 735 | 736 | ```python 737 | from pydantic import BaseModel, Field 738 | from codex import Codex, TurnOptions 739 | 740 | class CodeReview(BaseModel): 741 | summary: str 742 | issues: list[str] = Field(description="List of issues found") 743 | severity: str = Field( 744 | description="critical, warning, or info" 745 | ) 746 | 747 | client = Codex() 748 | thread = client.start_thread() 749 | 750 | result = thread.run( 751 | "Review the changes in src/main.py", 752 | TurnOptions(output_schema=CodeReview) 753 | ) 754 | 755 | print(result.final_response) # JSON that conforms to CodeReview schema 756 | ``` 757 | 758 | ### Example 4: Multi-turn with Configuration 759 | 760 | ```python 761 | from codex import Codex, ThreadOptions, SandboxMode 762 | 763 | client = Codex() 764 | 765 | # Start thread with specific config 766 | thread = client.start_thread(ThreadOptions( 767 | model="gpt-5.1-codex-max", 768 | sandbox_mode=SandboxMode.WORKSPACE_WRITE, 769 | working_directory="/path/to/repo" 770 | )) 771 | 772 | # First turn 773 | result1 = thread.run("What needs to be fixed?") 774 | print(result1.final_response) 775 | 776 | # Second turn - context preserved 777 | result2 = thread.run("Implement the fix") 778 | print(result2.final_response) 779 | 780 | # Can resume later 781 | thread_id = thread.id 782 | new_client = Codex() 783 | resumed_thread = new_client.resume_thread(thread_id) 784 | result3 = resumed_thread.run("Write tests for the fix") 785 | ``` 786 | 787 | --- 788 | 789 | ## 8. 
Command Line Arguments Passed to Codex Binary 790 | 791 | The SDK builds the following CLI args when executing: 792 | 793 | ``` 794 | codex exec --experimental-json \ 795 | [--model MODEL] \ 796 | [--sandbox SANDBOX_MODE] \ 797 | [--cd WORKING_DIR] \ 798 | [--skip-git-repo-check] \ 799 | [--output-schema SCHEMA_PATH] \ 800 | [resume THREAD_ID] 801 | ``` 802 | 803 | **Key Implementation Details:** 804 | - Input is piped to stdin 805 | - Output is streamed from stdout (JSON-Lines format) 806 | - Stderr is captured for error reporting 807 | - Environment variables set: 808 | - `OPENAI_BASE_URL`: From `CodexOptions.base_url` 809 | - `CODEX_API_KEY`: From `CodexOptions.api_key` 810 | - `CODEX_INTERNAL_ORIGINATOR_OVERRIDE`: Set to `"codex_sdk_py"` 811 | 812 | --- 813 | 814 | ## 9. Binary Discovery & Platform Support 815 | 816 | The SDK auto-detects platform and locates the Codex binary: 817 | 818 | **Supported Platforms:** 819 | - Linux x86_64: `x86_64-unknown-linux-musl` 820 | - Linux ARM64: `aarch64-unknown-linux-musl` 821 | - macOS x86_64: `x86_64-apple-darwin` 822 | - macOS ARM64: `aarch64-apple-darwin` 823 | - Windows x86_64: `x86_64-pc-windows-msvc` 824 | - Windows ARM64: `aarch64-pc-windows-msvc` 825 | 826 | **Binary Discovery:** 827 | 828 | Binaries are **not yet vendored** in this repository. The SDK searches for the `codex` binary in the following order: 829 | 1. `codex_path_override` parameter if provided 830 | 2. 
System PATH 831 | 832 | **Expected vendor location (future):** 833 | ``` 834 | src/codex/vendor/{target}/codex[.exe] 835 | ``` 836 | 837 | **Override via Parameter:** 838 | ```python 839 | from codex import Codex, CodexOptions 840 | 841 | client = Codex(CodexOptions( 842 | codex_path_override="/path/to/custom/codex" 843 | )) 844 | ``` 845 | 846 | **Override via CodexAgent:** 847 | ```python 848 | from codex_dspy import CodexAgent 849 | 850 | agent = CodexAgent( 851 | signature='message:str -> answer:str', 852 | working_directory='.', 853 | codex_path_override="/path/to/custom/codex" 854 | ) 855 | ``` 856 | 857 | **Implementation:** 858 | See `src/codex/discovery.py` for platform detection and binary discovery logic. 859 | 860 | --- 861 | 862 | ## 10. Type Safety & Schema Handling 863 | 864 | The SDK provides full type safety: 865 | 866 | - All events are **frozen dataclasses** (immutable) 867 | - All configuration objects are frozen 868 | - Thread items are **union types** - use `isinstance()` for type narrowing 869 | - Output schema supports: 870 | - **Dict-based JSON Schema** (standard format) 871 | - **Pydantic models** (automatically converted to JSON Schema) 872 | - **Pydantic model instances** (uses `.model_json_schema()`) 873 | 874 | Schema handling: 875 | ```python 876 | # Automatic Pydantic conversion 877 | from pydantic import BaseModel 878 | 879 | class MyModel(BaseModel): 880 | field: str 881 | 882 | # Both work: 883 | TurnOptions(output_schema=MyModel) # Pass class 884 | TurnOptions(output_schema=MyModel()) # Pass instance 885 | 886 | # Raw JSON Schema also works 887 | TurnOptions(output_schema={"type": "object", "properties": {...}}) 888 | ``` 889 | 890 | --- 891 | 892 | ## Summary Table: API Surface 893 | 894 | | Component | Purpose | Key Methods/Properties | 895 | |-----------|---------|----------------------| 896 | | **Codex** | Client initialization | `start_thread()`, `resume_thread()` | 897 | | **CodexOptions** |
Global config | `codex_path_override`, `base_url`, `api_key` | 898 | | **Thread** | Conversation session | `run()`, `run_streamed()`, `id` property | 899 | | **ThreadOptions** | Thread config | `model`, `sandbox_mode`, `working_directory`, `skip_git_repo_check` | 900 | | **TurnOptions** | Turn config | `output_schema` | 901 | | **ThreadRunResult** | Sync result | `items`, `final_response`, `usage` | 902 | | **ThreadStream** | Streaming result | Iterator of `ThreadEvent` | 903 | | **Events** | Lifecycle tracking | `ThreadStartedEvent`, `TurnCompletedEvent`, `ItemCompletedEvent`, etc. | 904 | | **ThreadItems** | Work artifacts | `AgentMessageItem`, `CommandExecutionItem`, `FileChangeItem`, `McpToolCallItem`, etc. | 905 | | **Exceptions** | Error handling | `ThreadRunError`, `SchemaValidationError`, `CodexError` | 906 | | **CodexAgent** | DSPy wrapper | `forward()`, `thread_id` property | 907 | | **ApprovalMode** | Approval config | `NEVER`, `ON_REQUEST`, `ON_FAILURE`, `UNTRUSTED` | 908 | | **SandboxMode** | Sandbox config | `READ_ONLY`, `WORKSPACE_WRITE`, `DANGER_FULL_ACCESS` | 909 | 910 | --------------------------------------------------------------------------------