├── images
    ├── TTS_framework.jpg
    ├── codefuse_logo.png
    ├── CFuse_Architecture.png
    └── oracle_vs_adversary_performance.png
├── codefuse
    ├── tools
    │   ├── utils
    │   │   ├── ripgrep
    │   │   │   ├── arm64-linux
    │   │   │   │   └── rg
    │   │   │   ├── x64-darwin
    │   │   │   │   └── rg
    │   │   │   ├── x64-linux
    │   │   │   │   └── rg
    │   │   │   ├── arm64-darwin
    │   │   │   │   └── rg
    │   │   │   ├── x64-win32
    │   │   │   │   └── rg.exe
    │   │   │   └── COPYING
    │   │   ├── __init__.py
    │   │   └── ripgrep.py
    │   ├── __init__.py
    │   ├── builtin
    │   │   ├── __init__.py
    │   │   ├── write_file.py
    │   │   ├── filesystem_base.py
    │   │   ├── read_file.py
    │   │   ├── glob.py
    │   │   └── edit_file.py
    │   ├── base.py
    │   └── registry.py
    ├── cli
    │   ├── __init__.py
    │   ├── headless.py
    │   ├── interactive.py
    │   └── main.py
    ├── observability
    │   ├── metrics
    │   │   ├── __init__.py
    │   │   ├── models.py
    │   │   └── trackers.py
    │   ├── logging
    │   │   ├── utils.py
    │   │   ├── __init__.py
    │   │   └── setup.py
    │   ├── llm_messages.py
    │   ├── __init__.py
    │   ├── trajectory.py
    │   └── http_logger.py
    ├── llm
    │   ├── __init__.py
    │   ├── exceptions.py
    │   ├── providers
    │   │   ├── gemini.py
    │   │   └── anthropic.py
    │   ├── retry.py
    │   ├── factory.py
    │   └── base.py
    ├── core
    │   ├── __init__.py
    │   ├── read_tracker.py
    │   ├── remote_tool_executor.py
    │   ├── agent_config.py
    │   └── environment.py
    ├── __init__.py
    └── config.py
├── MANIFEST.in
├── requirements.txt
├── .cfuse.yaml
├── .gitignore
├── LICENSE
├── pyproject.toml
└── README.md

/images/TTS_framework.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefuse-ai/CodeFuse-Agent/main/images/TTS_framework.jpg
--------------------------------------------------------------------------------
/images/codefuse_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefuse-ai/CodeFuse-Agent/main/images/codefuse_logo.png
--------------------------------------------------------------------------------
/images/CFuse_Architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefuse-ai/CodeFuse-Agent/main/images/CFuse_Architecture.png
--------------------------------------------------------------------------------
/codefuse/tools/utils/ripgrep/arm64-linux/rg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefuse-ai/CodeFuse-Agent/main/codefuse/tools/utils/ripgrep/arm64-linux/rg
--------------------------------------------------------------------------------
/codefuse/tools/utils/ripgrep/x64-darwin/rg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefuse-ai/CodeFuse-Agent/main/codefuse/tools/utils/ripgrep/x64-darwin/rg
--------------------------------------------------------------------------------
/codefuse/tools/utils/ripgrep/x64-linux/rg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefuse-ai/CodeFuse-Agent/main/codefuse/tools/utils/ripgrep/x64-linux/rg
--------------------------------------------------------------------------------
/images/oracle_vs_adversary_performance.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefuse-ai/CodeFuse-Agent/main/images/oracle_vs_adversary_performance.png
--------------------------------------------------------------------------------
/codefuse/tools/utils/ripgrep/arm64-darwin/rg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefuse-ai/CodeFuse-Agent/main/codefuse/tools/utils/ripgrep/arm64-darwin/rg
--------------------------------------------------------------------------------
/codefuse/tools/utils/ripgrep/x64-win32/rg.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefuse-ai/CodeFuse-Agent/main/codefuse/tools/utils/ripgrep/x64-win32/rg.exe
--------------------------------------------------------------------------------
/codefuse/tools/utils/ripgrep/COPYING:
--------------------------------------------------------------------------------
1 | This project is dual-licensed under the Unlicense and MIT licenses.
2 |
3 | You may use this code under the terms of either license.
4 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include README.md
2 | include LICENSE
3 | include requirements.txt
4 | include requirements-dev.txt
5 | recursive-include codefuse/tools/utils/ripgrep *
6 | recursive-include codefuse/cli/templates *.html
7 |
8 |
--------------------------------------------------------------------------------
/codefuse/tools/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Utility modules for tools
 3 | """
 4 |
 5 | from codefuse.tools.utils.ripgrep import find_ripgrep, execute_ripgrep
 6 |
 7 | __all__ = [
 8 |     "find_ripgrep",
 9 |     "execute_ripgrep",
10 | ]
11 |
--------------------------------------------------------------------------------
/codefuse/cli/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | CLI Module
 3 | """
 4 |
 5 | from codefuse.cli.main import main
 6 | from codefuse.cli.headless import run_headless
 7 | from codefuse.cli.interactive import run_interactive
 8 |
 9 | __all__ = ["main", "run_headless", "run_interactive"]
10 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | openai>=1.0.0
 2 | rich>=13.0.0
 3 | prompt-toolkit>=3.0.0
 4 | pydantic>=2.0.0
 5 | pyyaml>=6.0
 6 | click>=8.0.0
 7 | structlog>=23.0.0
 8 | requests>=2.31.0  # For remote tool execution
 9 | flask>=3.0.0  # Production HTTP server
10 | gunicorn>=21.0.0  # WSGI server for multi-process
11 | dnspython>=2.6.1  # Security fix for gunicorn dependency
12 | prometheus-client>=0.19.0  # Metrics collection
13 |
14 |
--------------------------------------------------------------------------------
/.cfuse.yaml:
--------------------------------------------------------------------------------
 1 | llm:
 2 |   provider: openai_compatible
 3 |   model: ${LLM_MODEL}
 4 |   api_key: ${OPENAI_API_KEY}
 5 |   base_url: ${LLM_BASE_URL}
 6 |   temperature: 0.0
 7 |   max_tokens: null
 8 |   timeout: 60
 9 |
10 | agent_config:
11 |   max_iterations: 200
12 |   max_context_tokens: 128000
13 |   enable_tools: true
14 |   yolo: false
15 |   agent: default
16 |
17 | logging:
18 |   logs_dir: ~/.cfuse/logs
19 |   verbose: false
--------------------------------------------------------------------------------
/codefuse/tools/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Tools Module - Built-in tools and tool registry
 3 | """
 4 |
 5 | from codefuse.tools.base import (
 6 |     BaseTool,
 7 |     ToolDefinition,
 8 |     ToolParameter,
 9 |     ToolResult,
10 | )
11 | from codefuse.tools.registry import ToolRegistry
12 |
13 | __all__ = [
14 |     "BaseTool",
15 |     "ToolDefinition",
16 |     "ToolParameter",
17 |     "ToolResult",
18 |     "ToolRegistry",
19 | ]
20 |
21 |
--------------------------------------------------------------------------------
/codefuse/tools/builtin/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Built-in Tools
 3 | """
 4 |
 5 | from codefuse.tools.builtin.read_file import ReadFileTool
 6 | from codefuse.tools.builtin.write_file import WriteFileTool
 7 | from codefuse.tools.builtin.edit_file import EditFileTool
 8 | from codefuse.tools.builtin.list_directory import ListDirectoryTool
 9 | from codefuse.tools.builtin.grep import GrepTool
10 | from codefuse.tools.builtin.glob import GlobTool
11 | from codefuse.tools.builtin.bash import BashTool
12 |
13 | __all__ = [
14 |     "ReadFileTool",
15 |     "WriteFileTool",
16 |     "EditFileTool",
17 |     "ListDirectoryTool",
18 |     "GrepTool",
19 |     "GlobTool",
20 |     "BashTool",
21 | ]
22 |
23 |
--------------------------------------------------------------------------------
/codefuse/observability/metrics/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Metrics Module - Hierarchical metrics collection for Agent sessions
 3 | """
 4 |
 5 | from .models import (
 6 |     ToolCallMetric,
 7 |     APICallMetric,
 8 |     PromptMetric,
 9 |     SessionMetric,
10 | )
11 | from .trackers import (
12 |     ToolCallTracker,
13 |     APICallTracker,
14 |     PromptTracker,
15 | )
16 | from .collector import MetricsCollector
17 |
18 | __all__ = [
19 |     # Models
20 |     "ToolCallMetric",
21 |     "APICallMetric",
22 |     "PromptMetric",
23 |     "SessionMetric",
24 |     # Trackers
25 |     "ToolCallTracker",
26 |     "APICallTracker",
27 |     "PromptTracker",
28 |     # Collector
29 |     "MetricsCollector",
30 | ]
31 |
32 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Python
 2 | __pycache__/
 3 | *.py[cod]
 4 | *$py.class
 5 | *.so
 6 | .Python
 7 | build/
 8 | develop-eggs/
 9 | dist/
10 | downloads/
11 | eggs/
12 | .eggs/
13 | lib/
14 | lib64/
15 | parts/
16 | sdist/
17 | var/
18 | wheels/
19 | *.egg-info/
20 | .installed.cfg
21 | *.egg
22 |
23 | # Virtual Environment
24 | venv/
25 | env/
26 | ENV/
27 | .venv
28 |
29 | # IDE
30 | .vscode/
31 | .idea/
32 | *.swp
33 | *.swo
34 | *~
35 | .DS_Store
36 |
37 | # Testing
38 | .pytest_cache/
39 | .coverage
40 | htmlcov/
41 | .tox/
42 |
43 | # Logs
44 | *.log
45 | logs/
46 | *.jsonl
47 |
48 | # Config
49 | .codefuse.yaml
50 | !.codefuse.yaml.example
51 |
52 | # Trajectory data
53 | trajectories/
54 | sessions/
55 |
56 | # Type checking
57 | .mypy_cache/
58 | .pytype/
59 |
--------------------------------------------------------------------------------
/codefuse/observability/logging/utils.py:
--------------------------------------------------------------------------------
 1 | """Utility functions for logging module"""
 2 |
 3 | import os
 4 |
 5 |
 6 | def path_to_slug(path: str) -> str:
 7 |     """
 8 |     Convert file path to slug suitable for directory names
 9 |
10 |     Examples:
11 |         /Users/mingmu/projects/app -> Users-mingmu-projects-app
12 |         /home/user/my project -> home-user-my_project
13 |     """
14 |     abs_path = os.path.abspath(path)
15 |
16 |     # Remove leading slash
17 |     if abs_path.startswith('/'):
18 |         abs_path = abs_path[1:]
19 |
20 |     # Replace slashes and spaces
21 |     slug = abs_path.replace('/', '-').replace(' ', '_')
22 |
23 |     # Handle Windows drive letters (C: -> C-)
24 |     if os.name == 'nt' and ':' in slug:
25 |         slug = slug.replace(':', '-')
26 |
27 |     return slug
28 |
--------------------------------------------------------------------------------
/codefuse/observability/logging/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Logging Module - Unified logging using structlog
 3 |
 4 | Provides mainLogger for debug logging:
 5 | - mainLogger: Fine-grained debug logs (file only, append mode)
 6 |
 7 | Trajectory and LLM messages are now handled by dedicated writers
 8 | in the observability module.
 9 |
10 | Usage:
11 |     from codefuse.observability.logging import setup_logging, mainLogger
12 |
13 |     setup_logging(session_id="session-123", verbose=True)
14 |
15 |     # Simple logging with structured data
16 |     mainLogger.info("tool executed", tool="read_file", duration=0.5)
17 |
18 |     # With context binding
19 |     request_logger = mainLogger.bind(request_id="req-123")
20 |     request_logger.info("processing step", step=1)
21 | """
22 |
23 | from .setup import (
24 |     setup_logging,
25 |     mainLogger,
26 |     get_session_dir,
27 |     close_all_loggers,
28 | )
29 |
30 | __all__ = [
31 |     "setup_logging",
32 |     "mainLogger",
33 |     "get_session_dir",
34 |     "close_all_loggers",
35 | ]
36 |
--------------------------------------------------------------------------------
/codefuse/llm/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | LLM Module - Unified interface for various language models
 3 | """
 4 |
 5 | from codefuse.llm.base import (
 6 |     BaseLLM,
 7 |     Message,
 8 |     MessageRole,
 9 |     ContentBlock,
10 |     Tool,
11 |     ToolCall,
12 |     TokenUsage,
13 |     LLMResponse,
14 |     LLMChunk,
15 | )
16 | from codefuse.llm.factory import create_llm
17 | from codefuse.llm.exceptions import (
18 |     LLMError,
19 |     RetryableError,
20 |     TimeoutError,
21 |     RateLimitError,
22 |     APIError,
23 |     AuthenticationError,
24 |     ContextLengthExceededError,
25 | )
26 |
27 | __all__ = [
28 |     # Base classes and data structures
29 |     "BaseLLM",
30 |     "Message",
31 |     "MessageRole",
32 |     "ContentBlock",
33 |     "Tool",
34 |     "ToolCall",
35 |     "TokenUsage",
36 |     "LLMResponse",
37 |     "LLMChunk",
38 |     # Factory
39 |     "create_llm",
40 |     # Exceptions
41 |     "LLMError",
42 |     "RetryableError",
43 |     "TimeoutError",
44 |     "RateLimitError",
45 |     "APIError",
46 |     "AuthenticationError",
47 |     "ContextLengthExceededError",
48 | ]
49 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 |
 3 | Copyright (c) 2025 CodeFuse Team
 4 |
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
23 |
--------------------------------------------------------------------------------
/codefuse/core/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Core Module - Agent loop, context engine (unified context and session management)
 3 | """
 4 |
 5 | from codefuse.core.environment import EnvironmentInfo
 6 | from codefuse.core.context_engine import ContextEngine
 7 | from codefuse.core.agent_config import AgentProfile, AgentProfileManager
 8 | from codefuse.core.read_tracker import ReadTracker
 9 | from codefuse.core.agent_loop import AgentLoop, AgentEvent
10 | from codefuse.core.tool_executor import ToolExecutor
11 | from codefuse.observability import (
12 |     setup_logging,
13 |     mainLogger,
14 |     get_session_dir,
15 |     close_all_loggers,
16 |     MetricsCollector,
17 | )
18 |
19 | # For backward compatibility, Session is now an alias to ContextEngine
20 | Session = ContextEngine
21 |
22 | __all__ = [
23 |     "EnvironmentInfo",
24 |     "ContextEngine",
25 |     "Session",  # Alias for backward compatibility
26 |     "ReadTracker",
27 |     "AgentProfile",
28 |     "AgentProfileManager",
29 |     "AgentLoop",
30 |     "AgentEvent",
31 |     "ToolExecutor",
32 |     "setup_logging",
33 |     "mainLogger",
34 |     "get_session_dir",
35 |     "close_all_loggers",
36 |     "MetricsCollector",
37 | ]
38 |
--------------------------------------------------------------------------------
/codefuse/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | CodeFuse Agent - A lightweight, high-performance AI programming assistant framework
 3 | """
 4 |
 5 | import importlib.metadata
 6 |
 7 | try:
 8 |     __version__ = importlib.metadata.version("cfuse")
 9 | except importlib.metadata.PackageNotFoundError:
10 |     # Development mode fallback
11 |     __version__ = "0.1.0"
12 |
13 | from codefuse.llm import create_llm, Message, MessageRole, Tool, LLMResponse
14 | from codefuse.tools import BaseTool, ToolDefinition, ToolParameter, ToolResult, ToolRegistry
15 | from codefuse.core import (
16 |     EnvironmentInfo,
17 |     Session,
18 |     AgentProfile,
19 |     AgentProfileManager,
20 |     ContextEngine,
21 |     AgentLoop,
22 |     AgentEvent,
23 | )
24 | from codefuse.config import Config
25 |
26 | __all__ = [
27 |     # LLM
28 |     "create_llm",
29 |     "Message",
30 |     "MessageRole",
31 |     "Tool",
32 |     "LLMResponse",
33 |     # Tools
34 |     "BaseTool",
35 |     "ToolDefinition",
36 |     "ToolParameter",
37 |     "ToolResult",
38 |     "ToolRegistry",
39 |     # Core
40 |     "EnvironmentInfo",
41 |     "Session",
42 |     "AgentProfile",
43 |     "AgentProfileManager",
44 |     "ContextEngine",
45 |     "AgentLoop",
46 |     "AgentEvent",
47 |     # Config
48 |     "Config",
49 | ]
50 |
--------------------------------------------------------------------------------
/codefuse/observability/llm_messages.py:
--------------------------------------------------------------------------------
 1 | """
 2 | LLM Messages Writer - Records latest LLM messages snapshot
 3 | """
 4 |
 5 | import json
 6 | from datetime import datetime, timezone
 7 | from pathlib import Path
 8 | from typing import Dict, Any
 9 |
10 |
11 | class LLMMessagesWriter:
12 |     """
13 |     Writes the latest LLM messages snapshot to a JSON file
14 |
15 |     This writer uses overwrite mode, keeping only the most recent state.
16 |     Useful for debugging and inspecting the current conversation context.
17 |     """
18 |
19 |     def __init__(self, file_path: Path):
20 |         """
21 |         Initialize LLM messages writer
22 |
23 |         Args:
24 |             file_path: Path to the LLM messages JSON file
25 |         """
26 |         self.file_path = Path(file_path)
27 |         self.file_path.parent.mkdir(parents=True, exist_ok=True)
28 |
29 |     def write(self, formatted_data: Dict[str, Any]):
30 |         """
31 |         Write LLM messages snapshot (overwrites existing file)
32 |
33 |         Args:
34 |             formatted_data: Formatted data containing messages and tools
35 |                 Expected keys: 'messages', 'tools', and optionally 'session_id'
36 |         """
37 |         # Build complete snapshot
38 |         snapshot = {
39 |             'timestamp': datetime.now(timezone.utc).isoformat(),
40 |             **formatted_data
41 |         }
42 |
43 |         # Write to file (overwrite mode)
44 |         with open(self.file_path, 'w', encoding='utf-8') as f:
45 |             json.dump(snapshot, f, ensure_ascii=False, indent=2)
46 |
47 |     def close(self):
48 |         """Close method for consistency (no-op for this writer)"""
49 |         pass
50 |
51 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [project]
 2 | name = "cfuse"
 3 | version = "0.1.0"
 4 | description = "CodeFuse-Agent: A lightweight, high-performance AI programming assistant framework"
 5 | authors = [{name = "CodeFuse Team"}]
 6 | readme = "README.md"
 7 | requires-python = ">=3.10"
 8 | license = {text = "MIT"}
 9 |
10 | dependencies = [
11 |     "openai>=1.0.0",
12 |     "rich>=13.0.0",
13 |     "prompt-toolkit>=3.0.0",
14 |     "pydantic>=2.0.0",
15 |     "pyyaml>=6.0",
16 |     "click>=8.0.0",
17 |     "structlog>=23.0.0",
18 |     "flask>=3.0.0",
19 |     "gunicorn>=21.0.0",
20 |     "dnspython>=2.6.1",
21 |     "prometheus-client>=0.19.0",
22 | ]
23 |
24 | [project.optional-dependencies]
25 | dev = [
26 |     "pytest>=7.0.0",
27 |     "pytest-asyncio>=0.21.0",
28 |     "black>=23.0.0",
29 |     "ruff>=0.1.0",
30 |     "mypy>=1.0.0",
31 | ]
32 |
33 | anthropic = [
34 |     "anthropic>=0.8.0",
35 | ]
36 |
37 | gemini = [
38 |     "google-generativeai>=0.3.0",
39 | ]
40 |
41 | # Optional: Python-based ripgrep as fallback
42 | ripgrep = [
43 |     "ripgrep-python>=0.1.0",
44 | ]
45 |
46 | [project.scripts]
47 | cfuse = "codefuse.cli.main:main"
48 |
49 | [build-system]
50 | requires = ["hatchling"]
51 | build-backend = "hatchling.build"
52 |
53 | [tool.hatch.build.targets.wheel]
54 | packages = ["codefuse"]
55 |
56 | [tool.hatch.build.targets.wheel.force-include]
57 | "codefuse/tools/utils/ripgrep" = "codefuse/tools/utils/ripgrep"
58 | "codefuse/cli/templates" = "codefuse/cli/templates"
59 |
60 | [tool.black]
61 | line-length = 100
62 | target-version = ['py310']
63 |
64 | [tool.ruff]
65 | line-length = 100
66 | select = ["E", "F", "I", "N", "W"]
67 |
68 | [tool.mypy]
69 | python_version = "3.10"
70 | warn_return_any = true
71 | warn_unused_configs = true
72 | ignore_missing_imports = true
73 |
74 |
--------------------------------------------------------------------------------
/codefuse/observability/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Observability Module - Logging, Trajectory, and Metrics for CodeFuse Agent
 3 |
 4 | This module provides comprehensive observability capabilities including:
 5 | - Debug logging using structlog (JSONL format)
 6 | - Trajectory recording (event stream in JSONL)
 7 | - LLM messages snapshots (JSON)
 8 | - Hierarchical metrics collection
 9 | - Session tracking and analysis
10 | - HTTP server logging with rotation and cleanup
11 | """
12 |
13 | # Logging exports
14 | from .logging import (
15 |     setup_logging,
16 |     mainLogger,
17 |     get_session_dir,
18 |     close_all_loggers,
19 | )
20 |
21 | # HTTP logging exports
22 | from .http_logger import (
23 |     HTTPLogger,
24 |     create_http_logger,
25 | )
26 |
27 | # Writer exports
28 | from .trajectory import TrajectoryWriter
29 | from .llm_messages import LLMMessagesWriter
30 |
31 | # Metrics exports
32 | from .metrics import (
33 |     # Models
34 |     ToolCallMetric,
35 |     APICallMetric,
36 |     PromptMetric,
37 |     SessionMetric,
38 |     # Trackers
39 |     ToolCallTracker,
40 |     APICallTracker,
41 |     PromptTracker,
42 |     # Collector
43 |     MetricsCollector,
44 | )
45 |
46 | __all__ = [
47 |     # Logging
48 |     "setup_logging",
49 |     "mainLogger",
50 |     "get_session_dir",
51 |     "close_all_loggers",
52 |     # HTTP Logging
53 |     "HTTPLogger",
54 |     "create_http_logger",
55 |     # Writers
56 |     "TrajectoryWriter",
57 |     "LLMMessagesWriter",
58 |     # Metrics - Models
59 |     "ToolCallMetric",
60 |     "APICallMetric",
61 |     "PromptMetric",
62 |     "SessionMetric",
63 |     # Metrics - Trackers
64 |     "ToolCallTracker",
65 |     "APICallTracker",
66 |     "PromptTracker",
67 |     # Metrics - Collector
68 |     "MetricsCollector",
69 | ]
70 |
71 |
--------------------------------------------------------------------------------
/codefuse/observability/metrics/models.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Metrics Models - Data classes for hierarchical metrics
 3 | """
 4 |
 5 | from dataclasses import dataclass, field
 6 | from typing import Optional, List, Dict, Any
 7 |
 8 |
 9 | @dataclass
10 | class ToolCallMetric:
11 |     """Metrics for a single tool call"""
12 |     tool_call_id: str
13 |     tool_name: str
14 |     start_time: str
15 |     end_time: Optional[str] = None
16 |     duration: Optional[float] = None  # seconds
17 |     success: bool = True
18 |     error: Optional[str] = None
19 |     arguments: Optional[Dict[str, Any]] = None
20 |
21 |
22 | @dataclass
23 | class APICallMetric:
24 |     """Metrics for a single API call"""
25 |     api_id: str
26 |     start_time: str
27 |     end_time: Optional[str] = None
28 |     duration: Optional[float] = None  # seconds
29 |     prompt_tokens: Optional[int] = None
30 |     completion_tokens: Optional[int] = None
31 |     total_tokens: Optional[int] = None
32 |     cache_creation_tokens: Optional[int] = None
33 |     cache_read_tokens: Optional[int] = None
34 |     success: bool = True
35 |     error: Optional[str] = None
36 |     model: Optional[str] = None
37 |     finish_reason: Optional[str] = None
38 |
39 |
40 | @dataclass
41 | class PromptMetric:
42 |     """Metrics for a single user prompt/query"""
43 |     prompt_id: str
44 |     user_query: str
45 |     start_time: str
46 |     end_time: Optional[str] = None
47 |     duration: Optional[float] = None  # seconds
48 |     iterations: int = 0
49 |     api_calls: List[APICallMetric] = field(default_factory=list)
50 |     tool_calls: List[ToolCallMetric] = field(default_factory=list)
51 |
52 |
53 | @dataclass
54 | class SessionMetric:
55 |     """Metrics for entire session"""
56 |     session_id: str
57 |     start_time: str
58 |     end_time: Optional[str] = None
59 |     duration: Optional[float] = None  # seconds
60 |     total_prompts: int = 0
61 |     prompts: List[PromptMetric] = field(default_factory=list)
62 |
--------------------------------------------------------------------------------
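The four dataclasses above nest bottom-up: ToolCallMetric and APICallMetric records hang off a PromptMetric, and PromptMetric records hang off a SessionMetric. The MetricsCollector that normally wires them together (imported from .collector in the metrics __init__) is not part of this dump, so the sketch below (illustrative only, with made-up IDs) drives the models and the trackers from trackers.py directly:

from datetime import datetime, timezone

from codefuse.observability.metrics.models import PromptMetric, ToolCallMetric
from codefuse.observability.metrics.trackers import ToolCallTracker

now = datetime.now(timezone.utc).isoformat()

# One record per user prompt; the caller (normally the collector) attaches
# tool-call records to it.
prompt = PromptMetric(prompt_id="p-1", user_query="read the config", start_time=now)
tool_metric = ToolCallMetric(tool_call_id="t-1", tool_name="read_file", start_time=now)
prompt.tool_calls.append(tool_metric)

# On exit the tracker stamps end_time and duration, and records any raised
# exception as a failure without suppressing it.
with ToolCallTracker(tool_metric, parent_prompt=prompt) as tracker:
    tracker.set_success()

print(prompt.tool_calls[0].duration)  # wall-clock seconds for the call

--------------------------------------------------------------------------------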
/codefuse/core/read_tracker.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Read Tracker - Tracks which files have been read in the current session
 3 |
 4 | This module provides file read tracking for edit tool validation.
 5 | The edit_file tool requires files to be read before editing to prevent
 6 | accidental modifications to files the agent hasn't seen.
 7 | """
 8 |
 9 | from pathlib import Path
10 | from typing import Set
11 |
12 | from codefuse.observability import mainLogger
13 |
14 |
15 | class ReadTracker:
16 |     """
17 |     File read tracker for edit tool validation
18 |
19 |     Tracks which files have been read in the current session.
20 |     Used by EditFileTool to ensure files are read before editing.
21 |     """
22 |
23 |     def __init__(self):
24 |         """Initialize empty read tracker"""
25 |         self._read_files: Set[str] = set()
26 |
27 |     def mark_as_read(self, file_path: str) -> None:
28 |         """
29 |         Mark a file as having been read
30 |
31 |         Args:
32 |             file_path: Path to the file that was read
33 |         """
34 |         resolved_path = str(Path(file_path).resolve())
35 |         self._read_files.add(resolved_path)
36 |         mainLogger.debug("Marked file as read", file_path=resolved_path)
37 |
38 |     def is_read(self, file_path: str) -> bool:
39 |         """
40 |         Check if a file has been read
41 |
42 |         Args:
43 |             file_path: Path to check
44 |
45 |         Returns:
46 |             True if the file has been read, False otherwise
47 |         """
48 |         resolved_path = str(Path(file_path).resolve())
49 |         return resolved_path in self._read_files
50 |
51 |     def clear(self) -> None:
52 |         """
53 |         Clear the read file tracking
54 |
55 |         This can be used to reset the tracking state, for example
56 |         when starting a new user query.
57 |         """
58 |         self._read_files.clear()
59 |         mainLogger.debug("Cleared read file tracking")
60 |
61 |
--------------------------------------------------------------------------------
/codefuse/llm/exceptions.py:
--------------------------------------------------------------------------------
 1 | """
 2 | LLM Exception Classes
 3 | """
 4 |
 5 | from typing import Optional
 6 |
 7 |
 8 | class LLMError(Exception):
 9 |     """Base exception for all LLM-related errors"""
10 |     pass
11 |
12 |
13 | class RetryableError(LLMError):
14 |     """Base class for errors that can be retried"""
15 |     pass
16 |
17 |
18 | class TimeoutError(RetryableError):
19 |     """Request timeout error - will be retried"""
20 |     def __init__(self, message: str, original_error: Optional[Exception] = None):
21 |         super().__init__(message)
22 |         self.original_error = original_error
23 |
24 |
25 | class RateLimitError(RetryableError):
26 |     """Rate limit exceeded error - will be retried with backoff"""
27 |     def __init__(
28 |         self,
29 |         message: str,
30 |         retry_after: Optional[float] = None,
31 |         original_error: Optional[Exception] = None
32 |     ):
33 |         super().__init__(message)
34 |         self.retry_after = retry_after  # Seconds to wait before retry
35 |         self.original_error = original_error
36 |
37 |
38 | class APIError(LLMError):
39 |     """General API error - not retryable"""
40 |     def __init__(
41 |         self,
42 |         message: str,
43 |         status_code: Optional[int] = None,
44 |         original_error: Optional[Exception] = None
45 |     ):
46 |         super().__init__(message)
47 |         self.status_code = status_code
48 |         self.original_error = original_error
49 |
50 |
51 | class AuthenticationError(LLMError):
52 |     """Authentication failed - invalid API key or credentials"""
53 |     pass
54 |
55 |
56 | class ContextLengthExceededError(LLMError):
57 |     """Context length exceeded the model's maximum"""
58 |     def __init__(self, message: str, max_tokens: Optional[int] = None):
59 |         super().__init__(message)
60 |         self.max_tokens = max_tokens
61 |
62 |
63 | class InvalidRequestError(LLMError):
64 |     """Invalid request parameters"""
65 |     pass
66 |
67 |
68 | class ModelNotFoundError(LLMError):
69 |     """Requested model not found or not available"""
70 |     pass
71 |
72 |
--------------------------------------------------------------------------------
/codefuse/llm/providers/gemini.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Google Gemini LLM Implementation (Placeholder)
 3 |
 4 | To be implemented with Gemini-specific features:
 5 | - Native Gemini SDK integration
 6 | - Gemini-specific parameters
 7 | """
 8 |
 9 | import logging
10 | from typing import List, Optional, Union, Iterator
11 |
12 | from codefuse.llm.base import BaseLLM, Message, Tool, LLMResponse, LLMChunk
13 |
14 | from codefuse.observability import mainLogger
15 |
16 |
17 | class GeminiLLM(BaseLLM):
18 |     """
19 |     Google Gemini LLM implementation
20 |
21 |     TODO: Implement Gemini-specific features:
22 |     - Native Gemini SDK integration
23 |     - Gemini-specific parameters and settings
24 |     - Function calling format conversion
25 |     - Multimodal support
26 |     """
27 |
28 |     def __init__(self, **kwargs):
29 |         """Initialize Gemini client"""
30 |         super().__init__(**kwargs)
31 |         mainLogger.warning(
32 |             "GeminiLLM is not yet implemented. "
33 |             "Please use OpenAICompatibleLLM or implement this class."
34 |         )
35 |         raise NotImplementedError(
36 |             "GeminiLLM is a placeholder. "
37 |             "Use provider='openai_compatible' for now."
38 |         )
39 |
40 |     @property
41 |     def supports_prompt_caching(self) -> bool:
42 |         """Check if Gemini supports caching"""
43 |         return False  # TODO: Verify Gemini's caching capabilities
44 |
45 |     @property
46 |     def supports_parallel_tools(self) -> bool:
47 |         """Check if Gemini supports parallel function calls"""
48 |         return True  # TODO: Verify Gemini's parallel tool support
49 |
50 |     def generate(
51 |         self,
52 |         messages: List[Message],
53 |         tools: Optional[List[Tool]] = None,
54 |         temperature: Optional[float] = None,
55 |         max_tokens: Optional[int] = None,
56 |         stream: bool = False,
57 |         **kwargs
58 |     ) -> Union[LLMResponse, Iterator[LLMChunk]]:
59 |         """Generate completion using Gemini API"""
60 |         raise NotImplementedError("GeminiLLM.generate() not yet implemented")
61 |
62 |
--------------------------------------------------------------------------------
/codefuse/observability/trajectory.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Trajectory Writer - Records agent execution events to JSONL format
 3 | """
 4 |
 5 | import json
 6 | from datetime import datetime, timezone
 7 | from pathlib import Path
 8 | from typing import Optional, Dict, Any
 9 |
10 |
11 | class TrajectoryWriter:
12 |     """
13 |     Writes agent execution trajectory events to a JSONL file
14 |
15 |     Each event is a JSON object on a separate line, enabling:
16 |     - Streaming writes (append-only)
17 |     - Real-time monitoring (tail -f)
18 |     - Easy parsing (line-by-line)
19 |     """
20 |
21 |     def __init__(self, file_path: Path):
22 |         """
23 |         Initialize trajectory writer
24 |
25 |         Args:
26 |             file_path: Path to the trajectory JSONL file
27 |         """
28 |         self.file_path = Path(file_path)
29 |         self._file_handle: Optional[Any] = None
30 |         self._opened = False
31 |
32 |     def _ensure_open(self):
33 |         """Ensure file handle is open"""
34 |         if not self._opened:
35 |             self.file_path.parent.mkdir(parents=True, exist_ok=True)
36 |             self._file_handle = open(self.file_path, 'a', encoding='utf-8')
37 |             self._opened = True
38 |
39 |     def write(self, event_data: Dict[str, Any]):
40 |         """
41 |         Write a single event to the trajectory file
42 |
43 |         Automatically adds timestamp if not present.
44 |
45 |         Args:
46 |             event_data: Event data dictionary
47 |         """
48 |         self._ensure_open()
49 |
50 |         # Add timestamp if not present
51 |         if 'timestamp' not in event_data:
52 |             event_data['timestamp'] = datetime.now(timezone.utc).isoformat()
53 |
54 |         # Write as single line JSON
55 |         json_line = json.dumps(event_data, ensure_ascii=False)
56 |         self._file_handle.write(json_line + '\n')
57 |         self._file_handle.flush()
58 |
59 |     def write_summary(self, summary_data: Dict[str, Any]):
60 |         """
61 |         Write session summary event
62 |
63 |         This is typically called at the end of a session.
64 |
65 |         Args:
66 |             summary_data: Summary data from MetricsCollector
67 |         """
68 |         event = {
69 |             'event_type': 'session_summary',
70 |             'timestamp': datetime.now(timezone.utc).isoformat(),
71 |             **summary_data
72 |         }
73 |         self.write(event)
74 |
75 |     def close(self):
76 |         """Close the file handle"""
77 |         if self._opened and self._file_handle:
78 |             self._file_handle.close()
79 |             self._opened = False
80 |             self._file_handle = None
81 |
82 |     def __enter__(self):
83 |         """Context manager entry"""
84 |         self._ensure_open()
85 |         return self
86 |
87 |     def __exit__(self, exc_type, exc_val, exc_tb):
88 |         """Context manager exit"""
89 |         self.close()
90 |         return False
91 |
92 |
--------------------------------------------------------------------------------
/codefuse/observability/metrics/trackers.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Metrics Trackers - Context managers for tracking execution metrics
  3 | """
  4 |
  5 | import time
  6 | from datetime import datetime, timezone
  7 | from typing import Optional
  8 |
  9 | from .models import ToolCallMetric, APICallMetric, PromptMetric, SessionMetric
 10 |
 11 |
 12 | class ToolCallTracker:
 13 |     """Context manager for tracking tool call execution"""
 14 |
 15 |     def __init__(self, metric: ToolCallMetric, parent_prompt: PromptMetric):
 16 |         self.metric = metric
 17 |         self.parent_prompt = parent_prompt
 18 |         self._start_time = time.time()
 19 |
 20 |     def set_error(self, error: str):
 21 |         """Set error for this tool call"""
 22 |         self.metric.success = False
 23 |         self.metric.error = error
 24 |
 25 |     def set_success(self, success: bool = True):
 26 |         """Set success status"""
 27 |         self.metric.success = success
 28 |
 29 |     def __enter__(self):
 30 |         return self
 31 |
 32 |     def __exit__(self, exc_type, exc_val, exc_tb):
 33 |         end = time.time()
 34 |         self.metric.end_time = datetime.now(timezone.utc).isoformat()
 35 |         self.metric.duration = end - self._start_time
 36 |
 37 |         if exc_type is not None:
 38 |             self.metric.success = False
 39 |             self.metric.error = str(exc_val)
 40 |
 41 |         return False  # Don't suppress exceptions
 42 |
 43 |
 44 | class APICallTracker:
 45 |     """Context manager for tracking API call"""
 46 |
 47 |     def __init__(self, metric: APICallMetric, parent_prompt: PromptMetric):
 48 |         self.metric = metric
 49 |         self.parent_prompt = parent_prompt
 50 |         self._start_time = time.time()
 51 |
 52 |     def set_tokens(
 53 |         self,
 54 |         prompt_tokens: int,
 55 |         completion_tokens: int,
 56 |         total_tokens: int,
 57 |         cache_creation_tokens: Optional[int] = None,
 58 |         cache_read_tokens: Optional[int] = None,
 59 |     ):
 60 |         """Set token usage information"""
 61 |         self.metric.prompt_tokens = prompt_tokens
 62 |         self.metric.completion_tokens = completion_tokens
 63 |         self.metric.total_tokens = total_tokens
 64 |         self.metric.cache_creation_tokens = cache_creation_tokens
 65 |         self.metric.cache_read_tokens = cache_read_tokens
 66 |
 67 |     def set_model(self, model: str):
 68 |         """Set model name"""
 69 |         self.metric.model = model
 70 |
 71 |     def set_finish_reason(self, finish_reason: str):
 72 |         """Set finish reason"""
 73 |         self.metric.finish_reason = finish_reason
 74 |
 75 |     def set_error(self, error: str):
 76 |         """Set error for this API call"""
 77 |         self.metric.success = False
 78 |         self.metric.error = error
 79 |
 80 |     def set_success(self, success: bool = True):
 81 |         """Set success status"""
 82 |         self.metric.success = success
 83 |
 84 |     def __enter__(self):
 85 |         return self
 86 |
 87 |     def __exit__(self, exc_type, exc_val, exc_tb):
 88 |         end = time.time()
 89 |         self.metric.end_time = datetime.now(timezone.utc).isoformat()
 90 |         self.metric.duration = end - self._start_time
 91 |
 92 |         if exc_type is not None:
 93 |             self.metric.success = False
 94 |             self.metric.error = str(exc_val)
 95 |
 96 |         return False  # Don't suppress exceptions
 97 |
 98 |
 99 | class PromptTracker:
100 |     """Context manager for tracking a prompt/query"""
101 |
102 |     def __init__(self, metric: PromptMetric, session: SessionMetric):
103 |         self.metric = metric
104 |         self.session = session
105 |         self._start_time = time.time()
106 |
107 |     def increment_iteration(self):
108 |         """Increment iteration count"""
109 |         self.metric.iterations += 1
110 |
111 |     def __enter__(self):
112 |         return self
113 |
114 |     def __exit__(self, exc_type, exc_val, exc_tb):
115 |         end = time.time()
116 |         self.metric.end_time = datetime.now(timezone.utc).isoformat()
117 |         self.metric.duration = end - self._start_time
118 |
119 |         return False  # Don't suppress exceptions
120 |
121 |
--------------------------------------------------------------------------------
/codefuse/observability/logging/setup.py:
--------------------------------------------------------------------------------
  1 | """Unified logging configuration using structlog"""
  2 |
  3 | import os
  4 | import logging
  5 | import structlog
  6 | from pathlib import Path
  7 | from typing import Optional
  8 | from .utils import path_to_slug
  9 |
 10 | # State tracking
 11 | _logging_initialized = False
 12 | _session_dir: Optional[Path] = None
 13 |
 14 | def _json_formatter(logger, method_name, event_dict):
 15 |     """Custom formatter that outputs clean JSON lines"""
 16 |     import json
 17 |     from datetime import datetime, timezone
 18 |
 19 |     # Build JSON structure
 20 |     log_data = {
 21 |         "timestamp": datetime.now(timezone.utc).isoformat(),
 22 |         "level": event_dict.pop("level", "info"),
 23 |     }
 24 |
 25 |     # Add logger name if present
 26 |     if "logger" in event_dict:
 27 |         log_data["logger"] = event_dict.pop("logger")
 28 |
 29 |     # Add event/message
 30 |     if "event" in event_dict:
 31 |         log_data["message"] = event_dict.pop("event")
 32 |
 33 |     # Add all remaining fields
 34 |     log_data.update(event_dict)
 35 |
 36 |     return json.dumps(log_data, ensure_ascii=False)
 37 |
 38 |
 39 | # Configure standard logging backend with NullHandler (silent before setup)
 40 | stdlib_logger = logging.getLogger("codefuse.main")
 41 | stdlib_logger.addHandler(logging.NullHandler())
 42 | stdlib_logger.propagate = False
 43 | stdlib_logger.setLevel(logging.DEBUG)
 44 |
 45 | # Configure structlog once at module load time
 46 | structlog.configure(
 47 |     processors=[
 48 |         structlog.stdlib.add_log_level,
 49 |         structlog.stdlib.PositionalArgumentsFormatter(),
 50 |         structlog.processors.StackInfoRenderer(),
 51 |         structlog.processors.format_exc_info,
 52 |         _json_formatter,
 53 |     ],
 54 |     wrapper_class=structlog.stdlib.BoundLogger,
 55 |     context_class=dict,
 56 |     logger_factory=structlog.stdlib.LoggerFactory(),
 57 |     cache_logger_on_first_use=True,
 58 | )
 59 |
 60 | # Create global logger instance (ready to use, silent before setup)
 61 | mainLogger = structlog.get_logger("codefuse.main")
 62 |
 63 |
 64 | def setup_logging(
 65 |     session_id: str,
 66 |     workspace_path: Optional[str] = None,
 67 |     logs_dir: str = "~/.cfuse/logs",
 68 |     verbose: bool = False,
 69 | ) -> Path:
 70 |     """
 71 |     Setup file handler for logging
 72 |
 73 |     Configures mainLogger for debug logs (file only, append mode).
 74 |     Trajectory and LLM messages are now handled by dedicated writers.
 75 |
 76 |     Args:
 77 |         session_id: Unique session identifier
 78 |         workspace_path: Workspace path (default: cwd)
 79 |         logs_dir: Base logs directory
 80 |         verbose: Enable console output (currently unused, kept for compatibility)
 81 |
 82 |     Returns:
 83 |         Session directory path
 84 |     """
 85 |     global _logging_initialized, _session_dir
 86 |
 87 |     if _logging_initialized:
 88 |         return _session_dir
 89 |
 90 |     # Prepare session directory
 91 |     workspace_path = workspace_path or os.getcwd()
 92 |     base_logs_dir = Path(logs_dir).expanduser()
 93 |     workspace_slug = path_to_slug(workspace_path)
 94 |     session_dir = base_logs_dir / workspace_slug / session_id
 95 |     session_dir.mkdir(parents=True, exist_ok=True)
 96 |     _session_dir = session_dir
 97 |
 98 |     # Configure main logger: DEBUG level, file only
 99 |     main_logger = logging.getLogger("codefuse.main")
100 |     main_logger.handlers.clear()  # Remove NullHandler
101 |     main_handler = logging.FileHandler(session_dir / "main.log", mode='a', encoding='utf-8')
102 |     main_handler.setLevel(logging.DEBUG)
103 |     main_logger.addHandler(main_handler)
104 |
105 |     _logging_initialized = True
106 |
107 |     mainLogger.info(
108 |         "Logging initialized",
109 |         session_id=session_id,
110 |         workspace=workspace_path,
111 |         logs_dir=str(session_dir),
112 |         verbose=verbose,
113 |     )
114 |
115 |     return session_dir
116 |
117 |
118 | def get_session_dir() -> Optional[Path]:
119 |     """Get the current session directory path"""
120 |     return _session_dir
121 |
122 |
123 | def close_all_loggers():
124 |     """Close all logger handlers and flush buffers"""
125 |     logger = logging.getLogger("codefuse.main")
126 |     for handler in logger.handlers[:]:
127 |         handler.close()
128 |         logger.removeHandler(handler)
129 |
--------------------------------------------------------------------------------
/codefuse/llm/retry.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Retry Logic for LLM Requests
  3 | """
  4 |
  5 | import time
  6 | from functools import wraps
  7 | from typing import Callable, Tuple, Type, Optional
  8 |
  9 | from codefuse.llm.exceptions import RetryableError, RateLimitError, TimeoutError
 10 | from codefuse.observability import mainLogger
 11 |
 12 |
 13 | def retry_on_failure(
 14 |     max_retries: int = 3,
 15 |     initial_delay: float = 1.0,
 16 |     exponential_base: float = 2.0,
 17 |     retryable_exceptions: Tuple[Type[Exception], ...] = (RetryableError, RateLimitError, TimeoutError)
 18 | ):
 19 |     """
 20 |     Decorator to retry function calls on specific exceptions
 21 |
 22 |     Retry Strategy:
 23 |     - Timeout errors: Retry with exponential backoff
 24 |     - Rate limit errors (429): Retry with exponential backoff or Retry-After header
 25 |     - Other errors: Raise immediately
 26 |
 27 |     Args:
 28 |         max_retries: Maximum number of retry attempts (default: 3)
 29 |         initial_delay: Initial delay in seconds (default: 1.0)
 30 |         exponential_base: Base for exponential backoff (default: 2.0)
 31 |         retryable_exceptions: Tuple of exception types that should trigger retry
 32 |
 33 |     Returns:
 34 |         Decorated function that will retry on retryable errors
 35 |     """
 36 |     def decorator(func: Callable) -> Callable:
 37 |         @wraps(func)
 38 |         def wrapper(*args, **kwargs):
 39 |             last_exception: Optional[Exception] = None
 40 |
 41 |             for attempt in range(max_retries):
 42 |                 try:
 43 |                     return func(*args, **kwargs)
 44 |
 45 |                 except retryable_exceptions as e:
 46 |                     last_exception = e
 47 |
 48 |                     # If this was the last attempt, raise the exception
 49 |                     if attempt == max_retries - 1:
 50 |                         mainLogger.error(
 51 |                             f"Failed after {max_retries} attempts: {type(e).__name__}: {e}"
 52 |                         )
 53 |                         raise
 54 |
 55 |                     # Calculate wait time
 56 |                     if isinstance(e, RateLimitError) and e.retry_after:
 57 |                         # Use the Retry-After value from the API response
 58 |                         wait_time = e.retry_after
 59 |                         mainLogger.warning(
 60 |                             f"Rate limit hit. Waiting {wait_time:.1f}s as specified by API."
 61 |                         )
 62 |                     else:
 63 |                         # Exponential backoff: 1s, 2s, 4s, 8s, etc.
 64 |                         wait_time = initial_delay * (exponential_base ** attempt)
 65 |                         mainLogger.warning(
 66 |                             f"Attempt {attempt + 1}/{max_retries} failed: {type(e).__name__}: {e}"
 67 |                         )
 68 |
 69 |                     mainLogger.info(f"Retrying in {wait_time:.2f} seconds...")
 70 |                     time.sleep(wait_time)
 71 |
 72 |                 except Exception as e:
 73 |                     # Non-retryable error - raise immediately
 74 |                     mainLogger.error(f"Non-retryable error occurred: {type(e).__name__}: {e}")
 75 |                     raise
 76 |
 77 |             # Should never reach here, but just in case
 78 |             if last_exception:
 79 |                 raise last_exception
 80 |
 81 |         return wrapper
 82 |     return decorator
 83 |
 84 |
 85 | def should_retry(exception: Exception) -> bool:
 86 |     """
 87 |     Determine if an exception should trigger a retry
 88 |
 89 |     Args:
 90 |         exception: The exception to check
 91 |
 92 |     Returns:
 93 |         True if the exception is retryable, False otherwise
 94 |     """
 95 |     return isinstance(exception, (RetryableError, RateLimitError, TimeoutError))
 96 |
 97 |
 98 | def get_retry_delay(
 99 |     attempt: int,
100 |     exception: Optional[Exception] = None,
101 |     initial_delay: float = 1.0,
102 |     exponential_base: float = 2.0
103 | ) -> float:
104 |     """
105 |     Calculate retry delay based on attempt number and exception type
106 |
107 |     Args:
108 |         attempt: Current attempt number (0-indexed)
109 |         exception: The exception that triggered the retry
110 |         initial_delay: Initial delay in seconds
111 |         exponential_base: Base for exponential backoff
112 |
113 |     Returns:
114 |         Delay in seconds before next retry
115 |     """
116 |     # Check if exception has a retry_after attribute (e.g., RateLimitError)
117 |     if isinstance(exception, RateLimitError) and exception.retry_after:
118 |         return exception.retry_after
119 |
120 |     # Default exponential backoff
121 |     return initial_delay * (exponential_base ** attempt)
122 |
123 |
--------------------------------------------------------------------------------
/codefuse/cli/headless.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Headless Mode - Single-prompt execution
  3 | """
  4 |
  5 | import json
  6 | from datetime import datetime
  7 | from typing import Dict, Any, List, Union
  8 | from rich.console import Console
  9 | from rich.panel import Panel
 10 | from rich.markdown import Markdown
 11 |
 12 | from codefuse.llm.base import ContentBlock
 13 | from codefuse.observability import mainLogger, close_all_loggers
 14 |
 15 | console = Console()
 16 |
 17 |
 18 | def run_headless(
 19 |     prompt: str,
 20 |     components: Dict[str, Any],
 21 |     stream: bool = True,
 22 |     image_urls: tuple = tuple(),
 23 | ):
 24 |     """
 25 |     Run agent in headless mode (single prompt execution)
 26 |
 27 |     Args:
 28 |         prompt: User prompt/query
 29 |         components: Dictionary of initialized components from initialize_agent_components()
 30 |         stream: Whether to stream LLM responses
 31 |         image_urls: Optional tuple of image URLs to include in the prompt
 32 |     """
 33 |     # Unpack components
 34 |     agent_profile = components["agent_profile"]
 35 |     env_info = components["env_info"]
 36 |     agent_loop = components["agent_loop"]
 37 |     available_tools = components["available_tools"]
 38 |     session_dir = components["session_dir"]
 39 |     config = components["config"]
 40 |     model_name = components["model_name"]
 41 |     metrics_collector = components["metrics_collector"]
 42 |     context_engine = components["context_engine"]
 43 |     resumed_conversation = components["resumed_conversation"]
 44 |
 45 |     # Build user query content (text + optional images)
 46 |     user_query: Union[str, List[ContentBlock]]
 47 |     if image_urls:
 48 |         # Build multimodal content
 49 |         content_blocks: List[ContentBlock] = []
 50 |
 51 |         # Add text block
 52 |         if prompt:
 53 |             content_blocks.append(ContentBlock(type="text", text=prompt))
 54 |
 55 |         # Add image blocks
 56 |         for url in image_urls:
 57 |             content_blocks.append(ContentBlock(
 58 |                 type="image_url",
 59 |                 image_url={"url": url}
 60 |             ))
 61 |
 62 |         user_query = content_blocks
 63 |     else:
 64 |         # Pure text content
 65 |         user_query = prompt
 66 |
 67 |     # User message will be logged by agent_loop automatically
 68 |
 69 |     # Run agent loop
 70 |     mainLogger.info("Agent loop starting", session_id=context_engine.session_id)
 71 |
 72 |     final_response = ""
 73 |     current_content = ""
 74 |     current_tool_calls = []  # Track tool calls for the current response
 75 |     iterations = 1
 76 |
 77 |     for event in agent_loop.run(
 78 |         user_query=user_query,
 79 |         stream=stream,
 80 |     ):
 81 |         if event.type == "llm_done":
 82 |             if not stream:
 83 |                 # Non-streaming: save content
 84 |                 content = event.data["content"]
 85 |                 if content:
 86 |                     current_content = content
 87 |
 88 |             # Check if there are tool calls in the response
 89 |             if "tool_calls" in event.data and event.data["tool_calls"]:
 90 |                 current_tool_calls = event.data["tool_calls"]
 91 |
 92 |         elif event.type == "tool_done":
 93 |             tool_name = event.data["tool_name"]
 94 |             tool_call_id = event.data.get("tool_call_id")
 95 |             result = event.data["result"]
 96 |             confirmed = event.data.get("confirmed", True)
 97 |
 98 |             # Tool results are logged by tool_executor automatically
 99 |
100 |         elif event.type == "agent_done":
101 |             final_response = event.data["final_response"]
102 |             iterations = event.data["iterations"]
103 |
104 |             # Save final assistant message to trajectory
105 |             assistant_message = {
106 |                 "role": "assistant",
107 |                 "content": final_response or current_content,
108 |                 "timestamp": datetime.now().isoformat(),
109 |             }
110 |             if current_tool_calls:
111 |                 assistant_message["tool_calls"] = current_tool_calls
112 |             # Assistant messages are logged by agent_loop automatically
113 |
114 |             # Only output the final response content
115 |             console.print(final_response or current_content)
116 |
117 |         elif event.type == "error":
118 |             error = event.data["error"]
119 |             console.print(f"[red]Error:[/red] {error}")
120 |
121 |     # Generate and save metrics summary
122 |     summary = metrics_collector.generate_summary()
123 |
124 |     # Write summary to trajectory
125 |     context_engine.write_session_summary(summary)
126 |
127 |     mainLogger.info("Agent loop completed", status="success")
128 |
129 |     # Close all loggers
130 |     close_all_loggers()
131 |
--------------------------------------------------------------------------------
/codefuse/tools/base.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Base classes for tools
  3 | """
  4 |
  5 | from abc import ABC, abstractmethod
  6 | from dataclasses import dataclass, field
  7 | from typing import Dict, Any, Optional, List
  8 |
  9 |
 10 | @dataclass
 11 | class ToolResult:
 12 |     """
 13 |     Result of a tool execution
 14 |
 15 |     Attributes:
 16 |         content: Full result content for LLM (detailed, structured)
 17 |         display: User-friendly display text for interactive mode (concise, formatted)
 18 |     """
 19 |     content: str
 20 |     display: Optional[str] = None
 21 |
 22 |     def __post_init__(self):
 23 |         """If display is not provided, use content as display"""
 24 |         if self.display is None:
 25 |             self.display = self.content
 26 |
 27 |     def __str__(self) -> str:
 28 |         """String representation returns content for LLM"""
 29 |         return self.content
 30 |
 31 |
 32 | @dataclass
 33 | class ToolParameter:
 34 |     """Definition of a tool parameter"""
 35 |     name: str
 36 |     type: str  # "string", "number", "boolean", "array", "object"
 37 |     description: str
 38 |     required: bool = True
 39 |     enum: Optional[List[str]] = None
 40 |
 41 |     def to_dict(self) -> Dict[str, Any]:
 42 |         """Convert to dictionary format"""
 43 |         result = {
 44 |             "type": self.type,
 45 |             "description": self.description,
 46 |         }
 47 |         if self.enum:
 48 |             result["enum"] = self.enum
 49 |         return result
 50 |
 51 |
 52 | @dataclass
 53 | class ToolDefinition:
 54 |     """Definition of a tool"""
 55 |     name: str
 56 |     description: str
 57 |     parameters: List[ToolParameter] = field(default_factory=list)
 58 |     requires_confirmation: bool = False  # Whether user confirmation is required
 59 |
 60 |     def to_openai_format(self) -> Dict[str, Any]:
 61 |         """
 62 |         Convert to OpenAI function calling format
 63 |
 64 |         Returns:
 65 |             Dict compatible with OpenAI's tools API
 66 |         """
 67 |         # Build parameters schema
 68 |         properties = {}
 69 |         required = []
 70 |
 71 |         for param in self.parameters:
 72 |             properties[param.name] = param.to_dict()
 73 |             if param.required:
 74 |                 required.append(param.name)
 75 |
 76 |         return {
 77 |             "type": "function",
 78 |             "function": {
 79 |                 "name": self.name,
 80 |                 "description": self.description,
 81 |                 "parameters": {
 82 |                     "type": "object",
 83 |                     "properties": properties,
 84 |                     "required": required,
 85 |                 }
 86 |             }
 87 |         }
 88 |
 89 |
 90 | class BaseTool(ABC):
 91 |     """
 92 |     Abstract base class for all tools
 93 |
 94 |     Tools are the actions that the agent can take in the environment.
 95 |     Each tool must define its interface and implement the execution logic.
96 | """ 97 | 98 | @property 99 | @abstractmethod 100 | def definition(self) -> ToolDefinition: 101 | """ 102 | Get the tool definition 103 | 104 | Returns: 105 | ToolDefinition describing the tool's interface 106 | """ 107 | pass 108 | 109 | @property 110 | def requires_confirmation(self) -> bool: 111 | """ 112 | Check if this tool requires user confirmation before execution 113 | 114 | Dangerous operations (like writing files) should require confirmation 115 | unless running in YOLO mode. 116 | 117 | Returns: 118 | True if confirmation is required, False otherwise 119 | """ 120 | return self.definition.requires_confirmation 121 | 122 | @abstractmethod 123 | def execute(self, **kwargs) -> ToolResult: 124 | """ 125 | Execute the tool with the given arguments 126 | 127 | Args: 128 | **kwargs: Tool-specific arguments 129 | 130 | Returns: 131 | ToolResult containing: 132 | - content: Full result for LLM (detailed information) 133 | - display: User-friendly display text (concise summary) 134 | 135 | Raises: 136 | Exception: If tool execution fails 137 | """ 138 | pass 139 | 140 | def validate_arguments(self, **kwargs) -> None: 141 | """ 142 | Validate tool arguments before execution 143 | 144 | Args: 145 | **kwargs: Arguments to validate 146 | 147 | Raises: 148 | ValueError: If arguments are invalid 149 | """ 150 | # Check required parameters 151 | for param in self.definition.parameters: 152 | if param.required and param.name not in kwargs: 153 | raise ValueError(f"Missing required parameter: {param.name}") 154 | 155 | -------------------------------------------------------------------------------- /codefuse/llm/factory.py: -------------------------------------------------------------------------------- 1 | """ 2 | LLM Factory - Create LLM instances based on provider 3 | """ 4 | 5 | from typing import Optional 6 | 7 | from codefuse.llm.base import BaseLLM 8 | from codefuse.llm.providers.openai_compatible import OpenAICompatibleLLM 9 | from codefuse.observability import mainLogger 10 | 11 | 12 | def create_llm( 13 | provider: str = "openai_compatible", 14 | model: str = "gpt-4o", 15 | api_key: str = "", 16 | base_url: Optional[str] = None, 17 | temperature: float = 0.0, 18 | max_tokens: Optional[int] = None, 19 | timeout: int = 60, 20 | parallel_tool_calls: bool = True, 21 | enable_thinking: bool = False, 22 | top_k: Optional[int] = None, 23 | top_p: Optional[float] = None, 24 | session_id: Optional[str] = None, 25 | **kwargs 26 | ) -> BaseLLM: 27 | """ 28 | Factory function to create LLM instances 29 | 30 | Args: 31 | provider: LLM provider type 32 | - "openai_compatible": OpenAI API and compatible providers (default) 33 | - "anthropic": Anthropic Claude API 34 | - "gemini": Google Gemini API 35 | model: Model identifier (e.g., "gpt-4o", "claude-3-5-sonnet", etc.) 
 36 |         api_key: API key for authentication
 37 |         base_url: Base URL for API endpoint (for openai_compatible)
 38 |         temperature: Sampling temperature (0-2)
 39 |         max_tokens: Maximum tokens to generate
 40 |         timeout: Request timeout in seconds
 41 |         parallel_tool_calls: Enable parallel tool calls (default: True)
 42 |         enable_thinking: Enable thinking mode for models that support it (default: False)
 43 |         top_k: Top-k sampling parameter (default: None)
 44 |         top_p: Nucleus sampling parameter (0-1, default: None)
 45 |         session_id: Session ID for Anthropic provider (used for x-idealab-session-id header)
 46 |         **kwargs: Additional provider-specific parameters
 47 |
 48 |     Returns:
 49 |         BaseLLM instance configured for the specified provider
 50 |
 51 |     Raises:
 52 |         ValueError: If provider is not supported
 53 |
 54 |     Examples:
 55 |         >>> # OpenAI
 56 |         >>> llm = create_llm(
 57 |         ...     provider="openai_compatible",
 58 |         ...     model="gpt-4o",
 59 |         ...     api_key="sk-..."
 60 |         ... )
 61 |
 62 |         >>> # DeepSeek
 63 |         >>> llm = create_llm(
 64 |         ...     provider="openai_compatible",
 65 |         ...     model="deepseek-chat",
 66 |         ...     api_key="sk-...",
 67 |         ...     base_url="https://api.deepseek.com"
 68 |         ... )
 69 |
 70 |         >>> # Anthropic (when implemented)
 71 |         >>> llm = create_llm(
 72 |         ...     provider="anthropic",
 73 |         ...     model="claude-3-5-sonnet-20241022",
 74 |         ...     api_key="sk-ant-..."
 75 |         ... )
 76 |     """
 77 |     provider = provider.lower().strip()
 78 |
 79 |     mainLogger.info(f"Creating LLM: provider={provider}, model={model}")
 80 |
 81 |     if provider == "anthropic":
 82 |         from codefuse.llm.providers.anthropic import AnthropicLLM
 83 |         return AnthropicLLM(
 84 |             model=model,
 85 |             api_key=api_key,
 86 |             base_url=base_url,
 87 |             temperature=temperature,
 88 |             max_tokens=max_tokens,
 89 |             timeout=timeout,
 90 |             parallel_tool_calls=parallel_tool_calls,
 91 |             enable_thinking=enable_thinking,
 92 |             top_k=top_k,
 93 |             top_p=top_p,
 94 |             session_id=session_id,
 95 |             **kwargs
 96 |         )
 97 |
 98 |     elif provider == "gemini":
 99 |         from codefuse.llm.providers.gemini import GeminiLLM
100 |         return GeminiLLM(
101 |             model=model,
102 |             api_key=api_key,
103 |             base_url=base_url,
104 |             temperature=temperature,
105 |             max_tokens=max_tokens,
106 |             timeout=timeout,
107 |             parallel_tool_calls=parallel_tool_calls,
108 |             enable_thinking=enable_thinking,
109 |             top_k=top_k,
110 |             top_p=top_p,
111 |             **kwargs
112 |         )
113 |
114 |     elif provider in ("openai_compatible", "openai"):
115 |         # Default to OpenAI Compatible for all unspecified providers
116 |         return OpenAICompatibleLLM(
117 |             model=model,
118 |             api_key=api_key,
119 |             base_url=base_url,
120 |             temperature=temperature,
121 |             max_tokens=max_tokens,
122 |             timeout=timeout,
123 |             parallel_tool_calls=parallel_tool_calls,
124 |             enable_thinking=enable_thinking,
125 |             top_k=top_k,
126 |             top_p=top_p,
127 |             **kwargs
128 |         )
129 |
130 |     else:
131 |         mainLogger.warning(
132 |             f"Unknown provider '{provider}', defaulting to openai_compatible. "
133 |             f"Supported providers: openai_compatible, anthropic, gemini"
134 |         )
135 |         return OpenAICompatibleLLM(
136 |             model=model,
137 |             api_key=api_key,
138 |             base_url=base_url,
139 |             temperature=temperature,
140 |             max_tokens=max_tokens,
141 |             timeout=timeout,
142 |             parallel_tool_calls=parallel_tool_calls,
143 |             enable_thinking=enable_thinking,
144 |             top_k=top_k,
145 |             top_p=top_p,
146 |             **kwargs
147 |         )
--------------------------------------------------------------------------------
/codefuse/tools/builtin/write_file.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Write File Tool - Write or modify file contents in the workspace
  3 | """
  4 |
  5 | from pathlib import Path
  6 | from typing import Optional
  7 |
  8 | from codefuse.tools.base import BaseTool, ToolDefinition, ToolParameter, ToolResult
  9 | from codefuse.tools.builtin.filesystem_base import FileSystemToolMixin, MAX_TOKENS
 10 | from codefuse.observability import mainLogger
 11 |
 12 |
 13 | class WriteFileTool(FileSystemToolMixin, BaseTool):
 14 |     """
 15 |     Tool for writing file contents
 16 |
 17 |     Features:
 18 |     - Create new files or overwrite existing files
 19 |     - Safety checks for path validity and workspace restriction
 20 |     - Content size validation
 21 |     - Requires user confirmation (unless in YOLO mode)
 22 |     """
 23 |
 24 |     def __init__(self, workspace_root: Optional[Path] = None):
 25 |         """
 26 |         Initialize WriteFileTool
 27 |
 28 |         Args:
 29 |             workspace_root: Workspace root directory to restrict file access.
 30 |                 Defaults to current working directory.
 31 |         """
 32 |         super().__init__(workspace_root=workspace_root)
 33 |
 34 |     @property
 35 |     def definition(self) -> ToolDefinition:
 36 |         """Define the write_file tool"""
 37 |         return ToolDefinition(
 38 |             name="write_file",
 39 |             description=(
 40 |                 "Write content to a file in the workspace (creates or overwrites).\n\n"
 41 |                 "Important:\n"
 42 |                 "- The path parameter MUST be an absolute path, not a relative path\n"
 43 |                 "- File must be within the workspace root directory\n"
 44 |                 "- Content size is limited to prevent excessive file sizes"
 45 |             ),
 46 |             parameters=[
 47 |                 ToolParameter(
 48 |                     name="path",
 49 |                     type="string",
 50 |                     description="Absolute path to the file to write",
 51 |                     required=True,
 52 |                 ),
 53 |                 ToolParameter(
 54 |                     name="content",
 55 |                     type="string",
 56 |                     description="Content to write to the file",
 57 |                     required=True,
 58 |                 ),
 59 |             ],
 60 |             requires_confirmation=True,  # Writing is dangerous!
61 | ) 62 | 63 | def execute( 64 | self, 65 | path: str, 66 | content: str, 67 | **kwargs 68 | ) -> ToolResult: 69 | """ 70 | Execute the write_file tool 71 | 72 | Args: 73 | path: Absolute path to the file to write 74 | content: Content to write to the file 75 | 76 | Returns: 77 | ToolResult with: 78 | - content: Detailed success/error message for LLM 79 | - display: User-friendly summary for UI 80 | """ 81 | try: 82 | # Step 1: Check if path is absolute 83 | if error := self._check_absolute_path(path): 84 | return self._create_error_result(error, "Path must be absolute") 85 | 86 | # Step 2: Resolve path 87 | file_path = self._resolve_path(path) 88 | 89 | # Step 3: Check if within workspace 90 | if error := self._check_within_workspace(file_path): 91 | mainLogger.warning(f"File write outside workspace: {error}") 92 | return self._create_error_result(error, "Access denied: outside workspace") 93 | 94 | # Step 4: Check content size limit 95 | if error := self._check_token_limit(content, MAX_TOKENS): 96 | mainLogger.warning(f"Content too large: {error}") 97 | return self._create_error_result(error, f"Content too large (>{MAX_TOKENS:,} tokens)") 98 | 99 | # Step 5: Create parent directories if they don't exist 100 | file_path.parent.mkdir(parents=True, exist_ok=True) 101 | 102 | # Step 6: Check if file exists (for logging) 103 | file_existed = file_path.exists() 104 | 105 | # Step 7: Write content to file 106 | with open(file_path, 'w', encoding='utf-8') as f: 107 | f.write(content) 108 | 109 | # Step 8: Calculate stats and return result 110 | lines = content.count('\n') + 1 111 | chars = len(content) 112 | 113 | action = "Updated" if file_existed else "Created" 114 | mainLogger.info(f"{action} {file_path} ({lines} lines, {chars} characters)") 115 | 116 | result_content = f"Successfully {action.lower()} file: {path} ({lines} lines, {chars} characters)" 117 | result_display = f"✓ {action} {path} ({lines} lines)" 118 | 119 | return ToolResult(content=result_content, display=result_display) 120 | 121 | except PermissionError as e: 122 | error_msg = f"Permission denied writing file: {path}" 123 | mainLogger.error(f"{error_msg}: {e}") 124 | return self._create_error_result(error_msg, f"Permission denied: {path}") 125 | except Exception as e: 126 | error_msg = f"Unexpected error writing file: {path} - {str(e)}" 127 | mainLogger.error(f"Unexpected error writing file: {path}", exc_info=True) 128 | return self._create_error_result(error_msg, f"Error writing {path}: {str(e)}") 129 | 130 | -------------------------------------------------------------------------------- /codefuse/llm/providers/anthropic.py: -------------------------------------------------------------------------------- 1 | """ 2 | Anthropic LLM Implementation with KV Cache Support 3 | 4 | This implementation extends OpenAICompatibleLLM to add Anthropic-specific 5 | prompt caching capabilities using cache_control markers. 6 | """ 7 | 8 | from typing import List, Optional, Dict, Any 9 | 10 | from codefuse.llm.base import Message, MessageRole 11 | from codefuse.llm.providers.openai_compatible import OpenAICompatibleLLM 12 | from codefuse.observability import mainLogger 13 | 14 | 15 | class AnthropicLLM(OpenAICompatibleLLM): 16 | """ 17 | Anthropic Claude LLM implementation with KV cache support 18 | 19 | This class extends OpenAICompatibleLLM and adds Anthropic-specific 20 | prompt caching by marking the last Tool message with cache_control. 
21 | 22 | Caching Strategy: 23 | - If messages end with USER: No cache marker (new request, short context) 24 | - If messages end with TOOL: Add cache_control to last Tool message 25 | 26 | This allows caching of accumulated context during agent loops while 27 | keeping fresh user queries uncached. 28 | """ 29 | 30 | def __init__(self, session_id: Optional[str] = None, **kwargs): 31 | """ 32 | Initialize Anthropic client with OpenAI-compatible SDK 33 | 34 | Args: 35 | session_id: Session ID for x-idealab-session-id header (ensures requests hit same instance) 36 | **kwargs: Other parameters passed to OpenAICompatibleLLM 37 | """ 38 | # Set default base_url for Anthropic if not provided 39 | # But keep user-provided base_url (for internal proxy services) 40 | if 'base_url' not in kwargs or kwargs['base_url'] is None: 41 | kwargs['base_url'] = "https://api.anthropic.com/v1" 42 | 43 | # Store session_id before calling parent __init__ 44 | self._session_id = session_id 45 | 46 | super().__init__(**kwargs) 47 | 48 | # Recreate client with custom header if session_id is provided 49 | if session_id: 50 | from openai import OpenAI 51 | self.client = OpenAI( 52 | api_key=self.api_key, 53 | base_url=self.base_url, 54 | timeout=self.timeout, 55 | default_headers={ 56 | 'x-idealab-session-id': session_id 57 | } 58 | ) 59 | mainLogger.info( 60 | f"Initialized Anthropic LLM with KV cache support: model={self.model}, " 61 | f"base_url={self.base_url}, session_id={session_id}" 62 | ) 63 | else: 64 | mainLogger.info( 65 | f"Initialized Anthropic LLM with KV cache support: model={self.model}, base_url={self.base_url}" 66 | ) 67 | 68 | @property 69 | def supports_prompt_caching(self) -> bool: 70 | """Anthropic has native prompt caching support""" 71 | return True 72 | 73 | def _convert_messages(self, messages: List[Message]) -> List[Dict[str, Any]]: 74 | """ 75 | Convert internal Message format to Anthropic format with cache control 76 | 77 | This method extends the parent's _convert_messages to add cache_control 78 | markers on the last Tool message (if messages end with TOOL role). 
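        Illustrative sketch of the intended transformation (assumed message
        shapes, not verbatim API output):

            # before: {"role": "tool", "tool_call_id": "call_1", "content": "..."}
            # after:  {"role": "tool", "tool_call_id": "call_1", "content": "...",
            #          "cache_control": {"type": "ephemeral"}}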
79 | 80 | Args: 81 | messages: List of internal Message objects 82 | 83 | Returns: 84 | List of message dictionaries in Anthropic API format 85 | """ 86 | # First convert using parent's logic 87 | openai_messages = super()._convert_messages(messages) 88 | 89 | # Check if we should add cache control 90 | if not messages or len(messages) == 0: 91 | return openai_messages 92 | 93 | last_message = messages[-1] 94 | 95 | # Only add cache control if last message is TOOL 96 | if last_message.role != MessageRole.TOOL: 97 | mainLogger.debug( 98 | f"No cache control added: last message role is {last_message.role.value}" 99 | ) 100 | return openai_messages 101 | 102 | # Add cache control to the last message (which is a Tool message) 103 | last_msg_dict = openai_messages[-1] 104 | 105 | # Convert content to array format with cache_control 106 | content = last_msg_dict.get("content", "") 107 | 108 | if isinstance(content, str): 109 | # Convert string content to content block array with cache_control 110 | # last_msg_dict["content"] = [ 111 | # { 112 | # "type": "text", 113 | # "text": content, 114 | # "cache_control": {"type": "ephemeral"} 115 | # } 116 | # ] 117 | last_msg_dict["cache_control"] = {"type": "ephemeral"} 118 | mainLogger.debug( 119 | "Added cache_control to last Tool message", 120 | tool_call_id=last_msg_dict.get("tool_call_id") 121 | ) 122 | elif isinstance(content, list): 123 | # Content is already an array, add cache_control to last block 124 | if len(content) > 0: 125 | content[-1]["cache_control"] = {"type": "ephemeral"} 126 | mainLogger.debug( 127 | "Added cache_control to last content block of Tool message", 128 | tool_call_id=last_msg_dict.get("tool_call_id") 129 | ) 130 | 131 | return openai_messages 132 | 133 | -------------------------------------------------------------------------------- /codefuse/tools/registry.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tool Registry - Manage available tools 3 | """ 4 | 5 | from pathlib import Path 6 | from typing import Dict, Optional, List, Any 7 | 8 | from codefuse.tools.base import BaseTool, ToolDefinition 9 | from codefuse.llm.base import Tool as LLMTool 10 | from codefuse.observability import mainLogger 11 | 12 | 13 | class ToolRegistry: 14 | """ 15 | Registry for managing available tools 16 | 17 | The registry maintains a collection of tools and provides methods 18 | to register, retrieve, and list tools. 
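    Typical usage (an illustrative sketch; assumes a workspace at the current
    directory and omits the optional read_tracker):

        from pathlib import Path
        from codefuse.tools.builtin import ReadFileTool

        registry = ToolRegistry()
        registry.register(ReadFileTool(workspace_root=Path.cwd()))
        tool = registry.get_tool("read_file")  # -> ReadFileTool instance, or None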
19 | """ 20 | 21 | def __init__(self): 22 | """Initialize empty tool registry""" 23 | self._tools: Dict[str, BaseTool] = {} 24 | mainLogger.info("Initialized empty ToolRegistry") 25 | 26 | def register(self, tool: BaseTool) -> None: 27 | """ 28 | Register a tool 29 | 30 | Args: 31 | tool: Tool instance to register 32 | """ 33 | name = tool.definition.name 34 | if name in self._tools: 35 | mainLogger.warning("Tool already registered, overwriting", tool_name=name) 36 | 37 | self._tools[name] = tool 38 | mainLogger.info( 39 | "Registered tool", 40 | tool_name=name, 41 | requires_confirmation=tool.requires_confirmation, 42 | ) 43 | 44 | def get_tool(self, name: str) -> Optional[BaseTool]: 45 | """ 46 | Get a tool by name 47 | 48 | Args: 49 | name: Tool name 50 | 51 | Returns: 52 | Tool instance if found, None otherwise 53 | """ 54 | return self._tools.get(name) 55 | 56 | def get_all_definitions(self) -> List[ToolDefinition]: 57 | """ 58 | Get all tool definitions 59 | 60 | Returns: 61 | List of all registered tool definitions 62 | """ 63 | return [tool.definition for tool in self._tools.values()] 64 | 65 | def get_tools_for_llm(self, tool_names: Optional[List[str]] = None) -> List[LLMTool]: 66 | """ 67 | Get tools in LLM-compatible format 68 | 69 | Args: 70 | tool_names: Optional list of specific tool names to include. 71 | If None, includes all tools. 72 | 73 | Returns: 74 | List of Tool objects compatible with LLM.generate() 75 | """ 76 | definitions = self.get_all_definitions() 77 | 78 | # Filter by tool_names if specified 79 | if tool_names is not None: 80 | definitions = [d for d in definitions if d.name in tool_names] 81 | 82 | # Convert to LLM Tool format 83 | llm_tools = [] 84 | for definition in definitions: 85 | openai_format = definition.to_openai_format() 86 | llm_tools.append( 87 | LLMTool( 88 | type=openai_format["type"], 89 | function=openai_format["function"] 90 | ) 91 | ) 92 | 93 | return llm_tools 94 | 95 | def list_tool_names(self) -> List[str]: 96 | """ 97 | List all registered tool names 98 | 99 | Returns: 100 | List of tool names 101 | """ 102 | return list(self._tools.keys()) 103 | 104 | def __len__(self) -> int: 105 | """Get number of registered tools""" 106 | return len(self._tools) 107 | 108 | def __contains__(self, name: str) -> bool: 109 | """Check if a tool is registered""" 110 | return name in self._tools 111 | 112 | 113 | def create_default_registry( 114 | workspace_root: Optional["Path"] = None, 115 | read_tracker: Optional[Any] = None, 116 | config: Optional[Any] = None, 117 | ) -> ToolRegistry: 118 | """ 119 | Create a default tool registry with all built-in tools 120 | 121 | Args: 122 | workspace_root: Workspace root directory for file operations. 123 | Defaults to current working directory. 124 | read_tracker: Optional read tracker for file read tracking (needed by ReadFileTool/EditFileTool). 125 | config: Optional configuration object for tool-specific settings. 
126 | 127 | Returns: 128 | ToolRegistry with all built-in tools registered 129 | """ 130 | from pathlib import Path 131 | from codefuse.tools.builtin import ( 132 | ReadFileTool, 133 | WriteFileTool, 134 | EditFileTool, 135 | ListDirectoryTool, 136 | GrepTool, 137 | GlobTool, 138 | BashTool, 139 | ) 140 | 141 | registry = ToolRegistry() 142 | 143 | # Resolve workspace_root 144 | workspace = (workspace_root or Path.cwd()).resolve() 145 | 146 | # Register built-in tools with workspace_root 147 | # ReadFileTool and EditFileTool need read_tracker for file read tracking 148 | registry.register(ReadFileTool(workspace_root=workspace, read_tracker=read_tracker)) 149 | registry.register(WriteFileTool(workspace_root=workspace)) 150 | registry.register(EditFileTool(workspace_root=workspace, read_tracker=read_tracker)) 151 | # registry.register(ListDirectoryTool(workspace_root=workspace)) 152 | registry.register(GrepTool(workspace_root=workspace)) 153 | registry.register(GlobTool(workspace_root=workspace)) 154 | 155 | # Register BashTool with configuration 156 | bash_timeout = config.agent_config.bash_timeout if config else 30 157 | bash_allowed = config.agent_config.bash_allowed_commands if config else [] 158 | bash_disallowed = config.agent_config.bash_disallowed_commands if config else [] 159 | 160 | registry.register(BashTool( 161 | workspace_root=workspace, 162 | timeout=bash_timeout, 163 | allowed_commands=bash_allowed, 164 | disallowed_commands=bash_disallowed, 165 | )) 166 | 167 | mainLogger.info( 168 | "Created default registry", 169 | tool_count=len(registry.list_tool_names()), 170 | workspace_root=str(workspace) 171 | ) 172 | 173 | return registry 174 | 175 | -------------------------------------------------------------------------------- /codefuse/tools/builtin/filesystem_base.py: -------------------------------------------------------------------------------- 1 | """ 2 | Filesystem Tool Base - Common functionality for file system tools 3 | """ 4 | 5 | from pathlib import Path 6 | from typing import Optional, Tuple, List 7 | 8 | from codefuse.tools.base import ToolResult 9 | from codefuse.observability import mainLogger 10 | 11 | 12 | # File size and token limits 13 | MAX_FILE_SIZE_BYTES = 256 * 1024 # 256KB 14 | MAX_TOKENS = 25000 # Maximum tokens allowed in file content 15 | 16 | 17 | class FileSystemToolMixin: 18 | """ 19 | Mixin class providing common functionality for file system tools 20 | 21 | This mixin provides: 22 | - Path safety checks (absolute path, workspace restriction) 23 | - Content size limits (token estimation and checking) 24 | - Error handling utilities 25 | """ 26 | 27 | def __init__(self, workspace_root: Optional[Path] = None): 28 | """ 29 | Initialize the file system tool mixin 30 | 31 | Args: 32 | workspace_root: Workspace root directory to restrict file access. 33 | Defaults to current working directory. 
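        Note: every path check in this mixin resolves against this root; see
        _check_within_workspace below for the containment rule.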
34 | """ 35 | self._workspace_root = (workspace_root or Path.cwd()).resolve() 36 | mainLogger.debug(f"FileSystemToolMixin initialized with workspace_root: {self._workspace_root}") 37 | 38 | def _check_absolute_path(self, path: str) -> Optional[str]: 39 | """ 40 | Check if path is absolute 41 | 42 | Args: 43 | path: Path to check 44 | 45 | Returns: 46 | Error message if path is not absolute, None otherwise 47 | """ 48 | if not Path(path).is_absolute(): 49 | return f"Path must be absolute, but got relative path: {path}" 50 | return None 51 | 52 | def _check_within_workspace(self, file_path: Path) -> Optional[str]: 53 | """ 54 | Check if file is within workspace root directory 55 | 56 | Args: 57 | file_path: Path to check (must be resolved) 58 | 59 | Returns: 60 | Error message if file is outside workspace, None otherwise 61 | """ 62 | try: 63 | file_path.relative_to(self._workspace_root) 64 | return None 65 | except ValueError: 66 | return ( 67 | f"Path must be within workspace root ({self._workspace_root}), " 68 | f"but got: {file_path}" 69 | ) 70 | 71 | def _resolve_path(self, path: str) -> Path: 72 | """ 73 | Resolve a path to absolute form 74 | 75 | Args: 76 | path: Path to resolve (can be absolute or relative) 77 | 78 | Returns: 79 | Resolved absolute Path object 80 | """ 81 | return Path(path).expanduser().resolve() 82 | 83 | def _estimate_tokens(self, content: str) -> int: 84 | """ 85 | Estimate token count for content 86 | 87 | Uses rough estimation: characters / 4 88 | 89 | Args: 90 | content: Text content to estimate 91 | 92 | Returns: 93 | Estimated token count 94 | """ 95 | return len(content) // 4 96 | 97 | def _check_token_limit(self, content: str, max_tokens: int = MAX_TOKENS) -> Optional[str]: 98 | """ 99 | Check if content exceeds token limit 100 | 101 | Args: 102 | content: Content to check 103 | max_tokens: Maximum allowed tokens (default: MAX_TOKENS) 104 | 105 | Returns: 106 | Error message if content exceeds limit, None otherwise 107 | """ 108 | token_count = self._estimate_tokens(content) 109 | 110 | if token_count > max_tokens: 111 | return ( 112 | f"Content ({token_count:,} tokens) exceeds maximum ({max_tokens:,} tokens). " 113 | f"Please reduce the content size." 114 | ) 115 | return None 116 | 117 | def _create_error_result(self, error_msg: str, display_msg: str) -> ToolResult: 118 | """ 119 | Create a standardized error result 120 | 121 | Args: 122 | error_msg: Detailed error message for LLM 123 | display_msg: User-friendly error message for display 124 | 125 | Returns: 126 | ToolResult with error information 127 | """ 128 | return ToolResult( 129 | content=f"Error: {error_msg}", 130 | display=f"❌ {display_msg}" 131 | ) 132 | 133 | def _read_with_encoding_fallback(self, file_path: Path) -> Tuple[str, str]: 134 | """ 135 | Read file with multiple encoding fallbacks 136 | 137 | This method tries multiple encodings to read a file, falling back 138 | to more permissive options if the preferred encoding fails. 
139 | 140 | Args: 141 | file_path: Path to the file to read 142 | 143 | Returns: 144 | Tuple of (file_content, encoding_used) 145 | 146 | Raises: 147 | UnicodeDecodeError: If all encoding attempts fail 148 | """ 149 | encodings = [ 150 | ("utf-8", None), 151 | ("latin-1", None), 152 | ("utf-8", "replace"), 153 | ] 154 | 155 | last_exception = None 156 | for encoding, errors in encodings: 157 | try: 158 | content = file_path.read_text(encoding=encoding, errors=errors) 159 | return content, encoding 160 | except UnicodeDecodeError as e: 161 | last_exception = e 162 | continue 163 | 164 | # All encodings failed 165 | raise UnicodeDecodeError( 166 | "all", 167 | b"", 168 | 0, 169 | 1, 170 | f"Failed to decode file with all attempted encodings: {last_exception}" 171 | ) 172 | 173 | def _find_occurrence_lines(self, content: str, search_string: str) -> List[int]: 174 | """ 175 | Find line numbers where search_string starts 176 | 177 | This method handles both single-line and multi-line search strings. 178 | For multi-line strings, it returns the line number where each occurrence starts. 179 | 180 | Args: 181 | content: File content to search in 182 | search_string: String to search for (can be multi-line) 183 | 184 | Returns: 185 | List of line numbers (1-indexed) where search_string starts 186 | """ 187 | occurrence_lines = [] 188 | start_pos = 0 189 | 190 | # Find all occurrences in the content 191 | while True: 192 | pos = content.find(search_string, start_pos) 193 | if pos == -1: 194 | break 195 | 196 | # Count line number by counting newlines before this position 197 | line_num = content[:pos].count('\n') + 1 198 | occurrence_lines.append(line_num) 199 | 200 | # Move to next position 201 | start_pos = pos + 1 202 | 203 | return occurrence_lines 204 | 205 | def _format_with_line_numbers(self, content: str, start_line: int = 1) -> str: 206 | """ 207 | Format content with line numbers 208 | 209 | Format: LINE_NUMBER→LINE_CONTENT 210 | Line numbers are right-aligned to 6 characters 211 | 212 | Args: 213 | content: Text content to format 214 | start_line: Starting line number (1-indexed) 215 | 216 | Returns: 217 | Formatted content with line numbers 218 | """ 219 | if not content: 220 | return content 221 | 222 | lines = content.split('\n') 223 | formatted_lines = [] 224 | 225 | for i, line in enumerate(lines): 226 | line_num = start_line + i 227 | # Right-align line number to 6 characters 228 | formatted_lines.append(f"{line_num:6d}→{line}") 229 | 230 | return '\n'.join(formatted_lines) 231 | 232 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![](./images/codefuse_logo.png) 2 | # 🚀 CodeFuse-Agent (CFuse) 3 | 4 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE) 5 | [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/) 6 | 7 | **A lightweight, cleanly-architected agent framework designed for research and experimentation.** 8 | 9 | CodeFuse-Agent is fully open-source and can be installed with a single `pip install` command, providing a complete yet minimal toolset for code-related tasks. We open-source CFuse to facilitate reproducible research and encourage further exploration of LLM-based coding agents. 
10 | 11 | ## 🏆 SWE-bench Lite Results 12 | 13 | | Configuration | Resolved | 14 | |---------------|----------| 15 | | CFuse + Claude Sonnet 4.5 (Single Attempt) | **61%** | 16 | | CFuse + Trajectory-Aware Test-Time Scaling | **61.67%** | 17 | 18 | We introduce **Trajectory-Aware Test-Time Scaling (TTS)**, a novel verification mechanism that aggregates self-generated test cases from multiple trajectories for cross-validation, achieving state-of-the-art results on SWE-bench Lite. 19 | 20 | 📄 **Technical Report**: [tech_report.md](tech_report.md) 21 | 22 | ## ✨ Features 23 | 24 | ### Configurable Agent Profiles 25 | 26 | Agent behavior is defined through declarative Markdown profiles (system prompt, tools, model, etc.), enabling quick switching of system prompts and tool subsets without code changes. 27 | 28 | ### Dual Execution Modes 29 | 30 | - **Local Mode**: Execute tool calls directly in the local environment 31 | - **HTTP Mode**: Serve as a tool execution backend or delegate calls to remote sandboxes 32 | 33 | This decoupling of agent decisions from environment execution makes CFuse suitable as scaffolding for RL training pipelines. 34 | 35 | ### Built-in Tools 36 | 37 | Six essential tools for code exploration and modification: 38 | 39 | | Tool | Description | 40 | |------|-------------| 41 | | `read_file` | Read file contents with optional line range selection | 42 | | `write_file` | Create or overwrite files | 43 | | `edit_file` | Perform edits via search-and-replace | 44 | | `grep` | Fast code search powered by ripgrep | 45 | | `glob` | File discovery using glob patterns | 46 | | `bash` | Execute shell commands with timeout control | 47 | 48 | ## 🏗️ Architecture 49 | 50 | | Layer | Responsibility | 51 | |-------|----------------| 52 | | **Interaction** | Terminal UI / Headless / HTTP modes | 53 | | **Agent Loop** | Core lifecycle: LLM interaction, tool dispatch, iteration control | 54 | | **Context Engine** | Message history, environment context, compression, prompt assembly | 55 | | **LLM Provider** | OpenAI-compatible API support | 56 | | **Tool Execution** | 6 built-in tools + remote execution | 57 | | **Observability** | Trajectory logs, execution metrics, cost tracking | 58 | 59 | ## 📦 Installation 60 | 61 | ```bash 62 | pip install -e . 
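# editable install; assumes you are in the root of a cloned CodeFuse-Agent checkout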
63 | ``` 64 | 65 | ## 🔑 Configuration 66 | 67 | ### Required Environment Variables 68 | 69 | CodeFuse-Agent requires three environment variables to be configured: 70 | 71 | ```bash 72 | # Required: Your OpenAI API key (or compatible API key) 73 | export OPENAI_API_KEY=your-api-key 74 | 75 | # Required: The LLM model to use 76 | export LLM_MODEL=gpt-4o 77 | 78 | # Required: The API base URL 79 | export LLM_BASE_URL=https://api.openai.com/v1 80 | ``` 81 | 82 | **Important Notes:** 83 | - All three environment variables are **required** for the agent to function 84 | - `OPENAI_API_KEY` is the only API key variable used 85 | - `LLM_BASE_URL` can be set to any OpenAI-compatible API endpoint 86 | - `LLM_MODEL` should match the model name available on your API endpoint 87 | 88 | ### Configuration File (Optional) 89 | 90 | You can optionally create a `.cfuse.yaml` configuration file in your project root or `~/.cfuse.yaml`: 91 | 92 | ```yaml 93 | llm: 94 | provider: openai_compatible 95 | model: ${LLM_MODEL} # Uses environment variable 96 | api_key: ${OPENAI_API_KEY} # Uses environment variable 97 | base_url: ${LLM_BASE_URL} # Uses environment variable 98 | temperature: 0.0 99 | max_tokens: null 100 | timeout: 60 101 | 102 | agent_config: 103 | max_iterations: 200 104 | max_context_tokens: 128000 105 | enable_tools: true 106 | yolo: false 107 | agent: default 108 | workspace_root: . 109 | bash_timeout: 30 110 | 111 | logging: 112 | logs_dir: ~/.cfuse/logs 113 | verbose: false 114 | ``` 115 | 116 | **Configuration Priority** (highest to lowest): 117 | 1. CLI arguments (`--model`, `--api-key`, `--base-url`, etc.) 118 | 2. Environment variables (`OPENAI_API_KEY`, `LLM_MODEL`, `LLM_BASE_URL`) 119 | 3. Configuration file (`.cfuse.yaml`) 120 | 4. Default values 121 | 122 | ## 🚀 Quick Start 123 | 124 | ### Interactive Mode 125 | 126 | ```bash 127 | # Basic startup 128 | cfuse 129 | 130 | # Enable YOLO mode (auto-confirm all tool calls) 131 | cfuse --yolo 132 | 133 | # Start with specific workspace 134 | cfuse --workspace-root /path/to/project 135 | ``` 136 | 137 | ### Headless Mode 138 | 139 | ```bash 140 | # Single query 141 | cfuse -p "Read README.md and summarize it" 142 | 143 | # Auto-execute without confirmation 144 | cfuse -p "Analyze project structure" --yolo 145 | 146 | # Complex task with more iterations 147 | cfuse -p "Refactor the auth module" --yolo --max-iterations 300 148 | ``` 149 | 150 | ### Using Different Models 151 | 152 | ```bash 153 | # Use specific model 154 | cfuse --model gpt-4o --api-key sk-xxx --base-url https://api.openai.com/v1 155 | 156 | # Use local model (LM Studio, Ollama, etc.) 
157 | cfuse --model llama3 --api-key dummy --base-url http://localhost:1234/v1 158 | 159 | # Adjust temperature (0.0 = deterministic, higher = creative) 160 | cfuse -p "Fix bug in auth.py" --temperature 0.0 --yolo 161 | ``` 162 | 163 | ### Logging and Debugging 164 | 165 | Logs include `main.log`, `trajectory/`, and `llm_messages/` in `~/.cfuse/logs` 166 | 167 | ```bash 168 | # Enable verbose logging 169 | cfuse -p "Your task" --verbose --yolo 170 | 171 | # Custom log directory 172 | cfuse -p "Your task" --logs-dir ./my_logs --yolo 173 | ``` 174 | 175 | ### Common Usage Patterns 176 | 177 | ```bash 178 | # Bug fixing with verbose logs 179 | cfuse -p "Fix the authentication bug" --workspace-root ./backend --verbose --yolo 180 | 181 | # Code review with low temperature 182 | cfuse -p "Review src/utils/parser.py" --temperature 0.1 183 | 184 | # Long-running refactoring task 185 | cfuse -p "Refactor database layer" --max-iterations 500 --yolo --logs-dir ./refactor_logs 186 | ``` 187 | 188 | ## ⚙️ CLI Options 189 | 190 | ### Main Options 191 | 192 | | Option | Description | Default | 193 | |--------|-------------|---------| 194 | | `-p, --prompt TEXT` | User query (headless mode). If omitted, launches interactive mode. | `None` | 195 | | `--yolo` | Auto-confirm all tool calls without prompting | `False` | 196 | | `--workspace-root PATH` | Working directory for the agent | `.` | 197 | | `--agent TEXT` | Agent profile (`default`, `swe`, or path to `.md` file) | `default` | 198 | | `--max-iterations INT` | Maximum agent loop iterations | `200` | 199 | 200 | ### Model Configuration 201 | 202 | | Option | Description | Default | 203 | |--------|-------------|---------| 204 | | `--model TEXT` | LLM model name | `$LLM_MODEL` | 205 | | `--api-key TEXT` | API key for authentication | `$OPENAI_API_KEY` | 206 | | `--base-url TEXT` | API base URL | `$LLM_BASE_URL` | 207 | | `--temperature FLOAT` | Model temperature (0.0-2.0, lower = more deterministic) | `0.0` | 208 | | `--max-tokens INT` | Maximum tokens in response | `null` | 209 | | `--timeout INT` | API request timeout (seconds) | `60` | 210 | 211 | ### Logging 212 | 213 | | Option | Description | Default | 214 | |--------|-------------|---------| 215 | | `--logs-dir PATH` | Log directory path | `~/.cfuse/logs` | 216 | | `-v, --verbose` | Enable verbose logging | `False` | 217 | | `--stream / --no-stream` | Enable/disable streaming output | `True` | 218 | 219 | ### Other Options 220 | 221 | | Option | Description | 222 | |--------|-------------| 223 | | `--config PATH` | Path to YAML configuration file | 224 | | `--bash-timeout INT` | Timeout for bash commands (seconds, default: 30) | 225 | | `--max-context-tokens INT` | Maximum context window size (default: 128000) | 226 | | `--enable-tools / --no-tools` | Enable/disable tool execution | 227 | | `--http` | Launch HTTP server mode | 228 | | `--http-port INT` | HTTP server port (default: 8000) | 229 | | `--help` | Show help message | 230 | 231 | **Configuration Priority:** CLI args > Environment variables > Config file > Defaults 232 | 233 | ## 📄 License 234 | 235 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. 
236 | -------------------------------------------------------------------------------- /codefuse/tools/utils/ripgrep.py: -------------------------------------------------------------------------------- 1 | """ 2 | Ripgrep utility - Find and execute ripgrep commands 3 | """ 4 | 5 | import subprocess 6 | import shutil 7 | import platform 8 | import os 9 | from typing import List, Optional 10 | from pathlib import Path 11 | 12 | from codefuse.observability import mainLogger 13 | 14 | 15 | # Cache for ripgrep path 16 | _ripgrep_path: Optional[str] = None 17 | _ripgrep_type: Optional[str] = None # 'system', 'python', or 'bundled' 18 | 19 | 20 | def _get_bundled_ripgrep_path() -> Optional[Path]: 21 | """ 22 | Get path to bundled ripgrep binary for current platform 23 | 24 | Directory structure: codefuse/tools/utils/ripgrep/{arch}-{platform}/rg 25 | Example: x64-darwin, arm64-darwin, x64-linux, arm64-linux, x64-win32 26 | 27 | Returns: 28 | Path to bundled ripgrep binary, or None if not available 29 | """ 30 | try: 31 | # Detect platform 32 | system = platform.system().lower() # 'linux', 'darwin', 'windows' 33 | machine = platform.machine().lower() # 'x86_64', 'arm64', 'amd64', etc. 34 | 35 | # Normalize machine architecture 36 | if machine in ('amd64', 'x86_64', 'x64'): 37 | arch = 'x64' 38 | elif machine in ('arm64', 'aarch64'): 39 | arch = 'arm64' 40 | else: 41 | mainLogger.warning(f"Unsupported architecture for bundled ripgrep: {machine}") 42 | return None 43 | 44 | # Normalize platform name 45 | if system == 'darwin': 46 | platform_name = 'darwin' 47 | elif system == 'linux': 48 | platform_name = 'linux' 49 | elif system == 'windows': 50 | platform_name = 'win32' 51 | else: 52 | mainLogger.warning(f"Unsupported platform for bundled ripgrep: {system}") 53 | return None 54 | 55 | # Build directory name: {arch}-{platform} 56 | dir_name = f"{arch}-{platform_name}" 57 | 58 | # Get the package directory (where this file is located) 59 | utils_dir = Path(__file__).parent 60 | ripgrep_dir = utils_dir / 'ripgrep' / dir_name 61 | 62 | # Determine binary name 63 | binary_name = 'rg.exe' if system == 'windows' else 'rg' 64 | rg_binary = ripgrep_dir / binary_name 65 | 66 | # Check if binary exists 67 | if rg_binary.exists(): 68 | # Make sure it's executable on Unix-like systems 69 | if system != 'windows': 70 | try: 71 | os.chmod(rg_binary, 0o755) 72 | except Exception as e: 73 | mainLogger.warning(f"Failed to set executable permission on {rg_binary}: {e}") 74 | 75 | mainLogger.info(f"Found bundled ripgrep at: {rg_binary}") 76 | return rg_binary 77 | else: 78 | mainLogger.debug(f"Bundled ripgrep not found at: {rg_binary}") 79 | return None 80 | 81 | except Exception as e: 82 | mainLogger.warning(f"Error locating bundled ripgrep: {e}") 83 | return None 84 | 85 | 86 | def find_ripgrep() -> "tuple[Optional[str], Optional[str]]": 87 | """ 88 | Find available ripgrep executable 89 | 90 | Priority: 91 | 1. System-installed ripgrep (rg command) 92 | 2. Python ripgrep-python package 93 | 3. Bundled ripgrep binary (platform-specific) 94 | 95 | Returns: 96 | Tuple of (ripgrep_path, ripgrep_type) where type is 'system', 'python', or 'bundled' 97 | Returns (None, None) if ripgrep is not found 98 | """ 99 | global _ripgrep_path, _ripgrep_type 100 | 101 | # Return cached result if available 102 | if _ripgrep_path is not None: 103 | return _ripgrep_path, _ripgrep_type 104 | 105 | # 1. 
Try system ripgrep 106 | rg_path = shutil.which('rg') 107 | if rg_path: 108 | _ripgrep_path = rg_path 109 | _ripgrep_type = 'system' 110 | mainLogger.info(f"Found system ripgrep at: {rg_path}") 111 | return _ripgrep_path, _ripgrep_type 112 | 113 | # 2. Try Python ripgrep-python package 114 | try: 115 | import ripgrep 116 | # ripgrep-python provides a 'rg' function or path 117 | if hasattr(ripgrep, 'rg'): 118 | _ripgrep_path = 'ripgrep-python' 119 | _ripgrep_type = 'python' 120 | mainLogger.info("Found Python ripgrep-python package") 121 | return _ripgrep_path, _ripgrep_type 122 | except ImportError: 123 | pass 124 | 125 | # 3. Try bundled ripgrep binary 126 | bundled_path = _get_bundled_ripgrep_path() 127 | if bundled_path: 128 | _ripgrep_path = str(bundled_path) 129 | _ripgrep_type = 'bundled' 130 | mainLogger.info(f"Using bundled ripgrep: {bundled_path}") 131 | return _ripgrep_path, _ripgrep_type 132 | 133 | # Not found 134 | mainLogger.warning( 135 | "Ripgrep not found. Please install ripgrep:\n" 136 | " - macOS: brew install ripgrep\n" 137 | " - Ubuntu/Debian: apt install ripgrep\n" 138 | " - Or: pip install ripgrep-python" 139 | ) 140 | return None, None 141 | 142 | 143 | def execute_ripgrep( 144 | args: List[str], 145 | search_path: str, 146 | timeout: Optional[float] = 30.0 147 | ) -> List[str]: 148 | """ 149 | Execute ripgrep command and return output lines 150 | 151 | Args: 152 | args: List of ripgrep arguments (without the 'rg' command itself) 153 | search_path: Path to search in 154 | timeout: Command timeout in seconds (default: 30.0) 155 | 156 | Returns: 157 | List of output lines (stdout) 158 | 159 | Raises: 160 | RuntimeError: If ripgrep is not found 161 | subprocess.TimeoutExpired: If command times out 162 | subprocess.CalledProcessError: If ripgrep returns non-zero exit code (except 1 for no matches) 163 | """ 164 | rg_path, rg_type = find_ripgrep() 165 | 166 | if rg_path is None: 167 | raise RuntimeError( 168 | "Ripgrep is not available. 
Please install ripgrep:\n" 169 | " - macOS: brew install ripgrep\n" 170 | " - Ubuntu/Debian: apt install ripgrep\n" 171 | " - Or: pip install ripgrep-python" 172 | ) 173 | 174 | # Build command 175 | if rg_type == 'system': 176 | cmd = [rg_path] + args + ['--', search_path] 177 | elif rg_type == 'bundled': 178 | # Use the bundled binary path directly 179 | cmd = [rg_path] + args + ['--', search_path] 180 | elif rg_type == 'python': 181 | # For ripgrep-python, we still use subprocess but with 'rg' command 182 | # The package should have made 'rg' available 183 | cmd = ['rg'] + args + ['--', search_path] 184 | else: 185 | raise RuntimeError(f"Unknown ripgrep type: {rg_type}") 186 | 187 | mainLogger.debug(f"Executing ripgrep: {' '.join(cmd)}") 188 | 189 | try: 190 | # Run ripgrep command 191 | result = subprocess.run( 192 | cmd, 193 | stdout=subprocess.PIPE, 194 | stderr=subprocess.PIPE, 195 | text=True, 196 | timeout=timeout, 197 | check=False, # We'll handle exit codes manually 198 | ) 199 | 200 | # Exit code 0: matches found 201 | # Exit code 1: no matches found (not an error) 202 | # Exit code 2+: actual error 203 | if result.returncode == 0: 204 | # Matches found 205 | lines = result.stdout.splitlines() 206 | mainLogger.debug(f"Ripgrep found {len(lines)} result lines") 207 | return lines 208 | elif result.returncode == 1: 209 | # No matches found (not an error for ripgrep) 210 | mainLogger.debug("Ripgrep found no matches") 211 | return [] 212 | else: 213 | # Actual error 214 | error_msg = result.stderr.strip() or f"Ripgrep exited with code {result.returncode}" 215 | mainLogger.error(f"Ripgrep error: {error_msg}") 216 | raise subprocess.CalledProcessError( 217 | result.returncode, 218 | cmd, 219 | output=result.stdout, 220 | stderr=result.stderr 221 | ) 222 | 223 | except subprocess.TimeoutExpired as e: 224 | mainLogger.error(f"Ripgrep command timed out after {timeout}s") 225 | raise 226 | except FileNotFoundError as e: 227 | # This shouldn't happen if find_ripgrep worked, but handle it 228 | raise RuntimeError(f"Ripgrep executable not found: {e}") 229 | 230 | -------------------------------------------------------------------------------- /codefuse/core/remote_tool_executor.py: -------------------------------------------------------------------------------- 1 | """ 2 | Remote Tool Executor - Handles tool execution via HTTP requests 3 | """ 4 | 5 | import json 6 | import time 7 | from typing import Dict, Any, Optional 8 | 9 | import requests 10 | 11 | from codefuse.tools.base import ToolResult 12 | from codefuse.observability import mainLogger 13 | 14 | 15 | class RemoteToolExecutor: 16 | """ 17 | Executes tools remotely via HTTP POST requests 18 | 19 | This executor sends tool calls to a remote service and receives 20 | the execution results over HTTP. 
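    Wire format, as constructed and parsed in execute() below:

        request:  {"instance_id": "...", "toolName": "...", "toolArgs": {...}}
        response: {"response": {"result": "<tool output>", "success": true}}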
21 | """ 22 | 23 | def __init__( 24 | self, 25 | url: str, 26 | instance_id: str, 27 | timeout: int = 60, 28 | ): 29 | """ 30 | Initialize remote tool executor 31 | 32 | Args: 33 | url: Remote tool service URL 34 | instance_id: Instance ID for the remote execution environment 35 | timeout: Timeout for HTTP requests in seconds 36 | """ 37 | self.url = url 38 | self.instance_id = instance_id 39 | self.timeout = timeout 40 | 41 | mainLogger.info( 42 | "RemoteToolExecutor initialized", 43 | url=url, 44 | instance_id=instance_id, 45 | timeout=timeout, 46 | ) 47 | 48 | def execute( 49 | self, 50 | tool_name: str, 51 | tool_args: Dict[str, Any], 52 | session_id: str, 53 | ) -> ToolResult: 54 | """ 55 | Execute a tool remotely 56 | 57 | Args: 58 | tool_name: Name of the tool to execute 59 | tool_args: Arguments for the tool 60 | session_id: Session ID for logging 61 | 62 | Returns: 63 | ToolResult containing the execution result 64 | """ 65 | # Construct request payload 66 | payload = { 67 | "instance_id": self.instance_id, 68 | "toolName": tool_name, 69 | "toolArgs": tool_args, 70 | } 71 | 72 | mainLogger.info( 73 | "Sending remote tool call", 74 | tool_name=tool_name, 75 | instance_id=self.instance_id, 76 | url=self.url, 77 | payload=payload, 78 | session_id=session_id, 79 | ) 80 | 81 | start_time = time.time() 82 | 83 | try: 84 | # Send POST request 85 | response = requests.post( 86 | self.url, 87 | json=payload, 88 | headers={"Content-Type": "application/json"}, 89 | timeout=self.timeout, 90 | ) 91 | 92 | response_time = time.time() - start_time 93 | 94 | mainLogger.info( 95 | "Received remote tool response", 96 | tool_name=tool_name, 97 | status_code=response.status_code, 98 | response_time_seconds=round(response_time, 2), 99 | session_id=session_id, 100 | ) 101 | 102 | # Check HTTP status code 103 | if response.status_code != 200: 104 | error_msg = f"Remote tool call failed with status {response.status_code}" 105 | mainLogger.error( 106 | "Remote tool call HTTP error", 107 | tool_name=tool_name, 108 | status_code=response.status_code, 109 | response_text=response.text[:500], # Log first 500 chars 110 | session_id=session_id, 111 | ) 112 | return ToolResult( 113 | content=f"Error: {error_msg}\nResponse: {response.text}", 114 | display=f"❌ Remote tool call failed (HTTP {response.status_code})", 115 | ) 116 | 117 | # Parse JSON response 118 | try: 119 | response_data = response.json() 120 | except json.JSONDecodeError as e: 121 | mainLogger.error( 122 | "Failed to parse remote tool response JSON", 123 | tool_name=tool_name, 124 | error=str(e), 125 | response_text=response.text[:500], 126 | session_id=session_id, 127 | exc_info=True, 128 | ) 129 | return ToolResult( 130 | content=f"Error: Failed to parse JSON response: {str(e)}", 131 | display=f"❌ Invalid JSON response from remote tool", 132 | ) 133 | 134 | # Validate response structure 135 | if "response" not in response_data: 136 | mainLogger.error( 137 | "Invalid remote tool response structure: missing 'response' field", 138 | tool_name=tool_name, 139 | response_data=response_data, 140 | session_id=session_id, 141 | ) 142 | return ToolResult( 143 | content=f"Error: Invalid response structure: {json.dumps(response_data)}", 144 | display=f"❌ Invalid response format from remote tool", 145 | ) 146 | 147 | response_inner = response_data["response"] 148 | 149 | # Extract result and success flag 150 | result_content = response_inner.get("result", "") 151 | success = response_inner.get("success", False) 152 | 153 | mainLogger.info( 154 | "Remote tool 
execution completed", 155 | tool_name=tool_name, 156 | success=success, 157 | result_length=len(result_content), 158 | session_id=session_id, 159 | ) 160 | 161 | # Return result 162 | if success: 163 | return ToolResult( 164 | content=result_content, 165 | display=f"✓ Remote tool '{tool_name}' executed successfully", 166 | ) 167 | else: 168 | # Tool executed but reported failure 169 | mainLogger.warning( 170 | "Remote tool execution reported failure", 171 | tool_name=tool_name, 172 | result=result_content[:500], 173 | session_id=session_id, 174 | ) 175 | return ToolResult( 176 | content=result_content, 177 | display=f"⚠ Remote tool '{tool_name}' completed with errors", 178 | ) 179 | 180 | except requests.exceptions.Timeout: 181 | error_msg = f"Remote tool call timed out after {self.timeout} seconds" 182 | mainLogger.error( 183 | "Remote tool call timeout", 184 | tool_name=tool_name, 185 | timeout=self.timeout, 186 | session_id=session_id, 187 | ) 188 | return ToolResult( 189 | content=f"Error: {error_msg}", 190 | display=f"❌ Remote tool call timed out", 191 | ) 192 | 193 | except requests.exceptions.ConnectionError as e: 194 | error_msg = f"Connection error: {str(e)}" 195 | mainLogger.error( 196 | "Remote tool call connection error", 197 | tool_name=tool_name, 198 | error=str(e), 199 | url=self.url, 200 | session_id=session_id, 201 | exc_info=True, 202 | ) 203 | return ToolResult( 204 | content=f"Error: {error_msg}", 205 | display=f"❌ Failed to connect to remote tool service", 206 | ) 207 | 208 | except requests.exceptions.RequestException as e: 209 | error_msg = f"Request error: {str(e)}" 210 | mainLogger.error( 211 | "Remote tool call request error", 212 | tool_name=tool_name, 213 | error=str(e), 214 | session_id=session_id, 215 | exc_info=True, 216 | ) 217 | return ToolResult( 218 | content=f"Error: {error_msg}", 219 | display=f"❌ Remote tool call failed", 220 | ) 221 | 222 | except Exception as e: 223 | error_msg = f"Unexpected error: {str(e)}" 224 | mainLogger.error( 225 | "Remote tool call unexpected error", 226 | tool_name=tool_name, 227 | error=str(e), 228 | session_id=session_id, 229 | exc_info=True, 230 | ) 231 | return ToolResult( 232 | content=f"Error: {error_msg}", 233 | display=f"❌ Unexpected error in remote tool call", 234 | ) 235 | 236 | -------------------------------------------------------------------------------- /codefuse/llm/base.py: -------------------------------------------------------------------------------- 1 | """ 2 | LLM Base Classes and Data Structures 3 | """ 4 | 5 | from dataclasses import dataclass, field 6 | from typing import List, Optional, Union, Iterator, Literal, Any, Dict 7 | from abc import ABC, abstractmethod 8 | from enum import Enum 9 | 10 | 11 | class MessageRole(str, Enum): 12 | """Message role in conversation""" 13 | SYSTEM = "system" 14 | USER = "user" 15 | ASSISTANT = "assistant" 16 | TOOL = "tool" 17 | 18 | 19 | @dataclass 20 | class ContentBlock: 21 | """Content block for multimodal messages""" 22 | type: str # "text", "image_url", etc. 
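    # e.g. ContentBlock(type="text", text="hello"), or an image block such as
    # ContentBlock(type="image_url", image_url={"url": "https://..."})  (assumed shape)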
23 | text: Optional[str] = None 24 | image_url: Optional[Dict[str, Any]] = None 25 | 26 | 27 | @dataclass 28 | class ToolCall: 29 | """Tool call from the model""" 30 | id: str 31 | type: str # "function" 32 | function: Dict[str, str] # {"name": str, "arguments": str (JSON)} 33 | 34 | 35 | @dataclass 36 | class Message: 37 | """Unified message format""" 38 | role: MessageRole 39 | content: Union[str, List[ContentBlock]] 40 | name: Optional[str] = None 41 | tool_calls: Optional[List[ToolCall]] = None 42 | tool_call_id: Optional[str] = None # For tool response messages 43 | 44 | def to_dict(self) -> Dict[str, Any]: 45 | """Convert to dictionary format""" 46 | result: Dict[str, Any] = {"role": self.role.value} 47 | 48 | if isinstance(self.content, str): 49 | result["content"] = self.content 50 | else: 51 | result["content"] = [ 52 | {k: v for k, v in block.__dict__.items() if v is not None} 53 | for block in self.content 54 | ] 55 | 56 | if self.name: 57 | result["name"] = self.name 58 | if self.tool_calls: 59 | result["tool_calls"] = [ 60 | { 61 | "id": tc.id, 62 | "type": tc.type, 63 | "function": tc.function 64 | } 65 | for tc in self.tool_calls 66 | ] 67 | if self.tool_call_id: 68 | result["tool_call_id"] = self.tool_call_id 69 | 70 | return result 71 | 72 | 73 | @dataclass 74 | class Tool: 75 | """Tool definition for function calling""" 76 | type: str = "function" 77 | function: Dict[str, Any] = field(default_factory=dict) # {"name", "description", "parameters"} 78 | 79 | def to_dict(self) -> Dict[str, Any]: 80 | """Convert to dictionary format""" 81 | return { 82 | "type": self.type, 83 | "function": self.function 84 | } 85 | 86 | 87 | @dataclass 88 | class TokenUsage: 89 | """Token usage statistics""" 90 | prompt_tokens: int 91 | completion_tokens: int 92 | total_tokens: int 93 | # Optional cache-related tokens (for providers that support it) 94 | cache_creation_input_tokens: Optional[int] = None 95 | cache_read_input_tokens: Optional[int] = None 96 | 97 | def __str__(self) -> str: 98 | base = f"Tokens(prompt={self.prompt_tokens}, completion={self.completion_tokens}, total={self.total_tokens}" 99 | if self.cache_read_input_tokens: 100 | base += f", cache_read={self.cache_read_input_tokens}" 101 | if self.cache_creation_input_tokens: 102 | base += f", cache_creation={self.cache_creation_input_tokens}" 103 | return base + ")" 104 | 105 | 106 | @dataclass 107 | class LLMResponse: 108 | """Unified LLM response format""" 109 | content: str 110 | tool_calls: List[ToolCall] = field(default_factory=list) 111 | usage: Optional[TokenUsage] = None 112 | model: str = "" 113 | finish_reason: str = "" # "stop", "tool_calls", "length", "content_filter", etc. 
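    # e.g. finish_reason == "tool_calls" when the model stopped to request tool execution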
114 | raw_response: Optional[Dict[str, Any]] = None # Original response for debugging 115 | 116 | @property 117 | def has_tool_calls(self) -> bool: 118 | """Check if response contains tool calls""" 119 | return len(self.tool_calls) > 0 120 | 121 | 122 | @dataclass 123 | class LLMChunk: 124 | """Streaming chunk from LLM""" 125 | type: Literal["content", "tool_call", "done"] 126 | delta: str = "" # Content delta 127 | tool_call: Optional[ToolCall] = None 128 | usage: Optional[TokenUsage] = None # Only present in final "done" chunk 129 | finish_reason: str = "" 130 | 131 | 132 | class BaseLLM(ABC): 133 | """ 134 | Abstract base class for all LLM implementations 135 | """ 136 | 137 | def __init__( 138 | self, 139 | model: str, 140 | api_key: str, 141 | base_url: Optional[str] = None, 142 | temperature: float = 0.0, 143 | max_tokens: Optional[int] = None, 144 | timeout: int = 60, 145 | parallel_tool_calls: bool = True, 146 | enable_thinking: bool = False, 147 | top_k: Optional[int] = None, 148 | top_p: Optional[float] = None, 149 | **kwargs 150 | ): 151 | """ 152 | Initialize LLM instance 153 | 154 | Args: 155 | model: Model identifier 156 | api_key: API key for authentication 157 | base_url: Base URL for API endpoint 158 | temperature: Sampling temperature (0-2) 159 | max_tokens: Maximum tokens to generate 160 | timeout: Request timeout in seconds 161 | parallel_tool_calls: Enable parallel tool calls (default: True) 162 | enable_thinking: Enable thinking mode for models that support it (default: False) 163 | top_k: Top-k sampling parameter (default: None) 164 | top_p: Nucleus sampling parameter (0-1, default: None) 165 | **kwargs: Additional provider-specific parameters 166 | """ 167 | self.model = model 168 | self.api_key = api_key 169 | self.base_url = base_url 170 | self.temperature = temperature 171 | self.max_tokens = max_tokens 172 | self.timeout = timeout 173 | self.parallel_tool_calls = parallel_tool_calls 174 | self.enable_thinking = enable_thinking 175 | self.top_k = top_k 176 | self.top_p = top_p 177 | self.extra_params = kwargs 178 | 179 | @abstractmethod 180 | def generate( 181 | self, 182 | messages: List[Message], 183 | tools: Optional[List[Tool]] = None, 184 | temperature: Optional[float] = None, 185 | max_tokens: Optional[int] = None, 186 | stream: bool = False, 187 | **kwargs 188 | ) -> Union[LLMResponse, Iterator[LLMChunk]]: 189 | """ 190 | Generate completion from messages 191 | 192 | Args: 193 | messages: List of conversation messages 194 | tools: Optional list of tools/functions available to the model 195 | temperature: Override default temperature 196 | max_tokens: Override default max_tokens 197 | stream: If True, return iterator of chunks; if False, return complete response 198 | **kwargs: Additional generation parameters 199 | 200 | Returns: 201 | LLMResponse for non-streaming, Iterator[LLMChunk] for streaming 202 | 203 | Raises: 204 | RetryableError: For errors that should be retried (timeout, rate limit) 205 | LLMError: For other errors 206 | """ 207 | pass 208 | 209 | @property 210 | def supports_prompt_caching(self) -> bool: 211 | """Whether this provider supports prompt caching""" 212 | return False 213 | 214 | @property 215 | def supports_parallel_tools(self) -> bool: 216 | """Whether this provider supports parallel tool calls""" 217 | return True 218 | 219 | @property 220 | def supports_streaming(self) -> bool: 221 | """Whether this provider supports streaming responses""" 222 | return True 223 | 224 | def format_messages_for_logging( 225 | self, 226 | 
messages: List[Message], 227 | tools: Optional[List[Tool]] = None 228 | ) -> Dict[str, Any]: 229 | """ 230 | Format messages and tools for logging purposes 231 | 232 | This method converts internal Message/Tool format to the provider's 233 | API format for logging. Override in subclasses to customize. 234 | 235 | Args: 236 | messages: List of messages 237 | tools: Optional list of tools 238 | 239 | Returns: 240 | Dict with 'messages' and optionally 'tools' in API format 241 | """ 242 | # Default implementation: use Message.to_dict() 243 | result = { 244 | "messages": [msg.to_dict() for msg in messages] 245 | } 246 | 247 | if tools: 248 | result["tools"] = [tool.to_dict() for tool in tools] 249 | 250 | return result 251 | 252 | def _prepare_cache_control( 253 | self, 254 | messages: List[Message], 255 | tools: Optional[List[Tool]] = None 256 | ) -> List[Message]: 257 | """ 258 | Automatically add prompt caching markers if supported. 259 | Override in subclasses for provider-specific caching. 260 | 261 | Args: 262 | messages: Original messages 263 | tools: Optional tools 264 | 265 | Returns: 266 | Messages with cache control markers added 267 | """ 268 | # Default implementation: no modification 269 | return messages 270 | 271 | -------------------------------------------------------------------------------- /codefuse/core/agent_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Agent Configuration - Agent profiles and management 3 | """ 4 | 5 | import re 6 | from dataclasses import dataclass 7 | from pathlib import Path 8 | from typing import Optional, List, Dict 9 | 10 | from codefuse.observability import mainLogger 11 | 12 | 13 | @dataclass 14 | class AgentProfile: 15 | """ 16 | Agent profile defining behavior, tools, and model 17 | 18 | Loaded from Markdown files with YAML frontmatter 19 | """ 20 | name: str 21 | description: str 22 | system_prompt: str 23 | tools: Optional[List[str]] = None # None = inherit all tools 24 | model: Optional[str] = None # None = use default model 25 | 26 | @classmethod 27 | def from_markdown(cls, path: str) -> "AgentProfile": 28 | """ 29 | Load agent profile from Markdown file with YAML frontmatter 30 | 31 | Format: 32 | ```markdown 33 | --- 34 | name: agent-name 35 | description: Agent description 36 | tools: tool1, tool2, tool3 # Optional 37 | model: model-name # Optional 38 | --- 39 | 40 | System prompt content... 
41 | ``` 42 | 43 | Args: 44 | path: Path to the Markdown file 45 | 46 | Returns: 47 | AgentProfile instance 48 | """ 49 | file_path = Path(path) 50 | 51 | if not file_path.exists(): 52 | raise FileNotFoundError(f"Agent profile not found: {path}") 53 | 54 | content = file_path.read_text(encoding='utf-8') 55 | 56 | # Parse YAML frontmatter and content 57 | frontmatter_pattern = r'^---\s*\n(.*?)\n---\s*\n(.*)$' 58 | match = re.match(frontmatter_pattern, content, re.DOTALL) 59 | 60 | if not match: 61 | raise ValueError(f"Invalid agent profile format in {path} (missing frontmatter)") 62 | 63 | frontmatter_str = match.group(1) 64 | system_prompt = match.group(2).strip() 65 | 66 | # Parse frontmatter (simple YAML parsing) 67 | frontmatter = {} 68 | for line in frontmatter_str.strip().split('\n'): 69 | if ':' in line: 70 | key, value = line.split(':', 1) 71 | key = key.strip() 72 | value = value.strip() 73 | 74 | # Remove comments 75 | if '#' in value: 76 | value = value.split('#')[0].strip() 77 | 78 | # Handle null/None values 79 | if value.lower() in ('null', 'none', ''): 80 | value = None 81 | 82 | frontmatter[key] = value 83 | 84 | # Extract fields 85 | name = frontmatter.get('name') 86 | if not name: 87 | raise ValueError(f"Agent profile missing 'name' field in {path}") 88 | 89 | description = frontmatter.get('description', '') 90 | 91 | # Parse tools (comma-separated list or None) 92 | tools_str = frontmatter.get('tools') 93 | tools = None 94 | if tools_str: 95 | tools = [t.strip() for t in tools_str.split(',') if t.strip()] 96 | 97 | # Parse model 98 | model = frontmatter.get('model') 99 | if model and model.lower() in ('inherit', 'default'): 100 | model = None 101 | 102 | mainLogger.info("Loaded agent profile", name=name, path=str(path)) 103 | 104 | return cls( 105 | name=name, 106 | description=description, 107 | system_prompt=system_prompt, 108 | tools=tools, 109 | model=model, 110 | ) 111 | 112 | @classmethod 113 | def get_builtin_agent(cls) -> "AgentProfile": 114 | """ 115 | Get the built-in default agent profile 116 | 117 | Returns: 118 | Default AgentProfile 119 | """ 120 | return cls( 121 | name="default", 122 | description="Default coding assistant for general development tasks", 123 | system_prompt="""You are CodeFuse, an AI coding assistant designed to help developers with their coding tasks. You have access to tools that allow you to read and write files in the workspace. 124 | 125 | Your approach: 126 | 1. Carefully analyze the user's request 127 | 2. Use available tools to gather necessary information 128 | 3. Propose clear, well-thought-out solutions 129 | 4. 
Execute changes carefully and verify results 130 | 131 | When modifying files: 132 | - Always read files before modifying them 133 | - Make precise, targeted changes 134 | - Explain what you're doing and why 135 | 136 | Be concise, accurate, and helpful.""", 137 | tools=None, # Inherits all available tools 138 | model=None, # Uses default model from config 139 | ) 140 | 141 | def get_tool_list(self, all_tools: List[str]) -> List[str]: 142 | """ 143 | Get the list of tools available to this agent 144 | 145 | Args: 146 | all_tools: List of all available tool names 147 | 148 | Returns: 149 | List of tool names this agent can use 150 | """ 151 | if self.tools is None: 152 | # Inherit all tools 153 | return all_tools 154 | else: 155 | # Return intersection of requested tools and available tools 156 | return [t for t in self.tools if t in all_tools] 157 | 158 | def get_model_name( 159 | self, 160 | default_model: str, 161 | model_aliases: Optional[Dict[str, str]] = None 162 | ) -> str: 163 | """ 164 | Get the model name to use for this agent 165 | 166 | Args: 167 | default_model: Default model name to use if not specified 168 | model_aliases: Optional mapping of aliases to model names 169 | (e.g., {"sonnet": "claude-3-5-sonnet-20241022"}) 170 | 171 | Returns: 172 | Resolved model name 173 | """ 174 | if self.model is None: 175 | return default_model 176 | 177 | # Check if it's an alias 178 | if model_aliases and self.model in model_aliases: 179 | return model_aliases[self.model] 180 | 181 | # Return as-is 182 | return self.model 183 | 184 | 185 | class AgentProfileManager: 186 | """ 187 | Manager for agent profiles 188 | 189 | Loads built-in and user-defined agent profiles from disk. 190 | """ 191 | 192 | def __init__(self, agent_dir: str = "~/.cfuse/agents"): 193 | """ 194 | Initialize agent profile manager 195 | 196 | Args: 197 | agent_dir: Directory containing user-defined agent profiles 198 | """ 199 | self.agent_dir = Path(agent_dir).expanduser() 200 | self._profiles: Dict[str, AgentProfile] = {} 201 | 202 | # Load built-in agent 203 | self._load_builtin_agent() 204 | 205 | # Load user agents 206 | self._load_user_agents() 207 | 208 | mainLogger.info("AgentProfileManager initialized", profile_count=len(self._profiles)) 209 | 210 | def _load_builtin_agent(self) -> None: 211 | """Load the built-in default agent""" 212 | default_agent = AgentProfile.get_builtin_agent() 213 | self._profiles[default_agent.name] = default_agent 214 | mainLogger.debug("Loaded built-in default agent") 215 | 216 | def _load_user_agents(self) -> None: 217 | """Load user-defined agents from agent_dir""" 218 | if not self.agent_dir.exists(): 219 | mainLogger.debug("Agent directory does not exist", agent_dir=str(self.agent_dir)) 220 | return 221 | 222 | # Look for .md files 223 | for file_path in self.agent_dir.glob("*.md"): 224 | try: 225 | agent = AgentProfile.from_markdown(str(file_path)) 226 | self._profiles[agent.name] = agent 227 | mainLogger.info("Loaded user agent", name=agent.name) 228 | except Exception as e: 229 | mainLogger.error("Failed to load agent", path=str(file_path), error=str(e)) 230 | 231 | def get_agent(self, name: str) -> Optional[AgentProfile]: 232 | """ 233 | Get an agent profile by name 234 | 235 | Args: 236 | name: Agent name 237 | 238 | Returns: 239 | AgentProfile if found, None otherwise 240 | """ 241 | return self._profiles.get(name) 242 | 243 | def list_agents(self) -> List[str]: 244 | """ 245 | List all available agent names 246 | 247 | Returns: 248 | List of agent names 249 | """ 250 | 
return list(self._profiles.keys()) 251 | 252 | def get_agent_info(self, name: str) -> Optional[str]: 253 | """ 254 | Get human-readable information about an agent 255 | 256 | Args: 257 | name: Agent name 258 | 259 | Returns: 260 | Formatted string with agent info, or None if not found 261 | """ 262 | agent = self.get_agent(name) 263 | if not agent: 264 | return None 265 | 266 | lines = [ 267 | f"Agent: {agent.name}", 268 | f"Description: {agent.description}", 269 | ] 270 | 271 | if agent.model: 272 | lines.append(f"Model: {agent.model}") 273 | else: 274 | lines.append("Model: (inherits from config)") 275 | 276 | if agent.tools: 277 | lines.append(f"Tools: {', '.join(agent.tools)}") 278 | else: 279 | lines.append("Tools: (all available)") 280 | 281 | return "\n".join(lines) 282 | 283 | -------------------------------------------------------------------------------- /codefuse/core/environment.py: -------------------------------------------------------------------------------- 1 | """ 2 | Environment Information Collection 3 | """ 4 | 5 | import os 6 | import platform 7 | import sys 8 | import subprocess 9 | from dataclasses import dataclass 10 | from typing import Optional 11 | from pathlib import Path 12 | 13 | from codefuse.observability import mainLogger 14 | 15 | 16 | @dataclass 17 | class EnvironmentInfo: 18 | """ 19 | Information about the current environment 20 | 21 | This information is used to provide context to the agent about 22 | the system it's running on. 23 | """ 24 | os_type: str # "darwin", "linux", "windows" 25 | os_version: str 26 | python_version: str 27 | cwd: str 28 | git_branch: Optional[str] = None 29 | git_status: Optional[str] = None 30 | 31 | def to_context_string(self) -> str: 32 | """ 33 | Convert environment info to a formatted string for system prompt 34 | 35 | Returns: 36 | Formatted string describing the environment 37 | """ 38 | lines = [ 39 | "# Environment Information", 40 | f"- OS: {self.os_type} {self.os_version}", 41 | f"- Python: {self.python_version}", 42 | f"- Working Directory: {self.cwd}", 43 | ] 44 | 45 | if self.git_branch: 46 | lines.append(f"- Git Branch: {self.git_branch}") 47 | 48 | if self.git_status: 49 | lines.append(f"- Git Status:\n{self.git_status}") 50 | 51 | return "\n".join(lines) 52 | 53 | @classmethod 54 | def collect(cls, cwd: Optional[str] = None) -> "EnvironmentInfo": 55 | """ 56 | Collect current environment information 57 | 58 | Args: 59 | cwd: Working directory (defaults to current directory) 60 | 61 | Returns: 62 | EnvironmentInfo instance 63 | """ 64 | if cwd is None: 65 | cwd = os.getcwd() 66 | 67 | cwd_path = Path(cwd).resolve() 68 | 69 | # Collect OS information 70 | os_type = platform.system().lower() 71 | os_version = platform.release() 72 | 73 | # Collect Python version 74 | python_version = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}" 75 | 76 | # Try to collect Git information 77 | git_branch = cls._get_git_branch(cwd_path) 78 | git_status = cls._get_git_status(cwd_path) 79 | 80 | mainLogger.info( 81 | "Collected environment info", 82 | os_type=os_type, 83 | os_version=os_version, 84 | python_version=python_version, 85 | ) 86 | 87 | return cls( 88 | os_type=os_type, 89 | os_version=os_version, 90 | python_version=python_version, 91 | cwd=str(cwd_path), 92 | git_branch=git_branch, 93 | git_status=git_status, 94 | ) 95 | 96 | @staticmethod 97 | def _get_git_branch(cwd: Path) -> Optional[str]: 98 | """ 99 | Get current git branch if in a git repository 100 | 101 | Args: 102 | cwd: 
Working directory 103 | 104 | Returns: 105 | Branch name or None 106 | """ 107 | try: 108 | result = subprocess.run( 109 | ["git", "rev-parse", "--abbrev-ref", "HEAD"], 110 | cwd=cwd, 111 | capture_output=True, 112 | text=True, 113 | timeout=2, 114 | ) 115 | if result.returncode == 0: 116 | return result.stdout.strip() 117 | except Exception as e: 118 | mainLogger.debug("Failed to get git branch", error=str(e)) 119 | 120 | return None 121 | 122 | @staticmethod 123 | def _get_git_status(cwd: Path) -> Optional[str]: 124 | """ 125 | Get git status if in a git repository 126 | 127 | Args: 128 | cwd: Working directory 129 | 130 | Returns: 131 | Git status output or None 132 | """ 133 | try: 134 | result = subprocess.run( 135 | ["git", "status", "--short"], 136 | cwd=cwd, 137 | capture_output=True, 138 | text=True, 139 | timeout=2, 140 | ) 141 | if result.returncode == 0: 142 | status = result.stdout.strip() 143 | if status: 144 | return status 145 | else: 146 | return "Clean (no changes)" 147 | except Exception as e: 148 | mainLogger.debug("Failed to get git status", error=str(e)) 149 | 150 | return None 151 | 152 | @staticmethod 153 | def _get_git_diff_stats(cwd: Path) -> Optional[dict]: 154 | """ 155 | Get git diff statistics using git add -A and git diff --cached --numstat 156 | 157 | This captures all changes including untracked files. 158 | 159 | Args: 160 | cwd: Working directory 161 | 162 | Returns: 163 | Dict with stats and file-level changes, or None 164 | """ 165 | try: 166 | # Stage all changes 167 | add_result = subprocess.run( 168 | ["git", "add", "-A"], 169 | cwd=cwd, 170 | capture_output=True, 171 | text=True, 172 | timeout=10, 173 | ) 174 | if add_result.returncode != 0: 175 | mainLogger.debug("Failed to stage changes", error=add_result.stderr) 176 | return None 177 | 178 | # Get numstat for staged changes 179 | numstat_result = subprocess.run( 180 | ["git", "diff", "--cached", "--numstat"], 181 | cwd=cwd, 182 | capture_output=True, 183 | text=True, 184 | timeout=10, 185 | ) 186 | 187 | if numstat_result.returncode != 0: 188 | return None 189 | 190 | numstat_output = numstat_result.stdout.strip() 191 | if not numstat_output: 192 | return None 193 | 194 | # Parse numstat output 195 | files = [] 196 | total_insertions = 0 197 | total_deletions = 0 198 | 199 | for line in numstat_output.split('\n'): 200 | if not line: 201 | continue 202 | parts = line.split('\t') 203 | if len(parts) >= 3: 204 | insertions = int(parts[0]) if parts[0] != '-' else 0 205 | deletions = int(parts[1]) if parts[1] != '-' else 0 206 | path = parts[2] 207 | 208 | files.append({ 209 | "path": path, 210 | "insertions": insertions, 211 | "deletions": deletions, 212 | }) 213 | 214 | total_insertions += insertions 215 | total_deletions += deletions 216 | 217 | return { 218 | "stats": { 219 | "files_changed": len(files), 220 | "insertions": total_insertions, 221 | "deletions": total_deletions, 222 | }, 223 | "files": files, 224 | } 225 | except Exception as e: 226 | mainLogger.debug("Failed to get git diff stats", error=str(e)) 227 | return None 228 | 229 | @staticmethod 230 | def _get_git_diff_text(cwd: Path) -> Optional[str]: 231 | """ 232 | Get full git diff text for staged changes 233 | 234 | Note: This assumes git add -A has already been called by _get_git_diff_stats() 235 | 236 | Args: 237 | cwd: Working directory 238 | 239 | Returns: 240 | Full diff text or None 241 | """ 242 | try: 243 | # Get diff for staged changes 244 | diff_result = subprocess.run( 245 | ["git", "diff", "--cached"], 246 | cwd=cwd, 247 
| capture_output=True, 248 | text=True, 249 | timeout=10, 250 | ) 251 | 252 | if diff_result.returncode == 0 and diff_result.stdout.strip(): 253 | return diff_result.stdout.strip() 254 | 255 | return None 256 | except Exception as e: 257 | mainLogger.debug("Failed to get git diff text", error=str(e)) 258 | return None 259 | 260 | @staticmethod 261 | def get_git_diff_info(cwd: Optional[str] = None) -> Optional[dict]: 262 | """ 263 | Get complete git diff information using git add -A and git diff --cached 264 | 265 | Note: This will stage all changes in the repository. 266 | 267 | Args: 268 | cwd: Working directory (defaults to current directory) 269 | 270 | Returns: 271 | Dictionary with stats, file list, and full diff text, or None if not a git repo 272 | { 273 | "stats": { 274 | "files_changed": 3, 275 | "insertions": 45, 276 | "deletions": 12 277 | }, 278 | "files": [ 279 | { 280 | "path": "file.py", 281 | "insertions": 30, 282 | "deletions": 5 283 | } 284 | ], 285 | "diff_text": "diff --git ..." 286 | } 287 | """ 288 | if cwd is None: 289 | cwd = os.getcwd() 290 | 291 | cwd_path = Path(cwd).resolve() 292 | 293 | # Get stats (this will also run git add -A) 294 | diff_info = EnvironmentInfo._get_git_diff_stats(cwd_path) 295 | if diff_info is None: 296 | return None 297 | 298 | # Get full diff text (uses git diff --cached) 299 | diff_text = EnvironmentInfo._get_git_diff_text(cwd_path) 300 | if diff_text: 301 | diff_info["diff_text"] = diff_text 302 | 303 | return diff_info 304 | 305 | -------------------------------------------------------------------------------- /codefuse/observability/http_logger.py: -------------------------------------------------------------------------------- 1 | """ 2 | HTTP Server Log Management - File-based logging with rotation and cleanup 3 | 4 | Features: 5 | - Dual format logging: text (access.log) and JSON (access-YYYYMMDD.json) 6 | - Daily log rotation 7 | - Automatic cleanup of old logs (default: 7 days retention) 8 | - Thread-safe for Gunicorn multi-worker setup 9 | """ 10 | 11 | import json 12 | import os 13 | import threading 14 | import time 15 | from datetime import datetime, timezone, timedelta 16 | from pathlib import Path 17 | from typing import Optional, Dict, Any 18 | import atexit 19 | 20 | 21 | class HTTPLogger: 22 | """Thread-safe HTTP request logger with rotation and cleanup""" 23 | 24 | def __init__(self, log_dir: str, retention_days: int = 7, cleanup_interval: int = 3600): 25 | """ 26 | Initialize HTTP logger 27 | 28 | Args: 29 | log_dir: Base directory for log files 30 | retention_days: Number of days to retain logs (default: 7) 31 | cleanup_interval: Cleanup check interval in seconds (default: 3600) 32 | """ 33 | self.log_dir = Path(log_dir).expanduser() 34 | self.log_dir.mkdir(parents=True, exist_ok=True) 35 | 36 | self.retention_days = retention_days 37 | self.cleanup_interval = cleanup_interval 38 | 39 | # File paths 40 | self.access_log_path = self.log_dir / "access.log" 41 | self.error_log_path = self.log_dir / "error.log" 42 | 43 | # Thread safety 44 | self._write_lock = threading.Lock() 45 | self._cleanup_thread: Optional[threading.Thread] = None 46 | self._stop_cleanup = threading.Event() 47 | 48 | # Current date for rotation check 49 | self._current_date = datetime.now().date() 50 | 51 | # Register cleanup on exit 52 | atexit.register(self.stop_cleanup_thread) 53 | 54 | def _get_json_log_path(self, date: Optional[datetime] = None) -> Path: 55 | """Get JSON log file path for a specific date""" 56 | if date is None: 57 | date = 
datetime.now() 58 | date_str = date.strftime("%Y%m%d") 59 | return self.log_dir / f"access-{date_str}.json" 60 | 61 | def _format_text_log( 62 | self, 63 | request_id: str, 64 | method: str, 65 | path: str, 66 | status: int, 67 | duration: float, 68 | tool_name: Optional[str] = None, 69 | workdir: Optional[str] = None, 70 | ) -> str: 71 | """Format log entry as human-readable text""" 72 | timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") 73 | tool_info = f" | tool:{tool_name}" if tool_name else "" 74 | workdir_info = f" | wd:{workdir}" if workdir else "" 75 | return f"{timestamp} | {method} {path} | {status} | {duration:.3f}s | {request_id}{tool_info}{workdir_info}\n" 76 | 77 | def _format_json_log( 78 | self, 79 | request_id: str, 80 | method: str, 81 | path: str, 82 | status: int, 83 | duration: float, 84 | tool_name: Optional[str] = None, 85 | tool_args: Optional[Dict[str, Any]] = None, 86 | workdir: Optional[str] = None, 87 | success: Optional[bool] = None, 88 | error: Optional[str] = None, 89 | ) -> str: 90 | """Format log entry as JSON""" 91 | log_data = { 92 | "timestamp": datetime.now(timezone.utc).isoformat(), 93 | "request_id": request_id, 94 | "method": method, 95 | "path": path, 96 | "status": status, 97 | "duration": round(duration, 3), 98 | } 99 | 100 | if tool_name: 101 | log_data["tool_name"] = tool_name 102 | if tool_args: 103 | log_data["tool_args"] = tool_args 104 | if workdir: 105 | log_data["workdir"] = workdir 106 | if success is not None: 107 | log_data["success"] = success 108 | if error: 109 | log_data["error"] = error 110 | 111 | return json.dumps(log_data, ensure_ascii=False) + "\n" 112 | 113 | def log_request( 114 | self, 115 | request_id: str, 116 | method: str, 117 | path: str, 118 | status: int, 119 | duration: float, 120 | tool_name: Optional[str] = None, 121 | tool_args: Optional[Dict[str, Any]] = None, 122 | workdir: Optional[str] = None, 123 | success: Optional[bool] = None, 124 | error: Optional[str] = None, 125 | ) -> None: 126 | """ 127 | Log HTTP request to both text and JSON files 128 | 129 | Thread-safe for concurrent writes from multiple workers. 
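Each call to log_request appends one JSON object per line to the day's access-YYYYMMDD.json file, so downstream tooling can post-process the logs line by line. A minimal sketch, assuming a hypothetical log directory and date:

```python
import json
from pathlib import Path

# Hypothetical daily file produced by HTTPLogger; the date is illustrative.
log_path = Path("~/.cfuse/logs/http_server/access-20250101.json").expanduser()

with open(log_path, encoding="utf-8") as f:
    for line in f:
        entry = json.loads(line)      # one request per line
        if entry["status"] >= 500:    # surface failed requests
            print(entry["request_id"], entry.get("error", ""))
```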
130 | """ 131 | with self._write_lock: 132 | try: 133 | # Check if date has changed (rotation needed) 134 | current_date = datetime.now().date() 135 | if current_date != self._current_date: 136 | self._current_date = current_date 137 | 138 | # Write text log 139 | text_entry = self._format_text_log( 140 | request_id, method, path, status, duration, tool_name, workdir 141 | ) 142 | with open(self.access_log_path, 'a', encoding='utf-8') as f: 143 | f.write(text_entry) 144 | 145 | # Write JSON log 146 | json_entry = self._format_json_log( 147 | request_id, method, path, status, duration, 148 | tool_name, tool_args, workdir, success, error 149 | ) 150 | json_log_path = self._get_json_log_path() 151 | with open(json_log_path, 'a', encoding='utf-8') as f: 152 | f.write(json_entry) 153 | 154 | except Exception as e: 155 | # Avoid blocking the request if logging fails 156 | print(f"[HTTPLogger] Failed to write log: {e}", flush=True) 157 | 158 | def log_error( 159 | self, 160 | request_id: str, 161 | error: str, 162 | traceback: Optional[str] = None, 163 | method: Optional[str] = None, 164 | path: Optional[str] = None, 165 | ) -> None: 166 | """Log error to error log file""" 167 | with self._write_lock: 168 | try: 169 | error_data = { 170 | "timestamp": datetime.now(timezone.utc).isoformat(), 171 | "request_id": request_id, 172 | "error": error, 173 | } 174 | 175 | if method: 176 | error_data["method"] = method 177 | if path: 178 | error_data["path"] = path 179 | if traceback: 180 | error_data["traceback"] = traceback 181 | 182 | error_entry = json.dumps(error_data, ensure_ascii=False) + "\n" 183 | with open(self.error_log_path, 'a', encoding='utf-8') as f: 184 | f.write(error_entry) 185 | 186 | except Exception as e: 187 | print(f"[HTTPLogger] Failed to write error log: {e}", flush=True) 188 | 189 | def _cleanup_old_logs(self) -> None: 190 | """Delete log files older than retention_days""" 191 | try: 192 | cutoff_date = datetime.now() - timedelta(days=self.retention_days) 193 | 194 | # Find and delete old JSON log files 195 | pattern = "access-*.json" 196 | for log_file in self.log_dir.glob(pattern): 197 | try: 198 | # Extract date from filename: access-20251120.json 199 | date_str = log_file.stem.split('-', 1)[1] # "20251120" 200 | file_date = datetime.strptime(date_str, "%Y%m%d") 201 | 202 | if file_date < cutoff_date: 203 | log_file.unlink() 204 | print(f"[HTTPLogger] Deleted old log: {log_file.name}", flush=True) 205 | 206 | except (ValueError, IndexError) as e: 207 | # Skip files with invalid date format 208 | print(f"[HTTPLogger] Skipping invalid log file: {log_file.name} ({e})", flush=True) 209 | 210 | except Exception as e: 211 | print(f"[HTTPLogger] Cleanup failed: {e}", flush=True) 212 | 213 | def _cleanup_worker(self) -> None: 214 | """Background thread worker for periodic cleanup""" 215 | print(f"[HTTPLogger] Cleanup thread started (interval: {self.cleanup_interval}s, retention: {self.retention_days} days)", flush=True) 216 | 217 | while not self._stop_cleanup.wait(timeout=self.cleanup_interval): 218 | self._cleanup_old_logs() 219 | 220 | print("[HTTPLogger] Cleanup thread stopped", flush=True) 221 | 222 | def start_cleanup_thread(self) -> None: 223 | """Start background cleanup thread""" 224 | if self._cleanup_thread is not None and self._cleanup_thread.is_alive(): 225 | print("[HTTPLogger] Cleanup thread already running", flush=True) 226 | return 227 | 228 | # Run initial cleanup 229 | self._cleanup_old_logs() 230 | 231 | # Start background thread 232 | self._cleanup_thread = 
threading.Thread( 233 | target=self._cleanup_worker, 234 | daemon=True, 235 | name="HTTPLoggerCleanup" 236 | ) 237 | self._cleanup_thread.start() 238 | 239 | def stop_cleanup_thread(self) -> None: 240 | """Stop background cleanup thread gracefully""" 241 | if self._cleanup_thread is None or not self._cleanup_thread.is_alive(): 242 | return 243 | 244 | print("[HTTPLogger] Stopping cleanup thread...", flush=True) 245 | self._stop_cleanup.set() 246 | self._cleanup_thread.join(timeout=5) 247 | 248 | 249 | def create_http_logger( 250 | log_dir: Optional[str] = None, 251 | retention_days: Optional[int] = None, 252 | cleanup_interval: Optional[int] = None, 253 | ) -> HTTPLogger: 254 | """ 255 | Create and configure HTTP logger from environment variables 256 | 257 | Environment variables: 258 | CFUSE_HTTP_LOG_DIR: Log directory (default: ~/.cfuse/logs/http_server) 259 | CFUSE_HTTP_LOG_RETENTION_DAYS: Retention in days (default: 7) 260 | CFUSE_HTTP_LOG_CLEANUP_INTERVAL: Cleanup interval in seconds (default: 3600) 261 | 262 | Args: 263 | log_dir: Override log directory 264 | retention_days: Override retention days 265 | cleanup_interval: Override cleanup interval 266 | 267 | Returns: 268 | Configured HTTPLogger instance 269 | """ 270 | if log_dir is None: 271 | log_dir = os.getenv("CFUSE_HTTP_LOG_DIR", "~/.cfuse/logs/http_server") 272 | 273 | if retention_days is None: 274 | retention_days = int(os.getenv("CFUSE_HTTP_LOG_RETENTION_DAYS", "7")) 275 | 276 | if cleanup_interval is None: 277 | cleanup_interval = int(os.getenv("CFUSE_HTTP_LOG_CLEANUP_INTERVAL", "3600")) 278 | 279 | return HTTPLogger(log_dir, retention_days, cleanup_interval) 280 | 281 | -------------------------------------------------------------------------------- /codefuse/tools/builtin/read_file.py: -------------------------------------------------------------------------------- 1 | """ 2 | Read File Tool - Read file contents from the workspace 3 | """ 4 | 5 | from pathlib import Path 6 | from typing import Optional, TYPE_CHECKING 7 | 8 | from codefuse.tools.base import BaseTool, ToolDefinition, ToolParameter, ToolResult 9 | from codefuse.tools.builtin.filesystem_base import FileSystemToolMixin, MAX_TOKENS, MAX_FILE_SIZE_BYTES 10 | from codefuse.observability import mainLogger 11 | 12 | if TYPE_CHECKING: 13 | from codefuse.core.read_tracker import ReadTracker 14 | 15 | 16 | # Read-specific limits 17 | DEFAULT_MAX_LINES = 1000 # Default maximum lines to read if no range specified 18 | 19 | 20 | class ReadFileTool(FileSystemToolMixin, BaseTool): 21 | """ 22 | Tool for reading file contents 23 | 24 | Features: 25 | - Read entire file or specific line range 26 | - Safety checks for file existence and readability 27 | - Prevents reading binary files 28 | - File size and token limits 29 | - Workspace root directory restriction 30 | - Line number formatting for LLM context 31 | """ 32 | 33 | def __init__( 34 | self, 35 | workspace_root: Optional[Path] = None, 36 | read_tracker: Optional["ReadTracker"] = None, 37 | ): 38 | """ 39 | Initialize ReadFileTool 40 | 41 | Args: 42 | workspace_root: Workspace root directory to restrict file access. 43 | Defaults to current working directory. 44 | read_tracker: Optional read tracker for tracking read files. 
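The tool can also be exercised directly; a minimal sketch assuming a hypothetical workspace root (read_tracker is optional and omitted here, and execute() is defined further below in this file):

```python
from pathlib import Path
from codefuse.tools.builtin.read_file import ReadFileTool

# Hypothetical workspace root; access is restricted to paths beneath it.
tool = ReadFileTool(workspace_root=Path("/home/user/project"))
result = tool.execute(path="/home/user/project/app.py")
print(result.display)   # e.g. "✓ Read lines 1-100 (100 lines)"
print(result.content)   # line-numbered content intended for the LLM
```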
45 | """ 46 | super().__init__(workspace_root=workspace_root) 47 | self._read_tracker = read_tracker 48 | 49 | @property 50 | def definition(self) -> ToolDefinition: 51 | """Define the read_file tool""" 52 | return ToolDefinition( 53 | name="read_file", 54 | description=( 55 | "Reads a file from the local filesystem. You can access any file directly by using this tool.\n\n" 56 | "Important:\n" 57 | "- The path parameter MUST be an absolute path, not a relative path\n" 58 | "- By default, it reads up to 1000 lines starting from the beginning of the file\n" 59 | "- You can optionally specify a line offset and limit (especially handy for long files), but it's recommended to read the whole file by not providing these parameters\n" 60 | "- Results are returned with line numbers starting at 1\n" 61 | ), 62 | parameters=[ 63 | ToolParameter( 64 | name="path", 65 | type="string", 66 | description="Absolute path to the file to read", 67 | required=True, 68 | ), 69 | ToolParameter( 70 | name="start_line", 71 | type="number", 72 | description="Starting line number", 73 | required=False, 74 | ), 75 | ToolParameter( 76 | name="end_line", 77 | type="number", 78 | description="Ending line number", 79 | required=False, 80 | ), 81 | ], 82 | requires_confirmation=False, # Reading is safe 83 | ) 84 | 85 | def _check_file_size(self, file_path: Path, has_pagination: bool) -> Optional[str]: 86 | """ 87 | Check if file size exceeds limit 88 | 89 | Args: 90 | file_path: Path to the file 91 | has_pagination: Whether pagination parameters are provided 92 | 93 | Returns: 94 | Error message if file is too large and no pagination, None otherwise 95 | """ 96 | file_size = file_path.stat().st_size 97 | 98 | # If file is too large and no pagination is provided 99 | if file_size > MAX_FILE_SIZE_BYTES and not has_pagination: 100 | size_kb = file_size / 1024 101 | max_kb = MAX_FILE_SIZE_BYTES / 1024 102 | return ( 103 | f"File size ({size_kb:.1f}KB) exceeds maximum ({max_kb:.0f}KB). " 104 | f"Please use start_line and end_line parameters to read specific portions." 
105 | ) 106 | return None 107 | 108 | 109 | def execute( 110 | self, 111 | path: str, 112 | start_line: Optional[int] = None, 113 | end_line: Optional[int] = None, 114 | **kwargs 115 | ) -> ToolResult: 116 | """ 117 | Execute the read_file tool 118 | 119 | Args: 120 | path: Absolute path to the file to read 121 | start_line: Optional starting line (1-indexed) 122 | end_line: Optional ending line (1-indexed) 123 | 124 | Returns: 125 | ToolResult with: 126 | - content: Formatted file contents with line numbers for LLM 127 | - display: Summary message for user 128 | """ 129 | try: 130 | # Step 1: Check if path is absolute 131 | if error := self._check_absolute_path(path): 132 | return ToolResult( 133 | content=f"Error: {error}", 134 | display=f"❌ {error}" 135 | ) 136 | 137 | # Resolve path 138 | file_path = Path(path).resolve() 139 | 140 | # Step 2: Check if within workspace 141 | if error := self._check_within_workspace(file_path): 142 | mainLogger.warning(f"File access outside workspace: {error}") 143 | return ToolResult( 144 | content=f"Error: {error}", 145 | display=f"❌ Access denied: outside workspace" 146 | ) 147 | 148 | # Step 3: Check file existence 149 | if not file_path.exists(): 150 | error_msg = f"File not found: {path}" 151 | mainLogger.error(error_msg) 152 | return ToolResult( 153 | content=f"Error: {error_msg}", 154 | display=f"❌ File not found" 155 | ) 156 | 157 | # Step 4: Check it's a file 158 | if not file_path.is_file(): 159 | error_msg = f"Path is not a file: {path}" 160 | mainLogger.error(error_msg) 161 | return ToolResult( 162 | content=f"Error: {error_msg}", 163 | display=f"❌ Not a file" 164 | ) 165 | 166 | # Step 5: Check file size 167 | has_pagination = start_line is not None or end_line is not None 168 | if error := self._check_file_size(file_path, has_pagination): 169 | mainLogger.warning(f"File too large: {error}") 170 | return ToolResult( 171 | content=f"Error: {error}", 172 | display=f"❌ File too large (>256KB)" 173 | ) 174 | 175 | # Step 6: Read file contents with encoding fallback 176 | try: 177 | file_content, encoding = self._read_with_encoding_fallback(file_path) 178 | lines = file_content.splitlines(keepends=True) 179 | mainLogger.debug(f"Successfully read file with encoding: {encoding}") 180 | except UnicodeDecodeError as e: 181 | error_msg = f"Cannot read file (encoding error): {path}" 182 | mainLogger.error(f"{error_msg}: {e}") 183 | return ToolResult( 184 | content=f"Error: {error_msg}", 185 | display=f"❌ Encoding error" 186 | ) 187 | 188 | # Step 7: Handle line range 189 | start_idx = (start_line - 1) if start_line else 0 190 | 191 | # Determine end index 192 | if end_line is not None: 193 | end_idx = end_line 194 | else: 195 | # Default: read up to DEFAULT_MAX_LINES from start 196 | end_idx = start_idx + DEFAULT_MAX_LINES 197 | 198 | # Cap at actual file length 199 | end_idx = min(end_idx, len(lines)) 200 | 201 | # Validate line numbers 202 | if start_idx < 0 or start_idx >= len(lines): 203 | error_msg = f"Invalid start_line {start_line} (file has {len(lines)} lines)" 204 | return ToolResult( 205 | content=f"Error: {error_msg}", 206 | display=f"❌ {error_msg}" 207 | ) 208 | if end_idx < start_idx: 209 | error_msg = f"Invalid end_line {end_line} (must be >= start_line)" 210 | return ToolResult( 211 | content=f"Error: {error_msg}", 212 | display=f"❌ {error_msg}" 213 | ) 214 | 215 | selected_lines = lines[start_idx:end_idx] 216 | content = ''.join(selected_lines) 217 | actual_start_line = start_line or 1 218 | actual_end_line = actual_start_line + 
len(selected_lines) - 1 219 | 220 | # Check if file was truncated 221 | was_truncated = end_idx < len(lines) and end_line is None 222 | 223 | # Step 8: Check token limit 224 | if error := self._check_token_limit(content, MAX_TOKENS): 225 | mainLogger.warning(f"Token limit exceeded: {error}") 226 | return ToolResult( 227 | content=f"Error: {error}", 228 | display=f"❌ Content too large (>{MAX_TOKENS:,} tokens)" 229 | ) 230 | 231 | # Step 9: Format content with line numbers 232 | formatted_content = self._format_with_line_numbers(content, actual_start_line) 233 | 234 | # Add truncation warning if file was truncated 235 | if was_truncated: 236 | truncation_note = ( 237 | f"\n\n" 238 | f"Note: File has {len(lines)} total lines, but only showing lines {actual_start_line}-{actual_end_line} " 239 | f"(default limit: {DEFAULT_MAX_LINES} lines). " 240 | f"Use start_line and end_line parameters to read other portions of the file." 241 | f"" 242 | ) 243 | formatted_content += truncation_note 244 | 245 | # Step 10: Prepare display message 246 | num_lines = len(selected_lines) 247 | line_range = f"lines {actual_start_line}-{actual_end_line}" 248 | 249 | if was_truncated: 250 | display_msg = f"✓ Read {line_range} ({num_lines}/{len(lines)} lines)" 251 | else: 252 | display_msg = f"✓ Read {line_range} ({num_lines} lines)" 253 | 254 | mainLogger.info(f"Read {file_path} ({num_lines} lines, total: {len(lines)})") 255 | 256 | # Mark file as read (for edit tool validation) 257 | if self._read_tracker: 258 | self._read_tracker.mark_as_read(str(file_path)) 259 | 260 | return ToolResult( 261 | content=formatted_content, 262 | display=display_msg 263 | ) 264 | 265 | except PermissionError as e: 266 | error_msg = f"Permission denied reading file: {path}" 267 | mainLogger.error(f"{error_msg}: {e}") 268 | return ToolResult( 269 | content=f"Error: {error_msg}", 270 | display=f"❌ Permission denied" 271 | ) 272 | except Exception as e: 273 | error_msg = f"Unexpected error reading file: {path}" 274 | mainLogger.error(f"{error_msg}: {e}", exc_info=True) 275 | return ToolResult( 276 | content=f"Error: {error_msg} - {str(e)}", 277 | display=f"❌ Error: {str(e)}" 278 | ) 279 | 280 | -------------------------------------------------------------------------------- /codefuse/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Configuration Management 3 | """ 4 | 5 | import os 6 | import copy 7 | from dataclasses import dataclass, fields 8 | from pathlib import Path 9 | from typing import Optional, List, Any 10 | import yaml 11 | 12 | from codefuse.observability import mainLogger 13 | 14 | 15 | @dataclass 16 | class LLMConfig: 17 | """LLM configuration""" 18 | provider: Optional[str] = None 19 | model: Optional[str] = None 20 | api_key: Optional[str] = None 21 | base_url: Optional[str] = None 22 | temperature: Optional[float] = None 23 | max_tokens: Optional[int] = None 24 | timeout: Optional[int] = None 25 | parallel_tool_calls: Optional[bool] = None 26 | enable_thinking: Optional[bool] = None 27 | top_k: Optional[int] = None 28 | top_p: Optional[float] = None 29 | 30 | 31 | @dataclass 32 | class AgentConfig: 33 | """Agent configuration""" 34 | max_iterations: Optional[int] = None 35 | max_context_tokens: Optional[int] = None 36 | enable_tools: Optional[bool] = None 37 | yolo: Optional[bool] = None 38 | agent: Optional[str] = None 39 | workspace_root: Optional[str] = None 40 | bash_timeout: Optional[int] = None 41 | bash_allowed_commands: Optional[list] = None 42 | 
bash_disallowed_commands: Optional[list] = None 43 | remote_tool_enabled: Optional[bool] = None 44 | remote_tool_url: Optional[str] = None 45 | remote_tool_instance_id: Optional[str] = None 46 | remote_tool_timeout: Optional[int] = None 47 | 48 | 49 | @dataclass 50 | class LoggingConfig: 51 | """Logging configuration""" 52 | logs_dir: Optional[str] = None 53 | verbose: Optional[bool] = None 54 | 55 | 56 | # Default values (centralized) 57 | DEFAULTS = { 58 | "llm": { 59 | "provider": "openai_compatible", 60 | "model": "", 61 | "api_key": "", 62 | "base_url": "", 63 | "temperature": 0.0, 64 | "max_tokens": None, 65 | "timeout": 60, 66 | "parallel_tool_calls": True, 67 | "enable_thinking": False, 68 | "top_k": None, 69 | "top_p": None, 70 | }, 71 | "agent_config": { 72 | "max_iterations": 200, 73 | "max_context_tokens": 100000, 74 | "enable_tools": True, 75 | "yolo": False, 76 | "agent": "default", 77 | "workspace_root": ".", 78 | "bash_timeout": 30, 79 | "bash_allowed_commands": [], 80 | "bash_disallowed_commands": [], 81 | "remote_tool_enabled": False, 82 | "remote_tool_url": "", 83 | "remote_tool_instance_id": "", 84 | "remote_tool_timeout": 60, 85 | }, 86 | "logging": { 87 | "logs_dir": "~/.cfuse/logs", 88 | "verbose": False, 89 | }, 90 | } 91 | 92 | 93 | # Environment variable mapping (only core configs) 94 | ENV_MAPPING = [ 95 | ("api_key", "llm", str, ["OPENAI_API_KEY"]), 96 | ("base_url", "llm", str, ["LLM_BASE_URL"]), 97 | ("model", "llm", str, ["LLM_MODEL"]), 98 | ("logs_dir", "logging", str, ["LOGS_DIR"]), 99 | ("verbose", "logging", bool, ["VERBOSE"]), 100 | ] 101 | 102 | 103 | # Validation rules (section, field, check_function, error_message) 104 | VALIDATIONS = [ 105 | ('llm', 'temperature', lambda v: 0 <= v <= 2, "temperature must be 0-2"), 106 | ('llm', 'top_p', lambda v: 0 <= v <= 1, "top_p must be 0-1"), 107 | ('llm', 'top_k', lambda v: v > 0, "top_k must be positive"), 108 | ('llm', 'timeout', lambda v: v > 0, "timeout must be positive"), 109 | ('llm', 'max_tokens', lambda v: v > 0, "max_tokens must be positive"), 110 | ('agent_config', 'max_iterations', lambda v: v > 0, "max_iterations must be positive"), 111 | ('agent_config', 'bash_timeout', lambda v: v > 0, "bash_timeout must be positive"), 112 | ('agent_config', 'remote_tool_timeout', lambda v: v > 0, "remote_tool_timeout must be positive"), 113 | ] 114 | 115 | 116 | def _get_env_value(env_vars: List[str], type_: type) -> Any: 117 | """Get first available environment variable and convert to type""" 118 | for env_var in env_vars: 119 | value = os.getenv(env_var) 120 | if value is not None: 121 | try: 122 | if type_ == bool: 123 | return value.lower() in ('true', '1', 'yes') 124 | elif type_ == int: 125 | return int(value) 126 | elif type_ == float: 127 | return float(value) 128 | else: 129 | return value 130 | except (ValueError, AttributeError) as e: 131 | mainLogger.warning( 132 | "Failed to convert environment variable", 133 | env_var=env_var, 134 | value=value, 135 | error=str(e) 136 | ) 137 | return None 138 | 139 | 140 | def _expand_env_vars(data: Any) -> Any: 141 | """Recursively expand ${VAR} in strings""" 142 | if isinstance(data, dict): 143 | return {k: _expand_env_vars(v) for k, v in data.items()} 144 | elif isinstance(data, list): 145 | return [_expand_env_vars(item) for item in data] 146 | elif isinstance(data, str): 147 | import re 148 | def replacer(match): 149 | var_name = match.group(1) or match.group(2) 150 | return os.getenv(var_name, match.group(0)) 151 | return re.sub(r'\$\{([^}]+)\}|\$(\w+)', 
replacer, data) 152 | return data 153 | 154 | 155 | @dataclass 156 | class Config: 157 | """Main configuration""" 158 | llm: LLMConfig = None 159 | agent_config: AgentConfig = None 160 | logging: LoggingConfig = None 161 | 162 | def __post_init__(self): 163 | """Initialize sub-configs if not provided""" 164 | if self.llm is None: 165 | self.llm = LLMConfig() 166 | if self.agent_config is None: 167 | self.agent_config = AgentConfig() 168 | if self.logging is None: 169 | self.logging = LoggingConfig() 170 | 171 | @classmethod 172 | def from_defaults(cls) -> "Config": 173 | """Create config from default values""" 174 | return cls( 175 | llm=LLMConfig(**DEFAULTS["llm"]), 176 | agent_config=AgentConfig(**DEFAULTS["agent_config"]), 177 | logging=LoggingConfig(**DEFAULTS["logging"]), 178 | ) 179 | 180 | @classmethod 181 | def from_yaml(cls, path: str) -> Optional["Config"]: 182 | """Load config from YAML file""" 183 | file_path = Path(path).expanduser() 184 | if not file_path.exists(): 185 | return None 186 | 187 | try: 188 | with open(file_path, 'r', encoding='utf-8') as f: 189 | data = yaml.safe_load(f) or {} 190 | 191 | data = _expand_env_vars(data) 192 | 193 | llm_data = data.get('llm', {}) 194 | agent_data = data.get('agent_config', {}) or data.get('agent', {}) # Support both for backward compatibility 195 | logging_data = data.get('logging', {}) 196 | 197 | mainLogger.info("Loaded configuration from file", path=str(path)) 198 | return cls( 199 | llm=LLMConfig(**{k: v for k, v in llm_data.items() if k in {f.name for f in fields(LLMConfig)}}), 200 | agent_config=AgentConfig(**{k: v for k, v in agent_data.items() if k in {f.name for f in fields(AgentConfig)}}), 201 | logging=LoggingConfig(**{k: v for k, v in logging_data.items() if k in {f.name for f in fields(LoggingConfig)}}), 202 | ) 203 | except Exception as e: 204 | mainLogger.error("Failed to load config from file", path=str(path), error=str(e)) 205 | return None 206 | 207 | @classmethod 208 | def from_env(cls) -> "Config": 209 | """Load config from environment variables""" 210 | cfg = cls() 211 | 212 | # Map 'agent' section in ENV_MAPPING to 'agent_config' 213 | for field_name, section, type_, env_vars in ENV_MAPPING: 214 | value = _get_env_value(env_vars, type_) 215 | if value is not None: 216 | section_name = 'agent_config' if section == 'agent' else section 217 | section_obj = getattr(cfg, section_name) 218 | setattr(section_obj, field_name, value) 219 | 220 | return cfg 221 | 222 | @classmethod 223 | def load(cls, config_path: Optional[str] = None) -> "Config": 224 | """ 225 | Load configuration: defaults → file → env 226 | Priority: defaults < file < env < cli (cli done via merge_with_cli_args) 227 | """ 228 | # Start with defaults 229 | cfg = cls.from_defaults() 230 | 231 | # Try to load from file 232 | if config_path: 233 | file_cfg = cls.from_yaml(config_path) 234 | else: 235 | # Try default locations 236 | file_cfg = None 237 | for path in [".cfuse.yaml", "~/.cfuse.yaml", "~/.config/cfuse/config.yaml"]: 238 | file_cfg = cls.from_yaml(path) 239 | if file_cfg: 240 | break 241 | 242 | if file_cfg: 243 | cfg = cls._merge(cfg, file_cfg) 244 | 245 | # Merge environment variables 246 | env_cfg = cls.from_env() 247 | cfg = cls._merge(cfg, env_cfg) 248 | 249 | return cfg 250 | 251 | @staticmethod 252 | def _merge(base: "Config", override: "Config") -> "Config": 253 | """Merge configs: non-None values in override take precedence""" 254 | result = copy.deepcopy(base) 255 | 256 | # Merge each section 257 | for section_name in ['llm', 
'agent_config', 'logging']: 258 |             base_section = getattr(result, section_name) 259 |             override_section = getattr(override, section_name) 260 | 261 |             for field in fields(base_section): 262 |                 override_value = getattr(override_section, field.name) 263 |                 if override_value is not None: 264 |                     setattr(base_section, field.name, override_value) 265 | 266 |         return result 267 | 268 |     @classmethod 269 |     def merge_with_cli_args(cls, config: "Config", **cli_args) -> "Config": 270 |         """Merge CLI arguments (highest priority)""" 271 |         result = copy.deepcopy(config) 272 | 273 |         # Special mapping: CLI 'think' → config 'enable_thinking' 274 |         if cli_args.get('think') is not None: 275 |             result.llm.enable_thinking = cli_args['think'] 276 | 277 |         # Auto-match all other CLI args to config fields by name 278 |         for key, value in cli_args.items(): 279 |             if value is None or key == 'think': 280 |                 continue 281 | 282 |             # Try to find matching field in each section 283 |             for section in [result.llm, result.agent_config, result.logging]: 284 |                 if hasattr(section, key): 285 |                     setattr(section, key, value) 286 |                     break 287 | 288 |         return result 289 | 290 |     def validate(self) -> List[str]: 291 |         """Validate configuration""" 292 |         errors = [] 293 | 294 |         # Check required LLM parameters (field, display name, env var per ENV_MAPPING) 295 |         required_llm_fields = [ 296 |             ('api_key', 'LLM API key', 'OPENAI_API_KEY'), 297 |             ('model', 'LLM model', 'LLM_MODEL'), 298 |             ('base_url', 'LLM base URL', 'LLM_BASE_URL'), 299 |         ] 300 | 301 |         for field_name, display_name, env_var in required_llm_fields: 302 |             value = getattr(self.llm, field_name) 303 |             if value is None or (isinstance(value, str) and value.strip() == ""): 304 |                 errors.append( 305 |                     f"{display_name} is required. " 306 |                     f"Set it via --{field_name.replace('_', '-')} flag, " 307 |                     f"{env_var} environment variable, " 308 |                     f"or config file."
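Taken together, the precedence chain is defaults < YAML file < environment < CLI flags; a minimal sketch (flag values illustrative):

```python
from codefuse.config import Config

cfg = Config.load(".cfuse.yaml")                                    # defaults, file, env
cfg = Config.merge_with_cli_args(cfg, model="my-model", yolo=True)  # CLI wins

errors = cfg.validate()
if errors:
    raise SystemExit("\n".join(errors))
```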
309 | ) 310 | 311 | # Check value range validations 312 | for section_name, field, check, msg in VALIDATIONS: 313 | value = getattr(getattr(self, section_name), field) 314 | if value is not None and not check(value): 315 | errors.append(f"{msg}, got {value}") 316 | 317 | return errors 318 | -------------------------------------------------------------------------------- /codefuse/tools/builtin/glob.py: -------------------------------------------------------------------------------- 1 | """ 2 | Glob Tool - Fast file pattern matching tool 3 | """ 4 | 5 | import glob as glob_lib 6 | from pathlib import Path 7 | from typing import Optional, List 8 | from dataclasses import dataclass 9 | 10 | from codefuse.tools.base import BaseTool, ToolDefinition, ToolParameter, ToolResult 11 | from codefuse.tools.builtin.filesystem_base import FileSystemToolMixin 12 | from codefuse.tools.builtin.list_directory import DEFAULT_IGNORE_PATTERNS 13 | from codefuse.observability import mainLogger 14 | 15 | 16 | # Result limit 17 | MAX_FILES = 100 18 | 19 | 20 | @dataclass 21 | class GlobResult: 22 | """Result structure for glob operations""" 23 | files: List[str] # Absolute file paths 24 | total_found: int # Total files found before truncation 25 | truncated: bool # Whether results were truncated 26 | 27 | 28 | class GlobTool(FileSystemToolMixin, BaseTool): 29 | """ 30 | Tool for finding files by glob pattern 31 | 32 | Features: 33 | - Fast file pattern matching 34 | - Supports standard glob patterns (*, ?, **, [...]) 35 | - Returns matching file paths sorted by modification time (newest first) 36 | - Automatically ignores common build/cache directories 37 | - Result limit to prevent excessive output 38 | """ 39 | 40 | def __init__(self, workspace_root: Optional[Path] = None): 41 | """ 42 | Initialize GlobTool 43 | 44 | Args: 45 | workspace_root: Workspace root directory to restrict searches. 46 | Defaults to current working directory. 47 | """ 48 | super().__init__(workspace_root=workspace_root) 49 | 50 | @property 51 | def definition(self) -> ToolDefinition: 52 | """Define the glob tool""" 53 | return ToolDefinition( 54 | name="glob", 55 | description=( 56 | "Find files by name patterns using glob syntax. Handles codebases of any size efficiently.\n\n" 57 | "- Accepts standard glob patterns such as:\n" 58 | "*.py - match files in current directory\n" 59 | "**/*.js - search all subdirectories recursively\n" 60 | "src/**/*.ts - limit search to specific path\n" 61 | "test_*.py - match files with prefix\n\n" 62 | "- Notes:\n" 63 | "Limits results to 100 file paths.\n" 64 | "Supports parallel calls with different patterns(Recommended to use this tool in a batch of patterns to find files that are potentially useful)." 65 | ).strip(), 66 | parameters=[ 67 | ToolParameter( 68 | name="pattern", 69 | type="string", 70 | description="The glob pattern to match files against", 71 | required=True, 72 | ), 73 | ToolParameter( 74 | name="path", 75 | type="string", 76 | description=( 77 | "The directory to search in. If not specified, the workspace root will be used. " 78 | "IMPORTANT: Omit this field to use the default directory. Must be an absolute path if provided." 
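As a sketch of how the documented patterns map onto calls (workspace path hypothetical; execute() is defined further down in this file):

```python
from pathlib import Path
from codefuse.tools.builtin.glob import GlobTool

tool = GlobTool(workspace_root=Path("/home/user/project"))
tool.execute(pattern="**/*.py")                                   # recursive search
tool.execute(pattern="src/**/*.ts", path="/home/user/project")    # scoped to a subtree
```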
79 | ), 80 | required=False, 81 | ), 82 | ], 83 | requires_confirmation=False, # Searching is safe 84 | ) 85 | 86 | def _should_ignore(self, file_path: Path) -> bool: 87 | """ 88 | Check if a file should be ignored based on default patterns 89 | 90 | Args: 91 | file_path: Path to check 92 | 93 | Returns: 94 | True if file should be ignored 95 | """ 96 | # Check against all default ignore patterns 97 | for pattern in DEFAULT_IGNORE_PATTERNS: 98 | # Check if pattern matches any part of the path 99 | for part in file_path.parts: 100 | if glob_lib.fnmatch.fnmatch(part, pattern): 101 | return True 102 | 103 | # Also check the full path string 104 | if glob_lib.fnmatch.fnmatch(str(file_path), pattern): 105 | return True 106 | 107 | return False 108 | 109 | def _execute_glob(self, pattern: str, search_path: Path) -> List[Path]: 110 | """ 111 | Execute glob search with pattern 112 | 113 | Args: 114 | pattern: Glob pattern to match 115 | search_path: Directory to search in 116 | 117 | Returns: 118 | List of matching file paths 119 | """ 120 | # Determine if pattern contains recursive wildcard 121 | has_recursive = '**' in pattern 122 | 123 | # Build the full pattern path 124 | full_pattern = str(search_path / pattern) 125 | 126 | # Execute glob 127 | matches = glob_lib.glob(full_pattern, recursive=has_recursive) 128 | 129 | # Convert to Path objects and filter 130 | result_paths: List[Path] = [] 131 | for match in matches: 132 | path = Path(match).resolve() 133 | 134 | # Only include files (not directories) 135 | if not path.is_file(): 136 | continue 137 | 138 | # Skip ignored paths 139 | if self._should_ignore(path): 140 | continue 141 | 142 | result_paths.append(path) 143 | 144 | return result_paths 145 | 146 | def _sort_by_mtime(self, file_paths: List[Path]) -> List[Path]: 147 | """ 148 | Sort files by modification time (newest first), then by filename 149 | 150 | Args: 151 | file_paths: List of file paths 152 | 153 | Returns: 154 | Sorted list of file paths 155 | """ 156 | if not file_paths: 157 | return file_paths 158 | 159 | try: 160 | # Get file stats for all files 161 | file_stats = [] 162 | for file_path in file_paths: 163 | try: 164 | mtime = file_path.stat().st_mtime if file_path.exists() else 0 165 | file_stats.append((file_path, mtime)) 166 | except Exception: 167 | # If we can't stat the file, use mtime = 0 168 | file_stats.append((file_path, 0)) 169 | 170 | # Sort by modification time (newest first), then by filename (alphabetical) 171 | file_stats.sort(key=lambda x: (-x[1], str(x[0]))) 172 | 173 | return [fp for fp, _ in file_stats] 174 | except Exception as e: 175 | mainLogger.warning(f"Error sorting files by mtime: {e}") 176 | return file_paths 177 | 178 | def _apply_limit(self, file_paths: List[Path], limit: int = MAX_FILES) -> GlobResult: 179 | """ 180 | Apply result limit 181 | 182 | Args: 183 | file_paths: List of file paths 184 | limit: Maximum number of files to return 185 | 186 | Returns: 187 | GlobResult with limited files and truncation info 188 | """ 189 | total_found = len(file_paths) 190 | truncated = total_found > limit 191 | limited_files = file_paths[:limit] if truncated else file_paths 192 | 193 | # Convert to absolute path strings 194 | absolute_paths = [str(fp.resolve()) for fp in limited_files] 195 | 196 | return GlobResult( 197 | files=absolute_paths, 198 | total_found=total_found, 199 | truncated=truncated, 200 | ) 201 | 202 | def execute( 203 | self, 204 | pattern: str, 205 | path: Optional[str] = None, 206 | **kwargs 207 | ) -> ToolResult: 208 | """ 209 | 
Execute the glob tool 210 | 211 | Args: 212 | pattern: Glob pattern to match files against 213 | path: Optional directory to search in (defaults to workspace_root) 214 | 215 | Returns: 216 | ToolResult with: 217 | - content: File paths (one per line) for LLM 218 | - display: Summary message for user 219 | """ 220 | try: 221 | # Step 1: Validate pattern 222 | if not pattern or not isinstance(pattern, str): 223 | return self._create_error_result( 224 | "Pattern is required and must be a non-empty string", 225 | "Invalid pattern" 226 | ) 227 | 228 | # Step 2: Resolve search path 229 | if path: 230 | # Check if path is absolute (required by our convention) 231 | if not Path(path).is_absolute(): 232 | return self._create_error_result( 233 | f"Path must be absolute, but got relative path: {path}", 234 | "Path must be absolute" 235 | ) 236 | 237 | search_path = self._resolve_path(path) 238 | 239 | # Check if within workspace 240 | if error := self._check_within_workspace(search_path): 241 | mainLogger.warning(f"Glob search outside workspace: {error}") 242 | return self._create_error_result(error, "Access denied: outside workspace") 243 | else: 244 | search_path = self._workspace_root 245 | 246 | # Check if path exists 247 | if not search_path.exists(): 248 | return self._create_error_result( 249 | f"Path not found: {search_path}", 250 | "Path not found" 251 | ) 252 | 253 | # Check if it's a directory 254 | if not search_path.is_dir(): 255 | return self._create_error_result( 256 | f"Path is not a directory: {search_path}", 257 | "Not a directory" 258 | ) 259 | 260 | # Step 3: Execute glob search 261 | mainLogger.info(f"Executing glob search: pattern='{pattern}', path={search_path}") 262 | matched_files = self._execute_glob(pattern, search_path) 263 | 264 | # Step 4: Sort by modification time 265 | sorted_files = self._sort_by_mtime(matched_files) 266 | 267 | # Step 5: Apply limit 268 | result = self._apply_limit(sorted_files, MAX_FILES) 269 | 270 | # Step 6: Format output 271 | if result.total_found == 0: 272 | content = "No files found" 273 | display = "No files found" 274 | else: 275 | # Join file paths with newlines 276 | content = '\n'.join(result.files) 277 | 278 | # Add truncation message if needed 279 | if result.truncated: 280 | content += ( 281 | f"\n\n(Results are truncated. Found {result.total_found} files, " 282 | f"showing first {len(result.files)}. 
" 283 | f"Consider using a more specific path or pattern.)" 284 | ) 285 | 286 | # Display message 287 | num_files = len(result.files) 288 | if result.truncated: 289 | display = f"✓ Found {num_files} files (truncated from {result.total_found})" 290 | else: 291 | display = f"✓ Found {num_files} file{'s' if num_files != 1 else ''}" 292 | 293 | mainLogger.info( 294 | f"Glob search complete: pattern='{pattern}', " 295 | f"found={result.total_found}, returned={len(result.files)}, " 296 | f"truncated={result.truncated}" 297 | ) 298 | 299 | return ToolResult(content=content, display=display) 300 | 301 | except Exception as e: 302 | error_msg = f"Unexpected error during glob search: {str(e)}" 303 | mainLogger.error(error_msg, exc_info=True) 304 | return self._create_error_result(error_msg, f"Error: {str(e)}") 305 | 306 | -------------------------------------------------------------------------------- /codefuse/cli/interactive.py: -------------------------------------------------------------------------------- 1 | """ 2 | Interactive Mode - REPL for continuous conversation 3 | """ 4 | 5 | import json 6 | from datetime import datetime 7 | from typing import Dict, Any, List 8 | from rich.console import Console 9 | from rich.panel import Panel 10 | from rich.markdown import Markdown 11 | from prompt_toolkit import PromptSession 12 | from prompt_toolkit.history import InMemoryHistory 13 | 14 | from codefuse.llm.base import Message, MessageRole 15 | from codefuse.observability import mainLogger, get_session_dir, close_all_loggers 16 | 17 | console = Console() 18 | 19 | 20 | def run_interactive( 21 | components: Dict[str, Any], 22 | stream: bool = True, 23 | ): 24 | """ 25 | Run agent in interactive mode (REPL) 26 | 27 | Args: 28 | components: Dictionary of initialized components from initialize_agent_components() 29 | stream: Whether to stream LLM responses 30 | save_session: Whether to save session information 31 | """ 32 | # Unpack components 33 | agent_profile = components["agent_profile"] 34 | env_info = components["env_info"] 35 | agent_loop = components["agent_loop"] 36 | available_tools = components["available_tools"] 37 | session_dir = components["session_dir"] 38 | config = components["config"] 39 | model_name = components["model_name"] 40 | context_engine = components["context_engine"] 41 | metrics_collector = components["metrics_collector"] 42 | resumed_conversation = components["resumed_conversation"] 43 | 44 | # Display welcome message 45 | console.print() 46 | 47 | # Build session info 48 | session_info = f"Session ID: {context_engine.session_id}" 49 | if resumed_conversation: 50 | session_info += f"\n[cyan]Resumed with {len(resumed_conversation)} messages[/cyan]" 51 | 52 | console.print(Panel( 53 | f"[bold blue]CodeFuse Interactive Mode[/bold blue]\n\n" 54 | f"Agent: {agent_profile.name}\n" 55 | f"Model: {model_name}\n" 56 | f"{session_info}\n\n" 57 | f"[dim]Type your message and press Enter to send.[/dim]\n" 58 | f"[dim]Special commands:[/dim]\n" 59 | f" /exit, /quit - Exit the session\n" 60 | f" /help - Show help information\n" 61 | f" /clear - Clear conversation history\n" 62 | f" /status - Show session status", 63 | border_style="blue" 64 | )) 65 | console.print() 66 | 67 | if config.agent_config.yolo: 68 | console.print("[yellow]⚡ YOLO mode enabled - auto-confirming all tools[/yellow]\n") 69 | 70 | # Initialize prompt session with history 71 | session = PromptSession(history=InMemoryHistory()) 72 | 73 | # Conversation history (for context across multiple turns) 74 | # If resuming a 
session, start with the loaded history 75 | conversation_history: List[Message] = resumed_conversation if resumed_conversation else [] 76 | 77 | mainLogger.info("Interactive mode started", session_id=context_engine.session_id) 78 | 79 | # REPL loop 80 | while True: 81 | try: 82 | # Get user input 83 | user_input = session.prompt("You: ").strip() 84 | 85 | if not user_input: 86 | continue 87 | 88 | # Handle special commands 89 | if user_input.startswith("/"): 90 | if user_input in ["/exit", "/quit"]: 91 | console.print("\n[yellow]Exiting interactive mode...[/yellow]") 92 | break 93 | 94 | elif user_input == "/help": 95 | _show_help() 96 | continue 97 | 98 | elif user_input == "/clear": 99 | conversation_history.clear() 100 | # Note: This only clears local conversation history 101 | # ContextEngine messages are not cleared (would need session restart) 102 | console.print("[green]✓ Local conversation history cleared[/green]") 103 | console.print("[dim]Note: Full reset requires restarting the session[/dim]\n") 104 | mainLogger.info("Conversation history cleared", session_id=context_engine.session_id) 105 | continue 106 | 107 | elif user_input == "/status": 108 | _show_status(components, conversation_history) 109 | continue 110 | 111 | else: 112 | console.print(f"[red]Unknown command:[/red] {user_input}") 113 | console.print("[dim]Type /help for available commands[/dim]\n") 114 | continue 115 | 116 | # User message will be logged by agent_loop automatically 117 | 118 | # Display thinking indicator 119 | console.print("\n[dim]Assistant:[/dim] ", end="") 120 | 121 | # Run agent loop 122 | final_response = "" 123 | current_content = "" 124 | current_tool_calls = [] 125 | iterations = 1 126 | 127 | for event in agent_loop.run( 128 | user_query=user_input, 129 | stream=stream, 130 | ): 131 | if event.type == "llm_start": 132 | iteration = event.data.get("iteration", 0) 133 | if iteration > 1: 134 | console.print(f"\n[dim]→ Iteration {iteration}[/dim]") 135 | 136 | elif event.type == "llm_chunk": 137 | delta = event.data["delta"] 138 | console.print(delta, end="") 139 | current_content += delta 140 | 141 | elif event.type == "llm_done": 142 | if not stream: 143 | content = event.data["content"] 144 | if content: 145 | console.print(content) 146 | current_content = content 147 | else: 148 | console.print() 149 | 150 | if "tool_calls" in event.data and event.data["tool_calls"]: 151 | current_tool_calls = event.data["tool_calls"] 152 | 153 | elif event.type == "tool_start": 154 | tool_name = event.data["tool_name"] 155 | arguments = event.data.get("arguments", {}) 156 | args_str = _format_tool_arguments(arguments) 157 | console.print(f"\n[cyan]🔧 Executing tool:[/cyan] {tool_name}{args_str}") 158 | 159 | elif event.type == "tool_done": 160 | tool_name = event.data["tool_name"] 161 | tool_call_id = event.data.get("tool_call_id") 162 | arguments = event.data.get("arguments", {}) 163 | display = event.data.get("display", event.data.get("result", "")) 164 | confirmed = event.data.get("confirmed", True) 165 | 166 | if not confirmed: 167 | console.print(f"[yellow]⚠️ Tool rejected:[/yellow] {tool_name}") 168 | else: 169 | # Use display field (user-friendly) instead of result (LLM content) 170 | console.print(f"[cyan]{display}[/cyan]") 171 | 172 | # Tool results are logged by tool_executor automatically 173 | 174 | elif event.type == "agent_done": 175 | final_response = event.data["final_response"] 176 | iterations = event.data["iterations"] 177 | 178 | # Save assistant message to trajectory 179 | 
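The same event stream can be consumed outside the REPL; a minimal sketch using only the event types handled above (query illustrative):

```python
for event in agent_loop.run(user_query="summarize recent changes", stream=True):
    if event.type == "llm_chunk":
        print(event.data["delta"], end="")        # streamed tokens
    elif event.type == "tool_start":
        print(f"\n[tool] {event.data['tool_name']}")
    elif event.type == "agent_done":
        final_answer = event.data["final_response"]
```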
assistant_message = { 180 | "role": "assistant", 181 | "content": final_response or current_content, 182 | "timestamp": datetime.now().isoformat(), 183 | } 184 | if current_tool_calls: 185 | assistant_message["tool_calls"] = current_tool_calls 186 | # Assistant messages are logged by agent_loop automatically 187 | 188 | # Update conversation history for next turn 189 | conversation_history.append(Message( 190 | role=MessageRole.USER, 191 | content=user_input, 192 | )) 193 | conversation_history.append(Message( 194 | role=MessageRole.ASSISTANT, 195 | content=final_response or current_content, 196 | )) 197 | 198 | console.print() 199 | 200 | elif event.type == "error": 201 | error = event.data["error"] 202 | console.print(f"\n[red]Error:[/red] {error}") 203 | 204 | # Reset for next turn 205 | current_content = "" 206 | current_tool_calls = [] 207 | 208 | except KeyboardInterrupt: 209 | console.print("\n\n[yellow]Use /exit or /quit to exit[/yellow]\n") 210 | continue 211 | 212 | except Exception as e: 213 | console.print(f"\n[red]Error:[/red] {str(e)}\n") 214 | mainLogger.error("Interactive loop error", error=str(e), exc_info=True) 215 | continue 216 | 217 | # Generate and save metrics summary 218 | summary = metrics_collector.generate_summary() 219 | 220 | # Write summary to trajectory 221 | context_engine.write_session_summary(summary) 222 | 223 | mainLogger.info("Interactive mode completed", status="success") 224 | 225 | # Display session summary 226 | console.print() 227 | console.print(Panel( 228 | f"[bold]Session Summary[/bold]\n\n" 229 | f"[green]Total Prompts:[/green] {summary['session']['total_prompts']}\n" 230 | f"[green]Total Iterations:[/green] {summary['prompts']['total_iterations']}\n" 231 | f"[green]API Calls:[/green] {summary['api_calls']['total']} " 232 | f"({summary['api_calls']['success_rate']}% success)\n" 233 | f"[green]Total Tokens:[/green] {summary['api_calls']['tokens']['total']:,}\n" 234 | f" • Prompt: {summary['api_calls']['tokens']['prompt']:,}\n" 235 | f" • Completion: {summary['api_calls']['tokens']['completion']:,}\n" 236 | f" • Cache Read: {summary['api_calls']['tokens']['cache_read']:,}\n" 237 | f"[green]Tool Calls:[/green] {summary['tool_calls']['total']} " 238 | f"({summary['tool_calls']['success_rate']}% success)\n" 239 | f"[green]Session Duration:[/green] {summary['session']['duration']:.2f}s", 240 | title="[bold]Performance Metrics[/bold]", 241 | border_style="cyan" 242 | )) 243 | 244 | # Display session info 245 | console.print(f"\n[dim]Session logs:[/dim] {get_session_dir()}") 246 | 247 | # Close all loggers 248 | close_all_loggers() 249 | 250 | 251 | def _format_tool_arguments(arguments: Dict[str, Any], max_length: int = 100) -> str: 252 | """ 253 | Format tool arguments for display, with truncation if too long 254 | 255 | Args: 256 | arguments: Tool arguments dictionary 257 | max_length: Maximum length before truncation 258 | 259 | Returns: 260 | Formatted string representation of arguments 261 | """ 262 | if not arguments: 263 | return "" 264 | 265 | # Convert arguments to JSON string 266 | args_json = json.dumps(arguments, ensure_ascii=False) 267 | 268 | # If short enough, return as-is 269 | if len(args_json) <= max_length: 270 | return f" [dim]{args_json}[/dim]" 271 | 272 | # Truncate and add ellipsis 273 | truncated = args_json[:max_length] + "..." 
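# Illustrative example (hypothetical values): with max_length=100, a payload
# such as {"file_path": "/workspace/app.py", "old_string": "<several hundred chars>"}
# is serialized via json.dumps, cut to its first 100 characters, and suffixed with "...".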
274 | return f" [dim]{truncated}[/dim]" 275 | 276 | 277 | def _show_help(): 278 | """Show help information""" 279 | console.print() 280 | console.print(Panel( 281 | "[bold]Interactive Mode Commands[/bold]\n\n" 282 | "/exit, /quit - Exit interactive mode\n" 283 | "/help - Show this help message\n" 284 | "/clear - Clear conversation history\n" 285 | "/status - Show current session status\n\n" 286 | "[dim]Just type your message to chat with the assistant.[/dim]", 287 | border_style="blue", 288 | title="Help" 289 | )) 290 | console.print() 291 | 292 | 293 | def _show_status(components: Dict[str, Any], conversation_history: List[Message]): 294 | """Show current session status""" 295 | agent_profile = components["agent_profile"] 296 | model_name = components["model_name"] 297 | context_engine = components["context_engine"] 298 | config = components["config"] 299 | 300 | console.print() 301 | console.print(Panel( 302 | f"[bold]Session Status[/bold]\n\n" 303 | f"Session ID: {context_engine.session_id}\n" 304 | f"Agent: {agent_profile.name}\n" 305 | f"Model: {model_name}\n" 306 | f"Conversation Turns: {len(conversation_history) // 2}\n" 307 | f"Max Iterations: {config.agent_config.max_iterations}\n" 308 | f"YOLO Mode: {'Enabled' if config.agent_config.yolo else 'Disabled'}\n" 309 | f"Logs: {get_session_dir()}", 310 | border_style="blue", 311 | title="Status" 312 | )) 313 | console.print() 314 | 315 | -------------------------------------------------------------------------------- /codefuse/cli/main.py: -------------------------------------------------------------------------------- 1 | """ 2 | Main CLI Entry Point - Unified command-line interface 3 | """ 4 | 5 | import sys 6 | import json 7 | import click 8 | from rich.console import Console 9 | 10 | from codefuse.config import Config 11 | from codefuse.core import AgentProfileManager, AgentProfile 12 | from codefuse.cli.common import initialize_agent_components, handle_list_agents 13 | from codefuse.cli.headless import run_headless 14 | from codefuse.cli.interactive import run_interactive 15 | 16 | console = Console() 17 | 18 | 19 | @click.command() 20 | @click.option( 21 | "-p", "--prompt", 22 | help="User prompt/query (if provided, runs in headless mode)" 23 | ) 24 | @click.option( 25 | "-pp", "--prompt-file", 26 | type=click.Path(exists=True), 27 | help="Read prompt from file (mutually exclusive with -p)" 28 | ) 29 | @click.option( 30 | "--agent", 31 | default="default", 32 | help="Agent profile to use (default: default)" 33 | ) 34 | @click.option( 35 | "--agent-file", 36 | type=click.Path(exists=True), 37 | help="Load agent profile from Markdown file (overrides --agent)" 38 | ) 39 | @click.option( 40 | "--provider", 41 | help="LLM provider (openai_compatible, anthropic, gemini)" 42 | ) 43 | @click.option( 44 | "--model", 45 | help="Override model name" 46 | ) 47 | @click.option( 48 | "--api-key", 49 | help="API key (or use environment variable)" 50 | ) 51 | @click.option( 52 | "--base-url", 53 | help="Base URL for API endpoint" 54 | ) 55 | @click.option( 56 | "-v", "--verbose", 57 | is_flag=True, 58 | help="Enable verbose logging" 59 | ) 60 | @click.option( 61 | "--logs-dir", 62 | help="Base directory for logs (default: ~/.cfuse/logs)" 63 | ) 64 | @click.option( 65 | "--max-iterations", 66 | type=int, 67 | help="Maximum agent iterations (default: 200)" 68 | ) 69 | @click.option( 70 | "--stream/--no-stream", 71 | default=True, 72 | help="Enable/disable streaming output (default: enabled)" 73 | ) 74 | @click.option( 75 | "--yolo", 76 | 
is_flag=True,
77 | help="YOLO mode: auto-confirm all tool executions"
78 | )
79 | @click.option(
80 | "--list-agents",
81 | is_flag=True,
82 | help="List available agent profiles and exit"
83 | )
84 | @click.option(
85 | "--config",
86 | type=click.Path(exists=True),
87 | help="Path to configuration file"
88 | )
89 | @click.option(
90 | "--save-session",
91 | is_flag=True,
92 | help="Save session trajectory to file"
93 | )
94 | @click.option(
95 | "--temperature",
96 | type=float,
97 | help="Model temperature (0.0-2.0, default: 0.0)"
98 | )
99 | @click.option(
100 | "--top-p",
101 | type=float,
102 | help="Nucleus sampling parameter (0.0-1.0)"
103 | )
104 | @click.option(
105 | "--top-k",
106 | type=int,
107 | help="Top-k sampling parameter"
108 | )
109 | @click.option(
110 | "--parallel-tool-calls/--no-parallel-tool-calls",
111 | default=None,
112 | help="Enable/disable parallel tool calls (default: enabled)"
113 | )
114 | @click.option(
115 | "--think",
116 | is_flag=True,
117 | help="Enable thinking mode for models that support it"
118 | )
119 | @click.option(
120 | "--session-id",
121 | help="Custom session ID (auto-generated if not provided)"
122 | )
123 | @click.option(
124 | "--http",
125 | is_flag=True,
126 | help="Enable HTTP server mode for external tool execution"
127 | )
128 | @click.option(
129 | "--port",
130 | type=int,
131 | default=8080,
132 | help="Port for HTTP server mode (default: 8080)"
133 | )
134 | @click.option(
135 | "--host",
136 | default="0.0.0.0",
137 | help="Host address for HTTP server mode (default: 0.0.0.0, use 127.0.0.1 for localhost only)"
138 | )
139 | @click.option(
140 | "--remote-tool-enabled",
141 | is_flag=True,
142 | help="Enable remote tool execution via HTTP"
143 | )
144 | @click.option(
145 | "--remote-tool-url",
146 | help="URL of the remote tool service"
147 | )
148 | @click.option(
149 | "--remote-tool-instance-id",
150 | help="Instance ID for remote tool execution"
151 | )
152 | @click.option(
153 | "--remote-tool-timeout",
154 | type=int,
155 | help="Timeout for remote tool calls in seconds (default: 60)"
156 | )
157 | @click.option(
158 | "--image-url",
159 | multiple=True,
160 | help="Image URL (can be specified multiple times, supports HTTP/HTTPS or base64 data URI)"
161 | )
162 | @click.option(
163 | "--image-url-file",
164 | type=click.Path(exists=True),
165 | help="Read image URLs from JSON file (should contain a list of URLs)"
166 | )
167 | def main(
168 | prompt: str,
169 | prompt_file: str,
170 | agent: str,
171 | agent_file: str,
172 | provider: str,
173 | model: str,
174 | api_key: str,
175 | base_url: str,
176 | verbose: bool,
177 | logs_dir: str,
178 | max_iterations: int,
179 | stream: bool,
180 | yolo: bool,
181 | list_agents: bool,
182 | config: str,
183 | save_session: bool,
184 | temperature: float,
185 | top_p: float,
186 | top_k: int,
187 | parallel_tool_calls: bool,
188 | think: bool,
189 | session_id: str,
190 | http: bool,
191 | port: int,
192 | host: str,
193 | remote_tool_enabled: bool,
194 | remote_tool_url: str,
195 | remote_tool_instance_id: str,
196 | remote_tool_timeout: int,
197 | image_url: tuple,
198 | image_url_file: str,
199 | ):
200 | """
201 | CodeFuse Agent - AI-powered coding assistant
202 | 
203 | Runs in headless mode (with -p/--prompt or -pp/--prompt-file), in interactive mode (when no prompt is given), or as an HTTP server (with --http).
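Mode selection: --http takes precedence; otherwise a prompt (or prompt file) runs a single headless turn, and with no prompt an interactive REPL starts (see the examples below).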
204 | 205 | \b 206 | Examples: 207 | # Headless mode 208 | cfuse -p "Read README.md and summarize it" 209 | 210 | # Read prompt from file 211 | cfuse -pp prompt.txt 212 | 213 | # Interactive mode 214 | cfuse 215 | 216 | # HTTP server mode (listen on all interfaces) 217 | cfuse --http --port 8080 218 | 219 | # HTTP server mode (localhost only) 220 | cfuse --http --port 8080 --host 127.0.0.1 221 | 222 | # Resume an existing session (loads conversation history) 223 | cfuse --session-id session_20241029_123456_abc123def 224 | 225 | # YOLO mode (auto-confirm all tools) 226 | cfuse -p "Create a hello.py file" --yolo 227 | 228 | # Use specific agent 229 | cfuse -p "Debug this error" --agent debugger 230 | 231 | # Load agent from file 232 | cfuse -p "Help me with this task" --agent-file ./my_agent.md 233 | 234 | # List available agents 235 | cfuse --list-agents 236 | """ 237 | 238 | try: 239 | # Check for mutually exclusive parameters 240 | if prompt and prompt_file: 241 | console.print("[red]Error:[/red] Cannot use both -p/--prompt and -pp/--prompt-file at the same time") 242 | sys.exit(1) 243 | 244 | # Validate image_url usage 245 | if (image_url or image_url_file) and not (prompt or prompt_file): 246 | console.print("[red]Error:[/red] --image-url/--image-url-file requires -p/--prompt or -pp/--prompt-file") 247 | sys.exit(1) 248 | 249 | # If prompt-file is provided, read the file content 250 | if prompt_file: 251 | try: 252 | with open(prompt_file, 'r', encoding='utf-8') as f: 253 | prompt = f.read().strip() 254 | if not prompt: 255 | console.print(f"[red]Error:[/red] Prompt file '{prompt_file}' is empty") 256 | sys.exit(1) 257 | except Exception as e: 258 | console.print(f"[red]Error:[/red] Failed to read prompt file '{prompt_file}': {e}") 259 | sys.exit(1) 260 | 261 | # If image-url-file is provided, read and parse the JSON file 262 | image_urls_from_file = [] 263 | if image_url_file: 264 | try: 265 | with open(image_url_file, 'r', encoding='utf-8') as f: 266 | image_urls_from_file = json.load(f) 267 | 268 | # Validate that it's a list 269 | if not isinstance(image_urls_from_file, list): 270 | console.print(f"[red]Error:[/red] Image URL file '{image_url_file}' must contain a JSON list") 271 | sys.exit(1) 272 | 273 | # Validate that all elements are strings 274 | if not all(isinstance(url, str) for url in image_urls_from_file): 275 | console.print(f"[red]Error:[/red] All elements in image URL file must be strings") 276 | sys.exit(1) 277 | 278 | if not image_urls_from_file: 279 | console.print(f"[yellow]Warning:[/yellow] Image URL file '{image_url_file}' is empty") 280 | 281 | except json.JSONDecodeError as e: 282 | console.print(f"[red]Error:[/red] Failed to parse JSON from '{image_url_file}': {e}") 283 | sys.exit(1) 284 | except Exception as e: 285 | console.print(f"[red]Error:[/red] Failed to read image URL file '{image_url_file}': {e}") 286 | sys.exit(1) 287 | 288 | # Merge image URLs from both sources (CLI args and file) 289 | all_image_urls = list(image_url) + image_urls_from_file 290 | 291 | # Handle HTTP server mode 292 | if http: 293 | from codefuse.cli.http_server import run_http_server 294 | 295 | # HTTP mode doesn't require API key for LLM 296 | # Only needs minimal config 297 | cfg = Config.load(config) 298 | 299 | # Merge CLI args (None filtering is handled by Config.merge_with_cli_args) 300 | cli_args = { 301 | "verbose": verbose, 302 | "logs_dir": logs_dir, 303 | } 304 | 305 | cfg = Config.merge_with_cli_args(cfg, **cli_args) 306 | 307 | # Start HTTP server 308 | 
run_http_server(cfg, host, port) 309 | return 310 | 311 | # Handle --list-agents early (no need to initialize components) 312 | if list_agents: 313 | agent_manager = AgentProfileManager() 314 | handle_list_agents(agent_manager) 315 | return 316 | 317 | # Load configuration 318 | cfg = Config.load(config) 319 | 320 | # Merge CLI arguments (None filtering is handled by Config.merge_with_cli_args) 321 | cli_args = { 322 | "provider": provider, 323 | "model": model, 324 | "api_key": api_key, 325 | "base_url": base_url, 326 | "temperature": temperature, 327 | "top_p": top_p, 328 | "top_k": top_k, 329 | "parallel_tool_calls": parallel_tool_calls, 330 | "enable_thinking": think, 331 | "max_iterations": max_iterations, 332 | "yolo": yolo, 333 | "agent": agent, 334 | "verbose": verbose, 335 | "logs_dir": logs_dir, 336 | "remote_tool_enabled": remote_tool_enabled, 337 | "remote_tool_url": remote_tool_url, 338 | "remote_tool_instance_id": remote_tool_instance_id, 339 | "remote_tool_timeout": remote_tool_timeout, 340 | } 341 | 342 | cfg = Config.merge_with_cli_args(cfg, **cli_args) 343 | 344 | # Validate configuration 345 | validation_errors = cfg.validate() 346 | if validation_errors: 347 | console.print("[red]Configuration Errors:[/red]") 348 | for error in validation_errors: 349 | console.print(f" - {error}") 350 | sys.exit(1) 351 | 352 | # Handle --agent-file: load agent profile from file 353 | loaded_agent_profile = None 354 | if agent_file: 355 | try: 356 | console.print(f"[cyan]Loading agent from file:[/cyan] {agent_file}") 357 | loaded_agent_profile = AgentProfile.from_markdown(agent_file) 358 | console.print(f"[green]✓ Agent loaded:[/green] {loaded_agent_profile.name}") 359 | except Exception as e: 360 | console.print(f"[red]Error:[/red] Failed to load agent from file '{agent_file}'") 361 | console.print(f"[red]Reason:[/red] {str(e)}") 362 | if verbose: 363 | import traceback 364 | console.print(traceback.format_exc()) 365 | sys.exit(1) 366 | 367 | # Initialize all components once (shared by both modes) 368 | components = initialize_agent_components( 369 | cfg=cfg, 370 | agent_name=agent, 371 | agent_profile=loaded_agent_profile, 372 | verbose=cfg.logging.verbose, 373 | session_id=session_id, 374 | ) 375 | 376 | # Route to appropriate mode based on presence of prompt 377 | if prompt: 378 | # Headless mode: single prompt execution 379 | run_headless( 380 | prompt=prompt, 381 | components=components, 382 | stream=stream, 383 | image_urls=tuple(all_image_urls), 384 | ) 385 | else: 386 | # Interactive mode: REPL 387 | run_interactive( 388 | components=components, 389 | stream=stream, 390 | ) 391 | 392 | except KeyboardInterrupt: 393 | console.print("\n\n[yellow]Interrupted by user[/yellow]") 394 | sys.exit(130) 395 | 396 | except Exception as e: 397 | console.print(f"\n[red]Error:[/red] {str(e)}") 398 | if verbose: 399 | import traceback 400 | console.print(traceback.format_exc()) 401 | sys.exit(1) 402 | 403 | 404 | if __name__ == "__main__": 405 | main() 406 | 407 | -------------------------------------------------------------------------------- /codefuse/tools/builtin/edit_file.py: -------------------------------------------------------------------------------- 1 | """ 2 | Edit File Tool - Perform exact string replacements in files 3 | """ 4 | 5 | from pathlib import Path 6 | from typing import Optional, List, Tuple, TYPE_CHECKING 7 | 8 | from codefuse.tools.base import BaseTool, ToolDefinition, ToolParameter, ToolResult 9 | from codefuse.tools.builtin.filesystem_base import 
FileSystemToolMixin, MAX_TOKENS 10 | from codefuse.observability import mainLogger 11 | 12 | if TYPE_CHECKING: 13 | from codefuse.core.read_tracker import ReadTracker 14 | 15 | 16 | # Edit-specific constants 17 | CONTEXT_LINES = 4 # Number of lines to show before/after edit for confirmation 18 | 19 | 20 | class EditFileTool(FileSystemToolMixin, BaseTool): 21 | """ 22 | Tool for editing file contents with exact string replacement 23 | 24 | Features: 25 | - Requires file to be read before editing (safety check) 26 | - Exact string matching with uniqueness validation 27 | - Support for replace_all mode (rename variables, etc.) 28 | - Shows edit snippet for confirmation 29 | - Workspace restriction and safety checks 30 | """ 31 | 32 | def __init__( 33 | self, 34 | workspace_root: Optional[Path] = None, 35 | read_tracker: Optional["ReadTracker"] = None, 36 | ): 37 | """ 38 | Initialize EditFileTool 39 | 40 | Args: 41 | workspace_root: Workspace root directory to restrict file access. 42 | Defaults to current working directory. 43 | read_tracker: Read tracker for validation that file was read before editing. 44 | """ 45 | super().__init__(workspace_root=workspace_root) 46 | self._read_tracker = read_tracker 47 | 48 | @property 49 | def definition(self) -> ToolDefinition: 50 | """Define the edit_file tool""" 51 | return ToolDefinition( 52 | name="edit_file", 53 | description=( 54 | "Performs exact string replacements in files.\n\n" 55 | "Usage:\n" 56 | "- You MUST use read_file tool at least once before editing. " 57 | "This tool will error if you attempt an edit without reading the file.\n" 58 | "- When editing text from read_file output, ensure you preserve the exact indentation " 59 | "(tabs/spaces) as it appears AFTER the line number prefix. The line number prefix format is: " 60 | "spaces + line number + → + content. Everything after the → is the actual file content to match. " 61 | "Never include any part of the line number prefix in old_string or new_string.\n" 62 | "- ALWAYS prefer editing existing files in the codebase. NEVER write new files unless explicitly required.\n" 63 | "- The edit will FAIL if old_string is not unique in the file. Either provide a larger string " 64 | "with more surrounding context to make it unique or use replace_all to change every instance.\n" 65 | "- Use replace_all for replacing and renaming strings across the file. " 66 | "This parameter is useful if you want to rename a variable for instance.\n\n" 67 | "Important:\n" 68 | "- The file_path parameter MUST be an absolute path, not a relative path\n" 69 | "- old_string must match the file content exactly (including whitespace)\n" 70 | "- new_string must be different from old_string" 71 | ), 72 | parameters=[ 73 | ToolParameter( 74 | name="file_path", 75 | type="string", 76 | description="The absolute path to the file to modify", 77 | required=True, 78 | ), 79 | ToolParameter( 80 | name="old_string", 81 | type="string", 82 | description="The text to replace", 83 | required=True, 84 | ), 85 | ToolParameter( 86 | name="new_string", 87 | type="string", 88 | description="The text to replace it with (must be different from old_string)", 89 | required=True, 90 | ), 91 | ToolParameter( 92 | name="replace_all", 93 | type="boolean", 94 | description="Replace all occurrences of old_string (default false)", 95 | required=False, 96 | ), 97 | ], 98 | requires_confirmation=True, # Editing is dangerous! 
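# Note: requires_confirmation means the CLI is expected to prompt the user
# before each edit runs; with YOLO mode (--yolo / agent_config.yolo) all tool
# executions are auto-confirmed, so this tool proceeds without a prompt.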
99 | ) 100 | 101 | def _generate_edit_snippet( 102 | self, 103 | content: str, 104 | replacement_line: int, 105 | new_content: str, 106 | context_lines: int = CONTEXT_LINES 107 | ) -> Tuple[str, int]: 108 | """ 109 | Generate a snippet showing the edited region with context 110 | 111 | Args: 112 | content: New file content (after replacement) 113 | replacement_line: Line number where replacement started (0-indexed) 114 | new_content: The new string that was inserted 115 | context_lines: Number of context lines to show before/after 116 | 117 | Returns: 118 | Tuple of (formatted_snippet, start_line_number) 119 | """ 120 | lines = content.split('\n') 121 | num_new_lines = new_content.count('\n') 122 | 123 | # Calculate snippet range 124 | start_line = max(0, replacement_line - context_lines) 125 | end_line = min(len(lines), replacement_line + num_new_lines + 1 + context_lines) 126 | 127 | snippet_lines = lines[start_line:end_line] 128 | snippet_content = '\n'.join(snippet_lines) 129 | 130 | # Format with line numbers using inherited method (1-indexed) 131 | formatted_snippet = self._format_with_line_numbers(snippet_content, start_line + 1) 132 | 133 | return formatted_snippet, start_line + 1 134 | 135 | def execute( 136 | self, 137 | file_path: str, 138 | old_string: str, 139 | new_string: str, 140 | replace_all: bool = False, 141 | **kwargs 142 | ) -> ToolResult: 143 | """ 144 | Execute the edit_file tool 145 | 146 | Args: 147 | file_path: Absolute path to the file to edit 148 | old_string: Text to replace 149 | new_string: Replacement text 150 | replace_all: If True, replace all occurrences; if False, only unique occurrences 151 | 152 | Returns: 153 | ToolResult with: 154 | - content: Detailed edit confirmation with snippet for LLM 155 | - display: User-friendly summary for UI 156 | """ 157 | try: 158 | # Step 1: Check if path is absolute 159 | if error := self._check_absolute_path(file_path): 160 | return self._create_error_result(error, "Path must be absolute") 161 | 162 | # Step 2: Resolve path 163 | resolved_path = self._resolve_path(file_path) 164 | 165 | # Step 3: Check if within workspace 166 | if error := self._check_within_workspace(resolved_path): 167 | mainLogger.warning(f"File edit outside workspace: {error}") 168 | return self._create_error_result(error, "Access denied: outside workspace") 169 | 170 | # Step 4: Check file existence 171 | if not resolved_path.exists(): 172 | error_msg = f"File not found: {file_path}" 173 | mainLogger.error(error_msg) 174 | return self._create_error_result(error_msg, "File not found") 175 | 176 | # Step 5: Check it's a file 177 | if not resolved_path.is_file(): 178 | error_msg = f"Path is not a file: {file_path}" 179 | mainLogger.error(error_msg) 180 | return self._create_error_result(error_msg, "Not a file") 181 | 182 | # Step 6: Check if file was read 183 | if self._read_tracker and not self._read_tracker.is_read(str(resolved_path)): 184 | error_msg = ( 185 | f"File has not been read yet: {file_path}. " 186 | f"You must use read_file tool at least once before editing." 
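# Expected recovery flow (illustrative): the agent issues a read_file call on
# the same path, which presumably records it in the ReadTracker, then retries
# this edit_file call.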
187 | ) 188 | mainLogger.warning(error_msg) 189 | return self._create_error_result( 190 | error_msg, 191 | "Must read file before editing" 192 | ) 193 | 194 | # Step 7: Read file with encoding fallback 195 | try: 196 | file_content, encoding = self._read_with_encoding_fallback(resolved_path) 197 | except UnicodeDecodeError as e: 198 | error_msg = f"Cannot read file (encoding error): {file_path}" 199 | mainLogger.error(f"{error_msg}: {e}") 200 | return self._create_error_result(error_msg, "File encoding error") 201 | 202 | # Step 8: Normalize tabs 203 | file_content = file_content.expandtabs() 204 | old_string = old_string.expandtabs() 205 | new_string = new_string.expandtabs() 206 | 207 | # Step 9: Check if old_string == new_string 208 | if old_string == new_string: 209 | error_msg = f"old_string is identical to new_string. No replacement needed." 210 | mainLogger.info(error_msg) 211 | return self._create_error_result(error_msg, "No changes to make") 212 | 213 | # Step 10: Count occurrences 214 | occurrences = file_content.count(old_string) 215 | 216 | if occurrences == 0: 217 | error_msg = ( 218 | f"old_string not found in file. The string to replace does not appear " 219 | f"verbatim in {file_path}. Make sure to match the exact content including " 220 | f"whitespace and indentation." 221 | ) 222 | mainLogger.warning(error_msg) 223 | return self._create_error_result(error_msg, "String not found") 224 | 225 | if occurrences > 1 and not replace_all: 226 | occurrence_lines = self._find_occurrence_lines(file_content, old_string) 227 | error_msg = ( 228 | f"Multiple occurrences of old_string found in lines {occurrence_lines}. " 229 | f"Please ensure it is unique by providing more context, or set replace_all=True " 230 | f"to replace all {occurrences} occurrences." 231 | ) 232 | mainLogger.warning(error_msg) 233 | return self._create_error_result( 234 | error_msg, 235 | f"Not unique ({occurrences} occurrences)" 236 | ) 237 | 238 | # Step 11: Perform replacement 239 | if replace_all: 240 | new_file_content = file_content.replace(old_string, new_string) 241 | num_replacements = occurrences 242 | else: 243 | # Replace only the first (and only) occurrence 244 | new_file_content = file_content.replace(old_string, new_string, 1) 245 | num_replacements = 1 246 | 247 | # Step 12: Check content size limit 248 | if error := self._check_token_limit(new_file_content, MAX_TOKENS): 249 | mainLogger.warning(f"New content too large: {error}") 250 | return self._create_error_result(error, f"Content too large (>{MAX_TOKENS:,} tokens)") 251 | 252 | # Step 13: Write new content 253 | try: 254 | resolved_path.write_text(new_file_content, encoding=encoding) 255 | except Exception as e: 256 | error_msg = f"Failed to write file: {file_path}" 257 | mainLogger.error(f"{error_msg}: {e}", exc_info=True) 258 | return self._create_error_result(error_msg, f"Write failed: {str(e)}") 259 | 260 | # Step 14: Generate edit snippet for confirmation 261 | replacement_line = file_content.split(old_string)[0].count('\n') 262 | snippet, snippet_start_line = self._generate_edit_snippet( 263 | new_file_content, 264 | replacement_line, 265 | new_string, 266 | CONTEXT_LINES 267 | ) 268 | 269 | # Step 15: Prepare success message 270 | action = "all occurrences" if replace_all else "occurrence" 271 | mainLogger.info( 272 | f"Edited {resolved_path} ({num_replacements} {action} replaced)" 273 | ) 274 | 275 | result_content = ( 276 | f"Successfully edited {file_path}. 
" 277 | f"Replaced {num_replacements} {action} of old_string with new_string.\n\n" 278 | f"Here's a snippet of the edited file showing the changes (lines {snippet_start_line}-" 279 | f"{snippet_start_line + snippet.count(chr(10))}):\n" 280 | f"{snippet}\n\n" 281 | f"Review the changes and make sure they are as expected. " 282 | f"Edit the file again if necessary." 283 | ) 284 | 285 | result_display = ( 286 | f"✓ Edited {file_path} ({num_replacements} replacement{'s' if num_replacements > 1 else ''})" 287 | ) 288 | 289 | return ToolResult(content=result_content, display=result_display) 290 | 291 | except PermissionError as e: 292 | error_msg = f"Permission denied editing file: {file_path}" 293 | mainLogger.error(f"{error_msg}: {e}") 294 | return self._create_error_result(error_msg, "Permission denied") 295 | except Exception as e: 296 | error_msg = f"Unexpected error editing file: {file_path}" 297 | mainLogger.error(f"{error_msg}: {e}", exc_info=True) 298 | return self._create_error_result(error_msg, f"Error: {str(e)}") 299 | 300 | --------------------------------------------------------------------------------