├── images
    ├── TTS_framework.jpg
    ├── codefuse_logo.png
    ├── CFuse_Architecture.png
    └── oracle_vs_adversary_performance.png
├── codefuse
    ├── tools
    │   ├── utils
    │   │   ├── ripgrep
    │   │   │   ├── arm64-linux
    │   │   │   │   └── rg
    │   │   │   ├── x64-darwin
    │   │   │   │   └── rg
    │   │   │   ├── x64-linux
    │   │   │   │   └── rg
    │   │   │   ├── arm64-darwin
    │   │   │   │   └── rg
    │   │   │   ├── x64-win32
    │   │   │   │   └── rg.exe
    │   │   │   └── COPYING
    │   │   ├── __init__.py
    │   │   └── ripgrep.py
    │   ├── __init__.py
    │   ├── builtin
    │   │   ├── __init__.py
    │   │   ├── write_file.py
    │   │   ├── filesystem_base.py
    │   │   ├── read_file.py
    │   │   ├── glob.py
    │   │   └── edit_file.py
    │   ├── base.py
    │   └── registry.py
    ├── cli
    │   ├── __init__.py
    │   ├── headless.py
    │   ├── interactive.py
    │   └── main.py
    ├── observability
    │   ├── metrics
    │   │   ├── __init__.py
    │   │   ├── models.py
    │   │   └── trackers.py
    │   ├── logging
    │   │   ├── utils.py
    │   │   ├── __init__.py
    │   │   └── setup.py
    │   ├── llm_messages.py
    │   ├── __init__.py
    │   ├── trajectory.py
    │   └── http_logger.py
    ├── llm
    │   ├── __init__.py
    │   ├── exceptions.py
    │   ├── providers
    │   │   ├── gemini.py
    │   │   └── anthropic.py
    │   ├── retry.py
    │   ├── factory.py
    │   └── base.py
    ├── core
    │   ├── __init__.py
    │   ├── read_tracker.py
    │   ├── remote_tool_executor.py
    │   ├── agent_config.py
    │   └── environment.py
    ├── __init__.py
    └── config.py
├── MANIFEST.in
├── requirements.txt
├── .cfuse.yaml
├── .gitignore
├── LICENSE
├── pyproject.toml
└── README.md

/images/TTS_framework.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefuse-ai/CodeFuse-Agent/main/images/TTS_framework.jpg
--------------------------------------------------------------------------------
/images/codefuse_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefuse-ai/CodeFuse-Agent/main/images/codefuse_logo.png
--------------------------------------------------------------------------------
/images/CFuse_Architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefuse-ai/CodeFuse-Agent/main/images/CFuse_Architecture.png
--------------------------------------------------------------------------------
/codefuse/tools/utils/ripgrep/arm64-linux/rg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefuse-ai/CodeFuse-Agent/main/codefuse/tools/utils/ripgrep/arm64-linux/rg
--------------------------------------------------------------------------------
/codefuse/tools/utils/ripgrep/x64-darwin/rg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefuse-ai/CodeFuse-Agent/main/codefuse/tools/utils/ripgrep/x64-darwin/rg
--------------------------------------------------------------------------------
/codefuse/tools/utils/ripgrep/x64-linux/rg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefuse-ai/CodeFuse-Agent/main/codefuse/tools/utils/ripgrep/x64-linux/rg
--------------------------------------------------------------------------------
/images/oracle_vs_adversary_performance.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefuse-ai/CodeFuse-Agent/main/images/oracle_vs_adversary_performance.png
--------------------------------------------------------------------------------
/codefuse/tools/utils/ripgrep/arm64-darwin/rg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefuse-ai/CodeFuse-Agent/main/codefuse/tools/utils/ripgrep/arm64-darwin/rg
--------------------------------------------------------------------------------
/codefuse/tools/utils/ripgrep/x64-win32/rg.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefuse-ai/CodeFuse-Agent/main/codefuse/tools/utils/ripgrep/x64-win32/rg.exe
--------------------------------------------------------------------------------
/codefuse/tools/utils/ripgrep/COPYING:
--------------------------------------------------------------------------------
1 | This project is dual-licensed under the Unlicense and MIT licenses.
2 |
3 | You may use this code under the terms of either license.
4 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include README.md
2 | include LICENSE
3 | include requirements.txt
4 | include requirements-dev.txt
5 | recursive-include codefuse/tools/utils/ripgrep *
6 | recursive-include codefuse/cli/templates *.html
7 |
8 |
--------------------------------------------------------------------------------
/codefuse/tools/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Utility modules for tools
 3 | """
 4 |
 5 | from codefuse.tools.utils.ripgrep import find_ripgrep, execute_ripgrep
 6 |
 7 | __all__ = [
 8 |     "find_ripgrep",
 9 |     "execute_ripgrep",
10 | ]
11 |
--------------------------------------------------------------------------------
/codefuse/cli/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | CLI Module
 3 | """
 4 |
 5 | from codefuse.cli.main import main
 6 | from codefuse.cli.headless import run_headless
 7 | from codefuse.cli.interactive import run_interactive
 8 |
 9 | __all__ = ["main", "run_headless", "run_interactive"]
10 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | openai>=1.0.0
 2 | rich>=13.0.0
 3 | prompt-toolkit>=3.0.0
 4 | pydantic>=2.0.0
 5 | pyyaml>=6.0
 6 | click>=8.0.0
 7 | structlog>=23.0.0
 8 | requests>=2.31.0  # For remote tool execution
 9 | flask>=3.0.0  # Production HTTP server
10 | gunicorn>=21.0.0  # WSGI server for multi-process
11 | dnspython>=2.6.1  # Security fix for gunicorn dependency
12 | prometheus-client>=0.19.0  # Metrics collection
13 |
14 |
--------------------------------------------------------------------------------
/.cfuse.yaml:
--------------------------------------------------------------------------------
 1 | llm:
 2 |   provider: openai_compatible
 3 |   model: ${LLM_MODEL}
 4 |   api_key: ${OPENAI_API_KEY}
 5 |   base_url: ${LLM_BASE_URL}
 6 |   temperature: 0.0
 7 |   max_tokens: null
 8 |   timeout: 60
 9 |
10 | agent_config:
11 |   max_iterations: 200
12 |   max_context_tokens: 128000
13 |   enable_tools: true
14 |   yolo: false
15 |   agent: default
16 |
17 | logging:
18 |   logs_dir: ~/.cfuse/logs
19 |   verbose: false
--------------------------------------------------------------------------------
/codefuse/tools/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Tools Module - Built-in tools and tool registry
 3 | """
 4 |
 5 | from codefuse.tools.base import (
 6 |     BaseTool,
 7 |     ToolDefinition,
 8 |     ToolParameter,
 9 |     ToolResult,
10 | )
11 | from codefuse.tools.registry import ToolRegistry
12 |
13 | __all__ = [
14 |     "BaseTool",
15 |     "ToolDefinition",
16 |     "ToolParameter",
17 |     "ToolResult",
18 |     "ToolRegistry",
19 | ]
20 |
21 |
--------------------------------------------------------------------------------
/codefuse/tools/builtin/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Built-in Tools
 3 | """
 4 |
 5 | from codefuse.tools.builtin.read_file import ReadFileTool
 6 | from codefuse.tools.builtin.write_file import WriteFileTool
 7 | from codefuse.tools.builtin.edit_file import EditFileTool
 8 | from codefuse.tools.builtin.list_directory import ListDirectoryTool
 9 | from codefuse.tools.builtin.grep import GrepTool
10 | from codefuse.tools.builtin.glob import GlobTool
11 | from codefuse.tools.builtin.bash import BashTool
12 |
13 | __all__ = [
14 |     "ReadFileTool",
15 |     "WriteFileTool",
16 |     "EditFileTool",
17 |     "ListDirectoryTool",
18 |     "GrepTool",
19 |     "GlobTool",
20 |     "BashTool",
21 | ]
22 |
23 |
--------------------------------------------------------------------------------
/codefuse/observability/metrics/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Metrics Module - Hierarchical metrics collection for Agent sessions
 3 | """
 4 |
 5 | from .models import (
 6 |     ToolCallMetric,
 7 |     APICallMetric,
 8 |     PromptMetric,
 9 |     SessionMetric,
10 | )
11 | from .trackers import (
12 |     ToolCallTracker,
13 |     APICallTracker,
14 |     PromptTracker,
15 | )
16 | from .collector import MetricsCollector
17 |
18 | __all__ = [
19 |     # Models
20 |     "ToolCallMetric",
21 |     "APICallMetric",
22 |     "PromptMetric",
23 |     "SessionMetric",
24 |     # Trackers
25 |     "ToolCallTracker",
26 |     "APICallTracker",
27 |     "PromptTracker",
28 |     # Collector
29 |     "MetricsCollector",
30 | ]
31 |
32 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Python
 2 | __pycache__/
 3 | *.py[cod]
 4 | *$py.class
 5 | *.so
 6 | .Python
 7 | build/
 8 | develop-eggs/
 9 | dist/
10 | downloads/
11 | eggs/
12 | .eggs/
13 | lib/
14 | lib64/
15 | parts/
16 | sdist/
17 | var/
18 | wheels/
19 | *.egg-info/
20 | .installed.cfg
21 | *.egg
22 |
23 | # Virtual Environment
24 | venv/
25 | env/
26 | ENV/
27 | .venv
28 |
29 | # IDE
30 | .vscode/
31 | .idea/
32 | *.swp
33 | *.swo
34 | *~
35 | .DS_Store
36 |
37 | # Testing
38 | .pytest_cache/
39 | .coverage
40 | htmlcov/
41 | .tox/
42 |
43 | # Logs
44 | *.log
45 | logs/
46 | *.jsonl
47 |
48 | # Config
49 | .codefuse.yaml
50 | !.codefuse.yaml.example
51 |
52 | # Trajectory data
53 | trajectories/
54 | sessions/
55 |
56 | # Type checking
57 | .mypy_cache/
58 | .pytype/
59 |
--------------------------------------------------------------------------------
/codefuse/observability/logging/utils.py:
--------------------------------------------------------------------------------
 1 | """Utility functions for logging module"""
 2 |
 3 | import os
 4 |
 5 |
 6 | def path_to_slug(path: str) -> str:
 7 |     """
 8 |     Convert file path to slug suitable for directory names
 9 |
10 |     Examples:
11 |         /Users/mingmu/projects/app -> Users-mingmu-projects-app
12 |         /home/user/my project -> home-user-my_project
13 |     """
14 |     abs_path = os.path.abspath(path)
15 |
16 |     # Remove leading slash
17 |     if abs_path.startswith('/'):
18 |         abs_path = abs_path[1:]
19 |
20 |     # Replace slashes and spaces
21 |     slug = abs_path.replace('/', '-').replace(' ', '_')
22 |
23 |     # Handle Windows drive letters (C: -> C-)
24 |     if os.name == 'nt' and ':' in slug:
25 |         slug = slug.replace(':', '-')
26 |
27 |     return slug
28 |
--------------------------------------------------------------------------------
/codefuse/observability/logging/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Logging Module - Unified logging using structlog
 3 |
 4 | Provides mainLogger for debug logging:
 5 | - mainLogger: Fine-grained debug logs (file only, append mode)
 6 |
 7 | Trajectory and LLM messages are now handled by dedicated writers
 8 | in the observability module.
 9 |
10 | Usage:
11 |     from codefuse.observability.logging import setup_logging, mainLogger
12 |
13 |     setup_logging(session_id="session-123", verbose=True)
14 |
15 |     # Simple logging with structured data
16 |     mainLogger.info("tool executed", tool="read_file", duration=0.5)
17 |
18 |     # With context binding
19 |     request_logger = mainLogger.bind(request_id="req-123")
20 |     request_logger.info("processing step", step=1)
21 | """
22 |
23 | from .setup import (
24 |     setup_logging,
25 |     mainLogger,
26 |     get_session_dir,
27 |     close_all_loggers,
28 | )
29 |
30 | __all__ = [
31 |     "setup_logging",
32 |     "mainLogger",
33 |     "get_session_dir",
34 |     "close_all_loggers",
35 | ]
36 |
--------------------------------------------------------------------------------
/codefuse/llm/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | LLM Module - Unified interface for various language models
 3 | """
 4 |
 5 | from codefuse.llm.base import (
 6 |     BaseLLM,
 7 |     Message,
 8 |     MessageRole,
 9 |     ContentBlock,
10 |     Tool,
11 |     ToolCall,
12 |     TokenUsage,
13 |     LLMResponse,
14 |     LLMChunk,
15 | )
16 | from codefuse.llm.factory import create_llm
17 | from codefuse.llm.exceptions import (
18 |     LLMError,
19 |     RetryableError,
20 |     TimeoutError,
21 |     RateLimitError,
22 |     APIError,
23 |     AuthenticationError,
24 |     ContextLengthExceededError,
25 | )
26 |
27 | __all__ = [
28 |     # Base classes and data structures
29 |     "BaseLLM",
30 |     "Message",
31 |     "MessageRole",
32 |     "ContentBlock",
33 |     "Tool",
34 |     "ToolCall",
35 |     "TokenUsage",
36 |     "LLMResponse",
37 |     "LLMChunk",
38 |     # Factory
39 |     "create_llm",
40 |     # Exceptions
41 |     "LLMError",
42 |     "RetryableError",
43 |     "TimeoutError",
44 |     "RateLimitError",
45 |     "APIError",
46 |     "AuthenticationError",
47 |     "ContextLengthExceededError",
48 | ]
49 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 |
 3 | Copyright (c) 2025 CodeFuse Team
 4 |
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
23 |
--------------------------------------------------------------------------------
/codefuse/core/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Core Module - Agent loop, context engine (unified context and session management)
 3 | """
 4 |
 5 | from codefuse.core.environment import EnvironmentInfo
 6 | from codefuse.core.context_engine import ContextEngine
 7 | from codefuse.core.agent_config import AgentProfile, AgentProfileManager
 8 | from codefuse.core.read_tracker import ReadTracker
 9 | from codefuse.core.agent_loop import AgentLoop, AgentEvent
10 | from codefuse.core.tool_executor import ToolExecutor
11 | from codefuse.observability import (
12 |     setup_logging,
13 |     mainLogger,
14 |     get_session_dir,
15 |     close_all_loggers,
16 |     MetricsCollector,
17 | )
18 |
19 | # For backward compatibility, Session is now an alias to ContextEngine
20 | Session = ContextEngine
21 |
22 | __all__ = [
23 |     "EnvironmentInfo",
24 |     "ContextEngine",
25 |     "Session",  # Alias for backward compatibility
26 |     "ReadTracker",
27 |     "AgentProfile",
28 |     "AgentProfileManager",
29 |     "AgentLoop",
30 |     "AgentEvent",
31 |     "ToolExecutor",
32 |     "setup_logging",
33 |     "mainLogger",
34 |     "get_session_dir",
35 |     "close_all_loggers",
36 |     "MetricsCollector",
37 | ]
38 |
--------------------------------------------------------------------------------
/codefuse/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | CodeFuse Agent - A lightweight, high-performance AI programming assistant framework
 3 | """
 4 |
 5 | import importlib.metadata
 6 |
 7 | try:
 8 |     __version__ = importlib.metadata.version("cfuse")
 9 | except importlib.metadata.PackageNotFoundError:
10 |     # Development mode fallback
11 |     __version__ = "0.1.0"
12 |
13 | from codefuse.llm import create_llm, Message, MessageRole, Tool, LLMResponse
14 | from codefuse.tools import BaseTool, ToolDefinition, ToolParameter, ToolResult, ToolRegistry
15 | from codefuse.core import (
16 |     EnvironmentInfo,
17 |     Session,
18 |     AgentProfile,
19 |     AgentProfileManager,
20 |     ContextEngine,
21 |     AgentLoop,
22 |     AgentEvent,
23 | )
24 | from codefuse.config import Config
25 |
26 | __all__ = [
27 |     # LLM
28 |     "create_llm",
29 |     "Message",
30 |     "MessageRole",
31 |     "Tool",
32 |     "LLMResponse",
33 |     # Tools
34 |     "BaseTool",
35 |     "ToolDefinition",
36 |     "ToolParameter",
37 |     "ToolResult",
38 |     "ToolRegistry",
39 |     # Core
40 |     "EnvironmentInfo",
41 |     "Session",
42 |     "AgentProfile",
43 |     "AgentProfileManager",
44 |     "ContextEngine",
45 |     "AgentLoop",
46 |     "AgentEvent",
47 |     # Config
48 |     "Config",
49 | ]
50 |
--------------------------------------------------------------------------------
/codefuse/observability/llm_messages.py:
--------------------------------------------------------------------------------
 1 | """
 2 | LLM Messages Writer - Records latest LLM messages snapshot
 3 | """
 4 |
 5 | import json
 6 | from datetime import datetime, timezone
 7 | from pathlib import Path
 8 | from typing import Dict, Any
 9 |
10 |
11 | class LLMMessagesWriter:
12 |     """
13 |     Writes the latest LLM messages snapshot to a JSON file
14 |
15 |     This writer uses overwrite mode, keeping only the most recent state.
16 |     Useful for debugging and inspecting the current conversation context.
17 |     """
18 |
19 |     def __init__(self, file_path: Path):
20 |         """
21 |         Initialize LLM messages writer
22 |
23 |         Args:
24 |             file_path: Path to the LLM messages JSON file
25 |         """
26 |         self.file_path = Path(file_path)
27 |         self.file_path.parent.mkdir(parents=True, exist_ok=True)
28 |
29 |     def write(self, formatted_data: Dict[str, Any]):
30 |         """
31 |         Write LLM messages snapshot (overwrites existing file)
32 |
33 |         Args:
34 |             formatted_data: Formatted data containing messages and tools
35 |                 Expected keys: 'messages', 'tools', and optionally 'session_id'
36 |         """
37 |         # Build complete snapshot
38 |         snapshot = {
39 |             'timestamp': datetime.now(timezone.utc).isoformat(),
40 |             **formatted_data
41 |         }
42 |
43 |         # Write to file (overwrite mode)
44 |         with open(self.file_path, 'w', encoding='utf-8') as f:
45 |             json.dump(snapshot, f, ensure_ascii=False, indent=2)
46 |
47 |     def close(self):
48 |         """Close method for consistency (no-op for this writer)"""
49 |         pass
50 |
51 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [project]
 2 | name = "cfuse"
 3 | version = "0.1.0"
 4 | description = "CodeFuse-Agent: A lightweight, high-performance AI programming assistant framework"
 5 | authors = [{name = "CodeFuse Team"}]
 6 | readme = "README.md"
 7 | requires-python = ">=3.10"
 8 | license = {text = "MIT"}
 9 |
10 | dependencies = [
11 |     "openai>=1.0.0",
12 |     "rich>=13.0.0",
13 |     "prompt-toolkit>=3.0.0",
14 |     "pydantic>=2.0.0",
15 |     "pyyaml>=6.0",
16 |     "click>=8.0.0",
17 |     "structlog>=23.0.0",
18 |     "flask>=3.0.0",
19 |     "gunicorn>=21.0.0",
20 |     "dnspython>=2.6.1",
21 |     "prometheus-client>=0.19.0",
22 | ]
23 |
24 | [project.optional-dependencies]
25 | dev = [
26 |     "pytest>=7.0.0",
27 |     "pytest-asyncio>=0.21.0",
28 |     "black>=23.0.0",
29 |     "ruff>=0.1.0",
30 |     "mypy>=1.0.0",
31 | ]
32 |
33 | anthropic = [
34 |     "anthropic>=0.8.0",
35 | ]
36 |
37 | gemini = [
38 |     "google-generativeai>=0.3.0",
39 | ]
40 |
41 | # Optional: Python-based ripgrep as fallback
42 | ripgrep = [
43 |     "ripgrep-python>=0.1.0",
44 | ]
45 |
46 | [project.scripts]
47 | cfuse = "codefuse.cli.main:main"
48 |
49 | [build-system]
50 | requires = ["hatchling"]
51 | build-backend = "hatchling.build"
52 |
53 | [tool.hatch.build.targets.wheel]
54 | packages = ["codefuse"]
55 |
56 | [tool.hatch.build.targets.wheel.force-include]
57 | "codefuse/tools/utils/ripgrep" = "codefuse/tools/utils/ripgrep"
58 | "codefuse/cli/templates" = "codefuse/cli/templates"
59 |
60 | [tool.black]
61 | line-length = 100
62 | target-version = ['py310']
63 |
64 | [tool.ruff]
65 | line-length = 100
66 | select = ["E", "F", "I", "N", "W"]
67 |
68 | [tool.mypy]
69 | python_version = "3.10"
70 | warn_return_any = true
71 | warn_unused_configs = true
72 | ignore_missing_imports = true
73 |
74 |
--------------------------------------------------------------------------------
/codefuse/observability/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Observability Module - Logging, Trajectory, and Metrics for CodeFuse Agent
 3 |
 4 | This module provides comprehensive observability capabilities including:
 5 | - Debug logging using structlog (JSONL format)
 6 | - Trajectory recording (event stream in JSONL)
 7 | - LLM messages snapshots (JSON)
 8 | - Hierarchical metrics collection
 9 | - Session tracking and analysis
10 | - HTTP server logging with rotation and cleanup
11 | """
12 |
13 | # Logging exports
14 | from .logging import (
15 |     setup_logging,
16 |     mainLogger,
17 |     get_session_dir,
18 |     close_all_loggers,
19 | )
20 |
21 | # HTTP logging exports
22 | from .http_logger import (
23 |     HTTPLogger,
24 |     create_http_logger,
25 | )
26 |
27 | # Writer exports
28 | from .trajectory import TrajectoryWriter
29 | from .llm_messages import LLMMessagesWriter
30 |
31 | # Metrics exports
32 | from .metrics import (
33 |     # Models
34 |     ToolCallMetric,
35 |     APICallMetric,
36 |     PromptMetric,
37 |     SessionMetric,
38 |     # Trackers
39 |     ToolCallTracker,
40 |     APICallTracker,
41 |     PromptTracker,
42 |     # Collector
43 |     MetricsCollector,
44 | )
45 |
46 | __all__ = [
47 |     # Logging
48 |     "setup_logging",
49 |     "mainLogger",
50 |     "get_session_dir",
51 |     "close_all_loggers",
52 |     # HTTP Logging
53 |     "HTTPLogger",
54 |     "create_http_logger",
55 |     # Writers
56 |     "TrajectoryWriter",
57 |     "LLMMessagesWriter",
58 |     # Metrics - Models
59 |     "ToolCallMetric",
60 |     "APICallMetric",
61 |     "PromptMetric",
62 |     "SessionMetric",
63 |     # Metrics - Trackers
64 |     "ToolCallTracker",
65 |     "APICallTracker",
66 |     "PromptTracker",
67 |     # Metrics - Collector
68 |     "MetricsCollector",
69 | ]
70 |
71 |
--------------------------------------------------------------------------------
/codefuse/observability/metrics/models.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Metrics Models - Data classes for hierarchical metrics
 3 | """
 4 |
 5 | from dataclasses import dataclass, field
 6 | from typing import Optional, List, Dict, Any
 7 |
 8 |
 9 | @dataclass
10 | class ToolCallMetric:
11 |     """Metrics for a single tool call"""
12 |     tool_call_id: str
13 |     tool_name: str
14 |     start_time: str
15 |     end_time: Optional[str] = None
16 |     duration: Optional[float] = None  # seconds
17 |     success: bool = True
18 |     error: Optional[str] = None
19 |     arguments: Optional[Dict[str, Any]] = None
20 |
21 |
22 | @dataclass
23 | class APICallMetric:
24 |     """Metrics for a single API call"""
25 |     api_id: str
26 |     start_time: str
27 |     end_time: Optional[str] = None
28 |     duration: Optional[float] = None  # seconds
29 |     prompt_tokens: Optional[int] = None
30 |     completion_tokens: Optional[int] = None
31 |     total_tokens: Optional[int] = None
32 |     cache_creation_tokens: Optional[int] = None
33 |     cache_read_tokens: Optional[int] = None
34 |     success: bool = True
35 |     error: Optional[str] = None
36 |     model: Optional[str] = None
37 |     finish_reason: Optional[str] = None
38 |
39 |
40 | @dataclass
41 | class PromptMetric:
42 |     """Metrics for a single user prompt/query"""
43 |     prompt_id: str
44 |     user_query: str
45 |     start_time: str
46 |     end_time: Optional[str] = None
47 |     duration: Optional[float] = None  # seconds
48 |     iterations: int = 0
49 |     api_calls: List[APICallMetric] = field(default_factory=list)
50 |     tool_calls: List[ToolCallMetric] = field(default_factory=list)
51 |
52 |
53 | @dataclass
54 | class SessionMetric:
55 |     """Metrics for entire session"""
56 |     session_id: str
57 |     start_time: str
58 |     end_time: Optional[str] = None
59 |     duration: Optional[float] = None  # seconds
60 |     total_prompts: int = 0
61 |     prompts: List[PromptMetric] = field(default_factory=list)
62 |
--------------------------------------------------------------------------------
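The four dataclasses above nest bottom-up: ToolCallMetric and APICallMetric records hang off a PromptMetric, and PromptMetric records hang off a SessionMetric. The MetricsCollector that normally wires them together (imported from .collector in the metrics __init__) is not part of this dump, so the sketch below (illustrative only, with made-up IDs) drives the models and the trackers from trackers.py directly:

from datetime import datetime, timezone

from codefuse.observability.metrics.models import PromptMetric, ToolCallMetric
from codefuse.observability.metrics.trackers import ToolCallTracker

now = datetime.now(timezone.utc).isoformat()

# One record per user prompt; the caller (normally the collector) attaches
# tool-call records to it.
prompt = PromptMetric(prompt_id="p-1", user_query="read the config", start_time=now)
tool_metric = ToolCallMetric(tool_call_id="t-1", tool_name="read_file", start_time=now)
prompt.tool_calls.append(tool_metric)

# On exit the tracker stamps end_time and duration, and records any raised
# exception as a failure without suppressing it.
with ToolCallTracker(tool_metric, parent_prompt=prompt) as tracker:
    tracker.set_success()

print(prompt.tool_calls[0].duration)  # wall-clock seconds for the call

--------------------------------------------------------------------------------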
/codefuse/core/read_tracker.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Read Tracker - Tracks which files have been read in the current session
 3 |
 4 | This module provides file read tracking for edit tool validation.
 5 | The edit_file tool requires files to be read before editing to prevent
 6 | accidental modifications to files the agent hasn't seen.
 7 | """
 8 |
 9 | from pathlib import Path
10 | from typing import Set
11 |
12 | from codefuse.observability import mainLogger
13 |
14 |
15 | class ReadTracker:
16 |     """
17 |     File read tracker for edit tool validation
18 |
19 |     Tracks which files have been read in the current session.
20 |     Used by EditFileTool to ensure files are read before editing.
21 |     """
22 |
23 |     def __init__(self):
24 |         """Initialize empty read tracker"""
25 |         self._read_files: Set[str] = set()
26 |
27 |     def mark_as_read(self, file_path: str) -> None:
28 |         """
29 |         Mark a file as having been read
30 |
31 |         Args:
32 |             file_path: Path to the file that was read
33 |         """
34 |         resolved_path = str(Path(file_path).resolve())
35 |         self._read_files.add(resolved_path)
36 |         mainLogger.debug("Marked file as read", file_path=resolved_path)
37 |
38 |     def is_read(self, file_path: str) -> bool:
39 |         """
40 |         Check if a file has been read
41 |
42 |         Args:
43 |             file_path: Path to check
44 |
45 |         Returns:
46 |             True if the file has been read, False otherwise
47 |         """
48 |         resolved_path = str(Path(file_path).resolve())
49 |         return resolved_path in self._read_files
50 |
51 |     def clear(self) -> None:
52 |         """
53 |         Clear the read file tracking
54 |
55 |         This can be used to reset the tracking state, for example
56 |         when starting a new user query.
57 |         """
58 |         self._read_files.clear()
59 |         mainLogger.debug("Cleared read file tracking")
60 |
61 |
--------------------------------------------------------------------------------
/codefuse/llm/exceptions.py:
--------------------------------------------------------------------------------
 1 | """
 2 | LLM Exception Classes
 3 | """
 4 |
 5 | from typing import Optional
 6 |
 7 |
 8 | class LLMError(Exception):
 9 |     """Base exception for all LLM-related errors"""
10 |     pass
11 |
12 |
13 | class RetryableError(LLMError):
14 |     """Base class for errors that can be retried"""
15 |     pass
16 |
17 |
18 | class TimeoutError(RetryableError):
19 |     """Request timeout error - will be retried"""
20 |     def __init__(self, message: str, original_error: Optional[Exception] = None):
21 |         super().__init__(message)
22 |         self.original_error = original_error
23 |
24 |
25 | class RateLimitError(RetryableError):
26 |     """Rate limit exceeded error - will be retried with backoff"""
27 |     def __init__(
28 |         self,
29 |         message: str,
30 |         retry_after: Optional[float] = None,
31 |         original_error: Optional[Exception] = None
32 |     ):
33 |         super().__init__(message)
34 |         self.retry_after = retry_after  # Seconds to wait before retry
35 |         self.original_error = original_error
36 |
37 |
38 | class APIError(LLMError):
39 |     """General API error - not retryable"""
40 |     def __init__(
41 |         self,
42 |         message: str,
43 |         status_code: Optional[int] = None,
44 |         original_error: Optional[Exception] = None
45 |     ):
46 |         super().__init__(message)
47 |         self.status_code = status_code
48 |         self.original_error = original_error
49 |
50 |
51 | class AuthenticationError(LLMError):
52 |     """Authentication failed - invalid API key or credentials"""
53 |     pass
54 |
55 |
56 | class ContextLengthExceededError(LLMError):
57 |     """Context length exceeded the model's maximum"""
58 |     def __init__(self, message: str, max_tokens: Optional[int] = None):
59 |         super().__init__(message)
60 |         self.max_tokens = max_tokens
61 |
62 |
63 | class InvalidRequestError(LLMError):
64 |     """Invalid request parameters"""
65 |     pass
66 |
67 |
68 | class ModelNotFoundError(LLMError):
69 |     """Requested model not found or not available"""
70 |     pass
71 |
72 |
--------------------------------------------------------------------------------
/codefuse/llm/providers/gemini.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Google Gemini LLM Implementation (Placeholder)
 3 |
 4 | To be implemented with Gemini-specific features:
 5 | - Native Gemini SDK integration
 6 | - Gemini-specific parameters
 7 | """
 8 |
 9 | import logging
10 | from typing import List, Optional, Union, Iterator
11 |
12 | from codefuse.llm.base import BaseLLM, Message, Tool, LLMResponse, LLMChunk
13 |
14 | from codefuse.observability import mainLogger
15 |
16 |
17 | class GeminiLLM(BaseLLM):
18 |     """
19 |     Google Gemini LLM implementation
20 |
21 |     TODO: Implement Gemini-specific features:
22 |     - Native Gemini SDK integration
23 |     - Gemini-specific parameters and settings
24 |     - Function calling format conversion
25 |     - Multimodal support
26 |     """
27 |
28 |     def __init__(self, **kwargs):
29 |         """Initialize Gemini client"""
30 |         super().__init__(**kwargs)
31 |         mainLogger.warning(
32 |             "GeminiLLM is not yet implemented. "
33 |             "Please use OpenAICompatibleLLM or implement this class."
34 |         )
35 |         raise NotImplementedError(
36 |             "GeminiLLM is a placeholder. "
37 |             "Use provider='openai_compatible' for now."
38 |         )
39 |
40 |     @property
41 |     def supports_prompt_caching(self) -> bool:
42 |         """Check if Gemini supports caching"""
43 |         return False  # TODO: Verify Gemini's caching capabilities
44 |
45 |     @property
46 |     def supports_parallel_tools(self) -> bool:
47 |         """Check if Gemini supports parallel function calls"""
48 |         return True  # TODO: Verify Gemini's parallel tool support
49 |
50 |     def generate(
51 |         self,
52 |         messages: List[Message],
53 |         tools: Optional[List[Tool]] = None,
54 |         temperature: Optional[float] = None,
55 |         max_tokens: Optional[int] = None,
56 |         stream: bool = False,
57 |         **kwargs
58 |     ) -> Union[LLMResponse, Iterator[LLMChunk]]:
59 |         """Generate completion using Gemini API"""
60 |         raise NotImplementedError("GeminiLLM.generate() not yet implemented")
61 |
62 |
--------------------------------------------------------------------------------
/codefuse/observability/trajectory.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Trajectory Writer - Records agent execution events to JSONL format
 3 | """
 4 |
 5 | import json
 6 | from datetime import datetime, timezone
 7 | from pathlib import Path
 8 | from typing import Optional, Dict, Any
 9 |
10 |
11 | class TrajectoryWriter:
12 |     """
13 |     Writes agent execution trajectory events to a JSONL file
14 |
15 |     Each event is a JSON object on a separate line, enabling:
16 |     - Streaming writes (append-only)
17 |     - Real-time monitoring (tail -f)
18 |     - Easy parsing (line-by-line)
19 |     """
20 |
21 |     def __init__(self, file_path: Path):
22 |         """
23 |         Initialize trajectory writer
24 |
25 |         Args:
26 |             file_path: Path to the trajectory JSONL file
27 |         """
28 |         self.file_path = Path(file_path)
29 |         self._file_handle: Optional[Any] = None
30 |         self._opened = False
31 |
32 |     def _ensure_open(self):
33 |         """Ensure file handle is open"""
34 |         if not self._opened:
35 |             self.file_path.parent.mkdir(parents=True, exist_ok=True)
36 |             self._file_handle = open(self.file_path, 'a', encoding='utf-8')
37 |             self._opened = True
38 |
39 |     def write(self, event_data: Dict[str, Any]):
40 |         """
41 |         Write a single event to the trajectory file
42 |
43 |         Automatically adds timestamp if not present.
44 |
45 |         Args:
46 |             event_data: Event data dictionary
47 |         """
48 |         self._ensure_open()
49 |
50 |         # Add timestamp if not present
51 |         if 'timestamp' not in event_data:
52 |             event_data['timestamp'] = datetime.now(timezone.utc).isoformat()
53 |
54 |         # Write as single line JSON
55 |         json_line = json.dumps(event_data, ensure_ascii=False)
56 |         self._file_handle.write(json_line + '\n')
57 |         self._file_handle.flush()
58 |
59 |     def write_summary(self, summary_data: Dict[str, Any]):
60 |         """
61 |         Write session summary event
62 |
63 |         This is typically called at the end of a session.
64 |
65 |         Args:
66 |             summary_data: Summary data from MetricsCollector
67 |         """
68 |         event = {
69 |             'event_type': 'session_summary',
70 |             'timestamp': datetime.now(timezone.utc).isoformat(),
71 |             **summary_data
72 |         }
73 |         self.write(event)
74 |
75 |     def close(self):
76 |         """Close the file handle"""
77 |         if self._opened and self._file_handle:
78 |             self._file_handle.close()
79 |             self._opened = False
80 |             self._file_handle = None
81 |
82 |     def __enter__(self):
83 |         """Context manager entry"""
84 |         self._ensure_open()
85 |         return self
86 |
87 |     def __exit__(self, exc_type, exc_val, exc_tb):
88 |         """Context manager exit"""
89 |         self.close()
90 |         return False
91 |
92 |
--------------------------------------------------------------------------------
/codefuse/observability/metrics/trackers.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Metrics Trackers - Context managers for tracking execution metrics
  3 | """
  4 |
  5 | import time
  6 | from datetime import datetime, timezone
  7 | from typing import Optional
  8 |
  9 | from .models import ToolCallMetric, APICallMetric, PromptMetric, SessionMetric
 10 |
 11 |
 12 | class ToolCallTracker:
 13 |     """Context manager for tracking tool call execution"""
 14 |
 15 |     def __init__(self, metric: ToolCallMetric, parent_prompt: PromptMetric):
 16 |         self.metric = metric
 17 |         self.parent_prompt = parent_prompt
 18 |         self._start_time = time.time()
 19 |
 20 |     def set_error(self, error: str):
 21 |         """Set error for this tool call"""
 22 |         self.metric.success = False
 23 |         self.metric.error = error
 24 |
 25 |     def set_success(self, success: bool = True):
 26 |         """Set success status"""
 27 |         self.metric.success = success
 28 |
 29 |     def __enter__(self):
 30 |         return self
 31 |
 32 |     def __exit__(self, exc_type, exc_val, exc_tb):
 33 |         end = time.time()
 34 |         self.metric.end_time = datetime.now(timezone.utc).isoformat()
 35 |         self.metric.duration = end - self._start_time
 36 |
 37 |         if exc_type is not None:
 38 |             self.metric.success = False
 39 |             self.metric.error = str(exc_val)
 40 |
 41 |         return False  # Don't suppress exceptions
 42 |
 43 |
 44 | class APICallTracker:
 45 |     """Context manager for tracking API call"""
 46 |
 47 |     def __init__(self, metric: APICallMetric, parent_prompt: PromptMetric):
 48 |         self.metric = metric
 49 |         self.parent_prompt = parent_prompt
 50 |         self._start_time = time.time()
 51 |
 52 |     def set_tokens(
 53 |         self,
 54 |         prompt_tokens: int,
 55 |         completion_tokens: int,
 56 |         total_tokens: int,
 57 |         cache_creation_tokens: Optional[int] = None,
 58 |         cache_read_tokens: Optional[int] = None,
 59 |     ):
 60 |         """Set token usage information"""
 61 |         self.metric.prompt_tokens = prompt_tokens
 62 |         self.metric.completion_tokens = completion_tokens
 63 |         self.metric.total_tokens = total_tokens
 64 |         self.metric.cache_creation_tokens = cache_creation_tokens
 65 |         self.metric.cache_read_tokens = cache_read_tokens
 66 |
 67 |     def set_model(self, model: str):
 68 |         """Set model name"""
 69 |         self.metric.model = model
 70 |
 71 |     def set_finish_reason(self, finish_reason: str):
 72 |         """Set finish reason"""
 73 |         self.metric.finish_reason = finish_reason
 74 |
 75 |     def set_error(self, error: str):
 76 |         """Set error for this API call"""
 77 |         self.metric.success = False
 78 |         self.metric.error = error
 79 |
 80 |     def set_success(self, success: bool = True):
 81 |         """Set success status"""
 82 |         self.metric.success = success
 83 |
 84 |     def __enter__(self):
 85 |         return self
 86 |
 87 |     def __exit__(self, exc_type, exc_val, exc_tb):
 88 |         end = time.time()
 89 |         self.metric.end_time = datetime.now(timezone.utc).isoformat()
 90 |         self.metric.duration = end - self._start_time
 91 |
 92 |         if exc_type is not None:
 93 |             self.metric.success = False
 94 |             self.metric.error = str(exc_val)
 95 |
 96 |         return False  # Don't suppress exceptions
 97 |
 98 |
 99 | class PromptTracker:
100 |     """Context manager for tracking a prompt/query"""
101 |
102 |     def __init__(self, metric: PromptMetric, session: SessionMetric):
103 |         self.metric = metric
104 |         self.session = session
105 |         self._start_time = time.time()
106 |
107 |     def increment_iteration(self):
108 |         """Increment iteration count"""
109 |         self.metric.iterations += 1
110 |
111 |     def __enter__(self):
112 |         return self
113 |
114 |     def __exit__(self, exc_type, exc_val, exc_tb):
115 |         end = time.time()
116 |         self.metric.end_time = datetime.now(timezone.utc).isoformat()
117 |         self.metric.duration = end - self._start_time
118 |
119 |         return False  # Don't suppress exceptions
120 |
121 |
--------------------------------------------------------------------------------
/codefuse/observability/logging/setup.py:
--------------------------------------------------------------------------------
  1 | """Unified logging configuration using structlog"""
  2 |
  3 | import os
  4 | import logging
  5 | import structlog
  6 | from pathlib import Path
  7 | from typing import Optional
  8 | from .utils import path_to_slug
  9 |
 10 | # State tracking
 11 | _logging_initialized = False
 12 | _session_dir: Optional[Path] = None
 13 |
 14 | def _json_formatter(logger, method_name, event_dict):
 15 |     """Custom formatter that outputs clean JSON lines"""
 16 |     import json
 17 |     from datetime import datetime, timezone
 18 |
 19 |     # Build JSON structure
 20 |     log_data = {
 21 |         "timestamp": datetime.now(timezone.utc).isoformat(),
 22 |         "level": event_dict.pop("level", "info"),
 23 |     }
 24 |
 25 |     # Add logger name if present
 26 |     if "logger" in event_dict:
 27 |         log_data["logger"] = event_dict.pop("logger")
 28 |
 29 |     # Add event/message
 30 |     if "event" in event_dict:
 31 |         log_data["message"] = event_dict.pop("event")
 32 |
 33 |     # Add all remaining fields
 34 |     log_data.update(event_dict)
 35 |
 36 |     return json.dumps(log_data, ensure_ascii=False)
 37 |
 38 |
 39 | # Configure standard logging backend with NullHandler (silent before setup)
 40 | stdlib_logger = logging.getLogger("codefuse.main")
 41 | stdlib_logger.addHandler(logging.NullHandler())
 42 | stdlib_logger.propagate = False
 43 | stdlib_logger.setLevel(logging.DEBUG)
 44 |
 45 | # Configure structlog once at module load time
 46 | structlog.configure(
 47 |     processors=[
 48 |         structlog.stdlib.add_log_level,
 49 |         structlog.stdlib.PositionalArgumentsFormatter(),
 50 |         structlog.processors.StackInfoRenderer(),
 51 |         structlog.processors.format_exc_info,
 52 |         _json_formatter,
 53 |     ],
 54 |     wrapper_class=structlog.stdlib.BoundLogger,
 55 |     context_class=dict,
 56 |     logger_factory=structlog.stdlib.LoggerFactory(),
 57 |     cache_logger_on_first_use=True,
 58 | )
 59 |
 60 | # Create global logger instance (ready to use, silent before setup)
 61 | mainLogger = structlog.get_logger("codefuse.main")
 62 |
 63 |
 64 | def setup_logging(
 65 |     session_id: str,
 66 |     workspace_path: Optional[str] = None,
 67 |     logs_dir: str = "~/.cfuse/logs",
 68 |     verbose: bool = False,
 69 | ) -> Path:
 70 |     """
 71 |     Setup file handler for logging
 72 |
 73 |     Configures mainLogger for debug logs (file only, append mode).
 74 |     Trajectory and LLM messages are now handled by dedicated writers.
 75 |
 76 |     Args:
 77 |         session_id: Unique session identifier
 78 |         workspace_path: Workspace path (default: cwd)
 79 |         logs_dir: Base logs directory
 80 |         verbose: Enable console output (currently unused, kept for compatibility)
 81 |
 82 |     Returns:
 83 |         Session directory path
 84 |     """
 85 |     global _logging_initialized, _session_dir
 86 |
 87 |     if _logging_initialized:
 88 |         return _session_dir
 89 |
 90 |     # Prepare session directory
 91 |     workspace_path = workspace_path or os.getcwd()
 92 |     base_logs_dir = Path(logs_dir).expanduser()
 93 |     workspace_slug = path_to_slug(workspace_path)
 94 |     session_dir = base_logs_dir / workspace_slug / session_id
 95 |     session_dir.mkdir(parents=True, exist_ok=True)
 96 |     _session_dir = session_dir
 97 |
 98 |     # Configure main logger: DEBUG level, file only
 99 |     main_logger = logging.getLogger("codefuse.main")
100 |     main_logger.handlers.clear()  # Remove NullHandler
101 |     main_handler = logging.FileHandler(session_dir / "main.log", mode='a', encoding='utf-8')
102 |     main_handler.setLevel(logging.DEBUG)
103 |     main_logger.addHandler(main_handler)
104 |
105 |     _logging_initialized = True
106 |
107 |     mainLogger.info(
108 |         "Logging initialized",
109 |         session_id=session_id,
110 |         workspace=workspace_path,
111 |         logs_dir=str(session_dir),
112 |         verbose=verbose,
113 |     )
114 |
115 |     return session_dir
116 |
117 |
118 | def get_session_dir() -> Optional[Path]:
119 |     """Get the current session directory path"""
120 |     return _session_dir
121 |
122 |
123 | def close_all_loggers():
124 |     """Close all logger handlers and flush buffers"""
125 |     logger = logging.getLogger("codefuse.main")
126 |     for handler in logger.handlers[:]:
127 |         handler.close()
128 |         logger.removeHandler(handler)
129 |
--------------------------------------------------------------------------------
/codefuse/llm/retry.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Retry Logic for LLM Requests
  3 | """
  4 |
  5 | import time
  6 | from functools import wraps
  7 | from typing import Callable, Tuple, Type, Optional
  8 |
  9 | from codefuse.llm.exceptions import RetryableError, RateLimitError, TimeoutError
 10 | from codefuse.observability import mainLogger
 11 |
 12 |
 13 | def retry_on_failure(
 14 |     max_retries: int = 3,
 15 |     initial_delay: float = 1.0,
 16 |     exponential_base: float = 2.0,
 17 |     retryable_exceptions: Tuple[Type[Exception], ...] = (RetryableError, RateLimitError, TimeoutError)
 18 | ):
 19 |     """
 20 |     Decorator to retry function calls on specific exceptions
 21 |
 22 |     Retry Strategy:
 23 |     - Timeout errors: Retry with exponential backoff
 24 |     - Rate limit errors (429): Retry with exponential backoff or Retry-After header
 25 |     - Other errors: Raise immediately
 26 |
 27 |     Args:
 28 |         max_retries: Maximum number of retry attempts (default: 3)
 29 |         initial_delay: Initial delay in seconds (default: 1.0)
 30 |         exponential_base: Base for exponential backoff (default: 2.0)
 31 |         retryable_exceptions: Tuple of exception types that should trigger retry
 32 |
 33 |     Returns:
 34 |         Decorated function that will retry on retryable errors
 35 |     """
 36 |     def decorator(func: Callable) -> Callable:
 37 |         @wraps(func)
 38 |         def wrapper(*args, **kwargs):
 39 |             last_exception: Optional[Exception] = None
 40 |
 41 |             for attempt in range(max_retries):
 42 |                 try:
 43 |                     return func(*args, **kwargs)
 44 |
 45 |                 except retryable_exceptions as e:
 46 |                     last_exception = e
 47 |
 48 |                     # If this was the last attempt, raise the exception
 49 |                     if attempt == max_retries - 1:
 50 |                         mainLogger.error(
 51 |                             f"Failed after {max_retries} attempts: {type(e).__name__}: {e}"
 52 |                         )
 53 |                         raise
 54 |
 55 |                     # Calculate wait time
 56 |                     if isinstance(e, RateLimitError) and e.retry_after:
 57 |                         # Use the Retry-After value from the API response
 58 |                         wait_time = e.retry_after
 59 |                         mainLogger.warning(
 60 |                             f"Rate limit hit. Waiting {wait_time:.1f}s as specified by API."
 61 |                         )
 62 |                     else:
 63 |                         # Exponential backoff: 1s, 2s, 4s, 8s, etc.
 64 |                         wait_time = initial_delay * (exponential_base ** attempt)
 65 |                         mainLogger.warning(
 66 |                             f"Attempt {attempt + 1}/{max_retries} failed: {type(e).__name__}: {e}"
 67 |                         )
 68 |
 69 |                     mainLogger.info(f"Retrying in {wait_time:.2f} seconds...")
 70 |                     time.sleep(wait_time)
 71 |
 72 |                 except Exception as e:
 73 |                     # Non-retryable error - raise immediately
 74 |                     mainLogger.error(f"Non-retryable error occurred: {type(e).__name__}: {e}")
 75 |                     raise
 76 |
 77 |             # Should never reach here, but just in case
 78 |             if last_exception:
 79 |                 raise last_exception
 80 |
 81 |         return wrapper
 82 |     return decorator
 83 |
 84 |
 85 | def should_retry(exception: Exception) -> bool:
 86 |     """
 87 |     Determine if an exception should trigger a retry
 88 |
 89 |     Args:
 90 |         exception: The exception to check
 91 |
 92 |     Returns:
 93 |         True if the exception is retryable, False otherwise
 94 |     """
 95 |     return isinstance(exception, (RetryableError, RateLimitError, TimeoutError))
 96 |
 97 |
 98 | def get_retry_delay(
 99 |     attempt: int,
100 |     exception: Optional[Exception] = None,
101 |     initial_delay: float = 1.0,
102 |     exponential_base: float = 2.0
103 | ) -> float:
104 |     """
105 |     Calculate retry delay based on attempt number and exception type
106 |
107 |     Args:
108 |         attempt: Current attempt number (0-indexed)
109 |         exception: The exception that triggered the retry
110 |         initial_delay: Initial delay in seconds
111 |         exponential_base: Base for exponential backoff
112 |
113 |     Returns:
114 |         Delay in seconds before next retry
115 |     """
116 |     # Check if exception has a retry_after attribute (e.g., RateLimitError)
117 |     if isinstance(exception, RateLimitError) and exception.retry_after:
118 |         return exception.retry_after
119 |
120 |     # Default exponential backoff
121 |     return initial_delay * (exponential_base ** attempt)
122 |
123 |
--------------------------------------------------------------------------------
/codefuse/cli/headless.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Headless Mode - Single-prompt execution
  3 | """
  4 |
  5 | import json
  6 | from datetime import datetime
  7 | from typing import Dict, Any, List, Union
  8 | from rich.console import Console
  9 | from rich.panel import Panel
 10 | from rich.markdown import Markdown
 11 |
 12 | from codefuse.llm.base import ContentBlock
 13 | from codefuse.observability import mainLogger, close_all_loggers
 14 |
 15 | console = Console()
 16 |
 17 |
 18 | def run_headless(
 19 |     prompt: str,
 20 |     components: Dict[str, Any],
 21 |     stream: bool = True,
 22 |     image_urls: tuple = tuple(),
 23 | ):
 24 |     """
 25 |     Run agent in headless mode (single prompt execution)
 26 |
 27 |     Args:
 28 |         prompt: User prompt/query
 29 |         components: Dictionary of initialized components from initialize_agent_components()
 30 |         stream: Whether to stream LLM responses
 31 |         image_urls: Optional tuple of image URLs to include in the prompt
 32 |     """
 33 |     # Unpack components
 34 |     agent_profile = components["agent_profile"]
 35 |     env_info = components["env_info"]
 36 |     agent_loop = components["agent_loop"]
 37 |     available_tools = components["available_tools"]
 38 |     session_dir = components["session_dir"]
 39 |     config = components["config"]
 40 |     model_name = components["model_name"]
 41 |     metrics_collector = components["metrics_collector"]
 42 |     context_engine = components["context_engine"]
 43 |     resumed_conversation = components["resumed_conversation"]
 44 |
 45 |     # Build user query content (text + optional images)
 46 |     user_query: Union[str, List[ContentBlock]]
 47 |     if image_urls:
 48 |         # Build multimodal content
 49 |         content_blocks: List[ContentBlock] = []
 50 |
 51 |         # Add text block
 52 |         if prompt:
 53 |             content_blocks.append(ContentBlock(type="text", text=prompt))
 54 |
 55 |         # Add image blocks
 56 |         for url in image_urls:
 57 |             content_blocks.append(ContentBlock(
 58 |                 type="image_url",
 59 |                 image_url={"url": url}
 60 |             ))
 61 |
 62 |         user_query = content_blocks
 63 |     else:
 64 |         # Pure text content
 65 |         user_query = prompt
 66 |
 67 |     # User message will be logged by agent_loop automatically
 68 |
 69 |     # Run agent loop
 70 |     mainLogger.info("Agent loop starting", session_id=context_engine.session_id)
 71 |
 72 |     final_response = ""
 73 |     current_content = ""
 74 |     current_tool_calls = []  # Track tool calls for the current response
 75 |     iterations = 1
 76 |
 77 |     for event in agent_loop.run(
 78 |         user_query=user_query,
 79 |         stream=stream,
 80 |     ):
 81 |         if event.type == "llm_done":
 82 |             if not stream:
 83 |                 # Non-streaming: save content
 84 |                 content = event.data["content"]
 85 |                 if content:
 86 |                     current_content = content
 87 |
 88 |             # Check if there are tool calls in the response
 89 |             if "tool_calls" in event.data and event.data["tool_calls"]:
 90 |                 current_tool_calls = event.data["tool_calls"]
 91 |
 92 |         elif event.type == "tool_done":
 93 |             tool_name = event.data["tool_name"]
 94 |             tool_call_id = event.data.get("tool_call_id")
 95 |             result = event.data["result"]
 96 |             confirmed = event.data.get("confirmed", True)
 97 |
 98 |             # Tool results are logged by tool_executor automatically
 99 |
100 |         elif event.type == "agent_done":
101 |             final_response = event.data["final_response"]
102 |             iterations = event.data["iterations"]
103 |
104 |             # Save final assistant message to trajectory
105 |             assistant_message = {
106 |                 "role": "assistant",
107 |                 "content": final_response or current_content,
108 |                 "timestamp": datetime.now().isoformat(),
109 |             }
110 |             if current_tool_calls:
111 |                 assistant_message["tool_calls"] = current_tool_calls
112 |             # Assistant messages are logged by agent_loop automatically
113 |
114 |             # Only output the final response content
115 |             console.print(final_response or current_content)
116 |
117 |         elif event.type == "error":
118 |             error = event.data["error"]
119 |             console.print(f"[red]Error:[/red] {error}")
120 |
121 |     # Generate and save metrics summary
122 |     summary = metrics_collector.generate_summary()
123 |
124 |     # Write summary to trajectory
125 |     context_engine.write_session_summary(summary)
126 |
127 |     mainLogger.info("Agent loop completed", status="success")
128 |
129 |     # Close all loggers
130 |     close_all_loggers()
131 |
--------------------------------------------------------------------------------
/codefuse/tools/base.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Base classes for tools
  3 | """
  4 |
  5 | from abc import ABC, abstractmethod
  6 | from dataclasses import dataclass, field
  7 | from typing import Dict, Any, Optional, List
  8 |
  9 |
 10 | @dataclass
 11 | class ToolResult:
 12 |     """
 13 |     Result of a tool execution
 14 |
 15 |     Attributes:
 16 |         content: Full result content for LLM (detailed, structured)
 17 |         display: User-friendly display text for interactive mode (concise, formatted)
 18 |     """
 19 |     content: str
 20 |     display: Optional[str] = None
 21 |
 22 |     def __post_init__(self):
 23 |         """If display is not provided, use content as display"""
 24 |         if self.display is None:
 25 |             self.display = self.content
 26 |
 27 |     def __str__(self) -> str:
 28 |         """String representation returns content for LLM"""
 29 |         return self.content
 30 |
 31 |
 32 | @dataclass
 33 | class ToolParameter:
 34 |     """Definition of a tool parameter"""
 35 |     name: str
 36 |     type: str  # "string", "number", "boolean", "array", "object"
 37 |     description: str
 38 |     required: bool = True
 39 |     enum: Optional[List[str]] = None
 40 |
 41 |     def to_dict(self) -> Dict[str, Any]:
 42 |         """Convert to dictionary format"""
 43 |         result = {
 44 |             "type": self.type,
 45 |             "description": self.description,
 46 |         }
 47 |         if self.enum:
 48 |             result["enum"] = self.enum
 49 |         return result
 50 |
 51 |
 52 | @dataclass
 53 | class ToolDefinition:
 54 |     """Definition of a tool"""
 55 |     name: str
 56 |     description: str
 57 |     parameters: List[ToolParameter] = field(default_factory=list)
 58 |     requires_confirmation: bool = False  # Whether user confirmation is required
 59 |
 60 |     def to_openai_format(self) -> Dict[str, Any]:
 61 |         """
 62 |         Convert to OpenAI function calling format
 63 |
 64 |         Returns:
 65 |             Dict compatible with OpenAI's tools API
 66 |         """
 67 |         # Build parameters schema
 68 |         properties = {}
 69 |         required = []
 70 |
 71 |         for param in self.parameters:
 72 |             properties[param.name] = param.to_dict()
 73 |             if param.required:
 74 |                 required.append(param.name)
 75 |
 76 |         return {
 77 |             "type": "function",
 78 |             "function": {
 79 |                 "name": self.name,
 80 |                 "description": self.description,
 81 |                 "parameters": {
 82 |                     "type": "object",
 83 |                     "properties": properties,
 84 |                     "required": required,
 85 |                 }
 86 |             }
 87 |         }
 88 |
 89 |
 90 | class BaseTool(ABC):
 91 |     """
 92 |     Abstract base class for all tools
 93 |
 94 |     Tools are the actions that the agent can take in the environment.
 95 |     Each tool must define its interface and implement the execution logic.
96 | """ 97 | 98 | @property 99 | @abstractmethod 100 | def definition(self) -> ToolDefinition: 101 | """ 102 | Get the tool definition 103 | 104 | Returns: 105 | ToolDefinition describing the tool's interface 106 | """ 107 | pass 108 | 109 | @property 110 | def requires_confirmation(self) -> bool: 111 | """ 112 | Check if this tool requires user confirmation before execution 113 | 114 | Dangerous operations (like writing files) should require confirmation 115 | unless running in YOLO mode. 116 | 117 | Returns: 118 | True if confirmation is required, False otherwise 119 | """ 120 | return self.definition.requires_confirmation 121 | 122 | @abstractmethod 123 | def execute(self, **kwargs) -> ToolResult: 124 | """ 125 | Execute the tool with the given arguments 126 | 127 | Args: 128 | **kwargs: Tool-specific arguments 129 | 130 | Returns: 131 | ToolResult containing: 132 | - content: Full result for LLM (detailed information) 133 | - display: User-friendly display text (concise summary) 134 | 135 | Raises: 136 | Exception: If tool execution fails 137 | """ 138 | pass 139 | 140 | def validate_arguments(self, **kwargs) -> None: 141 | """ 142 | Validate tool arguments before execution 143 | 144 | Args: 145 | **kwargs: Arguments to validate 146 | 147 | Raises: 148 | ValueError: If arguments are invalid 149 | """ 150 | # Check required parameters 151 | for param in self.definition.parameters: 152 | if param.required and param.name not in kwargs: 153 | raise ValueError(f"Missing required parameter: {param.name}") 154 | 155 | -------------------------------------------------------------------------------- /codefuse/llm/factory.py: -------------------------------------------------------------------------------- 1 | """ 2 | LLM Factory - Create LLM instances based on provider 3 | """ 4 | 5 | from typing import Optional 6 | 7 | from codefuse.llm.base import BaseLLM 8 | from codefuse.llm.providers.openai_compatible import OpenAICompatibleLLM 9 | from codefuse.observability import mainLogger 10 | 11 | 12 | def create_llm( 13 | provider: str = "openai_compatible", 14 | model: str = "gpt-4o", 15 | api_key: str = "", 16 | base_url: Optional[str] = None, 17 | temperature: float = 0.0, 18 | max_tokens: Optional[int] = None, 19 | timeout: int = 60, 20 | parallel_tool_calls: bool = True, 21 | enable_thinking: bool = False, 22 | top_k: Optional[int] = None, 23 | top_p: Optional[float] = None, 24 | session_id: Optional[str] = None, 25 | **kwargs 26 | ) -> BaseLLM: 27 | """ 28 | Factory function to create LLM instances 29 | 30 | Args: 31 | provider: LLM provider type 32 | - "openai_compatible": OpenAI API and compatible providers (default) 33 | - "anthropic": Anthropic Claude API 34 | - "gemini": Google Gemini API 35 | model: Model identifier (e.g., "gpt-4o", "claude-3-5-sonnet", etc.) 
 36 |         api_key: API key for authentication
 37 |         base_url: Base URL for API endpoint (for openai_compatible)
 38 |         temperature: Sampling temperature (0-2)
 39 |         max_tokens: Maximum tokens to generate
 40 |         timeout: Request timeout in seconds
 41 |         parallel_tool_calls: Enable parallel tool calls (default: True)
 42 |         enable_thinking: Enable thinking mode for models that support it (default: False)
 43 |         top_k: Top-k sampling parameter (default: None)
 44 |         top_p: Nucleus sampling parameter (0-1, default: None)
 45 |         session_id: Session ID for Anthropic provider (used for x-idealab-session-id header)
 46 |         **kwargs: Additional provider-specific parameters
 47 |
 48 |     Returns:
 49 |         BaseLLM instance configured for the specified provider
 50 |
 51 |     Raises:
 52 |         ValueError: If provider is not supported
 53 |
 54 |     Examples:
 55 |         >>> # OpenAI
 56 |         >>> llm = create_llm(
 57 |         ...     provider="openai_compatible",
 58 |         ...     model="gpt-4o",
 59 |         ...     api_key="sk-..."
 60 |         ... )
 61 |
 62 |         >>> # DeepSeek
 63 |         >>> llm = create_llm(
 64 |         ...     provider="openai_compatible",
 65 |         ...     model="deepseek-chat",
 66 |         ...     api_key="sk-...",
 67 |         ...     base_url="https://api.deepseek.com"
 68 |         ... )
 69 |
 70 |         >>> # Anthropic (when implemented)
 71 |         >>> llm = create_llm(
 72 |         ...     provider="anthropic",
 73 |         ...     model="claude-3-5-sonnet-20241022",
 74 |         ...     api_key="sk-ant-..."
 75 |         ... )
 76 |     """
 77 |     provider = provider.lower().strip()
 78 |
 79 |     mainLogger.info(f"Creating LLM: provider={provider}, model={model}")
 80 |
 81 |     if provider == "anthropic":
 82 |         from codefuse.llm.providers.anthropic import AnthropicLLM
 83 |         return AnthropicLLM(
 84 |             model=model,
 85 |             api_key=api_key,
 86 |             base_url=base_url,
 87 |             temperature=temperature,
 88 |             max_tokens=max_tokens,
 89 |             timeout=timeout,
 90 |             parallel_tool_calls=parallel_tool_calls,
 91 |             enable_thinking=enable_thinking,
 92 |             top_k=top_k,
 93 |             top_p=top_p,
 94 |             session_id=session_id,
 95 |             **kwargs
 96 |         )
 97 |
 98 |     elif provider == "gemini":
 99 |         from codefuse.llm.providers.gemini import GeminiLLM
100 |         return GeminiLLM(
101 |             model=model,
102 |             api_key=api_key,
103 |             base_url=base_url,
104 |             temperature=temperature,
105 |             max_tokens=max_tokens,
106 |             timeout=timeout,
107 |             parallel_tool_calls=parallel_tool_calls,
108 |             enable_thinking=enable_thinking,
109 |             top_k=top_k,
110 |             top_p=top_p,
111 |             **kwargs
112 |         )
113 |
114 |     elif provider in ("openai_compatible", "openai"):
115 |         # Default to OpenAI Compatible for all unspecified providers
116 |         return OpenAICompatibleLLM(
117 |             model=model,
118 |             api_key=api_key,
119 |             base_url=base_url,
120 |             temperature=temperature,
121 |             max_tokens=max_tokens,
122 |             timeout=timeout,
123 |             parallel_tool_calls=parallel_tool_calls,
124 |             enable_thinking=enable_thinking,
125 |             top_k=top_k,
126 |             top_p=top_p,
127 |             **kwargs
128 |         )
129 |
130 |     else:
131 |         mainLogger.warning(
132 |             f"Unknown provider '{provider}', defaulting to openai_compatible. "
133 |             f"Supported providers: openai_compatible, anthropic, gemini"
134 |         )
135 |         return OpenAICompatibleLLM(
136 |             model=model,
137 |             api_key=api_key,
138 |             base_url=base_url,
139 |             temperature=temperature,
140 |             max_tokens=max_tokens,
141 |             timeout=timeout,
142 |             parallel_tool_calls=parallel_tool_calls,
143 |             enable_thinking=enable_thinking,
144 |             top_k=top_k,
145 |             top_p=top_p,
146 |             **kwargs
147 |         )
--------------------------------------------------------------------------------
/codefuse/tools/builtin/write_file.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Write File Tool - Write or modify file contents in the workspace
  3 | """
  4 |
  5 | from pathlib import Path
  6 | from typing import Optional
  7 |
  8 | from codefuse.tools.base import BaseTool, ToolDefinition, ToolParameter, ToolResult
  9 | from codefuse.tools.builtin.filesystem_base import FileSystemToolMixin, MAX_TOKENS
 10 | from codefuse.observability import mainLogger
 11 |
 12 |
 13 | class WriteFileTool(FileSystemToolMixin, BaseTool):
 14 |     """
 15 |     Tool for writing file contents
 16 |
 17 |     Features:
 18 |     - Create new files or overwrite existing files
 19 |     - Safety checks for path validity and workspace restriction
 20 |     - Content size validation
 21 |     - Requires user confirmation (unless in YOLO mode)
 22 |     """
 23 |
 24 |     def __init__(self, workspace_root: Optional[Path] = None):
 25 |         """
 26 |         Initialize WriteFileTool
 27 |
 28 |         Args:
 29 |             workspace_root: Workspace root directory to restrict file access.
 30 |                 Defaults to current working directory.
 31 |         """
 32 |         super().__init__(workspace_root=workspace_root)
 33 |
 34 |     @property
 35 |     def definition(self) -> ToolDefinition:
 36 |         """Define the write_file tool"""
 37 |         return ToolDefinition(
 38 |             name="write_file",
 39 |             description=(
 40 |                 "Write content to a file in the workspace (creates or overwrites).\n\n"
 41 |                 "Important:\n"
 42 |                 "- The path parameter MUST be an absolute path, not a relative path\n"
 43 |                 "- File must be within the workspace root directory\n"
 44 |                 "- Content size is limited to prevent excessive file sizes"
 45 |             ),
 46 |             parameters=[
 47 |                 ToolParameter(
 48 |                     name="path",
 49 |                     type="string",
 50 |                     description="Absolute path to the file to write",
 51 |                     required=True,
 52 |                 ),
 53 |                 ToolParameter(
 54 |                     name="content",
 55 |                     type="string",
 56 |                     description="Content to write to the file",
 57 |                     required=True,
 58 |                 ),
 59 |             ],
 60 |             requires_confirmation=True,  # Writing is dangerous!
61 | ) 62 | 63 | def execute( 64 | self, 65 | path: str, 66 | content: str, 67 | **kwargs 68 | ) -> ToolResult: 69 | """ 70 | Execute the write_file tool 71 | 72 | Args: 73 | path: Absolute path to the file to write 74 | content: Content to write to the file 75 | 76 | Returns: 77 | ToolResult with: 78 | - content: Detailed success/error message for LLM 79 | - display: User-friendly summary for UI 80 | """ 81 | try: 82 | # Step 1: Check if path is absolute 83 | if error := self._check_absolute_path(path): 84 | return self._create_error_result(error, "Path must be absolute") 85 | 86 | # Step 2: Resolve path 87 | file_path = self._resolve_path(path) 88 | 89 | # Step 3: Check if within workspace 90 | if error := self._check_within_workspace(file_path): 91 | mainLogger.warning(f"File write outside workspace: {error}") 92 | return self._create_error_result(error, "Access denied: outside workspace") 93 | 94 | # Step 4: Check content size limit 95 | if error := self._check_token_limit(content, MAX_TOKENS): 96 | mainLogger.warning(f"Content too large: {error}") 97 | return self._create_error_result(error, f"Content too large (>{MAX_TOKENS:,} tokens)") 98 | 99 | # Step 5: Create parent directories if they don't exist 100 | file_path.parent.mkdir(parents=True, exist_ok=True) 101 | 102 | # Step 6: Check if file exists (for logging) 103 | file_existed = file_path.exists() 104 | 105 | # Step 7: Write content to file 106 | with open(file_path, 'w', encoding='utf-8') as f: 107 | f.write(content) 108 | 109 | # Step 8: Calculate stats and return result 110 | lines = content.count('\n') + 1 111 | chars = len(content) 112 | 113 | action = "Updated" if file_existed else "Created" 114 | mainLogger.info(f"{action} {file_path} ({lines} lines, {chars} characters)") 115 | 116 | result_content = f"Successfully {action.lower()} file: {path} ({lines} lines, {chars} characters)" 117 | result_display = f"✓ {action} {path} ({lines} lines)" 118 | 119 | return ToolResult(content=result_content, display=result_display) 120 | 121 | except PermissionError as e: 122 | error_msg = f"Permission denied writing file: {path}" 123 | mainLogger.error(f"{error_msg}: {e}") 124 | return self._create_error_result(error_msg, f"Permission denied: {path}") 125 | except Exception as e: 126 | error_msg = f"Unexpected error writing file: {path} - {str(e)}" 127 | mainLogger.error(f"Unexpected error writing file: {path}", exc_info=True) 128 | return self._create_error_result(error_msg, f"Error writing {path}: {str(e)}") 129 | 130 | -------------------------------------------------------------------------------- /codefuse/llm/providers/anthropic.py: -------------------------------------------------------------------------------- 1 | """ 2 | Anthropic LLM Implementation with KV Cache Support 3 | 4 | This implementation extends OpenAICompatibleLLM to add Anthropic-specific 5 | prompt caching capabilities using cache_control markers. 6 | """ 7 | 8 | from typing import List, Optional, Dict, Any 9 | 10 | from codefuse.llm.base import Message, MessageRole 11 | from codefuse.llm.providers.openai_compatible import OpenAICompatibleLLM 12 | from codefuse.observability import mainLogger 13 | 14 | 15 | class AnthropicLLM(OpenAICompatibleLLM): 16 | """ 17 | Anthropic Claude LLM implementation with KV cache support 18 | 19 | This class extends OpenAICompatibleLLM and adds Anthropic-specific 20 | prompt caching by marking the last Tool message with cache_control. 
21 | 22 | Caching Strategy: 23 | - If messages end with USER: No cache marker (new request, short context) 24 | - If messages end with TOOL: Add cache_control to last Tool message 25 | 26 | This allows caching of accumulated context during agent loops while 27 | keeping fresh user queries uncached. 28 | """ 29 | 30 | def __init__(self, session_id: Optional[str] = None, **kwargs): 31 | """ 32 | Initialize Anthropic client with OpenAI-compatible SDK 33 | 34 | Args: 35 | session_id: Session ID for x-idealab-session-id header (ensures requests hit same instance) 36 | **kwargs: Other parameters passed to OpenAICompatibleLLM 37 | """ 38 | # Set default base_url for Anthropic if not provided 39 | # But keep user-provided base_url (for internal proxy services) 40 | if 'base_url' not in kwargs or kwargs['base_url'] is None: 41 | kwargs['base_url'] = "https://api.anthropic.com/v1" 42 | 43 | # Store session_id before calling parent __init__ 44 | self._session_id = session_id 45 | 46 | super().__init__(**kwargs) 47 | 48 | # Recreate client with custom header if session_id is provided 49 | if session_id: 50 | from openai import OpenAI 51 | self.client = OpenAI( 52 | api_key=self.api_key, 53 | base_url=self.base_url, 54 | timeout=self.timeout, 55 | default_headers={ 56 | 'x-idealab-session-id': session_id 57 | } 58 | ) 59 | mainLogger.info( 60 | f"Initialized Anthropic LLM with KV cache support: model={self.model}, " 61 | f"base_url={self.base_url}, session_id={session_id}" 62 | ) 63 | else: 64 | mainLogger.info( 65 | f"Initialized Anthropic LLM with KV cache support: model={self.model}, base_url={self.base_url}" 66 | ) 67 | 68 | @property 69 | def supports_prompt_caching(self) -> bool: 70 | """Anthropic has native prompt caching support""" 71 | return True 72 | 73 | def _convert_messages(self, messages: List[Message]) -> List[Dict[str, Any]]: 74 | """ 75 | Convert internal Message format to Anthropic format with cache control 76 | 77 | This method extends the parent's _convert_messages to add cache_control 78 | markers on the last Tool message (if messages end with TOOL role). 
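        Illustrative sketch of the intended transformation (assumed message
        shapes, not verbatim API output):

            # before: {"role": "tool", "tool_call_id": "call_1", "content": "..."}
            # after:  {"role": "tool", "tool_call_id": "call_1", "content": "...",
            #          "cache_control": {"type": "ephemeral"}}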
79 | 80 | Args: 81 | messages: List of internal Message objects 82 | 83 | Returns: 84 | List of message dictionaries in Anthropic API format 85 | """ 86 | # First convert using parent's logic 87 | openai_messages = super()._convert_messages(messages) 88 | 89 | # Check if we should add cache control 90 | if not messages or len(messages) == 0: 91 | return openai_messages 92 | 93 | last_message = messages[-1] 94 | 95 | # Only add cache control if last message is TOOL 96 | if last_message.role != MessageRole.TOOL: 97 | mainLogger.debug( 98 | f"No cache control added: last message role is {last_message.role.value}" 99 | ) 100 | return openai_messages 101 | 102 | # Add cache control to the last message (which is a Tool message) 103 | last_msg_dict = openai_messages[-1] 104 | 105 | # Convert content to array format with cache_control 106 | content = last_msg_dict.get("content", "") 107 | 108 | if isinstance(content, str): 109 | # Convert string content to content block array with cache_control 110 | # last_msg_dict["content"] = [ 111 | # { 112 | # "type": "text", 113 | # "text": content, 114 | # "cache_control": {"type": "ephemeral"} 115 | # } 116 | # ] 117 | last_msg_dict["cache_control"] = {"type": "ephemeral"} 118 | mainLogger.debug( 119 | "Added cache_control to last Tool message", 120 | tool_call_id=last_msg_dict.get("tool_call_id") 121 | ) 122 | elif isinstance(content, list): 123 | # Content is already an array, add cache_control to last block 124 | if len(content) > 0: 125 | content[-1]["cache_control"] = {"type": "ephemeral"} 126 | mainLogger.debug( 127 | "Added cache_control to last content block of Tool message", 128 | tool_call_id=last_msg_dict.get("tool_call_id") 129 | ) 130 | 131 | return openai_messages 132 | 133 | -------------------------------------------------------------------------------- /codefuse/tools/registry.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tool Registry - Manage available tools 3 | """ 4 | 5 | from pathlib import Path 6 | from typing import Dict, Optional, List, Any 7 | 8 | from codefuse.tools.base import BaseTool, ToolDefinition 9 | from codefuse.llm.base import Tool as LLMTool 10 | from codefuse.observability import mainLogger 11 | 12 | 13 | class ToolRegistry: 14 | """ 15 | Registry for managing available tools 16 | 17 | The registry maintains a collection of tools and provides methods 18 | to register, retrieve, and list tools. 
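    Typical usage (an illustrative sketch; assumes a workspace at the current
    directory and omits the optional read_tracker):

        from pathlib import Path
        from codefuse.tools.builtin import ReadFileTool

        registry = ToolRegistry()
        registry.register(ReadFileTool(workspace_root=Path.cwd()))
        tool = registry.get_tool("read_file")  # -> ReadFileTool instance, or None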
19 | """ 20 | 21 | def __init__(self): 22 | """Initialize empty tool registry""" 23 | self._tools: Dict[str, BaseTool] = {} 24 | mainLogger.info("Initialized empty ToolRegistry") 25 | 26 | def register(self, tool: BaseTool) -> None: 27 | """ 28 | Register a tool 29 | 30 | Args: 31 | tool: Tool instance to register 32 | """ 33 | name = tool.definition.name 34 | if name in self._tools: 35 | mainLogger.warning("Tool already registered, overwriting", tool_name=name) 36 | 37 | self._tools[name] = tool 38 | mainLogger.info( 39 | "Registered tool", 40 | tool_name=name, 41 | requires_confirmation=tool.requires_confirmation, 42 | ) 43 | 44 | def get_tool(self, name: str) -> Optional[BaseTool]: 45 | """ 46 | Get a tool by name 47 | 48 | Args: 49 | name: Tool name 50 | 51 | Returns: 52 | Tool instance if found, None otherwise 53 | """ 54 | return self._tools.get(name) 55 | 56 | def get_all_definitions(self) -> List[ToolDefinition]: 57 | """ 58 | Get all tool definitions 59 | 60 | Returns: 61 | List of all registered tool definitions 62 | """ 63 | return [tool.definition for tool in self._tools.values()] 64 | 65 | def get_tools_for_llm(self, tool_names: Optional[List[str]] = None) -> List[LLMTool]: 66 | """ 67 | Get tools in LLM-compatible format 68 | 69 | Args: 70 | tool_names: Optional list of specific tool names to include. 71 | If None, includes all tools. 72 | 73 | Returns: 74 | List of Tool objects compatible with LLM.generate() 75 | """ 76 | definitions = self.get_all_definitions() 77 | 78 | # Filter by tool_names if specified 79 | if tool_names is not None: 80 | definitions = [d for d in definitions if d.name in tool_names] 81 | 82 | # Convert to LLM Tool format 83 | llm_tools = [] 84 | for definition in definitions: 85 | openai_format = definition.to_openai_format() 86 | llm_tools.append( 87 | LLMTool( 88 | type=openai_format["type"], 89 | function=openai_format["function"] 90 | ) 91 | ) 92 | 93 | return llm_tools 94 | 95 | def list_tool_names(self) -> List[str]: 96 | """ 97 | List all registered tool names 98 | 99 | Returns: 100 | List of tool names 101 | """ 102 | return list(self._tools.keys()) 103 | 104 | def __len__(self) -> int: 105 | """Get number of registered tools""" 106 | return len(self._tools) 107 | 108 | def __contains__(self, name: str) -> bool: 109 | """Check if a tool is registered""" 110 | return name in self._tools 111 | 112 | 113 | def create_default_registry( 114 | workspace_root: Optional["Path"] = None, 115 | read_tracker: Optional[Any] = None, 116 | config: Optional[Any] = None, 117 | ) -> ToolRegistry: 118 | """ 119 | Create a default tool registry with all built-in tools 120 | 121 | Args: 122 | workspace_root: Workspace root directory for file operations. 123 | Defaults to current working directory. 124 | read_tracker: Optional read tracker for file read tracking (needed by ReadFileTool/EditFileTool). 125 | config: Optional configuration object for tool-specific settings. 
126 | 127 | Returns: 128 | ToolRegistry with all built-in tools registered 129 | """ 130 | from pathlib import Path 131 | from codefuse.tools.builtin import ( 132 | ReadFileTool, 133 | WriteFileTool, 134 | EditFileTool, 135 | ListDirectoryTool, 136 | GrepTool, 137 | GlobTool, 138 | BashTool, 139 | ) 140 | 141 | registry = ToolRegistry() 142 | 143 | # Resolve workspace_root 144 | workspace = (workspace_root or Path.cwd()).resolve() 145 | 146 | # Register built-in tools with workspace_root 147 | # ReadFileTool and EditFileTool need read_tracker for file read tracking 148 | registry.register(ReadFileTool(workspace_root=workspace, read_tracker=read_tracker)) 149 | registry.register(WriteFileTool(workspace_root=workspace)) 150 | registry.register(EditFileTool(workspace_root=workspace, read_tracker=read_tracker)) 151 | # registry.register(ListDirectoryTool(workspace_root=workspace)) 152 | registry.register(GrepTool(workspace_root=workspace)) 153 | registry.register(GlobTool(workspace_root=workspace)) 154 | 155 | # Register BashTool with configuration 156 | bash_timeout = config.agent_config.bash_timeout if config else 30 157 | bash_allowed = config.agent_config.bash_allowed_commands if config else [] 158 | bash_disallowed = config.agent_config.bash_disallowed_commands if config else [] 159 | 160 | registry.register(BashTool( 161 | workspace_root=workspace, 162 | timeout=bash_timeout, 163 | allowed_commands=bash_allowed, 164 | disallowed_commands=bash_disallowed, 165 | )) 166 | 167 | mainLogger.info( 168 | "Created default registry", 169 | tool_count=len(registry.list_tool_names()), 170 | workspace_root=str(workspace) 171 | ) 172 | 173 | return registry 174 | 175 | -------------------------------------------------------------------------------- /codefuse/tools/builtin/filesystem_base.py: -------------------------------------------------------------------------------- 1 | """ 2 | Filesystem Tool Base - Common functionality for file system tools 3 | """ 4 | 5 | from pathlib import Path 6 | from typing import Optional, Tuple, List 7 | 8 | from codefuse.tools.base import ToolResult 9 | from codefuse.observability import mainLogger 10 | 11 | 12 | # File size and token limits 13 | MAX_FILE_SIZE_BYTES = 256 * 1024 # 256KB 14 | MAX_TOKENS = 25000 # Maximum tokens allowed in file content 15 | 16 | 17 | class FileSystemToolMixin: 18 | """ 19 | Mixin class providing common functionality for file system tools 20 | 21 | This mixin provides: 22 | - Path safety checks (absolute path, workspace restriction) 23 | - Content size limits (token estimation and checking) 24 | - Error handling utilities 25 | """ 26 | 27 | def __init__(self, workspace_root: Optional[Path] = None): 28 | """ 29 | Initialize the file system tool mixin 30 | 31 | Args: 32 | workspace_root: Workspace root directory to restrict file access. 33 | Defaults to current working directory. 
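        Note: every path check in this mixin resolves against this root; see
        _check_within_workspace below for the containment rule.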
34 | """ 35 | self._workspace_root = (workspace_root or Path.cwd()).resolve() 36 | mainLogger.debug(f"FileSystemToolMixin initialized with workspace_root: {self._workspace_root}") 37 | 38 | def _check_absolute_path(self, path: str) -> Optional[str]: 39 | """ 40 | Check if path is absolute 41 | 42 | Args: 43 | path: Path to check 44 | 45 | Returns: 46 | Error message if path is not absolute, None otherwise 47 | """ 48 | if not Path(path).is_absolute(): 49 | return f"Path must be absolute, but got relative path: {path}" 50 | return None 51 | 52 | def _check_within_workspace(self, file_path: Path) -> Optional[str]: 53 | """ 54 | Check if file is within workspace root directory 55 | 56 | Args: 57 | file_path: Path to check (must be resolved) 58 | 59 | Returns: 60 | Error message if file is outside workspace, None otherwise 61 | """ 62 | try: 63 | file_path.relative_to(self._workspace_root) 64 | return None 65 | except ValueError: 66 | return ( 67 | f"Path must be within workspace root ({self._workspace_root}), " 68 | f"but got: {file_path}" 69 | ) 70 | 71 | def _resolve_path(self, path: str) -> Path: 72 | """ 73 | Resolve a path to absolute form 74 | 75 | Args: 76 | path: Path to resolve (can be absolute or relative) 77 | 78 | Returns: 79 | Resolved absolute Path object 80 | """ 81 | return Path(path).expanduser().resolve() 82 | 83 | def _estimate_tokens(self, content: str) -> int: 84 | """ 85 | Estimate token count for content 86 | 87 | Uses rough estimation: characters / 4 88 | 89 | Args: 90 | content: Text content to estimate 91 | 92 | Returns: 93 | Estimated token count 94 | """ 95 | return len(content) // 4 96 | 97 | def _check_token_limit(self, content: str, max_tokens: int = MAX_TOKENS) -> Optional[str]: 98 | """ 99 | Check if content exceeds token limit 100 | 101 | Args: 102 | content: Content to check 103 | max_tokens: Maximum allowed tokens (default: MAX_TOKENS) 104 | 105 | Returns: 106 | Error message if content exceeds limit, None otherwise 107 | """ 108 | token_count = self._estimate_tokens(content) 109 | 110 | if token_count > max_tokens: 111 | return ( 112 | f"Content ({token_count:,} tokens) exceeds maximum ({max_tokens:,} tokens). " 113 | f"Please reduce the content size." 114 | ) 115 | return None 116 | 117 | def _create_error_result(self, error_msg: str, display_msg: str) -> ToolResult: 118 | """ 119 | Create a standardized error result 120 | 121 | Args: 122 | error_msg: Detailed error message for LLM 123 | display_msg: User-friendly error message for display 124 | 125 | Returns: 126 | ToolResult with error information 127 | """ 128 | return ToolResult( 129 | content=f"Error: {error_msg}", 130 | display=f"❌ {display_msg}" 131 | ) 132 | 133 | def _read_with_encoding_fallback(self, file_path: Path) -> Tuple[str, str]: 134 | """ 135 | Read file with multiple encoding fallbacks 136 | 137 | This method tries multiple encodings to read a file, falling back 138 | to more permissive options if the preferred encoding fails. 
139 | 140 | Args: 141 | file_path: Path to the file to read 142 | 143 | Returns: 144 | Tuple of (file_content, encoding_used) 145 | 146 | Raises: 147 | UnicodeDecodeError: If all encoding attempts fail 148 | """ 149 | encodings = [ 150 | ("utf-8", None), 151 | ("latin-1", None), 152 | ("utf-8", "replace"), 153 | ] 154 | 155 | last_exception = None 156 | for encoding, errors in encodings: 157 | try: 158 | content = file_path.read_text(encoding=encoding, errors=errors) 159 | return content, encoding 160 | except UnicodeDecodeError as e: 161 | last_exception = e 162 | continue 163 | 164 | # All encodings failed 165 | raise UnicodeDecodeError( 166 | "all", 167 | b"", 168 | 0, 169 | 1, 170 | f"Failed to decode file with all attempted encodings: {last_exception}" 171 | ) 172 | 173 | def _find_occurrence_lines(self, content: str, search_string: str) -> List[int]: 174 | """ 175 | Find line numbers where search_string starts 176 | 177 | This method handles both single-line and multi-line search strings. 178 | For multi-line strings, it returns the line number where each occurrence starts. 179 | 180 | Args: 181 | content: File content to search in 182 | search_string: String to search for (can be multi-line) 183 | 184 | Returns: 185 | List of line numbers (1-indexed) where search_string starts 186 | """ 187 | occurrence_lines = [] 188 | start_pos = 0 189 | 190 | # Find all occurrences in the content 191 | while True: 192 | pos = content.find(search_string, start_pos) 193 | if pos == -1: 194 | break 195 | 196 | # Count line number by counting newlines before this position 197 | line_num = content[:pos].count('\n') + 1 198 | occurrence_lines.append(line_num) 199 | 200 | # Move to next position 201 | start_pos = pos + 1 202 | 203 | return occurrence_lines 204 | 205 | def _format_with_line_numbers(self, content: str, start_line: int = 1) -> str: 206 | """ 207 | Format content with line numbers 208 | 209 | Format: LINE_NUMBER→LINE_CONTENT 210 | Line numbers are right-aligned to 6 characters 211 | 212 | Args: 213 | content: Text content to format 214 | start_line: Starting line number (1-indexed) 215 | 216 | Returns: 217 | Formatted content with line numbers 218 | """ 219 | if not content: 220 | return content 221 | 222 | lines = content.split('\n') 223 | formatted_lines = [] 224 | 225 | for i, line in enumerate(lines): 226 | line_num = start_line + i 227 | # Right-align line number to 6 characters 228 | formatted_lines.append(f"{line_num:6d}→{line}") 229 | 230 | return '\n'.join(formatted_lines) 231 | 232 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![](./images/codefuse_logo.png) 2 | # 🚀 CodeFuse-Agent (CFuse) 3 | 4 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE) 5 | [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/) 6 | 7 | **A lightweight, cleanly-architected agent framework designed for research and experimentation.** 8 | 9 | CodeFuse-Agent is fully open-source and can be installed with a single `pip install` command, providing a complete yet minimal toolset for code-related tasks. We open-source CFuse to facilitate reproducible research and encourage further exploration of LLM-based coding agents. 
10 | 11 | ## 🏆 SWE-bench Lite Results 12 | 13 | | Configuration | Resolved | 14 | |---------------|----------| 15 | | CFuse + Claude Sonnet 4.5 (Single Attempt) | **61%** | 16 | | CFuse + Trajectory-Aware Test-Time Scaling | **61.67%** | 17 | 18 | We introduce **Trajectory-Aware Test-Time Scaling (TTS)**, a novel verification mechanism that aggregates self-generated test cases from multiple trajectories for cross-validation, achieving state-of-the-art results on SWE-bench Lite. 19 | 20 | 📄 **Technical Report**: [tech_report.md](tech_report.md) 21 | 22 | ## ✨ Features 23 | 24 | ### Configurable Agent Profiles 25 | 26 | Agent behavior is defined through declarative Markdown profiles (system prompt, tools, model, etc.), enabling quick switching of system prompts and tool subsets without code changes. 27 | 28 | ### Dual Execution Modes 29 | 30 | - **Local Mode**: Execute tool calls directly in the local environment 31 | - **HTTP Mode**: Serve as a tool execution backend or delegate calls to remote sandboxes 32 | 33 | This decoupling of agent decisions from environment execution makes CFuse suitable as scaffolding for RL training pipelines. 34 | 35 | ### Built-in Tools 36 | 37 | Six essential tools for code exploration and modification: 38 | 39 | | Tool | Description | 40 | |------|-------------| 41 | | `read_file` | Read file contents with optional line range selection | 42 | | `write_file` | Create or overwrite files | 43 | | `edit_file` | Perform edits via search-and-replace | 44 | | `grep` | Fast code search powered by ripgrep | 45 | | `glob` | File discovery using glob patterns | 46 | | `bash` | Execute shell commands with timeout control | 47 | 48 | ## 🏗️ Architecture 49 | 50 | | Layer | Responsibility | 51 | |-------|----------------| 52 | | **Interaction** | Terminal UI / Headless / HTTP modes | 53 | | **Agent Loop** | Core lifecycle: LLM interaction, tool dispatch, iteration control | 54 | | **Context Engine** | Message history, environment context, compression, prompt assembly | 55 | | **LLM Provider** | OpenAI-compatible API support | 56 | | **Tool Execution** | 6 built-in tools + remote execution | 57 | | **Observability** | Trajectory logs, execution metrics, cost tracking | 58 | 59 | ## 📦 Installation 60 | 61 | ```bash 62 | pip install -e . 
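# editable install; assumes you are in the root of a cloned CodeFuse-Agent checkout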
63 | ``` 64 | 65 | ## 🔑 Configuration 66 | 67 | ### Required Environment Variables 68 | 69 | CodeFuse-Agent requires three environment variables to be configured: 70 | 71 | ```bash 72 | # Required: Your OpenAI API key (or compatible API key) 73 | export OPENAI_API_KEY=your-api-key 74 | 75 | # Required: The LLM model to use 76 | export LLM_MODEL=gpt-4o 77 | 78 | # Required: The API base URL 79 | export LLM_BASE_URL=https://api.openai.com/v1 80 | ``` 81 | 82 | **Important Notes:** 83 | - All three environment variables are **required** for the agent to function 84 | - `OPENAI_API_KEY` is the only API key variable used 85 | - `LLM_BASE_URL` can be set to any OpenAI-compatible API endpoint 86 | - `LLM_MODEL` should match the model name available on your API endpoint 87 | 88 | ### Configuration File (Optional) 89 | 90 | You can optionally create a `.cfuse.yaml` configuration file in your project root or `~/.cfuse.yaml`: 91 | 92 | ```yaml 93 | llm: 94 | provider: openai_compatible 95 | model: ${LLM_MODEL} # Uses environment variable 96 | api_key: ${OPENAI_API_KEY} # Uses environment variable 97 | base_url: ${LLM_BASE_URL} # Uses environment variable 98 | temperature: 0.0 99 | max_tokens: null 100 | timeout: 60 101 | 102 | agent_config: 103 | max_iterations: 200 104 | max_context_tokens: 128000 105 | enable_tools: true 106 | yolo: false 107 | agent: default 108 | workspace_root: . 109 | bash_timeout: 30 110 | 111 | logging: 112 | logs_dir: ~/.cfuse/logs 113 | verbose: false 114 | ``` 115 | 116 | **Configuration Priority** (highest to lowest): 117 | 1. CLI arguments (`--model`, `--api-key`, `--base-url`, etc.) 118 | 2. Environment variables (`OPENAI_API_KEY`, `LLM_MODEL`, `LLM_BASE_URL`) 119 | 3. Configuration file (`.cfuse.yaml`) 120 | 4. Default values 121 | 122 | ## 🚀 Quick Start 123 | 124 | ### Interactive Mode 125 | 126 | ```bash 127 | # Basic startup 128 | cfuse 129 | 130 | # Enable YOLO mode (auto-confirm all tool calls) 131 | cfuse --yolo 132 | 133 | # Start with specific workspace 134 | cfuse --workspace-root /path/to/project 135 | ``` 136 | 137 | ### Headless Mode 138 | 139 | ```bash 140 | # Single query 141 | cfuse -p "Read README.md and summarize it" 142 | 143 | # Auto-execute without confirmation 144 | cfuse -p "Analyze project structure" --yolo 145 | 146 | # Complex task with more iterations 147 | cfuse -p "Refactor the auth module" --yolo --max-iterations 300 148 | ``` 149 | 150 | ### Using Different Models 151 | 152 | ```bash 153 | # Use specific model 154 | cfuse --model gpt-4o --api-key sk-xxx --base-url https://api.openai.com/v1 155 | 156 | # Use local model (LM Studio, Ollama, etc.) 
157 | cfuse --model llama3 --api-key dummy --base-url http://localhost:1234/v1 158 | 159 | # Adjust temperature (0.0 = deterministic, higher = creative) 160 | cfuse -p "Fix bug in auth.py" --temperature 0.0 --yolo 161 | ``` 162 | 163 | ### Logging and Debugging 164 | 165 | Logs include `main.log`, `trajectory/`, and `llm_messages/` in `~/.cfuse/logs` 166 | 167 | ```bash 168 | # Enable verbose logging 169 | cfuse -p "Your task" --verbose --yolo 170 | 171 | # Custom log directory 172 | cfuse -p "Your task" --logs-dir ./my_logs --yolo 173 | ``` 174 | 175 | ### Common Usage Patterns 176 | 177 | ```bash 178 | # Bug fixing with verbose logs 179 | cfuse -p "Fix the authentication bug" --workspace-root ./backend --verbose --yolo 180 | 181 | # Code review with low temperature 182 | cfuse -p "Review src/utils/parser.py" --temperature 0.1 183 | 184 | # Long-running refactoring task 185 | cfuse -p "Refactor database layer" --max-iterations 500 --yolo --logs-dir ./refactor_logs 186 | ``` 187 | 188 | ## ⚙️ CLI Options 189 | 190 | ### Main Options 191 | 192 | | Option | Description | Default | 193 | |--------|-------------|---------| 194 | | `-p, --prompt TEXT` | User query (headless mode). If omitted, launches interactive mode. | `None` | 195 | | `--yolo` | Auto-confirm all tool calls without prompting | `False` | 196 | | `--workspace-root PATH` | Working directory for the agent | `.` | 197 | | `--agent TEXT` | Agent profile (`default`, `swe`, or path to `.md` file) | `default` | 198 | | `--max-iterations INT` | Maximum agent loop iterations | `200` | 199 | 200 | ### Model Configuration 201 | 202 | | Option | Description | Default | 203 | |--------|-------------|---------| 204 | | `--model TEXT` | LLM model name | `$LLM_MODEL` | 205 | | `--api-key TEXT` | API key for authentication | `$OPENAI_API_KEY` | 206 | | `--base-url TEXT` | API base URL | `$LLM_BASE_URL` | 207 | | `--temperature FLOAT` | Model temperature (0.0-2.0, lower = more deterministic) | `0.0` | 208 | | `--max-tokens INT` | Maximum tokens in response | `null` | 209 | | `--timeout INT` | API request timeout (seconds) | `60` | 210 | 211 | ### Logging 212 | 213 | | Option | Description | Default | 214 | |--------|-------------|---------| 215 | | `--logs-dir PATH` | Log directory path | `~/.cfuse/logs` | 216 | | `-v, --verbose` | Enable verbose logging | `False` | 217 | | `--stream / --no-stream` | Enable/disable streaming output | `True` | 218 | 219 | ### Other Options 220 | 221 | | Option | Description | 222 | |--------|-------------| 223 | | `--config PATH` | Path to YAML configuration file | 224 | | `--bash-timeout INT` | Timeout for bash commands (seconds, default: 30) | 225 | | `--max-context-tokens INT` | Maximum context window size (default: 128000) | 226 | | `--enable-tools / --no-tools` | Enable/disable tool execution | 227 | | `--http` | Launch HTTP server mode | 228 | | `--http-port INT` | HTTP server port (default: 8000) | 229 | | `--help` | Show help message | 230 | 231 | **Configuration Priority:** CLI args > Environment variables > Config file > Defaults 232 | 233 | ## 📄 License 234 | 235 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. 
236 | -------------------------------------------------------------------------------- /codefuse/tools/utils/ripgrep.py: -------------------------------------------------------------------------------- 1 | """ 2 | Ripgrep utility - Find and execute ripgrep commands 3 | """ 4 | 5 | import subprocess 6 | import shutil 7 | import platform 8 | import os 9 | from typing import List, Optional 10 | from pathlib import Path 11 | 12 | from codefuse.observability import mainLogger 13 | 14 | 15 | # Cache for ripgrep path 16 | _ripgrep_path: Optional[str] = None 17 | _ripgrep_type: Optional[str] = None # 'system', 'python', or 'bundled' 18 | 19 | 20 | def _get_bundled_ripgrep_path() -> Optional[Path]: 21 | """ 22 | Get path to bundled ripgrep binary for current platform 23 | 24 | Directory structure: codefuse/tools/utils/ripgrep/{arch}-{platform}/rg 25 | Example: x64-darwin, arm64-darwin, x64-linux, arm64-linux, x64-win32 26 | 27 | Returns: 28 | Path to bundled ripgrep binary, or None if not available 29 | """ 30 | try: 31 | # Detect platform 32 | system = platform.system().lower() # 'linux', 'darwin', 'windows' 33 | machine = platform.machine().lower() # 'x86_64', 'arm64', 'amd64', etc. 34 | 35 | # Normalize machine architecture 36 | if machine in ('amd64', 'x86_64', 'x64'): 37 | arch = 'x64' 38 | elif machine in ('arm64', 'aarch64'): 39 | arch = 'arm64' 40 | else: 41 | mainLogger.warning(f"Unsupported architecture for bundled ripgrep: {machine}") 42 | return None 43 | 44 | # Normalize platform name 45 | if system == 'darwin': 46 | platform_name = 'darwin' 47 | elif system == 'linux': 48 | platform_name = 'linux' 49 | elif system == 'windows': 50 | platform_name = 'win32' 51 | else: 52 | mainLogger.warning(f"Unsupported platform for bundled ripgrep: {system}") 53 | return None 54 | 55 | # Build directory name: {arch}-{platform} 56 | dir_name = f"{arch}-{platform_name}" 57 | 58 | # Get the package directory (where this file is located) 59 | utils_dir = Path(__file__).parent 60 | ripgrep_dir = utils_dir / 'ripgrep' / dir_name 61 | 62 | # Determine binary name 63 | binary_name = 'rg.exe' if system == 'windows' else 'rg' 64 | rg_binary = ripgrep_dir / binary_name 65 | 66 | # Check if binary exists 67 | if rg_binary.exists(): 68 | # Make sure it's executable on Unix-like systems 69 | if system != 'windows': 70 | try: 71 | os.chmod(rg_binary, 0o755) 72 | except Exception as e: 73 | mainLogger.warning(f"Failed to set executable permission on {rg_binary}: {e}") 74 | 75 | mainLogger.info(f"Found bundled ripgrep at: {rg_binary}") 76 | return rg_binary 77 | else: 78 | mainLogger.debug(f"Bundled ripgrep not found at: {rg_binary}") 79 | return None 80 | 81 | except Exception as e: 82 | mainLogger.warning(f"Error locating bundled ripgrep: {e}") 83 | return None 84 | 85 | 86 | def find_ripgrep() -> "tuple[Optional[str], Optional[str]]": 87 | """ 88 | Find available ripgrep executable 89 | 90 | Priority: 91 | 1. System-installed ripgrep (rg command) 92 | 2. Python ripgrep-python package 93 | 3. Bundled ripgrep binary (platform-specific) 94 | 95 | Returns: 96 | Tuple of (ripgrep_path, ripgrep_type) where type is 'system', 'python', or 'bundled' 97 | Returns (None, None) if ripgrep is not found 98 | """ 99 | global _ripgrep_path, _ripgrep_type 100 | 101 | # Return cached result if available 102 | if _ripgrep_path is not None: 103 | return _ripgrep_path, _ripgrep_type 104 | 105 | # 1. 
Try system ripgrep 106 | rg_path = shutil.which('rg') 107 | if rg_path: 108 | _ripgrep_path = rg_path 109 | _ripgrep_type = 'system' 110 | mainLogger.info(f"Found system ripgrep at: {rg_path}") 111 | return _ripgrep_path, _ripgrep_type 112 | 113 | # 2. Try Python ripgrep-python package 114 | try: 115 | import ripgrep 116 | # ripgrep-python provides a 'rg' function or path 117 | if hasattr(ripgrep, 'rg'): 118 | _ripgrep_path = 'ripgrep-python' 119 | _ripgrep_type = 'python' 120 | mainLogger.info("Found Python ripgrep-python package") 121 | return _ripgrep_path, _ripgrep_type 122 | except ImportError: 123 | pass 124 | 125 | # 3. Try bundled ripgrep binary 126 | bundled_path = _get_bundled_ripgrep_path() 127 | if bundled_path: 128 | _ripgrep_path = str(bundled_path) 129 | _ripgrep_type = 'bundled' 130 | mainLogger.info(f"Using bundled ripgrep: {bundled_path}") 131 | return _ripgrep_path, _ripgrep_type 132 | 133 | # Not found 134 | mainLogger.warning( 135 | "Ripgrep not found. Please install ripgrep:\n" 136 | " - macOS: brew install ripgrep\n" 137 | " - Ubuntu/Debian: apt install ripgrep\n" 138 | " - Or: pip install ripgrep-python" 139 | ) 140 | return None, None 141 | 142 | 143 | def execute_ripgrep( 144 | args: List[str], 145 | search_path: str, 146 | timeout: Optional[float] = 30.0 147 | ) -> List[str]: 148 | """ 149 | Execute ripgrep command and return output lines 150 | 151 | Args: 152 | args: List of ripgrep arguments (without the 'rg' command itself) 153 | search_path: Path to search in 154 | timeout: Command timeout in seconds (default: 30.0) 155 | 156 | Returns: 157 | List of output lines (stdout) 158 | 159 | Raises: 160 | RuntimeError: If ripgrep is not found 161 | subprocess.TimeoutExpired: If command times out 162 | subprocess.CalledProcessError: If ripgrep returns non-zero exit code (except 1 for no matches) 163 | """ 164 | rg_path, rg_type = find_ripgrep() 165 | 166 | if rg_path is None: 167 | raise RuntimeError( 168 | "Ripgrep is not available. 
Please install ripgrep:\n" 169 | " - macOS: brew install ripgrep\n" 170 | " - Ubuntu/Debian: apt install ripgrep\n" 171 | " - Or: pip install ripgrep-python" 172 | ) 173 | 174 | # Build command 175 | if rg_type == 'system': 176 | cmd = [rg_path] + args + ['--', search_path] 177 | elif rg_type == 'bundled': 178 | # Use the bundled binary path directly 179 | cmd = [rg_path] + args + ['--', search_path] 180 | elif rg_type == 'python': 181 | # For ripgrep-python, we still use subprocess but with 'rg' command 182 | # The package should have made 'rg' available 183 | cmd = ['rg'] + args + ['--', search_path] 184 | else: 185 | raise RuntimeError(f"Unknown ripgrep type: {rg_type}") 186 | 187 | mainLogger.debug(f"Executing ripgrep: {' '.join(cmd)}") 188 | 189 | try: 190 | # Run ripgrep command 191 | result = subprocess.run( 192 | cmd, 193 | stdout=subprocess.PIPE, 194 | stderr=subprocess.PIPE, 195 | text=True, 196 | timeout=timeout, 197 | check=False, # We'll handle exit codes manually 198 | ) 199 | 200 | # Exit code 0: matches found 201 | # Exit code 1: no matches found (not an error) 202 | # Exit code 2+: actual error 203 | if result.returncode == 0: 204 | # Matches found 205 | lines = result.stdout.splitlines() 206 | mainLogger.debug(f"Ripgrep found {len(lines)} result lines") 207 | return lines 208 | elif result.returncode == 1: 209 | # No matches found (not an error for ripgrep) 210 | mainLogger.debug("Ripgrep found no matches") 211 | return [] 212 | else: 213 | # Actual error 214 | error_msg = result.stderr.strip() or f"Ripgrep exited with code {result.returncode}" 215 | mainLogger.error(f"Ripgrep error: {error_msg}") 216 | raise subprocess.CalledProcessError( 217 | result.returncode, 218 | cmd, 219 | output=result.stdout, 220 | stderr=result.stderr 221 | ) 222 | 223 | except subprocess.TimeoutExpired as e: 224 | mainLogger.error(f"Ripgrep command timed out after {timeout}s") 225 | raise 226 | except FileNotFoundError as e: 227 | # This shouldn't happen if find_ripgrep worked, but handle it 228 | raise RuntimeError(f"Ripgrep executable not found: {e}") 229 | 230 | -------------------------------------------------------------------------------- /codefuse/core/remote_tool_executor.py: -------------------------------------------------------------------------------- 1 | """ 2 | Remote Tool Executor - Handles tool execution via HTTP requests 3 | """ 4 | 5 | import json 6 | import time 7 | from typing import Dict, Any, Optional 8 | 9 | import requests 10 | 11 | from codefuse.tools.base import ToolResult 12 | from codefuse.observability import mainLogger 13 | 14 | 15 | class RemoteToolExecutor: 16 | """ 17 | Executes tools remotely via HTTP POST requests 18 | 19 | This executor sends tool calls to a remote service and receives 20 | the execution results over HTTP. 
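    Wire format, as constructed and parsed in execute() below:

        request:  {"instance_id": "...", "toolName": "...", "toolArgs": {...}}
        response: {"response": {"result": "<tool output>", "success": true}}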
21 | """ 22 | 23 | def __init__( 24 | self, 25 | url: str, 26 | instance_id: str, 27 | timeout: int = 60, 28 | ): 29 | """ 30 | Initialize remote tool executor 31 | 32 | Args: 33 | url: Remote tool service URL 34 | instance_id: Instance ID for the remote execution environment 35 | timeout: Timeout for HTTP requests in seconds 36 | """ 37 | self.url = url 38 | self.instance_id = instance_id 39 | self.timeout = timeout 40 | 41 | mainLogger.info( 42 | "RemoteToolExecutor initialized", 43 | url=url, 44 | instance_id=instance_id, 45 | timeout=timeout, 46 | ) 47 | 48 | def execute( 49 | self, 50 | tool_name: str, 51 | tool_args: Dict[str, Any], 52 | session_id: str, 53 | ) -> ToolResult: 54 | """ 55 | Execute a tool remotely 56 | 57 | Args: 58 | tool_name: Name of the tool to execute 59 | tool_args: Arguments for the tool 60 | session_id: Session ID for logging 61 | 62 | Returns: 63 | ToolResult containing the execution result 64 | """ 65 | # Construct request payload 66 | payload = { 67 | "instance_id": self.instance_id, 68 | "toolName": tool_name, 69 | "toolArgs": tool_args, 70 | } 71 | 72 | mainLogger.info( 73 | "Sending remote tool call", 74 | tool_name=tool_name, 75 | instance_id=self.instance_id, 76 | url=self.url, 77 | payload=payload, 78 | session_id=session_id, 79 | ) 80 | 81 | start_time = time.time() 82 | 83 | try: 84 | # Send POST request 85 | response = requests.post( 86 | self.url, 87 | json=payload, 88 | headers={"Content-Type": "application/json"}, 89 | timeout=self.timeout, 90 | ) 91 | 92 | response_time = time.time() - start_time 93 | 94 | mainLogger.info( 95 | "Received remote tool response", 96 | tool_name=tool_name, 97 | status_code=response.status_code, 98 | response_time_seconds=round(response_time, 2), 99 | session_id=session_id, 100 | ) 101 | 102 | # Check HTTP status code 103 | if response.status_code != 200: 104 | error_msg = f"Remote tool call failed with status {response.status_code}" 105 | mainLogger.error( 106 | "Remote tool call HTTP error", 107 | tool_name=tool_name, 108 | status_code=response.status_code, 109 | response_text=response.text[:500], # Log first 500 chars 110 | session_id=session_id, 111 | ) 112 | return ToolResult( 113 | content=f"Error: {error_msg}\nResponse: {response.text}", 114 | display=f"❌ Remote tool call failed (HTTP {response.status_code})", 115 | ) 116 | 117 | # Parse JSON response 118 | try: 119 | response_data = response.json() 120 | except json.JSONDecodeError as e: 121 | mainLogger.error( 122 | "Failed to parse remote tool response JSON", 123 | tool_name=tool_name, 124 | error=str(e), 125 | response_text=response.text[:500], 126 | session_id=session_id, 127 | exc_info=True, 128 | ) 129 | return ToolResult( 130 | content=f"Error: Failed to parse JSON response: {str(e)}", 131 | display=f"❌ Invalid JSON response from remote tool", 132 | ) 133 | 134 | # Validate response structure 135 | if "response" not in response_data: 136 | mainLogger.error( 137 | "Invalid remote tool response structure: missing 'response' field", 138 | tool_name=tool_name, 139 | response_data=response_data, 140 | session_id=session_id, 141 | ) 142 | return ToolResult( 143 | content=f"Error: Invalid response structure: {json.dumps(response_data)}", 144 | display=f"❌ Invalid response format from remote tool", 145 | ) 146 | 147 | response_inner = response_data["response"] 148 | 149 | # Extract result and success flag 150 | result_content = response_inner.get("result", "") 151 | success = response_inner.get("success", False) 152 | 153 | mainLogger.info( 154 | "Remote tool 
execution completed", 155 | tool_name=tool_name, 156 | success=success, 157 | result_length=len(result_content), 158 | session_id=session_id, 159 | ) 160 | 161 | # Return result 162 | if success: 163 | return ToolResult( 164 | content=result_content, 165 | display=f"✓ Remote tool '{tool_name}' executed successfully", 166 | ) 167 | else: 168 | # Tool executed but reported failure 169 | mainLogger.warning( 170 | "Remote tool execution reported failure", 171 | tool_name=tool_name, 172 | result=result_content[:500], 173 | session_id=session_id, 174 | ) 175 | return ToolResult( 176 | content=result_content, 177 | display=f"⚠ Remote tool '{tool_name}' completed with errors", 178 | ) 179 | 180 | except requests.exceptions.Timeout: 181 | error_msg = f"Remote tool call timed out after {self.timeout} seconds" 182 | mainLogger.error( 183 | "Remote tool call timeout", 184 | tool_name=tool_name, 185 | timeout=self.timeout, 186 | session_id=session_id, 187 | ) 188 | return ToolResult( 189 | content=f"Error: {error_msg}", 190 | display=f"❌ Remote tool call timed out", 191 | ) 192 | 193 | except requests.exceptions.ConnectionError as e: 194 | error_msg = f"Connection error: {str(e)}" 195 | mainLogger.error( 196 | "Remote tool call connection error", 197 | tool_name=tool_name, 198 | error=str(e), 199 | url=self.url, 200 | session_id=session_id, 201 | exc_info=True, 202 | ) 203 | return ToolResult( 204 | content=f"Error: {error_msg}", 205 | display=f"❌ Failed to connect to remote tool service", 206 | ) 207 | 208 | except requests.exceptions.RequestException as e: 209 | error_msg = f"Request error: {str(e)}" 210 | mainLogger.error( 211 | "Remote tool call request error", 212 | tool_name=tool_name, 213 | error=str(e), 214 | session_id=session_id, 215 | exc_info=True, 216 | ) 217 | return ToolResult( 218 | content=f"Error: {error_msg}", 219 | display=f"❌ Remote tool call failed", 220 | ) 221 | 222 | except Exception as e: 223 | error_msg = f"Unexpected error: {str(e)}" 224 | mainLogger.error( 225 | "Remote tool call unexpected error", 226 | tool_name=tool_name, 227 | error=str(e), 228 | session_id=session_id, 229 | exc_info=True, 230 | ) 231 | return ToolResult( 232 | content=f"Error: {error_msg}", 233 | display=f"❌ Unexpected error in remote tool call", 234 | ) 235 | 236 | -------------------------------------------------------------------------------- /codefuse/llm/base.py: -------------------------------------------------------------------------------- 1 | """ 2 | LLM Base Classes and Data Structures 3 | """ 4 | 5 | from dataclasses import dataclass, field 6 | from typing import List, Optional, Union, Iterator, Literal, Any, Dict 7 | from abc import ABC, abstractmethod 8 | from enum import Enum 9 | 10 | 11 | class MessageRole(str, Enum): 12 | """Message role in conversation""" 13 | SYSTEM = "system" 14 | USER = "user" 15 | ASSISTANT = "assistant" 16 | TOOL = "tool" 17 | 18 | 19 | @dataclass 20 | class ContentBlock: 21 | """Content block for multimodal messages""" 22 | type: str # "text", "image_url", etc. 
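    # e.g. ContentBlock(type="text", text="hello"), or an image block such as
    # ContentBlock(type="image_url", image_url={"url": "https://..."})  (assumed shape)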
23 | text: Optional[str] = None 24 | image_url: Optional[Dict[str, Any]] = None 25 | 26 | 27 | @dataclass 28 | class ToolCall: 29 | """Tool call from the model""" 30 | id: str 31 | type: str # "function" 32 | function: Dict[str, str] # {"name": str, "arguments": str (JSON)} 33 | 34 | 35 | @dataclass 36 | class Message: 37 | """Unified message format""" 38 | role: MessageRole 39 | content: Union[str, List[ContentBlock]] 40 | name: Optional[str] = None 41 | tool_calls: Optional[List[ToolCall]] = None 42 | tool_call_id: Optional[str] = None # For tool response messages 43 | 44 | def to_dict(self) -> Dict[str, Any]: 45 | """Convert to dictionary format""" 46 | result: Dict[str, Any] = {"role": self.role.value} 47 | 48 | if isinstance(self.content, str): 49 | result["content"] = self.content 50 | else: 51 | result["content"] = [ 52 | {k: v for k, v in block.__dict__.items() if v is not None} 53 | for block in self.content 54 | ] 55 | 56 | if self.name: 57 | result["name"] = self.name 58 | if self.tool_calls: 59 | result["tool_calls"] = [ 60 | { 61 | "id": tc.id, 62 | "type": tc.type, 63 | "function": tc.function 64 | } 65 | for tc in self.tool_calls 66 | ] 67 | if self.tool_call_id: 68 | result["tool_call_id"] = self.tool_call_id 69 | 70 | return result 71 | 72 | 73 | @dataclass 74 | class Tool: 75 | """Tool definition for function calling""" 76 | type: str = "function" 77 | function: Dict[str, Any] = field(default_factory=dict) # {"name", "description", "parameters"} 78 | 79 | def to_dict(self) -> Dict[str, Any]: 80 | """Convert to dictionary format""" 81 | return { 82 | "type": self.type, 83 | "function": self.function 84 | } 85 | 86 | 87 | @dataclass 88 | class TokenUsage: 89 | """Token usage statistics""" 90 | prompt_tokens: int 91 | completion_tokens: int 92 | total_tokens: int 93 | # Optional cache-related tokens (for providers that support it) 94 | cache_creation_input_tokens: Optional[int] = None 95 | cache_read_input_tokens: Optional[int] = None 96 | 97 | def __str__(self) -> str: 98 | base = f"Tokens(prompt={self.prompt_tokens}, completion={self.completion_tokens}, total={self.total_tokens}" 99 | if self.cache_read_input_tokens: 100 | base += f", cache_read={self.cache_read_input_tokens}" 101 | if self.cache_creation_input_tokens: 102 | base += f", cache_creation={self.cache_creation_input_tokens}" 103 | return base + ")" 104 | 105 | 106 | @dataclass 107 | class LLMResponse: 108 | """Unified LLM response format""" 109 | content: str 110 | tool_calls: List[ToolCall] = field(default_factory=list) 111 | usage: Optional[TokenUsage] = None 112 | model: str = "" 113 | finish_reason: str = "" # "stop", "tool_calls", "length", "content_filter", etc. 
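    # e.g. finish_reason == "tool_calls" when the model stopped to request tool execution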
114 | raw_response: Optional[Dict[str, Any]] = None # Original response for debugging 115 | 116 | @property 117 | def has_tool_calls(self) -> bool: 118 | """Check if response contains tool calls""" 119 | return len(self.tool_calls) > 0 120 | 121 | 122 | @dataclass 123 | class LLMChunk: 124 | """Streaming chunk from LLM""" 125 | type: Literal["content", "tool_call", "done"] 126 | delta: str = "" # Content delta 127 | tool_call: Optional[ToolCall] = None 128 | usage: Optional[TokenUsage] = None # Only present in final "done" chunk 129 | finish_reason: str = "" 130 | 131 | 132 | class BaseLLM(ABC): 133 | """ 134 | Abstract base class for all LLM implementations 135 | """ 136 | 137 | def __init__( 138 | self, 139 | model: str, 140 | api_key: str, 141 | base_url: Optional[str] = None, 142 | temperature: float = 0.0, 143 | max_tokens: Optional[int] = None, 144 | timeout: int = 60, 145 | parallel_tool_calls: bool = True, 146 | enable_thinking: bool = False, 147 | top_k: Optional[int] = None, 148 | top_p: Optional[float] = None, 149 | **kwargs 150 | ): 151 | """ 152 | Initialize LLM instance 153 | 154 | Args: 155 | model: Model identifier 156 | api_key: API key for authentication 157 | base_url: Base URL for API endpoint 158 | temperature: Sampling temperature (0-2) 159 | max_tokens: Maximum tokens to generate 160 | timeout: Request timeout in seconds 161 | parallel_tool_calls: Enable parallel tool calls (default: True) 162 | enable_thinking: Enable thinking mode for models that support it (default: False) 163 | top_k: Top-k sampling parameter (default: None) 164 | top_p: Nucleus sampling parameter (0-1, default: None) 165 | **kwargs: Additional provider-specific parameters 166 | """ 167 | self.model = model 168 | self.api_key = api_key 169 | self.base_url = base_url 170 | self.temperature = temperature 171 | self.max_tokens = max_tokens 172 | self.timeout = timeout 173 | self.parallel_tool_calls = parallel_tool_calls 174 | self.enable_thinking = enable_thinking 175 | self.top_k = top_k 176 | self.top_p = top_p 177 | self.extra_params = kwargs 178 | 179 | @abstractmethod 180 | def generate( 181 | self, 182 | messages: List[Message], 183 | tools: Optional[List[Tool]] = None, 184 | temperature: Optional[float] = None, 185 | max_tokens: Optional[int] = None, 186 | stream: bool = False, 187 | **kwargs 188 | ) -> Union[LLMResponse, Iterator[LLMChunk]]: 189 | """ 190 | Generate completion from messages 191 | 192 | Args: 193 | messages: List of conversation messages 194 | tools: Optional list of tools/functions available to the model 195 | temperature: Override default temperature 196 | max_tokens: Override default max_tokens 197 | stream: If True, return iterator of chunks; if False, return complete response 198 | **kwargs: Additional generation parameters 199 | 200 | Returns: 201 | LLMResponse for non-streaming, Iterator[LLMChunk] for streaming 202 | 203 | Raises: 204 | RetryableError: For errors that should be retried (timeout, rate limit) 205 | LLMError: For other errors 206 | """ 207 | pass 208 | 209 | @property 210 | def supports_prompt_caching(self) -> bool: 211 | """Whether this provider supports prompt caching""" 212 | return False 213 | 214 | @property 215 | def supports_parallel_tools(self) -> bool: 216 | """Whether this provider supports parallel tool calls""" 217 | return True 218 | 219 | @property 220 | def supports_streaming(self) -> bool: 221 | """Whether this provider supports streaming responses""" 222 | return True 223 | 224 | def format_messages_for_logging( 225 | self, 226 | 
messages: List[Message], 227 | tools: Optional[List[Tool]] = None 228 | ) -> Dict[str, Any]: 229 | """ 230 | Format messages and tools for logging purposes 231 | 232 | This method converts internal Message/Tool format to the provider's 233 | API format for logging. Override in subclasses to customize. 234 | 235 | Args: 236 | messages: List of messages 237 | tools: Optional list of tools 238 | 239 | Returns: 240 | Dict with 'messages' and optionally 'tools' in API format 241 | """ 242 | # Default implementation: use Message.to_dict() 243 | result = { 244 | "messages": [msg.to_dict() for msg in messages] 245 | } 246 | 247 | if tools: 248 | result["tools"] = [tool.to_dict() for tool in tools] 249 | 250 | return result 251 | 252 | def _prepare_cache_control( 253 | self, 254 | messages: List[Message], 255 | tools: Optional[List[Tool]] = None 256 | ) -> List[Message]: 257 | """ 258 | Automatically add prompt caching markers if supported. 259 | Override in subclasses for provider-specific caching. 260 | 261 | Args: 262 | messages: Original messages 263 | tools: Optional tools 264 | 265 | Returns: 266 | Messages with cache control markers added 267 | """ 268 | # Default implementation: no modification 269 | return messages 270 | 271 | -------------------------------------------------------------------------------- /codefuse/core/agent_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Agent Configuration - Agent profiles and management 3 | """ 4 | 5 | import re 6 | from dataclasses import dataclass 7 | from pathlib import Path 8 | from typing import Optional, List, Dict 9 | 10 | from codefuse.observability import mainLogger 11 | 12 | 13 | @dataclass 14 | class AgentProfile: 15 | """ 16 | Agent profile defining behavior, tools, and model 17 | 18 | Loaded from Markdown files with YAML frontmatter 19 | """ 20 | name: str 21 | description: str 22 | system_prompt: str 23 | tools: Optional[List[str]] = None # None = inherit all tools 24 | model: Optional[str] = None # None = use default model 25 | 26 | @classmethod 27 | def from_markdown(cls, path: str) -> "AgentProfile": 28 | """ 29 | Load agent profile from Markdown file with YAML frontmatter 30 | 31 | Format: 32 | ```markdown 33 | --- 34 | name: agent-name 35 | description: Agent description 36 | tools: tool1, tool2, tool3 # Optional 37 | model: model-name # Optional 38 | --- 39 | 40 | System prompt content... 
41 | ``` 42 | 43 | Args: 44 | path: Path to the Markdown file 45 | 46 | Returns: 47 | AgentProfile instance 48 | """ 49 | file_path = Path(path) 50 | 51 | if not file_path.exists(): 52 | raise FileNotFoundError(f"Agent profile not found: {path}") 53 | 54 | content = file_path.read_text(encoding='utf-8') 55 | 56 | # Parse YAML frontmatter and content 57 | frontmatter_pattern = r'^---\s*\n(.*?)\n---\s*\n(.*)$' 58 | match = re.match(frontmatter_pattern, content, re.DOTALL) 59 | 60 | if not match: 61 | raise ValueError(f"Invalid agent profile format in {path} (missing frontmatter)") 62 | 63 | frontmatter_str = match.group(1) 64 | system_prompt = match.group(2).strip() 65 | 66 | # Parse frontmatter (simple YAML parsing) 67 | frontmatter = {} 68 | for line in frontmatter_str.strip().split('\n'): 69 | if ':' in line: 70 | key, value = line.split(':', 1) 71 | key = key.strip() 72 | value = value.strip() 73 | 74 | # Remove comments 75 | if '#' in value: 76 | value = value.split('#')[0].strip() 77 | 78 | # Handle null/None values 79 | if value.lower() in ('null', 'none', ''): 80 | value = None 81 | 82 | frontmatter[key] = value 83 | 84 | # Extract fields 85 | name = frontmatter.get('name') 86 | if not name: 87 | raise ValueError(f"Agent profile missing 'name' field in {path}") 88 | 89 | description = frontmatter.get('description', '') 90 | 91 | # Parse tools (comma-separated list or None) 92 | tools_str = frontmatter.get('tools') 93 | tools = None 94 | if tools_str: 95 | tools = [t.strip() for t in tools_str.split(',') if t.strip()] 96 | 97 | # Parse model 98 | model = frontmatter.get('model') 99 | if model and model.lower() in ('inherit', 'default'): 100 | model = None 101 | 102 | mainLogger.info("Loaded agent profile", name=name, path=str(path)) 103 | 104 | return cls( 105 | name=name, 106 | description=description, 107 | system_prompt=system_prompt, 108 | tools=tools, 109 | model=model, 110 | ) 111 | 112 | @classmethod 113 | def get_builtin_agent(cls) -> "AgentProfile": 114 | """ 115 | Get the built-in default agent profile 116 | 117 | Returns: 118 | Default AgentProfile 119 | """ 120 | return cls( 121 | name="default", 122 | description="Default coding assistant for general development tasks", 123 | system_prompt="""You are CodeFuse, an AI coding assistant designed to help developers with their coding tasks. You have access to tools that allow you to read and write files in the workspace. 124 | 125 | Your approach: 126 | 1. Carefully analyze the user's request 127 | 2. Use available tools to gather necessary information 128 | 3. Propose clear, well-thought-out solutions 129 | 4. 
Execute changes carefully and verify results 130 | 131 | When modifying files: 132 | - Always read files before modifying them 133 | - Make precise, targeted changes 134 | - Explain what you're doing and why 135 | 136 | Be concise, accurate, and helpful.""", 137 | tools=None, # Inherits all available tools 138 | model=None, # Uses default model from config 139 | ) 140 | 141 | def get_tool_list(self, all_tools: List[str]) -> List[str]: 142 | """ 143 | Get the list of tools available to this agent 144 | 145 | Args: 146 | all_tools: List of all available tool names 147 | 148 | Returns: 149 | List of tool names this agent can use 150 | """ 151 | if self.tools is None: 152 | # Inherit all tools 153 | return all_tools 154 | else: 155 | # Return intersection of requested tools and available tools 156 | return [t for t in self.tools if t in all_tools] 157 | 158 | def get_model_name( 159 | self, 160 | default_model: str, 161 | model_aliases: Optional[Dict[str, str]] = None 162 | ) -> str: 163 | """ 164 | Get the model name to use for this agent 165 | 166 | Args: 167 | default_model: Default model name to use if not specified 168 | model_aliases: Optional mapping of aliases to model names 169 | (e.g., {"sonnet": "claude-3-5-sonnet-20241022"}) 170 | 171 | Returns: 172 | Resolved model name 173 | """ 174 | if self.model is None: 175 | return default_model 176 | 177 | # Check if it's an alias 178 | if model_aliases and self.model in model_aliases: 179 | return model_aliases[self.model] 180 | 181 | # Return as-is 182 | return self.model 183 | 184 | 185 | class AgentProfileManager: 186 | """ 187 | Manager for agent profiles 188 | 189 | Loads built-in and user-defined agent profiles from disk. 190 | """ 191 | 192 | def __init__(self, agent_dir: str = "~/.cfuse/agents"): 193 | """ 194 | Initialize agent profile manager 195 | 196 | Args: 197 | agent_dir: Directory containing user-defined agent profiles 198 | """ 199 | self.agent_dir = Path(agent_dir).expanduser() 200 | self._profiles: Dict[str, AgentProfile] = {} 201 | 202 | # Load built-in agent 203 | self._load_builtin_agent() 204 | 205 | # Load user agents 206 | self._load_user_agents() 207 | 208 | mainLogger.info("AgentProfileManager initialized", profile_count=len(self._profiles)) 209 | 210 | def _load_builtin_agent(self) -> None: 211 | """Load the built-in default agent""" 212 | default_agent = AgentProfile.get_builtin_agent() 213 | self._profiles[default_agent.name] = default_agent 214 | mainLogger.debug("Loaded built-in default agent") 215 | 216 | def _load_user_agents(self) -> None: 217 | """Load user-defined agents from agent_dir""" 218 | if not self.agent_dir.exists(): 219 | mainLogger.debug("Agent directory does not exist", agent_dir=str(self.agent_dir)) 220 | return 221 | 222 | # Look for .md files 223 | for file_path in self.agent_dir.glob("*.md"): 224 | try: 225 | agent = AgentProfile.from_markdown(str(file_path)) 226 | self._profiles[agent.name] = agent 227 | mainLogger.info("Loaded user agent", name=agent.name) 228 | except Exception as e: 229 | mainLogger.error("Failed to load agent", path=str(file_path), error=str(e)) 230 | 231 | def get_agent(self, name: str) -> Optional[AgentProfile]: 232 | """ 233 | Get an agent profile by name 234 | 235 | Args: 236 | name: Agent name 237 | 238 | Returns: 239 | AgentProfile if found, None otherwise 240 | """ 241 | return self._profiles.get(name) 242 | 243 | def list_agents(self) -> List[str]: 244 | """ 245 | List all available agent names 246 | 247 | Returns: 248 | List of agent names 249 | """ 250 | 
return list(self._profiles.keys()) 251 | 252 | def get_agent_info(self, name: str) -> Optional[str]: 253 | """ 254 | Get human-readable information about an agent 255 | 256 | Args: 257 | name: Agent name 258 | 259 | Returns: 260 | Formatted string with agent info, or None if not found 261 | """ 262 | agent = self.get_agent(name) 263 | if not agent: 264 | return None 265 | 266 | lines = [ 267 | f"Agent: {agent.name}", 268 | f"Description: {agent.description}", 269 | ] 270 | 271 | if agent.model: 272 | lines.append(f"Model: {agent.model}") 273 | else: 274 | lines.append("Model: (inherits from config)") 275 | 276 | if agent.tools: 277 | lines.append(f"Tools: {', '.join(agent.tools)}") 278 | else: 279 | lines.append("Tools: (all available)") 280 | 281 | return "\n".join(lines) 282 | 283 | -------------------------------------------------------------------------------- /codefuse/core/environment.py: -------------------------------------------------------------------------------- 1 | """ 2 | Environment Information Collection 3 | """ 4 | 5 | import os 6 | import platform 7 | import sys 8 | import subprocess 9 | from dataclasses import dataclass 10 | from typing import Optional 11 | from pathlib import Path 12 | 13 | from codefuse.observability import mainLogger 14 | 15 | 16 | @dataclass 17 | class EnvironmentInfo: 18 | """ 19 | Information about the current environment 20 | 21 | This information is used to provide context to the agent about 22 | the system it's running on. 23 | """ 24 | os_type: str # "darwin", "linux", "windows" 25 | os_version: str 26 | python_version: str 27 | cwd: str 28 | git_branch: Optional[str] = None 29 | git_status: Optional[str] = None 30 | 31 | def to_context_string(self) -> str: 32 | """ 33 | Convert environment info to a formatted string for system prompt 34 | 35 | Returns: 36 | Formatted string describing the environment 37 | """ 38 | lines = [ 39 | "# Environment Information", 40 | f"- OS: {self.os_type} {self.os_version}", 41 | f"- Python: {self.python_version}", 42 | f"- Working Directory: {self.cwd}", 43 | ] 44 | 45 | if self.git_branch: 46 | lines.append(f"- Git Branch: {self.git_branch}") 47 | 48 | if self.git_status: 49 | lines.append(f"- Git Status:\n{self.git_status}") 50 | 51 | return "\n".join(lines) 52 | 53 | @classmethod 54 | def collect(cls, cwd: Optional[str] = None) -> "EnvironmentInfo": 55 | """ 56 | Collect current environment information 57 | 58 | Args: 59 | cwd: Working directory (defaults to current directory) 60 | 61 | Returns: 62 | EnvironmentInfo instance 63 | """ 64 | if cwd is None: 65 | cwd = os.getcwd() 66 | 67 | cwd_path = Path(cwd).resolve() 68 | 69 | # Collect OS information 70 | os_type = platform.system().lower() 71 | os_version = platform.release() 72 | 73 | # Collect Python version 74 | python_version = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}" 75 | 76 | # Try to collect Git information 77 | git_branch = cls._get_git_branch(cwd_path) 78 | git_status = cls._get_git_status(cwd_path) 79 | 80 | mainLogger.info( 81 | "Collected environment info", 82 | os_type=os_type, 83 | os_version=os_version, 84 | python_version=python_version, 85 | ) 86 | 87 | return cls( 88 | os_type=os_type, 89 | os_version=os_version, 90 | python_version=python_version, 91 | cwd=str(cwd_path), 92 | git_branch=git_branch, 93 | git_status=git_status, 94 | ) 95 | 96 | @staticmethod 97 | def _get_git_branch(cwd: Path) -> Optional[str]: 98 | """ 99 | Get current git branch if in a git repository 100 | 101 | Args: 102 | cwd: 
Working directory 103 | 104 | Returns: 105 | Branch name or None 106 | """ 107 | try: 108 | result = subprocess.run( 109 | ["git", "rev-parse", "--abbrev-ref", "HEAD"], 110 | cwd=cwd, 111 | capture_output=True, 112 | text=True, 113 | timeout=2, 114 | ) 115 | if result.returncode == 0: 116 | return result.stdout.strip() 117 | except Exception as e: 118 | mainLogger.debug("Failed to get git branch", error=str(e)) 119 | 120 | return None 121 | 122 | @staticmethod 123 | def _get_git_status(cwd: Path) -> Optional[str]: 124 | """ 125 | Get git status if in a git repository 126 | 127 | Args: 128 | cwd: Working directory 129 | 130 | Returns: 131 | Git status output or None 132 | """ 133 | try: 134 | result = subprocess.run( 135 | ["git", "status", "--short"], 136 | cwd=cwd, 137 | capture_output=True, 138 | text=True, 139 | timeout=2, 140 | ) 141 | if result.returncode == 0: 142 | status = result.stdout.strip() 143 | if status: 144 | return status 145 | else: 146 | return "Clean (no changes)" 147 | except Exception as e: 148 | mainLogger.debug("Failed to get git status", error=str(e)) 149 | 150 | return None 151 | 152 | @staticmethod 153 | def _get_git_diff_stats(cwd: Path) -> Optional[dict]: 154 | """ 155 | Get git diff statistics using git add -A and git diff --cached --numstat 156 | 157 | This captures all changes including untracked files. 158 | 159 | Args: 160 | cwd: Working directory 161 | 162 | Returns: 163 | Dict with stats and file-level changes, or None 164 | """ 165 | try: 166 | # Stage all changes 167 | add_result = subprocess.run( 168 | ["git", "add", "-A"], 169 | cwd=cwd, 170 | capture_output=True, 171 | text=True, 172 | timeout=10, 173 | ) 174 | if add_result.returncode != 0: 175 | mainLogger.debug("Failed to stage changes", error=add_result.stderr) 176 | return None 177 | 178 | # Get numstat for staged changes 179 | numstat_result = subprocess.run( 180 | ["git", "diff", "--cached", "--numstat"], 181 | cwd=cwd, 182 | capture_output=True, 183 | text=True, 184 | timeout=10, 185 | ) 186 | 187 | if numstat_result.returncode != 0: 188 | return None 189 | 190 | numstat_output = numstat_result.stdout.strip() 191 | if not numstat_output: 192 | return None 193 | 194 | # Parse numstat output 195 | files = [] 196 | total_insertions = 0 197 | total_deletions = 0 198 | 199 | for line in numstat_output.split('\n'): 200 | if not line: 201 | continue 202 | parts = line.split('\t') 203 | if len(parts) >= 3: 204 | insertions = int(parts[0]) if parts[0] != '-' else 0 205 | deletions = int(parts[1]) if parts[1] != '-' else 0 206 | path = parts[2] 207 | 208 | files.append({ 209 | "path": path, 210 | "insertions": insertions, 211 | "deletions": deletions, 212 | }) 213 | 214 | total_insertions += insertions 215 | total_deletions += deletions 216 | 217 | return { 218 | "stats": { 219 | "files_changed": len(files), 220 | "insertions": total_insertions, 221 | "deletions": total_deletions, 222 | }, 223 | "files": files, 224 | } 225 | except Exception as e: 226 | mainLogger.debug("Failed to get git diff stats", error=str(e)) 227 | return None 228 | 229 | @staticmethod 230 | def _get_git_diff_text(cwd: Path) -> Optional[str]: 231 | """ 232 | Get full git diff text for staged changes 233 | 234 | Note: This assumes git add -A has already been called by _get_git_diff_stats() 235 | 236 | Args: 237 | cwd: Working directory 238 | 239 | Returns: 240 | Full diff text or None 241 | """ 242 | try: 243 | # Get diff for staged changes 244 | diff_result = subprocess.run( 245 | ["git", "diff", "--cached"], 246 | cwd=cwd, 247 
| capture_output=True, 248 | text=True, 249 | timeout=10, 250 | ) 251 | 252 | if diff_result.returncode == 0 and diff_result.stdout.strip(): 253 | return diff_result.stdout.strip() 254 | 255 | return None 256 | except Exception as e: 257 | mainLogger.debug("Failed to get git diff text", error=str(e)) 258 | return None 259 | 260 | @staticmethod 261 | def get_git_diff_info(cwd: Optional[str] = None) -> Optional[dict]: 262 | """ 263 | Get complete git diff information using git add -A and git diff --cached 264 | 265 | Note: This will stage all changes in the repository. 266 | 267 | Args: 268 | cwd: Working directory (defaults to current directory) 269 | 270 | Returns: 271 | Dictionary with stats, file list, and full diff text, or None if not a git repo 272 | { 273 | "stats": { 274 | "files_changed": 3, 275 | "insertions": 45, 276 | "deletions": 12 277 | }, 278 | "files": [ 279 | { 280 | "path": "file.py", 281 | "insertions": 30, 282 | "deletions": 5 283 | } 284 | ], 285 | "diff_text": "diff --git ..." 286 | } 287 | """ 288 | if cwd is None: 289 | cwd = os.getcwd() 290 | 291 | cwd_path = Path(cwd).resolve() 292 | 293 | # Get stats (this will also run git add -A) 294 | diff_info = EnvironmentInfo._get_git_diff_stats(cwd_path) 295 | if diff_info is None: 296 | return None 297 | 298 | # Get full diff text (uses git diff --cached) 299 | diff_text = EnvironmentInfo._get_git_diff_text(cwd_path) 300 | if diff_text: 301 | diff_info["diff_text"] = diff_text 302 | 303 | return diff_info 304 | 305 | -------------------------------------------------------------------------------- /codefuse/observability/http_logger.py: -------------------------------------------------------------------------------- 1 | """ 2 | HTTP Server Log Management - File-based logging with rotation and cleanup 3 | 4 | Features: 5 | - Dual format logging: text (access.log) and JSON (access-YYYYMMDD.json) 6 | - Daily log rotation 7 | - Automatic cleanup of old logs (default: 7 days retention) 8 | - Thread-safe for Gunicorn multi-worker setup 9 | """ 10 | 11 | import json 12 | import os 13 | import threading 14 | import time 15 | from datetime import datetime, timezone, timedelta 16 | from pathlib import Path 17 | from typing import Optional, Dict, Any 18 | import atexit 19 | 20 | 21 | class HTTPLogger: 22 | """Thread-safe HTTP request logger with rotation and cleanup""" 23 | 24 | def __init__(self, log_dir: str, retention_days: int = 7, cleanup_interval: int = 3600): 25 | """ 26 | Initialize HTTP logger 27 | 28 | Args: 29 | log_dir: Base directory for log files 30 | retention_days: Number of days to retain logs (default: 7) 31 | cleanup_interval: Cleanup check interval in seconds (default: 3600) 32 | """ 33 | self.log_dir = Path(log_dir).expanduser() 34 | self.log_dir.mkdir(parents=True, exist_ok=True) 35 | 36 | self.retention_days = retention_days 37 | self.cleanup_interval = cleanup_interval 38 | 39 | # File paths 40 | self.access_log_path = self.log_dir / "access.log" 41 | self.error_log_path = self.log_dir / "error.log" 42 | 43 | # Thread safety 44 | self._write_lock = threading.Lock() 45 | self._cleanup_thread: Optional[threading.Thread] = None 46 | self._stop_cleanup = threading.Event() 47 | 48 | # Current date for rotation check 49 | self._current_date = datetime.now().date() 50 | 51 | # Register cleanup on exit 52 | atexit.register(self.stop_cleanup_thread) 53 | 54 | def _get_json_log_path(self, date: Optional[datetime] = None) -> Path: 55 | """Get JSON log file path for a specific date""" 56 | if date is None: 57 | date = 
datetime.now() 58 | date_str = date.strftime("%Y%m%d") 59 | return self.log_dir / f"access-{date_str}.json" 60 | 61 | def _format_text_log( 62 | self, 63 | request_id: str, 64 | method: str, 65 | path: str, 66 | status: int, 67 | duration: float, 68 | tool_name: Optional[str] = None, 69 | workdir: Optional[str] = None, 70 | ) -> str: 71 | """Format log entry as human-readable text""" 72 | timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") 73 | tool_info = f" | tool:{tool_name}" if tool_name else "" 74 | workdir_info = f" | wd:{workdir}" if workdir else "" 75 | return f"{timestamp} | {method} {path} | {status} | {duration:.3f}s | {request_id}{tool_info}{workdir_info}\n" 76 | 77 | def _format_json_log( 78 | self, 79 | request_id: str, 80 | method: str, 81 | path: str, 82 | status: int, 83 | duration: float, 84 | tool_name: Optional[str] = None, 85 | tool_args: Optional[Dict[str, Any]] = None, 86 | workdir: Optional[str] = None, 87 | success: Optional[bool] = None, 88 | error: Optional[str] = None, 89 | ) -> str: 90 | """Format log entry as JSON""" 91 | log_data = { 92 | "timestamp": datetime.now(timezone.utc).isoformat(), 93 | "request_id": request_id, 94 | "method": method, 95 | "path": path, 96 | "status": status, 97 | "duration": round(duration, 3), 98 | } 99 | 100 | if tool_name: 101 | log_data["tool_name"] = tool_name 102 | if tool_args: 103 | log_data["tool_args"] = tool_args 104 | if workdir: 105 | log_data["workdir"] = workdir 106 | if success is not None: 107 | log_data["success"] = success 108 | if error: 109 | log_data["error"] = error 110 | 111 | return json.dumps(log_data, ensure_ascii=False) + "\n" 112 | 113 | def log_request( 114 | self, 115 | request_id: str, 116 | method: str, 117 | path: str, 118 | status: int, 119 | duration: float, 120 | tool_name: Optional[str] = None, 121 | tool_args: Optional[Dict[str, Any]] = None, 122 | workdir: Optional[str] = None, 123 | success: Optional[bool] = None, 124 | error: Optional[str] = None, 125 | ) -> None: 126 | """ 127 | Log HTTP request to both text and JSON files 128 | 129 | Thread-safe for concurrent writes from multiple workers. 
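Each call to log_request appends one JSON object per line to the day's access-YYYYMMDD.json file, so downstream tooling can post-process the logs line by line. A minimal sketch, assuming a hypothetical log directory and date:

```python
import json
from pathlib import Path

# Hypothetical daily file produced by HTTPLogger; the date is illustrative.
log_path = Path("~/.cfuse/logs/http_server/access-20250101.json").expanduser()

with open(log_path, encoding="utf-8") as f:
    for line in f:
        entry = json.loads(line)      # one request per line
        if entry["status"] >= 500:    # surface failed requests
            print(entry["request_id"], entry.get("error", ""))
```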
130 | """ 131 | with self._write_lock: 132 | try: 133 | # Check if date has changed (rotation needed) 134 | current_date = datetime.now().date() 135 | if current_date != self._current_date: 136 | self._current_date = current_date 137 | 138 | # Write text log 139 | text_entry = self._format_text_log( 140 | request_id, method, path, status, duration, tool_name, workdir 141 | ) 142 | with open(self.access_log_path, 'a', encoding='utf-8') as f: 143 | f.write(text_entry) 144 | 145 | # Write JSON log 146 | json_entry = self._format_json_log( 147 | request_id, method, path, status, duration, 148 | tool_name, tool_args, workdir, success, error 149 | ) 150 | json_log_path = self._get_json_log_path() 151 | with open(json_log_path, 'a', encoding='utf-8') as f: 152 | f.write(json_entry) 153 | 154 | except Exception as e: 155 | # Avoid blocking the request if logging fails 156 | print(f"[HTTPLogger] Failed to write log: {e}", flush=True) 157 | 158 | def log_error( 159 | self, 160 | request_id: str, 161 | error: str, 162 | traceback: Optional[str] = None, 163 | method: Optional[str] = None, 164 | path: Optional[str] = None, 165 | ) -> None: 166 | """Log error to error log file""" 167 | with self._write_lock: 168 | try: 169 | error_data = { 170 | "timestamp": datetime.now(timezone.utc).isoformat(), 171 | "request_id": request_id, 172 | "error": error, 173 | } 174 | 175 | if method: 176 | error_data["method"] = method 177 | if path: 178 | error_data["path"] = path 179 | if traceback: 180 | error_data["traceback"] = traceback 181 | 182 | error_entry = json.dumps(error_data, ensure_ascii=False) + "\n" 183 | with open(self.error_log_path, 'a', encoding='utf-8') as f: 184 | f.write(error_entry) 185 | 186 | except Exception as e: 187 | print(f"[HTTPLogger] Failed to write error log: {e}", flush=True) 188 | 189 | def _cleanup_old_logs(self) -> None: 190 | """Delete log files older than retention_days""" 191 | try: 192 | cutoff_date = datetime.now() - timedelta(days=self.retention_days) 193 | 194 | # Find and delete old JSON log files 195 | pattern = "access-*.json" 196 | for log_file in self.log_dir.glob(pattern): 197 | try: 198 | # Extract date from filename: access-20251120.json 199 | date_str = log_file.stem.split('-', 1)[1] # "20251120" 200 | file_date = datetime.strptime(date_str, "%Y%m%d") 201 | 202 | if file_date < cutoff_date: 203 | log_file.unlink() 204 | print(f"[HTTPLogger] Deleted old log: {log_file.name}", flush=True) 205 | 206 | except (ValueError, IndexError) as e: 207 | # Skip files with invalid date format 208 | print(f"[HTTPLogger] Skipping invalid log file: {log_file.name} ({e})", flush=True) 209 | 210 | except Exception as e: 211 | print(f"[HTTPLogger] Cleanup failed: {e}", flush=True) 212 | 213 | def _cleanup_worker(self) -> None: 214 | """Background thread worker for periodic cleanup""" 215 | print(f"[HTTPLogger] Cleanup thread started (interval: {self.cleanup_interval}s, retention: {self.retention_days} days)", flush=True) 216 | 217 | while not self._stop_cleanup.wait(timeout=self.cleanup_interval): 218 | self._cleanup_old_logs() 219 | 220 | print("[HTTPLogger] Cleanup thread stopped", flush=True) 221 | 222 | def start_cleanup_thread(self) -> None: 223 | """Start background cleanup thread""" 224 | if self._cleanup_thread is not None and self._cleanup_thread.is_alive(): 225 | print("[HTTPLogger] Cleanup thread already running", flush=True) 226 | return 227 | 228 | # Run initial cleanup 229 | self._cleanup_old_logs() 230 | 231 | # Start background thread 232 | self._cleanup_thread = 
threading.Thread( 233 | target=self._cleanup_worker, 234 | daemon=True, 235 | name="HTTPLoggerCleanup" 236 | ) 237 | self._cleanup_thread.start() 238 | 239 | def stop_cleanup_thread(self) -> None: 240 | """Stop background cleanup thread gracefully""" 241 | if self._cleanup_thread is None or not self._cleanup_thread.is_alive(): 242 | return 243 | 244 | print("[HTTPLogger] Stopping cleanup thread...", flush=True) 245 | self._stop_cleanup.set() 246 | self._cleanup_thread.join(timeout=5) 247 | 248 | 249 | def create_http_logger( 250 | log_dir: Optional[str] = None, 251 | retention_days: Optional[int] = None, 252 | cleanup_interval: Optional[int] = None, 253 | ) -> HTTPLogger: 254 | """ 255 | Create and configure HTTP logger from environment variables 256 | 257 | Environment variables: 258 | CFUSE_HTTP_LOG_DIR: Log directory (default: ~/.cfuse/logs/http_server) 259 | CFUSE_HTTP_LOG_RETENTION_DAYS: Retention in days (default: 7) 260 | CFUSE_HTTP_LOG_CLEANUP_INTERVAL: Cleanup interval in seconds (default: 3600) 261 | 262 | Args: 263 | log_dir: Override log directory 264 | retention_days: Override retention days 265 | cleanup_interval: Override cleanup interval 266 | 267 | Returns: 268 | Configured HTTPLogger instance 269 | """ 270 | if log_dir is None: 271 | log_dir = os.getenv("CFUSE_HTTP_LOG_DIR", "~/.cfuse/logs/http_server") 272 | 273 | if retention_days is None: 274 | retention_days = int(os.getenv("CFUSE_HTTP_LOG_RETENTION_DAYS", "7")) 275 | 276 | if cleanup_interval is None: 277 | cleanup_interval = int(os.getenv("CFUSE_HTTP_LOG_CLEANUP_INTERVAL", "3600")) 278 | 279 | return HTTPLogger(log_dir, retention_days, cleanup_interval) 280 | 281 | -------------------------------------------------------------------------------- /codefuse/tools/builtin/read_file.py: -------------------------------------------------------------------------------- 1 | """ 2 | Read File Tool - Read file contents from the workspace 3 | """ 4 | 5 | from pathlib import Path 6 | from typing import Optional, TYPE_CHECKING 7 | 8 | from codefuse.tools.base import BaseTool, ToolDefinition, ToolParameter, ToolResult 9 | from codefuse.tools.builtin.filesystem_base import FileSystemToolMixin, MAX_TOKENS, MAX_FILE_SIZE_BYTES 10 | from codefuse.observability import mainLogger 11 | 12 | if TYPE_CHECKING: 13 | from codefuse.core.read_tracker import ReadTracker 14 | 15 | 16 | # Read-specific limits 17 | DEFAULT_MAX_LINES = 1000 # Default maximum lines to read if no range specified 18 | 19 | 20 | class ReadFileTool(FileSystemToolMixin, BaseTool): 21 | """ 22 | Tool for reading file contents 23 | 24 | Features: 25 | - Read entire file or specific line range 26 | - Safety checks for file existence and readability 27 | - Prevents reading binary files 28 | - File size and token limits 29 | - Workspace root directory restriction 30 | - Line number formatting for LLM context 31 | """ 32 | 33 | def __init__( 34 | self, 35 | workspace_root: Optional[Path] = None, 36 | read_tracker: Optional["ReadTracker"] = None, 37 | ): 38 | """ 39 | Initialize ReadFileTool 40 | 41 | Args: 42 | workspace_root: Workspace root directory to restrict file access. 43 | Defaults to current working directory. 44 | read_tracker: Optional read tracker for tracking read files. 
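The tool can also be exercised directly; a minimal sketch assuming a hypothetical workspace root (read_tracker is optional and omitted here, and execute() is defined further below in this file):

```python
from pathlib import Path
from codefuse.tools.builtin.read_file import ReadFileTool

# Hypothetical workspace root; access is restricted to paths beneath it.
tool = ReadFileTool(workspace_root=Path("/home/user/project"))
result = tool.execute(path="/home/user/project/app.py")
print(result.display)   # e.g. "✓ Read lines 1-100 (100 lines)"
print(result.content)   # line-numbered content intended for the LLM
```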
45 | """ 46 | super().__init__(workspace_root=workspace_root) 47 | self._read_tracker = read_tracker 48 | 49 | @property 50 | def definition(self) -> ToolDefinition: 51 | """Define the read_file tool""" 52 | return ToolDefinition( 53 | name="read_file", 54 | description=( 55 | "Reads a file from the local filesystem. You can access any file directly by using this tool.\n\n" 56 | "Important:\n" 57 | "- The path parameter MUST be an absolute path, not a relative path\n" 58 | "- By default, it reads up to 1000 lines starting from the beginning of the file\n" 59 | "- You can optionally specify a line offset and limit (especially handy for long files), but it's recommended to read the whole file by not providing these parameters\n" 60 | "- Results are returned with line numbers starting at 1\n" 61 | ), 62 | parameters=[ 63 | ToolParameter( 64 | name="path", 65 | type="string", 66 | description="Absolute path to the file to read", 67 | required=True, 68 | ), 69 | ToolParameter( 70 | name="start_line", 71 | type="number", 72 | description="Starting line number", 73 | required=False, 74 | ), 75 | ToolParameter( 76 | name="end_line", 77 | type="number", 78 | description="Ending line number", 79 | required=False, 80 | ), 81 | ], 82 | requires_confirmation=False, # Reading is safe 83 | ) 84 | 85 | def _check_file_size(self, file_path: Path, has_pagination: bool) -> Optional[str]: 86 | """ 87 | Check if file size exceeds limit 88 | 89 | Args: 90 | file_path: Path to the file 91 | has_pagination: Whether pagination parameters are provided 92 | 93 | Returns: 94 | Error message if file is too large and no pagination, None otherwise 95 | """ 96 | file_size = file_path.stat().st_size 97 | 98 | # If file is too large and no pagination is provided 99 | if file_size > MAX_FILE_SIZE_BYTES and not has_pagination: 100 | size_kb = file_size / 1024 101 | max_kb = MAX_FILE_SIZE_BYTES / 1024 102 | return ( 103 | f"File size ({size_kb:.1f}KB) exceeds maximum ({max_kb:.0f}KB). " 104 | f"Please use start_line and end_line parameters to read specific portions." 
105 | ) 106 | return None 107 | 108 | 109 | def execute( 110 | self, 111 | path: str, 112 | start_line: Optional[int] = None, 113 | end_line: Optional[int] = None, 114 | **kwargs 115 | ) -> ToolResult: 116 | """ 117 | Execute the read_file tool 118 | 119 | Args: 120 | path: Absolute path to the file to read 121 | start_line: Optional starting line (1-indexed) 122 | end_line: Optional ending line (1-indexed) 123 | 124 | Returns: 125 | ToolResult with: 126 | - content: Formatted file contents with line numbers for LLM 127 | - display: Summary message for user 128 | """ 129 | try: 130 | # Step 1: Check if path is absolute 131 | if error := self._check_absolute_path(path): 132 | return ToolResult( 133 | content=f"Error: {error}", 134 | display=f"❌ {error}" 135 | ) 136 | 137 | # Resolve path 138 | file_path = Path(path).resolve() 139 | 140 | # Step 2: Check if within workspace 141 | if error := self._check_within_workspace(file_path): 142 | mainLogger.warning(f"File access outside workspace: {error}") 143 | return ToolResult( 144 | content=f"Error: {error}", 145 | display=f"❌ Access denied: outside workspace" 146 | ) 147 | 148 | # Step 3: Check file existence 149 | if not file_path.exists(): 150 | error_msg = f"File not found: {path}" 151 | mainLogger.error(error_msg) 152 | return ToolResult( 153 | content=f"Error: {error_msg}", 154 | display=f"❌ File not found" 155 | ) 156 | 157 | # Step 4: Check it's a file 158 | if not file_path.is_file(): 159 | error_msg = f"Path is not a file: {path}" 160 | mainLogger.error(error_msg) 161 | return ToolResult( 162 | content=f"Error: {error_msg}", 163 | display=f"❌ Not a file" 164 | ) 165 | 166 | # Step 5: Check file size 167 | has_pagination = start_line is not None or end_line is not None 168 | if error := self._check_file_size(file_path, has_pagination): 169 | mainLogger.warning(f"File too large: {error}") 170 | return ToolResult( 171 | content=f"Error: {error}", 172 | display=f"❌ File too large (>256KB)" 173 | ) 174 | 175 | # Step 6: Read file contents with encoding fallback 176 | try: 177 | file_content, encoding = self._read_with_encoding_fallback(file_path) 178 | lines = file_content.splitlines(keepends=True) 179 | mainLogger.debug(f"Successfully read file with encoding: {encoding}") 180 | except UnicodeDecodeError as e: 181 | error_msg = f"Cannot read file (encoding error): {path}" 182 | mainLogger.error(f"{error_msg}: {e}") 183 | return ToolResult( 184 | content=f"Error: {error_msg}", 185 | display=f"❌ Encoding error" 186 | ) 187 | 188 | # Step 7: Handle line range 189 | start_idx = (start_line - 1) if start_line else 0 190 | 191 | # Determine end index 192 | if end_line is not None: 193 | end_idx = end_line 194 | else: 195 | # Default: read up to DEFAULT_MAX_LINES from start 196 | end_idx = start_idx + DEFAULT_MAX_LINES 197 | 198 | # Cap at actual file length 199 | end_idx = min(end_idx, len(lines)) 200 | 201 | # Validate line numbers 202 | if start_idx < 0 or start_idx >= len(lines): 203 | error_msg = f"Invalid start_line {start_line} (file has {len(lines)} lines)" 204 | return ToolResult( 205 | content=f"Error: {error_msg}", 206 | display=f"❌ {error_msg}" 207 | ) 208 | if end_idx < start_idx: 209 | error_msg = f"Invalid end_line {end_line} (must be >= start_line)" 210 | return ToolResult( 211 | content=f"Error: {error_msg}", 212 | display=f"❌ {error_msg}" 213 | ) 214 | 215 | selected_lines = lines[start_idx:end_idx] 216 | content = ''.join(selected_lines) 217 | actual_start_line = start_line or 1 218 | actual_end_line = actual_start_line + 
len(selected_lines) - 1 219 | 220 | # Check if file was truncated 221 | was_truncated = end_idx < len(lines) and end_line is None 222 | 223 | # Step 8: Check token limit 224 | if error := self._check_token_limit(content, MAX_TOKENS): 225 | mainLogger.warning(f"Token limit exceeded: {error}") 226 | return ToolResult( 227 | content=f"Error: {error}", 228 | display=f"❌ Content too large (>{MAX_TOKENS:,} tokens)" 229 | ) 230 | 231 | # Step 9: Format content with line numbers 232 | formatted_content = self._format_with_line_numbers(content, actual_start_line) 233 | 234 | # Add truncation warning if file was truncated 235 | if was_truncated: 236 | truncation_note = ( 237 | f"\n\n" 238 | f"Note: File has {len(lines)} total lines, but only showing lines {actual_start_line}-{actual_end_line} " 239 | f"(default limit: {DEFAULT_MAX_LINES} lines). " 240 | f"Use start_line and end_line parameters to read other portions of the file." 241 | f"" 242 | ) 243 | formatted_content += truncation_note 244 | 245 | # Step 10: Prepare display message 246 | num_lines = len(selected_lines) 247 | line_range = f"lines {actual_start_line}-{actual_end_line}" 248 | 249 | if was_truncated: 250 | display_msg = f"✓ Read {line_range} ({num_lines}/{len(lines)} lines)" 251 | else: 252 | display_msg = f"✓ Read {line_range} ({num_lines} lines)" 253 | 254 | mainLogger.info(f"Read {file_path} ({num_lines} lines, total: {len(lines)})") 255 | 256 | # Mark file as read (for edit tool validation) 257 | if self._read_tracker: 258 | self._read_tracker.mark_as_read(str(file_path)) 259 | 260 | return ToolResult( 261 | content=formatted_content, 262 | display=display_msg 263 | ) 264 | 265 | except PermissionError as e: 266 | error_msg = f"Permission denied reading file: {path}" 267 | mainLogger.error(f"{error_msg}: {e}") 268 | return ToolResult( 269 | content=f"Error: {error_msg}", 270 | display=f"❌ Permission denied" 271 | ) 272 | except Exception as e: 273 | error_msg = f"Unexpected error reading file: {path}" 274 | mainLogger.error(f"{error_msg}: {e}", exc_info=True) 275 | return ToolResult( 276 | content=f"Error: {error_msg} - {str(e)}", 277 | display=f"❌ Error: {str(e)}" 278 | ) 279 | 280 | -------------------------------------------------------------------------------- /codefuse/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Configuration Management 3 | """ 4 | 5 | import os 6 | import copy 7 | from dataclasses import dataclass, fields 8 | from pathlib import Path 9 | from typing import Optional, List, Any 10 | import yaml 11 | 12 | from codefuse.observability import mainLogger 13 | 14 | 15 | @dataclass 16 | class LLMConfig: 17 | """LLM configuration""" 18 | provider: Optional[str] = None 19 | model: Optional[str] = None 20 | api_key: Optional[str] = None 21 | base_url: Optional[str] = None 22 | temperature: Optional[float] = None 23 | max_tokens: Optional[int] = None 24 | timeout: Optional[int] = None 25 | parallel_tool_calls: Optional[bool] = None 26 | enable_thinking: Optional[bool] = None 27 | top_k: Optional[int] = None 28 | top_p: Optional[float] = None 29 | 30 | 31 | @dataclass 32 | class AgentConfig: 33 | """Agent configuration""" 34 | max_iterations: Optional[int] = None 35 | max_context_tokens: Optional[int] = None 36 | enable_tools: Optional[bool] = None 37 | yolo: Optional[bool] = None 38 | agent: Optional[str] = None 39 | workspace_root: Optional[str] = None 40 | bash_timeout: Optional[int] = None 41 | bash_allowed_commands: Optional[list] = None 42 | 
bash_disallowed_commands: Optional[list] = None 43 | remote_tool_enabled: Optional[bool] = None 44 | remote_tool_url: Optional[str] = None 45 | remote_tool_instance_id: Optional[str] = None 46 | remote_tool_timeout: Optional[int] = None 47 | 48 | 49 | @dataclass 50 | class LoggingConfig: 51 | """Logging configuration""" 52 | logs_dir: Optional[str] = None 53 | verbose: Optional[bool] = None 54 | 55 | 56 | # Default values (centralized) 57 | DEFAULTS = { 58 | "llm": { 59 | "provider": "openai_compatible", 60 | "model": "", 61 | "api_key": "", 62 | "base_url": "", 63 | "temperature": 0.0, 64 | "max_tokens": None, 65 | "timeout": 60, 66 | "parallel_tool_calls": True, 67 | "enable_thinking": False, 68 | "top_k": None, 69 | "top_p": None, 70 | }, 71 | "agent_config": { 72 | "max_iterations": 200, 73 | "max_context_tokens": 100000, 74 | "enable_tools": True, 75 | "yolo": False, 76 | "agent": "default", 77 | "workspace_root": ".", 78 | "bash_timeout": 30, 79 | "bash_allowed_commands": [], 80 | "bash_disallowed_commands": [], 81 | "remote_tool_enabled": False, 82 | "remote_tool_url": "", 83 | "remote_tool_instance_id": "", 84 | "remote_tool_timeout": 60, 85 | }, 86 | "logging": { 87 | "logs_dir": "~/.cfuse/logs", 88 | "verbose": False, 89 | }, 90 | } 91 | 92 | 93 | # Environment variable mapping (only core configs) 94 | ENV_MAPPING = [ 95 | ("api_key", "llm", str, ["OPENAI_API_KEY"]), 96 | ("base_url", "llm", str, ["LLM_BASE_URL"]), 97 | ("model", "llm", str, ["LLM_MODEL"]), 98 | ("logs_dir", "logging", str, ["LOGS_DIR"]), 99 | ("verbose", "logging", bool, ["VERBOSE"]), 100 | ] 101 | 102 | 103 | # Validation rules (section, field, check_function, error_message) 104 | VALIDATIONS = [ 105 | ('llm', 'temperature', lambda v: 0 <= v <= 2, "temperature must be 0-2"), 106 | ('llm', 'top_p', lambda v: 0 <= v <= 1, "top_p must be 0-1"), 107 | ('llm', 'top_k', lambda v: v > 0, "top_k must be positive"), 108 | ('llm', 'timeout', lambda v: v > 0, "timeout must be positive"), 109 | ('llm', 'max_tokens', lambda v: v > 0, "max_tokens must be positive"), 110 | ('agent_config', 'max_iterations', lambda v: v > 0, "max_iterations must be positive"), 111 | ('agent_config', 'bash_timeout', lambda v: v > 0, "bash_timeout must be positive"), 112 | ('agent_config', 'remote_tool_timeout', lambda v: v > 0, "remote_tool_timeout must be positive"), 113 | ] 114 | 115 | 116 | def _get_env_value(env_vars: List[str], type_: type) -> Any: 117 | """Get first available environment variable and convert to type""" 118 | for env_var in env_vars: 119 | value = os.getenv(env_var) 120 | if value is not None: 121 | try: 122 | if type_ == bool: 123 | return value.lower() in ('true', '1', 'yes') 124 | elif type_ == int: 125 | return int(value) 126 | elif type_ == float: 127 | return float(value) 128 | else: 129 | return value 130 | except (ValueError, AttributeError) as e: 131 | mainLogger.warning( 132 | "Failed to convert environment variable", 133 | env_var=env_var, 134 | value=value, 135 | error=str(e) 136 | ) 137 | return None 138 | 139 | 140 | def _expand_env_vars(data: Any) -> Any: 141 | """Recursively expand ${VAR} in strings""" 142 | if isinstance(data, dict): 143 | return {k: _expand_env_vars(v) for k, v in data.items()} 144 | elif isinstance(data, list): 145 | return [_expand_env_vars(item) for item in data] 146 | elif isinstance(data, str): 147 | import re 148 | def replacer(match): 149 | var_name = match.group(1) or match.group(2) 150 | return os.getenv(var_name, match.group(0)) 151 | return re.sub(r'\$\{([^}]+)\}|\$(\w+)', 
replacer, data) 152 | return data 153 | 154 | 155 | @dataclass 156 | class Config: 157 | """Main configuration""" 158 | llm: LLMConfig = None 159 | agent_config: AgentConfig = None 160 | logging: LoggingConfig = None 161 | 162 | def __post_init__(self): 163 | """Initialize sub-configs if not provided""" 164 | if self.llm is None: 165 | self.llm = LLMConfig() 166 | if self.agent_config is None: 167 | self.agent_config = AgentConfig() 168 | if self.logging is None: 169 | self.logging = LoggingConfig() 170 | 171 | @classmethod 172 | def from_defaults(cls) -> "Config": 173 | """Create config from default values""" 174 | return cls( 175 | llm=LLMConfig(**DEFAULTS["llm"]), 176 | agent_config=AgentConfig(**DEFAULTS["agent_config"]), 177 | logging=LoggingConfig(**DEFAULTS["logging"]), 178 | ) 179 | 180 | @classmethod 181 | def from_yaml(cls, path: str) -> Optional["Config"]: 182 | """Load config from YAML file""" 183 | file_path = Path(path).expanduser() 184 | if not file_path.exists(): 185 | return None 186 | 187 | try: 188 | with open(file_path, 'r', encoding='utf-8') as f: 189 | data = yaml.safe_load(f) or {} 190 | 191 | data = _expand_env_vars(data) 192 | 193 | llm_data = data.get('llm', {}) 194 | agent_data = data.get('agent_config', {}) or data.get('agent', {}) # Support both for backward compatibility 195 | logging_data = data.get('logging', {}) 196 | 197 | mainLogger.info("Loaded configuration from file", path=str(path)) 198 | return cls( 199 | llm=LLMConfig(**{k: v for k, v in llm_data.items() if k in {f.name for f in fields(LLMConfig)}}), 200 | agent_config=AgentConfig(**{k: v for k, v in agent_data.items() if k in {f.name for f in fields(AgentConfig)}}), 201 | logging=LoggingConfig(**{k: v for k, v in logging_data.items() if k in {f.name for f in fields(LoggingConfig)}}), 202 | ) 203 | except Exception as e: 204 | mainLogger.error("Failed to load config from file", path=str(path), error=str(e)) 205 | return None 206 | 207 | @classmethod 208 | def from_env(cls) -> "Config": 209 | """Load config from environment variables""" 210 | cfg = cls() 211 | 212 | # Map 'agent' section in ENV_MAPPING to 'agent_config' 213 | for field_name, section, type_, env_vars in ENV_MAPPING: 214 | value = _get_env_value(env_vars, type_) 215 | if value is not None: 216 | section_name = 'agent_config' if section == 'agent' else section 217 | section_obj = getattr(cfg, section_name) 218 | setattr(section_obj, field_name, value) 219 | 220 | return cfg 221 | 222 | @classmethod 223 | def load(cls, config_path: Optional[str] = None) -> "Config": 224 | """ 225 | Load configuration: defaults → file → env 226 | Priority: defaults < file < env < cli (cli done via merge_with_cli_args) 227 | """ 228 | # Start with defaults 229 | cfg = cls.from_defaults() 230 | 231 | # Try to load from file 232 | if config_path: 233 | file_cfg = cls.from_yaml(config_path) 234 | else: 235 | # Try default locations 236 | file_cfg = None 237 | for path in [".cfuse.yaml", "~/.cfuse.yaml", "~/.config/cfuse/config.yaml"]: 238 | file_cfg = cls.from_yaml(path) 239 | if file_cfg: 240 | break 241 | 242 | if file_cfg: 243 | cfg = cls._merge(cfg, file_cfg) 244 | 245 | # Merge environment variables 246 | env_cfg = cls.from_env() 247 | cfg = cls._merge(cfg, env_cfg) 248 | 249 | return cfg 250 | 251 | @staticmethod 252 | def _merge(base: "Config", override: "Config") -> "Config": 253 | """Merge configs: non-None values in override take precedence""" 254 | result = copy.deepcopy(base) 255 | 256 | # Merge each section 257 | for section_name in ['llm', 
'agent_config', 'logging']: 258 |             base_section = getattr(result, section_name) 259 |             override_section = getattr(override, section_name) 260 | 261 |             for field in fields(base_section): 262 |                 override_value = getattr(override_section, field.name) 263 |                 if override_value is not None: 264 |                     setattr(base_section, field.name, override_value) 265 | 266 |         return result 267 | 268 |     @classmethod 269 |     def merge_with_cli_args(cls, config: "Config", **cli_args) -> "Config": 270 |         """Merge CLI arguments (highest priority)""" 271 |         result = copy.deepcopy(config) 272 | 273 |         # Special mapping: CLI 'think' → config 'enable_thinking' 274 |         if cli_args.get('think') is not None: 275 |             result.llm.enable_thinking = cli_args['think'] 276 | 277 |         # Auto-match all other CLI args to config fields by name 278 |         for key, value in cli_args.items(): 279 |             if value is None or key == 'think': 280 |                 continue 281 | 282 |             # Try to find matching field in each section 283 |             for section in [result.llm, result.agent_config, result.logging]: 284 |                 if hasattr(section, key): 285 |                     setattr(section, key, value) 286 |                     break 287 | 288 |         return result 289 | 290 |     def validate(self) -> List[str]: 291 |         """Validate configuration""" 292 |         errors = [] 293 | 294 |         # Check required LLM parameters (field, display name, env var per ENV_MAPPING) 295 |         required_llm_fields = [ 296 |             ('api_key', 'LLM API key', 'OPENAI_API_KEY'), 297 |             ('model', 'LLM model', 'LLM_MODEL'), 298 |             ('base_url', 'LLM base URL', 'LLM_BASE_URL'), 299 |         ] 300 | 301 |         for field_name, display_name, env_var in required_llm_fields: 302 |             value = getattr(self.llm, field_name) 303 |             if value is None or (isinstance(value, str) and value.strip() == ""): 304 |                 errors.append( 305 |                     f"{display_name} is required. " 306 |                     f"Set it via --{field_name.replace('_', '-')} flag, " 307 |                     f"{env_var} environment variable, " 308 |                     f"or config file."
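Taken together, the precedence chain is defaults < YAML file < environment < CLI flags; a minimal sketch (flag values illustrative):

```python
from codefuse.config import Config

cfg = Config.load(".cfuse.yaml")                                    # defaults, file, env
cfg = Config.merge_with_cli_args(cfg, model="my-model", yolo=True)  # CLI wins

errors = cfg.validate()
if errors:
    raise SystemExit("\n".join(errors))
```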
309 | ) 310 | 311 | # Check value range validations 312 | for section_name, field, check, msg in VALIDATIONS: 313 | value = getattr(getattr(self, section_name), field) 314 | if value is not None and not check(value): 315 | errors.append(f"{msg}, got {value}") 316 | 317 | return errors 318 | -------------------------------------------------------------------------------- /codefuse/tools/builtin/glob.py: -------------------------------------------------------------------------------- 1 | """ 2 | Glob Tool - Fast file pattern matching tool 3 | """ 4 | 5 | import glob as glob_lib 6 | from pathlib import Path 7 | from typing import Optional, List 8 | from dataclasses import dataclass 9 | 10 | from codefuse.tools.base import BaseTool, ToolDefinition, ToolParameter, ToolResult 11 | from codefuse.tools.builtin.filesystem_base import FileSystemToolMixin 12 | from codefuse.tools.builtin.list_directory import DEFAULT_IGNORE_PATTERNS 13 | from codefuse.observability import mainLogger 14 | 15 | 16 | # Result limit 17 | MAX_FILES = 100 18 | 19 | 20 | @dataclass 21 | class GlobResult: 22 | """Result structure for glob operations""" 23 | files: List[str] # Absolute file paths 24 | total_found: int # Total files found before truncation 25 | truncated: bool # Whether results were truncated 26 | 27 | 28 | class GlobTool(FileSystemToolMixin, BaseTool): 29 | """ 30 | Tool for finding files by glob pattern 31 | 32 | Features: 33 | - Fast file pattern matching 34 | - Supports standard glob patterns (*, ?, **, [...]) 35 | - Returns matching file paths sorted by modification time (newest first) 36 | - Automatically ignores common build/cache directories 37 | - Result limit to prevent excessive output 38 | """ 39 | 40 | def __init__(self, workspace_root: Optional[Path] = None): 41 | """ 42 | Initialize GlobTool 43 | 44 | Args: 45 | workspace_root: Workspace root directory to restrict searches. 46 | Defaults to current working directory. 47 | """ 48 | super().__init__(workspace_root=workspace_root) 49 | 50 | @property 51 | def definition(self) -> ToolDefinition: 52 | """Define the glob tool""" 53 | return ToolDefinition( 54 | name="glob", 55 | description=( 56 | "Find files by name patterns using glob syntax. Handles codebases of any size efficiently.\n\n" 57 | "- Accepts standard glob patterns such as:\n" 58 | "*.py - match files in current directory\n" 59 | "**/*.js - search all subdirectories recursively\n" 60 | "src/**/*.ts - limit search to specific path\n" 61 | "test_*.py - match files with prefix\n\n" 62 | "- Notes:\n" 63 | "Limits results to 100 file paths.\n" 64 | "Supports parallel calls with different patterns(Recommended to use this tool in a batch of patterns to find files that are potentially useful)." 65 | ).strip(), 66 | parameters=[ 67 | ToolParameter( 68 | name="pattern", 69 | type="string", 70 | description="The glob pattern to match files against", 71 | required=True, 72 | ), 73 | ToolParameter( 74 | name="path", 75 | type="string", 76 | description=( 77 | "The directory to search in. If not specified, the workspace root will be used. " 78 | "IMPORTANT: Omit this field to use the default directory. Must be an absolute path if provided." 
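As a sketch of how the documented patterns map onto calls (workspace path hypothetical; execute() is defined further down in this file):

```python
from pathlib import Path
from codefuse.tools.builtin.glob import GlobTool

tool = GlobTool(workspace_root=Path("/home/user/project"))
tool.execute(pattern="**/*.py")                                   # recursive search
tool.execute(pattern="src/**/*.ts", path="/home/user/project")    # scoped to a subtree
```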
79 | ), 80 | required=False, 81 | ), 82 | ], 83 | requires_confirmation=False, # Searching is safe 84 | ) 85 | 86 | def _should_ignore(self, file_path: Path) -> bool: 87 | """ 88 | Check if a file should be ignored based on default patterns 89 | 90 | Args: 91 | file_path: Path to check 92 | 93 | Returns: 94 | True if file should be ignored 95 | """ 96 | # Check against all default ignore patterns 97 | for pattern in DEFAULT_IGNORE_PATTERNS: 98 | # Check if pattern matches any part of the path 99 | for part in file_path.parts: 100 | if glob_lib.fnmatch.fnmatch(part, pattern): 101 | return True 102 | 103 | # Also check the full path string 104 | if glob_lib.fnmatch.fnmatch(str(file_path), pattern): 105 | return True 106 | 107 | return False 108 | 109 | def _execute_glob(self, pattern: str, search_path: Path) -> List[Path]: 110 | """ 111 | Execute glob search with pattern 112 | 113 | Args: 114 | pattern: Glob pattern to match 115 | search_path: Directory to search in 116 | 117 | Returns: 118 | List of matching file paths 119 | """ 120 | # Determine if pattern contains recursive wildcard 121 | has_recursive = '**' in pattern 122 | 123 | # Build the full pattern path 124 | full_pattern = str(search_path / pattern) 125 | 126 | # Execute glob 127 | matches = glob_lib.glob(full_pattern, recursive=has_recursive) 128 | 129 | # Convert to Path objects and filter 130 | result_paths: List[Path] = [] 131 | for match in matches: 132 | path = Path(match).resolve() 133 | 134 | # Only include files (not directories) 135 | if not path.is_file(): 136 | continue 137 | 138 | # Skip ignored paths 139 | if self._should_ignore(path): 140 | continue 141 | 142 | result_paths.append(path) 143 | 144 | return result_paths 145 | 146 | def _sort_by_mtime(self, file_paths: List[Path]) -> List[Path]: 147 | """ 148 | Sort files by modification time (newest first), then by filename 149 | 150 | Args: 151 | file_paths: List of file paths 152 | 153 | Returns: 154 | Sorted list of file paths 155 | """ 156 | if not file_paths: 157 | return file_paths 158 | 159 | try: 160 | # Get file stats for all files 161 | file_stats = [] 162 | for file_path in file_paths: 163 | try: 164 | mtime = file_path.stat().st_mtime if file_path.exists() else 0 165 | file_stats.append((file_path, mtime)) 166 | except Exception: 167 | # If we can't stat the file, use mtime = 0 168 | file_stats.append((file_path, 0)) 169 | 170 | # Sort by modification time (newest first), then by filename (alphabetical) 171 | file_stats.sort(key=lambda x: (-x[1], str(x[0]))) 172 | 173 | return [fp for fp, _ in file_stats] 174 | except Exception as e: 175 | mainLogger.warning(f"Error sorting files by mtime: {e}") 176 | return file_paths 177 | 178 | def _apply_limit(self, file_paths: List[Path], limit: int = MAX_FILES) -> GlobResult: 179 | """ 180 | Apply result limit 181 | 182 | Args: 183 | file_paths: List of file paths 184 | limit: Maximum number of files to return 185 | 186 | Returns: 187 | GlobResult with limited files and truncation info 188 | """ 189 | total_found = len(file_paths) 190 | truncated = total_found > limit 191 | limited_files = file_paths[:limit] if truncated else file_paths 192 | 193 | # Convert to absolute path strings 194 | absolute_paths = [str(fp.resolve()) for fp in limited_files] 195 | 196 | return GlobResult( 197 | files=absolute_paths, 198 | total_found=total_found, 199 | truncated=truncated, 200 | ) 201 | 202 | def execute( 203 | self, 204 | pattern: str, 205 | path: Optional[str] = None, 206 | **kwargs 207 | ) -> ToolResult: 208 | """ 209 | 
Execute the glob tool 210 | 211 | Args: 212 | pattern: Glob pattern to match files against 213 | path: Optional directory to search in (defaults to workspace_root) 214 | 215 | Returns: 216 | ToolResult with: 217 | - content: File paths (one per line) for LLM 218 | - display: Summary message for user 219 | """ 220 | try: 221 | # Step 1: Validate pattern 222 | if not pattern or not isinstance(pattern, str): 223 | return self._create_error_result( 224 | "Pattern is required and must be a non-empty string", 225 | "Invalid pattern" 226 | ) 227 | 228 | # Step 2: Resolve search path 229 | if path: 230 | # Check if path is absolute (required by our convention) 231 | if not Path(path).is_absolute(): 232 | return self._create_error_result( 233 | f"Path must be absolute, but got relative path: {path}", 234 | "Path must be absolute" 235 | ) 236 | 237 | search_path = self._resolve_path(path) 238 | 239 | # Check if within workspace 240 | if error := self._check_within_workspace(search_path): 241 | mainLogger.warning(f"Glob search outside workspace: {error}") 242 | return self._create_error_result(error, "Access denied: outside workspace") 243 | else: 244 | search_path = self._workspace_root 245 | 246 | # Check if path exists 247 | if not search_path.exists(): 248 | return self._create_error_result( 249 | f"Path not found: {search_path}", 250 | "Path not found" 251 | ) 252 | 253 | # Check if it's a directory 254 | if not search_path.is_dir(): 255 | return self._create_error_result( 256 | f"Path is not a directory: {search_path}", 257 | "Not a directory" 258 | ) 259 | 260 | # Step 3: Execute glob search 261 | mainLogger.info(f"Executing glob search: pattern='{pattern}', path={search_path}") 262 | matched_files = self._execute_glob(pattern, search_path) 263 | 264 | # Step 4: Sort by modification time 265 | sorted_files = self._sort_by_mtime(matched_files) 266 | 267 | # Step 5: Apply limit 268 | result = self._apply_limit(sorted_files, MAX_FILES) 269 | 270 | # Step 6: Format output 271 | if result.total_found == 0: 272 | content = "No files found" 273 | display = "No files found" 274 | else: 275 | # Join file paths with newlines 276 | content = '\n'.join(result.files) 277 | 278 | # Add truncation message if needed 279 | if result.truncated: 280 | content += ( 281 | f"\n\n(Results are truncated. Found {result.total_found} files, " 282 | f"showing first {len(result.files)}. 
" 283 | f"Consider using a more specific path or pattern.)" 284 | ) 285 | 286 | # Display message 287 | num_files = len(result.files) 288 | if result.truncated: 289 | display = f"✓ Found {num_files} files (truncated from {result.total_found})" 290 | else: 291 | display = f"✓ Found {num_files} file{'s' if num_files != 1 else ''}" 292 | 293 | mainLogger.info( 294 | f"Glob search complete: pattern='{pattern}', " 295 | f"found={result.total_found}, returned={len(result.files)}, " 296 | f"truncated={result.truncated}" 297 | ) 298 | 299 | return ToolResult(content=content, display=display) 300 | 301 | except Exception as e: 302 | error_msg = f"Unexpected error during glob search: {str(e)}" 303 | mainLogger.error(error_msg, exc_info=True) 304 | return self._create_error_result(error_msg, f"Error: {str(e)}") 305 | 306 | -------------------------------------------------------------------------------- /codefuse/cli/interactive.py: -------------------------------------------------------------------------------- 1 | """ 2 | Interactive Mode - REPL for continuous conversation 3 | """ 4 | 5 | import json 6 | from datetime import datetime 7 | from typing import Dict, Any, List 8 | from rich.console import Console 9 | from rich.panel import Panel 10 | from rich.markdown import Markdown 11 | from prompt_toolkit import PromptSession 12 | from prompt_toolkit.history import InMemoryHistory 13 | 14 | from codefuse.llm.base import Message, MessageRole 15 | from codefuse.observability import mainLogger, get_session_dir, close_all_loggers 16 | 17 | console = Console() 18 | 19 | 20 | def run_interactive( 21 | components: Dict[str, Any], 22 | stream: bool = True, 23 | ): 24 | """ 25 | Run agent in interactive mode (REPL) 26 | 27 | Args: 28 | components: Dictionary of initialized components from initialize_agent_components() 29 | stream: Whether to stream LLM responses 30 | save_session: Whether to save session information 31 | """ 32 | # Unpack components 33 | agent_profile = components["agent_profile"] 34 | env_info = components["env_info"] 35 | agent_loop = components["agent_loop"] 36 | available_tools = components["available_tools"] 37 | session_dir = components["session_dir"] 38 | config = components["config"] 39 | model_name = components["model_name"] 40 | context_engine = components["context_engine"] 41 | metrics_collector = components["metrics_collector"] 42 | resumed_conversation = components["resumed_conversation"] 43 | 44 | # Display welcome message 45 | console.print() 46 | 47 | # Build session info 48 | session_info = f"Session ID: {context_engine.session_id}" 49 | if resumed_conversation: 50 | session_info += f"\n[cyan]Resumed with {len(resumed_conversation)} messages[/cyan]" 51 | 52 | console.print(Panel( 53 | f"[bold blue]CodeFuse Interactive Mode[/bold blue]\n\n" 54 | f"Agent: {agent_profile.name}\n" 55 | f"Model: {model_name}\n" 56 | f"{session_info}\n\n" 57 | f"[dim]Type your message and press Enter to send.[/dim]\n" 58 | f"[dim]Special commands:[/dim]\n" 59 | f" /exit, /quit - Exit the session\n" 60 | f" /help - Show help information\n" 61 | f" /clear - Clear conversation history\n" 62 | f" /status - Show session status", 63 | border_style="blue" 64 | )) 65 | console.print() 66 | 67 | if config.agent_config.yolo: 68 | console.print("[yellow]⚡ YOLO mode enabled - auto-confirming all tools[/yellow]\n") 69 | 70 | # Initialize prompt session with history 71 | session = PromptSession(history=InMemoryHistory()) 72 | 73 | # Conversation history (for context across multiple turns) 74 | # If resuming a 
session, start with the loaded history 75 | conversation_history: List[Message] = resumed_conversation if resumed_conversation else [] 76 | 77 | mainLogger.info("Interactive mode started", session_id=context_engine.session_id) 78 | 79 | # REPL loop 80 | while True: 81 | try: 82 | # Get user input 83 | user_input = session.prompt("You: ").strip() 84 | 85 | if not user_input: 86 | continue 87 | 88 | # Handle special commands 89 | if user_input.startswith("/"): 90 | if user_input in ["/exit", "/quit"]: 91 | console.print("\n[yellow]Exiting interactive mode...[/yellow]") 92 | break 93 | 94 | elif user_input == "/help": 95 | _show_help() 96 | continue 97 | 98 | elif user_input == "/clear": 99 | conversation_history.clear() 100 | # Note: This only clears local conversation history 101 | # ContextEngine messages are not cleared (would need session restart) 102 | console.print("[green]✓ Local conversation history cleared[/green]") 103 | console.print("[dim]Note: Full reset requires restarting the session[/dim]\n") 104 | mainLogger.info("Conversation history cleared", session_id=context_engine.session_id) 105 | continue 106 | 107 | elif user_input == "/status": 108 | _show_status(components, conversation_history) 109 | continue 110 | 111 | else: 112 | console.print(f"[red]Unknown command:[/red] {user_input}") 113 | console.print("[dim]Type /help for available commands[/dim]\n") 114 | continue 115 | 116 | # User message will be logged by agent_loop automatically 117 | 118 | # Display thinking indicator 119 | console.print("\n[dim]Assistant:[/dim] ", end="") 120 | 121 | # Run agent loop 122 | final_response = "" 123 | current_content = "" 124 | current_tool_calls = [] 125 | iterations = 1 126 | 127 | for event in agent_loop.run( 128 | user_query=user_input, 129 | stream=stream, 130 | ): 131 | if event.type == "llm_start": 132 | iteration = event.data.get("iteration", 0) 133 | if iteration > 1: 134 | console.print(f"\n[dim]→ Iteration {iteration}[/dim]") 135 | 136 | elif event.type == "llm_chunk": 137 | delta = event.data["delta"] 138 | console.print(delta, end="") 139 | current_content += delta 140 | 141 | elif event.type == "llm_done": 142 | if not stream: 143 | content = event.data["content"] 144 | if content: 145 | console.print(content) 146 | current_content = content 147 | else: 148 | console.print() 149 | 150 | if "tool_calls" in event.data and event.data["tool_calls"]: 151 | current_tool_calls = event.data["tool_calls"] 152 | 153 | elif event.type == "tool_start": 154 | tool_name = event.data["tool_name"] 155 | arguments = event.data.get("arguments", {}) 156 | args_str = _format_tool_arguments(arguments) 157 | console.print(f"\n[cyan]🔧 Executing tool:[/cyan] {tool_name}{args_str}") 158 | 159 | elif event.type == "tool_done": 160 | tool_name = event.data["tool_name"] 161 | tool_call_id = event.data.get("tool_call_id") 162 | arguments = event.data.get("arguments", {}) 163 | display = event.data.get("display", event.data.get("result", "")) 164 | confirmed = event.data.get("confirmed", True) 165 | 166 | if not confirmed: 167 | console.print(f"[yellow]⚠️ Tool rejected:[/yellow] {tool_name}") 168 | else: 169 | # Use display field (user-friendly) instead of result (LLM content) 170 | console.print(f"[cyan]{display}[/cyan]") 171 | 172 | # Tool results are logged by tool_executor automatically 173 | 174 | elif event.type == "agent_done": 175 | final_response = event.data["final_response"] 176 | iterations = event.data["iterations"] 177 | 178 | # Save assistant message to trajectory 179 | 
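The same event stream can be consumed outside the REPL; a minimal sketch using only the event types handled above (query illustrative):

```python
for event in agent_loop.run(user_query="summarize recent changes", stream=True):
    if event.type == "llm_chunk":
        print(event.data["delta"], end="")        # streamed tokens
    elif event.type == "tool_start":
        print(f"\n[tool] {event.data['tool_name']}")
    elif event.type == "agent_done":
        final_answer = event.data["final_response"]
```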
assistant_message = { 180 | "role": "assistant", 181 | "content": final_response or current_content, 182 | "timestamp": datetime.now().isoformat(), 183 | } 184 | if current_tool_calls: 185 | assistant_message["tool_calls"] = current_tool_calls 186 | # Assistant messages are logged by agent_loop automatically 187 | 188 | # Update conversation history for next turn 189 | conversation_history.append(Message( 190 | role=MessageRole.USER, 191 | content=user_input, 192 | )) 193 | conversation_history.append(Message( 194 | role=MessageRole.ASSISTANT, 195 | content=final_response or current_content, 196 | )) 197 | 198 | console.print() 199 | 200 | elif event.type == "error": 201 | error = event.data["error"] 202 | console.print(f"\n[red]Error:[/red] {error}") 203 | 204 | # Reset for next turn 205 | current_content = "" 206 | current_tool_calls = [] 207 | 208 | except KeyboardInterrupt: 209 | console.print("\n\n[yellow]Use /exit or /quit to exit[/yellow]\n") 210 | continue 211 | 212 | except Exception as e: 213 | console.print(f"\n[red]Error:[/red] {str(e)}\n") 214 | mainLogger.error("Interactive loop error", error=str(e), exc_info=True) 215 | continue 216 | 217 | # Generate and save metrics summary 218 | summary = metrics_collector.generate_summary() 219 | 220 | # Write summary to trajectory 221 | context_engine.write_session_summary(summary) 222 | 223 | mainLogger.info("Interactive mode completed", status="success") 224 | 225 | # Display session summary 226 | console.print() 227 | console.print(Panel( 228 | f"[bold]Session Summary[/bold]\n\n" 229 | f"[green]Total Prompts:[/green] {summary['session']['total_prompts']}\n" 230 | f"[green]Total Iterations:[/green] {summary['prompts']['total_iterations']}\n" 231 | f"[green]API Calls:[/green] {summary['api_calls']['total']} " 232 | f"({summary['api_calls']['success_rate']}% success)\n" 233 | f"[green]Total Tokens:[/green] {summary['api_calls']['tokens']['total']:,}\n" 234 | f" • Prompt: {summary['api_calls']['tokens']['prompt']:,}\n" 235 | f" • Completion: {summary['api_calls']['tokens']['completion']:,}\n" 236 | f" • Cache Read: {summary['api_calls']['tokens']['cache_read']:,}\n" 237 | f"[green]Tool Calls:[/green] {summary['tool_calls']['total']} " 238 | f"({summary['tool_calls']['success_rate']}% success)\n" 239 | f"[green]Session Duration:[/green] {summary['session']['duration']:.2f}s", 240 | title="[bold]Performance Metrics[/bold]", 241 | border_style="cyan" 242 | )) 243 | 244 | # Display session info 245 | console.print(f"\n[dim]Session logs:[/dim] {get_session_dir()}") 246 | 247 | # Close all loggers 248 | close_all_loggers() 249 | 250 | 251 | def _format_tool_arguments(arguments: Dict[str, Any], max_length: int = 100) -> str: 252 | """ 253 | Format tool arguments for display, with truncation if too long 254 | 255 | Args: 256 | arguments: Tool arguments dictionary 257 | max_length: Maximum length before truncation 258 | 259 | Returns: 260 | Formatted string representation of arguments 261 | """ 262 | if not arguments: 263 | return "" 264 | 265 | # Convert arguments to JSON string 266 | args_json = json.dumps(arguments, ensure_ascii=False) 267 | 268 | # If short enough, return as-is 269 | if len(args_json) <= max_length: 270 | return f" [dim]{args_json}[/dim]" 271 | 272 | # Truncate and add ellipsis 273 | truncated = args_json[:max_length] + "..." 
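# Illustrative example (hypothetical values): with max_length=100, a payload
# such as {"file_path": "/workspace/app.py", "old_string": "<several hundred chars>"}
# is serialized via json.dumps, cut to its first 100 characters, and suffixed with "...".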
274 | return f" [dim]{truncated}[/dim]" 275 | 276 | 277 | def _show_help(): 278 | """Show help information""" 279 | console.print() 280 | console.print(Panel( 281 | "[bold]Interactive Mode Commands[/bold]\n\n" 282 | "/exit, /quit - Exit interactive mode\n" 283 | "/help - Show this help message\n" 284 | "/clear - Clear conversation history\n" 285 | "/status - Show current session status\n\n" 286 | "[dim]Just type your message to chat with the assistant.[/dim]", 287 | border_style="blue", 288 | title="Help" 289 | )) 290 | console.print() 291 | 292 | 293 | def _show_status(components: Dict[str, Any], conversation_history: List[Message]): 294 | """Show current session status""" 295 | agent_profile = components["agent_profile"] 296 | model_name = components["model_name"] 297 | context_engine = components["context_engine"] 298 | config = components["config"] 299 | 300 | console.print() 301 | console.print(Panel( 302 | f"[bold]Session Status[/bold]\n\n" 303 | f"Session ID: {context_engine.session_id}\n" 304 | f"Agent: {agent_profile.name}\n" 305 | f"Model: {model_name}\n" 306 | f"Conversation Turns: {len(conversation_history) // 2}\n" 307 | f"Max Iterations: {config.agent_config.max_iterations}\n" 308 | f"YOLO Mode: {'Enabled' if config.agent_config.yolo else 'Disabled'}\n" 309 | f"Logs: {get_session_dir()}", 310 | border_style="blue", 311 | title="Status" 312 | )) 313 | console.print() 314 | 315 | -------------------------------------------------------------------------------- /codefuse/cli/main.py: -------------------------------------------------------------------------------- 1 | """ 2 | Main CLI Entry Point - Unified command-line interface 3 | """ 4 | 5 | import sys 6 | import json 7 | import click 8 | from rich.console import Console 9 | 10 | from codefuse.config import Config 11 | from codefuse.core import AgentProfileManager, AgentProfile 12 | from codefuse.cli.common import initialize_agent_components, handle_list_agents 13 | from codefuse.cli.headless import run_headless 14 | from codefuse.cli.interactive import run_interactive 15 | 16 | console = Console() 17 | 18 | 19 | @click.command() 20 | @click.option( 21 | "-p", "--prompt", 22 | help="User prompt/query (if provided, runs in headless mode)" 23 | ) 24 | @click.option( 25 | "-pp", "--prompt-file", 26 | type=click.Path(exists=True), 27 | help="Read prompt from file (mutually exclusive with -p)" 28 | ) 29 | @click.option( 30 | "--agent", 31 | default="default", 32 | help="Agent profile to use (default: default)" 33 | ) 34 | @click.option( 35 | "--agent-file", 36 | type=click.Path(exists=True), 37 | help="Load agent profile from Markdown file (overrides --agent)" 38 | ) 39 | @click.option( 40 | "--provider", 41 | help="LLM provider (openai_compatible, anthropic, gemini)" 42 | ) 43 | @click.option( 44 | "--model", 45 | help="Override model name" 46 | ) 47 | @click.option( 48 | "--api-key", 49 | help="API key (or use environment variable)" 50 | ) 51 | @click.option( 52 | "--base-url", 53 | help="Base URL for API endpoint" 54 | ) 55 | @click.option( 56 | "-v", "--verbose", 57 | is_flag=True, 58 | help="Enable verbose logging" 59 | ) 60 | @click.option( 61 | "--logs-dir", 62 | help="Base directory for logs (default: ~/.cfuse/logs)" 63 | ) 64 | @click.option( 65 | "--max-iterations", 66 | type=int, 67 | help="Maximum agent iterations (default: 200)" 68 | ) 69 | @click.option( 70 | "--stream/--no-stream", 71 | default=True, 72 | help="Enable/disable streaming output (default: enabled)" 73 | ) 74 | @click.option( 75 | "--yolo", 76 | 
is_flag=True,
77 | help="YOLO mode: auto-confirm all tool executions"
78 | )
79 | @click.option(
80 | "--list-agents",
81 | is_flag=True,
82 | help="List available agent profiles and exit"
83 | )
84 | @click.option(
85 | "--config",
86 | type=click.Path(exists=True),
87 | help="Path to configuration file"
88 | )
89 | @click.option(
90 | "--save-session",
91 | is_flag=True,
92 | help="Save session trajectory to file"
93 | )
94 | @click.option(
95 | "--temperature",
96 | type=float,
97 | help="Model temperature (0.0-2.0, default: 0.0)"
98 | )
99 | @click.option(
100 | "--top-p",
101 | type=float,
102 | help="Nucleus sampling parameter (0.0-1.0)"
103 | )
104 | @click.option(
105 | "--top-k",
106 | type=int,
107 | help="Top-k sampling parameter"
108 | )
109 | @click.option(
110 | "--parallel-tool-calls/--no-parallel-tool-calls",
111 | default=None,
112 | help="Enable/disable parallel tool calls (default: enabled)"
113 | )
114 | @click.option(
115 | "--think",
116 | is_flag=True,
117 | help="Enable thinking mode for models that support it"
118 | )
119 | @click.option(
120 | "--session-id",
121 | help="Custom session ID (auto-generated if not provided)"
122 | )
123 | @click.option(
124 | "--http",
125 | is_flag=True,
126 | help="Enable HTTP server mode for external tool execution"
127 | )
128 | @click.option(
129 | "--port",
130 | type=int,
131 | default=8080,
132 | help="Port for HTTP server mode (default: 8080)"
133 | )
134 | @click.option(
135 | "--host",
136 | default="0.0.0.0",
137 | help="Host address for HTTP server mode (default: 0.0.0.0, use 127.0.0.1 for localhost only)"
138 | )
139 | @click.option(
140 | "--remote-tool-enabled",
141 | is_flag=True,
142 | help="Enable remote tool execution via HTTP"
143 | )
144 | @click.option(
145 | "--remote-tool-url",
146 | help="URL of the remote tool service"
147 | )
148 | @click.option(
149 | "--remote-tool-instance-id",
150 | help="Instance ID for remote tool execution"
151 | )
152 | @click.option(
153 | "--remote-tool-timeout",
154 | type=int,
155 | help="Timeout for remote tool calls in seconds (default: 60)"
156 | )
157 | @click.option(
158 | "--image-url",
159 | multiple=True,
160 | help="Image URL (can be specified multiple times, supports HTTP/HTTPS or base64 data URI)"
161 | )
162 | @click.option(
163 | "--image-url-file",
164 | type=click.Path(exists=True),
165 | help="Read image URLs from JSON file (should contain a list of URLs)"
166 | )
167 | def main(
168 | prompt: str,
169 | prompt_file: str,
170 | agent: str,
171 | agent_file: str,
172 | provider: str,
173 | model: str,
174 | api_key: str,
175 | base_url: str,
176 | verbose: bool,
177 | logs_dir: str,
178 | max_iterations: int,
179 | stream: bool,
180 | yolo: bool,
181 | list_agents: bool,
182 | config: str,
183 | save_session: bool,
184 | temperature: float,
185 | top_p: float,
186 | top_k: int,
187 | parallel_tool_calls: bool,
188 | think: bool,
189 | session_id: str,
190 | http: bool,
191 | port: int,
192 | host: str,
193 | remote_tool_enabled: bool,
194 | remote_tool_url: str,
195 | remote_tool_instance_id: str,
196 | remote_tool_timeout: int,
197 | image_url: tuple,
198 | image_url_file: str,
199 | ):
200 | """
201 | CodeFuse Agent - AI-powered coding assistant
202 | 
203 | Runs in headless mode (with -p/--prompt or -pp/--prompt-file), in interactive mode (when no prompt is given), or as an HTTP server (with --http).
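Mode selection: --http takes precedence; otherwise a prompt (or prompt file) runs a single headless turn, and with no prompt an interactive REPL starts (see the examples below).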
204 | 205 | \b 206 | Examples: 207 | # Headless mode 208 | cfuse -p "Read README.md and summarize it" 209 | 210 | # Read prompt from file 211 | cfuse -pp prompt.txt 212 | 213 | # Interactive mode 214 | cfuse 215 | 216 | # HTTP server mode (listen on all interfaces) 217 | cfuse --http --port 8080 218 | 219 | # HTTP server mode (localhost only) 220 | cfuse --http --port 8080 --host 127.0.0.1 221 | 222 | # Resume an existing session (loads conversation history) 223 | cfuse --session-id session_20241029_123456_abc123def 224 | 225 | # YOLO mode (auto-confirm all tools) 226 | cfuse -p "Create a hello.py file" --yolo 227 | 228 | # Use specific agent 229 | cfuse -p "Debug this error" --agent debugger 230 | 231 | # Load agent from file 232 | cfuse -p "Help me with this task" --agent-file ./my_agent.md 233 | 234 | # List available agents 235 | cfuse --list-agents 236 | """ 237 | 238 | try: 239 | # Check for mutually exclusive parameters 240 | if prompt and prompt_file: 241 | console.print("[red]Error:[/red] Cannot use both -p/--prompt and -pp/--prompt-file at the same time") 242 | sys.exit(1) 243 | 244 | # Validate image_url usage 245 | if (image_url or image_url_file) and not (prompt or prompt_file): 246 | console.print("[red]Error:[/red] --image-url/--image-url-file requires -p/--prompt or -pp/--prompt-file") 247 | sys.exit(1) 248 | 249 | # If prompt-file is provided, read the file content 250 | if prompt_file: 251 | try: 252 | with open(prompt_file, 'r', encoding='utf-8') as f: 253 | prompt = f.read().strip() 254 | if not prompt: 255 | console.print(f"[red]Error:[/red] Prompt file '{prompt_file}' is empty") 256 | sys.exit(1) 257 | except Exception as e: 258 | console.print(f"[red]Error:[/red] Failed to read prompt file '{prompt_file}': {e}") 259 | sys.exit(1) 260 | 261 | # If image-url-file is provided, read and parse the JSON file 262 | image_urls_from_file = [] 263 | if image_url_file: 264 | try: 265 | with open(image_url_file, 'r', encoding='utf-8') as f: 266 | image_urls_from_file = json.load(f) 267 | 268 | # Validate that it's a list 269 | if not isinstance(image_urls_from_file, list): 270 | console.print(f"[red]Error:[/red] Image URL file '{image_url_file}' must contain a JSON list") 271 | sys.exit(1) 272 | 273 | # Validate that all elements are strings 274 | if not all(isinstance(url, str) for url in image_urls_from_file): 275 | console.print(f"[red]Error:[/red] All elements in image URL file must be strings") 276 | sys.exit(1) 277 | 278 | if not image_urls_from_file: 279 | console.print(f"[yellow]Warning:[/yellow] Image URL file '{image_url_file}' is empty") 280 | 281 | except json.JSONDecodeError as e: 282 | console.print(f"[red]Error:[/red] Failed to parse JSON from '{image_url_file}': {e}") 283 | sys.exit(1) 284 | except Exception as e: 285 | console.print(f"[red]Error:[/red] Failed to read image URL file '{image_url_file}': {e}") 286 | sys.exit(1) 287 | 288 | # Merge image URLs from both sources (CLI args and file) 289 | all_image_urls = list(image_url) + image_urls_from_file 290 | 291 | # Handle HTTP server mode 292 | if http: 293 | from codefuse.cli.http_server import run_http_server 294 | 295 | # HTTP mode doesn't require API key for LLM 296 | # Only needs minimal config 297 | cfg = Config.load(config) 298 | 299 | # Merge CLI args (None filtering is handled by Config.merge_with_cli_args) 300 | cli_args = { 301 | "verbose": verbose, 302 | "logs_dir": logs_dir, 303 | } 304 | 305 | cfg = Config.merge_with_cli_args(cfg, **cli_args) 306 | 307 | # Start HTTP server 308 | 
run_http_server(cfg, host, port) 309 | return 310 | 311 | # Handle --list-agents early (no need to initialize components) 312 | if list_agents: 313 | agent_manager = AgentProfileManager() 314 | handle_list_agents(agent_manager) 315 | return 316 | 317 | # Load configuration 318 | cfg = Config.load(config) 319 | 320 | # Merge CLI arguments (None filtering is handled by Config.merge_with_cli_args) 321 | cli_args = { 322 | "provider": provider, 323 | "model": model, 324 | "api_key": api_key, 325 | "base_url": base_url, 326 | "temperature": temperature, 327 | "top_p": top_p, 328 | "top_k": top_k, 329 | "parallel_tool_calls": parallel_tool_calls, 330 | "enable_thinking": think, 331 | "max_iterations": max_iterations, 332 | "yolo": yolo, 333 | "agent": agent, 334 | "verbose": verbose, 335 | "logs_dir": logs_dir, 336 | "remote_tool_enabled": remote_tool_enabled, 337 | "remote_tool_url": remote_tool_url, 338 | "remote_tool_instance_id": remote_tool_instance_id, 339 | "remote_tool_timeout": remote_tool_timeout, 340 | } 341 | 342 | cfg = Config.merge_with_cli_args(cfg, **cli_args) 343 | 344 | # Validate configuration 345 | validation_errors = cfg.validate() 346 | if validation_errors: 347 | console.print("[red]Configuration Errors:[/red]") 348 | for error in validation_errors: 349 | console.print(f" - {error}") 350 | sys.exit(1) 351 | 352 | # Handle --agent-file: load agent profile from file 353 | loaded_agent_profile = None 354 | if agent_file: 355 | try: 356 | console.print(f"[cyan]Loading agent from file:[/cyan] {agent_file}") 357 | loaded_agent_profile = AgentProfile.from_markdown(agent_file) 358 | console.print(f"[green]✓ Agent loaded:[/green] {loaded_agent_profile.name}") 359 | except Exception as e: 360 | console.print(f"[red]Error:[/red] Failed to load agent from file '{agent_file}'") 361 | console.print(f"[red]Reason:[/red] {str(e)}") 362 | if verbose: 363 | import traceback 364 | console.print(traceback.format_exc()) 365 | sys.exit(1) 366 | 367 | # Initialize all components once (shared by both modes) 368 | components = initialize_agent_components( 369 | cfg=cfg, 370 | agent_name=agent, 371 | agent_profile=loaded_agent_profile, 372 | verbose=cfg.logging.verbose, 373 | session_id=session_id, 374 | ) 375 | 376 | # Route to appropriate mode based on presence of prompt 377 | if prompt: 378 | # Headless mode: single prompt execution 379 | run_headless( 380 | prompt=prompt, 381 | components=components, 382 | stream=stream, 383 | image_urls=tuple(all_image_urls), 384 | ) 385 | else: 386 | # Interactive mode: REPL 387 | run_interactive( 388 | components=components, 389 | stream=stream, 390 | ) 391 | 392 | except KeyboardInterrupt: 393 | console.print("\n\n[yellow]Interrupted by user[/yellow]") 394 | sys.exit(130) 395 | 396 | except Exception as e: 397 | console.print(f"\n[red]Error:[/red] {str(e)}") 398 | if verbose: 399 | import traceback 400 | console.print(traceback.format_exc()) 401 | sys.exit(1) 402 | 403 | 404 | if __name__ == "__main__": 405 | main() 406 | 407 | -------------------------------------------------------------------------------- /codefuse/tools/builtin/edit_file.py: -------------------------------------------------------------------------------- 1 | """ 2 | Edit File Tool - Perform exact string replacements in files 3 | """ 4 | 5 | from pathlib import Path 6 | from typing import Optional, List, Tuple, TYPE_CHECKING 7 | 8 | from codefuse.tools.base import BaseTool, ToolDefinition, ToolParameter, ToolResult 9 | from codefuse.tools.builtin.filesystem_base import 
FileSystemToolMixin, MAX_TOKENS 10 | from codefuse.observability import mainLogger 11 | 12 | if TYPE_CHECKING: 13 | from codefuse.core.read_tracker import ReadTracker 14 | 15 | 16 | # Edit-specific constants 17 | CONTEXT_LINES = 4 # Number of lines to show before/after edit for confirmation 18 | 19 | 20 | class EditFileTool(FileSystemToolMixin, BaseTool): 21 | """ 22 | Tool for editing file contents with exact string replacement 23 | 24 | Features: 25 | - Requires file to be read before editing (safety check) 26 | - Exact string matching with uniqueness validation 27 | - Support for replace_all mode (rename variables, etc.) 28 | - Shows edit snippet for confirmation 29 | - Workspace restriction and safety checks 30 | """ 31 | 32 | def __init__( 33 | self, 34 | workspace_root: Optional[Path] = None, 35 | read_tracker: Optional["ReadTracker"] = None, 36 | ): 37 | """ 38 | Initialize EditFileTool 39 | 40 | Args: 41 | workspace_root: Workspace root directory to restrict file access. 42 | Defaults to current working directory. 43 | read_tracker: Read tracker for validation that file was read before editing. 44 | """ 45 | super().__init__(workspace_root=workspace_root) 46 | self._read_tracker = read_tracker 47 | 48 | @property 49 | def definition(self) -> ToolDefinition: 50 | """Define the edit_file tool""" 51 | return ToolDefinition( 52 | name="edit_file", 53 | description=( 54 | "Performs exact string replacements in files.\n\n" 55 | "Usage:\n" 56 | "- You MUST use read_file tool at least once before editing. " 57 | "This tool will error if you attempt an edit without reading the file.\n" 58 | "- When editing text from read_file output, ensure you preserve the exact indentation " 59 | "(tabs/spaces) as it appears AFTER the line number prefix. The line number prefix format is: " 60 | "spaces + line number + → + content. Everything after the → is the actual file content to match. " 61 | "Never include any part of the line number prefix in old_string or new_string.\n" 62 | "- ALWAYS prefer editing existing files in the codebase. NEVER write new files unless explicitly required.\n" 63 | "- The edit will FAIL if old_string is not unique in the file. Either provide a larger string " 64 | "with more surrounding context to make it unique or use replace_all to change every instance.\n" 65 | "- Use replace_all for replacing and renaming strings across the file. " 66 | "This parameter is useful if you want to rename a variable for instance.\n\n" 67 | "Important:\n" 68 | "- The file_path parameter MUST be an absolute path, not a relative path\n" 69 | "- old_string must match the file content exactly (including whitespace)\n" 70 | "- new_string must be different from old_string" 71 | ), 72 | parameters=[ 73 | ToolParameter( 74 | name="file_path", 75 | type="string", 76 | description="The absolute path to the file to modify", 77 | required=True, 78 | ), 79 | ToolParameter( 80 | name="old_string", 81 | type="string", 82 | description="The text to replace", 83 | required=True, 84 | ), 85 | ToolParameter( 86 | name="new_string", 87 | type="string", 88 | description="The text to replace it with (must be different from old_string)", 89 | required=True, 90 | ), 91 | ToolParameter( 92 | name="replace_all", 93 | type="boolean", 94 | description="Replace all occurrences of old_string (default false)", 95 | required=False, 96 | ), 97 | ], 98 | requires_confirmation=True, # Editing is dangerous! 
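# Note: requires_confirmation means the CLI is expected to prompt the user
# before each edit runs; with YOLO mode (--yolo / agent_config.yolo) all tool
# executions are auto-confirmed, so this tool proceeds without a prompt.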
99 | ) 100 | 101 | def _generate_edit_snippet( 102 | self, 103 | content: str, 104 | replacement_line: int, 105 | new_content: str, 106 | context_lines: int = CONTEXT_LINES 107 | ) -> Tuple[str, int]: 108 | """ 109 | Generate a snippet showing the edited region with context 110 | 111 | Args: 112 | content: New file content (after replacement) 113 | replacement_line: Line number where replacement started (0-indexed) 114 | new_content: The new string that was inserted 115 | context_lines: Number of context lines to show before/after 116 | 117 | Returns: 118 | Tuple of (formatted_snippet, start_line_number) 119 | """ 120 | lines = content.split('\n') 121 | num_new_lines = new_content.count('\n') 122 | 123 | # Calculate snippet range 124 | start_line = max(0, replacement_line - context_lines) 125 | end_line = min(len(lines), replacement_line + num_new_lines + 1 + context_lines) 126 | 127 | snippet_lines = lines[start_line:end_line] 128 | snippet_content = '\n'.join(snippet_lines) 129 | 130 | # Format with line numbers using inherited method (1-indexed) 131 | formatted_snippet = self._format_with_line_numbers(snippet_content, start_line + 1) 132 | 133 | return formatted_snippet, start_line + 1 134 | 135 | def execute( 136 | self, 137 | file_path: str, 138 | old_string: str, 139 | new_string: str, 140 | replace_all: bool = False, 141 | **kwargs 142 | ) -> ToolResult: 143 | """ 144 | Execute the edit_file tool 145 | 146 | Args: 147 | file_path: Absolute path to the file to edit 148 | old_string: Text to replace 149 | new_string: Replacement text 150 | replace_all: If True, replace all occurrences; if False, only unique occurrences 151 | 152 | Returns: 153 | ToolResult with: 154 | - content: Detailed edit confirmation with snippet for LLM 155 | - display: User-friendly summary for UI 156 | """ 157 | try: 158 | # Step 1: Check if path is absolute 159 | if error := self._check_absolute_path(file_path): 160 | return self._create_error_result(error, "Path must be absolute") 161 | 162 | # Step 2: Resolve path 163 | resolved_path = self._resolve_path(file_path) 164 | 165 | # Step 3: Check if within workspace 166 | if error := self._check_within_workspace(resolved_path): 167 | mainLogger.warning(f"File edit outside workspace: {error}") 168 | return self._create_error_result(error, "Access denied: outside workspace") 169 | 170 | # Step 4: Check file existence 171 | if not resolved_path.exists(): 172 | error_msg = f"File not found: {file_path}" 173 | mainLogger.error(error_msg) 174 | return self._create_error_result(error_msg, "File not found") 175 | 176 | # Step 5: Check it's a file 177 | if not resolved_path.is_file(): 178 | error_msg = f"Path is not a file: {file_path}" 179 | mainLogger.error(error_msg) 180 | return self._create_error_result(error_msg, "Not a file") 181 | 182 | # Step 6: Check if file was read 183 | if self._read_tracker and not self._read_tracker.is_read(str(resolved_path)): 184 | error_msg = ( 185 | f"File has not been read yet: {file_path}. " 186 | f"You must use read_file tool at least once before editing." 
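# Expected recovery flow (illustrative): the agent issues a read_file call on
# the same path, which presumably records it in the ReadTracker, then retries
# this edit_file call.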
187 | ) 188 | mainLogger.warning(error_msg) 189 | return self._create_error_result( 190 | error_msg, 191 | "Must read file before editing" 192 | ) 193 | 194 | # Step 7: Read file with encoding fallback 195 | try: 196 | file_content, encoding = self._read_with_encoding_fallback(resolved_path) 197 | except UnicodeDecodeError as e: 198 | error_msg = f"Cannot read file (encoding error): {file_path}" 199 | mainLogger.error(f"{error_msg}: {e}") 200 | return self._create_error_result(error_msg, "File encoding error") 201 | 202 | # Step 8: Normalize tabs 203 | file_content = file_content.expandtabs() 204 | old_string = old_string.expandtabs() 205 | new_string = new_string.expandtabs() 206 | 207 | # Step 9: Check if old_string == new_string 208 | if old_string == new_string: 209 | error_msg = f"old_string is identical to new_string. No replacement needed." 210 | mainLogger.info(error_msg) 211 | return self._create_error_result(error_msg, "No changes to make") 212 | 213 | # Step 10: Count occurrences 214 | occurrences = file_content.count(old_string) 215 | 216 | if occurrences == 0: 217 | error_msg = ( 218 | f"old_string not found in file. The string to replace does not appear " 219 | f"verbatim in {file_path}. Make sure to match the exact content including " 220 | f"whitespace and indentation." 221 | ) 222 | mainLogger.warning(error_msg) 223 | return self._create_error_result(error_msg, "String not found") 224 | 225 | if occurrences > 1 and not replace_all: 226 | occurrence_lines = self._find_occurrence_lines(file_content, old_string) 227 | error_msg = ( 228 | f"Multiple occurrences of old_string found in lines {occurrence_lines}. " 229 | f"Please ensure it is unique by providing more context, or set replace_all=True " 230 | f"to replace all {occurrences} occurrences." 231 | ) 232 | mainLogger.warning(error_msg) 233 | return self._create_error_result( 234 | error_msg, 235 | f"Not unique ({occurrences} occurrences)" 236 | ) 237 | 238 | # Step 11: Perform replacement 239 | if replace_all: 240 | new_file_content = file_content.replace(old_string, new_string) 241 | num_replacements = occurrences 242 | else: 243 | # Replace only the first (and only) occurrence 244 | new_file_content = file_content.replace(old_string, new_string, 1) 245 | num_replacements = 1 246 | 247 | # Step 12: Check content size limit 248 | if error := self._check_token_limit(new_file_content, MAX_TOKENS): 249 | mainLogger.warning(f"New content too large: {error}") 250 | return self._create_error_result(error, f"Content too large (>{MAX_TOKENS:,} tokens)") 251 | 252 | # Step 13: Write new content 253 | try: 254 | resolved_path.write_text(new_file_content, encoding=encoding) 255 | except Exception as e: 256 | error_msg = f"Failed to write file: {file_path}" 257 | mainLogger.error(f"{error_msg}: {e}", exc_info=True) 258 | return self._create_error_result(error_msg, f"Write failed: {str(e)}") 259 | 260 | # Step 14: Generate edit snippet for confirmation 261 | replacement_line = file_content.split(old_string)[0].count('\n') 262 | snippet, snippet_start_line = self._generate_edit_snippet( 263 | new_file_content, 264 | replacement_line, 265 | new_string, 266 | CONTEXT_LINES 267 | ) 268 | 269 | # Step 15: Prepare success message 270 | action = "all occurrences" if replace_all else "occurrence" 271 | mainLogger.info( 272 | f"Edited {resolved_path} ({num_replacements} {action} replaced)" 273 | ) 274 | 275 | result_content = ( 276 | f"Successfully edited {file_path}. 
" 277 | f"Replaced {num_replacements} {action} of old_string with new_string.\n\n" 278 | f"Here's a snippet of the edited file showing the changes (lines {snippet_start_line}-" 279 | f"{snippet_start_line + snippet.count(chr(10))}):\n" 280 | f"{snippet}\n\n" 281 | f"Review the changes and make sure they are as expected. " 282 | f"Edit the file again if necessary." 283 | ) 284 | 285 | result_display = ( 286 | f"✓ Edited {file_path} ({num_replacements} replacement{'s' if num_replacements > 1 else ''})" 287 | ) 288 | 289 | return ToolResult(content=result_content, display=result_display) 290 | 291 | except PermissionError as e: 292 | error_msg = f"Permission denied editing file: {file_path}" 293 | mainLogger.error(f"{error_msg}: {e}") 294 | return self._create_error_result(error_msg, "Permission denied") 295 | except Exception as e: 296 | error_msg = f"Unexpected error editing file: {file_path}" 297 | mainLogger.error(f"{error_msg}: {e}", exc_info=True) 298 | return self._create_error_result(error_msg, f"Error: {str(e)}") 299 | 300 | --------------------------------------------------------------------------------